2022-04-22 19:22:00

by Dan Williams

[permalink] [raw]
Subject: [PATCH v3 0/8] device-core: Enable device_lock() lockdep validation

Changes since v2 [1]
- Use lockdep_set_class(), lockdep_set_class_and_subclass(), and
lock_set_class() instead of a 'lockdep_mutex' in 'struct device'.
(Peter and Waiman)
- Include a fix identified by this new infrastructure

[1]: https://lore.kernel.org/r/164982968798.684294.15817853329823976469.stgit@dwillia2-desk3.amr.corp.intel.com

The device_lock() uses lockdep_set_novalidate_class() because it is
taken in too many contexts that cannot be described by a single mutex
lock class. The lack of lockdep coverage leads to deadlock scenarios
landing upstream. To mitigate that problem the lockdep_mutex was added
[2].

The lockdep_mutex, however, is an unscalable hack that overlooks
advancements in the lockdep API to change a given lock's lock class [3].
With lockdep_set_class() a device subsystem can initialize a dedicated
lock class per device type at device creation time, and with
lock_set_class() a device-driver can temporarily override a lockdep
class after-the-fact. Use lockdep class assignment APIs to replace the
usage of lockdep_mutex in the CXL and NVDIMM subsystems, and delete
lockdep_mutex.

[2]: commit 87a30e1f05d7 ("driver-core, libnvdimm: Let device subsystems add local lockdep coverage")
[3]: https://lore.kernel.org/r/[email protected]

---

Dan Williams (8):
cxl: Replace lockdep_mutex with local lock classes
cxl/acpi: Add root device lockdep validation
cxl: Drop cxl_device_lock()
nvdimm: Replace lockdep_mutex with local lock classes
ACPI: NFIT: Drop nfit_device_lock()
nvdimm: Drop nd_device_lock()
device-core: Kill the lockdep_mutex
nvdimm: Fix firmware activation deadlock scenarios


drivers/acpi/nfit/core.c | 30 ++++++++-------
drivers/acpi/nfit/nfit.h | 24 ------------
drivers/base/core.c | 3 --
drivers/cxl/acpi.c | 15 ++++++++
drivers/cxl/core/memdev.c | 3 ++
drivers/cxl/core/pmem.c | 10 ++++-
drivers/cxl/core/port.c | 68 ++++++++++++++++------------------
drivers/cxl/cxl.h | 78 ---------------------------------------
drivers/cxl/mem.c | 4 +-
drivers/cxl/pmem.c | 12 +++---
drivers/nvdimm/btt_devs.c | 23 +++++++-----
drivers/nvdimm/bus.c | 38 ++++++++-----------
drivers/nvdimm/core.c | 14 +++----
drivers/nvdimm/dax_devs.c | 4 +-
drivers/nvdimm/dimm_devs.c | 12 ++++--
drivers/nvdimm/namespace_devs.c | 46 ++++++++++++++---------
drivers/nvdimm/nd-core.h | 68 +---------------------------------
drivers/nvdimm/pfn_devs.c | 31 +++++++++-------
drivers/nvdimm/pmem.c | 2 +
drivers/nvdimm/region.c | 2 +
drivers/nvdimm/region_devs.c | 20 ++++++----
include/linux/device.h | 30 +++++++++++++--
lib/Kconfig.debug | 23 ------------
23 files changed, 209 insertions(+), 351 deletions(-)

base-commit: ce522ba9ef7e2d9fb22a39eb3371c0c64e2a433e


2022-04-22 22:08:42

by Dan Williams

[permalink] [raw]
Subject: [PATCH v3 1/8] cxl: Replace lockdep_mutex with local lock classes

In response to an attempt to expand dev->lockdep_mutex for device_lock()
validation [1], Peter points out [2] that the lockdep API already has
the ability to assign a dedicated lock class per subsystem device-type.

Use lockdep_set_class() to override the default device_lock()
'__lockdep_no_validate__' class for each CXL subsystem device-type. This
enables lockdep to detect deadlocks and recursive locking within the
device-driver core and the subsystem. The
lockdep_set_class_and_subclass() API is used for port objects that
recursively lock the 'cxl_port_key' class by hierarchical topology
depth.

Link: https://lore.kernel.org/r/164982968798.684294.15817853329823976469.stgit@dwillia2-desk3.amr.corp.intel.com [1]
Link: https://lore.kernel.org/r/[email protected] [2]
Suggested-by: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Waiman Long <[email protected]>
Cc: Boqun Feng <[email protected]>
Cc: Alison Schofield <[email protected]>
Cc: Vishal Verma <[email protected]>
Cc: Ira Weiny <[email protected]>
Cc: Ben Widawsky <[email protected]>
Cc: Jonathan Cameron <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
---
drivers/cxl/core/memdev.c | 3 +++
drivers/cxl/core/pmem.c | 6 ++++++
drivers/cxl/core/port.c | 13 +++++++++----
3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 1f76b28f9826..f7cdcd33504a 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -228,6 +228,8 @@ static void detach_memdev(struct work_struct *work)
put_device(&cxlmd->dev);
}

+static struct lock_class_key cxl_memdev_key;
+
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
const struct file_operations *fops)
{
@@ -247,6 +249,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,

dev = &cxlmd->dev;
device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_memdev_key);
dev->parent = cxlds->dev;
dev->bus = &cxl_bus_type;
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 8de240c4d96b..e825e261278d 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -80,6 +80,8 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
}
EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL);

+static struct lock_class_key cxl_nvdimm_bridge_key;
+
static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
{
struct cxl_nvdimm_bridge *cxl_nvb;
@@ -99,6 +101,7 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
cxl_nvb->port = port;
cxl_nvb->state = CXL_NVB_NEW;
device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key);
device_set_pm_not_required(dev);
dev->parent = &port->dev;
dev->bus = &cxl_bus_type;
@@ -214,6 +217,8 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL);

+static struct lock_class_key cxl_nvdimm_key;
+
static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm *cxl_nvd;
@@ -226,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
dev = &cxl_nvd->dev;
cxl_nvd->cxlmd = cxlmd;
device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
device_set_pm_not_required(dev);
dev->parent = &cxlmd->dev;
dev->bus = &cxl_bus_type;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 2ab1ba4499b3..750aac95ed5f 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -391,6 +391,8 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
return devm_add_action_or_reset(host, cxl_unlink_uport, port);
}

+static struct lock_class_key cxl_port_key;
+
static struct cxl_port *cxl_port_alloc(struct device *uport,
resource_size_t component_reg_phys,
struct cxl_port *parent_port)
@@ -415,9 +417,10 @@ static struct cxl_port *cxl_port_alloc(struct device *uport,
* description.
*/
dev = &port->dev;
- if (parent_port)
+ if (parent_port) {
dev->parent = &parent_port->dev;
- else
+ port->depth = parent_port->depth + 1;
+ } else
dev->parent = uport;

port->uport = uport;
@@ -427,6 +430,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport,
INIT_LIST_HEAD(&port->endpoints);

device_initialize(dev);
+ lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth);
device_set_pm_not_required(dev);
dev->bus = &cxl_bus_type;
dev->type = &cxl_port_type;
@@ -457,8 +461,6 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
if (IS_ERR(port))
return port;

- if (parent_port)
- port->depth = parent_port->depth + 1;
dev = &port->dev;
if (is_cxl_memdev(uport))
rc = dev_set_name(dev, "endpoint%d", port->id);
@@ -1173,6 +1175,8 @@ static int decoder_populate_targets(struct cxl_decoder *cxld,
return rc;
}

+static struct lock_class_key cxl_decoder_key;
+
/**
* cxl_decoder_alloc - Allocate a new CXL decoder
* @port: owning port of this decoder
@@ -1214,6 +1218,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port,
seqlock_init(&cxld->target_lock);
dev = &cxld->dev;
device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_decoder_key);
device_set_pm_not_required(dev);
dev->parent = &port->dev;
dev->bus = &cxl_bus_type;

2022-04-23 00:21:55

by Ira Weiny

[permalink] [raw]
Subject: Re: [PATCH v3 1/8] cxl: Replace lockdep_mutex with local lock classes

On Thu, Apr 21, 2022 at 08:33:13AM -0700, Dan Williams wrote:
> In response to an attempt to expand dev->lockdep_mutex for device_lock()
> validation [1], Peter points out [2] that the lockdep API already has
> the ability to assign a dedicated lock class per subsystem device-type.
>
> Use lockdep_set_class() to override the default device_lock()
> '__lockdep_no_validate__' class for each CXL subsystem device-type. This
> enables lockdep to detect deadlocks and recursive locking within the
> device-driver core and the subsystem. The
> lockdep_set_class_and_subclass() API is used for port objects that
> recursively lock the 'cxl_port_key' class by hierarchical topology
> depth.
>
> Link: https://lore.kernel.org/r/164982968798.684294.15817853329823976469.stgit@dwillia2-desk3.amr.corp.intel.com [1]
> Link: https://lore.kernel.org/r/[email protected] [2]
> Suggested-by: Peter Zijlstra <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Waiman Long <[email protected]>
> Cc: Boqun Feng <[email protected]>
> Cc: Alison Schofield <[email protected]>
> Cc: Vishal Verma <[email protected]>
> Cc: Ira Weiny <[email protected]>
> Cc: Ben Widawsky <[email protected]>
> Cc: Jonathan Cameron <[email protected]>
> Signed-off-by: Dan Williams <[email protected]>

Reviewed-by: Ira Weiny <[email protected]>

> ---
> drivers/cxl/core/memdev.c | 3 +++
> drivers/cxl/core/pmem.c | 6 ++++++
> drivers/cxl/core/port.c | 13 +++++++++----
> 3 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 1f76b28f9826..f7cdcd33504a 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -228,6 +228,8 @@ static void detach_memdev(struct work_struct *work)
> put_device(&cxlmd->dev);
> }
>
> +static struct lock_class_key cxl_memdev_key;
> +
> static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
> const struct file_operations *fops)
> {
> @@ -247,6 +249,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
>
> dev = &cxlmd->dev;
> device_initialize(dev);
> + lockdep_set_class(&dev->mutex, &cxl_memdev_key);
> dev->parent = cxlds->dev;
> dev->bus = &cxl_bus_type;
> dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
> diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
> index 8de240c4d96b..e825e261278d 100644
> --- a/drivers/cxl/core/pmem.c
> +++ b/drivers/cxl/core/pmem.c
> @@ -80,6 +80,8 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL);
>
> +static struct lock_class_key cxl_nvdimm_bridge_key;
> +
> static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
> {
> struct cxl_nvdimm_bridge *cxl_nvb;
> @@ -99,6 +101,7 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
> cxl_nvb->port = port;
> cxl_nvb->state = CXL_NVB_NEW;
> device_initialize(dev);
> + lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key);
> device_set_pm_not_required(dev);
> dev->parent = &port->dev;
> dev->bus = &cxl_bus_type;
> @@ -214,6 +217,8 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
> }
> EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL);
>
> +static struct lock_class_key cxl_nvdimm_key;
> +
> static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
> {
> struct cxl_nvdimm *cxl_nvd;
> @@ -226,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
> dev = &cxl_nvd->dev;
> cxl_nvd->cxlmd = cxlmd;
> device_initialize(dev);
> + lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
> device_set_pm_not_required(dev);
> dev->parent = &cxlmd->dev;
> dev->bus = &cxl_bus_type;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 2ab1ba4499b3..750aac95ed5f 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -391,6 +391,8 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
> return devm_add_action_or_reset(host, cxl_unlink_uport, port);
> }
>
> +static struct lock_class_key cxl_port_key;
> +
> static struct cxl_port *cxl_port_alloc(struct device *uport,
> resource_size_t component_reg_phys,
> struct cxl_port *parent_port)
> @@ -415,9 +417,10 @@ static struct cxl_port *cxl_port_alloc(struct device *uport,
> * description.
> */
> dev = &port->dev;
> - if (parent_port)
> + if (parent_port) {
> dev->parent = &parent_port->dev;
> - else
> + port->depth = parent_port->depth + 1;
> + } else
> dev->parent = uport;
>
> port->uport = uport;
> @@ -427,6 +430,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport,
> INIT_LIST_HEAD(&port->endpoints);
>
> device_initialize(dev);
> + lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth);
> device_set_pm_not_required(dev);
> dev->bus = &cxl_bus_type;
> dev->type = &cxl_port_type;
> @@ -457,8 +461,6 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
> if (IS_ERR(port))
> return port;
>
> - if (parent_port)
> - port->depth = parent_port->depth + 1;
> dev = &port->dev;
> if (is_cxl_memdev(uport))
> rc = dev_set_name(dev, "endpoint%d", port->id);
> @@ -1173,6 +1175,8 @@ static int decoder_populate_targets(struct cxl_decoder *cxld,
> return rc;
> }
>
> +static struct lock_class_key cxl_decoder_key;
> +
> /**
> * cxl_decoder_alloc - Allocate a new CXL decoder
> * @port: owning port of this decoder
> @@ -1214,6 +1218,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port,
> seqlock_init(&cxld->target_lock);
> dev = &cxld->dev;
> device_initialize(dev);
> + lockdep_set_class(&dev->mutex, &cxl_decoder_key);
> device_set_pm_not_required(dev);
> dev->parent = &port->dev;
> dev->bus = &cxl_bus_type;
>