On 7/28/23 16:00, Ira Weiny wrote:
> Using the proposed type-2 cxl-test device[1] the following
> splat was observed:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000278
> [...]
> RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
> [...]
> Call Trace:
> <TASK>
> ? __die+0x1f/0x70
> ? page_fault_oops+0x149/0x420
> ? fixup_exception+0x22/0x310
> ? kernelmode_fixup_or_oops+0x84/0x110
> ? exc_page_fault+0x6d/0x150
> ? asm_exc_page_fault+0x22/0x30
> ? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
> cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem]
> platform_probe+0x40/0x90
> really_probe+0x19e/0x3e0
> ? __pfx___driver_attach+0x10/0x10
> __driver_probe_device+0x78/0x160
> driver_probe_device+0x1f/0x90
> __driver_attach+0xce/0x1c0
> bus_for_each_dev+0x63/0xa0
> bus_add_driver+0x112/0x210
> driver_register+0x55/0x100
> ? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem]
> [...]
>
> Commit f6b8ab32e3ec made the mailbox functionality optional. However,
> some mailbox functionality was merged after that patch. Therefore some
> mailbox functionality can be accessed on a device which did not set up
> the mailbox.
>
> While no devices currently exist, commit f6b8ab32e3ec is incomplete.
> Complete the checks for memdev state to bring the code to a consistent
> state for when type-2 devices are introduced.
>
> [1] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@dwillia2-xfh.jf.intel.com/
>
> Fixes: f6b8ab32e3ec ("cxl/memdev: Make mailbox functionality optional")
> Cc: Dan Williams <[email protected]>
> Signed-off-by: Ira Weiny <[email protected]>
I think you'll need to coordinate with this patch from Davidlohr?
https://lore.kernel.org/linux-cxl/[email protected]/
> ---
> drivers/cxl/core/mbox.c | 9 +++++++++
> drivers/cxl/core/memdev.c | 26 ++++++++++++++++++++++++++
> drivers/cxl/mem.c | 18 ++++++++++--------
> drivers/cxl/pci.c | 5 ++++-
> drivers/cxl/pmem.c | 3 +++
> 5 files changed, 52 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index d6d067fbee97..eb1758fb8cdf 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -482,6 +482,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
>
> dev_dbg(dev, "Query IOCTL\n");
>
> + if (!mds)
> + return -EIO;
> +
> if (get_user(n_commands, &q->n_commands))
> return -EFAULT;
>
> @@ -586,6 +589,9 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
>
> dev_dbg(dev, "Send IOCTL\n");
>
> + if (!mds)
> + return -EIO;
> +
> if (copy_from_user(&send, s, sizeof(send)))
> return -EFAULT;
>
> @@ -1245,6 +1251,9 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
> int nr_records = 0;
> int rc;
>
> + if (!mds)
> + return -EIO;
> +
> rc = mutex_lock_interruptible(&mds->poison.lock);
> if (rc)
> return rc;
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index f99e7ec3cc40..629e479f751b 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -201,6 +201,19 @@ static ssize_t security_erase_store(struct device *dev,
> static struct device_attribute dev_attr_security_erase =
> __ATTR(erase, 0200, NULL, security_erase_store);
>
> +static umode_t cxl_memdev_security_visible(struct kobject *kobj,
> + struct attribute *a, int n)
> +{
> + struct device *dev = kobj_to_dev(kobj);
> + struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
> +
> + if (!mds)
> + return 0;
> +
> + return a->mode;
> +}
> +
> static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
> {
> struct cxl_dev_state *cxlds = cxlmd->cxlds;
> @@ -332,6 +345,9 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
> struct cxl_region *cxlr;
> int rc;
>
> + if (!mds)
> + return -EIO;
> +
> if (!IS_ENABLED(CONFIG_DEBUG_FS))
> return 0;
>
> @@ -380,6 +396,9 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
> struct cxl_region *cxlr;
> int rc;
>
> + if (!mds)
> + return -EIO;
> +
> if (!IS_ENABLED(CONFIG_DEBUG_FS))
> return 0;
>
> @@ -480,6 +499,7 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = {
> static struct attribute_group cxl_memdev_security_attribute_group = {
> .name = "security",
> .attrs = cxl_memdev_security_attributes,
> + .is_visible = cxl_memdev_security_visible,
> };
>
> static const struct attribute_group *cxl_memdev_attribute_groups[] = {
> @@ -542,6 +562,9 @@ static void cxl_memdev_security_shutdown(struct device *dev)
> struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
>
> + if (!mds)
> + return;
> +
> if (mds->security.poll)
> cancel_delayed_work_sync(&mds->security.poll_dwork);
> }
> @@ -997,6 +1020,9 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
> struct device *dev = &cxlmd->dev;
> struct kernfs_node *sec;
>
> + if (!mds)
> + return 0;
> +
> sec = sysfs_get_dirent(dev->kobj.sd, "security");
> if (!sec) {
> dev_err(dev, "sysfs_get_dirent 'security' failed\n");
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 317c7548e4e9..4755a890018d 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -132,12 +132,14 @@ static int cxl_mem_probe(struct device *dev)
> dentry = cxl_debugfs_create_dir(dev_name(dev));
> debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
>
> - if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
> - debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
> - &cxl_poison_inject_fops);
> - if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
> - debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
> - &cxl_poison_clear_fops);
> + if (mds) {
> + if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
> + debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
> + &cxl_poison_inject_fops);
> + if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
> + debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
> + &cxl_poison_clear_fops);
> + }
>
> rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
> if (rc)
> @@ -222,8 +224,8 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
> struct cxl_memdev_state *mds =
> to_cxl_memdev_state(cxlmd->cxlds);
>
> - if (!test_bit(CXL_POISON_ENABLED_LIST,
> - mds->poison.enabled_cmds))
> + if (!mds || !test_bit(CXL_POISON_ENABLED_LIST,
> + mds->poison.enabled_cmds))
> return 0;
> }
> return a->mode;
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 1cb1494c28fe..93f6140432cd 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -122,7 +122,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
> struct cxl_dev_state *cxlds = dev_id->cxlds;
> struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
>
> - if (!cxl_mbox_background_complete(cxlds))
> + if (!mds || !cxl_mbox_background_complete(cxlds))
> return IRQ_NONE;
>
> reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> @@ -624,6 +624,9 @@ static irqreturn_t cxl_event_thread(int irq, void *id)
> struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> u32 status;
>
> + if (!mds)
> + return IRQ_HANDLED;
> +
> do {
> /*
> * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
> diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> index 7cb8994f8809..f1adfdd1a2b3 100644
> --- a/drivers/cxl/pmem.c
> +++ b/drivers/cxl/pmem.c
> @@ -70,6 +70,9 @@ static int cxl_nvdimm_probe(struct device *dev)
> struct nvdimm *nvdimm;
> int rc;
>
> + if (WARN_ON_ONCE(!mds))
> + return -EIO;
> +
> set_exclusive_cxl_commands(mds, exclusive_cmds);
> rc = devm_add_action_or_reset(dev, clear_exclusive, mds);
> if (rc)
>
> ---
> base-commit: 20ea1e7d13c1b544fe67c4a8dc3943bb1ab33e6f
> change-id: 20230728-cxl-fix-devmemdev-5003ce927f68
>
> Best regards,