2023-06-20 13:39:07

by Usyskin, Alexander

[permalink] [raw]
Subject: [PATCH 0/2] mtd: prepare for dynamically removed devices

Prepare the mtd subsystem for devices that can be dynamically removed,
like memory on a PCIe card.
Use a refcount to prevent crashes when the underlying device is
removed unexpectedly, and reshuffle the __get and __put functions
to allow the underlying device to clean up its memory according to the refcount.

Alexander Usyskin (1):
mtd: call external _get and _put in right order

Tomas Winkler (1):
mtd: use refcount to prevent corruption

drivers/mtd/mtdcore.c | 86 ++++++++++++++++++++++-------------------
drivers/mtd/mtdcore.h | 1 +
drivers/mtd/mtdpart.c | 14 ++++---
include/linux/mtd/mtd.h | 2 +-
4 files changed, 56 insertions(+), 47 deletions(-)

--
2.34.1



2023-06-20 13:39:41

by Usyskin, Alexander

[permalink] [raw]
Subject: [PATCH 1/2] mtd: use refcount to prevent corruption

From: Tomas Winkler <[email protected]>

When the underlying device is removed, the mtd core will crash
if user space is holding an open handle.
Use proper refcounting so the device is released
only when it has no users.

Signed-off-by: Tomas Winkler <[email protected]>
Signed-off-by: Alexander Usyskin <[email protected]>
---
drivers/mtd/mtdcore.c | 72 ++++++++++++++++++++++-------------------
drivers/mtd/mtdcore.h | 1 +
drivers/mtd/mtdpart.c | 14 ++++----
include/linux/mtd/mtd.h | 2 +-
4 files changed, 49 insertions(+), 40 deletions(-)

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index abf4cb58a8ab..84bd1878367d 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -93,10 +93,33 @@ static void mtd_release(struct device *dev)
struct mtd_info *mtd = dev_get_drvdata(dev);
dev_t index = MTD_DEVT(mtd->index);

+ if (mtd_is_partition(mtd))
+ release_mtd_partition(mtd);
+
/* remove /dev/mtdXro node */
device_destroy(&mtd_class, index + 1);
}

+static void mtd_device_release(struct kref *kref)
+{
+ struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt);
+
+ debugfs_remove_recursive(mtd->dbg.dfs_dir);
+
+ /* Try to remove the NVMEM provider */
+ nvmem_unregister(mtd->nvmem);
+
+ device_unregister(&mtd->dev);
+
+ /* Clear dev so mtd can be safely re-registered later if desired */
+ memset(&mtd->dev, 0, sizeof(mtd->dev));
+
+ idr_remove(&mtd_idr, mtd->index);
+ of_node_put(mtd_get_of_node(mtd));
+
+ module_put(THIS_MODULE);
+}
+
#define MTD_DEVICE_ATTR_RO(name) \
static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL)

@@ -666,7 +689,7 @@ int add_mtd_device(struct mtd_info *mtd)
}

mtd->index = i;
- mtd->usecount = 0;
+ kref_init(&mtd->refcnt);

/* default value if not set by driver */
if (mtd->bitflip_threshold == 0)
@@ -779,7 +802,6 @@ int del_mtd_device(struct mtd_info *mtd)
{
int ret;
struct mtd_notifier *not;
- struct device_node *mtd_of_node;

mutex_lock(&mtd_table_mutex);

@@ -793,28 +815,8 @@ int del_mtd_device(struct mtd_info *mtd)
list_for_each_entry(not, &mtd_notifiers, list)
not->remove(mtd);

- if (mtd->usecount) {
- printk(KERN_NOTICE "Removing MTD device #%d (%s) with use count %d\n",
- mtd->index, mtd->name, mtd->usecount);
- ret = -EBUSY;
- } else {
- mtd_of_node = mtd_get_of_node(mtd);
- debugfs_remove_recursive(mtd->dbg.dfs_dir);
-
- /* Try to remove the NVMEM provider */
- nvmem_unregister(mtd->nvmem);
-
- device_unregister(&mtd->dev);
-
- /* Clear dev so mtd can be safely re-registered later if desired */
- memset(&mtd->dev, 0, sizeof(mtd->dev));
-
- idr_remove(&mtd_idr, mtd->index);
- of_node_put(mtd_of_node);
-
- module_put(THIS_MODULE);
- ret = 0;
- }
+ kref_put(&mtd->refcnt, mtd_device_release);
+ ret = 0;

out_error:
mutex_unlock(&mtd_table_mutex);
@@ -1228,19 +1230,21 @@ int __get_mtd_device(struct mtd_info *mtd)
if (!try_module_get(master->owner))
return -ENODEV;

+ kref_get(&mtd->refcnt);
+
if (master->_get_device) {
err = master->_get_device(mtd);

if (err) {
+ kref_put(&mtd->refcnt, mtd_device_release);
module_put(master->owner);
return err;
}
}

- master->usecount++;
-
while (mtd->parent) {
- mtd->usecount++;
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master)
+ kref_get(&mtd->parent->refcnt);
mtd = mtd->parent;
}

@@ -1327,18 +1331,20 @@ void __put_mtd_device(struct mtd_info *mtd)
{
struct mtd_info *master = mtd_get_master(mtd);

- while (mtd->parent) {
- --mtd->usecount;
- BUG_ON(mtd->usecount < 0);
- mtd = mtd->parent;
- }
+ while (mtd != master) {
+ struct mtd_info *parent = mtd->parent;

- master->usecount--;
+ kref_put(&mtd->refcnt, mtd_device_release);
+ mtd = parent;
+ }

if (master->_put_device)
master->_put_device(master);

module_put(master->owner);
+
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+ kref_put(&master->refcnt, mtd_device_release);
}
EXPORT_SYMBOL_GPL(__put_mtd_device);

diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
index b5eefeabf310..b014861a06a6 100644
--- a/drivers/mtd/mtdcore.h
+++ b/drivers/mtd/mtdcore.h
@@ -12,6 +12,7 @@ int __must_check add_mtd_device(struct mtd_info *mtd);
int del_mtd_device(struct mtd_info *mtd);
int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int);
int del_mtd_partitions(struct mtd_info *);
+void release_mtd_partition(struct mtd_info *mtd);

struct mtd_partitions;

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index a46affbb037d..23483db8f30c 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -32,6 +32,12 @@ static inline void free_partition(struct mtd_info *mtd)
kfree(mtd);
}

+void release_mtd_partition(struct mtd_info *mtd)
+{
+ WARN_ON(!list_empty(&mtd->part.node));
+ free_partition(mtd);
+}
+
static struct mtd_info *allocate_partition(struct mtd_info *parent,
const struct mtd_partition *part,
int partno, uint64_t cur_offset)
@@ -309,13 +315,11 @@ static int __mtd_del_partition(struct mtd_info *mtd)

sysfs_remove_files(&mtd->dev.kobj, mtd_partition_attrs);

+ list_del_init(&mtd->part.node);
err = del_mtd_device(mtd);
if (err)
return err;

- list_del(&mtd->part.node);
- free_partition(mtd);
-
return 0;
}

@@ -333,6 +337,7 @@ static int __del_mtd_partitions(struct mtd_info *mtd)
__del_mtd_partitions(child);

pr_info("Deleting %s MTD partition\n", child->name);
+ list_del_init(&child->part.node);
ret = del_mtd_device(child);
if (ret < 0) {
pr_err("Error when deleting partition \"%s\" (%d)\n",
@@ -340,9 +345,6 @@ static int __del_mtd_partitions(struct mtd_info *mtd)
err = ret;
continue;
}
-
- list_del(&child->part.node);
- free_partition(child);
}

return err;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7c58c44662b8..914a9f974baa 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -379,7 +379,7 @@ struct mtd_info {

struct module *owner;
struct device dev;
- int usecount;
+ struct kref refcnt;
struct mtd_debug_info dbg;
struct nvmem_device *nvmem;
struct nvmem_device *otp_user_nvmem;
--
2.34.1


2023-06-20 13:41:03

by Usyskin, Alexander

[permalink] [raw]
Subject: [PATCH 2/2] mtd: call external _get and _put in right order

The MTD provider provides the mtd_info object to the mtd subsystem.
With the kref patch, the mtd_info object can stay alive after
the provider has released the mtd device.
Fix the calling order in the _get and _put functions to allow
the mtd provider to safely allocate and release the mtd object.

Execute:
1) call external _get
2) get_module
3) add internal kref
in the get function and opposite order in the put one.

The _put_device callback should be the last in put
as the master struct memory may be freed in this callback.

Signed-off-by: Alexander Usyskin <[email protected]>
---
drivers/mtd/mtdcore.c | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 84bd1878367d..a5bc60013edf 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1227,21 +1227,20 @@ int __get_mtd_device(struct mtd_info *mtd)
struct mtd_info *master = mtd_get_master(mtd);
int err;

- if (!try_module_get(master->owner))
- return -ENODEV;
-
- kref_get(&mtd->refcnt);
-
if (master->_get_device) {
err = master->_get_device(mtd);
-
- if (err) {
- kref_put(&mtd->refcnt, mtd_device_release);
- module_put(master->owner);
+ if (err)
return err;
- }
}

+ if (!try_module_get(master->owner)) {
+ if (master->_put_device)
+ master->_put_device(master);
+ return -ENODEV;
+ }
+
+ kref_get(&mtd->refcnt);
+
while (mtd->parent) {
if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master)
kref_get(&mtd->parent->refcnt);
@@ -1338,13 +1337,14 @@ void __put_mtd_device(struct mtd_info *mtd)
mtd = parent;
}

- if (master->_put_device)
- master->_put_device(master);
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+ kref_put(&master->refcnt, mtd_device_release);

module_put(master->owner);

- if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
- kref_put(&master->refcnt, mtd_device_release);
+ /* must be the last as master can be freed in the _put_device */
+ if (master->_put_device)
+ master->_put_device(master);
}
EXPORT_SYMBOL_GPL(__put_mtd_device);

--
2.34.1


2023-06-22 08:53:06

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 0/2] mtd: prepare for dynamically removed devices

Hi Alexander,

[email protected] wrote on Tue, 20 Jun 2023 16:19:03 +0300:

> Prepare mtd subsystem for devices that can be dynamically removed,
> like memory on PCIE card.
> Use refcount to prevent crashes when underlying device
> removed unexpectedly and reshuffle __get and __put functions
> to allow underlying device to clean it memory according to refcount.
>
> Alexander Usyskin (1):
> mtd: call external _get and _put in right order
>
> Tomas Winkler (1):
> mtd: use refcount to prevent corruption
>
> drivers/mtd/mtdcore.c | 86 ++++++++++++++++++++++-------------------
> drivers/mtd/mtdcore.h | 1 +
> drivers/mtd/mtdpart.c | 14 ++++---
> include/linux/mtd/mtd.h | 2 +-
> 4 files changed, 56 insertions(+), 47 deletions(-)
>

The idea is of course great, the implementation looks fine, but the
risk is too high at -rc7 so I'll take this at -rc1 for the next
release.

Thanks, Miquèl

2023-07-12 14:16:51

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 2/2] mtd: call external _get and _put in right order

On Tue, 2023-06-20 at 13:19:05 UTC, Alexander Usyskin wrote:
> MTD provider provides mtd_info object to mtd subsystem.
> With kref patch the mtd_info object can be alive after
> provider released mtd device.
> Fix calling order in _get and _put functions to allow
> mtd provider to safely alloc and release mtd object.
>
> Execute:
> 1) call external _get
> 2) get_module
> 3) add internal kref
> in the get function and opposite order in the put one.
>
> The _put_device callback should be the last in put
> as the master struct memory may be freed in this callback.
>
> Signed-off-by: Alexander Usyskin <[email protected]>

Applied to https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/next, thanks.

Miquel

2023-07-12 14:25:06

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 1/2] mtd: use refcount to prevent corruption

On Tue, 2023-06-20 at 13:19:04 UTC, Alexander Usyskin wrote:
> From: Tomas Winkler <[email protected]>
>
> When underlying device is removed mtd core will crash
> in case user space is holding open handle.
> Need to use proper refcounting so device is release
> only when has no users.
>
> Signed-off-by: Tomas Winkler <[email protected]>
> Signed-off-by: Alexander Usyskin <[email protected]>

Applied to https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/next, thanks.

Miquel

2023-07-14 16:28:36

by Fabrizio Castro

[permalink] [raw]
Subject: RE: [PATCH 1/2] mtd: use refcount to prevent corruption

Dear All,

I am sorry for reopening this topic, but as it turns out (after bisecting
linux-next/master) this patch is interfering with a use case I am working
on.

I am using a Renesas RZ/V2M EVK v2.0 platform, I have an SPI NOR memory
("micron,mt25ql256a") wired up to a connector on the platform, the SPI
master is using driver (built as module):
drivers/spi/spi-rzv2m-csi.c

Although the board device tree in mainline does not reflect the connection
of CSI4 (which is the SPI master) from the SoC to the "micron,mt25ql256a"
(SPI slave device), my local device tree comes with the necessary definitions.

Without this patch, when I load up the module, I get the below 3 devices:
/dev/mtd0
/dev/mtd0ro
/dev/mtdblock0

They get cleaned up correctly upon module removal.
I can reload the same module, and everything works just fine.

With this patch applied, when I load up the module, I get the same 3
devices:
/dev/mtd0
/dev/mtd0ro
/dev/mtdblock0

Upon removal, the below 2 devices still hang around:
/dev/mtd0
/dev/mtd0ro

Preventing the module from being (re)loaded correctly:
rzv2m_csi a4020200.spi: error -EBUSY: register controller failed
rzv2m_csi: probe of a4020200.spi failed with error -16

Are you guys aware of this sort of side effect?

Thanks,
Fab

> From: Alexander Usyskin <[email protected]>
> Subject: [PATCH 1/2] mtd: use refcount to prevent corruption
>
> From: Tomas Winkler <[email protected]>
>
> When underlying device is removed mtd core will crash
> in case user space is holding open handle.
> Need to use proper refcounting so device is release
> only when has no users.
>
> Signed-off-by: Tomas Winkler <[email protected]>
> Signed-off-by: Alexander Usyskin <[email protected]>
> ---
> drivers/mtd/mtdcore.c | 72 ++++++++++++++++++++++------------------
> -
> drivers/mtd/mtdcore.h | 1 +
> drivers/mtd/mtdpart.c | 14 ++++----
> include/linux/mtd/mtd.h | 2 +-
> 4 files changed, 49 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> index abf4cb58a8ab..84bd1878367d 100644
> --- a/drivers/mtd/mtdcore.c
> +++ b/drivers/mtd/mtdcore.c
> @@ -93,10 +93,33 @@ static void mtd_release(struct device *dev)
> struct mtd_info *mtd = dev_get_drvdata(dev);
> dev_t index = MTD_DEVT(mtd->index);
>
> + if (mtd_is_partition(mtd))
> + release_mtd_partition(mtd);
> +
> /* remove /dev/mtdXro node */
> device_destroy(&mtd_class, index + 1);
> }
>
> +static void mtd_device_release(struct kref *kref)
> +{
> + struct mtd_info *mtd = container_of(kref, struct mtd_info,
> refcnt);
> +
> + debugfs_remove_recursive(mtd->dbg.dfs_dir);
> +
> + /* Try to remove the NVMEM provider */
> + nvmem_unregister(mtd->nvmem);
> +
> + device_unregister(&mtd->dev);
> +
> + /* Clear dev so mtd can be safely re-registered later if desired
> */
> + memset(&mtd->dev, 0, sizeof(mtd->dev));
> +
> + idr_remove(&mtd_idr, mtd->index);
> + of_node_put(mtd_get_of_node(mtd));
> +
> + module_put(THIS_MODULE);
> +}
> +
> #define MTD_DEVICE_ATTR_RO(name) \
> static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL)
>
> @@ -666,7 +689,7 @@ int add_mtd_device(struct mtd_info *mtd)
> }
>
> mtd->index = i;
> - mtd->usecount = 0;
> + kref_init(&mtd->refcnt);
>
> /* default value if not set by driver */
> if (mtd->bitflip_threshold == 0)
> @@ -779,7 +802,6 @@ int del_mtd_device(struct mtd_info *mtd)
> {
> int ret;
> struct mtd_notifier *not;
> - struct device_node *mtd_of_node;
>
> mutex_lock(&mtd_table_mutex);
>
> @@ -793,28 +815,8 @@ int del_mtd_device(struct mtd_info *mtd)
> list_for_each_entry(not, &mtd_notifiers, list)
> not->remove(mtd);
>
> - if (mtd->usecount) {
> - printk(KERN_NOTICE "Removing MTD device #%d (%s) with use
> count %d\n",
> - mtd->index, mtd->name, mtd->usecount);
> - ret = -EBUSY;
> - } else {
> - mtd_of_node = mtd_get_of_node(mtd);
> - debugfs_remove_recursive(mtd->dbg.dfs_dir);
> -
> - /* Try to remove the NVMEM provider */
> - nvmem_unregister(mtd->nvmem);
> -
> - device_unregister(&mtd->dev);
> -
> - /* Clear dev so mtd can be safely re-registered later if
> desired */
> - memset(&mtd->dev, 0, sizeof(mtd->dev));
> -
> - idr_remove(&mtd_idr, mtd->index);
> - of_node_put(mtd_of_node);
> -
> - module_put(THIS_MODULE);
> - ret = 0;
> - }
> + kref_put(&mtd->refcnt, mtd_device_release);
> + ret = 0;
>
> out_error:
> mutex_unlock(&mtd_table_mutex);
> @@ -1228,19 +1230,21 @@ int __get_mtd_device(struct mtd_info *mtd)
> if (!try_module_get(master->owner))
> return -ENODEV;
>
> + kref_get(&mtd->refcnt);
> +
> if (master->_get_device) {
> err = master->_get_device(mtd);
>
> if (err) {
> + kref_put(&mtd->refcnt, mtd_device_release);
> module_put(master->owner);
> return err;
> }
> }
>
> - master->usecount++;
> -
> while (mtd->parent) {
> - mtd->usecount++;
> + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-
> >parent != master)
> + kref_get(&mtd->parent->refcnt);
> mtd = mtd->parent;
> }
>
> @@ -1327,18 +1331,20 @@ void __put_mtd_device(struct mtd_info *mtd)
> {
> struct mtd_info *master = mtd_get_master(mtd);
>
> - while (mtd->parent) {
> - --mtd->usecount;
> - BUG_ON(mtd->usecount < 0);
> - mtd = mtd->parent;
> - }
> + while (mtd != master) {
> + struct mtd_info *parent = mtd->parent;
>
> - master->usecount--;
> + kref_put(&mtd->refcnt, mtd_device_release);
> + mtd = parent;
> + }
>
> if (master->_put_device)
> master->_put_device(master);
>
> module_put(master->owner);
> +
> + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> + kref_put(&master->refcnt, mtd_device_release);
> }
> EXPORT_SYMBOL_GPL(__put_mtd_device);
>
> diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
> index b5eefeabf310..b014861a06a6 100644
> --- a/drivers/mtd/mtdcore.h
> +++ b/drivers/mtd/mtdcore.h
> @@ -12,6 +12,7 @@ int __must_check add_mtd_device(struct mtd_info
> *mtd);
> int del_mtd_device(struct mtd_info *mtd);
> int add_mtd_partitions(struct mtd_info *, const struct mtd_partition
> *, int);
> int del_mtd_partitions(struct mtd_info *);
> +void release_mtd_partition(struct mtd_info *mtd);
>
> struct mtd_partitions;
>
> diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
> index a46affbb037d..23483db8f30c 100644
> --- a/drivers/mtd/mtdpart.c
> +++ b/drivers/mtd/mtdpart.c
> @@ -32,6 +32,12 @@ static inline void free_partition(struct mtd_info
> *mtd)
> kfree(mtd);
> }
>
> +void release_mtd_partition(struct mtd_info *mtd)
> +{
> + WARN_ON(!list_empty(&mtd->part.node));
> + free_partition(mtd);
> +}
> +
> static struct mtd_info *allocate_partition(struct mtd_info *parent,
> const struct mtd_partition *part,
> int partno, uint64_t cur_offset)
> @@ -309,13 +315,11 @@ static int __mtd_del_partition(struct mtd_info
> *mtd)
>
> sysfs_remove_files(&mtd->dev.kobj, mtd_partition_attrs);
>
> + list_del_init(&mtd->part.node);
> err = del_mtd_device(mtd);
> if (err)
> return err;
>
> - list_del(&mtd->part.node);
> - free_partition(mtd);
> -
> return 0;
> }
>
> @@ -333,6 +337,7 @@ static int __del_mtd_partitions(struct mtd_info
> *mtd)
> __del_mtd_partitions(child);
>
> pr_info("Deleting %s MTD partition\n", child->name);
> + list_del_init(&child->part.node);
> ret = del_mtd_device(child);
> if (ret < 0) {
> pr_err("Error when deleting partition \"%s\" (%d)\n",
> @@ -340,9 +345,6 @@ static int __del_mtd_partitions(struct mtd_info
> *mtd)
> err = ret;
> continue;
> }
> -
> - list_del(&child->part.node);
> - free_partition(child);
> }
>
> return err;
> diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
> index 7c58c44662b8..914a9f974baa 100644
> --- a/include/linux/mtd/mtd.h
> +++ b/include/linux/mtd/mtd.h
> @@ -379,7 +379,7 @@ struct mtd_info {
>
> struct module *owner;
> struct device dev;
> - int usecount;
> + struct kref refcnt;
> struct mtd_debug_info dbg;
> struct nvmem_device *nvmem;
> struct nvmem_device *otp_user_nvmem;
> --
> 2.34.1


2023-07-15 15:56:24

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 1/2] mtd: use refcount to prevent corruption

Hi Fabrizio,

[email protected] wrote on Fri, 14 Jul 2023 16:10:45 +0000:

> Dear All,
>
> I am sorry for reopening this topic, but as it turns out (after bisecting
> linux-next/master) this patch is interfering with a use case I am working
> on.
>
> I am using a Renesas RZ/V2M EVK v2.0 platform, I have an SPI NOR memory
> ("micron,mt25ql256a") wired up to a connector on the platform, the SPI
> master is using driver (built as module):
> drivers/spi/spi-rzv2m-csi.c
>
> Although the board device tree in mainline does not reflect the connection
> of CSI4 (which is the SPI master) from the SoC to the "micron,mt25ql256a"
> (SPI slave device), my local device tree comes with the necessary definitions.
>
> Without this patch, when I load up the module, I get the below 3 devices:
> /dev/mtd0
> /dev/mtd0ro
> /dev/mtdblock0
>
> They get cleaned up correctly upon module removal.
> I can reload the same module, and everything works just fine.
>
> With this patch applied, when I load up the module, I get the same 3
> devices:
> /dev/mtd0
> /dev/mtd0ro
> /dev/mtdblock0
>
> Upon removal, the below 2 devices still hang around:
> /dev/mtd0
> /dev/mtd0ro

Looks like the refcounting change is still not even in some cases, can
you investigate and come up with a proper patch? You can either improve
the existing patch or revert it and try your own approach if deemed
better.

Thanks,
Miquèl

> Preventing the module from being (re)loaded correctly:
> rzv2m_csi a4020200.spi: error -EBUSY: register controller failed
> rzv2m_csi: probe of a4020200.spi failed with error -16
>
> Are you guys aware of this sort of side effect?
>
> Thanks,
> Fab
>
> > From: Alexander Usyskin <[email protected]>
> > Subject: [PATCH 1/2] mtd: use refcount to prevent corruption
> >
> > From: Tomas Winkler <[email protected]>
> >
> > When underlying device is removed mtd core will crash
> > in case user space is holding open handle.
> > Need to use proper refcounting so device is release
> > only when has no users.
> >
> > Signed-off-by: Tomas Winkler <[email protected]>
> > Signed-off-by: Alexander Usyskin <[email protected]>
> > ---
> > drivers/mtd/mtdcore.c | 72 ++++++++++++++++++++++------------------
> > -
> > drivers/mtd/mtdcore.h | 1 +
> > drivers/mtd/mtdpart.c | 14 ++++----
> > include/linux/mtd/mtd.h | 2 +-
> > 4 files changed, 49 insertions(+), 40 deletions(-)
> >
> > diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> > index abf4cb58a8ab..84bd1878367d 100644
> > --- a/drivers/mtd/mtdcore.c
> > +++ b/drivers/mtd/mtdcore.c
> > @@ -93,10 +93,33 @@ static void mtd_release(struct device *dev)
> > struct mtd_info *mtd = dev_get_drvdata(dev);
> > dev_t index = MTD_DEVT(mtd->index);
> >
> > + if (mtd_is_partition(mtd))
> > + release_mtd_partition(mtd);
> > +
> > /* remove /dev/mtdXro node */
> > device_destroy(&mtd_class, index + 1);
> > }
> >
> > +static void mtd_device_release(struct kref *kref)
> > +{
> > + struct mtd_info *mtd = container_of(kref, struct mtd_info,
> > refcnt);
> > +
> > + debugfs_remove_recursive(mtd->dbg.dfs_dir);
> > +
> > + /* Try to remove the NVMEM provider */
> > + nvmem_unregister(mtd->nvmem);
> > +
> > + device_unregister(&mtd->dev);
> > +
> > + /* Clear dev so mtd can be safely re-registered later if desired
> > */
> > + memset(&mtd->dev, 0, sizeof(mtd->dev));
> > +
> > + idr_remove(&mtd_idr, mtd->index);
> > + of_node_put(mtd_get_of_node(mtd));
> > +
> > + module_put(THIS_MODULE);
> > +}
> > +
> > #define MTD_DEVICE_ATTR_RO(name) \
> > static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL)
> >
> > @@ -666,7 +689,7 @@ int add_mtd_device(struct mtd_info *mtd)
> > }
> >
> > mtd->index = i;
> > - mtd->usecount = 0;
> > + kref_init(&mtd->refcnt);
> >
> > /* default value if not set by driver */
> > if (mtd->bitflip_threshold == 0)
> > @@ -779,7 +802,6 @@ int del_mtd_device(struct mtd_info *mtd)
> > {
> > int ret;
> > struct mtd_notifier *not;
> > - struct device_node *mtd_of_node;
> >
> > mutex_lock(&mtd_table_mutex);
> >
> > @@ -793,28 +815,8 @@ int del_mtd_device(struct mtd_info *mtd)
> > list_for_each_entry(not, &mtd_notifiers, list)
> > not->remove(mtd);
> >
> > - if (mtd->usecount) {
> > - printk(KERN_NOTICE "Removing MTD device #%d (%s) with use
> > count %d\n",
> > - mtd->index, mtd->name, mtd->usecount);
> > - ret = -EBUSY;
> > - } else {
> > - mtd_of_node = mtd_get_of_node(mtd);
> > - debugfs_remove_recursive(mtd->dbg.dfs_dir);
> > -
> > - /* Try to remove the NVMEM provider */
> > - nvmem_unregister(mtd->nvmem);
> > -
> > - device_unregister(&mtd->dev);
> > -
> > - /* Clear dev so mtd can be safely re-registered later if
> > desired */
> > - memset(&mtd->dev, 0, sizeof(mtd->dev));
> > -
> > - idr_remove(&mtd_idr, mtd->index);
> > - of_node_put(mtd_of_node);
> > -
> > - module_put(THIS_MODULE);
> > - ret = 0;
> > - }
> > + kref_put(&mtd->refcnt, mtd_device_release);
> > + ret = 0;
> >
> > out_error:
> > mutex_unlock(&mtd_table_mutex);
> > @@ -1228,19 +1230,21 @@ int __get_mtd_device(struct mtd_info *mtd)
> > if (!try_module_get(master->owner))
> > return -ENODEV;
> >
> > + kref_get(&mtd->refcnt);
> > +
> > if (master->_get_device) {
> > err = master->_get_device(mtd);
> >
> > if (err) {
> > + kref_put(&mtd->refcnt, mtd_device_release);
> > module_put(master->owner);
> > return err;
> > }
> > }
> >
> > - master->usecount++;
> > -
> > while (mtd->parent) {
> > - mtd->usecount++;
> > + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-
> > >parent != master)
> > + kref_get(&mtd->parent->refcnt);
> > mtd = mtd->parent;
> > }
> >
> > @@ -1327,18 +1331,20 @@ void __put_mtd_device(struct mtd_info *mtd)
> > {
> > struct mtd_info *master = mtd_get_master(mtd);
> >
> > - while (mtd->parent) {
> > - --mtd->usecount;
> > - BUG_ON(mtd->usecount < 0);
> > - mtd = mtd->parent;
> > - }
> > + while (mtd != master) {
> > + struct mtd_info *parent = mtd->parent;
> >
> > - master->usecount--;
> > + kref_put(&mtd->refcnt, mtd_device_release);
> > + mtd = parent;
> > + }
> >
> > if (master->_put_device)
> > master->_put_device(master);
> >
> > module_put(master->owner);
> > +
> > + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> > + kref_put(&master->refcnt, mtd_device_release);
> > }
> > EXPORT_SYMBOL_GPL(__put_mtd_device);
> >
> > diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
> > index b5eefeabf310..b014861a06a6 100644
> > --- a/drivers/mtd/mtdcore.h
> > +++ b/drivers/mtd/mtdcore.h
> > @@ -12,6 +12,7 @@ int __must_check add_mtd_device(struct mtd_info
> > *mtd);
> > int del_mtd_device(struct mtd_info *mtd);
> > int add_mtd_partitions(struct mtd_info *, const struct mtd_partition
> > *, int);
> > int del_mtd_partitions(struct mtd_info *);
> > +void release_mtd_partition(struct mtd_info *mtd);
> >
> > struct mtd_partitions;
> >
> > diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
> > index a46affbb037d..23483db8f30c 100644
> > --- a/drivers/mtd/mtdpart.c
> > +++ b/drivers/mtd/mtdpart.c
> > @@ -32,6 +32,12 @@ static inline void free_partition(struct mtd_info
> > *mtd)
> > kfree(mtd);
> > }
> >
> > +void release_mtd_partition(struct mtd_info *mtd)
> > +{
> > + WARN_ON(!list_empty(&mtd->part.node));
> > + free_partition(mtd);
> > +}
> > +
> > static struct mtd_info *allocate_partition(struct mtd_info *parent,
> > const struct mtd_partition *part,
> > int partno, uint64_t cur_offset)
> > @@ -309,13 +315,11 @@ static int __mtd_del_partition(struct mtd_info
> > *mtd)
> >
> > sysfs_remove_files(&mtd->dev.kobj, mtd_partition_attrs);
> >
> > + list_del_init(&mtd->part.node);
> > err = del_mtd_device(mtd);
> > if (err)
> > return err;
> >
> > - list_del(&mtd->part.node);
> > - free_partition(mtd);
> > -
> > return 0;
> > }
> >
> > @@ -333,6 +337,7 @@ static int __del_mtd_partitions(struct mtd_info
> > *mtd)
> > __del_mtd_partitions(child);
> >
> > pr_info("Deleting %s MTD partition\n", child->name);
> > + list_del_init(&child->part.node);
> > ret = del_mtd_device(child);
> > if (ret < 0) {
> > pr_err("Error when deleting partition \"%s\" (%d)\n",
> > @@ -340,9 +345,6 @@ static int __del_mtd_partitions(struct mtd_info
> > *mtd)
> > err = ret;
> > continue;
> > }
> > -
> > - list_del(&child->part.node);
> > - free_partition(child);
> > }
> >
> > return err;
> > diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
> > index 7c58c44662b8..914a9f974baa 100644
> > --- a/include/linux/mtd/mtd.h
> > +++ b/include/linux/mtd/mtd.h
> > @@ -379,7 +379,7 @@ struct mtd_info {
> >
> > struct module *owner;
> > struct device dev;
> > - int usecount;
> > + struct kref refcnt;
> > struct mtd_debug_info dbg;
> > struct nvmem_device *nvmem;
> > struct nvmem_device *otp_user_nvmem;
> > --
> > 2.34.1
>

2023-07-16 07:21:23

by Usyskin, Alexander

[permalink] [raw]
Subject: RE: [PATCH 1/2] mtd: use refcount to prevent corruption

> > With this patch applied, when I load up the module, I get the same 3
> > devices:
> > /dev/mtd0
> > /dev/mtd0ro
> > /dev/mtdblock0
> >
> > Upon removal, the below 2 devices still hang around:
> > /dev/mtd0
> > /dev/mtd0ro
>
Our use case does not produce mtdblock; maybe there is some imbalance of get/put?
I have a version somewhere with pr_debug after every kref_get/put. I hope that may help to catch where
the imbalance is.

--
Alexander (Sasha) Usyskin

CSE FW Dev - Host SW
Intel Israel (74) Limited


2023-07-16 14:20:22

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 1/2] mtd: use refcount to prevent corruption

Hi Alexander,

[email protected] wrote on Sun, 16 Jul 2023 06:29:02 +0000:

> > > With this patch applied, when I load up the module, I get the same 3
> > > devices:
> > > /dev/mtd0
> > > /dev/mtd0ro
> > > /dev/mtdblock0
> > >
> > > Upon removal, the below 2 devices still hang around:
> > > /dev/mtd0
> > > /dev/mtd0ro
> >
> Our use-case do not produce mtdblock, maybe there are some imbalances of get/put?
> I have somewhere version with pr_debug after every kref_get/put. That may help to catch where
> it missed, I hope.

I believe mtdblock is the good citizen here. Just disable
CONFIG_MTD_BLOCK from your configuration and you will likely observe
the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
follow all the get/put calls it can help to find an imbalance.

Thanks,
Miquèl

2023-07-24 12:14:48

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 1/2] mtd: use refcount to prevent corruption

Hi Alexander,

[email protected] wrote on Mon, 24 Jul 2023 11:43:59 +0000:

> > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > devices:
> > > > > /dev/mtd0
> > > > > /dev/mtd0ro
> > > > > /dev/mtdblock0
> > > > >
> > > > > Upon removal, the below 2 devices still hang around:
> > > > > /dev/mtd0
> > > > > /dev/mtd0ro
> > > >
> > > Our use-case do not produce mtdblock, maybe there are some imbalances
> > of get/put?
> > > I have somewhere version with pr_debug after every kref_get/put. That may
> > help to catch where
> > > it missed, I hope.
> >
> > I believe mtdblock is the good citizen here. Just disable
> > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > follow all the get/put calls it can help to find an imbalance.
> >
> > Thanks,
> > Miquèl
>
> Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your config?

Not sure I get your question. You can enable or disable it, it should
work in both cases (yet, the handling is of course a bit different as
the top level device will be retained/not retained).

Thanks,
Miquèl

2023-07-24 12:31:42

by Usyskin, Alexander

[permalink] [raw]
Subject: RE: [PATCH 1/2] mtd: use refcount to prevent corruption

> > > > With this patch applied, when I load up the module, I get the same 3
> > > > devices:
> > > > /dev/mtd0
> > > > /dev/mtd0ro
> > > > /dev/mtdblock0
> > > >
> > > > Upon removal, the below 2 devices still hang around:
> > > > /dev/mtd0
> > > > /dev/mtd0ro
> > >
> > Our use-case do not produce mtdblock, maybe there are some imbalances
> of get/put?
> > I have somewhere version with pr_debug after every kref_get/put. That may
> help to catch where
> > it missed, I hope.
>
> I believe mtdblock is the good citizen here. Just disable
> CONFIG_MTD_BLOCK from your configuration and you will likely observe
> the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> follow all the get/put calls it can help to find an imbalance.
>
> Thanks,
> Miquèl

Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your config?

--
Thanks,
Sasha


2023-07-24 12:35:47

by Usyskin, Alexander

[permalink] [raw]
Subject: RE: [PATCH 1/2] mtd: use refcount to prevent corruption

Hi Miquel,
>
> Hi Alexander,
>
> [email protected] wrote on Mon, 24 Jul 2023 11:43:59 +0000:
>
> > > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > > devices:
> > > > > > /dev/mtd0
> > > > > > /dev/mtd0ro
> > > > > > /dev/mtdblock0
> > > > > >
> > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > /dev/mtd0
> > > > > > /dev/mtd0ro
> > > > >
> > > > Our use-case do not produce mtdblock, maybe there are some
> imbalances
> > > of get/put?
> > > > I have somewhere version with pr_debug after every kref_get/put. That
> may
> > > help to catch where
> > > > it missed, I hope.
> > >
> > > I believe mtdblock is the good citizen here. Just disable
> > > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > > follow all the get/put calls it can help to find an imbalance.
> > >
> > > Thanks,
> > > Miquèl
> >
> > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> config?
>
> Not sure I get your question. You can enable or disable it, it should
> work in both cases (yet, the handling is of course a bit different as
> the top level device will be retained/not retained).
>
> Thanks,
> Miquèl

I'm trying to understand why I can't reproduce the problem in my scenario.
I found an important difference between the upstreamed patch and the internal version:
The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around kref_get/put does not exist in the internal tree.
The code before my patch did not have such a check, so I tend to assume that this check should be removed.
If your reproduction happens with CONFIG_MTD_PARTITIONED_MASTER disabled, that may explain the problems that you see.

--
Thanks,
Sasha



2023-07-25 13:38:33

by Usyskin, Alexander

[permalink] [raw]
Subject: RE: [PATCH 1/2] mtd: use refcount to prevent corruption

Hi
>
> Hi Miquel,
> >
> > Hi Alexander,
> >
> > [email protected] wrote on Mon, 24 Jul 2023 11:43:59 +0000:
> >
> > > > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > > > devices:
> > > > > > > /dev/mtd0
> > > > > > > /dev/mtd0ro
> > > > > > > /dev/mtdblock0
> > > > > > >
> > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > /dev/mtd0
> > > > > > > /dev/mtd0ro
> > > > > >
> > > > > Our use-case do not produce mtdblock, maybe there are some
> > imbalances
> > > > of get/put?
> > > > > I have somewhere version with pr_debug after every kref_get/put. That
> > may
> > > > help to catch where
> > > > > it missed, I hope.
> > > >
> > > > I believe mtdblock is the good citizen here. Just disable
> > > > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > > > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > > > follow all the get/put calls it can help to find an imbalance.
> > > >
> > > > Thanks,
> > > > Miquèl
> > >
> > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> > config?
> >
> > Not sure I get your question. You can enable or disable it, it should
> > work in both cases (yet, the handling is of course a bit different as
> > the top level device will be retained/not retained).
> >
> > Thanks,
> > Miquèl
>
> I'm trying to understand why I can't reproduce the problem in my scenario.
> I found an important difference in upstreamed patch and internal version:
> The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> kref_get/put does not exists in the internal tree.
> The code before my patch do not have such check, so I tend to assume that
> this check should be removed.
> If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> disabled that may explain problems that you see.
>
> --
> Thanks,
> Sasha
>

I've tried to reproduce this with the latest Linux 6.5-rc1 and my two patches.
A manual modprobe of mtdblock creates mtdblock0 over my partitions too.
I can't reproduce the problem either with MTD_PARTITIONED_MASTER or without.

Let's try to debug on your system: can you enable dynamic debug for the mtd subsystem,
reproduce and publish dmesg?

The prints for kref get/put can be added as below:

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 2466ea466466..374835831428 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
}

kref_get(&mtd->refcnt);
+ pr_debug("get mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));

while (mtd->parent) {
- if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master)
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master) {
kref_get(&mtd->parent->refcnt);
+ pr_debug("get mtd %s %d\n", mtd->parent->name, kref_read(&mtd->parent->refcnt));
+ }
mtd = mtd->parent;
}

@@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info *mtd)
while (mtd != master) {
struct mtd_info *parent = mtd->parent;

+ pr_debug("put mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));
kref_put(&mtd->refcnt, mtd_device_release);
mtd = parent;
}

- if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
+ pr_debug("put mtd %s %d\n", master->name, kref_read(&master->refcnt));
kref_put(&master->refcnt, mtd_device_release);
+ }

module_put(master->owner);


--
Thanks,
Sasha


2023-07-27 06:42:31

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 1/2] mtd: use refcount to prevent corruption

Hi Alexander,

[email protected] wrote on Tue, 25 Jul 2023 12:50:04 +0000:

> Hi
> >
> > Hi Miquel,
> > >
> > > Hi Alexander,
> > >
> > > [email protected] wrote on Mon, 24 Jul 2023 11:43:59 +0000:
> > >
> > > > > > > > With this patch applied, when I load up the module, I get the same 3
> > > > > > > > devices:
> > > > > > > > /dev/mtd0
> > > > > > > > /dev/mtd0ro
> > > > > > > > /dev/mtdblock0
> > > > > > > >
> > > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > > /dev/mtd0
> > > > > > > > /dev/mtd0ro
> > > > > > >
> > > > > > Our use-case do not produce mtdblock, maybe there are some
> > > imbalances
> > > > > of get/put?
> > > > > > I have somewhere version with pr_debug after every kref_get/put. That
> > > may
> > > > > help to catch where
> > > > > > it missed, I hope.
> > > > >
> > > > > I believe mtdblock is the good citizen here. Just disable
> > > > > CONFIG_MTD_BLOCK from your configuration and you will likely observe
> > > > > the same issue, just a bit narrowed, perhaps. Indeed, if you manage to
> > > > > follow all the get/put calls it can help to find an imbalance.
> > > > >
> > > > > Thanks,
> > > > > Miquèl
> > > >
> > > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> > > config?
> > >
> > > Not sure I get your question. You can enable or disable it, it should
> > > work in both cases (yet, the handling is of course a bit different as
> > > the top level device will be retained/not retained).
> > >
> > > Thanks,
> > > Miquèl
> >
> > I'm trying to understand why I can't reproduce the problem in my scenario.
> > I found an important difference in upstreamed patch and internal version:
> > The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> > kref_get/put does not exists in the internal tree.
> > The code before my patch do not have such check, so I tend to assume that
> > this check should be removed.
> > If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> > disabled that may explain problems that you see.
> >
> > --
> > Thanks,
> > Sasha
> >
>
> I've tried to reproduce this with latest Linux 6.5-rc1 and my two patches.
> The manual modprobe mtdblock creates mtdblock0 over my partitions too.
> I can't reproduce problem neither with MTD_PARTITIONED_MASTER nor without.
>
> Let's try to debug on your system, can you enable dynamic debug for mtd subsystem,
> reproduce and publish dmesg?
>
> The prints for kref get/put can be added as below:
>
> diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> index 2466ea466466..374835831428 100644
> --- a/drivers/mtd/mtdcore.c
> +++ b/drivers/mtd/mtdcore.c
> @@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
> }
>
> kref_get(&mtd->refcnt);
> + pr_debug("get mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));
>
> while (mtd->parent) {
> - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master)
> + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd->parent != master) {
> kref_get(&mtd->parent->refcnt);
> + pr_debug("get mtd %s %d\n", mtd->parent->name, kref_read(&mtd->parent->refcnt));
> + }
> mtd = mtd->parent;
> }
>
> @@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info *mtd)
> while (mtd != master) {
> struct mtd_info *parent = mtd->parent;
>
> + pr_debug("put mtd %s %d\n", mtd->name, kref_read(&mtd->refcnt));
> kref_put(&mtd->refcnt, mtd_device_release);
> mtd = parent;
> }
>
> - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
> + pr_debug("put mtd %s %d\n", master->name, kref_read(&master->refcnt));
> kref_put(&master->refcnt, mtd_device_release);
> + }
>
> module_put(master->owner);
>
>

Could this be helpful?

https://lore.kernel.org/all/[email protected]/

If you successfully test it, please send your Tested-by.

Thanks,
Miquèl

2023-07-27 07:02:46

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 1/2] mtd: use refcount to prevent corruption

Hi Tomas,

[email protected] wrote on Thu, 27 Jul 2023 06:32:39 +0000:

> >
> > Hi Alexander,
> >
> > [email protected] wrote on Tue, 25 Jul 2023 12:50:04 +0000:
> >
> > > Hi
> > > >
> > > > Hi Miquel,
> > > > >
> > > > > Hi Alexander,
> > > > >
> > > > > [email protected] wrote on Mon, 24 Jul 2023 11:43:59
> > +0000:
> > > > >
> > > > > > > > > > With this patch applied, when I load up the module, I
> > > > > > > > > > get the same 3
> > > > > > > > > > devices:
> > > > > > > > > > /dev/mtd0
> > > > > > > > > > /dev/mtd0ro
> > > > > > > > > > /dev/mtdblock0
> > > > > > > > > >
> > > > > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > > > > /dev/mtd0
> > > > > > > > > > /dev/mtd0ro
> > > > > > > > >
> > > > > > > > Our use-case do not produce mtdblock, maybe there are some
> > > > > imbalances
> > > > > > > of get/put?
> > > > > > > > I have somewhere version with pr_debug after every
> > > > > > > > kref_get/put. That
> > > > > may
> > > > > > > help to catch where
> > > > > > > > it missed, I hope.
> > > > > > >
> > > > > > > I believe mtdblock is the good citizen here. Just disable
> > > > > > > CONFIG_MTD_BLOCK from your configuration and you will likely
> > > > > > > observe the same issue, just a bit narrowed, perhaps. Indeed,
> > > > > > > if you manage to follow all the get/put calls it can help to find an
> > imbalance.
> > > > > > >
> > > > > > > Thanks,
> > > > > > > Miquèl
> > > > > >
> > > > > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> > > > > config?
> > > > >
> > > > > Not sure I get your question. You can enable or disable it, it
> > > > > should work in both cases (yet, the handling is of course a bit
> > > > > different as the top level device will be retained/not retained).
> > > > >
> > > > > Thanks,
> > > > > Miquèl
> > > >
> > > > I'm trying to understand why I can't reproduce the problem in my
> > scenario.
> > > > I found an important difference in upstreamed patch and internal version:
> > > > The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> > > > kref_get/put does not exists in the internal tree.
> > > > The code before my patch do not have such check, so I tend to assume
> > > > that this check should be removed.
> > > > If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> > disabled
> > > > that may explain problems that you see.
> > > >
> > > > --
> > > > Thanks,
> > > > Sasha
> > > >
> > >
> > > I've tried to reproduce this with latest Linux 6.5-rc1 and my two patches.
> > > The manual modprobe mtdblock creates mtdblock0 over my partitions too.
> > > I can't reproduce problem neither with MTD_PARTITIONED_MASTER nor
> > without.
> > >
> > > Let's try to debug on your system, can you enable dynamic debug for
> > > mtd subsystem, reproduce and publish dmesg?
> > >
> > > The prints for kref get/put can be added as below:
> > >
> > > diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index
> > > 2466ea466466..374835831428 100644
> > > --- a/drivers/mtd/mtdcore.c
> > > +++ b/drivers/mtd/mtdcore.c
> > > @@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
> > > }
> > >
> > > kref_get(&mtd->refcnt);
> > > + pr_debug("get mtd %s %d\n", mtd->name,
> > > + kref_read(&mtd->refcnt));
> > >
> > > while (mtd->parent) {
> > > - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-
> > >parent != master)
> > > + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ||
> > > + mtd->parent != master) {
> > > kref_get(&mtd->parent->refcnt);
> > > + pr_debug("get mtd %s %d\n", mtd->parent->name,
> > kref_read(&mtd->parent->refcnt));
> > > + }
> > > mtd = mtd->parent;
> > > }
> > >
> > > @@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info
> > *mtd)
> > > while (mtd != master) {
> > > struct mtd_info *parent = mtd->parent;
> > >
> > > + pr_debug("put mtd %s %d\n", mtd->name,
> > > + kref_read(&mtd->refcnt));
> > > kref_put(&mtd->refcnt, mtd_device_release);
> > > mtd = parent;
> > > }
> > >
> > > - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> > > + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
> > > + pr_debug("put mtd %s %d\n", master->name,
> > > + kref_read(&master->refcnt));
> > > kref_put(&master->refcnt, mtd_device_release);
> > > + }
> > >
> > > module_put(master->owner);
> > >
> > >
> >
> > Could this be helpful?
> >
> > https://lore.kernel.org/all/20230725215539.3135304-1-
> > [email protected]/
> >
> > If you successfully test it, please send your Tested-by.
> In the first glance it doesn't look correct, we have the reproduced using kasan, so hopefully the fix will follow, shortly.
> Thanks
> Tomas
>

Why is this fix not correct?

Are you currently writing a fix yourself?

Thanks,
Miquèl

2023-07-27 07:06:23

by Winkler, Tomas

[permalink] [raw]
Subject: RE: [PATCH 1/2] mtd: use refcount to prevent corruption

>
> Hi Alexander,
>
> [email protected] wrote on Tue, 25 Jul 2023 12:50:04 +0000:
>
> > Hi
> > >
> > > Hi Miquel,
> > > >
> > > > Hi Alexander,
> > > >
> > > > [email protected] wrote on Mon, 24 Jul 2023 11:43:59
> +0000:
> > > >
> > > > > > > > > With this patch applied, when I load up the module, I
> > > > > > > > > get the same 3
> > > > > > > > > devices:
> > > > > > > > > /dev/mtd0
> > > > > > > > > /dev/mtd0ro
> > > > > > > > > /dev/mtdblock0
> > > > > > > > >
> > > > > > > > > Upon removal, the below 2 devices still hang around:
> > > > > > > > > /dev/mtd0
> > > > > > > > > /dev/mtd0ro
> > > > > > > >
> > > > > > > Our use-case do not produce mtdblock, maybe there are some
> > > > imbalances
> > > > > > of get/put?
> > > > > > > I have somewhere version with pr_debug after every
> > > > > > > kref_get/put. That
> > > > may
> > > > > > help to catch where
> > > > > > > it missed, I hope.
> > > > > >
> > > > > > I believe mtdblock is the good citizen here. Just disable
> > > > > > CONFIG_MTD_BLOCK from your configuration and you will likely
> > > > > > observe the same issue, just a bit narrowed, perhaps. Indeed,
> > > > > > if you manage to follow all the get/put calls it can help to find an
> imbalance.
> > > > > >
> > > > > > Thanks,
> > > > > > Miquèl
> > > > >
> > > > > Miquel, do you have CONFIG_MTD_PARTITIONED_MASTER set in your
> > > > config?
> > > >
> > > > Not sure I get your question. You can enable or disable it, it
> > > > should work in both cases (yet, the handling is of course a bit
> > > > different as the top level device will be retained/not retained).
> > > >
> > > > Thanks,
> > > > Miquèl
> > >
> > > I'm trying to understand why I can't reproduce the problem in my
> scenario.
> > > I found an important difference in upstreamed patch and internal version:
> > > The IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) check around
> > > kref_get/put does not exists in the internal tree.
> > > The code before my patch do not have such check, so I tend to assume
> > > that this check should be removed.
> > > If you reproduce happens with CONFIG_MTD_PARTITIONED_MASTER
> disabled
> > > that may explain problems that you see.
> > >
> > > --
> > > Thanks,
> > > Sasha
> > >
> >
> > I've tried to reproduce this with latest Linux 6.5-rc1 and my two patches.
> > The manual modprobe mtdblock creates mtdblock0 over my partitions too.
> > I can't reproduce problem neither with MTD_PARTITIONED_MASTER nor
> without.
> >
> > Let's try to debug on your system, can you enable dynamic debug for
> > mtd subsystem, reproduce and publish dmesg?
> >
> > The prints for kref get/put can be added as below:
> >
> > diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index
> > 2466ea466466..374835831428 100644
> > --- a/drivers/mtd/mtdcore.c
> > +++ b/drivers/mtd/mtdcore.c
> > @@ -1242,10 +1242,13 @@ int __get_mtd_device(struct mtd_info *mtd)
> > }
> >
> > kref_get(&mtd->refcnt);
> > + pr_debug("get mtd %s %d\n", mtd->name,
> > + kref_read(&mtd->refcnt));
> >
> > while (mtd->parent) {
> > - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd-
> >parent != master)
> > + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ||
> > + mtd->parent != master) {
> > kref_get(&mtd->parent->refcnt);
> > + pr_debug("get mtd %s %d\n", mtd->parent->name,
> kref_read(&mtd->parent->refcnt));
> > + }
> > mtd = mtd->parent;
> > }
> >
> > @@ -1335,12 +1338,15 @@ void __put_mtd_device(struct mtd_info
> *mtd)
> > while (mtd != master) {
> > struct mtd_info *parent = mtd->parent;
> >
> > + pr_debug("put mtd %s %d\n", mtd->name,
> > + kref_read(&mtd->refcnt));
> > kref_put(&mtd->refcnt, mtd_device_release);
> > mtd = parent;
> > }
> >
> > - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
> > + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
> > + pr_debug("put mtd %s %d\n", master->name,
> > + kref_read(&master->refcnt));
> > kref_put(&master->refcnt, mtd_device_release);
> > + }
> >
> > module_put(master->owner);
> >
> >
>
> Could this be helpful?
>
> https://lore.kernel.org/all/20230725215539.3135304-1-
> [email protected]/
>
> If you successfully test it, please send your Tested-by.
At first glance it doesn't look correct; we have reproduced the issue using KASAN, so hopefully a fix will follow shortly.
Thanks
Tomas