2021-06-04 01:21:08

by Shiyang Ruan

[permalink] [raw]
Subject: [PATCH v4 02/10] dax: Introduce holder for dax_device

To easily track filesystem from a pmem device, we introduce a holder for
dax_device structure, and also its operation. This holder is used to
remember who is using this dax_device:
- When it is the backend of a filesystem, the holder will be the
superblock of this filesystem.
- When this pmem device is one of the targets in a mapped device, the
holder will be this mapped device. In this case, the mapped device
has its own dax_device and it will follow the first rule. So that we
can finally track to the filesystem we needed.

The holder and holder_ops will be set when filesystem is being mounted,
or an target device is being activated.

Signed-off-by: Shiyang Ruan <[email protected]>
---
drivers/dax/super.c | 38 ++++++++++++++++++++++++++++++++++++++
include/linux/dax.h | 10 ++++++++++
2 files changed, 48 insertions(+)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 5fa6ae9dbc8b..d118e2a7dc70 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -222,8 +222,10 @@ struct dax_device {
struct cdev cdev;
const char *host;
void *private;
+ void *holder;
unsigned long flags;
const struct dax_operations *ops;
+ const struct dax_holder_operations *holder_ops;
};

static ssize_t write_cache_show(struct device *dev,
@@ -373,6 +375,24 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
}
EXPORT_SYMBOL_GPL(dax_zero_page_range);

+int dax_corrupted_range(struct dax_device *dax_dev, struct block_device *bdev,
+ loff_t offset, size_t size, void *data)
+{
+ int rc = -ENXIO;
+ if (!dax_dev)
+ return rc;
+
+ if (dax_dev->holder) {
+ rc = dax_dev->holder_ops->corrupted_range(dax_dev, bdev, offset,
+ size, data);
+ if (rc == -ENODEV)
+ rc = -ENXIO;
+ } else
+ rc = -EOPNOTSUPP;
+ return rc;
+}
+EXPORT_SYMBOL_GPL(dax_corrupted_range);
+
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
@@ -624,6 +644,24 @@ void put_dax(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(put_dax);

+void dax_set_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops)
+{
+ if (!dax_dev)
+ return;
+ dax_dev->holder = holder;
+ dax_dev->holder_ops = ops;
+}
+EXPORT_SYMBOL_GPL(dax_set_holder);
+
+void *dax_get_holder(struct dax_device *dax_dev)
+{
+ if (!dax_dev)
+ return NULL;
+ return dax_dev->holder;
+}
+EXPORT_SYMBOL_GPL(dax_get_holder);
+
/**
* dax_get_by_host() - temporary lookup mechanism for filesystem-dax
* @host: alternate name for the device registered by a dax driver
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b52f084aa643..1ce343a960ab 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,10 +38,18 @@ struct dax_operations {
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
};

+struct dax_holder_operations {
+ int (*corrupted_range)(struct dax_device *, struct block_device *,
+ loff_t, size_t, void *);
+};
+
extern struct attribute_group dax_attribute_group;

#if IS_ENABLED(CONFIG_DAX)
struct dax_device *dax_get_by_host(const char *host);
+void dax_set_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops);
+void *dax_get_holder(struct dax_device *dax_dev);
struct dax_device *alloc_dax(void *private, const char *host,
const struct dax_operations *ops, unsigned long flags);
void put_dax(struct dax_device *dax_dev);
@@ -226,6 +234,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages);
+int dax_corrupted_range(struct dax_device *dax_dev, struct block_device *bdev,
+ loff_t offset, size_t size, void *data);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);

ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
--
2.31.1




2021-06-16 00:48:12

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH v4 02/10] dax: Introduce holder for dax_device

On Thu, Jun 3, 2021 at 6:19 PM Shiyang Ruan <[email protected]> wrote:
>
> To easily track filesystem from a pmem device, we introduce a holder for
> dax_device structure, and also its operation. This holder is used to
> remember who is using this dax_device:
> - When it is the backend of a filesystem, the holder will be the
> superblock of this filesystem.
> - When this pmem device is one of the targets in a mapped device, the
> holder will be this mapped device. In this case, the mapped device
> has its own dax_device and it will follow the first rule. So that we
> can finally track to the filesystem we needed.
>
> The holder and holder_ops will be set when filesystem is being mounted,
> or an target device is being activated.
>
> Signed-off-by: Shiyang Ruan <[email protected]>
> ---
> drivers/dax/super.c | 38 ++++++++++++++++++++++++++++++++++++++
> include/linux/dax.h | 10 ++++++++++
> 2 files changed, 48 insertions(+)
>
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index 5fa6ae9dbc8b..d118e2a7dc70 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -222,8 +222,10 @@ struct dax_device {
> struct cdev cdev;
> const char *host;
> void *private;

@private is likely too generic of a name now, it would be better to
call this @parent.

> + void *holder;

This should probably be called holder_data, and this structure could
use some kernel-doc to clarify what the fields mean.

> unsigned long flags;
> const struct dax_operations *ops;
> + const struct dax_holder_operations *holder_ops;
> };
>
> static ssize_t write_cache_show(struct device *dev,
> @@ -373,6 +375,24 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> }
> EXPORT_SYMBOL_GPL(dax_zero_page_range);
>
> +int dax_corrupted_range(struct dax_device *dax_dev, struct block_device *bdev,
> + loff_t offset, size_t size, void *data)

Why is @bdev an argument to this routine? The primary motivation for
a 'struct dax_device' is to break the association with 'struct
block_device'. The filesystem may know that the logical addresses
associated with a given dax_dev alias with the logical addresses of a
given bdev, but that knowledge need not leak into the API.

> +{
> + int rc = -ENXIO;
> + if (!dax_dev)
> + return rc;
> +
> + if (dax_dev->holder) {
> + rc = dax_dev->holder_ops->corrupted_range(dax_dev, bdev, offset,
> + size, data);

A bikeshed comment, but I do not like the name corrupted_range(),
because "corrupted" implies a permanent state. The source of this
notification is memory_failure() and that does not convey "permanent"
vs "transient" it just reports "failure". So, to keep the naming
consistent with the pgmap notification callback lets call this one
"notify_failure".

> + if (rc == -ENODEV)
> + rc = -ENXIO;
> + } else
> + rc = -EOPNOTSUPP;
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_corrupted_range);

dax_holder_notify_failure() makes it clearer that this is
communicating a failure up the holder stack.

> +
> #ifdef CONFIG_ARCH_HAS_PMEM_API
> void arch_wb_cache_pmem(void *addr, size_t size);
> void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
> @@ -624,6 +644,24 @@ void put_dax(struct dax_device *dax_dev)
> }
> EXPORT_SYMBOL_GPL(put_dax);
>
> +void dax_set_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops)
> +{
> + if (!dax_dev)
> + return;
> + dax_dev->holder = holder;
> + dax_dev->holder_ops = ops;

I think there needs to be some synchronization here, perhaps a global
dax_dev_rwsem that is taken for read in the notification path and
write when adding a holder to the chain.

I also wonder if this should be an event that triggers a dax_dev stack
to re-report any failure notifications. For example the pmem driver
may have recorded a list of bad blocks at the beginning of time.
Likely the filesystem or other holder would like to get that
pre-existing list of failures at first registration. Have you given
thought about how the filesystem is told about pre-existing badblocks?

> +}
> +EXPORT_SYMBOL_GPL(dax_set_holder);
> +
> +void *dax_get_holder(struct dax_device *dax_dev)
> +{
> + if (!dax_dev)
> + return NULL;
> + return dax_dev->holder;
> +}
> +EXPORT_SYMBOL_GPL(dax_get_holder);

Where is this used?