To easily track filesystem from a pmem device, we introduce a holder for
dax_device structure, and also its operation. This holder is used to
remember who is using this dax_device:
- When it is the backend of a filesystem, the holder will be the
instance of this filesystem.
- When this pmem device is one of the targets in a mapped device, the
holder will be this mapped device. In this case, the mapped device
has its own dax_device and it will follow the first rule. So that we
can finally track to the filesystem we needed.
The holder and holder_ops will be set when filesystem is being mounted,
or an target device is being activated.
Signed-off-by: Shiyang Ruan <[email protected]>
---
drivers/dax/super.c | 62 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/dax.h | 29 +++++++++++++++++++++
2 files changed, 91 insertions(+)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e3029389d809..8e2733b78437 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -21,6 +21,9 @@
* @cdev: optional character interface for "device dax"
* @private: dax driver private data
* @flags: state and boolean properties
+ * @ops: operations for dax_device
+ * @holder_data: holder of a dax_device: could be filesystem or mapped device
+ * @holder_ops: operations for the inner holder
*/
struct dax_device {
struct inode inode;
@@ -28,6 +31,8 @@ struct dax_device {
void *private;
unsigned long flags;
const struct dax_operations *ops;
+ void *holder_data;
+ const struct dax_holder_operations *holder_ops;
};
static dev_t dax_devt;
@@ -193,6 +198,29 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
}
EXPORT_SYMBOL_GPL(dax_zero_page_range);
+int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off,
+ u64 len, int mf_flags)
+{
+ int rc, id;
+
+ id = dax_read_lock();
+ if (!dax_alive(dax_dev)) {
+ rc = -ENXIO;
+ goto out;
+ }
+
+ if (!dax_dev->holder_ops) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ rc = dax_dev->holder_ops->notify_failure(dax_dev, off, len, mf_flags);
+out:
+ dax_read_unlock(id);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(dax_holder_notify_failure);
+
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
@@ -268,6 +296,10 @@ void kill_dax(struct dax_device *dax_dev)
clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
synchronize_srcu(&dax_srcu);
+
+ /* clear holder data */
+ dax_dev->holder_ops = NULL;
+ dax_dev->holder_data = NULL;
}
EXPORT_SYMBOL_GPL(kill_dax);
@@ -409,6 +441,36 @@ void put_dax(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(put_dax);
+void dax_register_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops)
+{
+ if (!dax_alive(dax_dev))
+ return;
+
+ dax_dev->holder_data = holder;
+ dax_dev->holder_ops = ops;
+}
+EXPORT_SYMBOL_GPL(dax_register_holder);
+
+void dax_unregister_holder(struct dax_device *dax_dev)
+{
+ if (!dax_alive(dax_dev))
+ return;
+
+ dax_dev->holder_data = NULL;
+ dax_dev->holder_ops = NULL;
+}
+EXPORT_SYMBOL_GPL(dax_unregister_holder);
+
+void *dax_get_holder(struct dax_device *dax_dev)
+{
+ if (!dax_alive(dax_dev))
+ return NULL;
+
+ return dax_dev->holder_data;
+}
+EXPORT_SYMBOL_GPL(dax_get_holder);
+
/**
* inode_dax: convert a public inode into its dax_dev
* @inode: An inode with i_cdev pointing to a dax_dev
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9fc5f99a0ae2..96cfc63b12fd 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -34,6 +34,22 @@ struct dax_operations {
#if IS_ENABLED(CONFIG_DAX)
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
+struct dax_holder_operations {
+ /*
+ * notify_failure - notify memory failure into inner holder device
+ * @dax_dev: the dax device which contains the holder
+ * @offset: offset on this dax device where memory failure occurs
+ * @len: length of this memory failure event
+ * @flags: action flags for memory failure handler
+ */
+ int (*notify_failure)(struct dax_device *dax_dev, u64 offset,
+ u64 len, int mf_flags);
+};
+
+void dax_register_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops);
+void dax_unregister_holder(struct dax_device *dax_dev);
+void *dax_get_holder(struct dax_device *dax_dev);
void put_dax(struct dax_device *dax_dev);
void kill_dax(struct dax_device *dax_dev);
void dax_write_cache(struct dax_device *dax_dev, bool wc);
@@ -53,6 +69,17 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
return dax_synchronous(dax_dev);
}
#else
+static inline void dax_register_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops)
+{
+}
+static inline void dax_unregister_holder(struct dax_device *dax_dev)
+{
+}
+static inline void *dax_get_holder(struct dax_device *dax_dev)
+{
+ return NULL;
+}
static inline struct dax_device *alloc_dax(void *private,
const struct dax_operations *ops)
{
@@ -185,6 +212,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages);
+int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len,
+ int mf_flags);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
--
2.34.1
Hi Shiyang,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on linux/master]
[also build test WARNING on linus/master v5.17-rc1 next-20220127]
[cannot apply to xfs-linux/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2c271fe77d52a0555161926c232cd5bc07178b39
config: arm-imx_v4_v5_defconfig (https://download.01.org/0day-ci/archive/20220128/[email protected]/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project f32dccb9a43b02ce4e540d6ba5dbbdb188f2dc7d)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm cross compiling tool for clang build
# apt-get install binutils-arm-linux-gnueabi
# https://github.com/0day-ci/linux/commit/57669ed05e93b37d995c5247eebe218ab2058c9a
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
git checkout 57669ed05e93b37d995c5247eebe218ab2058c9a
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash fs/iomap/
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
In file included from fs/iomap/buffered-io.c:13:
>> include/linux/dax.h:73:16: warning: declaration of 'struct dax_holder_operations' will not be visible outside of this function [-Wvisibility]
const struct dax_holder_operations *ops)
^
1 warning generated.
vim +73 include/linux/dax.h
48
49 void dax_register_holder(struct dax_device *dax_dev, void *holder,
50 const struct dax_holder_operations *ops);
51 void dax_unregister_holder(struct dax_device *dax_dev);
52 void *dax_get_holder(struct dax_device *dax_dev);
53 void put_dax(struct dax_device *dax_dev);
54 void kill_dax(struct dax_device *dax_dev);
55 void dax_write_cache(struct dax_device *dax_dev, bool wc);
56 bool dax_write_cache_enabled(struct dax_device *dax_dev);
57 bool dax_synchronous(struct dax_device *dax_dev);
58 void set_dax_synchronous(struct dax_device *dax_dev);
59 /*
60 * Check if given mapping is supported by the file / underlying device.
61 */
62 static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
63 struct dax_device *dax_dev)
64 {
65 if (!(vma->vm_flags & VM_SYNC))
66 return true;
67 if (!IS_DAX(file_inode(vma->vm_file)))
68 return false;
69 return dax_synchronous(dax_dev);
70 }
71 #else
72 static inline void dax_register_holder(struct dax_device *dax_dev, void *holder,
> 73 const struct dax_holder_operations *ops)
74 {
75 }
76 static inline void dax_unregister_holder(struct dax_device *dax_dev)
77 {
78 }
79 static inline void *dax_get_holder(struct dax_device *dax_dev)
80 {
81 return NULL;
82 }
83 static inline struct dax_device *alloc_dax(void *private,
84 const struct dax_operations *ops)
85 {
86 /*
87 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
88 * NULL is an error or expected.
89 */
90 return NULL;
91 }
92 static inline void put_dax(struct dax_device *dax_dev)
93 {
94 }
95 static inline void kill_dax(struct dax_device *dax_dev)
96 {
97 }
98 static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
99 {
100 }
101 static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
102 {
103 return false;
104 }
105 static inline bool dax_synchronous(struct dax_device *dax_dev)
106 {
107 return true;
108 }
109 static inline void set_dax_synchronous(struct dax_device *dax_dev)
110 {
111 }
112 static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
113 struct dax_device *dax_dev)
114 {
115 return !(vma->vm_flags & VM_SYNC);
116 }
117 #endif
118
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi Shiyang,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on linux/master]
[also build test WARNING on linus/master v5.17-rc1 next-20220127]
[cannot apply to xfs-linux/for-next hnaz-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2c271fe77d52a0555161926c232cd5bc07178b39
config: powerpc-allnoconfig (https://download.01.org/0day-ci/archive/20220128/[email protected]/config)
compiler: powerpc-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/57669ed05e93b37d995c5247eebe218ab2058c9a
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
git checkout 57669ed05e93b37d995c5247eebe218ab2058c9a
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
In file included from mm/filemap.c:15:
>> include/linux/dax.h:73:30: warning: 'struct dax_holder_operations' declared inside parameter list will not be visible outside of this definition or declaration
73 | const struct dax_holder_operations *ops)
| ^~~~~~~~~~~~~~~~~~~~~
vim +73 include/linux/dax.h
48
49 void dax_register_holder(struct dax_device *dax_dev, void *holder,
50 const struct dax_holder_operations *ops);
51 void dax_unregister_holder(struct dax_device *dax_dev);
52 void *dax_get_holder(struct dax_device *dax_dev);
53 void put_dax(struct dax_device *dax_dev);
54 void kill_dax(struct dax_device *dax_dev);
55 void dax_write_cache(struct dax_device *dax_dev, bool wc);
56 bool dax_write_cache_enabled(struct dax_device *dax_dev);
57 bool dax_synchronous(struct dax_device *dax_dev);
58 void set_dax_synchronous(struct dax_device *dax_dev);
59 /*
60 * Check if given mapping is supported by the file / underlying device.
61 */
62 static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
63 struct dax_device *dax_dev)
64 {
65 if (!(vma->vm_flags & VM_SYNC))
66 return true;
67 if (!IS_DAX(file_inode(vma->vm_file)))
68 return false;
69 return dax_synchronous(dax_dev);
70 }
71 #else
72 static inline void dax_register_holder(struct dax_device *dax_dev, void *holder,
> 73 const struct dax_holder_operations *ops)
74 {
75 }
76 static inline void dax_unregister_holder(struct dax_device *dax_dev)
77 {
78 }
79 static inline void *dax_get_holder(struct dax_device *dax_dev)
80 {
81 return NULL;
82 }
83 static inline struct dax_device *alloc_dax(void *private,
84 const struct dax_operations *ops)
85 {
86 /*
87 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
88 * NULL is an error or expected.
89 */
90 return NULL;
91 }
92 static inline void put_dax(struct dax_device *dax_dev)
93 {
94 }
95 static inline void kill_dax(struct dax_device *dax_dev)
96 {
97 }
98 static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
99 {
100 }
101 static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
102 {
103 return false;
104 }
105 static inline bool dax_synchronous(struct dax_device *dax_dev)
106 {
107 return true;
108 }
109 static inline void set_dax_synchronous(struct dax_device *dax_dev)
110 {
111 }
112 static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
113 struct dax_device *dax_dev)
114 {
115 return !(vma->vm_flags & VM_SYNC);
116 }
117 #endif
118
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On Thu, Jan 27, 2022 at 08:40:50PM +0800, Shiyang Ruan wrote:
> +void dax_register_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops)
> +{
> + if (!dax_alive(dax_dev))
> + return;
> +
> + dax_dev->holder_data = holder;
> + dax_dev->holder_ops = ops;
This needs to return an error if there is another holder already. And
some kind of locking to prevent concurrent registrations.
Also please add kerneldoc comments for the new exported functions.
> +void *dax_get_holder(struct dax_device *dax_dev)
> +{
> + if (!dax_alive(dax_dev))
> + return NULL;
> +
> + return dax_dev->holder_data;
> +}
> +EXPORT_SYMBOL_GPL(dax_get_holder);
get tends to imply getting a reference. Maybe just dax_holder()?
That being said I can't see where we'd even want to use the holder
outside of this file.
v10.1 update:
- Fix build error reported by kernel test robot
- Add return code to dax_register_holder()
- Add write lock to prevent concurrent registrations
- Rename dax_get_holder() to dax_holder()
- Add kernel doc for new added functions
To easily track filesystem from a pmem device, we introduce a holder for
dax_device structure, and also its operation. This holder is used to
remember who is using this dax_device:
- When it is the backend of a filesystem, the holder will be the
instance of this filesystem.
- When this pmem device is one of the targets in a mapped device, the
holder will be this mapped device. In this case, the mapped device
has its own dax_device and it will follow the first rule. So that we
can finally track to the filesystem we needed.
The holder and holder_ops will be set when filesystem is being mounted,
or an target device is being activated.
Signed-off-by: Shiyang Ruan <[email protected]>
---
drivers/dax/super.c | 95 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/dax.h | 30 ++++++++++++++
2 files changed, 125 insertions(+)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e3029389d809..d7fb4c36bf16 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -21,6 +21,9 @@
* @cdev: optional character interface for "device dax"
* @private: dax driver private data
* @flags: state and boolean properties
+ * @ops: operations for dax_device
+ * @holder_data: holder of a dax_device: could be filesystem or mapped device
+ * @holder_ops: operations for the inner holder
*/
struct dax_device {
struct inode inode;
@@ -28,6 +31,9 @@ struct dax_device {
void *private;
unsigned long flags;
const struct dax_operations *ops;
+ void *holder_data;
+ struct percpu_rw_semaphore holder_rwsem;
+ const struct dax_holder_operations *holder_ops;
};
static dev_t dax_devt;
@@ -193,6 +199,29 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
}
EXPORT_SYMBOL_GPL(dax_zero_page_range);
+int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off,
+ u64 len, int mf_flags)
+{
+ int rc, id;
+
+ id = dax_read_lock();
+ if (!dax_alive(dax_dev)) {
+ rc = -ENXIO;
+ goto out;
+ }
+
+ if (!dax_dev->holder_ops) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ rc = dax_dev->holder_ops->notify_failure(dax_dev, off, len, mf_flags);
+out:
+ dax_read_unlock(id);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(dax_holder_notify_failure);
+
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
@@ -268,6 +297,13 @@ void kill_dax(struct dax_device *dax_dev)
clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
synchronize_srcu(&dax_srcu);
+
+ /* Lock to prevent concurrent registrations. */
+ percpu_down_write(&dax_dev->holder_rwsem);
+ /* clear holder data */
+ dax_dev->holder_ops = NULL;
+ dax_dev->holder_data = NULL;
+ percpu_up_write(&dax_dev->holder_rwsem);
}
EXPORT_SYMBOL_GPL(kill_dax);
@@ -393,6 +429,7 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
dax_dev->ops = ops;
dax_dev->private = private;
+ percpu_init_rwsem(&dax_dev->holder_rwsem);
return dax_dev;
err_dev:
@@ -409,6 +446,64 @@ void put_dax(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(put_dax);
+/**
+ * dax_holder() - obtain the holder of a dax device
+ * @dax_dev: a dax_device instance
+
+ * Return: the holder's data which represents the holder if registered,
+ * otherwize NULL.
+ */
+void *dax_holder(struct dax_device *dax_dev)
+{
+ if (!dax_alive(dax_dev))
+ return NULL;
+
+ return dax_dev->holder_data;
+}
+EXPORT_SYMBOL_GPL(dax_holder);
+
+/**
+ * dax_register_holder() - register a holder to a dax device
+ * @dax_dev: a dax_device instance
+ * @holder: a pointer to a holder's data which represents the holder
+ * @ops: operations of this holder
+
+ * Return: negative errno if an error occurs, otherwise 0.
+ */
+int dax_register_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops)
+{
+ if (!dax_alive(dax_dev))
+ return -ENXIO;
+
+ /* Already registered */
+ if (dax_holder(dax_dev))
+ return -EBUSY;
+
+ /* Lock to prevent concurrent registrations. */
+ percpu_down_write(&dax_dev->holder_rwsem);
+ dax_dev->holder_data = holder;
+ dax_dev->holder_ops = ops;
+ percpu_up_write(&dax_dev->holder_rwsem);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dax_register_holder);
+
+/**
+ * dax_unregister_holder() - unregister the holder for a dax device
+ * @dax_dev: a dax_device instance
+ */
+void dax_unregister_holder(struct dax_device *dax_dev)
+{
+ if (!dax_alive(dax_dev))
+ return;
+
+ dax_dev->holder_data = NULL;
+ dax_dev->holder_ops = NULL;
+}
+EXPORT_SYMBOL_GPL(dax_unregister_holder);
+
/**
* inode_dax: convert a public inode into its dax_dev
* @inode: An inode with i_cdev pointing to a dax_dev
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9fc5f99a0ae2..9800d84e5b7d 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -32,8 +32,24 @@ struct dax_operations {
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
};
+struct dax_holder_operations {
+ /*
+ * notify_failure - notify memory failure into inner holder device
+ * @dax_dev: the dax device which contains the holder
+ * @offset: offset on this dax device where memory failure occurs
+ * @len: length of this memory failure event
+ * @flags: action flags for memory failure handler
+ */
+ int (*notify_failure)(struct dax_device *dax_dev, u64 offset,
+ u64 len, int mf_flags);
+};
+
#if IS_ENABLED(CONFIG_DAX)
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
+int dax_register_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops);
+void dax_unregister_holder(struct dax_device *dax_dev);
+void *dax_holder(struct dax_device *dax_dev);
void put_dax(struct dax_device *dax_dev);
void kill_dax(struct dax_device *dax_dev);
void dax_write_cache(struct dax_device *dax_dev, bool wc);
@@ -53,6 +69,18 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
return dax_synchronous(dax_dev);
}
#else
+static inline int dax_register_holder(struct dax_device *dax_dev, void *holder,
+ const struct dax_holder_operations *ops)
+{
+ return 0;
+}
+static inline void dax_unregister_holder(struct dax_device *dax_dev)
+{
+}
+static inline void *dax_holder(struct dax_device *dax_dev)
+{
+ return NULL;
+}
static inline struct dax_device *alloc_dax(void *private,
const struct dax_operations *ops)
{
@@ -185,6 +213,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages);
+int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len,
+ int mf_flags);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
--
2.34.1
On Sun, Feb 13, 2022 at 4:58 AM Shiyang Ruan <[email protected]> wrote:
>
> v10.1 update:
> - Fix build error reported by kernel test robot
> - Add return code to dax_register_holder()
> - Add write lock to prevent concurrent registrations
> - Rename dax_get_holder() to dax_holder()
> - Add kernel doc for new added functions
>
> To easily track filesystem from a pmem device, we introduce a holder for
> dax_device structure, and also its operation. This holder is used to
> remember who is using this dax_device:
> - When it is the backend of a filesystem, the holder will be the
> instance of this filesystem.
> - When this pmem device is one of the targets in a mapped device, the
> holder will be this mapped device. In this case, the mapped device
> has its own dax_device and it will follow the first rule. So that we
> can finally track to the filesystem we needed.
>
> The holder and holder_ops will be set when filesystem is being mounted,
> or an target device is being activated.
>
> Signed-off-by: Shiyang Ruan <[email protected]>
> ---
> drivers/dax/super.c | 95 +++++++++++++++++++++++++++++++++++++++++++++
> include/linux/dax.h | 30 ++++++++++++++
> 2 files changed, 125 insertions(+)
>
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index e3029389d809..d7fb4c36bf16 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -21,6 +21,9 @@
> * @cdev: optional character interface for "device dax"
> * @private: dax driver private data
> * @flags: state and boolean properties
> + * @ops: operations for dax_device
> + * @holder_data: holder of a dax_device: could be filesystem or mapped device
> + * @holder_ops: operations for the inner holder
> */
> struct dax_device {
> struct inode inode;
> @@ -28,6 +31,9 @@ struct dax_device {
> void *private;
> unsigned long flags;
> const struct dax_operations *ops;
> + void *holder_data;
> + struct percpu_rw_semaphore holder_rwsem;
This lock is not necessary, see below...
> + const struct dax_holder_operations *holder_ops;
> };
>
> static dev_t dax_devt;
> @@ -193,6 +199,29 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> }
> EXPORT_SYMBOL_GPL(dax_zero_page_range);
>
> +int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off,
> + u64 len, int mf_flags)
> +{
> + int rc, id;
> +
> + id = dax_read_lock();
> + if (!dax_alive(dax_dev)) {
> + rc = -ENXIO;
> + goto out;
> + }
> +
> + if (!dax_dev->holder_ops) {
> + rc = -EOPNOTSUPP;
> + goto out;
> + }
> +
> + rc = dax_dev->holder_ops->notify_failure(dax_dev, off, len, mf_flags);
> +out:
> + dax_read_unlock(id);
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_holder_notify_failure);
> +
> #ifdef CONFIG_ARCH_HAS_PMEM_API
> void arch_wb_cache_pmem(void *addr, size_t size);
> void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
> @@ -268,6 +297,13 @@ void kill_dax(struct dax_device *dax_dev)
>
> clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
> synchronize_srcu(&dax_srcu);
> +
> + /* Lock to prevent concurrent registrations. */
> + percpu_down_write(&dax_dev->holder_rwsem);
> + /* clear holder data */
> + dax_dev->holder_ops = NULL;
> + dax_dev->holder_data = NULL;
> + percpu_up_write(&dax_dev->holder_rwsem);
> }
> EXPORT_SYMBOL_GPL(kill_dax);
>
> @@ -393,6 +429,7 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
>
> dax_dev->ops = ops;
> dax_dev->private = private;
> + percpu_init_rwsem(&dax_dev->holder_rwsem);
> return dax_dev;
>
> err_dev:
> @@ -409,6 +446,64 @@ void put_dax(struct dax_device *dax_dev)
> }
> EXPORT_SYMBOL_GPL(put_dax);
>
> +/**
> + * dax_holder() - obtain the holder of a dax device
> + * @dax_dev: a dax_device instance
> +
> + * Return: the holder's data which represents the holder if registered,
> + * otherwize NULL.
> + */
> +void *dax_holder(struct dax_device *dax_dev)
> +{
> + if (!dax_alive(dax_dev))
> + return NULL;
> +
> + return dax_dev->holder_data;
> +}
> +EXPORT_SYMBOL_GPL(dax_holder);
> +
> +/**
> + * dax_register_holder() - register a holder to a dax device
> + * @dax_dev: a dax_device instance
> + * @holder: a pointer to a holder's data which represents the holder
> + * @ops: operations of this holder
> +
> + * Return: negative errno if an error occurs, otherwise 0.
> + */
> +int dax_register_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops)
> +{
> + if (!dax_alive(dax_dev))
> + return -ENXIO;
> +
> + /* Already registered */
> + if (dax_holder(dax_dev))
> + return -EBUSY;
Delete this...
> +
> + /* Lock to prevent concurrent registrations. */
> + percpu_down_write(&dax_dev->holder_rwsem);
...and just use cmpxchg:
if (cmpxchg(&dax_dev->holder_data, NULL, holder))
return -EBUSY;
dax_dev->holder_ops = ops;
...and then on the release side you can require that the caller
specify @holder before clearing to make the unregistration symmetric:
if (cmpxchg(&dax_dev->holder_data, holder, NULL) != holder))
return -EBUSY;
dax_dev->holder_ops = NULL;
> + dax_dev->holder_data = holder;
> + dax_dev->holder_ops = ops;
> + percpu_up_write(&dax_dev->holder_rwsem);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(dax_register_holder);
> +
> +/**
> + * dax_unregister_holder() - unregister the holder for a dax device
> + * @dax_dev: a dax_device instance
> + */
> +void dax_unregister_holder(struct dax_device *dax_dev)
Per above, require the holder to pass in their holder_data again.
> +{
> + if (!dax_alive(dax_dev))
> + return;
> +
> + dax_dev->holder_data = NULL;
> + dax_dev->holder_ops = NULL;
> +}
> +EXPORT_SYMBOL_GPL(dax_unregister_holder);
> +
> /**
> * inode_dax: convert a public inode into its dax_dev
> * @inode: An inode with i_cdev pointing to a dax_dev
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 9fc5f99a0ae2..9800d84e5b7d 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -32,8 +32,24 @@ struct dax_operations {
> int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
> };
>
> +struct dax_holder_operations {
> + /*
> + * notify_failure - notify memory failure into inner holder device
> + * @dax_dev: the dax device which contains the holder
> + * @offset: offset on this dax device where memory failure occurs
> + * @len: length of this memory failure event
> + * @flags: action flags for memory failure handler
> + */
> + int (*notify_failure)(struct dax_device *dax_dev, u64 offset,
> + u64 len, int mf_flags);
> +};
> +
> #if IS_ENABLED(CONFIG_DAX)
> struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
> +int dax_register_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops);
> +void dax_unregister_holder(struct dax_device *dax_dev);
> +void *dax_holder(struct dax_device *dax_dev);
> void put_dax(struct dax_device *dax_dev);
> void kill_dax(struct dax_device *dax_dev);
> void dax_write_cache(struct dax_device *dax_dev, bool wc);
> @@ -53,6 +69,18 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
> return dax_synchronous(dax_dev);
> }
> #else
> +static inline int dax_register_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops)
> +{
> + return 0;
> +}
> +static inline void dax_unregister_holder(struct dax_device *dax_dev)
> +{
> +}
> +static inline void *dax_holder(struct dax_device *dax_dev)
> +{
> + return NULL;
> +}
> static inline struct dax_device *alloc_dax(void *private,
> const struct dax_operations *ops)
> {
> @@ -185,6 +213,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
> size_t bytes, struct iov_iter *i);
> int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> size_t nr_pages);
> +int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len,
> + int mf_flags);
> void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
>
> ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
> --
> 2.34.1
>
>
>