From: Prasad Singamsetty <[email protected]>
Extend statx system call to return additional info for atomic write support
support if the specified file is a block device.
Add initial support for a block device.
Signed-off-by: Prasad Singamsetty <[email protected]>
Signed-off-by: John Garry <[email protected]>
---
block/bdev.c | 31 ++++++++++++++++++---------
fs/stat.c | 44 ++++++++++++++++++++++++++++++++-------
include/linux/blkdev.h | 4 ++--
include/linux/fs.h | 3 +++
include/linux/stat.h | 2 ++
include/uapi/linux/stat.h | 7 ++++++-
6 files changed, 71 insertions(+), 20 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c
index 750aec178b6a..1b514df48dac 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1081,24 +1081,35 @@ void sync_bdevs(bool wait)
iput(old_inode);
}
+#define BDEV_STATX_SUPPORTED_MSK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
+
/*
- * Handle STATX_DIOALIGN for block devices.
- *
- * Note that the inode passed to this is the inode of a block device node file,
- * not the block device's internal inode. Therefore it is *not* valid to use
- * I_BDEV() here; the block device has to be looked up by i_rdev instead.
+ * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
*/
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
{
struct block_device *bdev;
- bdev = blkdev_get_no_open(inode->i_rdev);
+ if (!(request_mask & BDEV_STATX_SUPPORTED_MSK))
+ return;
+
+ bdev = blkdev_get_no_open(d_backing_inode(dentry)->i_rdev);
if (!bdev)
return;
- stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
- stat->result_mask |= STATX_DIOALIGN;
+ if (request_mask & STATX_DIOALIGN) {
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+ stat->dio_offset_align = bdev_logical_block_size(bdev);
+ stat->result_mask |= STATX_DIOALIGN;
+ }
+
+ if (request_mask & STATX_WRITE_ATOMIC) {
+ struct request_queue *bd_queue = bdev->bd_queue;
+
+ generic_fill_statx_atomic_writes(stat,
+ queue_atomic_write_unit_min_bytes(bd_queue),
+ queue_atomic_write_unit_max_bytes(bd_queue));
+ }
blkdev_put_no_open(bdev);
}
diff --git a/fs/stat.c b/fs/stat.c
index f721d26ec3f7..ad8f9235f1c9 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -89,6 +89,35 @@ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat)
}
EXPORT_SYMBOL(generic_fill_statx_attr);
+/**
+ * generic_fill_statx_atomic_writes - Fill in the atomic writes statx attributes
+ * @stat: Where to fill in the attribute flags
+ * @unit_min: Minimum supported atomic write length
+ * @unit_max: Maximum supported atomic write length
+ *
+ * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
+ * atomic write unit_min and unit_max values.
+ */
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max)
+{
+ /* Confirm that the request type is known */
+ stat->result_mask |= STATX_WRITE_ATOMIC;
+
+ /* Confirm that the file attribute type is known */
+ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC;
+
+ if (unit_min) {
+ stat->atomic_write_unit_min = unit_min;
+ stat->atomic_write_unit_max = unit_max;
+
+ /* Confirm atomic writes are actually supported */
+ stat->attributes |= STATX_ATTR_WRITE_ATOMIC;
+ }
+}
+EXPORT_SYMBOL(generic_fill_statx_atomic_writes);
+
/**
* vfs_getattr_nosec - getattr without security checks
* @path: file to get attributes from
@@ -254,13 +283,12 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
stat->attributes |= STATX_ATTR_MOUNT_ROOT;
stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
- /* Handle STATX_DIOALIGN for block devices. */
- if (request_mask & STATX_DIOALIGN) {
- struct inode *inode = d_backing_inode(path.dentry);
-
- if (S_ISBLK(inode->i_mode))
- bdev_statx_dioalign(inode, stat);
- }
+ /* If this is a block device inode, override the filesystem
+ * attributes with the block device specific parameters
+ * that need to be obtained from the bdev backing inode
+ */
+ if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
+ bdev_statx(path.dentry, stat, request_mask);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
@@ -653,6 +681,8 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_mnt_id = stat->mnt_id;
tmp.stx_dio_mem_align = stat->dio_mem_align;
tmp.stx_dio_offset_align = stat->dio_offset_align;
+ tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
+ tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ab53163dd187..ab7289995615 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1548,7 +1548,7 @@ int sync_blockdev(struct block_device *bdev);
int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask);
void printk_all_partitions(void);
int __init early_lookup_bdev(const char *pathname, dev_t *dev);
#else
@@ -1566,7 +1566,7 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
static inline void sync_bdevs(bool wait)
{
}
-static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+static inline void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
{
}
static inline void printk_all_partitions(void)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98b7a7a8c42e..70329c81be31 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3084,6 +3084,9 @@ extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 52150570d37a..ba690ff89efd 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -53,6 +53,8 @@ struct kstat {
u32 dio_mem_align;
u32 dio_offset_align;
u64 change_cookie;
+ u32 atomic_write_unit_min;
+ u32 atomic_write_unit_max;
};
/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 7cab2c65d3d7..64dfc1baa640 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -127,7 +127,10 @@ struct statx {
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
/* 0xa0 */
- __u64 __spare3[12]; /* Spare space for future expansion */
+ __u32 stx_atomic_write_unit_min;
+ __u32 stx_atomic_write_unit_max;
+ /* 0xb0 */
+ __u64 __spare3[11]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -154,6 +157,7 @@ struct statx {
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
+#define STATX_WRITE_ATOMIC 0x00004000U /* Want/got atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -189,6 +193,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
--
2.35.3
On Tue 12-12-23 11:08:31, John Garry wrote:
> From: Prasad Singamsetty <[email protected]>
>
> Extend statx system call to return additional info for atomic write support
> support if the specified file is a block device.
>
> Add initial support for a block device.
>
> Signed-off-by: Prasad Singamsetty <[email protected]>
> Signed-off-by: John Garry <[email protected]>
Just some nits below.
> +#define BDEV_STATX_SUPPORTED_MSK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
^^^
I believe saving one letter here is not
really beneficial so just spell out MASK here...
> /*
> - * Handle STATX_DIOALIGN for block devices.
> - *
> - * Note that the inode passed to this is the inode of a block device node file,
> - * not the block device's internal inode. Therefore it is *not* valid to use
> - * I_BDEV() here; the block device has to be looked up by i_rdev instead.
> + * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
> */
Please keep "Note ..." from the above comment (or you can move the note in
front of blkdev_get_no_open() if you want).
Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR
On 13/12/2023 10:24, Jan Kara wrote:
>> Signed-off-by: Prasad Singamsetty<[email protected]>
>> Signed-off-by: John Garry<[email protected]>
> Just some nits below.
>
>> +#define BDEV_STATX_SUPPORTED_MSK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
> ^^^
> I believe saving one letter here is not
> really beneficial so just spell out MASK here...
ok
>
>> /*
>> - * Handle STATX_DIOALIGN for block devices.
>> - *
>> - * Note that the inode passed to this is the inode of a block device node file,
>> - * not the block device's internal inode. Therefore it is*not* valid to use
>> - * I_BDEV() here; the block device has to be looked up by i_rdev instead.
>> + * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
>> */
> Please keep "Note ..." from the above comment (or you can move the note in
> front of blkdev_get_no_open() if you want).
ok, fine, I think that moving it to in front of blkdev_get_no_open() may
be best.
Thanks,
John