Hi all,
Al pointed out multiple times that seq_get_buf is highly dangerous as
it opens up the tight seq_file abstractions to buffer overflows. The
last such caller now is sysfs.
This series allows attributes to implement a seq_show method and switch
the block and XFS code as users that I'm most familiar with to use
seq_files directly after a few preparatory cleanups. With this series
"leaf" users of sysfs_ops can be converted one at a time, after that
we can move the seq_get_buf into the multiplexers (e.g. kobj, device,
class attributes) and remove the show method in sysfs_ops and repeat the
process until all attributes are converted. This will probably take a
fair amount of time.
Diffstat:
block/bfq-iosched.c | 12 +-
block/blk-integrity.c | 44 +++++----
block/blk-mq-sysfs.c | 64 ++++++--------
block/blk-sysfs.c | 209 ++++++++++++++++++++++++++---------------------
block/blk-throttle.c | 5 -
block/blk.h | 2
block/elevator.c | 42 +++++----
block/kyber-iosched.c | 7 -
block/mq-deadline.c | 5 -
fs/sysfs/file.c | 135 +++++++++++++++++-------------
fs/sysfs/group.c | 15 +--
fs/sysfs/sysfs.h | 8 +
fs/xfs/xfs_error.c | 14 +--
fs/xfs/xfs_stats.c | 24 ++---
fs/xfs/xfs_stats.h | 2
fs/xfs/xfs_sysfs.c | 96 ++++++++++-----------
include/linux/elevator.h | 4
include/linux/kernfs.h | 28 ------
include/linux/seq_file.h | 4
include/linux/sysfs.h | 9 +-
20 files changed, 376 insertions(+), 353 deletions(-)
This function pokes a big hole into the seq_file abstraction.
Signed-off-by: Christoph Hellwig <[email protected]>
---
include/linux/seq_file.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dd99569595fd3..db16b11477875 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -59,6 +59,10 @@ static inline bool seq_has_overflowed(struct seq_file *m)
*
* Return the number of bytes available in the buffer, or zero if
* there's no space.
+ *
+ * DO NOT USE IN NEW CODE! This function pokes a hole into the whole seq_file
+ * abstraction. The only remaining user outside of seq_file.c is sysfs, which
+ * is gradually moving away from using seq_get_buf directly.
*/
static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
{
--
2.30.2
All callers actually use __kernfs_create_file.
Signed-off-by: Christoph Hellwig <[email protected]>
---
include/linux/kernfs.h | 24 ------------------------
1 file changed, 24 deletions(-)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 1093abf7c28cc..cecfeedb7361d 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -568,30 +568,6 @@ kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
priv, NULL);
}
-static inline struct kernfs_node *
-kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
- umode_t mode, kuid_t uid, kgid_t gid,
- loff_t size, const struct kernfs_ops *ops,
- void *priv, const void *ns)
-{
- struct lock_class_key *key = NULL;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- key = (struct lock_class_key *)&ops->lockdep_key;
-#endif
- return __kernfs_create_file(parent, name, mode, uid, gid,
- size, ops, priv, ns, key);
-}
-
-static inline struct kernfs_node *
-kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
- loff_t size, const struct kernfs_ops *ops, void *priv)
-{
- return kernfs_create_file_ns(parent, name, mode,
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
- size, ops, priv, NULL);
-}
-
static inline int kernfs_remove_by_name(struct kernfs_node *parent,
const char *name)
{
--
2.30.2
Split adding binary attributes into a separate handler instead of
overloading sysfs_add_file_mode_ns.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/sysfs/file.c | 120 ++++++++++++++++++++++++++---------------------
fs/sysfs/group.c | 15 +++---
fs/sysfs/sysfs.h | 8 ++--
3 files changed, 78 insertions(+), 65 deletions(-)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d019d6ac6ad09..f737bd61f71bf 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -255,59 +255,73 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
};
int sysfs_add_file_mode_ns(struct kernfs_node *parent,
- const struct attribute *attr, bool is_bin,
- umode_t mode, kuid_t uid, kgid_t gid, const void *ns)
+ const struct attribute *attr, umode_t mode, kuid_t uid,
+ kgid_t gid, const void *ns)
{
+ struct kobject *kobj = parent->priv;
+ const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
struct lock_class_key *key = NULL;
const struct kernfs_ops *ops;
struct kernfs_node *kn;
- loff_t size;
-
- if (!is_bin) {
- struct kobject *kobj = parent->priv;
- const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
-
- /* every kobject with an attribute needs a ktype assigned */
- if (WARN(!sysfs_ops, KERN_ERR
- "missing sysfs attribute operations for kobject: %s\n",
- kobject_name(kobj)))
- return -EINVAL;
-
- if (sysfs_ops->show && sysfs_ops->store) {
- if (mode & SYSFS_PREALLOC)
- ops = &sysfs_prealloc_kfops_rw;
- else
- ops = &sysfs_file_kfops_rw;
- } else if (sysfs_ops->show) {
- if (mode & SYSFS_PREALLOC)
- ops = &sysfs_prealloc_kfops_ro;
- else
- ops = &sysfs_file_kfops_ro;
- } else if (sysfs_ops->store) {
- if (mode & SYSFS_PREALLOC)
- ops = &sysfs_prealloc_kfops_wo;
- else
- ops = &sysfs_file_kfops_wo;
- } else
- ops = &sysfs_file_kfops_empty;
-
- size = PAGE_SIZE;
- } else {
- struct bin_attribute *battr = (void *)attr;
-
- if (battr->mmap)
- ops = &sysfs_bin_kfops_mmap;
- else if (battr->read && battr->write)
- ops = &sysfs_bin_kfops_rw;
- else if (battr->read)
- ops = &sysfs_bin_kfops_ro;
- else if (battr->write)
- ops = &sysfs_bin_kfops_wo;
+
+ /* every kobject with an attribute needs a ktype assigned */
+ if (WARN(!sysfs_ops, KERN_ERR
+ "missing sysfs attribute operations for kobject: %s\n",
+ kobject_name(kobj)))
+ return -EINVAL;
+
+ if (sysfs_ops->show && sysfs_ops->store) {
+ if (mode & SYSFS_PREALLOC)
+ ops = &sysfs_prealloc_kfops_rw;
else
- ops = &sysfs_file_kfops_empty;
+ ops = &sysfs_file_kfops_rw;
+ } else if (sysfs_ops->show) {
+ if (mode & SYSFS_PREALLOC)
+ ops = &sysfs_prealloc_kfops_ro;
+ else
+ ops = &sysfs_file_kfops_ro;
+ } else if (sysfs_ops->store) {
+ if (mode & SYSFS_PREALLOC)
+ ops = &sysfs_prealloc_kfops_wo;
+ else
+ ops = &sysfs_file_kfops_wo;
+ } else
+ ops = &sysfs_file_kfops_empty;
- size = battr->size;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ if (!attr->ignore_lockdep)
+ key = attr->key ?: (struct lock_class_key *)&attr->skey;
+#endif
+
+ kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid,
+ PAGE_SIZE, ops, (void *)attr, ns, key);
+ if (IS_ERR(kn)) {
+ if (PTR_ERR(kn) == -EEXIST)
+ sysfs_warn_dup(parent, attr->name);
+ return PTR_ERR(kn);
}
+ return 0;
+}
+
+int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent,
+ const struct bin_attribute *battr, umode_t mode,
+ kuid_t uid, kgid_t gid, const void *ns)
+{
+ const struct attribute *attr = &battr->attr;
+ struct lock_class_key *key = NULL;
+ const struct kernfs_ops *ops;
+ struct kernfs_node *kn;
+
+ if (battr->mmap)
+ ops = &sysfs_bin_kfops_mmap;
+ else if (battr->read && battr->write)
+ ops = &sysfs_bin_kfops_rw;
+ else if (battr->read)
+ ops = &sysfs_bin_kfops_ro;
+ else if (battr->write)
+ ops = &sysfs_bin_kfops_wo;
+ else
+ ops = &sysfs_file_kfops_empty;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
if (!attr->ignore_lockdep)
@@ -315,7 +329,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
#endif
kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid,
- size, ops, (void *)attr, ns, key);
+ battr->size, ops, (void *)attr, ns, key);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
sysfs_warn_dup(parent, attr->name);
@@ -340,9 +354,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
- return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
- uid, gid, ns);
-
+ return sysfs_add_file_mode_ns(kobj->sd, attr, attr->mode, uid, gid, ns);
}
EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -385,8 +397,8 @@ int sysfs_add_file_to_group(struct kobject *kobj,
return -ENOENT;
kobject_get_ownership(kobj, &uid, &gid);
- error = sysfs_add_file_mode_ns(parent, attr, false,
- attr->mode, uid, gid, NULL);
+ error = sysfs_add_file_mode_ns(parent, attr, attr->mode, uid, gid,
+ NULL);
kernfs_put(parent);
return error;
@@ -555,8 +567,8 @@ int sysfs_create_bin_file(struct kobject *kobj,
return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
- return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
- attr->attr.mode, uid, gid, NULL);
+ return sysfs_add_bin_file_mode_ns(kobj->sd, attr, attr->attr.mode, uid,
+ gid, NULL);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f29d620045272..eeb0e30994215 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -61,8 +61,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
(*attr)->name, mode);
mode &= SYSFS_PREALLOC | 0664;
- error = sysfs_add_file_mode_ns(parent, *attr, false,
- mode, uid, gid, NULL);
+ error = sysfs_add_file_mode_ns(parent, *attr, mode, uid,
+ gid, NULL);
if (unlikely(error))
break;
}
@@ -90,10 +90,9 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
(*bin_attr)->attr.name, mode);
mode &= SYSFS_PREALLOC | 0664;
- error = sysfs_add_file_mode_ns(parent,
- &(*bin_attr)->attr, true,
- mode,
- uid, gid, NULL);
+ error = sysfs_add_bin_file_mode_ns(parent, *bin_attr,
+ mode, uid, gid,
+ NULL);
if (error)
break;
}
@@ -340,8 +339,8 @@ int sysfs_merge_group(struct kobject *kobj,
kobject_get_ownership(kobj, &uid, &gid);
for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
- error = sysfs_add_file_mode_ns(parent, *attr, false,
- (*attr)->mode, uid, gid, NULL);
+ error = sysfs_add_file_mode_ns(parent, *attr, (*attr)->mode,
+ uid, gid, NULL);
if (error) {
while (--i >= 0)
kernfs_remove_by_name(parent, (*--attr)->name);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0050cc0c0236d..3f28c9af57562 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -28,9 +28,11 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
* file.c
*/
int sysfs_add_file_mode_ns(struct kernfs_node *parent,
- const struct attribute *attr, bool is_bin,
- umode_t amode, kuid_t uid, kgid_t gid,
- const void *ns);
+ const struct attribute *attr, umode_t amode, kuid_t uid,
+ kgid_t gid, const void *ns);
+int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent,
+ const struct bin_attribute *battr, umode_t mode,
+ kuid_t uid, kgid_t gid, const void *ns);
/*
* symlink.c
--
2.30.2
Not actually used anywhere.
Signed-off-by: Christoph Hellwig <[email protected]>
---
include/linux/kernfs.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index cecfeedb7361d..3ccce6f245484 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -269,10 +269,6 @@ struct kernfs_ops {
struct poll_table_struct *pt);
int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lock_class_key lockdep_key;
-#endif
};
/*
--
2.30.2
Regroup the code so that preallocated attributes and normal attributes are
handled in clearly separate blocks.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/sysfs/file.c | 27 ++++++++++++++-------------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f737bd61f71bf..74a2a8021c8bb 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -261,7 +261,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
struct kobject *kobj = parent->priv;
const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
struct lock_class_key *key = NULL;
- const struct kernfs_ops *ops;
+ const struct kernfs_ops *ops = NULL;
struct kernfs_node *kn;
/* every kobject with an attribute needs a ktype assigned */
@@ -270,22 +270,23 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
kobject_name(kobj)))
return -EINVAL;
- if (sysfs_ops->show && sysfs_ops->store) {
- if (mode & SYSFS_PREALLOC)
+ if (mode & SYSFS_PREALLOC) {
+ if (sysfs_ops->show && sysfs_ops->store)
ops = &sysfs_prealloc_kfops_rw;
- else
- ops = &sysfs_file_kfops_rw;
- } else if (sysfs_ops->show) {
- if (mode & SYSFS_PREALLOC)
+ else if (sysfs_ops->show)
ops = &sysfs_prealloc_kfops_ro;
- else
- ops = &sysfs_file_kfops_ro;
- } else if (sysfs_ops->store) {
- if (mode & SYSFS_PREALLOC)
+ else if (sysfs_ops->store)
ops = &sysfs_prealloc_kfops_wo;
- else
+ } else {
+ if (sysfs_ops->show && sysfs_ops->store)
+ ops = &sysfs_file_kfops_rw;
+ else if (sysfs_ops->show)
+ ops = &sysfs_file_kfops_ro;
+ else if (sysfs_ops->store)
ops = &sysfs_file_kfops_wo;
- } else
+ }
+
+ if (!ops)
ops = &sysfs_file_kfops_empty;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
--
2.30.2
Allow attributes to directly use the seq_file method instead of
carving out a buffer that can easily lead to buffer overflows.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/sysfs/file.c | 19 ++++++++++++++-----
include/linux/sysfs.h | 9 +++++++--
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 42dcf96881b68..12e0bfe40a2b4 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -45,6 +45,9 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
ssize_t count;
char *buf;
+ if (ops->seq_show)
+ return ops->seq_show(kobj, of->kn->priv, sf);
+
if (WARN_ON_ONCE(!ops->show))
return -EINVAL;
@@ -268,6 +271,10 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
return -EINVAL;
if (mode & SYSFS_PREALLOC) {
+ if (WARN(sysfs_ops->seq_show, KERN_ERR
+ "seq_show not supported on prealloc file: %s\n",
+ kobject_name(kobj)))
+ return -EINVAL;
if (sysfs_ops->show && sysfs_ops->store)
ops = &sysfs_prealloc_kfops_rw;
else if (sysfs_ops->show)
@@ -275,12 +282,14 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
else if (sysfs_ops->store)
ops = &sysfs_prealloc_kfops_wo;
} else {
- if (sysfs_ops->show && sysfs_ops->store)
- ops = &sysfs_file_kfops_rw;
- else if (sysfs_ops->show)
- ops = &sysfs_file_kfops_ro;
- else if (sysfs_ops->store)
+ if (sysfs_ops->seq_show || sysfs_ops->show) {
+ if (sysfs_ops->store)
+ ops = &sysfs_file_kfops_rw;
+ else
+ ops = &sysfs_file_kfops_ro;
+ } else if (sysfs_ops->store) {
ops = &sysfs_file_kfops_wo;
+ }
}
if (!ops)
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index e3f1e8ac1f85b..e1ab4da716730 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -236,8 +236,13 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size)
struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
struct sysfs_ops {
- ssize_t (*show)(struct kobject *, struct attribute *, char *);
- ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+ int (*seq_show)(struct kobject *kobj, struct attribute *attr,
+ struct seq_file *sf);
+ ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t size);
+
+ /* deprecated except for preallocated attributes: */
+ ssize_t (*show)(struct kobject *kobj, struct attribute *attr, char *buf);
};
#ifdef CONFIG_SYSFS
--
2.30.2
Contrary to the comment ->show is never called from lseek for sysfs,
given that sysfs does not use seq_lseek. So remove the NULL ->show
case and just WARN and return an error if some future code path ends
up here.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/sysfs/file.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 74a2a8021c8bb..42dcf96881b68 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -45,6 +45,9 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
ssize_t count;
char *buf;
+ if (WARN_ON_ONCE(!ops->show))
+ return -EINVAL;
+
/* acquire buffer and ensure that it's >= PAGE_SIZE and clear */
count = seq_get_buf(sf, &buf);
if (count < PAGE_SIZE) {
@@ -53,15 +56,9 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
}
memset(buf, 0, PAGE_SIZE);
- /*
- * Invoke show(). Control may reach here via seq file lseek even
- * if @ops->show() isn't implemented.
- */
- if (ops->show) {
- count = ops->show(kobj, of->kn->priv, buf);
- if (count < 0)
- return count;
- }
+ count = ops->show(kobj, of->kn->priv, buf);
+ if (count < 0)
+ return count;
/*
* The code works fine with PAGE_SIZE return but it's likely to
--
2.30.2
Trivial conversion to the seq_file based sysfs attributes.
Signed-off-by: Christoph Hellwig <[email protected]>
---
block/blk-mq-sysfs.c | 64 +++++++++++++++++++-------------------------
1 file changed, 28 insertions(+), 36 deletions(-)
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 253c857cba47c..cae649b83bd54 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -47,29 +47,27 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
struct blk_mq_hw_ctx_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
- ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
+ void (*show)(struct blk_mq_hw_ctx *hctx, struct seq_file *sf);
+ ssize_t (*store)(struct blk_mq_hw_ctx *hctx, const char *buf,
+ size_t size);
};
-static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
- struct attribute *attr, char *page)
+static int blk_mq_hw_sysfs_seq_show(struct kobject *kobj,
+ struct attribute *attr, struct seq_file *sf)
{
- struct blk_mq_hw_ctx_sysfs_entry *entry;
- struct blk_mq_hw_ctx *hctx;
- struct request_queue *q;
- ssize_t res;
-
- entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
- hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
- q = hctx->queue;
+ struct blk_mq_hw_ctx_sysfs_entry *entry =
+ container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
+ struct blk_mq_hw_ctx *hctx =
+ container_of(kobj, struct blk_mq_hw_ctx, kobj);
+ struct request_queue *q = hctx->queue;
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
- res = entry->show(hctx, page);
+ entry->show(hctx, sf);
mutex_unlock(&q->sysfs_lock);
- return res;
+ return 0;
}
static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
@@ -94,39 +92,33 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
return res;
}
-static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
- char *page)
+static void blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
+ struct seq_file *sf)
{
- return sprintf(page, "%u\n", hctx->tags->nr_tags);
+ seq_printf(sf, "%u\n", hctx->tags->nr_tags);
}
-static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
- char *page)
+static void blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
+ struct seq_file *sf)
{
- return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags);
+ seq_printf(sf, "%u\n", hctx->tags->nr_reserved_tags);
}
-static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
+static void blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx,
+ struct seq_file *sf)
{
- const size_t size = PAGE_SIZE - 1;
- unsigned int i, first = 1;
- int ret = 0, pos = 0;
+ bool first = true;
+ unsigned int i;
for_each_cpu(i, hctx->cpumask) {
if (first)
- ret = snprintf(pos + page, size - pos, "%u", i);
+ seq_printf(sf, "%u", i);
else
- ret = snprintf(pos + page, size - pos, ", %u", i);
-
- if (ret >= size - pos)
- break;
-
- first = 0;
- pos += ret;
+ seq_printf(sf, ", %u", i);
+ first = false;
}
- ret = snprintf(pos + page, size + 1 - pos, "\n");
- return pos + ret;
+ seq_printf(sf, "\n");
}
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
@@ -151,8 +143,8 @@ static struct attribute *default_hw_ctx_attrs[] = {
ATTRIBUTE_GROUPS(default_hw_ctx);
static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
- .show = blk_mq_hw_sysfs_show,
- .store = blk_mq_hw_sysfs_store,
+ .seq_show = blk_mq_hw_sysfs_seq_show,
+ .store = blk_mq_hw_sysfs_store,
};
static struct kobj_type blk_mq_ktype = {
--
2.30.2
Trivial conversion to the seq_file based sysfs attributes.
Signed-off-by: Christoph Hellwig <[email protected]>
---
block/blk-integrity.c | 44 ++++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 21 deletions(-)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 69a12177dfb62..dca753b395539 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -214,19 +214,20 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
struct integrity_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(struct blk_integrity *, char *);
+ void (*show)(struct blk_integrity *bi, struct seq_file *sf);
ssize_t (*store)(struct blk_integrity *, const char *, size_t);
};
-static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
- char *page)
+static int integrity_attr_seq_show(struct kobject *kobj,
+ struct attribute *attr, struct seq_file *sf)
{
struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj);
struct blk_integrity *bi = &disk->queue->integrity;
struct integrity_sysfs_entry *entry =
container_of(attr, struct integrity_sysfs_entry, attr);
- return entry->show(bi, page);
+ entry->show(bi, sf);
+ return 0;
}
static ssize_t integrity_attr_store(struct kobject *kobj,
@@ -245,23 +246,24 @@ static ssize_t integrity_attr_store(struct kobject *kobj,
return ret;
}
-static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
+static void integrity_format_show(struct blk_integrity *bi, struct seq_file *sf)
{
if (bi->profile && bi->profile->name)
- return sprintf(page, "%s\n", bi->profile->name);
+ seq_printf(sf, "%s\n", bi->profile->name);
else
- return sprintf(page, "none\n");
+ seq_printf(sf, "none\n");
}
-static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
+static void integrity_tag_size_show(struct blk_integrity *bi,
+ struct seq_file *sf)
{
- return sprintf(page, "%u\n", bi->tag_size);
+ seq_printf(sf, "%u\n", bi->tag_size);
}
-static ssize_t integrity_interval_show(struct blk_integrity *bi, char *page)
+static void integrity_interval_show(struct blk_integrity *bi,
+ struct seq_file *sf)
{
- return sprintf(page, "%u\n",
- bi->interval_exp ? 1 << bi->interval_exp : 0);
+ seq_printf(sf, "%u\n", bi->interval_exp ? 1 << bi->interval_exp : 0);
}
static ssize_t integrity_verify_store(struct blk_integrity *bi,
@@ -278,9 +280,9 @@ static ssize_t integrity_verify_store(struct blk_integrity *bi,
return count;
}
-static ssize_t integrity_verify_show(struct blk_integrity *bi, char *page)
+static void integrity_verify_show(struct blk_integrity *bi, struct seq_file *sf)
{
- return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0);
+ seq_printf(sf, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0);
}
static ssize_t integrity_generate_store(struct blk_integrity *bi,
@@ -297,15 +299,15 @@ static ssize_t integrity_generate_store(struct blk_integrity *bi,
return count;
}
-static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page)
+static void integrity_generate_show(struct blk_integrity *bi,
+ struct seq_file *sf)
{
- return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0);
+ seq_printf(sf, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0);
}
-static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
+static void integrity_device_show(struct blk_integrity *bi, struct seq_file *sf)
{
- return sprintf(page, "%u\n",
- (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0);
+ seq_printf(sf, "%u\n", (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0);
}
static struct integrity_sysfs_entry integrity_format_entry = {
@@ -352,8 +354,8 @@ static struct attribute *integrity_attrs[] = {
ATTRIBUTE_GROUPS(integrity);
static const struct sysfs_ops integrity_ops = {
- .show = &integrity_attr_show,
- .store = &integrity_attr_store,
+ .seq_show = &integrity_attr_seq_show,
+ .store = &integrity_attr_store,
};
static struct kobj_type integrity_ktype = {
--
2.30.2
Trivial conversion to the seq_file based sysfs attributes.
Signed-off-by: Christoph Hellwig <[email protected]>
---
block/blk-sysfs.c | 209 ++++++++++++++++++++++-----------------
block/blk-throttle.c | 5 +-
block/blk.h | 2 +-
block/elevator.c | 21 ++--
include/linux/elevator.h | 2 +-
5 files changed, 134 insertions(+), 105 deletions(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 614d9d47de36b..ac57256db8710 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -20,14 +20,14 @@
struct queue_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(struct request_queue *, char *);
+ int (*show)(struct request_queue *q, struct seq_file *sf);
ssize_t (*store)(struct request_queue *, const char *, size_t);
};
-static ssize_t
-queue_var_show(unsigned long var, char *page)
+static int queue_var_show(unsigned long var, struct seq_file *sf)
{
- return sprintf(page, "%lu\n", var);
+ seq_printf(sf, "%lu\n", var);
+ return 0;
}
static ssize_t
@@ -58,9 +58,9 @@ static ssize_t queue_var_store64(s64 *var, const char *page)
return 0;
}
-static ssize_t queue_requests_show(struct request_queue *q, char *page)
+static int queue_requests_show(struct request_queue *q, struct seq_file *sf)
{
- return queue_var_show(q->nr_requests, page);
+ return queue_var_show(q->nr_requests, sf);
}
static ssize_t
@@ -86,14 +86,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
-static ssize_t queue_ra_show(struct request_queue *q, char *page)
+static int queue_ra_show(struct request_queue *q, struct seq_file *sf)
{
unsigned long ra_kb;
if (!q->disk)
return -EINVAL;
ra_kb = q->disk->bdi->ra_pages << (PAGE_SHIFT - 10);
- return queue_var_show(ra_kb, page);
+ return queue_var_show(ra_kb, sf);
}
static ssize_t
@@ -111,75 +111,83 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
-static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
+static int queue_max_sectors_show(struct request_queue *q, struct seq_file *sf)
{
int max_sectors_kb = queue_max_sectors(q) >> 1;
- return queue_var_show(max_sectors_kb, page);
+ return queue_var_show(max_sectors_kb, sf);
}
-static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
+static int queue_max_segments_show(struct request_queue *q, struct seq_file *sf)
{
- return queue_var_show(queue_max_segments(q), page);
+ return queue_var_show(queue_max_segments(q), sf);
}
-static ssize_t queue_max_discard_segments_show(struct request_queue *q,
- char *page)
+static int queue_max_discard_segments_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(queue_max_discard_segments(q), page);
+ return queue_var_show(queue_max_discard_segments(q), sf);
}
-static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
+static int queue_max_integrity_segments_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(q->limits.max_integrity_segments, page);
+ return queue_var_show(q->limits.max_integrity_segments, sf);
}
-static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
+static int queue_max_segment_size_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(queue_max_segment_size(q), page);
+ return queue_var_show(queue_max_segment_size(q), sf);
}
-static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
+static int queue_logical_block_size_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(queue_logical_block_size(q), page);
+ return queue_var_show(queue_logical_block_size(q), sf);
}
-static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
+static int queue_physical_block_size_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(queue_physical_block_size(q), page);
+ return queue_var_show(queue_physical_block_size(q), sf);
}
-static ssize_t queue_chunk_sectors_show(struct request_queue *q, char *page)
+static int queue_chunk_sectors_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(q->limits.chunk_sectors, page);
+ return queue_var_show(q->limits.chunk_sectors, sf);
}
-static ssize_t queue_io_min_show(struct request_queue *q, char *page)
+static int queue_io_min_show(struct request_queue *q, struct seq_file *sf)
{
- return queue_var_show(queue_io_min(q), page);
+ return queue_var_show(queue_io_min(q), sf);
}
-static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
+static int queue_io_opt_show(struct request_queue *q, struct seq_file *sf)
{
- return queue_var_show(queue_io_opt(q), page);
+ return queue_var_show(queue_io_opt(q), sf);
}
-static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
+static int queue_discard_granularity_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(q->limits.discard_granularity, page);
+ return queue_var_show(q->limits.discard_granularity, sf);
}
-static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
+static int queue_discard_max_hw_show(struct request_queue *q,
+ struct seq_file *sf)
{
-
- return sprintf(page, "%llu\n",
+ seq_printf(sf, "%llu\n",
(unsigned long long)q->limits.max_hw_discard_sectors << 9);
+ return 0;
}
-static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
+static int queue_discard_max_show(struct request_queue *q, struct seq_file *sf)
{
- return sprintf(page, "%llu\n",
- (unsigned long long)q->limits.max_discard_sectors << 9);
+ seq_printf(sf, "%llu\n",
+ (unsigned long long)q->limits.max_discard_sectors << 9);
+ return 0;
}
static ssize_t queue_discard_max_store(struct request_queue *q,
@@ -205,34 +213,41 @@ static ssize_t queue_discard_max_store(struct request_queue *q,
return ret;
}
-static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
+static int queue_discard_zeroes_data_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(0, page);
+ return queue_var_show(0, sf);
}
-static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
+static int queue_write_same_max_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return sprintf(page, "%llu\n",
+ seq_printf(sf, "%llu\n",
(unsigned long long)q->limits.max_write_same_sectors << 9);
+ return 0;
}
-static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
+static int queue_write_zeroes_max_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return sprintf(page, "%llu\n",
+ seq_printf(sf, "%llu\n",
(unsigned long long)q->limits.max_write_zeroes_sectors << 9);
+ return 0;
}
-static ssize_t queue_zone_write_granularity_show(struct request_queue *q,
- char *page)
+static int queue_zone_write_granularity_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(queue_zone_write_granularity(q), page);
+ return queue_var_show(queue_zone_write_granularity(q), sf);
}
-static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
+static int queue_zone_append_max_show(struct request_queue *q,
+ struct seq_file *sf)
{
unsigned long long max_sectors = q->limits.max_zone_append_sectors;
- return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
+ seq_printf(sf, "%llu\n", max_sectors << SECTOR_SHIFT);
+ return 0;
}
static ssize_t
@@ -261,25 +276,27 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
-static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
+static int queue_max_hw_sectors_show(struct request_queue *q,
+ struct seq_file *sf)
{
int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
- return queue_var_show(max_hw_sectors_kb, page);
+ return queue_var_show(max_hw_sectors_kb, sf);
}
-static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page)
+static int queue_virt_boundary_mask_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(q->limits.virt_boundary_mask, page);
+ return queue_var_show(q->limits.virt_boundary_mask, sf);
}
#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
-static ssize_t \
-queue_##name##_show(struct request_queue *q, char *page) \
+static int \
+queue_##name##_show(struct request_queue *q, struct seq_file *sf) \
{ \
int bit; \
bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \
- return queue_var_show(neg ? !bit : bit, page); \
+ return queue_var_show(neg ? !bit : bit, sf); \
} \
static ssize_t \
queue_##name##_store(struct request_queue *q, const char *page, size_t count) \
@@ -305,37 +322,43 @@ QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
#undef QUEUE_SYSFS_BIT_FNS
-static ssize_t queue_zoned_show(struct request_queue *q, char *page)
+static int queue_zoned_show(struct request_queue *q,
+ struct seq_file *sf)
{
switch (blk_queue_zoned_model(q)) {
case BLK_ZONED_HA:
- return sprintf(page, "host-aware\n");
+ seq_printf(sf, "host-aware\n");
+ return 0;
case BLK_ZONED_HM:
- return sprintf(page, "host-managed\n");
+ seq_printf(sf, "host-managed\n");
+ return 0;
default:
- return sprintf(page, "none\n");
+ seq_printf(sf, "none\n");
+ return 0;
}
}
-static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
+static int queue_nr_zones_show(struct request_queue *q, struct seq_file *sf)
{
- return queue_var_show(blk_queue_nr_zones(q), page);
+ return queue_var_show(blk_queue_nr_zones(q), sf);
}
-static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
+static int queue_max_open_zones_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(queue_max_open_zones(q), page);
+ return queue_var_show(queue_max_open_zones(q), sf);
}
-static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page)
+static int queue_max_active_zones_show(struct request_queue *q,
+ struct seq_file *sf)
{
- return queue_var_show(queue_max_active_zones(q), page);
+ return queue_var_show(queue_max_active_zones(q), sf);
}
-static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
+static int queue_nomerges_show(struct request_queue *q, struct seq_file *sf)
{
return queue_var_show((blk_queue_nomerges(q) << 1) |
- blk_queue_noxmerges(q), page);
+ blk_queue_noxmerges(q), sf);
}
static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
@@ -357,12 +380,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
return ret;
}
-static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+static int queue_rq_affinity_show(struct request_queue *q, struct seq_file *sf)
{
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
- return queue_var_show(set << force, page);
+ return queue_var_show(set << force, sf);
}
static ssize_t
@@ -390,7 +413,7 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
-static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
+static int queue_poll_delay_show(struct request_queue *q, struct seq_file *sf)
{
int val;
@@ -399,7 +422,8 @@ static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
else
val = q->poll_nsec / 1000;
- return sprintf(page, "%d\n", val);
+ seq_printf(sf, "%d\n", val);
+ return 0;
}
static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
@@ -424,9 +448,9 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
return count;
}
-static ssize_t queue_poll_show(struct request_queue *q, char *page)
+static int queue_poll_show(struct request_queue *q, struct seq_file *sf)
{
- return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
+ return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), sf);
}
static ssize_t queue_poll_store(struct request_queue *q, const char *page,
@@ -454,9 +478,10 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
return ret;
}
-static ssize_t queue_io_timeout_show(struct request_queue *q, char *page)
+static int queue_io_timeout_show(struct request_queue *q, struct seq_file *sf)
{
- return sprintf(page, "%u\n", jiffies_to_msecs(q->rq_timeout));
+ seq_printf(sf, "%u\n", jiffies_to_msecs(q->rq_timeout));
+ return 0;
}
static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
@@ -474,12 +499,12 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
return count;
}
-static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+static int queue_wb_lat_show(struct request_queue *q, struct seq_file *sf)
{
if (!wbt_rq_qos(q))
return -EINVAL;
-
- return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+ seq_printf(sf, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+ return 0;
}
static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
@@ -526,12 +551,13 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
return count;
}
-static ssize_t queue_wc_show(struct request_queue *q, char *page)
+static int queue_wc_show(struct request_queue *q, struct seq_file *sf)
{
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
- return sprintf(page, "write back\n");
-
- return sprintf(page, "write through\n");
+ seq_printf(sf, "write back\n");
+ else
+ seq_printf(sf, "write through\n");
+ return 0;
}
static ssize_t queue_wc_store(struct request_queue *q, const char *page,
@@ -556,14 +582,15 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
return count;
}
-static ssize_t queue_fua_show(struct request_queue *q, char *page)
+static int queue_fua_show(struct request_queue *q, struct seq_file *sf)
{
- return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
+ seq_printf(sf, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
+ return 0;
}
-static ssize_t queue_dax_show(struct request_queue *q, char *page)
+static int queue_dax_show(struct request_queue *q, struct seq_file *sf)
{
- return queue_var_show(blk_queue_dax(q), page);
+ return queue_var_show(blk_queue_dax(q), sf);
}
#define QUEUE_RO_ENTRY(_prefix, _name) \
@@ -710,8 +737,8 @@ static struct attribute_group queue_attr_group = {
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
-static ssize_t
-queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+static int queue_attr_seq_show(struct kobject *kobj, struct attribute *attr,
+ struct seq_file *sf)
{
struct queue_sysfs_entry *entry = to_queue(attr);
struct request_queue *q =
@@ -721,7 +748,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
- res = entry->show(q, page);
+ res = entry->show(q, sf);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -837,8 +864,8 @@ static void blk_release_queue(struct kobject *kobj)
}
static const struct sysfs_ops queue_sysfs_ops = {
- .show = queue_attr_show,
- .store = queue_attr_store,
+ .seq_show = queue_attr_seq_show,
+ .store = queue_attr_store,
};
struct kobj_type blk_queue_ktype = {
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 7c4e7993ba970..579d31f0269bf 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2496,11 +2496,12 @@ void blk_throtl_register_queue(struct request_queue *q)
}
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page)
+int blk_throtl_sample_time_show(struct request_queue *q, struct seq_file *sf)
{
if (!q->td)
return -EINVAL;
- return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice));
+ seq_printf(sf, "%u\n", jiffies_to_msecs(q->td->throtl_slice));
+ return 0;
}
ssize_t blk_throtl_sample_time_store(struct request_queue *q,
diff --git a/block/blk.h b/block/blk.h
index 7d2a0ba7ed21d..69b9b6b3b2169 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -300,7 +300,7 @@ static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
#endif /* CONFIG_BLK_DEV_THROTTLING */
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
+int blk_throtl_sample_time_show(struct request_queue *q, struct seq_file *sf);
extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
const char *page, size_t count);
extern void blk_throtl_bio_endio(struct bio *bio);
diff --git a/block/elevator.c b/block/elevator.c
index ff45d8388f487..f068585f2f9b8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -782,39 +782,40 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
return ret;
}
-ssize_t elv_iosched_show(struct request_queue *q, char *name)
+int elv_iosched_show(struct request_queue *q, struct seq_file *sf)
{
struct elevator_queue *e = q->elevator;
struct elevator_type *elv = NULL;
struct elevator_type *__e;
- int len = 0;
- if (!queue_is_mq(q))
- return sprintf(name, "none\n");
+ if (!queue_is_mq(q)) {
+ seq_printf(sf, "none\n");
+ return 0;
+ }
if (!q->elevator)
- len += sprintf(name+len, "[none] ");
+ seq_printf(sf, "[none] ");
else
elv = e->type;
spin_lock(&elv_list_lock);
list_for_each_entry(__e, &elv_list, list) {
if (elv && elevator_match(elv, __e->elevator_name, 0)) {
- len += sprintf(name+len, "[%s] ", elv->elevator_name);
+ seq_printf(sf, "[%s] ", elv->elevator_name);
continue;
}
if (elv_support_iosched(q) &&
elevator_match(__e, __e->elevator_name,
q->required_elevator_features))
- len += sprintf(name+len, "%s ", __e->elevator_name);
+ seq_printf(sf, "%s ", __e->elevator_name);
}
spin_unlock(&elv_list_lock);
if (q->elevator)
- len += sprintf(name+len, "none");
+ seq_printf(sf, "none");
- len += sprintf(len+name, "\n");
- return len;
+ seq_printf(sf, "\n");
+ return 0;
}
struct request *elv_rb_former_request(struct request_queue *q,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ef9ceead3db13..deecf7f9ff21a 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -132,7 +132,7 @@ extern void elv_unregister(struct elevator_type *);
/*
* io scheduler sysfs switching
*/
-extern ssize_t elv_iosched_show(struct request_queue *, char *);
+int elv_iosched_show(struct request_queue *q, struct seq_file *sf);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
--
2.30.2
Trivial conversion to the seq_file-based sysfs attributes.
Signed-off-by: Christoph Hellwig <[email protected]>
---
block/bfq-iosched.c | 12 ++++++------
block/elevator.c | 21 ++++++++++++---------
block/kyber-iosched.c | 7 ++++---
block/mq-deadline.c | 5 +++--
include/linux/elevator.h | 2 +-
5 files changed, 26 insertions(+), 21 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index dd13c2bbc29c1..a72b4f90f3ba2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7092,9 +7092,9 @@ static int __init bfq_slab_setup(void)
return 0;
}
-static ssize_t bfq_var_show(unsigned int var, char *page)
+static void bfq_var_show(unsigned int var, struct seq_file *sf)
{
- return sprintf(page, "%u\n", var);
+ seq_printf(sf, "%u\n", var);
}
static int bfq_var_store(unsigned long *var, const char *page)
@@ -7109,7 +7109,7 @@ static int bfq_var_store(unsigned long *var, const char *page)
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+static void __FUNC(struct elevator_queue *e, struct seq_file *sf) \
{ \
struct bfq_data *bfqd = e->elevator_data; \
u64 __data = __VAR; \
@@ -7117,7 +7117,7 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
__data = jiffies_to_msecs(__data); \
else if (__CONV == 2) \
__data = div_u64(__data, NSEC_PER_MSEC); \
- return bfq_var_show(__data, (page)); \
+ bfq_var_show(__data, (sf)); \
}
SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 2);
SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 2);
@@ -7131,12 +7131,12 @@ SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
#undef SHOW_FUNCTION
#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
-static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+static void __FUNC(struct elevator_queue *e, struct seq_file *sf) \
{ \
struct bfq_data *bfqd = e->elevator_data; \
u64 __data = __VAR; \
__data = div_u64(__data, NSEC_PER_USEC); \
- return bfq_var_show(__data, (page)); \
+ bfq_var_show(__data, (sf)); \
}
USEC_SHOW_FUNCTION(bfq_slice_idle_us_show, bfqd->bfq_slice_idle);
#undef USEC_SHOW_FUNCTION
diff --git a/block/elevator.c b/block/elevator.c
index f068585f2f9b8..951bae559e5cb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -442,21 +442,24 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
-static ssize_t
-elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+static int elv_attr_seq_show(struct kobject *kobj, struct attribute *attr,
+ struct seq_file *sf)
{
struct elv_fs_entry *entry = to_elv(attr);
- struct elevator_queue *e;
- ssize_t error;
+ struct elevator_queue *e =
+ container_of(kobj, struct elevator_queue, kobj);
if (!entry->show)
return -EIO;
- e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->type ? entry->show(e, page) : -ENOENT;
+ if (!e->type) {
+ mutex_unlock(&e->sysfs_lock);
+ return -ENOENT;
+ }
+ entry->show(e, sf);
mutex_unlock(&e->sysfs_lock);
- return error;
+ return 0;
}
static ssize_t
@@ -478,8 +481,8 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
}
static const struct sysfs_ops elv_sysfs_ops = {
- .show = elv_attr_show,
- .store = elv_attr_store,
+ .seq_show = elv_attr_seq_show,
+ .store = elv_attr_store,
};
static struct kobj_type elv_ktype = {
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 15a8be57203d6..633f9654b99b9 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -12,6 +12,7 @@
#include <linux/elevator.h>
#include <linux/module.h>
#include <linux/sbitmap.h>
+#include <linux/seq_file.h>
#include <trace/events/block.h>
@@ -857,12 +858,12 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
}
#define KYBER_LAT_SHOW_STORE(domain, name) \
-static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \
- char *page) \
+static void kyber_##name##_lat_show(struct elevator_queue *e, \
+ struct seq_file *sf) \
{ \
struct kyber_queue_data *kqd = e->elevator_data; \
\
- return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
+ seq_printf(sf, "%llu\n", kqd->latency_targets[domain]); \
} \
\
static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 7f3c3932b723e..0e1fc6d3e5d64 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -17,6 +17,7 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/sbitmap.h>
+#include <linux/seq_file.h>
#include <trace/events/block.h>
@@ -800,11 +801,11 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
* sysfs parts below
*/
#define SHOW_INT(__FUNC, __VAR) \
-static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+static void __FUNC(struct elevator_queue *e, struct seq_file *sf) \
{ \
struct deadline_data *dd = e->elevator_data; \
\
- return sysfs_emit(page, "%d\n", __VAR); \
+ seq_printf(sf, "%d\n", __VAR); \
}
#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index deecf7f9ff21a..ad5e4cb653a30 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -56,7 +56,7 @@ struct elevator_mq_ops {
struct elv_fs_entry {
struct attribute attr;
- ssize_t (*show)(struct elevator_queue *, char *);
+ void (*show)(struct elevator_queue *eq, struct seq_file *sf);
ssize_t (*store)(struct elevator_queue *, const char *, size_t);
};
--
2.30.2
Trivial conversion to the seq_file-based sysfs attributes.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/xfs/xfs_error.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 81c445e9489bd..143a1e0b12ffe 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -104,22 +104,22 @@ xfs_errortag_attr_store(
return count;
}
-STATIC ssize_t
-xfs_errortag_attr_show(
+STATIC int
+xfs_errortag_attr_seq_show(
struct kobject *kobject,
struct attribute *attr,
- char *buf)
+ struct seq_file *sf)
{
struct xfs_mount *mp = to_mp(kobject);
struct xfs_errortag_attr *xfs_attr = to_attr(attr);
- return snprintf(buf, PAGE_SIZE, "%u\n",
- xfs_errortag_get(mp, xfs_attr->tag));
+ seq_printf(sf, "%u\n", xfs_errortag_get(mp, xfs_attr->tag));
+ return 0;
}
static const struct sysfs_ops xfs_errortag_sysfs_ops = {
- .show = xfs_errortag_attr_show,
- .store = xfs_errortag_attr_store,
+ .seq_show = xfs_errortag_attr_seq_show,
+ .store = xfs_errortag_attr_store,
};
#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \
--
2.30.2
Trivial conversion to the seq_file-based sysfs attributes.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/xfs/xfs_stats.c | 24 +++++-------
fs/xfs/xfs_stats.h | 2 +-
fs/xfs/xfs_sysfs.c | 96 +++++++++++++++++++++++-----------------------
3 files changed, 58 insertions(+), 64 deletions(-)
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 20e0534a772c9..71e7a84ba0403 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -16,10 +16,9 @@ static int counter_val(struct xfsstats __percpu *stats, int idx)
return val;
}
-int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
+void xfs_stats_format(struct xfsstats __percpu *stats, struct seq_file *sf)
{
int i, j;
- int len = 0;
uint64_t xs_xstrat_bytes = 0;
uint64_t xs_write_bytes = 0;
uint64_t xs_read_bytes = 0;
@@ -58,13 +57,12 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
/* Loop over all stats groups */
for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
- len += scnprintf(buf + len, PATH_MAX - len, "%s",
- xstats[i].desc);
+ seq_printf(sf, "%s", xstats[i].desc);
+
/* inner loop does each group */
for (; j < xstats[i].endpoint; j++)
- len += scnprintf(buf + len, PATH_MAX - len, " %u",
- counter_val(stats, j));
- len += scnprintf(buf + len, PATH_MAX - len, "\n");
+ seq_printf(sf, " %u", counter_val(stats, j));
+ seq_printf(sf, "\n");
}
/* extra precision counters */
for_each_possible_cpu(i) {
@@ -74,18 +72,14 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
}
- len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
+ seq_printf(sf, "xpc %Lu %Lu %Lu\n",
xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
- len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
- defer_relog);
- len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
+ seq_printf(sf, "defer_relog %llu\n", defer_relog);
#if defined(DEBUG)
- 1);
+ seq_printf(sf, "debug 1\n");
#else
- 0);
+ seq_printf(sf, "debug 0\n");
#endif
-
- return len;
}
void xfs_stats_clearall(struct xfsstats __percpu *stats)
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 43ffba74f045e..6bf80565893cf 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -156,7 +156,7 @@ struct xfsstats {
(offsetof(struct __xfsstats, member) / (int)sizeof(uint32_t))
-int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
+void xfs_stats_format(struct xfsstats __percpu *stats, struct seq_file *sf);
void xfs_stats_clearall(struct xfsstats __percpu *stats);
extern struct xstats xfsstats;
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 18dc5eca6c045..efa2aa07f1865 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -16,7 +16,7 @@
struct xfs_sysfs_attr {
struct attribute attr;
- ssize_t (*show)(struct kobject *kobject, char *buf);
+ void (*show)(struct kobject *kobject, struct seq_file *sf);
ssize_t (*store)(struct kobject *kobject, const char *buf,
size_t count);
};
@@ -36,15 +36,17 @@ to_attr(struct attribute *attr)
#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
-STATIC ssize_t
-xfs_sysfs_object_show(
+STATIC int
+xfs_sysfs_object_seq_show(
struct kobject *kobject,
struct attribute *attr,
- char *buf)
+ struct seq_file *sf)
{
struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
- return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0;
+ if (xfs_attr->show)
+ xfs_attr->show(kobject, sf);
+ return 0;
}
STATIC ssize_t
@@ -60,8 +62,8 @@ xfs_sysfs_object_store(
}
static const struct sysfs_ops xfs_sysfs_ops = {
- .show = xfs_sysfs_object_show,
- .store = xfs_sysfs_object_store,
+ .seq_show = xfs_sysfs_object_seq_show,
+ .store = xfs_sysfs_object_store,
};
static struct attribute *xfs_mp_attrs[] = {
@@ -100,12 +102,12 @@ bug_on_assert_store(
return count;
}
-STATIC ssize_t
+STATIC void
bug_on_assert_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0);
+ seq_printf(sf, "%d\n", xfs_globals.bug_on_assert ? 1 : 0);
}
XFS_SYSFS_ATTR_RW(bug_on_assert);
@@ -130,12 +132,12 @@ log_recovery_delay_store(
return count;
}
-STATIC ssize_t
+STATIC void
log_recovery_delay_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
+ seq_printf(sf, "%d\n", xfs_globals.log_recovery_delay);
}
XFS_SYSFS_ATTR_RW(log_recovery_delay);
@@ -160,12 +162,12 @@ mount_delay_store(
return count;
}
-STATIC ssize_t
+STATIC void
mount_delay_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.mount_delay);
+ seq_printf(sf, "%d\n", xfs_globals.mount_delay);
}
XFS_SYSFS_ATTR_RW(mount_delay);
@@ -183,12 +185,12 @@ always_cow_store(
return count;
}
-static ssize_t
+static void
always_cow_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.always_cow);
+ seq_printf(sf, "%d\n", xfs_globals.always_cow);
}
XFS_SYSFS_ATTR_RW(always_cow);
@@ -219,12 +221,12 @@ pwork_threads_store(
return count;
}
-STATIC ssize_t
+STATIC void
pwork_threads_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads);
+ seq_printf(sf, "%d\n", xfs_globals.pwork_threads);
}
XFS_SYSFS_ATTR_RW(pwork_threads);
#endif /* DEBUG */
@@ -258,14 +260,12 @@ to_xstats(struct kobject *kobject)
return container_of(kobj, struct xstats, xs_kobj);
}
-STATIC ssize_t
+STATIC void
stats_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
- struct xstats *stats = to_xstats(kobject);
-
- return xfs_stats_format(stats->xs_stats, buf);
+ return xfs_stats_format(to_xstats(kobject)->xs_stats, sf);
}
XFS_SYSFS_ATTR_RO(stats);
@@ -313,10 +313,10 @@ to_xlog(struct kobject *kobject)
return container_of(kobj, struct xlog, l_kobj);
}
-STATIC ssize_t
+STATIC void
log_head_lsn_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
int cycle;
int block;
@@ -327,28 +327,28 @@ log_head_lsn_show(
block = log->l_curr_block;
spin_unlock(&log->l_icloglock);
- return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+ seq_printf(sf, "%d:%d\n", cycle, block);
}
XFS_SYSFS_ATTR_RO(log_head_lsn);
-STATIC ssize_t
+STATIC void
log_tail_lsn_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
int cycle;
int block;
struct xlog *log = to_xlog(kobject);
xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
- return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+ seq_printf(sf, "%d:%d\n", cycle, block);
}
XFS_SYSFS_ATTR_RO(log_tail_lsn);
-STATIC ssize_t
+STATIC void
reserve_grant_head_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
int cycle;
@@ -356,21 +356,21 @@ reserve_grant_head_show(
struct xlog *log = to_xlog(kobject);
xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
- return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+ seq_printf(sf, "%d:%d\n", cycle, bytes);
}
XFS_SYSFS_ATTR_RO(reserve_grant_head);
-STATIC ssize_t
+STATIC void
write_grant_head_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
int cycle;
int bytes;
struct xlog *log = to_xlog(kobject);
xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
- return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+ seq_printf(sf, "%d:%d\n", cycle, bytes);
}
XFS_SYSFS_ATTR_RO(write_grant_head);
@@ -412,10 +412,10 @@ err_to_mp(struct kobject *kobject)
return container_of(kobj, struct xfs_mount, m_error_kobj);
}
-static ssize_t
+static void
max_retries_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
int retries;
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
@@ -425,7 +425,7 @@ max_retries_show(
else
retries = cfg->max_retries;
- return snprintf(buf, PAGE_SIZE, "%d\n", retries);
+ seq_printf(sf, "%d\n", retries);
}
static ssize_t
@@ -453,10 +453,10 @@ max_retries_store(
}
XFS_SYSFS_ATTR_RW(max_retries);
-static ssize_t
+static void
retry_timeout_seconds_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
int timeout;
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
@@ -466,7 +466,7 @@ retry_timeout_seconds_show(
else
timeout = jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC;
- return snprintf(buf, PAGE_SIZE, "%d\n", timeout);
+ seq_printf(sf, "%d\n", timeout);
}
static ssize_t
@@ -497,14 +497,14 @@ retry_timeout_seconds_store(
}
XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
-static ssize_t
+static void
fail_at_unmount_show(
struct kobject *kobject,
- char *buf)
+ struct seq_file *sf)
{
struct xfs_mount *mp = err_to_mp(kobject);
- return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount);
+ seq_printf(sf, "%d\n", mp->m_fail_unmount);
}
static ssize_t
--
2.30.2
On Mon, Sep 13, 2021 at 07:41:21AM +0200, Christoph Hellwig wrote:
> Trivial conversion to the seq_file based sysfs attributes.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> fs/xfs/xfs_stats.c | 24 +++++-------
> fs/xfs/xfs_stats.h | 2 +-
> fs/xfs/xfs_sysfs.c | 96 +++++++++++++++++++++++-----------------------
> 3 files changed, 58 insertions(+), 64 deletions(-)
>
> diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
> index 20e0534a772c9..71e7a84ba0403 100644
> --- a/fs/xfs/xfs_stats.c
> +++ b/fs/xfs/xfs_stats.c
> @@ -16,10 +16,9 @@ static int counter_val(struct xfsstats __percpu *stats, int idx)
> return val;
> }
>
> -int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> +void xfs_stats_format(struct xfsstats __percpu *stats, struct seq_file *sf)
> {
> int i, j;
> - int len = 0;
> uint64_t xs_xstrat_bytes = 0;
> uint64_t xs_write_bytes = 0;
> uint64_t xs_read_bytes = 0;
> @@ -58,13 +57,12 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> /* Loop over all stats groups */
>
> for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
> - len += scnprintf(buf + len, PATH_MAX - len, "%s",
> - xstats[i].desc);
> + seq_printf(sf, "%s", xstats[i].desc);
> +
> /* inner loop does each group */
> for (; j < xstats[i].endpoint; j++)
> - len += scnprintf(buf + len, PATH_MAX - len, " %u",
> - counter_val(stats, j));
> - len += scnprintf(buf + len, PATH_MAX - len, "\n");
> + seq_printf(sf, " %u", counter_val(stats, j));
> + seq_printf(sf, "\n");
> }
> /* extra precision counters */
> for_each_possible_cpu(i) {
> @@ -74,18 +72,14 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
> }
>
> - len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
> + seq_printf(sf, "xpc %Lu %Lu %Lu\n",
> xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
> - len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
> - defer_relog);
> - len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
> + seq_printf(sf, "defer_relog %llu\n", defer_relog);
> #if defined(DEBUG)
> - 1);
> + seq_printf(sf, "debug 1\n");
> #else
> - 0);
> + seq_printf(sf, "debug 0\n");
> #endif
> -
> - return len;
> }
That is a sysfs file? What happened to the "one value per file" rule
here?
Ugh.
Anyway, I like the idea, but as you can see here, it could lead to even
more abuse of sysfs files. We are just now getting people to use
sysfs_emit() and that is showing us where people have been abusing the
api in bad ways.
Is there any way that sysfs can keep the existing show functionality and
just do the seq_printf() for the buffer returned by the attribute file
inside of the sysfs core? That would allow us to keep all of the
existing attribute file functions as-is, and still get rid of the sysfs
core usage here?
thanks,
greg k-h
On Mon, Sep 13, 2021 at 07:41:12AM +0200, Christoph Hellwig wrote:
> Split adding binary attributes into a separate handler instead of
> overloading sysfs_add_file_mode_ns.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
Acked-by: Christian Brauner <[email protected]>
On Mon, Sep 13, 2021 at 07:41:11AM +0200, Christoph Hellwig wrote:
> No actually used anywhere.
^
typo
Thanks.
--
tejun
On 9/12/21 10:41 PM, Christoph Hellwig wrote:
> Al pointed out multiple times that seq_get_buf is highly dangerous as
> it opens up the tight seq_file abstractions to buffer overflows. The
> last such caller now is sysfs.
>
> This series allows attributes to implement a seq_show method and switch
> the block and XFS code as users that I'm most familiar with to use
> seq_files directly after a few preparatory cleanups. With this series
"leaf" users of sysfs_ops can be converted one at a time, after that
> we can move the seq_get_buf into the multiplexers (e.g. kobj, device,
> class attributes) and remove the show method in sysfs_ops and repeat the
> process until all attributes are converted. This will probably take a
> fair amount of time.
Hi Christoph,
Thanks for having done this work. In case you would need it, some time ago
I posted the following sysfs patch but did not receive any feedback:
"[PATCH] kernfs: Improve lockdep annotation for files which implement mmap"
(https://lore.kernel.org/linux-kernel/[email protected]/).
Bart.
On Mon, Sep 13, 2021 at 07:41:08AM +0200, Christoph Hellwig wrote:
> Hi all,
>
> Al pointed out multiple times that seq_get_buf is highly dangerous as
> it opens up the tight seq_file abstractions to buffer overflows. The
> last such caller now is sysfs.
>
> This series allows attributes to implement a seq_show method and switch
> the block and XFS code as users that I'm most familiar with to use
> seq_files directly after a few preparatory cleanups. With this series
> "leaf" users of sysfs_ops can be converted one at a time, after that
> we can move the seq_get_buf into the multiplexers (e.g. kobj, device,
> class attributes) and remove the show method in sysfs_ops and repeat the
> process until all attributes are converted. This will probably take a
> fair amount of time.
The whole series looks good to me. With Greg's sysfs_emit argument aside on
which I don't have any opinion,
Acked-by: Tejun Heo <[email protected]>
Thanks.
--
tejun
On Mon, Sep 13, 2021 at 09:39:56AM -0700, Bart Van Assche wrote:
> On 9/12/21 10:41 PM, Christoph Hellwig wrote:
> > Al pointed out multiple times that seq_get_buf is highly dangerous as
> > it opens up the tight seq_file abstractions to buffer overflows. The
> > last such caller now is sysfs.
> >
> > This series allows attributes to implement a seq_show method and switch
> > the block and XFS code as users that I'm most familiar with to use
> > seq_files directly after a few preparatory cleanups. With this series
> > "leaf" users of sysfs_ops can be converted one at a time, after that
> > we can move the seq_get_buf into the multiplexers (e.g. kobj, device,
> > class attributes) and remove the show method in sysfs_ops and repeat the
> > process until all attributes are converted. This will probably take a
> > fair amount of time.
>
> Hi Christoph,
>
> Thanks for having done this work. In case you would need it, some time ago
> I posted the following sysfs patch but did not receive any feedback:
> "[PATCH] kernfs: Improve lockdep annotation for files which implement mmap"
> (https://lore.kernel.org/linux-kernel/[email protected]/).
>
That was from back in 2019, sorry I must have missed it.
Care to rebase and resend it if it is still needed?
thanks,
greg k-h
On Mon, Sep 13, 2021 at 07:41:09AM +0200, Christoph Hellwig wrote:
> This function pokes a big hole into the seq_file abstraction.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
Good idea.
Acked-by: Christian Brauner <[email protected]>
On Mon, Sep 13, 2021 at 07:41:10AM +0200, Christoph Hellwig wrote:
> All callers actually use __kernfs_create_file.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
Acked-by: Christian Brauner <[email protected]>
> include/linux/kernfs.h | 24 ------------------------
> 1 file changed, 24 deletions(-)
>
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index 1093abf7c28cc..cecfeedb7361d 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -568,30 +568,6 @@ kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
> priv, NULL);
> }
>
> -static inline struct kernfs_node *
> -kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
> - umode_t mode, kuid_t uid, kgid_t gid,
> - loff_t size, const struct kernfs_ops *ops,
> - void *priv, const void *ns)
> -{
> - struct lock_class_key *key = NULL;
> -
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> - key = (struct lock_class_key *)&ops->lockdep_key;
> -#endif
> - return __kernfs_create_file(parent, name, mode, uid, gid,
> - size, ops, priv, ns, key);
> -}
> -
> -static inline struct kernfs_node *
> -kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
> - loff_t size, const struct kernfs_ops *ops, void *priv)
> -{
> - return kernfs_create_file_ns(parent, name, mode,
> - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
> - size, ops, priv, NULL);
> -}
> -
> static inline int kernfs_remove_by_name(struct kernfs_node *parent,
> const char *name)
> {
> --
> 2.30.2
>
On Mon, Sep 13, 2021 at 07:41:11AM +0200, Christoph Hellwig wrote:
> No actually used anywhere.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
Acked-by: Christian Brauner <[email protected]>
On Mon, Sep 13, 2021 at 07:41:13AM +0200, Christoph Hellwig wrote:
> Regroup the code so that preallocated attributes and normal attributes are
> handled in clearly separate blocks.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
Acked-by: Christian Brauner <[email protected]>
On Mon, Sep 13, 2021 at 07:41:15AM +0200, Christoph Hellwig wrote:
> Allow attributes to directly use the seq_file method instead of
> carving out a buffer that can easily lead to buffer overflows.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
Acked-by: Christian Brauner <[email protected]>
> fs/sysfs/file.c | 19 ++++++++++++++-----
> include/linux/sysfs.h | 9 +++++++--
> 2 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
> index 42dcf96881b68..12e0bfe40a2b4 100644
> --- a/fs/sysfs/file.c
> +++ b/fs/sysfs/file.c
> @@ -45,6 +45,9 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
> ssize_t count;
> char *buf;
>
> + if (ops->seq_show)
> + return ops->seq_show(kobj, of->kn->priv, sf);
> +
> if (WARN_ON_ONCE(!ops->show))
> return -EINVAL;
>
> @@ -268,6 +271,10 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
> return -EINVAL;
>
> if (mode & SYSFS_PREALLOC) {
> + if (WARN(sysfs_ops->seq_show, KERN_ERR
> + "seq_show not supported on prealloc file: %s\n",
> + kobject_name(kobj)))
> + return -EINVAL;
> if (sysfs_ops->show && sysfs_ops->store)
> ops = &sysfs_prealloc_kfops_rw;
> else if (sysfs_ops->show)
> @@ -275,12 +282,14 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
> else if (sysfs_ops->store)
> ops = &sysfs_prealloc_kfops_wo;
> } else {
> - if (sysfs_ops->show && sysfs_ops->store)
> - ops = &sysfs_file_kfops_rw;
> - else if (sysfs_ops->show)
> - ops = &sysfs_file_kfops_ro;
> - else if (sysfs_ops->store)
> + if (sysfs_ops->seq_show || sysfs_ops->show) {
> + if (sysfs_ops->store)
> + ops = &sysfs_file_kfops_rw;
> + else
> + ops = &sysfs_file_kfops_ro;
> + } else if (sysfs_ops->store) {
> ops = &sysfs_file_kfops_wo;
> + }
> }
>
> if (!ops)
> diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
> index e3f1e8ac1f85b..e1ab4da716730 100644
> --- a/include/linux/sysfs.h
> +++ b/include/linux/sysfs.h
> @@ -236,8 +236,13 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size)
> struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
>
> struct sysfs_ops {
> - ssize_t (*show)(struct kobject *, struct attribute *, char *);
> - ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
> + int (*seq_show)(struct kobject *kobj, struct attribute *attr,
> + struct seq_file *sf);
> + ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
> + const char *buf, size_t size);
> +
> + /* deprecated except for preallocated attributes: */
> + ssize_t (*show)(struct kobject *kob, struct attribute *attr, char *buf);
> };
>
> #ifdef CONFIG_SYSFS
> --
> 2.30.2
>
> + *
> + * DOT NOT USE IN NEW CODE! This function pokes a hole into the whole seq_file
> + * abstraction. The only remaining user outside of seq_file.c is sysfs, which
> + * is gradually moving away from using seq_get_buf directly.
> */
Maybe adding a check to checkpatch could also help?
On Mon, Sep 13, 2021 at 07:41:09AM +0200, Christoph Hellwig wrote:
> This function pokes a big hole into the seq_file abstraction.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> include/linux/seq_file.h | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
> index dd99569595fd3..db16b11477875 100644
> --- a/include/linux/seq_file.h
> +++ b/include/linux/seq_file.h
> @@ -59,6 +59,10 @@ static inline bool seq_has_overflowed(struct seq_file *m)
> *
> * Return the number of bytes available in the buffer, or zero if
> * there's no space.
> + *
> + * DOT NOT USE IN NEW CODE! This function pokes a hole into the whole seq_file
^
typo
Thanks.
--
tejun
On Mon, Sep 13, 2021 at 08:27:50AM +0200, Greg Kroah-Hartman wrote:
> On Mon, Sep 13, 2021 at 07:41:21AM +0200, Christoph Hellwig wrote:
> > Trivial conversion to the seq_file based sysfs attributes.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
> > ---
> > fs/xfs/xfs_stats.c | 24 +++++-------
> > fs/xfs/xfs_stats.h | 2 +-
> > fs/xfs/xfs_sysfs.c | 96 +++++++++++++++++++++++-----------------------
> > 3 files changed, 58 insertions(+), 64 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
> > index 20e0534a772c9..71e7a84ba0403 100644
> > --- a/fs/xfs/xfs_stats.c
> > +++ b/fs/xfs/xfs_stats.c
> > @@ -16,10 +16,9 @@ static int counter_val(struct xfsstats __percpu *stats, int idx)
> > return val;
> > }
> >
> > -int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> > +void xfs_stats_format(struct xfsstats __percpu *stats, struct seq_file *sf)
> > {
> > int i, j;
> > - int len = 0;
> > uint64_t xs_xstrat_bytes = 0;
> > uint64_t xs_write_bytes = 0;
> > uint64_t xs_read_bytes = 0;
> > @@ -58,13 +57,12 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> > /* Loop over all stats groups */
> >
> > for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
> > - len += scnprintf(buf + len, PATH_MAX - len, "%s",
> > - xstats[i].desc);
> > + seq_printf(sf, "%s", xstats[i].desc);
> > +
> > /* inner loop does each group */
> > for (; j < xstats[i].endpoint; j++)
> > - len += scnprintf(buf + len, PATH_MAX - len, " %u",
> > - counter_val(stats, j));
> > - len += scnprintf(buf + len, PATH_MAX - len, "\n");
> > + seq_printf(sf, " %u", counter_val(stats, j));
> > + seq_printf(sf, "\n");
> > }
> > /* extra precision counters */
> > for_each_possible_cpu(i) {
> > @@ -74,18 +72,14 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> > defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
> > }
> >
> > - len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
> > + seq_printf(sf, "xpc %Lu %Lu %Lu\n",
> > xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
> > - len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
> > - defer_relog);
> > - len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
> > + seq_printf(sf, "defer_relog %llu\n", defer_relog);
> > #if defined(DEBUG)
> > - 1);
> > + seq_printf(sf, "debug 1\n");
> > #else
> > - 0);
> > + seq_printf(sf, "debug 0\n");
> > #endif
> > -
> > - return len;
> > }
>
> That is a sysfs file? What happened to the "one value per file" rule
> here?
There is no "rule" that says sysfs files must contain one value per
file; the documentation says that one value per file is the
"preferred" format. Documentation/filesystems/sysfs.rst:
[...]
Attributes
...
Attributes should be ASCII text files, preferably with only one value
per file. It is noted that it may not be efficient to contain only one
value per file, so it is socially acceptable to express an array of
values of the same type.
[...]
We are exposing a large array of integer values here, so multiple
values per file are explicitly considered an acceptable format.
Further, as there are roughly 200 individual stats in this file and
calculating each stat requires per-cpu aggregation, the cost of
calculating and reading each stat individually is prohibitive, not
just inefficient.
So, yes, we might have multiple lines in the file that you can frown
about, but OTOH the file format has been exposed as a kernel ABI for
a couple of decades via /proc/fs/xfs/stat. Hence exposing it in
sysfs to provide a more fine-grained breakdown of the stats (per
mount instead of global) is a no-brainer. We don't have to rewrite
the parsing engines in multiple userspace monitoring programs to
extract this information from the kernel - they just create a new
instance and read a different file and it all just works.
Indeed, there's precedence for such /proc file formats in more
fine-grained sysfs files. e.g. /sys/bus/node/devices/node<n>/vmstat
and /sys/bus/node/devices/node<n>/meminfo retain the same format
(and hence userspace parsers) for the per-node stats as /proc/vmstat
and /proc/meminfo use for the global stats...
tl;dr: the file contains arrays of values, it's inefficient to read
values one at a time, it's a pre-existing ABI-constrainted file
format, there's precedence in core kernel statistics
implementations and the documented guidelines allow this sort of
usage in these cases.
Cheers,
Dave.
--
Dave Chinner
[email protected]
On 9/13/21 09:46, Greg Kroah-Hartman wrote:
> On Mon, Sep 13, 2021 at 09:39:56AM -0700, Bart Van Assche wrote:
>> On 9/12/21 10:41 PM, Christoph Hellwig wrote:
>>> Al pointed out multiple times that seq_get_buf is highly dangerous as
>>> it opens up the tight seq_file abstractions to buffer overflows. The
>>> last such caller now is sysfs.
>>>
>>> This series allows attributes to implement a seq_show method and switch
>>> the block and XFS code as users that I'm most familiar with to use
>>> seq_files directly after a few preparatory cleanups. With this series
>>> "leaf" users of sysfs_ops can be converted one at a time, after that
>>> we can move the seq_get_buf into the multiplexers (e.g. kobj, device,
>>> class attributes) and remove the show method in sysfs_ops and repeat the
>>> process until all attributes are converted. This will probably take a
>>> fair amount of time.
>>
>> Hi Christoph,
>>
>> Thanks for having done this work. In case you would need it, some time ago
>> I posted the following sysfs patch but did not receive any feedback:
>> "[PATCH] kernfs: Improve lockdep annotation for files which implement mmap"
>> (https://lore.kernel.org/linux-kernel/[email protected]/).
>>
>
> That was from back in 2019, sorry I must have missed it.
>
> Care to rebase and resend it if it is still needed?
Hi Greg,
I think that patch is still relevant. It removes some ugly code from
sysfs. I will rebase, retest and resend it.
Thanks,
Bart.
On Tue, Sep 14, 2021 at 11:20:29AM +1000, Dave Chinner wrote:
> On Mon, Sep 13, 2021 at 08:27:50AM +0200, Greg Kroah-Hartman wrote:
> > On Mon, Sep 13, 2021 at 07:41:21AM +0200, Christoph Hellwig wrote:
> > > Trivial conversion to the seq_file based sysfs attributes.
> > >
> > > Signed-off-by: Christoph Hellwig <[email protected]>
> > > ---
> > > fs/xfs/xfs_stats.c | 24 +++++-------
> > > fs/xfs/xfs_stats.h | 2 +-
> > > fs/xfs/xfs_sysfs.c | 96 +++++++++++++++++++++++-----------------------
> > > 3 files changed, 58 insertions(+), 64 deletions(-)
> > >
> > > diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
> > > index 20e0534a772c9..71e7a84ba0403 100644
> > > --- a/fs/xfs/xfs_stats.c
> > > +++ b/fs/xfs/xfs_stats.c
> > > @@ -16,10 +16,9 @@ static int counter_val(struct xfsstats __percpu *stats, int idx)
> > > return val;
> > > }
> > >
> > > -int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> > > +void xfs_stats_format(struct xfsstats __percpu *stats, struct seq_file *sf)
> > > {
> > > int i, j;
> > > - int len = 0;
> > > uint64_t xs_xstrat_bytes = 0;
> > > uint64_t xs_write_bytes = 0;
> > > uint64_t xs_read_bytes = 0;
> > > @@ -58,13 +57,12 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> > > /* Loop over all stats groups */
> > >
> > > for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
> > > - len += scnprintf(buf + len, PATH_MAX - len, "%s",
> > > - xstats[i].desc);
> > > + seq_printf(sf, "%s", xstats[i].desc);
> > > +
> > > /* inner loop does each group */
> > > for (; j < xstats[i].endpoint; j++)
> > > - len += scnprintf(buf + len, PATH_MAX - len, " %u",
> > > - counter_val(stats, j));
> > > - len += scnprintf(buf + len, PATH_MAX - len, "\n");
> > > + seq_printf(sf, " %u", counter_val(stats, j));
> > > + seq_printf(sf, "\n");
> > > }
> > > /* extra precision counters */
> > > for_each_possible_cpu(i) {
> > > @@ -74,18 +72,14 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
> > > defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
> > > }
> > >
> > > - len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
> > > + seq_printf(sf, "xpc %Lu %Lu %Lu\n",
> > > xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
> > > - len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
> > > - defer_relog);
> > > - len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
> > > + seq_printf(sf, "defer_relog %llu\n", defer_relog);
> > > #if defined(DEBUG)
> > > - 1);
> > > + seq_printf(sf, "debug 1\n");
> > > #else
> > > - 0);
> > > + seq_printf(sf, "debug 0\n");
> > > #endif
> > > -
> > > - return len;
> > > }
> >
> > That is a sysfs file? What happened to the "one value per file" rule
> > here?
>
>
> There is no "rule" that says sysfs files must contain one value per
> file; the documentation says that one value per file is the
> "preferred" format. Documentation/filesystems/sysfs.rst:
>
> [...]
> Attributes
> ...
> Attributes should be ASCII text files, preferably with only one value
> per file. It is noted that it may not be efficient to contain only one
> value per file, so it is socially acceptable to express an array of
> values of the same type.
> [...]
>
An array of values is one thing like "what is the power states for this
device". A list of different key/value pairs is a totally different
thing entirely.
> We are exposing a large array of integer values here, so multiple
> values per file are explicitly considered an acceptable format.
Not really, that was not the goal of sysfs at all.
> Further, as there are roughly 200 individual stats in this file and
> calculating each stat requires per-cpu aggregation, the the cost of
> calculating and reading each stat individually is prohibitive, not
> just inefficient.
Have you measured it? How often does the file get read and by what
tools?
We have learned from our past mistakes in /proc where we did this in the
past and required keeping obsolete values and constantly tweaking
userspace parsers. That is why we made sysfs one-value-per-file. If
the file is not there, the value is not there, much easier to handle
future changes.
> So, yes, we might have multiple lines in the file that you can frown
> about, but OTOH the file format has been exposed as a kernel ABI for
> a couple of decades via /proc/fs/xfs/stat.
proc had no such rules, but we have learned :)
> Hence exposing it in
> sysfs to provide a more fine-grained breakdown of the stats (per
> mount instead of global) is a no-brainer. We don't have to rewrite
> the parsing engines in multiple userspace monitoring programs to
> extract this information from the kernel - they just create a new
> instance and read a different file and it all just works.
But then you run into the max size restriction on sysfs files
(PAGE_SIZE) and things break down.
Please don't do this.
> Indeed, there's precedence for such /proc file formats in more
> fine-grained sysfs files. e.g. /sys/bus/node/devices/node<n>/vmstat
> and /sys/bus/node/devices/node<n>/meminfo retain the same format
> (and hence userspace parsers) for the per-node stats as /proc/vmstat
> and /proc/meminfo use for the global stats...
And I have complained about those files in the past many times. And
they are running into problems in places dealing with them too.
> tl;dr: the file contains arrays of values, it's inefficient to read
> values one at a time, it's a pre-existing ABI-constrainted file
> format, there's precedence in core kernel statistics
> implementations and the documented guidelines allow this sort of
> usage in these cases.
I would prefer not to do this, and I will not take core sysfs changes to
make this any easier.
Which is one big reason why I don't like just making sysfs use the seq
file api, it would allow stuff like this to propagate to other places in
the kernel.
Maybe I should cut the file size of a sysfs file down to PAGE_SIZE/4 or
less, that might be better :)
thanks,
greg k-h
On Mon, Sep 13, 2021 at 08:27:50AM +0200, Greg Kroah-Hartman wrote:
> Anyway, I like the idea, but as you can see here, it could lead to even
> more abuse of sysfs files. We are just now getting people to use
> sysfs_emit() and that is showing us where people have been abusing the
> api in bad ways.
To be honest I've always seen sysfs_emit as at best a horrible band aid
to enforce the PAGE_SIZE bounds checking. Better than nothing, but
not a solution at all, as you can't force anyone to actually use it.
> Is there any way that sysfs can keep the existing show functionality and
> just do the seq_printf() for the buffer returned by the attribute file
> inside of the sysfs core?
Well, you'd need one page allocated in the seq_file code, and one in
the sysfs code. At which point we might as well drop using seq_file
at all. But in general seq_file seems like a very nice helper for
over flow free printing into a buffer. If sysfs files actually were
all limited to a single print we wouldn't really need it, and could
just have something like sysfs_emit just with the buffer hidden inside
a structure that is opaqueue to the caller. But looking at various
attributes that is not exactly the case. While the majority certainly
uses a single value and a single print statement there is plenty where
this is not the case. Either because they use multiple values, or
often also because they dynamically append to the string to print
things like comma-separated flags.
On Tue, Sep 14, 2021 at 07:12:43AM +0200, Greg Kroah-Hartman wrote:
> On Tue, Sep 14, 2021 at 11:20:29AM +1000, Dave Chinner wrote:
> > On Mon, Sep 13, 2021 at 08:27:50AM +0200, Greg Kroah-Hartman wrote:
> > > On Mon, Sep 13, 2021 at 07:41:21AM +0200, Christoph Hellwig wrote:
> > > That is a sysfs file? What happened to the "one value per file" rule
> > > here?
> >
> >
> > There is no "rule" that says sysfs files must contain one value per
> > file; the documentation says that one value per file is the
> > "preferred" format. Documentation/filesystems/sysfs.rst:
> >
> > [...]
> > Attributes
> > ...
> > Attributes should be ASCII text files, preferably with only one value
> > per file. It is noted that it may not be efficient to contain only one
> > value per file, so it is socially acceptable to express an array of
> > values of the same type.
> > [...]
> >
>
> An array of values is one thing like "what is the power states for this
> device". A list of different key/value pairs is a totally different
> thing entirely.
Sure, that's your opinion. Nothing more, nothing less. I could
easily argue that "what power states does this thing have" is a
list and not an array, but this would be just another opinion and
completely beside the point of the discussion.
> > We are exposing a large array of integer values here, so multiple
> > values per file are explicitly considered an acceptable format.
>
> Not really, that was not the goal of sysfs at all.
Sure. We all know this and we follow the guidelines in most cases.
Indeed, every other sysfs file that XFS exposes is a value-per-file
object. This makes sense for most things we expose through sysfs.
Not everything maps to value-per-file cleanly or efficiently and the
documentation acknowledges that there are alternative use cases for
sysfs files like this. Just because you don't like it doesn't mean
there aren't cases where multiple values per file is the best thing
to do.
Blind obedience to rules almost always results in poor outcomes.
> > Further, as there are roughly 200 individual stats in this file and
> > calculating each stat requires per-cpu aggregation, the cost of
> > calculating and reading each stat individually is prohibitive, not
> > just inefficient.
>
> Have you measured it? How often does the file get read and by what
> tools?
Of course I've measured it. I sample and graph hundreds of stats in
realtime at 10-20Hz across multiple machines as part of my daily
development routine with PCP. I see the overhead of reading stats
out of the kernel in kernel profiles and dropped samples in recorded
archives all the time.
For example, reading the global XFS stats at 20Hz via pmcd/pmdaxfs
during testing on a 32p machine results in xfs_stats_format()
consuming about 1% of a CPU. From the flat perf top profile:
0.93 [kernel] [k] xfs_stats_format
That's typical high frequency sampling overhead of a single
filesystem being actively used.
Note that micro-benchmarks give highly unrealistic results. If I pin
a cpu in a tight loop doing like this:
for (i = 0; i < cnt; i++) {
fd = open("/sys/fs/xfs/stats/stats", O_RDONLY);
read(fd, buf, 4096);
close(fd);
}
It gives a number of 20,000 reads a second, with 90% of the CPU
usage being in xfs_stats_format(). This is unrealistic because it is
hitting hot, clean caches and doing absolutely nothing with the data
that is returned.
Userspace needs to do work with the data, and that is as least as
much CPU overhead as formatting the stats to turn them back into
proper integer values and dumping them into a shared memory segment
for another process to collate before it can read more data from the
kernel (the underlying PCP userspace architecture). So,
realistically, sampling from 5,000 XFS stats files per CPU per
second from userspace on a 32p machine is about the max rate we can
reliably sustain on a single CPU.
The biggest cost is the read() cost (open/close is less than 10% of
the cost of the above loop), and the flat profile shows:
27.47% [k] xfs_stats_format
16.11% [k] _find_next_bit
11.11% [k] cpumask_next
4.36% [k] vsnprintf
4.07% [k] format_decode
3.99% [k] number
70% of the "cpu usage" of xfs_stats_format() is the L1 cacheline
miss in the for_each_possible_cpu() loop reading the percpu stats
values. That's despite the cachelines being clean and likely hot in
other larger local CPU caches. The percpu loop also accounts for
the cpumask_next() CPU usage, and the _find_next_bit CPU usage comes
from cpumask_next() calling it.
IOWs, about half of the kernel CPU usage reading the XFS stats in a
tight loop on a 32p machine comes from the per-cpu aggregation
overhead. That aggregation overhead will be the overall limiting
factor for XFS stats sampling.
With 100 filesystems we could, in theory, sample them all at about
50Hz if we're doing nothing else with that CPU. But the data set
size we are reading has gone up by a factor of 100, and the stats
data is not going to be hot in CPU caches anymore (neither in the
kernel or userspace). Per-cpu aggregation will also hit dirty
remote cachelines on active systems, so it's *much* slower than the
above loop would indicate. The original profile indicates maybe 20Hz
is achievable, not 50Hz.
With this in mind, we then have to consider the overhead on machines
with thousands of CPUs rather than a few tens of CPUs. The cost of
per-cpu aggregation on those machines is a couple of magnitudes
higher than these machines (aggregation overhead increases linearly
with CPU count), so if we are burning the same amount of CPU time
doing aggregation, then the number of times we can aggregate the
stats goes down by a couple of orders of magnitude. That's the
behavioural characteristics that we have to design for, not what my
tiny little 32p system can do.
IOWs, our "all stats in one file" setup on a system with a hundred
filesystems and a thousand CPUs can currently manage (roughly) a 1Hz
sampling rate on all filesystems using a single CPU and the existing
"all stats in one file" setup.
Now, let's put that in a value-per-file setup.
This means we have to read 200 individual files per filesystem to
build the entire sample set. This means we do 200 per-cpu
aggregations instead of 1 for this sample set. Given that the
majority of overhead is the per-cpu aggregation, the filesystem
stats set takes 100-200x longer to sample than a "all in one file"
setup.
IOWs, a sample set for a *single* filesystem on a 1000 CPU machine
now takes 1-2s to complete. At 100 filesystems, we now take
100-200s of CPU time to sample all the filesystem stats once. That's
a massive amount of extra overhead, and simply not acceptable nor
necessary.
To compound the regression caused by moving to a value per file, we
then have to spend another whole chunk of engineering work to
mitigate it. We'd have to completely rewrite the stats aggregation
code and we'd probably end up losing high frequency aggregation
capabilities as a result.
This is a lose-lose-lose-lose scenario, and no one in their right
mind would consider it a viable way forward. The "efficiency exception"
in the documentation was intended for exactly this sort of
situation. So, really it doesn't matter whether you like it or not, it's the
only viable solution we currently have.
> We have learned from our past mistakes in /proc where we did this in the
> past and required keeping obsolete values and constantly tweaking
> userspace parsers. That is why we made sysfs one-value-per-file. If
> the file is not there, the value is not there, much easier to handle
> future changes.
We've had rules for extending the xfs stats file to avoid breaking
the userspace parsers for a couple of decades, too. There are some
advantages to value-per-file when it comes to deprecation, but
really that's not a problem we need to solve for the XFS stats.
> > So, yes, we might have multiple lines in the file that you can frown
> > about, but OTOH the file format has been exposed as a kernel ABI for
> > a couple of decades via /proc/fs/xfs/stat.
>
> proc had no such rules, but we have learned :)
And so now you know better than everyone else. :/
> > Hence exposing it in
> > sysfs to provide a more fine-grained breakdown of the stats (per
> > mount instead of global) is a no-brainer. We don't have to rewrite
> > the parsing engines in multiple userspace monitoring programs to
> > extract this information from the kernel - they just create a new
> > instance and read a different file and it all just works.
>
> But then you run into the max size restriction on sysfs files
> (PAGE_SIZE) and things break down.
Yup, but we're nowhere near the max size restriction. Output
currently requires 12 bytes per stat maximum, so we're still
good to add another 100-150 or so stats before we have to worry
about PAGE_SIZE limits...
> Please don't do this.
Please don't take us for idiots - you know full well that the ABI
horse bolted long ago and we don't break userspace like this.
If you've got a more efficient generic way of exporting large
volumes of dynamically instanced stats arrays at high frequency than
what we do right now, then I'm all ears. But if all you are going to
say is "I say you can't do that" then you're not being helpful or
constructive.
> > Indeed, there's precedence for such /proc file formats in more
> > fine-grained sysfs files. e.g. /sys/bus/node/devices/node<n>/vmstat
> > and /sys/bus/node/devices/node<n>/meminfo retain the same format
> > (and hence userspace parsers) for the per-node stats as /proc/vmstat
> > and /proc/meminfo use for the global stats...
>
> And I have complained about those files in the past many times. And
> they are running into problems in places dealing with them too.
So come up with a generic, scalable, low overhead solution to the
stats export problem, convert all the kernel code and userspace
applications to use it, address all the regressions on large CPU
count machines it causes, start a long term deprecation period for
the existing stats files (because it's part of the kernel ABI), and
then maybe in 10 or 15 years time we can get rid of this stuff.
> > tl;dr: the file contains arrays of values, it's inefficient to read
> > values one at a time, it's a pre-existing ABI-constrainted file
> > format, there's precedence in core kernel statistics
> > implementations and the documented guidelines allow this sort of
> > usage in these cases.
>
> I would prefer not to do this, and I will not take core sysfs changes to
> make this any easier.
>
> Which is one big reason why I don't like just making sysfs use the seq
> file api, it would allow stuff like this to propagate to other places in
> the kernel.
>
> Maybe I should cut the file size of a sysfs file down to PAGE_SIZE/4 or
> less, that might be better :)
Yeah, good on yah, Greg. That'll show everyone who's boss, won't it?
-Dave.
--
Dave Chinner
[email protected]
On Tue, Sep 14, 2021 at 09:30:03AM +0200, Christoph Hellwig wrote:
> On Mon, Sep 13, 2021 at 08:27:50AM +0200, Greg Kroah-Hartman wrote:
> > Anyway, I like the idea, but as you can see here, it could lead to even
> > more abuse of sysfs files. We are just now getting people to use
> > sysfs_emit() and that is showing us where people have been abusing the
> > api in bad ways.
>
> To be honest I've always seen sysfs_emit as at best a horrible band aid
> to enforce the PAGE_SIZE bounds checking. Better than nothing, but
> not a solution at all, as you can't force anyone to actually use it.
We can "force" it by not allowing buffers to be bigger than that, which
is what the code has always done. I think we want to keep that for now
and not add the new seq_show api.
I've taken patches 2-6 in this series now, as those were great sysfs
and kernfs cleanups, thanks for those.
I can also take patch 1 if no one objects (I can edit the typo.)
I agree that getting rid of seq_get_buf() is good, and can work on
getting rid of the use here in sysfs if it's annoying people.
thanks,
greg k-h
On Tue, Sep 14, 2021 at 05:28:08PM +0200, Greg Kroah-Hartman wrote:
> We can "force" it by not allowing buffers to be bigger than that, which
> is what the code has always done. I think we want to keep that for now
> and not add the new seq_show api.
The buffer already is not larger than that. The problem is that
sysfs_emit does not actually work for the non-trivial attributes,
which generally are the source of bugs.
On Tue, Sep 14, 2021 at 05:30:11PM +0200, Christoph Hellwig wrote:
> On Tue, Sep 14, 2021 at 05:28:08PM +0200, Greg Kroah-Hartman wrote:
> > We can "force" it by not allowing buffers to be bigger than that, which
> > is what the code has always done. I think we want to keep that for now
> > and not add the new seq_show api.
>
> The buffer already is not larger than that. The problem is that
> sysfs_emit does not actually work for the non-trivial attributes,
> which generally are the source of bugs.
The huge majority of sysfs attributes are "trivial". So for maybe at
least 95% of the users, if not more, using sysfs_emit() is just fine as
all you "should" be doing is emitting a single value.
For those that are non-trivial, yes, that will be harder, but as the xfs
discussion shows, those are not normal at all, and I do not want to make
creating them easier as that is not the model that sysfs was designed
for if at all possible.
thanks,
greg k-h
On Wed, Sep 15, 2021 at 09:04:45AM +0200, Christoph Hellwig wrote:
> On Tue, Sep 14, 2021 at 05:41:37PM +0200, Greg Kroah-Hartman wrote:
> > They huge majority of sysfs attributes are "trivial". So for maybe at
> > least 95% of the users, if not more, using sysfs_emit() is just fine as
> > all you "should" be doing is emitting a single value.
>
> It is just fine if no one does the obvious mistakes that an interface
> with a char * pointer leads to. And 5% of all attributes is still a huge
> attack surface.
It is probably less, I just pulled that number out of the air. With the
other work we are doing to make sure we have documentation for all sysfs
attributes in the kernel, we will soon know the real number.
thanks,
greg k-h
On Tue, Sep 14, 2021 at 05:41:37PM +0200, Greg Kroah-Hartman wrote:
> They huge majority of sysfs attributes are "trivial". So for maybe at
> least 95% of the users, if not more, using sysfs_emit() is just fine as
> all you "should" be doing is emitting a single value.
It is just fine if no one does the obvious mistakes that an interface
with a char * pointer leads to. And 5% of all attributes is still a huge
attack surface.