2021-06-01 02:57:50

by Tian Tao

[permalink] [raw]
Subject: [PATCH 0/2] use bin_attribute to avoid buff overflow

The first patch adds a new function cpumap_print_to_buf and uses
this function in drivers/base/topology.c, and the second patch uses
this new function in drivers/base/node.c

Tian Tao (2):
topology: use bin_attribute to avoid buff overflow
drivers/base/node.c: use bin_attribute to avoid buff overflow

drivers/base/node.c | 49 +++++++++++++--------
drivers/base/topology.c | 115 ++++++++++++++++++++++++++----------------------
include/linux/bitmap.h | 3 ++
include/linux/cpumask.h | 25 +++++++++++
lib/bitmap.c | 34 ++++++++++++++
5 files changed, 156 insertions(+), 70 deletions(-)

--
2.7.4


2021-06-01 02:58:02

by Tian Tao

[permalink] [raw]
Subject: [PATCH 2/2] drivers/base/node.c: use bin_attribute to avoid buff overflow

Reading sys/devices/system/cpu/cpuX/nodeX/ returns cpumap and cpulist.
However, the size of these files is limited to PAGE_SIZE because of the
limitation of sysfs attributes. So we use bin_attribute instead of
attribute to avoid a buffer overflow when NR_CPUS is too big.

Signed-off-by: Tian Tao <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: "Rafael J. Wysocki" <[email protected]>
---
drivers/base/node.c | 49 +++++++++++++++++++++++++++++++------------------
1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index f449dbb..a19be64 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -27,42 +27,42 @@ static struct bus_type node_subsys = {
};


-static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
+static ssize_t node_read_cpumap(struct kobject *kobj, bool list,
+ char *buf, loff_t off, size_t count)
{
ssize_t n;
cpumask_var_t mask;
+ struct device *dev = kobj_to_dev(kobj);
struct node *node_dev = to_node(dev);

- /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
- BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
-
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return 0;

cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
- n = cpumap_print_to_pagebuf(list, buf, mask);
+ n = cpumap_print_to_buf(list, buf, mask, off, count);
free_cpumask_var(mask);

return n;
}

-static inline ssize_t cpumap_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
- return node_read_cpumap(dev, false, buf);
+ return node_read_cpumap(kobj, false, buf, off, count);
}

-static DEVICE_ATTR_RO(cpumap);

-static inline ssize_t cpulist_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static BIN_ATTR_RO(cpumap, 0);
+
+static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
- return node_read_cpumap(dev, true, buf);
+ return node_read_cpumap(kobj, true, buf, off, count);
}

-static DEVICE_ATTR_RO(cpulist);
+static BIN_ATTR_RO(cpulist, 0);

/**
* struct node_access_nodes - Access class device to hold user visible
@@ -557,15 +557,28 @@ static ssize_t node_read_distance(struct device *dev,
static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);

static struct attribute *node_dev_attrs[] = {
- &dev_attr_cpumap.attr,
- &dev_attr_cpulist.attr,
&dev_attr_meminfo.attr,
&dev_attr_numastat.attr,
&dev_attr_distance.attr,
&dev_attr_vmstat.attr,
NULL
};
-ATTRIBUTE_GROUPS(node_dev);
+
+static struct bin_attribute *node_dev_bin_attrs[] = {
+ &bin_attr_cpumap,
+ &bin_attr_cpulist,
+ NULL,
+};
+
+static const struct attribute_group node_dev_group = {
+ .attrs = node_dev_attrs,
+ .bin_attrs = node_dev_bin_attrs
+};
+
+static const struct attribute_group *node_dev_groups[] = {
+ &node_dev_group,
+ NULL,
+};

#ifdef CONFIG_HUGETLBFS
/*
--
2.7.4

2021-06-01 02:58:24

by Tian Tao

[permalink] [raw]
Subject: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow

Reading sys/devices/system/cpu/cpuX/topology/ returns cpu topology.
However, the size of these files is limited to PAGE_SIZE because of the
limitation of sysfs attributes. So we use bin_attribute instead of
attribute to avoid a buffer overflow when NR_CPUS is too big.

This patch is based on the following discussion.
https://www.spinics.net/lists/linux-doc/msg95921.html

Signed-off-by: Tian Tao <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Andy Shevchenko <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: "Rafael J. Wysocki" <[email protected]>
---
drivers/base/topology.c | 115 ++++++++++++++++++++++++++----------------------
include/linux/bitmap.h | 3 ++
include/linux/cpumask.h | 25 +++++++++++
lib/bitmap.c | 34 ++++++++++++++
4 files changed, 125 insertions(+), 52 deletions(-)

diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 4d254fc..013edbb 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -21,25 +21,27 @@ static ssize_t name##_show(struct device *dev, \
return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \
}

-#define define_siblings_show_map(name, mask) \
-static ssize_t name##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\
+#define define_siblings_read_func(name, mask) \
+static ssize_t name##_read(struct file *file, struct kobject *kobj, \
+ struct bin_attribute *attr, char *buf, \
+ loff_t off, size_t count) \
+{ \
+ struct device *dev = kobj_to_dev(kobj); \
+ \
+ return cpumap_print_to_buf(false, buf, topology_##mask(dev->id), \
+ off, count); \
+} \
+ \
+static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \
+ struct bin_attribute *attr, char *buf, \
+ loff_t off, size_t count) \
+{ \
+ struct device *dev = kobj_to_dev(kobj); \
+ \
+ return cpumap_print_to_buf(true, buf, topology_##mask(dev->id), \
+ off, count); \
}

-#define define_siblings_show_list(name, mask) \
-static ssize_t name##_list_show(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
-{ \
- return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\
-}
-
-#define define_siblings_show_func(name, mask) \
- define_siblings_show_map(name, mask); \
- define_siblings_show_list(name, mask)
-
define_id_show_func(physical_package_id);
static DEVICE_ATTR_RO(physical_package_id);

@@ -49,71 +51,80 @@ static DEVICE_ATTR_RO(die_id);
define_id_show_func(core_id);
static DEVICE_ATTR_RO(core_id);

-define_siblings_show_func(thread_siblings, sibling_cpumask);
-static DEVICE_ATTR_RO(thread_siblings);
-static DEVICE_ATTR_RO(thread_siblings_list);
+define_siblings_read_func(thread_siblings, sibling_cpumask);
+static BIN_ATTR_RO(thread_siblings, 0);
+static BIN_ATTR_RO(thread_siblings_list, 0);

-define_siblings_show_func(core_cpus, sibling_cpumask);
-static DEVICE_ATTR_RO(core_cpus);
-static DEVICE_ATTR_RO(core_cpus_list);
+define_siblings_read_func(core_cpus, sibling_cpumask);
+static BIN_ATTR_RO(core_cpus, 0);
+static BIN_ATTR_RO(core_cpus_list, 0);

-define_siblings_show_func(core_siblings, core_cpumask);
-static DEVICE_ATTR_RO(core_siblings);
-static DEVICE_ATTR_RO(core_siblings_list);
+define_siblings_read_func(core_siblings, core_cpumask);
+static BIN_ATTR_RO(core_siblings, 0);
+static BIN_ATTR_RO(core_siblings_list, 0);

-define_siblings_show_func(die_cpus, die_cpumask);
-static DEVICE_ATTR_RO(die_cpus);
-static DEVICE_ATTR_RO(die_cpus_list);
+define_siblings_read_func(die_cpus, die_cpumask);
+static BIN_ATTR_RO(die_cpus, 0);
+static BIN_ATTR_RO(die_cpus_list, 0);

-define_siblings_show_func(package_cpus, core_cpumask);
-static DEVICE_ATTR_RO(package_cpus);
-static DEVICE_ATTR_RO(package_cpus_list);
+define_siblings_read_func(package_cpus, core_cpumask);
+static BIN_ATTR_RO(package_cpus, 0);
+static BIN_ATTR_RO(package_cpus_list, 0);

#ifdef CONFIG_SCHED_BOOK
define_id_show_func(book_id);
static DEVICE_ATTR_RO(book_id);
-define_siblings_show_func(book_siblings, book_cpumask);
-static DEVICE_ATTR_RO(book_siblings);
-static DEVICE_ATTR_RO(book_siblings_list);
+define_siblings_read_func(book_siblings, book_cpumask);
+static BIN_ATTR_RO(book_siblings, 0);
+static BIN_ATTR_RO(book_siblings_list, 0);
#endif

#ifdef CONFIG_SCHED_DRAWER
define_id_show_func(drawer_id);
static DEVICE_ATTR_RO(drawer_id);
-define_siblings_show_func(drawer_siblings, drawer_cpumask);
-static DEVICE_ATTR_RO(drawer_siblings);
-static DEVICE_ATTR_RO(drawer_siblings_list);
+define_siblings_read_func(drawer_siblings, drawer_cpumask);
+static BIN_ATTR_RO(drawer_siblings, 0);
+static BIN_ATTR_RO(drawer_siblings_list, 0);
#endif

+static struct bin_attribute *bin_attrs[] = {
+ &bin_attr_core_cpus,
+ &bin_attr_core_cpus_list,
+ &bin_attr_thread_siblings,
+ &bin_attr_thread_siblings_list,
+ &bin_attr_core_siblings,
+ &bin_attr_core_siblings_list,
+ &bin_attr_die_cpus,
+ &bin_attr_die_cpus_list,
+ &bin_attr_package_cpus,
+ &bin_attr_package_cpus_list,
+#ifdef CONFIG_SCHED_BOOK
+ &bin_attr_book_siblings,
+ &bin_attr_book_siblings_list,
+#endif
+#ifdef CONFIG_SCHED_DRAWER
+ &bin_attr_drawer_siblings,
+ &bin_attr_drawer_siblings_list,
+#endif
+ NULL,
+};
+
static struct attribute *default_attrs[] = {
&dev_attr_physical_package_id.attr,
&dev_attr_die_id.attr,
&dev_attr_core_id.attr,
- &dev_attr_thread_siblings.attr,
- &dev_attr_thread_siblings_list.attr,
- &dev_attr_core_cpus.attr,
- &dev_attr_core_cpus_list.attr,
- &dev_attr_core_siblings.attr,
- &dev_attr_core_siblings_list.attr,
- &dev_attr_die_cpus.attr,
- &dev_attr_die_cpus_list.attr,
- &dev_attr_package_cpus.attr,
- &dev_attr_package_cpus_list.attr,
#ifdef CONFIG_SCHED_BOOK
&dev_attr_book_id.attr,
- &dev_attr_book_siblings.attr,
- &dev_attr_book_siblings_list.attr,
#endif
#ifdef CONFIG_SCHED_DRAWER
&dev_attr_drawer_id.attr,
- &dev_attr_drawer_siblings.attr,
- &dev_attr_drawer_siblings_list.attr,
#endif
NULL
};

static const struct attribute_group topology_attr_group = {
.attrs = default_attrs,
+ .bin_attrs = bin_attrs,
.name = "topology"
};

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 70a9324..bc401bd9b 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -219,6 +219,9 @@ extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int
extern int bitmap_print_to_pagebuf(bool list, char *buf,
const unsigned long *maskp, int nmaskbits);

+extern int bitmap_print_to_buf(bool list, char *buf,
+ const unsigned long *maskp, int nmaskbits, loff_t off, size_t count);
+
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 383684e..e4810b3e 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -928,6 +928,31 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
nr_cpu_ids);
}

+/**
+ * cpumap_print_to_buf - copies the cpumask into the buffer either
+ * as comma-separated list of cpus or hex values of cpumask
+ * @list: indicates whether the cpumap must be list
+ * @mask: the cpumask to copy
+ * @buf: the buffer to copy into
+ * @off: the offset in the output at which to start copying
+ * @count: the maximum number of bytes to copy into the buffer
+ *
+ * the role of cpumap_print_to_buf and cpumap_print_to_pagebuf is
+ * the same, the difference is that the second parameter of
+ * bitmap_print_to_buf can be more than one pagesize.
+ *
+ * Returns the length of the (null-terminated) @buf string, zero if
+ * nothing is copied.
+ */
+
+static inline ssize_t
+cpumap_print_to_buf(bool list, char *buf, const struct cpumask *mask,
+ loff_t off, size_t count)
+{
+ return bitmap_print_to_buf(list, buf, cpumask_bits(mask),
+ nr_cpu_ids, off, count);
+}
+
#if NR_CPUS <= BITS_PER_LONG
#define CPU_MASK_ALL \
(cpumask_t) { { \
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 75006c4..5bf89f1 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -460,6 +460,40 @@ int bitmap_parse_user(const char __user *ubuf,
EXPORT_SYMBOL(bitmap_parse_user);

/**
+ * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
+ * @list: indicates whether the bitmap must be list
+ * @buf: page aligned buffer into which string is placed
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ * @off: offset into the formatted string at which to start copying
+ * @count: maximum number of bytes to copy into @buf
+ *
+ * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
+ * the same, the difference is that the second parameter of
+ * bitmap_print_to_buf can be more than one pagesize.
+ */
+int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
+ int nmaskbits, loff_t off, size_t count)
+{
+ int len, size;
+ void *data;
+ char *fmt = list ? "%*pbl\n" : "%*pb\n";
+
+ len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
+
+ data = kvmalloc(len+1, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
+ size = memory_read_from_buffer(buf, count, &off, data, size);
+ kvfree(data);
+
+ return size;
+}
+EXPORT_SYMBOL(bitmap_print_to_buf);
+
+/**
* bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string
* @list: indicates whether the bitmap must be list
* @buf: page aligned buffer into which string is placed
--
2.7.4

2021-06-01 05:00:40

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow

On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> Reading sys/devices/system/cpu/cpuX/topology/ returns cpu topology.
> However, the size of this file is limited to PAGE_SIZE because of the
> limitation for sysfs attribute. so we use bin_attribute instead of
> attribute to avoid NR_CPUS too big to cause buff overflow.
>
> This patch is based on the following discussion.
> https://www.spinics.net/lists/linux-doc/msg95921.html

Please use lore.kernel.org for links as we have no control over other
sites to ensure that they will work in the future. Use the message id
in the link as well, so that if something were to happen to lore, we can
figure it out.

Also, you are modifying a bunch of different files here, do you mean to
do it for all of them?

>
> Signed-off-by: Tian Tao <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Andy Shevchenko <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: "Rafael J. Wysocki" <[email protected]>
> ---
> drivers/base/topology.c | 115 ++++++++++++++++++++++++++----------------------
> include/linux/bitmap.h | 3 ++
> include/linux/cpumask.h | 25 +++++++++++
> lib/bitmap.c | 34 ++++++++++++++
> 4 files changed, 125 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/base/topology.c b/drivers/base/topology.c
> index 4d254fc..013edbb 100644
> --- a/drivers/base/topology.c
> +++ b/drivers/base/topology.c
> @@ -21,25 +21,27 @@ static ssize_t name##_show(struct device *dev, \
> return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \
> }
>
> -#define define_siblings_show_map(name, mask) \
> -static ssize_t name##_show(struct device *dev, \
> - struct device_attribute *attr, char *buf) \
> -{ \
> - return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\
> +#define define_siblings_read_func(name, mask) \
> +static ssize_t name##_read(struct file *file, struct kobject *kobj, \
> + struct bin_attribute *attr, char *buf, \
> + loff_t off, size_t count) \
> +{ \
> + struct device *dev = kobj_to_dev(kobj); \
> + \
> + return cpumap_print_to_buf(false, buf, topology_##mask(dev->id), \
> + off, count); \
> +} \
> + \
> +static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \
> + struct bin_attribute *attr, char *buf, \
> + loff_t off, size_t count) \
> +{ \
> + struct device *dev = kobj_to_dev(kobj); \
> + \
> + return cpumap_print_to_buf(true, buf, topology_##mask(dev->id), \
> + off, count); \
> }
>
> -#define define_siblings_show_list(name, mask) \
> -static ssize_t name##_list_show(struct device *dev, \
> - struct device_attribute *attr, \
> - char *buf) \
> -{ \
> - return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\
> -}
> -
> -#define define_siblings_show_func(name, mask) \
> - define_siblings_show_map(name, mask); \
> - define_siblings_show_list(name, mask)
> -
> define_id_show_func(physical_package_id);
> static DEVICE_ATTR_RO(physical_package_id);
>
> @@ -49,71 +51,80 @@ static DEVICE_ATTR_RO(die_id);
> define_id_show_func(core_id);
> static DEVICE_ATTR_RO(core_id);
>
> -define_siblings_show_func(thread_siblings, sibling_cpumask);
> -static DEVICE_ATTR_RO(thread_siblings);
> -static DEVICE_ATTR_RO(thread_siblings_list);
> +define_siblings_read_func(thread_siblings, sibling_cpumask);
> +static BIN_ATTR_RO(thread_siblings, 0);
> +static BIN_ATTR_RO(thread_siblings_list, 0);
>
> -define_siblings_show_func(core_cpus, sibling_cpumask);
> -static DEVICE_ATTR_RO(core_cpus);
> -static DEVICE_ATTR_RO(core_cpus_list);
> +define_siblings_read_func(core_cpus, sibling_cpumask);
> +static BIN_ATTR_RO(core_cpus, 0);
> +static BIN_ATTR_RO(core_cpus_list, 0);
>
> -define_siblings_show_func(core_siblings, core_cpumask);
> -static DEVICE_ATTR_RO(core_siblings);
> -static DEVICE_ATTR_RO(core_siblings_list);
> +define_siblings_read_func(core_siblings, core_cpumask);
> +static BIN_ATTR_RO(core_siblings, 0);
> +static BIN_ATTR_RO(core_siblings_list, 0);
>
> -define_siblings_show_func(die_cpus, die_cpumask);
> -static DEVICE_ATTR_RO(die_cpus);
> -static DEVICE_ATTR_RO(die_cpus_list);
> +define_siblings_read_func(die_cpus, die_cpumask);
> +static BIN_ATTR_RO(die_cpus, 0);
> +static BIN_ATTR_RO(die_cpus_list, 0);
>
> -define_siblings_show_func(package_cpus, core_cpumask);
> -static DEVICE_ATTR_RO(package_cpus);
> -static DEVICE_ATTR_RO(package_cpus_list);
> +define_siblings_read_func(package_cpus, core_cpumask);
> +static BIN_ATTR_RO(package_cpus, 0);
> +static BIN_ATTR_RO(package_cpus_list, 0);
>
> #ifdef CONFIG_SCHED_BOOK
> define_id_show_func(book_id);
> static DEVICE_ATTR_RO(book_id);
> -define_siblings_show_func(book_siblings, book_cpumask);
> -static DEVICE_ATTR_RO(book_siblings);
> -static DEVICE_ATTR_RO(book_siblings_list);
> +define_siblings_read_func(book_siblings, book_cpumask);
> +static BIN_ATTR_RO(book_siblings, 0);
> +static BIN_ATTR_RO(book_siblings_list, 0);
> #endif
>
> #ifdef CONFIG_SCHED_DRAWER
> define_id_show_func(drawer_id);
> static DEVICE_ATTR_RO(drawer_id);
> -define_siblings_show_func(drawer_siblings, drawer_cpumask);
> -static DEVICE_ATTR_RO(drawer_siblings);
> -static DEVICE_ATTR_RO(drawer_siblings_list);
> +define_siblings_read_func(drawer_siblings, drawer_cpumask);
> +static BIN_ATTR_RO(drawer_siblings, 0);
> +static BIN_ATTR_RO(drawer_siblings_list, 0);
> #endif
>
> +static struct bin_attribute *bin_attrs[] = {
> + &bin_attr_core_cpus,
> + &bin_attr_core_cpus_list,
> + &bin_attr_thread_siblings,
> + &bin_attr_thread_siblings_list,
> + &bin_attr_core_siblings,
> + &bin_attr_core_siblings_list,
> + &bin_attr_die_cpus,
> + &bin_attr_die_cpus_list,
> + &bin_attr_package_cpus,
> + &bin_attr_package_cpus_list,
> +#ifdef CONFIG_SCHED_BOOK
> + &bin_attr_book_siblings,
> + &bin_attr_book_siblings_list,
> +#endif
> +#ifdef CONFIG_SCHED_DRAWER
> + &bin_attr_drawer_siblings,
> + &bin_attr_drawer_siblings_list,
> +#endif
> + NULL,
> +};
> +
> static struct attribute *default_attrs[] = {
> &dev_attr_physical_package_id.attr,
> &dev_attr_die_id.attr,
> &dev_attr_core_id.attr,
> - &dev_attr_thread_siblings.attr,
> - &dev_attr_thread_siblings_list.attr,
> - &dev_attr_core_cpus.attr,
> - &dev_attr_core_cpus_list.attr,
> - &dev_attr_core_siblings.attr,
> - &dev_attr_core_siblings_list.attr,
> - &dev_attr_die_cpus.attr,
> - &dev_attr_die_cpus_list.attr,
> - &dev_attr_package_cpus.attr,
> - &dev_attr_package_cpus_list.attr,
> #ifdef CONFIG_SCHED_BOOK
> &dev_attr_book_id.attr,
> - &dev_attr_book_siblings.attr,
> - &dev_attr_book_siblings_list.attr,
> #endif
> #ifdef CONFIG_SCHED_DRAWER
> &dev_attr_drawer_id.attr,
> - &dev_attr_drawer_siblings.attr,
> - &dev_attr_drawer_siblings_list.attr,
> #endif
> NULL
> };
>
> static const struct attribute_group topology_attr_group = {
> .attrs = default_attrs,
> + .bin_attrs = bin_attrs,
> .name = "topology"
> };
>
> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> index 70a9324..bc401bd9b 100644
> --- a/include/linux/bitmap.h
> +++ b/include/linux/bitmap.h
> @@ -219,6 +219,9 @@ extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int
> extern int bitmap_print_to_pagebuf(bool list, char *buf,
> const unsigned long *maskp, int nmaskbits);
>
> +extern int bitmap_print_to_buf(bool list, char *buf,
> + const unsigned long *maskp, int nmaskbits, loff_t off, size_t count);
> +
> #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
> #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index 383684e..e4810b3e 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -928,6 +928,31 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
> nr_cpu_ids);
> }
>
> +/**
> + * cpumap_print_to_buf - copies the cpumask into the buffer either
> + * as comma-separated list of cpus or hex values of cpumask
> + * @list: indicates whether the cpumap must be list
> + * @mask: the cpumask to copy
> + * @buf: the buffer to copy into
> + * @off: the offset that buffer to copy into
> + * @count: the count thatbuffer to copy into
> + *
> + * the role of cpumap_print_to_buf and cpumap_print_to_pagebuf is
> + * the same, the difference is that the second parameter of
> + * bitmap_print_to_buf can be more than one pagesize.
> + *
> + * Returns the length of the (null-terminated) @buf string, zero if
> + * nothing is copied.
> + */
> +
> +static inline ssize_t
> +cpumap_print_to_buf(bool list, char *buf, const struct cpumask *mask,
> + loff_t off, size_t count)
> +{
> + return bitmap_print_to_buf(list, buf, cpumask_bits(mask),
> + nr_cpu_ids, off, count);
> +}
> +
> #if NR_CPUS <= BITS_PER_LONG
> #define CPU_MASK_ALL \
> (cpumask_t) { { \
> diff --git a/lib/bitmap.c b/lib/bitmap.c
> index 75006c4..5bf89f1 100644
> --- a/lib/bitmap.c
> +++ b/lib/bitmap.c
> @@ -460,6 +460,40 @@ int bitmap_parse_user(const char __user *ubuf,
> EXPORT_SYMBOL(bitmap_parse_user);
>
> /**
> + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
> + * @list: indicates whether the bitmap must be list
> + * @buf: page aligned buffer into which string is placed
> + * @maskp: pointer to bitmap to convert
> + * @nmaskbits: size of bitmap, in bits
> + * @off: offset in buf
> + * @count: count that already output
> + *
> + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
> + * the same, the difference is that the second parameter of
> + * bitmap_print_to_buf can be more than one pagesize.
> + */
> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
> + int nmaskbits, loff_t off, size_t count)
> +{
> + int len, size;
> + void *data;
> + char *fmt = list ? "%*pbl\n" : "%*pb\n";
> +
> + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> +
> + data = kvmalloc(len+1, GFP_KERNEL);
> + if (!data)
> + return -ENOMEM;
> +
> + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> + size = memory_read_from_buffer(buf, count, &off, data, size);
> + kvfree(data);
> +
> + return size;

Why is this so different from bitmap_print_to_pagebuf()? Can't you just
use this function as the "real" function and then change
bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?

Can you add the new function as the first patch in the series and then
use it in the later ones? That makes it easier to review on its own.

thanks,

greg k-h

2021-06-01 05:02:44

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH 2/2] drivers/base/node.c: use bin_attribute to avoid buff overflow

On Tue, Jun 01, 2021 at 10:56:50AM +0800, Tian Tao wrote:
> Reading sys/devices/system/cpu/cpuX/nodeX/ returns cpumap and cpulist.
> However, the size of this file is limited to PAGE_SIZE because of the
> limitation for sysfs attribute. so we use bin_attribute instead of
> attribute to avoid NR_CPUS too big to cause buff overflow.
>
> Signed-off-by: Tian Tao <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: "Rafael J. Wysocki" <[email protected]>
> ---
> drivers/base/node.c | 49 +++++++++++++++++++++++++++++++------------------
> 1 file changed, 31 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index f449dbb..a19be64 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -27,42 +27,42 @@ static struct bus_type node_subsys = {
> };
>
>
> -static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
> +static ssize_t node_read_cpumap(struct kobject *kobj, bool list,

Why not stick with the dev pointer here? These are devices, please use
them.

thanks,

greg k-h

Subject: RE: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow



> -----Original Message-----
> From: Greg KH [mailto:[email protected]]
> Sent: Tuesday, June 1, 2021 4:59 PM
> To: tiantao (H) <[email protected]>
> Cc: [email protected]; [email protected]; Song Bao Hua
> (Barry Song) <[email protected]>; Andy Shevchenko
> <[email protected]>; Rafael J. Wysocki <[email protected]>
> Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow
>
> On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> > Reading sys/devices/system/cpu/cpuX/topology/ returns cpu topology.
> > However, the size of this file is limited to PAGE_SIZE because of the
> > limitation for sysfs attribute. so we use bin_attribute instead of
> > attribute to avoid NR_CPUS too big to cause buff overflow.
> >
> > This patch is based on the following discussion.
> > https://www.spinics.net/lists/linux-doc/msg95921.html
>
> Please use lore.kernel.org for links as we have no control over other
> sites to ensure that they will work in the future. Use the message id
> in the link as well, so that if something were to happen to lore, we can
> figure it out.
>
> Also, you are modifying a bunch of different files here, do you mean to
> do it for all of them?

The plan is to provide a common wrapper similar to cpumap_print_to_pagebuf
so that all modules which have bitmap and list topology ABI can move to use
it.

Right now, drivers/base/topology.c and drivers/base/node.c are the first
two examples involved. We still have around 40 other places on
the todo list.

Thanks
Barry

2021-06-01 07:14:44

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow

On Tue, Jun 01, 2021 at 07:04:33AM +0000, Song Bao Hua (Barry Song) wrote:
>
>
> > -----Original Message-----
> > From: Greg KH [mailto:[email protected]]
> > Sent: Tuesday, June 1, 2021 4:59 PM
> > To: tiantao (H) <[email protected]>
> > Cc: [email protected]; [email protected]; Song Bao Hua
> > (Barry Song) <[email protected]>; Andy Shevchenko
> > <[email protected]>; Rafael J. Wysocki <[email protected]>
> > Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow
> >
> > On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> > > Reading sys/devices/system/cpu/cpuX/topology/ returns cpu topology.
> > > However, the size of this file is limited to PAGE_SIZE because of the
> > > limitation for sysfs attribute. so we use bin_attribute instead of
> > > attribute to avoid NR_CPUS too big to cause buff overflow.
> > >
> > > This patch is based on the following discussion.
> > > https://www.spinics.net/lists/linux-doc/msg95921.html
> >
> > Please use lore.kernel.org for links as we have no control over other
> > sites to ensure that they will work in the future. Use the message id
> > in the link as well, so that if something were to happen to lore, we can
> > figure it out.
> >
> > Also, you are modifying a bunch of different files here, do you mean to
> > do it for all of them?
>
> The plan is providing a common wrapper similar with cpumap_print_to_pagebuf
> so that all modules which have bitmap and list topology ABI can move to use
> it.

Ok, then create the new function and then convert to use it, do not
bundle it together in the same patch.

thanks,

greg k-h

2021-06-02 06:44:27

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow

On Wed, Jun 02, 2021 at 02:14:49PM +0800, tiantao (H) wrote:
>
> 在 2021/6/1 12:58, Greg KH 写道:
> > On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> > > Reading sys/devices/system/cpu/cpuX/topology/ returns cpu topology.
> > > However, the size of this file is limited to PAGE_SIZE because of the
> > > limitation for sysfs attribute. so we use bin_attribute instead of
> > > attribute to avoid NR_CPUS too big to cause buff overflow.
> > >
> > > This patch is based on the following discussion.
> > > https://www.spinics.net/lists/linux-doc/msg95921.html
> > Please use lore.kernel.org for links as we have no control over other
> > sites to ensure that they will work in the future. Use the message id
> > in the link as well, so that if something were to happen to lore, we can
> > figure it out.
> >
> > Also, you are modifying a bunch of different files here, do you mean to
> > do it for all of them?
> >
> > > Signed-off-by: Tian Tao <[email protected]>
> > > Cc: Andrew Morton <[email protected]>
> > > Cc: Andy Shevchenko <[email protected]>
> > > Cc: Greg Kroah-Hartman <[email protected]>
> > > Cc: "Rafael J. Wysocki" <[email protected]>
> > > ---
> > > drivers/base/topology.c | 115 ++++++++++++++++++++++++++----------------------
> > > include/linux/bitmap.h | 3 ++
> > > include/linux/cpumask.h | 25 +++++++++++
> > > lib/bitmap.c | 34 ++++++++++++++
> > > 4 files changed, 125 insertions(+), 52 deletions(-)
> > >
> > > diff --git a/drivers/base/topology.c b/drivers/base/topology.c
> > > index 4d254fc..013edbb 100644
> > > --- a/drivers/base/topology.c
> > > +++ b/drivers/base/topology.c
> > > @@ -21,25 +21,27 @@ static ssize_t name##_show(struct device *dev, \
> > > return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \
> > > }
> > > -#define define_siblings_show_map(name, mask) \
> > > -static ssize_t name##_show(struct device *dev, \
> > > - struct device_attribute *attr, char *buf) \
> > > -{ \
> > > - return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\
> > > +#define define_siblings_read_func(name, mask) \
> > > +static ssize_t name##_read(struct file *file, struct kobject *kobj, \
> > > + struct bin_attribute *attr, char *buf, \
> > > + loff_t off, size_t count) \
> > > +{ \
> > > + struct device *dev = kobj_to_dev(kobj); \
> > > + \
> > > + return cpumap_print_to_buf(false, buf, topology_##mask(dev->id), \
> > > + off, count); \
> > > +} \
> > > + \
> > > +static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \
> > > + struct bin_attribute *attr, char *buf, \
> > > + loff_t off, size_t count) \
> > > +{ \
> > > + struct device *dev = kobj_to_dev(kobj); \
> > > + \
> > > + return cpumap_print_to_buf(true, buf, topology_##mask(dev->id), \
> > > + off, count); \
> > > }
> > > -#define define_siblings_show_list(name, mask) \
> > > -static ssize_t name##_list_show(struct device *dev, \
> > > - struct device_attribute *attr, \
> > > - char *buf) \
> > > -{ \
> > > - return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\
> > > -}
> > > -
> > > -#define define_siblings_show_func(name, mask) \
> > > - define_siblings_show_map(name, mask); \
> > > - define_siblings_show_list(name, mask)
> > > -
> > > define_id_show_func(physical_package_id);
> > > static DEVICE_ATTR_RO(physical_package_id);
> > > @@ -49,71 +51,80 @@ static DEVICE_ATTR_RO(die_id);
> > > define_id_show_func(core_id);
> > > static DEVICE_ATTR_RO(core_id);
> > > -define_siblings_show_func(thread_siblings, sibling_cpumask);
> > > -static DEVICE_ATTR_RO(thread_siblings);
> > > -static DEVICE_ATTR_RO(thread_siblings_list);
> > > +define_siblings_read_func(thread_siblings, sibling_cpumask);
> > > +static BIN_ATTR_RO(thread_siblings, 0);
> > > +static BIN_ATTR_RO(thread_siblings_list, 0);
> > > -define_siblings_show_func(core_cpus, sibling_cpumask);
> > > -static DEVICE_ATTR_RO(core_cpus);
> > > -static DEVICE_ATTR_RO(core_cpus_list);
> > > +define_siblings_read_func(core_cpus, sibling_cpumask);
> > > +static BIN_ATTR_RO(core_cpus, 0);
> > > +static BIN_ATTR_RO(core_cpus_list, 0);
> > > -define_siblings_show_func(core_siblings, core_cpumask);
> > > -static DEVICE_ATTR_RO(core_siblings);
> > > -static DEVICE_ATTR_RO(core_siblings_list);
> > > +define_siblings_read_func(core_siblings, core_cpumask);
> > > +static BIN_ATTR_RO(core_siblings, 0);
> > > +static BIN_ATTR_RO(core_siblings_list, 0);
> > > -define_siblings_show_func(die_cpus, die_cpumask);
> > > -static DEVICE_ATTR_RO(die_cpus);
> > > -static DEVICE_ATTR_RO(die_cpus_list);
> > > +define_siblings_read_func(die_cpus, die_cpumask);
> > > +static BIN_ATTR_RO(die_cpus, 0);
> > > +static BIN_ATTR_RO(die_cpus_list, 0);
> > > -define_siblings_show_func(package_cpus, core_cpumask);
> > > -static DEVICE_ATTR_RO(package_cpus);
> > > -static DEVICE_ATTR_RO(package_cpus_list);
> > > +define_siblings_read_func(package_cpus, core_cpumask);
> > > +static BIN_ATTR_RO(package_cpus, 0);
> > > +static BIN_ATTR_RO(package_cpus_list, 0);
> > > #ifdef CONFIG_SCHED_BOOK
> > > define_id_show_func(book_id);
> > > static DEVICE_ATTR_RO(book_id);
> > > -define_siblings_show_func(book_siblings, book_cpumask);
> > > -static DEVICE_ATTR_RO(book_siblings);
> > > -static DEVICE_ATTR_RO(book_siblings_list);
> > > +define_siblings_read_func(book_siblings, book_cpumask);
> > > +static BIN_ATTR_RO(book_siblings, 0);
> > > +static BIN_ATTR_RO(book_siblings_list, 0);
> > > #endif
> > > #ifdef CONFIG_SCHED_DRAWER
> > > define_id_show_func(drawer_id);
> > > static DEVICE_ATTR_RO(drawer_id);
> > > -define_siblings_show_func(drawer_siblings, drawer_cpumask);
> > > -static DEVICE_ATTR_RO(drawer_siblings);
> > > -static DEVICE_ATTR_RO(drawer_siblings_list);
> > > +define_siblings_read_func(drawer_siblings, drawer_cpumask);
> > > +static BIN_ATTR_RO(drawer_siblings, 0);
> > > +static BIN_ATTR_RO(drawer_siblings_list, 0);
> > > #endif
> > > +static struct bin_attribute *bin_attrs[] = {
> > > + &bin_attr_core_cpus,
> > > + &bin_attr_core_cpus_list,
> > > + &bin_attr_thread_siblings,
> > > + &bin_attr_thread_siblings_list,
> > > + &bin_attr_core_siblings,
> > > + &bin_attr_core_siblings_list,
> > > + &bin_attr_die_cpus,
> > > + &bin_attr_die_cpus_list,
> > > + &bin_attr_package_cpus,
> > > + &bin_attr_package_cpus_list,
> > > +#ifdef CONFIG_SCHED_BOOK
> > > + &bin_attr_book_siblings,
> > > + &bin_attr_book_siblings_list,
> > > +#endif
> > > +#ifdef CONFIG_SCHED_DRAWER
> > > + &bin_attr_drawer_siblings,
> > > + &bin_attr_drawer_siblings_list,
> > > +#endif
> > > + NULL,
> > > +};
> > > +
> > > static struct attribute *default_attrs[] = {
> > > &dev_attr_physical_package_id.attr,
> > > &dev_attr_die_id.attr,
> > > &dev_attr_core_id.attr,
> > > - &dev_attr_thread_siblings.attr,
> > > - &dev_attr_thread_siblings_list.attr,
> > > - &dev_attr_core_cpus.attr,
> > > - &dev_attr_core_cpus_list.attr,
> > > - &dev_attr_core_siblings.attr,
> > > - &dev_attr_core_siblings_list.attr,
> > > - &dev_attr_die_cpus.attr,
> > > - &dev_attr_die_cpus_list.attr,
> > > - &dev_attr_package_cpus.attr,
> > > - &dev_attr_package_cpus_list.attr,
> > > #ifdef CONFIG_SCHED_BOOK
> > > &dev_attr_book_id.attr,
> > > - &dev_attr_book_siblings.attr,
> > > - &dev_attr_book_siblings_list.attr,
> > > #endif
> > > #ifdef CONFIG_SCHED_DRAWER
> > > &dev_attr_drawer_id.attr,
> > > - &dev_attr_drawer_siblings.attr,
> > > - &dev_attr_drawer_siblings_list.attr,
> > > #endif
> > > NULL
> > > };
> > > static const struct attribute_group topology_attr_group = {
> > > .attrs = default_attrs,
> > > + .bin_attrs = bin_attrs,
> > > .name = "topology"
> > > };
> > > diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> > > index 70a9324..bc401bd9b 100644
> > > --- a/include/linux/bitmap.h
> > > +++ b/include/linux/bitmap.h
> > > @@ -219,6 +219,9 @@ extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int
> > > extern int bitmap_print_to_pagebuf(bool list, char *buf,
> > > const unsigned long *maskp, int nmaskbits);
> > > +extern int bitmap_print_to_buf(bool list, char *buf,
> > > + const unsigned long *maskp, int nmaskbits, loff_t off, size_t count);
> > > +
> > > #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
> > > #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
> > > diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> > > index 383684e..e4810b3e 100644
> > > --- a/include/linux/cpumask.h
> > > +++ b/include/linux/cpumask.h
> > > @@ -928,6 +928,31 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
> > > nr_cpu_ids);
> > > }
> > > +/**
> > > + * cpumap_print_to_buf - copies the cpumask into the buffer either
> > > + * as comma-separated list of cpus or hex values of cpumask
> > > + * @list: indicates whether the cpumap must be list
> > > + * @mask: the cpumask to copy
> > > + * @buf: the buffer to copy into
> > > + * @off: the offset that buffer to copy into
> > > + * @count: the count thatbuffer to copy into
> > > + *
> > > + * the role of cpumap_print_to_buf and cpumap_print_to_pagebuf is
> > > + * the same, the difference is that the second parameter of
> > > + * bitmap_print_to_buf can be more than one pagesize.
> > > + *
> > > + * Returns the length of the (null-terminated) @buf string, zero if
> > > + * nothing is copied.
> > > + */
> > > +
> > > +static inline ssize_t
> > > +cpumap_print_to_buf(bool list, char *buf, const struct cpumask *mask,
> > > + loff_t off, size_t count)
> > > +{
> > > + return bitmap_print_to_buf(list, buf, cpumask_bits(mask),
> > > + nr_cpu_ids, off, count);
> > > +}
> > > +
> > > #if NR_CPUS <= BITS_PER_LONG
> > > #define CPU_MASK_ALL \
> > > (cpumask_t) { { \
> > > diff --git a/lib/bitmap.c b/lib/bitmap.c
> > > index 75006c4..5bf89f1 100644
> > > --- a/lib/bitmap.c
> > > +++ b/lib/bitmap.c
> > > @@ -460,6 +460,40 @@ int bitmap_parse_user(const char __user *ubuf,
> > > EXPORT_SYMBOL(bitmap_parse_user);
> > > /**
> > > + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
> > > + * @list: indicates whether the bitmap must be list
> > > + * @buf: page aligned buffer into which string is placed
> > > + * @maskp: pointer to bitmap to convert
> > > + * @nmaskbits: size of bitmap, in bits
> > > + * @off: offset in buf
> > > + * @count: count that already output
> > > + *
> > > + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
> > > + * the same, the difference is that the second parameter of
> > > + * bitmap_print_to_buf can be more than one pagesize.
> > > + */
> > > +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
> > > + int nmaskbits, loff_t off, size_t count)
> > > +{
> > > + int len, size;
> > > + void *data;
> > > + char *fmt = list ? "%*pbl\n" : "%*pb\n";
> > > +
> > > + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> > > +
> > > + data = kvmalloc(len+1, GFP_KERNEL);
> > > + if (!data)
> > > + return -ENOMEM;
> > > +
> > > + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> > > + size = memory_read_from_buffer(buf, count, &off, data, size);
> > > + kvfree(data);
> > > +
> > > + return size;
> > Why is this so different from bitmap_print_to_pagebuf()? Can't you just
> > use this function as the "real" function and then change
> > bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?
>
> Do you mean do following change, is that correct? :-)

Maybe, it is whitespace corrupted, and it still feels like this function
is much bigger than it needs to be given the function it is replacing is
only a simple sprintf() call.

> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
> +                       int nmaskbits, loff_t off, size_t count)
> +{
> +       int len, size;
> +       void *data;
> +       const char *fmt = list ? "%*pbl\n" : "%*pb\n";
> +
> +       if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
> +               return scnprintf(buf, count, fmt, nmaskbits, maskp);
> +
> +       len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> +
> +       data = kvmalloc(len+1, GFP_KERNEL);

Why do you need to allocate more memory? And why kvmalloc()?

> +       if (!data)
> +               return -ENOMEM;
> +
> +       size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> +
> +       size = memory_read_from_buffer(buf, count, &off, data, size);
> +       kvfree(data);
> +
> +       return size;
> +}
> +EXPORT_SYMBOL(bitmap_print_to_buf);

thanks,

greg k-h

2021-06-02 06:44:50

by tiantao (H)

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow


在 2021/6/1 12:58, Greg KH 写道:
> On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
>> Reading sys/devices/system/cpu/cpuX/topology/ returns cpu topology.
>> However, the size of this file is limited to PAGE_SIZE because of the
>> limitation for sysfs attribute. so we use bin_attribute instead of
>> attribute to avoid NR_CPUS too big to cause buff overflow.
>>
>> This patch is based on the following discussion.
>> https://www.spinics.net/lists/linux-doc/msg95921.html
> Please use lore.kernel.org for links as we have no control over other
> sites to ensure that they will work in the future. Use the message id
> in the link as well, so that if something were to happen to lore, we can
> figure it out.
>
> Also, you are modifying a bunch of different files here, do you mean to
> do it for all of them?
>
>> Signed-off-by: Tian Tao <[email protected]>
>> Cc: Andrew Morton <[email protected]>
>> Cc: Andy Shevchenko <[email protected]>
>> Cc: Greg Kroah-Hartman <[email protected]>
>> Cc: "Rafael J. Wysocki" <[email protected]>
>> ---
>> drivers/base/topology.c | 115 ++++++++++++++++++++++++++----------------------
>> include/linux/bitmap.h | 3 ++
>> include/linux/cpumask.h | 25 +++++++++++
>> lib/bitmap.c | 34 ++++++++++++++
>> 4 files changed, 125 insertions(+), 52 deletions(-)
>>
>> diff --git a/drivers/base/topology.c b/drivers/base/topology.c
>> index 4d254fc..013edbb 100644
>> --- a/drivers/base/topology.c
>> +++ b/drivers/base/topology.c
>> @@ -21,25 +21,27 @@ static ssize_t name##_show(struct device *dev, \
>> return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \
>> }
>>
>> -#define define_siblings_show_map(name, mask) \
>> -static ssize_t name##_show(struct device *dev, \
>> - struct device_attribute *attr, char *buf) \
>> -{ \
>> - return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\
>> +#define define_siblings_read_func(name, mask) \
>> +static ssize_t name##_read(struct file *file, struct kobject *kobj, \
>> + struct bin_attribute *attr, char *buf, \
>> + loff_t off, size_t count) \
>> +{ \
>> + struct device *dev = kobj_to_dev(kobj); \
>> + \
>> + return cpumap_print_to_buf(false, buf, topology_##mask(dev->id), \
>> + off, count); \
>> +} \
>> + \
>> +static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \
>> + struct bin_attribute *attr, char *buf, \
>> + loff_t off, size_t count) \
>> +{ \
>> + struct device *dev = kobj_to_dev(kobj); \
>> + \
>> + return cpumap_print_to_buf(true, buf, topology_##mask(dev->id), \
>> + off, count); \
>> }
>>
>> -#define define_siblings_show_list(name, mask) \
>> -static ssize_t name##_list_show(struct device *dev, \
>> - struct device_attribute *attr, \
>> - char *buf) \
>> -{ \
>> - return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\
>> -}
>> -
>> -#define define_siblings_show_func(name, mask) \
>> - define_siblings_show_map(name, mask); \
>> - define_siblings_show_list(name, mask)
>> -
>> define_id_show_func(physical_package_id);
>> static DEVICE_ATTR_RO(physical_package_id);
>>
>> @@ -49,71 +51,80 @@ static DEVICE_ATTR_RO(die_id);
>> define_id_show_func(core_id);
>> static DEVICE_ATTR_RO(core_id);
>>
>> -define_siblings_show_func(thread_siblings, sibling_cpumask);
>> -static DEVICE_ATTR_RO(thread_siblings);
>> -static DEVICE_ATTR_RO(thread_siblings_list);
>> +define_siblings_read_func(thread_siblings, sibling_cpumask);
>> +static BIN_ATTR_RO(thread_siblings, 0);
>> +static BIN_ATTR_RO(thread_siblings_list, 0);
>>
>> -define_siblings_show_func(core_cpus, sibling_cpumask);
>> -static DEVICE_ATTR_RO(core_cpus);
>> -static DEVICE_ATTR_RO(core_cpus_list);
>> +define_siblings_read_func(core_cpus, sibling_cpumask);
>> +static BIN_ATTR_RO(core_cpus, 0);
>> +static BIN_ATTR_RO(core_cpus_list, 0);
>>
>> -define_siblings_show_func(core_siblings, core_cpumask);
>> -static DEVICE_ATTR_RO(core_siblings);
>> -static DEVICE_ATTR_RO(core_siblings_list);
>> +define_siblings_read_func(core_siblings, core_cpumask);
>> +static BIN_ATTR_RO(core_siblings, 0);
>> +static BIN_ATTR_RO(core_siblings_list, 0);
>>
>> -define_siblings_show_func(die_cpus, die_cpumask);
>> -static DEVICE_ATTR_RO(die_cpus);
>> -static DEVICE_ATTR_RO(die_cpus_list);
>> +define_siblings_read_func(die_cpus, die_cpumask);
>> +static BIN_ATTR_RO(die_cpus, 0);
>> +static BIN_ATTR_RO(die_cpus_list, 0);
>>
>> -define_siblings_show_func(package_cpus, core_cpumask);
>> -static DEVICE_ATTR_RO(package_cpus);
>> -static DEVICE_ATTR_RO(package_cpus_list);
>> +define_siblings_read_func(package_cpus, core_cpumask);
>> +static BIN_ATTR_RO(package_cpus, 0);
>> +static BIN_ATTR_RO(package_cpus_list, 0);
>>
>> #ifdef CONFIG_SCHED_BOOK
>> define_id_show_func(book_id);
>> static DEVICE_ATTR_RO(book_id);
>> -define_siblings_show_func(book_siblings, book_cpumask);
>> -static DEVICE_ATTR_RO(book_siblings);
>> -static DEVICE_ATTR_RO(book_siblings_list);
>> +define_siblings_read_func(book_siblings, book_cpumask);
>> +static BIN_ATTR_RO(book_siblings, 0);
>> +static BIN_ATTR_RO(book_siblings_list, 0);
>> #endif
>>
>> #ifdef CONFIG_SCHED_DRAWER
>> define_id_show_func(drawer_id);
>> static DEVICE_ATTR_RO(drawer_id);
>> -define_siblings_show_func(drawer_siblings, drawer_cpumask);
>> -static DEVICE_ATTR_RO(drawer_siblings);
>> -static DEVICE_ATTR_RO(drawer_siblings_list);
>> +define_siblings_read_func(drawer_siblings, drawer_cpumask);
>> +static BIN_ATTR_RO(drawer_siblings, 0);
>> +static BIN_ATTR_RO(drawer_siblings_list, 0);
>> #endif
>>
>> +static struct bin_attribute *bin_attrs[] = {
>> + &bin_attr_core_cpus,
>> + &bin_attr_core_cpus_list,
>> + &bin_attr_thread_siblings,
>> + &bin_attr_thread_siblings_list,
>> + &bin_attr_core_siblings,
>> + &bin_attr_core_siblings_list,
>> + &bin_attr_die_cpus,
>> + &bin_attr_die_cpus_list,
>> + &bin_attr_package_cpus,
>> + &bin_attr_package_cpus_list,
>> +#ifdef CONFIG_SCHED_BOOK
>> + &bin_attr_book_siblings,
>> + &bin_attr_book_siblings_list,
>> +#endif
>> +#ifdef CONFIG_SCHED_DRAWER
>> + &bin_attr_drawer_siblings,
>> + &bin_attr_drawer_siblings_list,
>> +#endif
>> + NULL,
>> +};
>> +
>> static struct attribute *default_attrs[] = {
>> &dev_attr_physical_package_id.attr,
>> &dev_attr_die_id.attr,
>> &dev_attr_core_id.attr,
>> - &dev_attr_thread_siblings.attr,
>> - &dev_attr_thread_siblings_list.attr,
>> - &dev_attr_core_cpus.attr,
>> - &dev_attr_core_cpus_list.attr,
>> - &dev_attr_core_siblings.attr,
>> - &dev_attr_core_siblings_list.attr,
>> - &dev_attr_die_cpus.attr,
>> - &dev_attr_die_cpus_list.attr,
>> - &dev_attr_package_cpus.attr,
>> - &dev_attr_package_cpus_list.attr,
>> #ifdef CONFIG_SCHED_BOOK
>> &dev_attr_book_id.attr,
>> - &dev_attr_book_siblings.attr,
>> - &dev_attr_book_siblings_list.attr,
>> #endif
>> #ifdef CONFIG_SCHED_DRAWER
>> &dev_attr_drawer_id.attr,
>> - &dev_attr_drawer_siblings.attr,
>> - &dev_attr_drawer_siblings_list.attr,
>> #endif
>> NULL
>> };
>>
>> static const struct attribute_group topology_attr_group = {
>> .attrs = default_attrs,
>> + .bin_attrs = bin_attrs,
>> .name = "topology"
>> };
>>
>> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
>> index 70a9324..bc401bd9b 100644
>> --- a/include/linux/bitmap.h
>> +++ b/include/linux/bitmap.h
>> @@ -219,6 +219,9 @@ extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int
>> extern int bitmap_print_to_pagebuf(bool list, char *buf,
>> const unsigned long *maskp, int nmaskbits);
>>
>> +extern int bitmap_print_to_buf(bool list, char *buf,
>> + const unsigned long *maskp, int nmaskbits, loff_t off, size_t count);
>> +
>> #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
>> #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
>>
>> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
>> index 383684e..e4810b3e 100644
>> --- a/include/linux/cpumask.h
>> +++ b/include/linux/cpumask.h
>> @@ -928,6 +928,31 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
>> nr_cpu_ids);
>> }
>>
>> +/**
>> + * cpumap_print_to_buf - copies the cpumask into the buffer either
>> + * as comma-separated list of cpus or hex values of cpumask
>> + * @list: indicates whether the cpumap must be list
>> + * @mask: the cpumask to copy
>> + * @buf: the buffer to copy into
>> + * @off: the offset that buffer to copy into
>> + * @count: the count thatbuffer to copy into
>> + *
>> + * the role of cpumap_print_to_buf and cpumap_print_to_pagebuf is
>> + * the same, the difference is that the second parameter of
>> + * bitmap_print_to_buf can be more than one pagesize.
>> + *
>> + * Returns the length of the (null-terminated) @buf string, zero if
>> + * nothing is copied.
>> + */
>> +
>> +static inline ssize_t
>> +cpumap_print_to_buf(bool list, char *buf, const struct cpumask *mask,
>> + loff_t off, size_t count)
>> +{
>> + return bitmap_print_to_buf(list, buf, cpumask_bits(mask),
>> + nr_cpu_ids, off, count);
>> +}
>> +
>> #if NR_CPUS <= BITS_PER_LONG
>> #define CPU_MASK_ALL \
>> (cpumask_t) { { \
>> diff --git a/lib/bitmap.c b/lib/bitmap.c
>> index 75006c4..5bf89f1 100644
>> --- a/lib/bitmap.c
>> +++ b/lib/bitmap.c
>> @@ -460,6 +460,40 @@ int bitmap_parse_user(const char __user *ubuf,
>> EXPORT_SYMBOL(bitmap_parse_user);
>>
>> /**
>> + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
>> + * @list: indicates whether the bitmap must be list
>> + * @buf: page aligned buffer into which string is placed
>> + * @maskp: pointer to bitmap to convert
>> + * @nmaskbits: size of bitmap, in bits
>> + * @off: offset in buf
>> + * @count: count that already output
>> + *
>> + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
>> + * the same, the difference is that the second parameter of
>> + * bitmap_print_to_buf can be more than one pagesize.
>> + */
>> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
>> + int nmaskbits, loff_t off, size_t count)
>> +{
>> + int len, size;
>> + void *data;
>> + char *fmt = list ? "%*pbl\n" : "%*pb\n";
>> +
>> + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
>> +
>> + data = kvmalloc(len+1, GFP_KERNEL);
>> + if (!data)
>> + return -ENOMEM;
>> +
>> + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
>> + size = memory_read_from_buffer(buf, count, &off, data, size);
>> + kvfree(data);
>> +
>> + return size;
> Why is this so different from bitmap_print_to_pagebuf()? Can't you just
> use this function as the "real" function and then change
> bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?

Do you mean the following change? Is that correct? :-)

+int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
+                       int nmaskbits, loff_t off, size_t count)
+{
+       int len, size;
+       void *data;
+       const char *fmt = list ? "%*pbl\n" : "%*pb\n";
+
+       if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
+               return scnprintf(buf, count, fmt, nmaskbits, maskp);
+
+       len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
+
+       data = kvmalloc(len+1, GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
+
+       size = memory_read_from_buffer(buf, count, &off, data, size);
+       kvfree(data);
+
+       return size;
+}
+EXPORT_SYMBOL(bitmap_print_to_buf);
+
+/**
  * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string
  * @list: indicates whether the bitmap must be list
  * @buf: page aligned buffer into which string is placed
@@ -480,8 +518,8 @@ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp,
 {
        ptrdiff_t len = PAGE_SIZE - offset_in_page(buf);

-       return list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) :
-                     scnprintf(buf, len, "%*pb\n", nmaskbits, maskp);
+       return bitmap_print_to_buf(list, buf, maskp, nmaskbits,
+                                  LLONG_MAX, len);
 }

>
> Can you add the new function as the first patch in the series and then
> use it in the later ones? That makes it easier to review on its own.
>
> thanks,
>
> greg k-h
> .
>

2021-06-02 06:45:41

by tiantao (H)

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow


在 2021/6/2 14:18, Greg KH 写道:
> On Wed, Jun 02, 2021 at 02:14:49PM +0800, tiantao (H) wrote:
>> 在 2021/6/1 12:58, Greg KH 写道:
>>> On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
>>>> Reading sys/devices/system/cpu/cpuX/topology/ returns cpu topology.
>>>> However, the size of this file is limited to PAGE_SIZE because of the
>>>> limitation for sysfs attribute. so we use bin_attribute instead of
>>>> attribute to avoid NR_CPUS too big to cause buff overflow.
>>>>
>>>> This patch is based on the following discussion.
>>>> https://www.spinics.net/lists/linux-doc/msg95921.html
>>> Please use lore.kernel.org for links as we have no control over other
>>> sites to ensure that they will work in the future. Use the message id
>>> in the link as well, so that if something were to happen to lore, we can
>>> figure it out.
>>>
>>> Also, you are modifying a bunch of different files here, do you mean to
>>> do it for all of them?
>>>
>>>> Signed-off-by: Tian Tao <[email protected]>
>>>> Cc: Andrew Morton <[email protected]>
>>>> Cc: Andy Shevchenko <[email protected]>
>>>> Cc: Greg Kroah-Hartman <[email protected]>
>>>> Cc: "Rafael J. Wysocki" <[email protected]>
>>>> ---
>>>> drivers/base/topology.c | 115 ++++++++++++++++++++++++++----------------------
>>>> include/linux/bitmap.h | 3 ++
>>>> include/linux/cpumask.h | 25 +++++++++++
>>>> lib/bitmap.c | 34 ++++++++++++++
>>>> 4 files changed, 125 insertions(+), 52 deletions(-)
>>>>
>>>> diff --git a/drivers/base/topology.c b/drivers/base/topology.c
>>>> index 4d254fc..013edbb 100644
>>>> --- a/drivers/base/topology.c
>>>> +++ b/drivers/base/topology.c
>>>> @@ -21,25 +21,27 @@ static ssize_t name##_show(struct device *dev, \
>>>> return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \
>>>> }
>>>> -#define define_siblings_show_map(name, mask) \
>>>> -static ssize_t name##_show(struct device *dev, \
>>>> - struct device_attribute *attr, char *buf) \
>>>> -{ \
>>>> - return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\
>>>> +#define define_siblings_read_func(name, mask) \
>>>> +static ssize_t name##_read(struct file *file, struct kobject *kobj, \
>>>> + struct bin_attribute *attr, char *buf, \
>>>> + loff_t off, size_t count) \
>>>> +{ \
>>>> + struct device *dev = kobj_to_dev(kobj); \
>>>> + \
>>>> + return cpumap_print_to_buf(false, buf, topology_##mask(dev->id), \
>>>> + off, count); \
>>>> +} \
>>>> + \
>>>> +static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \
>>>> + struct bin_attribute *attr, char *buf, \
>>>> + loff_t off, size_t count) \
>>>> +{ \
>>>> + struct device *dev = kobj_to_dev(kobj); \
>>>> + \
>>>> + return cpumap_print_to_buf(true, buf, topology_##mask(dev->id), \
>>>> + off, count); \
>>>> }
>>>> -#define define_siblings_show_list(name, mask) \
>>>> -static ssize_t name##_list_show(struct device *dev, \
>>>> - struct device_attribute *attr, \
>>>> - char *buf) \
>>>> -{ \
>>>> - return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\
>>>> -}
>>>> -
>>>> -#define define_siblings_show_func(name, mask) \
>>>> - define_siblings_show_map(name, mask); \
>>>> - define_siblings_show_list(name, mask)
>>>> -
>>>> define_id_show_func(physical_package_id);
>>>> static DEVICE_ATTR_RO(physical_package_id);
>>>> @@ -49,71 +51,80 @@ static DEVICE_ATTR_RO(die_id);
>>>> define_id_show_func(core_id);
>>>> static DEVICE_ATTR_RO(core_id);
>>>> -define_siblings_show_func(thread_siblings, sibling_cpumask);
>>>> -static DEVICE_ATTR_RO(thread_siblings);
>>>> -static DEVICE_ATTR_RO(thread_siblings_list);
>>>> +define_siblings_read_func(thread_siblings, sibling_cpumask);
>>>> +static BIN_ATTR_RO(thread_siblings, 0);
>>>> +static BIN_ATTR_RO(thread_siblings_list, 0);
>>>> -define_siblings_show_func(core_cpus, sibling_cpumask);
>>>> -static DEVICE_ATTR_RO(core_cpus);
>>>> -static DEVICE_ATTR_RO(core_cpus_list);
>>>> +define_siblings_read_func(core_cpus, sibling_cpumask);
>>>> +static BIN_ATTR_RO(core_cpus, 0);
>>>> +static BIN_ATTR_RO(core_cpus_list, 0);
>>>> -define_siblings_show_func(core_siblings, core_cpumask);
>>>> -static DEVICE_ATTR_RO(core_siblings);
>>>> -static DEVICE_ATTR_RO(core_siblings_list);
>>>> +define_siblings_read_func(core_siblings, core_cpumask);
>>>> +static BIN_ATTR_RO(core_siblings, 0);
>>>> +static BIN_ATTR_RO(core_siblings_list, 0);
>>>> -define_siblings_show_func(die_cpus, die_cpumask);
>>>> -static DEVICE_ATTR_RO(die_cpus);
>>>> -static DEVICE_ATTR_RO(die_cpus_list);
>>>> +define_siblings_read_func(die_cpus, die_cpumask);
>>>> +static BIN_ATTR_RO(die_cpus, 0);
>>>> +static BIN_ATTR_RO(die_cpus_list, 0);
>>>> -define_siblings_show_func(package_cpus, core_cpumask);
>>>> -static DEVICE_ATTR_RO(package_cpus);
>>>> -static DEVICE_ATTR_RO(package_cpus_list);
>>>> +define_siblings_read_func(package_cpus, core_cpumask);
>>>> +static BIN_ATTR_RO(package_cpus, 0);
>>>> +static BIN_ATTR_RO(package_cpus_list, 0);
>>>> #ifdef CONFIG_SCHED_BOOK
>>>> define_id_show_func(book_id);
>>>> static DEVICE_ATTR_RO(book_id);
>>>> -define_siblings_show_func(book_siblings, book_cpumask);
>>>> -static DEVICE_ATTR_RO(book_siblings);
>>>> -static DEVICE_ATTR_RO(book_siblings_list);
>>>> +define_siblings_read_func(book_siblings, book_cpumask);
>>>> +static BIN_ATTR_RO(book_siblings, 0);
>>>> +static BIN_ATTR_RO(book_siblings_list, 0);
>>>> #endif
>>>> #ifdef CONFIG_SCHED_DRAWER
>>>> define_id_show_func(drawer_id);
>>>> static DEVICE_ATTR_RO(drawer_id);
>>>> -define_siblings_show_func(drawer_siblings, drawer_cpumask);
>>>> -static DEVICE_ATTR_RO(drawer_siblings);
>>>> -static DEVICE_ATTR_RO(drawer_siblings_list);
>>>> +define_siblings_read_func(drawer_siblings, drawer_cpumask);
>>>> +static BIN_ATTR_RO(drawer_siblings, 0);
>>>> +static BIN_ATTR_RO(drawer_siblings_list, 0);
>>>> #endif
>>>> +static struct bin_attribute *bin_attrs[] = {
>>>> + &bin_attr_core_cpus,
>>>> + &bin_attr_core_cpus_list,
>>>> + &bin_attr_thread_siblings,
>>>> + &bin_attr_thread_siblings_list,
>>>> + &bin_attr_core_siblings,
>>>> + &bin_attr_core_siblings_list,
>>>> + &bin_attr_die_cpus,
>>>> + &bin_attr_die_cpus_list,
>>>> + &bin_attr_package_cpus,
>>>> + &bin_attr_package_cpus_list,
>>>> +#ifdef CONFIG_SCHED_BOOK
>>>> + &bin_attr_book_siblings,
>>>> + &bin_attr_book_siblings_list,
>>>> +#endif
>>>> +#ifdef CONFIG_SCHED_DRAWER
>>>> + &bin_attr_drawer_siblings,
>>>> + &bin_attr_drawer_siblings_list,
>>>> +#endif
>>>> + NULL,
>>>> +};
>>>> +
>>>> static struct attribute *default_attrs[] = {
>>>> &dev_attr_physical_package_id.attr,
>>>> &dev_attr_die_id.attr,
>>>> &dev_attr_core_id.attr,
>>>> - &dev_attr_thread_siblings.attr,
>>>> - &dev_attr_thread_siblings_list.attr,
>>>> - &dev_attr_core_cpus.attr,
>>>> - &dev_attr_core_cpus_list.attr,
>>>> - &dev_attr_core_siblings.attr,
>>>> - &dev_attr_core_siblings_list.attr,
>>>> - &dev_attr_die_cpus.attr,
>>>> - &dev_attr_die_cpus_list.attr,
>>>> - &dev_attr_package_cpus.attr,
>>>> - &dev_attr_package_cpus_list.attr,
>>>> #ifdef CONFIG_SCHED_BOOK
>>>> &dev_attr_book_id.attr,
>>>> - &dev_attr_book_siblings.attr,
>>>> - &dev_attr_book_siblings_list.attr,
>>>> #endif
>>>> #ifdef CONFIG_SCHED_DRAWER
>>>> &dev_attr_drawer_id.attr,
>>>> - &dev_attr_drawer_siblings.attr,
>>>> - &dev_attr_drawer_siblings_list.attr,
>>>> #endif
>>>> NULL
>>>> };
>>>> static const struct attribute_group topology_attr_group = {
>>>> .attrs = default_attrs,
>>>> + .bin_attrs = bin_attrs,
>>>> .name = "topology"
>>>> };
>>>> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
>>>> index 70a9324..bc401bd9b 100644
>>>> --- a/include/linux/bitmap.h
>>>> +++ b/include/linux/bitmap.h
>>>> @@ -219,6 +219,9 @@ extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int
>>>> extern int bitmap_print_to_pagebuf(bool list, char *buf,
>>>> const unsigned long *maskp, int nmaskbits);
>>>> +extern int bitmap_print_to_buf(bool list, char *buf,
>>>> + const unsigned long *maskp, int nmaskbits, loff_t off, size_t count);
>>>> +
>>>> #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
>>>> #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
>>>> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
>>>> index 383684e..e4810b3e 100644
>>>> --- a/include/linux/cpumask.h
>>>> +++ b/include/linux/cpumask.h
>>>> @@ -928,6 +928,31 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
>>>> nr_cpu_ids);
>>>> }
>>>> +/**
>>>> + * cpumap_print_to_buf - copies the cpumask into the buffer either
>>>> + * as comma-separated list of cpus or hex values of cpumask
>>>> + * @list: indicates whether the cpumap must be list
>>>> + * @mask: the cpumask to copy
>>>> + * @buf: the buffer to copy into
>>>> + * @off: the offset that buffer to copy into
>>>> + * @count: the count thatbuffer to copy into
>>>> + *
>>>> + * the role of cpumap_print_to_buf and cpumap_print_to_pagebuf is
>>>> + * the same, the difference is that the second parameter of
>>>> + * bitmap_print_to_buf can be more than one pagesize.
>>>> + *
>>>> + * Returns the length of the (null-terminated) @buf string, zero if
>>>> + * nothing is copied.
>>>> + */
>>>> +
>>>> +static inline ssize_t
>>>> +cpumap_print_to_buf(bool list, char *buf, const struct cpumask *mask,
>>>> + loff_t off, size_t count)
>>>> +{
>>>> + return bitmap_print_to_buf(list, buf, cpumask_bits(mask),
>>>> + nr_cpu_ids, off, count);
>>>> +}
>>>> +
>>>> #if NR_CPUS <= BITS_PER_LONG
>>>> #define CPU_MASK_ALL \
>>>> (cpumask_t) { { \
>>>> diff --git a/lib/bitmap.c b/lib/bitmap.c
>>>> index 75006c4..5bf89f1 100644
>>>> --- a/lib/bitmap.c
>>>> +++ b/lib/bitmap.c
>>>> @@ -460,6 +460,40 @@ int bitmap_parse_user(const char __user *ubuf,
>>>> EXPORT_SYMBOL(bitmap_parse_user);
>>>> /**
>>>> + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
>>>> + * @list: indicates whether the bitmap must be list
>>>> + * @buf: page aligned buffer into which string is placed
>>>> + * @maskp: pointer to bitmap to convert
>>>> + * @nmaskbits: size of bitmap, in bits
>>>> + * @off: offset in buf
>>>> + * @count: count that already output
>>>> + *
>>>> + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
>>>> + * the same, the difference is that the second parameter of
>>>> + * bitmap_print_to_buf can be more than one pagesize.
>>>> + */
>>>> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
>>>> + int nmaskbits, loff_t off, size_t count)
>>>> +{
>>>> + int len, size;
>>>> + void *data;
>>>> + char *fmt = list ? "%*pbl\n" : "%*pb\n";
>>>> +
>>>> + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
>>>> +
>>>> + data = kvmalloc(len+1, GFP_KERNEL);
>>>> + if (!data)
>>>> + return -ENOMEM;
>>>> +
>>>> + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
>>>> + size = memory_read_from_buffer(buf, count, &off, data, size);
>>>> + kvfree(data);
>>>> +
>>>> + return size;
>>> Why is this so different from bitmap_print_to_pagebuf()? Can't you just
>>> use this function as the "real" function and then change
>>> bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?
>> Do you mean do following change, is that correct? :-)
> Maybe, it is whitespace corrupted, and it still feels like this function
> is much bigger than it needs to be given the function it is replacing is
> only a simple sprintf() call.
>
>> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
>> +                       int nmaskbits, loff_t off, size_t count)
>> +{
>> +       int len, size;
>> +       void *data;
>> +       const char *fmt = list ? "%*pbl\n" : "%*pb\n";
>> +
>> +       if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
>> +               return scnprintf(buf, count, fmt, nmaskbits, maskp);
>> +
>> +       len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
>> +
>> +       data = kvmalloc(len+1, GFP_KERNEL);
> Why do you need to allocate more memory? And why kvmalloc()?

Because the output here can exceed one page and we don't know the exact
size in advance, we have to call snprintf() first to get the actual
length. kvmalloc() is used because when physical memory is tight,
kmalloc() may fail, but vmalloc() will succeed. It is not so bad that
the memory is not requested here.

>
>> +       if (!data)
>> +               return -ENOMEM;
>> +
>> +       size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
>> +
>> +       size = memory_read_from_buffer(buf, count, &off, data, size);
>> +       kvfree(data);
>> +
>> +       return size;
>> +}
>> +EXPORT_SYMBOL(bitmap_print_to_buf);
> thanks,
>
> greg k-h
> .
>

2021-06-02 09:54:13

by tiantao (H)

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow


在 2021/6/2 16:48, Andy Shevchenko 写道:
> On Wed, Jun 2, 2021 at 9:45 AM tiantao (H) <[email protected]> wrote:
>> 在 2021/6/2 14:18, Greg KH 写道:
>>> On Wed, Jun 02, 2021 at 02:14:49PM +0800, tiantao (H) wrote:
>>>> 在 2021/6/1 12:58, Greg KH 写道:
>>>>> On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> ...
>
>>>>>> /**
>>>>>> + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
>>>>>> + * @list: indicates whether the bitmap must be list
>>>>>> + * @buf: page aligned buffer into which string is placed
>>>>>> + * @maskp: pointer to bitmap to convert
>>>>>> + * @nmaskbits: size of bitmap, in bits
>>>>>> + * @off: offset in buf
>>>>>> + * @count: count that already output
>>>>>> + *
>>>>>> + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
>>>>>> + * the same, the difference is that the second parameter of
>>>>>> + * bitmap_print_to_buf can be more than one pagesize.
>>>>>> + */
>>>>>> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
>>>>>> + int nmaskbits, loff_t off, size_t count)
>>>>>> +{
>>>>>> + int len, size;
>>>>>> + void *data;
>>>>>> + char *fmt = list ? "%*pbl\n" : "%*pb\n";
>>>>>> +
>>>>>> + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
>>>>>> +
>>>>>> + data = kvmalloc(len+1, GFP_KERNEL);
>>>>>> + if (!data)
>>>>>> + return -ENOMEM;
>>>>>> +
>>>>>> + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
>>>>>> + size = memory_read_from_buffer(buf, count, &off, data, size);
>>>>>> + kvfree(data);
>>>>>> +
>>>>>> + return size;
>>>>> Why is this so different from bitmap_print_to_pagebuf()? Can't you just
>>>>> use this function as the "real" function and then change
>>>>> bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?
>>>> Do you mean do following change, is that correct? :-)
>>> Maybe, it is whitespace corrupted, and it still feels like this function
>>> is much bigger than it needs to be given the function it is replacing is
>>> only a simple sprintf() call.
>>>
>>>> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
>>>> + int nmaskbits, loff_t off, size_t count)
>>>> +{
>>>> + int len, size;
>>>> + void *data;
>>>> + const char *fmt = list ? "%*pbl\n" : "%*pb\n";
>>>> +
>>>> + if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
>>>> + return scnprintf(buf, count, fmt, nmaskbits, maskp);
>>>> +
>>>> + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
>>>> +
>>>> + data = kvmalloc(len+1, GFP_KERNEL);
>>> Why do you need to allocate more memory? And why kvmalloc()?
>> Because the memory here will exceed a pagesize and we don't know the
>> exact size, we have to call
>>
>> snprintf first to get the actual size. kvmalloc() is used because when
>> physical memory is tight, kmalloc
>>
>> may fail, but vmalloc will succeed. It is not so bad that the memory is
>> not requested here.
> To me it sounds like the function is overengineered / lacks thought
> through / optimization.
> Can you provide a few examples that require the above algorithm?

So you think we should use kmalloc() instead of kvmalloc()?


>
>>>> + if (!data)
>>>> + return -ENOMEM;
>>>> +
>>>> + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
>>>> +
>>>> + size = memory_read_from_buffer(buf, count, &off, data, size);
>>>> + kvfree(data);
>>>> +
>>>> + return size;
>>>> +}
>
> --
> With Best Regards,
> Andy Shevchenko
> .
>

2021-06-02 11:51:26

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow

On Wed, Jun 2, 2021 at 9:45 AM tiantao (H) <[email protected]> wrote:
> 在 2021/6/2 14:18, Greg KH 写道:
> > On Wed, Jun 02, 2021 at 02:14:49PM +0800, tiantao (H) wrote:
> >> 在 2021/6/1 12:58, Greg KH 写道:
> >>> On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:

...

> >>>> /**
> >>>> + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
> >>>> + * @list: indicates whether the bitmap must be list
> >>>> + * @buf: page aligned buffer into which string is placed
> >>>> + * @maskp: pointer to bitmap to convert
> >>>> + * @nmaskbits: size of bitmap, in bits
> >>>> + * @off: offset in buf
> >>>> + * @count: count that already output
> >>>> + *
> >>>> + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
> >>>> + * the same, the difference is that the second parameter of
> >>>> + * bitmap_print_to_buf can be more than one pagesize.
> >>>> + */
> >>>> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
> >>>> + int nmaskbits, loff_t off, size_t count)
> >>>> +{
> >>>> + int len, size;
> >>>> + void *data;
> >>>> + char *fmt = list ? "%*pbl\n" : "%*pb\n";
> >>>> +
> >>>> + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> >>>> +
> >>>> + data = kvmalloc(len+1, GFP_KERNEL);
> >>>> + if (!data)
> >>>> + return -ENOMEM;
> >>>> +
> >>>> + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> >>>> + size = memory_read_from_buffer(buf, count, &off, data, size);
> >>>> + kvfree(data);
> >>>> +
> >>>> + return size;
> >>> Why is this so different from bitmap_print_to_pagebuf()? Can't you just
> >>> use this function as the "real" function and then change
> >>> bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?
> >> Do you mean do following change, is that correct? :-)
> > Maybe, it is whitespace corrupted, and it still feels like this function
> > is much bigger than it needs to be given the function it is replacing is
> > only a simple sprintf() call.
> >
> >> +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
> >> + int nmaskbits, loff_t off, size_t count)
> >> +{
> >> + int len, size;
> >> + void *data;
> >> + const char *fmt = list ? "%*pbl\n" : "%*pb\n";
> >> +
> >> + if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
> >> + return scnprintf(buf, count, fmt, nmaskbits, maskp);
> >> +
> >> + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> >> +
> >> + data = kvmalloc(len+1, GFP_KERNEL);
> > Why do you need to allocate more memory? And why kvmalloc()?
>
> Because the memory here will exceed a pagesize and we don't know the
> exact size, we have to call
>
> snprintf first to get the actual size. kvmalloc() is used because when
> physical memory is tight, kmalloc
>
> may fail, but vmalloc will succeed. It is not so bad that the memory is
> not requested here.

To me it sounds like the function is overengineered / lacks thought
through / optimization.
Can you provide a few examples that require the above algorithm?

> >> + if (!data)
> >> + return -ENOMEM;
> >> +
> >> + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> >> +
> >> + size = memory_read_from_buffer(buf, count, &off, data, size);
> >> + kvfree(data);
> >> +
> >> + return size;
> >> +}


--
With Best Regards,
Andy Shevchenko

2021-06-02 11:52:26

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow

On Wed, Jun 02, 2021 at 05:00:16PM +0800, tiantao (H) wrote:
>
> 在 2021/6/2 16:48, Andy Shevchenko 写道:
> > On Wed, Jun 2, 2021 at 9:45 AM tiantao (H) <[email protected]> wrote:
> > > 在 2021/6/2 14:18, Greg KH 写道:
> > > > On Wed, Jun 02, 2021 at 02:14:49PM +0800, tiantao (H) wrote:
> > > > > 在 2021/6/1 12:58, Greg KH 写道:
> > > > > > On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> > ...
> >
> > > > > > > /**
> > > > > > > + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string
> > > > > > > + * @list: indicates whether the bitmap must be list
> > > > > > > + * @buf: page aligned buffer into which string is placed
> > > > > > > + * @maskp: pointer to bitmap to convert
> > > > > > > + * @nmaskbits: size of bitmap, in bits
> > > > > > > + * @off: offset in buf
> > > > > > > + * @count: count that already output
> > > > > > > + *
> > > > > > > + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf is
> > > > > > > + * the same, the difference is that the second parameter of
> > > > > > > + * bitmap_print_to_buf can be more than one pagesize.
> > > > > > > + */
> > > > > > > +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
> > > > > > > + int nmaskbits, loff_t off, size_t count)
> > > > > > > +{
> > > > > > > + int len, size;
> > > > > > > + void *data;
> > > > > > > + char *fmt = list ? "%*pbl\n" : "%*pb\n";
> > > > > > > +
> > > > > > > + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> > > > > > > +
> > > > > > > + data = kvmalloc(len+1, GFP_KERNEL);
> > > > > > > + if (!data)
> > > > > > > + return -ENOMEM;
> > > > > > > +
> > > > > > > + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> > > > > > > + size = memory_read_from_buffer(buf, count, &off, data, size);
> > > > > > > + kvfree(data);
> > > > > > > +
> > > > > > > + return size;
> > > > > > Why is this so different from bitmap_print_to_pagebuf()? Can't you just
> > > > > > use this function as the "real" function and then change
> > > > > > bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?
> > > > > Do you mean do following change, is that correct? :-)
> > > > Maybe, it is whitespace corrupted, and it still feels like this function
> > > > is much bigger than it needs to be given the function it is replacing is
> > > > only a simple sprintf() call.
> > > >
> > > > > +int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp,
> > > > > + int nmaskbits, loff_t off, size_t count)
> > > > > +{
> > > > > + int len, size;
> > > > > + void *data;
> > > > > + const char *fmt = list ? "%*pbl\n" : "%*pb\n";
> > > > > +
> > > > > + if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
> > > > > + return scnprintf(buf, count, fmt, nmaskbits, maskp);
> > > > > +
> > > > > + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> > > > > +
> > > > > + data = kvmalloc(len+1, GFP_KERNEL);
> > > > Why do you need to allocate more memory? And why kvmalloc()?
> > > Because the memory here will exceed a pagesize and we don't know the
> > > exact size, we have to call
> > >
> > > snprintf first to get the actual size. kvmalloc() is used because when
> > > physical memory is tight, kmalloc
> > >
> > > may fail, but vmalloc will succeed. It is not so bad that the memory is
> > > not requested here.
> > To me it sounds like the function is overengineered / lacks thought
> > through / optimization.
> > Can you provide a few examples that require the above algorithm?
>
> so you think we should use kmalloc instead of kvmalloc ?

What size bitmap would trigger a vmalloc() call to be forced here?

thanks,

greg k-h

Subject: RE: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow



> -----Original Message-----
> From: Greg KH [mailto:[email protected]]
> Sent: Wednesday, June 2, 2021 9:06 PM
> To: tiantao (H) <[email protected]>
> Cc: Andy Shevchenko <[email protected]>; tiantao (H)
> <[email protected]>; Linux Kernel Mailing List
> <[email protected]>; Andrew Morton <[email protected]>;
> Song Bao Hua (Barry Song) <[email protected]>; Andy Shevchenko
> <[email protected]>; Rafael J. Wysocki <[email protected]>;
> Jonathan Cameron <[email protected]>
> Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow
>
> On Wed, Jun 02, 2021 at 05:00:16PM +0800, tiantao (H) wrote:
> >
> > 在 2021/6/2 16:48, Andy Shevchenko 写道:
> > > On Wed, Jun 2, 2021 at 9:45 AM tiantao (H) <[email protected]> wrote:
> > > > 在 2021/6/2 14:18, Greg KH 写道:
> > > > > On Wed, Jun 02, 2021 at 02:14:49PM +0800, tiantao (H) wrote:
> > > > > > 在 2021/6/1 12:58, Greg KH 写道:
> > > > > > > On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> > > ...
> > >
> > > > > > > > /**
> > > > > > > > + * bitmap_print_to_buf - convert bitmap to list or hex format
> ASCII string
> > > > > > > > + * @list: indicates whether the bitmap must be list
> > > > > > > > + * @buf: page aligned buffer into which string is placed
> > > > > > > > + * @maskp: pointer to bitmap to convert
> > > > > > > > + * @nmaskbits: size of bitmap, in bits
> > > > > > > > + * @off: offset in buf
> > > > > > > > + * @count: count that already output
> > > > > > > > + *
> > > > > > > > + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf
> is
> > > > > > > > + * the same, the difference is that the second parameter of
> > > > > > > > + * bitmap_print_to_buf can be more than one pagesize.
> > > > > > > > + */
> > > > > > > > +int bitmap_print_to_buf(bool list, char *buf, const unsigned
> long *maskp,
> > > > > > > > + int nmaskbits, loff_t off, size_t count)
> > > > > > > > +{
> > > > > > > > + int len, size;
> > > > > > > > + void *data;
> > > > > > > > + char *fmt = list ? "%*pbl\n" : "%*pb\n";
> > > > > > > > +
> > > > > > > > + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> > > > > > > > +
> > > > > > > > + data = kvmalloc(len+1, GFP_KERNEL);
> > > > > > > > + if (!data)
> > > > > > > > + return -ENOMEM;
> > > > > > > > +
> > > > > > > > + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> > > > > > > > + size = memory_read_from_buffer(buf, count, &off, data, size);
> > > > > > > > + kvfree(data);
> > > > > > > > +
> > > > > > > > + return size;
> > > > > > > Why is this so different from bitmap_print_to_pagebuf()? Can't you
> just
> > > > > > > use this function as the "real" function and then change
> > > > > > > bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?
> > > > > > Do you mean do following change, is that correct? :-)
> > > > > Maybe, it is whitespace corrupted, and it still feels like this function
> > > > > is much bigger than it needs to be given the function it is replacing
> is
> > > > > only a simple sprintf() call.
> > > > >
> > > > > > +int bitmap_print_to_buf(bool list, char *buf, const unsigned long
> *maskp,
> > > > > > + int nmaskbits, loff_t off, size_t count)
> > > > > > +{
> > > > > > + int len, size;
> > > > > > + void *data;
> > > > > > + const char *fmt = list ? "%*pbl\n" : "%*pb\n";
> > > > > > +
> > > > > > + if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
> > > > > > + return scnprintf(buf, count, fmt, nmaskbits, maskp);
> > > > > > +
> > > > > > + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> > > > > > +
> > > > > > + data = kvmalloc(len+1, GFP_KERNEL);
> > > > > Why do you need to allocate more memory? And why kvmalloc()?
> > > > Because the memory here will exceed a pagesize and we don't know the
> > > > exact size, we have to call
> > > >
> > > > snprintf first to get the actual size. kvmalloc() is used because when
> > > > physical memory is tight, kmalloc
> > > >
> > > > may fail, but vmalloc will succeed. It is not so bad that the memory is
> > > > not requested here.
> > > To me it sounds like the function is overengineered / lacks thought
> > > through / optimization.
> > > Can you provide a few examples that require the above algorithm?
> >
> > so you think we should use kmalloc instead of kvmalloc ?
>
> What size bitmap would trigger a vmalloc() call to be forced here?
>

According to kvmalloc_node(), kvmalloc() falls back to vmalloc only when
kmalloc fails to get memory and the size is larger than PAGE_SIZE.
Otherwise, it simply returns the kmalloc result (NULL on failure).
void *kvmalloc_node(size_t size, gfp_t flags, int node)
{
gfp_t kmalloc_flags = flags;
void *ret;

...

ret = kmalloc_node(size, kmalloc_flags, node);

/*
* It doesn't really make sense to fallback to vmalloc for sub page
* requests
*/
if (ret || size <= PAGE_SIZE)
return ret;

return __vmalloc_node(size, 1, flags, node,
__builtin_return_address(0));
}

For the bitmap format, it is clear that a large NR_CPUS can trigger vmalloc.
The following is copied from drivers/base/node.c:
/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));

But for the list format, it is much trickier. A list could be as simple
as:
0-2047
It could also be as complex as:
0,1,3,5,7,9,11,13,.....,2045,2047

It depends entirely on what the bitmap looks like.

That's why tiantao's code detects the size before allocating.

> thanks,
>
> greg k-h

Thanks
Barry

2021-06-02 13:16:50

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow

On Wed, Jun 02, 2021 at 09:20:45AM +0000, Song Bao Hua (Barry Song) wrote:
>
>
> > -----Original Message-----
> > From: Greg KH [mailto:[email protected]]
> > Sent: Wednesday, June 2, 2021 9:06 PM
> > To: tiantao (H) <[email protected]>
> > Cc: Andy Shevchenko <[email protected]>; tiantao (H)
> > <[email protected]>; Linux Kernel Mailing List
> > <[email protected]>; Andrew Morton <[email protected]>;
> > Song Bao Hua (Barry Song) <[email protected]>; Andy Shevchenko
> > <[email protected]>; Rafael J. Wysocki <[email protected]>;
> > Jonathan Cameron <[email protected]>
> > Subject: Re: [PATCH 1/2] topology: use bin_attribute to avoid buff overflow
> >
> > On Wed, Jun 02, 2021 at 05:00:16PM +0800, tiantao (H) wrote:
> > >
> > > 在 2021/6/2 16:48, Andy Shevchenko 写道:
> > > > On Wed, Jun 2, 2021 at 9:45 AM tiantao (H) <[email protected]> wrote:
> > > > > 在 2021/6/2 14:18, Greg KH 写道:
> > > > > > On Wed, Jun 02, 2021 at 02:14:49PM +0800, tiantao (H) wrote:
> > > > > > > 在 2021/6/1 12:58, Greg KH 写道:
> > > > > > > > On Tue, Jun 01, 2021 at 10:56:49AM +0800, Tian Tao wrote:
> > > > ...
> > > >
> > > > > > > > > /**
> > > > > > > > > + * bitmap_print_to_buf - convert bitmap to list or hex format
> > ASCII string
> > > > > > > > > + * @list: indicates whether the bitmap must be list
> > > > > > > > > + * @buf: page aligned buffer into which string is placed
> > > > > > > > > + * @maskp: pointer to bitmap to convert
> > > > > > > > > + * @nmaskbits: size of bitmap, in bits
> > > > > > > > > + * @off: offset in buf
> > > > > > > > > + * @count: count that already output
> > > > > > > > > + *
> > > > > > > > > + * the role of bitmap_print_to_buf and bitmap_print_to_pagebuf
> > is
> > > > > > > > > + * the same, the difference is that the second parameter of
> > > > > > > > > + * bitmap_print_to_buf can be more than one pagesize.
> > > > > > > > > + */
> > > > > > > > > +int bitmap_print_to_buf(bool list, char *buf, const unsigned
> > long *maskp,
> > > > > > > > > + int nmaskbits, loff_t off, size_t count)
> > > > > > > > > +{
> > > > > > > > > + int len, size;
> > > > > > > > > + void *data;
> > > > > > > > > + char *fmt = list ? "%*pbl\n" : "%*pb\n";
> > > > > > > > > +
> > > > > > > > > + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> > > > > > > > > +
> > > > > > > > > + data = kvmalloc(len+1, GFP_KERNEL);
> > > > > > > > > + if (!data)
> > > > > > > > > + return -ENOMEM;
> > > > > > > > > +
> > > > > > > > > + size = scnprintf(data, len+1, fmt, nmaskbits, maskp);
> > > > > > > > > + size = memory_read_from_buffer(buf, count, &off, data, size);
> > > > > > > > > + kvfree(data);
> > > > > > > > > +
> > > > > > > > > + return size;
> > > > > > > > Why is this so different from bitmap_print_to_pagebuf()? Can't you
> > just
> > > > > > > > use this function as the "real" function and then change
> > > > > > > > bitmap_print_to_pagebuf() to call it with a size of PAGE_SIZE?
> > > > > > > Do you mean do following change, is that correct? :-)
> > > > > > Maybe, it is whitespace corrupted, and it still feels like this function
> > > > > > is much bigger than it needs to be given the function it is replacing
> > is
> > > > > > only a simple sprintf() call.
> > > > > >
> > > > > > > +int bitmap_print_to_buf(bool list, char *buf, const unsigned long
> > *maskp,
> > > > > > > + int nmaskbits, loff_t off, size_t count)
> > > > > > > +{
> > > > > > > + int len, size;
> > > > > > > + void *data;
> > > > > > > + const char *fmt = list ? "%*pbl\n" : "%*pb\n";
> > > > > > > +
> > > > > > > + if (off == LLONG_MAX && count == PAGE_SIZE - offset_in_page(buf))
> > > > > > > + return scnprintf(buf, count, fmt, nmaskbits, maskp);
> > > > > > > +
> > > > > > > + len = snprintf(NULL, 0, fmt, nmaskbits, maskp);
> > > > > > > +
> > > > > > > + data = kvmalloc(len+1, GFP_KERNEL);
> > > > > > Why do you need to allocate more memory? And why kvmalloc()?
> > > > > Because the memory here will exceed a pagesize and we don't know the
> > > > > exact size, we have to call
> > > > >
> > > > > snprintf first to get the actual size. kvmalloc() is used because when
> > > > > physical memory is tight, kmalloc
> > > > >
> > > > > may fail, but vmalloc will succeed. It is not so bad that the memory is
> > > > > not requested here.
> > > > To me it sounds like the function is overengineered / lacks thought
> > > > through / optimization.
> > > > Can you provide a few examples that require the above algorithm?
> > >
> > > so you think we should use kmalloc instead of kvmalloc ?
> >
> > What size bitmap would trigger a vmalloc() call to be forced here?
> >
>
> According to kvmalloc_node(), only if size is larger than PAGE_SIZE,
> kvmalloc will move to vmalloc if kmalloc fails to get memory. Otherwise,
> it will return error.

I know, so how big of a bitmap would you need before a kmalloc call
would fail, given that there is almost always free memory in these
"smaller" amounts?

And if there is no memory, why not fail like normal?  This is not
something that needs to be complex and crazy; it's just a sysfs file to
provide information.

Please do not over-engineer something that does not need it, until you
have proof it is needed.

thanks,

greg k-h