Correct the maxnode parameter value passed to the mbind() syscall to be
the number of node mask bits to analyze plus 1. Dynamically allocate the
node mask memory depending on the index of the node of the CPU being profiled.
Fixes: c44a8b44ca9f ("perf record: Bind the AIO user space buffers to nodes")
Signed-off-by: Alexey Budankov <[email protected]>
---
tools/perf/util/mmap.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 3b664fa673a6..6d604cd67a95 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
{
void *data;
size_t mmap_len;
- unsigned long node_mask;
+ unsigned long *node_mask;
+ unsigned long node_index;
+ int err = 0;
if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
data = map->aio.data[idx];
mmap_len = mmap__mmap_len(map);
- node_mask = 1UL << cpu__get_node(cpu);
- if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
- pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
- data, data + mmap_len, cpu__get_node(cpu));
+ node_index = cpu__get_node(cpu);
+ node_mask = bitmap_alloc(node_index + 1);
+ if (!node_mask) {
+ pr_err("Failed to allocate node mask for mbind: error %m\n");
return -1;
}
+ set_bit(node_index, node_mask);
+ if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1/*nr_bits + 1*/, 0)) {
+ pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+ data, data + mmap_len, node_index);
+ err = -1;
+ }
+ bitmap_free(node_mask);
}
- return 0;
+ return err;
}
#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct mmap *map, int idx)
--
2.24.1
On 12.03.2020 17:31, Arnaldo Carvalho de Melo wrote:
> Em Thu, Mar 12, 2020 at 03:21:45PM +0300, Alexey Budankov escreveu:
>>
>> Correct maxnode parameter value passed to mbind() syscall to be
>> the amount of node mask bits to analyze plus 1. Dynamically allocate
>> node mask memory depending on the index of node of cpu being profiled.
>> Fixes: c44a8b44ca9f ("perf record: Bind the AIO user space buffers to nodes")
>> Signed-off-by: Alexey Budankov <[email protected]>
>> ---
>> tools/perf/util/mmap.c | 21 +++++++++++++++------
>> 1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
>> index 3b664fa673a6..6d604cd67a95 100644
>> --- a/tools/perf/util/mmap.c
>> +++ b/tools/perf/util/mmap.c
>> @@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
>> {
>> void *data;
>> size_t mmap_len;
>> - unsigned long node_mask;
>> + unsigned long *node_mask;
>> + unsigned long node_index;
>> + int err = 0;
>>
>> if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
>> data = map->aio.data[idx];
>> mmap_len = mmap__mmap_len(map);
>> - node_mask = 1UL << cpu__get_node(cpu);
>> - if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
>> - pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
>> - data, data + mmap_len, cpu__get_node(cpu));
>> + node_index = cpu__get_node(cpu);
>> + node_mask = bitmap_alloc(node_index + 1);
>> + if (!node_mask) {
>> + pr_err("Failed to allocate node mask for mbind: error %m\n");
>> return -1;
>> }
>> + set_bit(node_index, node_mask);
>> + if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1/*nr_bits + 1*/, 0)) {
>
> ^^^^^^^^^^^^^^
> Leftover?
I put it there intentionally to document the kernel's behavior for the mbind()
syscall, because it currently differs from what the man page [1] documents:
"nodemask points to a bit mask of nodes containing up to maxnode bits.
The bit mask size is rounded to the next multiple of sizeof(unsigned
long), but the kernel will use bits only up to maxnode. A NULL value
of nodemask or a maxnode value of zero specifies the empty set of
nodes. If the value of maxnode is zero, the nodemask argument is
ignored. Where a nodemask is required, it must contain at least one
node that is on-line, allowed by the thread's current cpuset context
(unless the MPOL_F_STATIC_NODES mode flag is specified), and contains
memory."
~Alexey
[1] http://man7.org/linux/man-pages/man2/mbind.2.html
Em Thu, Mar 12, 2020 at 07:09:56PM +0300, Alexey Budankov escreveu:
>
> On 12.03.2020 17:31, Arnaldo Carvalho de Melo wrote:
> > Em Thu, Mar 12, 2020 at 03:21:45PM +0300, Alexey Budankov escreveu:
> >>
> >> Correct maxnode parameter value passed to mbind() syscall to be
> >> the amount of node mask bits to analyze plus 1. Dynamically allocate
> >> node mask memory depending on the index of node of cpu being profiled.
> >> Fixes: c44a8b44ca9f ("perf record: Bind the AIO user space buffers to nodes")
> >> Signed-off-by: Alexey Budankov <[email protected]>
> >> ---
> >> tools/perf/util/mmap.c | 21 +++++++++++++++------
> >> 1 file changed, 15 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
> >> index 3b664fa673a6..6d604cd67a95 100644
> >> --- a/tools/perf/util/mmap.c
> >> +++ b/tools/perf/util/mmap.c
> >> @@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
> >> {
> >> void *data;
> >> size_t mmap_len;
> >> - unsigned long node_mask;
> >> + unsigned long *node_mask;
> >> + unsigned long node_index;
> >> + int err = 0;
> >>
> >> if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
> >> data = map->aio.data[idx];
> >> mmap_len = mmap__mmap_len(map);
> >> - node_mask = 1UL << cpu__get_node(cpu);
> >> - if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
> >> - pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
> >> - data, data + mmap_len, cpu__get_node(cpu));
> >> + node_index = cpu__get_node(cpu);
> >> + node_mask = bitmap_alloc(node_index + 1);
> >> + if (!node_mask) {
> >> + pr_err("Failed to allocate node mask for mbind: error %m\n");
> >> return -1;
> >> }
> >> + set_bit(node_index, node_mask);
> >> + if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1/*nr_bits + 1*/, 0)) {
> >
> > ^^^^^^^^^^^^^^
> > Leftover?
>
> Intentionally put it here to document kernel behavior for mbind() syscall
> because currently it is different from the man page [1] documented:
>
> "nodemask points to a bit mask of nodes containing up to maxnode bits.
> The bit mask size is rounded to the next multiple of sizeof(unsigned
> long), but the kernel will use bits only up to maxnode. A NULL value
> of nodemask or a maxnode value of zero specifies the empty set of
> nodes. If the value of maxnode is zero, the nodemask argument is
> ignored. Where a nodemask is required, it must contain at least one
> node that is on-line, allowed by the thread's current cpuset context
> (unless the MPOL_F_STATIC_NODES mode flag is specified), and contains
> memory."
Ok, will add the above as a comment above the line with that comment.
> ~Alexey
>
> [1] http://man7.org/linux/man-pages/man2/mbind.2.html
--
- Arnaldo
On 12.03.2020 20:12, Arnaldo Carvalho de Melo wrote:
> Em Thu, Mar 12, 2020 at 07:09:56PM +0300, Alexey Budankov escreveu:
>>
>> On 12.03.2020 17:31, Arnaldo Carvalho de Melo wrote:
>>> Em Thu, Mar 12, 2020 at 03:21:45PM +0300, Alexey Budankov escreveu:
>>>>
>>>> Correct maxnode parameter value passed to mbind() syscall to be
>>>> the amount of node mask bits to analyze plus 1. Dynamically allocate
>>>> node mask memory depending on the index of node of cpu being profiled.
>>>> Fixes: c44a8b44ca9f ("perf record: Bind the AIO user space buffers to nodes")
>>>> Signed-off-by: Alexey Budankov <[email protected]>
>>>> ---
>>>> tools/perf/util/mmap.c | 21 +++++++++++++++------
>>>> 1 file changed, 15 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
>>>> index 3b664fa673a6..6d604cd67a95 100644
>>>> --- a/tools/perf/util/mmap.c
>>>> +++ b/tools/perf/util/mmap.c
>>>> @@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
>>>> {
>>>> void *data;
>>>> size_t mmap_len;
>>>> - unsigned long node_mask;
>>>> + unsigned long *node_mask;
>>>> + unsigned long node_index;
>>>> + int err = 0;
>>>>
>>>> if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
>>>> data = map->aio.data[idx];
>>>> mmap_len = mmap__mmap_len(map);
>>>> - node_mask = 1UL << cpu__get_node(cpu);
>>>> - if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
>>>> - pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
>>>> - data, data + mmap_len, cpu__get_node(cpu));
>>>> + node_index = cpu__get_node(cpu);
>>>> + node_mask = bitmap_alloc(node_index + 1);
>>>> + if (!node_mask) {
>>>> + pr_err("Failed to allocate node mask for mbind: error %m\n");
>>>> return -1;
>>>> }
>>>> + set_bit(node_index, node_mask);
>>>> + if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1/*nr_bits + 1*/, 0)) {
>>>
>>> ^^^^^^^^^^^^^^
>>> Leftover?
>>
>> Intentionally put it here to document kernel behavior for mbind() syscall
>> because currently it is different from the man page [1] documented:
>>
>> "nodemask points to a bit mask of nodes containing up to maxnode bits.
>> The bit mask size is rounded to the next multiple of sizeof(unsigned
>> long), but the kernel will use bits only up to maxnode. A NULL value
>> of nodemask or a maxnode value of zero specifies the empty set of
>> nodes. If the value of maxnode is zero, the nodemask argument is
>> ignored. Where a nodemask is required, it must contain at least one
>> node that is on-line, allowed by the thread's current cpuset context
>> (unless the MPOL_F_STATIC_NODES mode flag is specified), and contains
>> memory."
>
> Ok, will add the above as a comment above the line with that comment.
Thanks!
~Alexey
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 44d462acc0bf3eabe1522471fd1f683d8ce612cb
Gitweb: https://git.kernel.org/tip/44d462acc0bf3eabe1522471fd1f683d8ce612cb
Author: Alexey Budankov <[email protected]>
AuthorDate: Thu, 12 Mar 2020 15:21:45 +03:00
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitterDate: Thu, 12 Mar 2020 11:32:46 -03:00
perf record: Fix binding of AIO user space buffers to nodes
Correct the maxnode parameter value passed to the mbind() syscall to be the
number of node mask bits to analyze plus 1. Dynamically allocate the node
mask memory depending on the index of the node of the CPU being profiled.
Fixes: c44a8b44ca9f ("perf record: Bind the AIO user space buffers to nodes")
Signed-off-by: Alexey Budankov <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
[ Remove leftover nr_bits + 1 comment in mbind() call ]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/util/mmap.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 3b664fa..ab7108d 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
{
void *data;
size_t mmap_len;
- unsigned long node_mask;
+ unsigned long *node_mask;
+ unsigned long node_index;
+ int err = 0;
if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
data = map->aio.data[idx];
mmap_len = mmap__mmap_len(map);
- node_mask = 1UL << cpu__get_node(cpu);
- if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
- pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
- data, data + mmap_len, cpu__get_node(cpu));
+ node_index = cpu__get_node(cpu);
+ node_mask = bitmap_alloc(node_index + 1);
+ if (!node_mask) {
+ pr_err("Failed to allocate node mask for mbind: error %m\n");
return -1;
}
+ set_bit(node_index, node_mask);
+ if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
+ pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+ data, data + mmap_len, node_index);
+ err = -1;
+ }
+ bitmap_free(node_mask);
}
- return 0;
+ return err;
}
#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct mmap *map, int idx)