When I was implementing a new per-cpu kthread cfs_migration, I found the
comm of it "cfs_migration/%u" is truncated due to the limitation of
TASK_COMM_LEN. For example, the comm of the percpu thread on CPU10~19 are
all with the same name "cfs_migration/1", which will confuse the user. This
issue is not critical, because we can get the corresponding CPU from the
task's Cpus_allowed. But for kthreads correspoinding to other hardware
devices, it is not easy to get the detailed device info from task comm,
for example,
jbd2/nvme0n1p2-
xfs-reclaim/sdf
We can also shorten the name to work around this problem, but I find
there are so many truncated kthreads:
rcu_tasks_kthre
rcu_tasks_rude_
rcu_tasks_trace
poll_mpt3sas0_s
ext4-rsv-conver
xfs-reclaim/sd{a, b, c, ...}
xfs-blockgc/sd{a, b, c, ...}
xfs-inodegc/sd{a, b, c, ...}
audit_send_repl
ecryptfs-kthrea
vfio-irqfd-clea
jbd2/nvme0n1p2-
...
We should improve this problem fundamentally.
This patch extends the size of task comm to 24 bytes, which is the
same length with workqueue's, for the CONFIG_BASE_FULL case. And for the
CONFIG_BASE_SMALL case, the size of task comm is still kept as 16 bytes.
If the kthread's comm is still truncated, a warning will be printed.
Below is the result of my test case:
truncated kthread comm:I-am-a-kthread-with-lon, pid:14 by 6 characters
Changes since v2:
- avoid change UAPI code per Kees
- remove the description of out of tree code from commit log per Peter
Changes since v1:
- extend task comm to 24bytes, per Petr
- improve the warning per Petr
- make the checkpatch warning a separate patch
Yafang Shao (4):
connector: use __get_task_comm in proc_comm_connector
fs/exec: use strscpy instead of strlcpy in __set_task_comm
sched.h: extend task comm from 16 to 24 for CONFIG_BASE_FULL
kernel/kthread: show a warning if kthread's comm is truncated
drivers/connector/cn_proc.c | 5 ++++-
fs/exec.c | 2 +-
include/linux/sched.h | 4 ++++
kernel/kthread.c | 7 ++++++-
4 files changed, 15 insertions(+), 3 deletions(-)
--
2.18.2
Fix a warning by checkpatch -
WARNING: Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/
Signed-off-by: Yafang Shao <[email protected]>
Acked-by: Kees Cook <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Petr Mladek <[email protected]>
---
fs/exec.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/exec.c b/fs/exec.c
index a098c133d8d7..de804c566200 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1224,7 +1224,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
task_lock(tsk);
trace_task_rename(tsk, buf);
- strlcpy(tsk->comm, buf, sizeof(tsk->comm));
+ strscpy(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk, exec);
}
--
2.18.2
connector comm was introduced in commit
f786ecba4158 ("connector: add comm change event report to proc connector").
struct comm_proc_event was defined in include/linux/cn_proc.h first and
then been moved into file include/uapi/linux/cn_proc.h in commit
607ca46e97a1 ("UAPI: (Scripted) Disintegrate include/linux").
As this is the UAPI code, we can't change it without potentially breaking
things (i.e. userspace binaries have this size built in, so we can't just
change the size). To prepare for the followup change - extending task
comm, we have to use __get_task_comm() to avoid the BUILD_BUG_ON() in
proc_comm_connector().
Suggested-by: Kees Cook <[email protected]>
Signed-off-by: Yafang Shao <[email protected]>
Cc: Vladimir Zapolskiy <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: David Howells <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Petr Mladek <[email protected]>
---
drivers/connector/cn_proc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 646ad385e490..c88ba2dc1eae 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -230,7 +230,10 @@ void proc_comm_connector(struct task_struct *task)
ev->what = PROC_EVENT_COMM;
ev->event_data.comm.process_pid = task->pid;
ev->event_data.comm.process_tgid = task->tgid;
- get_task_comm(ev->event_data.comm.comm, task);
+
+ /* This may get truncated. */
+ __get_task_comm(ev->event_data.comm.comm,
+ sizeof(ev->event_data.comm.comm), task);
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
--
2.18.2
Show a warning if task comm is truncated. Below is the result
of my test case:
truncated kthread comm:I-am-a-kthread-with-lon, pid:14 by 6 characters
Suggested-by: Petr Mladek <[email protected]>
Signed-off-by: Yafang Shao <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Petr Mladek <[email protected]>
---
kernel/kthread.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5b37a8567168..46b924c92078 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -399,12 +399,17 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
char name[TASK_COMM_LEN];
+ int len;
/*
* task is already visible to other tasks, so updating
* COMM must be protected.
*/
- vsnprintf(name, sizeof(name), namefmt, args);
+ len = vsnprintf(name, sizeof(name), namefmt, args);
+ if (len >= TASK_COMM_LEN) {
+ pr_warn("truncated kthread comm:%s, pid:%d by %d characters\n",
+ name, task->pid, len - TASK_COMM_LEN + 1);
+ }
set_task_comm(task, name);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
--
2.18.2
When I was implementing a new per-cpu kthread cfs_migration, I found the
comm of it "cfs_migration/%u" is truncated due to the limitation of
TASK_COMM_LEN. For example, the comm of the percpu thread on CPU10~19 are
all with the same name "cfs_migration/1", which will confuse the user. This
issue is not critical, because we can get the corresponding CPU from the
task's Cpus_allowed. But for kthreads correspoinding to other hardware
devices, it is not easy to get the detailed device info from task comm,
for example,
jbd2/nvme0n1p2-
xfs-reclaim/sdf
We can also shorten the name to work around this problem, but I find
there are so many truncated kthreads:
rcu_tasks_kthre
rcu_tasks_rude_
rcu_tasks_trace
poll_mpt3sas0_s
ext4-rsv-conver
xfs-reclaim/sd{a, b, c, ...}
xfs-blockgc/sd{a, b, c, ...}
xfs-inodegc/sd{a, b, c, ...}
audit_send_repl
ecryptfs-kthrea
vfio-irqfd-clea
jbd2/nvme0n1p2-
...
We should improve this problem fundamentally.
This patch extends the size of task comm to 24 bytes, which is the
same length with workqueue's, for the CONFIG_BASE_FULL case. And for the
CONFIG_BASE_SMALL case, the size of task comm is still kept as 16 bytes.
Suggested-by: Petr Mladek <[email protected]>
Signed-off-by: Yafang Shao <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Petr Mladek <[email protected]>
---
include/linux/sched.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 39039ce8ac4c..e0796068dee0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -275,7 +275,11 @@ struct task_group;
#define get_current_state() READ_ONCE(current->__state)
/* Task command name length: */
+#if CONFIG_BASE_SMALL
#define TASK_COMM_LEN 16
+#else
+#define TASK_COMM_LEN 24
+#endif
extern void scheduler_tick(void);
--
2.18.2
Greeting,
FYI, we noticed the following commit (built with gcc-9):
commit: 317419b91ef4eff4e2f046088201e4dc4065caa0 ("[PATCH v3 3/4] sched.h: extend task comm from 16 to 24 for CONFIG_BASE_FULL")
url: https://github.com/0day-ci/linux/commits/Yafang-Shao/task_struct-extend-task-comm-from-16-to-24-for-CONFIG_BASE_FULL/20211010-182548
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 7fd2bf83d59a2d32e0d596c5d3e623b9a0e7e2d5
in testcase: perf-sanity-tests
version: perf-x86_64-7fd2bf83d59a-1_20211010
with following parameters:
perf_compiler: clang
ucode: 0xde
on test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz with 32G memory
caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>
2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 13
13: DSO data reopen : Ok
2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 14
14: Roundtrip evsel->name : Ok
2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 15
15: Parse sched tracepoints fields : FAILED!
2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 16
16: syscalls:sys_enter_openat event fields : Ok
To reproduce:
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file
# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.
---
0DAY/LKP+ Test Infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/[email protected] Intel Corporation
Thanks,
Oliver Sang
On Thu, Oct 14, 2021 at 3:08 PM kernel test robot <[email protected]> wrote:
>
>
>
> Greeting,
>
> FYI, we noticed the following commit (built with gcc-9):
>
> commit: 317419b91ef4eff4e2f046088201e4dc4065caa0 ("[PATCH v3 3/4] sched.h: extend task comm from 16 to 24 for CONFIG_BASE_FULL")
> url: https://github.com/0day-ci/linux/commits/Yafang-Shao/task_struct-extend-task-comm-from-16-to-24-for-CONFIG_BASE_FULL/20211010-182548
> base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 7fd2bf83d59a2d32e0d596c5d3e623b9a0e7e2d5
>
> in testcase: perf-sanity-tests
> version: perf-x86_64-7fd2bf83d59a-1_20211010
> with following parameters:
>
> perf_compiler: clang
> ucode: 0xde
>
>
>
> on test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz with 32G memory
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>
>
>
> If you fix the issue, kindly add following tag
> Reported-by: kernel test robot <[email protected]>
>
> 2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 13
> 13: DSO data reopen : Ok
> 2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 14
> 14: Roundtrip evsel->name : Ok
> 2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 15
> 15: Parse sched tracepoints fields : FAILED!
That issue is caused by another hardcode 16 ...
Seems we should make some change as below,
diff --git a/tools/perf/tests/evsel-tp-sched.c
b/tools/perf/tests/evsel-tp-sched.c
index f9e34bd26cf3..401a737b1d85 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -42,7 +42,7 @@ int test__perf_evsel__tp_sched_test(struct test
*test __maybe_unused, int subtes
return -1;
}
- if (evsel__test_field(evsel, "prev_comm", 16, false))
+ if (evsel__test_field(evsel, "prev_comm", TASK_COMM_LEN, false))
ret = -1;
if (evsel__test_field(evsel, "prev_pid", 4, true))
@@ -54,7 +54,7 @@ int test__perf_evsel__tp_sched_test(struct test
*test __maybe_unused, int subtes
if (evsel__test_field(evsel, "prev_state", sizeof(long), true))
ret = -1;
- if (evsel__test_field(evsel, "next_comm", 16, false))
+ if (evsel__test_field(evsel, "next_comm", TASK_COMM_LEN, false))
ret = -1;
if (evsel__test_field(evsel, "next_pid", 4, true))
@@ -72,7 +72,7 @@ int test__perf_evsel__tp_sched_test(struct test
*test __maybe_unused, int subtes
return -1;
}
- if (evsel__test_field(evsel, "comm", 16, false))
+ if (evsel__test_field(evsel, "comm", TASK_COMM_LEN, false))
ret = -1;
if (evsel__test_field(evsel, "pid", 4, true))
> 2021-10-13 18:00:46 sudo /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf test 16
> 16: syscalls:sys_enter_openat event fields : Ok
>
>
>
> To reproduce:
>
> git clone https://github.com/intel/lkp-tests.git
> cd lkp-tests
> sudo bin/lkp install job.yaml # job file is attached in this email
> bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
> sudo bin/lkp run generated-yaml-file
>
> # if come across any failure that blocks the test,
> # please remove ~/.lkp and /lkp dir to run from a clean state.
>
>
>
> ---
> 0DAY/LKP+ Test Infrastructure Open Source Technology Center
> https://lists.01.org/hyperkitty/list/[email protected] Intel Corporation
>
> Thanks,
> Oliver Sang
>
--
Thanks
Yafang
----- On Oct 14, 2021, at 5:24 AM, Yafang Shao [email protected] wrote:
> On Thu, Oct 14, 2021 at 3:08 PM kernel test robot <[email protected]> wrote:
>>
>>
>>
>> Greeting,
>>
>> FYI, we noticed the following commit (built with gcc-9):
>>
>> commit: 317419b91ef4eff4e2f046088201e4dc4065caa0 ("[PATCH v3 3/4] sched.h:
>> extend task comm from 16 to 24 for CONFIG_BASE_FULL")
>> url:
>> https://github.com/0day-ci/linux/commits/Yafang-Shao/task_struct-extend-task-comm-from-16-to-24-for-CONFIG_BASE_FULL/20211010-182548
>> base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git
>> 7fd2bf83d59a2d32e0d596c5d3e623b9a0e7e2d5
>>
>> in testcase: perf-sanity-tests
>> version: perf-x86_64-7fd2bf83d59a-1_20211010
>> with following parameters:
>>
>> perf_compiler: clang
>> ucode: 0xde
>>
>>
>>
>> on test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz
>> with 32G memory
>>
>> caused below changes (please refer to attached dmesg/kmsg for entire
>> log/backtrace):
>>
>>
>>
>>
>> If you fix the issue, kindly add following tag
>> Reported-by: kernel test robot <[email protected]>
>>
>> 2021-10-13 18:00:46 sudo
>> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
>> test 13
>> 13: DSO data reopen : Ok
>> 2021-10-13 18:00:46 sudo
>> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
>> test 14
>> 14: Roundtrip evsel->name : Ok
>> 2021-10-13 18:00:46 sudo
>> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
>> test 15
>> 15: Parse sched tracepoints fields : FAILED!
>
>
> That issue is caused by another hardcode 16 ...
>
> Seems we should make some change as below,
>
> diff --git a/tools/perf/tests/evsel-tp-sched.c
> b/tools/perf/tests/evsel-tp-sched.c
> index f9e34bd26cf3..401a737b1d85 100644
> --- a/tools/perf/tests/evsel-tp-sched.c
> +++ b/tools/perf/tests/evsel-tp-sched.c
> @@ -42,7 +42,7 @@ int test__perf_evsel__tp_sched_test(struct test
> *test __maybe_unused, int subtes
> return -1;
> }
>
> - if (evsel__test_field(evsel, "prev_comm", 16, false))
> + if (evsel__test_field(evsel, "prev_comm", TASK_COMM_LEN, false))
tools/perf/tests/* contains userspace test programs. This means it gets the
TASK_COMM_LEN from the uapi. The fix you propose won't do any good here.
ftrace and perf access a description of the sched_switch tracepoint prev_comm field
from tracefs and store it into their respective traces. The size of this field is
derived from the kernel's TASK_COMM_LEN, which is changed by the patch triggering this
failure. Therefore, if we strictly consider the field layout as ABI, this is an ABI break.
However, if we consider that trace viewers should adapt to the changes in described event
field layout, then we should tweak this test program to accept larger values of prev_comm
field size.
A simple solution would be to tweak this test program so it can adapt to the size
change, and then figure out if any other relevant program out there notice the break.
If it happens that this ABI break is noticed by more than an in-tree test program, then
the kernel's ABI rules will require that this trace field size stays unchanged. This brings
up once more the whole topic of "Tracepoints ABI" which has been discussed repeatedly in
the past.
In short, because TRACE_EVENT exposes the tracepoint layout as ABI, if any trace viewer out
there expects a fixed-size prev_comm field for the sched_switch event, its size cannot be
changed.
Thanks,
Mathieu
> ret = -1;
>
> if (evsel__test_field(evsel, "prev_pid", 4, true))
> @@ -54,7 +54,7 @@ int test__perf_evsel__tp_sched_test(struct test
> *test __maybe_unused, int subtes
> if (evsel__test_field(evsel, "prev_state", sizeof(long), true))
> ret = -1;
>
> - if (evsel__test_field(evsel, "next_comm", 16, false))
> + if (evsel__test_field(evsel, "next_comm", TASK_COMM_LEN, false))
> ret = -1;
>
> if (evsel__test_field(evsel, "next_pid", 4, true))
> @@ -72,7 +72,7 @@ int test__perf_evsel__tp_sched_test(struct test
> *test __maybe_unused, int subtes
> return -1;
> }
>
> - if (evsel__test_field(evsel, "comm", 16, false))
> + if (evsel__test_field(evsel, "comm", TASK_COMM_LEN, false))
> ret = -1;
>
> if (evsel__test_field(evsel, "pid", 4, true))
>
>
>> 2021-10-13 18:00:46 sudo
>> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
>> test 16
>> 16: syscalls:sys_enter_openat event fields : Ok
>>
>>
>>
>> To reproduce:
>>
>> git clone https://github.com/intel/lkp-tests.git
>> cd lkp-tests
>> sudo bin/lkp install job.yaml # job file is attached in this email
>> bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
>> sudo bin/lkp run generated-yaml-file
>>
>> # if come across any failure that blocks the test,
>> # please remove ~/.lkp and /lkp dir to run from a clean state.
>>
>>
>>
>> ---
>> 0DAY/LKP+ Test Infrastructure Open Source Technology Center
>> https://lists.01.org/hyperkitty/list/[email protected] Intel Corporation
>>
>> Thanks,
>> Oliver Sang
>>
>
>
> --
> Thanks
> Yafang
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
On Thu, Oct 14, 2021 at 8:42 PM Mathieu Desnoyers
<[email protected]> wrote:
>
> ----- On Oct 14, 2021, at 5:24 AM, Yafang Shao [email protected] wrote:
>
> > On Thu, Oct 14, 2021 at 3:08 PM kernel test robot <[email protected]> wrote:
> >>
> >>
> >>
> >> Greeting,
> >>
> >> FYI, we noticed the following commit (built with gcc-9):
> >>
> >> commit: 317419b91ef4eff4e2f046088201e4dc4065caa0 ("[PATCH v3 3/4] sched.h:
> >> extend task comm from 16 to 24 for CONFIG_BASE_FULL")
> >> url:
> >> https://github.com/0day-ci/linux/commits/Yafang-Shao/task_struct-extend-task-comm-from-16-to-24-for-CONFIG_BASE_FULL/20211010-182548
> >> base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git
> >> 7fd2bf83d59a2d32e0d596c5d3e623b9a0e7e2d5
> >>
> >> in testcase: perf-sanity-tests
> >> version: perf-x86_64-7fd2bf83d59a-1_20211010
> >> with following parameters:
> >>
> >> perf_compiler: clang
> >> ucode: 0xde
> >>
> >>
> >>
> >> on test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz
> >> with 32G memory
> >>
> >> caused below changes (please refer to attached dmesg/kmsg for entire
> >> log/backtrace):
> >>
> >>
> >>
> >>
> >> If you fix the issue, kindly add following tag
> >> Reported-by: kernel test robot <[email protected]>
> >>
> >> 2021-10-13 18:00:46 sudo
> >> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
> >> test 13
> >> 13: DSO data reopen : Ok
> >> 2021-10-13 18:00:46 sudo
> >> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
> >> test 14
> >> 14: Roundtrip evsel->name : Ok
> >> 2021-10-13 18:00:46 sudo
> >> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
> >> test 15
> >> 15: Parse sched tracepoints fields : FAILED!
> >
> >
> > That issue is caused by another hardcode 16 ...
> >
> > Seems we should make some change as below,
> >
> > diff --git a/tools/perf/tests/evsel-tp-sched.c
> > b/tools/perf/tests/evsel-tp-sched.c
> > index f9e34bd26cf3..401a737b1d85 100644
> > --- a/tools/perf/tests/evsel-tp-sched.c
> > +++ b/tools/perf/tests/evsel-tp-sched.c
> > @@ -42,7 +42,7 @@ int test__perf_evsel__tp_sched_test(struct test
> > *test __maybe_unused, int subtes
> > return -1;
> > }
> >
> > - if (evsel__test_field(evsel, "prev_comm", 16, false))
> > + if (evsel__test_field(evsel, "prev_comm", TASK_COMM_LEN, false))
>
> tools/perf/tests/* contains userspace test programs. This means it gets the
> TASK_COMM_LEN from the uapi. The fix you propose won't do any good here.
>
> ftrace and perf access a description of the sched_switch tracepoint prev_comm field
> from tracefs and store it into their respective traces. The size of this field is
> derived from the kernel's TASK_COMM_LEN, which is changed by the patch triggering this
> failure. Therefore, if we strictly consider the field layout as ABI, this is an ABI break.
> However, if we consider that trace viewers should adapt to the changes in described event
> field layout, then we should tweak this test program to accept larger values of prev_comm
> field size.
>
I have verfied that perf works well after this change.
It seems that perf can adapt to this change.
So we can tweak this test program.
> A simple solution would be to tweak this test program so it can adapt to the size
> change, and then figure out if any other relevant program out there notice the break.
The other in-tree tools bpf uses the TASK_COMM_LEN as well, but it
doesn't require the fixed-size of task comm,
if task comm is larger than 16, it will be truncated, see also
bpf_get_current_comm().
IOW, this change doesn't break the bpf tools.
> If it happens that this ABI break is noticed by more than an in-tree test program, then
> the kernel's ABI rules will require that this trace field size stays unchanged. This brings
> up once more the whole topic of "Tracepoints ABI" which has been discussed repeatedly in
> the past.
>
I will check if any other in-tree tools depends on TASK_COMM_LEN.
> In short, because TRACE_EVENT exposes the tracepoint layout as ABI, if any trace viewer out
> there expects a fixed-size prev_comm field for the sched_switch event, its size cannot be
> changed.
>
Thanks for the explanation.
>
> > ret = -1;
> >
> > if (evsel__test_field(evsel, "prev_pid", 4, true))
> > @@ -54,7 +54,7 @@ int test__perf_evsel__tp_sched_test(struct test
> > *test __maybe_unused, int subtes
> > if (evsel__test_field(evsel, "prev_state", sizeof(long), true))
> > ret = -1;
> >
> > - if (evsel__test_field(evsel, "next_comm", 16, false))
> > + if (evsel__test_field(evsel, "next_comm", TASK_COMM_LEN, false))
> > ret = -1;
> >
> > if (evsel__test_field(evsel, "next_pid", 4, true))
> > @@ -72,7 +72,7 @@ int test__perf_evsel__tp_sched_test(struct test
> > *test __maybe_unused, int subtes
> > return -1;
> > }
> >
> > - if (evsel__test_field(evsel, "comm", 16, false))
> > + if (evsel__test_field(evsel, "comm", TASK_COMM_LEN, false))
> > ret = -1;
> >
> > if (evsel__test_field(evsel, "pid", 4, true))
> >
> >
> >> 2021-10-13 18:00:46 sudo
> >> /usr/src/perf_selftests-x86_64-rhel-8.3-317419b91ef4eff4e2f046088201e4dc4065caa0/tools/perf/perf
> >> test 16
> >> 16: syscalls:sys_enter_openat event fields : Ok
> >>
> >>
> >>
> >> To reproduce:
> >>
> >> git clone https://github.com/intel/lkp-tests.git
> >> cd lkp-tests
> >> sudo bin/lkp install job.yaml # job file is attached in this email
> >> bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
> >> sudo bin/lkp run generated-yaml-file
> >>
> >> # if come across any failure that blocks the test,
> >> # please remove ~/.lkp and /lkp dir to run from a clean state.
> >>
> >>
> >>
> >> ---
> >> 0DAY/LKP+ Test Infrastructure Open Source Technology Center
> >> https://lists.01.org/hyperkitty/list/[email protected] Intel Corporation
> >>
> >> Thanks,
> >> Oliver Sang
> >>
> >
> >
> > --
> > Thanks
> > Yafang
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
--
Thanks
Yafang
Em Thu, Oct 14, 2021 at 08:42:05AM -0400, Mathieu Desnoyers escreveu:
> ----- On Oct 14, 2021, at 5:24 AM, Yafang Shao [email protected] wrote:
> > On Thu, Oct 14, 2021 at 3:08 PM kernel test robot <[email protected]> wrote:
> > That issue is caused by another hardcode 16 ...
> > Seems we should make some change as below,
> > diff --git a/tools/perf/tests/evsel-tp-sched.c
> > b/tools/perf/tests/evsel-tp-sched.c
> > index f9e34bd26cf3..401a737b1d85 100644
> > --- a/tools/perf/tests/evsel-tp-sched.c
> > +++ b/tools/perf/tests/evsel-tp-sched.c
> > @@ -42,7 +42,7 @@ int test__perf_evsel__tp_sched_test(struct test
> > *test __maybe_unused, int subtes
> > return -1;
> > }
> > - if (evsel__test_field(evsel, "prev_comm", 16, false))
> > + if (evsel__test_field(evsel, "prev_comm", TASK_COMM_LEN, false))
>
> tools/perf/tests/* contains userspace test programs. This means it gets the
> TASK_COMM_LEN from the uapi. The fix you propose won't do any good here.
That specific test is just checking if the parsing is being done as
expected, i.e. we know beforehand that COMMs have 16 bytes, so the test
expects that.
Now that it can have a different size, then the test should accept the
two sizes as possible and pass if it is 16 or 24.
Like in this patch:
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index f9e34bd26cf33536..182328f3f7f70e0e 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -5,7 +5,7 @@
#include "tests.h"
#include "debug.h"
-static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed)
+static int evsel__test_field_alt(struct evsel *evsel, const char *name, int size, int alternate_size, bool should_be_signed)
{
struct tep_format_field *field = evsel__field(evsel, name);
int is_signed;
@@ -23,15 +23,23 @@ static int evsel__test_field(struct evsel *evsel, const char *name, int size, bo
ret = -1;
}
- if (field->size != size) {
- pr_debug("%s: \"%s\" size (%d) should be %d!\n",
+ if (field->size != size && field->size != alternate_size) {
+ pr_debug("%s: \"%s\" size (%d) should be %d",
evsel->name, name, field->size, size);
+ if (alternate_size > 0)
+ pr_debug(" or %d", alternate_size);
+ pr_debug("!\n");
ret = -1;
}
return ret;
}
+static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed)
+{
+ return evsel__test_field_alt(evsel, name, size, -1, should_be_signed);
+}
+
int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
{
struct evsel *evsel = evsel__newtp("sched", "sched_switch");
@@ -42,7 +50,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
return -1;
}
- if (evsel__test_field(evsel, "prev_comm", 16, false))
+ if (evsel__test_field_alt(evsel, "prev_comm", 16, 24, false))
ret = -1;
if (evsel__test_field(evsel, "prev_pid", 4, true))
@@ -54,7 +62,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
if (evsel__test_field(evsel, "prev_state", sizeof(long), true))
ret = -1;
- if (evsel__test_field(evsel, "next_comm", 16, false))
+ if (evsel__test_field_alt(evsel, "next_comm", 16, 24, false))
ret = -1;
if (evsel__test_field(evsel, "next_pid", 4, true))
@@ -72,7 +80,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
return -1;
}
- if (evsel__test_field(evsel, "comm", 16, false))
+ if (evsel__test_field_alt(evsel, "comm", 16, 24, false))
ret = -1;
if (evsel__test_field(evsel, "pid", 4, true))
On Wed, Oct 20, 2021 at 12:51 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> Em Thu, Oct 14, 2021 at 08:42:05AM -0400, Mathieu Desnoyers escreveu:
> > ----- On Oct 14, 2021, at 5:24 AM, Yafang Shao [email protected] wrote:
> > > On Thu, Oct 14, 2021 at 3:08 PM kernel test robot <[email protected]> wrote:
> > > That issue is caused by another hardcode 16 ...
>
> > > Seems we should make some change as below,
>
> > > diff --git a/tools/perf/tests/evsel-tp-sched.c
> > > b/tools/perf/tests/evsel-tp-sched.c
> > > index f9e34bd26cf3..401a737b1d85 100644
> > > --- a/tools/perf/tests/evsel-tp-sched.c
> > > +++ b/tools/perf/tests/evsel-tp-sched.c
> > > @@ -42,7 +42,7 @@ int test__perf_evsel__tp_sched_test(struct test
> > > *test __maybe_unused, int subtes
> > > return -1;
> > > }
>
> > > - if (evsel__test_field(evsel, "prev_comm", 16, false))
> > > + if (evsel__test_field(evsel, "prev_comm", TASK_COMM_LEN, false))
> >
> > tools/perf/tests/* contains userspace test programs. This means it gets the
> > TASK_COMM_LEN from the uapi. The fix you propose won't do any good here.
>
> That specific test is just checking if the parsing is being done as
> expected, i.e. we know beforehand that COMMs have 16 bytes, so the test
> expects that.
>
Right.
> Now that it can have a different size, then the test should accept the
> two sizes as possible and pass if it is 16 or 24.
>
> Like in this patch:
>
Thanks for the suggestion. I will do it as you suggested.
> diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
> index f9e34bd26cf33536..182328f3f7f70e0e 100644
> --- a/tools/perf/tests/evsel-tp-sched.c
> +++ b/tools/perf/tests/evsel-tp-sched.c
> @@ -5,7 +5,7 @@
> #include "tests.h"
> #include "debug.h"
>
> -static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed)
> +static int evsel__test_field_alt(struct evsel *evsel, const char *name, int size, int alternate_size, bool should_be_signed)
> {
> struct tep_format_field *field = evsel__field(evsel, name);
> int is_signed;
> @@ -23,15 +23,23 @@ static int evsel__test_field(struct evsel *evsel, const char *name, int size, bo
> ret = -1;
> }
>
> - if (field->size != size) {
> - pr_debug("%s: \"%s\" size (%d) should be %d!\n",
> + if (field->size != size && field->size != alternate_size) {
> + pr_debug("%s: \"%s\" size (%d) should be %d",
> evsel->name, name, field->size, size);
> + if (alternate_size > 0)
> + pr_debug(" or %d", alternate_size);
> + pr_debug("!\n");
> ret = -1;
> }
>
> return ret;
> }
>
> +static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed)
> +{
> + return evsel__test_field_alt(evsel, name, size, -1, should_be_signed);
> +}
> +
> int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
> {
> struct evsel *evsel = evsel__newtp("sched", "sched_switch");
> @@ -42,7 +50,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
> return -1;
> }
>
> - if (evsel__test_field(evsel, "prev_comm", 16, false))
> + if (evsel__test_field_alt(evsel, "prev_comm", 16, 24, false))
> ret = -1;
>
> if (evsel__test_field(evsel, "prev_pid", 4, true))
> @@ -54,7 +62,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
> if (evsel__test_field(evsel, "prev_state", sizeof(long), true))
> ret = -1;
>
> - if (evsel__test_field(evsel, "next_comm", 16, false))
> + if (evsel__test_field_alt(evsel, "next_comm", 16, 24, false))
> ret = -1;
>
> if (evsel__test_field(evsel, "next_pid", 4, true))
> @@ -72,7 +80,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
> return -1;
> }
>
> - if (evsel__test_field(evsel, "comm", 16, false))
> + if (evsel__test_field_alt(evsel, "comm", 16, 24, false))
> ret = -1;
>
> if (evsel__test_field(evsel, "pid", 4, true))
--
Thanks
Yafang