2020-11-16 17:46:00

by Gabriel Krisman Bertazi

[permalink] [raw]
Subject: [PATCH v2 05/10] tracepoints: Migrate to use SYSCALL_WORK flag

For architectures that rely on the generic syscall entry code, use the
syscall_work field in struct thread_info and the specific SYSCALL_WORK
flag. This set of flags has the advantage of being architecture
independent.

Users of the flag outside of the generic entry code should rely on the
accessor macros, such that the flag is still correctly resolved for
architectures that don't use the generic entry code and still rely on
TIF flags for system call work.

Signed-off-by: Gabriel Krisman Bertazi <[email protected]>

---
Changes since v2:
- Drop explicit value assignment in enum (tglx)
- Avoid FLAG/_FLAG defines (tglx)
- Fix comment to refer to SYSCALL_WORK_SECCOMP (me)
---
include/linux/entry-common.h | 13 +++++--------
include/linux/thread_info.h | 2 ++
include/trace/syscall.h | 6 +++---
kernel/entry/common.c | 4 ++--
kernel/trace/trace_events.c | 8 ++++----
kernel/tracepoint.c | 4 ++--
6 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index f3fc4457f63f..8aba367e5c79 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -17,10 +17,6 @@
# define _TIF_SYSCALL_EMU (0)
#endif

-#ifndef _TIF_SYSCALL_TRACEPOINT
-# define _TIF_SYSCALL_TRACEPOINT (0)
-#endif
-
#ifndef _TIF_SYSCALL_AUDIT
# define _TIF_SYSCALL_AUDIT (0)
#endif
@@ -46,7 +42,7 @@

#define SYSCALL_ENTER_WORK \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \
+ _TIF_SYSCALL_EMU | \
ARCH_SYSCALL_ENTER_WORK)

/*
@@ -58,10 +54,11 @@

#define SYSCALL_EXIT_WORK \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK)
+ ARCH_SYSCALL_EXIT_WORK)

-#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP)
-#define SYSCALL_WORK_EXIT (0)
+#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \
+ SYSCALL_WORK_SYSCALL_TRACEPOINT)
+#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT)

/*
* TIF flags handled in exit_to_user_mode_loop()
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 1d6488130b5c..ff0ac2ebb4ff 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -37,9 +37,11 @@ enum {

enum syscall_work_bit {
SYSCALL_WORK_BIT_SECCOMP,
+ SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT,
};

#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
+#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT)

#include <asm/thread_info.h>

diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index dc8ac27d27c1..8e193f3a33b3 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -37,10 +37,10 @@ struct syscall_metadata {
#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
static inline void syscall_tracepoint_update(struct task_struct *p)
{
- if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
- set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+ if (test_syscall_work(SYSCALL_TRACEPOINT))
+ set_task_syscall_work(p, SYSCALL_TRACEPOINT);
else
- clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+ clear_task_syscall_work(p, SYSCALL_TRACEPOINT);
}
#else
static inline void syscall_tracepoint_update(struct task_struct *p)
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index c321056c73d7..4e2b3c08d939 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -63,7 +63,7 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
/* Either of the above might have changed the syscall number */
syscall = syscall_get_nr(current, regs);

- if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
+ if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, syscall);

syscall_enter_audit(regs, syscall);
@@ -233,7 +233,7 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long ti_work,

audit_syscall_exit(regs);

- if (ti_work & _TIF_SYSCALL_TRACEPOINT)
+ if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
trace_sys_exit(regs, syscall_get_return_value(current, regs));

step = report_single_step(ti_work);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 47a71f96e5bc..adf65b502453 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3428,10 +3428,10 @@ static __init int event_trace_enable(void)
* initialize events and perhaps start any events that are on the
* command line. Unfortunately, there are some events that will not
* start this early, like the system call tracepoints that need
- * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
- * is called before pid 1 starts, and this flag is never set, making
- * the syscall tracepoint never get reached, but the event is enabled
- * regardless (and not doing anything).
+ * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
+ * event_trace_enable() is called before pid 1 starts, and this flag
+ * is never set, making the syscall tracepoint never get reached, but
+ * the event is enabled regardless (and not doing anything).
*/
static __init int event_trace_enable_again(void)
{
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 3f659f855074..7261fa0f5e3c 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -594,7 +594,7 @@ int syscall_regfunc(void)
if (!sys_tracepoint_refcount) {
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
- set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
+ set_task_syscall_work(t, SYSCALL_TRACEPOINT);
}
read_unlock(&tasklist_lock);
}
@@ -611,7 +611,7 @@ void syscall_unregfunc(void)
if (!sys_tracepoint_refcount) {
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
- clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
+ clear_task_syscall_work(t, SYSCALL_TRACEPOINT);
}
read_unlock(&tasklist_lock);
}
--
2.29.2


Subject: [tip: core/entry] tracepoints: Migrate to use SYSCALL_WORK flag

The following commit has been merged into the core/entry branch of tip:

Commit-ID: 524666cb5de7c38a1925e7401a6e59d68682dd8c
Gitweb: https://git.kernel.org/tip/524666cb5de7c38a1925e7401a6e59d68682dd8c
Author: Gabriel Krisman Bertazi <[email protected]>
AuthorDate: Mon, 16 Nov 2020 12:42:01 -05:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 16 Nov 2020 21:53:15 +01:00

tracepoints: Migrate to use SYSCALL_WORK flag

On architectures using the generic syscall entry code the architecture
independent syscall work is moved to flags in thread_info::syscall_work.
This removes architecture dependencies and frees up TIF bits.

Define SYSCALL_WORK_SYSCALL_TRACEPOINT, use it in the generic entry code
and convert the code which uses the TIF specific helper functions to use
the new *_syscall_work() helpers which either resolve to the new mode for
users of the generic entry code or to the TIF based functions for the other
architectures.

Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Andy Lutomirski <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
include/linux/entry-common.h | 13 +++++--------
include/linux/thread_info.h | 2 ++
include/trace/syscall.h | 6 +++---
kernel/entry/common.c | 4 ++--
kernel/trace/trace_events.c | 8 ++++----
kernel/tracepoint.c | 4 ++--
6 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index fa3cdb1..2a01eee 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -17,10 +17,6 @@
# define _TIF_SYSCALL_EMU (0)
#endif

-#ifndef _TIF_SYSCALL_TRACEPOINT
-# define _TIF_SYSCALL_TRACEPOINT (0)
-#endif
-
#ifndef _TIF_SYSCALL_AUDIT
# define _TIF_SYSCALL_AUDIT (0)
#endif
@@ -46,7 +42,7 @@

#define SYSCALL_ENTER_WORK \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \
+ _TIF_SYSCALL_EMU | \
ARCH_SYSCALL_ENTER_WORK)

/*
@@ -58,10 +54,11 @@

#define SYSCALL_EXIT_WORK \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK)
+ ARCH_SYSCALL_EXIT_WORK)

-#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP)
-#define SYSCALL_WORK_EXIT (0)
+#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \
+ SYSCALL_WORK_SYSCALL_TRACEPOINT)
+#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT)

/*
* TIF flags handled in exit_to_user_mode_loop()
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index a308ba4..c232043 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -37,9 +37,11 @@ enum {

enum syscall_work_bit {
SYSCALL_WORK_BIT_SECCOMP,
+ SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT,
};

#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
+#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT)

#include <asm/thread_info.h>

diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index dc8ac27..8e193f3 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -37,10 +37,10 @@ struct syscall_metadata {
#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
static inline void syscall_tracepoint_update(struct task_struct *p)
{
- if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
- set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+ if (test_syscall_work(SYSCALL_TRACEPOINT))
+ set_task_syscall_work(p, SYSCALL_TRACEPOINT);
else
- clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+ clear_task_syscall_work(p, SYSCALL_TRACEPOINT);
}
#else
static inline void syscall_tracepoint_update(struct task_struct *p)
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 5747a6e..f651967 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -63,7 +63,7 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
/* Either of the above might have changed the syscall number */
syscall = syscall_get_nr(current, regs);

- if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
+ if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, syscall);

syscall_enter_audit(regs, syscall);
@@ -233,7 +233,7 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long ti_work,

audit_syscall_exit(regs);

- if (ti_work & _TIF_SYSCALL_TRACEPOINT)
+ if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
trace_sys_exit(regs, syscall_get_return_value(current, regs));

step = report_single_step(ti_work);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 47a71f9..adf65b5 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3428,10 +3428,10 @@ static __init int event_trace_enable(void)
* initialize events and perhaps start any events that are on the
* command line. Unfortunately, there are some events that will not
* start this early, like the system call tracepoints that need
- * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
- * is called before pid 1 starts, and this flag is never set, making
- * the syscall tracepoint never get reached, but the event is enabled
- * regardless (and not doing anything).
+ * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
+ * event_trace_enable() is called before pid 1 starts, and this flag
+ * is never set, making the syscall tracepoint never get reached, but
+ * the event is enabled regardless (and not doing anything).
*/
static __init int event_trace_enable_again(void)
{
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 3f659f8..7261fa0 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -594,7 +594,7 @@ int syscall_regfunc(void)
if (!sys_tracepoint_refcount) {
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
- set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
+ set_task_syscall_work(t, SYSCALL_TRACEPOINT);
}
read_unlock(&tasklist_lock);
}
@@ -611,7 +611,7 @@ void syscall_unregfunc(void)
if (!sys_tracepoint_refcount) {
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
- clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
+ clear_task_syscall_work(t, SYSCALL_TRACEPOINT);
}
read_unlock(&tasklist_lock);
}

2020-11-17 19:14:38

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v2 05/10] tracepoints: Migrate to use SYSCALL_WORK flag

On Mon, 16 Nov 2020 12:42:01 -0500
Gabriel Krisman Bertazi <[email protected]> wrote:

> For architectures that rely on the generic syscall entry code, use the
> syscall_work field in struct thread_info and the specific SYSCALL_WORK
> flag. This set of flags has the advantage of being architecture
> independent.
>
> Users of the flag outside of the generic entry code should rely on the
> accessor macros, such that the flag is still correctly resolved for
> architectures that don't use the generic entry code and still rely on
> TIF flags for system call work.
>
> Signed-off-by: Gabriel Krisman Bertazi <[email protected]>

Reviewed-by: Steven Rostedt (VMware) <[email protected]>

Also added Mathieu to Cc.

-- Steve

>
> ---
> Changes since v2:
> - Drop explicit value assignment in enum (tglx)
> - Avoid FLAG/_FLAG defines (tglx)
> - Fix comment to refer to SYSCALL_WORK_SECCOMP (me)
> ---
> include/linux/entry-common.h | 13 +++++--------
> include/linux/thread_info.h | 2 ++
> include/trace/syscall.h | 6 +++---
> kernel/entry/common.c | 4 ++--
> kernel/trace/trace_events.c | 8 ++++----
> kernel/tracepoint.c | 4 ++--
> 6 files changed, 18 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
> index f3fc4457f63f..8aba367e5c79 100644
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h
> @@ -17,10 +17,6 @@
> # define _TIF_SYSCALL_EMU (0)
> #endif
>
> -#ifndef _TIF_SYSCALL_TRACEPOINT
> -# define _TIF_SYSCALL_TRACEPOINT (0)
> -#endif
> -
> #ifndef _TIF_SYSCALL_AUDIT
> # define _TIF_SYSCALL_AUDIT (0)
> #endif
> @@ -46,7 +42,7 @@
>
> #define SYSCALL_ENTER_WORK \
> (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
> - _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \
> + _TIF_SYSCALL_EMU | \
> ARCH_SYSCALL_ENTER_WORK)
>
> /*
> @@ -58,10 +54,11 @@
>
> #define SYSCALL_EXIT_WORK \
> (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
> - _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK)
> + ARCH_SYSCALL_EXIT_WORK)
>
> -#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP)
> -#define SYSCALL_WORK_EXIT (0)
> +#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \
> + SYSCALL_WORK_SYSCALL_TRACEPOINT)
> +#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT)
>
> /*
> * TIF flags handled in exit_to_user_mode_loop()
> diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
> index 1d6488130b5c..ff0ac2ebb4ff 100644
> --- a/include/linux/thread_info.h
> +++ b/include/linux/thread_info.h
> @@ -37,9 +37,11 @@ enum {
>
> enum syscall_work_bit {
> SYSCALL_WORK_BIT_SECCOMP,
> + SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT,
> };
>
> #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
> +#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT)
>
> #include <asm/thread_info.h>
>
> diff --git a/include/trace/syscall.h b/include/trace/syscall.h
> index dc8ac27d27c1..8e193f3a33b3 100644
> --- a/include/trace/syscall.h
> +++ b/include/trace/syscall.h
> @@ -37,10 +37,10 @@ struct syscall_metadata {
> #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
> static inline void syscall_tracepoint_update(struct task_struct *p)
> {
> - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> - set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
> + if (test_syscall_work(SYSCALL_TRACEPOINT))
> + set_task_syscall_work(p, SYSCALL_TRACEPOINT);
> else
> - clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
> + clear_task_syscall_work(p, SYSCALL_TRACEPOINT);
> }
> #else
> static inline void syscall_tracepoint_update(struct task_struct *p)
> diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> index c321056c73d7..4e2b3c08d939 100644
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -63,7 +63,7 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
> /* Either of the above might have changed the syscall number */
> syscall = syscall_get_nr(current, regs);
>
> - if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
> + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
> trace_sys_enter(regs, syscall);
>
> syscall_enter_audit(regs, syscall);
> @@ -233,7 +233,7 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long ti_work,
>
> audit_syscall_exit(regs);
>
> - if (ti_work & _TIF_SYSCALL_TRACEPOINT)
> + if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
> trace_sys_exit(regs, syscall_get_return_value(current, regs));
>
> step = report_single_step(ti_work);
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 47a71f96e5bc..adf65b502453 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -3428,10 +3428,10 @@ static __init int event_trace_enable(void)
> * initialize events and perhaps start any events that are on the
> * command line. Unfortunately, there are some events that will not
> * start this early, like the system call tracepoints that need
> - * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
> - * is called before pid 1 starts, and this flag is never set, making
> - * the syscall tracepoint never get reached, but the event is enabled
> - * regardless (and not doing anything).
> + * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
> + * event_trace_enable() is called before pid 1 starts, and this flag
> + * is never set, making the syscall tracepoint never get reached, but
> + * the event is enabled regardless (and not doing anything).
> */
> static __init int event_trace_enable_again(void)
> {
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index 3f659f855074..7261fa0f5e3c 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -594,7 +594,7 @@ int syscall_regfunc(void)
> if (!sys_tracepoint_refcount) {
> read_lock(&tasklist_lock);
> for_each_process_thread(p, t) {
> - set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
> + set_task_syscall_work(t, SYSCALL_TRACEPOINT);
> }
> read_unlock(&tasklist_lock);
> }
> @@ -611,7 +611,7 @@ void syscall_unregfunc(void)
> if (!sys_tracepoint_refcount) {
> read_lock(&tasklist_lock);
> for_each_process_thread(p, t) {
> - clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
> + clear_task_syscall_work(t, SYSCALL_TRACEPOINT);
> }
> read_unlock(&tasklist_lock);
> }