2023-08-12 06:07:00

by Masami Hiramatsu

Subject: [PATCH v3 5/8] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

From: Masami Hiramatsu (Google) <[email protected]>

Allow fprobe events to be enabled with CONFIG_DYNAMIC_FTRACE_WITH_ARGS.
With this change, fprobe events mostly use ftrace_regs instead of pt_regs.
Note that if the arch doesn't enable HAVE_PT_REGS_COMPAT_FTRACE_REGS,
fprobe events cannot be used from perf.
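
For illustration, a handler built on the accessor API looks roughly like
this (a minimal sketch with an assumed function name and signature; not
part of this patch):

/* Sketch: fetch values via the ftrace_regs accessors instead of
 * pt_regs. ftrace_get_regs() may return NULL when full registers
 * are not saved, so any pt_regs-based path must stay optional. */
static void sketch_handler(unsigned long entry_ip, struct ftrace_regs *fregs)
{
	unsigned long arg0 = ftrace_regs_get_argument(fregs, 0);
	unsigned long sp = ftrace_regs_get_stack_pointer(fregs);
	struct pt_regs *regs = ftrace_get_regs(fregs);	/* may be NULL */

	pr_info("ip=%lx arg0=%lx sp=%lx full_regs=%d\n",
		entry_ip, arg0, sp, regs != NULL);
}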

Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
---
Changes in v3:
- introduce ftrace_regs_get_kernel_stack_nth().
- fix typo.
---
include/linux/ftrace.h | 15 +++++++++
kernel/trace/Kconfig | 1 -
kernel/trace/trace_fprobe.c | 65 ++++++++++++++++++++-------------------
kernel/trace/trace_probe_tmpl.h | 2 +-
4 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fe335d861f08..3b3a0b1f592f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -171,6 +171,21 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs)
return ftrace_get_regs(fregs) != NULL;
}

+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+ unsigned long *stackp;
+
+ stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+ if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+ ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+ return *(stackp + nth);
+
+ return 0;
+}
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
#ifdef CONFIG_FUNCTION_TRACER

extern int ftrace_enabled;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d56304276318..6fb4ecf8767d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -679,7 +679,6 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
- depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index f440c97e050f..94c01dc061ec 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -132,7 +132,7 @@ static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
void *base)
{
- struct pt_regs *regs = rec;
+ struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;

@@ -140,17 +140,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
- val = regs_get_kernel_stack_nth(regs, code->param);
+ val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
- val = kernel_stack_pointer(regs);
+ val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
- val = regs_return_value(regs);
+ val = ftrace_regs_return_value(fregs);
break;
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
- val = regs_get_kernel_argument(regs, code->param);
+ val = ftrace_regs_get_argument(fregs, code->param);
break;
#endif
case FETCH_NOP_SYMBOL: /* Ignore a place holder */
@@ -170,7 +170,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
/* function entry handler */
static nokprobe_inline void
__fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs,
+ struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
{
struct fentry_trace_entry_head *entry;
@@ -184,36 +184,36 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;

- dsize = __get_data_size(&tf->tp, regs);
+ dsize = __get_data_size(&tf->tp, fregs);

entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;

- fbuffer.regs = regs;
+ fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->ip = entry_ip;
- store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);

trace_event_buffer_commit(&fbuffer);
}

static void
fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs)
{
struct event_file_link *link;

trace_probe_for_each_link_rcu(link, &tf->tp)
- __fentry_trace_func(tf, entry_ip, regs, link->file);
+ __fentry_trace_func(tf, entry_ip, fregs, link->file);
}
NOKPROBE_SYMBOL(fentry_trace_func);

/* Kretprobe handler */
static nokprobe_inline void
__fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
{
struct fexit_trace_entry_head *entry;
@@ -227,37 +227,37 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;

- dsize = __get_data_size(&tf->tp, regs);
+ dsize = __get_data_size(&tf->tp, fregs);

entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;

- fbuffer.regs = regs;
+ fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->func = entry_ip;
entry->ret_ip = ret_ip;
- store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);

trace_event_buffer_commit(&fbuffer);
}

static void
fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs)
+ unsigned long ret_ip, struct ftrace_regs *fregs)
{
struct event_file_link *link;

trace_probe_for_each_link_rcu(link, &tf->tp)
- __fexit_trace_func(tf, entry_ip, ret_ip, regs, link->file);
+ __fexit_trace_func(tf, entry_ip, ret_ip, fregs, link->file);
}
NOKPROBE_SYMBOL(fexit_trace_func);

#ifdef CONFIG_PERF_EVENTS

static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs, struct pt_regs *regs)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fentry_trace_entry_head *entry;
@@ -269,7 +269,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (hlist_empty(head))
return 0;

- dsize = __get_data_size(&tf->tp, regs);
+ dsize = __get_data_size(&tf->tp, fregs);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
@@ -280,7 +280,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,

entry->ip = entry_ip;
memset(&entry[1], 0, dsize);
- store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
return 0;
@@ -289,7 +289,8 @@ NOKPROBE_SYMBOL(fentry_perf_func);

static void
fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs)
+ unsigned long ret_ip, struct ftrace_regs *fregs,
+ struct pt_regs *regs)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fexit_trace_entry_head *entry;
@@ -301,7 +302,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (hlist_empty(head))
return;

- dsize = __get_data_size(&tf->tp, regs);
+ dsize = __get_data_size(&tf->tp, fregs);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
@@ -312,7 +313,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,

entry->func = entry_ip;
entry->ret_ip = ret_ip;
- store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
}
@@ -327,14 +328,15 @@ static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
struct pt_regs *regs = ftrace_get_regs(fregs);
int ret = 0;

+ if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
+ fentry_trace_func(tf, entry_ip, fregs);
+
+#ifdef CONFIG_PERF_EVENTS
if (!regs)
return 0;

- if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
- fentry_trace_func(tf, entry_ip, regs);
-#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
- ret = fentry_perf_func(tf, entry_ip, regs);
+ ret = fentry_perf_func(tf, entry_ip, fregs, regs);
#endif
return ret;
}
@@ -347,14 +349,15 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
struct pt_regs *regs = ftrace_get_regs(fregs);

+ if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
+ fexit_trace_func(tf, entry_ip, ret_ip, fregs);
+
+#ifdef CONFIG_PERF_EVENTS
if (!regs)
return;

- if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
- fexit_trace_func(tf, entry_ip, ret_ip, regs);
-#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
- fexit_perf_func(tf, entry_ip, ret_ip, regs);
+ fexit_perf_func(tf, entry_ip, ret_ip, fregs, regs);
#endif
}
NOKPROBE_SYMBOL(fexit_dispatcher);
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 3935b347f874..05445a745a07 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -232,7 +232,7 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,

/* Sum up total data length for dynamic arrays (strings) */
static nokprobe_inline int
-__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+__get_data_size(struct trace_probe *tp, void *regs)
{
struct probe_arg *arg;
int i, len, ret = 0;



2023-08-19 13:14:46

by Masami Hiramatsu

Subject: Re: [PATCH v3 5/8] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

On Thu, 17 Aug 2023 10:57:50 +0200
Florent Revest <[email protected]> wrote:

> On Sat, Aug 12, 2023 at 7:37 AM Masami Hiramatsu (Google)
> <[email protected]> wrote:
> >
> > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> > index d56304276318..6fb4ecf8767d 100644
> > --- a/kernel/trace/Kconfig
> > +++ b/kernel/trace/Kconfig
> > @@ -679,7 +679,6 @@ config FPROBE_EVENTS
> > select TRACING
> > select PROBE_EVENTS
> > select DYNAMIC_EVENTS
> > - depends on DYNAMIC_FTRACE_WITH_REGS
>
> I believe that, in practice, fprobe events still rely on WITH_REGS:
>
> > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> > index f440c97e050f..94c01dc061ec 100644
> > --- a/kernel/trace/trace_fprobe.c
> > +++ b/kernel/trace/trace_fprobe.c
> > @@ -327,14 +328,15 @@ static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
> > struct pt_regs *regs = ftrace_get_regs(fregs);
>
> Because here the entry handler requires ftrace_regs that are
> full pt_regs.

Ah, that is for perf events. Yes, that is the problematic point.
Since perf's interfaces depend on pt_regs (especially for stacktraces),
I cannot remove this part. This is the next issue to be solved.
Maybe we can use a partial pt_regs for stack tracing, so we can swap the
order of the patches to introduce ftrace_partial_regs() before this one
and use it for the perf event path.
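
Roughly, the perf path in the dispatcher could become something like this
(a sketch only; it assumes the ftrace_partial_regs(fregs, &storage) helper
from that patch, which fills a caller-provided pt_regs from ftrace_regs):

#ifdef CONFIG_PERF_EVENTS
	/* Sketch: materialize a partial pt_regs for perf instead of
	 * requiring ftrace_get_regs() to return full registers. */
	struct pt_regs partial;
	struct pt_regs *regs = ftrace_partial_regs(fregs, &partial);

	if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
		ret = fentry_perf_func(tf, entry_ip, fregs, regs);
#endif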

>
> > int ret = 0;
> >
> > + if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> > + fentry_trace_func(tf, entry_ip, fregs);
> > +
> > +#ifdef CONFIG_PERF_EVENTS
> > if (!regs)
> > return 0;
> >
> > - if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> > - fentry_trace_func(tf, entry_ip, regs);
> > -#ifdef CONFIG_PERF_EVENTS
> > if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
> > - ret = fentry_perf_func(tf, entry_ip, regs);
> > + ret = fentry_perf_func(tf, entry_ip, fregs, regs);
> > #endif
> > return ret;
> > }
> > @@ -347,14 +349,15 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
> > struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
> > struct pt_regs *regs = ftrace_get_regs(fregs);
>
> And same here with the return handler
>
> I think fprobe events would need the same sort of refactoring as
> kprobe_multi bpf: using ftrace_partial_regs so they work on
> !WITH_REGS builds.

Actually, kprobe_multi uses fprobe directly, so this is not related to
the bpf part.

Thank you,


--
Masami Hiramatsu (Google) <[email protected]>

2023-08-19 17:36:23

by Florent Revest

Subject: Re: [PATCH v3 5/8] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

On Sat, Aug 12, 2023 at 7:37 AM Masami Hiramatsu (Google)
<[email protected]> wrote:
>
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index d56304276318..6fb4ecf8767d 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -679,7 +679,6 @@ config FPROBE_EVENTS
> select TRACING
> select PROBE_EVENTS
> select DYNAMIC_EVENTS
> - depends on DYNAMIC_FTRACE_WITH_REGS

I believe that, in practice, fprobe events still rely on WITH_REGS:

> diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> index f440c97e050f..94c01dc061ec 100644
> --- a/kernel/trace/trace_fprobe.c
> +++ b/kernel/trace/trace_fprobe.c
> @@ -327,14 +328,15 @@ static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
> struct pt_regs *regs = ftrace_get_regs(fregs);

Because here the entry handler requires ftrace_regs that are
full pt_regs.

> int ret = 0;
>
> + if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> + fentry_trace_func(tf, entry_ip, fregs);
> +
> +#ifdef CONFIG_PERF_EVENTS
> if (!regs)
> return 0;
>
> - if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
> - fentry_trace_func(tf, entry_ip, regs);
> -#ifdef CONFIG_PERF_EVENTS
> if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
> - ret = fentry_perf_func(tf, entry_ip, regs);
> + ret = fentry_perf_func(tf, entry_ip, fregs, regs);
> #endif
> return ret;
> }
> @@ -347,14 +349,15 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
> struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
> struct pt_regs *regs = ftrace_get_regs(fregs);

And same here with the return handler

I think fprobe events would need the same sort of refactoring as
kprobe_multi bpf: using ftrace_partial_regs so they work on
!WITH_REGS builds.

2023-08-20 16:28:13

by Masami Hiramatsu

Subject: Re: [PATCH v3 5/8] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

On Thu, 17 Aug 2023 13:44:41 -0700
Alexei Starovoitov <[email protected]> wrote:

> On Fri, Aug 11, 2023 at 10:37 PM Masami Hiramatsu (Google)
> <[email protected]> wrote:
> >
> > +#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
> > +static __always_inline unsigned long
> > +ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
> > +{
> > + unsigned long *stackp;
> > +
> > + stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
> > + if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
> > + ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
> > + return *(stackp + nth);
> > +
> > + return 0;
> > +}
> > +#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
> ...
> >
> > @@ -140,17 +140,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
> > /* 1st stage: get value from context */
> > switch (code->op) {
> > case FETCH_OP_STACK:
> > - val = regs_get_kernel_stack_nth(regs, code->param);
> > + val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
> > break;
>
> Just noticed that bit.
> You probably want to document that access to arguments and
> especially arguments on stack is not precise. It's "best effort".
> x86-64 calling convention is not as simple as it might appear.
> For example if 6th argument is a 16-byte struct like sockptr_t it will be
> passed on the stack and 7th actual argument (if it's <= 8 byte) will
> be the register.

Yes, that's right. To access the precise arguments, the easiest way is
using DWARF (e.g. perf probe); another way is to reconstruct the ABI
from type information such as BTF.
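
For example (illustrative C, not from the kernel tree):

/* On x86-64 (SysV ABI), a1..a5 take rdi, rsi, rdx, rcx and r8. The
 * 16-byte by-value struct a6 cannot be split across the one integer
 * register that remains, so it goes to the stack, while a7 still
 * lands in r9. A naive "nth argument" fetch misreads both. */
struct two_words {
	void *ptr;
	unsigned long flags;	/* 16 bytes total */
};

long example(long a1, long a2, long a3, long a4, long a5,
	     struct two_words a6,	/* on the stack, not in a register */
	     long a7)			/* in r9, not on the stack */
{
	return a7;
}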

>
> Things are similar on 32-bit, and there is a non-zero chance that
> regs_get_kernel_argument() doesn't return the actual arg.

Yeah, it is hard to get the actual argument...

Let me update the document.

Thank you,

--
Masami Hiramatsu (Google) <[email protected]>