Hello,
Here is the 2nd version of the series to fix the stacktrace with kretprobe.
The 1st series is here;
https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
In this version I merged the ORC unwinder fix for kretprobe which discussed in the
previous thread. [3/10] is updated according to the Miroslav's comment. [4/10] is
updated for simplify the code. [5/10]-[9/10] are discussed in the previsous tread
and are introduced to the series.
Daniel, can you also test this again? I and Josh discussed a bit different
method and I've implemented it on this version.
This actually changes the kretprobe behavisor a bit, now the instraction pointer in
the pt_regs passed to kretprobe user handler is correctly set the real return
address. So user handlers can get it via instruction_pointer() API.
Thank you,
---
Josh Poimboeuf (1):
x86/kprobes: Add UNWIND_HINT_FUNC on kretprobe_trampoline code
Masami Hiramatsu (9):
ia64: kprobes: Fix to pass correct trampoline address to the handler
kprobes: treewide: Replace arch_deref_entry_point() with dereference_function_descriptor()
kprobes: treewide: Remove trampoline_address from kretprobe_trampoline_handler()
kprobes: stacktrace: Recover the address changed by kretprobe
ARC: Add instruction_pointer_set() API
ia64: Add instruction_pointer_set() API
kprobes: Setup instruction pointer in __kretprobe_trampoline_handler
x86/unwind/orc: Fixup kretprobe trampoline entry
tracing: Remove kretprobe unknown indicator from stacktrace
arch/arc/include/asm/ptrace.h | 5 ++
arch/arc/kernel/kprobes.c | 2 -
arch/arm/probes/kprobes/core.c | 3 -
arch/arm64/kernel/probes/kprobes.c | 3 -
arch/csky/kernel/probes/kprobes.c | 2 -
arch/ia64/include/asm/ptrace.h | 6 +++
arch/ia64/kernel/kprobes.c | 15 ++----
arch/mips/kernel/kprobes.c | 3 -
arch/parisc/kernel/kprobes.c | 4 +-
arch/powerpc/kernel/kprobes.c | 13 -----
arch/riscv/kernel/probes/kprobes.c | 2 -
arch/s390/kernel/kprobes.c | 2 -
arch/sh/kernel/kprobes.c | 2 -
arch/sparc/kernel/kprobes.c | 2 -
arch/x86/include/asm/unwind.h | 4 ++
arch/x86/include/asm/unwind_hints.h | 5 ++
arch/x86/kernel/kprobes/core.c | 5 +-
arch/x86/kernel/unwind_orc.c | 16 +++++++
include/linux/kprobes.h | 41 +++++++++++++++--
kernel/kprobes.c | 84 +++++++++++++++++++++--------------
kernel/stacktrace.c | 22 +++++++++
kernel/trace/trace_output.c | 27 ++---------
lib/error-inject.c | 3 +
23 files changed, 170 insertions(+), 101 deletions(-)
--
Masami Hiramatsu (Linaro) <[email protected]>
From: Josh Poimboeuf <[email protected]>
Add UNWIND_HINT_FUNC on kretporbe_trampoline code so that ORC
information is generated on the kretprobe_trampoline correctly.
Signed-off-by: Josh Poimboeuf <[email protected]>
---
[MH] Add patch description.
---
arch/x86/include/asm/unwind_hints.h | 5 +++++
arch/x86/kernel/kprobes/core.c | 3 ++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 8e574c0afef8..8b33674288ea 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -52,6 +52,11 @@
UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
.endm
+#else
+
+#define UNWIND_HINT_FUNC \
+ UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0)
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 3c00b773fe2e..e079eb487ecd 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1023,6 +1023,7 @@ asm(
/* We don't bother saving the ss register */
#ifdef CONFIG_X86_64
" pushq %rsp\n"
+ UNWIND_HINT_FUNC
" pushfq\n"
SAVE_REGS_STRING
" movq %rsp, %rdi\n"
@@ -1033,6 +1034,7 @@ asm(
" popfq\n"
#else
" pushl %esp\n"
+ UNWIND_HINT_FUNC
" pushfl\n"
SAVE_REGS_STRING
" movl %esp, %eax\n"
@@ -1046,7 +1048,6 @@ asm(
".size kretprobe_trampoline, .-kretprobe_trampoline\n"
);
NOKPROBE_SYMBOL(kretprobe_trampoline);
-STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
/*
Add instruction_pointer_set() API for arc.
Signed-off-by: Masami Hiramatsu <[email protected]>
---
arch/arc/include/asm/ptrace.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 4c3c9be5bd16..cca8d6583e31 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -149,6 +149,11 @@ static inline long regs_return_value(struct pt_regs *regs)
return (long)regs->r0;
}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PTRACE_H */
Add instruction_pointer_set() API for ia64.
Signed-off-by: Masami Hiramatsu <[email protected]>
---
arch/ia64/include/asm/ptrace.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index b3aa46090101..dbd9e85cbc77 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -71,6 +71,12 @@ static inline long regs_return_value(struct pt_regs *regs)
return -regs->r8;
}
+static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val)
+{
+ ia64_psr(regs)->ri = (val & 0xf);
+ regs->cr_iip = (val & ~0xfULL);
+}
+
/* Conserve space in histogram by encoding slot bits in address
* bits 2 and 3 rather than bits 0 and 1.
*/
Remove trampoline_address from kretprobe_trampoline_handler().
Instead of passing the address, kretprobe_trampoline_handler()
can use new kretprobe_trampoline_addr().
Signed-off-by: Masami Hiramatsu <[email protected]>
---
Changes in v2:
- Remove arch_deref_entry_point() from comment.
---
arch/arc/kernel/kprobes.c | 2 +-
arch/arm/probes/kprobes/core.c | 3 +--
arch/arm64/kernel/probes/kprobes.c | 3 +--
arch/csky/kernel/probes/kprobes.c | 2 +-
arch/ia64/kernel/kprobes.c | 5 ++---
arch/mips/kernel/kprobes.c | 3 +--
arch/parisc/kernel/kprobes.c | 4 ++--
arch/powerpc/kernel/kprobes.c | 2 +-
arch/riscv/kernel/probes/kprobes.c | 2 +-
arch/s390/kernel/kprobes.c | 2 +-
arch/sh/kernel/kprobes.c | 2 +-
arch/sparc/kernel/kprobes.c | 2 +-
arch/x86/kernel/kprobes/core.c | 2 +-
include/linux/kprobes.h | 18 +++++++++++++-----
kernel/kprobes.c | 3 +--
15 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index cabef45f11df..3ae01bb5820c 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -397,7 +397,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
static int __kprobes trampoline_probe_handler(struct kprobe *p,
struct pt_regs *regs)
{
- regs->ret = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+ regs->ret = __kretprobe_trampoline_handler(regs, NULL);
/* By returning a non zero value, we are telling the kprobe handler
* that we don't want the post_handler to run
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index a9653117ca0d..1782b41df095 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -413,8 +413,7 @@ void __naked __kprobes kretprobe_trampoline(void)
/* Called from kretprobe_trampoline */
static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
{
- return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline,
- (void *)regs->ARM_fp);
+ return (void *)kretprobe_trampoline_handler(regs, (void *)regs->ARM_fp);
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 66aac2881ba8..fce681fdfce6 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -412,8 +412,7 @@ int __init arch_populate_kprobe_blacklist(void)
void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
{
- return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline,
- (void *)kernel_stack_pointer(regs));
+ return (void *)kretprobe_trampoline_handler(regs, (void *)kernel_stack_pointer(regs));
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 589f090f48b9..cc589bc11904 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -404,7 +404,7 @@ int __init arch_populate_kprobe_blacklist(void)
void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
{
- return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+ return (void *)kretprobe_trampoline_handler(regs, NULL);
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 15871eb170c0..a008df8e7203 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -392,14 +392,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p,
__this_cpu_write(current_kprobe, p);
}
-static void kretprobe_trampoline(void)
+void kretprobe_trampoline(void)
{
}
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- regs->cr_iip = __kretprobe_trampoline_handler(regs,
- dereference_function_descriptor(kretprobe_trampoline), NULL);
+ regs->cr_iip = __kretprobe_trampoline_handler(regs, NULL);
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index 54dfba8fa77c..001a2f07ef44 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -489,8 +489,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
static int __kprobes trampoline_probe_handler(struct kprobe *p,
struct pt_regs *regs)
{
- instruction_pointer(regs) = __kretprobe_trampoline_handler(regs,
- kretprobe_trampoline, NULL);
+ instruction_pointer(regs) = __kretprobe_trampoline_handler(regs, NULL);
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
index 6d21a515eea5..4a35ac6e2ca2 100644
--- a/arch/parisc/kernel/kprobes.c
+++ b/arch/parisc/kernel/kprobes.c
@@ -175,7 +175,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
return 1;
}
-static inline void kretprobe_trampoline(void)
+void kretprobe_trampoline(void)
{
asm volatile("nop");
asm volatile("nop");
@@ -193,7 +193,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
{
unsigned long orig_ret_address;
- orig_ret_address = __kretprobe_trampoline_handler(regs, trampoline_p.addr, NULL);
+ orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
instruction_pointer_set(regs, orig_ret_address);
return 1;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index eb0460949e1b..dfd532c43525 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -399,7 +399,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
unsigned long orig_ret_address;
- orig_ret_address = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+ orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
/*
* We get here through one of two paths:
* 1. by taking a trap -> kprobe_handler() -> here
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index a2ec18662fee..619339f1d3ba 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -376,7 +376,7 @@ int __init arch_populate_kprobe_blacklist(void)
void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
{
- return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+ return (void *)kretprobe_trampoline_handler(regs, NULL);
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index aae24dc75df6..b149e9169709 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -351,7 +351,7 @@ static void __used kretprobe_trampoline_holder(void)
*/
static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- regs->psw.addr = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+ regs->psw.addr = __kretprobe_trampoline_handler(regs, NULL);
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 756100b01e84..48356e81836a 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -303,7 +303,7 @@ static void __used kretprobe_trampoline_holder(void)
*/
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- regs->pc = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+ regs->pc = __kretprobe_trampoline_handler(regs, NULL);
return 1;
}
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index 217c21a6986a..fa30f9dadff8 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -468,7 +468,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
{
unsigned long orig_ret_address = 0;
- orig_ret_address = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+ orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
regs->tpc = orig_ret_address;
regs->tnpc = orig_ret_address + 4;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1f58e89eeccd..3c00b773fe2e 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1062,7 +1062,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
regs->ip = (unsigned long)&kretprobe_trampoline;
regs->orig_ax = ~0UL;
- return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, ®s->sp);
+ return (void *)kretprobe_trampoline_handler(regs, ®s->sp);
}
NOKPROBE_SYMBOL(trampoline_handler);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index d65c041b5c22..65dadd4238a2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,15 +205,23 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs);
extern int arch_trampoline_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+/*
+ * Since some architecture uses structured function pointer,
+ * use dereference_function_descriptor() to get real function address.
+ */
+static nokprobe_inline void *kretprobe_trampoline_addr(void)
+{
+ return dereference_function_descriptor(kretprobe_trampoline);
+}
+
/* If the trampoline handler called from a kprobe, use this version */
unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
- void *trampoline_address,
- void *frame_pointer);
+ void *frame_pointer);
static nokprobe_inline
unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
- void *trampoline_address,
- void *frame_pointer)
+ void *frame_pointer)
{
unsigned long ret;
/*
@@ -222,7 +230,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
* be running at this point.
*/
kprobe_busy_begin();
- ret = __kretprobe_trampoline_handler(regs, trampoline_address, frame_pointer);
+ ret = __kretprobe_trampoline_handler(regs, frame_pointer);
kprobe_busy_end();
return ret;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2913de07f4a3..75c0a58c19c2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1859,7 +1859,6 @@ static struct notifier_block kprobe_exceptions_nb = {
#ifdef CONFIG_KRETPROBES
unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
- void *trampoline_address,
void *frame_pointer)
{
kprobe_opcode_t *correct_ret_addr = NULL;
@@ -1874,7 +1873,7 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
BUG_ON(ri->fp != frame_pointer);
- if (ri->ret_addr != trampoline_address) {
+ if (ri->ret_addr != kretprobe_trampoline_addr()) {
correct_ret_addr = ri->ret_addr;
/*
* This is the real return address. Any other
Since the kretprobe replaces the function return address with
the kretprobe_trampoline on the stack, the ORC unwinder can not
continue the stack unwinding at that point.
To fix this issue, correct state->ip as like as function-graph
tracer in the unwind_next_frame().
Signed-off-by: Masami Hiramatsu <[email protected]>
---
Changes in v2:
- Remove kretprobe wrapper functions from unwind_orc.c
- Do not fixup state->ip when unwinding with regs because
kretprobe fixup instruction pointer before calling handler.
---
arch/x86/include/asm/unwind.h | 4 ++++
arch/x86/kernel/unwind_orc.c | 16 ++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 70fc159ebe69..ab5e45b848d5 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
+#include <linux/llist.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
@@ -20,6 +21,9 @@ struct unwind_state {
bool signal, full_regs;
unsigned long sp, bp, ip;
struct pt_regs *regs, *prev_regs;
+#if defined(CONFIG_KRETPROBES)
+ struct llist_node *kr_iter;
+#endif
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 2a1d47f47eee..1d1b9388a1b1 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
+#include <linux/kprobes.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@@ -536,6 +537,21 @@ bool unwind_next_frame(struct unwind_state *state)
state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
state->ip, (void *)ip_p);
+ /*
+ * When the stack unwinder is called from the kretprobe handler
+ * or the interrupt handler which occurs in the kretprobe
+ * trampoline code, %sp is shown on the stack instead of the
+ * return address because kretprobe_trampoline() does
+ * "push %sp" at first.
+ * And also the unwinder may find the kretprobe_trampoline
+ * instead of the real return address on stack.
+ * In those cases, find the correct return address from
+ * task->kretprobe_instances list.
+ */
+ if (state->ip == sp ||
+ is_kretprobe_trampoline(state->ip))
+ state->ip = kretprobe_find_ret_addr(state->task,
+ &state->kr_iter);
state->sp = sp;
state->regs = NULL;
Since the stacktrace API fixup the kretprobed address correctly,
there is no need to convert the "kretprobe_trampoline" to
"[unknown/kretprobe'd]" anymore. Remove it.
Signed-off-by: Masami Hiramatsu <[email protected]>
Acked-by: Steven Rostedt (VMware) <[email protected]>
---
kernel/trace/trace_output.c | 27 ++++-----------------------
1 file changed, 4 insertions(+), 23 deletions(-)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 61255bad7e01..f5f8b081b668 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -346,37 +346,18 @@ int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...)
}
EXPORT_SYMBOL_GPL(trace_output_call);
-#ifdef CONFIG_KRETPROBES
-static inline const char *kretprobed(const char *name)
-{
- static const char tramp_name[] = "kretprobe_trampoline";
- int size = sizeof(tramp_name);
-
- if (strncmp(tramp_name, name, size) == 0)
- return "[unknown/kretprobe'd]";
- return name;
-}
-#else
-static inline const char *kretprobed(const char *name)
-{
- return name;
-}
-#endif /* CONFIG_KRETPROBES */
-
void
trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
{
#ifdef CONFIG_KALLSYMS
- char str[KSYM_SYMBOL_LEN];
- const char *name;
+ char name[KSYM_SYMBOL_LEN];
if (offset)
- sprint_symbol(str, address);
+ sprint_symbol(name, address);
else
- kallsyms_lookup(address, NULL, NULL, NULL, str);
- name = kretprobed(str);
+ kallsyms_lookup(address, NULL, NULL, NULL, name);
- if (name && strlen(name)) {
+ if (strlen(name)) {
trace_seq_puts(s, name);
return;
}
To simplify the stacktrace with pt_regs from kretprobe handler,
set the correct return address to the instruction pointer in
the pt_regs before calling kretprobe handlers.
Suggested-by: Josh Poimboeuf <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
kernel/kprobes.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2550521ff64d..51d0057382a5 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1897,6 +1897,9 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
BUG_ON(1);
}
+ /* Set the instruction pointer to the correct address */
+ instruction_pointer_set(regs, correct_ret_addr);
+
/* Run them. */
first = current->kretprobe_instances.first;
while (first) {
Replace arch_deref_entry_point() with dereference_function_descriptor()
because those are doing same thing.
Signed-off-by: Masami Hiramatsu <[email protected]>
---
arch/ia64/kernel/kprobes.c | 5 -----
arch/powerpc/kernel/kprobes.c | 11 -----------
include/linux/kprobes.h | 1 -
kernel/kprobes.c | 7 +------
lib/error-inject.c | 3 ++-
5 files changed, 3 insertions(+), 24 deletions(-)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 006fbc1d7ae9..15871eb170c0 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -907,11 +907,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
return ret;
}
-unsigned long arch_deref_entry_point(void *entry)
-{
- return ((struct fnptr *)entry)->ip;
-}
-
static struct kprobe trampoline_p = {
.pre_handler = trampoline_probe_handler
};
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 01ab2163659e..eb0460949e1b 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -539,17 +539,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
}
NOKPROBE_SYMBOL(kprobe_fault_handler);
-unsigned long arch_deref_entry_point(void *entry)
-{
-#ifdef PPC64_ELF_ABI_v1
- if (!kernel_text_address((unsigned long)entry))
- return ppc_global_function_entry(entry);
- else
-#endif
- return (unsigned long)entry;
-}
-NOKPROBE_SYMBOL(arch_deref_entry_point);
-
static struct kprobe trampoline_p = {
.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
.pre_handler = trampoline_probe_handler
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1883a4a9f16a..d65c041b5c22 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -390,7 +390,6 @@ int register_kprobe(struct kprobe *p);
void unregister_kprobe(struct kprobe *p);
int register_kprobes(struct kprobe **kps, int num);
void unregister_kprobes(struct kprobe **kps, int num);
-unsigned long arch_deref_entry_point(void *);
int register_kretprobe(struct kretprobe *rp);
void unregister_kretprobe(struct kretprobe *rp);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 745f08fdd7a6..2913de07f4a3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1856,11 +1856,6 @@ static struct notifier_block kprobe_exceptions_nb = {
.priority = 0x7fffffff /* we need to be notified first */
};
-unsigned long __weak arch_deref_entry_point(void *entry)
-{
- return (unsigned long)entry;
-}
-
#ifdef CONFIG_KRETPROBES
unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
@@ -2324,7 +2319,7 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
int ret;
for (iter = start; iter < end; iter++) {
- entry = arch_deref_entry_point((void *)*iter);
+ entry = (unsigned long)dereference_function_descriptor((void *)*iter);
ret = kprobe_add_ksym_blacklist(entry);
if (ret == -EINVAL)
continue;
diff --git a/lib/error-inject.c b/lib/error-inject.c
index c73651b15b76..f71875ac5f9f 100644
--- a/lib/error-inject.c
+++ b/lib/error-inject.c
@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <asm/sections.h>
/* Whitelist of symbols that can be overridden for error injection. */
static LIST_HEAD(error_injection_list);
@@ -64,7 +65,7 @@ static void populate_error_injection_list(struct error_injection_entry *start,
mutex_lock(&ei_mutex);
for (iter = start; iter < end; iter++) {
- entry = arch_deref_entry_point((void *)iter->addr);
+ entry = (unsigned long)dereference_function_descriptor((void *)iter->addr);
if (!kernel_text_address(entry) ||
!kallsyms_lookup_size_offset(entry, &size, &offset)) {
Commit e792ff804f49 ("ia64: kprobes: Use generic kretprobe trampoline handler")
missed to pass the wrong trampoline address (it passes the descriptor address
instead of function entry address).
This fixes it to pass correct trampoline address to __kretprobe_trampoline_handler().
This also changes to use correct symbol dereference function to get the
function address from the kretprobe_trampoline.
Fixes: e792ff804f49 ("ia64: kprobes: Use generic kretprobe trampoline handler")
Signed-off-by: Masami Hiramatsu <[email protected]>
---
arch/ia64/kernel/kprobes.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index fc1ff8a4d7de..006fbc1d7ae9 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -398,7 +398,8 @@ static void kretprobe_trampoline(void)
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL);
+ regs->cr_iip = __kretprobe_trampoline_handler(regs,
+ dereference_function_descriptor(kretprobe_trampoline), NULL);
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
@@ -414,7 +415,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
ri->fp = NULL;
/* Replace the return addr with trampoline addr */
- regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+ regs->b0 = (unsigned long)dereference_function_descriptor(kretprobe_trampoline);
}
/* Check the instruction in the slot is break */
@@ -918,14 +919,14 @@ static struct kprobe trampoline_p = {
int __init arch_init_kprobes(void)
{
trampoline_p.addr =
- (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
+ dereference_function_description(kretprobe_trampoline);
return register_kprobe(&trampoline_p);
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
{
if (p->addr ==
- (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip)
+ dereference_function_descriptor(kretprobe_trampoline))
return 1;
return 0;
On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote:
> Hello,
>
> Here is the 2nd version of the series to fix the stacktrace with kretprobe.
>
> The 1st series is here;
>
> https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
>
> In this version I merged the ORC unwinder fix for kretprobe which discussed in the
> previous thread. [3/10] is updated according to the Miroslav's comment. [4/10] is
> updated for simplify the code. [5/10]-[9/10] are discussed in the previsous tread
> and are introduced to the series.
>
> Daniel, can you also test this again? I and Josh discussed a bit different
> method and I've implemented it on this version.
Works great, thanks!
Tested-by: Daniel Xu <[email protected]>
>
> This actually changes the kretprobe behavisor a bit, now the instraction pointer in
> the pt_regs passed to kretprobe user handler is correctly set the real return
> address. So user handlers can get it via instruction_pointer() API.
While this changes behavior a little bit, I don't think anyone will
mind. I think it's more accurate now.
<...>
Thanks,
Daniel
On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote:
> Hello,
>
> Here is the 2nd version of the series to fix the stacktrace with kretprobe.
>
> The 1st series is here;
>
> https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
>
> In this version I merged the ORC unwinder fix for kretprobe which discussed in the
> previous thread. [3/10] is updated according to the Miroslav's comment. [4/10] is
> updated for simplify the code. [5/10]-[9/10] are discussed in the previsous tread
> and are introduced to the series.
>
> Daniel, can you also test this again? I and Josh discussed a bit different
> method and I've implemented it on this version.
>
> This actually changes the kretprobe behavisor a bit, now the instraction pointer in
> the pt_regs passed to kretprobe user handler is correctly set the real return
> address. So user handlers can get it via instruction_pointer() API.
When I add WARN_ON(1) to a test kretprobe, it doesn't unwind properly.
show_trace_log_lvl() reads the entire stack in lockstep with calls to
the unwinder so that it can decide which addresses get prefixed with
'?'. So it expects to find an actual return address on the stack.
Instead it finds %rsp. So it never syncs up with unwind_next_frame()
and shows all remaining addresses as unreliable.
Call Trace:
__kretprobe_trampoline_handler+0xca/0x1a0
trampoline_handler+0x3d/0x50
kretprobe_trampoline+0x25/0x50
? init_test_probes.cold+0x323/0x387
? init_kprobes+0x144/0x14c
? init_optprobes+0x15/0x15
? do_one_initcall+0x5b/0x300
? lock_is_held_type+0xe8/0x140
? kernel_init_freeable+0x174/0x2cd
? rest_init+0x233/0x233
? kernel_init+0xa/0x11d
? ret_from_fork+0x22/0x30
How about pushing 'kretprobe_trampoline' instead of %rsp for the return
address placeholder. That fixes the above test, and removes the need
for the weird 'state->ip == sp' check:
Call Trace:
__kretprobe_trampoline_handler+0xca/0x150
trampoline_handler+0x3d/0x50
kretprobe_trampoline+0x29/0x50
? init_test_probes.cold+0x323/0x387
elfcorehdr_read+0x10/0x10
init_kprobes+0x144/0x14c
? init_optprobes+0x15/0x15
do_one_initcall+0x72/0x280
kernel_init_freeable+0x174/0x2cd
? rest_init+0x122/0x122
kernel_init+0xa/0x10e
ret_from_fork+0x22/0x30
Though, init_test_probes.cold() (the real return address) is still
listed as unreliable. Maybe we need a call to kretprobe_find_ret_addr()
in show_trace_log_lvl(), similar to the ftrace_graph_ret_addr() call
there.
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 06f33bfebc50..70188fdd288e 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -766,19 +766,19 @@ asm(
"kretprobe_trampoline:\n"
/* We don't bother saving the ss register */
#ifdef CONFIG_X86_64
- " pushq %rsp\n"
+ /* Push fake return address to tell the unwinder it's a kretprobe */
+ " pushq $kretprobe_trampoline\n"
UNWIND_HINT_FUNC
" pushfq\n"
SAVE_REGS_STRING
" movq %rsp, %rdi\n"
" call trampoline_handler\n"
- /* Replace saved sp with true return address. */
+ /* Replace the fake return address with the real one. */
" movq %rax, 19*8(%rsp)\n"
RESTORE_REGS_STRING
" popfq\n"
#else
" pushl %esp\n"
- UNWIND_HINT_FUNC
" pushfl\n"
SAVE_REGS_STRING
" movl %esp, %eax\n"
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 1d1b9388a1b1..1d3de84d2410 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -548,8 +548,7 @@ bool unwind_next_frame(struct unwind_state *state)
* In those cases, find the correct return address from
* task->kretprobe_instances list.
*/
- if (state->ip == sp ||
- is_kretprobe_trampoline(state->ip))
+ if (is_kretprobe_trampoline(state->ip))
state->ip = kretprobe_find_ret_addr(state->task,
&state->kr_iter);
On Mon, 15 Mar 2021 21:30:03 -0500
Josh Poimboeuf <[email protected]> wrote:
> On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote:
> > Hello,
> >
> > Here is the 2nd version of the series to fix the stacktrace with kretprobe.
> >
> > The 1st series is here;
> >
> > https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
> >
> > In this version I merged the ORC unwinder fix for kretprobe which discussed in the
> > previous thread. [3/10] is updated according to the Miroslav's comment. [4/10] is
> > updated for simplify the code. [5/10]-[9/10] are discussed in the previsous tread
> > and are introduced to the series.
> >
> > Daniel, can you also test this again? I and Josh discussed a bit different
> > method and I've implemented it on this version.
> >
> > This actually changes the kretprobe behavisor a bit, now the instraction pointer in
> > the pt_regs passed to kretprobe user handler is correctly set the real return
> > address. So user handlers can get it via instruction_pointer() API.
>
> When I add WARN_ON(1) to a test kretprobe, it doesn't unwind properly.
>
> show_trace_log_lvl() reads the entire stack in lockstep with calls to
> the unwinder so that it can decide which addresses get prefixed with
> '?'. So it expects to find an actual return address on the stack.
> Instead it finds %rsp. So it never syncs up with unwind_next_frame()
> and shows all remaining addresses as unreliable.
>
> Call Trace:
> __kretprobe_trampoline_handler+0xca/0x1a0
> trampoline_handler+0x3d/0x50
> kretprobe_trampoline+0x25/0x50
> ? init_test_probes.cold+0x323/0x387
> ? init_kprobes+0x144/0x14c
> ? init_optprobes+0x15/0x15
> ? do_one_initcall+0x5b/0x300
> ? lock_is_held_type+0xe8/0x140
> ? kernel_init_freeable+0x174/0x2cd
> ? rest_init+0x233/0x233
> ? kernel_init+0xa/0x11d
> ? ret_from_fork+0x22/0x30
>
> How about pushing 'kretprobe_trampoline' instead of %rsp for the return
> address placeholder. That fixes the above test, and removes the need
> for the weird 'state->ip == sp' check:
>
> Call Trace:
> __kretprobe_trampoline_handler+0xca/0x150
> trampoline_handler+0x3d/0x50
> kretprobe_trampoline+0x29/0x50
> ? init_test_probes.cold+0x323/0x387
> elfcorehdr_read+0x10/0x10
> init_kprobes+0x144/0x14c
> ? init_optprobes+0x15/0x15
> do_one_initcall+0x72/0x280
> kernel_init_freeable+0x174/0x2cd
> ? rest_init+0x122/0x122
> kernel_init+0xa/0x10e
> ret_from_fork+0x22/0x30
>
> Though, init_test_probes.cold() (the real return address) is still
> listed as unreliable. Maybe we need a call to kretprobe_find_ret_addr()
> in show_trace_log_lvl(), similar to the ftrace_graph_ret_addr() call
> there.
Thanks for the test!
OK, that could be acceptable. However, push %sp still needed for accessing
stack address from kretprobe handler via pt_regs. (regs->sp must point
the stack address)
Anyway, with int3, it pushes one more entry for emulating call, so I think
it is OK.
Let me update the series!
Thank you!
>
>
>
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index 06f33bfebc50..70188fdd288e 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -766,19 +766,19 @@ asm(
> "kretprobe_trampoline:\n"
> /* We don't bother saving the ss register */
> #ifdef CONFIG_X86_64
> - " pushq %rsp\n"
> + /* Push fake return address to tell the unwinder it's a kretprobe */
> + " pushq $kretprobe_trampoline\n"
> UNWIND_HINT_FUNC
> " pushfq\n"
> SAVE_REGS_STRING
> " movq %rsp, %rdi\n"
> " call trampoline_handler\n"
> - /* Replace saved sp with true return address. */
> + /* Replace the fake return address with the real one. */
> " movq %rax, 19*8(%rsp)\n"
> RESTORE_REGS_STRING
> " popfq\n"
> #else
> " pushl %esp\n"
> - UNWIND_HINT_FUNC
> " pushfl\n"
> SAVE_REGS_STRING
> " movl %esp, %eax\n"
> diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
> index 1d1b9388a1b1..1d3de84d2410 100644
> --- a/arch/x86/kernel/unwind_orc.c
> +++ b/arch/x86/kernel/unwind_orc.c
> @@ -548,8 +548,7 @@ bool unwind_next_frame(struct unwind_state *state)
> * In those cases, find the correct return address from
> * task->kretprobe_instances list.
> */
> - if (state->ip == sp ||
> - is_kretprobe_trampoline(state->ip))
> + if (is_kretprobe_trampoline(state->ip))
> state->ip = kretprobe_find_ret_addr(state->task,
> &state->kr_iter);
>
>
>
--
Masami Hiramatsu <[email protected]>
On Tue, Mar 16, 2021 at 1:45 AM Masami Hiramatsu <[email protected]> wrote:
>
> On Mon, 15 Mar 2021 21:30:03 -0500
> Josh Poimboeuf <[email protected]> wrote:
>
> > On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote:
> > > Hello,
> > >
> > > Here is the 2nd version of the series to fix the stacktrace with kretprobe.
> > >
> > > The 1st series is here;
> > >
> > > https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
> > >
> > > In this version I merged the ORC unwinder fix for kretprobe which discussed in the
> > > previous thread. [3/10] is updated according to the Miroslav's comment. [4/10] is
> > > updated for simplify the code. [5/10]-[9/10] are discussed in the previsous tread
> > > and are introduced to the series.
> > >
> > > Daniel, can you also test this again? I and Josh discussed a bit different
> > > method and I've implemented it on this version.
> > >
> > > This actually changes the kretprobe behavisor a bit, now the instraction pointer in
> > > the pt_regs passed to kretprobe user handler is correctly set the real return
> > > address. So user handlers can get it via instruction_pointer() API.
> >
> > When I add WARN_ON(1) to a test kretprobe, it doesn't unwind properly.
> >
> > show_trace_log_lvl() reads the entire stack in lockstep with calls to
> > the unwinder so that it can decide which addresses get prefixed with
> > '?'. So it expects to find an actual return address on the stack.
> > Instead it finds %rsp. So it never syncs up with unwind_next_frame()
> > and shows all remaining addresses as unreliable.
> >
> > Call Trace:
> > __kretprobe_trampoline_handler+0xca/0x1a0
> > trampoline_handler+0x3d/0x50
> > kretprobe_trampoline+0x25/0x50
> > ? init_test_probes.cold+0x323/0x387
> > ? init_kprobes+0x144/0x14c
> > ? init_optprobes+0x15/0x15
> > ? do_one_initcall+0x5b/0x300
> > ? lock_is_held_type+0xe8/0x140
> > ? kernel_init_freeable+0x174/0x2cd
> > ? rest_init+0x233/0x233
> > ? kernel_init+0xa/0x11d
> > ? ret_from_fork+0x22/0x30
> >
> > How about pushing 'kretprobe_trampoline' instead of %rsp for the return
> > address placeholder. That fixes the above test, and removes the need
> > for the weird 'state->ip == sp' check:
> >
> > Call Trace:
> > __kretprobe_trampoline_handler+0xca/0x150
> > trampoline_handler+0x3d/0x50
> > kretprobe_trampoline+0x29/0x50
> > ? init_test_probes.cold+0x323/0x387
> > elfcorehdr_read+0x10/0x10
> > init_kprobes+0x144/0x14c
> > ? init_optprobes+0x15/0x15
> > do_one_initcall+0x72/0x280
> > kernel_init_freeable+0x174/0x2cd
> > ? rest_init+0x122/0x122
> > kernel_init+0xa/0x10e
> > ret_from_fork+0x22/0x30
> >
> > Though, init_test_probes.cold() (the real return address) is still
> > listed as unreliable. Maybe we need a call to kretprobe_find_ret_addr()
> > in show_trace_log_lvl(), similar to the ftrace_graph_ret_addr() call
> > there.
>
> Thanks for the test!
> OK, that could be acceptable. However, push %sp still needed for accessing
> stack address from kretprobe handler via pt_regs. (regs->sp must point
> the stack address)
> Anyway, with int3, it pushes one more entry for emulating call, so I think
> it is OK.
> Let me update the series!
>
Hi Misami,
Did you get a chance to follow up on this? I checked v5.13-rc1 and it
still has this issue. Inability to capture a stack trace from BPF
kretprobes is a pretty big problem for some applications, it would be
great to get this fixed. Thanks!
> Thank you!
>
> >
> >
> >
> > diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> > index 06f33bfebc50..70188fdd288e 100644
> > --- a/arch/x86/kernel/kprobes/core.c
> > +++ b/arch/x86/kernel/kprobes/core.c
> > @@ -766,19 +766,19 @@ asm(
> > "kretprobe_trampoline:\n"
> > /* We don't bother saving the ss register */
> > #ifdef CONFIG_X86_64
> > - " pushq %rsp\n"
> > + /* Push fake return address to tell the unwinder it's a kretprobe */
> > + " pushq $kretprobe_trampoline\n"
> > UNWIND_HINT_FUNC
> > " pushfq\n"
> > SAVE_REGS_STRING
> > " movq %rsp, %rdi\n"
> > " call trampoline_handler\n"
> > - /* Replace saved sp with true return address. */
> > + /* Replace the fake return address with the real one. */
> > " movq %rax, 19*8(%rsp)\n"
> > RESTORE_REGS_STRING
> > " popfq\n"
> > #else
> > " pushl %esp\n"
> > - UNWIND_HINT_FUNC
> > " pushfl\n"
> > SAVE_REGS_STRING
> > " movl %esp, %eax\n"
> > diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
> > index 1d1b9388a1b1..1d3de84d2410 100644
> > --- a/arch/x86/kernel/unwind_orc.c
> > +++ b/arch/x86/kernel/unwind_orc.c
> > @@ -548,8 +548,7 @@ bool unwind_next_frame(struct unwind_state *state)
> > * In those cases, find the correct return address from
> > * task->kretprobe_instances list.
> > */
> > - if (state->ip == sp ||
> > - is_kretprobe_trampoline(state->ip))
> > + if (is_kretprobe_trampoline(state->ip))
> > state->ip = kretprobe_find_ret_addr(state->task,
> > &state->kr_iter);
> >
> >
> >
>
>
> --
> Masami Hiramatsu <[email protected]>
On Mon, 17 May 2021 14:06:24 -0700
Andrii Nakryiko <[email protected]> wrote:
> On Tue, Mar 16, 2021 at 1:45 AM Masami Hiramatsu <[email protected]> wrote:
> >
> > On Mon, 15 Mar 2021 21:30:03 -0500
> > Josh Poimboeuf <[email protected]> wrote:
> >
> > > On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote:
> > > > Hello,
> > > >
> > > > Here is the 2nd version of the series to fix the stacktrace with kretprobe.
> > > >
> > > > The 1st series is here;
> > > >
> > > > https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
> > > >
> > > > In this version I merged the ORC unwinder fix for kretprobe which discussed in the
> > > > previous thread. [3/10] is updated according to the Miroslav's comment. [4/10] is
> > > > updated for simplify the code. [5/10]-[9/10] are discussed in the previsous tread
> > > > and are introduced to the series.
> > > >
> > > > Daniel, can you also test this again? I and Josh discussed a bit different
> > > > method and I've implemented it on this version.
> > > >
> > > > This actually changes the kretprobe behavisor a bit, now the instraction pointer in
> > > > the pt_regs passed to kretprobe user handler is correctly set the real return
> > > > address. So user handlers can get it via instruction_pointer() API.
> > >
> > > When I add WARN_ON(1) to a test kretprobe, it doesn't unwind properly.
> > >
> > > show_trace_log_lvl() reads the entire stack in lockstep with calls to
> > > the unwinder so that it can decide which addresses get prefixed with
> > > '?'. So it expects to find an actual return address on the stack.
> > > Instead it finds %rsp. So it never syncs up with unwind_next_frame()
> > > and shows all remaining addresses as unreliable.
> > >
> > > Call Trace:
> > > __kretprobe_trampoline_handler+0xca/0x1a0
> > > trampoline_handler+0x3d/0x50
> > > kretprobe_trampoline+0x25/0x50
> > > ? init_test_probes.cold+0x323/0x387
> > > ? init_kprobes+0x144/0x14c
> > > ? init_optprobes+0x15/0x15
> > > ? do_one_initcall+0x5b/0x300
> > > ? lock_is_held_type+0xe8/0x140
> > > ? kernel_init_freeable+0x174/0x2cd
> > > ? rest_init+0x233/0x233
> > > ? kernel_init+0xa/0x11d
> > > ? ret_from_fork+0x22/0x30
> > >
> > > How about pushing 'kretprobe_trampoline' instead of %rsp for the return
> > > address placeholder. That fixes the above test, and removes the need
> > > for the weird 'state->ip == sp' check:
> > >
> > > Call Trace:
> > > __kretprobe_trampoline_handler+0xca/0x150
> > > trampoline_handler+0x3d/0x50
> > > kretprobe_trampoline+0x29/0x50
> > > ? init_test_probes.cold+0x323/0x387
> > > elfcorehdr_read+0x10/0x10
> > > init_kprobes+0x144/0x14c
> > > ? init_optprobes+0x15/0x15
> > > do_one_initcall+0x72/0x280
> > > kernel_init_freeable+0x174/0x2cd
> > > ? rest_init+0x122/0x122
> > > kernel_init+0xa/0x10e
> > > ret_from_fork+0x22/0x30
> > >
> > > Though, init_test_probes.cold() (the real return address) is still
> > > listed as unreliable. Maybe we need a call to kretprobe_find_ret_addr()
> > > in show_trace_log_lvl(), similar to the ftrace_graph_ret_addr() call
> > > there.
> >
> > Thanks for the test!
> > OK, that could be acceptable. However, push %sp still needed for accessing
> > stack address from kretprobe handler via pt_regs. (regs->sp must point
> > the stack address)
> > Anyway, with int3, it pushes one more entry for emulating call, so I think
> > it is OK.
> > Let me update the series!
> >
>
> Hi Misami,
>
> Did you get a chance to follow up on this? I checked v5.13-rc1 and it
> still has this issue. Inability to capture a stack trace from BPF
> kretprobes is a pretty big problem for some applications, it would be
> great to get this fixed. Thanks!
OK, let me rework this series.
Thank you,
>
>
> > Thank you!
> >
> > >
> > >
> > >
> > > diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> > > index 06f33bfebc50..70188fdd288e 100644
> > > --- a/arch/x86/kernel/kprobes/core.c
> > > +++ b/arch/x86/kernel/kprobes/core.c
> > > @@ -766,19 +766,19 @@ asm(
> > > "kretprobe_trampoline:\n"
> > > /* We don't bother saving the ss register */
> > > #ifdef CONFIG_X86_64
> > > - " pushq %rsp\n"
> > > + /* Push fake return address to tell the unwinder it's a kretprobe */
> > > + " pushq $kretprobe_trampoline\n"
> > > UNWIND_HINT_FUNC
> > > " pushfq\n"
> > > SAVE_REGS_STRING
> > > " movq %rsp, %rdi\n"
> > > " call trampoline_handler\n"
> > > - /* Replace saved sp with true return address. */
> > > + /* Replace the fake return address with the real one. */
> > > " movq %rax, 19*8(%rsp)\n"
> > > RESTORE_REGS_STRING
> > > " popfq\n"
> > > #else
> > > " pushl %esp\n"
> > > - UNWIND_HINT_FUNC
> > > " pushfl\n"
> > > SAVE_REGS_STRING
> > > " movl %esp, %eax\n"
> > > diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
> > > index 1d1b9388a1b1..1d3de84d2410 100644
> > > --- a/arch/x86/kernel/unwind_orc.c
> > > +++ b/arch/x86/kernel/unwind_orc.c
> > > @@ -548,8 +548,7 @@ bool unwind_next_frame(struct unwind_state *state)
> > > * In those cases, find the correct return address from
> > > * task->kretprobe_instances list.
> > > */
> > > - if (state->ip == sp ||
> > > - is_kretprobe_trampoline(state->ip))
> > > + if (is_kretprobe_trampoline(state->ip))
> > > state->ip = kretprobe_find_ret_addr(state->task,
> > > &state->kr_iter);
> > >
> > >
> > >
> >
> >
> > --
> > Masami Hiramatsu <[email protected]>
--
Masami Hiramatsu <[email protected]>
Masami Hiramatsu wrote:
> Replace arch_deref_entry_point() with dereference_function_descriptor()
> because those are doing same thing.
It's not quite the same -- you need dereference_symbol_descriptor().
- Naveen
On Sun, May 23, 2021 at 7:22 AM Masami Hiramatsu <[email protected]> wrote:
>
> On Mon, 17 May 2021 14:06:24 -0700
> Andrii Nakryiko <[email protected]> wrote:
>
> > On Tue, Mar 16, 2021 at 1:45 AM Masami Hiramatsu <[email protected]> wrote:
> > >
> > > On Mon, 15 Mar 2021 21:30:03 -0500
> > > Josh Poimboeuf <[email protected]> wrote:
> > >
> > > > On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote:
> > > > > Hello,
> > > > >
> > > > > Here is the 2nd version of the series to fix the stacktrace with kretprobe.
> > > > >
> > > > > The 1st series is here;
> > > > >
> > > > > https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
> > > > >
> > > > > In this version I merged the ORC unwinder fix for kretprobe which discussed in the
> > > > > previous thread. [3/10] is updated according to the Miroslav's comment. [4/10] is
> > > > > updated for simplify the code. [5/10]-[9/10] are discussed in the previsous tread
> > > > > and are introduced to the series.
> > > > >
> > > > > Daniel, can you also test this again? I and Josh discussed a bit different
> > > > > method and I've implemented it on this version.
> > > > >
> > > > > This actually changes the kretprobe behavisor a bit, now the instraction pointer in
> > > > > the pt_regs passed to kretprobe user handler is correctly set the real return
> > > > > address. So user handlers can get it via instruction_pointer() API.
> > > >
> > > > When I add WARN_ON(1) to a test kretprobe, it doesn't unwind properly.
> > > >
> > > > show_trace_log_lvl() reads the entire stack in lockstep with calls to
> > > > the unwinder so that it can decide which addresses get prefixed with
> > > > '?'. So it expects to find an actual return address on the stack.
> > > > Instead it finds %rsp. So it never syncs up with unwind_next_frame()
> > > > and shows all remaining addresses as unreliable.
> > > >
> > > > Call Trace:
> > > > __kretprobe_trampoline_handler+0xca/0x1a0
> > > > trampoline_handler+0x3d/0x50
> > > > kretprobe_trampoline+0x25/0x50
> > > > ? init_test_probes.cold+0x323/0x387
> > > > ? init_kprobes+0x144/0x14c
> > > > ? init_optprobes+0x15/0x15
> > > > ? do_one_initcall+0x5b/0x300
> > > > ? lock_is_held_type+0xe8/0x140
> > > > ? kernel_init_freeable+0x174/0x2cd
> > > > ? rest_init+0x233/0x233
> > > > ? kernel_init+0xa/0x11d
> > > > ? ret_from_fork+0x22/0x30
> > > >
> > > > How about pushing 'kretprobe_trampoline' instead of %rsp for the return
> > > > address placeholder. That fixes the above test, and removes the need
> > > > for the weird 'state->ip == sp' check:
> > > >
> > > > Call Trace:
> > > > __kretprobe_trampoline_handler+0xca/0x150
> > > > trampoline_handler+0x3d/0x50
> > > > kretprobe_trampoline+0x29/0x50
> > > > ? init_test_probes.cold+0x323/0x387
> > > > elfcorehdr_read+0x10/0x10
> > > > init_kprobes+0x144/0x14c
> > > > ? init_optprobes+0x15/0x15
> > > > do_one_initcall+0x72/0x280
> > > > kernel_init_freeable+0x174/0x2cd
> > > > ? rest_init+0x122/0x122
> > > > kernel_init+0xa/0x10e
> > > > ret_from_fork+0x22/0x30
> > > >
> > > > Though, init_test_probes.cold() (the real return address) is still
> > > > listed as unreliable. Maybe we need a call to kretprobe_find_ret_addr()
> > > > in show_trace_log_lvl(), similar to the ftrace_graph_ret_addr() call
> > > > there.
> > >
> > > Thanks for the test!
> > > OK, that could be acceptable. However, push %sp still needed for accessing
> > > stack address from kretprobe handler via pt_regs. (regs->sp must point
> > > the stack address)
> > > Anyway, with int3, it pushes one more entry for emulating call, so I think
> > > it is OK.
> > > Let me update the series!
> > >
> >
> > Hi Misami,
> >
> > Did you get a chance to follow up on this? I checked v5.13-rc1 and it
> > still has this issue. Inability to capture a stack trace from BPF
> > kretprobes is a pretty big problem for some applications, it would be
> > great to get this fixed. Thanks!
>
> OK, let me rework this series.
Great, thank you! Looking forward.
>
> Thank you,
>
>
> >
> >
> > > Thank you!
> > >
> > > >
> > > >
> > > >
> > > > diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> > > > index 06f33bfebc50..70188fdd288e 100644
> > > > --- a/arch/x86/kernel/kprobes/core.c
> > > > +++ b/arch/x86/kernel/kprobes/core.c
> > > > @@ -766,19 +766,19 @@ asm(
> > > > "kretprobe_trampoline:\n"
> > > > /* We don't bother saving the ss register */
> > > > #ifdef CONFIG_X86_64
> > > > - " pushq %rsp\n"
> > > > + /* Push fake return address to tell the unwinder it's a kretprobe */
> > > > + " pushq $kretprobe_trampoline\n"
> > > > UNWIND_HINT_FUNC
> > > > " pushfq\n"
> > > > SAVE_REGS_STRING
> > > > " movq %rsp, %rdi\n"
> > > > " call trampoline_handler\n"
> > > > - /* Replace saved sp with true return address. */
> > > > + /* Replace the fake return address with the real one. */
> > > > " movq %rax, 19*8(%rsp)\n"
> > > > RESTORE_REGS_STRING
> > > > " popfq\n"
> > > > #else
> > > > " pushl %esp\n"
> > > > - UNWIND_HINT_FUNC
> > > > " pushfl\n"
> > > > SAVE_REGS_STRING
> > > > " movl %esp, %eax\n"
> > > > diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
> > > > index 1d1b9388a1b1..1d3de84d2410 100644
> > > > --- a/arch/x86/kernel/unwind_orc.c
> > > > +++ b/arch/x86/kernel/unwind_orc.c
> > > > @@ -548,8 +548,7 @@ bool unwind_next_frame(struct unwind_state *state)
> > > > * In those cases, find the correct return address from
> > > > * task->kretprobe_instances list.
> > > > */
> > > > - if (state->ip == sp ||
> > > > - is_kretprobe_trampoline(state->ip))
> > > > + if (is_kretprobe_trampoline(state->ip))
> > > > state->ip = kretprobe_find_ret_addr(state->task,
> > > > &state->kr_iter);
> > > >
> > > >
> > > >
> > >
> > >
> > > --
> > > Masami Hiramatsu <[email protected]>
>
>
> --
> Masami Hiramatsu <[email protected]>
Hi Naveen,
On Mon, 24 May 2021 14:56:50 +0530
"Naveen N. Rao" <[email protected]> wrote:
> Masami Hiramatsu wrote:
> > Replace arch_deref_entry_point() with dereference_function_descriptor()
> > because those are doing same thing.
>
> It's not quite the same -- you need dereference_symbol_descriptor().
Got it! dereference_function_descriptor() doesn't handle the symbols
in the modules.
Thank you!
>
>
> - Naveen
>
--
Masami Hiramatsu <[email protected]>