Hello.
Currently ret-probes can't work (the application will likely crash)
if the probed function does not return, and this is even documented
in handle_trampoline().
This series tries to take a first step towards fixing the problem on x86:
it assumes that the probed functions use the same stack.
TODO: sigaltstack() can obviously break this assumption.
NOTE: I don't think it is possible to make this logic 100% correct;
user space can do anything with its stack. For example, the
application can use longjmp-like tricks to implement coroutines,
and the kernel can do nothing in this case. The application (or debugger)
should cooperate somehow to let the kernel know what's going on.
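Just to illustrate (this example is not part of the series, the names
below are made up): a trivial user-space "coroutine" based on ucontext
switches to a private stack and the probed function never returns, so
the kernel alone can't do anything sane here:

	#include <ucontext.h>

	static ucontext_t main_ctx, co_ctx;
	static char co_stack[64 * 1024];

	static void func(void)
	{
		/* runs on co_stack; if ret-probed, it never does "ret"
		   through the hijacked return address */
		swapcontext(&co_ctx, &main_ctx);
	}

	int main(void)
	{
		getcontext(&co_ctx);
		co_ctx.uc_stack.ss_sp = co_stack;
		co_ctx.uc_stack.ss_size = sizeof(co_stack);
		co_ctx.uc_link = &main_ctx;
		makecontext(&co_ctx, func, 0);

		swapcontext(&main_ctx, &co_ctx);
		return 0;
	}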
Jan, David, Pratyush, Ananth, do you think your architecture can reuse
the hacks/hooks added by this series and do something like x86 does?
If not, we will probably need "plan B" mentioned in 08/10, but I'd
like to avoid this if possible. Or any other thoughts?
Oleg.
arch/arm/include/asm/uprobes.h | 3 +
arch/arm/probes/uprobes/core.c | 3 +-
arch/powerpc/include/asm/uprobes.h | 3 +
arch/powerpc/kernel/uprobes.c | 3 +-
arch/s390/include/asm/uprobes.h | 3 +
arch/s390/kernel/uprobes.c | 3 +-
arch/x86/include/asm/uprobes.h | 4 +
arch/x86/kernel/uprobes.c | 15 +++-
include/linux/uprobes.h | 5 +-
kernel/events/uprobes.c | 170 +++++++++++++++++++++---------------
10 files changed, 136 insertions(+), 76 deletions(-)
Cosmetic. Add the new trivial helper, get_uprobe(). It matches
put_uprobe() we already have and we can simplify a couple of its
users.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 39 ++++++++++++++++++++-------------------
1 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index cb346f2..a9847b4 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -366,6 +366,18 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn);
}
+static struct uprobe *get_uprobe(struct uprobe *uprobe)
+{
+ atomic_inc(&uprobe->ref);
+ return uprobe;
+}
+
+static void put_uprobe(struct uprobe *uprobe)
+{
+ if (atomic_dec_and_test(&uprobe->ref))
+ kfree(uprobe);
+}
+
static int match_uprobe(struct uprobe *l, struct uprobe *r)
{
if (l->inode < r->inode)
@@ -393,10 +405,8 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
while (n) {
uprobe = rb_entry(n, struct uprobe, rb_node);
match = match_uprobe(&u, uprobe);
- if (!match) {
- atomic_inc(&uprobe->ref);
- return uprobe;
- }
+ if (!match)
+ return get_uprobe(uprobe);
if (match < 0)
n = n->rb_left;
@@ -432,10 +442,8 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
parent = *p;
u = rb_entry(parent, struct uprobe, rb_node);
match = match_uprobe(uprobe, u);
- if (!match) {
- atomic_inc(&u->ref);
- return u;
- }
+ if (!match)
+ return get_uprobe(u);
if (match < 0)
p = &parent->rb_left;
@@ -472,12 +480,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
return u;
}
-static void put_uprobe(struct uprobe *uprobe)
-{
- if (atomic_dec_and_test(&uprobe->ref))
- kfree(uprobe);
-}
-
static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
{
struct uprobe *uprobe, *cur_uprobe;
@@ -1039,14 +1041,14 @@ static void build_probe_list(struct inode *inode,
if (u->inode != inode || u->offset < min)
break;
list_add(&u->pending_list, head);
- atomic_inc(&u->ref);
+ get_uprobe(u);
}
for (t = n; (t = rb_next(t)); ) {
u = rb_entry(t, struct uprobe, rb_node);
if (u->inode != inode || u->offset > max)
break;
list_add(&u->pending_list, head);
- atomic_inc(&u->ref);
+ get_uprobe(u);
}
}
spin_unlock(&uprobes_treelock);
@@ -1437,7 +1439,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
return -ENOMEM;
*n = *o;
- atomic_inc(&n->uprobe->ref);
+ get_uprobe(n->uprobe);
n->next = NULL;
*p = n;
@@ -1565,8 +1567,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
}
- atomic_inc(&uprobe->ref);
- ri->uprobe = uprobe;
+ ri->uprobe = get_uprobe(uprobe);
ri->func = instruction_pointer(regs);
ri->orig_ret_vaddr = orig_ret_vaddr;
ri->chained = chained;
--
1.5.5.1
We can simplify uprobe_free_utask() and handle_uretprobe_chain()
if we add a simple helper which does put_uprobe/kfree and returns
the ->next return_instance.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 27 +++++++++++++--------------
1 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a9847b4..d8c702f 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1378,6 +1378,14 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
return instruction_pointer(regs);
}
+static struct return_instance *free_ret_instance(struct return_instance *ri)
+{
+ struct return_instance *next = ri->next;
+ put_uprobe(ri->uprobe);
+ kfree(ri);
+ return next;
+}
+
/*
* Called with no locks held.
* Called in context of a exiting or a exec-ing thread.
@@ -1385,7 +1393,7 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
void uprobe_free_utask(struct task_struct *t)
{
struct uprobe_task *utask = t->utask;
- struct return_instance *ri, *tmp;
+ struct return_instance *ri;
if (!utask)
return;
@@ -1394,13 +1402,8 @@ void uprobe_free_utask(struct task_struct *t)
put_uprobe(utask->active_uprobe);
ri = utask->return_instances;
- while (ri) {
- tmp = ri;
- ri = ri->next;
-
- put_uprobe(tmp->uprobe);
- kfree(tmp);
- }
+ while (ri)
+ ri = free_ret_instance(ri);
xol_free_insn_slot(t);
kfree(utask);
@@ -1770,7 +1773,7 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
static bool handle_trampoline(struct pt_regs *regs)
{
struct uprobe_task *utask;
- struct return_instance *ri, *tmp;
+ struct return_instance *ri;
bool chained;
utask = current->utask;
@@ -1792,11 +1795,7 @@ static bool handle_trampoline(struct pt_regs *regs)
handle_uretprobe_chain(ri, regs);
chained = ri->chained;
- put_uprobe(ri->uprobe);
-
- tmp = ri;
- ri = ri->next;
- kfree(tmp);
+ ri = free_ret_instance(ri);
utask->depth--;
if (!chained)
--
1.5.5.1
1. It doesn't make sense to continue if handle_trampoline() fails,
change handle_swbp() to always return after this call.
2. Turn pr_warn() into uprobe_warn(), and change handle_trampoline()
to send SIGILL on failure. It is pointless to return to user mode
with the corrupted instruction_pointer() which we can't restore.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 21 ++++++++++-----------
1 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d8c702f..f84d940 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1770,7 +1770,7 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
up_read(&uprobe->register_rwsem);
}
-static bool handle_trampoline(struct pt_regs *regs)
+static void handle_trampoline(struct pt_regs *regs)
{
struct uprobe_task *utask;
struct return_instance *ri;
@@ -1778,11 +1778,11 @@ static bool handle_trampoline(struct pt_regs *regs)
utask = current->utask;
if (!utask)
- return false;
+ goto sigill;
ri = utask->return_instances;
if (!ri)
- return false;
+ goto sigill;
/*
* TODO: we should throw out return_instance's invalidated by
@@ -1804,8 +1804,12 @@ static bool handle_trampoline(struct pt_regs *regs)
}
utask->return_instances = ri;
+ return;
+
+ sigill:
+ uprobe_warn(current, "handle uretprobe, sending SIGILL.");
+ force_sig_info(SIGILL, SEND_SIG_FORCED, current);
- return true;
}
bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
@@ -1824,13 +1828,8 @@ static void handle_swbp(struct pt_regs *regs)
int uninitialized_var(is_swbp);
bp_vaddr = uprobe_get_swbp_addr(regs);
- if (bp_vaddr == get_trampoline_vaddr()) {
- if (handle_trampoline(regs))
- return;
-
- pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
- current->pid, current->tgid);
- }
+ if (bp_vaddr == get_trampoline_vaddr())
+ return handle_trampoline(regs);
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
if (!uprobe) {
--
1.5.5.1
Turn the last pr_warn() in uprobes.c into uprobe_warn().
While at it:
- s/kzalloc/kmalloc, we initialize every member of ri
- remove the pointless comment above the obvious code
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 10 +++-------
1 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f84d940..2d51580 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1541,9 +1541,9 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
return;
}
- ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
+ ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
if (!ri)
- goto fail;
+ return;
trampoline_vaddr = get_trampoline_vaddr();
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
@@ -1561,8 +1561,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
* This situation is not possible. Likely we have an
* attack from user-space.
*/
- pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
- current->pid, current->tgid);
+ uprobe_warn(current, "handle tail call");
goto fail;
}
@@ -1576,13 +1575,10 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
ri->chained = chained;
utask->depth++;
-
- /* add instance to the stack */
ri->next = utask->return_instances;
utask->return_instances = ri;
return;
-
fail:
kfree(ri);
}
--
1.5.5.1
No functional changes, preparation.
Add the new helper, find_next_ret_chain(), which finds the first !chained
entry and returns its ->next. Yes, it is suboptimal. We probably want to
turn ->chained into ->start_of_this_chain pointer and avoid another loop.
But this needs the boring changes in dup_utask(), so let's do this later.
Change the main loop in handle_trampoline() to unwind the stack until ri
is equal to the pointer returned by this new helper.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 27 ++++++++++++++++-----------
1 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2d51580..12245ad 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1766,11 +1766,22 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
up_read(&uprobe->register_rwsem);
}
+static struct return_instance *find_next_ret_chain(struct return_instance *ri)
+{
+ bool chained;
+
+ do {
+ chained = ri->chained;
+ ri = ri->next; /* can't be NULL if chained */
+ } while (chained);
+
+ return ri;
+}
+
static void handle_trampoline(struct pt_regs *regs)
{
struct uprobe_task *utask;
- struct return_instance *ri;
- bool chained;
+ struct return_instance *ri, *next;
utask = current->utask;
if (!utask)
@@ -1780,24 +1791,18 @@ static void handle_trampoline(struct pt_regs *regs)
if (!ri)
goto sigill;
+ next = find_next_ret_chain(ri);
/*
* TODO: we should throw out return_instance's invalidated by
* longjmp(), currently we assume that the probed function always
* returns.
*/
instruction_pointer_set(regs, ri->orig_ret_vaddr);
-
- for (;;) {
+ do {
handle_uretprobe_chain(ri, regs);
-
- chained = ri->chained;
ri = free_ret_instance(ri);
utask->depth--;
-
- if (!chained)
- break;
- BUG_ON(!ri);
- }
+ } while (ri != next);
utask->return_instances = ri;
return;
--
1.5.5.1
Introduce the empty "struct arch_uretprobe", add the new member
of this type into "struct return_instance" and pass it as the new
argument to arch_uretprobe_hijack_return_addr().
Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/arm/include/asm/uprobes.h | 3 +++
arch/arm/probes/uprobes/core.c | 3 ++-
arch/powerpc/include/asm/uprobes.h | 3 +++
arch/powerpc/kernel/uprobes.c | 3 ++-
arch/s390/include/asm/uprobes.h | 3 +++
arch/s390/kernel/uprobes.c | 3 ++-
arch/x86/include/asm/uprobes.h | 3 +++
arch/x86/kernel/uprobes.c | 3 ++-
include/linux/uprobes.h | 3 ++-
kernel/events/uprobes.c | 6 ++++--
10 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/arch/arm/include/asm/uprobes.h b/arch/arm/include/asm/uprobes.h
index 9472c20..dec6313 100644
--- a/arch/arm/include/asm/uprobes.h
+++ b/arch/arm/include/asm/uprobes.h
@@ -27,6 +27,9 @@ struct arch_uprobe_task {
unsigned long saved_trap_no;
};
+struct arch_uretprobe {
+};
+
struct arch_uprobe {
u8 insn[MAX_UINSN_BYTES];
unsigned long ixol[2];
diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c
index d1329f1..2787490 100644
--- a/arch/arm/probes/uprobes/core.c
+++ b/arch/arm/probes/uprobes/core.c
@@ -61,7 +61,8 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
unsigned long
-arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
+ unsigned long trampoline_vaddr,
struct pt_regs *regs)
{
unsigned long orig_ret_vaddr;
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 7422a99..82ecba9 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -41,6 +41,9 @@ struct arch_uprobe {
};
};
+struct arch_uretprobe {
+};
+
struct arch_uprobe_task {
unsigned long saved_trap_nr;
};
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 003b209..70e3a31 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -194,7 +194,8 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
unsigned long
-arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
+ unsigned long trampoline_vaddr, struct pt_regs *regs)
{
unsigned long orig_ret_vaddr;
diff --git a/arch/s390/include/asm/uprobes.h b/arch/s390/include/asm/uprobes.h
index 1411dff..a59c10f 100644
--- a/arch/s390/include/asm/uprobes.h
+++ b/arch/s390/include/asm/uprobes.h
@@ -26,6 +26,9 @@ struct arch_uprobe {
unsigned int saved_int_code;
};
+struct arch_uretprobe {
+};
+
struct arch_uprobe_task {
};
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index cc73280..dc03d3c 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -137,7 +137,8 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
current->thread.per_event.address = current->utask->vaddr;
}
-unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+unsigned long arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
+ unsigned long trampoline,
struct pt_regs *regs)
{
unsigned long orig;
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 74f4c2f..f011fd0 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -56,6 +56,9 @@ struct arch_uprobe {
};
};
+struct arch_uretprobe {
+};
+
struct arch_uprobe_task {
#ifdef CONFIG_X86_64
unsigned long saved_scratch_register;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8b96a94..0270315 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -901,7 +901,8 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
unsigned long
-arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
+ unsigned long trampoline_vaddr, struct pt_regs *regs)
{
int rasize = sizeof_long(), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 60beb5d..144571b 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -127,7 +127,8 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
+ unsigned long trampoline_vaddr, struct pt_regs *regs);
extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 12245ad..19af44a 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -90,9 +90,11 @@ struct return_instance {
struct uprobe *uprobe;
unsigned long func;
unsigned long orig_ret_vaddr; /* original return address */
- bool chained; /* true, if instance is nested */
+ bool chained; /* true, if instance is nested */
struct return_instance *next; /* keep as stack */
+
+ struct arch_uretprobe auret;
};
/*
@@ -1546,7 +1548,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
return;
trampoline_vaddr = get_trampoline_vaddr();
- orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
+ orig_ret_vaddr = arch_uretprobe_hijack_return_addr(&ri->auret, trampoline_vaddr, regs);
if (orig_ret_vaddr == -1)
goto fail;
--
1.5.5.1
Add the x86-specific arch_uretprobe_is_alive() helper and define its
"weak" version for other architectures.
It returns true if the stack frame mangled by prepare_uretprobe() is
still on stack. So if it returns false, we know that the probed func
has already returned.
TODO: this assumes that the probed app can't use multiple stacks (say
sigaltstack). We will try to improve this logic later.
Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/include/asm/uprobes.h | 1 +
arch/x86/kernel/uprobes.c | 6 ++++++
include/linux/uprobes.h | 1 +
kernel/events/uprobes.c | 4 ++++
4 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index f011fd0..60777f3 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -57,6 +57,7 @@ struct arch_uprobe {
};
struct arch_uretprobe {
+ unsigned long sp;
};
struct arch_uprobe_task {
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 0270315..868dc47 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -907,6 +907,7 @@ arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
int rasize = sizeof_long(), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
+ auret->sp = regs->sp;
if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
return -1;
@@ -927,3 +928,8 @@ arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
return -1;
}
+
+bool arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs)
+{
+ return regs->sp <= auret->sp;
+}
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 144571b..1ed7502 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -129,6 +129,7 @@ extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned l
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern unsigned long arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
unsigned long trampoline_vaddr, struct pt_regs *regs);
+extern bool arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs);
extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 19af44a..0f68ea2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1820,6 +1820,10 @@ bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
return false;
}
+bool __weak arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs)
+{
+ return true;
+}
/*
* Run handler and ask thread to singlestep.
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
--
1.5.5.1
Test-case:
#include <stdio.h>
#include <setjmp.h>

jmp_buf jmp;

void func_2(void)
{
	longjmp(jmp, 1);
}

void func_1(void)
{
	if (setjmp(jmp))
		return;
	func_2();
	printf("ERR!! I am running on the caller's stack\n");
}

int main(void)
{
	func_1();
	return 0;
}
fails if you probe func_1() and func_2() because handle_trampoline()
assumes that the probed function must return and hit the bp
installed by prepare_uretprobe(). But in this case func_2() does not
return, so when func_1() returns the kernel uses the no longer valid
return_instance of func_2().
Change handle_trampoline() to unwind ->return_instances until we know
that the next chain is alive or NULL, this ensures that the current
chain is the last we need to report and free.
Alternatively, every return_instance could use unique trampoline_vaddr,
in this case we could use it as a key. And this could solve the problem
with sigaltstack() automatically.
But this approach needs more changes, and it puts the "hard" limit on
MAX_URETPROBE_DEPTH. Plus it can not solve another problem partially
fixed by the next patch.
Note: this change has no effect on !x86, the arch-agnostic version of
arch_uretprobe_is_alive() just returns "true".
TODO: as documented by the previous change, arch_uretprobe_is_alive()
can be fooled by sigaltstack/etc.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 29 ++++++++++++++++++-----------
1 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0f68ea2..0dd7ff7 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1784,6 +1784,7 @@ static void handle_trampoline(struct pt_regs *regs)
{
struct uprobe_task *utask;
struct return_instance *ri, *next;
+ bool valid;
utask = current->utask;
if (!utask)
@@ -1793,18 +1794,24 @@ static void handle_trampoline(struct pt_regs *regs)
if (!ri)
goto sigill;
- next = find_next_ret_chain(ri);
- /*
- * TODO: we should throw out return_instance's invalidated by
- * longjmp(), currently we assume that the probed function always
- * returns.
- */
- instruction_pointer_set(regs, ri->orig_ret_vaddr);
do {
- handle_uretprobe_chain(ri, regs);
- ri = free_ret_instance(ri);
- utask->depth--;
- } while (ri != next);
+ /*
+ * We should throw out the frames invalidated by longjmp().
+ * If this chain is valid, then the next one should be alive
+ * or NULL; the latter case means that nobody but ri->func
+ * could hit this trampoline on return. TODO: sigaltstack().
+ */
+ next = find_next_ret_chain(ri);
+ valid = !next || arch_uretprobe_is_alive(&next->auret, regs);
+
+ instruction_pointer_set(regs, ri->orig_ret_vaddr);
+ do {
+ if (valid)
+ handle_uretprobe_chain(ri, regs);
+ ri = free_ret_instance(ri);
+ utask->depth--;
+ } while (ri != next);
+ } while (!valid);
utask->return_instances = ri;
return;
--
1.5.5.1
Change prepare_uretprobe() to flush the !arch_uretprobe_is_alive()
return_instance's. This is not needed correctness-wise, but can help
to avoid the failure caused by MAX_URETPROBE_DEPTH.
Note: in this case arch_uretprobe_is_alive() can be false positive,
the stack can grow after longjmp(). Unfortunately, the kernel can't
100% solve this problem, but see the next patch.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 13 +++++++++++++
1 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0dd7ff7..b6433fb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1522,6 +1522,16 @@ static unsigned long get_trampoline_vaddr(void)
return trampoline_vaddr;
}
+static void cleanup_return_instances(struct uprobe_task *utask, struct pt_regs *regs)
+{
+ struct return_instance *ri = utask->return_instances;
+ while (ri && !arch_uretprobe_is_alive(&ri->auret, regs)) {
+ ri = free_ret_instance(ri);
+ utask->depth--;
+ }
+ utask->return_instances = ri;
+}
+
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
{
struct return_instance *ri;
@@ -1576,6 +1586,9 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
ri->orig_ret_vaddr = orig_ret_vaddr;
ri->chained = chained;
+ if (utask->depth) /* drop the entries invalidated by longjmp() */
+ cleanup_return_instances(utask, regs);
+
utask->depth++;
ri->next = utask->return_instances;
utask->return_instances = ri;
--
1.5.5.1
The previous change documents that cleanup_return_instances() can't
always detect the dead frames; the stack can grow. But there is one
special case which imho is worth fixing: arch_uretprobe_is_alive() can
return true when the stack didn't actually grow, but the next "call"
insn uses the already invalidated frame.
Test-case:
#include <stdio.h>
#include <setjmp.h>

jmp_buf jmp;
int nr = 1024;

void func_2(void)
{
	if (--nr == 0)
		return;
	longjmp(jmp, 1);
}

void func_1(void)
{
	setjmp(jmp);
	func_2();
}

int main(void)
{
	func_1();
	return 0;
}
If you ret-probe func_1() and func_2(), prepare_uretprobe() hits the
MAX_URETPROBE_DEPTH limit and the "return" from func_2() is not reported.
Add the new "bool on_call" argument to arch_uretprobe_is_alive().
The prepare_uretprobe()->cleanup_return_instances() path passes "true"
when we know that we can do a stricter check to detect the dead
frames: chained == F. In this case "sp" points to the new ret-addr,
so every frame which uses the same "sp" must be dead.
Note: arch_uretprobe_is_alive() could also re-read *sp and check if
this word is still trampoline_vaddr. This could obviously improve the
logic, but I'd like to avoid another copy_from_user() especially in
a case when we can't avoid the false "alive == T" positives.
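Just for illustration (a hypothetical helper, not part of this patch;
it reuses the existing sizeof_long() from arch/x86/kernel/uprobes.c and
mimics the copy_from_user() pattern in arch_uretprobe_hijack_return_addr()),
such a re-check could look roughly like:

	static bool ret_addr_is_trampoline(unsigned long sp, unsigned long trampoline_vaddr)
	{
		int rasize = sizeof_long();
		unsigned long word = 0; /* clear high bits for 32-bit apps */

		if (copy_from_user(&word, (void __user *)sp, rasize))
			return false;

		return word == trampoline_vaddr;
	}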
Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/kernel/uprobes.c | 10 ++++++++--
include/linux/uprobes.h | 3 ++-
kernel/events/uprobes.c | 12 +++++++-----
3 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 868dc47..f0ace39 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -929,7 +929,13 @@ arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
return -1;
}
-bool arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs)
+bool arch_uretprobe_is_alive(struct arch_uretprobe *auret,
+ bool on_call, struct pt_regs *regs)
{
- return regs->sp <= auret->sp;
+ unsigned long sp = regs->sp;
+
+ if (on_call) /* ->sp was just decremented by "call" insn */
+ sp += sizeof_long();
+
+ return sp <= auret->sp;
}
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 1ed7502..9907be4 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -129,7 +129,8 @@ extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned l
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern unsigned long arch_uretprobe_hijack_return_addr(struct arch_uretprobe *auret,
unsigned long trampoline_vaddr, struct pt_regs *regs);
-extern bool arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs);
+extern bool arch_uretprobe_is_alive(struct arch_uretprobe *auret,
+ bool on_call, struct pt_regs *regs);
extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b6433fb..ec697da 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1522,10 +1522,11 @@ static unsigned long get_trampoline_vaddr(void)
return trampoline_vaddr;
}
-static void cleanup_return_instances(struct uprobe_task *utask, struct pt_regs *regs)
+static void cleanup_return_instances(struct uprobe_task *utask,
+ bool on_call, struct pt_regs *regs)
{
struct return_instance *ri = utask->return_instances;
- while (ri && !arch_uretprobe_is_alive(&ri->auret, regs)) {
+ while (ri && !arch_uretprobe_is_alive(&ri->auret, on_call, regs)) {
ri = free_ret_instance(ri);
utask->depth--;
}
@@ -1587,7 +1588,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
ri->chained = chained;
if (utask->depth) /* drop the entries invalidated by longjmp() */
- cleanup_return_instances(utask, regs);
+ cleanup_return_instances(utask, !chained, regs);
utask->depth++;
ri->next = utask->return_instances;
@@ -1815,7 +1816,7 @@ static void handle_trampoline(struct pt_regs *regs)
* could hit this trampoline on return. TODO: sigaltstack().
*/
next = find_next_ret_chain(ri);
- valid = !next || arch_uretprobe_is_alive(&next->auret, regs);
+ valid = !next || arch_uretprobe_is_alive(&next->auret, false, regs);
instruction_pointer_set(regs, ri->orig_ret_vaddr);
do {
@@ -1840,7 +1841,8 @@ bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
return false;
}
-bool __weak arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs)
+bool __weak arch_uretprobe_is_alive(struct arch_uretprobe *auret,
+ bool on_call, struct pt_regs *regs)
{
return true;
}
--
1.5.5.1
* Oleg Nesterov <[email protected]> [2015-05-04 14:48:50]:
> Cosmetic. Add the new trivial helper, get_uprobe(). It matches
> put_uprobe() we already have and we can simplify a couple of its
> users.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
* Oleg Nesterov <[email protected]> [2015-05-04 14:48:54]:
> We can simplify uprobe_free_utask() and handle_uretprobe_chain()
> if we add a simple helper which does put_uprobe/kfree and returns
> the ->next return_instance.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
--
Thanks and Regards
Srikar Dronamraju
* Oleg Nesterov <[email protected]> [2015-05-04 14:48:58]:
> 1. It doesn't make sense to continue if handle_trampoline() fails,
> change handle_swbp() to always return after this call.
>
> 2. Turn pr_warn() into uprobe_warn(), and change handle_trampoline()
> to send SIGILL on failure. It is pointless to return to user mode
> with the corrupted instruction_pointer() which we can't restore.
I agree
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
Acked-by: Srikar Dronamraju <[email protected]>
--
Thanks and Regards
Srikar Dronamraju
* Oleg Nesterov <[email protected]> [2015-05-04 14:49:02]:
> Turn the last pr_warn() in uprobes.c into uprobe_warn().
>
> While at it:
>
> - s/kzalloc/kmalloc, we initialize every member of ri
>
> - remove the pointless comment above the obvious code
>
> Signed-off-by: Oleg Nesterov <[email protected]>
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
> ---
--
Thanks and Regards
Srikar Dronamraju
* Oleg Nesterov <[email protected]> [2015-05-04 14:49:06]:
> No functional changes, preparation.
>
> Add the new helper, find_next_ret_chain(), which finds the first !chained
> entry and returns its ->next. Yes, it is suboptimal. We probably want to
> turn ->chained into ->start_of_this_chain pointer and avoid another loop.
> But this needs the boring changes in dup_utask(), so let's do this later.
>
> Change the main loop in handle_trampoline() to unwind the stack until ri
> is equal to the pointer returned by this new helper.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
--
Thanks and Regards
Srikar Dronamraju
* Oleg Nesterov <[email protected]> [2015-05-04 14:49:10]:
> Introduce the empty "struct arch_uretprobe", add the new member
> of this type into "struct return_instance" and pass it as the new
> argument to arch_uretprobe_hijack_return_addr().
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
--
Thanks and Regards
Srikar Dronamraju
* Oleg Nesterov <[email protected]> [2015-05-04 14:49:14]:
> Add the x86-specific arch_uretprobe_is_alive() helper and define its
> "weak" version for other architectures.
>
> It returns true if the stack frame mangled by prepare_uretprobe() is
> still on stack. So if it returns false, we know that the probed func
> has already returned.
>
> TODO: this assumes that the probed app can't use multiple stacks (say
> sigaltstack). We will try to improve this logic later.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
> ---
--
Thanks and Regards
Srikar Dronamraju
* Oleg Nesterov <[email protected]> [2015-05-04 14:49:18]:
>
> fails if you probe func_1() and func_2() because handle_trampoline()
> assumes that the probed function must return and hit the bp
> installed by prepare_uretprobe(). But in this case func_2() does not
> return, so when func_1() returns the kernel uses the no longer valid
> return_instance of func_2().
>
> Change handle_trampoline() to unwind ->return_instances until we know
> that the next chain is alive or NULL, this ensures that the current
> chain is the last we need to report and free.
>
> Alternatively, every return_instance could use unique trampoline_vaddr,
> in this case we could use it as a key. And this could solve the problem
> with sigaltstack() automatically.
>
> But this approach needs more changes, and it puts the "hard" limit on
> MAX_URETPROBE_DEPTH. Plus it can not solve another problem partially
> fixed by the next patch.
>
> Note: this change has no effect on !x86, the arch-agnostic version of
> arch_uretprobe_is_alive() just returns "true".
>
> TODO: as documented by the previous change, arch_uretprobe_is_alive()
> can be fooled by sigaltstack/etc.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
--
Thanks and Regards
Srikar Dronamraju
>
> diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
> index f011fd0..60777f3 100644
> --- a/arch/x86/include/asm/uprobes.h
> +++ b/arch/x86/include/asm/uprobes.h
> @@ -57,6 +57,7 @@ struct arch_uprobe {
> };
>
> struct arch_uretprobe {
> + unsigned long sp;
While this looks good, I was wondering if you did think of having the sp
in the return_instance structure itself. I mean can we use
user_stack_pointer() to populate the ri->sp?
In which case the weak function itself should suffice for most archs.
Something like this.
prepare_uretprobe() we can have
ri->sp = user_stack_pointer(regs)
and handle_trampoline() would call something like
arch_uretprobe_is_alive(next->sp, regs);
bool __weak arch_uretprobe_is_alive(unsigned long sp, struct pt_regs *regs)
{
return user_stack_pointer(regs) <= sp;
}
Am I missing something?
>
--
Thanks and Regards
Srikar Dronamraju
* Oleg Nesterov <[email protected]> [2015-05-04 14:49:22]:
> Change prepare_uretprobe() to flush the !arch_uretprobe_is_alive()
> return_instance's. This is not needed correctness-wise, but can help
> to avoid the failure caused by MAX_URETPROBE_DEPTH.
>
> Note: in this case arch_uretprobe_is_alive() can be false positive,
> the stack can grow after longjmp(). Unfortunately, the kernel can't
> 100% solve this problem, but see the next patch.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> kernel/events/uprobes.c | 13 +++++++++++++
Looks good to me.
Acked-by: Srikar Dronamraju <[email protected]>
--
Thanks and Regards
Srikar Dronamraju
On 05/07, Srikar Dronamraju wrote:
>
> >
> > diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
> > index f011fd0..60777f3 100644
> > --- a/arch/x86/include/asm/uprobes.h
> > +++ b/arch/x86/include/asm/uprobes.h
> > @@ -57,6 +57,7 @@ struct arch_uprobe {
> > };
> >
> > struct arch_uretprobe {
> > + unsigned long sp;
>
> While this looks good, I was wondering if you did think of having the sp
> in the return_instance structure itself. I mean can we use
> user_stack_pointer() to populate the ri->sp?
Yes, yes, I considered this option. And note that we can clean up the
(a bit ugly) arch_uretprobe_hijack_return_addr() if we export
"struct return_instance" and pass it to the arch_ helper.
> In which case the weak function itself should suffice for most archs.
>
> Something like this.
> prepare_uretprobe() we can have
> ri->sp = user_stack_pointer(regs)
Yes, and we can do this without changing arch_uretprobe_hijack_return_addr()
interface (which imo should be changed anyway, but this is off-topic).
> and handle_trampoline() would call something like
>
> arch_uretprobe_is_alive(next->sp, regs);
>
> bool __weak arch_uretprobe_is_alive(unsigned long sp, struct pt_regs *regs)
> {
> return user_stack_pointer(regs) <= sp;
> }
The problem is, I simply do not know if this is right on !x86.
And. I wanted to ensure that if (say) arch/ppc needs something else to
save/check in hijack/alive, then this architecture can just add the new
members in arch_uretprobe and change the arch_ helpers.
> Am I missing something?
I do not know. Let's wait for the comments from arch/ maintainers?
Oleg.
>
> Yes, and we can do this without changing arch_uretprobe_hijack_return_addr()
> interface (which imo should be changed anyway, but this is off-topic).
>
> > and handle_trampoline() would call something like
> >
> > arch_uretprobe_is_alive(next->sp, regs);
> >
> > bool __weak arch_uretprobe_is_alive(unsigned long sp, struct pt_regs *regs)
> > {
> > return user_stack_pointer(regs) <= sp;
> > }
>
> The problem is, I simply do not know if this is right on !x86.
>
> And. I wanted to ensure that if (say) arch/ppc needs something else to
> save/check in hijack/alive, then this architecture can just add the new
> members in arch_uretprobe and change the arch_ helpers.
>
The above weak function should work with ppc. In fact I see only two
arches that define CONFIG_STACK_GROWSUP (metag and parisc), and neither
seems to have uprobes support for now. Even if they get uprobes support,
handling CONFIG_STACK_GROWSUP within the weak function should be easy.
The arches can also override this function (in which case we could even
drop the weak notation for now).
We even seem to use this assumption when kprobe_tracer/uprobe_tracer
fetch arguments from stack. See fetch_kernel_stack_address() /
fetch_user_stack_address() and get_user_stack_nth().
--
Thanks and Regards
Srikar Dronamraju
On 05/08, Srikar Dronamraju wrote:
>
> > Yes, and we can do this without changing arch_uretprobe_hijack_return_addr()
> > interface (which imo should be changed anyway, but this is off-topic).
> >
> > > and handle_trampoline() would call something like
> > >
> > > arch_uretprobe_is_alive(next->sp, regs);
> > >
> > > bool __weak arch_uretprobe_is_alive(unsigned long sp, struct pt_regs *regs)
> > > {
> > > return user_stack_pointer(regs) <= sp;
> > > }
> >
> > The problem is, I simply do not know if this is right on !x86.
> >
> > And. I wanted to ensure that if (say) arch/ppc needs something else to
> > save/check in hijack/alive, then this architecture can just add the new
> > members in arch_uretprobe and change the arch_ helpers.
>
> The above weak function should work with ppc.
I don't think so. Even if I know nothing about !x86.
> Infact I see only 2 arch
> that define CONFIG_STACK_GROWSUP
Ah, please forget about GROWSUP, this is not the problem.
> We even seem to use this assumption when kprobe_tracer/uprobe_tracer
> fetch arguments from stack. See fetch_kernel_stack_address() /
> fetch_user_stack_address() and get_user_stack_nth().
But this all is completely different.
No. I don't think arch_uretprobe_is_alive() above can work for powerpc,
at least the same way.
The problem is, when the function is called, the ret-addr is not pushed
on stack. If it was, then arch_uretprobe_hijack_return_addr() on powerpc
is just wrong. But I guess it is correct ;)
x86 is "simple". We know that the probed function should do "ret" and the
ret-addr lives on stack. This means that "regs->sp <= sp" is correct, it
can't be false-negative. Simply because if regs->sp > sp then *sp can be
never used by "ret". And everything above regs->sp can be overwritten by
a signal handler. powerpc/etc differs, they use the link register.
Just for example. Lets look at prepare_uretprobe(). Suppose it adds the
new return_instance to ->return_instances list. Note that on 86
arch_uretprobe_is_alive(&new_ri->auret) is obviously (and correctly) true.
Is it also true on powerpc? I am not sure, I think it is not. Yes, this
doesn't really matter in prepare_uretprobe(), but this will matter if
the new ret-addr won't be saved on stack when we hit the next bp.
So. Lets do this per-arch. Try to do, actually. I am not even sure these
new hooks can actually help powerpc/etc. If not, we will have to switch
to "plan B".
If x86 can share the same code with (say) powerpc, we can always cleanup
this later, this is trivial. Right now I'd like to ensure that if the
same or similar logic can work on powerpc, it only needs to touch the
code in arch/powerpc.
Oleg.
> >
> > The above weak function should work with ppc.
>
> I don't think so. Even if I know nothing about !x86.
>
> > In fact I see only two arches
> > that define CONFIG_STACK_GROWSUP
>
> Ah, please forget about GROWSUP, this is not the problem.
>
Ok
> > We even seem to use this assumption when kprobe_tracer/uprobe_tracer
> > fetch arguments from stack. See fetch_kernel_stack_address() /
> > fetch_user_stack_address() and get_user_stack_nth().
>
> But this all is completely different.
>
> No. I don't think arch_uretprobe_is_alive() above can work for powerpc,
> at least the same way.
>
> The problem is, when the function is called, the ret-addr is not pushed
> on stack. If it was, then arch_uretprobe_hijack_return_addr() on powerpc
> is just wrong. But I guess it is correct ;)
>
> x86 is "simple". We know that the probed function should do "ret" and the
> ret-addr lives on stack. This means that "regs->sp <= sp" is correct, it
> can't be false-negative. Simply because if regs->sp > sp then *sp can be
> never used by "ret". And everything above regs->sp can be overwritten by
> a signal handler. powerpc/etc differs, they use the link register.
>
In ppc, the return address for the current function may not be in stack
but in link register, but the return address for the previous functions
end up in the stack. Lets assume main() had called foo(). Now when foo()
calls bar (by using the b/bl instruction), we would save the current
link register (that has address corresponding to main function) to the
link register save area of the stack and update the stack pointer and
the link register to an address to where we need to jump back in foo().
In ppc,
- Stack grows from higher addresses down towards lower addresses.
- Most function invocations create a new stack frame
- Except for leaf functions that don't have many local variables
I don't see why storing the return address in the link register would
cause issues with arch_uretprobe_is_alive().
So I am pretty sure that arch_uretprobe_is_alive() should work.
> Just for example. Lets look at prepare_uretprobe(). Suppose it adds the
> new return_instance to ->return_instances list. Note that on 86
> arch_uretprobe_is_alive(&new_ri->auret) is obviously (and correctly) true.
> Is it also true on powerpc? I am not sure, I think it is not. Yes, this
> doesn't really matter in prepare_uretprobe(), but this will matter if
> the new ret-addr won't be saved on stack when we hit the next bp.
>
> So. Lets do this per-arch. Try to do, actually. I am not even sure these
> new hooks can actually help powerpc/etc. If not, we will have to switch
> to "plan B".
Okay, lets do it per-arch now and yes it can always be cleaned up later.
>
> If x86 can share the same code with (say) powerpc, we can always cleanup
> this later, this is trivial. Right now I'd like to ensure that if the
> same or similar logic can work on powerpc, it only needs to touch the
> code in arch/powerpc.
>
> Oleg.
>
--
Thanks and Regards
Srikar Dronamraju
Hi Oleg,
On Monday 04 May 2015 06:19 PM, Oleg Nesterov wrote:
> +bool __weak arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs)
> +{
> + return true;
> +}
IIUC, this function should return false when both auret and regs
correspond to the same retprobe, else we need to return true, right?
If that is the case, then shouldn't the following work for all the cases:
return sp != auret->sp;
~Pratyush
Hi Pratyush,
sorry for delay, vacation.
On 05/18, Pratyush Anand wrote:
> Hi Oleg,
>
> On Monday 04 May 2015 06:19 PM, Oleg Nesterov wrote:
>> +bool __weak arch_uretprobe_is_alive(struct arch_uretprobe *auret, struct pt_regs *regs)
>> +{
>> + return true;
>> +}
>
> IIUC, this function should return false when both auret and regs
> correspond to the same retprobe, else we need to return true, right?
Not sure I understand what you mean...
This function should return false when we know that this return_instance
was invalidated by longjmp().
> If that is the case, then shouldn't the following work for all the cases:
>
> return sp != auret->sp;
No, this can't work.
On x86 "sp == auret->sp" only right after the "call" insn, the stack
can grow after that but this does not mean that this instance is no
longer valid.
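Just to illustrate with made-up x86-64 numbers:

	caller:                 sp = 0x1000
	call func               sp = 0x0ff8  -> auret->sp = 0x0ff8 (hijack time)
	func: sub $0x20,%rsp    sp = 0x0fd8

Here sp != auret->sp although the instance is obviously still alive, so
"!=" gives a false negative; "sp <= auret->sp" stays true until the
frame is really gone (sp becomes 0x1000 again after "ret").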
Oleg.
Srikar,
sorry for delay, vacation.
On 05/13, Srikar Dronamraju wrote:
>
> > No. I don't think arch_uretprobe_is_alive() above can work for powerpc,
> > at least the same way.
> >
> > The problem is, when the function is called, the ret-addr is not pushed
> > on stack. If it was, then arch_uretprobe_hijack_return_addr() on powerpc
> > is just wrong. But I guess it is correct ;)
> >
> > x86 is "simple". We know that the probed function should do "ret" and the
> > ret-addr lives on stack. This means that "regs->sp <= sp" is correct, it
> > can't be false-negative. Simply because if regs->sp > sp then *sp can be
> > never used by "ret". And everything above regs->sp can be overwritten by
> > a signal handler. powerpc/etc differs, they use the link register.
> >
>
> In ppc, the return address for the current function may not be in stack
> but in link register, but the return address for the previous functions
> end up in the stack.
Yes, yes, I understand. That is why I hope that this series can help
other arches too ;)
But note that at least this means that the "on_call" arg should be ignored,
although this is not a problem either.
> Lets assume main() had called foo(). Now when foo()
> calls bar (by using the b/bl instruction), we would save the current
> link register (that has address corresponding to main function) to the
> link register save area of the stack and update the stack pointer and
> the link register to an address to where we need to jump back in foo().
Yes. Now suppose that you ret-probe both main() and foo(). What happens
when foo() returns?
I guess it should clean up the stack and remove main's ret-addr from the
stack. Doesn't this mean that arch_uretprobe_is_alive(auret_for_main)
becomes false if we just use user_stack_pointer(regs) <= sp for every arch?
This will break handle_trampoline().
> > So. Lets do this per-arch. Try to do, actually. I am not even sure these
> > new hooks can actually help powerpc/etc. If not, we will have to switch
> > to "plan B".
>
> Okay, lets do it per-arch now and yes it can always be cleaned up later.
Yes, this just looks safer. At least this way we can't introduce new
problems on !x86.
Oleg.
On 05/04, Oleg Nesterov wrote:
>
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -1522,6 +1522,16 @@ static unsigned long get_trampoline_vaddr(void)
> return trampoline_vaddr;
> }
>
> +static void cleanup_return_instances(struct uprobe_task *utask, struct pt_regs *regs)
> +{
> + struct return_instance *ri = utask->return_instances;
> + while (ri && !arch_uretprobe_is_alive(&ri->auret, regs)) {
> + ri = free_ret_instance(ri);
> + utask->depth--;
> + }
> + utask->return_instances = ri;
> +}
> +
> static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> {
> struct return_instance *ri;
> @@ -1576,6 +1586,9 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> ri->orig_ret_vaddr = orig_ret_vaddr;
> ri->chained = chained;
>
> + if (utask->depth) /* drop the entries invalidated by longjmp() */
> + cleanup_return_instances(utask, regs);
> +
Self nack ;)
Note that prepare_uretprobe() does
if (orig_ret_vaddr == trampoline_vaddr) {
if (!utask->return_instances) {
/*
* This situation is not possible. Likely we have an
* attack from user-space.
*/
pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
current->pid, current->tgid);
goto fail;
_before_ we do cleanup_return_instances().
This is actually fine in the sense that ->return_instances == NULL after
cleanup_return_instances() is not possible if chained; there should be
another _alive() frame. But a malicious user can obviously fool the kernel.
Easy to fix. But after discussion with Srikar and Pratyush (thanks!) I
decided to update 6-10.
Oleg.