Current implementation calls kprobe related functions before doing
ftrace recursion check in fprobe_kprobe_handler, which opens door
to kernel crash due to stack recursion if preempt_count_{add, sub}
is traceable in kprobe_busy_{begin, end}.
Things goes like this without this patch quoted from Steven:
"
fprobe_kprobe_handler() {
kprobe_busy_begin() {
preempt_disable() {
preempt_count_add() { <-- trace
fprobe_kprobe_handler() {
[ wash, rinse, repeat, CRASH!!! ]
"
By refactoring the common part out of fprobe_kprobe_handler and
fprobe_handler and call ftrace recursion detection at the very beginning,
the whole fprobe_kprobe_handler is free from recursion.
Signed-off-by: Ze Gao <[email protected]>
Acked-by: Masami Hiramatsu (Google) <[email protected]>
Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
---
kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
1 file changed, 44 insertions(+), 15 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9abb3905bc8e..097c740799ba 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -20,30 +20,22 @@ struct fprobe_rethook_node {
char data[];
};
-static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
+static inline void __fprobe_handler(unsigned long ip, unsigned long
+ parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct fprobe_rethook_node *fpr;
struct rethook_node *rh = NULL;
struct fprobe *fp;
void *entry_data = NULL;
- int bit, ret;
+ int ret;
fp = container_of(ops, struct fprobe, ops);
- if (fprobe_disabled(fp))
- return;
-
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0) {
- fp->nmissed++;
- return;
- }
if (fp->exit_handler) {
rh = rethook_try_get(fp->rethook);
if (!rh) {
fp->nmissed++;
- goto out;
+ return;
}
fpr = container_of(rh, struct fprobe_rethook_node, node);
fpr->entry_ip = ip;
@@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
else
rethook_hook(rh, ftrace_get_regs(fregs), true);
}
-out:
+}
+
+static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct fprobe *fp;
+ int bit;
+
+ fp = container_of(ops, struct fprobe, ops);
+ if (fprobe_disabled(fp))
+ return;
+
+ /* recursion detection has to go before any traceable function and
+ * all functions before this point should be marked as notrace
+ */
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
+ __fprobe_handler(ip, parent_ip, ops, fregs);
ftrace_test_recursion_unlock(bit);
+
}
NOKPROBE_SYMBOL(fprobe_handler);
static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
- struct fprobe *fp = container_of(ops, struct fprobe, ops);
+ struct fprobe *fp;
+ int bit;
+
+ fp = container_of(ops, struct fprobe, ops);
+ if (fprobe_disabled(fp))
+ return;
+
+ /* recursion detection has to go before any traceable function and
+ * all functions called before this point should be marked as notrace
+ */
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
if (unlikely(kprobe_running())) {
fp->nmissed++;
return;
}
+
kprobe_busy_begin();
- fprobe_handler(ip, parent_ip, ops, fregs);
+ __fprobe_handler(ip, parent_ip, ops, fregs);
kprobe_busy_end();
+ ftrace_test_recursion_unlock(bit);
}
static void fprobe_exit_handler(struct rethook_node *rh, void *data,
--
2.40.1
On Wed, May 17, 2023 at 11:45:07AM +0800, Ze Gao wrote:
> Current implementation calls kprobe related functions before doing
> ftrace recursion check in fprobe_kprobe_handler, which opens door
> to kernel crash due to stack recursion if preempt_count_{add, sub}
> is traceable in kprobe_busy_{begin, end}.
>
> Things goes like this without this patch quoted from Steven:
> "
> fprobe_kprobe_handler() {
> kprobe_busy_begin() {
> preempt_disable() {
> preempt_count_add() { <-- trace
> fprobe_kprobe_handler() {
> [ wash, rinse, repeat, CRASH!!! ]
> "
>
> By refactoring the common part out of fprobe_kprobe_handler and
> fprobe_handler and call ftrace recursion detection at the very beginning,
> the whole fprobe_kprobe_handler is free from recursion.
>
> Signed-off-by: Ze Gao <[email protected]>
> Acked-by: Masami Hiramatsu (Google) <[email protected]>
> Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> ---
> kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> 1 file changed, 44 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 9abb3905bc8e..097c740799ba 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> char data[];
> };
>
> -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +static inline void __fprobe_handler(unsigned long ip, unsigned long
> + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
> {
> struct fprobe_rethook_node *fpr;
> struct rethook_node *rh = NULL;
> struct fprobe *fp;
> void *entry_data = NULL;
> - int bit, ret;
> + int ret;
>
this change uncovered bug for me introduced by [1]
the bpf's kprobe multi uses either fprobe's entry_handler or exit_handler,
so the 'ret' value is undefined for return probe path and occasionally we
won't setup rethook and miss the return probe
we can either squash this change into your patch or I can make separate
patch for that.. but given that [1] is quite recent we could just silently
fix that ;-)
jirka
[1] 39d954200bf6 fprobe: Skip exit_handler if entry_handler returns !0
---
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9abb3905bc8e..293184227394 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -27,7 +27,7 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
struct rethook_node *rh = NULL;
struct fprobe *fp;
void *entry_data = NULL;
- int bit, ret;
+ int bit, ret = 0;
fp = container_of(ops, struct fprobe, ops);
if (fprobe_disabled(fp))
On Wed, 17 May 2023 12:47:42 +0200
Jiri Olsa <[email protected]> wrote:
> On Wed, May 17, 2023 at 11:45:07AM +0800, Ze Gao wrote:
> > Current implementation calls kprobe related functions before doing
> > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > is traceable in kprobe_busy_{begin, end}.
> >
> > Things goes like this without this patch quoted from Steven:
> > "
> > fprobe_kprobe_handler() {
> > kprobe_busy_begin() {
> > preempt_disable() {
> > preempt_count_add() { <-- trace
> > fprobe_kprobe_handler() {
> > [ wash, rinse, repeat, CRASH!!! ]
> > "
> >
> > By refactoring the common part out of fprobe_kprobe_handler and
> > fprobe_handler and call ftrace recursion detection at the very beginning,
> > the whole fprobe_kprobe_handler is free from recursion.
> >
> > Signed-off-by: Ze Gao <[email protected]>
> > Acked-by: Masami Hiramatsu (Google) <[email protected]>
> > Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> > ---
> > kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 44 insertions(+), 15 deletions(-)
> >
> > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > index 9abb3905bc8e..097c740799ba 100644
> > --- a/kernel/trace/fprobe.c
> > +++ b/kernel/trace/fprobe.c
> > @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> > char data[];
> > };
> >
> > -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +static inline void __fprobe_handler(unsigned long ip, unsigned long
> > + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > {
> > struct fprobe_rethook_node *fpr;
> > struct rethook_node *rh = NULL;
> > struct fprobe *fp;
> > void *entry_data = NULL;
> > - int bit, ret;
> > + int ret;
> >
>
> this change uncovered bug for me introduced by [1]
>
> the bpf's kprobe multi uses either fprobe's entry_handler or exit_handler,
> so the 'ret' value is undefined for return probe path and occasionally we
> won't setup rethook and miss the return probe
Oops, I missed to push my fix.
https://lore.kernel.org/all/168100731160.79534.374827110083836722.stgit@devnote2/
>
> we can either squash this change into your patch or I can make separate
> patch for that.. but given that [1] is quite recent we could just silently
> fix that ;-)
Jiri, I think the above will fix the issue, right?
>
> jirka
>
>
> [1] 39d954200bf6 fprobe: Skip exit_handler if entry_handler returns !0
>
> ---
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 9abb3905bc8e..293184227394 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -27,7 +27,7 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> struct rethook_node *rh = NULL;
> struct fprobe *fp;
> void *entry_data = NULL;
> - int bit, ret;
> + int bit, ret = 0;
>
> fp = container_of(ops, struct fprobe, ops);
> if (fprobe_disabled(fp))
>
>
--
Masami Hiramatsu (Google) <[email protected]>
On Wed, May 17, 2023 at 08:42:36PM +0900, Masami Hiramatsu wrote:
> On Wed, 17 May 2023 12:47:42 +0200
> Jiri Olsa <[email protected]> wrote:
>
> > On Wed, May 17, 2023 at 11:45:07AM +0800, Ze Gao wrote:
> > > Current implementation calls kprobe related functions before doing
> > > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > > is traceable in kprobe_busy_{begin, end}.
> > >
> > > Things goes like this without this patch quoted from Steven:
> > > "
> > > fprobe_kprobe_handler() {
> > > kprobe_busy_begin() {
> > > preempt_disable() {
> > > preempt_count_add() { <-- trace
> > > fprobe_kprobe_handler() {
> > > [ wash, rinse, repeat, CRASH!!! ]
> > > "
> > >
> > > By refactoring the common part out of fprobe_kprobe_handler and
> > > fprobe_handler and call ftrace recursion detection at the very beginning,
> > > the whole fprobe_kprobe_handler is free from recursion.
> > >
> > > Signed-off-by: Ze Gao <[email protected]>
> > > Acked-by: Masami Hiramatsu (Google) <[email protected]>
> > > Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> > > ---
> > > kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> > > 1 file changed, 44 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > > index 9abb3905bc8e..097c740799ba 100644
> > > --- a/kernel/trace/fprobe.c
> > > +++ b/kernel/trace/fprobe.c
> > > @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> > > char data[];
> > > };
> > >
> > > -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > > - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > > +static inline void __fprobe_handler(unsigned long ip, unsigned long
> > > + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > > {
> > > struct fprobe_rethook_node *fpr;
> > > struct rethook_node *rh = NULL;
> > > struct fprobe *fp;
> > > void *entry_data = NULL;
> > > - int bit, ret;
> > > + int ret;
> > >
> >
> > this change uncovered bug for me introduced by [1]
> >
> > the bpf's kprobe multi uses either fprobe's entry_handler or exit_handler,
> > so the 'ret' value is undefined for return probe path and occasionally we
> > won't setup rethook and miss the return probe
>
> Oops, I missed to push my fix.
>
> https://lore.kernel.org/all/168100731160.79534.374827110083836722.stgit@devnote2/
>
> >
> > we can either squash this change into your patch or I can make separate
> > patch for that.. but given that [1] is quite recent we could just silently
> > fix that ;-)
>
> Jiri, I think the above will fix the issue, right?
yes, it's the same fix, great, thanks
jirka
>
> >
> > jirka
> >
> >
> > [1] 39d954200bf6 fprobe: Skip exit_handler if entry_handler returns !0
> >
> > ---
> > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > index 9abb3905bc8e..293184227394 100644
> > --- a/kernel/trace/fprobe.c
> > +++ b/kernel/trace/fprobe.c
> > @@ -27,7 +27,7 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > struct rethook_node *rh = NULL;
> > struct fprobe *fp;
> > void *entry_data = NULL;
> > - int bit, ret;
> > + int bit, ret = 0;
> >
> > fp = container_of(ops, struct fprobe, ops);
> > if (fprobe_disabled(fp))
> >
> >
>
>
> --
> Masami Hiramatsu (Google) <[email protected]>
On Wed, 17 May 2023 11:45:07 +0800
Ze Gao <[email protected]> wrote:
> Current implementation calls kprobe related functions before doing
> ftrace recursion check in fprobe_kprobe_handler, which opens door
> to kernel crash due to stack recursion if preempt_count_{add, sub}
> is traceable in kprobe_busy_{begin, end}.
>
> Things goes like this without this patch quoted from Steven:
> "
> fprobe_kprobe_handler() {
> kprobe_busy_begin() {
> preempt_disable() {
> preempt_count_add() { <-- trace
> fprobe_kprobe_handler() {
> [ wash, rinse, repeat, CRASH!!! ]
> "
>
> By refactoring the common part out of fprobe_kprobe_handler and
> fprobe_handler and call ftrace recursion detection at the very beginning,
> the whole fprobe_kprobe_handler is free from recursion.
>
> Signed-off-by: Ze Gao <[email protected]>
> Acked-by: Masami Hiramatsu (Google) <[email protected]>
> Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> ---
> kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> 1 file changed, 44 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 9abb3905bc8e..097c740799ba 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> char data[];
> };
>
> -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +static inline void __fprobe_handler(unsigned long ip, unsigned long
> + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
OK, I picked up this series to probes/fixes. Note that I fixed this line
because the "unsigned long parent_ip" was split into 2 lines.
Thank you,
> {
> struct fprobe_rethook_node *fpr;
> struct rethook_node *rh = NULL;
> struct fprobe *fp;
> void *entry_data = NULL;
> - int bit, ret;
> + int ret;
>
> fp = container_of(ops, struct fprobe, ops);
> - if (fprobe_disabled(fp))
> - return;
> -
> - bit = ftrace_test_recursion_trylock(ip, parent_ip);
> - if (bit < 0) {
> - fp->nmissed++;
> - return;
> - }
>
> if (fp->exit_handler) {
> rh = rethook_try_get(fp->rethook);
> if (!rh) {
> fp->nmissed++;
> - goto out;
> + return;
> }
> fpr = container_of(rh, struct fprobe_rethook_node, node);
> fpr->entry_ip = ip;
> @@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> else
> rethook_hook(rh, ftrace_get_regs(fregs), true);
> }
> -out:
> +}
> +
> +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> + struct fprobe *fp;
> + int bit;
> +
> + fp = container_of(ops, struct fprobe, ops);
> + if (fprobe_disabled(fp))
> + return;
> +
> + /* recursion detection has to go before any traceable function and
> + * all functions before this point should be marked as notrace
> + */
> + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> + if (bit < 0) {
> + fp->nmissed++;
> + return;
> + }
> + __fprobe_handler(ip, parent_ip, ops, fregs);
> ftrace_test_recursion_unlock(bit);
> +
> }
> NOKPROBE_SYMBOL(fprobe_handler);
>
> static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> struct ftrace_ops *ops, struct ftrace_regs *fregs)
> {
> - struct fprobe *fp = container_of(ops, struct fprobe, ops);
> + struct fprobe *fp;
> + int bit;
> +
> + fp = container_of(ops, struct fprobe, ops);
> + if (fprobe_disabled(fp))
> + return;
> +
> + /* recursion detection has to go before any traceable function and
> + * all functions called before this point should be marked as notrace
> + */
> + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> + if (bit < 0) {
> + fp->nmissed++;
> + return;
> + }
>
> if (unlikely(kprobe_running())) {
> fp->nmissed++;
> return;
> }
> +
> kprobe_busy_begin();
> - fprobe_handler(ip, parent_ip, ops, fregs);
> + __fprobe_handler(ip, parent_ip, ops, fregs);
> kprobe_busy_end();
> + ftrace_test_recursion_unlock(bit);
> }
>
> static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> --
> 2.40.1
>
--
Masami Hiramatsu (Google) <[email protected]>
On Wed, May 17, 2023 at 7:28 AM Masami Hiramatsu <[email protected]> wrote:
>
> On Wed, 17 May 2023 11:45:07 +0800
> Ze Gao <[email protected]> wrote:
>
> > Current implementation calls kprobe related functions before doing
> > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > is traceable in kprobe_busy_{begin, end}.
> >
> > Things goes like this without this patch quoted from Steven:
> > "
> > fprobe_kprobe_handler() {
> > kprobe_busy_begin() {
> > preempt_disable() {
> > preempt_count_add() { <-- trace
> > fprobe_kprobe_handler() {
> > [ wash, rinse, repeat, CRASH!!! ]
> > "
> >
> > By refactoring the common part out of fprobe_kprobe_handler and
> > fprobe_handler and call ftrace recursion detection at the very beginning,
> > the whole fprobe_kprobe_handler is free from recursion.
> >
> > Signed-off-by: Ze Gao <[email protected]>
> > Acked-by: Masami Hiramatsu (Google) <[email protected]>
> > Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> > ---
> > kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 44 insertions(+), 15 deletions(-)
> >
> > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > index 9abb3905bc8e..097c740799ba 100644
> > --- a/kernel/trace/fprobe.c
> > +++ b/kernel/trace/fprobe.c
> > @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> > char data[];
> > };
> >
> > -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +static inline void __fprobe_handler(unsigned long ip, unsigned long
> > + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
>
> OK, I picked up this series to probes/fixes. Note that I fixed this line
> because the "unsigned long parent_ip" was split into 2 lines.
>
Hey Masami,
Regarding [0], I was bisecting BPF CI failures related to
multi-kprobes, and it turned out that [0] is the fix we need. It would
be great if you can make sure this fix gets into Linus' tree ASAP, so
that we can get it back into bpf/bpf-next trees and fix BPF selftests
for everyone (we mitigated this for BPF CI as a temporary workaround
for now). Thanks!
[0] https://lore.kernel.org/all/168100731160.79534.374827110083836722.stgit@devnote2/
> Thank you,
>
>
> > {
> > struct fprobe_rethook_node *fpr;
> > struct rethook_node *rh = NULL;
> > struct fprobe *fp;
> > void *entry_data = NULL;
> > - int bit, ret;
> > + int ret;
> >
> > fp = container_of(ops, struct fprobe, ops);
> > - if (fprobe_disabled(fp))
> > - return;
> > -
> > - bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > - if (bit < 0) {
> > - fp->nmissed++;
> > - return;
> > - }
> >
> > if (fp->exit_handler) {
> > rh = rethook_try_get(fp->rethook);
> > if (!rh) {
> > fp->nmissed++;
> > - goto out;
> > + return;
> > }
> > fpr = container_of(rh, struct fprobe_rethook_node, node);
> > fpr->entry_ip = ip;
> > @@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > else
> > rethook_hook(rh, ftrace_get_regs(fregs), true);
> > }
> > -out:
> > +}
> > +
> > +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +{
> > + struct fprobe *fp;
> > + int bit;
> > +
> > + fp = container_of(ops, struct fprobe, ops);
> > + if (fprobe_disabled(fp))
> > + return;
> > +
> > + /* recursion detection has to go before any traceable function and
> > + * all functions before this point should be marked as notrace
> > + */
> > + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > + if (bit < 0) {
> > + fp->nmissed++;
> > + return;
> > + }
> > + __fprobe_handler(ip, parent_ip, ops, fregs);
> > ftrace_test_recursion_unlock(bit);
> > +
> > }
> > NOKPROBE_SYMBOL(fprobe_handler);
> >
> > static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> > struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > {
> > - struct fprobe *fp = container_of(ops, struct fprobe, ops);
> > + struct fprobe *fp;
> > + int bit;
> > +
> > + fp = container_of(ops, struct fprobe, ops);
> > + if (fprobe_disabled(fp))
> > + return;
> > +
> > + /* recursion detection has to go before any traceable function and
> > + * all functions called before this point should be marked as notrace
> > + */
> > + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > + if (bit < 0) {
> > + fp->nmissed++;
> > + return;
> > + }
> >
> > if (unlikely(kprobe_running())) {
> > fp->nmissed++;
> > return;
> > }
> > +
> > kprobe_busy_begin();
> > - fprobe_handler(ip, parent_ip, ops, fregs);
> > + __fprobe_handler(ip, parent_ip, ops, fregs);
> > kprobe_busy_end();
> > + ftrace_test_recursion_unlock(bit);
> > }
> >
> > static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> > --
> > 2.40.1
> >
>
>
> --
> Masami Hiramatsu (Google) <[email protected]>
>
Glad to hear that, hooray! :)
Thanks
Ze
On Wed, May 17, 2023 at 10:27 PM Masami Hiramatsu <[email protected]> wrote:
>
> On Wed, 17 May 2023 11:45:07 +0800
> Ze Gao <[email protected]> wrote:
>
> > Current implementation calls kprobe related functions before doing
> > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > is traceable in kprobe_busy_{begin, end}.
> >
> > Things goes like this without this patch quoted from Steven:
> > "
> > fprobe_kprobe_handler() {
> > kprobe_busy_begin() {
> > preempt_disable() {
> > preempt_count_add() { <-- trace
> > fprobe_kprobe_handler() {
> > [ wash, rinse, repeat, CRASH!!! ]
> > "
> >
> > By refactoring the common part out of fprobe_kprobe_handler and
> > fprobe_handler and call ftrace recursion detection at the very beginning,
> > the whole fprobe_kprobe_handler is free from recursion.
> >
> > Signed-off-by: Ze Gao <[email protected]>
> > Acked-by: Masami Hiramatsu (Google) <[email protected]>
> > Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> > ---
> > kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 44 insertions(+), 15 deletions(-)
> >
> > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > index 9abb3905bc8e..097c740799ba 100644
> > --- a/kernel/trace/fprobe.c
> > +++ b/kernel/trace/fprobe.c
> > @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> > char data[];
> > };
> >
> > -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +static inline void __fprobe_handler(unsigned long ip, unsigned long
> > + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
>
> OK, I picked up this series to probes/fixes. Note that I fixed this line
> because the "unsigned long parent_ip" was split into 2 lines.
>
> Thank you,
>
>
> > {
> > struct fprobe_rethook_node *fpr;
> > struct rethook_node *rh = NULL;
> > struct fprobe *fp;
> > void *entry_data = NULL;
> > - int bit, ret;
> > + int ret;
> >
> > fp = container_of(ops, struct fprobe, ops);
> > - if (fprobe_disabled(fp))
> > - return;
> > -
> > - bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > - if (bit < 0) {
> > - fp->nmissed++;
> > - return;
> > - }
> >
> > if (fp->exit_handler) {
> > rh = rethook_try_get(fp->rethook);
> > if (!rh) {
> > fp->nmissed++;
> > - goto out;
> > + return;
> > }
> > fpr = container_of(rh, struct fprobe_rethook_node, node);
> > fpr->entry_ip = ip;
> > @@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > else
> > rethook_hook(rh, ftrace_get_regs(fregs), true);
> > }
> > -out:
> > +}
> > +
> > +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +{
> > + struct fprobe *fp;
> > + int bit;
> > +
> > + fp = container_of(ops, struct fprobe, ops);
> > + if (fprobe_disabled(fp))
> > + return;
> > +
> > + /* recursion detection has to go before any traceable function and
> > + * all functions before this point should be marked as notrace
> > + */
> > + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > + if (bit < 0) {
> > + fp->nmissed++;
> > + return;
> > + }
> > + __fprobe_handler(ip, parent_ip, ops, fregs);
> > ftrace_test_recursion_unlock(bit);
> > +
> > }
> > NOKPROBE_SYMBOL(fprobe_handler);
> >
> > static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> > struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > {
> > - struct fprobe *fp = container_of(ops, struct fprobe, ops);
> > + struct fprobe *fp;
> > + int bit;
> > +
> > + fp = container_of(ops, struct fprobe, ops);
> > + if (fprobe_disabled(fp))
> > + return;
> > +
> > + /* recursion detection has to go before any traceable function and
> > + * all functions called before this point should be marked as notrace
> > + */
> > + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > + if (bit < 0) {
> > + fp->nmissed++;
> > + return;
> > + }
> >
> > if (unlikely(kprobe_running())) {
> > fp->nmissed++;
> > return;
> > }
> > +
> > kprobe_busy_begin();
> > - fprobe_handler(ip, parent_ip, ops, fregs);
> > + __fprobe_handler(ip, parent_ip, ops, fregs);
> > kprobe_busy_end();
> > + ftrace_test_recursion_unlock(bit);
> > }
> >
> > static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> > --
> > 2.40.1
> >
>
>
> --
> Masami Hiramatsu (Google) <[email protected]>
On Wed, May 17, 2023 at 11:45 AM Ze Gao <[email protected]> wrote:
>
> Current implementation calls kprobe related functions before doing
> ftrace recursion check in fprobe_kprobe_handler, which opens door
> to kernel crash due to stack recursion if preempt_count_{add, sub}
> is traceable in kprobe_busy_{begin, end}.
>
> Things goes like this without this patch quoted from Steven:
> "
> fprobe_kprobe_handler() {
> kprobe_busy_begin() {
> preempt_disable() {
> preempt_count_add() { <-- trace
> fprobe_kprobe_handler() {
> [ wash, rinse, repeat, CRASH!!! ]
> "
>
> By refactoring the common part out of fprobe_kprobe_handler and
> fprobe_handler and call ftrace recursion detection at the very beginning,
> the whole fprobe_kprobe_handler is free from recursion.
>
> Signed-off-by: Ze Gao <[email protected]>
> Acked-by: Masami Hiramatsu (Google) <[email protected]>
> Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> ---
> kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> 1 file changed, 44 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 9abb3905bc8e..097c740799ba 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> char data[];
> };
>
> -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +static inline void __fprobe_handler(unsigned long ip, unsigned long
> + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
> {
> struct fprobe_rethook_node *fpr;
> struct rethook_node *rh = NULL;
> struct fprobe *fp;
> void *entry_data = NULL;
> - int bit, ret;
> + int ret;
>
> fp = container_of(ops, struct fprobe, ops);
> - if (fprobe_disabled(fp))
> - return;
> -
> - bit = ftrace_test_recursion_trylock(ip, parent_ip);
> - if (bit < 0) {
> - fp->nmissed++;
> - return;
> - }
>
> if (fp->exit_handler) {
> rh = rethook_try_get(fp->rethook);
> if (!rh) {
> fp->nmissed++;
> - goto out;
> + return;
> }
> fpr = container_of(rh, struct fprobe_rethook_node, node);
> fpr->entry_ip = ip;
> @@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> else
> rethook_hook(rh, ftrace_get_regs(fregs), true);
> }
> -out:
> +}
> +
> +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> + struct fprobe *fp;
> + int bit;
> +
> + fp = container_of(ops, struct fprobe, ops);
> + if (fprobe_disabled(fp))
> + return;
> +
> + /* recursion detection has to go before any traceable function and
> + * all functions before this point should be marked as notrace
> + */
> + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> + if (bit < 0) {
> + fp->nmissed++;
> + return;
> + }
> + __fprobe_handler(ip, parent_ip, ops, fregs);
> ftrace_test_recursion_unlock(bit);
> +
> }
> NOKPROBE_SYMBOL(fprobe_handler);
>
> static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> struct ftrace_ops *ops, struct ftrace_regs *fregs)
> {
> - struct fprobe *fp = container_of(ops, struct fprobe, ops);
> + struct fprobe *fp;
> + int bit;
> +
> + fp = container_of(ops, struct fprobe, ops);
> + if (fprobe_disabled(fp))
> + return;
> +
> + /* recursion detection has to go before any traceable function and
> + * all functions called before this point should be marked as notrace
> + */
> + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> + if (bit < 0) {
> + fp->nmissed++;
> + return;
> + }
>
> if (unlikely(kprobe_running())) {
> fp->nmissed++;
I have just looked through this patchset, just out of curiosity,
shouldn't we call ftrace_test_recursion_unlock(bit) here ?
We have already locked it successfully, so why should we not unlock it?
> return;
> }
> +
> kprobe_busy_begin();
> - fprobe_handler(ip, parent_ip, ops, fregs);
> + __fprobe_handler(ip, parent_ip, ops, fregs);
> kprobe_busy_end();
> + ftrace_test_recursion_unlock(bit);
> }
>
> static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> --
> 2.40.1
>
>
--
Regards
Yafang
Hi, yafang.
You're right, it should do the unlock before return for the sake of
sanity. (Please
ignore the last misleading reply :)
Will send a new patch to fix it.
Thanks
Ze
On Wed, Jun 28, 2023 at 3:17 PM Yafang Shao <[email protected]> wrote:
>
> On Wed, May 17, 2023 at 11:45 AM Ze Gao <[email protected]> wrote:
> >
> > Current implementation calls kprobe related functions before doing
> > ftrace recursion check in fprobe_kprobe_handler, which opens door
> > to kernel crash due to stack recursion if preempt_count_{add, sub}
> > is traceable in kprobe_busy_{begin, end}.
> >
> > Things goes like this without this patch quoted from Steven:
> > "
> > fprobe_kprobe_handler() {
> > kprobe_busy_begin() {
> > preempt_disable() {
> > preempt_count_add() { <-- trace
> > fprobe_kprobe_handler() {
> > [ wash, rinse, repeat, CRASH!!! ]
> > "
> >
> > By refactoring the common part out of fprobe_kprobe_handler and
> > fprobe_handler and call ftrace recursion detection at the very beginning,
> > the whole fprobe_kprobe_handler is free from recursion.
> >
> > Signed-off-by: Ze Gao <[email protected]>
> > Acked-by: Masami Hiramatsu (Google) <[email protected]>
> > Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
> > ---
> > kernel/trace/fprobe.c | 59 ++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 44 insertions(+), 15 deletions(-)
> >
> > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > index 9abb3905bc8e..097c740799ba 100644
> > --- a/kernel/trace/fprobe.c
> > +++ b/kernel/trace/fprobe.c
> > @@ -20,30 +20,22 @@ struct fprobe_rethook_node {
> > char data[];
> > };
> >
> > -static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > - struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +static inline void __fprobe_handler(unsigned long ip, unsigned long
> > + parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > {
> > struct fprobe_rethook_node *fpr;
> > struct rethook_node *rh = NULL;
> > struct fprobe *fp;
> > void *entry_data = NULL;
> > - int bit, ret;
> > + int ret;
> >
> > fp = container_of(ops, struct fprobe, ops);
> > - if (fprobe_disabled(fp))
> > - return;
> > -
> > - bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > - if (bit < 0) {
> > - fp->nmissed++;
> > - return;
> > - }
> >
> > if (fp->exit_handler) {
> > rh = rethook_try_get(fp->rethook);
> > if (!rh) {
> > fp->nmissed++;
> > - goto out;
> > + return;
> > }
> > fpr = container_of(rh, struct fprobe_rethook_node, node);
> > fpr->entry_ip = ip;
> > @@ -61,23 +53,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > else
> > rethook_hook(rh, ftrace_get_regs(fregs), true);
> > }
> > -out:
> > +}
> > +
> > +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> > + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +{
> > + struct fprobe *fp;
> > + int bit;
> > +
> > + fp = container_of(ops, struct fprobe, ops);
> > + if (fprobe_disabled(fp))
> > + return;
> > +
> > + /* recursion detection has to go before any traceable function and
> > + * all functions before this point should be marked as notrace
> > + */
> > + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > + if (bit < 0) {
> > + fp->nmissed++;
> > + return;
> > + }
> > + __fprobe_handler(ip, parent_ip, ops, fregs);
> > ftrace_test_recursion_unlock(bit);
> > +
> > }
> > NOKPROBE_SYMBOL(fprobe_handler);
> >
> > static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
> > struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > {
> > - struct fprobe *fp = container_of(ops, struct fprobe, ops);
> > + struct fprobe *fp;
> > + int bit;
> > +
> > + fp = container_of(ops, struct fprobe, ops);
> > + if (fprobe_disabled(fp))
> > + return;
> > +
> > + /* recursion detection has to go before any traceable function and
> > + * all functions called before this point should be marked as notrace
> > + */
> > + bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > + if (bit < 0) {
> > + fp->nmissed++;
> > + return;
> > + }
> >
> > if (unlikely(kprobe_running())) {
> > fp->nmissed++;
>
> I have just looked through this patchset, just out of curiosity,
> shouldn't we call ftrace_test_recursion_unlock(bit) here ?
> We have already locked it successfully, so why should we not unlock it?
>
> > return;
> > }
> > +
> > kprobe_busy_begin();
> > - fprobe_handler(ip, parent_ip, ops, fregs);
> > + __fprobe_handler(ip, parent_ip, ops, fregs);
> > kprobe_busy_end();
> > + ftrace_test_recursion_unlock(bit);
> > }
> >
> > static void fprobe_exit_handler(struct rethook_node *rh, void *data,
> > --
> > 2.40.1
> >
> >
>
>
> --
> Regards
> Yafang