Subject: [PATCH -tip v2 0/9] ftrace,kprobes: Ftrace-based kprobe optimization

Hi Steven,

Here is version 2 of the kprobe-ftrace patches. Since I didn't
touch the ftrace part in this version, the old gcc bug is still
there. So, if you have another implementation, please replace the
first three patches in this series with your patch.

In this version, I've fixed the kprobe-ftrace recursion problem
by initializing the ftrace_ops filter correctly and registering/
unregistering it on demand.

This series of patches allows kprobes to use ftrace to optimize
the probing path if the probe is on an ftrace location (mcount
call). The optimization is done transparently by kprobes.

The only case where a probe can not be optimized with ftrace (nor
put on ftrace) is when kprobe.break_handler is set. The reason for
this limitation is that break_handler is used only by jprobes, which
change the ip address (for fetching the function arguments). In this
series, ftrace doesn't allow changing regs->ip, since I don't want
to cause any non-essential trouble ;)

After deep consideration, I've decided to remove the "real_addr"
method at this time, since it is hard to achieve complete
transparency with the combination of jprobes and aggregated
kprobes. Also, at least on x86, ftrace-based kprobes are available.

Anyway, I can say jprobes is an outdated probe type, because we
already have kprobe-tracer and perf-probe, which allow us to get
function arguments directly from kprobes. :)

On the other hand, since kprobes has a self-recursion check, it is
safe if a probe is hit recursively, or hits a fault and calls its
fault_handler.

To use ftrace from kprobes, this series introduces two new
interfaces for ftrace:

- ftrace_ops.regs_func, a callback handler invoked with pt_regs as
  its third argument. To enable this feature, the
  FTRACE_OPS_FL_SAVE_REGS bit must be set in ftrace_ops.flags.
- ftrace_set_filter_ip(), which allows setting an address-based
  filter instead of a glob pattern (a usage sketch is shown below).
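
For reference, here is a minimal, untested sketch of how the two
interfaces could be used together from a module; the handler, the
ops variable and the probed symbol are placeholders, not part of
this series:

#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>

static void my_regs_handler(unsigned long ip, unsigned long parent_ip,
			    struct pt_regs *regs)
{
	/* regs is valid here because FTRACE_OPS_FL_SAVE_REGS is set */
	pr_info("traced %pS\n", (void *)ip);
}

static struct ftrace_ops my_ops = {
	.regs_func	= my_regs_handler,
	.flags		= FTRACE_OPS_FL_SAVE_REGS,
};

static int __init my_init(void)
{
	unsigned long addr;
	int ret;

	/* placeholder: must resolve to an mcount call site */
	addr = kallsyms_lookup_name("schedule");

	ret = ftrace_set_filter_ip(&my_ops, addr, 0, 0);
	if (!ret)
		ret = register_ftrace_function(&my_ops);
	return ret;
}

Calling ftrace_set_filter_ip(&my_ops, addr, 1, 0) removes the
address from the filter again, and unregister_ftrace_function()
stops tracing.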

ftrace_ops.regs_func is still under discussion because older
gcc doesn't support initializing an unnamed union member.

In this version, I've removed the notrace dependency from __kprobes.
Instead, the ftrace_ops filter is set correctly before the ftrace_ops
is registered, so that it doesn't trace all functions.

Changes in v2:
- Fix ftrace_ops registration to happen right after setting its filter.
- Unregister the ftrace_ops when no kprobe is using it.
- Remove the notrace dependency from the __kprobes macro.

Thank you,

---

Masami Hiramatsu (8):
kprobes/x86: ftrace based optimization for x86
kprobes: introduce ftrace based optimization
kprobes: Move locks into appropriate functions
kprobes: cleanup to separate probe-able check
ftrace: add ftrace_set_filter_ip() for address based filter
ftrace/x86: Support SAVE_REGS feature on i386
ftrace/x86-64: support SAVE_REGS feature on x86-64
ftrace: Add pt_regs acceptable trace callback

Steven Rostedt (1):
kprobes: Inverse taking of module_mutex with kprobe_mutex


arch/x86/include/asm/ftrace.h | 4 +
arch/x86/include/asm/kprobes.h | 1
arch/x86/kernel/entry_32.S | 64 +++++++++-
arch/x86/kernel/entry_64.S | 38 +++++-
arch/x86/kernel/kprobes.c | 48 ++++++++
include/linux/ftrace.h | 24 ++++
include/linux/kprobes.h | 27 ++++
kernel/kprobes.c | 250 +++++++++++++++++++++++++++++-----------
kernel/trace/ftrace.c | 103 ++++++++++++++--
9 files changed, 458 insertions(+), 101 deletions(-)

--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]


Subject: [PATCH -tip v2 1/9] ftrace: Add pt_regs acceptable trace callback

Add a new callback interface, regs_func, to ftrace_ops as an
anonymous union member sharing the pointer with the func member,
so current ftrace users don't need to change their callbacks.
This callback must be used with FTRACE_OPS_FL_SAVE_REGS.
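
For illustration only (not part of this patch): an existing user
keeps initializing .func exactly as before, while a user that needs
registers initializes .regs_func and sets the new flag. The handler
and ops names below are hypothetical.

/* existing callback: unchanged, no registers required */
static void old_handler(unsigned long ip, unsigned long parent_ip)
{
}

static struct ftrace_ops old_ops = {
	.func	= old_handler,
};

/* new-style callback: receives pt_regs, must set SAVE_REGS */
static void new_handler(unsigned long ip, unsigned long parent_ip,
			struct pt_regs *regs)
{
}

static struct ftrace_ops new_ops = {
	.regs_func	= new_handler,
	.flags		= FTRACE_OPS_FL_SAVE_REGS,
};

The designated initialization of .regs_func inside the anonymous
union is the construct that older gcc versions reject, which is why
this interface is still under discussion.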

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

include/linux/ftrace.h | 21 ++++++++++++++++++++-
kernel/trace/ftrace.c | 44 +++++++++++++++++++++++++++++++-------------
2 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 55e6d63..11abe4e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -10,6 +10,8 @@
#include <linux/kallsyms.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/types.h>
@@ -18,6 +20,14 @@

#include <asm/ftrace.h>

+/*
+ * If the arch supports saving the regs to the ftrace_ops
+ * then it should set this to 1.
+ */
+#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#define ARCH_SUPPORTS_FTRACE_SAVE_REGS 0
+#endif
+
struct module;
struct ftrace_hash;

@@ -30,6 +40,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
loff_t *ppos);

typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
+typedef void (*ftrace_regs_func_t)(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs);

/*
* FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
@@ -45,16 +57,22 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
* could be controled by following calls:
* ftrace_function_local_enable
* ftrace_function_local_disable
+ * SAVE_REGS - set manualy by ftrace_ops user to denote the ftrace_ops
+ * requests to pass pt_regs to callback.
*/
enum {
FTRACE_OPS_FL_ENABLED = 1 << 0,
FTRACE_OPS_FL_GLOBAL = 1 << 1,
FTRACE_OPS_FL_DYNAMIC = 1 << 2,
FTRACE_OPS_FL_CONTROL = 1 << 3,
+ FTRACE_OPS_FL_SAVE_REGS = 1 << 4,
};

struct ftrace_ops {
- ftrace_func_t func;
+ union {
+ ftrace_func_t func;
+ ftrace_regs_func_t regs_func;
+ };
struct ftrace_ops *next;
unsigned long flags;
int __percpu *disabled;
@@ -164,6 +182,7 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
}

extern void ftrace_stub(unsigned long a0, unsigned long a1);
+#define ftrace_regs_stub (ftrace_regs_func_t)(ftrace_stub)

#else /* !CONFIG_FUNCTION_TRACER */
/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a008663..357b15b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -101,7 +101,9 @@ static struct ftrace_ops global_ops;
static struct ftrace_ops control_ops;

static void
-ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
+ftrace_ops_list_regs_func(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs);
+#define ftrace_ops_list_func (ftrace_func_t)(ftrace_ops_list_regs_func)

/*
* Traverse the ftrace_global_list, invoking all entries. The reason that we
@@ -112,8 +114,9 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
*
* Silly Alpha and silly pointer-speculation compiler optimizations!
*/
-static void ftrace_global_list_func(unsigned long ip,
- unsigned long parent_ip)
+static void
+ftrace_global_list_regs_func(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs)
{
struct ftrace_ops *op;

@@ -123,19 +126,24 @@ static void ftrace_global_list_func(unsigned long ip,
trace_recursion_set(TRACE_GLOBAL_BIT);
op = rcu_dereference_raw(ftrace_global_list); /*see above*/
while (op != &ftrace_list_end) {
- op->func(ip, parent_ip);
+ op->regs_func(ip, parent_ip, regs);
op = rcu_dereference_raw(op->next); /*see above*/
};
trace_recursion_clear(TRACE_GLOBAL_BIT);
}
+#define ftrace_global_list_func (ftrace_func_t)(ftrace_global_list_regs_func)

-static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
+static void ftrace_pid_regs_func(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs)
{
+ ftrace_regs_func_t func = (ftrace_regs_func_t)ftrace_pid_function;
+
if (!test_tsk_trace_trace(current))
return;

- ftrace_pid_function(ip, parent_ip);
+ func(ip, parent_ip, regs);
}
+#define ftrace_pid_func (ftrace_func_t)(ftrace_pid_regs_func)

static void set_ftrace_pid_function(ftrace_func_t func)
{
@@ -328,6 +336,10 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
if (!core_kernel_data((unsigned long)ops))
ops->flags |= FTRACE_OPS_FL_DYNAMIC;

+ if ((ops->flags & FTRACE_OPS_FL_SAVE_REGS) &&
+ !ARCH_SUPPORTS_FTRACE_SAVE_REGS)
+ return -ENOSYS;
+
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
ops->flags |= FTRACE_OPS_FL_ENABLED;
@@ -1042,9 +1054,10 @@ static const struct ftrace_hash empty_hash = {
#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash)

static struct ftrace_ops global_ops = {
- .func = ftrace_stub,
+ .regs_func = ftrace_regs_stub,
.notrace_hash = EMPTY_HASH,
.filter_hash = EMPTY_HASH,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
};

static DEFINE_MUTEX(ftrace_regex_lock);
@@ -3911,7 +3924,8 @@ void __init ftrace_init(void)
#else

static struct ftrace_ops global_ops = {
- .func = ftrace_stub,
+ .regs_func = ftrace_regs_stub,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
};

static int __init ftrace_nodyn_init(void)
@@ -3942,7 +3956,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
#endif /* CONFIG_DYNAMIC_FTRACE */

static void
-ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
+ftrace_ops_control_regs_func(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs)
{
struct ftrace_ops *op;

@@ -3959,7 +3974,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
while (op != &ftrace_list_end) {
if (!ftrace_function_local_disabled(op) &&
ftrace_ops_test(op, ip))
- op->func(ip, parent_ip);
+ op->regs_func(ip, parent_ip, regs);

op = rcu_dereference_raw(op->next);
};
@@ -3968,11 +3983,13 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
}

static struct ftrace_ops control_ops = {
- .func = ftrace_ops_control_func,
+ .regs_func = ftrace_ops_control_regs_func,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
};

static void
-ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
+ftrace_ops_list_regs_func(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs)
{
struct ftrace_ops *op;

@@ -3988,7 +4005,8 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
op = rcu_dereference_raw(ftrace_ops_list);
while (op != &ftrace_list_end) {
if (ftrace_ops_test(op, ip))
- op->func(ip, parent_ip);
+ /* regs will be ignored if op->func is set */
+ op->regs_func(ip, parent_ip, regs);
op = rcu_dereference_raw(op->next);
};
preempt_enable_notrace();

Subject: [PATCH -tip v2 2/9] ftrace/x86-64: support SAVE_REGS feature on x86-64

Add a register saving/restoring sequence to ftrace_caller
on x86-64 to enable the SAVE_REGS feature.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

arch/x86/include/asm/ftrace.h | 4 ++++
arch/x86/kernel/entry_64.S | 38 +++++++++++++++++++++++++++++++++-----
2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 18d9005..ffb9564 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -32,6 +32,10 @@
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */

+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64)
+#define ARCH_SUPPORTS_FTRACE_SAVE_REGS 1
+#endif
+
#ifndef __ASSEMBLY__
extern void mcount(void);
extern int modifying_ftrace_code;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 320852d..6d0545b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -73,21 +73,44 @@ ENTRY(mcount)
retq
END(mcount)

+ .macro MCOUNT_SAVE_ALL
+ SAVE_ARGS (RSP-ORIG_RAX)
+ SAVE_REST
+ /* Adjust eflags, rsp and rip */
+ movq RSP(%rsp), %rdx
+ movq %rdx, EFLAGS(%rsp)
+ leaq SS+8(%rsp), %rdx
+ movq %rdx, RSP(%rsp)
+ movq SS(%rsp), %rdi
+ subq $MCOUNT_INSN_SIZE, %rdi
+ movq %rdi, RIP(%rsp)
+ .endm
+
+ .macro MCOUNT_RESTORE_ALL
+ /* store eflags into regs->rsp */
+ movq EFLAGS(%rsp), %rax
+ movq %rax, RSP(%rsp)
+ RESTORE_REST
+ RESTORE_ARGS 1, (RSP-ORIG_RAX)
+ .endm
+
ENTRY(ftrace_caller)
+ CFI_STARTPROC simple
+ pushfq_cfi
cmpl $0, function_trace_stop
- jne ftrace_stub
+ jne ftrace_exit

- MCOUNT_SAVE_FRAME
+ MCOUNT_SAVE_ALL

- movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
- subq $MCOUNT_INSN_SIZE, %rdi
+ movq %rsp, %rdx

GLOBAL(ftrace_call)
call ftrace_stub

- MCOUNT_RESTORE_FRAME
+ MCOUNT_RESTORE_ALL

+ popfq_cfi
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
jmp ftrace_stub
@@ -95,6 +118,11 @@ GLOBAL(ftrace_graph_call)

GLOBAL(ftrace_stub)
retq
+
+ftrace_exit:
+ popfq_cfi
+ retq
+ CFI_ENDPROC
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */

Subject: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

Add a register saving/restoring sequence to ftrace_caller
on i386 to enable the SAVE_REGS feature.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

arch/x86/include/asm/ftrace.h | 2 +
arch/x86/kernel/entry_32.S | 64 +++++++++++++++++++++++++++++++++++------
2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index ffb9564..e2f7269 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -32,7 +32,7 @@
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */

-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DYNAMIC_FTRACE)
#define ARCH_SUPPORTS_FTRACE_SAVE_REGS 1
#endif

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 01ccf9b..79c45a2 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1095,24 +1095,66 @@ ENTRY(mcount)
ret
END(mcount)

+ .macro FTRACE_SAVE_ALL
+ /* eflags is saved on cs */
+ subl $8, %esp /* skip ip and orig_ax */
+ pushl %gs
+ pushl %fs
+ pushl %es
+ pushl %ds
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+ movl 14*4(%esp), %eax /* Load return address */
+ pushl %eax /* Save return address (+4) */
+ subl $MCOUNT_INSN_SIZE, %eax
+ movl %eax, 12*4+4(%esp) /* Store IP */
+ movl 13*4+4(%esp), %edx /* Load flags */
+ movl %edx, 14*4+4(%esp) /* Store flags */
+ movl $__KERNEL_CS, %edx
+ movl %edx, 13*4+4(%esp)
+ .endm
+
+ .macro FTRACE_RESTORE_ALL
+ movl 14*4+4(%esp), %eax /* Load flags */
+ movl %eax, 13*4+4(%esp) /* Restore flags */
+ popl %eax
+ movl %eax, 14*4(%esp) /* Restore return address */
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ popl %ds
+ popl %es
+ popl %fs
+ popl %gs
+ addl $8, %esp
+ .endm
+
ENTRY(ftrace_caller)
+ pushf /* flags on regs->cs */
cmpl $0, function_trace_stop
- jne ftrace_stub
+ jne ftrace_exit
+
+ FTRACE_SAVE_ALL

- pushl %eax
- pushl %ecx
- pushl %edx
- movl 0xc(%esp), %eax
movl 0x4(%ebp), %edx
- subl $MCOUNT_INSN_SIZE, %eax
+ lea 4(%esp), %ecx

.globl ftrace_call
ftrace_call:
call ftrace_stub

- popl %edx
- popl %ecx
- popl %eax
+ FTRACE_RESTORE_ALL
+
+ popf
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
@@ -1122,6 +1164,10 @@ ftrace_graph_call:
.globl ftrace_stub
ftrace_stub:
ret
+
+ftrace_exit:
+ popf
+ ret
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */

Subject: [PATCH -tip v2 5/9] kprobes: Inverse taking of module_mutex with kprobe_mutex

From: Steven Rostedt <[email protected]>

Currently module_mutex is taken before kprobe_mutex, but this
can cause issues when we have kprobes register ftrace, as the ftrace
mutex is taken before enabling a tracepoint, which currently takes
the module mutex.

If module_mutex is taken before kprobe_mutex, then we can not
have kprobes use the ftrace infrastructure.

There seems to be no reason that the kprobe_mutex can't be taken
before the module_mutex. Running lockdep shows that it is safe
among the kernels I've run.

Cc: Masami Hiramatsu <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---

kernel/kprobes.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c62b854..7a8a122 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -561,9 +561,9 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
{
LIST_HEAD(free_list);

+ mutex_lock(&kprobe_mutex);
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
- mutex_lock(&kprobe_mutex);

/*
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -586,8 +586,8 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
/* Step 4: Free cleaned kprobes after quiesence period */
do_free_cleaned_kprobes(&free_list);

- mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);
+ mutex_unlock(&kprobe_mutex);

/* Step 5: Kick optimizer again if needed */
if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))

Subject: [PATCH -tip v2 7/9] kprobes: Move locks into appropriate functions

Break a big critical region into fine-grained pieces along the
kprobe registration path. This helps us solve a circular locking
dependency when introducing ftrace-based kprobes.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

kernel/kprobes.c | 63 ++++++++++++++++++++++++++++++++++++------------------
1 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6137fe3..9e47f44 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -759,20 +759,28 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
struct kprobe *ap;
struct optimized_kprobe *op;

+ /* For preparing optimization, jump_label_text_reserved() is called */
+ jump_label_lock();
+ mutex_lock(&text_mutex);
+
ap = alloc_aggr_kprobe(p);
if (!ap)
- return;
+ goto out;

op = container_of(ap, struct optimized_kprobe, kp);
if (!arch_prepared_optinsn(&op->optinsn)) {
/* If failed to setup optimizing, fallback to kprobe */
arch_remove_optimized_kprobe(op);
kfree(op);
- return;
+ goto out;
}

init_aggr_kprobe(ap, p);
- optimize_kprobe(ap);
+ optimize_kprobe(ap); /* This just kicks optimizer thread */
+
+out:
+ mutex_unlock(&text_mutex);
+ jump_label_unlock();
}

#ifdef CONFIG_SYSCTL
@@ -1144,12 +1152,6 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
if (p->post_handler && !ap->post_handler)
ap->post_handler = aggr_post_handler;

- if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
- ap->flags &= ~KPROBE_FLAG_DISABLED;
- if (!kprobes_all_disarmed)
- /* Arm the breakpoint again. */
- __arm_kprobe(ap);
- }
return 0;
}

@@ -1189,11 +1191,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
int ret = 0;
struct kprobe *ap = orig_p;

+ /* For preparing optimization, jump_label_text_reserved() is called */
+ jump_label_lock();
+ /*
+ * Get online CPUs to avoid text_mutex deadlock.with stop machine,
+ * which is invoked by unoptimize_kprobe() in add_new_kprobe()
+ */
+ get_online_cpus();
+ mutex_lock(&text_mutex);
+
if (!kprobe_aggrprobe(orig_p)) {
/* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
ap = alloc_aggr_kprobe(orig_p);
- if (!ap)
- return -ENOMEM;
+ if (!ap) {
+ ret = -ENOMEM;
+ goto out;
+ }
init_aggr_kprobe(ap, orig_p);
} else if (kprobe_unused(ap))
/* This probe is going to die. Rescue it */
@@ -1213,7 +1226,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
* free aggr_probe. It will be used next time, or
* freed by unregister_kprobe.
*/
- return ret;
+ goto out;

/* Prepare optimized instructions if possible. */
prepare_optimized_kprobe(ap);
@@ -1228,7 +1241,20 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,

/* Copy ap's insn slot to p */
copy_kprobe(ap, p);
- return add_new_kprobe(ap, p);
+ ret = add_new_kprobe(ap, p);
+
+out:
+ mutex_unlock(&text_mutex);
+ put_online_cpus();
+ jump_label_unlock();
+
+ if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
+ ap->flags &= ~KPROBE_FLAG_DISABLED;
+ if (!kprobes_all_disarmed)
+ /* Arm the breakpoint again. */
+ arm_kprobe(ap);
+ }
+ return ret;
}

static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -1387,10 +1413,6 @@ int __kprobes register_kprobe(struct kprobe *p)
return ret;

mutex_lock(&kprobe_mutex);
- jump_label_lock(); /* needed to call jump_label_text_reserved() */
-
- get_online_cpus(); /* For avoiding text_mutex deadlock. */
- mutex_lock(&text_mutex);

old_p = get_kprobe(p->addr);
if (old_p) {
@@ -1399,7 +1421,9 @@ int __kprobes register_kprobe(struct kprobe *p)
goto out;
}

+ mutex_lock(&text_mutex); /* Avoiding text modification */
ret = arch_prepare_kprobe(p);
+ mutex_unlock(&text_mutex);
if (ret)
goto out;

@@ -1408,15 +1432,12 @@ int __kprobes register_kprobe(struct kprobe *p)
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

if (!kprobes_all_disarmed && !kprobe_disabled(p))
- __arm_kprobe(p);
+ arm_kprobe(p);

/* Try to optimize kprobe */
try_to_optimize_kprobe(p);

out:
- mutex_unlock(&text_mutex);
- put_online_cpus();
- jump_label_unlock();
mutex_unlock(&kprobe_mutex);

if (probed_mod)

Subject: [PATCH -tip v2 8/9] kprobes: introduce ftrace based optimization

Introduce function-tracer-based kprobe optimization.

With ftrace-based optimization, a kprobe on an mcount calling
address uses ftrace's mcount call instead of a breakpoint.
Furthermore, this optimization works with a preemptive kernel,
unlike the current jump-based optimization. Of course, this feature
works only if the probe is on an mcount call (a usage sketch is
shown below the change list).

The only case where a probe is not optimized with ftrace (nor put
on ftrace) is when kprobe.break_handler is set. The reason for this
limitation is that break_handler is used only by jprobes, which
change the ip address (for fetching the function arguments), but
the function tracer ignores a modified ip address.

Changes in v2:
- Fix ftrace_ops registration to happen right after setting its filter.
- Unregister the ftrace_ops when no kprobe is using it.
- Remove the notrace dependency from the __kprobes macro.
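
For illustration, here is a hypothetical sketch of the user-visible
behavior (the symbol and handler names are placeholders): the
kprobes API itself does not change, and whether the probe becomes
ftrace-based depends only on whether p->addr is exactly the ftrace
(mcount) location.

#include <linux/kernel.h>
#include <linux/kprobes.h>

static int my_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("hit %s at %lx\n", p->symbol_name,
		instruction_pointer(regs));
	return 0;
}

static struct kprobe my_kp = {
	.symbol_name	= "do_fork",	/* placeholder symbol */
	.pre_handler	= my_pre,
};

static int __init my_init(void)
{
	return register_kprobe(&my_kp);
}

If the resolved address is an mcount call site, the probe is armed
through ftrace and is marked [FTRACE] in
/sys/kernel/debug/kprobes/list; otherwise it falls back to the
usual breakpoint-based kprobe. In this version, a probe that also
sets break_handler (a jprobe) is rejected on such an address.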

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

include/linux/kprobes.h | 27 ++++++++++++
kernel/kprobes.c | 105 +++++++++++++++++++++++++++++++++++++++++------
2 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b6e1f8c..aa0d05e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -38,6 +38,7 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>
+#include <linux/ftrace.h>

#ifdef CONFIG_KPROBES
#include <asm/kprobes.h>
@@ -48,14 +49,26 @@
#define KPROBE_REENTER 0x00000004
#define KPROBE_HIT_SSDONE 0x00000008

+/*
+ * If function tracer is enabled and the arch supports full
+ * passing of pt_regs to function tracing, then kprobes can
+ * optimize on top of function tracing.
+ */
+#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
+ && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
+# define KPROBES_CAN_USE_FTRACE
+#endif
+
/* Attach to insert probes on any functions which should be ignored*/
#define __kprobes __attribute__((__section__(".kprobes.text")))
+
#else /* CONFIG_KPROBES */
typedef int kprobe_opcode_t;
struct arch_specific_insn {
int dummy;
};
#define __kprobes
+
#endif /* CONFIG_KPROBES */

struct kprobe;
@@ -128,6 +141,7 @@ struct kprobe {
* NOTE:
* this flag is only for optimized_kprobe.
*/
+#define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */

/* Has this kprobe gone ? */
static inline int kprobe_gone(struct kprobe *p)
@@ -146,6 +160,13 @@ static inline int kprobe_optimized(struct kprobe *p)
{
return p->flags & KPROBE_FLAG_OPTIMIZED;
}
+
+/* Is this kprobe uses ftrace ? */
+static inline int kprobe_ftrace(struct kprobe *p)
+{
+ return p->flags & KPROBE_FLAG_FTRACE;
+}
+
/*
* Special probe type that uses setjmp-longjmp type tricks to resume
* execution at a specified entry with a matching prototype corresponding
@@ -295,6 +316,12 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
#endif

#endif /* CONFIG_OPTPROBES */
+#ifdef KPROBES_CAN_USE_FTRACE
+extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs);
+extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
+#endif
+

/* Get the kprobe at this addr (if any) - called with preemption disabled */
struct kprobe *get_kprobe(void *addr);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9e47f44..69c16ef 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -759,6 +759,10 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
struct kprobe *ap;
struct optimized_kprobe *op;

+ /* Impossible to optimize ftrace-based kprobe */
+ if (kprobe_ftrace(p))
+ return;
+
/* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock();
mutex_lock(&text_mutex);
@@ -915,9 +919,64 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
}
#endif /* CONFIG_OPTPROBES */

+#ifdef KPROBES_CAN_USE_FTRACE
+static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
+ .regs_func = kprobe_ftrace_handler,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
+};
+static int kprobe_ftrace_enabled;
+
+/* Must ensure p->addr is really on ftrace */
+static int __kprobes prepare_kprobe(struct kprobe *p)
+{
+ if (!kprobe_ftrace(p))
+ return arch_prepare_kprobe(p);
+
+ return arch_prepare_kprobe_ftrace(p);
+}
+
+/* Caller must lock kprobe_mutex */
+static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
+{
+ int ret;
+
+ ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr, 0, 0);
+ WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ kprobe_ftrace_enabled++;
+ if (kprobe_ftrace_enabled == 1) {
+ ret = register_ftrace_function(&kprobe_ftrace_ops);
+ WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ }
+}
+
+/* Caller must lock kprobe_mutex */
+static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
+{
+ int ret;
+
+ kprobe_ftrace_enabled--;
+ if (kprobe_ftrace_enabled == 0) {
+ ret = unregister_ftrace_function(&kprobe_ftrace_ops);
+ WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ }
+ ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr, 1, 0);
+ WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+}
+#else /* !KPROBES_CAN_USE_FTRACE */
+#define prepare_kprobe(p) arch_prepare_kprobe(p)
+#define arm_kprobe_ftrace(p) do {} while (0)
+#define disarm_kprobe_ftrace(p) do {} while (0)
+#endif
+
/* Arm a kprobe with text_mutex */
static void __kprobes arm_kprobe(struct kprobe *kp)
{
+ if (unlikely(kprobe_ftrace(kp))) {
+ arm_kprobe_ftrace(kp);
+ return;
+ }
/*
* Here, since __arm_kprobe() doesn't use stop_machine(),
* this doesn't cause deadlock on text_mutex. So, we don't
@@ -929,11 +988,15 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
}

/* Disarm a kprobe with text_mutex */
-static void __kprobes disarm_kprobe(struct kprobe *kp)
+static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
{
+ if (unlikely(kprobe_ftrace(kp))) {
+ disarm_kprobe_ftrace(kp);
+ return;
+ }
/* Ditto */
mutex_lock(&text_mutex);
- __disarm_kprobe(kp, true);
+ __disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
}

@@ -1343,6 +1406,26 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
struct module **probed_mod)
{
int ret = 0;
+ unsigned long ftrace_addr;
+
+ /*
+ * If the address is located on a ftrace nop, set the
+ * breakpoint to the following instruction.
+ */
+ ftrace_addr = ftrace_location((unsigned long)p->addr);
+ if (ftrace_addr) {
+#ifdef KPROBES_CAN_USE_FTRACE
+ /* Given address is not on the instruction boundary */
+ if ((unsigned long)p->addr != ftrace_addr)
+ return -EILSEQ;
+ /* break_handler (jprobe) can not work with ftrace */
+ if (p->break_handler)
+ return -EINVAL;
+ p->flags |= KPROBE_FLAG_FTRACE;
+#else /* !KPROBES_CAN_USE_FTRACE */
+ return -EINVAL;
+#endif
+ }

jump_label_lock();
preempt_disable();
@@ -1350,7 +1433,6 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
/* Ensure it is not in reserved area nor out of text */
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
- ftrace_text_reserved(p->addr, p->addr) ||
jump_label_text_reserved(p->addr, p->addr)) {
ret = -EINVAL;
goto out;
@@ -1422,7 +1504,7 @@ int __kprobes register_kprobe(struct kprobe *p)
}

mutex_lock(&text_mutex); /* Avoiding text modification */
- ret = arch_prepare_kprobe(p);
+ ret = prepare_kprobe(p);
mutex_unlock(&text_mutex);
if (ret)
goto out;
@@ -1480,7 +1562,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)

/* Try to disarm and disable this/parent probe */
if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
- disarm_kprobe(orig_p);
+ disarm_kprobe(orig_p, true);
orig_p->flags |= KPROBE_FLAG_DISABLED;
}
}
@@ -2078,10 +2160,11 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,

if (!pp)
pp = p;
- seq_printf(pi, "%s%s%s\n",
+ seq_printf(pi, "%s%s%s%s\n",
(kprobe_gone(p) ? "[GONE]" : ""),
((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""),
- (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
+ (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
+ (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
}

static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -2160,14 +2243,12 @@ static void __kprobes arm_all_kprobes(void)
goto already_enabled;

/* Arming kprobes doesn't optimize kprobe itself */
- mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
if (!kprobe_disabled(p))
- __arm_kprobe(p);
+ arm_kprobe(p);
}
- mutex_unlock(&text_mutex);

kprobes_all_disarmed = false;
printk(KERN_INFO "Kprobes globally enabled\n");
@@ -2195,15 +2276,13 @@ static void __kprobes disarm_all_kprobes(void)
kprobes_all_disarmed = true;
printk(KERN_INFO "Kprobes globally disabled\n");

- mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- __disarm_kprobe(p, false);
+ disarm_kprobe(p, false);
}
}
- mutex_unlock(&text_mutex);
mutex_unlock(&kprobe_mutex);

/* Wait for disarming all kprobes by optimizer */

Subject: [PATCH -tip v2 9/9] kprobes/x86: ftrace based optimization for x86

Add function-tracer-based kprobe optimization support
handlers for x86. This allows kprobes to use the function
tracer for probing on an mcount call.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

arch/x86/include/asm/kprobes.h | 1 +
arch/x86/kernel/kprobes.c | 48 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5478825..d3ddd17 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -27,6 +27,7 @@
#include <asm/insn.h>

#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define ARCH_SUPPORTS_KPROBES_ON_FTRACE

struct pt_regs;
struct kprobe;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e2f751e..4c46c55 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1052,6 +1052,54 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}

+#ifdef KPROBES_CAN_USE_FTRACE
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ regs->ip += sizeof(kprobe_opcode_t);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (p->pre_handler)
+ p->pre_handler(p, regs);
+
+ if (unlikely(p->post_handler)) {
+ /* Emulate singlestep as if there is a 5byte nop */
+ regs->ip = ip + MCOUNT_INSN_SIZE;
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ regs->ip = ip; /* Recover for next callback */
+ }
+end:
+ local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
+#endif
+
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();

Subject: [PATCH -tip v2 6/9] kprobes: cleanup to separate probe-able check

Separate probe-able address checking code from
register_kprobe().

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

kernel/kprobes.c | 82 ++++++++++++++++++++++++++++++------------------------
1 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7a8a122..6137fe3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1313,67 +1313,80 @@ static inline int check_kprobe_rereg(struct kprobe *p)
return ret;
}

-int __kprobes register_kprobe(struct kprobe *p)
+static __kprobes int check_kprobe_address_safe(struct kprobe *p,
+ struct module **probed_mod)
{
int ret = 0;
- struct kprobe *old_p;
- struct module *probed_mod;
- kprobe_opcode_t *addr;
-
- addr = kprobe_addr(p);
- if (IS_ERR(addr))
- return PTR_ERR(addr);
- p->addr = addr;
-
- ret = check_kprobe_rereg(p);
- if (ret)
- return ret;

jump_label_lock();
preempt_disable();
+
+ /* Ensure it is not in reserved area nor out of text */
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr) ||
jump_label_text_reserved(p->addr, p->addr)) {
ret = -EINVAL;
- goto cannot_probe;
+ goto out;
}

- /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
- p->flags &= KPROBE_FLAG_DISABLED;
-
- /*
- * Check if are we probing a module.
- */
- probed_mod = __module_text_address((unsigned long) p->addr);
- if (probed_mod) {
- /* Return -ENOENT if fail. */
- ret = -ENOENT;
+ /* Check if are we probing a module */
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
* its code to prohibit unexpected unloading.
*/
- if (unlikely(!try_module_get(probed_mod)))
- goto cannot_probe;
+ if (unlikely(!try_module_get(*probed_mod))) {
+ ret = -ENOENT;
+ goto out;
+ }

/*
* If the module freed .init.text, we couldn't insert
* kprobes in there.
*/
- if (within_module_init((unsigned long)p->addr, probed_mod) &&
- probed_mod->state != MODULE_STATE_COMING) {
- module_put(probed_mod);
- goto cannot_probe;
+ if (within_module_init((unsigned long)p->addr, *probed_mod) &&
+ (*probed_mod)->state != MODULE_STATE_COMING) {
+ module_put(*probed_mod);
+ *probed_mod = NULL;
+ ret = -ENOENT;
}
- /* ret will be updated by following code */
}
+out:
preempt_enable();
jump_label_unlock();

+ return ret;
+}
+
+int __kprobes register_kprobe(struct kprobe *p)
+{
+ int ret;
+ struct kprobe *old_p;
+ struct module *probed_mod;
+ kprobe_opcode_t *addr;
+
+ /* Adjust probe address from symbol */
+ addr = kprobe_addr(p);
+ if (IS_ERR(addr))
+ return PTR_ERR(addr);
+ p->addr = addr;
+
+ ret = check_kprobe_rereg(p);
+ if (ret)
+ return ret;
+
+ /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
+ p->flags &= KPROBE_FLAG_DISABLED;
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
- mutex_lock(&kprobe_mutex);

+ ret = check_kprobe_address_safe(p, &probed_mod);
+ if (ret)
+ return ret;
+
+ mutex_lock(&kprobe_mutex);
jump_label_lock(); /* needed to call jump_label_text_reserved() */

get_online_cpus(); /* For avoiding text_mutex deadlock. */
@@ -1410,11 +1423,6 @@ out:
module_put(probed_mod);

return ret;
-
-cannot_probe:
- preempt_enable();
- jump_label_unlock();
- return ret;
}
EXPORT_SYMBOL_GPL(register_kprobe);

Subject: [PATCH -tip v2 4/9] ftrace: add ftrace_set_filter_ip() for address based filter

Add a new filter update interface, ftrace_set_filter_ip(),
to set the ftrace filter by ip address, not only by glob pattern.
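
A hypothetical usage sketch (the ops and symbol are placeholders):
the address must be one that ftrace knows about (an mcount call
site), otherwise the update fails with -EINVAL; removing an address
that was never added fails with -ENOENT, and a non-zero reset
clears the existing filter before applying the change.

	/* my_ops: an already initialized ftrace_ops */
	unsigned long ip;
	int ret;

	/* placeholder: must resolve to an mcount call site */
	ip = kallsyms_lookup_name("schedule");

	/* add ip to my_ops' filter */
	ret = ftrace_set_filter_ip(&my_ops, ip, 0, 0);

	/* later: remove the same ip again */
	ret = ftrace_set_filter_ip(&my_ops, ip, 1, 0);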

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
---

include/linux/ftrace.h | 3 ++
kernel/trace/ftrace.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 11abe4e..f3c5487 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -263,6 +263,8 @@ struct dyn_ftrace {
};

int ftrace_force_update(void);
+int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int remove, int reset);
int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset);
int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
@@ -432,6 +434,7 @@ static inline int ftrace_text_reserved(void *start, void *end)
*/
#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+#define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; })
#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
#define ftrace_free_filter(ops) do { } while (0)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 357b15b..3ce1219 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3175,8 +3175,27 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf,
}

static int
-ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
- int reset, int enable)
+ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
+{
+ struct ftrace_func_entry *entry;
+
+ if (!ftrace_location(ip))
+ return -EINVAL;
+
+ if (remove) {
+ entry = ftrace_lookup_ip(hash, ip);
+ if (!entry)
+ return -ENOENT;
+ free_hash_entry(hash, entry);
+ return 0;
+ }
+
+ return add_hash_entry(hash, ip);
+}
+
+static int
+ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
+ unsigned long ip, int remove, int reset, int enable)
{
struct ftrace_hash **orig_hash;
struct ftrace_hash *hash;
@@ -3205,6 +3224,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
ret = -EINVAL;
goto out_regex_unlock;
}
+ if (ip) {
+ ret = ftrace_match_addr(hash, ip, remove);
+ if (ret < 0)
+ goto out_regex_unlock;
+ }

mutex_lock(&ftrace_lock);
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
@@ -3221,6 +3245,37 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
return ret;
}

+static int
+ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
+ int reset, int enable)
+{
+ return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
+}
+
+/**
+ * ftrace_set_filter_ip - set a function to filter on in ftrace by address
+ * @ops - the ops to set the filter with
+ * @ip - the address to add to or remove from the filter.
+ * @remove - non zero to remove the ip from the filter
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Filters denote which functions should be enabled when tracing is enabled
+ * If @ip is NULL, it failes to update filter.
+ */
+int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int remove, int reset)
+{
+ return ftrace_set_addr(ops, ip, remove, reset, 1);
+}
+EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
+
+static int
+ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
+ int reset, int enable)
+{
+ return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable);
+}
+
/**
* ftrace_set_filter - set a function to filter on in ftrace
* @ops - the ops to set the filter with

2012-06-05 11:48:51, by Steven Rostedt
Subject: Re: [PATCH -tip v2 0/9] ftrace,kprobes: Ftrace-based kprobe optimization

On Tue, 2012-06-05 at 19:27 +0900, Masami Hiramatsu wrote:
> Hi Steven,
>
> Here is version 2 of the kprobe-ftrace patches. Since I didn't
> touch the ftrace part in this version, the old gcc bug is still
> there. So, if you have another implementation, please replace the
> first three patches in this series with your patch.
>

Thanks Masami,

I'll be incorporating these patches into my own set and see how they
fit. I would have had more done yesterday, but I'm also testing against
PPC and discovered a bug in the PPC function tracing and was
sidetracked fixing it (it ended up being a new function that had to be
tagged with 'notrace').


> ftrace_ops.regs_func is still under discussion because older
> gcc doesn't support initializing an unnamed union member.

Right, I'm working on adding the ops and regs parameters for all users,
and on making sure this works for other archs. I'll be testing
against i386, x86_64, PPC64, PPC32, and ARM (snowball board, and this is
the perfect excuse for me to get my panda board running too :-)

Thanks!

-- Steve

>
> In this version, I've removed the notrace dependency from __kprobes.
> Instead, the ftrace_ops filter is set correctly before the ftrace_ops
> is registered, so that it doesn't trace all functions.
>
> Changes in v2:
> - Fix ftrace_ops registration to happen right after setting its filter.
> - Unregister the ftrace_ops when no kprobe is using it.
> - Remove the notrace dependency from the __kprobes macro.
>

2012-06-05 20:37:50, by Steven Rostedt
Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

On Tue, 2012-06-05 at 19:28 +0900, Masami Hiramatsu wrote:
> Add a register saving/restoring sequence to ftrace_caller
> on i386 to enable the SAVE_REGS feature.
>
> Signed-off-by: Masami Hiramatsu <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> ---
>
> arch/x86/include/asm/ftrace.h | 2 +
> arch/x86/kernel/entry_32.S | 64 +++++++++++++++++++++++++++++++++++------
> 2 files changed, 56 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
> index ffb9564..e2f7269 100644
> --- a/arch/x86/include/asm/ftrace.h
> +++ b/arch/x86/include/asm/ftrace.h
> @@ -32,7 +32,7 @@
> #define MCOUNT_ADDR ((long)(mcount))
> #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
>
> -#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64)
> +#if defined(CONFIG_DYNAMIC_FTRACE)
> #define ARCH_SUPPORTS_FTRACE_SAVE_REGS 1
> #endif
>
> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
> index 01ccf9b..79c45a2 100644
> --- a/arch/x86/kernel/entry_32.S
> +++ b/arch/x86/kernel/entry_32.S
> @@ -1095,24 +1095,66 @@ ENTRY(mcount)
> ret
> END(mcount)
>
> + .macro FTRACE_SAVE_ALL
> + /* eflags is saved on cs */
> + subl $8, %esp /* skip ip and orig_ax */
> + pushl %gs
> + pushl %fs
> + pushl %es
> + pushl %ds
> + pushl %eax
> + pushl %ebp
> + pushl %edi
> + pushl %esi
> + pushl %edx
> + pushl %ecx
> + pushl %ebx
> + movl 14*4(%esp), %eax /* Load return address */
> + pushl %eax /* Save return address (+4) */
> + subl $MCOUNT_INSN_SIZE, %eax
> + movl %eax, 12*4+4(%esp) /* Store IP */
> + movl 13*4+4(%esp), %edx /* Load flags */
> + movl %edx, 14*4+4(%esp) /* Store flags */
> + movl $__KERNEL_CS, %edx
> + movl %edx, 13*4+4(%esp)
> + .endm
> +
> + .macro FTRACE_RESTORE_ALL
> + movl 14*4+4(%esp), %eax /* Load flags */
> + movl %eax, 13*4+4(%esp) /* Restore flags */
> + popl %eax
> + movl %eax, 14*4(%esp) /* Restore return address */

I'm not sure we really need to restore all the regs. I'll keep this for
now, but as an optimization, we could just restore the ones that
mcount needs to.

Or do you expect kprobes to change any of theses?


> + popl %ebx
> + popl %ecx
> + popl %edx
> + popl %esi
> + popl %edi
> + popl %ebp
> + popl %eax
> + popl %ds
> + popl %es
> + popl %fs
> + popl %gs
> + addl $8, %esp
> + .endm
> +
> ENTRY(ftrace_caller)
> + pushf /* flags on regs->cs */
> cmpl $0, function_trace_stop
> - jne ftrace_stub
> + jne ftrace_exit
> +
> + FTRACE_SAVE_ALL
>
> - pushl %eax
> - pushl %ecx
> - pushl %edx
> - movl 0xc(%esp), %eax
> movl 0x4(%ebp), %edx
> - subl $MCOUNT_INSN_SIZE, %eax
> + lea 4(%esp), %ecx
>
> .globl ftrace_call
> ftrace_call:
> call ftrace_stub
>
> - popl %edx
> - popl %ecx
> - popl %eax
> + FTRACE_RESTORE_ALL
> +
> + popf

Flags do not need to be restored. mcount calls give no guarantee that
flags will be persistent.

-- Steve

> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> .globl ftrace_graph_call
> ftrace_graph_call:
> @@ -1122,6 +1164,10 @@ ftrace_graph_call:
> .globl ftrace_stub
> ftrace_stub:
> ret
> +
> +ftrace_exit:
> + popf
> + ret
> END(ftrace_caller)
>
> #else /* ! CONFIG_DYNAMIC_FTRACE */

2012-06-05 21:24:35, by Frank Ch. Eigler
Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

Hi -

On Tue, Jun 05, 2012 at 04:37:46PM -0400, Steven Rostedt wrote:
> [...]
> I'm not sure we really need to restore all the regs. I'll keep this for
> now, but as an optimization, we could just restore the ones that
> mcount needs to.
>
> Or do you expect kprobes to change any of theses?

That would be the way for a kprobe to modify variables/values that
happen to be in the registers. In systemtap, for example:

# stap -g -e 'probe kernel.function("foo") { $bar = 1 }'

- FChE

2012-06-05 21:51:20, by Andi Kleen
Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

Masami Hiramatsu <[email protected]> writes:
>
> + .macro FTRACE_SAVE_ALL
> + /* eflags is saved on cs */
> + subl $8, %esp /* skip ip and orig_ax */
> + pushl %gs
> + pushl %fs

For pure in kernel use you don't need to save/restore fs/gs
This is only needed on kernel/user space boundaries.

And for this usage probably also not flags


-Andi

--
[email protected] -- Speaking for myself only

2012-06-05 23:37:56, by Steven Rostedt
Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

On Tue, 2012-06-05 at 17:24 -0400, Frank Ch. Eigler wrote:
> Hi -
>
> On Tue, Jun 05, 2012 at 04:37:46PM -0400, Steven Rostedt wrote:
> > [...]
> > I'm not sure we really need to restore all the regs. I'll keep this for
> > now, but as an optimization, we could just restore the ones that
> > mcount needs to.
> >
> > Or do you expect kprobes to change any of theses?
>
> That would be the way for a kprobe to modify variables/values that
> happen to be in the registers. In systemtap, for example:
>
> # stap -g -e 'probe kernel.function("foo") { $bar = 1 }'
>

And why would we want to allow this?


Modifying variables with probes is another way to lead to disaster. If
the system did not intend for a variable to be a certain value, why let
someone change it?

What real world example leads to external sources modifying internal
core variables? With the obvious exception of rootkits.

-- Steve

2012-06-05 23:41:47, by Frank Ch. Eigler
Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

Hi -

> > That would be the way for a kprobe to modify variables/values that
> > happen to be in the registers. In systemtap, for example:
> > # stap -g -e 'probe kernel.function("foo") { $bar = 1 }'
>
> And why would we want to allow this?
> Modifying variables with probes is another way to lead to disaster. [...]
> What real world example leads to external sources modifying internal
> core variables? With the obvious exception of rootkits.

Among others, systemtap has been successfully used for fault injection
for development/testing, as well as band-aids for kernel security
vulnerabilities, where a small change of state can improve the state
of the system. Obviously, this functionality is restricted to highly
privileged users.

- FChE

Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

(2012/06/06 6:51), Andi Kleen wrote:
> Masami Hiramatsu <[email protected]> writes:
>>
>> + .macro FTRACE_SAVE_ALL
>> + /* eflags is saved on cs */
>> + subl $8, %esp /* skip ip and orig_ax */
>> + pushl %gs
>> + pushl %fs
>
> For pure in kernel use you don't need to save/restore fs/gs
> This is only needed on kernel/user space boundaries.
>
> And for this usage probably also not flags

I see; in most cases, users don't really need it. But in some
cases, it can easily be imagined that pt_regs is used for dumping
all registers. Since this ftrace-based kprobe optimization is done
transparently, users may see that the results of dumping registers
differ even if the probe moves just one instruction ahead. I'd like
to avoid this kind of difference.

Thank you,

--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

(2012/06/06 8:41), Frank Ch. Eigler wrote:
> Hi -
>
>>> That would be the way for a kprobe to modify variables/values that
>>> happen to be in the registers. In systemtap, for example:
>>> # stap -g -e 'probe kernel.function("foo") { $bar = 1 }'
>>
>> And why would we want to allow this?
>> Modifying variables with probes is another way to lead to disaster. [...]
>> What real world example leads to external sources modifying internal
>> core variables? With the obvious exception of rootkits.
>
> Among others, systemtap has been successfully used for fault injection
> for development/testing, as well as band-aids for kernel security
> vulnerabilities, where a small change of state can improve the state
> of the system. Obviously, this functionality is restricted to highly
> privileged users.

I agree with Frank. Register restoring should be done as long as
ftrace is used for kprobes. Of course, one reason is fault
injection, which is very useful for debugging system failures.
Another technical reason is that we should do the "optimization"
transparently. IMHO, whatever normal kprobes can do, optimized
kprobes should also be able to do.

But if you introduce an FTRACE_OPS_FL_RSTR_REGS flag for restoring
registers, it would be possible to provide a corresponding flag
on the kprobes side. (perhaps KPROBE_FLAG_NOMODREGS? :))

Thank you,

--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2012-06-06 14:46:29, by Steven Rostedt
Subject: Re: [PATCH -tip v2 3/9] ftrace/x86: Support SAVE_REGS feature on i386

On Wed, 2012-06-06 at 23:37 +0900, Masami Hiramatsu wrote:

> I agree with Frank. Register restoring should be done as long as
> ftrace is used for kprobes. Of course, one reason is fault
> injection, which is very useful for debugging system failures.
> Another technical reason is that we should do the "optimization"
> transparently. IMHO, whatever normal kprobes can do, optimized
> kprobes should also be able to do.
>
> But if you introduce an FTRACE_OPS_FL_RSTR_REGS flag for restoring
> registers, it would be possible to provide a corresponding flag
> on the kprobes side. (perhaps KPROBE_FLAG_NOMODREGS? :))

That's fine, my patch set restores the regs anyway.

-- Steve

Subject: [tip:perf/core] ftrace: add ftrace_set_filter_ip() for address based filter

Commit-ID: 647664eaf4033501739ac1f42dd52ce8c9266ccc
Gitweb: http://git.kernel.org/tip/647664eaf4033501739ac1f42dd52ce8c9266ccc
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Tue, 5 Jun 2012 19:28:08 +0900
Committer: Steven Rostedt <[email protected]>
CommitDate: Tue, 31 Jul 2012 10:29:55 -0400

ftrace: add ftrace_set_filter_ip() for address based filter

Add a new filter update interface, ftrace_set_filter_ip(),
to set the ftrace filter by ip address, not only by glob pattern.

Link: http://lkml.kernel.org/r/[email protected]

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: "Frank Ch. Eigler" <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
include/linux/ftrace.h | 3 ++
kernel/trace/ftrace.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9962e95..3e71112 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -317,6 +317,8 @@ struct dyn_ftrace {
};

int ftrace_force_update(void);
+int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int remove, int reset);
int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset);
int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
@@ -544,6 +546,7 @@ static inline int ftrace_text_reserved(void *start, void *end)
*/
#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+#define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; })
#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
#define ftrace_free_filter(ops) do { } while (0)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 528d997..9dcf15d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3242,8 +3242,27 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf,
}

static int
-ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
- int reset, int enable)
+ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
+{
+ struct ftrace_func_entry *entry;
+
+ if (!ftrace_location(ip))
+ return -EINVAL;
+
+ if (remove) {
+ entry = ftrace_lookup_ip(hash, ip);
+ if (!entry)
+ return -ENOENT;
+ free_hash_entry(hash, entry);
+ return 0;
+ }
+
+ return add_hash_entry(hash, ip);
+}
+
+static int
+ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
+ unsigned long ip, int remove, int reset, int enable)
{
struct ftrace_hash **orig_hash;
struct ftrace_hash *hash;
@@ -3272,6 +3291,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
ret = -EINVAL;
goto out_regex_unlock;
}
+ if (ip) {
+ ret = ftrace_match_addr(hash, ip, remove);
+ if (ret < 0)
+ goto out_regex_unlock;
+ }

mutex_lock(&ftrace_lock);
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
@@ -3288,6 +3312,37 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
return ret;
}

+static int
+ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
+ int reset, int enable)
+{
+ return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
+}
+
+/**
+ * ftrace_set_filter_ip - set a function to filter on in ftrace by address
+ * @ops - the ops to set the filter with
+ * @ip - the address to add to or remove from the filter.
+ * @remove - non zero to remove the ip from the filter
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Filters denote which functions should be enabled when tracing is enabled
+ * If @ip is NULL, it failes to update filter.
+ */
+int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int remove, int reset)
+{
+ return ftrace_set_addr(ops, ip, remove, reset, 1);
+}
+EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
+
+static int
+ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
+ int reset, int enable)
+{
+ return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable);
+}
+
/**
* ftrace_set_filter - set a function to filter on in ftrace
* @ops - the ops to set the filter with

2012-08-21 15:10:26, by Steven Rostedt
Subject: [tip:perf/core] kprobes: Inverse taking of module_mutex with kprobe_mutex

Commit-ID: 72ef3794c5cd5f5f0e6355c24a529224c449cd14
Gitweb: http://git.kernel.org/tip/72ef3794c5cd5f5f0e6355c24a529224c449cd14
Author: Steven Rostedt <[email protected]>
AuthorDate: Tue, 5 Jun 2012 19:28:14 +0900
Committer: Steven Rostedt <[email protected]>
CommitDate: Tue, 31 Jul 2012 10:29:55 -0400

kprobes: Inverse taking of module_mutex with kprobe_mutex

Currently module_mutex is taken before kprobe_mutex, but this
can cause issues when we have kprobes register ftrace, as the ftrace
mutex is taken before enabling a tracepoint, which currently takes
the module mutex.

If module_mutex is taken before kprobe_mutex, then we can not
have kprobes use the ftrace infrastructure.

There seems to be no reason that the kprobe_mutex can't be taken
before the module_mutex. Running lockdep shows that it is safe
among the kernels I've run.

Link: http://lkml.kernel.org/r/[email protected]

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: "Frank Ch. Eigler" <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
kernel/kprobes.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c62b854..7a8a122 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -561,9 +561,9 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
{
LIST_HEAD(free_list);

+ mutex_lock(&kprobe_mutex);
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
- mutex_lock(&kprobe_mutex);

/*
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -586,8 +586,8 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
/* Step 4: Free cleaned kprobes after quiesence period */
do_free_cleaned_kprobes(&free_list);

- mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);
+ mutex_unlock(&kprobe_mutex);

/* Step 5: Kick optimizer again if needed */
if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))

Subject: [tip:perf/core] kprobes: cleanup to separate probe-able check

Commit-ID: f7fa6ef0ded995aad68650a877198f70e44b7621
Gitweb: http://git.kernel.org/tip/f7fa6ef0ded995aad68650a877198f70e44b7621
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Tue, 5 Jun 2012 19:28:20 +0900
Committer: Steven Rostedt <[email protected]>
CommitDate: Tue, 31 Jul 2012 10:29:56 -0400

kprobes: cleanup to separate probe-able check

Separate probe-able address checking code from
register_kprobe().

Link: http://lkml.kernel.org/r/[email protected]

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: "Frank Ch. Eigler" <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
kernel/kprobes.c | 82 +++++++++++++++++++++++++++++------------------------
1 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7a8a122..6137fe3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1313,67 +1313,80 @@ static inline int check_kprobe_rereg(struct kprobe *p)
return ret;
}

-int __kprobes register_kprobe(struct kprobe *p)
+static __kprobes int check_kprobe_address_safe(struct kprobe *p,
+ struct module **probed_mod)
{
int ret = 0;
- struct kprobe *old_p;
- struct module *probed_mod;
- kprobe_opcode_t *addr;
-
- addr = kprobe_addr(p);
- if (IS_ERR(addr))
- return PTR_ERR(addr);
- p->addr = addr;
-
- ret = check_kprobe_rereg(p);
- if (ret)
- return ret;

jump_label_lock();
preempt_disable();
+
+ /* Ensure it is not in reserved area nor out of text */
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr) ||
jump_label_text_reserved(p->addr, p->addr)) {
ret = -EINVAL;
- goto cannot_probe;
+ goto out;
}

- /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
- p->flags &= KPROBE_FLAG_DISABLED;
-
- /*
- * Check if are we probing a module.
- */
- probed_mod = __module_text_address((unsigned long) p->addr);
- if (probed_mod) {
- /* Return -ENOENT if fail. */
- ret = -ENOENT;
+ /* Check if we are probing a module */
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
* its code to prohibit unexpected unloading.
*/
- if (unlikely(!try_module_get(probed_mod)))
- goto cannot_probe;
+ if (unlikely(!try_module_get(*probed_mod))) {
+ ret = -ENOENT;
+ goto out;
+ }

/*
* If the module freed .init.text, we couldn't insert
* kprobes in there.
*/
- if (within_module_init((unsigned long)p->addr, probed_mod) &&
- probed_mod->state != MODULE_STATE_COMING) {
- module_put(probed_mod);
- goto cannot_probe;
+ if (within_module_init((unsigned long)p->addr, *probed_mod) &&
+ (*probed_mod)->state != MODULE_STATE_COMING) {
+ module_put(*probed_mod);
+ *probed_mod = NULL;
+ ret = -ENOENT;
}
- /* ret will be updated by following code */
}
+out:
preempt_enable();
jump_label_unlock();

+ return ret;
+}
+
+int __kprobes register_kprobe(struct kprobe *p)
+{
+ int ret;
+ struct kprobe *old_p;
+ struct module *probed_mod;
+ kprobe_opcode_t *addr;
+
+ /* Adjust probe address from symbol */
+ addr = kprobe_addr(p);
+ if (IS_ERR(addr))
+ return PTR_ERR(addr);
+ p->addr = addr;
+
+ ret = check_kprobe_rereg(p);
+ if (ret)
+ return ret;
+
+ /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
+ p->flags &= KPROBE_FLAG_DISABLED;
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
- mutex_lock(&kprobe_mutex);

+ ret = check_kprobe_address_safe(p, &probed_mod);
+ if (ret)
+ return ret;
+
+ mutex_lock(&kprobe_mutex);
jump_label_lock(); /* needed to call jump_label_text_reserved() */

get_online_cpus(); /* For avoiding text_mutex deadlock. */
@@ -1410,11 +1423,6 @@ out:
module_put(probed_mod);

return ret;
-
-cannot_probe:
- preempt_enable();
- jump_label_unlock();
- return ret;
}
EXPORT_SYMBOL_GPL(register_kprobe);

Subject: [tip:perf/core] kprobes: Move locks into appropriate functions

Commit-ID: 25764288d8dc4792f0f487baf043ccfee5d8c2ba
Gitweb: http://git.kernel.org/tip/25764288d8dc4792f0f487baf043ccfee5d8c2ba
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Tue, 5 Jun 2012 19:28:26 +0900
Committer: Steven Rostedt <[email protected]>
CommitDate: Tue, 31 Jul 2012 10:29:57 -0400

kprobes: Move locks into appropriate functions

Break a big critical region into fine-grained pieces in the
kprobe registration path. This helps us solve the circular
locking dependency that arises when introducing ftrace-based
kprobes.

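For readers tracking the lock changes, the resulting nesting after this
patch is roughly the following (a sketch derived from the hunks below,
not part of the patch itself):

	register_kprobe():
		mutex_lock(&kprobe_mutex);
			mutex_lock(&text_mutex);	/* only around arch_prepare_kprobe() */
			mutex_unlock(&text_mutex);
			...
		mutex_unlock(&kprobe_mutex);

	try_to_optimize_kprobe() / register_aggr_kprobe():
		jump_label_lock();
		get_online_cpus();			/* register_aggr_kprobe() only */
		mutex_lock(&text_mutex);
		...
		mutex_unlock(&text_mutex);
		put_online_cpus();
		jump_label_unlock();
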
Link: http://lkml.kernel.org/r/[email protected]

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: "Frank Ch. Eigler" <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
kernel/kprobes.c | 63 ++++++++++++++++++++++++++++++++++++------------------
1 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6137fe3..9e47f44 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -759,20 +759,28 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
struct kprobe *ap;
struct optimized_kprobe *op;

+ /* For preparing optimization, jump_label_text_reserved() is called */
+ jump_label_lock();
+ mutex_lock(&text_mutex);
+
ap = alloc_aggr_kprobe(p);
if (!ap)
- return;
+ goto out;

op = container_of(ap, struct optimized_kprobe, kp);
if (!arch_prepared_optinsn(&op->optinsn)) {
/* If failed to setup optimizing, fallback to kprobe */
arch_remove_optimized_kprobe(op);
kfree(op);
- return;
+ goto out;
}

init_aggr_kprobe(ap, p);
- optimize_kprobe(ap);
+ optimize_kprobe(ap); /* This just kicks optimizer thread */
+
+out:
+ mutex_unlock(&text_mutex);
+ jump_label_unlock();
}

#ifdef CONFIG_SYSCTL
@@ -1144,12 +1152,6 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
if (p->post_handler && !ap->post_handler)
ap->post_handler = aggr_post_handler;

- if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
- ap->flags &= ~KPROBE_FLAG_DISABLED;
- if (!kprobes_all_disarmed)
- /* Arm the breakpoint again. */
- __arm_kprobe(ap);
- }
return 0;
}

@@ -1189,11 +1191,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
int ret = 0;
struct kprobe *ap = orig_p;

+ /* For preparing optimization, jump_label_text_reserved() is called */
+ jump_label_lock();
+ /*
+ * Get online CPUs to avoid text_mutex deadlock with stop machine,
+ * which is invoked by unoptimize_kprobe() in add_new_kprobe()
+ */
+ get_online_cpus();
+ mutex_lock(&text_mutex);
+
if (!kprobe_aggrprobe(orig_p)) {
/* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
ap = alloc_aggr_kprobe(orig_p);
- if (!ap)
- return -ENOMEM;
+ if (!ap) {
+ ret = -ENOMEM;
+ goto out;
+ }
init_aggr_kprobe(ap, orig_p);
} else if (kprobe_unused(ap))
/* This probe is going to die. Rescue it */
@@ -1213,7 +1226,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
* free aggr_probe. It will be used next time, or
* freed by unregister_kprobe.
*/
- return ret;
+ goto out;

/* Prepare optimized instructions if possible. */
prepare_optimized_kprobe(ap);
@@ -1228,7 +1241,20 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,

/* Copy ap's insn slot to p */
copy_kprobe(ap, p);
- return add_new_kprobe(ap, p);
+ ret = add_new_kprobe(ap, p);
+
+out:
+ mutex_unlock(&text_mutex);
+ put_online_cpus();
+ jump_label_unlock();
+
+ if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
+ ap->flags &= ~KPROBE_FLAG_DISABLED;
+ if (!kprobes_all_disarmed)
+ /* Arm the breakpoint again. */
+ arm_kprobe(ap);
+ }
+ return ret;
}

static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -1387,10 +1413,6 @@ int __kprobes register_kprobe(struct kprobe *p)
return ret;

mutex_lock(&kprobe_mutex);
- jump_label_lock(); /* needed to call jump_label_text_reserved() */
-
- get_online_cpus(); /* For avoiding text_mutex deadlock. */
- mutex_lock(&text_mutex);

old_p = get_kprobe(p->addr);
if (old_p) {
@@ -1399,7 +1421,9 @@ int __kprobes register_kprobe(struct kprobe *p)
goto out;
}

+ mutex_lock(&text_mutex); /* Avoiding text modification */
ret = arch_prepare_kprobe(p);
+ mutex_unlock(&text_mutex);
if (ret)
goto out;

@@ -1408,15 +1432,12 @@ int __kprobes register_kprobe(struct kprobe *p)
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

if (!kprobes_all_disarmed && !kprobe_disabled(p))
- __arm_kprobe(p);
+ arm_kprobe(p);

/* Try to optimize kprobe */
try_to_optimize_kprobe(p);

out:
- mutex_unlock(&text_mutex);
- put_online_cpus();
- jump_label_unlock();
mutex_unlock(&kprobe_mutex);

if (probed_mod)

Subject: [tip:perf/core] kprobes: introduce ftrace based optimization

Commit-ID: ae6aa16fdc163afe6b04b6c073ad4ddd4663c03b
Gitweb: http://git.kernel.org/tip/ae6aa16fdc163afe6b04b6c073ad4ddd4663c03b
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Tue, 5 Jun 2012 19:28:32 +0900
Committer: Steven Rostedt <[email protected]>
CommitDate: Tue, 31 Jul 2012 10:29:58 -0400

kprobes: introduce ftrace based optimization

Introduce function trace based kprobes optimization.

With this optimization, kprobes placed on the mcount call
address use ftrace's mcount call instead of a breakpoint.
Furthermore, unlike the current jump-based optimization, this
works even with a preemptive kernel. Of course, this feature
works only if the probe is on the mcount call.

If kprobe.break_handler is set, that probe is not
optimized with ftrace (nor put on ftrace). The reason for this
limitation is that break_handler is used only by jprobes,
which change the ip address (to fetch the function
arguments), while the function tracer ignores a modified ip
address.

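As a concrete (hypothetical) illustration of the transparency: the user
registers an ordinary kprobe and never refers to ftrace. If the probe
address is exactly the mcount call site, register_kprobe() flags it
KPROBE_FLAG_FTRACE and arms it via ftrace; otherwise it stays a normal
breakpoint kprobe. Symbol and handler names below are made up.

	static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("hit %s at %p\n", p->symbol_name, p->addr);
		return 0;	/* let execution continue normally */
	}

	static struct kprobe my_kp = {
		.symbol_name = "do_fork",	/* example; must resolve to an
						 * mcount site for the ftrace-based
						 * path to be used */
		.pre_handler = my_pre_handler,
	};

	/* e.g. from a module init function: */
	ret = register_kprobe(&my_kp);	/* transparently ftrace-based if possible */
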
Changes in v2:
- Fix ftrace_ops registering right after setting its filter.
- Unregister ftrace_ops if there is no kprobe using it.
- Remove notrace dependency from __kprobes macro.

Link: http://lkml.kernel.org/r/[email protected]

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: "Frank Ch. Eigler" <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
include/linux/kprobes.h | 27 ++++++++++++
kernel/kprobes.c | 105 +++++++++++++++++++++++++++++++++++++++++------
2 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b6e1f8c..aa0d05e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -38,6 +38,7 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>
+#include <linux/ftrace.h>

#ifdef CONFIG_KPROBES
#include <asm/kprobes.h>
@@ -48,14 +49,26 @@
#define KPROBE_REENTER 0x00000004
#define KPROBE_HIT_SSDONE 0x00000008

+/*
+ * If function tracer is enabled and the arch supports full
+ * passing of pt_regs to function tracing, then kprobes can
+ * optimize on top of function tracing.
+ */
+#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
+ && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
+# define KPROBES_CAN_USE_FTRACE
+#endif
+
/* Attach to insert probes on any functions which should be ignored*/
#define __kprobes __attribute__((__section__(".kprobes.text")))
+
#else /* CONFIG_KPROBES */
typedef int kprobe_opcode_t;
struct arch_specific_insn {
int dummy;
};
#define __kprobes
+
#endif /* CONFIG_KPROBES */

struct kprobe;
@@ -128,6 +141,7 @@ struct kprobe {
* NOTE:
* this flag is only for optimized_kprobe.
*/
+#define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */

/* Has this kprobe gone ? */
static inline int kprobe_gone(struct kprobe *p)
@@ -146,6 +160,13 @@ static inline int kprobe_optimized(struct kprobe *p)
{
return p->flags & KPROBE_FLAG_OPTIMIZED;
}
+
+/* Does this kprobe use ftrace ? */
+static inline int kprobe_ftrace(struct kprobe *p)
+{
+ return p->flags & KPROBE_FLAG_FTRACE;
+}
+
/*
* Special probe type that uses setjmp-longjmp type tricks to resume
* execution at a specified entry with a matching prototype corresponding
@@ -295,6 +316,12 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
#endif

#endif /* CONFIG_OPTPROBES */
+#ifdef KPROBES_CAN_USE_FTRACE
+extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct pt_regs *regs);
+extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
+#endif
+

/* Get the kprobe at this addr (if any) - called with preemption disabled */
struct kprobe *get_kprobe(void *addr);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9e47f44..69c16ef 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -759,6 +759,10 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
struct kprobe *ap;
struct optimized_kprobe *op;

+ /* Impossible to optimize ftrace-based kprobe */
+ if (kprobe_ftrace(p))
+ return;
+
/* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock();
mutex_lock(&text_mutex);
@@ -915,9 +919,64 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
}
#endif /* CONFIG_OPTPROBES */

+#ifdef KPROBES_CAN_USE_FTRACE
+static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
+ .regs_func = kprobe_ftrace_handler,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
+};
+static int kprobe_ftrace_enabled;
+
+/* Must ensure p->addr is really on ftrace */
+static int __kprobes prepare_kprobe(struct kprobe *p)
+{
+ if (!kprobe_ftrace(p))
+ return arch_prepare_kprobe(p);
+
+ return arch_prepare_kprobe_ftrace(p);
+}
+
+/* Caller must lock kprobe_mutex */
+static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
+{
+ int ret;
+
+ ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr, 0, 0);
+ WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ kprobe_ftrace_enabled++;
+ if (kprobe_ftrace_enabled == 1) {
+ ret = register_ftrace_function(&kprobe_ftrace_ops);
+ WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ }
+}
+
+/* Caller must lock kprobe_mutex */
+static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
+{
+ int ret;
+
+ kprobe_ftrace_enabled--;
+ if (kprobe_ftrace_enabled == 0) {
+ ret = unregister_ftrace_function(&kprobe_ftrace_ops);
+ WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+ }
+ ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr, 1, 0);
+ WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+}
+#else /* !KPROBES_CAN_USE_FTRACE */
+#define prepare_kprobe(p) arch_prepare_kprobe(p)
+#define arm_kprobe_ftrace(p) do {} while (0)
+#define disarm_kprobe_ftrace(p) do {} while (0)
+#endif
+
/* Arm a kprobe with text_mutex */
static void __kprobes arm_kprobe(struct kprobe *kp)
{
+ if (unlikely(kprobe_ftrace(kp))) {
+ arm_kprobe_ftrace(kp);
+ return;
+ }
/*
* Here, since __arm_kprobe() doesn't use stop_machine(),
* this doesn't cause deadlock on text_mutex. So, we don't
@@ -929,11 +988,15 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
}

/* Disarm a kprobe with text_mutex */
-static void __kprobes disarm_kprobe(struct kprobe *kp)
+static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
{
+ if (unlikely(kprobe_ftrace(kp))) {
+ disarm_kprobe_ftrace(kp);
+ return;
+ }
/* Ditto */
mutex_lock(&text_mutex);
- __disarm_kprobe(kp, true);
+ __disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
}

@@ -1343,6 +1406,26 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
struct module **probed_mod)
{
int ret = 0;
+ unsigned long ftrace_addr;
+
+ /*
+ * If the address is located on a ftrace nop, set the
+ * breakpoint to the following instruction.
+ */
+ ftrace_addr = ftrace_location((unsigned long)p->addr);
+ if (ftrace_addr) {
+#ifdef KPROBES_CAN_USE_FTRACE
+ /* Given address is not on the instruction boundary */
+ if ((unsigned long)p->addr != ftrace_addr)
+ return -EILSEQ;
+ /* break_handler (jprobe) can not work with ftrace */
+ if (p->break_handler)
+ return -EINVAL;
+ p->flags |= KPROBE_FLAG_FTRACE;
+#else /* !KPROBES_CAN_USE_FTRACE */
+ return -EINVAL;
+#endif
+ }

jump_label_lock();
preempt_disable();
@@ -1350,7 +1433,6 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
/* Ensure it is not in reserved area nor out of text */
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
- ftrace_text_reserved(p->addr, p->addr) ||
jump_label_text_reserved(p->addr, p->addr)) {
ret = -EINVAL;
goto out;
@@ -1422,7 +1504,7 @@ int __kprobes register_kprobe(struct kprobe *p)
}

mutex_lock(&text_mutex); /* Avoiding text modification */
- ret = arch_prepare_kprobe(p);
+ ret = prepare_kprobe(p);
mutex_unlock(&text_mutex);
if (ret)
goto out;
@@ -1480,7 +1562,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)

/* Try to disarm and disable this/parent probe */
if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
- disarm_kprobe(orig_p);
+ disarm_kprobe(orig_p, true);
orig_p->flags |= KPROBE_FLAG_DISABLED;
}
}
@@ -2078,10 +2160,11 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,

if (!pp)
pp = p;
- seq_printf(pi, "%s%s%s\n",
+ seq_printf(pi, "%s%s%s%s\n",
(kprobe_gone(p) ? "[GONE]" : ""),
((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""),
- (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
+ (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
+ (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
}

static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -2160,14 +2243,12 @@ static void __kprobes arm_all_kprobes(void)
goto already_enabled;

/* Arming kprobes doesn't optimize kprobe itself */
- mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
if (!kprobe_disabled(p))
- __arm_kprobe(p);
+ arm_kprobe(p);
}
- mutex_unlock(&text_mutex);

kprobes_all_disarmed = false;
printk(KERN_INFO "Kprobes globally enabled\n");
@@ -2195,15 +2276,13 @@ static void __kprobes disarm_all_kprobes(void)
kprobes_all_disarmed = true;
printk(KERN_INFO "Kprobes globally disabled\n");

- mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- __disarm_kprobe(p, false);
+ disarm_kprobe(p, false);
}
}
- mutex_unlock(&text_mutex);
mutex_unlock(&kprobe_mutex);

/* Wait for disarming all kprobes by optimizer */

Subject: [tip:perf/core] kprobes/x86: ftrace based optimization for x86

Commit-ID: e52538965119319447c0800c534da73142c27be2
Gitweb: http://git.kernel.org/tip/e52538965119319447c0800c534da73142c27be2
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Tue, 5 Jun 2012 19:28:38 +0900
Committer: Steven Rostedt <[email protected]>
CommitDate: Tue, 31 Jul 2012 10:29:59 -0400

kprobes/x86: ftrace based optimization for x86

Add function-tracer-based kprobe optimization support
handlers on x86. This allows kprobes to use the function
tracer for probing on the mcount call.

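A short (hypothetical) note on what a handler sees under this scheme: the
pre_handler is invoked from kprobe_ftrace_handler below with the register
state ftrace saved at the mcount call, so on x86-64 the argument registers
are typically still live and can be inspected much as with a
breakpoint-based probe. Names below are made up.

	static int my_pre(struct kprobe *p, struct pt_regs *regs)
	{
		/* regs holds the state saved by ftrace at the mcount call */
		pr_info("%s: first arg = 0x%lx\n", p->symbol_name, regs->di);
		return 0;
	}
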
Link: http://lkml.kernel.org/r/[email protected]

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: "Frank Ch. Eigler" <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>

[ Updated to new port of ftrace save regs functions ]

Signed-off-by: Steven Rostedt <[email protected]>
---
arch/x86/include/asm/kprobes.h | 1 +
arch/x86/kernel/kprobes.c | 48 ++++++++++++++++++++++++++++++++++++++++
include/linux/kprobes.h | 2 +-
kernel/kprobes.c | 2 +-
4 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5478825..d3ddd17 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -27,6 +27,7 @@
#include <asm/insn.h>

#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define ARCH_SUPPORTS_KPROBES_ON_FTRACE

struct pt_regs;
struct kprobe;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e2f751e..47ae102 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1052,6 +1052,54 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}

+#ifdef KPROBES_CAN_USE_FTRACE
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ regs->ip += sizeof(kprobe_opcode_t);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (p->pre_handler)
+ p->pre_handler(p, regs);
+
+ if (unlikely(p->post_handler)) {
+ /* Emulate singlestep as if there is a 5-byte nop */
+ regs->ip = ip + MCOUNT_INSN_SIZE;
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ regs->ip = ip; /* Recover for next callback */
+ }
+end:
+ local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
+#endif
+
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index aa0d05e..23755ba 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -318,7 +318,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
#endif /* CONFIG_OPTPROBES */
#ifdef KPROBES_CAN_USE_FTRACE
extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct pt_regs *regs);
+ struct ftrace_ops *ops, struct pt_regs *regs);
extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
#endif

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 69c16ef..35b4315 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -921,7 +921,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)

#ifdef KPROBES_CAN_USE_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
- .regs_func = kprobe_ftrace_handler,
+ .func = kprobe_ftrace_handler,
.flags = FTRACE_OPS_FL_SAVE_REGS,
};
static int kprobe_ftrace_enabled;