Subject: [PATCH ftrace/core v4 0/4] ftrace, kprobes: Introduce IPMODIFY flag for ftrace_ops to detect conflicts

Hi,

Here is the 4th version of the series of patches which introduces
IPMODIFY flag for ftrace_ops to detect conflicts of ftrace users
who can modify regs->ip in their handler.
This version is basically an update of previous version, and
I've splitted a patch which adding IPMODIFY to kprobes from ftrace
IPMODIFY introducing patch.

Currently, only kprobes can change the regs->ip in the handler,
but recently kpatch is also want to change it. Moreover, since
the ftrace itself exported to modules, it might be considerable
senario.

Here we talked on github.
https://github.com/dynup/kpatch/issues/47

To protect modified regs-ip from each other, this series
introduces FTRACE_OPS_FL_IPMODIFY flag and ftrace now ensures
the flag can be set on each function entry location. If there
is someone who already reserve regs->ip on target function
entry, ftrace_set_filter_ip or register_ftrace_function will
return -EBUSY. Users must handle that.
The ftrace_ops with IPMODIFY flag requires at least one
entry for filter hash, and its notrace_hash must be empty,
because the IPMODIFY action is very address sensitve and
user must consider the ip address.

The 3rd patch adds a special reservation of IPMODIFY on the
jprobed address, since it is the only user who will change
the regs->ip. Other kprobes do not change it anymore.

Here is the new testcase.
-------
#!/bin/bash

echo "IPMODIFY test"
ADDR=0x`grep do_fork /proc/kallsyms | cut -f 1 -d" "`

echo "Case1: kprobe->jprobe->ipmodify(fail)"
modprobe kprobe_example
modprobe jprobe_example
insmod ./ipmodify.ko target=$ADDR && echo "Case1: [FAIL]" || echo "Case1: [OK]"
rmmod kprobe_example jprobe_example

echo "Case2: jprobe->kprobe->ipmodify(fail)"
modprobe jprobe_example
modprobe kprobe_example
insmod ./ipmodify.ko target=$ADDR && echo "Case2: [FAIL]" || echo "Case2: [OK]"
rmmod kprobe_example jprobe_example

echo "Case3: jprobe->ipmodify(fail)"
modprobe jprobe_example
insmod ./ipmodify.ko target=$ADDR && echo "Case3: [FAIL]" || echo "Case3: [OK]"
rmmod jprobe_example

echo "Case4: ipmodify->jprobe(fail)"
insmod ./ipmodify.ko target=$ADDR
modprobe jprobe_example && echo "Case4: [FAIL]" || echo "Case4: [OK]"
rmmod ipmodify

echo "Case5: ipmodify->kprobe->jprobe(fail)"
insmod ./ipmodify.ko target=$ADDR
modprobe kprobe_example
modprobe jprobe_example && echo "Case5: [FAIL]" || echo "Case5: [OK]"
rmmod ipmodify kprobe_example

echo "Case6: ipmodify with targeting all functions(fail)"
insmod ./ipmodify.ko && echo "Case6: [FAIL]" || echo "Case6: [OK]"
rmmod ipmodify

echo "Case7: ipmodify with notrace filter (fail)"
insmod ./ipmodify.ko target=$ADDR noexp="do_fork" && echo "Case7: [FAIL]" || echo "Case7: [OK]"
rmmod ipmodify
-------

And ipmodify.c
-------
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ftrace.h>


static void ftrace_ipmodify_handler(unsigned long a0, unsigned long a1,
struct ftrace_ops *op, struct pt_regs *regs)
{
return;
}

static struct ftrace_ops test_ops __read_mostly = {
.func = ftrace_ipmodify_handler,
.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

static unsigned long target;
module_param(target, ulong, 0444);
static char *noexp = "";
module_param(noexp, charp, 0444);

static int __init ipmodify_init(void)
{
int ret;
unsigned long ip = target;

if (strlen(noexp) != 0) {
ret = ftrace_set_notrace(&test_ops, noexp, 0, 0);
pr_err("ipmodify_test: set notrace \"%s\", ret = %d\n",
noexp, ret);
}
if (ret < 0)
return ret;

ret = ftrace_set_filter_ip(&test_ops, ip, 0, 0);
pr_err("ipmodify_test: set filter ip 0x%lx, ret = %d\n", ip, ret);

if (ret >= 0) {
ret = register_ftrace_function(&test_ops);
pr_err("ipmodify_test: register ftrace on 0x%lx, ret = %d\n",
ip, ret);
}

return ret;
}

static void __exit ipmodify_exit(void)
{
int ret;
unsigned long ip = target;

ret = unregister_ftrace_function(&test_ops);
pr_err("ipmodify_test: unregister ftrace on 0x%lx, ret = %d\n", ip, ret);
if (ret >= 0) {
ret = ftrace_set_filter_ip(&test_ops, ip, 1, 0);
pr_err("ipmodify_test: clear filter ip 0x%lx, ret = %d\n",
ip, ret);
}
}

module_init(ipmodify_init)
module_exit(ipmodify_exit)
MODULE_LICENSE("GPL");
-------

Thank you,

---

Masami Hiramatsu (4):
kprobes/ftrace: Recover original IP if pre_handler doesn't change it
ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict
kprobes: Add IPMODIFY flag to kprobe_ftrace_ops
kprobes: Set IPMODIFY flag only if the probe can change regs->ip


Documentation/kprobes.txt | 12 +--
Documentation/trace/ftrace.txt | 5 +
arch/x86/kernel/kprobes/ftrace.c | 9 ++
include/linux/ftrace.h | 16 ++++
kernel/kprobes.c | 123 +++++++++++++++++++++++++++++----
kernel/trace/ftrace.c | 143 +++++++++++++++++++++++++++++++++++++-
6 files changed, 279 insertions(+), 29 deletions(-)

--


Subject: [PATCH ftrace/core v4 1/4] kprobes/ftrace: Recover original IP if pre_handler doesn't change it

Recover original IP register if the pre_handler doesn't change it.
Since current kprobes doesn't expect that another ftrace handler
may change regs->ip, it sets kprobe.addr + MCOUNT_INSN_SIZE to
regs->ip and returns to ftrace.
This seems wrong behavior since kprobes can recover regs->ip
and safely pass it to another handler.

This adds a code which recovers original regs->ip passed from
ftrace right before returning to ftrace, so that another ftrace
user can change regs->ip.

Changes in v4:
- Fix description and remove bugfix tag.

Signed-off-by: Masami Hiramatsu <[email protected]>
---
arch/x86/kernel/kprobes/ftrace.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 717b02a..5f8f0b3 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -27,7 +27,7 @@

static nokprobe_inline
int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+ struct kprobe_ctlblk *kcb, unsigned long orig_ip)
{
/*
* Emulate singlestep (and also recover regs->ip)
@@ -39,6 +39,8 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
p->post_handler(p, regs, 0);
}
__this_cpu_write(current_kprobe, NULL);
+ if (orig_ip)
+ regs->ip = orig_ip;
return 1;
}

@@ -46,7 +48,7 @@ int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
if (kprobe_ftrace(p))
- return __skip_singlestep(p, regs, kcb);
+ return __skip_singlestep(p, regs, kcb, 0);
else
return 0;
}
@@ -71,13 +73,14 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
} else {
+ unsigned long orig_ip = regs->ip;
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);

__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs))
- __skip_singlestep(p, regs, kcb);
+ __skip_singlestep(p, regs, kcb, orig_ip);
/*
* If pre_handler returns !0, it sets regs->ip and
* resets current kprobe.

Subject: [PATCH ftrace/core v4 2/4] ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict

Introduce FTRACE_OPS_FL_IPMODIFY to avoid conflict among
ftrace users who may modify regs->ip to change the execution
path. If two or more users modify the regs->ip on the same
function entry, one of them will be broken. So they must add
IPMODIFY flag and make sure that ftrace_set_filter_ip() succeeds.

Note that ftrace doesn't allow ftrace_ops which has IPMODIFY
flag to have notrace hash, and the ftrace_ops must have a
filter hash (so that the ftrace_ops can hook only specific
entries), because it strongly depends on the address and
must be allowed for only few selected functions.

Changes in v4:
- Split kprobe's part from this patch.
- Add no-notrace hash check.
- Reject if the filter_hash is empty. Since IPMODIFY action
is very address sensitive, empty hash (means hooking all
functions) must not be allowed.

Changes in v3:
- Update for the latest ftrace/core.
- Add a comment about FTRACE_OPS_FL_* attribute flags.
- Don't check FTRACE_OPS_FL_SAVE_REGS in
__ftrace_hash_update_ipmodify().
- Fix comments.

Changes in v2:
- Add a description how __ftrace_hash_update_ipmodify() will
handle the given hashes (NULL and EMPTY_HASH cases).
- Clear FTRACE_OPS_FL_ENABLED after calling
__unregister_ftrace_function() in error path.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Namhyung Kim <[email protected]>
---
Documentation/trace/ftrace.txt | 5 +
include/linux/ftrace.h | 16 ++++
kernel/trace/ftrace.c | 143 +++++++++++++++++++++++++++++++++++++++-
3 files changed, 160 insertions(+), 4 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 2479b2a..aa7e5bd 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -234,6 +234,11 @@ of ftrace. Here is a list of some of the key files:
will be displayed on the same line as the function that
is returning registers.

+ If the callback registered to be traced by a function with
+ the "ip modify" attribute (thus the regs->ip can be changed),
+ an 'I' will be displayed on the same line as the function that
+ can be overridden.
+
function_profile_enabled:

When set it will enable all functions with either the function
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6bb5e3f..905ec9f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -59,6 +59,11 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
/*
* FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
* set in the flags member.
+ * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and
+ * IPMODIFY are a kind of attribute flags which can be set only before
+ * registering the ftrace_ops, and can not be modified while registered.
+ * Changing those attribute flags after regsitering ftrace_ops will
+ * cause unexpected results.
*
* ENABLED - set/unset when ftrace_ops is registered/unregistered
* DYNAMIC - set when ftrace_ops is registered to denote dynamically
@@ -89,6 +94,10 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
* INITIALIZED - The ftrace_ops has already been initialized (first use time
* register_ftrace_function() is called, it will initialized the ops)
* DELETED - The ops are being deleted, do not let them be registered again.
+ * IPMODIFY - The ops can modify the IP register. This can only be set with
+ * SAVE_REGS. If another ops is already registered for any of the
+ * functions that this ops will be registered for, then this ops
+ * will fail to register or set_filter_ip.
*/
enum {
FTRACE_OPS_FL_ENABLED = 1 << 0,
@@ -100,6 +109,7 @@ enum {
FTRACE_OPS_FL_STUB = 1 << 6,
FTRACE_OPS_FL_INITIALIZED = 1 << 7,
FTRACE_OPS_FL_DELETED = 1 << 8,
+ FTRACE_OPS_FL_IPMODIFY = 1 << 9,
};

/*
@@ -281,6 +291,7 @@ extern int ftrace_nr_registered_ops(void);
* ENABLED - the function is being traced
* REGS - the record wants the function to save regs
* REGS_EN - the function is set up to save regs.
+ * IPMODIFY - the record allows for the IP address to be changed.
*
* When a new ftrace_ops is registered and wants a function to save
* pt_regs, the rec->flag REGS is set. When the function has been
@@ -294,10 +305,11 @@ enum {
FTRACE_FL_REGS_EN = (1UL << 29),
FTRACE_FL_TRAMP = (1UL << 28),
FTRACE_FL_TRAMP_EN = (1UL << 27),
+ FTRACE_FL_IPMODIFY = (1UL << 26),
};

-#define FTRACE_REF_MAX_SHIFT 27
-#define FTRACE_FL_BITS 5
+#define FTRACE_REF_MAX_SHIFT 26
+#define FTRACE_FL_BITS 6
#define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1)
#define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT)
#define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 979bd8c..788c500 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1292,6 +1292,9 @@ ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
static void
ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);

+static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
+ struct ftrace_hash *new_hash);
+
static int
ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct ftrace_hash **dst, struct ftrace_hash *src)
@@ -1303,8 +1306,13 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct ftrace_hash *new_hash;
int size = src->count;
int bits = 0;
+ int ret;
int i;

+ /* Reject setting notrace hash on IPMODIFY ftrace_ops */
+ if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable)
+ return -EINVAL;
+
/*
* If the new source is empty, just free dst and assign it
* the empty_hash.
@@ -1338,6 +1346,16 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
}

update:
+ /* Make sure this can be applied if it is IPMODIFY ftrace_ops */
+ if (enable) {
+ /* IPMODIFY should be updated only when filter_hash updating */
+ ret = ftrace_hash_ipmodify_update(ops, new_hash);
+ if (ret < 0) {
+ free_ftrace_hash(new_hash);
+ return ret;
+ }
+ }
+
/*
* Remove the current set, update the hash and add
* them back.
@@ -1682,6 +1700,115 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
__ftrace_hash_rec_update(ops, filter_hash, 1);
}

+/*
+ * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
+ * or no-needed to update, -EBUSY if it detects a conflict of the flag
+ * on a ftrace_rec, and -EINVAL if the new_hash tries to trace all recs.
+ * Note that old_hash and new_hash has below meanings
+ * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
+ * - If the hash is EMPTY_HASH, it hits nothing
+ * - Anything else hits the recs which match the hash entries.
+ */
+static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
+ struct ftrace_hash *old_hash,
+ struct ftrace_hash *new_hash)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec, *end = NULL;
+ int in_old, in_new;
+
+ /* Only update if the ops has been registered */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return 0;
+
+ if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+ return 0;
+
+ /*
+ * Since the IPMODIFY is very address sensitive action, we do not allow
+ * ftrace_ops to set all functions to new hash.
+ */
+ if (!new_hash || !old_hash)
+ return -EINVAL;
+
+ /* Update rec->flags */
+ do_for_each_ftrace_rec(pg, rec) {
+ /* We need to update only differences of filter_hash */
+ in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
+ in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
+ if (in_old == in_new)
+ continue;
+
+ if (in_new) {
+ /* New entries must ensure no others are using it */
+ if (rec->flags & FTRACE_FL_IPMODIFY)
+ goto rollback;
+ rec->flags |= FTRACE_FL_IPMODIFY;
+ } else /* Removed entry */
+ rec->flags &= ~FTRACE_FL_IPMODIFY;
+ } while_for_each_ftrace_rec();
+
+ return 0;
+
+rollback:
+ end = rec;
+
+ /* Roll back what we did above */
+ do_for_each_ftrace_rec(pg, rec) {
+ if (rec == end)
+ goto err_out;
+
+ in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
+ in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
+ if (in_old == in_new)
+ continue;
+
+ if (in_new)
+ rec->flags &= ~FTRACE_FL_IPMODIFY;
+ else
+ rec->flags |= FTRACE_FL_IPMODIFY;
+ } while_for_each_ftrace_rec();
+
+err_out:
+ return -EBUSY;
+}
+
+static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops)
+{
+ struct ftrace_hash *hash = ops->filter_hash;
+
+ if (ftrace_hash_empty(hash))
+ hash = NULL;
+
+ return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash);
+}
+
+/* Disabling always succeeds */
+static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops)
+{
+ struct ftrace_hash *hash = ops->filter_hash;
+
+ if (ftrace_hash_empty(hash))
+ hash = NULL;
+
+ __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH);
+}
+
+static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
+ struct ftrace_hash *new_hash)
+{
+ struct ftrace_hash *old_hash = ops->filter_hash;
+
+ if (ftrace_hash_empty(old_hash))
+ old_hash = NULL;
+
+ if (ftrace_hash_empty(new_hash))
+ new_hash = NULL;
+
+ return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash);
+}
+
+
static void print_ip_ins(const char *fmt, unsigned char *p)
{
int i;
@@ -2322,6 +2449,15 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)

ops->flags |= FTRACE_OPS_FL_ENABLED;

+ ret = ftrace_hash_ipmodify_enable(ops);
+ if (ret < 0) {
+ /* Rollback registration process */
+ __unregister_ftrace_function(ops);
+ ftrace_start_up--;
+ ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+ return ret;
+ }
+
ftrace_hash_rec_enable(ops, 1);

ftrace_startup_enable(command);
@@ -2348,6 +2484,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
*/
WARN_ON_ONCE(ftrace_start_up < 0);

+ /* Disabling ipmodify never fails */
+ ftrace_hash_ipmodify_disable(ops);
ftrace_hash_rec_disable(ops, 1);

ops->flags &= ~FTRACE_OPS_FL_ENABLED;
@@ -2898,9 +3036,10 @@ static int t_show(struct seq_file *m, void *v)

seq_printf(m, "%ps", (void *)rec->ip);
if (iter->flags & FTRACE_ITER_ENABLED) {
- seq_printf(m, " (%ld)%s",
+ seq_printf(m, " (%ld)%s%s",
ftrace_rec_count(rec),
- rec->flags & FTRACE_FL_REGS ? " R" : " ");
+ rec->flags & FTRACE_FL_REGS ? " R" : " ",
+ rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ");
if (rec->flags & FTRACE_FL_TRAMP_EN) {
struct ftrace_ops *ops;


Subject: [PATCH ftrace/core v4 3/4] kprobes: Add IPMODIFY flag to kprobe_ftrace_ops

Add FTRACE_OPS_FL_IPMODIFY flag to kprobe_ftrace_ops
since kprobes can changes regs->ip.

Signed-off-by: Masami Hiramatsu <[email protected]>
---
kernel/kprobes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3214289..e52d86f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -915,7 +915,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
.func = kprobe_ftrace_handler,
- .flags = FTRACE_OPS_FL_SAVE_REGS,
+ .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};
static int kprobe_ftrace_enabled;


Subject: [PATCH ftrace/core v4 4/4] kprobes: Set IPMODIFY flag only if the probe can change regs->ip

Set FTRACE_OPS_FL_IPMODIFY flag only for the probes which can change
regs->ip, which has kprobe->break_handler.
Currently we can not put jprobe and another ftrace handler which
changes regs->ip on the same function because all kprobes have
FTRACE_OPS_FL_IPMODIFY flag. This removes FTRACE_OPS_FL_IPMODIFY
flag from kprobes and only when the user uses jprobe (or the
kprobe.break_handler != NULL) we add additinal ftrace_ops with
FTRACE_OPS_FL_IPMODIFY on target function.

Note about the implementation: This uses a dummy ftrace_ops to
reserve IPMODIFY flag on the given ftrace address, for the case
that we have a enabled kprobe on a function entry and a jprobe
is added on the same point. In that case, we already have a
ftrace_ops without IPMODIFY flag on the entry, and we have to
add another ftrace_ops with IPMODIFY on the same address.
If we put a same handler on both ftrace_ops, the handler can
be called twice on that entry until the first one is removed.
This means that the kprobe and the jprobe are called twice too,
and that will not what kprobes expected.
Thus I added a dummy ftrace_ops just for reserving IPMODIFY flag.

Changes in v4:
- Increment refcounter after succeeded to register ftrace_ops.

Changes in v3:
- Update __ftrace_add/remove_filter_ip() according to
Namhyng's comments (thanks!)
- Split out regs->ip recovering code from this patch.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Namhyung Kim <[email protected]>
---
Documentation/kprobes.txt | 12 ++--
kernel/kprobes.c | 125 +++++++++++++++++++++++++++++++++++++++------
2 files changed, 114 insertions(+), 23 deletions(-)

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 4bbeca8..177f051 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -264,15 +264,13 @@ stop-machine method that ksplice uses for supporting a CONFIG_PREEMPT=y
kernel.

NOTE for geeks:
-The jump optimization changes the kprobe's pre_handler behavior.
-Without optimization, the pre_handler can change the kernel's execution
+The jump optimization (and ftrace-based kprobes) changes the kprobe's
+pre_handler behavior.
+Without optimizations, the pre_handler can change the kernel's execution
path by changing regs->ip and returning 1. However, when the probe
is optimized, that modification is ignored. Thus, if you want to
-tweak the kernel's execution path, you need to suppress optimization,
-using one of the following techniques:
-- Specify an empty function for the kprobe's post_handler or break_handler.
- or
-- Execute 'sysctl -w debug.kprobes_optimization=n'
+tweak the kernel's execution path, you need to suppress optimization or
+notify your handler will modify regs->ip by setting p->break_handler.

1.5 Blacklist

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e52d86f..509875e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -915,10 +915,93 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
.func = kprobe_ftrace_handler,
- .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
};
static int kprobe_ftrace_enabled;

+static void kprobe_ftrace_stub(unsigned long a0, unsigned long a1,
+ struct ftrace_ops *op, struct pt_regs *regs)
+{
+ /* Do nothing. This is just a dummy handler */
+}
+
+/* This is only for checking conflict with other ftrace users */
+static struct ftrace_ops kprobe_ipmod_ftrace_ops __read_mostly = {
+ .func = kprobe_ftrace_stub,
+ .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
+};
+static int kprobe_ipmod_ftrace_enabled;
+
+static int __ftrace_add_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int *ref)
+{
+ int ret;
+
+ /* Try to set given ip to filter */
+ ret = ftrace_set_filter_ip(ops, ip, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ if (*ref == 0) {
+ ret = register_ftrace_function(ops);
+ if (ret < 0) {
+ /* Rollback the filter */
+ ftrace_set_filter_ip(ops, ip, 1, 0);
+ goto out;
+ }
+ }
+ (*ref)++;
+
+out:
+ return ret;
+}
+
+static int __ftrace_remove_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int *ref)
+{
+ int ret;
+
+ if (*ref == 1) {
+ ret = unregister_ftrace_function(ops);
+ if (ret < 0)
+ return ret;
+ /*Ignore failure, because it is already unregistered */
+ ftrace_set_filter_ip(ops, ip, 1, 0);
+ } else {
+ /* Try to remove given ip to filter */
+ ret = ftrace_set_filter_ip(ops, ip, 1, 0);
+ if (ret < 0)
+ return ret;
+ }
+
+ (*ref)--;
+
+ return 0;
+}
+
+static int try_reserve_ftrace_ipmodify(struct kprobe *p)
+{
+ if (!p->break_handler)
+ return 0;
+
+ return __ftrace_add_filter_ip(&kprobe_ipmod_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ipmod_ftrace_enabled);
+}
+
+static void release_ftrace_ipmodify(struct kprobe *p)
+{
+ int ret;
+
+ if (p->break_handler) {
+ ret = __ftrace_remove_filter_ip(&kprobe_ipmod_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ipmod_ftrace_enabled);
+ WARN(ret < 0, "Failed to release ftrace at %p (%d)\n",
+ p->addr, ret);
+ }
+}
+
/* Must ensure p->addr is really on ftrace */
static int prepare_kprobe(struct kprobe *p)
{
@@ -933,14 +1016,10 @@ static void arm_kprobe_ftrace(struct kprobe *p)
{
int ret;

- ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
- (unsigned long)p->addr, 0, 0);
+ ret = __ftrace_add_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ftrace_enabled);
WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
- kprobe_ftrace_enabled++;
- if (kprobe_ftrace_enabled == 1) {
- ret = register_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
- }
}

/* Caller must lock kprobe_mutex */
@@ -948,17 +1027,16 @@ static void disarm_kprobe_ftrace(struct kprobe *p)
{
int ret;

- kprobe_ftrace_enabled--;
- if (kprobe_ftrace_enabled == 0) {
- ret = unregister_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
- }
- ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
- (unsigned long)p->addr, 1, 0);
- WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ ret = __ftrace_remove_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ftrace_enabled);
+ WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n",
+ p->addr, ret);
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
+#define try_reserve_ftrace_ipmodify(p) (0)
+#define release_ftrace_ipmodify(p) do {} while (0)
#define arm_kprobe_ftrace(p) do {} while (0)
#define disarm_kprobe_ftrace(p) do {} while (0)
#endif
@@ -1502,6 +1580,14 @@ int register_kprobe(struct kprobe *p)
mutex_lock(&kprobe_mutex);

old_p = get_kprobe(p->addr);
+
+ /* Try to reserve ftrace ipmodify if needed */
+ if (kprobe_ftrace(p) && (!old_p || !old_p->break_handler)) {
+ ret = try_reserve_ftrace_ipmodify(p);
+ if (ret < 0)
+ goto out_noreserve;
+ }
+
if (old_p) {
/* Since this may unoptimize old_p, locking text_mutex. */
ret = register_aggr_kprobe(old_p, p);
@@ -1525,6 +1611,9 @@ int register_kprobe(struct kprobe *p)
try_to_optimize_kprobe(p);

out:
+ if (ret < 0 && kprobe_ftrace(p))
+ release_ftrace_ipmodify(p);
+out_noreserve:
mutex_unlock(&kprobe_mutex);

if (probed_mod)
@@ -1639,6 +1728,10 @@ static void __unregister_kprobe_bottom(struct kprobe *p)
{
struct kprobe *ap;

+ /* Release reserved ftrace ipmodify if needed */
+ if (kprobe_ftrace(p))
+ release_ftrace_ipmodify(p);
+
if (list_empty(&p->list))
/* This is an independent kprobe */
arch_remove_kprobe(p);