Subject: [PATCH -tip v2 0/3] ftrace, kprobes: Introduce IPMODIFY flag for ftrace_ops to detect conflicts

Hi,

Here is the version 2 of the series of patches which introduces
IPMODIFY flag for ftrace_ops to detect conflicts of ftrace users
who can modify regs->ip in their handler.
In this version, I fixed some bugs in previous version and
added a patch which made kprobe itself free from IPMODIFY
except for jprobe.

Currently, only kprobes can change the regs->ip in the handler,
but recently kpatch is also want to change it. Moreover, since
the ftrace itself exported to modules, it might be considerable
senario.

Here we talked on github.
https://github.com/dynup/kpatch/issues/47

To protect modified regs-ip from each other, this series
introduces FTRACE_OPS_FL_IPMODIFY flag and ftrace now ensures
the flag can be set on each function entry location. If there
is someone who already reserve regs->ip on target function
entry, ftrace_set_filter_ip or register_ftrace_function will
return -EBUSY. Users must handle that.

The 3rd patch adds a special reservation of IPMODIFY on the
jprobed address, since it is the only user who will change
the regs->ip. Other kprobes do not change it anymore.

Testing:
BTW, I've tested it with samples/kprobes/{k,j}probe_example.ko
and a small test module which I added to the last of this mail.

Here is the test script. I think I should put this under
tools/testing, maybe kprobes? or selftest/kprobes?

----
#!/bin/bash

echo "IPMODIFY test"
ADDR=0x`grep do_fork /proc/kallsyms | cut -f 1 -d" "`

echo "Case1: kprobe->jprobe->ipmodify(fail)"
modprobe kprobe_example
modprobe jprobe_example
insmod ./ipmodify.ko && echo "Case1: [FAIL]" || echo "Case1: [OK]"
rmmod kprobe_example jprobe_example

echo "Case2: jprobe->kprobe->ipmodify(fail)"
modprobe jprobe_example
modprobe kprobe_example
insmod ./ipmodify.ko && echo "Case2: [FAIL]" || echo "Case2: [OK]"
rmmod kprobe_example jprobe_example

echo "Case3: jprobe->ipmodify(fail)"
modprobe jprobe_example
insmod ./ipmodify.ko && echo "Case3: [FAIL]" || echo "Case3: [OK]"
rmmod jprobe_example

echo "Case4: ipmodify->jprobe(fail)"
insmod ./ipmodify.ko
modprobe jprobe_example && echo "Case4: [FAIL]" || echo "Case4: [OK]"
rmmod ipmodify

echo "Case5: ipmodify->kprobe->jprobe(fail)"
insmod ./ipmodify.ko
modprobe kprobe_example
modprobe jprobe_example && echo "Case5: [FAIL]" || echo "Case5: [OK]"
rmmod ipmodify kprobe_example
-----

Thank you,

---

Masami Hiramatsu (3):
ftrace: Simplify ftrace_hash_disable/enable path in ftrace_hash_move
ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict
kprobes: Set IPMODIFY flag only if the probe can change regs->ip


Documentation/kprobes.txt | 12 +--
Documentation/trace/ftrace.txt | 5 +
arch/x86/kernel/kprobes/ftrace.c | 9 +-
include/linux/ftrace.h | 10 ++
kernel/kprobes.c | 115 +++++++++++++++++++++++----
kernel/trace/ftrace.c | 164 +++++++++++++++++++++++++++++++++-----
6 files changed, 265 insertions(+), 50 deletions(-)

--

-----<ipmodify.c>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ftrace.h>


static void ftrace_ipmodify_handler(unsigned long a0, unsigned long a1,
struct ftrace_ops *op, struct pt_regs *regs)
{
return;
}

static struct ftrace_ops test_ops __read_mostly = {
.func = ftrace_ipmodify_handler,
.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

static unsigned long target;
module_param(target, ulong, 0444);

static int __init ipmodify_init(void)
{
int ret;
unsigned long ip = target;

ret = ftrace_set_filter_ip(&test_ops, ip, 0, 0);
pr_err("ipmodify_test: set filter ip 0x%lx, ret = %d\n", ip, ret);

if (ret >= 0) {
ret = register_ftrace_function(&test_ops);
pr_err("ipmodify_test: register ftrace on 0x%lx, ret = %d\n",
ip, ret);
}

return ret;
}

static void __exit ipmodify_exit(void)
{
int ret;
unsigned long ip = target;

ret = unregister_ftrace_function(&test_ops);
pr_err("ipmodify_test: unregister ftrace on 0x%lx, ret = %d\n", ip, ret);
if (ret >= 0) {
ret = ftrace_set_filter_ip(&test_ops, ip, 1, 0);
pr_err("ipmodify_test: clear filter ip 0x%lx, ret = %d\n",
ip, ret);
}
}

module_init(ipmodify_init)
module_exit(ipmodify_exit)
MODULE_LICENSE("GPL");
-----


Subject: [PATCH -tip v2 1/3] ftrace: Simplify ftrace_hash_disable/enable path in ftrace_hash_move

Simplify ftrace_hash_disable/enable path in ftrace_hash_move
for hardening the process if the memory allocation failed.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Namhyung Kim <[email protected]>
---
kernel/trace/ftrace.c | 33 +++++++++++----------------------
1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5b372e3..d65719d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1304,25 +1304,15 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct ftrace_hash *new_hash;
int size = src->count;
int bits = 0;
- int ret;
int i;

/*
- * Remove the current set, update the hash and add
- * them back.
- */
- ftrace_hash_rec_disable(ops, enable);
-
- /*
* If the new source is empty, just free dst and assign it
* the empty_hash.
*/
if (!src->count) {
- free_ftrace_hash_rcu(*dst);
- rcu_assign_pointer(*dst, EMPTY_HASH);
- /* still need to update the function records */
- ret = 0;
- goto out;
+ new_hash = EMPTY_HASH;
+ goto update;
}

/*
@@ -1335,10 +1325,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
if (bits > FTRACE_HASH_MAX_BITS)
bits = FTRACE_HASH_MAX_BITS;

- ret = -ENOMEM;
new_hash = alloc_ftrace_hash(bits);
if (!new_hash)
- goto out;
+ return -ENOMEM;

size = 1 << src->size_bits;
for (i = 0; i < size; i++) {
@@ -1349,20 +1338,20 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
}
}

+update:
+ /*
+ * Remove the current set, update the hash and add
+ * them back.
+ */
+ ftrace_hash_rec_disable(ops, enable);
+
old_hash = *dst;
rcu_assign_pointer(*dst, new_hash);
free_ftrace_hash_rcu(old_hash);

- ret = 0;
- out:
- /*
- * Enable regardless of ret:
- * On success, we enable the new hash.
- * On failure, we re-enable the original hash.
- */
ftrace_hash_rec_enable(ops, enable);

- return ret;
+ return 0;
}

/*

Subject: [PATCH -tip v2 2/3] ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict

Introduce FTRACE_OPS_FL_IPMODIFY to avoid conflict among
ftrace users who may modify regs->ip to change the execution
path. This also adds the flag to kprobe_ftrace_ops, since
ftrace-based kprobes already modifies regs->ip. Thus, if
another user modifies the regs->ip on the same function entry,
one of them will be broken. So both should add IPMODIFY flag
and make sure that ftrace_set_filter_ip() succeeds.

Note that currently conflicts of IPMODIFY are detected on the
filter hash. It does NOT care about the notrace hash. This means
that if you set filter hash all functions and notrace(mask)
some of them, the IPMODIFY flag will be applied to all
functions.

Changes in v2:
- Add a description how __ftrace_hash_update_ipmodify() will
handle the given hashes (NULL and EMPTY_HASH cases).
- Clear FTRACE_OPS_FL_ENABLED after calling
__unregister_ftrace_function() in error path.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Namhyung Kim <[email protected]>
---
Documentation/trace/ftrace.txt | 5 ++
include/linux/ftrace.h | 10 ++-
kernel/kprobes.c | 2 -
kernel/trace/ftrace.c | 133 +++++++++++++++++++++++++++++++++++++++-
4 files changed, 145 insertions(+), 5 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 2479b2a..0fcad7d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -234,6 +234,11 @@ of ftrace. Here is a list of some of the key files:
will be displayed on the same line as the function that
is returning registers.

+ If the callback registered to be traced by a function with
+ the "ip modify" attribute (thus the regs->ip can be changed),
+ a 'I' will be displayed on the same line as the function that
+ can be overridden.
+
function_profile_enabled:

When set it will enable all functions with either the function
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 404a686..5a43f8f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -90,6 +90,9 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
* INITIALIZED - The ftrace_ops has already been initialized (first use time
* register_ftrace_function() is called, it will initialized the ops)
* DELETED - The ops are being deleted, do not let them be registered again.
+ * IPMODIFY - The ops can modify IP register. This must be set with SAVE_REGS
+ * and if the other ops has been set this on same function, filter
+ * update must be failed.
*/
enum {
FTRACE_OPS_FL_ENABLED = 1 << 0,
@@ -101,6 +104,7 @@ enum {
FTRACE_OPS_FL_STUB = 1 << 6,
FTRACE_OPS_FL_INITIALIZED = 1 << 7,
FTRACE_OPS_FL_DELETED = 1 << 8,
+ FTRACE_OPS_FL_IPMODIFY = 1 << 9,
};

/*
@@ -306,6 +310,7 @@ extern int ftrace_nr_registered_ops(void);
* the dyn_ftrace descriptor represents.
*
* The second part is a mask:
+ * IPMODIFY - the record wants to change IP address.
* ENABLED - the function is being traced
* REGS - the record wants the function to save regs
* REGS_EN - the function is set up to save regs.
@@ -317,13 +322,14 @@ extern int ftrace_nr_registered_ops(void);
* from tracing that function.
*/
enum {
+ FTRACE_FL_IPMODIFY = (1UL << 28),
FTRACE_FL_ENABLED = (1UL << 29),
FTRACE_FL_REGS = (1UL << 30),
FTRACE_FL_REGS_EN = (1UL << 31)
};

-#define FTRACE_FL_MASK (0x7UL << 29)
-#define FTRACE_REF_MAX ((1UL << 29) - 1)
+#define FTRACE_FL_MASK (0xfUL << 28)
+#define FTRACE_REF_MAX ((1UL << 28) - 1)

struct dyn_ftrace {
unsigned long ip; /* address of mcount call-site */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3214289..e52d86f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -915,7 +915,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
.func = kprobe_ftrace_handler,
- .flags = FTRACE_OPS_FL_SAVE_REGS,
+ .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};
static int kprobe_ftrace_enabled;

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d65719d..5e09f39 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1293,6 +1293,9 @@ ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
static void
ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);

+static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
+ struct ftrace_hash *new_hash);
+
static int
ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct ftrace_hash **dst, struct ftrace_hash *src)
@@ -1304,6 +1307,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct ftrace_hash *new_hash;
int size = src->count;
int bits = 0;
+ int ret;
int i;

/*
@@ -1339,6 +1343,16 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
}

update:
+ /* Before everything, make sure this can be applied */
+ if (enable) {
+ /* IPMODIFY should be updated only when filter_hash updating */
+ ret = ftrace_hash_ipmodify_update(ops, new_hash);
+ if (ret < 0) {
+ free_ftrace_hash(new_hash);
+ return ret;
+ }
+ }
+
/*
* Remove the current set, update the hash and add
* them back.
@@ -1593,6 +1607,109 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
__ftrace_hash_rec_update(ops, filter_hash, 1);
}

+/*
+ * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
+ * or no-needed to update, -EBUSY if it detects a conflict of the flag
+ * on a ftrace_rec.
+ * Note that old_hash and new_hash has below meanings
+ * - If the hash is NULL, it hits all recs
+ * - If the hash is EMPTY_HASH, it hits nothing
+ * - Others hits the recs which match the hash entries.
+ */
+static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
+ struct ftrace_hash *old_hash,
+ struct ftrace_hash *new_hash)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec, *end = NULL;
+ int in_old, in_new;
+
+ /* Only update if the ops has been registered */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return 0;
+
+ if (!(ops->flags & FTRACE_OPS_FL_SAVE_REGS) ||
+ !(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+ return 0;
+
+ /* Update rec->flags */
+ do_for_each_ftrace_rec(pg, rec) {
+ /* We need to update only differences of filter_hash */
+ in_old = !old_hash || ftrace_lookup_ip(old_hash, rec->ip);
+ in_new = !new_hash || ftrace_lookup_ip(new_hash, rec->ip);
+ if (in_old == in_new)
+ continue;
+
+ if (in_new) {
+ /* New entries must ensure no others are using it */
+ if (rec->flags & FTRACE_FL_IPMODIFY)
+ goto rollback;
+ rec->flags |= FTRACE_FL_IPMODIFY;
+ } else /* Removed entry */
+ rec->flags &= ~FTRACE_FL_IPMODIFY;
+ } while_for_each_ftrace_rec();
+
+ return 0;
+
+rollback:
+ end = rec;
+
+ /* Roll back what we did above */
+ do_for_each_ftrace_rec(pg, rec) {
+ if (rec == end)
+ goto err_out;
+
+ in_old = !old_hash || ftrace_lookup_ip(old_hash, rec->ip);
+ in_new = !new_hash || ftrace_lookup_ip(new_hash, rec->ip);
+ if (in_old == in_new)
+ continue;
+
+ if (in_new)
+ rec->flags &= ~FTRACE_FL_IPMODIFY;
+ else
+ rec->flags |= FTRACE_FL_IPMODIFY;
+ } while_for_each_ftrace_rec();
+
+err_out:
+ return -EBUSY;
+}
+
+static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops)
+{
+ struct ftrace_hash *hash = ops->filter_hash;
+
+ if (ftrace_hash_empty(hash))
+ hash = NULL;
+
+ return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash);
+}
+
+/* Disabling always succeeds */
+static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops)
+{
+ struct ftrace_hash *hash = ops->filter_hash;
+
+ if (ftrace_hash_empty(hash))
+ hash = NULL;
+
+ __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH);
+}
+
+static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
+ struct ftrace_hash *new_hash)
+{
+ struct ftrace_hash *old_hash = ops->filter_hash;
+
+ if (ftrace_hash_empty(old_hash))
+ old_hash = NULL;
+
+ if (ftrace_hash_empty(new_hash))
+ new_hash = NULL;
+
+ return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash);
+}
+
+
static void print_ip_ins(const char *fmt, unsigned char *p)
{
int i;
@@ -2078,6 +2195,15 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)

ops->flags |= FTRACE_OPS_FL_ENABLED;

+ ret = ftrace_hash_ipmodify_enable(ops);
+ if (ret < 0) {
+ /* Rollback registration process */
+ __unregister_ftrace_function(ops);
+ ftrace_start_up--;
+ ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+ return ret;
+ }
+
ftrace_hash_rec_enable(ops, 1);

ftrace_startup_enable(command);
@@ -2104,6 +2230,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
*/
WARN_ON_ONCE(ftrace_start_up < 0);

+ /* Disabling ipmodify never be failed */
+ ftrace_hash_ipmodify_disable(ops);
ftrace_hash_rec_disable(ops, 1);

if (!global_start_up)
@@ -2641,9 +2769,10 @@ static int t_show(struct seq_file *m, void *v)

seq_printf(m, "%ps", (void *)rec->ip);
if (iter->flags & FTRACE_ITER_ENABLED)
- seq_printf(m, " (%ld)%s",
+ seq_printf(m, " (%ld)%s%s",
rec->flags & ~FTRACE_FL_MASK,
- rec->flags & FTRACE_FL_REGS ? " R" : "");
+ rec->flags & FTRACE_FL_REGS ? " R" : "",
+ rec->flags & FTRACE_FL_IPMODIFY ? " I" : "");
seq_printf(m, "\n");

return 0;

Subject: [PATCH -tip v2 3/3] kprobes: Set IPMODIFY flag only if the probe can change regs->ip

Set FTRACE_OPS_FL_IPMODIFY flag only for the probes which can change
regs->ip, which has kprobe->break_handler.
Currently only jprobe modifies regs->ip to hook a function entry
and jumps to a user handler which has same prototype of the original
function.

In other cases, kprobes restores original regs->ip when returning
to ftrace.

Note about the implementation: This uses a dummy ftrace_ops to
reserve IPMODIFY flag on the given ftrace address, for the case
that we have a enabled kprobe on a function entry and a jprobe
is added on the same point. In that case, we already have a
ftrace_ops without IPMODIFY flag on the entry, and we have to
add another ftrace_ops with IPMODIFY on the same address.
If we put a same handler on both ftrace_ops, the handler can
be called twice on that entry until the first one is removed.
This means that the kprobe and the jprobe are called twice too,
and that will not what kprobes expected.
Thus I added a dummy ftrace_ops just for reserving IPMODIFY flag.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Namhyung Kim <[email protected]>
---
Documentation/kprobes.txt | 12 ++--
arch/x86/kernel/kprobes/ftrace.c | 9 ++-
kernel/kprobes.c | 117 +++++++++++++++++++++++++++++++++-----
3 files changed, 112 insertions(+), 26 deletions(-)

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 4bbeca8..177f051 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -264,15 +264,13 @@ stop-machine method that ksplice uses for supporting a CONFIG_PREEMPT=y
kernel.

NOTE for geeks:
-The jump optimization changes the kprobe's pre_handler behavior.
-Without optimization, the pre_handler can change the kernel's execution
+The jump optimization (and ftrace-based kprobes) changes the kprobe's
+pre_handler behavior.
+Without optimizations, the pre_handler can change the kernel's execution
path by changing regs->ip and returning 1. However, when the probe
is optimized, that modification is ignored. Thus, if you want to
-tweak the kernel's execution path, you need to suppress optimization,
-using one of the following techniques:
-- Specify an empty function for the kprobe's post_handler or break_handler.
- or
-- Execute 'sysctl -w debug.kprobes_optimization=n'
+tweak the kernel's execution path, you need to suppress optimization or
+notify your handler will modify regs->ip by setting p->break_handler.

1.5 Blacklist

diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 717b02a..5f8f0b3 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -27,7 +27,7 @@

static nokprobe_inline
int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+ struct kprobe_ctlblk *kcb, unsigned long orig_ip)
{
/*
* Emulate singlestep (and also recover regs->ip)
@@ -39,6 +39,8 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
p->post_handler(p, regs, 0);
}
__this_cpu_write(current_kprobe, NULL);
+ if (orig_ip)
+ regs->ip = orig_ip;
return 1;
}

@@ -46,7 +48,7 @@ int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
if (kprobe_ftrace(p))
- return __skip_singlestep(p, regs, kcb);
+ return __skip_singlestep(p, regs, kcb, 0);
else
return 0;
}
@@ -71,13 +73,14 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
} else {
+ unsigned long orig_ip = regs->ip;
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);

__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs))
- __skip_singlestep(p, regs, kcb);
+ __skip_singlestep(p, regs, kcb, orig_ip);
/*
* If pre_handler returns !0, it sets regs->ip and
* resets current kprobe.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e52d86f..8ba1798 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -915,10 +915,85 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
.func = kprobe_ftrace_handler,
- .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
};
static int kprobe_ftrace_enabled;

+static void kprobe_ftrace_stub(unsigned long a0, unsigned long a1,
+ struct ftrace_ops *op, struct pt_regs *regs)
+{
+ /* Do nothing. This is just a dummy handler */
+}
+
+/* This is only for checking conflict with other ftrace users */
+static struct ftrace_ops kprobe_ipmod_ftrace_ops __read_mostly = {
+ .func = kprobe_ftrace_stub,
+ .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
+};
+static int kprobe_ipmod_ftrace_enabled;
+
+static int __ftrace_add_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int *ref)
+{
+ int ret;
+
+ /* Try to set given ip to filter */
+ ret = ftrace_set_filter_ip(ops, ip, 0, 0);
+ if (ret >= 0) {
+ (*ref)++;
+ if (*ref == 1) {
+ ret = register_ftrace_function(ops);
+ if (ret < 0) {
+ /* Rollback refcounter and filter */
+ (*ref)--;
+ ftrace_set_filter_ip(ops, ip, 1, 0);
+ }
+ }
+ }
+
+ return ret;
+}
+
+static int __ftrace_remove_filter_ip(struct ftrace_ops *ops, unsigned long ip,
+ int *ref)
+{
+ int ret = 0;
+
+ (*ref)--;
+ if (*ref == 0)
+ ret = unregister_ftrace_function(ops);
+ if (ret >= 0)
+ /* Try to remove given ip to filter */
+ ret = ftrace_set_filter_ip(ops, ip, 1, 0);
+ if (ret < 0) /* Rollback refcounter if an error occurs */
+ (*ref)++;
+
+ return ret;
+}
+
+static int try_reserve_ftrace_ipmodify(struct kprobe *p)
+{
+ if (!p->break_handler)
+ return 0;
+
+ return __ftrace_add_filter_ip(&kprobe_ipmod_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ipmod_ftrace_enabled);
+}
+
+static void release_ftrace_ipmodify(struct kprobe *p)
+{
+ int ret;
+
+ if (p->break_handler) {
+ ret = __ftrace_remove_filter_ip(&kprobe_ipmod_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ipmod_ftrace_enabled);
+ WARN(ret < 0, "Failed to release ftrace at %p (%d)\n",
+ p->addr, ret);
+ }
+}
+
/* Must ensure p->addr is really on ftrace */
static int prepare_kprobe(struct kprobe *p)
{
@@ -933,14 +1008,10 @@ static void arm_kprobe_ftrace(struct kprobe *p)
{
int ret;

- ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
- (unsigned long)p->addr, 0, 0);
+ ret = __ftrace_add_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ftrace_enabled);
WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
- kprobe_ftrace_enabled++;
- if (kprobe_ftrace_enabled == 1) {
- ret = register_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
- }
}

/* Caller must lock kprobe_mutex */
@@ -948,17 +1019,16 @@ static void disarm_kprobe_ftrace(struct kprobe *p)
{
int ret;

- kprobe_ftrace_enabled--;
- if (kprobe_ftrace_enabled == 0) {
- ret = unregister_ftrace_function(&kprobe_ftrace_ops);
- WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
- }
- ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
- (unsigned long)p->addr, 1, 0);
- WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ ret = __ftrace_remove_filter_ip(&kprobe_ftrace_ops,
+ (unsigned long)p->addr,
+ &kprobe_ftrace_enabled);
+ WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n",
+ p->addr, ret);
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
+#define try_reserve_ftrace_ipmodify(p) (0)
+#define release_ftrace_ipmodify(p) do {} while (0)
#define arm_kprobe_ftrace(p) do {} while (0)
#define disarm_kprobe_ftrace(p) do {} while (0)
#endif
@@ -1502,6 +1572,14 @@ int register_kprobe(struct kprobe *p)
mutex_lock(&kprobe_mutex);

old_p = get_kprobe(p->addr);
+
+ /* Try to reserve ftrace ipmodify if needed */
+ if (kprobe_ftrace(p) && (!old_p || !old_p->break_handler)) {
+ ret = try_reserve_ftrace_ipmodify(p);
+ if (ret < 0)
+ goto out_noreserve;
+ }
+
if (old_p) {
/* Since this may unoptimize old_p, locking text_mutex. */
ret = register_aggr_kprobe(old_p, p);
@@ -1525,6 +1603,9 @@ int register_kprobe(struct kprobe *p)
try_to_optimize_kprobe(p);

out:
+ if (ret < 0 && kprobe_ftrace(p))
+ release_ftrace_ipmodify(p);
+out_noreserve:
mutex_unlock(&kprobe_mutex);

if (probed_mod)
@@ -1639,6 +1720,10 @@ static void __unregister_kprobe_bottom(struct kprobe *p)
{
struct kprobe *ap;

+ /* Release reserved ftrace ipmodify if needed */
+ if (kprobe_ftrace(p))
+ release_ftrace_ipmodify(p);
+
if (list_empty(&p->list))
/* This is an independent kprobe */
arch_remove_kprobe(p);

Subject: Re: [PATCH -tip v2 0/3] ftrace, kprobes: Introduce IPMODIFY flag for ftrace_ops to detect conflicts

(2014/06/17 20:04), Masami Hiramatsu wrote:
> Hi,
>
> Here is the version 2 of the series of patches which introduces
> IPMODIFY flag for ftrace_ops to detect conflicts of ftrace users
> who can modify regs->ip in their handler.
> In this version, I fixed some bugs in previous version and
> added a patch which made kprobe itself free from IPMODIFY
> except for jprobe.
>
> Currently, only kprobes can change the regs->ip in the handler,
> but recently kpatch is also want to change it. Moreover, since
> the ftrace itself exported to modules, it might be considerable
> senario.
>
> Here we talked on github.
> https://github.com/dynup/kpatch/issues/47
>
> To protect modified regs-ip from each other, this series
> introduces FTRACE_OPS_FL_IPMODIFY flag and ftrace now ensures
> the flag can be set on each function entry location. If there
> is someone who already reserve regs->ip on target function
> entry, ftrace_set_filter_ip or register_ftrace_function will
> return -EBUSY. Users must handle that.
>
> The 3rd patch adds a special reservation of IPMODIFY on the
> jprobed address, since it is the only user who will change
> the regs->ip. Other kprobes do not change it anymore.
>
> Testing:
> BTW, I've tested it with samples/kprobes/{k,j}probe_example.ko
> and a small test module which I added to the last of this mail.
>
> Here is the test script. I think I should put this under
> tools/testing, maybe kprobes? or selftest/kprobes?
>
> ----
> #!/bin/bash
>
> echo "IPMODIFY test"
> ADDR=0x`grep do_fork /proc/kallsyms | cut -f 1 -d" "`
>
> echo "Case1: kprobe->jprobe->ipmodify(fail)"
> modprobe kprobe_example
> modprobe jprobe_example
> insmod ./ipmodify.ko && echo "Case1: [FAIL]" || echo "Case1: [OK]"

Oops, I missed the target=$ADDR for these insmod...

Anyway, without passing target=$ADDR option, ipmodify.ko tries to
register ftrace_ops with an empty filter hash which means
trace all functions. So anyway jprobe and that ftrace_ops collide
with each other on the do_fork and the 2nd one should be failed.

Thus, of course this test doesn't fail with or without target=
param.

Thank you,


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2014-06-19 02:08:43

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH -tip v2 0/3] ftrace, kprobes: Introduce IPMODIFY flag for ftrace_ops to detect conflicts

On Tue, Jun 17, 2014 at 11:04:36AM +0000, Masami Hiramatsu wrote:
> Hi,
>
> Here is the version 2 of the series of patches which introduces
> IPMODIFY flag for ftrace_ops to detect conflicts of ftrace users
> who can modify regs->ip in their handler.
> In this version, I fixed some bugs in previous version and
> added a patch which made kprobe itself free from IPMODIFY
> except for jprobe.

Hi Masami,

This seems better, but I still saw a few issues. I'm not sure if the
issues are specific to stap or kprobes. For the following issues I used
this command to set a kprobe:

stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'

With patches 1-2, when I used stap to kprobe the function after it was
already kpatched, stap didn't return an error and instead acted like it
succeeded (though the probe didn't work):

$ sudo stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'
Pass 1: parsed user script and 112 library script(s) using 221516virt/41612res/6028shr/36228data kb, in 130usr/0sys/132real ms.
Pass 2: analyzed script: 1 probe(s), 0 function(s), 0 embed(s), 0 global(s) using 255840virt/77132res/7132shr/70552data kb, in 510usr/20sys/577real ms.
Pass 3: translated to C into "/tmp/stap3Qunba/stap_2690192fea570fb7bba78c7ed7fa1e0d_898_src.c" using 255840virt/77392res/7392shr/70552data kb, in 10usr/0sys/4real ms.
Pass 4: compiled C into "stap_2690192fea570fb7bba78c7ed7fa1e0d_898.ko" in 5020usr/640sys/7105real ms.
Pass 5: starting run.
(no error)

With all 3 patches, I expected kprobes and kpatch to be able to ftrace
the same function. But when I tried to kpatch the function after it was
kprobed, I got the following oops in stap:

[ 455.842797] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
[ 455.843388] IP: [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
[ 455.844011] PGD 33f898067 PUD 422083067 PMD 0
[ 455.844638] Oops: 0000 [#1] SMP
[ 455.845255] Modules linked in: kpatch(OE+) stap_2690192fea570fb7bba78c7ed7fa1e_20189(OE) rfcomm ipt_MASQUERADE fuse ccm xt_CHECKSUM tun ip6t_rpfilter ip6t_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw bnep arc4 iwldvm mac80211 iTCO_wdt snd_hda_codec_hdmi iTCO_vendor_support x86_pkg_temp_thermal snd_hda_codec_realtek coretemp iwlwifi snd_hda_codec_generic kvm_intel snd_hda_intel kvm uvcvideo snd_hda_controller cfg80211 snd_hda_codec btusb videobuf2_vmalloc bluetooth videobuf2_memops snd_hwdep snd_seq nfsd videobuf2_core
[ 455.848272] v4l2_common videodev snd_seq_device e1000e microcode snd_pcm sdhci_pci media joydev sdhci serio_raw i2c_i801 pcspkr mmc_core thinkpad_acpi mei_me snd_timer auth_rpcgss mei snd lpc_ich ptp mfd_core shpchp nfs_acl lockd pps_core wmi tpm_tis soundcore tpm rfkill sunrpc dm_crypt i915 i2c_algo_bit drm_kms_helper drm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel i2c_core video
[ 455.850887] CPU: 3 PID: 19857 Comm: insmod Tainted: G W OE 3.16.0-rc1+ #2
[ 455.851768] Hardware name: LENOVO 2356BH8/2356BH8, BIOS G7ET63WW (2.05 ) 11/12/2012
[ 455.852638] task: ffff880095d65460 ti: ffff88039d1d4000 task.ti: ffff88039d1d4000
[ 455.853456] RIP: 0010:[<ffffffffa0833d1e>] [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
[ 455.854335] RSP: 0018:ffff88039d1d7ce0 EFLAGS: 00010246
[ 455.855212] RAX: ffffffffa0837f50 RBX: 0000000000000000 RCX: 00000000ffffffff
[ 455.856109] RDX: ffffffffa08400e0 RSI: 0000000000000001 RDI: ffffffffa0837f50
[ 455.856986] RBP: ffff88039d1d7d00 R08: 0000000000000000 R09: 0000000000000000
[ 455.857880] R10: 0000000000000001 R11: ffffc9001aed2d8f R12: ffffffff81c593e0
[ 455.858761] R13: 0000000000000001 R14: ffffffffa08400e0 R15: 0000000000000000
[ 455.859640] FS: 00007feac5f10740(0000) GS:ffff88043e2c0000(0000) knlGS:0000000000000000
[ 455.860523] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 455.861403] CR2: 0000000000000020 CR3: 00000004224e7000 CR4: 00000000001407e0
[ 455.862309] Stack:
[ 455.863236] 00000000fffffffd ffffffff81c593e0 0000000000000001 ffffffffa08400e0
[ 455.864163] ffff88039d1d7d38 ffffffff810b45bc ffffffff81c557c0 0000000000000000
[ 455.865103] 0000000000000001 ffffffffa08400e0 00000000ffffffff ffff88039d1d7d78
[ 455.866067] Call Trace:
[ 455.867100] [<ffffffff810b45bc>] notifier_call_chain+0x4c/0x70
[ 455.868202] [<ffffffff810b491d>] __blocking_notifier_call_chain+0x4d/0x70
[ 455.869155] [<ffffffff810b4956>] blocking_notifier_call_chain+0x16/0x20
[ 455.870111] [<ffffffff8110749c>] load_module+0x1aac/0x25f0
[ 455.871067] [<ffffffff811f6720>] ? kernel_read+0x50/0x80
[ 455.872022] [<ffffffff81108196>] SyS_finit_module+0xa6/0xd0
[ 455.872982] [<ffffffff817082e9>] system_call_fastpath+0x16/0x1b
[ 455.873941] Code: 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 83 fe 01 48 89 e5 41 56 49 89 d6 41 55 41 54 53 48 8b 9a d0 01 00 00 <8b> 43 20 0f 84 59 01 00 00 48 85 f6 75 44 85 c0 74 4a 83 e8 01
[ 455.876073] RIP [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
[ 455.877146] RSP <ffff88039d1d7ce0>
[ 455.878243] CR2: 0000000000000020
[ 455.883707] ---[ end trace 388d9e62d4390d42 ]---

--
Josh

Subject: Re: Re: [PATCH -tip v2 0/3] ftrace, kprobes: Introduce IPMODIFY flag for ftrace_ops to detect conflicts

(2014/06/19 11:08), Josh Poimboeuf wrote:
> On Tue, Jun 17, 2014 at 11:04:36AM +0000, Masami Hiramatsu wrote:
>> Hi,
>>
>> Here is the version 2 of the series of patches which introduces
>> IPMODIFY flag for ftrace_ops to detect conflicts of ftrace users
>> who can modify regs->ip in their handler.
>> In this version, I fixed some bugs in previous version and
>> added a patch which made kprobe itself free from IPMODIFY
>> except for jprobe.
>
> Hi Masami,
>
> This seems better, but I still saw a few issues. I'm not sure if the
> issues are specific to stap or kprobes. For the following issues I used
> this command to set a kprobe:
>
> stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'
>
> With patches 1-2, when I used stap to kprobe the function after it was
> already kpatched, stap didn't return an error and instead acted like it
> succeeded (though the probe didn't work):
>
> $ sudo stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'
> Pass 1: parsed user script and 112 library script(s) using 221516virt/41612res/6028shr/36228data kb, in 130usr/0sys/132real ms.
> Pass 2: analyzed script: 1 probe(s), 0 function(s), 0 embed(s), 0 global(s) using 255840virt/77132res/7132shr/70552data kb, in 510usr/20sys/577real ms.
> Pass 3: translated to C into "/tmp/stap3Qunba/stap_2690192fea570fb7bba78c7ed7fa1e0d_898_src.c" using 255840virt/77392res/7392shr/70552data kb, in 10usr/0sys/4real ms.
> Pass 4: compiled C into "stap_2690192fea570fb7bba78c7ed7fa1e0d_898.ko" in 5020usr/640sys/7105real ms.
> Pass 5: starting run.
> (no error)

Yeah, I guess you can see some warning messages in dmesg (by
arm_kprobe) at this point.

>
> With all 3 patches, I expected kprobes and kpatch to be able to ftrace
> the same function. But when I tried to kpatch the function after it was
> kprobed, I got the following oops in stap:
>
> [ 455.842797] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
> [ 455.843388] IP: [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]

Hmm, since this happens in _stp_module_notifier() which is a code in systemtap,
I guess it's a systemtap problem.

Could you test it with kprobe-tracer as below?

# (do something kpatch related activation)
# echo p meminfo_proc_show > /sys/kernel/debug/tracing/kprobe_events
# echo 1 > /sys/kernel/debug/tracing/events/kprobe/enable

Thank you,

> [ 455.844011] PGD 33f898067 PUD 422083067 PMD 0
> [ 455.844638] Oops: 0000 [#1] SMP
> [ 455.845255] Modules linked in: kpatch(OE+) stap_2690192fea570fb7bba78c7ed7fa1e_20189(OE) rfcomm ipt_MASQUERADE fuse ccm xt_CHECKSUM tun ip6t_rpfilter ip6t_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw bnep arc4 iwldvm mac80211 iTCO_wdt snd_hda_codec_hdmi iTCO_vendor_support x86_pkg_temp_thermal snd_hda_codec_realtek coretemp iwlwifi snd_hda_codec_generic kvm_intel snd_hda_intel kvm uvcvideo snd_hda_controller cfg80211 snd_hda_codec btusb videobuf2_vmalloc bluetooth videobuf2_memops snd_hwdep snd_seq nfsd videobuf2_core
> [ 455.848272] v4l2_common videodev snd_seq_device e1000e microcode snd_pcm sdhci_pci media joydev sdhci serio_raw i2c_i801 pcspkr mmc_core thinkpad_acpi mei_me snd_timer auth_rpcgss mei snd lpc_ich ptp mfd_core shpchp nfs_acl lockd pps_core wmi tpm_tis soundcore tpm rfkill sunrpc dm_crypt i915 i2c_algo_bit drm_kms_helper drm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel i2c_core video
> [ 455.850887] CPU: 3 PID: 19857 Comm: insmod Tainted: G W OE 3.16.0-rc1+ #2
> [ 455.851768] Hardware name: LENOVO 2356BH8/2356BH8, BIOS G7ET63WW (2.05 ) 11/12/2012
> [ 455.852638] task: ffff880095d65460 ti: ffff88039d1d4000 task.ti: ffff88039d1d4000
> [ 455.853456] RIP: 0010:[<ffffffffa0833d1e>] [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
> [ 455.854335] RSP: 0018:ffff88039d1d7ce0 EFLAGS: 00010246
> [ 455.855212] RAX: ffffffffa0837f50 RBX: 0000000000000000 RCX: 00000000ffffffff
> [ 455.856109] RDX: ffffffffa08400e0 RSI: 0000000000000001 RDI: ffffffffa0837f50
> [ 455.856986] RBP: ffff88039d1d7d00 R08: 0000000000000000 R09: 0000000000000000
> [ 455.857880] R10: 0000000000000001 R11: ffffc9001aed2d8f R12: ffffffff81c593e0
> [ 455.858761] R13: 0000000000000001 R14: ffffffffa08400e0 R15: 0000000000000000
> [ 455.859640] FS: 00007feac5f10740(0000) GS:ffff88043e2c0000(0000) knlGS:0000000000000000
> [ 455.860523] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 455.861403] CR2: 0000000000000020 CR3: 00000004224e7000 CR4: 00000000001407e0
> [ 455.862309] Stack:
> [ 455.863236] 00000000fffffffd ffffffff81c593e0 0000000000000001 ffffffffa08400e0
> [ 455.864163] ffff88039d1d7d38 ffffffff810b45bc ffffffff81c557c0 0000000000000000
> [ 455.865103] 0000000000000001 ffffffffa08400e0 00000000ffffffff ffff88039d1d7d78
> [ 455.866067] Call Trace:
> [ 455.867100] [<ffffffff810b45bc>] notifier_call_chain+0x4c/0x70
> [ 455.868202] [<ffffffff810b491d>] __blocking_notifier_call_chain+0x4d/0x70
> [ 455.869155] [<ffffffff810b4956>] blocking_notifier_call_chain+0x16/0x20
> [ 455.870111] [<ffffffff8110749c>] load_module+0x1aac/0x25f0
> [ 455.871067] [<ffffffff811f6720>] ? kernel_read+0x50/0x80
> [ 455.872022] [<ffffffff81108196>] SyS_finit_module+0xa6/0xd0
> [ 455.872982] [<ffffffff817082e9>] system_call_fastpath+0x16/0x1b
> [ 455.873941] Code: 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 83 fe 01 48 89 e5 41 56 49 89 d6 41 55 41 54 53 48 8b 9a d0 01 00 00 <8b> 43 20 0f 84 59 01 00 00 48 85 f6 75 44 85 c0 74 4a 83 e8 01
> [ 455.876073] RIP [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
> [ 455.877146] RSP <ffff88039d1d7ce0>
> [ 455.878243] CR2: 0000000000000020
> [ 455.883707] ---[ end trace 388d9e62d4390d42 ]---
>


--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Research Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2014-06-19 12:34:39

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH -tip v2 3/3] kprobes: Set IPMODIFY flag only if the probe can change regs->ip

Hi Masami,

2014-06-17 (화), 11:04 +0000, Masami Hiramatsu:
> +static int __ftrace_add_filter_ip(struct ftrace_ops *ops, unsigned long ip,
> + int *ref)
> +{
> + int ret;
> +
> + /* Try to set given ip to filter */
> + ret = ftrace_set_filter_ip(ops, ip, 0, 0);
> + if (ret >= 0) {

Hmm.. this doesn't look comfortable. What not using usual pattern?

if (ret < 0)
return ret;

This way we can reduce a indent level.


> + (*ref)++;
> + if (*ref == 1) {
> + ret = register_ftrace_function(ops);
> + if (ret < 0) {
> + /* Rollback refcounter and filter */
> + (*ref)--;
> + ftrace_set_filter_ip(ops, ip, 1, 0);
> + }
> + }
> + }
> +
> + return ret;
> +}
> +
> +static int __ftrace_remove_filter_ip(struct ftrace_ops *ops, unsigned long ip,
> + int *ref)
> +{
> + int ret = 0;
> +
> + (*ref)--;
> + if (*ref == 0)
> + ret = unregister_ftrace_function(ops);
> + if (ret >= 0)
> + /* Try to remove given ip to filter */
> + ret = ftrace_set_filter_ip(ops, ip, 1, 0);

I think any failure at this time can be problematic. Since we already
unregistered the ops but the refcount will get increased again, future
attemp to register won't enable the ops anymore IMHO.

I think it'd better just ignoring faiure of filter removal here. We'll
miss a filter entry but it'll be usable anyway.

What about this?

static int __ftrace_remove_filter_ip(...)
{
if (*ref == 1) {
int ret = unregister_ftrace_function(ops);
if (ret < 0)
return ret;

ftrace_set_filter_ip(ops, ip, 1, 0);
}

(*ref)--;
return 0;
}

Thanks,
Namhyung


> + if (ret < 0) /* Rollback refcounter if an error occurs */
> + (*ref)++;
> +
> + return ret;
> +}

2014-06-19 14:19:11

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH -tip v2 0/3] ftrace, kprobes: Introduce IPMODIFY flag for ftrace_ops to detect conflicts

On Thu, Jun 19, 2014 at 02:03:31PM +0900, Masami Hiramatsu wrote:
> (2014/06/19 11:08), Josh Poimboeuf wrote:
> > On Tue, Jun 17, 2014 at 11:04:36AM +0000, Masami Hiramatsu wrote:
> >> Hi,
> >>
> >> Here is the version 2 of the series of patches which introduces
> >> IPMODIFY flag for ftrace_ops to detect conflicts of ftrace users
> >> who can modify regs->ip in their handler.
> >> In this version, I fixed some bugs in previous version and
> >> added a patch which made kprobe itself free from IPMODIFY
> >> except for jprobe.
> >
> > Hi Masami,
> >
> > This seems better, but I still saw a few issues. I'm not sure if the
> > issues are specific to stap or kprobes. For the following issues I used
> > this command to set a kprobe:
> >
> > stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'
> >
> > With patches 1-2, when I used stap to kprobe the function after it was
> > already kpatched, stap didn't return an error and instead acted like it
> > succeeded (though the probe didn't work):
> >
> > $ sudo stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'
> > Pass 1: parsed user script and 112 library script(s) using 221516virt/41612res/6028shr/36228data kb, in 130usr/0sys/132real ms.
> > Pass 2: analyzed script: 1 probe(s), 0 function(s), 0 embed(s), 0 global(s) using 255840virt/77132res/7132shr/70552data kb, in 510usr/20sys/577real ms.
> > Pass 3: translated to C into "/tmp/stap3Qunba/stap_2690192fea570fb7bba78c7ed7fa1e0d_898_src.c" using 255840virt/77392res/7392shr/70552data kb, in 10usr/0sys/4real ms.
> > Pass 4: compiled C into "stap_2690192fea570fb7bba78c7ed7fa1e0d_898.ko" in 5020usr/640sys/7105real ms.
> > Pass 5: starting run.
> > (no error)
>
> Yeah, I guess you can see some warning messages in dmesg (by
> arm_kprobe) at this point.

Ah, you're right:

Jun 19 08:03:10 treble kernel: ------------[ cut here ]------------
Jun 19 08:03:10 treble kernel: WARNING: CPU: 1 PID: 17991 at kernel/kprobes.c:953 arm_kprobe+0xa7/0xe0()
Jun 19 08:03:10 treble kernel: Failed to init kprobe-ftrace (-16)
Jun 19 08:03:10 treble kernel: Modules linked in: stap_1faf9cc0ccf85c0d203c74ab6f604b_17991(OE) ...defra
Jun 19 08:03:10 treble kernel: videobuf2_vmalloc serio_raw microcode sdhci_pci bluetooth videobuf2_m...
Jun 19 08:03:10 treble kernel: CPU: 1 PID: 17991 Comm: stapio Tainted: G U W OE 3.15.0+ #1
Jun 19 08:03:10 treble kernel: Hardware name: LENOVO 2356BH8/2356BH8, BIOS G7ET63WW (2.05 ) 11/12/2012
Jun 19 08:03:10 treble kernel: 0000000000000000 000000009023f19e ffff8803dcce7d80 ffffffff816f31fd
Jun 19 08:03:10 treble kernel: ffff8803dcce7dc8 ffff8803dcce7db8 ffffffff8108914d ffffffffa08248e0
Jun 19 08:03:10 treble kernel: ffffffffa08248f0 0000000000000000 0000000000000000 0000000000000000
Jun 19 08:03:10 treble kernel: Call Trace:
Jun 19 08:03:10 treble kernel: [<ffffffff816f31fd>] dump_stack+0x45/0x56
Jun 19 08:03:10 treble kernel: [<ffffffff8108914d>] warn_slowpath_common+0x7d/0xa0
Jun 19 08:03:10 treble kernel: [<ffffffff810891cc>] warn_slowpath_fmt+0x5c/0x80
Jun 19 08:03:10 treble kernel: [<ffffffff816ff9d7>] arm_kprobe+0xa7/0xe0
Jun 19 08:03:10 treble kernel: [<ffffffff817007f7>] register_kprobe+0x557/0x5d0
Jun 19 08:03:10 treble kernel: [<ffffffff81254db0>] ? meminfo_proc_open+0x30/0x30
Jun 19 08:03:10 treble kernel: [<ffffffffa081fc95>] _stp_ctl_write_cmd+0x8d5/0x930 [stap_1faf9c...7991]
Jun 19 08:03:10 treble kernel: [<ffffffff811e5dba>] vfs_write+0xba/0x1e0
Jun 19 08:03:10 treble kernel: [<ffffffff811e6975>] SyS_write+0x55/0xd0
Jun 19 08:03:10 treble kernel: [<ffffffff81703179>] system_call_fastpath+0x16/0x1b
Jun 19 08:03:10 treble kernel: ---[ end trace 19615ed55413a30d ]---

Why not change arm_kprobe() to return an error?


>
> >
> > With all 3 patches, I expected kprobes and kpatch to be able to ftrace
> > the same function. But when I tried to kpatch the function after it was
> > kprobed, I got the following oops in stap:
> >
> > [ 455.842797] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
> > [ 455.843388] IP: [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
>
> Hmm, since this happens in _stp_module_notifier() which is a code in systemtap,
> I guess it's a systemtap problem.
>
> Could you test it with kprobe-tracer as below?
>
> # (do something kpatch related activation)
> # echo p meminfo_proc_show > /sys/kernel/debug/tracing/kprobe_events
> # echo 1 > /sys/kernel/debug/tracing/events/kprobe/enable

That worked, thanks.

>
> Thank you,
>
> > [ 455.844011] PGD 33f898067 PUD 422083067 PMD 0
> > [ 455.844638] Oops: 0000 [#1] SMP
> > [ 455.845255] Modules linked in: kpatch(OE+) stap_2690192fea570fb7bba78c7ed7fa1e_20189(OE) rfcomm ipt_MASQUERADE fuse ccm xt_CHECKSUM tun ip6t_rpfilter ip6t_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw bnep arc4 iwldvm mac80211 iTCO_wdt snd_hda_codec_hdmi iTCO_vendor_support x86_pkg_temp_thermal snd_hda_codec_realtek coretemp iwlwifi snd_hda_codec_generic kvm_intel snd_hda_intel kvm uvcvideo snd_hda_controller cfg80211 snd_hda_codec btusb videobuf2_vmalloc bluetooth videobuf2_memops snd_hwdep snd_seq nfsd videobuf2_core
> > [ 455.848272] v4l2_common videodev snd_seq_device e1000e microcode snd_pcm sdhci_pci media joydev sdhci serio_raw i2c_i801 pcspkr mmc_core thinkpad_acpi mei_me snd_timer auth_rpcgss mei snd lpc_ich ptp mfd_core shpchp nfs_acl lockd pps_core wmi tpm_tis soundcore tpm rfkill sunrpc dm_crypt i915 i2c_algo_bit drm_kms_helper drm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel i2c_core video
> > [ 455.850887] CPU: 3 PID: 19857 Comm: insmod Tainted: G W OE 3.16.0-rc1+ #2
> > [ 455.851768] Hardware name: LENOVO 2356BH8/2356BH8, BIOS G7ET63WW (2.05 ) 11/12/2012
> > [ 455.852638] task: ffff880095d65460 ti: ffff88039d1d4000 task.ti: ffff88039d1d4000
> > [ 455.853456] RIP: 0010:[<ffffffffa0833d1e>] [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
> > [ 455.854335] RSP: 0018:ffff88039d1d7ce0 EFLAGS: 00010246
> > [ 455.855212] RAX: ffffffffa0837f50 RBX: 0000000000000000 RCX: 00000000ffffffff
> > [ 455.856109] RDX: ffffffffa08400e0 RSI: 0000000000000001 RDI: ffffffffa0837f50
> > [ 455.856986] RBP: ffff88039d1d7d00 R08: 0000000000000000 R09: 0000000000000000
> > [ 455.857880] R10: 0000000000000001 R11: ffffc9001aed2d8f R12: ffffffff81c593e0
> > [ 455.858761] R13: 0000000000000001 R14: ffffffffa08400e0 R15: 0000000000000000
> > [ 455.859640] FS: 00007feac5f10740(0000) GS:ffff88043e2c0000(0000) knlGS:0000000000000000
> > [ 455.860523] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 455.861403] CR2: 0000000000000020 CR3: 00000004224e7000 CR4: 00000000001407e0
> > [ 455.862309] Stack:
> > [ 455.863236] 00000000fffffffd ffffffff81c593e0 0000000000000001 ffffffffa08400e0
> > [ 455.864163] ffff88039d1d7d38 ffffffff810b45bc ffffffff81c557c0 0000000000000000
> > [ 455.865103] 0000000000000001 ffffffffa08400e0 00000000ffffffff ffff88039d1d7d78
> > [ 455.866067] Call Trace:
> > [ 455.867100] [<ffffffff810b45bc>] notifier_call_chain+0x4c/0x70
> > [ 455.868202] [<ffffffff810b491d>] __blocking_notifier_call_chain+0x4d/0x70
> > [ 455.869155] [<ffffffff810b4956>] blocking_notifier_call_chain+0x16/0x20
> > [ 455.870111] [<ffffffff8110749c>] load_module+0x1aac/0x25f0
> > [ 455.871067] [<ffffffff811f6720>] ? kernel_read+0x50/0x80
> > [ 455.872022] [<ffffffff81108196>] SyS_finit_module+0xa6/0xd0
> > [ 455.872982] [<ffffffff817082e9>] system_call_fastpath+0x16/0x1b
> > [ 455.873941] Code: 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 83 fe 01 48 89 e5 41 56 49 89 d6 41 55 41 54 53 48 8b 9a d0 01 00 00 <8b> 43 20 0f 84 59 01 00 00 48 85 f6 75 44 85 c0 74 4a 83 e8 01
> > [ 455.876073] RIP [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
> > [ 455.877146] RSP <ffff88039d1d7ce0>
> > [ 455.878243] CR2: 0000000000000020
> > [ 455.883707] ---[ end trace 388d9e62d4390d42 ]---
> >
>
>
> --
> Masami HIRAMATSU
> Software Platform Research Dept. Linux Technology Research Center
> Hitachi, Ltd., Yokohama Research Laboratory
> E-mail: [email protected]
>
>

--
Josh

2014-06-20 00:09:23

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH -tip v2 3/3] kprobes: Set IPMODIFY flag only if the probe can change regs->ip

On Thu, 19 Jun 2014 21:34:31 +0900, Namhyung Kim wrote:
> What about this?
>
> static int __ftrace_remove_filter_ip(...)
> {
> if (*ref == 1) {
> int ret = unregister_ftrace_function(ops);
> if (ret < 0)
> return ret;
>
> ftrace_set_filter_ip(ops, ip, 1, 0);
> }
>
> (*ref)--;
> return 0;
> }


Hmm.. I missed removing in *ref > 1 case. Here's a v2. :)


static int __ftrace_remove_filter_ip(...)
{
int ret;

if (*ref == 1) {
ret = unregister_ftrace_function(ops);
if (ret < 0)
return ret;

/* ignore failure here */
ftrace_set_filter_ip(ops, ip, 1, 0);
} else
ret = ftrace_set_filter_ip(ops, ip, 1, 0);
if (ret < 0)
return ret;
}

(*ref)--;
return 0;
}

2014-06-20 02:08:06

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH -tip v2 1/3] ftrace: Simplify ftrace_hash_disable/enable path in ftrace_hash_move

On Tue, 17 Jun 2014 11:04:42 +0000
Masami Hiramatsu <[email protected]> wrote:

> Simplify ftrace_hash_disable/enable path in ftrace_hash_move
> for hardening the process if the memory allocation failed.
>

Regardless of what we do with IPMODIFY, I pulled this into my 3.17
queue. You don't need to resend it with any new versions of the patches.

-- Steve

> Signed-off-by: Masami Hiramatsu <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Josh Poimboeuf <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> ---
> kernel/trace/ftrace.c | 33 +++++++++++----------------------
> 1 file changed, 11 insertions(+), 22 deletions(-)
>
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 5b372e3..d65719d 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1304,25 +1304,15 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
> struct ftrace_hash *new_hash;
> int size = src->count;
> int bits = 0;
> - int ret;
> int i;
>
> /*
> - * Remove the current set, update the hash and add
> - * them back.
> - */
> - ftrace_hash_rec_disable(ops, enable);
> -
> - /*
> * If the new source is empty, just free dst and assign it
> * the empty_hash.
> */
> if (!src->count) {
> - free_ftrace_hash_rcu(*dst);
> - rcu_assign_pointer(*dst, EMPTY_HASH);
> - /* still need to update the function records */
> - ret = 0;
> - goto out;
> + new_hash = EMPTY_HASH;
> + goto update;
> }
>
> /*
> @@ -1335,10 +1325,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
> if (bits > FTRACE_HASH_MAX_BITS)
> bits = FTRACE_HASH_MAX_BITS;
>
> - ret = -ENOMEM;
> new_hash = alloc_ftrace_hash(bits);
> if (!new_hash)
> - goto out;
> + return -ENOMEM;
>
> size = 1 << src->size_bits;
> for (i = 0; i < size; i++) {
> @@ -1349,20 +1338,20 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
> }
> }
>
> +update:
> + /*
> + * Remove the current set, update the hash and add
> + * them back.
> + */
> + ftrace_hash_rec_disable(ops, enable);
> +
> old_hash = *dst;
> rcu_assign_pointer(*dst, new_hash);
> free_ftrace_hash_rcu(old_hash);
>
> - ret = 0;
> - out:
> - /*
> - * Enable regardless of ret:
> - * On success, we enable the new hash.
> - * On failure, we re-enable the original hash.
> - */
> ftrace_hash_rec_enable(ops, enable);
>
> - return ret;
> + return 0;
> }
>
> /*
>

Subject: Re: Re: [PATCH -tip v2 1/3] ftrace: Simplify ftrace_hash_disable/enable path in ftrace_hash_move

(2014/06/20 11:08), Steven Rostedt wrote:
> On Tue, 17 Jun 2014 11:04:42 +0000
> Masami Hiramatsu <[email protected]> wrote:
>
>> Simplify ftrace_hash_disable/enable path in ftrace_hash_move
>> for hardening the process if the memory allocation failed.
>>
>
> Regardless of what we do with IPMODIFY, I pulled this into my 3.17
> queue. You don't need to resend it with any new versions of the patches.

OK, thanks Steve!

--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Research Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

Subject: Re: [PATCH -tip v2 3/3] kprobes: Set IPMODIFY flag only if the probe can change regs->ip

(2014/06/19 21:34), Namhyung Kim wrote:
> Hi Masami,
>
> 2014-06-17 (화), 11:04 +0000, Masami Hiramatsu:
>> +static int __ftrace_add_filter_ip(struct ftrace_ops *ops, unsigned long ip,
>> + int *ref)
>> +{
>> + int ret;
>> +
>> + /* Try to set given ip to filter */
>> + ret = ftrace_set_filter_ip(ops, ip, 0, 0);
>> + if (ret >= 0) {
>
> Hmm.. this doesn't look comfortable. What not using usual pattern?
>
> if (ret < 0)
> return ret;
>
> This way we can reduce a indent level.

OK, I'll do so :)

>
>
>> + (*ref)++;
>> + if (*ref == 1) {
>> + ret = register_ftrace_function(ops);
>> + if (ret < 0) {
>> + /* Rollback refcounter and filter */
>> + (*ref)--;
>> + ftrace_set_filter_ip(ops, ip, 1, 0);
>> + }
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int __ftrace_remove_filter_ip(struct ftrace_ops *ops, unsigned long ip,
>> + int *ref)
>> +{
>> + int ret = 0;
>> +
>> + (*ref)--;
>> + if (*ref == 0)
>> + ret = unregister_ftrace_function(ops);
>> + if (ret >= 0)
>> + /* Try to remove given ip to filter */
>> + ret = ftrace_set_filter_ip(ops, ip, 1, 0);
>
> I think any failure at this time can be problematic. Since we already
> unregistered the ops but the refcount will get increased again, future
> attemp to register won't enable the ops anymore IMHO.

Agreed.

> I think it'd better just ignoring faiure of filter removal here. We'll
> miss a filter entry but it'll be usable anyway.
>
> What about this?

OK, I'll use your v2 code :)

Thank you for review!


--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Research Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2014-06-20 02:48:55

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH -tip v2 2/3] ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict

On Tue, 17 Jun 2014 11:04:49 +0000
Masami Hiramatsu <[email protected]> wrote:

> Introduce FTRACE_OPS_FL_IPMODIFY to avoid conflict among
> ftrace users who may modify regs->ip to change the execution
> path. This also adds the flag to kprobe_ftrace_ops, since
> ftrace-based kprobes already modifies regs->ip. Thus, if
> another user modifies the regs->ip on the same function entry,
> one of them will be broken. So both should add IPMODIFY flag
> and make sure that ftrace_set_filter_ip() succeeds.
>
> Note that currently conflicts of IPMODIFY are detected on the
> filter hash. It does NOT care about the notrace hash. This means
> that if you set filter hash all functions and notrace(mask)
> some of them, the IPMODIFY flag will be applied to all
> functions.

Hmm, this worries me. I'm not sure I care about ignoring the notrace
hash.

>
> Changes in v2:
> - Add a description how __ftrace_hash_update_ipmodify() will
> handle the given hashes (NULL and EMPTY_HASH cases).
> - Clear FTRACE_OPS_FL_ENABLED after calling
> __unregister_ftrace_function() in error path.
>
> Signed-off-by: Masami Hiramatsu <[email protected]>
> Cc: Ananth N Mavinakayanahalli <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Josh Poimboeuf <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> ---
> Documentation/trace/ftrace.txt | 5 ++
> include/linux/ftrace.h | 10 ++-
> kernel/kprobes.c | 2 -
> kernel/trace/ftrace.c | 133 +++++++++++++++++++++++++++++++++++++++-
> 4 files changed, 145 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
> index 2479b2a..0fcad7d 100644
> --- a/Documentation/trace/ftrace.txt
> +++ b/Documentation/trace/ftrace.txt
> @@ -234,6 +234,11 @@ of ftrace. Here is a list of some of the key files:
> will be displayed on the same line as the function that
> is returning registers.
>
> + If the callback registered to be traced by a function with
> + the "ip modify" attribute (thus the regs->ip can be changed),
> + a 'I' will be displayed on the same line as the function that
> + can be overridden.
> +
> function_profile_enabled:
>
> When set it will enable all functions with either the function
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 404a686..5a43f8f 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -90,6 +90,9 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
> * INITIALIZED - The ftrace_ops has already been initialized (first use time
> * register_ftrace_function() is called, it will initialized the ops)
> * DELETED - The ops are being deleted, do not let them be registered again.
> + * IPMODIFY - The ops can modify IP register. This must be set with SAVE_REGS
> + * and if the other ops has been set this on same function, filter
> + * update must be failed.
> */
> enum {
> FTRACE_OPS_FL_ENABLED = 1 << 0,
> @@ -101,6 +104,7 @@ enum {
> FTRACE_OPS_FL_STUB = 1 << 6,
> FTRACE_OPS_FL_INITIALIZED = 1 << 7,
> FTRACE_OPS_FL_DELETED = 1 << 8,
> + FTRACE_OPS_FL_IPMODIFY = 1 << 9,
> };
>
> /*
> @@ -306,6 +310,7 @@ extern int ftrace_nr_registered_ops(void);
> * the dyn_ftrace descriptor represents.
> *
> * The second part is a mask:
> + * IPMODIFY - the record wants to change IP address.
> * ENABLED - the function is being traced
> * REGS - the record wants the function to save regs
> * REGS_EN - the function is set up to save regs.
> @@ -317,13 +322,14 @@ extern int ftrace_nr_registered_ops(void);
> * from tracing that function.
> */
> enum {
> + FTRACE_FL_IPMODIFY = (1UL << 28),
> FTRACE_FL_ENABLED = (1UL << 29),
> FTRACE_FL_REGS = (1UL << 30),
> FTRACE_FL_REGS_EN = (1UL << 31)
> };
>
> -#define FTRACE_FL_MASK (0x7UL << 29)
> -#define FTRACE_REF_MAX ((1UL << 29) - 1)
> +#define FTRACE_FL_MASK (0xfUL << 28)
> +#define FTRACE_REF_MAX ((1UL << 28) - 1)

Note, this is going to conflict with my queue for 3.17, as I'm starting
to write individual trampolines.

You can take a look at my ftrace/next branch, but be warned, it will
rebase.

>
> struct dyn_ftrace {
> unsigned long ip; /* address of mcount call-site */
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 3214289..e52d86f 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -915,7 +915,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
> #ifdef CONFIG_KPROBES_ON_FTRACE
> static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
> .func = kprobe_ftrace_handler,
> - .flags = FTRACE_OPS_FL_SAVE_REGS,
> + .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,

We probably should comment somewhere that once you set the IPMODIFY
flag (or do not set it), it should never change. An ops either has it
or it doesn't, it can't change its mind. Otherwise it could play havoc
with the update code below.

> };
> static int kprobe_ftrace_enabled;
>
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index d65719d..5e09f39 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1293,6 +1293,9 @@ ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
> static void
> ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
>
> +static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
> + struct ftrace_hash *new_hash);
> +
> static int
> ftrace_hash_move(struct ftrace_ops *ops, int enable,
> struct ftrace_hash **dst, struct ftrace_hash *src)
> @@ -1304,6 +1307,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
> struct ftrace_hash *new_hash;
> int size = src->count;
> int bits = 0;
> + int ret;
> int i;
>
> /*
> @@ -1339,6 +1343,16 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
> }
>
> update:
> + /* Before everything, make sure this can be applied */
> + if (enable) {
> + /* IPMODIFY should be updated only when filter_hash updating */
> + ret = ftrace_hash_ipmodify_update(ops, new_hash);
> + if (ret < 0) {
> + free_ftrace_hash(new_hash);
> + return ret;
> + }
> + }
> +
> /*
> * Remove the current set, update the hash and add
> * them back.
> @@ -1593,6 +1607,109 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
> __ftrace_hash_rec_update(ops, filter_hash, 1);
> }
>
> +/*
> + * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
> + * or no-needed to update, -EBUSY if it detects a conflict of the flag
> + * on a ftrace_rec.
> + * Note that old_hash and new_hash has below meanings
> + * - If the hash is NULL, it hits all recs
> + * - If the hash is EMPTY_HASH, it hits nothing
> + * - Others hits the recs which match the hash entries.

- Anything else hits the recs ...

> + */
> +static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
> + struct ftrace_hash *old_hash,
> + struct ftrace_hash *new_hash)
> +{
> + struct ftrace_page *pg;
> + struct dyn_ftrace *rec, *end = NULL;
> + int in_old, in_new;
> +
> + /* Only update if the ops has been registered */
> + if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
> + return 0;
> +
> + if (!(ops->flags & FTRACE_OPS_FL_SAVE_REGS) ||
> + !(ops->flags & FTRACE_OPS_FL_IPMODIFY))

Only check the IPMODIFY flag. In the future, I may allow this without
SAVE_REGS. That is, the function will get a regs pointer that has a
limited number of regs set. Maybe just ip.


> + return 0;
> +
> + /* Update rec->flags */
> + do_for_each_ftrace_rec(pg, rec) {
> + /* We need to update only differences of filter_hash */
> + in_old = !old_hash || ftrace_lookup_ip(old_hash, rec->ip);
> + in_new = !new_hash || ftrace_lookup_ip(new_hash, rec->ip);
> + if (in_old == in_new)
> + continue;
> +
> + if (in_new) {
> + /* New entries must ensure no others are using it */
> + if (rec->flags & FTRACE_FL_IPMODIFY)
> + goto rollback;
> + rec->flags |= FTRACE_FL_IPMODIFY;
> + } else /* Removed entry */
> + rec->flags &= ~FTRACE_FL_IPMODIFY;

Like I mentioned before. This code depends on oldhash also having
IPMODIFY set. I wonder if we can detect if it changed.

> + } while_for_each_ftrace_rec();
> +
> + return 0;
> +
> +rollback:
> + end = rec;
> +
> + /* Roll back what we did above */
> + do_for_each_ftrace_rec(pg, rec) {
> + if (rec == end)
> + goto err_out;
> +
> + in_old = !old_hash || ftrace_lookup_ip(old_hash, rec->ip);
> + in_new = !new_hash || ftrace_lookup_ip(new_hash, rec->ip);
> + if (in_old == in_new)
> + continue;
> +
> + if (in_new)
> + rec->flags &= ~FTRACE_FL_IPMODIFY;
> + else
> + rec->flags |= FTRACE_FL_IPMODIFY;
> + } while_for_each_ftrace_rec();
> +
> +err_out:
> + return -EBUSY;
> +}
> +
> +static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops)
> +{
> + struct ftrace_hash *hash = ops->filter_hash;
> +
> + if (ftrace_hash_empty(hash))
> + hash = NULL;
> +
> + return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash);
> +}
> +
> +/* Disabling always succeeds */
> +static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops)
> +{
> + struct ftrace_hash *hash = ops->filter_hash;
> +
> + if (ftrace_hash_empty(hash))
> + hash = NULL;
> +
> + __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH);
> +}
> +
> +static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
> + struct ftrace_hash *new_hash)
> +{
> + struct ftrace_hash *old_hash = ops->filter_hash;
> +
> + if (ftrace_hash_empty(old_hash))
> + old_hash = NULL;
> +
> + if (ftrace_hash_empty(new_hash))
> + new_hash = NULL;
> +
> + return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash);
> +}
> +
> +
> static void print_ip_ins(const char *fmt, unsigned char *p)
> {
> int i;
> @@ -2078,6 +2195,15 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
>
> ops->flags |= FTRACE_OPS_FL_ENABLED;
>
> + ret = ftrace_hash_ipmodify_enable(ops);
> + if (ret < 0) {
> + /* Rollback registration process */
> + __unregister_ftrace_function(ops);
> + ftrace_start_up--;
> + ops->flags &= ~FTRACE_OPS_FL_ENABLED;
> + return ret;
> + }
> +
> ftrace_hash_rec_enable(ops, 1);
>
> ftrace_startup_enable(command);
> @@ -2104,6 +2230,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
> */
> WARN_ON_ONCE(ftrace_start_up < 0);
>
> + /* Disabling ipmodify never be failed */

"Disabling ipmodify never fails"

> + ftrace_hash_ipmodify_disable(ops);
> ftrace_hash_rec_disable(ops, 1);
>
> if (!global_start_up)
> @@ -2641,9 +2769,10 @@ static int t_show(struct seq_file *m, void *v)
>
> seq_printf(m, "%ps", (void *)rec->ip);
> if (iter->flags & FTRACE_ITER_ENABLED)
> - seq_printf(m, " (%ld)%s",
> + seq_printf(m, " (%ld)%s%s",
> rec->flags & ~FTRACE_FL_MASK,
> - rec->flags & FTRACE_FL_REGS ? " R" : "");
> + rec->flags & FTRACE_FL_REGS ? " R" : "",
> + rec->flags & FTRACE_FL_IPMODIFY ? " I" : "");

Perhaps even add 'I' as we may allow setting it without regs.

-- Steve

> seq_printf(m, "\n");
>
> return 0;
>

Subject: Re: [PATCH -tip v2 0/3] ftrace, kprobes: Introduce IPMODIFY flag for ftrace_ops to detect conflicts

(2014/06/19 23:18), Josh Poimboeuf wrote:
> On Thu, Jun 19, 2014 at 02:03:31PM +0900, Masami Hiramatsu wrote:
>> (2014/06/19 11:08), Josh Poimboeuf wrote:
>>> On Tue, Jun 17, 2014 at 11:04:36AM +0000, Masami Hiramatsu wrote:
>>>> Hi,
>>>>
>>>> Here is the version 2 of the series of patches which introduces
>>>> IPMODIFY flag for ftrace_ops to detect conflicts of ftrace users
>>>> who can modify regs->ip in their handler.
>>>> In this version, I fixed some bugs in previous version and
>>>> added a patch which made kprobe itself free from IPMODIFY
>>>> except for jprobe.
>>>
>>> Hi Masami,
>>>
>>> This seems better, but I still saw a few issues. I'm not sure if the
>>> issues are specific to stap or kprobes. For the following issues I used
>>> this command to set a kprobe:
>>>
>>> stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'
>>>
>>> With patches 1-2, when I used stap to kprobe the function after it was
>>> already kpatched, stap didn't return an error and instead acted like it
>>> succeeded (though the probe didn't work):
>>>
>>> $ sudo stap -v -e 'probe kernel.function("meminfo_proc_show") {printf("meminfo_proc_show called\n");}'
>>> Pass 1: parsed user script and 112 library script(s) using 221516virt/41612res/6028shr/36228data kb, in 130usr/0sys/132real ms.
>>> Pass 2: analyzed script: 1 probe(s), 0 function(s), 0 embed(s), 0 global(s) using 255840virt/77132res/7132shr/70552data kb, in 510usr/20sys/577real ms.
>>> Pass 3: translated to C into "/tmp/stap3Qunba/stap_2690192fea570fb7bba78c7ed7fa1e0d_898_src.c" using 255840virt/77392res/7392shr/70552data kb, in 10usr/0sys/4real ms.
>>> Pass 4: compiled C into "stap_2690192fea570fb7bba78c7ed7fa1e0d_898.ko" in 5020usr/640sys/7105real ms.
>>> Pass 5: starting run.
>>> (no error)
>>
>> Yeah, I guess you can see some warning messages in dmesg (by
>> arm_kprobe) at this point.
>
> Ah, you're right:
>
> Jun 19 08:03:10 treble kernel: ------------[ cut here ]------------
> Jun 19 08:03:10 treble kernel: WARNING: CPU: 1 PID: 17991 at kernel/kprobes.c:953 arm_kprobe+0xa7/0xe0()
> Jun 19 08:03:10 treble kernel: Failed to init kprobe-ftrace (-16)
> Jun 19 08:03:10 treble kernel: Modules linked in: stap_1faf9cc0ccf85c0d203c74ab6f604b_17991(OE) ...defra
> Jun 19 08:03:10 treble kernel: videobuf2_vmalloc serio_raw microcode sdhci_pci bluetooth videobuf2_m...
> Jun 19 08:03:10 treble kernel: CPU: 1 PID: 17991 Comm: stapio Tainted: G U W OE 3.15.0+ #1
> Jun 19 08:03:10 treble kernel: Hardware name: LENOVO 2356BH8/2356BH8, BIOS G7ET63WW (2.05 ) 11/12/2012
> Jun 19 08:03:10 treble kernel: 0000000000000000 000000009023f19e ffff8803dcce7d80 ffffffff816f31fd
> Jun 19 08:03:10 treble kernel: ffff8803dcce7dc8 ffff8803dcce7db8 ffffffff8108914d ffffffffa08248e0
> Jun 19 08:03:10 treble kernel: ffffffffa08248f0 0000000000000000 0000000000000000 0000000000000000
> Jun 19 08:03:10 treble kernel: Call Trace:
> Jun 19 08:03:10 treble kernel: [<ffffffff816f31fd>] dump_stack+0x45/0x56
> Jun 19 08:03:10 treble kernel: [<ffffffff8108914d>] warn_slowpath_common+0x7d/0xa0
> Jun 19 08:03:10 treble kernel: [<ffffffff810891cc>] warn_slowpath_fmt+0x5c/0x80
> Jun 19 08:03:10 treble kernel: [<ffffffff816ff9d7>] arm_kprobe+0xa7/0xe0
> Jun 19 08:03:10 treble kernel: [<ffffffff817007f7>] register_kprobe+0x557/0x5d0
> Jun 19 08:03:10 treble kernel: [<ffffffff81254db0>] ? meminfo_proc_open+0x30/0x30
> Jun 19 08:03:10 treble kernel: [<ffffffffa081fc95>] _stp_ctl_write_cmd+0x8d5/0x930 [stap_1faf9c...7991]
> Jun 19 08:03:10 treble kernel: [<ffffffff811e5dba>] vfs_write+0xba/0x1e0
> Jun 19 08:03:10 treble kernel: [<ffffffff811e6975>] SyS_write+0x55/0xd0
> Jun 19 08:03:10 treble kernel: [<ffffffff81703179>] system_call_fastpath+0x16/0x1b
> Jun 19 08:03:10 treble kernel: ---[ end trace 19615ed55413a30d ]---
>
> Why not change arm_kprobe() to return an error?

Actually, arm_kprobe() is widely used at deeper point.
And the 3rd patch will solve the problem. So I decided just adding
IPMODIFY flag at the 2nd patch.

>>> With all 3 patches, I expected kprobes and kpatch to be able to ftrace
>>> the same function. But when I tried to kpatch the function after it was
>>> kprobed, I got the following oops in stap:
>>>
>>> [ 455.842797] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
>>> [ 455.843388] IP: [<ffffffffa0833d1e>] _stp_module_notifier+0x1e/0x320 [stap_2690192fea570fb7bba78c7ed7fa1e_20189]
>>
>> Hmm, since this happens in _stp_module_notifier() which is a code in systemtap,
>> I guess it's a systemtap problem.
>>
>> Could you test it with kprobe-tracer as below?
>>
>> # (do something kpatch related activation)
>> # echo p meminfo_proc_show > /sys/kernel/debug/tracing/kprobe_events
>> # echo 1 > /sys/kernel/debug/tracing/events/kprobe/enable
>
> That worked, thanks.

Moreover, I couldn't reproduced the stap case on my fedora20.
Perhaps, Maybe a different version of systemtap I used.

Thank you,

--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Research Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

Subject: Re: [PATCH -tip v2 2/3] ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict

(2014/06/20 11:48), Steven Rostedt wrote:
> On Tue, 17 Jun 2014 11:04:49 +0000
> Masami Hiramatsu <[email protected]> wrote:
>
>> Introduce FTRACE_OPS_FL_IPMODIFY to avoid conflict among
>> ftrace users who may modify regs->ip to change the execution
>> path. This also adds the flag to kprobe_ftrace_ops, since
>> ftrace-based kprobes already modifies regs->ip. Thus, if
>> another user modifies the regs->ip on the same function entry,
>> one of them will be broken. So both should add IPMODIFY flag
>> and make sure that ftrace_set_filter_ip() succeeds.
>>
>> Note that currently conflicts of IPMODIFY are detected on the
>> filter hash. It does NOT care about the notrace hash. This means
>> that if you set filter hash all functions and notrace(mask)
>> some of them, the IPMODIFY flag will be applied to all
>> functions.
>
> Hmm, this worries me. I'm not sure I care about ignoring the notrace
> hash.

Since the notrace hash is not updated atomically (it is disabled ->
updated -> enabled), there could be a small window where only filter
hash is enabled. I considered that.


[...]
>> @@ -317,13 +322,14 @@ extern int ftrace_nr_registered_ops(void);
>> * from tracing that function.
>> */
>> enum {
>> + FTRACE_FL_IPMODIFY = (1UL << 28),
>> FTRACE_FL_ENABLED = (1UL << 29),
>> FTRACE_FL_REGS = (1UL << 30),
>> FTRACE_FL_REGS_EN = (1UL << 31)
>> };
>>
>> -#define FTRACE_FL_MASK (0x7UL << 29)
>> -#define FTRACE_REF_MAX ((1UL << 29) - 1)
>> +#define FTRACE_FL_MASK (0xfUL << 28)
>> +#define FTRACE_REF_MAX ((1UL << 28) - 1)
>
> Note, this is going to conflict with my queue for 3.17, as I'm starting
> to write individual trampolines.
>
> You can take a look at my ftrace/next branch, but be warned, it will
> rebase.

OK, anyway, at least this flag can easily be changed. :)


>> struct dyn_ftrace {
>> unsigned long ip; /* address of mcount call-site */
>> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
>> index 3214289..e52d86f 100644
>> --- a/kernel/kprobes.c
>> +++ b/kernel/kprobes.c
>> @@ -915,7 +915,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
>> #ifdef CONFIG_KPROBES_ON_FTRACE
>> static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
>> .func = kprobe_ftrace_handler,
>> - .flags = FTRACE_OPS_FL_SAVE_REGS,
>> + .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
>
> We probably should comment somewhere that once you set the IPMODIFY
> flag (or do not set it), it should never change. An ops either has it
> or it doesn't, it can't change its mind. Otherwise it could play havoc
> with the update code below.

Agreed. Hmm, it seems that we currently have no document about ftrace_ops.
This is a kind of undocumented kmodule API. At least we need to add a comment
on the header.

[...]
>> @@ -1593,6 +1607,109 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
>> __ftrace_hash_rec_update(ops, filter_hash, 1);
>> }
>>
>> +/*
>> + * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
>> + * or no-needed to update, -EBUSY if it detects a conflict of the flag
>> + * on a ftrace_rec.
>> + * Note that old_hash and new_hash has below meanings
>> + * - If the hash is NULL, it hits all recs
>> + * - If the hash is EMPTY_HASH, it hits nothing
>> + * - Others hits the recs which match the hash entries.
>
> - Anything else hits the recs ...

Oh, thanks!

>
>> + */
>> +static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>> + struct ftrace_hash *old_hash,
>> + struct ftrace_hash *new_hash)
>> +{
>> + struct ftrace_page *pg;
>> + struct dyn_ftrace *rec, *end = NULL;
>> + int in_old, in_new;
>> +
>> + /* Only update if the ops has been registered */
>> + if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
>> + return 0;
>> +
>> + if (!(ops->flags & FTRACE_OPS_FL_SAVE_REGS) ||
>> + !(ops->flags & FTRACE_OPS_FL_IPMODIFY))
>
> Only check the IPMODIFY flag. In the future, I may allow this without
> SAVE_REGS. That is, the function will get a regs pointer that has a
> limited number of regs set. Maybe just ip.

Ah, I see. I'll change that.

>> + return 0;
>> +
>> + /* Update rec->flags */
>> + do_for_each_ftrace_rec(pg, rec) {
>> + /* We need to update only differences of filter_hash */
>> + in_old = !old_hash || ftrace_lookup_ip(old_hash, rec->ip);
>> + in_new = !new_hash || ftrace_lookup_ip(new_hash, rec->ip);
>> + if (in_old == in_new)
>> + continue;
>> +
>> + if (in_new) {
>> + /* New entries must ensure no others are using it */
>> + if (rec->flags & FTRACE_FL_IPMODIFY)
>> + goto rollback;
>> + rec->flags |= FTRACE_FL_IPMODIFY;
>> + } else /* Removed entry */
>> + rec->flags &= ~FTRACE_FL_IPMODIFY;
>
> Like I mentioned before. This code depends on oldhash also having
> IPMODIFY set. I wonder if we can detect if it changed.

Yeah, I guess that SAVE_REGS flag has same issue. And, of course
kprobe->flags has same. I think we just need comments or a document
for the ftrace_ops API.

[...]
>> @@ -2078,6 +2195,15 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
>>
>> ops->flags |= FTRACE_OPS_FL_ENABLED;
>>
>> + ret = ftrace_hash_ipmodify_enable(ops);
>> + if (ret < 0) {
>> + /* Rollback registration process */
>> + __unregister_ftrace_function(ops);
>> + ftrace_start_up--;
>> + ops->flags &= ~FTRACE_OPS_FL_ENABLED;
>> + return ret;
>> + }
>> +
>> ftrace_hash_rec_enable(ops, 1);
>>
>> ftrace_startup_enable(command);
>> @@ -2104,6 +2230,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
>> */
>> WARN_ON_ONCE(ftrace_start_up < 0);
>>
>> + /* Disabling ipmodify never be failed */
>
> "Disabling ipmodify never fails"

OK, I'll fix that.

>
>> + ftrace_hash_ipmodify_disable(ops);
>> ftrace_hash_rec_disable(ops, 1);
>>
>> if (!global_start_up)
>> @@ -2641,9 +2769,10 @@ static int t_show(struct seq_file *m, void *v)
>>
>> seq_printf(m, "%ps", (void *)rec->ip);
>> if (iter->flags & FTRACE_ITER_ENABLED)
>> - seq_printf(m, " (%ld)%s",
>> + seq_printf(m, " (%ld)%s%s",
>> rec->flags & ~FTRACE_FL_MASK,
>> - rec->flags & FTRACE_FL_REGS ? " R" : "");
>> + rec->flags & FTRACE_FL_REGS ? " R" : "",
>> + rec->flags & FTRACE_FL_IPMODIFY ? " I" : "");
>
> Perhaps even add 'I' as we may allow setting it without regs.

Yes, here we do so. :)

Thank you,

--
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Research Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]