Subject: [PATCH -tip v5 0/8] Kprobes/x86: Batch optimization support

Hi,

Here is version 5 of the batch (un)optimizing patch series.
Ingo, I've split the biggest patch into 6 patches.

Since the current kprobes jump optimization calls stop_machine() for
each probe, it can introduce a lot of latency noise when (un)registering
many probes (~1000) at once. For example, on a 4-core Xeon, optimizing
256 probes takes 770us on average (max 3.3ms).
This patch series introduces batch (un)optimization, which modifies the
code with just one stop_machine() and improves the optimization time to
90us on average (max 330us).
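
Conceptually, the change looks as follows (a rough sketch only; the
actual function and variable names appear in the patches below):

        /* Before: one stop_machine() per probe */
        list_for_each_entry(op, &optimizing_list, list)
                text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);

        /* After: one stop_machine() for the whole batch */
        text_poke_smp_batch(jump_poke_params, nparams);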

- Introduce text_poke_smp_batch(), which modifies multiple text areas
  with one stop_machine().
- Limit how many probes can be (un)optimized at once.
- Introduce delayed unoptimization for batch processing (sketched below).
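
Delayed unoptimization means unoptimize_kprobe() usually just queues
the probe and lets the optimizer handle it later (a sketch based on
patch 4/8; the force path pokes the text immediately):

        if (force)
                force_unoptimize_kprobe(op);    /* immediate, own stop_machine() */
        else {
                list_add(&op->list, &unoptimizing_list);
                kick_kprobe_optimizer();        /* batched later */
        }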

text_poke_smp_batch() will also help the jump label code reduce the
delay coming from text_poke_smp().

Changes in v5:
- Split delayed unregistering patch into 3 cleanup patches
and 3 feature-adding patches.
- Add comments to explain code.
- Fix missing text_mutex locks.

Changes in v4:
- Update to the latest tip tree.

Changes in v3:
- Set kp.addr = NULL according to Ananth's comment.

Changes in v2:
- Add kprobes selftest bugfix patch.
- Add some comments about locks according to Mathieu's comment.
- Allocate working buffers when initializing kprobes, instead of
  using static arrays.
- Merge max optimization limit patch into batch optimizing patch.

Thank you,

---

Masami Hiramatsu (8):
[CLEANUP] kprobes: Rename old_p to more appropriate name
[CLEANUP] kprobes: Cleanup disabling and unregistering path
[CLEANUP] kprobes: Separate kprobe optimizing code from optimizer
kprobes: Support delayed unoptimizing
kprobes: Reuse unused kprobe
x86: Introduce text_poke_smp_batch() for batch-code modifying
kprobes: Use text_poke_smp_batch for optimizing
kprobes: Use text_poke_smp_batch for unoptimizing


arch/x86/include/asm/alternative.h | 7
arch/x86/kernel/alternative.c | 49 +++
arch/x86/kernel/kprobes.c | 113 +++++++
include/linux/kprobes.h | 4
kernel/kprobes.c | 565 ++++++++++++++++++++++++------------
5 files changed, 539 insertions(+), 199 deletions(-)

--
Masami HIRAMATSU
2nd Dept. Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: [email protected]


Subject: [PATCH -tip v5 1/8] [CLEANUP] kprobes: Rename old_p to more appropriate name

Rename irrelevant uses of "old_p" to more appropriate names.
Originally, "old_p" just meant "the old kprobe on the given address",
but the current code uses that name to mean "just another kprobe" or
something like that. This patch renames those pointers to more
appropriate names for maintainability.
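
After this patch, the naming convention is roughly as follows (a
summary of the renames in the diff below, not new code):

        struct kprobe *ap;      /* aggregated kprobe, or the probe on the hash list */
        struct kprobe *orig_p;  /* the original kprobe looked up at the address */
        struct kprobe *_p;      /* a scratch pointer for collision checks */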

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: [email protected]
---

kernel/kprobes.c | 93 +++++++++++++++++++++++++++---------------------------
1 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9737a76..8c3aa14 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -357,10 +357,10 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
/*
* Keep all fields in the kprobe consistent
*/
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
{
- memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
- memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+ memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
+ memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
}

#ifdef CONFIG_OPTPROBES
@@ -671,12 +671,12 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,

static void __kprobes __arm_kprobe(struct kprobe *p)
{
- struct kprobe *old_p;
+ struct kprobe *_p;

/* Check collision with other optimized kprobes */
- old_p = get_optimized_kprobe((unsigned long)p->addr);
- if (unlikely(old_p))
- unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+ _p = get_optimized_kprobe((unsigned long)p->addr);
+ if (unlikely(_p))
+ unoptimize_kprobe(_p); /* Fallback to unoptimized kprobe */

arch_arm_kprobe(p);
optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */
@@ -684,15 +684,15 @@ static void __kprobes __arm_kprobe(struct kprobe *p)

static void __kprobes __disarm_kprobe(struct kprobe *p)
{
- struct kprobe *old_p;
+ struct kprobe *_p;

unoptimize_kprobe(p); /* Try to unoptimize */
arch_disarm_kprobe(p);

/* If another kprobe was blocked, optimize it. */
- old_p = get_optimized_kprobe((unsigned long)p->addr);
- if (unlikely(old_p))
- optimize_kprobe(old_p);
+ _p = get_optimized_kprobe((unsigned long)p->addr);
+ if (unlikely(_p))
+ optimize_kprobe(_p);
}

#else /* !CONFIG_OPTPROBES */
@@ -993,18 +993,18 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
* This is the second or subsequent kprobe at the address - handle
* the intricacies
*/
-static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
struct kprobe *p)
{
int ret = 0;
- struct kprobe *ap = old_p;
+ struct kprobe *ap = orig_p;

- if (!kprobe_aggrprobe(old_p)) {
- /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
- ap = alloc_aggr_kprobe(old_p);
+ if (!kprobe_aggrprobe(orig_p)) {
+ /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
+ ap = alloc_aggr_kprobe(orig_p);
if (!ap)
return -ENOMEM;
- init_aggr_kprobe(ap, old_p);
+ init_aggr_kprobe(ap, orig_p);
}

if (kprobe_gone(ap)) {
@@ -1098,34 +1098,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
{
- struct kprobe *old_p, *list_p;
+ struct kprobe *ap, *list_p;

- old_p = get_kprobe(p->addr);
- if (unlikely(!old_p))
+ ap = get_kprobe(p->addr);
+ if (unlikely(!ap))
return NULL;

- if (p != old_p) {
- list_for_each_entry_rcu(list_p, &old_p->list, list)
+ if (p != ap) {
+ list_for_each_entry_rcu(list_p, &ap->list, list)
if (list_p == p)
/* kprobe p is a valid probe */
goto valid;
return NULL;
}
valid:
- return old_p;
+ return ap;
}

/* Return error if the kprobe is being re-registered */
static inline int check_kprobe_rereg(struct kprobe *p)
{
int ret = 0;
- struct kprobe *old_p;

mutex_lock(&kprobe_mutex);
- old_p = __get_valid_kprobe(p);
- if (old_p)
+ if (__get_valid_kprobe(p))
ret = -EINVAL;
mutex_unlock(&kprobe_mutex);
+
return ret;
}

@@ -1234,43 +1233,43 @@ EXPORT_SYMBOL_GPL(register_kprobe);
*/
static int __kprobes __unregister_kprobe_top(struct kprobe *p)
{
- struct kprobe *old_p, *list_p;
+ struct kprobe *ap, *list_p;

- old_p = __get_valid_kprobe(p);
- if (old_p == NULL)
+ ap = __get_valid_kprobe(p);
+ if (ap == NULL)
return -EINVAL;

- if (old_p == p ||
- (kprobe_aggrprobe(old_p) &&
- list_is_singular(&old_p->list))) {
+ if (ap == p ||
+ (kprobe_aggrprobe(ap) &&
+ list_is_singular(&ap->list))) {
/*
* Only probe on the hash list. Disarm only if kprobes are
* enabled and not gone - otherwise, the breakpoint would
* already have been removed. We save on flushing icache.
*/
- if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
- disarm_kprobe(old_p);
- hlist_del_rcu(&old_p->hlist);
+ if (!kprobes_all_disarmed && !kprobe_disabled(ap))
+ disarm_kprobe(ap);
+ hlist_del_rcu(&ap->hlist);
} else {
if (p->break_handler && !kprobe_gone(p))
- old_p->break_handler = NULL;
+ ap->break_handler = NULL;
if (p->post_handler && !kprobe_gone(p)) {
- list_for_each_entry_rcu(list_p, &old_p->list, list) {
+ list_for_each_entry_rcu(list_p, &ap->list, list) {
if ((list_p != p) && (list_p->post_handler))
goto noclean;
}
- old_p->post_handler = NULL;
+ ap->post_handler = NULL;
}
noclean:
list_del_rcu(&p->list);
- if (!kprobe_disabled(old_p)) {
- try_to_disable_aggr_kprobe(old_p);
+ if (!kprobe_disabled(ap)) {
+ try_to_disable_aggr_kprobe(ap);
if (!kprobes_all_disarmed) {
- if (kprobe_disabled(old_p))
- disarm_kprobe(old_p);
+ if (kprobe_disabled(ap))
+ disarm_kprobe(ap);
else
/* Try to optimize this probe again */
- optimize_kprobe(old_p);
+ optimize_kprobe(ap);
}
}
}
@@ -1279,16 +1278,16 @@ noclean:

static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
{
- struct kprobe *old_p;
+ struct kprobe *ap;

if (list_empty(&p->list))
arch_remove_kprobe(p);
else if (list_is_singular(&p->list)) {
/* "p" is the last child of an aggr_kprobe */
- old_p = list_entry(p->list.next, struct kprobe, list);
+ ap = list_entry(p->list.next, struct kprobe, list);
list_del(&p->list);
- arch_remove_kprobe(old_p);
- free_aggr_kprobe(old_p);
+ arch_remove_kprobe(ap);
+ free_aggr_kprobe(ap);
}
}

Subject: [PATCH -tip v5 2/8] [CLEANUP] kprobes: Cleanup disabling and unregistering path

Merge kprobe disabling into the kprobe unregistering function and add
comments for the disabling/unregistering process.

The current unregistering code disables (disarms) kprobes after
checking the target kprobe's status. This patch changes it to disable
the kprobe first and to change the kprobe's state after that. This
allows the probe-disabling code to be shared between disable_kprobe()
and unregister_kprobe().
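
The resulting call paths look roughly like this (a sketch of the code
in the diff below):

        unregister_kprobe() -> __unregister_kprobe_top() -> __disable_kprobe()
        disable_kprobe()    ------------------------------> __disable_kprobe()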

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: [email protected]
---

kernel/kprobes.c | 128 ++++++++++++++++++++++++++++++------------------------
1 files changed, 72 insertions(+), 56 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8c3aa14..ab99caf 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1039,23 +1039,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
return add_new_kprobe(ap, p);
}

-/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
-static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
-{
- struct kprobe *kp;
-
- list_for_each_entry_rcu(kp, &p->list, list) {
- if (!kprobe_disabled(kp))
- /*
- * There is an active probe on the list.
- * We can't disable aggr_kprobe.
- */
- return 0;
- }
- p->flags |= KPROBE_FLAG_DISABLED;
- return 1;
-}
-
static int __kprobes in_kprobes_functions(unsigned long addr)
{
struct kprobe_blackpoint *kb;
@@ -1228,6 +1211,47 @@ fail_with_jump_label:
}
EXPORT_SYMBOL_GPL(register_kprobe);

+/* Check if all probes on the aggrprobe are disabled */
+static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+{
+ struct kprobe *kp;
+
+ list_for_each_entry_rcu(kp, &ap->list, list)
+ if (!kprobe_disabled(kp))
+ /*
+ * There is an active probe on the list.
+ * We can't disable this ap.
+ */
+ return 0;
+
+ return 1;
+}
+
+/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
+static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+{
+ struct kprobe *orig_p;
+
+ /* Get an original kprobe for return */
+ orig_p = __get_valid_kprobe(p);
+ if (unlikely(orig_p == NULL))
+ return NULL;
+
+ if (!kprobe_disabled(p)) {
+ /* Disable probe if it is a child probe */
+ if (p != orig_p)
+ p->flags |= KPROBE_FLAG_DISABLED;
+
+ /* Try to disarm and disable this/parent probe */
+ if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+ disarm_kprobe(orig_p);
+ orig_p->flags |= KPROBE_FLAG_DISABLED;
+ }
+ }
+
+ return orig_p;
+}
+
/*
* Unregister a kprobe without a scheduler synchronization.
*/
@@ -1235,22 +1259,26 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
{
struct kprobe *ap, *list_p;

- ap = __get_valid_kprobe(p);
+ /* Disable kprobe. This will disarm it if needed. */
+ ap = __disable_kprobe(p);
if (ap == NULL)
return -EINVAL;

- if (ap == p ||
- (kprobe_aggrprobe(ap) &&
- list_is_singular(&ap->list))) {
+ if (ap == p)
/*
- * Only probe on the hash list. Disarm only if kprobes are
- * enabled and not gone - otherwise, the breakpoint would
- * already have been removed. We save on flushing icache.
+ * This probe is an independent(and non-optimized) kprobe
+ * (not an aggrprobe). Remove from the hash list.
*/
- if (!kprobes_all_disarmed && !kprobe_disabled(ap))
- disarm_kprobe(ap);
- hlist_del_rcu(&ap->hlist);
- } else {
+ goto disarmed;
+
+ /* Following process expects this probe is an aggrprobe */
+ WARN_ON(!kprobe_aggrprobe(ap));
+
+ if (list_is_singular(&ap->list))
+ /* This probe is the last child of aggrprobe */
+ goto disarmed;
+ else {
+ /* If disabling probe has special handlers, update aggrprobe */
if (p->break_handler && !kprobe_gone(p))
ap->break_handler = NULL;
if (p->post_handler && !kprobe_gone(p)) {
@@ -1261,19 +1289,23 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
ap->post_handler = NULL;
}
noclean:
+ /*
+ * Remove from the aggrprobe: this path will do nothing in
+ * __unregister_kprobe_bottom().
+ */
list_del_rcu(&p->list);
- if (!kprobe_disabled(ap)) {
- try_to_disable_aggr_kprobe(ap);
- if (!kprobes_all_disarmed) {
- if (kprobe_disabled(ap))
- disarm_kprobe(ap);
- else
- /* Try to optimize this probe again */
- optimize_kprobe(ap);
- }
- }
+ if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
+ /*
+ * Try to optimize this probe again, because post
+ * handler may have been changed.
+ */
+ optimize_kprobe(ap);
}
return 0;
+
+disarmed:
+ hlist_del_rcu(&ap->hlist);
+ return 0;
}

static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
@@ -1606,29 +1638,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
int __kprobes disable_kprobe(struct kprobe *kp)
{
int ret = 0;
- struct kprobe *p;

mutex_lock(&kprobe_mutex);

- /* Check whether specified probe is valid. */
- p = __get_valid_kprobe(kp);
- if (unlikely(p == NULL)) {
+ /* Disable this kprobe */
+ if (__disable_kprobe(kp) == NULL)
ret = -EINVAL;
- goto out;
- }
-
- /* If the probe is already disabled (or gone), just return */
- if (kprobe_disabled(kp))
- goto out;

- kp->flags |= KPROBE_FLAG_DISABLED;
- if (p != kp)
- /* When kp != p, p is always enabled. */
- try_to_disable_aggr_kprobe(p);
-
- if (!kprobes_all_disarmed && kprobe_disabled(p))
- disarm_kprobe(p);
-out:
mutex_unlock(&kprobe_mutex);
return ret;
}

Subject: [PATCH -tip v5 3/8] [CLEANUP] kprobes: Separate kprobe optimizing code from optimizer

Separate the kprobe optimizing code from the optimizer; this will make
it easier to introduce the unoptimizing code into the optimizer later.
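
After this patch the optimizer body reduces to roughly the following
shape (simplified from the diff below):

        static __kprobes void kprobe_optimizer(struct work_struct *work)
        {
                mutex_lock(&module_mutex);
                mutex_lock(&kprobe_mutex);
                if (kprobes_all_disarmed || !kprobes_allow_optimization)
                        goto end;
                synchronize_sched();    /* wait for quiescence */
                do_optimize_kprobes();  /* the factored-out worker */
        end:
                mutex_unlock(&kprobe_mutex);
                mutex_unlock(&module_mutex);
        }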

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: [email protected]
---

kernel/kprobes.c | 43 ++++++++++++++++++++++++++-----------------
1 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ab99caf..1f4f9b9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -427,26 +427,14 @@ static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
#define OPTIMIZE_DELAY 5

-/* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+/*
+ * Optimize (replace a breakpoint with a jump) kprobes listed on
+ * optimizing_list.
+ */
+static __kprobes void do_optimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;

- /* Lock modules while optimizing kprobes */
- mutex_lock(&module_mutex);
- mutex_lock(&kprobe_mutex);
- if (kprobes_all_disarmed || !kprobes_allow_optimization)
- goto end;
-
- /*
- * Wait for quiesence period to ensure all running interrupts
- * are done. Because optprobe may modify multiple instructions
- * there is a chance that Nth instruction is interrupted. In that
- * case, running interrupt can return to 2nd-Nth byte of jump
- * instruction. This wait is for avoiding it.
- */
- synchronize_sched();
-
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -467,6 +455,27 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
}
mutex_unlock(&text_mutex);
put_online_cpus();
+}
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+ /* Lock modules while optimizing kprobes */
+ mutex_lock(&module_mutex);
+ mutex_lock(&kprobe_mutex);
+ if (kprobes_all_disarmed || !kprobes_allow_optimization)
+ goto end;
+
+ /*
+ * Wait for quiesence period to ensure all running interrupts
+ * are done. Because optprobe may modify multiple instructions
+ * there is a chance that Nth instruction is interrupted. In that
+ * case, running interrupt can return to 2nd-Nth byte of jump
+ * instruction. This wait is for avoiding it.
+ */
+ synchronize_sched();
+
+ do_optimize_kprobes();
end:
mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);

Subject: [PATCH -tip v5 4/8] kprobes: Support delayed unoptimizing

Unoptimization occurs when a probe is unregistered or disabled, and it
is heavy because it recovers instructions by using stop_machine().
This patch delays unoptimization operations and unoptimizes several
probes at once by using text_poke_smp_batch().
This avoids the unexpected system slowdown coming from stop_machine().
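
With this change the optimizer work function runs in steps (simplified
from kprobe_optimizer() in the diff below):

        do_unoptimize_kprobes(&free_list);      /* Step 1: unoptimize queued probes */
        synchronize_sched();                    /* Step 2: wait for quiescence */
        do_optimize_kprobes();                  /* Step 3: optimize queued probes */
        do_free_cleaned_kprobes(&free_list);    /* Step 4: reclaim unused probes */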

Changes in v5:
- Split this patch into several cleanup patches and this patch.
- Fix some missing text_mutex locks.
- Use bool instead of int for behavior flags.
- Add an additional comment for the (un)optimizing path.

Changes in v2:
- Use dynamic allocated buffers and params.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: [email protected]
---

arch/x86/kernel/kprobes.c | 4 +
kernel/kprobes.c | 310 ++++++++++++++++++++++++++++++++++-----------
2 files changed, 237 insertions(+), 77 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c..da51dc8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

+ /* This is possible if op is under delayed unoptimizing */
+ if (kprobe_disabled(&op->kp))
+ return;
+
preempt_disable();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1f4f9b9..ba4d4c0 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -354,6 +354,13 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
return p->pre_handler == aggr_pre_handler;
}

+/* Return true(!0) if the kprobe is unused */
+static inline int kprobe_unused(struct kprobe *p)
+{
+ return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
+ list_empty(&p->list);
+}
+
/*
* Keep all fields in the kprobe consistent
*/
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
}
}

+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+
+ op = container_of(p, struct optimized_kprobe, kp);
+ arch_remove_optimized_kprobe(op);
+ arch_remove_kprobe(p);
+ kfree(op);
+}
+
/* Return true(!0) if the kprobe is ready for optimization. */
static inline int kprobe_optready(struct kprobe *p)
{
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
return 0;
}

+/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+static inline int kprobe_disarmed(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+
+ /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
+ if (!kprobe_aggrprobe(p))
+ return kprobe_disabled(p);
+
+ op = container_of(p, struct optimized_kprobe, kp);
+
+ return kprobe_disabled(p) && list_empty(&op->list);
+}
+
+/* Return true(!0) if the probe is queued on (un)optimizing lists */
+static int __kprobes kprobe_queued(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+
+ if (kprobe_aggrprobe(p)) {
+ op = container_of(p, struct optimized_kprobe, kp);
+ if (!list_empty(&op->list))
+ return 1;
+ }
+ return 0;
+}
+
/*
* Return an optimized kprobe whose optimizing code replaces
* instructions including addr (exclude breakpoint).
@@ -422,9 +467,11 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)

/* Optimization staging list, protected by kprobe_mutex */
static LIST_HEAD(optimizing_list);
+static LIST_HEAD(unoptimizing_list);

static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+static DECLARE_COMPLETION(optimizer_comp);
#define OPTIMIZE_DELAY 5

/*
@@ -435,6 +482,11 @@ static __kprobes void do_optimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;

+ /* Optimization never be done when disarmed */
+ if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+ list_empty(&optimizing_list))
+ return;
+
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -457,17 +509,79 @@ static __kprobes void do_optimize_kprobes(void)
put_online_cpus();
}

+/*
+ * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
+ * if need) kprobes listed on unoptimizing_list.
+ */
+static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+{
+ struct optimized_kprobe *op, *tmp;
+
+ /* Unoptimization must be done anytime */
+ if (list_empty(&unoptimizing_list))
+ return;
+
+ /* Ditto to do_optimize_kprobes */
+ get_online_cpus();
+ mutex_lock(&text_mutex);
+ list_for_each_entry_safe(op, tmp, &unoptimizing_list, list) {
+ /* Unoptimize kprobes */
+ arch_unoptimize_kprobe(op);
+ /* Disarm probes if marked disabled */
+ if (kprobe_disabled(&op->kp))
+ arch_disarm_kprobe(&op->kp);
+ if (kprobe_unused(&op->kp)) {
+ /*
+ * Remove unused probes from hash list. After waiting
+ * for synchronization, these probes are reclaimed.
+ * (reclaiming is done by do_free_cleaned_kprobes.)
+ */
+ hlist_del_rcu(&op->kp.hlist);
+ /* Move only unused probes on free_list */
+ list_move(&op->list, free_list);
+ } else
+ list_del_init(&op->list);
+ }
+ mutex_unlock(&text_mutex);
+ put_online_cpus();
+}
+
+/* Reclaim all kprobes on the free_list */
+static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+{
+ struct optimized_kprobe *op, *tmp;
+
+ list_for_each_entry_safe(op, tmp, free_list, list) {
+ BUG_ON(!kprobe_unused(&op->kp));
+ list_del_init(&op->list);
+ free_aggr_kprobe(&op->kp);
+ }
+}
+
+/* Start optimizer after OPTIMIZE_DELAY passed */
+static __kprobes void kick_kprobe_optimizer(void)
+{
+ if (!delayed_work_pending(&optimizing_work))
+ schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
/* Kprobe jump optimizer */
static __kprobes void kprobe_optimizer(struct work_struct *work)
{
+ LIST_HEAD(free_list);
+
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
mutex_lock(&kprobe_mutex);
- if (kprobes_all_disarmed || !kprobes_allow_optimization)
- goto end;

/*
- * Wait for quiesence period to ensure all running interrupts
+ * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
+ * kprobes before waiting for quiesence period.
+ */
+ do_unoptimize_kprobes(&free_list);
+
+ /*
+ * Step 2: Wait for quiesence period to ensure all running interrupts
* are done. Because optprobe may modify multiple instructions
* there is a chance that Nth instruction is interrupted. In that
* case, running interrupt can return to 2nd-Nth byte of jump
@@ -475,10 +589,24 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
*/
synchronize_sched();

+ /* Step 3: Optimize kprobes after quiesence period */
do_optimize_kprobes();
-end:
+
+ /* Step 4: Free cleaned kprobes after quiesence period */
+ do_free_cleaned_kprobes(&free_list);
+
mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);
+
+ /* Wake up all waiters */
+ complete_all(&optimizer_comp);
+}
+
+/* Wait for completing optimization and unoptimization */
+static __kprobes void wait_for_kprobe_optimizer(void)
+{
+ if (delayed_work_pending(&optimizing_work))
+ wait_for_completion(&optimizer_comp);
}

/* Optimize kprobe if p is ready to be optimized */
@@ -504,27 +632,63 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
/* Check if it is already optimized. */
if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
return;
-
op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
- list_add(&op->list, &optimizing_list);
- if (!delayed_work_pending(&optimizing_work))
- schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+
+ if (!list_empty(&op->list))
+ /* This is under unoptimizing. Just dequeue the probe */
+ list_del_init(&op->list);
+ else {
+ list_add(&op->list, &optimizing_list);
+ kick_kprobe_optimizer();
+ }
+}
+
+/* Short cut to direct unoptimizing */
+static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ get_online_cpus();
+ arch_unoptimize_kprobe(op);
+ put_online_cpus();
+ if (kprobe_disabled(&op->kp))
+ arch_disarm_kprobe(&op->kp);
}

/* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p)
+static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
{
struct optimized_kprobe *op;

- if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
- op = container_of(p, struct optimized_kprobe, kp);
- if (!list_empty(&op->list))
- /* Dequeue from the optimization queue */
+ if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
+ return; /* This is not an optprobe nor optimized */
+
+ op = container_of(p, struct optimized_kprobe, kp);
+ if (!kprobe_optimized(p)) {
+ /* Unoptimized or unoptimizing case */
+ if (force && !list_empty(&op->list)) {
+ /*
+ * Only if this is unoptimizing kprobe and forced,
+ * forcibly unoptimize it. (No need to unoptimize
+ * unoptimized kprobe again :)
+ */
list_del_init(&op->list);
- else
- /* Replace jump with break */
- arch_unoptimize_kprobe(op);
- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ force_unoptimize_kprobe(op);
+ }
+ return;
+ }
+
+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ if (!list_empty(&op->list)) {
+ /* Dequeue from the optimization queue */
+ list_del_init(&op->list);
+ return;
+ }
+ /* Optimized kprobe case */
+ if (force)
+ /* Forcibly update the code: this is a special case */
+ force_unoptimize_kprobe(op);
+ else {
+ list_add(&op->list, &unoptimizing_list);
+ kick_kprobe_optimizer();
}
}

@@ -534,12 +698,12 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
struct optimized_kprobe *op;

op = container_of(p, struct optimized_kprobe, kp);
- if (!list_empty(&op->list)) {
- /* Dequeue from the optimization queue */
+ if (!list_empty(&op->list))
+ /* Dequeue from the (un)optimization queue */
list_del_init(&op->list);
- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- }
- /* Don't unoptimize, because the target code will be freed. */
+
+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ /* Don't touch the code, because it is already freed. */
arch_remove_optimized_kprobe(op);
}

@@ -552,16 +716,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
arch_prepare_optimized_kprobe(op);
}

-/* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
-{
- struct optimized_kprobe *op;
-
- op = container_of(p, struct optimized_kprobe, kp);
- arch_remove_optimized_kprobe(op);
- kfree(op);
-}
-
/* Allocate new optimized_kprobe and try to prepare optimized instructions */
static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
@@ -596,7 +750,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
op = container_of(ap, struct optimized_kprobe, kp);
if (!arch_prepared_optinsn(&op->optinsn)) {
/* If failed to setup optimizing, fallback to kprobe */
- free_aggr_kprobe(ap);
+ arch_remove_optimized_kprobe(op);
+ kfree(op);
return;
}

@@ -640,21 +795,16 @@ static void __kprobes unoptimize_all_kprobes(void)
return;

kprobes_allow_optimization = false;
- printk(KERN_INFO "Kprobes globally unoptimized\n");
- get_online_cpus(); /* For avoiding text_mutex deadlock */
- mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!kprobe_disabled(p))
- unoptimize_kprobe(p);
+ unoptimize_kprobe(p, false);
}
}
-
- mutex_unlock(&text_mutex);
- put_online_cpus();
- /* Allow all currently running kprobes to complete */
- synchronize_sched();
+ /* Wait for unoptimizing completion */
+ wait_for_kprobe_optimizer();
+ printk(KERN_INFO "Kprobes globally unoptimized\n");
}

int sysctl_kprobes_optimization;
@@ -678,6 +828,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
}
#endif /* CONFIG_SYSCTL */

+/* Put a breakpoint for a probe. Must be called with text_mutex locked */
static void __kprobes __arm_kprobe(struct kprobe *p)
{
struct kprobe *_p;
@@ -685,37 +836,45 @@ static void __kprobes __arm_kprobe(struct kprobe *p)
/* Check collision with other optimized kprobes */
_p = get_optimized_kprobe((unsigned long)p->addr);
if (unlikely(_p))
- unoptimize_kprobe(_p); /* Fallback to unoptimized kprobe */
+ /* Fallback to unoptimized kprobe */
+ unoptimize_kprobe(_p, true);

arch_arm_kprobe(p);
optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */
}

-static void __kprobes __disarm_kprobe(struct kprobe *p)
+/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
{
struct kprobe *_p;

- unoptimize_kprobe(p); /* Try to unoptimize */
- arch_disarm_kprobe(p);
+ unoptimize_kprobe(p, false); /* Try to unoptimize */

- /* If another kprobe was blocked, optimize it. */
- _p = get_optimized_kprobe((unsigned long)p->addr);
- if (unlikely(_p))
- optimize_kprobe(_p);
+ if (!kprobe_queued(p)) {
+ arch_disarm_kprobe(p);
+ /* If another kprobe was blocked, optimize it. */
+ _p = get_optimized_kprobe((unsigned long)p->addr);
+ if (unlikely(_p) && reopt)
+ optimize_kprobe(_p);
+ }
+ /* TODO: reoptimize others after unoptimized this probe */
}

#else /* !CONFIG_OPTPROBES */

#define optimize_kprobe(p) do {} while (0)
-#define unoptimize_kprobe(p) do {} while (0)
+#define unoptimize_kprobe(p, f) do {} while (0)
#define kill_optimized_kprobe(p) do {} while (0)
#define prepare_optimized_kprobe(p) do {} while (0)
#define try_to_optimize_kprobe(p) do {} while (0)
#define __arm_kprobe(p) arch_arm_kprobe(p)
-#define __disarm_kprobe(p) arch_disarm_kprobe(p)
+#define __disarm_kprobe(p, o) arch_disarm_kprobe(p)
+#define kprobe_disarmed(p) kprobe_disabled(p)
+#define wait_for_kprobe_optimizer() do {} while (0)

static __kprobes void free_aggr_kprobe(struct kprobe *p)
{
+ arch_remove_kprobe(p);
kfree(p);
}

@@ -741,11 +900,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
/* Disarm a kprobe with text_mutex */
static void __kprobes disarm_kprobe(struct kprobe *kp)
{
- get_online_cpus(); /* For avoiding text_mutex deadlock */
+ /* Ditto */
mutex_lock(&text_mutex);
- __disarm_kprobe(kp);
+ __disarm_kprobe(kp, true);
mutex_unlock(&text_mutex);
- put_online_cpus();
}

/*
@@ -951,7 +1109,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
BUG_ON(kprobe_gone(ap) || kprobe_gone(p));

if (p->break_handler || p->post_handler)
- unoptimize_kprobe(ap); /* Fall back to normal kprobe */
+ unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */

if (p->break_handler) {
if (ap->break_handler)
@@ -1014,7 +1172,9 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
if (!ap)
return -ENOMEM;
init_aggr_kprobe(ap, orig_p);
- }
+ } else if (kprobe_unused(ap))
+ /* Busy to die */
+ return -EBUSY;

if (kprobe_gone(ap)) {
/*
@@ -1283,8 +1443,11 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
/* Following process expects this probe is an aggrprobe */
WARN_ON(!kprobe_aggrprobe(ap));

- if (list_is_singular(&ap->list))
- /* This probe is the last child of aggrprobe */
+ if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
+ /*
+ * !disarmed could be happen if the probe is under delayed
+ * unoptimizing.
+ */
goto disarmed;
else {
/* If disabling probe has special handlers, update aggrprobe */
@@ -1313,6 +1476,7 @@ noclean:
return 0;

disarmed:
+ BUG_ON(!kprobe_disarmed(ap));
hlist_del_rcu(&ap->hlist);
return 0;
}
@@ -1322,14 +1486,15 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
struct kprobe *ap;

if (list_empty(&p->list))
+ /* This is an independent kprobe */
arch_remove_kprobe(p);
else if (list_is_singular(&p->list)) {
- /* "p" is the last child of an aggr_kprobe */
+ /* This is the last child of an aggrprobe */
ap = list_entry(p->list.next, struct kprobe, list);
list_del(&p->list);
- arch_remove_kprobe(ap);
free_aggr_kprobe(ap);
}
+ /* Otherwise, do nothing. */
}

int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1951,36 +2116,27 @@ static void __kprobes disarm_all_kprobes(void)
mutex_lock(&kprobe_mutex);

/* If kprobes are already disarmed, just return */
- if (kprobes_all_disarmed)
- goto already_disabled;
+ if (kprobes_all_disarmed) {
+ mutex_unlock(&kprobe_mutex);
+ return;
+ }

kprobes_all_disarmed = true;
printk(KERN_INFO "Kprobes globally disabled\n");

- /*
- * Here we call get_online_cpus() for avoiding text_mutex deadlock,
- * because disarming may also unoptimize kprobes.
- */
- get_online_cpus();
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- __disarm_kprobe(p);
+ __disarm_kprobe(p, false);
}
}
-
mutex_unlock(&text_mutex);
- put_online_cpus();
mutex_unlock(&kprobe_mutex);
- /* Allow all currently running kprobes to complete */
- synchronize_sched();
- return;

-already_disabled:
- mutex_unlock(&kprobe_mutex);
- return;
+ /* Wait for disarming all kprobes by optimizer */
+ wait_for_kprobe_optimizer();
}

/*

Subject: [PATCH -tip v5 5/8] kprobes: Reuse unused kprobe

Reuse an unused kprobe (one that is waiting for unoptimization and has
no user handler) at the given address, instead of returning -EBUSY when
registering a new kprobe there.
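
The change to the registration path is roughly (see the diff below):

        } else if (kprobe_unused(ap))
                /* was: return -EBUSY; */
                reuse_unused_kprobe(ap);        /* cancel unoptimizing and re-enable */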

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: [email protected]
---

kernel/kprobes.c | 32 ++++++++++++++++++++++++++++++--
1 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ba4d4c0..134754d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -692,6 +692,27 @@ static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
}
}

+/* Cancel unoptimizing for reusing */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+ struct optimized_kprobe *op;
+
+ BUG_ON(!kprobe_unused(ap));
+ /*
+ * Unused kprobe MUST be on the way of delayed unoptimizing (means
+ * there is still a relative jump) and disabled.
+ */
+ op = container_of(ap, struct optimized_kprobe, kp);
+ if (unlikely(list_empty(&op->list)))
+ printk(KERN_WARNING "Warning: found a stray unused "
+ "aggrprobe@%p\n", ap->addr);
+ /* Enable the probe again */
+ ap->flags &= ~KPROBE_FLAG_DISABLED;
+ /* Optimize it again (remove from op->list) */
+ BUG_ON(!kprobe_optready(ap));
+ optimize_kprobe(ap);
+}
+
/* Remove optimized instructions */
static void __kprobes kill_optimized_kprobe(struct kprobe *p)
{
@@ -872,6 +893,13 @@ static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
#define kprobe_disarmed(p) kprobe_disabled(p)
#define wait_for_kprobe_optimizer() do {} while (0)

+/* There should be no unused kprobes can be reused without optimization */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+ printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+ BUG_ON(kprobe_unused(ap));
+}
+
static __kprobes void free_aggr_kprobe(struct kprobe *p)
{
arch_remove_kprobe(p);
@@ -1173,8 +1201,8 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
return -ENOMEM;
init_aggr_kprobe(ap, orig_p);
} else if (kprobe_unused(ap))
- /* Busy to die */
- return -EBUSY;
+ /* This probe is going to die. Rescue it */
+ reuse_unused_kprobe(ap);

if (kprobe_gone(ap)) {
/*

Subject: [PATCH -tip v5 6/8] x86: Introduce text_poke_smp_batch() for batch-code modifying

Introduce text_poke_smp_batch(). This function modifies several text
areas with one stop_machine() on SMP. Because calling stop_machine()
is a heavy task, it is better to aggregate text_poke requests.

Frederic, I've talked with Rusty about this interface, and he would
prefer not to expand the stop_machine() interface, since it is not
meant for generic use.
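
Usage is like text_poke_smp(), but it takes an array of parameters (a
minimal sketch; addr1/code1 etc. are placeholders, and the caller must
hold get_online_cpus() and text_mutex, as the kerneldoc below notes):

        struct text_poke_param params[2] = {
                { .addr = addr1, .opcode = code1, .len = len1 },
                { .addr = addr2, .opcode = code2, .len = len2 },
        };

        get_online_cpus();
        mutex_lock(&text_mutex);
        text_poke_smp_batch(params, 2); /* one stop_machine() for both */
        mutex_unlock(&text_mutex);
        put_online_cpus();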

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: Jan Beulich <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: [email protected]
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
---

arch/x86/include/asm/alternative.h | 7 +++++
arch/x86/kernel/alternative.c | 49 +++++++++++++++++++++++++++++-------
2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d2..4a2adaa 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
* On the local CPU you need to be protected again NMI or MCE handlers seeing an
* inconsistent instruction while you patch.
*/
+struct text_poke_param {
+ void *addr;
+ const void *opcode;
+ size_t len;
+};
+
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24..553d0b0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
static int wrote_text;

struct text_poke_params {
- void *addr;
- const void *opcode;
- size_t len;
+ struct text_poke_param *params;
+ int nparams;
};

static int __kprobes stop_machine_text_poke(void *data)
{
struct text_poke_params *tpp = data;
+ struct text_poke_param *p;
+ int i;

if (atomic_dec_and_test(&stop_machine_first)) {
- text_poke(tpp->addr, tpp->opcode, tpp->len);
+ for (i = 0; i < tpp->nparams; i++) {
+ p = &tpp->params[i];
+ text_poke(p->addr, p->opcode, p->len);
+ }
smp_wmb(); /* Make sure other cpus see that this has run */
wrote_text = 1;
} else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
smp_mb(); /* Load wrote_text before following execution */
}

- flush_icache_range((unsigned long)tpp->addr,
- (unsigned long)tpp->addr + tpp->len);
+ for (i = 0; i < tpp->nparams; i++) {
+ p = &tpp->params[i];
+ flush_icache_range((unsigned long)p->addr,
+ (unsigned long)p->addr + p->len);
+ }
+
return 0;
}

@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
struct text_poke_params tpp;
+ struct text_poke_param p;

- tpp.addr = addr;
- tpp.opcode = opcode;
- tpp.len = len;
+ p.addr = addr;
+ p.opcode = opcode;
+ p.len = len;
+ tpp.params = &p;
+ tpp.nparams = 1;
atomic_set(&stop_machine_first, 1);
wrote_text = 0;
/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
return addr;
}

+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+ struct text_poke_params tpp = {.params = params, .nparams = n};
+
+ atomic_set(&stop_machine_first, 1);
+ wrote_text = 0;
+ stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)

#ifdef CONFIG_X86_64

Subject: [PATCH -tip v5 8/8] kprobes: Use text_poke_smp_batch for unoptimizing

Use text_poke_smp_batch() on the unoptimization path to reduce the
number of stop_machine() calls. If more than MAX_OPTIMIZE_PROBES (=256)
probes are queued for unoptimization, kprobes unoptimizes the first
MAX_OPTIMIZE_PROBES probes and kicks the optimizer again for the
remaining ones.
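
The per-pass limit works roughly like this (simplified from
arch_unoptimize_kprobes() in the diff below; the optimizer reschedules
itself while work remains queued):

        list_for_each_entry_safe(op, tmp, oplist, list) {
                setup_unoptimize_kprobe(&jump_poke_params[c],
                                        jump_poke_bufs[c].buf, op);
                list_move(&op->list, done_list);
                if (++c >= MAX_OPTIMIZE_PROBES)
                        break;          /* the rest is handled next pass */
        }
        text_poke_smp_batch(jump_poke_params, c);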

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: Mathieu Desnoyers <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: [email protected]
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
---

arch/x86/kernel/kprobes.c | 40 ++++++++++++++++++++++++++++++++++++++++
include/linux/kprobes.h | 2 ++
kernel/kprobes.c | 10 ++++------
3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 25a8af7..5940282 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1457,6 +1457,46 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist)
text_poke_smp_batch(jump_poke_params, c);
}

+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ /* Set int3 to first byte for kprobes */
+ insn_buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /* Setup param */
+ setup_unoptimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_move(&op->list, done_list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
/* Replace a relative jump with a breakpoint (int3). */
void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index fe157ba..b78edb5 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -276,6 +276,8 @@ extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
extern void arch_optimize_kprobes(struct list_head *oplist);
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list);
extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
extern kprobe_opcode_t *get_optinsn_slot(void);
extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 531e101..7663e5d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -517,9 +517,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
/* Ditto to do_optimize_kprobes */
get_online_cpus();
mutex_lock(&text_mutex);
- list_for_each_entry_safe(op, tmp, &unoptimizing_list, list) {
- /* Unoptimize kprobes */
- arch_unoptimize_kprobe(op);
+ arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+ /* Loop free_list for disarming */
+ list_for_each_entry_safe(op, tmp, free_list, list) {
/* Disarm probes if marked disabled */
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
@@ -530,8 +530,6 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
* (reclaiming is done by do_free_cleaned_kprobes.)
*/
hlist_del_rcu(&op->kp.hlist);
- /* Move only unused probes on free_list */
- list_move(&op->list, free_list);
} else
list_del_init(&op->list);
}
@@ -592,7 +590,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&module_mutex);

/* Step 5: Kick optimizer again if needed */
- if (!list_empty(&optimizing_list))
+ if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
kick_kprobe_optimizer();
else
/* Wake up all waiters */

Subject: [PATCH -tip v5 7/8] kprobes: Use text_poke_smp_batch for optimizing

Use text_poke_smp_batch() in the optimization path to reduce the
number of stop_machine() calls. If more than MAX_OPTIMIZE_PROBES (=256)
probes are queued for optimization, kprobes optimizes the first
MAX_OPTIMIZE_PROBES probes and kicks the optimizer again for the
remaining ones.
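
Each batched request writes a 5-byte relative jump; the displacement is
computed as in the diff below (a sketch of the encoding):

        s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));

        insn_buf[0] = RELATIVEJUMP_OPCODE;      /* 0xe9: jmp rel32 */
        *(s32 *)(&insn_buf[1]) = rel;           /* target - (addr + 5) */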

Changes in v5:
- Use kick_kprobe_optimizer() instead of directly calling
  schedule_delayed_work().
- Reschedule the optimizer outside of the kprobe_mutex lock.

Changes in v2:
- Allocate the code buffer and parameters in arch_init_kprobes()
  instead of using static arrays.
- Merge the previous max-optimization-limit patch into this patch.
  So, this patch introduces an upper limit on how many probes can be
  optimized at once.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Cc: Peter Zijlstra <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: [email protected]
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
---

arch/x86/kernel/kprobes.c | 69 ++++++++++++++++++++++++++++++++++++++++-----
include/linux/kprobes.h | 2 +
kernel/kprobes.c | 17 +++++------
3 files changed, 69 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index da51dc8..25a8af7 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1405,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
return 0;
}

-/* Replace a breakpoint (int3) with a relative jump. */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+ u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
{
- unsigned char jmp_code[RELATIVEJUMP_SIZE];
s32 rel = (s32)((long)op->optinsn.insn -
((long)op->kp.addr + RELATIVEJUMP_SIZE));

@@ -1416,16 +1422,39 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
RELATIVE_ADDR_SIZE);

- jmp_code[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&jmp_code[1]) = rel;
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ WARN_ON(kprobe_disabled(&op->kp));
+ /* Setup param */
+ setup_optimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_del_init(&op->list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }

/*
* text_poke_smp doesn't support NMI/MCE code modifying.
* However, since kprobes itself also doesn't support NMI/MCE
* code probing, it's not a problem.
*/
- text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
- return 0;
+ text_poke_smp_batch(jump_poke_params, c);
}

/* Replace a relative jump with a breakpoint (int3). */
@@ -1457,11 +1486,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
}
return 0;
}
+
+static int __kprobes init_poke_params(void)
+{
+ /* Allocate code buffer and parameter array */
+ jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_bufs)
+ return -ENOMEM;
+
+ jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_params) {
+ kfree(jump_poke_bufs);
+ jump_poke_bufs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+#else /* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+ return 0;
+}
#endif

int __init arch_init_kprobes(void)
{
- return 0;
+ return init_poke_params();
}

int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e..fe157ba 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -275,7 +275,7 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
-extern int arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_optimize_kprobes(struct list_head *oplist);
extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
extern kprobe_opcode_t *get_optinsn_slot(void);
extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 134754d..531e101 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -480,8 +480,6 @@ static DECLARE_COMPLETION(optimizer_comp);
*/
static __kprobes void do_optimize_kprobes(void)
{
- struct optimized_kprobe *op, *tmp;
-
/* Optimization never be done when disarmed */
if (kprobes_all_disarmed || !kprobes_allow_optimization ||
list_empty(&optimizing_list))
@@ -499,12 +497,7 @@ static __kprobes void do_optimize_kprobes(void)
*/
get_online_cpus();
mutex_lock(&text_mutex);
- list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
- WARN_ON(kprobe_disabled(&op->kp));
- if (arch_optimize_kprobe(op) < 0)
- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- list_del_init(&op->list);
- }
+ arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex);
put_online_cpus();
}
@@ -598,8 +591,12 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);

- /* Wake up all waiters */
- complete_all(&optimizer_comp);
+ /* Step 5: Kick optimizer again if needed */
+ if (!list_empty(&optimizing_list))
+ kick_kprobe_optimizer();
+ else
+ /* Wake up all waiters */
+ complete_all(&optimizer_comp);
}

/* Wait for completing optimization and unoptimization */

From: Steven Rostedt
Date: 2010-12-03 14:45:03
Subject: Re: [PATCH -tip v5 6/8] x86: Introduce text_poke_smp_batch() for batch-code modifying

On Fri, 2010-12-03 at 18:54 +0900, Masami Hiramatsu wrote:
> Introduce text_poke_smp_batch(). This function modifies several
> text areas with one stop_machine() on SMP. Because calling
> stop_machine() is heavy task, it is better to aggregate text_poke
> requests.
>
> Frederic, I've talked with Rusty about this interface, and
> he would not like to expand stop_machine() interface, since
> it is not for generic use.
>
> Signed-off-by: Masami Hiramatsu <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: "H. Peter Anvin" <[email protected]>
> Cc: [email protected]
> Cc: Jan Beulich <[email protected]>

Looks good,

Acked-by: Steven Rostedt <[email protected]>

-- Steve

> Cc: Jason Baron <[email protected]>
> Cc: [email protected]
> Cc: Rusty Russell <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> ---

Subject: [tip:perf/core] kprobes: Rename old_p to more appropriate name

Commit-ID: 6d8e40a85ef72a0514ebd00748eb18cab432b200
Gitweb: http://git.kernel.org/tip/6d8e40a85ef72a0514ebd00748eb18cab432b200
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:53:50 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:29 +0100

kprobes: Rename old_p to more appropriate name

Rename irrelevant uses of "old_p" to more appropriate names.
Originally, "old_p" just meant "the old kprobe on the given address",
but the current code uses that name to mean "just another kprobe" or
something like that. This patch renames those pointers to more
appropriate names for maintainability.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/kprobes.c | 93 ++++++++++++++++++++++++++---------------------------
1 files changed, 46 insertions(+), 47 deletions(-)


Subject: [tip:perf/core] kprobes: Cleanup disabling and unregistering path

Commit-ID: 6f0f1dd71953d4243c11e490dd49ef24ebaf6c0b
Gitweb: http://git.kernel.org/tip/6f0f1dd71953d4243c11e490dd49ef24ebaf6c0b
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:53:57 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:29 +0100

kprobes: Cleanup disabling and unregistering path

Merge the kprobe-disabling code into the kprobe-unregistering
function and add comments for the disabling/unregistering process.

The current unregistering code disables (disarms) kprobes after
checking the target kprobe's status. This patch changes it to
disable the kprobe first and change the kprobe's state afterwards.
This allows the probe-disabling code to be shared between
disable_kprobe() and unregister_kprobe().
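
For reference, the resulting shape of the shared path is roughly the
following (a simplified sketch, not the patch itself; see the diff
below for the real code):

    /* Shared helper: disable (and disarm when possible) one kprobe */
    static struct kprobe *__disable_kprobe(struct kprobe *p);

    int disable_kprobe(struct kprobe *kp)
    {
            ...
            if (__disable_kprobe(kp) == NULL)
                    ret = -EINVAL;
            ...
    }

    static int __unregister_kprobe_top(struct kprobe *p)
    {
            struct kprobe *ap = __disable_kprobe(p); /* disarms if needed */
            /* ... then detach p from the aggrprobe or the hash list ... */
    }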

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/kprobes.c | 128 ++++++++++++++++++++++++++++++-----------------------
1 files changed, 72 insertions(+), 56 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8c3aa14..ab99caf 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1039,23 +1039,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
return add_new_kprobe(ap, p);
}

-/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
-static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
-{
- struct kprobe *kp;
-
- list_for_each_entry_rcu(kp, &p->list, list) {
- if (!kprobe_disabled(kp))
- /*
- * There is an active probe on the list.
- * We can't disable aggr_kprobe.
- */
- return 0;
- }
- p->flags |= KPROBE_FLAG_DISABLED;
- return 1;
-}
-
static int __kprobes in_kprobes_functions(unsigned long addr)
{
struct kprobe_blackpoint *kb;
@@ -1228,6 +1211,47 @@ fail_with_jump_label:
}
EXPORT_SYMBOL_GPL(register_kprobe);

+/* Check if all probes on the aggrprobe are disabled */
+static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+{
+ struct kprobe *kp;
+
+ list_for_each_entry_rcu(kp, &ap->list, list)
+ if (!kprobe_disabled(kp))
+ /*
+ * There is an active probe on the list.
+ * We can't disable this ap.
+ */
+ return 0;
+
+ return 1;
+}
+
+/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
+static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+{
+ struct kprobe *orig_p;
+
+ /* Get an original kprobe for return */
+ orig_p = __get_valid_kprobe(p);
+ if (unlikely(orig_p == NULL))
+ return NULL;
+
+ if (!kprobe_disabled(p)) {
+ /* Disable probe if it is a child probe */
+ if (p != orig_p)
+ p->flags |= KPROBE_FLAG_DISABLED;
+
+ /* Try to disarm and disable this/parent probe */
+ if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+ disarm_kprobe(orig_p);
+ orig_p->flags |= KPROBE_FLAG_DISABLED;
+ }
+ }
+
+ return orig_p;
+}
+
/*
* Unregister a kprobe without a scheduler synchronization.
*/
@@ -1235,22 +1259,26 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
{
struct kprobe *ap, *list_p;

- ap = __get_valid_kprobe(p);
+ /* Disable kprobe. This will disarm it if needed. */
+ ap = __disable_kprobe(p);
if (ap == NULL)
return -EINVAL;

- if (ap == p ||
- (kprobe_aggrprobe(ap) &&
- list_is_singular(&ap->list))) {
+ if (ap == p)
/*
- * Only probe on the hash list. Disarm only if kprobes are
- * enabled and not gone - otherwise, the breakpoint would
- * already have been removed. We save on flushing icache.
+ * This probe is an independent(and non-optimized) kprobe
+ * (not an aggrprobe). Remove from the hash list.
*/
- if (!kprobes_all_disarmed && !kprobe_disabled(ap))
- disarm_kprobe(ap);
- hlist_del_rcu(&ap->hlist);
- } else {
+ goto disarmed;
+
+ /* Following process expects this probe is an aggrprobe */
+ WARN_ON(!kprobe_aggrprobe(ap));
+
+ if (list_is_singular(&ap->list))
+ /* This probe is the last child of aggrprobe */
+ goto disarmed;
+ else {
+ /* If disabling probe has special handlers, update aggrprobe */
if (p->break_handler && !kprobe_gone(p))
ap->break_handler = NULL;
if (p->post_handler && !kprobe_gone(p)) {
@@ -1261,19 +1289,23 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
ap->post_handler = NULL;
}
noclean:
+ /*
+ * Remove from the aggrprobe: this path will do nothing in
+ * __unregister_kprobe_bottom().
+ */
list_del_rcu(&p->list);
- if (!kprobe_disabled(ap)) {
- try_to_disable_aggr_kprobe(ap);
- if (!kprobes_all_disarmed) {
- if (kprobe_disabled(ap))
- disarm_kprobe(ap);
- else
- /* Try to optimize this probe again */
- optimize_kprobe(ap);
- }
- }
+ if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
+ /*
+ * Try to optimize this probe again, because post
+ * handler may have been changed.
+ */
+ optimize_kprobe(ap);
}
return 0;
+
+disarmed:
+ hlist_del_rcu(&ap->hlist);
+ return 0;
}

static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
@@ -1606,29 +1638,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
int __kprobes disable_kprobe(struct kprobe *kp)
{
int ret = 0;
- struct kprobe *p;

mutex_lock(&kprobe_mutex);

- /* Check whether specified probe is valid. */
- p = __get_valid_kprobe(kp);
- if (unlikely(p == NULL)) {
+ /* Disable this kprobe */
+ if (__disable_kprobe(kp) == NULL)
ret = -EINVAL;
- goto out;
- }
-
- /* If the probe is already disabled (or gone), just return */
- if (kprobe_disabled(kp))
- goto out;

- kp->flags |= KPROBE_FLAG_DISABLED;
- if (p != kp)
- /* When kp != p, p is always enabled. */
- try_to_disable_aggr_kprobe(p);
-
- if (!kprobes_all_disarmed && kprobe_disabled(p))
- disarm_kprobe(p);
-out:
mutex_unlock(&kprobe_mutex);
return ret;
}

Subject: [tip:perf/core] kprobes: Separate kprobe optimizing code from optimizer

Commit-ID: 61f4e13ffd85c037a942c5b7fd13f2b0c3162862
Gitweb: http://git.kernel.org/tip/61f4e13ffd85c037a942c5b7fd13f2b0c3162862
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:54:03 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:30 +0100

kprobes: Separate kprobe optimizing code from optimizer

Separate the kprobe-optimizing code from the optimizer; this
will make it easier to introduce the unoptimizing code into the
optimizer.
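
After the split, the workqueue callback keeps only the locking and
quiescence handling, and the patching loop moves into its own helper
(a schematic sketch of the shape of the change, not the patch itself):

    static void kprobe_optimizer(struct work_struct *work)
    {
            /* take module_mutex and kprobe_mutex, check flags */
            synchronize_sched();    /* wait for the quiescence period */
            do_optimize_kprobes();  /* the factored-out patching loop */
            /* release the mutexes */
    }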

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/kprobes.c | 43 ++++++++++++++++++++++++++-----------------
1 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ab99caf..1f4f9b9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -427,26 +427,14 @@ static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
#define OPTIMIZE_DELAY 5

-/* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+/*
+ * Optimize (replace a breakpoint with a jump) kprobes listed on
+ * optimizing_list.
+ */
+static __kprobes void do_optimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;

- /* Lock modules while optimizing kprobes */
- mutex_lock(&module_mutex);
- mutex_lock(&kprobe_mutex);
- if (kprobes_all_disarmed || !kprobes_allow_optimization)
- goto end;
-
- /*
- * Wait for quiesence period to ensure all running interrupts
- * are done. Because optprobe may modify multiple instructions
- * there is a chance that Nth instruction is interrupted. In that
- * case, running interrupt can return to 2nd-Nth byte of jump
- * instruction. This wait is for avoiding it.
- */
- synchronize_sched();
-
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -467,6 +455,27 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
}
mutex_unlock(&text_mutex);
put_online_cpus();
+}
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+ /* Lock modules while optimizing kprobes */
+ mutex_lock(&module_mutex);
+ mutex_lock(&kprobe_mutex);
+ if (kprobes_all_disarmed || !kprobes_allow_optimization)
+ goto end;
+
+ /*
+ * Wait for quiesence period to ensure all running interrupts
+ * are done. Because optprobe may modify multiple instructions
+ * there is a chance that Nth instruction is interrupted. In that
+ * case, running interrupt can return to 2nd-Nth byte of jump
+ * instruction. This wait is for avoiding it.
+ */
+ synchronize_sched();
+
+ do_optimize_kprobes();
end:
mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);

Subject: [tip:perf/core] kprobes: Support delayed unoptimizing

Commit-ID: 6274de4984a630b45c6934b3ee62e5692c745328
Gitweb: http://git.kernel.org/tip/6274de4984a630b45c6934b3ee62e5692c745328
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:54:09 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:30 +0100

kprobes: Support delayed unoptimizing

Unoptimization occurs when a probe is unregistered or disabled,
and it is heavy because it recovers instructions by using
stop_machine(). This patch delays unoptimization operations and
unoptimizes several probes at once by using
text_poke_smp_batch(). This avoids the unexpected system slowdown
coming from repeated stop_machine() calls.
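
With this change, one pass of the optimizer work function handles both
queues, roughly as follows (a schematic sketch of the steps in the
patch below):

    static void kprobe_optimizer(struct work_struct *work)
    {
            LIST_HEAD(free_list);

            /* Step 1: unoptimize queued probes, collect unused ones */
            do_unoptimize_kprobes(&free_list);
            /* Step 2: wait for the quiescence period */
            synchronize_sched();
            /* Step 3: optimize queued probes */
            do_optimize_kprobes();
            /* Step 4: reclaim the collected unused probes */
            do_free_cleaned_kprobes(&free_list);
    }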

Changes in v5:
- Split this patch into several cleanup patches plus this patch.
- Fix some missing text_mutex locks.
- Use bool instead of int for behavior flags.
- Add additional comments for the (un)optimizing paths.

Changes in v2:
- Use dynamically allocated buffers and params.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/kprobes.c | 4 +
kernel/kprobes.c | 310 ++++++++++++++++++++++++++++++++++-----------
2 files changed, 237 insertions(+), 77 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c..da51dc8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

+ /* This is possible if op is under delayed unoptimizing */
+ if (kprobe_disabled(&op->kp))
+ return;
+
preempt_disable();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1f4f9b9..ba4d4c0 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -354,6 +354,13 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
return p->pre_handler == aggr_pre_handler;
}

+/* Return true(!0) if the kprobe is unused */
+static inline int kprobe_unused(struct kprobe *p)
+{
+ return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
+ list_empty(&p->list);
+}
+
/*
* Keep all fields in the kprobe consistent
*/
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
}
}

+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+
+ op = container_of(p, struct optimized_kprobe, kp);
+ arch_remove_optimized_kprobe(op);
+ arch_remove_kprobe(p);
+ kfree(op);
+}
+
/* Return true(!0) if the kprobe is ready for optimization. */
static inline int kprobe_optready(struct kprobe *p)
{
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
return 0;
}

+/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+static inline int kprobe_disarmed(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+
+ /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
+ if (!kprobe_aggrprobe(p))
+ return kprobe_disabled(p);
+
+ op = container_of(p, struct optimized_kprobe, kp);
+
+ return kprobe_disabled(p) && list_empty(&op->list);
+}
+
+/* Return true(!0) if the probe is queued on (un)optimizing lists */
+static int __kprobes kprobe_queued(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+
+ if (kprobe_aggrprobe(p)) {
+ op = container_of(p, struct optimized_kprobe, kp);
+ if (!list_empty(&op->list))
+ return 1;
+ }
+ return 0;
+}
+
/*
* Return an optimized kprobe whose optimizing code replaces
* instructions including addr (exclude breakpoint).
@@ -422,9 +467,11 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)

/* Optimization staging list, protected by kprobe_mutex */
static LIST_HEAD(optimizing_list);
+static LIST_HEAD(unoptimizing_list);

static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+static DECLARE_COMPLETION(optimizer_comp);
#define OPTIMIZE_DELAY 5

/*
@@ -435,6 +482,11 @@ static __kprobes void do_optimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;

+ /* Optimization never be done when disarmed */
+ if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+ list_empty(&optimizing_list))
+ return;
+
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -457,17 +509,79 @@ static __kprobes void do_optimize_kprobes(void)
put_online_cpus();
}

+/*
+ * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
+ * if need) kprobes listed on unoptimizing_list.
+ */
+static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+{
+ struct optimized_kprobe *op, *tmp;
+
+ /* Unoptimization must be done anytime */
+ if (list_empty(&unoptimizing_list))
+ return;
+
+ /* Ditto to do_optimize_kprobes */
+ get_online_cpus();
+ mutex_lock(&text_mutex);
+ list_for_each_entry_safe(op, tmp, &unoptimizing_list, list) {
+ /* Unoptimize kprobes */
+ arch_unoptimize_kprobe(op);
+ /* Disarm probes if marked disabled */
+ if (kprobe_disabled(&op->kp))
+ arch_disarm_kprobe(&op->kp);
+ if (kprobe_unused(&op->kp)) {
+ /*
+ * Remove unused probes from hash list. After waiting
+ * for synchronization, these probes are reclaimed.
+ * (reclaiming is done by do_free_cleaned_kprobes.)
+ */
+ hlist_del_rcu(&op->kp.hlist);
+ /* Move only unused probes on free_list */
+ list_move(&op->list, free_list);
+ } else
+ list_del_init(&op->list);
+ }
+ mutex_unlock(&text_mutex);
+ put_online_cpus();
+}
+
+/* Reclaim all kprobes on the free_list */
+static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+{
+ struct optimized_kprobe *op, *tmp;
+
+ list_for_each_entry_safe(op, tmp, free_list, list) {
+ BUG_ON(!kprobe_unused(&op->kp));
+ list_del_init(&op->list);
+ free_aggr_kprobe(&op->kp);
+ }
+}
+
+/* Start optimizer after OPTIMIZE_DELAY passed */
+static __kprobes void kick_kprobe_optimizer(void)
+{
+ if (!delayed_work_pending(&optimizing_work))
+ schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
/* Kprobe jump optimizer */
static __kprobes void kprobe_optimizer(struct work_struct *work)
{
+ LIST_HEAD(free_list);
+
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
mutex_lock(&kprobe_mutex);
- if (kprobes_all_disarmed || !kprobes_allow_optimization)
- goto end;

/*
- * Wait for quiesence period to ensure all running interrupts
+ * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
+ * kprobes before waiting for quiesence period.
+ */
+ do_unoptimize_kprobes(&free_list);
+
+ /*
+ * Step 2: Wait for quiesence period to ensure all running interrupts
* are done. Because optprobe may modify multiple instructions
* there is a chance that Nth instruction is interrupted. In that
* case, running interrupt can return to 2nd-Nth byte of jump
@@ -475,10 +589,24 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
*/
synchronize_sched();

+ /* Step 3: Optimize kprobes after quiesence period */
do_optimize_kprobes();
-end:
+
+ /* Step 4: Free cleaned kprobes after quiesence period */
+ do_free_cleaned_kprobes(&free_list);
+
mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);
+
+ /* Wake up all waiters */
+ complete_all(&optimizer_comp);
+}
+
+/* Wait for completing optimization and unoptimization */
+static __kprobes void wait_for_kprobe_optimizer(void)
+{
+ if (delayed_work_pending(&optimizing_work))
+ wait_for_completion(&optimizer_comp);
}

/* Optimize kprobe if p is ready to be optimized */
@@ -504,27 +632,63 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
/* Check if it is already optimized. */
if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
return;
-
op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
- list_add(&op->list, &optimizing_list);
- if (!delayed_work_pending(&optimizing_work))
- schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+
+ if (!list_empty(&op->list))
+ /* This is under unoptimizing. Just dequeue the probe */
+ list_del_init(&op->list);
+ else {
+ list_add(&op->list, &optimizing_list);
+ kick_kprobe_optimizer();
+ }
+}
+
+/* Short cut to direct unoptimizing */
+static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ get_online_cpus();
+ arch_unoptimize_kprobe(op);
+ put_online_cpus();
+ if (kprobe_disabled(&op->kp))
+ arch_disarm_kprobe(&op->kp);
}

/* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p)
+static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
{
struct optimized_kprobe *op;

- if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
- op = container_of(p, struct optimized_kprobe, kp);
- if (!list_empty(&op->list))
- /* Dequeue from the optimization queue */
+ if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
+ return; /* This is not an optprobe nor optimized */
+
+ op = container_of(p, struct optimized_kprobe, kp);
+ if (!kprobe_optimized(p)) {
+ /* Unoptimized or unoptimizing case */
+ if (force && !list_empty(&op->list)) {
+ /*
+ * Only if this is unoptimizing kprobe and forced,
+ * forcibly unoptimize it. (No need to unoptimize
+ * unoptimized kprobe again :)
+ */
list_del_init(&op->list);
- else
- /* Replace jump with break */
- arch_unoptimize_kprobe(op);
- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ force_unoptimize_kprobe(op);
+ }
+ return;
+ }
+
+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ if (!list_empty(&op->list)) {
+ /* Dequeue from the optimization queue */
+ list_del_init(&op->list);
+ return;
+ }
+ /* Optimized kprobe case */
+ if (force)
+ /* Forcibly update the code: this is a special case */
+ force_unoptimize_kprobe(op);
+ else {
+ list_add(&op->list, &unoptimizing_list);
+ kick_kprobe_optimizer();
}
}

@@ -534,12 +698,12 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
struct optimized_kprobe *op;

op = container_of(p, struct optimized_kprobe, kp);
- if (!list_empty(&op->list)) {
- /* Dequeue from the optimization queue */
+ if (!list_empty(&op->list))
+ /* Dequeue from the (un)optimization queue */
list_del_init(&op->list);
- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- }
- /* Don't unoptimize, because the target code will be freed. */
+
+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ /* Don't touch the code, because it is already freed. */
arch_remove_optimized_kprobe(op);
}

@@ -552,16 +716,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
arch_prepare_optimized_kprobe(op);
}

-/* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
-{
- struct optimized_kprobe *op;
-
- op = container_of(p, struct optimized_kprobe, kp);
- arch_remove_optimized_kprobe(op);
- kfree(op);
-}
-
/* Allocate new optimized_kprobe and try to prepare optimized instructions */
static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
@@ -596,7 +750,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
op = container_of(ap, struct optimized_kprobe, kp);
if (!arch_prepared_optinsn(&op->optinsn)) {
/* If failed to setup optimizing, fallback to kprobe */
- free_aggr_kprobe(ap);
+ arch_remove_optimized_kprobe(op);
+ kfree(op);
return;
}

@@ -640,21 +795,16 @@ static void __kprobes unoptimize_all_kprobes(void)
return;

kprobes_allow_optimization = false;
- printk(KERN_INFO "Kprobes globally unoptimized\n");
- get_online_cpus(); /* For avoiding text_mutex deadlock */
- mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!kprobe_disabled(p))
- unoptimize_kprobe(p);
+ unoptimize_kprobe(p, false);
}
}
-
- mutex_unlock(&text_mutex);
- put_online_cpus();
- /* Allow all currently running kprobes to complete */
- synchronize_sched();
+ /* Wait for unoptimizing completion */
+ wait_for_kprobe_optimizer();
+ printk(KERN_INFO "Kprobes globally unoptimized\n");
}

int sysctl_kprobes_optimization;
@@ -678,6 +828,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
}
#endif /* CONFIG_SYSCTL */

+/* Put a breakpoint for a probe. Must be called with text_mutex locked */
static void __kprobes __arm_kprobe(struct kprobe *p)
{
struct kprobe *_p;
@@ -685,37 +836,45 @@ static void __kprobes __arm_kprobe(struct kprobe *p)
/* Check collision with other optimized kprobes */
_p = get_optimized_kprobe((unsigned long)p->addr);
if (unlikely(_p))
- unoptimize_kprobe(_p); /* Fallback to unoptimized kprobe */
+ /* Fallback to unoptimized kprobe */
+ unoptimize_kprobe(_p, true);

arch_arm_kprobe(p);
optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */
}

-static void __kprobes __disarm_kprobe(struct kprobe *p)
+/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
{
struct kprobe *_p;

- unoptimize_kprobe(p); /* Try to unoptimize */
- arch_disarm_kprobe(p);
+ unoptimize_kprobe(p, false); /* Try to unoptimize */

- /* If another kprobe was blocked, optimize it. */
- _p = get_optimized_kprobe((unsigned long)p->addr);
- if (unlikely(_p))
- optimize_kprobe(_p);
+ if (!kprobe_queued(p)) {
+ arch_disarm_kprobe(p);
+ /* If another kprobe was blocked, optimize it. */
+ _p = get_optimized_kprobe((unsigned long)p->addr);
+ if (unlikely(_p) && reopt)
+ optimize_kprobe(_p);
+ }
+ /* TODO: reoptimize others after unoptimized this probe */
}

#else /* !CONFIG_OPTPROBES */

#define optimize_kprobe(p) do {} while (0)
-#define unoptimize_kprobe(p) do {} while (0)
+#define unoptimize_kprobe(p, f) do {} while (0)
#define kill_optimized_kprobe(p) do {} while (0)
#define prepare_optimized_kprobe(p) do {} while (0)
#define try_to_optimize_kprobe(p) do {} while (0)
#define __arm_kprobe(p) arch_arm_kprobe(p)
-#define __disarm_kprobe(p) arch_disarm_kprobe(p)
+#define __disarm_kprobe(p, o) arch_disarm_kprobe(p)
+#define kprobe_disarmed(p) kprobe_disabled(p)
+#define wait_for_kprobe_optimizer() do {} while (0)

static __kprobes void free_aggr_kprobe(struct kprobe *p)
{
+ arch_remove_kprobe(p);
kfree(p);
}

@@ -741,11 +900,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
/* Disarm a kprobe with text_mutex */
static void __kprobes disarm_kprobe(struct kprobe *kp)
{
- get_online_cpus(); /* For avoiding text_mutex deadlock */
+ /* Ditto */
mutex_lock(&text_mutex);
- __disarm_kprobe(kp);
+ __disarm_kprobe(kp, true);
mutex_unlock(&text_mutex);
- put_online_cpus();
}

/*
@@ -951,7 +1109,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
BUG_ON(kprobe_gone(ap) || kprobe_gone(p));

if (p->break_handler || p->post_handler)
- unoptimize_kprobe(ap); /* Fall back to normal kprobe */
+ unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */

if (p->break_handler) {
if (ap->break_handler)
@@ -1014,7 +1172,9 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
if (!ap)
return -ENOMEM;
init_aggr_kprobe(ap, orig_p);
- }
+ } else if (kprobe_unused(ap))
+ /* Busy to die */
+ return -EBUSY;

if (kprobe_gone(ap)) {
/*
@@ -1283,8 +1443,11 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
/* Following process expects this probe is an aggrprobe */
WARN_ON(!kprobe_aggrprobe(ap));

- if (list_is_singular(&ap->list))
- /* This probe is the last child of aggrprobe */
+ if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
+ /*
+ * !disarmed could be happen if the probe is under delayed
+ * unoptimizing.
+ */
goto disarmed;
else {
/* If disabling probe has special handlers, update aggrprobe */
@@ -1313,6 +1476,7 @@ noclean:
return 0;

disarmed:
+ BUG_ON(!kprobe_disarmed(ap));
hlist_del_rcu(&ap->hlist);
return 0;
}
@@ -1322,14 +1486,15 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
struct kprobe *ap;

if (list_empty(&p->list))
+ /* This is an independent kprobe */
arch_remove_kprobe(p);
else if (list_is_singular(&p->list)) {
- /* "p" is the last child of an aggr_kprobe */
+ /* This is the last child of an aggrprobe */
ap = list_entry(p->list.next, struct kprobe, list);
list_del(&p->list);
- arch_remove_kprobe(ap);
free_aggr_kprobe(ap);
}
+ /* Otherwise, do nothing. */
}

int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1951,36 +2116,27 @@ static void __kprobes disarm_all_kprobes(void)
mutex_lock(&kprobe_mutex);

/* If kprobes are already disarmed, just return */
- if (kprobes_all_disarmed)
- goto already_disabled;
+ if (kprobes_all_disarmed) {
+ mutex_unlock(&kprobe_mutex);
+ return;
+ }

kprobes_all_disarmed = true;
printk(KERN_INFO "Kprobes globally disabled\n");

- /*
- * Here we call get_online_cpus() for avoiding text_mutex deadlock,
- * because disarming may also unoptimize kprobes.
- */
- get_online_cpus();
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- __disarm_kprobe(p);
+ __disarm_kprobe(p, false);
}
}
-
mutex_unlock(&text_mutex);
- put_online_cpus();
mutex_unlock(&kprobe_mutex);
- /* Allow all currently running kprobes to complete */
- synchronize_sched();
- return;

-already_disabled:
- mutex_unlock(&kprobe_mutex);
- return;
+ /* Wait for disarming all kprobes by optimizer */
+ wait_for_kprobe_optimizer();
}

/*

Subject: [tip:perf/core] kprobes: Reuse unused kprobe

Commit-ID: 0490cd1f9d99569d3bd64e17adc88db06a5007be
Gitweb: http://git.kernel.org/tip/0490cd1f9d99569d3bd64e17adc88db06a5007be
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:54:16 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:31 +0100

kprobes: Reuse unused kprobe

Reuse an unused kprobe (one that is waiting for unoptimization
and has no user handler) at the given address, instead of
returning -EBUSY when registering a new kprobe. For example, if a
probe is unregistered and then re-registered at the same address
before the delayed unoptimizer runs, the pending aggrprobe is
simply re-enabled and re-queued for optimization.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/kprobes.c | 32 ++++++++++++++++++++++++++++++--
1 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ba4d4c0..134754d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -692,6 +692,27 @@ static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
}
}

+/* Cancel unoptimizing for reusing */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+ struct optimized_kprobe *op;
+
+ BUG_ON(!kprobe_unused(ap));
+ /*
+ * Unused kprobe MUST be on the way of delayed unoptimizing (means
+ * there is still a relative jump) and disabled.
+ */
+ op = container_of(ap, struct optimized_kprobe, kp);
+ if (unlikely(list_empty(&op->list)))
+ printk(KERN_WARNING "Warning: found a stray unused "
+ "aggrprobe@%p\n", ap->addr);
+ /* Enable the probe again */
+ ap->flags &= ~KPROBE_FLAG_DISABLED;
+ /* Optimize it again (remove from op->list) */
+ BUG_ON(!kprobe_optready(ap));
+ optimize_kprobe(ap);
+}
+
/* Remove optimized instructions */
static void __kprobes kill_optimized_kprobe(struct kprobe *p)
{
@@ -872,6 +893,13 @@ static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
#define kprobe_disarmed(p) kprobe_disabled(p)
#define wait_for_kprobe_optimizer() do {} while (0)

+/* There should be no unused kprobes can be reused without optimization */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+ printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+ BUG_ON(kprobe_unused(ap));
+}
+
static __kprobes void free_aggr_kprobe(struct kprobe *p)
{
arch_remove_kprobe(p);
@@ -1173,8 +1201,8 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
return -ENOMEM;
init_aggr_kprobe(ap, orig_p);
} else if (kprobe_unused(ap))
- /* Busy to die */
- return -EBUSY;
+ /* This probe is going to die. Rescue it */
+ reuse_unused_kprobe(ap);

if (kprobe_gone(ap)) {
/*

Subject: [tip:perf/core] x86: Introduce text_poke_smp_batch() for batch-code modifying

Commit-ID: 7deb18dcf0478940ac979de002db1ed8ba6531dc
Gitweb: http://git.kernel.org/tip/7deb18dcf0478940ac979de002db1ed8ba6531dc
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:54:22 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:31 +0100

x86: Introduce text_poke_smp_batch() for batch-code modifying

Introduce text_poke_smp_batch(). This function modifies several
text areas with one stop_machine() on SMP. Because calling
stop_machine() is a heavy operation, it is better to aggregate
text_poke requests.

( Note: I've talked with Rusty about this interface, and
he would prefer not to expand the stop_machine() interface, since
it is not meant for generic use. )

Signed-off-by: Masami Hiramatsu <[email protected]>
Acked-by: Steven Rostedt <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Jan Beulich <[email protected]>
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/alternative.h | 7 +++++
arch/x86/kernel/alternative.c | 49 +++++++++++++++++++++++++++++------
2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d2..4a2adaa 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
* On the local CPU you need to be protected again NMI or MCE handlers seeing an
* inconsistent instruction while you patch.
*/
+struct text_poke_param {
+ void *addr;
+ const void *opcode;
+ size_t len;
+};
+
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24..553d0b0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
static int wrote_text;

struct text_poke_params {
- void *addr;
- const void *opcode;
- size_t len;
+ struct text_poke_param *params;
+ int nparams;
};

static int __kprobes stop_machine_text_poke(void *data)
{
struct text_poke_params *tpp = data;
+ struct text_poke_param *p;
+ int i;

if (atomic_dec_and_test(&stop_machine_first)) {
- text_poke(tpp->addr, tpp->opcode, tpp->len);
+ for (i = 0; i < tpp->nparams; i++) {
+ p = &tpp->params[i];
+ text_poke(p->addr, p->opcode, p->len);
+ }
smp_wmb(); /* Make sure other cpus see that this has run */
wrote_text = 1;
} else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
smp_mb(); /* Load wrote_text before following execution */
}

- flush_icache_range((unsigned long)tpp->addr,
- (unsigned long)tpp->addr + tpp->len);
+ for (i = 0; i < tpp->nparams; i++) {
+ p = &tpp->params[i];
+ flush_icache_range((unsigned long)p->addr,
+ (unsigned long)p->addr + p->len);
+ }
+
return 0;
}

@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
struct text_poke_params tpp;
+ struct text_poke_param p;

- tpp.addr = addr;
- tpp.opcode = opcode;
- tpp.len = len;
+ p.addr = addr;
+ p.opcode = opcode;
+ p.len = len;
+ tpp.params = &p;
+ tpp.nparams = 1;
atomic_set(&stop_machine_first, 1);
wrote_text = 0;
/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
return addr;
}

+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+ struct text_poke_params tpp = {.params = params, .nparams = n};
+
+ atomic_set(&stop_machine_first, 1);
+ wrote_text = 0;
+ stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)

#ifdef CONFIG_X86_64
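
As a usage sketch (a hypothetical caller, not part of this patch), a
user fills an array of text_poke_param entries and issues them in a
single batch while holding the locks required by the kerneldoc above;
addr1/insn1/len1 and addr2/insn2/len2 are hypothetical patch sites and
opcodes:

    struct text_poke_param params[2];

    params[0].addr   = addr1;
    params[0].opcode = insn1;
    params[0].len    = len1;
    params[1].addr   = addr2;
    params[1].opcode = insn2;
    params[1].len    = len2;

    get_online_cpus();
    mutex_lock(&text_mutex);
    text_poke_smp_batch(params, 2); /* one stop_machine() for both */
    mutex_unlock(&text_mutex);
    put_online_cpus();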

Subject: [tip:perf/core] kprobes: Use text_poke_smp_batch for optimizing

Commit-ID: cd7ebe2298ff1c3112232878678ce5fe6be8a15b
Gitweb: http://git.kernel.org/tip/cd7ebe2298ff1c3112232878678ce5fe6be8a15b
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:54:28 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:31 +0100

kprobes: Use text_poke_smp_batch for optimizing

Use text_poke_smp_batch() in the optimization path to reduce
the number of stop_machine() calls. If the number of probes to
optimize is more than MAX_OPTIMIZE_PROBES (=256), kprobes
optimizes the first MAX_OPTIMIZE_PROBES probes and kicks the
optimizer again for the remaining probes.
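
For example, registering 1000 probes at once makes the optimizer run
four times, patching 256 + 256 + 256 + 232 probes per pass, i.e. four
stop_machine() calls instead of one per probe.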

Changes in v5:
- Use kick_kprobe_optimizer() instead of directly calling
schedule_delayed_work().
- Reschedule the optimizer outside of the kprobe mutex lock.

Changes in v2:
- Allocate the code buffer and parameter array in arch_init_kprobes()
instead of using static arrays.
- Merge the previous max-optimization-limit patch into this patch,
so this patch introduces an upper limit on how many probes can be
optimized at once.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: [email protected]
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/kprobes.c | 69 +++++++++++++++++++++++++++++++++++++++-----
include/linux/kprobes.h | 2 +-
kernel/kprobes.c | 17 ++++------
3 files changed, 69 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index da51dc8..25a8af7 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1405,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
return 0;
}

-/* Replace a breakpoint (int3) with a relative jump. */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+ u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
{
- unsigned char jmp_code[RELATIVEJUMP_SIZE];
s32 rel = (s32)((long)op->optinsn.insn -
((long)op->kp.addr + RELATIVEJUMP_SIZE));

@@ -1416,16 +1422,39 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
RELATIVE_ADDR_SIZE);

- jmp_code[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&jmp_code[1]) = rel;
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ WARN_ON(kprobe_disabled(&op->kp));
+ /* Setup param */
+ setup_optimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_del_init(&op->list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }

/*
* text_poke_smp doesn't support NMI/MCE code modifying.
* However, since kprobes itself also doesn't support NMI/MCE
* code probing, it's not a problem.
*/
- text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
- return 0;
+ text_poke_smp_batch(jump_poke_params, c);
}

/* Replace a relative jump with a breakpoint (int3). */
@@ -1457,11 +1486,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
}
return 0;
}
+
+static int __kprobes init_poke_params(void)
+{
+ /* Allocate code buffer and parameter array */
+ jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_bufs)
+ return -ENOMEM;
+
+ jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_params) {
+ kfree(jump_poke_bufs);
+ jump_poke_bufs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+#else /* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+ return 0;
+}
#endif

int __init arch_init_kprobes(void)
{
- return 0;
+ return init_poke_params();
}

int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e..fe157ba 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -275,7 +275,7 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
-extern int arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_optimize_kprobes(struct list_head *oplist);
extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
extern kprobe_opcode_t *get_optinsn_slot(void);
extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 134754d..531e101 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -480,8 +480,6 @@ static DECLARE_COMPLETION(optimizer_comp);
*/
static __kprobes void do_optimize_kprobes(void)
{
- struct optimized_kprobe *op, *tmp;
-
/* Optimization never be done when disarmed */
if (kprobes_all_disarmed || !kprobes_allow_optimization ||
list_empty(&optimizing_list))
@@ -499,12 +497,7 @@ static __kprobes void do_optimize_kprobes(void)
*/
get_online_cpus();
mutex_lock(&text_mutex);
- list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
- WARN_ON(kprobe_disabled(&op->kp));
- if (arch_optimize_kprobe(op) < 0)
- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- list_del_init(&op->list);
- }
+ arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex);
put_online_cpus();
}
@@ -598,8 +591,12 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&kprobe_mutex);
mutex_unlock(&module_mutex);

- /* Wake up all waiters */
- complete_all(&optimizer_comp);
+ /* Step 5: Kick optimizer again if needed */
+ if (!list_empty(&optimizing_list))
+ kick_kprobe_optimizer();
+ else
+ /* Wake up all waiters */
+ complete_all(&optimizer_comp);
}

/* Wait for completing optimization and unoptimization */

Subject: [tip:perf/core] kprobes: Use text_poke_smp_batch for unoptimizing

Commit-ID: f984ba4eb575e4a27ed28a76d4126d2aa9233c32
Gitweb: http://git.kernel.org/tip/f984ba4eb575e4a27ed28a76d4126d2aa9233c32
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 3 Dec 2010 18:54:34 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Dec 2010 17:59:32 +0100

kprobes: Use text_poke_smp_batch for unoptimizing

Use text_poke_smp_batch() on the unoptimization path to reduce
the number of stop_machine() calls. If the number of probes to
unoptimize is more than MAX_OPTIMIZE_PROBES (=256), kprobes
unoptimizes the first MAX_OPTIMIZE_PROBES probes and kicks the
optimizer again for the remaining probes. The same
jump_poke_params/jump_poke_bufs working buffers and the
MAX_OPTIMIZE_PROBES limit are shared with the optimizing path.

Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: [email protected]
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/kprobes.c | 40 ++++++++++++++++++++++++++++++++++++++++
include/linux/kprobes.h | 2 ++
kernel/kprobes.c | 10 ++++------
3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 25a8af7..5940282 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1457,6 +1457,46 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist)
text_poke_smp_batch(jump_poke_params, c);
}

+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ /* Set int3 to first byte for kprobes */
+ insn_buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /* Setup param */
+ setup_unoptimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_move(&op->list, done_list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
/* Replace a relative jump with a breakpoint (int3). */
void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index fe157ba..b78edb5 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -276,6 +276,8 @@ extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
extern void arch_optimize_kprobes(struct list_head *oplist);
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list);
extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
extern kprobe_opcode_t *get_optinsn_slot(void);
extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 531e101..7663e5d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -517,9 +517,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
/* Ditto to do_optimize_kprobes */
get_online_cpus();
mutex_lock(&text_mutex);
- list_for_each_entry_safe(op, tmp, &unoptimizing_list, list) {
- /* Unoptimize kprobes */
- arch_unoptimize_kprobe(op);
+ arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+ /* Loop free_list for disarming */
+ list_for_each_entry_safe(op, tmp, free_list, list) {
/* Disarm probes if marked disabled */
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
@@ -530,8 +530,6 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
* (reclaiming is done by do_free_cleaned_kprobes.)
*/
hlist_del_rcu(&op->kp.hlist);
- /* Move only unused probes on free_list */
- list_move(&op->list, free_list);
} else
list_del_init(&op->list);
}
@@ -592,7 +590,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&module_mutex);

/* Step 5: Kick optimizer again if needed */
- if (!list_empty(&optimizing_list))
+ if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
kick_kprobe_optimizer();
else
/* Wake up all waiters */

Date: 2011-02-11 21:06:46
From: Peter Zijlstra
Subject: Re: [tip:perf/core] x86: Introduce text_poke_smp_batch() for batch-code modifying

On Mon, 2010-12-06 at 18:17 +0000, tip-bot for Masami Hiramatsu wrote:

> @@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
> void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
> {
> struct text_poke_params tpp;
> + struct text_poke_param p;
>
> - tpp.addr = addr;
> - tpp.opcode = opcode;
> - tpp.len = len;
> + p.addr = addr;
> + p.opcode = opcode;
> + p.len = len;
> + tpp.params = &p;
> + tpp.nparams = 1;
> atomic_set(&stop_machine_first, 1);
> wrote_text = 0;
> /* Use __stop_machine() because the caller already got online_cpus. */

^^^^^^^^^

> @@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
> return addr;
> }
>
> +/**
> + * text_poke_smp_batch - Update instructions on a live kernel on SMP
> + * @params: an array of text_poke parameters
> + * @n: the number of elements in params.
> + *
> + * Modify multi-byte instruction by using stop_machine() on SMP. Since the
> + * stop_machine() is heavy task, it is better to aggregate text_poke requests
> + * and do it once if possible.
> + *
> + * Note: Must be called under get_online_cpus() and text_mutex.
> + */
> +void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
> +{
> + struct text_poke_params tpp = {.params = params, .nparams = n};
> +
> + atomic_set(&stop_machine_first, 1);
> + wrote_text = 0;
> + stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
> +}

^^^^^^^^^^^^^^


---
Subject: x86: Fix text_poke_smp_batch() deadlock

=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.38-rc4-test+ #1
-------------------------------------------------------
bash/1850 is trying to acquire lock:
(text_mutex){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f

but task is already holding lock:
(smp_alt){+.+...}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #2 (smp_alt){+.+...}:
[<ffffffff81082d02>] lock_acquire+0xcd/0xf8
[<ffffffff8192e119>] __mutex_lock_common+0x4c/0x339
[<ffffffff8192e4ca>] mutex_lock_nested+0x3e/0x43
[<ffffffff8101050f>] alternatives_smp_switch+0x77/0x1d8
[<ffffffff81926a6f>] do_boot_cpu+0xd7/0x762
[<ffffffff819277dd>] native_cpu_up+0xe6/0x16a
[<ffffffff81928e28>] _cpu_up+0x9d/0xee
[<ffffffff81928f4c>] cpu_up+0xd3/0xe7
[<ffffffff82268d4b>] kernel_init+0xe8/0x20a
[<ffffffff8100ba24>] kernel_thread_helper+0x4/0x10

-> #1 (cpu_hotplug.lock){+.+.+.}:
[<ffffffff81082d02>] lock_acquire+0xcd/0xf8
[<ffffffff8192e119>] __mutex_lock_common+0x4c/0x339
[<ffffffff8192e4ca>] mutex_lock_nested+0x3e/0x43
[<ffffffff810568cc>] get_online_cpus+0x41/0x55
[<ffffffff810a1348>] stop_machine+0x1e/0x3e
[<ffffffff819314c1>] text_poke_smp_batch+0x3a/0x3c
[<ffffffff81932b6c>] arch_optimize_kprobes+0x10d/0x11c
[<ffffffff81933a51>] kprobe_optimizer+0x152/0x222
[<ffffffff8106bb71>] process_one_work+0x1d3/0x335
[<ffffffff8106cfae>] worker_thread+0x104/0x1a4
[<ffffffff810707c4>] kthread+0x9d/0xa5
[<ffffffff8100ba24>] kernel_thread_helper+0x4/0x10

-> #0 (text_mutex){+.+.+.}:


other info that might help us debug this:

6 locks held by bash/1850:
#0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#1: (s_active#75){.+.+.+}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#2: (x86_cpu_hotplug_driver_mutex){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#3: (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#4: (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#5: (smp_alt){+.+...}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f

stack backtrace:
Pid: 1850, comm: bash Not tainted 2.6.38-rc4-test+ #1
Call Trace:

[<ffffffff81080eb2>] print_circular_bug+0xa8/0xb7
[<ffffffff8192e4ca>] mutex_lock_nested+0x3e/0x43
[<ffffffff81010302>] alternatives_smp_unlock+0x3d/0x93
[<ffffffff81010630>] alternatives_smp_switch+0x198/0x1d8
[<ffffffff8102568a>] native_cpu_die+0x65/0x95
[<ffffffff818cc4ec>] _cpu_down+0x13e/0x202
[<ffffffff8117a619>] sysfs_write_file+0x108/0x144
[<ffffffff8111f5a2>] vfs_write+0xac/0xff
[<ffffffff8111f7a9>] sys_write+0x4a/0x6e

Reported-by: Steven Rostedt <[email protected]>
Tested-by: Steven Rostedt <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
---
arch/x86/kernel/alternative.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1236085..7038b95 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -671,7 +671,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)

atomic_set(&stop_machine_first, 1);
wrote_text = 0;
- stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
}

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
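
The one-line change works because __stop_machine() does not take
cpu_hotplug.lock via get_online_cpus() the way stop_machine() does:
the kprobe optimizer already holds the online-CPUs reference when it
takes text_mutex and calls text_poke_smp_batch(), so acquiring
cpu_hotplug.lock again inside stop_machine() created the
text_mutex -> cpu_hotplug.lock dependency that closes the cycle in
the lockdep report above.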

Date: 2011-02-12 01:37:14
From: Peter Zijlstra
Subject: [tip:perf/urgent] x86: Fix text_poke_smp_batch() deadlock

Commit-ID: d91309f69b7bdb64aeb30106fde8d18c5dd354b5
Gitweb: http://git.kernel.org/tip/d91309f69b7bdb64aeb30106fde8d18c5dd354b5
Author: Peter Zijlstra <[email protected]>
AuthorDate: Fri, 11 Feb 2011 22:07:46 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 12 Feb 2011 02:34:34 +0100

x86: Fix text_poke_smp_batch() deadlock

Fix this deadlock - we are already holding the mutex:

=======================================================
[ INFO: possible circular locking dependency detected ] 2.6.38-rc4-test+ #1
-------------------------------------------------------
bash/1850 is trying to acquire lock:
(text_mutex){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f

but task is already holding lock:
(smp_alt){+.+...}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (smp_alt){+.+...}:
[<ffffffff81082d02>] lock_acquire+0xcd/0xf8
[<ffffffff8192e119>] __mutex_lock_common+0x4c/0x339
[<ffffffff8192e4ca>] mutex_lock_nested+0x3e/0x43
[<ffffffff8101050f>] alternatives_smp_switch+0x77/0x1d8
[<ffffffff81926a6f>] do_boot_cpu+0xd7/0x762
[<ffffffff819277dd>] native_cpu_up+0xe6/0x16a
[<ffffffff81928e28>] _cpu_up+0x9d/0xee
[<ffffffff81928f4c>] cpu_up+0xd3/0xe7
[<ffffffff82268d4b>] kernel_init+0xe8/0x20a
[<ffffffff8100ba24>] kernel_thread_helper+0x4/0x10

-> #1 (cpu_hotplug.lock){+.+.+.}:
[<ffffffff81082d02>] lock_acquire+0xcd/0xf8
[<ffffffff8192e119>] __mutex_lock_common+0x4c/0x339
[<ffffffff8192e4ca>] mutex_lock_nested+0x3e/0x43
[<ffffffff810568cc>] get_online_cpus+0x41/0x55
[<ffffffff810a1348>] stop_machine+0x1e/0x3e
[<ffffffff819314c1>] text_poke_smp_batch+0x3a/0x3c
[<ffffffff81932b6c>] arch_optimize_kprobes+0x10d/0x11c
[<ffffffff81933a51>] kprobe_optimizer+0x152/0x222
[<ffffffff8106bb71>] process_one_work+0x1d3/0x335
[<ffffffff8106cfae>] worker_thread+0x104/0x1a4
[<ffffffff810707c4>] kthread+0x9d/0xa5
[<ffffffff8100ba24>] kernel_thread_helper+0x4/0x10

-> #0 (text_mutex){+.+.+.}:

other info that might help us debug this:

6 locks held by bash/1850:
#0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#1: (s_active#75){.+.+.+}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#2: (x86_cpu_hotplug_driver_mutex){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#3: (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#4: (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f
#5: (smp_alt){+.+...}, at: [<ffffffff8100a9c1>] return_to_handler+0x0/0x2f

stack backtrace:
Pid: 1850, comm: bash Not tainted 2.6.38-rc4-test+ #1
Call Trace:

[<ffffffff81080eb2>] print_circular_bug+0xa8/0xb7
[<ffffffff8192e4ca>] mutex_lock_nested+0x3e/0x43
[<ffffffff81010302>] alternatives_smp_unlock+0x3d/0x93
[<ffffffff81010630>] alternatives_smp_switch+0x198/0x1d8
[<ffffffff8102568a>] native_cpu_die+0x65/0x95
[<ffffffff818cc4ec>] _cpu_down+0x13e/0x202
[<ffffffff8117a619>] sysfs_write_file+0x108/0x144
[<ffffffff8111f5a2>] vfs_write+0xac/0xff
[<ffffffff8111f7a9>] sys_write+0x4a/0x6e

Reported-by: Steven Rostedt <[email protected]>
Tested-by: Steven Rostedt <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
LKML-Reference: <1297458466.5226.93.camel@laptop>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/alternative.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1236085..7038b95 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -671,7 +671,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)

atomic_set(&stop_machine_first, 1);
wrote_text = 0;
- stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
}

#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)

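Why the one-liner fixes it: stop_machine() at this point in time wraps
__stop_machine() in get_online_cpus()/put_online_cpus(), while the kprobe
optimizer has already pinned the CPUs before taking text_mutex. Here is a
paraphrased sketch of that wrapper (not a verbatim copy of
kernel/stop_machine.c):

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	/* Takes cpu_hotplug.lock - which the optimizer path already holds */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	return ret;
}

Calling __stop_machine() directly skips the redundant cpu_hotplug.lock
acquisition, matching what text_poke_smp() already does for the same
reason.
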
Subject: Re: [tip:perf/core] x86: Introduce text_poke_smp_batch() for batch-code modifying

(2011/02/12 6:07), Peter Zijlstra wrote:
> On Mon, 2010-12-06 at 18:17 +0000, tip-bot for Masami Hiramatsu wrote:
>
>> @@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
>> void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
>> {
>> struct text_poke_params tpp;
>> + struct text_poke_param p;
>>
>> - tpp.addr = addr;
>> - tpp.opcode = opcode;
>> - tpp.len = len;
>> + p.addr = addr;
>> + p.opcode = opcode;
>> + p.len = len;
>> + tpp.params = &p;
>> + tpp.nparams = 1;
>> atomic_set(&stop_machine_first, 1);
>> wrote_text = 0;
>> /* Use __stop_machine() because the caller already got online_cpus. */
>
> ^^^^^^^^^
>
>> @@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
>> return addr;
>> }
>>
>> +/**
>> + * text_poke_smp_batch - Update instructions on a live kernel on SMP
>> + * @params: an array of text_poke parameters
>> + * @n: the number of elements in params.
>> + *
>> + * Modify multi-byte instructions by using stop_machine() on SMP. Since
>> + * stop_machine() is a heavy operation, it is better to aggregate
>> + * text_poke requests and do them all in one call when possible.
>> + *
>> + * Note: Must be called under get_online_cpus() and text_mutex.
>> + */
>> +void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
>> +{
>> + struct text_poke_params tpp = {.params = params, .nparams = n};
>> +
>> + atomic_set(&stop_machine_first, 1);
>> + wrote_text = 0;
>> + stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
>> +}
>
> ^^^^^^^^^^^^^^

Oops! Indeed, it's my mistake.
Thank you for reporting & fixing!
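
For reference, a minimal, hypothetical caller (example_batch_poke() and its
arguments are made-up names for illustration) that follows the locking rule
in the kernel-doc above - pin CPU hotplug and take text_mutex around the
batch call:

#include <linux/cpu.h>		/* get_online_cpus()/put_online_cpus() */
#include <linux/memory.h>	/* text_mutex */
#include <linux/mutex.h>
#include <asm/alternative.h>	/* struct text_poke_param, text_poke_smp_batch() */

/* Hypothetical sketch only - not part of the patch. */
static void example_batch_poke(struct text_poke_param *params, int nparams)
{
	get_online_cpus();		/* required: see the deadlock fix above */
	mutex_lock(&text_mutex);	/* required: serializes kernel text edits */

	/* All nparams pokes go through a single stop-machine transition. */
	text_poke_smp_batch(params, nparams);

	mutex_unlock(&text_mutex);
	put_online_cpus();
}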

>
>
> ---
> Subject: x86: Fix text_poke_smp_batch() deadlock
>
> [ lockdep splat and backtrace snipped - identical to the report quoted
>   in full at the top of this fix ]
>
> Reported-by: Steven Rostedt <[email protected]>
> Tested-by: Steven Rostedt <[email protected]>
> Signed-off-by: Peter Zijlstra <[email protected]>

Acked-by: Masami Hiramatsu <[email protected]>

> [ diff snipped - the same one-line stop_machine() -> __stop_machine()
>   change shown above ]
>
--
Masami HIRAMATSU
2nd Dept. Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: [email protected]