With CONFIG_DEBUG_PREEMPT we observed reports like:
BUG: using smp_processor_id() in preemptible
caller is perf_ftrace_function_call+0x6f/0x2e0
CPU: 1 PID: 680 Comm: a.out Not tainted
Call Trace:
<TASK>
dump_stack_lvl+0x8d/0xcf
check_preemption_disabled+0x104/0x110
? optimize_nops.isra.7+0x230/0x230
? text_poke_bp_batch+0x9f/0x310
perf_ftrace_function_call+0x6f/0x2e0
...
__text_poke+0x5/0x620
text_poke_bp_batch+0x9f/0x310
This tells us the CPU could be changed after the task is preempted, and
the check on the CPU done before preemption is useless.
This patch just turns off preemption in perf_ftrace_function_call()
to prevent the CPU from changing.
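To illustrate the window (a sketch only, with comments added for this
changelog; the names come from the callback touched by the diff below):

	if ((unsigned long)ops->private != smp_processor_id())
		return;		/* check passes on, say, CPU 1 */
	/*
	 * Without preemption disabled the task can be migrated to
	 * another CPU right here, so the check above no longer
	 * describes the CPU the rest of the callback runs on.
	 */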
Signed-off-by: Michael Wang <[email protected]>
---
kernel/trace/trace_event_perf.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6aed10e..5486b18 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
int rctx;
int bit;
+ preempt_disable_notrace();
+
if (!rcu_is_watching())
- return;
+ goto out;
if ((unsigned long)ops->private != smp_processor_id())
- return;
+ goto out;
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
- return;
+ goto out;
event = container_of(ops, struct perf_event, ftrace_ops);
@@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)
entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
if (!entry)
- goto out;
+ goto out_unlock;
entry->ip = ip;
entry->parent_ip = parent_ip;
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
1, &regs, &head, NULL);
-out:
+out_unlock:
ftrace_test_recursion_unlock(bit);
#undef ENTRY_SIZE
+out:
+ preempt_enable_notrace();
}
static int perf_ftrace_function_register(struct perf_event *event)
--
1.8.3.1
Reported-by: Abaci <[email protected]>
On 2021/9/23 11:42 AM, 王贇 wrote:
> With CONFIG_DEBUG_PREEMPT we observed reports like:
>
> BUG: using smp_processor_id() in preemptible
> caller is perf_ftrace_function_call+0x6f/0x2e0
> CPU: 1 PID: 680 Comm: a.out Not tainted
> Call Trace:
> <TASK>
> dump_stack_lvl+0x8d/0xcf
> check_preemption_disabled+0x104/0x110
> ? optimize_nops.isra.7+0x230/0x230
> ? text_poke_bp_batch+0x9f/0x310
> perf_ftrace_function_call+0x6f/0x2e0
> ...
> __text_poke+0x5/0x620
> text_poke_bp_batch+0x9f/0x310
>
> This tells us the CPU could be changed after the task is preempted, and
> the check on the CPU done before preemption is useless.
>
> This patch just turns off preemption in perf_ftrace_function_call()
> to prevent the CPU from changing.
>
> Signed-off-by: Michael Wang <[email protected]>
> ---
> kernel/trace/trace_event_perf.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index 6aed10e..5486b18 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
> int rctx;
> int bit;
>
> + preempt_disable_notrace();
> +
> if (!rcu_is_watching())
> - return;
> + goto out;
>
> if ((unsigned long)ops->private != smp_processor_id())
> - return;
> + goto out;
>
> bit = ftrace_test_recursion_trylock(ip, parent_ip);
> if (bit < 0)
> - return;
> + goto out;
>
> event = container_of(ops, struct perf_event, ftrace_ops);
>
> @@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)
>
> entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
> if (!entry)
> - goto out;
> + goto out_unlock;
>
> entry->ip = ip;
> entry->parent_ip = parent_ip;
> perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
> 1, &regs, &head, NULL);
>
> -out:
> +out_unlock:
> ftrace_test_recursion_unlock(bit);
> #undef ENTRY_SIZE
> +out:
> + preempt_enable_notrace();
> }
>
> static int perf_ftrace_function_register(struct perf_event *event)
>
On Thu, 23 Sep 2021 11:42:56 +0800
王贇 <[email protected]> wrote:
> With CONFIG_DEBUG_PREEMPT we observed reports like:
>
> BUG: using smp_processor_id() in preemptible
> caller is perf_ftrace_function_call+0x6f/0x2e0
> CPU: 1 PID: 680 Comm: a.out Not tainted
> Call Trace:
> <TASK>
> dump_stack_lvl+0x8d/0xcf
> check_preemption_disabled+0x104/0x110
> ? optimize_nops.isra.7+0x230/0x230
> ? text_poke_bp_batch+0x9f/0x310
> perf_ftrace_function_call+0x6f/0x2e0
> ...
> __text_poke+0x5/0x620
> text_poke_bp_batch+0x9f/0x310
>
> This tells us the CPU could be changed after the task is preempted, and
> the check on the CPU done before preemption is useless.
>
> This patch just turns off preemption in perf_ftrace_function_call()
> to prevent the CPU from changing.
>
> Signed-off-by: Michael Wang <[email protected]>
> ---
> kernel/trace/trace_event_perf.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index 6aed10e..5486b18 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
> int rctx;
> int bit;
>
> + preempt_disable_notrace();
> +
> if (!rcu_is_watching())
> - return;
> + goto out;
You don't need preemption disabled for the above check. It's better to
disable it here and leave the above return untouched.
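I.e. roughly this ordering (just a sketch of the intended control flow,
not a full patch; the label name is illustrative):

	if (!rcu_is_watching())
		return;

	preempt_disable_notrace();

	if ((unsigned long)ops->private != smp_processor_id())
		goto out;
	[...]
out:
	preempt_enable_notrace();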
-- Steve
>
> if ((unsigned long)ops->private != smp_processor_id())
> - return;
> + goto out;
>
> bit = ftrace_test_recursion_trylock(ip, parent_ip);
> if (bit < 0)
> - return;
> + goto out;
>
> event = container_of(ops, struct perf_event, ftrace_ops);
>
> @@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)
>
> entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
> if (!entry)
> - goto out;
> + goto out_unlock;
>
> entry->ip = ip;
> entry->parent_ip = parent_ip;
> perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
> 1, &regs, &head, NULL);
>
> -out:
> +out_unlock:
> ftrace_test_recursion_unlock(bit);
> #undef ENTRY_SIZE
> +out:
> + preempt_enable_notrace();
> }
>
> static int perf_ftrace_function_register(struct perf_event *event)
On 2021/9/23 9:33 PM, Steven Rostedt wrote:
[snip]
>> ---
>> kernel/trace/trace_event_perf.c | 14 +++++++++-----
>> 1 file changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
>> index 6aed10e..5486b18 100644
>> --- a/kernel/trace/trace_event_perf.c
>> +++ b/kernel/trace/trace_event_perf.c
>> @@ -438,15 +438,17 @@ void perf_trace_buf_update(void *record, u16 type)
>> int rctx;
>> int bit;
>>
>> + preempt_disable_notrace();
>> +
>> if (!rcu_is_watching())
>> - return;
>> + goto out;
>
> You don't need preemption disabled for the above check. It's better to
> disable it here and leave the above return untouched.
I found that the rcu tree implementation of rcu_is_watching() checks
this_cpu_ptr(&rcu_data.dynticks) and re-enables preemption after that.
If preemption happens after that and before we disable it here, isn't
there still a possibility that the CPU changes and makes the dynticks
check invalid?
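For reference, the shape I mean looks roughly like this (paraphrased
from kernel/rcu/tree.c, the exact helpers differ between versions):

	bool rcu_is_watching(void)
	{
		bool ret;

		preempt_disable_notrace();
		/* reads this_cpu_ptr(&rcu_data.dynticks) of the current CPU */
		ret = !rcu_dynticks_curr_cpu_in_eqs();
		preempt_enable_notrace();
		return ret;
	}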
Regards,
Michael Wang
>
> -- Steve
>
>>
>> if ((unsigned long)ops->private != smp_processor_id())
>> - return;
>> + goto out;
>>
>> bit = ftrace_test_recursion_trylock(ip, parent_ip);
>> if (bit < 0)
>> - return;
>> + goto out;
>>
>> event = container_of(ops, struct perf_event, ftrace_ops);
>>
>> @@ -468,16 +470,18 @@ void perf_trace_buf_update(void *record, u16 type)
>>
>> entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
>> if (!entry)
>> - goto out;
>> + goto out_unlock;
>>
>> entry->ip = ip;
>> entry->parent_ip = parent_ip;
>> perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
>> 1, &regs, &head, NULL);
>>
>> -out:
>> +out_unlock:
>> ftrace_test_recursion_unlock(bit);
>> #undef ENTRY_SIZE
>> +out:
>> + preempt_enable_notrace();
>> }
>>
>> static int perf_ftrace_function_register(struct perf_event *event)
On Fri, 24 Sep 2021 10:08:10 +0800
王贇 <[email protected]> wrote:
> I found that the rcu tree implementation of rcu_is_watching() checks
> this_cpu_ptr(&rcu_data.dynticks) and re-enables preemption after that.
>
> If preemption happens after that and before we disable it here, isn't
> there still a possibility that the CPU changes and makes the dynticks
> check invalid?
If it can be scheduled, then RCU is definitely watching ;-)
rcu_is_watching() is a safeguard for places that are in between
context switches. Not task context switches, but transitioning between
kernel and user space, or going into or out of idle, or transitioning
in and out of an interrupt. There are small critical sections that RCU
is not watching, and we are actually working on making those locations
disable instrumentation (like tracing), where rcu_is_watching() will no
longer be needed.
-- Steve
On Thu, 23 Sep 2021 23:26:19 -0400
Steven Rostedt <[email protected]> wrote:
> If it can be scheduled, then RCU is definitely watching ;-)
In other words, nothing ever gets scheduled into a location that RCU is
not watching.
-- Steve
On 2021/9/24 11:26 AM, Steven Rostedt wrote:
> On Fri, 24 Sep 2021 10:08:10 +0800
> 王贇 <[email protected]> wrote:
>
>> I found that the rcu tree implementation of rcu_is_watching() checks
>> this_cpu_ptr(&rcu_data.dynticks) and re-enables preemption after that.
>>
>> If preemption happens after that and before we disable it here, isn't
>> there still a possibility that the CPU changes and makes the dynticks
>> check invalid?
>
> If it can be scheduled, then RCU is definitely watching ;-)
>
> rcu_is_watching() is a safeguard for places that are in between
> context switches. Not task context switches, but transitioning between
> kernel and user space, or going into or out of idle, or transitioning
> in and out of an interrupt. There are small critical sections that RCU
> is not watching, and we are actually working on making those locations
> disable instrumentation (like tracing), where rcu_is_watching() will no
> longer be needed.
Thanks for the explanation :-)
A context that is available for scheduling should not be in these
situations, so I will move the 'disable' down in v2.
Regards,
Michael Wang
>
> -- Steve
>
With CONFIG_DEBUG_PREEMPT we observed reports like:
BUG: using smp_processor_id() in preemptible
caller is perf_ftrace_function_call+0x6f/0x2e0
CPU: 1 PID: 680 Comm: a.out Not tainted
Call Trace:
<TASK>
dump_stack_lvl+0x8d/0xcf
check_preemption_disabled+0x104/0x110
? optimize_nops.isra.7+0x230/0x230
? text_poke_bp_batch+0x9f/0x310
perf_ftrace_function_call+0x6f/0x2e0
...
__text_poke+0x5/0x620
text_poke_bp_batch+0x9f/0x310
This tells us the CPU could be changed after the task is preempted, and
the check on the CPU done before preemption will be invalid.
This patch just turns off preemption in perf_ftrace_function_call()
to prevent the CPU from changing.
Reported-by: Abaci <[email protected]>
Signed-off-by: Michael Wang <[email protected]>
---
kernel/trace/trace_event_perf.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6aed10e..dcbefdf 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type)
if (!rcu_is_watching())
return;
+ /*
+ * Prevent the CPU from changing from now on. RCU must
+ * be watching if the task was migrated and then
+ * scheduled.
+ */
+ preempt_disable_notrace();
+
if ((unsigned long)ops->private != smp_processor_id())
- return;
+ goto out;
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
- return;
+ goto out;
event = container_of(ops, struct perf_event, ftrace_ops);
@@ -468,16 +475,18 @@ void perf_trace_buf_update(void *record, u16 type)
entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
if (!entry)
- goto out;
+ goto unlock;
entry->ip = ip;
entry->parent_ip = parent_ip;
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
1, &regs, &head, NULL);
-out:
+unlock:
ftrace_test_recursion_unlock(bit);
#undef ENTRY_SIZE
+out:
+ preempt_enable_notrace();
}
static int perf_ftrace_function_register(struct perf_event *event)
--
1.8.3.1
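A closing note on the primitives (not part of the patch itself): the
_notrace variants are used because this code runs from an ftrace
callback, and the plain preempt_disable()/preempt_enable() pair can
itself trigger tracing hooks, which could recurse back into the
tracer. A minimal sketch of the pairing as applied above:

	preempt_disable_notrace();	/* pin the task to this CPU, no tracer hooks */
	/* ... per-CPU work that relies on smp_processor_id() staying stable ... */
	preempt_enable_notrace();	/* allow migration again */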