When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI vs do_ftrace_mod_code(),
but it cannot protect NMI (entered NMI) vs NMI (ftrace_nmi_enter()).
cpu#1 | cpu#2 | cpu#3
ftrace_nmi_enter() | do_ftrace_mod_code() |
not modify | |
------------------------|-----------------------|--
executing | set mod_code_write = 1|
executing --|-----------------------|--------------------
executing | | ftrace_nmi_enter()
executing | | do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit() | |
cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.
Signed-off-by: Lai Jiangshan <[email protected]>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..e016f5e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -87,7 +87,8 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
- * and if it is, it will write what is in the IP and "code" buffers.
+ * and if it is, and there is no executing nmi, it will write
+ * what is in the IP and "code" buffers.
*
* The trick is, it does not matter if everyone is writing the same
* content to the code location. Also, if a CPU is executing code
@@ -96,6 +97,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*/
static atomic_t nmi_running = ATOMIC_INIT(0);
+static atomic_t nmi_executing = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
@@ -135,14 +137,18 @@ void ftrace_nmi_enter(void)
atomic_inc(&nmi_running);
/* Must have nmi_running seen before reading write flag */
smp_mb();
- if (mod_code_write) {
+ if (!atomic_read(&nmi_executing) && mod_code_write) {
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
+ atomic_inc(&nmi_executing);
+ smp_mb();
}
void ftrace_nmi_exit(void)
{
+ smp_mb();
+ atomic_dec(&nmi_executing);
/* Finish all executions before clearing nmi_running */
smp_wmb();
atomic_dec(&nmi_running);
On Mon, 2009-03-16 at 20:54 +0800, Lai Jiangshan wrote:
> When I review the sensitive code ftrace_nmi_enter(), I found
> the atomic variable nmi_running does protect NMI VS do_ftrace_mod_code(),
> but it can not protects NMI(entered nmi) VS NMI(ftrace_nmi_enter()).
>
> cpu#1 | cpu#2 | cpu#3
> ftrace_nmi_enter() | do_ftrace_mod_code() |
> not modify | |
> ------------------------|-----------------------|--
> executing | set mod_code_write = 1|
> executing --|-----------------------|--------------------
> executing | | ftrace_nmi_enter()
> executing | | do modify
> ------------------------|-----------------------|-----------------
> ftrace_nmi_exit() | |
Very good review!
This race is possible, although very unlikely, and it must be fixed
regardless.
>
> cpu#3 may be being modified the code which is still being executed on cpu#1,
> it will have undefined results and possibly take a GPF, this patch
> prevents it occurred.
Unfortunately your patch does not solve the problem. It only makes the
race window smaller.
>
> Signed-off-by: Lai Jiangshan <[email protected]>
> ---
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 1d0d7f4..e016f5e 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -87,7 +87,8 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
> *
> * If an NMI is executed, the first thing it does is to call
> * "ftrace_nmi_enter". This will check if the flag is set to write
> - * and if it is, it will write what is in the IP and "code" buffers.
> + * and if it is, and there is no executing nmi, it will write
> + * what is in the IP and "code" buffers.
> *
> * The trick is, it does not matter if everyone is writing the same
> * content to the code location. Also, if a CPU is executing code
> @@ -96,6 +97,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
> */
>
> static atomic_t nmi_running = ATOMIC_INIT(0);
> +static atomic_t nmi_executing = ATOMIC_INIT(0);
> static int mod_code_status; /* holds return value of text write */
> static int mod_code_write; /* set when NMI should do the write */
> static void *mod_code_ip; /* holds the IP to write to */
> @@ -135,14 +137,18 @@ void ftrace_nmi_enter(void)
> atomic_inc(&nmi_running);
> /* Must have nmi_running seen before reading write flag */
> smp_mb();
> - if (mod_code_write) {
> + if (!atomic_read(&nmi_executing) && mod_code_write) {
> ftrace_mod_code();
> atomic_inc(&nmi_update_count);
> }
Here we have another race window. If cpu#1 has that NMI and right here
we get a SMI (something to make the race window bigger). cpu#2 could
have set the mod_code_write and cpu#3 could have another NMI that sees
it but does not see the nmi_executing flag. Now we are in the same
scenario as you nicely described up above.
> + atomic_inc(&nmi_executing);
> + smp_mb();
> }
>
> void ftrace_nmi_exit(void)
> {
> + smp_mb();
> + atomic_dec(&nmi_executing);
> /* Finish all executions before clearing nmi_running */
> smp_wmb();
> atomic_dec(&nmi_running);
>
The solution is to connect the mod_code_write with the nmi_enter and
nmi_exit. Make mod_code_write an atomic.
void ftrace_nmi_enter(void)
{
if (atomic_inc_return(&mod_code_write) > 10000) {
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
smp_mb();
}
void ftrace_nmi_exit(void)
{
smp_mb();
atomic_dec(&mod_code_write);
}
Then in do_ftrace_mod_code ...
while (atomic_cmpxchg(&mod_code_write, 0, 10001) != 0)
;
[...]
while (atomic_cmpxchg(&mode_code_write, 10001, 0) != 10001)
;
Does this look like it would solve the issue?
-- Steve
Steven Rostedt wrote:
>> atomic_inc(&nmi_running);
>> /* Must have nmi_running seen before reading write flag */
>> smp_mb();
>> - if (mod_code_write) {
>> + if (!atomic_read(&nmi_executing) && mod_code_write) {
>> ftrace_mod_code();
>> atomic_inc(&nmi_update_count);
>> }
>
> Here we have another race window. If cpu#1 has that NMI and right here
> we get a SMI (something to make the race window bigger). cpu#2 could
> have set the mod_code_write and cpu#3 could have another NMI that sees
> it but does not see the nmi_executing flag. Now we are in the same
> scenario as you nicely described up above.
I missed this window.
> void ftrace_nmi_enter(void)
> {
> if (atomic_inc_return(&mod_code_write) > 10000) {
> ftrace_mod_code();
> atomic_inc(&nmi_update_count);
> }
> smp_mb();
> }
>
> void ftrace_nmi_exit(void)
> {
> smp_mb();
> atomic_dec(&mod_code_write);
> }
>
> Then in do_ftrace_mod_code ...
>
>
> while (atomic_cmpxchg(&mod_code_write, 0, 10001) != 0)
> ;
>
> [...]
>
>
> while (atomic_cmpxchg(&mode_code_write, 10001, 0) != 10001)
> ;
>
>
> Does this look like it would solve the issue?
>
It's very nice. The write-flag and the counter are put together into one
atomic variable. The write-flag is changed only when there is no running
NMI, so if any NMI sees this flag, all other running NMIs also saw it
when they were entering.
Subject: [PATCH 1/2] ftrace: protect running nmi (V2)
When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI vs do_ftrace_mod_code(),
but it cannot protect NMI (entered NMI) vs NMI (ftrace_nmi_enter()).
cpu#1 | cpu#2 | cpu#3
ftrace_nmi_enter() | do_ftrace_mod_code() |
not modify | |
------------------------|-----------------------|--
executing | set mod_code_write = 1|
executing --|-----------------------|--------------------
executing | | ftrace_nmi_enter()
executing | | do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit() | |
cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.
Signed-off-by: Lai Jiangshan <[email protected]>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..699a1c0 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ * we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag and wait for any running NMIs to finish,
+ * it is also done in an atomic operation.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
+#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
-static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
@@ -124,40 +124,36 @@ static void ftrace_mod_code(void)
*/
mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
MCOUNT_INSN_SIZE);
-
- /* if we fail, then kill any new writers */
- if (mod_code_status)
- mod_code_write = 0;
}
void ftrace_nmi_enter(void)
{
- atomic_inc(&nmi_running);
- /* Must have nmi_running seen before reading write flag */
- smp_mb();
- if (mod_code_write) {
+ if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+ smp_rmb();
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
+ /* Must have previous changes seen before executions */
+ smp_mb();
}
void ftrace_nmi_exit(void)
{
/* Finish all executions before clearing nmi_running */
- smp_wmb();
+ smp_mb();
atomic_dec(&nmi_running);
}
-static void wait_for_nmi(void)
+static void wait_and_set(int wait_val, int set_val)
{
- if (!atomic_read(&nmi_running))
- return;
+ int wait = 0;
- do {
+ while (atomic_cmpxchg(&nmi_running, wait_val, set_val) != wait_val) {
+ wait = 1;
cpu_relax();
- } while (atomic_read(&nmi_running));
+ }
- nmi_wait_count++;
+ nmi_wait_count += wait;
}
static int
@@ -166,15 +162,13 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
mod_code_ip = (void *)ip;
mod_code_newcode = new_code;
- /* The buffers need to be visible before we let NMIs write them */
- smp_wmb();
-
- mod_code_write = 1;
-
- /* Make sure write bit is visible before we wait on NMIs */
+ /*
+ * The previous variables need to be visible before NMIs sees
+ * the MOD_CODE_WRITE_FLAG.
+ */
smp_mb();
- wait_for_nmi();
+ wait_and_set(0, MOD_CODE_WRITE_FLAG);
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
@@ -182,14 +176,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
- smp_wmb();
-
- mod_code_write = 0;
-
- /* make sure NMIs see the cleared bit */
smp_mb();
- wait_for_nmi();
+ wait_and_set(MOD_CODE_WRITE_FLAG, 0);
return mod_code_status;
}
Lai Jiangshan wrote:
>
> Subject: [PATCH 1/2] ftrace: protect running nmi (V2)
>
Subject: [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code()
commit 90c7ac49aa819feb9433b5310089fca6399881c0
adds a fast path to prevent NMI lockup.
But the previous patch "protect executing nmi" changed
do_ftrace_mod_code()'s implementation, so we still need a fix to
prevent NMI lockup by adding a fast path.
A difference between this fix and 90c7ac49aa819feb9433b5310089fca6399881c0
is that we kill any new writers whether probe_kernel_write() succeeds
or fails, not only when probe_kernel_write() fails.
(When probe_kernel_write() succeeds, new writers do not need to do
it again.)
Signed-off-by: Lai Jiangshan <[email protected]>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 699a1c0..61cb520 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -98,6 +98,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
+static int mod_code_no_write = 1; /* set when NMI not need do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
@@ -124,14 +125,19 @@ static void ftrace_mod_code(void)
*/
mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
MCOUNT_INSN_SIZE);
+
+ smb_wmb();
+ mod_code_no_write = 1;
}
void ftrace_nmi_enter(void)
{
if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
smp_rmb();
- ftrace_mod_code();
- atomic_inc(&nmi_update_count);
+ if (!mod_code_no_write) {
+ ftrace_mod_code();
+ atomic_inc(&nmi_update_count);
+ }
}
/* Must have previous changes seen before executions */
smp_mb();
@@ -161,6 +167,7 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
{
mod_code_ip = (void *)ip;
mod_code_newcode = new_code;
+ mod_code_no_write = 0;
/*
* The previous variables need to be visible before NMIs sees
@@ -173,7 +180,8 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
- ftrace_mod_code();
+ if (!mod_code_no_write)
+ ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
smp_mb();
On Tue, 2009-03-17 at 20:58 +0800, Lai Jiangshan wrote:
> Lai Jiangshan wrote:
> >
> > Subject: [PATCH 1/2] ftrace: protect running nmi (V2)
> >
>
>
> Subject: [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code()
>
> commit 90c7ac49aa819feb9433b5310089fca6399881c0
> adds a fast path to prevent NMI lockup.
>
> But the previous patch "protect executing nmi" changes
> do_ftrace_mod_code()'s implementation, we still need fix to
> prevent NMI lockup by adding a fast path.
>
> A difference between this fix and 90c7ac49aa819feb9433b5310089fca6399881c0
> is that: We kill any new writers in spite of probe_kernel_write()
> success or fail, not only when probe_kernel_write() fail.
> (When probe_kernel_write() success, new writers do not need to do
> it again.)
I'm a bit nervous about this code. We do not get much benefit from it,
because the NMI case is an anomaly, and is not a fast path anyway. This
code only happens when we are running the stop_machine, and this adds
added complexity for little benefit.
The original patch was to prevent an actual live lock I got in one of my
tests. The problem was that the failure of the write caused a printk
stack dump. But the time it took the print to go out over the serial was
long enough that the next NMI triggered when it finished. The new NMI
hit the same error and did another print. Thus, all I got was a lot of
prints out over the serial, but the system was dead.
I like the first patch, but you removed the protection there. It should
have been in this patch, and it should have still added the
functionality of the previous method.
>
> Signed-off-by: Lai Jiangshan <[email protected]>
> ---
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 699a1c0..61cb520 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -98,6 +98,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
> #define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
> static atomic_t nmi_running = ATOMIC_INIT(0);
> static int mod_code_status; /* holds return value of text write */
> +static int mod_code_no_write = 1; /* set when NMI not need do the write */
> static void *mod_code_ip; /* holds the IP to write to */
> static void *mod_code_newcode; /* holds the text to write to the IP */
>
> @@ -124,14 +125,19 @@ static void ftrace_mod_code(void)
> */
> mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
> MCOUNT_INSN_SIZE);
> +
> + smb_wmb();
I'd still rather have this only set when mod_code_status fails.
> + mod_code_no_write = 1;
> }
>
> void ftrace_nmi_enter(void)
> {
> if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
> smp_rmb();
> - ftrace_mod_code();
> - atomic_inc(&nmi_update_count);
> + if (!mod_code_no_write) {
> + ftrace_mod_code();
> + atomic_inc(&nmi_update_count);
> + }
> }
> /* Must have previous changes seen before executions */
> smp_mb();
> @@ -161,6 +167,7 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
> {
> mod_code_ip = (void *)ip;
> mod_code_newcode = new_code;
> + mod_code_no_write = 0;
Here's another issue: if mod_code_status failed, we do not want
mod_code_no_write to become zero again. The logic may indeed prevent this,
but I'd rather have the logic be straightforward, and just set this to
one when we have a failure and forget about it. Yes, it is a bit more
expensive, but it makes the code clearer.
-- Steve
>
> /*
> * The previous variables need to be visible before NMIs sees
> @@ -173,7 +180,8 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
> /* Make sure all running NMIs have finished before we write the code */
> smp_mb();
>
> - ftrace_mod_code();
> + if (!mod_code_no_write)
> + ftrace_mod_code();
>
> /* Make sure the write happens before clearing the bit */
> smp_mb();
>
>
>
Steven Rostedt wrote:
>
> I'm a bit nervous about this code. We do not get much benefit from it,
> because the NMI case is an anomaly, and is not a fast path anyway. This
> code only happens when we are running the stop_machine, and this adds
> added complexity for little benefit.
>
> The original patch was to prevent an actual live lock I got in one of my
> tests. The problem was that the failure of the write caused a printk
> stack dump. But the time it took the print to go out over the serial was
> long enough that the next NMI triggered when it finished. The new NMI
> hit the same error and did another print. Thus, all I got was a lot of
> prints out over the serial, but the system was dead.
>
Thank you. I understand.
> I like the first patch. but you remove the protection there. It should
> have been in this patch. But it should have still added the
> functionality of the previous method.
I separated it into two parts; I thought it would be good for review.
But I wrote two bad patches.
>> @@ -161,6 +167,7 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
>> {
>> mod_code_ip = (void *)ip;
>> mod_code_newcode = new_code;
>> + mod_code_no_write = 0;
>
> Here's another issue, if mod_code_status failed, we do not want to have
> mod_code_no_write become zero again. The logic may indeed prevent this,
> but I rather have the logic be straight forward, and just set this to
> one when we have a failure and forget about it. Yes, it is a bit more
> expensive, but it makes the code clearer.
It confused me.
do_ftrace_mod_code() is called sequentially; mod_code_no_write should
become zero again in new calls.
Unlike the old code, once the first patch is applied, no NMI will
attempt to call probe_kernel_write() when we have just entered
do_ftrace_mod_code(), so setting mod_code_no_write to 0 is safe
(because the flag is not set).
Lai.
When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI vs do_ftrace_mod_code(),
but it cannot protect NMI (entered NMI) vs NMI (ftrace_nmi_enter()).
cpu#1 | cpu#2 | cpu#3
ftrace_nmi_enter() | do_ftrace_mod_code() |
not modify | |
------------------------|-----------------------|--
executing | set mod_code_write = 1|
executing --|-----------------------|--------------------
executing | | ftrace_nmi_enter()
executing | | do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit() | |
cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.
Signed-off-by: Lai Jiangshan <[email protected]>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..0edb5c2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ * we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
+#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
-static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
@@ -114,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
return r;
}
+static void clear_mod_flag(void)
+{
+ int old = atomic_read(&nmi_running);
+
+ for (;;) {
+ int new = old & ~MOD_CODE_WRITE_FLAG;
+
+ if (old == new)
+ break;
+
+ old = atomic_cmpxchg(&nmi_running, old, new);
+ }
+}
+
static void ftrace_mod_code(void)
{
/*
@@ -127,27 +141,39 @@ static void ftrace_mod_code(void)
/* if we fail, then kill any new writers */
if (mod_code_status)
- mod_code_write = 0;
+ clear_mod_flag();
}
void ftrace_nmi_enter(void)
{
- atomic_inc(&nmi_running);
- /* Must have nmi_running seen before reading write flag */
- smp_mb();
- if (mod_code_write) {
+ if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+ smp_rmb();
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
+ /* Must have previous changes seen before executions */
+ smp_mb();
}
void ftrace_nmi_exit(void)
{
/* Finish all executions before clearing nmi_running */
- smp_wmb();
+ smp_mb();
atomic_dec(&nmi_running);
}
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+ if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+ rerurn;
+
+ do {
+ cpu_relax();
+ } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+ nmi_wait_count++;
+}
+
static void wait_for_nmi(void)
{
if (!atomic_read(&nmi_running))
@@ -167,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
mod_code_newcode = new_code;
/* The buffers need to be visible before we let NMIs write them */
- smp_wmb();
-
- mod_code_write = 1;
-
- /* Make sure write bit is visible before we wait on NMIs */
smp_mb();
- wait_for_nmi();
+ wait_for_nmi_and_set_mod_flag();
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
@@ -182,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
- smp_wmb();
-
- mod_code_write = 0;
-
- /* make sure NMIs see the cleared bit */
smp_mb();
+ clear_mod_flag();
wait_for_nmi();
return mod_code_status;
On Wed, 2009-03-18 at 16:42 +0800, Lai Jiangshan wrote:
>
> +static void wait_for_nmi_and_set_mod_flag(void)
> +{
> + if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
> + rerurn;
You might want to compile check patches before sending. I'll look this
patch over in detail, and make the necessary fixes.
-- Steve
> +
> + do {
> + cpu_relax();
> + } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
> +
> + nmi_wait_count++;
> +}
> +
Steven Rostedt wrote:
> On Wed, 2009-03-18 at 16:42 +0800, Lai Jiangshan wrote:
>
>>
>> +static void wait_for_nmi_and_set_mod_flag(void)
>> +{
>> + if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
>> + rerurn;
>
> You might want to compile check patches before sending. I'll look this
> patch over in detail, and make the necessary fixes.
>
Sorry for it.
(I set CONFIG_DYNAMIC_FTRACE=n temporarily yesterday, ^_^)
Subject: [PATCH] ftrace: protect running nmi (V3)
When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI vs do_ftrace_mod_code(),
but it cannot protect NMI (entered NMI) vs NMI (ftrace_nmi_enter()).
cpu#1 | cpu#2 | cpu#3
ftrace_nmi_enter() | do_ftrace_mod_code() |
not modify | |
------------------------|-----------------------|--
executing | set mod_code_write = 1|
executing --|-----------------------|--------------------
executing | | ftrace_nmi_enter()
executing | | do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit() | |
cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.
Signed-off-by: Lai Jiangshan <[email protected]>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..f0c7fad 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ * we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
+#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
-static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
@@ -114,6 +114,19 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
return r;
}
+static void clear_mod_flag(void)
+{
+ for (;;) {
+ int old = atomic_read(&nmi_running);
+ int new = old & ~MOD_CODE_WRITE_FLAG;
+
+ if (old == new)
+ break;
+
+ old = atomic_cmpxchg(&nmi_running, old, new);
+ }
+}
+
static void ftrace_mod_code(void)
{
/*
@@ -127,27 +140,39 @@ static void ftrace_mod_code(void)
/* if we fail, then kill any new writers */
if (mod_code_status)
- mod_code_write = 0;
+ clear_mod_flag();
}
void ftrace_nmi_enter(void)
{
- atomic_inc(&nmi_running);
- /* Must have nmi_running seen before reading write flag */
- smp_mb();
- if (mod_code_write) {
+ if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+ smp_rmb();
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
+ /* Must have previous changes seen before executions */
+ smp_mb();
}
void ftrace_nmi_exit(void)
{
/* Finish all executions before clearing nmi_running */
- smp_wmb();
+ smp_mb();
atomic_dec(&nmi_running);
}
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+ if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+ return;
+
+ do {
+ cpu_relax();
+ } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+ nmi_wait_count++;
+}
+
static void wait_for_nmi(void)
{
if (!atomic_read(&nmi_running))
@@ -167,14 +192,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
mod_code_newcode = new_code;
/* The buffers need to be visible before we let NMIs write them */
- smp_wmb();
-
- mod_code_write = 1;
-
- /* Make sure write bit is visible before we wait on NMIs */
smp_mb();
- wait_for_nmi();
+ wait_for_nmi_and_set_mod_flag();
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
@@ -182,13 +202,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
- smp_wmb();
-
- mod_code_write = 0;
-
- /* make sure NMIs see the cleared bit */
smp_mb();
+ clear_mod_flag();
wait_for_nmi();
return mod_code_status;
Commit-ID: e9d9df44736d116726f4596f7e2f9ce2764ffc0a
Gitweb: http://git.kernel.org/tip/e9d9df44736d116726f4596f7e2f9ce2764ffc0a
Author: Lai Jiangshan <[email protected]>
AuthorDate: Wed, 18 Mar 2009 16:42:57 +0800
Committer: Steven Rostedt <[email protected]>
CommitDate: Wed, 18 Mar 2009 20:36:59 -0400
ftrace: protect running nmi (V3)
When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI vs do_ftrace_mod_code(),
but it cannot protect NMI (entered NMI) vs NMI (ftrace_nmi_enter()).
cpu#1 | cpu#2 | cpu#3
ftrace_nmi_enter() | do_ftrace_mod_code() |
not modify | |
------------------------|-----------------------|--
executing | set mod_code_write = 1|
executing --|-----------------------|--------------------
executing | | ftrace_nmi_enter()
executing | | do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit() | |
cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.
Signed-off-by: Lai Jiangshan <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
arch/x86/kernel/ftrace.c | 63 +++++++++++++++++++++++++++++----------------
1 files changed, 40 insertions(+), 23 deletions(-)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..57b33ed 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ * we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
+#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
-static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
@@ -114,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
return r;
}
+static void clear_mod_flag(void)
+{
+ int old = atomic_read(&nmi_running);
+
+ for (;;) {
+ int new = old & ~MOD_CODE_WRITE_FLAG;
+
+ if (old == new)
+ break;
+
+ old = atomic_cmpxchg(&nmi_running, old, new);
+ }
+}
+
static void ftrace_mod_code(void)
{
/*
@@ -127,27 +141,39 @@ static void ftrace_mod_code(void)
/* if we fail, then kill any new writers */
if (mod_code_status)
- mod_code_write = 0;
+ clear_mod_flag();
}
void ftrace_nmi_enter(void)
{
- atomic_inc(&nmi_running);
- /* Must have nmi_running seen before reading write flag */
- smp_mb();
- if (mod_code_write) {
+ if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+ smp_rmb();
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
+ /* Must have previous changes seen before executions */
+ smp_mb();
}
void ftrace_nmi_exit(void)
{
/* Finish all executions before clearing nmi_running */
- smp_wmb();
+ smp_mb();
atomic_dec(&nmi_running);
}
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+ if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+ return;
+
+ do {
+ cpu_relax();
+ } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+ nmi_wait_count++;
+}
+
static void wait_for_nmi(void)
{
if (!atomic_read(&nmi_running))
@@ -167,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
mod_code_newcode = new_code;
/* The buffers need to be visible before we let NMIs write them */
- smp_wmb();
-
- mod_code_write = 1;
-
- /* Make sure write bit is visible before we wait on NMIs */
smp_mb();
- wait_for_nmi();
+ wait_for_nmi_and_set_mod_flag();
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
@@ -182,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
- smp_wmb();
-
- mod_code_write = 0;
-
- /* make sure NMIs see the cleared bit */
smp_mb();
+ clear_mod_flag();
wait_for_nmi();
return mod_code_status;