This and the next patch resolve memory corruption problems during CPU
hotplug. Without these patches, memory corruption can be triggered easily
as follows:
On a quad-core MIPS platform, use "spawn" of UnixBench-5.1.3 (http://
code.google.com/p/byte-unixbench/) and a CPU hotplug script like this
(hotplug.sh):
while true; do
echo 0 >/sys/devices/system/cpu/cpu1/online
echo 0 >/sys/devices/system/cpu/cpu2/online
echo 0 >/sys/devices/system/cpu/cpu3/online
sleep 1
echo 1 >/sys/devices/system/cpu/cpu1/online
echo 1 >/sys/devices/system/cpu/cpu2/online
echo 1 >/sys/devices/system/cpu/cpu3/online
sleep 1
done
Run "hotplug.sh" and then run "spawn 10000"; spawn will segfault
after a few minutes.
This patch:
Currently, clear_page()/copy_page() are generated dynamically by the
Micro-assembler. But they are unusable until uasm_resolve_relocs() has
finished because jump labels are illegal before that. Since these
functions are shared by every CPU, we call build_clear_page()/
build_copy_page() only once at boot time. Without this patch, programs
will get random memory corruption (segmentation fault, bus error, etc.)
during CPU hotplug (e.g. one CPU is using clear_page() while another is
regenerating it in cpu_cache_init()).
For similar reasons we modify build_tlb_refill_handler()'s invocation.
V2:
1, Rework the code so that CPU#0 can be brought online/offline.
2, Introduce the cpu_has_local_ebase feature since some types of MIPS CPU
need a per-CPU tlb_refill_handler().
Signed-off-by: Huacai Chen <[email protected]>
Signed-off-by: Hongbing Hu <[email protected]>
---
arch/mips/include/asm/cpu-features.h | 3 +++
.../asm/mach-loongson/cpu-feature-overrides.h | 1 +
arch/mips/mm/page.c | 10 ++++++++++
arch/mips/mm/tlbex.c | 10 ++++++++--
4 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 1a57e8b..e5ec8fc 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -113,6 +113,9 @@
#ifndef cpu_has_pindexed_dcache
#define cpu_has_pindexed_dcache (cpu_data[0].dcache.flags & MIPS_CACHE_PINDEX)
#endif
+#ifndef cpu_has_local_ebase
+#define cpu_has_local_ebase 1
+#endif
/*
* I-Cache snoops remote store. This only matters on SMP. Some multiprocessors
diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
index 75fd8c0..c0f3ef4 100644
--- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
@@ -57,5 +57,6 @@
#define cpu_has_vint 0
#define cpu_has_vtag_icache 0
#define cpu_has_watch 1
+#define cpu_has_local_ebase 0
#endif /* __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index a29fba5..4eb8dcf 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -247,6 +247,11 @@ void __cpuinit build_clear_page(void)
struct uasm_label *l = labels;
struct uasm_reloc *r = relocs;
int i;
+ static atomic_t run_once = ATOMIC_INIT(0);
+
+ if (atomic_xchg(&run_once, 1)) {
+ return;
+ }
memset(labels, 0, sizeof(labels));
memset(relocs, 0, sizeof(relocs));
@@ -389,6 +394,11 @@ void __cpuinit build_copy_page(void)
struct uasm_label *l = labels;
struct uasm_reloc *r = relocs;
int i;
+ static atomic_t run_once = ATOMIC_INIT(0);
+
+ if (atomic_xchg(&run_once, 1)) {
+ return;
+ }
memset(labels, 0, sizeof(labels));
memset(relocs, 0, sizeof(relocs));
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 820e661..6bc28b4 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -2162,8 +2162,11 @@ void __cpuinit build_tlb_refill_handler(void)
case CPU_TX3922:
case CPU_TX3927:
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
- build_r3000_tlb_refill_handler();
+ if (cpu_has_local_ebase)
+ build_r3000_tlb_refill_handler();
if (!run_once) {
+ if (!cpu_has_local_ebase)
+ build_r3000_tlb_refill_handler();
build_r3000_tlb_load_handler();
build_r3000_tlb_store_handler();
build_r3000_tlb_modify_handler();
@@ -2192,9 +2195,12 @@ void __cpuinit build_tlb_refill_handler(void)
build_r4000_tlb_load_handler();
build_r4000_tlb_store_handler();
build_r4000_tlb_modify_handler();
+ if (!cpu_has_local_ebase)
+ build_r4000_tlb_refill_handler();
run_once++;
}
- build_r4000_tlb_refill_handler();
+ if (cpu_has_local_ebase)
+ build_r4000_tlb_refill_handler();
}
}
--
1.7.7.3
On 03/17/2013 05:49 AM, Huacai Chen wrote:
> This and the next patch resolve memory corruption problems while CPU
> hotplug. Without these patches, memory corruption can triggered easily
> as below:
>
> On a quad-core MIPS platform, use "spawn" of UnixBench-5.1.3 (http://
> code.google.com/p/byte-unixbench/) and a CPU hotplug script like this
> (hotplug.sh):
> while true; do
> echo 0 >/sys/devices/system/cpu/cpu1/online
> echo 0 >/sys/devices/system/cpu/cpu2/online
> echo 0 >/sys/devices/system/cpu/cpu3/online
> sleep 1
> echo 1 >/sys/devices/system/cpu/cpu1/online
> echo 1 >/sys/devices/system/cpu/cpu2/online
> echo 1 >/sys/devices/system/cpu/cpu3/online
> sleep 1
> done
>
> Run "hotplug.sh" and then run "spawn 10000", spawn will get segfault
> after a few minutes.
>
> This patch:
> Currently, clear_page()/copy_page() are generated by Micro-assembler
> dynamically. But they are unavailable until uasm_resolve_relocs() has
> finished because jump labels are illegal before that. Since these
> functions are shared by every CPU, we only call build_clear_page()/
> build_copy_page() only once at boot time. Without this patch, programs
> will get random memory corruption (segmentation fault, bus error, etc.)
> while CPU Hotplug (e.g. one CPU is using clear_page() while another is
> generating it in cpu_cache_init()).
>
> For similar reasons we modify build_tlb_refill_handler()'s invocation.
>
> V2:
> 1, Rework the code to make CPU#0 can be online/offline.
> 2, Introduce cpu_has_local_ebase feature since some types of MIPS CPU
> need a per-CPU tlb_refill_handler().
>
> Signed-off-by: Huacai Chen <[email protected]>
> Signed-off-by: Hongbing Hu <[email protected]>
We were seeing the same crashes; this patch set seems to fix the problem.
Acked-by: David Daney <[email protected]>
> ---
> arch/mips/include/asm/cpu-features.h | 3 +++
> .../asm/mach-loongson/cpu-feature-overrides.h | 1 +
> arch/mips/mm/page.c | 10 ++++++++++
> arch/mips/mm/tlbex.c | 10 ++++++++--
> 4 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
> index 1a57e8b..e5ec8fc 100644
> --- a/arch/mips/include/asm/cpu-features.h
> +++ b/arch/mips/include/asm/cpu-features.h
> @@ -113,6 +113,9 @@
> #ifndef cpu_has_pindexed_dcache
> #define cpu_has_pindexed_dcache (cpu_data[0].dcache.flags & MIPS_CACHE_PINDEX)
> #endif
> +#ifndef cpu_has_local_ebase
> +#define cpu_has_local_ebase 1
> +#endif
>
> /*
> * I-Cache snoops remote store. This only matters on SMP. Some multiprocessors
> diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
> index 75fd8c0..c0f3ef4 100644
> --- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
> +++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
> @@ -57,5 +57,6 @@
> #define cpu_has_vint 0
> #define cpu_has_vtag_icache 0
> #define cpu_has_watch 1
> +#define cpu_has_local_ebase 0
>
> #endif /* __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H */
> diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
> index a29fba5..4eb8dcf 100644
> --- a/arch/mips/mm/page.c
> +++ b/arch/mips/mm/page.c
> @@ -247,6 +247,11 @@ void __cpuinit build_clear_page(void)
> struct uasm_label *l = labels;
> struct uasm_reloc *r = relocs;
> int i;
> + static atomic_t run_once = ATOMIC_INIT(0);
> +
> + if (atomic_xchg(&run_once, 1)) {
> + return;
> + }
>
> memset(labels, 0, sizeof(labels));
> memset(relocs, 0, sizeof(relocs));
> @@ -389,6 +394,11 @@ void __cpuinit build_copy_page(void)
> struct uasm_label *l = labels;
> struct uasm_reloc *r = relocs;
> int i;
> + static atomic_t run_once = ATOMIC_INIT(0);
> +
> + if (atomic_xchg(&run_once, 1)) {
> + return;
> + }
>
> memset(labels, 0, sizeof(labels));
> memset(relocs, 0, sizeof(relocs));
> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> index 820e661..6bc28b4 100644
> --- a/arch/mips/mm/tlbex.c
> +++ b/arch/mips/mm/tlbex.c
> @@ -2162,8 +2162,11 @@ void __cpuinit build_tlb_refill_handler(void)
> case CPU_TX3922:
> case CPU_TX3927:
> #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
> - build_r3000_tlb_refill_handler();
> + if (cpu_has_local_ebase)
> + build_r3000_tlb_refill_handler();
> if (!run_once) {
> + if (!cpu_has_local_ebase)
> + build_r3000_tlb_refill_handler();
> build_r3000_tlb_load_handler();
> build_r3000_tlb_store_handler();
> build_r3000_tlb_modify_handler();
> @@ -2192,9 +2195,12 @@ void __cpuinit build_tlb_refill_handler(void)
> build_r4000_tlb_load_handler();
> build_r4000_tlb_store_handler();
> build_r4000_tlb_modify_handler();
> + if (!cpu_has_local_ebase)
> + build_r4000_tlb_refill_handler();
> run_once++;
> }
> - build_r4000_tlb_refill_handler();
> + if (cpu_has_local_ebase)
> + build_r4000_tlb_refill_handler();
> }
> }
>
>
On 03/20/2013 04:14 PM, David Daney wrote:
> On 03/17/2013 05:49 AM, Huacai Chen wrote:
>> This and the next patch resolve memory corruption problems while CPU
>> hotplug. Without these patches, memory corruption can triggered easily
>> as below:
>>
[...]
>
> We were seeing the same crashes, this patch set seems to fix the problem.
>
> Acked-by: David Daney <[email protected]>
On second thought...
>
>> ---
>> arch/mips/include/asm/cpu-features.h | 3 +++
>> .../asm/mach-loongson/cpu-feature-overrides.h | 1 +
>> arch/mips/mm/page.c | 10 ++++++++++
>> arch/mips/mm/tlbex.c | 10 ++++++++--
>> 4 files changed, 22 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/mips/include/asm/cpu-features.h
>> b/arch/mips/include/asm/cpu-features.h
>> index 1a57e8b..e5ec8fc 100644
>> --- a/arch/mips/include/asm/cpu-features.h
>> +++ b/arch/mips/include/asm/cpu-features.h
>> @@ -113,6 +113,9 @@
>> #ifndef cpu_has_pindexed_dcache
>> #define cpu_has_pindexed_dcache (cpu_data[0].dcache.flags &
>> MIPS_CACHE_PINDEX)
>> #endif
>> +#ifndef cpu_has_local_ebase
>> +#define cpu_has_local_ebase 1
This really should default to 0 and only be set for (??who knows what??).
David Daney
>> +#endif
>>
>> /*
>> * I-Cache snoops remote store. This only matters on SMP. Some
>> multiprocessors
>> diff --git
>> a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>> b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>> index 75fd8c0..c0f3ef4 100644
>> --- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>> +++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>> @@ -57,5 +57,6 @@
>> #define cpu_has_vint 0
>> #define cpu_has_vtag_icache 0
>> #define cpu_has_watch 1
>> +#define cpu_has_local_ebase 0
>>
>> #endif /* __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H */
>> diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
>> index a29fba5..4eb8dcf 100644
>> --- a/arch/mips/mm/page.c
>> +++ b/arch/mips/mm/page.c
>> @@ -247,6 +247,11 @@ void __cpuinit build_clear_page(void)
>> struct uasm_label *l = labels;
>> struct uasm_reloc *r = relocs;
>> int i;
>> + static atomic_t run_once = ATOMIC_INIT(0);
>> +
>> + if (atomic_xchg(&run_once, 1)) {
>> + return;
>> + }
>>
>> memset(labels, 0, sizeof(labels));
>> memset(relocs, 0, sizeof(relocs));
>> @@ -389,6 +394,11 @@ void __cpuinit build_copy_page(void)
>> struct uasm_label *l = labels;
>> struct uasm_reloc *r = relocs;
>> int i;
>> + static atomic_t run_once = ATOMIC_INIT(0);
>> +
>> + if (atomic_xchg(&run_once, 1)) {
>> + return;
>> + }
>>
>> memset(labels, 0, sizeof(labels));
>> memset(relocs, 0, sizeof(relocs));
>> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
>> index 820e661..6bc28b4 100644
>> --- a/arch/mips/mm/tlbex.c
>> +++ b/arch/mips/mm/tlbex.c
>> @@ -2162,8 +2162,11 @@ void __cpuinit build_tlb_refill_handler(void)
>> case CPU_TX3922:
>> case CPU_TX3927:
>> #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
>> - build_r3000_tlb_refill_handler();
>> + if (cpu_has_local_ebase)
>> + build_r3000_tlb_refill_handler();
>> if (!run_once) {
>> + if (!cpu_has_local_ebase)
>> + build_r3000_tlb_refill_handler();
>> build_r3000_tlb_load_handler();
>> build_r3000_tlb_store_handler();
>> build_r3000_tlb_modify_handler();
>> @@ -2192,9 +2195,12 @@ void __cpuinit build_tlb_refill_handler(void)
>> build_r4000_tlb_load_handler();
>> build_r4000_tlb_store_handler();
>> build_r4000_tlb_modify_handler();
>> + if (!cpu_has_local_ebase)
>> + build_r4000_tlb_refill_handler();
>> run_once++;
>> }
>> - build_r4000_tlb_refill_handler();
>> + if (cpu_has_local_ebase)
>> + build_r4000_tlb_refill_handler();
>> }
>> }
>>
>>
>
On Thu, Mar 21, 2013 at 11:53 PM, David Daney <[email protected]> wrote:
> On 03/20/2013 04:14 PM, David Daney wrote:
>>
>> On 03/17/2013 05:49 AM, Huacai Chen wrote:
>>>
>>> This and the next patch resolve memory corruption problems while CPU
>>> hotplug. Without these patches, memory corruption can triggered easily
>>> as below:
>>>
> [...]
>
>>
>> We were seeing the same crashes, this patch set seems to fix the problem.
>>
>> Acked-by: David Daney <[email protected]>
>
>
> On second thought...
>
>
>
>>
>>> ---
>>> arch/mips/include/asm/cpu-features.h | 3 +++
>>> .../asm/mach-loongson/cpu-feature-overrides.h | 1 +
>>> arch/mips/mm/page.c | 10 ++++++++++
>>> arch/mips/mm/tlbex.c | 10 ++++++++--
>>> 4 files changed, 22 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/mips/include/asm/cpu-features.h
>>> b/arch/mips/include/asm/cpu-features.h
>>> index 1a57e8b..e5ec8fc 100644
>>> --- a/arch/mips/include/asm/cpu-features.h
>>> +++ b/arch/mips/include/asm/cpu-features.h
>>> @@ -113,6 +113,9 @@
>>> #ifndef cpu_has_pindexed_dcache
>>> #define cpu_has_pindexed_dcache (cpu_data[0].dcache.flags &
>>> MIPS_CACHE_PINDEX)
>>> #endif
>>> +#ifndef cpu_has_local_ebase
>>> +#define cpu_has_local_ebase 1
>
>
>
> This really should default to 0 and only be set for (??who knows what??).
The original code before this patch assumes that all MIPS CPUs have a local
ebase. To minimize the modification, we default it to 1 (but I don't know
which CPUs actually have a local ebase).
>
> David Daney
>
>
>
>>> +#endif
>>>
>>> /*
>>> * I-Cache snoops remote store. This only matters on SMP. Some
>>> multiprocessors
>>> diff --git
>>> a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>>> b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>>> index 75fd8c0..c0f3ef4 100644
>>> --- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>>> +++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
>>> @@ -57,5 +57,6 @@
>>> #define cpu_has_vint 0
>>> #define cpu_has_vtag_icache 0
>>> #define cpu_has_watch 1
>>> +#define cpu_has_local_ebase 0
>>>
>>> #endif /* __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H */
>>> diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
>>> index a29fba5..4eb8dcf 100644
>>> --- a/arch/mips/mm/page.c
>>> +++ b/arch/mips/mm/page.c
>>> @@ -247,6 +247,11 @@ void __cpuinit build_clear_page(void)
>>> struct uasm_label *l = labels;
>>> struct uasm_reloc *r = relocs;
>>> int i;
>>> + static atomic_t run_once = ATOMIC_INIT(0);
>>> +
>>> + if (atomic_xchg(&run_once, 1)) {
>>> + return;
>>> + }
>>>
>>> memset(labels, 0, sizeof(labels));
>>> memset(relocs, 0, sizeof(relocs));
>>> @@ -389,6 +394,11 @@ void __cpuinit build_copy_page(void)
>>> struct uasm_label *l = labels;
>>> struct uasm_reloc *r = relocs;
>>> int i;
>>> + static atomic_t run_once = ATOMIC_INIT(0);
>>> +
>>> + if (atomic_xchg(&run_once, 1)) {
>>> + return;
>>> + }
>>>
>>> memset(labels, 0, sizeof(labels));
>>> memset(relocs, 0, sizeof(relocs));
>>> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
>>> index 820e661..6bc28b4 100644
>>> --- a/arch/mips/mm/tlbex.c
>>> +++ b/arch/mips/mm/tlbex.c
>>> @@ -2162,8 +2162,11 @@ void __cpuinit build_tlb_refill_handler(void)
>>> case CPU_TX3922:
>>> case CPU_TX3927:
>>> #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
>>> - build_r3000_tlb_refill_handler();
>>> + if (cpu_has_local_ebase)
>>> + build_r3000_tlb_refill_handler();
>>> if (!run_once) {
>>> + if (!cpu_has_local_ebase)
>>> + build_r3000_tlb_refill_handler();
>>> build_r3000_tlb_load_handler();
>>> build_r3000_tlb_store_handler();
>>> build_r3000_tlb_modify_handler();
>>> @@ -2192,9 +2195,12 @@ void __cpuinit build_tlb_refill_handler(void)
>>> build_r4000_tlb_load_handler();
>>> build_r4000_tlb_store_handler();
>>> build_r4000_tlb_modify_handler();
>>> + if (!cpu_has_local_ebase)
>>> + build_r4000_tlb_refill_handler();
>>> run_once++;
>>> }
>>> - build_r4000_tlb_refill_handler();
>>> + if (cpu_has_local_ebase)
>>> + build_r4000_tlb_refill_handler();
>>> }
>>> }
>>>
>>>
>>
>
>