2015-07-03 09:38:37

by Zhu Guihua

[permalink] [raw]
Subject: [PATCH v3 0/2] x86, espfix: silence warning on espfix initialization for ap

The following lockdep warning occurs when running with the latest kernel:
[ 3.178000] ------------[ cut here ]------------
[ 3.183000] WARNING: CPU: 128 PID: 0 at kernel/locking/lockdep.c:2755 lockdep_trace_alloc+0xdd/0xe0()
[ 3.193000] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
[ 3.199000] Modules linked in:

[ 3.203000] CPU: 128 PID: 0 Comm: swapper/128 Not tainted 4.1.0-rc3 #70
[ 3.221000] 0000000000000000 2d6601fb3e6d4e4c ffff88086fd5fc38 ffffffff81773f0a
[ 3.230000] 0000000000000000 ffff88086fd5fc90 ffff88086fd5fc78 ffffffff8108c85a
[ 3.238000] ffff88086fd60000 0000000000000092 ffff88086fd60000 00000000000000d0
[ 3.246000] Call Trace:
[ 3.249000] [<ffffffff81773f0a>] dump_stack+0x4c/0x65
[ 3.255000] [<ffffffff8108c85a>] warn_slowpath_common+0x8a/0xc0
[ 3.261000] [<ffffffff8108c8e5>] warn_slowpath_fmt+0x55/0x70
[ 3.268000] [<ffffffff810ee24d>] lockdep_trace_alloc+0xdd/0xe0
[ 3.274000] [<ffffffff811cda0d>] __alloc_pages_nodemask+0xad/0xca0
[ 3.281000] [<ffffffff810ec7ad>] ? __lock_acquire+0xf6d/0x1560
[ 3.288000] [<ffffffff81219c8a>] alloc_page_interleave+0x3a/0x90
[ 3.295000] [<ffffffff8121b32d>] alloc_pages_current+0x17d/0x1a0
[ 3.301000] [<ffffffff811c869e>] ? __get_free_pages+0xe/0x50
[ 3.308000] [<ffffffff811c869e>] __get_free_pages+0xe/0x50
[ 3.314000] [<ffffffff8102640b>] init_espfix_ap+0x17b/0x320
[ 3.320000] [<ffffffff8105c691>] start_secondary+0xf1/0x1f0
[ 3.327000] ---[ end trace 1b3327d9d6a1d62c ]---

As we allocate pages with GFP_KERNEL in init_espfix_ap(), which is called
before local irqs are enabled, the lockdep sub-system considers this
behaviour as allocating memory with GFP_FS with local irqs disabled,
and triggers the warning mentioned above.

So we allocate them on the boot CPU side when the target CPU is being
brought up by the primary CPU, and hand them over to the secondary CPU.

v3:
-split the original patch into two parts

v2:
-allocate espfix stack pages when the target CPU is being brought up by the
primary CPU
-commit messages changed

v1:
-Alloc the page on the node the target CPU is on.

Zhu Guihua (2):
espfix: add cpu parameter to init_espfix_ap()
x86, espfix: init espfix on the boot cpu side

arch/x86/include/asm/espfix.h | 2 +-
arch/x86/kernel/espfix_64.c | 28 ++++++++++++++++------------
arch/x86/kernel/smpboot.c | 14 +++++++-------
3 files changed, 24 insertions(+), 20 deletions(-)

--
1.9.3


2015-07-03 09:38:31

by Zhu Guihua

[permalink] [raw]
Subject: [PATCH v3 1/2] espfix: add cpu parameter to init_espfix_ap()

Add a cpu index parameter to init_espfix_ap(), so that the parameter
can be propagated to the function for page allocation.

Signed-off-by: Zhu Guihua <[email protected]>
---
arch/x86/include/asm/espfix.h | 2 +-
arch/x86/kernel/espfix_64.c | 7 +++----
arch/x86/kernel/smpboot.c | 2 +-
3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 99efebb..ca3ce9a 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);

extern void init_espfix_bsp(void);
-extern void init_espfix_ap(void);
+extern void init_espfix_ap(int cpu);

#endif /* CONFIG_X86_64 */

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index f5d0730..1fb0e00 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -131,12 +131,12 @@ void __init init_espfix_bsp(void)
init_espfix_random();

/* The rest is the same as for any other processor */
- init_espfix_ap();
+ init_espfix_ap(0);
}

-void init_espfix_ap(void)
+void init_espfix_ap(int cpu)
{
- unsigned int cpu, page;
+ unsigned int page;
unsigned long addr;
pud_t pud, *pud_p;
pmd_t pmd, *pmd_p;
@@ -149,7 +149,6 @@ void init_espfix_ap(void)
if (likely(this_cpu_read(espfix_stack)))
return; /* Already initialized */

- cpu = smp_processor_id();
addr = espfix_base_addr(cpu);
page = cpu/ESPFIX_STACKS_PER_PAGE;

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8add66b..6f5abac 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -242,7 +242,7 @@ static void notrace start_secondary(void *unused)
* Enable the espfix hack for this CPU
*/
#ifdef CONFIG_X86_ESPFIX64
- init_espfix_ap();
+ init_espfix_ap(smp_processor_id());
#endif

/*
--
1.9.3

2015-07-03 09:39:05

by Zhu Guihua

[permalink] [raw]
Subject: [PATCH v3 2/2] x86, espfix: init espfix on the boot cpu side

As we allocate pages with GFP_KERNEL in init_espfix_ap(), which is called
before local irqs are enabled, the lockdep sub-system triggers a warning.

So we allocate them on the boot CPU side when the target CPU is being
brought up by the primary CPU, and hand them over to the secondary CPU.

And we use alloc_pages_node() with the secondary CPU's node, to make sure
the espfix stack is NUMA-local to the CPU that is going to use it.

Signed-off-by: Zhu Guihua <[email protected]>
---
arch/x86/kernel/espfix_64.c | 21 +++++++++++++--------
arch/x86/kernel/smpboot.c | 14 +++++++-------
2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 1fb0e00..ce95676 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -141,12 +141,12 @@ void init_espfix_ap(int cpu)
pud_t pud, *pud_p;
pmd_t pmd, *pmd_p;
pte_t pte, *pte_p;
- int n;
+ int n, node;
void *stack_page;
pteval_t ptemask;

/* We only have to do this once... */
- if (likely(this_cpu_read(espfix_stack)))
+ if (likely(per_cpu(espfix_stack, cpu)))
return; /* Already initialized */

addr = espfix_base_addr(cpu);
@@ -164,12 +164,15 @@ void init_espfix_ap(int cpu)
if (stack_page)
goto unlock_done;

+ node = cpu_to_node(cpu);
ptemask = __supported_pte_mask;

pud_p = &espfix_pud_page[pud_index(addr)];
pud = *pud_p;
if (!pud_present(pud)) {
- pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+ struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+ pmd_p = (pmd_t *)page_address(page);
pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -179,7 +182,9 @@ void init_espfix_ap(int cpu)
pmd_p = pmd_offset(&pud, addr);
pmd = *pmd_p;
if (!pmd_present(pmd)) {
- pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+ struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+ pte_p = (pte_t *)page_address(page);
pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -187,7 +192,7 @@ void init_espfix_ap(int cpu)
}

pte_p = pte_offset_kernel(&pmd, addr);
- stack_page = (void *)__get_free_page(GFP_KERNEL);
+ stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
for (n = 0; n < ESPFIX_PTE_CLONES; n++)
set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -198,7 +203,7 @@ void init_espfix_ap(int cpu)
unlock_done:
mutex_unlock(&espfix_init_mutex);
done:
- this_cpu_write(espfix_stack, addr);
- this_cpu_write(espfix_waddr, (unsigned long)stack_page
- + (addr & ~PAGE_MASK));
+ per_cpu(espfix_stack, cpu) = addr;
+ per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+ + (addr & ~PAGE_MASK);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f5abac..0bd8c1d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -239,13 +239,6 @@ static void notrace start_secondary(void *unused)
check_tsc_sync_target();

/*
- * Enable the espfix hack for this CPU
- */
-#ifdef CONFIG_X86_ESPFIX64
- init_espfix_ap(smp_processor_id());
-#endif
-
- /*
* We need to hold vector_lock so there the set of online cpus
* does not change while we are assigning vectors to cpus. Holding
* this lock ensures we don't half assign or remove an irq from a cpu.
@@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp;

+ /*
+ * Enable the espfix hack for this CPU
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ init_espfix_ap(cpu);
+#endif
+
/* So we see what's up */
announce_cpu(cpu, apicid);

--
1.9.3

Subject: [tip:x86/urgent] x86/espfix: Add 'cpu' parameter to init_espfix_ap()

Commit-ID: 1db875631f8d5bbf497f67e47f254eece888d51d
Gitweb: http://git.kernel.org/tip/1db875631f8d5bbf497f67e47f254eece888d51d
Author: Zhu Guihua <[email protected]>
AuthorDate: Fri, 3 Jul 2015 17:37:18 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Jul 2015 15:00:33 +0200

x86/espfix: Add 'cpu' parameter to init_espfix_ap()

Add a CPU index parameter to init_espfix_ap(), so that the
parameter could be propagated to the function for espfix
page allocation.

Signed-off-by: Zhu Guihua <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/cde3fcf1b3211f3f03feb1a995bce3fee850f0fc.1435824469.git.zhugh.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/espfix.h | 2 +-
arch/x86/kernel/espfix_64.c | 7 +++----
arch/x86/kernel/smpboot.c | 2 +-
3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 99efebb..ca3ce9a 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);

extern void init_espfix_bsp(void);
-extern void init_espfix_ap(void);
+extern void init_espfix_ap(int cpu);

#endif /* CONFIG_X86_64 */

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index f5d0730..1fb0e00 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -131,12 +131,12 @@ void __init init_espfix_bsp(void)
init_espfix_random();

/* The rest is the same as for any other processor */
- init_espfix_ap();
+ init_espfix_ap(0);
}

-void init_espfix_ap(void)
+void init_espfix_ap(int cpu)
{
- unsigned int cpu, page;
+ unsigned int page;
unsigned long addr;
pud_t pud, *pud_p;
pmd_t pmd, *pmd_p;
@@ -149,7 +149,6 @@ void init_espfix_ap(void)
if (likely(this_cpu_read(espfix_stack)))
return; /* Already initialized */

- cpu = smp_processor_id();
addr = espfix_base_addr(cpu);
page = cpu/ESPFIX_STACKS_PER_PAGE;

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8add66b..6f5abac 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -242,7 +242,7 @@ static void notrace start_secondary(void *unused)
* Enable the espfix hack for this CPU
*/
#ifdef CONFIG_X86_ESPFIX64
- init_espfix_ap();
+ init_espfix_ap(smp_processor_id());
#endif

/*

Subject: [tip:x86/urgent] x86/espfix: Init espfix on the boot CPU side

Commit-ID: 20d5e4a9cd453991e2590a4c25230a99b42dee0c
Gitweb: http://git.kernel.org/tip/20d5e4a9cd453991e2590a4c25230a99b42dee0c
Author: Zhu Guihua <[email protected]>
AuthorDate: Fri, 3 Jul 2015 17:37:19 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Jul 2015 15:00:34 +0200

x86/espfix: Init espfix on the boot CPU side

We allocate pages with GFP_KERNEL in init_espfix_ap(), which is
called before we enable local irqs, so the lockdep sub-system
(correctly) triggers a warning about the potentially
blocking API.

So we allocate them on the boot CPU side when the secondary CPU is
brought up by the boot CPU, and hand them over to the secondary
CPU.

And we use alloc_pages_node() with the secondary CPU's node, to
make sure the espfix stack is NUMA-local to the CPU that is
going to use it.

Signed-off-by: Zhu Guihua <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/c97add2670e9abebb90095369f0cfc172373ac94.1435824469.git.zhugh.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/espfix_64.c | 21 +++++++++++++--------
arch/x86/kernel/smpboot.c | 14 +++++++-------
2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 1fb0e00..ce95676 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -141,12 +141,12 @@ void init_espfix_ap(int cpu)
pud_t pud, *pud_p;
pmd_t pmd, *pmd_p;
pte_t pte, *pte_p;
- int n;
+ int n, node;
void *stack_page;
pteval_t ptemask;

/* We only have to do this once... */
- if (likely(this_cpu_read(espfix_stack)))
+ if (likely(per_cpu(espfix_stack, cpu)))
return; /* Already initialized */

addr = espfix_base_addr(cpu);
@@ -164,12 +164,15 @@ void init_espfix_ap(int cpu)
if (stack_page)
goto unlock_done;

+ node = cpu_to_node(cpu);
ptemask = __supported_pte_mask;

pud_p = &espfix_pud_page[pud_index(addr)];
pud = *pud_p;
if (!pud_present(pud)) {
- pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+ struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+ pmd_p = (pmd_t *)page_address(page);
pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -179,7 +182,9 @@ void init_espfix_ap(int cpu)
pmd_p = pmd_offset(&pud, addr);
pmd = *pmd_p;
if (!pmd_present(pmd)) {
- pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+ struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+ pte_p = (pte_t *)page_address(page);
pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -187,7 +192,7 @@ void init_espfix_ap(int cpu)
}

pte_p = pte_offset_kernel(&pmd, addr);
- stack_page = (void *)__get_free_page(GFP_KERNEL);
+ stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
for (n = 0; n < ESPFIX_PTE_CLONES; n++)
set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -198,7 +203,7 @@ void init_espfix_ap(int cpu)
unlock_done:
mutex_unlock(&espfix_init_mutex);
done:
- this_cpu_write(espfix_stack, addr);
- this_cpu_write(espfix_waddr, (unsigned long)stack_page
- + (addr & ~PAGE_MASK));
+ per_cpu(espfix_stack, cpu) = addr;
+ per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+ + (addr & ~PAGE_MASK);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f5abac..0bd8c1d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -239,13 +239,6 @@ static void notrace start_secondary(void *unused)
check_tsc_sync_target();

/*
- * Enable the espfix hack for this CPU
- */
-#ifdef CONFIG_X86_ESPFIX64
- init_espfix_ap(smp_processor_id());
-#endif
-
- /*
* We need to hold vector_lock so there the set of online cpus
* does not change while we are assigning vectors to cpus. Holding
* this lock ensures we don't half assign or remove an irq from a cpu.
@@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp;

+ /*
+ * Enable the espfix hack for this CPU
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ init_espfix_ap(cpu);
+#endif
+
/* So we see what's up */
announce_cpu(cpu, apicid);