Hello, Brian, Ingo.
This patchset contains patches 1-12 of Brian's "move PDA fields to percpu" patchset:
0001-x86-64-Move-irq-stats-from-PDA-to-per-cpu-and-conso.patch
0002-x86-64-Move-TLB-state-from-PDA-to-per-cpu-and-conso.patch
0003-x86-64-Convert-irqstacks-to-per-cpu.patch
0004-x86-64-Convert-exception-stacks-to-per-cpu.patch
0005-x86-64-Move-cpu-number-from-PDA-to-per-cpu-and-cons.patch
0006-x86-64-Move-current-task-from-PDA-to-per-cpu-and-co.patch
0007-x86-64-Move-kernelstack-from-PDA-to-per-cpu.patch
0008-x86-64-Move-oldrsp-from-PDA-to-per-cpu.patch
0009-x86-64-Move-irqcount-from-PDA-to-per-cpu.patch
0010-x86-64-Move-nodenumber-from-PDA-to-per-cpu.patch
0011-x86-64-Move-isidle-from-PDA-to-per-cpu.patch
0012-x86-64-Use-absolute-displacements-for-per-cpu-acces.patch
with the following changes:
* Add underbars between words in variable names (e.g. irqstack ->
irq_stack)
* Drop irq_stack_ptr early initialization on SMP from 0003.
* Change DECLARE/DEFINE_PER_CPU(char, irq_stack[IRQ_STACK_SIZE]) to
DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) in 0003.
* Move cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA
in 0005.
* Remove now unused stack_thread_info() in 0007.
Please pull from
git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
diffstat follows.
arch/x86/ia32/ia32entry.S | 8 +--
arch/x86/include/asm/current.h | 24 +----------
arch/x86/include/asm/hardirq_64.h | 24 +++++++++--
arch/x86/include/asm/mmu_context_64.h | 16 +++----
arch/x86/include/asm/page_64.h | 4 -
arch/x86/include/asm/pda.h | 29 ++-----------
arch/x86/include/asm/percpu.h | 26 ++++++------
arch/x86/include/asm/processor.h | 3 +
arch/x86/include/asm/smp.h | 4 -
arch/x86/include/asm/system.h | 4 -
arch/x86/include/asm/thread_info.h | 20 +++------
arch/x86/include/asm/tlbflush.h | 7 ---
arch/x86/include/asm/topology.h | 3 -
arch/x86/kernel/asm-offsets_64.c | 6 --
arch/x86/kernel/cpu/common.c | 71 ++++++++++++----------------------
arch/x86/kernel/dumpstack_64.c | 35 ++++++++--------
arch/x86/kernel/entry_64.S | 34 ++++++++--------
arch/x86/kernel/irq.c | 6 --
arch/x86/kernel/irq_64.c | 3 +
arch/x86/kernel/nmi.c | 10 ----
arch/x86/kernel/process_32.c | 3 -
arch/x86/kernel/process_64.c | 22 ++++++----
arch/x86/kernel/setup_percpu.c | 18 +++++++-
arch/x86/kernel/smpboot.c | 6 +-
arch/x86/kernel/smpcommon.c | 2
arch/x86/kernel/tlb_32.c | 12 -----
arch/x86/kernel/tlb_64.c | 13 +++---
arch/x86/xen/mmu.c | 6 --
arch/x86/xen/smp.c | 21 +---------
arch/x86/xen/xen-asm_64.S | 31 +++++++-------
30 files changed, 203 insertions(+), 268 deletions(-)
--
tejun
From: Brian Gerst <[email protected]>
Move the exception stacks to per-cpu, removing specific allocation code.
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/kernel/cpu/common.c | 23 ++++++++---------------
1 files changed, 8 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 496f0a0..b6d7eec 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -913,8 +913,9 @@ void __cpuinit pda_init(int cpu)
}
}
-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
- DEBUG_STKSZ] __page_aligned_bss;
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+ __aligned(PAGE_SIZE);
extern asmlinkage void ignore_sysret(void);
@@ -972,15 +973,12 @@ void __cpuinit cpu_init(void)
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
unsigned long v;
- char *estacks = NULL;
struct task_struct *me;
int i;
/* CPU 0 is initialised in head64.c */
if (cpu != 0)
pda_init(cpu);
- else
- estacks = boot_exception_stacks;
me = current;
@@ -1014,18 +1012,13 @@ void __cpuinit cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
- static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ static const unsigned int sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
};
+ char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception "
- "stack %ld %d\n", v, cpu);
- }
- estacks += PAGE_SIZE << order[v];
+ estacks += sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
--
1.6.0.2
From: Brian Gerst <[email protected]>
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/current.h | 24 +++---------------------
arch/x86/include/asm/pda.h | 4 ++--
arch/x86/include/asm/system.h | 4 ++--
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/cpu/common.c | 5 +----
arch/x86/kernel/dumpstack_64.c | 2 +-
arch/x86/kernel/process_64.c | 5 ++++-
arch/x86/kernel/smpboot.c | 3 +--
arch/x86/xen/smp.c | 3 +--
9 files changed, 15 insertions(+), 36 deletions(-)
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0728480..c68c361 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -1,39 +1,21 @@
#ifndef _ASM_X86_CURRENT_H
#define _ASM_X86_CURRENT_H
-#ifdef CONFIG_X86_32
#include <linux/compiler.h>
#include <asm/percpu.h>
+#ifndef __ASSEMBLY__
struct task_struct;
DECLARE_PER_CPU(struct task_struct *, current_task);
-static __always_inline struct task_struct *get_current(void)
-{
- return percpu_read(current_task);
-}
-
-#else /* X86_32 */
-
-#ifndef __ASSEMBLY__
-#include <asm/pda.h>
-
-struct task_struct;
static __always_inline struct task_struct *get_current(void)
{
- return read_pda(pcurrent);
+ return percpu_read(current_task);
}
-#else /* __ASSEMBLY__ */
-
-#include <asm/asm-offsets.h>
-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
+#define current get_current()
#endif /* __ASSEMBLY__ */
-#endif /* X86_32 */
-
-#define current get_current()
-
#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 668d5a5..7209302 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -11,8 +11,8 @@
/* Per processor datastructure. %gs points to it while the kernel runs */
struct x8664_pda {
- struct task_struct *pcurrent; /* 0 Current process */
- unsigned long dummy;
+ unsigned long unused1;
+ unsigned long unused2;
unsigned long kernelstack; /* 16 top of kernel stack for current */
unsigned long oldrsp; /* 24 user rsp for system call */
int irqcount; /* 32 Irq nesting counter. Starts -1 */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8e626ea..4399aac 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -94,7 +94,7 @@ do { \
"call __switch_to\n\t" \
".globl thread_return\n" \
"thread_return:\n\t" \
- "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
+ "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
@@ -106,7 +106,7 @@ do { \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[tif_fork] "i" (TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
+ [current_task] "m" (per_cpu_var(current_task)) \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index cae6697..4f7a210 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -51,7 +51,6 @@ int main(void)
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
ENTRY(kernelstack);
ENTRY(oldrsp);
- ENTRY(pcurrent);
ENTRY(irqcount);
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4221e92..b50e38d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -903,10 +903,7 @@ void __cpuinit pda_init(int cpu)
pda->kernelstack = (unsigned long)stack_thread_info() -
PDA_STACKOFFSET + THREAD_SIZE;
- if (cpu == 0) {
- /* others are initialized in smpboot.c */
- pda->pcurrent = &init_task;
- } else {
+ if (cpu != 0) {
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 28e26a4..d35db59 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -242,7 +242,7 @@ void show_registers(struct pt_regs *regs)
int i;
unsigned long sp;
const int cpu = smp_processor_id();
- struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+ struct task_struct *cur = current;
sp = regs->sp;
printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb92..e00c31a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -57,6 +57,9 @@
asmlinkage extern void ret_from_fork(void);
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -615,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
prev->usersp = read_pda(oldrsp);
write_pda(oldrsp, next->usersp);
- write_pda(pcurrent, next_p);
+ percpu_write(current_task, next_p);
write_pda(kernelstack,
(unsigned long)task_stack_page(next_p) +
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2f0e0f1..5854be0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -790,13 +790,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
set_idle_for_cpu(cpu, c_idle.idle);
do_rest:
-#ifdef CONFIG_X86_32
per_cpu(current_task, cpu) = c_idle.idle;
+#ifdef CONFIG_X86_32
init_gdt(cpu);
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
- cpu_pda(cpu)->pcurrent = c_idle.idle;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 9ff3b09..72c2eb9 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,12 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
struct task_struct *idle = idle_task(cpu);
int rc;
+ per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
init_gdt(cpu);
- per_cpu(current_task, cpu) = idle;
irq_ctx_init(cpu);
#else
- cpu_pda(cpu)->pcurrent = idle;
clear_tsk_thread_flag(idle, TIF_FORK);
#endif
xen_setup_timer(cpu);
--
1.6.0.2
From: Brian Gerst <[email protected]>
Move the irqstackptr variable from the PDA to per-cpu. Make the
stacks themselves per-cpu, removing some specific allocation code.
Add a separate flag (is_boot_cpu) to simplify the per-cpu boot
adjustments.
tj: * sprinkle some underbars around.
* irq_stack_ptr is not used till traps_init(), no reason to
initialize it early. On SMP, just leaving it NULL till proper
initialization in setup_per_cpu_areas() works. Dropped
is_boot_cpu and early irq_stack_ptr initialization.
* do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack)
instead of (char, irq_stack[IRQ_STACK_SIZE]).
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/page_64.h | 4 ++--
arch/x86/include/asm/pda.h | 1 -
arch/x86/include/asm/processor.h | 3 +++
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/cpu/common.c | 19 +++++++------------
arch/x86/kernel/dumpstack_64.c | 33 +++++++++++++++++----------------
arch/x86/kernel/entry_64.S | 6 +++---
arch/x86/kernel/setup_percpu.c | 4 +++-
8 files changed, 35 insertions(+), 36 deletions(-)
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29..e27fdbe 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -13,8 +13,8 @@
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+#define IRQ_STACK_ORDER 2
+#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
#define STACKFAULT_STACK 1
#define DOUBLEFAULT_STACK 2
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 8ee835e..09965f7 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -22,7 +22,6 @@ struct x8664_pda {
/* gcc-ABI: this canary MUST be at
offset 40!!! */
#endif
- char *irqstackptr;
short nodenumber; /* number of current node (32k max) */
short in_bootmem; /* pda lives in bootmem */
short isidle;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 091cd88..f511246 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -378,6 +378,9 @@ union thread_xstate {
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
+
+DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+DECLARE_PER_CPU(char *, irq_stack_ptr);
#endif
extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f4cc81b..5b821fb 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -54,7 +54,6 @@ int main(void)
ENTRY(pcurrent);
ENTRY(irqcount);
ENTRY(cpunumber);
- ENTRY(irqstackptr);
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
#undef ENTRY
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3d0cc6f..496f0a0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -881,7 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
+#else
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+#endif
void __cpuinit pda_init(int cpu)
{
@@ -901,18 +907,7 @@ void __cpuinit pda_init(int cpu)
if (cpu == 0) {
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
- pda->irqstackptr = boot_cpu_stack;
- pda->irqstackptr += IRQSTACKSIZE - 64;
} else {
- if (!pda->irqstackptr) {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d",
- cpu);
- pda->irqstackptr += IRQSTACKSIZE - 64;
- }
-
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d07..28e26a4 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+ unsigned long *irq_stack_end =
+ (unsigned long *)per_cpu(irq_stack_ptr, cpu);
unsigned used = 0;
struct thread_info *tinfo;
int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
stack = (unsigned long *) estack_end[-2];
continue;
}
- if (irqstack_end) {
- unsigned long *irqstack;
- irqstack = irqstack_end -
- (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+ if (irq_stack_end) {
+ unsigned long *irq_stack;
+ irq_stack = irq_stack_end -
+ (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
- if (stack >= irqstack && stack < irqstack_end) {
+ if (stack >= irq_stack && stack < irq_stack_end) {
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
- ops, data, irqstack_end, &graph);
+ ops, data, irq_stack_end, &graph);
/*
* We link to the next stack (which would be
* the process stack normally) the last
* pointer (index -1 to end) in the IRQ stack:
*/
- stack = (unsigned long *) (irqstack_end[-1]);
- irqstack_end = NULL;
+ stack = (unsigned long *) (irq_stack_end[-1]);
+ irq_stack_end = NULL;
ops->stack(data, "EOI");
continue;
}
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
- unsigned long *irqstack_end =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+ unsigned long *irq_stack_end =
+ (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+ unsigned long *irq_stack =
+ (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
/*
* debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
- if (stack >= irqstack && stack <= irqstack_end) {
- if (stack == irqstack_end) {
- stack = (unsigned long *) (irqstack_end[-1]);
+ if (stack >= irq_stack && stack <= irq_stack_end) {
+ if (stack == irq_stack_end) {
+ stack = (unsigned long *) (irq_stack_end[-1]);
printk(" <EOI> ");
}
} else {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4833f3a..d22677a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -345,7 +345,7 @@ ENTRY(save_args)
1: incl %gs:pda_irqcount
jne 2f
popq_cfi %rax /* move return address... */
- mov %gs:pda_irqstackptr,%rsp
+ mov PER_CPU_VAR(irq_stack_ptr),%rsp
EMPTY_FRAME 0
pushq_cfi %rax /* ... to the new stack */
/*
@@ -1261,7 +1261,7 @@ ENTRY(call_softirq)
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
incl %gs:pda_irqcount
- cmove %gs:pda_irqstackptr,%rsp
+ cmove PER_CPU_VAR(irq_stack_ptr),%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
@@ -1300,7 +1300,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
- cmovzq %gs:pda_irqstackptr,%rsp
+ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b5c35af..8b53ef8 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -192,7 +192,10 @@ void __init setup_per_cpu_areas(void)
memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
+ per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
#ifdef CONFIG_X86_64
+ per_cpu(irq_stack_ptr, cpu) =
+ (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
/*
* CPU0 modified pda in the init data area, reload pda
* offset for CPU0 and clear the area for others.
@@ -202,7 +205,6 @@ void __init setup_per_cpu_areas(void)
else
memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
#endif
- per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
}
--
1.6.0.2
From: Brian Gerst <[email protected]>
tj: * in asm-offsets_64.c, pda.h inclusion shouldn't be removed as pda
is still referenced in the file
* s/oldrsp/old_rsp/
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/pda.h | 2 +-
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/entry_64.S | 10 +++++-----
arch/x86/kernel/process_64.c | 8 +++++---
arch/x86/xen/xen-asm_64.S | 8 ++++----
5 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 4d28ffb..ae23deb 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -14,7 +14,7 @@ struct x8664_pda {
unsigned long unused1;
unsigned long unused2;
unsigned long unused3;
- unsigned long oldrsp; /* 24 user rsp for system call */
+ unsigned long unused4;
int irqcount; /* 32 Irq nesting counter. Starts -1 */
unsigned int unused6; /* 36 was cpunumber */
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index cafff5f..afda6de 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,7 +49,6 @@ int main(void)
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- ENTRY(oldrsp);
ENTRY(irqcount);
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0dd4585..7c27da4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64)
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp offset=0
- movq %gs:pda_oldrsp,\tmp
+ movq PER_CPU_VAR(old_rsp),\tmp
movq \tmp,RSP+\offset(%rsp)
movq $__USER_DS,SS+\offset(%rsp)
movq $__USER_CS,CS+\offset(%rsp)
@@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64)
.macro RESTORE_TOP_OF_STACK tmp offset=0
movq RSP+\offset(%rsp),\tmp
- movq \tmp,%gs:pda_oldrsp
+ movq \tmp,PER_CPU_VAR(old_rsp)
movq EFLAGS+\offset(%rsp),\tmp
movq \tmp,R11+\offset(%rsp)
.endm
@@ -479,7 +479,7 @@ ENTRY(system_call)
*/
ENTRY(system_call_after_swapgs)
- movq %rsp,%gs:pda_oldrsp
+ movq %rsp,PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs off/on section - it's straight
@@ -523,7 +523,7 @@ sysret_check:
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
- movq %gs:pda_oldrsp, %rsp
+ movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
@@ -833,7 +833,7 @@ common_interrupt:
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
- /* 0(%rsp): oldrsp-ARGOFFSET */
+ /* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6c5f576..4801289 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -60,6 +60,8 @@ asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(unsigned long, old_rsp);
+
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -395,7 +397,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
- write_pda(oldrsp, new_sp);
+ percpu_write(old_rsp, new_sp);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
regs->flags = 0x200;
@@ -616,8 +618,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- prev->usersp = read_pda(oldrsp);
- write_pda(oldrsp, next->usersp);
+ prev->usersp = percpu_read(old_rsp);
+ percpu_write(old_rsp, next->usersp);
percpu_write(current_task, next_p);
percpu_write(kernel_stack,
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 5a23e89..d6fc51f 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -194,11 +194,11 @@ RELOC(xen_sysexit, 1b+1)
ENTRY(xen_sysret64)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
- movq %rsp, %gs:pda_oldrsp
+ movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
pushq $__USER_DS
- pushq %gs:pda_oldrsp
+ pushq PER_CPU_VAR(old_rsp)
pushq %r11
pushq $__USER_CS
pushq %rcx
@@ -211,11 +211,11 @@ RELOC(xen_sysret64, 1b+1)
ENTRY(xen_sysret32)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
- movq %rsp, %gs:pda_oldrsp
+ movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER32_DS
- pushq %gs:pda_oldrsp
+ pushq PER_CPU_VAR(old_rsp)
pushq %r11
pushq $__USER32_CS
pushq %rcx
--
1.6.0.2
From: Brian Gerst <[email protected]>
tj: * s/nodenumber/node_number/
* removed now unused pda variable from pda_init()
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/pda.h | 1 -
arch/x86/include/asm/topology.h | 3 ++-
arch/x86/kernel/cpu/common.c | 13 ++++++-------
arch/x86/kernel/setup_percpu.c | 4 +++-
4 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 4527d70..b30ef6b 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -22,7 +22,6 @@ struct x8664_pda {
/* gcc-ABI: this canary MUST be at
offset 40!!! */
#endif
- short nodenumber; /* number of current node (32k max) */
short in_bootmem; /* pda lives in bootmem */
short isidle;
} ____cacheline_aligned_in_smp;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 87ca3fd..ffea1fe 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map;
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
/* Returns the number of the current Node. */
-#define numa_node_id() read_pda(nodenumber)
+DECLARE_PER_CPU(int, node_number);
+#define numa_node_id() percpu_read(node_number)
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
extern int cpu_to_node(int cpu);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2323ec..7976a6a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -897,18 +897,11 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1;
void __cpuinit pda_init(int cpu)
{
- struct x8664_pda *pda = cpu_pda(cpu);
-
/* Setup up data that may be needed in __get_free_pages early */
loadsegment(fs, 0);
loadsegment(gs, 0);
load_pda_offset(cpu);
-
- if (cpu != 0) {
- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
- pda->nodenumber = cpu_to_node(cpu);
- }
}
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
@@ -978,6 +971,12 @@ void __cpuinit cpu_init(void)
if (cpu != 0)
pda_init(cpu);
+#ifdef CONFIG_NUMA
+ if (cpu != 0 && percpu_read(node_number) == 0 &&
+ cpu_to_node(cpu) != NUMA_NO_NODE)
+ percpu_write(node_number, cpu_to_node(cpu));
+#endif
+
me = current;
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 258497f..efbafbb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -53,6 +53,8 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1 /* (used later) */
+DEFINE_PER_CPU(int, node_number) = 0;
+EXPORT_PER_CPU_SYMBOL(node_number);
/*
* Map cpu index to node index
@@ -283,7 +285,7 @@ void __cpuinit numa_set_node(int cpu, int node)
per_cpu(x86_cpu_to_node_map, cpu) = node;
if (node != NUMA_NO_NODE)
- cpu_pda(cpu)->nodenumber = node;
+ per_cpu(node_number, cpu) = node;
}
void __cpuinit numa_clear_node(int cpu)
--
1.6.0.2
From: Brian Gerst <[email protected]>
tj: moved cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA
for voyager.
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/pda.h | 2 +-
arch/x86/include/asm/smp.h | 4 +---
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/cpu/common.c | 1 -
arch/x86/kernel/process_32.c | 3 ---
arch/x86/kernel/setup_percpu.c | 10 ++++++++++
arch/x86/kernel/smpcommon.c | 2 --
7 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 09965f7..668d5a5 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -16,7 +16,7 @@ struct x8664_pda {
unsigned long kernelstack; /* 16 top of kernel stack for current */
unsigned long oldrsp; /* 24 user rsp for system call */
int irqcount; /* 32 Irq nesting counter. Starts -1 */
- unsigned int cpunumber; /* 36 Logical CPU number */
+ unsigned int unused6; /* 36 was cpunumber */
#ifdef CONFIG_CC_STACKPROTECTOR
unsigned long stack_canary; /* 40 stack canary value */
/* gcc-ABI: this canary MUST be at
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c7bbbbe..68636e7 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -25,9 +25,7 @@ extern unsigned int num_processors;
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
-#ifdef CONFIG_X86_32
DECLARE_PER_CPU(int, cpu_number);
-#endif
static inline struct cpumask *cpu_sibling_mask(int cpu)
{
@@ -164,7 +162,7 @@ extern unsigned disabled_cpus __cpuinitdata;
extern int safe_smp_processor_id(void);
#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() read_pda(cpunumber)
+#define raw_smp_processor_id() (percpu_read(cpu_number))
#define stack_smp_processor_id() \
({ \
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 5b821fb..cae6697 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -53,7 +53,6 @@ int main(void)
ENTRY(oldrsp);
ENTRY(pcurrent);
ENTRY(irqcount);
- ENTRY(cpunumber);
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
#undef ENTRY
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b6d7eec..4221e92 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -899,7 +899,6 @@ void __cpuinit pda_init(int cpu)
load_pda_offset(cpu);
- pda->cpunumber = cpu;
pda->irqcount = -1;
pda->kernelstack = (unsigned long)stack_thread_info() -
PDA_STACKOFFSET + THREAD_SIZE;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77d5468..2c00a57 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
-DEFINE_PER_CPU(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
/*
* Return saved PC of a blocked thread.
*/
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 8b53ef8..258497f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -22,6 +22,15 @@
# define DBG(x...)
#endif
+/*
+ * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
+ * voyager wants cpu_number too.
+ */
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+#endif
+
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
@@ -193,6 +202,7 @@ void __init setup_per_cpu_areas(void)
memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
+ per_cpu(cpu_number, cpu) = cpu;
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 7e15781..add36b4 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -28,7 +28,5 @@ __cpuinit void init_gdt(int cpu)
write_gdt_entry(get_cpu_gdt_table(cpu),
GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
-
- per_cpu(cpu_number, cpu) = cpu;
}
#endif
--
1.6.0.2
From: Brian Gerst <[email protected]>
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/hardirq_64.h | 24 +++++++++++++++++++-----
arch/x86/include/asm/pda.h | 10 ----------
arch/x86/kernel/irq.c | 6 +-----
arch/x86/kernel/irq_64.c | 3 +++
arch/x86/kernel/nmi.c | 10 +---------
arch/x86/xen/smp.c | 18 +++---------------
6 files changed, 27 insertions(+), 44 deletions(-)
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index b5a6b5d..a65bab2 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -3,22 +3,36 @@
#include <linux/threads.h>
#include <linux/irq.h>
-#include <asm/pda.h>
#include <asm/apic.h>
+typedef struct {
+ unsigned int __softirq_pending;
+ unsigned int __nmi_count; /* arch dependent */
+ unsigned int apic_timer_irqs; /* arch dependent */
+ unsigned int irq0_irqs;
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+ unsigned int irq_tlb_count;
+ unsigned int irq_thermal_count;
+ unsigned int irq_spurious_count;
+ unsigned int irq_threshold_count;
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
#define __ARCH_IRQ_STAT 1
-#define inc_irq_stat(member) add_pda(member, 1)
+#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
-#define local_softirq_pending() read_pda(__softirq_pending)
+#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING 1
-#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
-#define or_softirq_pending(x) or_pda(__softirq_pending, (x))
+#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
extern void ack_bad_irq(unsigned int irq);
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 47f274f..69a4075 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -25,19 +25,9 @@ struct x8664_pda {
char *irqstackptr;
short nodenumber; /* number of current node (32k max) */
short in_bootmem; /* pda lives in bootmem */
- unsigned int __softirq_pending;
- unsigned int __nmi_count; /* number of NMI on this CPUs */
short mmu_state;
short isidle;
struct mm_struct *active_mm;
- unsigned apic_timer_irqs;
- unsigned irq0_irqs;
- unsigned irq_resched_count;
- unsigned irq_call_count;
- unsigned irq_tlb_count;
- unsigned irq_thermal_count;
- unsigned irq_threshold_count;
- unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
DECLARE_PER_CPU(struct x8664_pda, __pda);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2d..8b30d0c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
#endif
}
-#ifdef CONFIG_X86_32
-# define irq_stats(x) (&per_cpu(irq_stat, x))
-#else
-# define irq_stats(x) cpu_pda(x)
-#endif
+#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
* /proc/interrupts printing:
*/
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 0b21cb1..1db0524 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -19,6 +19,9 @@
#include <asm/io_apic.h>
#include <asm/idle.h>
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
/*
* Probabilistic stack overflow check:
*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979..23b6d9e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -61,11 +61,7 @@ static int endflag __initdata;
static inline unsigned int get_nmi_count(int cpu)
{
-#ifdef CONFIG_X86_64
- return cpu_pda(cpu)->__nmi_count;
-#else
- return nmi_count(cpu);
-#endif
+ return per_cpu(irq_stat, cpu).__nmi_count;
}
static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
*/
static inline unsigned int get_timer_irqs(int cpu)
{
-#ifdef CONFIG_X86_64
- return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
-#else
return per_cpu(irq_stat, cpu).apic_timer_irqs +
per_cpu(irq_stat, cpu).irq0_irqs;
-#endif
}
#ifdef CONFIG_SMP
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3bfd6dd..9ff3b09 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
*/
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
-#ifdef CONFIG_X86_32
- __get_cpu_var(irq_stat).irq_resched_count++;
-#else
- add_pda(irq_resched_count, 1);
-#endif
+ inc_irq_stat(irq_resched_count);
return IRQ_HANDLED;
}
@@ -435,11 +431,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_interrupt();
-#ifdef CONFIG_X86_32
- __get_cpu_var(irq_stat).irq_call_count++;
-#else
- add_pda(irq_call_count, 1);
-#endif
+ inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
@@ -449,11 +441,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_single_interrupt();
-#ifdef CONFIG_X86_32
- __get_cpu_var(irq_stat).irq_call_count++;
-#else
- add_pda(irq_call_count, 1);
-#endif
+ inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
--
1.6.0.2
From: Brian Gerst <[email protected]>
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/mmu_context_64.h | 16 +++++++---------
arch/x86/include/asm/pda.h | 2 --
arch/x86/include/asm/tlbflush.h | 7 ++-----
arch/x86/kernel/cpu/common.c | 2 --
arch/x86/kernel/tlb_32.c | 12 ++----------
arch/x86/kernel/tlb_64.c | 13 ++++++++-----
arch/x86/xen/mmu.c | 6 +-----
7 files changed, 20 insertions(+), 38 deletions(-)
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
index 677d36e..c457250 100644
--- a/arch/x86/include/asm/mmu_context_64.h
+++ b/arch/x86/include/asm/mmu_context_64.h
@@ -1,13 +1,11 @@
#ifndef _ASM_X86_MMU_CONTEXT_64_H
#define _ASM_X86_MMU_CONTEXT_64_H
-#include <asm/pda.h>
-
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
- if (read_pda(mmu_state) == TLBSTATE_OK)
- write_pda(mmu_state, TLBSTATE_LAZY);
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
@@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
- write_pda(mmu_state, TLBSTATE_OK);
- write_pda(active_mm, next);
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ percpu_write(cpu_tlbstate.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
load_cr3(next->pgd);
@@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
}
#ifdef CONFIG_SMP
else {
- write_pda(mmu_state, TLBSTATE_OK);
- if (read_pda(active_mm) != next)
- BUG();
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 69a4075..8ee835e 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -25,9 +25,7 @@ struct x8664_pda {
char *irqstackptr;
short nodenumber; /* number of current node (32k max) */
short in_bootmem; /* pda lives in bootmem */
- short mmu_state;
short isidle;
- struct mm_struct *active_mm;
} ____cacheline_aligned_in_smp;
DECLARE_PER_CPU(struct x8664_pda, __pda);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 17feaa9..d3539f9 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
-#ifdef CONFIG_X86_32
struct tlb_state {
struct mm_struct *active_mm;
int state;
- char __cacheline_padding[L1_CACHE_BYTES-8];
};
DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
-void reset_lazy_tlbstate(void);
-#else
static inline void reset_lazy_tlbstate(void)
{
+ percpu_write(cpu_tlbstate.state, 0);
+ percpu_write(cpu_tlbstate.active_mm, &init_mm);
}
-#endif
#endif /* SMP */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c49498d..3d0cc6f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -897,8 +897,6 @@ void __cpuinit pda_init(int cpu)
pda->irqcount = -1;
pda->kernelstack = (unsigned long)stack_thread_info() -
PDA_STACKOFFSET + THREAD_SIZE;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
if (cpu == 0) {
/* others are initialized in smpboot.c */
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index e65449d..abf0808 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -4,8 +4,8 @@
#include <asm/tlbflush.h>
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
- ____cacheline_aligned = { &init_mm, 0, };
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+ = { &init_mm, 0, };
/* must come after the send_IPI functions above for inlining */
#include <mach_ipi.h>
@@ -231,14 +231,6 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
-void reset_lazy_tlbstate(void)
-{
- int cpu = raw_smp_processor_id();
-
- per_cpu(cpu_tlbstate, cpu).state = 0;
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-
static int init_flush_cpumask(void)
{
alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 7f4141d..e64a32c 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -18,6 +18,9 @@
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+ = { &init_mm, 0, };
+
#include <mach_ipi.h>
/*
* Smarter SMP flushing macros.
@@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
*/
void leave_mm(int cpu)
{
- if (read_pda(mmu_state) == TLBSTATE_OK)
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
- cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+ cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
* BUG();
*/
- if (f->flush_mm == read_pda(active_mm)) {
- if (read_pda(mmu_state) == TLBSTATE_OK) {
+ if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
@@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info)
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
- if (read_pda(mmu_state) == TLBSTATE_LAZY)
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(cpu);
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 7bc7852..98cb986 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info)
struct mm_struct *mm = info;
struct mm_struct *active_mm;
-#ifdef CONFIG_X86_64
- active_mm = read_pda(active_mm);
-#else
- active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
-#endif
+ active_mm = percpu_read(cpu_tlbstate.active_mm);
if (active_mm == mm)
leave_mm(smp_processor_id());
--
1.6.0.2
From: Brian Gerst <[email protected]>
Accessing memory through %gs should not use rip-relative addressing.
Adding a P prefix for the argument tells gcc to not add (%rip) to
the memory references.
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/percpu.h | 26 +++++++++++++-------------
arch/x86/include/asm/system.h | 2 +-
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 03aa4b0..165d527 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -39,10 +39,10 @@
#include <linux/stringify.h>
#ifdef CONFIG_SMP
-#define __percpu_seg_str "%%"__stringify(__percpu_seg)":"
+#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off)
#else
-#define __percpu_seg_str
+#define __percpu_arg(x) "%" #x
#endif
/* For arch-specific code, we can use direct single-insn ops (they
@@ -58,22 +58,22 @@ do { \
} \
switch (sizeof(var)) { \
case 1: \
- asm(op "b %1,"__percpu_seg_str"%0" \
+ asm(op "b %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 2: \
- asm(op "w %1,"__percpu_seg_str"%0" \
+ asm(op "w %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 4: \
- asm(op "l %1,"__percpu_seg_str"%0" \
+ asm(op "l %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 8: \
- asm(op "q %1,"__percpu_seg_str"%0" \
+ asm(op "q %1,"__percpu_arg(0) \
: "+m" (var) \
: "r" ((T__)val)); \
break; \
@@ -86,22 +86,22 @@ do { \
typeof(var) ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_seg_str"%1,%0" \
+ asm(op "b "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_seg_str"%1,%0" \
+ asm(op "w "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_seg_str"%1,%0" \
+ asm(op "l "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_seg_str"%1,%0" \
+ asm(op "q "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
@@ -122,9 +122,9 @@ do { \
#define x86_test_and_clear_bit_percpu(bit, var) \
({ \
int old__; \
- asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0" \
- : "=r" (old__) \
- : "dIr" (bit), "i" (&per_cpu__##var) : "memory"); \
+ asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
+ : "=r" (old__), "+m" (per_cpu__##var) \
+ : "dIr" (bit)); \
old__; \
})
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 4399aac..d1dc27d 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -94,7 +94,7 @@ do { \
"call __switch_to\n\t" \
".globl thread_return\n" \
"thread_return:\n\t" \
- "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \
+ "movq "__percpu_arg([current_task])",%%rsi\n\t" \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
--
1.6.0.2
From: Brian Gerst <[email protected]>
Also clean up PER_CPU_VAR usage in xen-asm_64.S
tj: * remove now unused stack_thread_info()
* s/kernelstack/kernel_stack/
* added FIXME comment in xen-asm_64.S
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/ia32/ia32entry.S | 8 ++++----
arch/x86/include/asm/pda.h | 4 +---
arch/x86/include/asm/thread_info.h | 20 ++++++++------------
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/cpu/common.c | 6 ++++--
arch/x86/kernel/entry_64.S | 4 ++--
arch/x86/kernel/process_64.c | 4 ++--
arch/x86/kernel/smpboot.c | 3 +++
arch/x86/xen/xen-asm_64.S | 23 +++++++++++------------
9 files changed, 35 insertions(+), 38 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b..9c79b24 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp
SWAPGS_UNSAFE_STACK
- movq %gs:pda_kernelstack, %rsp
- addq $(PDA_STACKOFFSET),%rsp
+ movq PER_CPU_VAR(kernel_stack), %rsp
+ addq $(KERNEL_STACK_OFFSET),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,PDA_STACKOFFSET
+ CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
- movq %gs:pda_kernelstack,%rsp
+ movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 7209302..4d28ffb 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -13,7 +13,7 @@
struct x8664_pda {
unsigned long unused1;
unsigned long unused2;
- unsigned long kernelstack; /* 16 top of kernel stack for current */
+ unsigned long unused3;
unsigned long oldrsp; /* 24 user rsp for system call */
int irqcount; /* 32 Irq nesting counter. Starts -1 */
unsigned int unused6; /* 36 was cpunumber */
@@ -44,6 +44,4 @@ extern void pda_init(int);
#endif
-#define PDA_STACKOFFSET (5*8)
-
#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 9878964..b46f8ca 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void)
#else /* X86_32 */
-#include <asm/pda.h>
+#include <asm/percpu.h>
+#define KERNEL_STACK_OFFSET (5*8)
/*
* macros/functions for gaining access to the thread information structure
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
#ifndef __ASSEMBLY__
-static inline struct thread_info *current_thread_info(void)
-{
- struct thread_info *ti;
- ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
- return ti;
-}
+DECLARE_PER_CPU(unsigned long, kernel_stack);
-/* do not use in interrupt context */
-static inline struct thread_info *stack_thread_info(void)
+static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
- asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+ ti = (void *)(percpu_read(kernel_stack) +
+ KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
@@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void)
/* how to get the thread information struct from ASM */
#define GET_THREAD_INFO(reg) \
- movq %gs:pda_kernelstack,reg ; \
- subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+ movq PER_CPU_VAR(kernel_stack),reg ; \
+ subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4f7a210..cafff5f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,7 +49,6 @@ int main(void)
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- ENTRY(kernelstack);
ENTRY(oldrsp);
ENTRY(irqcount);
DEFINE(pda_size, sizeof(struct x8664_pda));
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b50e38d..06b6290 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -889,6 +889,10 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
#endif
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+ (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
+
void __cpuinit pda_init(int cpu)
{
struct x8664_pda *pda = cpu_pda(cpu);
@@ -900,8 +904,6 @@ void __cpuinit pda_init(int cpu)
load_pda_offset(cpu);
pda->irqcount = -1;
- pda->kernelstack = (unsigned long)stack_thread_info() -
- PDA_STACKOFFSET + THREAD_SIZE;
if (cpu != 0) {
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d22677a..0dd4585 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -468,7 +468,7 @@ END(ret_from_fork)
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,PDA_STACKOFFSET
+ CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
@@ -480,7 +480,7 @@ ENTRY(system_call)
ENTRY(system_call_after_swapgs)
movq %rsp,%gs:pda_oldrsp
- movq %gs:pda_kernelstack,%rsp
+ movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e00c31a..6c5f576 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -620,9 +620,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
write_pda(oldrsp, next->usersp);
percpu_write(current_task, next_p);
- write_pda(kernelstack,
+ percpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
- THREAD_SIZE - PDA_STACKOFFSET);
+ THREAD_SIZE - KERNEL_STACK_OFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
write_pda(stack_canary, next_p->stack_canary);
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5854be0..869b988 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -798,6 +798,9 @@ do_rest:
#else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
+ per_cpu(kernel_stack, cpu) =
+ (unsigned long)task_stack_page(c_idle.idle) -
+ KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 05794c5..5a23e89 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -17,6 +17,7 @@
#include <asm/processor-flags.h>
#include <asm/errno.h>
#include <asm/segment.h>
+#include <asm/percpu.h>
#include <xen/interface/xen.h>
@@ -28,12 +29,10 @@
#if 1
/*
- x86-64 does not yet support direct access to percpu variables
- via a segment override, so we just need to make sure this code
- never gets used
+ FIXME: x86_64 now can support direct access to percpu variables
+ via a segment override. Update xen accordingly.
*/
#define BUG ud2a
-#define PER_CPU_VAR(var, off) 0xdeadbeef
#endif
/*
@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
BUG
/* Unmask events */
- movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+ movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with
any pending interrupts. The pending check may end up being
run on the wrong CPU, but that doesn't hurt. */
/* Test for pending */
- testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jz 1f
2: call check_events
@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
ENTRY(xen_irq_disable_direct)
BUG
- movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+ movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
ENDPATCH(xen_irq_disable_direct)
ret
ENDPROC(xen_irq_disable_direct)
@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
ENTRY(xen_save_fl_direct)
BUG
- testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah,%ah
ENDPATCH(xen_save_fl_direct)
@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
BUG
testb $X86_EFLAGS_IF>>8, %ah
- setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+ setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with
any pending interrupts. The pending check may end up being
run on the wrong CPU, but that doesn't hurt. */
/* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jz 1f
2: call check_events
1:
@@ -196,7 +195,7 @@ ENTRY(xen_sysret64)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
movq %rsp, %gs:pda_oldrsp
- movq %gs:pda_kernelstack,%rsp
+ movq PER_CPU_VAR(kernel_stack),%rsp
pushq $__USER_DS
pushq %gs:pda_oldrsp
@@ -213,7 +212,7 @@ ENTRY(xen_sysret32)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
movq %rsp, %gs:pda_oldrsp
- movq %gs:pda_kernelstack, %rsp
+ movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER32_DS
pushq %gs:pda_oldrsp
--
1.6.0.2
From: Brian Gerst <[email protected]>
tj: s/irqcount/irq_count/
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/pda.h | 2 +-
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/cpu/common.c | 4 ++--
arch/x86/kernel/entry_64.S | 14 +++++++-------
4 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index ae23deb..4527d70 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -15,7 +15,7 @@ struct x8664_pda {
unsigned long unused2;
unsigned long unused3;
unsigned long unused4;
- int irqcount; /* 32 Irq nesting counter. Starts -1 */
+ int unused5;
unsigned int unused6; /* 36 was cpunumber */
#ifdef CONFIG_CC_STACKPROTECTOR
unsigned long stack_canary; /* 40 stack canary value */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index afda6de..64c834a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,7 +49,6 @@ int main(void)
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- ENTRY(irqcount);
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
#undef ENTRY
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 06b6290..e2323ec 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -893,6 +893,8 @@ DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
+
void __cpuinit pda_init(int cpu)
{
struct x8664_pda *pda = cpu_pda(cpu);
@@ -903,8 +905,6 @@ void __cpuinit pda_init(int cpu)
load_pda_offset(cpu);
- pda->irqcount = -1;
-
if (cpu != 0) {
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7c27da4..c52b609 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -337,12 +337,12 @@ ENTRY(save_args)
je 1f
SWAPGS
/*
- * irqcount is used to check if a CPU is already on an interrupt stack
+ * irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
-1: incl %gs:pda_irqcount
+1: incl PER_CPU_VAR(irq_count)
jne 2f
popq_cfi %rax /* move return address... */
mov PER_CPU_VAR(irq_stack_ptr),%rsp
@@ -837,7 +837,7 @@ common_interrupt:
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- decl %gs:pda_irqcount
+ decl PER_CPU_VAR(irq_count)
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
@@ -1260,14 +1260,14 @@ ENTRY(call_softirq)
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
- incl %gs:pda_irqcount
+ incl PER_CPU_VAR(irq_count)
cmove PER_CPU_VAR(irq_stack_ptr),%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
- decl %gs:pda_irqcount
+ decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
END(call_softirq)
@@ -1297,7 +1297,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
DEFAULT_FRAME
-11: incl %gs:pda_irqcount
+11: incl PER_CPU_VAR(irq_count)
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
@@ -1305,7 +1305,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
- decl %gs:pda_irqcount
+ decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
END(do_hypervisor_callback)
--
1.6.0.2
From: Brian Gerst <[email protected]>
tj: s/isidle/is_idle/
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/x86/include/asm/pda.h | 1 -
arch/x86/kernel/process_64.c | 5 +++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index b30ef6b..c31ca04 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -23,7 +23,6 @@ struct x8664_pda {
offset 40!!! */
#endif
short in_bootmem; /* pda lives in bootmem */
- short isidle;
} ____cacheline_aligned_in_smp;
DECLARE_PER_CPU(struct x8664_pda, __pda);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4801289..4523ff8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,6 +61,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, old_rsp);
+static DEFINE_PER_CPU(unsigned char, is_idle);
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
@@ -80,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
void enter_idle(void)
{
- write_pda(isidle, 1);
+ percpu_write(is_idle, 1);
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
static void __exit_idle(void)
{
- if (test_and_clear_bit_pda(0, isidle) == 0)
+ if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
--
1.6.0.2
* Tejun Heo <[email protected]> wrote:
>
> Hello, Brian, Ingo.
>
> This patchset is 1-12 of Brian's move PDA fields to percpu patchset.
>
> 0001-x86-64-Move-irq-stats-from-PDA-to-per-cpu-and-conso.patch
> 0002-x86-64-Move-TLB-state-from-PDA-to-per-cpu-and-conso.patch
> 0003-x86-64-Convert-irqstacks-to-per-cpu.patch
> 0004-x86-64-Convert-exception-stacks-to-per-cpu.patch
> 0005-x86-64-Move-cpu-number-from-PDA-to-per-cpu-and-cons.patch
> 0006-x86-64-Move-current-task-from-PDA-to-per-cpu-and-co.patch
> 0007-x86-64-Move-kernelstack-from-PDA-to-per-cpu.patch
> 0008-x86-64-Move-oldrsp-from-PDA-to-per-cpu.patch
> 0009-x86-64-Move-irqcount-from-PDA-to-per-cpu.patch
> 0010-x86-64-Move-nodenumber-from-PDA-to-per-cpu.patch
> 0011-x86-64-Move-isidle-from-PDA-to-per-cpu.patch
> 0012-x86-64-Use-absolute-displacements-for-per-cpu-acces.patch
>
> with the following changes
>
> * Add underbars between words in variable names (e.g. irqstack ->
> irq_stack)
>
> * Drop irq_stack_ptr early initialization on SMP from 0003.
>
> * Change DECLARE/DEFINE_PER_CPU(char, irq_stack[IRQ_STACK_SIZE]) to
> DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) in 0003.
>
> * Move cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA
> in 0005.
>
> * Remove now unused stack_thread_info() in 0007.
>
> Please pull from
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
Pulled, thanks Tejun!
Ingo
On Sun, Jan 18, 2009 at 11:39 AM, Tejun Heo <[email protected]> wrote:
> From: Brian Gerst <[email protected]>
>
> Move the irqstackptr variable from the PDA to per-cpu. Make the
> stacks themselves per-cpu, removing some specific allocation code.
> Add a separate flag (is_boot_cpu) to simplify the per-cpu boot
> adjustments.
>
> tj: * sprinkle some underbars around.
>
> * irq_stack_ptr is not used till traps_init(), no reason to
> initialize it early. On SMP, just leaving it NULL till proper
> initialization in setup_per_cpu_areas() works. Dropped
> is_boot_cpu and early irq_stack_ptr initialization.
And it adds more #ifdefs. All these conditional initializations on
CONFIG_SMP are really cluttering up the code. It also conflicts with
one of my goals with these patches: have as much state as possible
already prepared when a cpu boots. For the boot cpu this means static
initialization. For secondary cpus, that means setting up the values
in setup_per_cpu_areas(). This eliminates any window where the state
isn't ready yet, as you've already seen with per_cpu_offset.
And is_boot_cpu was a worthwhile optimization on its own. I had plans
on using it in more places later.
--
Brian Gerst
Hello, Brian.
Brian Gerst wrote:
>> * irq_stack_ptr is not used till traps_init(), no reason to
>> initialize it early. On SMP, just leaving it NULL till proper
>> initialization in setup_per_cpu_areas() works. Dropped
>> is_boot_cpu and early irq_stack_ptr initialization.
>
> And it adds more #ifdefs. All these conditional initializations on
> CONFIG_SMP are really cluttering up the code.
Yes, at the cost of removing a hidden relocation in head_64.S. We can
also remove #ifdef there and just comment that it will be overridden
during setup_per_cpu_areas() but I think #ifdef there is better for
documentation purposes.
> It also conflicts with one of my goals with these patches: have as
> much state as possible already prepared when a cpu boots. For the
> boot cpu this means static initialization. For secondary cpus, that
> means setting up the values in setup_per_cpu_areas(). This
> eliminates any window where the state isn't ready yet, as you've
> already seen with per_cpu_offset.
I like the goal, but it has certain dangers in the current form because
the boot cpu is using the init data area, not its actual percpu area,
so each usage needs to be closely controlled until the actual percpu
areas are set up. So, in the current form, I don't think there's a good
reason to hurry initialization of variables which are not used early.
It also in a way fragments initialization paths further. It would be
nice if boot cpu and secondary cpus can do it using the same path at
different times.
> And is_boot_cpu was a worthwhile optimization on its own. I had plans
> on using it in more places later.
I didn't have any objection against it but with the additional
relocation removed, it looked out of place in the patch. Please feel
free to add it back as necessary.
Thanks.
--
tejun
btw., i think we could (and should) now remove pda.h altogether. The stack
canary percpu structure should be defined in stackprotector.h [that file
is available in tip/master] - and all the PDA references and accessors can
be removed, right?
Ingo
On Sun, Jan 18, 2009 at 11:39 AM, Tejun Heo <[email protected]> wrote:
>
> Hello, Brian, Ingo.
>
> This patchset is 1-12 of Brian's move PDA fields to percpu patchset.
>
> 0001-x86-64-Move-irq-stats-from-PDA-to-per-cpu-and-conso.patch
> 0002-x86-64-Move-TLB-state-from-PDA-to-per-cpu-and-conso.patch
> 0003-x86-64-Convert-irqstacks-to-per-cpu.patch
> 0004-x86-64-Convert-exception-stacks-to-per-cpu.patch
> 0005-x86-64-Move-cpu-number-from-PDA-to-per-cpu-and-cons.patch
> 0006-x86-64-Move-current-task-from-PDA-to-per-cpu-and-co.patch
> 0007-x86-64-Move-kernelstack-from-PDA-to-per-cpu.patch
> 0008-x86-64-Move-oldrsp-from-PDA-to-per-cpu.patch
> 0009-x86-64-Move-irqcount-from-PDA-to-per-cpu.patch
> 0010-x86-64-Move-nodenumber-from-PDA-to-per-cpu.patch
> 0011-x86-64-Move-isidle-from-PDA-to-per-cpu.patch
> 0012-x86-64-Use-absolute-displacements-for-per-cpu-acces.patch
Here are the remaining patches rebased on top of these.
--
Brian Gerst
Copy the code to cpu_init() to satisfy the requirement that the cpu
be reinitialized. Remove all other calls, since the segments are
already initialized in head_64.S.
Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/pda.h | 1 -
arch/x86/kernel/cpu/common.c | 15 +++------------
arch/x86/kernel/head64.c | 2 --
arch/x86/xen/enlighten.c | 1 -
4 files changed, 3 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index c31ca04..6ca7bc0 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -26,7 +26,6 @@ struct x8664_pda {
} ____cacheline_aligned_in_smp;
DECLARE_PER_CPU(struct x8664_pda, __pda);
-extern void pda_init(int);
#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7976a6a..f83a4d6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
-void __cpuinit pda_init(int cpu)
-{
- /* Setup up data that may be needed in __get_free_pages early */
- loadsegment(fs, 0);
- loadsegment(gs, 0);
-
- load_pda_offset(cpu);
-}
-
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
__aligned(PAGE_SIZE);
@@ -967,9 +958,9 @@ void __cpuinit cpu_init(void)
struct task_struct *me;
int i;
- /* CPU 0 is initialised in head64.c */
- if (cpu != 0)
- pda_init(cpu);
+ loadsegment(fs, 0);
+ loadsegment(gs, 0);
+ load_pda_offset(cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index af67d32..f5b2722 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
if (console_loglevel == 10)
early_printk("Kernel alive\n");
- pda_init(0);
-
x86_64_start_reservations(real_mode_data);
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75b9413..bef941f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
#ifdef CONFIG_X86_64
/* Disable until direct per-cpu data access. */
have_vcpu_info_placement = 0;
- pda_init(0);
#endif
xen_smp_init();
--
1.6.1.rc1
Refactor the DEFINE_PER_CPU_* macros and add .data.percpu.first
section.
Signed-off-by: Brian Gerst <[email protected]>
---
include/asm-generic/vmlinux.lds.h | 1 +
include/linux/percpu.h | 41 ++++++++++++++++++++----------------
2 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index aa6b9b1..32bbf50 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -486,6 +486,7 @@
*/
#define PERCPU_VADDR(vaddr, phdr) \
PERCPU_PROLOG(vaddr) \
+ *(.data.percpu.first) \
*(.data.percpu.page_aligned) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 9f2a375..0e24202 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,34 +9,39 @@
#include <asm/percpu.h>
#ifdef CONFIG_SMP
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) \
- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+#define PER_CPU_BASE_SECTION ".data.percpu"
#ifdef MODULE
-#define SHARED_ALIGNED_SECTION ".data.percpu"
+#define PER_CPU_SHARED_ALIGNED_SECTION ""
#else
-#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned"
+#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
#endif
+#define PER_CPU_FIRST_SECTION ".first"
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(SHARED_ALIGNED_SECTION))) \
- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
- ____cacheline_aligned_in_smp
+#else
+
+#define PER_CPU_BASE_SECTION ".data"
+#define PER_CPU_SHARED_ALIGNED_SECTION ""
+#define PER_CPU_FIRST_SECTION ""
+
+#endif
-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.page_aligned"))) \
+#define DEFINE_PER_CPU_SECTION(type, name, section) \
+ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-#else
+
#define DEFINE_PER_CPU(type, name) \
- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+ DEFINE_PER_CPU_SECTION(type, name, "")
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
+#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-#endif
+#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
+
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
--
1.6.1.rc1
Use cpu_number to determine if the adjustment is necessary.
Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/kernel/head_64.S | 24 ++++++++----------------
1 files changed, 8 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c8ace88..98ea26a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -207,19 +207,15 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP
/*
- * early_gdt_base should point to the gdt_page in static percpu init
- * data area. Computing this requires two symbols - __per_cpu_load
- * and per_cpu__gdt_page. As linker can't do no such relocation, do
- * it by hand. As early_gdt_descr is manipulated by C code for
- * secondary CPUs, this should be done only once for the boot CPU
- * when early_gdt_descr_base contains zero.
+ * Fix up static pointers that need __per_cpu_load added. The assembler
+ * is unable to do this directly. This is only needed for the boot cpu.
+ * These values are set up with the correct base addresses by C code for
+ * secondary cpus.
*/
- movq early_gdt_descr_base(%rip), %rax
- testq %rax, %rax
- jnz 1f
- movq $__per_cpu_load, %rax
- addq $per_cpu__gdt_page, %rax
- movq %rax, early_gdt_descr_base(%rip)
+ movq initial_gs(%rip), %rax
+ cmpl $0, per_cpu__cpu_number(%rax)
+ jne 1f
+ addq %rax, early_gdt_descr_base(%rip)
1:
#endif
/*
@@ -431,12 +427,8 @@ NEXT_PAGE(level2_spare_pgt)
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
-#ifdef CONFIG_SMP
early_gdt_descr_base:
- .quad 0x0000000000000000
-#else
.quad per_cpu__gdt_page
-#endif
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
--
1.6.1.rc1
Now that the PDA is empty except for the stack canary, it can be removed.
The irqstack is moved to the start of the per-cpu section. If the stack
protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack.
Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/pda.h | 5 -----
arch/x86/include/asm/percpu.h | 6 ------
arch/x86/include/asm/processor.h | 25 ++++++++++++++++++++++++-
arch/x86/kernel/asm-offsets_64.c | 4 ----
arch/x86/kernel/cpu/common.c | 10 +++-------
arch/x86/kernel/head_64.S | 13 +++++--------
arch/x86/kernel/process_64.c | 6 +++---
arch/x86/kernel/setup_percpu.c | 34 ++++------------------------------
arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++--
9 files changed, 45 insertions(+), 66 deletions(-)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 6ca7bc0..ba46416 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -17,11 +17,6 @@ struct x8664_pda {
unsigned long unused4;
int unused5;
unsigned int unused6; /* 36 was cpunumber */
-#ifdef CONFIG_CC_STACKPROTECTOR
- unsigned long stack_canary; /* 40 stack canary value */
- /* gcc-ABI: this canary MUST be at
- offset 40!!! */
-#endif
short in_bootmem; /* pda lives in bootmem */
} ____cacheline_aligned_in_smp;
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 165d527..ce980db 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -133,12 +133,6 @@ do { \
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
-#ifdef CONFIG_X86_64
-extern void load_pda_offset(int cpu);
-#else
-static inline void load_pda_offset(int cpu) { }
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f511246..cfe3237 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -379,8 +379,31 @@ union thread_xstate {
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
-DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+union irq_stack_union {
+ char irq_stack[IRQ_STACK_SIZE];
+ /*
+ * GCC hardcodes the stack canary as %gs:40. Since the
+ * irq_stack is the object at %gs:0, we reserve the bottom
+ * 48 bytes of the irq stack for the canary.
+ */
+ struct {
+ char gs_base[40];
+#ifdef CONFIG_CC_STACKPROTECTOR
+ unsigned long stack_canary;
+#endif
+ };
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
+
+static inline void load_gs_base(int cpu)
+{
+ /* Memory clobbers used to order pda/percpu accesses */
+ mb();
+ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+ mb();
+}
#endif
extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 64c834a..94f9c8b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -48,10 +48,6 @@ int main(void)
#endif
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- DEFINE(pda_size, sizeof(struct x8664_pda));
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f83a4d6..94ea5cb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -881,13 +881,9 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
-#ifdef CONFIG_SMP
-DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
-#else
+DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
- per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
-#endif
+ per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
@@ -960,7 +956,7 @@ void __cpuinit cpu_init(void)
loadsegment(fs, 0);
loadsegment(gs, 0);
- load_pda_offset(cpu);
+ load_gs_base(cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 98ea26a..8c83de6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -216,6 +216,7 @@ ENTRY(secondary_startup_64)
cmpl $0, per_cpu__cpu_number(%rax)
jne 1f
addq %rax, early_gdt_descr_base(%rip)
+ addq %rax, per_cpu__irq_stack_ptr(%rax)
1:
#endif
/*
@@ -242,13 +243,9 @@ ENTRY(secondary_startup_64)
/* Set up %gs.
*
- * On SMP, %gs should point to the per-cpu area. For initial
- * boot, make %gs point to the init data section. For a
- * secondary CPU,initial_gs should be set to its pda address
- * before the CPU runs this code.
- *
- * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
- * change.
+ * The base of %gs always points to the bottom of the irqstack
+ * union. If the stack protector canary is enabled, it is located
+ * at %gs:40.
*/
movl $MSR_GS_BASE,%ecx
movq initial_gs(%rip),%rax
@@ -281,7 +278,7 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP
.quad __per_cpu_load
#else
- .quad PER_CPU_VAR(__pda)
+ .quad per_cpu__irq_stack_union
#endif
__FINITDATA
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4523ff8..604b69e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -627,12 +627,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
- write_pda(stack_canary, next_p->stack_canary);
+ percpu_write(irq_stack_union.stack_canary, canary);
/*
* Build time only check to make sure the stack_canary is at
- * offset 40 in the pda; this is a gcc ABI requirement
+ * %gs:40; this is a gcc ABI requirement
*/
- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
+ BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
#endif
/*
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efbafbb..65c10c4 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
static inline void setup_node_to_cpumask_map(void) { }
#endif
-/*
- * Define load_pda_offset() and per-cpu __pda for x86_64.
- * load_pda_offset() is responsible for loading the offset of pda into
- * %gs.
- *
- * On SMP, pda offset also duals as percpu base address and thus it
- * should be at the start of per-cpu area. To achieve this, it's
- * preallocated in vmlinux_64.lds.S directly instead of using
- * DEFINE_PER_CPU().
- */
-#ifdef CONFIG_X86_64
-void __cpuinit load_pda_offset(int cpu)
-{
- /* Memory clobbers used to order pda/percpu accesses */
- mb();
- wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
- mb();
-}
-#ifndef CONFIG_SMP
-DEFINE_PER_CPU(struct x8664_pda, __pda);
-#endif
-EXPORT_PER_CPU_SYMBOL(__pda);
-#endif /* CONFIG_SMP && CONFIG_X86_64 */
-
#ifdef CONFIG_X86_64
/* correctly size the local cpu masks */
@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
per_cpu(cpu_number, cpu) = cpu;
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
- (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
+ per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
/*
- * CPU0 modified pda in the init data area, reload pda
- * offset for CPU0 and clear the area for others.
+ * CPU0 modified data in the init per-cpu area, reload %gs
+ * offset for CPU0.
*/
if (cpu == 0)
- load_pda_offset(0);
- else
- memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
+ load_gs_base(cpu);
#endif
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index a09abb8..c974099 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -220,8 +220,7 @@ SECTIONS
* so that it can be accessed as a percpu variable.
*/
. = ALIGN(PAGE_SIZE);
- PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
- per_cpu____pda = __per_cpu_start;
+ PERCPU_VADDR(0, :percpu)
#else
PERCPU(PAGE_SIZE)
#endif
@@ -262,3 +261,8 @@ SECTIONS
*/
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+ "irq_stack_union is not at start of per-cpu area");
+#endif
--
1.6.1.rc1
Ingo Molnar wrote:
> btw., i think we could (and should) now remove pda.h altogether. The stack
> canary percpu structure should be defined in stackprotector.h [that file
> is available in tip/master] - and all the PDA references and accessors can
> be removed, right?
Yeah, Brian has patches for that. Patches 0013-0017 do exactly that.
I'm waiting for updated patches.
Thanks.
--
tejun
Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/pda.h | 39 -------------------------------------
arch/x86/include/asm/pgtable_64.h | 1 -
arch/x86/include/asm/smp.h | 1 -
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/cpu/common.c | 1 -
arch/x86/kernel/process_64.c | 1 -
arch/x86/kernel/traps.c | 1 -
7 files changed, 0 insertions(+), 45 deletions(-)
delete mode 100644 arch/x86/include/asm/pda.h
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index ba46416..0000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef _ASM_X86_PDA_H
-#define _ASM_X86_PDA_H
-
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/threads.h>
-#include <asm/page.h>
-#include <asm/percpu.h>
-
-/* Per processor datastructure. %gs points to it while the kernel runs */
-struct x8664_pda {
- unsigned long unused1;
- unsigned long unused2;
- unsigned long unused3;
- unsigned long unused4;
- int unused5;
- unsigned int unused6; /* 36 was cpunumber */
- short in_bootmem; /* pda lives in bootmem */
-} ____cacheline_aligned_in_smp;
-
-DECLARE_PER_CPU(struct x8664_pda, __pda);
-
-#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
-
-#define read_pda(field) percpu_read(__pda.field)
-#define write_pda(field, val) percpu_write(__pda.field, val)
-#define add_pda(field, val) percpu_add(__pda.field, val)
-#define sub_pda(field, val) percpu_sub(__pda.field, val)
-#define or_pda(field, val) percpu_or(__pda.field, val)
-
-/* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define test_and_clear_bit_pda(bit, field) \
- x86_test_and_clear_bit_percpu(bit, __pda.field)
-
-#endif
-
-#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289..1df9637 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -11,7 +11,6 @@
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
-#include <asm/pda.h>
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 68636e7..45ef8a1 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,7 +15,6 @@
# include <asm/io_apic.h>
# endif
#endif
-#include <asm/pda.h>
#include <asm/thread_info.h>
#include <asm/cpumask.h>
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 94f9c8b..8793ab3 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
#include <linux/hardirq.h>
#include <linux/suspend.h>
#include <linux/kbuild.h>
-#include <asm/pda.h>
#include <asm/processor.h>
#include <asm/segment.h>
#include <asm/thread_info.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 94ea5cb..d025877 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -30,7 +30,6 @@
#include <asm/genapic.h>
#endif
-#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 604b69e..ec07a17 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -46,7 +46,6 @@
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
-#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d05..ed5aee5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
#ifdef CONFIG_X86_64
#include <asm/pgalloc.h>
#include <asm/proto.h>
-#include <asm/pda.h>
#else
#include <asm/processor-flags.h>
#include <asm/arch_hooks.h>
--
1.6.1.rc1
Hello, Brian.
Brian Gerst wrote:
...
> @@ -881,13 +881,9 @@ __setup("clearcpuid=", setup_disablecpuid);
> #ifdef CONFIG_X86_64
> struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
>
> -DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
> -#ifdef CONFIG_SMP
> -DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
> -#else
> +DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE);
> DEFINE_PER_CPU(char *, irq_stack_ptr) =
> - per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
> -#endif
> + per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
>
> DEFINE_PER_CPU(unsigned long, kernel_stack) =
> (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
>
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 98ea26a..8c83de6 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -216,6 +216,7 @@ ENTRY(secondary_startup_64)
> cmpl $0, per_cpu__cpu_number(%rax)
> jne 1f
> addq %rax, early_gdt_descr_base(%rip)
> + addq %rax, per_cpu__irq_stack_ptr(%rax)
> 1:
> #endif
> /*
As discussed before, the above chunks do drop one #ifdef CONFIG_SMP
but it does add an obscure relocation and please note that it's
different from early_gdt_descr. early_gdt_descr is needed to bring up
the cpu so there's no other way to do it but to relocate it in
assembly. If you absolutely have to relocate irq_stack_ptr early,
please do it in C code in head64.c but then again irq_stack_ptr is not
even necessary till traps_init() which is way after per cpu area
setup. So, the above two chunks are not necessary && even if they go
in, they don't have much to do with this patch.
In general, I think trying to early initialize per cpu pointer to per
cpu variable just isn't a good idea and should be avoided as much as
possible. I think the #ifdef there is fine - it's short and apparent
and representative of the two separate percpu implementations we have.
If you're annoyed by it, I think it would be better to either
consolidate such #ifdefs (there are a few other places) or define a
wrapper macro to conditionalize and document the different
initialization paths, but please don't add obscure assembly
relocation, especially not without comment explaining it.
Thanks.
--
tejun
Hello, Ingo, Brian.
Brian Gerst wrote:
>
> Here are the remaining patches rebased on top of these.
Ingo, here's git tree for kill-pda patches.
git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
I dropped early irq_stack_ptr relocation from 0004. I'll post updated
patch as reply to the original patch. If you think it's still
necessary, please feel free to discuss it. I also added a patch to
kill PERCPU_VADDR_PREALLOC(). Will post the patch as a reply to 0005.
Thanks.
--
tejun
From: Brian Gerst <[email protected]>
Now that the PDA is empty except for the stack canary, it can be removed.
The irqstack is moved to the start of the per-cpu section. If the stack
protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack.
tj: * updated subject
* dropped asm relocation of irq_stack_ptr
* updated comments a bit
Signed-off-by: Brian Gerst <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
This is the version which ended up in my git tree.
arch/x86/include/asm/pda.h | 5 -----
arch/x86/include/asm/percpu.h | 6 ------
arch/x86/include/asm/processor.h | 25 ++++++++++++++++++++++++-
arch/x86/kernel/asm-offsets_64.c | 4 ----
arch/x86/kernel/cpu/common.c | 7 ++++---
arch/x86/kernel/head_64.S | 6 +++---
arch/x86/kernel/process_64.c | 6 +++---
arch/x86/kernel/setup_percpu.c | 34 ++++------------------------------
arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++--
9 files changed, 44 insertions(+), 57 deletions(-)
Index: work/arch/x86/include/asm/pda.h
===================================================================
--- work.orig/arch/x86/include/asm/pda.h
+++ work/arch/x86/include/asm/pda.h
@@ -17,11 +17,6 @@ struct x8664_pda {
unsigned long unused4;
int unused5;
unsigned int unused6; /* 36 was cpunumber */
-#ifdef CONFIG_CC_STACKPROTECTOR
- unsigned long stack_canary; /* 40 stack canary value */
- /* gcc-ABI: this canary MUST be at
- offset 40!!! */
-#endif
short in_bootmem; /* pda lives in bootmem */
} ____cacheline_aligned_in_smp;
Index: work/arch/x86/include/asm/percpu.h
===================================================================
--- work.orig/arch/x86/include/asm/percpu.h
+++ work/arch/x86/include/asm/percpu.h
@@ -133,12 +133,6 @@ do { \
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
-#ifdef CONFIG_X86_64
-extern void load_pda_offset(int cpu);
-#else
-static inline void load_pda_offset(int cpu) { }
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_SMP
Index: work/arch/x86/include/asm/processor.h
===================================================================
--- work.orig/arch/x86/include/asm/processor.h
+++ work/arch/x86/include/asm/processor.h
@@ -379,8 +379,31 @@ union thread_xstate {
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
-DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+union irq_stack_union {
+ char irq_stack[IRQ_STACK_SIZE];
+ /*
+ * GCC hardcodes the stack canary as %gs:40. Since the
+ * irq_stack is the object at %gs:0, we reserve the bottom
+ * 48 bytes of the irq stack for the canary.
+ */
+ struct {
+ char gs_base[40];
+#ifdef CONFIG_CC_STACKPROTECTOR
+ unsigned long stack_canary;
+#endif
+ };
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
+
+static inline void load_gs_base(int cpu)
+{
+ /* Memory clobbers used to order pda/percpu accesses */
+ mb();
+ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+ mb();
+}
#endif
extern void print_cpu_info(struct cpuinfo_x86 *);
Index: work/arch/x86/kernel/asm-offsets_64.c
===================================================================
--- work.orig/arch/x86/kernel/asm-offsets_64.c
+++ work/arch/x86/kernel/asm-offsets_64.c
@@ -48,10 +48,6 @@ int main(void)
#endif
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- DEFINE(pda_size, sizeof(struct x8664_pda));
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
Index: work/arch/x86/kernel/cpu/common.c
===================================================================
--- work.orig/arch/x86/kernel/cpu/common.c
+++ work/arch/x86/kernel/cpu/common.c
@@ -881,12 +881,13 @@ __setup("clearcpuid=", setup_disablecpui
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+ irq_stack_union) __aligned(PAGE_SIZE);
#ifdef CONFIG_SMP
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
#else
DEFINE_PER_CPU(char *, irq_stack_ptr) =
- per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+ per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
#endif
DEFINE_PER_CPU(unsigned long, kernel_stack) =
@@ -960,7 +961,7 @@ void __cpuinit cpu_init(void)
loadsegment(fs, 0);
loadsegment(gs, 0);
- load_pda_offset(cpu);
+ load_gs_base(cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
Index: work/arch/x86/kernel/head_64.S
===================================================================
--- work.orig/arch/x86/kernel/head_64.S
+++ work/arch/x86/kernel/head_64.S
@@ -247,8 +247,8 @@ ENTRY(secondary_startup_64)
* secondary CPU,initial_gs should be set to its pda address
* before the CPU runs this code.
*
- * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
- * change.
+ * On UP, initial_gs points to PER_CPU_VAR(irq_stack_union)
+ * and doesn't change.
*/
movl $MSR_GS_BASE,%ecx
movq initial_gs(%rip),%rax
@@ -281,7 +281,7 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP
.quad __per_cpu_load
#else
- .quad PER_CPU_VAR(__pda)
+ .quad PER_CPU_VAR(irq_stack_union)
#endif
__FINITDATA
Index: work/arch/x86/kernel/process_64.c
===================================================================
--- work.orig/arch/x86/kernel/process_64.c
+++ work/arch/x86/kernel/process_64.c
@@ -627,12 +627,12 @@ __switch_to(struct task_struct *prev_p,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
- write_pda(stack_canary, next_p->stack_canary);
+ percpu_write(irq_stack_union.stack_canary, canary);
/*
* Build time only check to make sure the stack_canary is at
- * offset 40 in the pda; this is a gcc ABI requirement
+ * %gs:40; this is a gcc ABI requirement
*/
- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
+ BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
#endif
/*
Index: work/arch/x86/kernel/setup_percpu.c
===================================================================
--- work.orig/arch/x86/kernel/setup_percpu.c
+++ work/arch/x86/kernel/setup_percpu.c
@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask
static inline void setup_node_to_cpumask_map(void) { }
#endif
-/*
- * Define load_pda_offset() and per-cpu __pda for x86_64.
- * load_pda_offset() is responsible for loading the offset of pda into
- * %gs.
- *
- * On SMP, pda offset also duals as percpu base address and thus it
- * should be at the start of per-cpu area. To achieve this, it's
- * preallocated in vmlinux_64.lds.S directly instead of using
- * DEFINE_PER_CPU().
- */
-#ifdef CONFIG_X86_64
-void __cpuinit load_pda_offset(int cpu)
-{
- /* Memory clobbers used to order pda/percpu accesses */
- mb();
- wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
- mb();
-}
-#ifndef CONFIG_SMP
-DEFINE_PER_CPU(struct x8664_pda, __pda);
-#endif
-EXPORT_PER_CPU_SYMBOL(__pda);
-#endif /* CONFIG_SMP && CONFIG_X86_64 */
-
#ifdef CONFIG_X86_64
/* correctly size the local cpu masks */
@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
per_cpu(cpu_number, cpu) = cpu;
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
- (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
+ per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
/*
- * CPU0 modified pda in the init data area, reload pda
- * offset for CPU0 and clear the area for others.
+ * Up to this point, CPU0 has been using .data.init
+ * area. Reload %gs offset for CPU0.
*/
if (cpu == 0)
- load_pda_offset(0);
- else
- memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
+ load_gs_base(cpu);
#endif
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
Index: work/arch/x86/kernel/vmlinux_64.lds.S
===================================================================
--- work.orig/arch/x86/kernel/vmlinux_64.lds.S
+++ work/arch/x86/kernel/vmlinux_64.lds.S
@@ -220,8 +220,7 @@ SECTIONS
* so that it can be accessed as a percpu variable.
*/
. = ALIGN(PAGE_SIZE);
- PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
- per_cpu____pda = __per_cpu_start;
+ PERCPU_VADDR(0, :percpu)
#else
PERCPU(PAGE_SIZE)
#endif
@@ -262,3 +261,8 @@ SECTIONS
*/
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+ "irq_stack_union is not at start of per-cpu area");
+#endif
With .data.percpu.first in place, PERCPU_VADDR_PREALLOC() is no longer
necessary. Kill it.
Signed-off-by: Tejun Heo <[email protected]>
---
include/asm-generic/vmlinux.lds.h | 45 ++++++--------------------------------
1 file changed, 8 insertions(+), 37 deletions(-)
Index: work/include/asm-generic/vmlinux.lds.h
===================================================================
--- work.orig/include/asm-generic/vmlinux.lds.h
+++ work/include/asm-generic/vmlinux.lds.h
@@ -430,22 +430,10 @@
*(.initcall7.init) \
*(.initcall7s.init)
-#define PERCPU_PROLOG(vaddr) \
- VMLINUX_SYMBOL(__per_cpu_load) = .; \
- .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- - LOAD_OFFSET) { \
- VMLINUX_SYMBOL(__per_cpu_start) = .;
-
-#define PERCPU_EPILOG(phdr) \
- VMLINUX_SYMBOL(__per_cpu_end) = .; \
- } phdr \
- . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
-
/**
- * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc
+ * PERCPU_VADDR - define output section for percpu area
* @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional)
- * @prealloc: the size of prealloc area
*
* Macro which expands to output section for percpu area. If @vaddr
* is not blank, it specifies explicit base address and all percpu
@@ -457,40 +445,23 @@
* section in the linker script will go there too. @phdr should have
* a leading colon.
*
- * If @prealloc is non-zero, the specified number of bytes will be
- * reserved at the start of percpu area. As the prealloc area is
- * likely to break alignment, this macro puts areas in increasing
- * alignment order.
- *
* This macro defines three symbols, __per_cpu_load, __per_cpu_start
* and __per_cpu_end. The first one is the vaddr of loaded percpu
* init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
* end offset.
*/
-#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \
- PERCPU_PROLOG(vaddr) \
- . += prealloc; \
- *(.data.percpu) \
- *(.data.percpu.shared_aligned) \
- *(.data.percpu.page_aligned) \
- PERCPU_EPILOG(segment)
-
-/**
- * PERCPU_VADDR - define output section for percpu area
- * @vaddr: explicit base address (optional)
- * @phdr: destination PHDR (optional)
- *
- * Macro which expands to output section for percpu area. Mostly
- * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
- * using slighly different layout.
- */
#define PERCPU_VADDR(vaddr, phdr) \
- PERCPU_PROLOG(vaddr) \
+ VMLINUX_SYMBOL(__per_cpu_load) = .; \
+ .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
+ - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
*(.data.percpu.first) \
*(.data.percpu.page_aligned) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
- PERCPU_EPILOG(phdr)
+ VMLINUX_SYMBOL(__per_cpu_end) = .; \
+ } phdr \
+ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
/**
* PERCPU - define output section for percpu area, simple version
On Sun, Jan 18, 2009 at 9:18 PM, Tejun Heo <[email protected]> wrote:
> Hello, Brian.
>
> Brian Gerst wrote:
> ...
>> @@ -881,13 +881,9 @@ __setup("clearcpuid=", setup_disablecpuid);
>> #ifdef CONFIG_X86_64
>> struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
>>
>> -DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
>> -#ifdef CONFIG_SMP
>> -DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
>> -#else
>> +DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE);
>> DEFINE_PER_CPU(char *, irq_stack_ptr) =
>> - per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
>> -#endif
>> + per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
>>
>> DEFINE_PER_CPU(unsigned long, kernel_stack) =
>> (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
>>
>> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
>> index 98ea26a..8c83de6 100644
>> --- a/arch/x86/kernel/head_64.S
>> +++ b/arch/x86/kernel/head_64.S
>> @@ -216,6 +216,7 @@ ENTRY(secondary_startup_64)
>> cmpl $0, per_cpu__cpu_number(%rax)
>> jne 1f
>> addq %rax, early_gdt_descr_base(%rip)
>> + addq %rax, per_cpu__irq_stack_ptr(%rax)
>> 1:
>> #endif
>> /*
>
> As discussed before, the above chunks do drop one #ifdef CONFIG_SMP
> but it does add an obscure relocation and please note that it's
> different from early_gdt_descr. early_gdt_descr is needed to bring up
> the cpu so there's no other way to do it but to relocate it in
> assembly. If you absolutely have to relocate irq_stack_ptr early,
> please do it in C code in head64.c but then again irq_stack_ptr is not
> even necessary till traps_init() which is way after per cpu area
> setup. So, the above two chunks are not necessary && even if they go
> in, they don't have much to do with this patch.
I'll give you that this particular variable doesn't need early
adjustment currently. I'd prefer if you left the ifdef off the first
hunk, though. A comment will suffice to document that the initial
value is going to be overwritten later on SMP.
--
Brian Gerst
Hello, Brian.
Brian Gerst wrote:
>> As discussed before, the above chunks do drop one #ifdef CONFIG_SMP
>> but it does add an obscure relocation and please note that it's
>> different from early_gdt_descr. early_gdt_descr is needed to bring up
>> the cpu so there's no other way to do it but to relocate it in
>> assembly. If you absolutely have to relocate irq_stack_ptr early,
>> please do it in C code in head64.c but then again irq_stack_ptr is not
>> even necessary till traps_init() which is way after per cpu area
>> setup. So, the above two chunks are not necessary && even if they go
>> in, they don't have much to do with this patch.
>
> I'll give you that this particular variable doesn't need early
> adjustment currently. I'd prefer if you left the ifdef off the first
> hunk, though. A comment will suffice to document that the initial
> value is going to be overwritten later on SMP.
Yeah, maybe. I don't know. If we ever get to a point where we can
fully initialize per cpu area for cpu0, things like this can probably
go away, but for now, I just thought it would be better to make it
clear that UP and SMP are taking different initialization paths &&
that was how the code looked before this patch, so... But no big
deal one way or another, right?
Thanks.
--
tejun
Brian, can you please use --no-chain-reply-to when sending patches with
git? These very deep threads are most annoying as one cannot read the
subjects anymore.
* Tejun Heo <[email protected]> wrote:
> Hello, Ingo, Brian.
>
> Brian Gerst wrote:
> >
> > Here are the remaining patches rebased on top of these.
>
> Ingo, here's git tree for kill-pda patches.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
>
> I dropped early irq_stack_ptr relocation from 0004. I'll post updated
> patch as reply to the original patch. If you think it's still
> necessary, please feel free to discuss it. I also added a patch to kill
> PERCPU_VADDR_PREALLOC(). Will post the patch as a reply to 0005.
Pulled these commits into tip/core/percpu:
9624f47: linker script: kill PERCPU_VADDR_PREALLOC()
625a266: x86-64: Remove pda.h
a5f0e6c: x86-64: Move stack_canary into irq_stack
0840513: x86-64: Rework __per_cpu_load adjustments
53372e8: percpu: Refactor percpu.h
f6ced7a: x86-64: Remove pda_init()
thanks guys!
Ingo
* Ingo Molnar <[email protected]> wrote:
>
> * Tejun Heo <[email protected]> wrote:
>
> > Hello, Ingo, Brian.
> >
> > Brian Gerst wrote:
> > >
> > > Here are the remaining patches rebased on top of these.
> >
> > Ingo, here's git tree for kill-pda patches.
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
> >
> > I dropped early irq_stack_ptr relocation from 0004. I'll post updated
> > patch as reply to the original patch. If you think it's still
> > necessary, please feel free to discuss it. I also added a patch to kill
> > PERCPU_VADDR_PREALLOC(). Will post the patch as a reply to 0005.
>
> Pulled these commits into tip/core/percpu:
hm, actually - the stackprotector impact is getting quite nasty. So i
think we are better off by merging the stackprotector fixes into
core/percpu, and then rebasing these commits on top of that. I pushed out
a new core/percpu branch with that done - and stackprotector works in that
lineup.
Would you mind redoing these commits on top of that? I tried the conflict
resolution but it looks quite complex and non-obvious - it would be
cleaner to have it this way.
Ingo
Ingo Molnar wrote:
> * Ingo Molnar <[email protected]> wrote:
>
>> * Tejun Heo <[email protected]> wrote:
>>
>>> Hello, Ingo, Brian.
>>>
>>> Brian Gerst wrote:
>>>> Here are the remaining patches rebased on top of these.
>>> Ingo, here's git tree for kill-pda patches.
>>>
>>> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
>>>
>>> I dropped early irq_stack_ptr relocation from 0004. I'll post updated
>>> patch as reply to the original patch. If you think it's still
>>> necessary, please feel free to discuss it. I also added a patch to kill
>>> PERCPU_VADDR_PREALLOC(). Will post the patch as a reply to 0005.
>> Pulled these commits into tip/core/percpu:
>
> hm, actually - the stackprotector impact is getting quite nasty. So i
> think we are better off by merging the stackprotector fixes into
> core/percpu, and then rebasing these commits on top of that. I pushed out
> a new core/percpu branch with that done - and stackprotector works in that
> lineup.
>
> Would you mind redoing these commits on top of that? I tried the conflict
> resolution but it looks quite complex and non-obvious - it would be
> cleaner to have it this way.
Sure, will rebase and repost.
Thanks.
--
tejun
Tejun Heo wrote:
>> Would you mind redoing these commits on top of that? I tried the conflict
>> resolution but it looks quite complex and non-obvious - it would be
>> cleaner to have it this way.
>
> Sure, will rebase and repost.
Okay, updated & rebased (6b7c38d55587f43bcd2cbce3a98b1c0826982090).
git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
Thanks.
--
tejun
* Tejun Heo <[email protected]> wrote:
> Tejun Heo wrote:
> >> Would you mind redoing these commits on top of that? I tried the conflict
> >> resolution but it looks quite complex and non-obvious - it would be
> >> cleaner to have it this way.
> >
> > Sure, will rebase and repost.
>
> Okay, updated & rebased (6b7c38d55587f43bcd2cbce3a98b1c0826982090).
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git tj-percpu
Pulled into tip/core/percpu, thanks!
Ingo