LinuxLists.cc - [GIT PATCH core/percpu] percpu: convert most archs to dynamic percpu, take#3

2009-06-17 03:42:31

Subject: [GIT PATCH core/percpu] percpu: convert most archs to dynamic percpu, take#3

2009-06-17 03:41:42

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 3/9] CRIS: Change DEFINE_PER_CPU of current_pgd to be non volatile.

From: Jesper Nilsson <[email protected]>

The DEFINE_PER_CPU of current_pgd was on CRIS defined using volatile,
which is not needed. Remove volatile.

Tested on an ARTPEC-3 (CRISv32) board.

[ Impact: code cleanup ]

Signed-off-by: Jesper Nilsson <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
arch/cris/include/asm/mmu_context.h | 3 ++-
arch/cris/mm/fault.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/cris/include/asm/mmu_context.h b/arch/cris/include/asm/mmu_context.h
index 72ba08d..476cd9e 100644
--- a/arch/cris/include/asm/mmu_context.h
+++ b/arch/cris/include/asm/mmu_context.h
@@ -17,7 +17,8 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* registers like cr3 on the i386
*/

-extern volatile DEFINE_PER_CPU(pgd_t *,current_pgd); /* defined in arch/cris/mm/fault.c */
+/* defined in arch/cris/mm/fault.c */
+extern DEFINE_PER_CPU(pgd_t *, current_pgd);

static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index c4c76db..84d22ae 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -29,7 +29,7 @@ extern void die_if_kernel(const char *, struct pt_regs *, long);

/* current active page directory */

-volatile DEFINE_PER_CPU(pgd_t *,current_pgd);
+DEFINE_PER_CPU(pgd_t *, current_pgd);
unsigned long cris_signal_return_page;

/*
--
1.6.0.2

2009-06-17 03:41:58

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 1/9] percpu: fix too lazy vunmap cache flushing

In pcpu_unmap(), flushing virtual cache on vunmap can't be delayed as
the page is going to be returned to the page allocator. Only TLB
flushing can be put off such that vmalloc code can handle it lazily.
Fix it.

[ Impact: fix subtle virtual cache flush bug ]

Signed-off-by: Tejun Heo <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
mm/percpu.c | 11 +++++------
1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index c0b2c1a..d06f474 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -549,14 +549,14 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
* @chunk: chunk of interest
* @page_start: page index of the first page to unmap
* @page_end: page index of the last page to unmap + 1
- * @flush: whether to flush cache and tlb or not
+ * @flush_tlb: whether to flush tlb or not
*
* For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
* If @flush is true, vcache is flushed before unmapping and tlb
* after.
*/
static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
- bool flush)
+ bool flush_tlb)
{
unsigned int last = num_possible_cpus() - 1;
unsigned int cpu;
@@ -569,9 +569,8 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
* the whole region at once rather than doing it for each cpu.
* This could be an overkill but is more scalable.
*/
- if (flush)
- flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start),
- pcpu_chunk_addr(chunk, last, page_end));
+ flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start),
+ pcpu_chunk_addr(chunk, last, page_end));

for_each_possible_cpu(cpu)
unmap_kernel_range_noflush(
@@ -579,7 +578,7 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
(page_end - page_start) << PAGE_SHIFT);

/* ditto as flush_cache_vunmap() */
- if (flush)
+ if (flush_tlb)
flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start),
pcpu_chunk_addr(chunk, last, page_end));
}
--
1.6.0.2

2009-06-17 03:42:45

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 7/9] alpha: kill unnecessary __used attribute in PER_CPU_ATTRIBUTES

With the previous percpu variable definition change, all percpu
variables are global and there's no need to specify __used, which only
triggers on recent compilers anyway. Kill it.

[ Impact: remove unnecessary percpu attribute ]

Signed-off-by: Tejun Heo <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Richard Henderson <[email protected]>
---
arch/alpha/include/asm/percpu.h | 5 -----
1 files changed, 0 insertions(+), 5 deletions(-)

diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h
index 06c5c7a..7f0a9c4 100644
--- a/arch/alpha/include/asm/percpu.h
+++ b/arch/alpha/include/asm/percpu.h
@@ -30,7 +30,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS];

#ifndef MODULE
#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
-#define PER_CPU_ATTRIBUTES
#else
/*
* To calculate addresses of locally defined variables, GCC uses 32-bit
@@ -49,8 +48,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
: "=&r"(__ptr), "=&r"(tmp_gp)); \
(typeof(&per_cpu_var(var)))(__ptr + (offset)); })

-#define PER_CPU_ATTRIBUTES __used
-
#endif /* MODULE */

/*
@@ -71,8 +68,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
#define __get_cpu_var(var) per_cpu_var(var)
#define __raw_get_cpu_var(var) per_cpu_var(var)

-#define PER_CPU_ATTRIBUTES
-
#endif /* SMP */

#ifdef CONFIG_SMP
--
1.6.0.2

2009-06-17 03:43:03

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 4/9] percpu: cleanup percpu array definitions

Currently, the following three different ways to define percpu arrays
are in use.

1. DEFINE_PER_CPU(elem_type[array_len], array_name);
2. DEFINE_PER_CPU(elem_type, array_name[array_len]);
3. DEFINE_PER_CPU(elem_type, array_name)[array_len];

Unify to #1 which correctly separates the roles of the two parameters
and thus allows more flexibility in the way percpu variables are
defined.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: [email protected]
Cc: Christoph Lameter <[email protected]>
Cc: David S. Miller <[email protected]>
---
arch/ia64/kernel/smp.c | 2 +-
arch/ia64/sn/kernel/setup.c | 2 +-
arch/powerpc/mm/stab.c | 2 +-
arch/powerpc/platforms/ps3/smp.c | 2 +-
arch/x86/kernel/cpu/cpu_debug.c | 4 ++--
arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +-
drivers/xen/events.c | 4 ++--
mm/quicklist.c | 2 +-
mm/slub.c | 4 ++--
net/ipv4/syncookies.c | 2 +-
net/ipv6/syncookies.c | 2 +-
11 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 5230eaa..3e0840c 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -58,7 +58,7 @@ static struct local_tlb_flush_counts {
unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];

-static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;
+static DEFINE_PER_CPU(unsigned short [NR_CPUS], shadow_flush_counts) ____cacheline_aligned;

#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index e456f06..ece1bf9 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
EXPORT_PER_CPU_SYMBOL(__sn_hub_info);

-DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+DEFINE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);

DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 98cd1dc..6e9b69c 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -31,7 +31,7 @@ struct stab_entry {

#define NR_STAB_CACHE_ENTRIES 8
static DEFINE_PER_CPU(long, stab_cache_ptr);
-static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
+static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache);

/*
* Create a segment table entry for the given esid/vsid pair.
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index f6e04bc..51ffde4 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -37,7 +37,7 @@
*/

#define MSG_COUNT 4
-static DEFINE_PER_CPU(unsigned int, ps3_ipi_virqs[MSG_COUNT]);
+static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);

static void do_message_pass(int target, int msg)
{
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 6b2a52d..dca325c 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -30,8 +30,8 @@
#include <asm/apic.h>
#include <asm/desc.h>

-static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
-static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
+static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
+static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
static DEFINE_PER_CPU(int, cpu_priv_count);

static DEFINE_MUTEX(cpu_debug_lock);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index ddae216..bd2a2fa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -69,7 +69,7 @@ struct threshold_bank {
struct threshold_block *blocks;
cpumask_var_t cpus;
};
-static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);

#ifdef CONFIG_SMP
static unsigned char shared_bank[NR_BANKS] = {
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 891d2e9..ab581fa 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -47,10 +47,10 @@
static DEFINE_SPINLOCK(irq_mapping_update_lock);

/* IRQ <-> VIRQ mapping. */
-static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
+static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};

/* IRQ <-> IPI mapping */
-static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
+static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};

/* Interrupt types. */
enum xen_irq_type {
diff --git a/mm/quicklist.c b/mm/quicklist.c
index e66d07d..6eedf7e 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -19,7 +19,7 @@
#include <linux/module.h>
#include <linux/quicklist.h>

-DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
+DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);

#define FRACTION_OF_NODE_MEM 16

diff --git a/mm/slub.c b/mm/slub.c
index 30354bf..0933b91 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1998,8 +1998,8 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
*/
#define NR_KMEM_CACHE_CPU 100

-static DEFINE_PER_CPU(struct kmem_cache_cpu,
- kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
+static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
+ kmem_cache_cpu);

static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index cd2b97f..84d90f2 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -37,7 +37,7 @@ __initcall(init_syncookies);
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

-static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], cookie_scratch);

static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8c25139..23d0d6d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -74,7 +74,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
return child;
}

-static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], cookie_scratch);

static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c)
--
1.6.0.2

2009-06-17 03:43:27

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 8/9] alpha: switch to dynamic percpu allocator

Alpha implements custom SHIFT_PERCPU_PTR for modules because percpu
area can be located far away from the 4G area where the module text is
located. The custom SHIFT_PERCPU_PTR forces GOT usage using ldq
instruction with literal relocation; however, the relocation can't be
used with dynamically allocatd percpu variables. Fortunately, similar
result can be achieved using weak attribute on percpu variable
declarations and definitions, which is allowed with previous changes.

This patch makes alpha use weak attribute instead and switch to
dynamic percpu allocator.

asm/tlbflush.h was getting linux/sched.h via asm/percpu.h which no
longer needs it. Include linux/sched.h directly in asm/tlbflush.h.

Compile tested. Generation of litereal relocation verified.

This patch is based on Ivan Kokshaysky's alpha percpu patch.

[ Impact: use dynamic percpu allocator ]

Signed-off-by: Tejun Heo <[email protected]>
Acked-by: Ivan Kokshaysky <[email protected]>
Cc: Richard Henderson <[email protected]>
---
arch/alpha/Kconfig | 3 -
arch/alpha/include/asm/percpu.h | 96 ++++---------------------------------
arch/alpha/include/asm/tlbflush.h | 1 +
3 files changed, 10 insertions(+), 90 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 05d8640..9fb8aae 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,9 +70,6 @@ config AUTO_IRQ_AFFINITY
depends on SMP
default y

-config HAVE_LEGACY_PER_CPU_AREA
- def_bool y
-
source "init/Kconfig"
source "kernel/Kconfig.freezer"

diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h
index 7f0a9c4..2b0c79c 100644
--- a/arch/alpha/include/asm/percpu.h
+++ b/arch/alpha/include/asm/percpu.h
@@ -1,97 +1,19 @@
#ifndef __ALPHA_PERCPU_H
#define __ALPHA_PERCPU_H

-#include <linux/compiler.h>
-#include <linux/threads.h>
-#include <linux/percpu-defs.h>
-
/*
- * Determine the real variable name from the name visible in the
- * kernel sources.
- */
-#define per_cpu_var(var) per_cpu__##var
-
-#ifdef CONFIG_SMP
-
-/*
- * per_cpu_offset() is the offset that has to be added to a
- * percpu variable to get to the instance for a certain processor.
- */
-extern unsigned long __per_cpu_offset[NR_CPUS];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
-#ifdef CONFIG_DEBUG_PREEMPT
-#define my_cpu_offset per_cpu_offset(smp_processor_id())
-#else
-#define my_cpu_offset __my_cpu_offset
-#endif
-
-#ifndef MODULE
-#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
-#else
-/*
- * To calculate addresses of locally defined variables, GCC uses 32-bit
- * displacement from the GP. Which doesn't work for per cpu variables in
- * modules, as an offset to the kernel per cpu area is way above 4G.
+ * To calculate addresses of locally defined variables, GCC uses
+ * 32-bit displacement from the GP. Which doesn't work for per cpu
+ * variables in modules, as an offset to the kernel per cpu area is
+ * way above 4G.
*
- * This forces allocation of a GOT entry for per cpu variable using
- * ldq instruction with a 'literal' relocation.
- */
-#define SHIFT_PERCPU_PTR(var, offset) ({ \
- extern int simple_identifier_##var(void); \
- unsigned long __ptr, tmp_gp; \
- asm ( "br %1, 1f \n\
- 1: ldgp %1, 0(%1) \n\
- ldq %0, per_cpu__" #var"(%1)\t!literal" \
- : "=&r"(__ptr), "=&r"(tmp_gp)); \
- (typeof(&per_cpu_var(var)))(__ptr + (offset)); })
-
-#endif /* MODULE */
-
-/*
- * A percpu variable may point to a discarded regions. The following are
- * established ways to produce a usable pointer from the percpu variable
- * offset.
+ * Use "weak" attribute to force the compiler to generate external
+ * reference.
*/
-#define per_cpu(var, cpu) \
- (*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
-#define __get_cpu_var(var) \
- (*SHIFT_PERCPU_PTR(var, my_cpu_offset))
-#define __raw_get_cpu_var(var) \
- (*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
-
-#else /* ! SMP */
-
-#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
-#define __get_cpu_var(var) per_cpu_var(var)
-#define __raw_get_cpu_var(var) per_cpu_var(var)
-
-#endif /* SMP */
-
-#ifdef CONFIG_SMP
-#define PER_CPU_BASE_SECTION ".data.percpu"
-#else
-#define PER_CPU_BASE_SECTION ".data"
-#endif
-
-#ifdef CONFIG_SMP
-
-#ifdef MODULE
-#define PER_CPU_SHARED_ALIGNED_SECTION ""
-#else
-#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
-#endif
-#define PER_CPU_FIRST_SECTION ".first"
-
-#else
-
-#define PER_CPU_SHARED_ALIGNED_SECTION ""
-#define PER_CPU_FIRST_SECTION ""
-
+#if defined(MODULE) && defined(CONFIG_SMP)
+#define PER_CPU_ATTRIBUTES __attribute__((weak))
#endif

-#define PER_CPU_ATTRIBUTES
+#include <asm-generic/percpu.h>

#endif /* __ALPHA_PERCPU_H */
diff --git a/arch/alpha/include/asm/tlbflush.h b/arch/alpha/include/asm/tlbflush.h
index 9d87aaa..e89e0c2 100644
--- a/arch/alpha/include/asm/tlbflush.h
+++ b/arch/alpha/include/asm/tlbflush.h
@@ -2,6 +2,7 @@
#define _ALPHA_TLBFLUSH_H

#include <linux/mm.h>
+#include <linux/sched.h>
#include <asm/compiler.h>
#include <asm/pgalloc.h>

--
1.6.0.2

2009-06-17 03:43:43

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 9/9] s390: switch to dynamic percpu allocator

64bit s390 shares the same problem with percpu symbol addressing from
modules. It needs assembly magic to force GOTENT reference when
building module as the percpu address will be outside the usual 4G
range from the module text. Simiarly to alpha, this can be solved by
using the weak attribute.

This patch makes s390 use weak attribute instead and switch to dynamic
percpu allocator. Please note that weak attribute is not added if
!SMP as percpu variables behave exactly the same as normal variables
on UP.

Compile tested. Generation of GOTENT reference verified.

This patch is based on Ivan Kokshaysky's alpha percpu patch.

[ Impact: use dynamic percpu allocator ]

Signed-off-by: Tejun Heo <[email protected]>
Acked-by: Martin Schwidefsky <[email protected]>
Cc: Heiko Carstens <[email protected]>
---
arch/s390/Kconfig | 3 ---
arch/s390/include/asm/percpu.h | 32 ++++++++------------------------
2 files changed, 8 insertions(+), 27 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f4a3cc6..a14dba0 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -75,9 +75,6 @@ config VIRT_CPU_ACCOUNTING
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y

-config HAVE_LEGACY_PER_CPU_AREA
- def_bool y
-
mainmenu "Linux Kernel Configuration"

config S390
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 408d60b..36672ff 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -1,37 +1,21 @@
#ifndef __ARCH_S390_PERCPU__
#define __ARCH_S390_PERCPU__

-#include <linux/compiler.h>
-#include <asm/lowcore.h>
-
/*
* s390 uses its own implementation for per cpu data, the offset of
* the cpu local data area is cached in the cpu's lowcore memory.
- * For 64 bit module code s390 forces the use of a GOT slot for the
- * address of the per cpu variable. This is needed because the module
- * may be more than 4G above the per cpu area.
*/
-#if defined(__s390x__) && defined(MODULE)
-
-#define SHIFT_PERCPU_PTR(ptr,offset) (({ \
- extern int simple_identifier_##var(void); \
- unsigned long *__ptr; \
- asm ( "larl %0, %1@GOTENT" \
- : "=a" (__ptr) : "X" (ptr) ); \
- (typeof(ptr))((*__ptr) + (offset)); }))
-
-#else
-
-#define SHIFT_PERCPU_PTR(ptr, offset) (({ \
- extern int simple_identifier_##var(void); \
- unsigned long __ptr; \
- asm ( "" : "=a" (__ptr) : "0" (ptr) ); \
- (typeof(ptr)) (__ptr + (offset)); }))
+#define __my_cpu_offset S390_lowcore.percpu_offset

+/*
+ * For 64 bit module code, the module may be more than 4G above the
+ * per cpu area, use "weak" attribute to force the compiler to
+ * generate an external reference.
+ */
+#if defined(CONFIG_SMP) && defined(__s390x__) && defined(MODULE)
+#define PER_CPU_ATTRIBUTES __attribute__((weak))
#endif

-#define __my_cpu_offset S390_lowcore.percpu_offset
-
#include <asm-generic/percpu.h>

#endif /* __ARCH_S390_PERCPU__ */
--
1.6.0.2

2009-06-17 03:44:22

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 5/9] percpu: clean up percpu variable definitions

Percpu variable definition is about to be updated such that no static
declaration is allowed. Update percpu variable definitions
accoringly.

* as,cfq: rename ioc_count uniquely

* cpufreq: rename cpu_dbs_info uniquely

* xen: move nesting_count out of xen_evtchn_do_upcall() and rename it

* mm: move ratelimits out of balance_dirty_pages_ratelimited_nr() and
rename it

* ipv4,6: rename cookie_scratch uniquely

While at it, make cris:use DECLARE_PER_CPU() instead of extern
volatile DEFINE_PER_CPU() for declaration.

[ Impact: percpu usage cleanups, no duplicate static percpu var names ]

Signed-off-by: Tejun Heo <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Dave Jones <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: linux-mm <[email protected]>
Cc: David S. Miller <[email protected]>
---
arch/cris/include/asm/mmu_context.h | 2 +-
block/as-iosched.c | 10 +++++-----
block/cfq-iosched.c | 10 +++++-----
drivers/cpufreq/cpufreq_conservative.c | 12 ++++++------
drivers/cpufreq/cpufreq_ondemand.c | 15 ++++++++-------
drivers/xen/events.c | 9 +++++----
mm/page-writeback.c | 5 +++--
net/ipv4/syncookies.c | 5 +++--
net/ipv6/syncookies.c | 5 +++--
9 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/arch/cris/include/asm/mmu_context.h b/arch/cris/include/asm/mmu_context.h
index 476cd9e..1d45fd6 100644
--- a/arch/cris/include/asm/mmu_context.h
+++ b/arch/cris/include/asm/mmu_context.h
@@ -18,7 +18,7 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/

/* defined in arch/cris/mm/fault.c */
-extern DEFINE_PER_CPU(pgd_t *, current_pgd);
+DECLARE_PER_CPU(pgd_t *, current_pgd);

static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 7a12cf6..ce8ba57 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -146,7 +146,7 @@ enum arq_state {
#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)

-static DEFINE_PER_CPU(unsigned long, ioc_count);
+static DEFINE_PER_CPU(unsigned long, as_ioc_count);
static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock);

@@ -161,7 +161,7 @@ static void as_antic_stop(struct as_data *ad);
static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
- elv_ioc_count_dec(ioc_count);
+ elv_ioc_count_dec(as_ioc_count);
if (ioc_gone) {
/*
* AS scheduler is exiting, grab exit lock and check
@@ -169,7 +169,7 @@ static void free_as_io_context(struct as_io_context *aic)
* complete ioc_gone and set it back to NULL.
*/
spin_lock(&ioc_gone_lock);
- if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ if (ioc_gone && !elv_ioc_count_read(as_ioc_count)) {
complete(ioc_gone);
ioc_gone = NULL;
}
@@ -211,7 +211,7 @@ static struct as_io_context *alloc_as_io_context(void)
ret->seek_total = 0;
ret->seek_samples = 0;
ret->seek_mean = 0;
- elv_ioc_count_inc(ioc_count);
+ elv_ioc_count_inc(as_ioc_count);
}

return ret;
@@ -1507,7 +1507,7 @@ static void __exit as_exit(void)
ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
- if (elv_ioc_count_read(ioc_count))
+ if (elv_ioc_count_read(as_ioc_count))
wait_for_completion(&all_gone);
synchronize_rcu();
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 833ec18..0f1cc7d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -48,7 +48,7 @@ static int cfq_slice_idle = HZ / 125;
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;

-static DEFINE_PER_CPU(unsigned long, ioc_count);
+static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock);

@@ -1422,7 +1422,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
cic = container_of(head, struct cfq_io_context, rcu_head);

kmem_cache_free(cfq_ioc_pool, cic);
- elv_ioc_count_dec(ioc_count);
+ elv_ioc_count_dec(cfq_ioc_count);

if (ioc_gone) {
/*
@@ -1431,7 +1431,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
* complete ioc_gone and set it back to NULL
*/
spin_lock(&ioc_gone_lock);
- if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
complete(ioc_gone);
ioc_gone = NULL;
}
@@ -1557,7 +1557,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
INIT_HLIST_NODE(&cic->cic_list);
cic->dtor = cfq_free_io_context;
cic->exit = cfq_exit_io_context;
- elv_ioc_count_inc(ioc_count);
+ elv_ioc_count_inc(cfq_ioc_count);
}

return cic;
@@ -2658,7 +2658,7 @@ static void __exit cfq_exit(void)
* this also protects us from entering cfq_slab_kill() with
* pending RCU callbacks
*/
- if (elv_ioc_count_read(ioc_count))
+ if (elv_ioc_count_read(cfq_ioc_count))
wait_for_completion(&all_gone);
cfq_slab_kill();
}
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 7a74d17..8191d04 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -80,7 +80,7 @@ struct cpu_dbs_info_s {
int cpu;
unsigned int enable:1;
};
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

@@ -153,7 +153,7 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
- struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info,
+ struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
freq->cpu);

struct cpufreq_policy *policy;
@@ -326,7 +326,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
/* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) {
struct cpu_dbs_info_s *dbs_info;
- dbs_info = &per_cpu(cpu_dbs_info, j);
+ dbs_info = &per_cpu(cs_cpu_dbs_info, j);
dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice)
@@ -416,7 +416,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cputime64_t cur_wall_time, cur_idle_time;
unsigned int idle_time, wall_time;

- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);

cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

@@ -556,7 +556,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int j;
int rc;

- this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+ this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);

switch (event) {
case CPUFREQ_GOV_START:
@@ -576,7 +576,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,

for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info;
- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
j_dbs_info->cur_policy = policy;

j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index e741c33..04de476 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -87,7 +87,7 @@ struct cpu_dbs_info_s {
unsigned int enable:1,
sample_type:1;
};
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

@@ -165,7 +165,8 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
unsigned int freq_hi, freq_lo;
unsigned int index = 0;
unsigned int jiffies_total, jiffies_hi, jiffies_lo;
- struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
+ policy->cpu);

if (!dbs_info->freq_table) {
dbs_info->freq_lo = 0;
@@ -210,7 +211,7 @@ static void ondemand_powersave_bias_init(void)
{
int i;
for_each_online_cpu(i) {
- struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, i);
dbs_info->freq_table = cpufreq_frequency_get_table(i);
dbs_info->freq_lo = 0;
}
@@ -325,7 +326,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
/* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) {
struct cpu_dbs_info_s *dbs_info;
- dbs_info = &per_cpu(cpu_dbs_info, j);
+ dbs_info = &per_cpu(od_cpu_dbs_info, j);
dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice)
@@ -419,7 +420,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
unsigned int load, load_freq;
int freq_avg;

- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

@@ -576,7 +577,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int j;
int rc;

- this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

switch (event) {
case CPUFREQ_GOV_START:
@@ -598,7 +599,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,

for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info;
- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
j_dbs_info->cur_policy = policy;

j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ab581fa..7d2987e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -602,6 +602,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}

+static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+
/*
* Search the CPUs pending events bitmasks. For each one found, map
* the event number to an irq, and feed it into do_IRQ() for
@@ -617,7 +619,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
- static DEFINE_PER_CPU(unsigned, nesting_count);
unsigned count;

exit_idle();
@@ -628,7 +629,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)

vcpu_info->evtchn_upcall_pending = 0;

- if (__get_cpu_var(nesting_count)++)
+ if (__get_cpu_var(xed_nesting_count)++)
goto out;

#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
@@ -653,8 +654,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)

BUG_ON(!irqs_disabled());

- count = __get_cpu_var(nesting_count);
- __get_cpu_var(nesting_count) = 0;
+ count = __get_cpu_var(xed_nesting_count);
+ __get_cpu_var(xed_nesting_count) = 0;
} while(count != 1);

out:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index bb553c3..0e0c9de 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -606,6 +606,8 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
}
}

+static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
+
/**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied
@@ -623,7 +625,6 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
unsigned long nr_pages_dirtied)
{
- static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
unsigned long ratelimit;
unsigned long *p;

@@ -636,7 +637,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
* tasks in balance_dirty_pages(). Period.
*/
preempt_disable();
- p = &__get_cpu_var(ratelimits);
+ p = &__get_cpu_var(bdp_ratelimits);
*p += nr_pages_dirtied;
if (unlikely(*p >= ratelimit)) {
*p = 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 84d90f2..a6e0e07 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -37,12 +37,13 @@ __initcall(init_syncookies);
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
+ ipv4_cookie_scratch);

static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(cookie_scratch);
+ __u32 *tmp = __get_cpu_var(ipv4_cookie_scratch);

memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
tmp[0] = (__force u32)saddr;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 23d0d6d..6b6ae91 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -74,12 +74,13 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
return child;
}

-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
+ ipv6_cookie_scratch);

static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(cookie_scratch);
+ __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);

/*
* we have 320 bits of information to hash, copy in the remaining
--
1.6.0.2

2009-06-17 03:44:00

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 2/9] percpu: use dynamic percpu allocator as the default percpu allocator

This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use
dynamic percpu allocator. The first chunk is allocated using
embedding helper and 8k is reserved for modules. This ensures that
the new allocator behaves almost identically to the original allocator
as long as static percpu variables are concerned, so it shouldn't
introduce much breakage.

s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing
range limit the addressing model imposes. Unfortunately, this breaks
if the address is specified using a variable, so for now, the two
archs aren't converted.

The following architectures are affected by this change.

* sh
* arm
* cris
* mips
* sparc(32)
* blackfin
* avr32
* parisc (broken, under investigation)
* m32r
* powerpc(32)

As this change makes the dynamic allocator the default one,
CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert -
CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted
archs. These archs implement their own setup_per_cpu_areas() and the
conversion is not trivial.

* powerpc(64)
* sparc(64)
* ia64
* alpha
* s390

Boot and batch alloc/free tests on x86_32 with debug code (x86_32
doesn't use default first chunk initialization). Compile tested on
sparc(32), powerpc(32), arm and alpha.

Kyle McMartin reported that this change breaks parisc. The problem is
still under investigation and he is okay with pushing this patch
forward and fixing parisc later.

[ Impact: use dynamic allocator for most archs w/o custom percpu setup ]

Signed-off-by: Tejun Heo <[email protected]>
Acked-by: Rusty Russell <[email protected]>
Acked-by: David S. Miller <[email protected]>
Acked-by: Benjamin Herrenschmidt <[email protected]>
Acked-by: Martin Schwidefsky <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
Cc: Paul Mundt <[email protected]>
Cc: Russell King <[email protected]>
Cc: Mikael Starvik <[email protected]>
Cc: Ralf Baechle <[email protected]>
Cc: Bryan Wu <[email protected]>
Cc: Kyle McMartin <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Grant Grundler <[email protected]>
Cc: Hirokazu Takata <[email protected]>
Cc: Richard Henderson <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
arch/alpha/Kconfig | 3 +++
arch/ia64/Kconfig | 3 +++
arch/powerpc/Kconfig | 3 +++
arch/s390/Kconfig | 3 +++
arch/sparc/Kconfig | 3 +++
arch/x86/Kconfig | 3 ---
include/linux/percpu.h | 12 +++++++++---
init/main.c | 24 ------------------------
kernel/module.c | 6 +++---
mm/Makefile | 2 +-
mm/allocpercpu.c | 28 ++++++++++++++++++++++++++++
mm/percpu.c | 40 +++++++++++++++++++++++++++++++++++++++-
12 files changed, 95 insertions(+), 35 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9fb8aae..05d8640 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,6 +70,9 @@ config AUTO_IRQ_AFFINITY
depends on SMP
default y

+config HAVE_LEGACY_PER_CPU_AREA
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 294a3b1..8e88df7 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -88,6 +88,9 @@ config GENERIC_TIME_VSYSCALL
bool
default y

+config HAVE_LEGACY_PER_CPU_AREA
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool y

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fb344d..e799536 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -46,6 +46,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
bool
default y

+config HAVE_LEGACY_PER_CPU_AREA
+ def_bool PPC64
+
config HAVE_SETUP_PER_CPU_AREA
def_bool PPC64

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a14dba0..f4a3cc6 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -75,6 +75,9 @@ config VIRT_CPU_ACCOUNTING
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y

+config HAVE_LEGACY_PER_CPU_AREA
+ def_bool y
+
mainmenu "Linux Kernel Configuration"

config S390
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 3f8b6a9..7a8698b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -92,6 +92,9 @@ config AUDIT_ARCH
bool
default y

+config HAVE_LEGACY_PER_CPU_AREA
+ def_bool y if SPARC64
+
config HAVE_SETUP_PER_CPU_AREA
def_bool y if SPARC64

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 356d2ec..b973f4a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -146,9 +146,6 @@ config ARCH_HAS_CACHE_LINE_SIZE
config HAVE_SETUP_PER_CPU_AREA
def_bool y

-config HAVE_DYNAMIC_PER_CPU_AREA
- def_bool y
-
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 26fd9d1..e500034 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -34,7 +34,7 @@

#ifdef CONFIG_SMP

-#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA

/* minimum unit size, also is the maximum supported allocation size */
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10)
@@ -80,7 +80,7 @@ extern ssize_t __init pcpu_embed_first_chunk(

extern void *__alloc_reserved_percpu(size_t size, size_t align);

-#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */

struct percpu_data {
void *ptrs[1];
@@ -99,11 +99,15 @@ struct percpu_data {
(__typeof__(ptr))__p->ptrs[(cpu)]; \
})

-#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */

extern void *__alloc_percpu(size_t size, size_t align);
extern void free_percpu(void *__pdata);

+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+extern void __init setup_per_cpu_areas(void);
+#endif
+
#else /* CONFIG_SMP */

#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
@@ -124,6 +128,8 @@ static inline void free_percpu(void *p)
kfree(p);
}

+static inline void __init setup_per_cpu_areas(void) { }
+
#endif /* CONFIG_SMP */

#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \
diff --git a/init/main.c b/init/main.c
index f6204f7..7b87bd1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -356,7 +356,6 @@ static void __init smp_init(void)
#define smp_init() do { } while (0)
#endif

-static inline void setup_per_cpu_areas(void) { }
static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }

@@ -377,29 +376,6 @@ static void __init setup_nr_cpu_ids(void)
nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
}

-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static void __init setup_per_cpu_areas(void)
-{
- unsigned long size, i;
- char *ptr;
- unsigned long nr_possible_cpus = num_possible_cpus();
-
- /* Copy section for each CPU (we discard the original) */
- size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
- ptr = alloc_bootmem_pages(size * nr_possible_cpus);
-
- for_each_possible_cpu(i) {
- __per_cpu_offset[i] = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- ptr += size;
- }
-}
-#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
-
/* Called by boot processor to activate the rest. */
static void __init smp_init(void)
{
diff --git a/kernel/module.c b/kernel/module.c
index 215aaab..9f8631c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module);

#ifdef CONFIG_SMP

-#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA

static void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name)
@@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme)
free_percpu(freeme);
}

-#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */

/* Number of blocks used and allocated. */
static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -535,7 +535,7 @@ static int percpu_modinit(void)
}
__initcall(percpu_modinit);

-#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */

static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
diff --git a/mm/Makefile b/mm/Makefile
index e89acb0..71d9fa8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
-ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
obj-$(CONFIG_SMP) += percpu.o
else
obj-$(CONFIG_SMP) += allocpercpu.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index dfdee6a..df34cea 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -5,6 +5,8 @@
*/
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <asm/sections.h>

#ifndef cache_line_size
#define cache_line_size() L1_CACHE_BYTES
@@ -147,3 +149,29 @@ void free_percpu(void *__pdata)
kfree(__percpu_disguise(__pdata));
}
EXPORT_SYMBOL_GPL(free_percpu);
+
+/*
+ * Generic percpu area setup.
+ */
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+
+EXPORT_SYMBOL(__per_cpu_offset);
+
+void __init setup_per_cpu_areas(void)
+{
+ unsigned long size, i;
+ char *ptr;
+ unsigned long nr_possible_cpus = num_possible_cpus();
+
+ /* Copy section for each CPU (we discard the original) */
+ size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+ ptr = alloc_bootmem_pages(size * nr_possible_cpus);
+
+ for_each_possible_cpu(i) {
+ __per_cpu_offset[i] = ptr - __per_cpu_start;
+ memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ ptr += size;
+ }
+}
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
diff --git a/mm/percpu.c b/mm/percpu.c
index d06f474..01a6fd9 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -43,7 +43,7 @@
*
* To use this allocator, arch code should do the followings.
*
- * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+ * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA
*
* - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
* regular address to percpu pointer and back if they need to be
@@ -1270,3 +1270,41 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
reserved_size, dyn_size,
pcpue_unit_size, pcpue_ptr, NULL);
}
+
+/*
+ * Generic percpu area setup.
+ *
+ * The embedding helper is used because its behavior closely resembles
+ * the original non-dynamic generic percpu area setup. This is
+ * important because many archs have addressing restrictions and might
+ * fail if the percpu area is located far away from the previous
+ * location. As an added bonus, in non-NUMA cases, embedding is
+ * generally a good idea TLB-wise because percpu area can piggy back
+ * on the physical linear memory mapping which uses large page
+ * mappings on applicable archs.
+ */
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+void __init setup_per_cpu_areas(void)
+{
+ size_t static_size = __per_cpu_end - __per_cpu_start;
+ ssize_t unit_size;
+ unsigned long delta;
+ unsigned int cpu;
+
+ /*
+ * Always reserve area for module percpu variables. That's
+ * what the legacy allocator did.
+ */
+ unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE,
+ PERCPU_DYNAMIC_RESERVE, -1);
+ if (unit_size < 0)
+ panic("Failed to initialized percpu areas.");
+
+ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+ for_each_possible_cpu(cpu)
+ __per_cpu_offset[cpu] = delta + cpu * unit_size;
+}
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
--
1.6.0.2

2009-06-17 12:41:20

by James Bottomley

[permalink] [raw]

Subject: Re: [PATCH 2/9] percpu: use dynamic percpu allocator as the default percpu allocator

On Wed, 2009-06-17 at 12:40 +0900, Tejun Heo wrote:
>
> The following architectures are affected by this change.
>
> * sh
> * arm
> * cris
> * mips
> * sparc(32)
> * blackfin
> * avr32
> * parisc (broken, under investigation)

I haven't seen anything of this on the parisc list ... what's the
problem? and do you need systems or any other help to fix it?

James

2009-06-17 13:16:32

by Tejun Heo

[permalink] [raw]

Subject: Re: [PATCH 2/9] percpu: use dynamic percpu allocator as the default percpu allocator

James Bottomley wrote:
> On Wed, 2009-06-17 at 12:40 +0900, Tejun Heo wrote:
>> The following architectures are affected by this change.
>>
>> * sh
>> * arm
>> * cris
>> * mips
>> * sparc(32)
>> * blackfin
>> * avr32
>> * parisc (broken, under investigation)
>
> I haven't seen anything of this on the parisc list ... what's the
> problem? and do you need systems or any other help to fix it?

I really don't know what's wrong as nothing should be different from
the old percpu allocator as seen from the arch code.

Kyle reported that the system didn't boot with the patch applied.
Access to a system would be nice but boot debugging usually is done
much easier with local access, so I was hoping he would follow up.
Kyle, do you have any details?

Thanks.

--
tejun

2009-06-17 13:33:18

by Kyle McMartin

[permalink] [raw]

Subject: Re: [PATCH 2/9] percpu: use dynamic percpu allocator as the default percpu allocator

On Wed, Jun 17, 2009 at 08:40:59AM -0400, James Bottomley wrote:
> > * parisc (broken, under investigation)
>
> I haven't seen anything of this on the parisc list ... what's the
> problem? and do you need systems or any other help to fix it?
>

Ah, maybe that was only on linux-kernel or in private mail; we're
oops-ing again in the oops-handler... I almost had this debugged last
weekend, but ran out of time.

2009-06-17 16:58:36

by Christoph Lameter

[permalink] [raw]

Subject: Re: [PATCH 4/9] percpu: cleanup percpu array definitions

Reviewed-by: Christoph Lameter <[email protected]>

2009-06-17 17:00:57

by Christoph Lameter

[permalink] [raw]

Subject: Re: [PATCH 5/9] percpu: clean up percpu variable definitions

The wrapping fixes could have been put in the earlier patch.

Reviewed-by: Christoph Lameter <[email protected]>

2009-06-18 07:26:35

by Tejun Heo

[permalink] [raw]

Subject: [UPDATED PATCH 3/9] CRIS: Change DEFINE_PER_CPU of current_pgd to be non volatile.

From: Jesper Nilsson <[email protected]>
Subject: CRIS: Change DEFINE_PER_CPU of current_pgd to be non volatile.

The DEFINE_PER_CPU of current_pgd was on CRIS defined using volatile,
which is not needed. Remove volatile.

Tested on an ARTPEC-3 (CRISv32) board.

tj: extern DEFINE_PER_CPU() replaced with DECLARE_PER_CPU()

[ Impact: code cleanup ]

Signed-off-by: Jesper Nilsson <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
---
Updated such that extern DEFINE -> DECLARE change is done here not
later in the series as suggested by Christoph Lameter.

arch/cris/include/asm/mmu_context.h | 3 ++-
arch/cris/mm/fault.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)

Index: work/arch/cris/include/asm/mmu_context.h
===================================================================
--- work.orig/arch/cris/include/asm/mmu_context.h
+++ work/arch/cris/include/asm/mmu_context.h
@@ -17,7 +17,8 @@ extern void switch_mm(struct mm_struct *
* registers like cr3 on the i386
*/

-extern volatile DEFINE_PER_CPU(pgd_t *,current_pgd); /* defined in arch/cris/mm/fault.c */
+/* defined in arch/cris/mm/fault.c */
+DECLARE_PER_CPU(pgd_t *, current_pgd);

static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
Index: work/arch/cris/mm/fault.c
===================================================================
--- work.orig/arch/cris/mm/fault.c
+++ work/arch/cris/mm/fault.c
@@ -29,7 +29,7 @@ extern void die_if_kernel(const char *,

/* current active page directory */

-volatile DEFINE_PER_CPU(pgd_t *,current_pgd);
+DEFINE_PER_CPU(pgd_t *, current_pgd);
unsigned long cris_signal_return_page;

/*

2009-06-18 07:36:46

by Tejun Heo

[permalink] [raw]

Subject: [PATCH 5/9 UPDATED] percpu: clean up percpu variable definitions

Percpu variable definition is about to be updated such that no static
declaration is allowed. Update percpu variable definitions
accordingly.

* as,cfq: rename ioc_count uniquely

* cpufreq: rename cpu_dbs_info uniquely

* xen: move nesting_count out of xen_evtchn_do_upcall() and rename it

* mm: move ratelimits out of balance_dirty_pages_ratelimited_nr() and
rename it

* ipv4,6: rename cookie_scratch uniquely

* x86 pref_counter: rename prev_left to pmc_prev_left, irq_entry to
pmc_irq_entry and nmi_entry to pmc_nmi_entry

* perf_counter: rename disable_count to perf_disable_count

* ftrace: rename test_event_disable to ftrace_test_event_disable

* kmemleak: rename test_pointer to kmemleak_test_pointer

[ Impact: percpu usage cleanups, no duplicate static percpu var names ]

Signed-off-by: Tejun Heo <[email protected]>
Reviewed-by: Christoph Lameter <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Dave Jones <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: linux-mm <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Catalin Marinas <[email protected]>
---
Updates for newly added percpu variables in perf_counter, kmemleak and
ftrace added. Note that these changes were accidentally put in the
next patch in the original posting. The end result remains the same.

arch/x86/kernel/cpu/perf_counter.c | 14 +++++++-------
block/as-iosched.c | 10 +++++-----
block/cfq-iosched.c | 10 +++++-----
drivers/cpufreq/cpufreq_conservative.c | 12 ++++++------
drivers/cpufreq/cpufreq_ondemand.c | 15 ++++++++-------
drivers/xen/events.c | 9 +++++----
kernel/perf_counter.c | 6 +++---
kernel/trace/trace_events.c | 6 +++---
mm/kmemleak-test.c | 6 +++---
mm/page-writeback.c | 5 +++--
net/ipv4/syncookies.c | 5 +++--
net/ipv6/syncookies.c | 5 +++--
12 files changed, 54 insertions(+), 49 deletions(-)

Index: work/block/as-iosched.c
===================================================================
--- work.orig/block/as-iosched.c
+++ work/block/as-iosched.c
@@ -146,7 +146,7 @@ enum arq_state {
#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)

-static DEFINE_PER_CPU(unsigned long, ioc_count);
+static DEFINE_PER_CPU(unsigned long, as_ioc_count);
static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock);

@@ -161,7 +161,7 @@ static void as_antic_stop(struct as_data
static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
- elv_ioc_count_dec(ioc_count);
+ elv_ioc_count_dec(as_ioc_count);
if (ioc_gone) {
/*
* AS scheduler is exiting, grab exit lock and check
@@ -169,7 +169,7 @@ static void free_as_io_context(struct as
* complete ioc_gone and set it back to NULL.
*/
spin_lock(&ioc_gone_lock);
- if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ if (ioc_gone && !elv_ioc_count_read(as_ioc_count)) {
complete(ioc_gone);
ioc_gone = NULL;
}
@@ -211,7 +211,7 @@ static struct as_io_context *alloc_as_io
ret->seek_total = 0;
ret->seek_samples = 0;
ret->seek_mean = 0;
- elv_ioc_count_inc(ioc_count);
+ elv_ioc_count_inc(as_ioc_count);
}

return ret;
@@ -1507,7 +1507,7 @@ static void __exit as_exit(void)
ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
- if (elv_ioc_count_read(ioc_count))
+ if (elv_ioc_count_read(as_ioc_count))
wait_for_completion(&all_gone);
synchronize_rcu();
}
Index: work/block/cfq-iosched.c
===================================================================
--- work.orig/block/cfq-iosched.c
+++ work/block/cfq-iosched.c
@@ -48,7 +48,7 @@ static int cfq_slice_idle = HZ / 125;
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;

-static DEFINE_PER_CPU(unsigned long, ioc_count);
+static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock);

@@ -1422,7 +1422,7 @@ static void cfq_cic_free_rcu(struct rcu_
cic = container_of(head, struct cfq_io_context, rcu_head);

kmem_cache_free(cfq_ioc_pool, cic);
- elv_ioc_count_dec(ioc_count);
+ elv_ioc_count_dec(cfq_ioc_count);

if (ioc_gone) {
/*
@@ -1431,7 +1431,7 @@ static void cfq_cic_free_rcu(struct rcu_
* complete ioc_gone and set it back to NULL
*/
spin_lock(&ioc_gone_lock);
- if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
complete(ioc_gone);
ioc_gone = NULL;
}
@@ -1557,7 +1557,7 @@ cfq_alloc_io_context(struct cfq_data *cf
INIT_HLIST_NODE(&cic->cic_list);
cic->dtor = cfq_free_io_context;
cic->exit = cfq_exit_io_context;
- elv_ioc_count_inc(ioc_count);
+ elv_ioc_count_inc(cfq_ioc_count);
}

return cic;
@@ -2658,7 +2658,7 @@ static void __exit cfq_exit(void)
* this also protects us from entering cfq_slab_kill() with
* pending RCU callbacks
*/
- if (elv_ioc_count_read(ioc_count))
+ if (elv_ioc_count_read(cfq_ioc_count))
wait_for_completion(&all_gone);
cfq_slab_kill();
}
Index: work/drivers/cpufreq/cpufreq_conservative.c
===================================================================
--- work.orig/drivers/cpufreq/cpufreq_conservative.c
+++ work/drivers/cpufreq/cpufreq_conservative.c
@@ -80,7 +80,7 @@ struct cpu_dbs_info_s {
int cpu;
unsigned int enable:1;
};
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

@@ -153,7 +153,7 @@ dbs_cpufreq_notifier(struct notifier_blo
void *data)
{
struct cpufreq_freqs *freq = data;
- struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info,
+ struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
freq->cpu);

struct cpufreq_policy *policy;
@@ -326,7 +326,7 @@ static ssize_t store_ignore_nice_load(st
/* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) {
struct cpu_dbs_info_s *dbs_info;
- dbs_info = &per_cpu(cpu_dbs_info, j);
+ dbs_info = &per_cpu(cs_cpu_dbs_info, j);
dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice)
@@ -416,7 +416,7 @@ static void dbs_check_cpu(struct cpu_dbs
cputime64_t cur_wall_time, cur_idle_time;
unsigned int idle_time, wall_time;

- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);

cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

@@ -556,7 +556,7 @@ static int cpufreq_governor_dbs(struct c
unsigned int j;
int rc;

- this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+ this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);

switch (event) {
case CPUFREQ_GOV_START:
@@ -576,7 +576,7 @@ static int cpufreq_governor_dbs(struct c

for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info;
- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
j_dbs_info->cur_policy = policy;

j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
Index: work/drivers/cpufreq/cpufreq_ondemand.c
===================================================================
--- work.orig/drivers/cpufreq/cpufreq_ondemand.c
+++ work/drivers/cpufreq/cpufreq_ondemand.c
@@ -87,7 +87,7 @@ struct cpu_dbs_info_s {
unsigned int enable:1,
sample_type:1;
};
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

@@ -165,7 +165,8 @@ static unsigned int powersave_bias_targe
unsigned int freq_hi, freq_lo;
unsigned int index = 0;
unsigned int jiffies_total, jiffies_hi, jiffies_lo;
- struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
+ policy->cpu);

if (!dbs_info->freq_table) {
dbs_info->freq_lo = 0;
@@ -210,7 +211,7 @@ static void ondemand_powersave_bias_init
{
int i;
for_each_online_cpu(i) {
- struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, i);
dbs_info->freq_table = cpufreq_frequency_get_table(i);
dbs_info->freq_lo = 0;
}
@@ -325,7 +326,7 @@ static ssize_t store_ignore_nice_load(st
/* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) {
struct cpu_dbs_info_s *dbs_info;
- dbs_info = &per_cpu(cpu_dbs_info, j);
+ dbs_info = &per_cpu(od_cpu_dbs_info, j);
dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice)
@@ -419,7 +420,7 @@ static void dbs_check_cpu(struct cpu_dbs
unsigned int load, load_freq;
int freq_avg;

- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

@@ -576,7 +577,7 @@ static int cpufreq_governor_dbs(struct c
unsigned int j;
int rc;

- this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

switch (event) {
case CPUFREQ_GOV_START:
@@ -598,7 +599,7 @@ static int cpufreq_governor_dbs(struct c

for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info_s *j_dbs_info;
- j_dbs_info = &per_cpu(cpu_dbs_info, j);
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
j_dbs_info->cur_policy = policy;

j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
Index: work/drivers/xen/events.c
===================================================================
--- work.orig/drivers/xen/events.c
+++ work/drivers/xen/events.c
@@ -602,6 +602,8 @@ irqreturn_t xen_debug_interrupt(int irq,
return IRQ_HANDLED;
}

+static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+
/*
* Search the CPUs pending events bitmasks. For each one found, map
* the event number to an irq, and feed it into do_IRQ() for
@@ -617,7 +619,6 @@ void xen_evtchn_do_upcall(struct pt_regs
struct pt_regs *old_regs = set_irq_regs(regs);
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
- static DEFINE_PER_CPU(unsigned, nesting_count);
unsigned count;

exit_idle();
@@ -628,7 +629,7 @@ void xen_evtchn_do_upcall(struct pt_regs

vcpu_info->evtchn_upcall_pending = 0;

- if (__get_cpu_var(nesting_count)++)
+ if (__get_cpu_var(xed_nesting_count)++)
goto out;

#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
@@ -653,8 +654,8 @@ void xen_evtchn_do_upcall(struct pt_regs

BUG_ON(!irqs_disabled());

- count = __get_cpu_var(nesting_count);
- __get_cpu_var(nesting_count) = 0;
+ count = __get_cpu_var(xed_nesting_count);
+ __get_cpu_var(xed_nesting_count) = 0;
} while(count != 1);

out:
Index: work/mm/page-writeback.c
===================================================================
--- work.orig/mm/page-writeback.c
+++ work/mm/page-writeback.c
@@ -606,6 +606,8 @@ void set_page_dirty_balance(struct page
}
}

+static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
+
/**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied
@@ -623,7 +625,6 @@ void set_page_dirty_balance(struct page
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
unsigned long nr_pages_dirtied)
{
- static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
unsigned long ratelimit;
unsigned long *p;

@@ -636,7 +637,7 @@ void balance_dirty_pages_ratelimited_nr(
* tasks in balance_dirty_pages(). Period.
*/
preempt_disable();
- p = &__get_cpu_var(ratelimits);
+ p = &__get_cpu_var(bdp_ratelimits);
*p += nr_pages_dirtied;
if (unlikely(*p >= ratelimit)) {
*p = 0;
Index: work/net/ipv4/syncookies.c
===================================================================
--- work.orig/net/ipv4/syncookies.c
+++ work/net/ipv4/syncookies.c
@@ -37,12 +37,13 @@ __initcall(init_syncookies);
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
+ ipv4_cookie_scratch);

static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(cookie_scratch);
+ __u32 *tmp = __get_cpu_var(ipv4_cookie_scratch);

memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
tmp[0] = (__force u32)saddr;
Index: work/net/ipv6/syncookies.c
===================================================================
--- work.orig/net/ipv6/syncookies.c
+++ work/net/ipv6/syncookies.c
@@ -74,12 +74,13 @@ static inline struct sock *get_cookie_so
return child;
}

-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], cookie_scratch);
+static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
+ ipv6_cookie_scratch);

static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(cookie_scratch);
+ __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);

/*
* we have 320 bits of information to hash, copy in the remaining
Index: work/arch/x86/kernel/cpu/perf_counter.c
===================================================================
--- work.orig/arch/x86/kernel/cpu/perf_counter.c
+++ work/arch/x86/kernel/cpu/perf_counter.c
@@ -861,7 +861,7 @@ amd_pmu_disable_counter(struct hw_perf_c
x86_pmu_disable_counter(hwc, idx);
}

-static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
+static DEFINE_PER_CPU(u64, pmc_prev_left[X86_PMC_IDX_MAX]);

/*
* Set the next IRQ period, based on the hwc->period_left value.
@@ -900,7 +900,7 @@ x86_perf_counter_set_period(struct perf_
if (left > x86_pmu.max_period)
left = x86_pmu.max_period;

- per_cpu(prev_left[idx], smp_processor_id()) = left;
+ per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

/*
* The hw counter starts counting from this counter offset,
@@ -1088,7 +1088,7 @@ void perf_counter_print_debug(void)
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
rdmsrl(x86_pmu.perfctr + idx, pmc_count);

- prev_left = per_cpu(prev_left[idx], cpu);
+ prev_left = per_cpu(pmc_prev_left[idx], cpu);

pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
cpu, idx, pmc_ctrl);
@@ -1560,8 +1560,8 @@ void callchain_store(struct perf_callcha
entry->ip[entry->nr++] = ip;
}

-static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);

static void
@@ -1696,9 +1696,9 @@ struct perf_callchain_entry *perf_callch
struct perf_callchain_entry *entry;

if (in_nmi())
- entry = &__get_cpu_var(nmi_entry);
+ entry = &__get_cpu_var(pmc_nmi_entry);
else
- entry = &__get_cpu_var(irq_entry);
+ entry = &__get_cpu_var(pmc_irq_entry);

entry->nr = 0;
entry->hv = 0;
Index: work/kernel/perf_counter.c
===================================================================
--- work.orig/kernel/perf_counter.c
+++ work/kernel/perf_counter.c
@@ -98,16 +98,16 @@ hw_perf_group_sched_in(struct perf_count

void __weak perf_counter_print_debug(void) { }

-static DEFINE_PER_CPU(int, disable_count);
+static DEFINE_PER_CPU(int, perf_disable_count);

void __perf_disable(void)
{
- __get_cpu_var(disable_count)++;
+ __get_cpu_var(perf_disable_count)++;
}

bool __perf_enable(void)
{
- return !--__get_cpu_var(disable_count);
+ return !--__get_cpu_var(perf_disable_count);
}

void perf_disable(void)
Index: work/kernel/trace/trace_events.c
===================================================================
--- work.orig/kernel/trace/trace_events.c
+++ work/kernel/trace/trace_events.c
@@ -1318,7 +1318,7 @@ static __init void event_trace_self_test

#ifdef CONFIG_FUNCTION_TRACER

-static DEFINE_PER_CPU(atomic_t, test_event_disable);
+static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);

static void
function_test_events_call(unsigned long ip, unsigned long parent_ip)
@@ -1334,7 +1334,7 @@ function_test_events_call(unsigned long
pc = preempt_count();
resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+ disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));

if (disabled != 1)
goto out;
@@ -1352,7 +1352,7 @@ function_test_events_call(unsigned long
trace_nowake_buffer_unlock_commit(event, flags, pc);

out:
- atomic_dec(&per_cpu(test_event_disable, cpu));
+ atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
ftrace_preempt_enable(resched);
}

Index: work/mm/kmemleak-test.c
===================================================================
--- work.orig/mm/kmemleak-test.c
+++ work/mm/kmemleak-test.c
@@ -36,7 +36,7 @@ struct test_node {
};

static LIST_HEAD(test_list);
-static DEFINE_PER_CPU(void *, test_pointer);
+static DEFINE_PER_CPU(void *, kmemleak_test_pointer);

/*
* Some very simple testing. This function needs to be extended for
@@ -86,9 +86,9 @@ static int __init kmemleak_test_init(voi
}

for_each_possible_cpu(i) {
- per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL);
+ per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
pr_info("kmemleak: kmalloc(129) = %p\n",
- per_cpu(test_pointer, i));
+ per_cpu(kmemleak_test_pointer, i));
}

return 0;