2005-12-05 23:59:41

by Ravikiran G Thirumalai

[permalink] [raw]
Subject: [patch 1/2] Change maxaligned_in_smp alignment macros to internodealigned_in_smp macros

Andrew,
The following 2 patches change the maxaligned macros to internodealigned
macros and kill L1_CACHE_SHIFT_MAX. Please consider them for inclusion in -mm.

Thanks,
Kiran

---

____cacheline_maxaligned_in_smp is currently used to align critical
structures and avoid false sharing. It uses per-arch L1_CACHE_SHIFT_MAX
and people find L1_CACHE_SHIFT_MAX useless.

However, we have been using ____cacheline_maxaligned_in_smp to align
structures on the internode cacheline size. As per Andi's suggestion,
the following patch kills ____cacheline_maxaligned_in_smp and
introduces INTERNODE_CACHE_SHIFT, which defaults to L1_CACHE_SHIFT
for all arches. Arches needing L3/internode cacheline alignment
can define INTERNODE_CACHE_SHIFT in the arch's asm/cache.h.
The patch replaces ____cacheline_maxaligned_in_smp with
____cacheline_internodealigned_in_smp.

With this patch, L1_CACHE_SHIFT_MAX can be killed

Signed-off-by: Ravikiran Thirumalai <[email protected]>
Signed-off-by: Shai Fultheim <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>


Index: linux-2.6.15-rc5mm1/arch/i386/kernel/init_task.c
===================================================================
--- linux-2.6.15-rc5mm1.orig/arch/i386/kernel/init_task.c 2005-10-27 17:02:08.000000000 -0700
+++ linux-2.6.15-rc5mm1/arch/i386/kernel/init_task.c 2005-12-05 10:40:04.000000000 -0800
@@ -42,5 +42,5 @@
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's.
*/
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;

Index: linux-2.6.15-rc5mm1/arch/i386/kernel/irq.c
===================================================================
--- linux-2.6.15-rc5mm1.orig/arch/i386/kernel/irq.c 2005-12-05 10:29:31.000000000 -0800
+++ linux-2.6.15-rc5mm1/arch/i386/kernel/irq.c 2005-12-05 10:40:04.000000000 -0800
@@ -19,7 +19,7 @@
#include <linux/cpu.h>
#include <linux/delay.h>

-DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);

#ifndef CONFIG_X86_LOCAL_APIC
Index: linux-2.6.15-rc5mm1/arch/x86_64/kernel/init_task.c
===================================================================
--- linux-2.6.15-rc5mm1.orig/arch/x86_64/kernel/init_task.c 2005-10-27 17:02:08.000000000 -0700
+++ linux-2.6.15-rc5mm1/arch/x86_64/kernel/init_task.c 2005-12-05 10:40:04.000000000 -0800
@@ -44,6 +44,6 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;

#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
Index: linux-2.6.15-rc5mm1/include/linux/cache.h
===================================================================
--- linux-2.6.15-rc5mm1.orig/include/linux/cache.h 2005-10-27 17:02:08.000000000 -0700
+++ linux-2.6.15-rc5mm1/include/linux/cache.h 2005-12-05 10:40:04.000000000 -0800
@@ -45,12 +45,21 @@
#endif /* CONFIG_SMP */
#endif

-#if !defined(____cacheline_maxaligned_in_smp)
+/*
+ * The maximum alignment needed for some critical structures
+ * These could be inter-node cacheline sizes/L3 cacheline
+ * size etc. Define this in asm/cache.h for your arch
+ */
+#ifndef INTERNODE_CACHE_SHIFT
+#define INTERNODE_CACHE_SHIFT L1_CACHE_SHIFT
+#endif
+
+#if !defined(____cacheline_internodealigned_in_smp)
#if defined(CONFIG_SMP)
-#define ____cacheline_maxaligned_in_smp \
- __attribute__((__aligned__(1 << (L1_CACHE_SHIFT_MAX))))
+#define ____cacheline_internodealigned_in_smp \
+ __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))
#else
-#define ____cacheline_maxaligned_in_smp
+#define ____cacheline_internodealigned_in_smp
#endif
#endif

Index: linux-2.6.15-rc5mm1/include/linux/ide.h
===================================================================
--- linux-2.6.15-rc5mm1.orig/include/linux/ide.h 2005-12-05 10:29:32.000000000 -0800
+++ linux-2.6.15-rc5mm1/include/linux/ide.h 2005-12-05 10:40:04.000000000 -0800
@@ -922,7 +922,7 @@
unsigned dma;

void (*led_act)(void *data, int rw);
-} ____cacheline_maxaligned_in_smp ide_hwif_t;
+} ____cacheline_internodealigned_in_smp ide_hwif_t;

/*
* internal ide interrupt handler type
Index: linux-2.6.15-rc5mm1/include/linux/mmzone.h
===================================================================
--- linux-2.6.15-rc5mm1.orig/include/linux/mmzone.h 2005-12-05 10:29:32.000000000 -0800
+++ linux-2.6.15-rc5mm1/include/linux/mmzone.h 2005-12-05 10:40:04.000000000 -0800
@@ -38,7 +38,7 @@
#if defined(CONFIG_SMP)
struct zone_padding {
char x[0];
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;
#define ZONE_PADDING(name) struct zone_padding name;
#else
#define ZONE_PADDING(name)
@@ -233,7 +233,7 @@
* rarely used fields:
*/
char *name;
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;


/*
Index: linux-2.6.15-rc5mm1/include/linux/rcupdate.h
===================================================================
--- linux-2.6.15-rc5mm1.orig/include/linux/rcupdate.h 2005-12-05 10:29:32.000000000 -0800
+++ linux-2.6.15-rc5mm1/include/linux/rcupdate.h 2005-12-05 10:40:04.000000000 -0800
@@ -65,7 +65,7 @@
long cur; /* Current batch number. */
long completed; /* Number of the last completed batch */
int next_pending; /* Is the next batch already waiting? */
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;

/* Is batch a before batch b ? */
static inline int rcu_batch_before(long a, long b)
Index: linux-2.6.15-rc5mm1/kernel/rcupdate.c
===================================================================
--- linux-2.6.15-rc5mm1.orig/kernel/rcupdate.c 2005-12-05 10:29:33.000000000 -0800
+++ linux-2.6.15-rc5mm1/kernel/rcupdate.c 2005-12-05 10:40:04.000000000 -0800
@@ -61,9 +61,9 @@
/* for current batch to proceed. */
};

-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };

DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
Index: linux-2.6.15-rc5mm1/mm/sparse.c
===================================================================
--- linux-2.6.15-rc5mm1.orig/mm/sparse.c 2005-12-05 10:29:15.000000000 -0800
+++ linux-2.6.15-rc5mm1/mm/sparse.c 2005-12-05 10:40:04.000000000 -0800
@@ -18,10 +18,10 @@
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section *mem_section[NR_SECTION_ROOTS]
- ____cacheline_maxaligned_in_smp;
+ ____cacheline_internodealigned_in_smp;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
- ____cacheline_maxaligned_in_smp;
+ ____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);


2005-12-06 00:01:04

by Ravikiran G Thirumalai

[permalink] [raw]
Subject: [patch 2/2] Kill L1_CACHE_SHIFT_MAX

Patch to kill L1_CACHE_SHIFT_MAX from all arches.
Since L1_CACHE_SHIFT_MAX is no longer used after the introduction
of INTERNODE_CACHE_SHIFT, kill L1_CACHE_SHIFT_MAX.

Signed-off-by: Ravikiran Thirumalai <[email protected]>
Signed-off-by: Shai Fultheim <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

Index: linux-2.6.15-rc4/include/asm-alpha/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-alpha/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-alpha/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -20,6 +20,5 @@

#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
#define SMP_CACHE_BYTES L1_CACHE_BYTES
-#define L1_CACHE_SHIFT_MAX L1_CACHE_SHIFT

#endif
Index: linux-2.6.15-rc4/include/asm-arm/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-arm/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-arm/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -7,9 +7,4 @@
#define L1_CACHE_SHIFT 5
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

-/*
- * largest L1 which this arch supports
- */
-#define L1_CACHE_SHIFT_MAX 5
-
#endif
Index: linux-2.6.15-rc4/include/asm-cris/arch-v10/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-cris/arch-v10/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-cris/arch-v10/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -4,6 +4,5 @@
/* Etrax 100LX have 32-byte cache-lines. */
#define L1_CACHE_BYTES 32
#define L1_CACHE_SHIFT 5
-#define L1_CACHE_SHIFT_MAX 5

#endif /* _ASM_ARCH_CACHE_H */
Index: linux-2.6.15-rc4/include/asm-cris/arch-v32/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-cris/arch-v32/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-cris/arch-v32/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -4,6 +4,5 @@
/* A cache-line is 32 bytes. */
#define L1_CACHE_BYTES 32
#define L1_CACHE_SHIFT 5
-#define L1_CACHE_SHIFT_MAX 5

#endif /* _ASM_CRIS_ARCH_CACHE_H */
Index: linux-2.6.15-rc4/include/asm-cris/dma-mapping.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-cris/dma-mapping.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-cris/dma-mapping.h 2005-12-02 16:59:35.000000000 -0800
@@ -153,7 +153,7 @@
static inline int
dma_get_cache_alignment(void)
{
- return (1 << L1_CACHE_SHIFT_MAX);
+ return (1 << INTERNODE_CACHE_SHIFT);
}

#define dma_is_consistent(d) (1)
Index: linux-2.6.15-rc4/include/asm-generic/dma-mapping.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-generic/dma-mapping.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-generic/dma-mapping.h 2005-12-02 16:59:35.000000000 -0800
@@ -274,7 +274,7 @@
{
/* no easy way to get cache size on all processors, so return
* the maximum possible, to be safe */
- return (1 << L1_CACHE_SHIFT_MAX);
+ return (1 << INTERNODE_CACHE_SHIFT);
}

static inline void
Index: linux-2.6.15-rc4/include/asm-i386/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-i386/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-i386/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -10,6 +10,4 @@
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

-#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
-
#endif
Index: linux-2.6.15-rc4/include/asm-ia64/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-ia64/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-ia64/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -12,8 +12,6 @@
#define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

-#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
-
#ifdef CONFIG_SMP
# define SMP_CACHE_SHIFT L1_CACHE_SHIFT
# define SMP_CACHE_BYTES L1_CACHE_BYTES
Index: linux-2.6.15-rc4/include/asm-m32r/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-m32r/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-m32r/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -7,6 +7,4 @@
#define L1_CACHE_SHIFT 4
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

-#define L1_CACHE_SHIFT_MAX 4
-
#endif /* _ASM_M32R_CACHE_H */
Index: linux-2.6.15-rc4/include/asm-m68k/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-m68k/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-m68k/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -8,6 +8,4 @@
#define L1_CACHE_SHIFT 4
#define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT)

-#define L1_CACHE_SHIFT_MAX 4 /* largest L1 which this arch supports */
-
#endif
Index: linux-2.6.15-rc4/include/asm-mips/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-mips/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-mips/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -15,7 +15,6 @@
#define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

-#define L1_CACHE_SHIFT_MAX 6
#define SMP_CACHE_SHIFT L1_CACHE_SHIFT
#define SMP_CACHE_BYTES L1_CACHE_BYTES

Index: linux-2.6.15-rc4/include/asm-parisc/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-parisc/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-parisc/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -28,7 +28,6 @@
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))

#define SMP_CACHE_BYTES L1_CACHE_BYTES
-#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */

extern void flush_data_cache_local(void); /* flushes local data-cache only */
extern void flush_instruction_cache_local(void); /* flushes local code-cache only */
Index: linux-2.6.15-rc4/include/asm-powerpc/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-powerpc/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-powerpc/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -19,7 +19,6 @@
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

#define SMP_CACHE_BYTES L1_CACHE_BYTES
-#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */

#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
struct ppc64_caches {
Index: linux-2.6.15-rc4/include/asm-s390/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-s390/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-s390/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -13,7 +13,6 @@

#define L1_CACHE_BYTES 256
#define L1_CACHE_SHIFT 8
-#define L1_CACHE_SHIFT_MAX 8 /* largest L1 which this arch supports */

#define ARCH_KMALLOC_MINALIGN 8

Index: linux-2.6.15-rc4/include/asm-sh/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-sh/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-sh/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -22,8 +22,6 @@

#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))

-#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */
-
struct cache_info {
unsigned int ways;
unsigned int sets;
Index: linux-2.6.15-rc4/include/asm-sh64/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-sh64/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-sh64/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -20,8 +20,6 @@
#define L1_CACHE_ALIGN_MASK (~(L1_CACHE_BYTES - 1))
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES - 1)) & L1_CACHE_ALIGN_MASK)
#define L1_CACHE_SIZE_BYTES (L1_CACHE_BYTES << 10)
-/* Largest L1 which this arch supports */
-#define L1_CACHE_SHIFT_MAX 5

#ifdef MODULE
#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
Index: linux-2.6.15-rc4/include/asm-sparc/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-sparc/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-sparc/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -13,7 +13,6 @@
#define L1_CACHE_SHIFT 5
#define L1_CACHE_BYTES 32
#define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)))
-#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */

#define SMP_CACHE_BYTES 32

Index: linux-2.6.15-rc4/include/asm-sparc64/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-sparc64/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-sparc64/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -9,7 +9,6 @@
#define L1_CACHE_BYTES 32 /* Two 16-byte sub-blocks per line. */

#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
-#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */

#define SMP_CACHE_BYTES_SHIFT 6
#define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */
Index: linux-2.6.15-rc4/include/asm-um/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-um/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-um/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -13,9 +13,6 @@
# define L1_CACHE_SHIFT 5
#endif

-/* XXX: this is valid for x86 and x86_64. */
-#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
-
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

#endif
Index: linux-2.6.15-rc4/include/asm-v850/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-v850/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-v850/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -23,6 +23,4 @@
#define L1_CACHE_SHIFT 4
#endif

-#define L1_CACHE_SHIFT_MAX L1_CACHE_SHIFT
-
#endif /* __V850_CACHE_H__ */
Index: linux-2.6.15-rc4/include/asm-x86_64/cache.h
===================================================================
--- linux-2.6.15-rc4.orig/include/asm-x86_64/cache.h 2005-12-02 16:58:05.000000000 -0800
+++ linux-2.6.15-rc4/include/asm-x86_64/cache.h 2005-12-02 16:59:35.000000000 -0800
@@ -9,6 +9,5 @@
/* L1 cache line size */
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */

#endif