2007-09-21 20:44:52

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [0/45] x86 2.6.24 patches review I


Nothing too exciting. Lots of cleanups and a few bugfixes.

Please review.

-Andi


2007-09-21 20:45:24

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [1/45] x86: misc. constifications


From: "Jan Beulich" <[email protected]>
Miscellaneous x86 stuff that can live in .rodata.

Signed-off-by: Jan Beulich <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

arch/i386/kernel/alternative.c | 30 +++++++++++++++---------------
arch/i386/kernel/smpboot.c | 4 ++--
arch/i386/kernel/trampoline.S | 4 ++--
arch/i386/mach-voyager/voyager_smp.c | 4 ++--
arch/x86_64/kernel/setup.c | 4 ++--
arch/x86_64/kernel/smpboot.c | 4 ++--
arch/x86_64/kernel/trampoline.S | 7 ++++++-
7 files changed, 31 insertions(+), 26 deletions(-)

Index: linux/arch/i386/kernel/alternative.c
===================================================================
--- linux.orig/arch/i386/kernel/alternative.c
+++ linux/arch/i386/kernel/alternative.c
@@ -62,11 +62,11 @@ __setup("noreplace-paravirt", setup_nore
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
-asm("\t.data\nintelnops: "
+asm("\t.section .rodata, \"a\"\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8);
-extern unsigned char intelnops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
+extern const unsigned char intelnops[];
+static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
intelnops + 1,
@@ -80,11 +80,11 @@ static unsigned char *intel_nops[ASM_NOP
#endif

#ifdef K8_NOP1
-asm("\t.data\nk8nops: "
+asm("\t.section .rodata, \"a\"\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
K8_NOP7 K8_NOP8);
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
+extern const unsigned char k8nops[];
+static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
k8nops + 1,
@@ -98,11 +98,11 @@ static unsigned char *k8_nops[ASM_NOP_MA
#endif

#ifdef K7_NOP1
-asm("\t.data\nk7nops: "
+asm("\t.section .rodata, \"a\"\nk7nops: "
K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
K7_NOP7 K7_NOP8);
-extern unsigned char k7nops[];
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
+extern const unsigned char k7nops[];
+static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
k7nops + 1,
@@ -118,25 +118,25 @@ static unsigned char *k7_nops[ASM_NOP_MA
#ifdef CONFIG_X86_64

extern char __vsyscall_0;
-static inline unsigned char** find_nop_table(void)
+static inline const unsigned char*const * find_nop_table(void)
{
return k8_nops;
}

#else /* CONFIG_X86_64 */

-static struct nop {
+static const struct nop {
int cpuid;
- unsigned char **noptable;
+ const unsigned char *const *noptable;
} noptypes[] = {
{ X86_FEATURE_K8, k8_nops },
{ X86_FEATURE_K7, k7_nops },
{ -1, NULL }
};

-static unsigned char** find_nop_table(void)
+static const unsigned char*const * find_nop_table(void)
{
- unsigned char **noptable = intel_nops;
+ const unsigned char *const *noptable = intel_nops;
int i;

for (i = 0; noptypes[i].cpuid >= 0; i++) {
@@ -153,7 +153,7 @@ static unsigned char** find_nop_table(vo
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void add_nops(void *insns, unsigned int len)
{
- unsigned char **noptable = find_nop_table();
+ const unsigned char *const *noptable = find_nop_table();

while (len > 0) {
unsigned int noplen = len;
Index: linux/arch/i386/kernel/smpboot.c
===================================================================
--- linux.orig/arch/i386/kernel/smpboot.c
+++ linux/arch/i386/kernel/smpboot.c
@@ -102,8 +102,8 @@ u8 apicid_2_node[MAX_APICID];
* Trampoline 80x86 program as an array.
*/

-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
+extern const unsigned char trampoline_data [];
+extern const unsigned char trampoline_end [];
static unsigned char *trampoline_base;
static int trampoline_exec;

Index: linux/arch/i386/kernel/trampoline.S
===================================================================
--- linux.orig/arch/i386/kernel/trampoline.S
+++ linux/arch/i386/kernel/trampoline.S
@@ -36,11 +36,11 @@
#include <asm/segment.h>
#include <asm/page.h>

-.data
-
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU
.section ".init.data","aw",@progbits
+#else
+.section .rodata,"a",@progbits
#endif

.code16
Index: linux/arch/i386/mach-voyager/voyager_smp.c
===================================================================
--- linux.orig/arch/i386/mach-voyager/voyager_smp.c
+++ linux/arch/i386/mach-voyager/voyager_smp.c
@@ -442,8 +442,8 @@ static __u32 __init
setup_trampoline(void)
{
/* these two are global symbols in trampoline.S */
- extern __u8 trampoline_end[];
- extern __u8 trampoline_data[];
+ extern const __u8 trampoline_end[];
+ extern const __u8 trampoline_data[];

memcpy((__u8 *)trampoline_base, trampoline_data,
trampoline_end - trampoline_data);
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -936,7 +936,7 @@ static int show_cpuinfo(struct seq_file
* applications want to get the raw CPUID data, they should access
* /dev/cpu/<cpu_nr>/cpuid instead.
*/
- static char *x86_cap_flags[] = {
+ static const char *const x86_cap_flags[] = {
/* Intel-defined */
"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
@@ -990,7 +990,7 @@ static int show_cpuinfo(struct seq_file
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
- static char *x86_power_flags[] = {
+ static const char *const x86_power_flags[] = {
"ts", /* temperature sensor */
"fid", /* frequency id control */
"vid", /* voltage id control */
Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -102,8 +102,8 @@ EXPORT_SYMBOL(cpu_core_map);
* Trampoline 80x86 program as an array.
*/

-extern unsigned char trampoline_data[];
-extern unsigned char trampoline_end[];
+extern const unsigned char trampoline_data[];
+extern const unsigned char trampoline_end[];

/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
Index: linux/arch/x86_64/kernel/trampoline.S
===================================================================
--- linux.orig/arch/x86_64/kernel/trampoline.S
+++ linux/arch/x86_64/kernel/trampoline.S
@@ -33,7 +33,12 @@
#include <asm/msr.h>
#include <asm/segment.h>

-.data
+/* We can free up trampoline after bootup if cpu hotplug is not supported. */
+#ifndef CONFIG_HOTPLUG_CPU
+.section .init.data, "aw", @progbits
+#else
+.section .rodata, "a", @progbits
+#endif

.code16

2007-09-21 20:45:48

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [2/45] x86: constify stacktrace_ops


From: "Jan Beulich" <[email protected]>
.. as they're never written to.

Signed-off-by: Jan Beulich <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

arch/i386/kernel/traps.c | 6 +++---
arch/x86_64/kernel/stacktrace.c | 2 +-
arch/x86_64/kernel/traps.c | 4 ++--
include/asm-x86_64/stacktrace.h | 2 +-
4 files changed, 7 insertions(+), 7 deletions(-)

Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -114,7 +114,7 @@ struct stack_frame {

static inline unsigned long print_context_stack(struct thread_info *tinfo,
unsigned long *stack, unsigned long ebp,
- struct stacktrace_ops *ops, void *data)
+ const struct stacktrace_ops *ops, void *data)
{
#ifdef CONFIG_FRAME_POINTER
struct stack_frame *frame = (struct stack_frame *)ebp;
@@ -151,7 +151,7 @@ static inline unsigned long print_contex

void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack,
- struct stacktrace_ops *ops, void *data)
+ const struct stacktrace_ops *ops, void *data)
{
unsigned long ebp = 0;

@@ -223,7 +223,7 @@ static void print_trace_address(void *da
touch_nmi_watchdog();
}

-static struct stacktrace_ops print_trace_ops = {
+static const struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
Index: linux/arch/x86_64/kernel/stacktrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/stacktrace.c
+++ linux/arch/x86_64/kernel/stacktrace.c
@@ -35,7 +35,7 @@ static void save_stack_address(void *dat
trace->entries[trace->nr_entries++] = addr;
}

-static struct stacktrace_ops save_stack_ops = {
+static const struct stacktrace_ops save_stack_ops = {
.warning = save_stack_warning,
.warning_symbol = save_stack_warning_symbol,
.stack = save_stack_stack,
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -217,7 +217,7 @@ static inline int valid_stack_ptr(struct

void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
unsigned long *stack,
- struct stacktrace_ops *ops, void *data)
+ const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
@@ -338,7 +338,7 @@ static void print_trace_address(void *da
printk_address(addr);
}

-static struct stacktrace_ops print_trace_ops = {
+static const struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
Index: linux/include/asm-x86_64/stacktrace.h
===================================================================
--- linux.orig/include/asm-x86_64/stacktrace.h
+++ linux/include/asm-x86_64/stacktrace.h
@@ -15,6 +15,6 @@ struct stacktrace_ops {
};

void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
- struct stacktrace_ops *ops, void *data);
+ const struct stacktrace_ops *ops, void *data);

#endif

2007-09-21 20:46:05

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [3/45] x86: Separate checking of unsynchronized and unstable TSC


Preparatory patch for the new sched_clock()/printk_clock().

Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/kernel/tsc.c | 9 ++++-----
arch/x86_64/kernel/tsc.c | 5 +----
2 files changed, 5 insertions(+), 9 deletions(-)

Index: linux/arch/i386/kernel/tsc.c
===================================================================
--- linux.orig/arch/i386/kernel/tsc.c
+++ linux/arch/i386/kernel/tsc.c
@@ -332,7 +332,7 @@ static struct dmi_system_id __initdata b
*/
__cpuinit int unsynchronized_tsc(void)
{
- if (!cpu_has_tsc || tsc_unstable)
+ if (!cpu_has_tsc)
return 1;
/*
* Intel systems are normally all synchronized.
@@ -341,9 +341,9 @@ __cpuinit int unsynchronized_tsc(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
/* assume multi socket systems are not synchronized: */
if (num_possible_cpus() > 1)
- tsc_unstable = 1;
+ return 1;
}
- return tsc_unstable;
+ return 0;
}

/*
@@ -387,13 +387,12 @@ void __init tsc_init(void)
/* Check and install the TSC clocksource */
dmi_check_system(bad_tsc_dmi_table);

- unsynchronized_tsc();
check_geode_tsc_reliable();
current_tsc_khz = tsc_khz;
clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
clocksource_tsc.shift);
/* lower the rating if we already know its unstable: */
- if (check_tsc_unstable()) {
+ if (check_tsc_unstable() || unsynchronized_tsc()) {
clocksource_tsc.rating = 0;
clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
} else
Index: linux/arch/x86_64/kernel/tsc.c
===================================================================
--- linux.orig/arch/x86_64/kernel/tsc.c
+++ linux/arch/x86_64/kernel/tsc.c
@@ -124,9 +124,6 @@ core_initcall(cpufreq_tsc);
*/
__cpuinit int unsynchronized_tsc(void)
{
- if (tsc_unstable)
- return 1;
-
#ifdef CONFIG_SMP
if (apic_is_clustered_box())
return 1;
@@ -199,7 +196,7 @@ void __init init_tsc_clocksource(void)
if (!notsc) {
clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
clocksource_tsc.shift);
- if (check_tsc_unstable())
+ if (unsynchronized_tsc() || check_tsc_unstable())
clocksource_tsc.rating = 0;

clocksource_register(&clocksource_tsc);

2007-09-21 20:46:27

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [4/45] i386: Rewrite sched_clock


Move it into its own file for easy sharing.
Do everything per CPU. This avoids problems with TSCs that
tick at different frequencies per CPU.
Resync properly on cpufreq changes. The CPU frequency is unstable
around CPU frequency changes, so fall back to a backing
clock (jiffies) during this period.
Hopefully the TSC will now work on all systems, except when there isn't a
physical TSC.

And

+From: Jeremy Fitzhardinge <[email protected]>
Three cleanups there:
- change "instable" -> "unstable"
- it's better to use get_cpu_var for getting this cpu's variables
- change cycles_2_ns to do the full computation rather than just the
tsc->ns scaling. It's a simpler interface, and it makes the function generally useful.

TBD: during cpufreq changes on AMD sched_clock jumps backwards. Need to
TBD: fix that before pushing it further

Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/kernel/Makefile | 3
arch/i386/kernel/paravirt.c | 2
arch/i386/kernel/sched-clock.c | 270 +++++++++++++++++++++++++++++++++++++++++
arch/i386/kernel/tsc.c | 74 -----------
include/asm-i386/timer.h | 33 -----
include/asm-i386/tsc.h | 1
6 files changed, 276 insertions(+), 107 deletions(-)

Index: linux/arch/i386/kernel/sched-clock.c
===================================================================
--- /dev/null
+++ linux/arch/i386/kernel/sched-clock.c
@@ -0,0 +1,270 @@
+/* A fast clock for the scheduler.
+ * Copyright 2007 Andi Kleen SUSE Labs
+ * Subject to the GNU Public License, version 2 only.
+ */
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/smp.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <asm/tsc.h>
+#include <asm/cpufeature.h>
+#include <asm/timer.h>
+
+/*
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by [email protected]) to get:
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better percision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * ([email protected])
+ *
+ * [email protected] "math is hard, lets go shopping!"
+ */
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+struct sc_data {
+ unsigned cyc2ns_scale;
+ unsigned unstable;
+ unsigned long long sync_base;
+ unsigned long long ns_base; /* nanoseconds at sync point */
+ unsigned long long min_val; /* Minimum value during instable periods */
+};
+
+static DEFINE_PER_CPU(struct sc_data, sc_data) =
+ { .unstable = 1, .sync_base = INITIAL_JIFFIES };
+
+static inline u64 __cycles_2_ns(struct sc_data *sc, u64 cyc)
+{
+ u64 ns;
+
+ cyc -= sc->sync_base;
+ ns = (cyc * sc->cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+ ns += sc->ns_base;
+
+ return ns;
+}
+
+u64 cycles_2_ns(u64 cyc)
+{
+ struct sc_data *sc = &get_cpu_var(sc_data);
+ u64 ns = __cycles_2_ns(sc, cyc);
+ put_cpu_var(sc_data);
+ return ns;
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * All data is local to the CPU.
+ * The values are approximately[1] monotonic local to a CPU, but not
+ * between CPUs. There might be also an occasionally random error,
+ * but not too bad. Between CPUs the values can be non monotonic.
+ *
+ * [1] no attempt to stop CPU instruction reordering, which can hit
+ * in a 100 instruction window or so.
+ *
+ * The clock can be in two states: stable and unstable.
+ * When it is stable we use the TSC per CPU.
+ * When it is unstable we use jiffies as fallback.
+ * stable->unstable->stable transitions can happen regularly
+ * during CPU frequency changes.
+ * There is special code to avoid having the clock jump backwards
+ * when we switch from TSC to jiffies, which needs to keep some state
+ * per CPU. This state is protected against parallel state changes
+ * with interrupts off.
+ */
+unsigned long long tsc_sched_clock(void)
+{
+ unsigned long long r;
+ struct sc_data *sc = &get_cpu_var(sc_data);
+
+ if (unlikely(sc->unstable)) {
+ r = (jiffies_64 - sc->sync_base) * (1000000000 / HZ);
+ r += sc->ns_base;
+ if (r < sc->min_val)
+ r = sc->min_val;
+ } else {
+ rdtscll(r);
+ r = __cycles_2_ns(sc, r);
+ }
+
+ put_cpu_var(sc_data);
+
+ return r;
+}
+
+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#else
+unsigned long long sched_clock(void)
+ __attribute__((alias("tsc_sched_clock")));
+#endif
+
+static int no_sc_for_printk;
+
+/*
+ * printk clock: when it is known the sc results are very non monotonic
+ * fall back to jiffies for printk. Other sched_clock users are supposed
+ * to handle this.
+ */
+unsigned long long printk_clock(void)
+{
+ if (unlikely(no_sc_for_printk))
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+ return tsc_sched_clock();
+}
+
+static void resolve_freq(struct cpufreq_freqs *freq)
+{
+ if (!freq->new) {
+ freq->new = cpufreq_get(freq->cpu);
+ if (!freq->new)
+ freq->new = tsc_khz;
+ }
+}
+
+/* Resync with new CPU frequency. Must run on the CPU that is to be synced. */
+static void resync_freq(void *arg)
+{
+ struct cpufreq_freqs *freq = arg;
+ struct sc_data *sc = &__get_cpu_var(sc_data);
+
+ sc->sync_base = jiffies_64;
+ if (!cpu_has_tsc) {
+ sc->unstable = 1;
+ return;
+ }
+ resolve_freq(freq);
+
+ /*
+ * Handle nesting, but when we're zero multiple calls in a row
+ * are ok too and not a bug. This can happen during startup
+ * when the different callbacks race with each other.
+ */
+ if (sc->unstable > 0)
+ sc->unstable--;
+ if (sc->unstable)
+ return;
+
+ /* Minor race window here, but should not add significant errors. */
+ sc->ns_base = ktime_to_ns(ktime_get());
+ rdtscll(sc->sync_base);
+ sc->cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / freq->new;
+}
+
+static void resync_freq_on_cpu(void *arg)
+{
+ struct cpufreq_freqs f = { .new = 0 };
+
+ f.cpu = get_cpu();
+ resync_freq(&f);
+ put_cpu();
+}
+
+/* Mark CPU as unstable before cpufreq changes the frequency. */
+static void cpu_unstable(void *arg)
+{
+ struct sc_data *sc = &__get_cpu_var(sc_data);
+ unsigned long flags;
+ unsigned long long ns, r;
+ ns = ktime_to_ns(ktime_get());
+ local_irq_save(flags);
+ r = tsc_sched_clock();
+ sync_core();
+ sc->ns_base = ns;
+ sc->min_val = r;
+ sc->sync_base = jiffies_64;
+ sc->unstable = 1;
+ local_irq_restore(flags);
+}
+
+static int sc_freq_event(struct notifier_block *nb, unsigned long event,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+ return NOTIFY_DONE;
+ if (freq->old == freq->new)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case CPUFREQ_SUSPENDCHANGE:
+ /* Mark TSC unstable during suspend/resume */
+ case CPUFREQ_PRECHANGE:
+ /*
+ * Mark TSC as unstable until cpu frequency change is
+ * done because we don't know when exactly it will
+ * change. unstable is used as a counter to guard
+ * against races between the cpu frequency notifiers
+ * and normal resyncs
+ */
+ smp_call_function_single(freq->cpu, cpu_unstable, NULL, 0, 1);
+ break;
+ case CPUFREQ_RESUMECHANGE:
+ case CPUFREQ_POSTCHANGE:
+ /*
+ * Frequency change or resume is done -- update everything and
+ * mark TSC as stable again.
+ */
+ smp_call_function_single(freq->cpu, resync_freq, freq, 0, 1);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block sc_freq_notifier = {
+ .notifier_call = sc_freq_event
+};
+
+static int __cpuinit
+sc_cpu_event(struct notifier_block *self, unsigned long event, void *hcpu)
+{
+ long cpu = (long)hcpu;
+ if (event == CPU_ONLINE) {
+ struct cpufreq_freqs f = { .cpu = cpu, .new = 0 };
+
+ smp_call_function_single(cpu, resync_freq, &f, 0, 1);
+ }
+ return NOTIFY_DONE;
+}
+
+static __init int init_sched_clock(void)
+{
+ if (unsynchronized_tsc())
+ no_sc_for_printk = 1;
+
+ /*
+ * On a race between the various events the initialization
+ * might be done multiple times, but code is tolerant to
+ * this .
+ */
+ cpufreq_register_notifier(&sc_freq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ hotcpu_notifier(sc_cpu_event, 0);
+ on_each_cpu(resync_freq_on_cpu, NULL, 0, 0);
+ return 0;
+}
+core_initcall(init_sched_clock);
Index: linux/arch/i386/kernel/tsc.c
===================================================================
--- linux.orig/arch/i386/kernel/tsc.c
+++ linux/arch/i386/kernel/tsc.c
@@ -65,74 +65,6 @@ int check_tsc_unstable(void)
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

-/* Accellerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by [email protected]) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * ([email protected])
- *
- * [email protected] "math is hard, lets go shopping!"
- */
-unsigned long cyc2ns_scale __read_mostly;
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long native_sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * Fall back to jiffies if there's no TSC available:
- * ( But note that we still use it if the TSC is marked
- * unstable. We do this because unlike Time Of Day,
- * the scheduler clock tolerates small errors and it's
- * very important for it to be as fast as the platform
- * can achive it. )
- */
- if (unlikely(!tsc_enabled && !tsc_unstable))
- /* No locking but a rare wrong value is not a big deal: */
- return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-
- /* read the Time Stamp Counter: */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
-/* We need to define a real function for sched_clock, to override the
- weak default version */
-#ifdef CONFIG_PARAVIRT
-unsigned long long sched_clock(void)
-{
- return paravirt_sched_clock();
-}
-#else
-unsigned long long sched_clock(void)
- __attribute__((alias("native_sched_clock")));
-#endif
-
unsigned long native_calculate_cpu_khz(void)
{
unsigned long long start, end;
@@ -240,11 +172,6 @@ time_cpufreq_notifier(struct notifier_bl
ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
tsc_khz = cpu_khz;
- set_cyc2ns_scale(cpu_khz);
- /*
- * TSC based sched_clock turns
- * to junk w/ cpufreq
- */
mark_tsc_unstable("cpufreq changes");
}
}
@@ -381,7 +308,6 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);

- set_cyc2ns_scale(cpu_khz);
use_tsc_delay();

/* Check and install the TSC clocksource */
Index: linux/arch/i386/kernel/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/Makefile
+++ linux/arch/i386/kernel/Makefile
@@ -7,7 +7,8 @@ extra-y := head.o init_task.o vmlinux.ld
obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o bootflag.o e820.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o \
+ sched-clock.o

obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
Index: linux/include/asm-i386/timer.h
===================================================================
--- linux.orig/include/asm-i386/timer.h
+++ linux/include/asm-i386/timer.h
@@ -5,7 +5,7 @@

#define TICK_SIZE (tick_nsec / 1000)

-unsigned long long native_sched_clock(void);
+unsigned long long tsc_sched_clock(void);
unsigned long native_calculate_cpu_khz(void);

extern int timer_ack;
@@ -16,35 +16,6 @@ extern int recalibrate_cpu_khz(void);
#define calculate_cpu_khz() native_calculate_cpu_khz()
#endif

-/* Accellerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by [email protected]) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * ([email protected])
- *
- * [email protected] "math is hard, lets go shopping!"
- */
-extern unsigned long cyc2ns_scale __read_mostly;
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
+u64 cycles_2_ns(u64 cyc);

#endif
Index: linux/include/asm-i386/tsc.h
===================================================================
--- linux.orig/include/asm-i386/tsc.h
+++ linux/include/asm-i386/tsc.h
@@ -64,6 +64,7 @@ extern void mark_tsc_unstable(char *reas
extern int unsynchronized_tsc(void);
extern void init_tsc_clocksource(void);
int check_tsc_unstable(void);
+extern unsigned long long tsc_sched_clock(void);

/*
* Boot-time check whether the TSCs are synchronized across
Index: linux/arch/i386/kernel/paravirt.c
===================================================================
--- linux.orig/arch/i386/kernel/paravirt.c
+++ linux/arch/i386/kernel/paravirt.c
@@ -312,7 +312,7 @@ struct paravirt_ops paravirt_ops = {
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
- .sched_clock = native_sched_clock,
+ .sched_clock = tsc_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,

2007-09-21 20:46:41

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [5/45] x86_64: Use new shared sched_clock in x86-64 too


Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/Makefile | 3 ++-
arch/x86_64/kernel/time.c | 1 -
arch/x86_64/kernel/tsc.c | 29 +----------------------------
include/asm-x86_64/timer.h | 5 +++++
include/asm-x86_64/timex.h | 1 -
5 files changed, 8 insertions(+), 31 deletions(-)

Index: linux/arch/x86_64/kernel/tsc.c
===================================================================
--- linux.orig/arch/x86_64/kernel/tsc.c
+++ linux/arch/x86_64/kernel/tsc.c
@@ -8,6 +8,7 @@
#include <linux/cpufreq.h>

#include <asm/timex.h>
+#include <asm/tsc.h>

static int notsc __initdata = 0;

@@ -16,32 +17,6 @@ EXPORT_SYMBOL(cpu_khz);
unsigned int tsc_khz;
EXPORT_SYMBOL(tsc_khz);

-static unsigned int cyc2ns_scale __read_mostly;
-
-void set_cyc2ns_scale(unsigned long khz)
-{
- cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
-}
-
-static unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> NS_SCALE;
-}
-
-unsigned long long sched_clock(void)
-{
- unsigned long a = 0;
-
- /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
- * which means it is not completely exact and may not be monotonous
- * between CPUs. But the errors should be too small to matter for
- * scheduling purposes.
- */
-
- rdtscll(a);
- return cycles_2_ns(a);
-}
-
static int tsc_unstable;

inline int check_tsc_unstable(void)
@@ -98,8 +73,6 @@ static int time_cpufreq_notifier(struct
mark_tsc_unstable("cpufreq changes");
}

- set_cyc2ns_scale(tsc_khz_ref);
-
return 0;
}

Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -398,7 +398,6 @@ void __init time_init(void)
else
vgetcpu_mode = VGETCPU_LSL;

- set_cyc2ns_scale(tsc_khz);
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
init_tsc_clocksource();
Index: linux/include/asm-x86_64/timex.h
===================================================================
--- linux.orig/include/asm-x86_64/timex.h
+++ linux/include/asm-x86_64/timex.h
@@ -27,5 +27,4 @@ extern int read_current_timer(unsigned l
#define US_SCALE 32 /* 2^32, arbitralrily chosen */

extern void mark_tsc_unstable(char *msg);
-extern void set_cyc2ns_scale(unsigned long khz);
#endif
Index: linux/arch/x86_64/kernel/Makefile
===================================================================
--- linux.orig/arch/x86_64/kernel/Makefile
+++ linux/arch/x86_64/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y := process.o signal.o entry.o trap
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
- perfctr-watchdog.o
+ perfctr-watchdog.o sched-clock.o

obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
@@ -62,3 +62,4 @@ msr-$(subst m,y,$(CONFIG_X86_MSR)) += .
alternative-y += ../../i386/kernel/alternative.o
pcspeaker-y += ../../i386/kernel/pcspeaker.o
perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o
+sched-clock-y += ../../i386/kernel/sched-clock.o
Index: linux/include/asm-x86_64/timer.h
===================================================================
--- /dev/null
+++ linux/include/asm-x86_64/timer.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_TIMER_H
+#define _ASM_TIMER_H 1
+
+#endif
+

2007-09-21 20:46:55

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [6/45] x86: Unify i386 and x86-64 early quirks


They were already very similar; just use the same file now.

Cc: [email protected]

Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/kernel/Makefile | 2
arch/i386/kernel/acpi/Makefile | 3 -
arch/i386/kernel/acpi/earlyquirk.c | 84 -------------------------------------
arch/i386/kernel/setup.c | 4 -
arch/x86_64/kernel/early-quirks.c | 12 ++++-
include/asm-i386/acpi.h | 6 --
include/asm-x86_64/io_apic.h | 2
include/asm-x86_64/proto.h | 2
8 files changed, 16 insertions(+), 99 deletions(-)

Index: linux/arch/x86_64/kernel/early-quirks.c
===================================================================
--- linux.orig/arch/x86_64/kernel/early-quirks.c
+++ linux/arch/x86_64/kernel/early-quirks.c
@@ -13,9 +13,13 @@
#include <linux/acpi.h>
#include <linux/pci_ids.h>
#include <asm/pci-direct.h>
-#include <asm/proto.h>
-#include <asm/iommu.h>
#include <asm/dma.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+
+#ifdef CONFIG_IOMMU
+#include <asm/iommu.h>
+#endif

static void __init via_bugs(void)
{
@@ -40,6 +44,7 @@ static int __init nvidia_hpet_check(stru
static void __init nvidia_bugs(void)
{
#ifdef CONFIG_ACPI
+#ifdef CONFIG_X86_IO_APIC
/*
* All timer overrides on Nvidia are
* wrong unless HPET is enabled.
@@ -59,17 +64,20 @@ static void __init nvidia_bugs(void)
"try acpi_use_timer_override\n");
}
#endif
+#endif
/* RED-PEN skip them on mptables too? */

}

static void __init ati_bugs(void)
{
+#ifdef CONFIG_X86_IO_APIC
if (timer_over_8254 == 1) {
timer_over_8254 = 0;
printk(KERN_INFO
"ATI board detected. Disabling timer routing over 8254.\n");
}
+#endif
}

struct chipset {
Index: linux/arch/i386/kernel/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/Makefile
+++ linux/arch/i386/kernel/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_MCA) += mca.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
+obj-$(CONFIG_PCI) += early-quirks.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o
obj-$(CONFIG_SMP) += smpcommon.o
@@ -87,4 +88,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.

k8-y += ../../x86_64/kernel/k8.o
stacktrace-y += ../../x86_64/kernel/stacktrace.o
+early-quirks-y += ../../x86_64/kernel/early-quirks.o

Index: linux/arch/i386/kernel/acpi/earlyquirk.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/earlyquirk.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Do early PCI probing for bug detection when the main PCI subsystem is
- * not up yet.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/acpi.h>
-
-#include <asm/pci-direct.h>
-#include <asm/acpi.h>
-#include <asm/apic.h>
-
-#ifdef CONFIG_ACPI
-
-static int __init nvidia_hpet_check(struct acpi_table_header *header)
-{
- return 0;
-}
-#endif
-
-static int __init check_bridge(int vendor, int device)
-{
-#ifdef CONFIG_ACPI
- static int warned;
- /* According to Nvidia all timer overrides are bogus unless HPET
- is enabled. */
- if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) {
- if (!warned && acpi_table_parse(ACPI_SIG_HPET,
- nvidia_hpet_check)) {
- warned = 1;
- acpi_skip_timer_override = 1;
- printk(KERN_INFO "Nvidia board "
- "detected. Ignoring ACPI "
- "timer override.\n");
- printk(KERN_INFO "If you got timer trouble "
- "try acpi_use_timer_override\n");
-
- }
- }
-#endif
- if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) {
- timer_over_8254 = 0;
- printk(KERN_INFO "ATI board detected. Disabling timer routing "
- "over 8254.\n");
- }
- return 0;
-}
-
-void __init check_acpi_pci(void)
-{
- int num, slot, func;
-
- /* Assume the machine supports type 1. If not it will
- always read ffffffff and should not have any side effect.
- Actually a few buggy systems can machine check. Allow the user
- to disable it by command line option at least -AK */
- if (!early_pci_allowed())
- return;
-
- /* Poor man's PCI discovery */
- for (num = 0; num < 32; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class;
- u32 vendor;
- class = read_pci_config(num, slot, func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
- continue;
-
- vendor = read_pci_config(num, slot, func,
- PCI_VENDOR_ID);
-
- if (check_bridge(vendor & 0xffff, vendor >> 16))
- return;
- }
-
- }
- }
-}
Index: linux/arch/i386/kernel/setup.c
===================================================================
--- linux.orig/arch/i386/kernel/setup.c
+++ linux/arch/i386/kernel/setup.c
@@ -619,9 +619,7 @@ void __init setup_arch(char **cmdline_p)
#endif

#ifdef CONFIG_PCI
-#ifdef CONFIG_X86_IO_APIC
- check_acpi_pci(); /* Checks more than just ACPI actually */
-#endif
+ early_quirks();
#endif

#ifdef CONFIG_ACPI
Index: linux/include/asm-i386/acpi.h
===================================================================
--- linux.orig/include/asm-i386/acpi.h
+++ linux/include/asm-i386/acpi.h
@@ -81,11 +81,7 @@ int __acpi_release_global_lock(unsigned
:"=r"(n_hi), "=r"(n_lo) \
:"0"(n_hi), "1"(n_lo))

-#ifdef CONFIG_X86_IO_APIC
-extern void check_acpi_pci(void);
-#else
-static inline void check_acpi_pci(void) { }
-#endif
+extern void early_quirks(void);

#ifdef CONFIG_ACPI
extern int acpi_lapic;
Index: linux/arch/i386/kernel/acpi/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/acpi/Makefile
+++ linux/arch/i386/kernel/acpi/Makefile
@@ -1,7 +1,4 @@
obj-$(CONFIG_ACPI) += boot.o
-ifneq ($(CONFIG_PCI),)
-obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
-endif
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o

ifneq ($(CONFIG_ACPI_PROCESSOR),)
Index: linux/include/asm-x86_64/io_apic.h
===================================================================
--- linux.orig/include/asm-x86_64/io_apic.h
+++ linux/include/asm-x86_64/io_apic.h
@@ -133,4 +133,6 @@ void enable_NMI_through_LVT0 (void * dum

extern spinlock_t i8259A_lock;

+extern int timer_over_8254;
+
#endif
Index: linux/include/asm-x86_64/proto.h
===================================================================
--- linux.orig/include/asm-x86_64/proto.h
+++ linux/include/asm-x86_64/proto.h
@@ -86,8 +86,6 @@ extern unsigned tsc_khz;
extern int reboot_force;
extern int notsc_setup(char *);

-extern int timer_over_8254;
-
extern int gsi_irq_sharing(int gsi);

extern void smp_local_timer_interrupt(void);

2007-09-21 20:47:17

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [7/45] x86_64: Replace nvidia timer override quirk with pci id list and unify quirks


This replaces the old NF3/NF4 reference BIOS timer override quirk with a device
ID list. We need to ignore the timer override on these systems, but not
ignore it on NF5 based systems. Previously this was distinguished by checking
for HPET, but a lot of BIOS vendors didn't enable HPET in their pre Vista BIOSes.
Replace the old "for all of nvidia" quirk with a quirk containing pci device
ID. I cobbled this list together from pci.ids and googling and it may be incomplete.

I'm still not 100% sure the list is correct, but the only way
to find out is to do testing in mainline. So let's do that.

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/early-quirks.c | 50 ++++++++++++++++++--------------------
1 file changed, 24 insertions(+), 26 deletions(-)

Index: linux/arch/x86_64/kernel/early-quirks.c
===================================================================
--- linux.orig/arch/x86_64/kernel/early-quirks.c
+++ linux/arch/x86_64/kernel/early-quirks.c
@@ -33,36 +33,20 @@ static void __init via_bugs(void)
#endif
}

-#ifdef CONFIG_ACPI
-
-static int __init nvidia_hpet_check(struct acpi_table_header *header)
-{
- return 0;
-}
-#endif
-
static void __init nvidia_bugs(void)
{
#ifdef CONFIG_ACPI
#ifdef CONFIG_X86_IO_APIC
/*
- * All timer overrides on Nvidia are
- * wrong unless HPET is enabled.
- * Unfortunately that's not true on many Asus boards.
- * We don't know yet how to detect this automatically, but
- * at least allow a command line override.
+ * All timer overrides on Nvidia NF3/NF4 are
+ * wrong.
*/
if (acpi_use_timer_override)
return;

- if (acpi_table_parse(ACPI_SIG_HPET, nvidia_hpet_check)) {
- acpi_skip_timer_override = 1;
- printk(KERN_INFO "Nvidia board "
- "detected. Ignoring ACPI "
- "timer override.\n");
- printk(KERN_INFO "If you got timer trouble "
- "try acpi_use_timer_override\n");
- }
+ acpi_skip_timer_override = 1;
+ printk(KERN_INFO "Nvidia board detected. Ignoring ACPI timer override.\n");
+ printk(KERN_INFO "If you got timer trouble try acpi_use_timer_override\n");
#endif
#endif
/* RED-PEN skip them on mptables too? */
@@ -83,10 +67,19 @@ static void __init ati_bugs(void)
struct chipset {
u16 vendor;
void (*f)(void);
+ int id;
};

static struct chipset early_qrk[] __initdata = {
- { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
+ /* This list should cover at least one PCI ID from each NF3 or NF4
+ mainboard to handle a bug in their reference BIOS. May be incomplete. */
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x00dd }, /* nforce 3 */
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x00e1 }, /* nforce 3 */
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x00ed }, /* nforce 3 */
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x003d }, /* mcp 04 ?? */
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x005c }, /* ck 804 */
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x026f }, /* mcp 51 / nf4 ? */
+ { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x02f0 }, /* mcp 51 / nf4 ? */
{ PCI_VENDOR_ID_VIA, via_bugs },
{ PCI_VENDOR_ID_ATI, ati_bugs },
{}
@@ -99,12 +92,13 @@ void __init early_quirks(void)
if (!early_pci_allowed())
return;

- /* Poor man's PCI discovery */
+ /* Poor man's PCI discovery.
+ We just look for a chipset unique PCI bridge; not scan all devices */
for (num = 0; num < 32; num++) {
for (slot = 0; slot < 32; slot++) {
for (func = 0; func < 8; func++) {
u32 class;
- u32 vendor;
+ u32 vendor, device;
u8 type;
int i;
class = read_pci_config(num,slot,func,
@@ -117,13 +111,17 @@ void __init early_quirks(void)

vendor = read_pci_config(num, slot, func,
PCI_VENDOR_ID);
+ device = vendor >> 16;
+
vendor &= 0xffff;

- for (i = 0; early_qrk[i].f; i++)
- if (early_qrk[i].vendor == vendor) {
+ for (i = 0; early_qrk[i].f; i++) {
+ struct chipset *c = &early_qrk[i];
+ if (c->vendor == vendor && (!c->id || (c->id && c->id==device))) {
early_qrk[i].f();
return;
}
+ }

type = read_pci_config_byte(num, slot, func,
PCI_HEADER_TYPE);

2007-09-21 20:47:39

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [8/45] x86_64: Use string instruction memcpy on AMD Fam11h


Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/setup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -575,7 +575,7 @@ static void __cpuinit init_amd(struct cp
level = cpuid_eax(1);
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
- if (c->x86 == 0x10)
+ if (c->x86 == 0x10 || c->x86 == 0x11)
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);

/* Enable workaround for FXSAVE leak */

2007-09-21 20:47:55

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [9/45] i386: Clean up duplicate includes in arch/i386/kernel/


From: Jesper Juhl <[email protected]>

This patch cleans up duplicate includes in arch/i386/kernel/

Signed-off-by: Jesper Juhl <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

---
arch/i386/kernel/pci-dma.c | 1 -
1 file changed, 1 deletion(-)

Index: linux/arch/i386/kernel/pci-dma.c
===================================================================
--- linux.orig/arch/i386/kernel/pci-dma.c
+++ linux/arch/i386/kernel/pci-dma.c
@@ -12,7 +12,6 @@
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/module.h>
-#include <linux/pci.h>
#include <asm/io.h>

struct dma_coherent_mem {

2007-09-21 20:48:19

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [11/45] x86_64: Remove rogue default m in drivers/video/Kconfig


default m is nearly always wrong, like here. For some reason ACPI
likes to reintroduce these and I like to immediately squash them again
before they pollute too many .configs.

Cc: [email protected]
Cc: [email protected]

Signed-off-by: Andi Kleen <[email protected]>

---
drivers/video/Kconfig | 1 -
1 file changed, 1 deletion(-)

Index: linux/drivers/video/Kconfig
===================================================================
--- linux.orig/drivers/video/Kconfig
+++ linux/drivers/video/Kconfig
@@ -14,7 +14,6 @@ config VGASTATE

config VIDEO_OUTPUT_CONTROL
tristate "Lowlevel video output switch controls"
- default m
help
This framework adds support for low-level control of the video
output switch.

2007-09-21 20:48:36

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [12/45] i386: Fix arch/i386/kernel/nmi.c - 'unknown_nmi_panic_callback' declared 'static' but never defined warning


I get this warning when CONFIG_SYSCTL is not set :

...

arch/i386/kernel/nmi.c:52: warning: 'unknown_nmi_panic_callback' declared 'static' but never defined

...

Signed-off-by: Gabriel Craciunescu <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

---
arch/i386/kernel/nmi.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -49,8 +49,9 @@ static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);

/* local prototypes */
+#ifdef CONFIG_SYSCTL
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-
+#endif
static int endflag __initdata = 0;

#ifdef CONFIG_SMP

2007-09-21 20:48:49

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [13/45] x86_64: Increase VDSO_TEXT_OFFSET for ancient binutils


For some reason old binutils generate larger headers so
increase the text offset of the vdso to avoid linker errors.

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/vdso/voffset.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/x86_64/vdso/voffset.h
===================================================================
--- linux.orig/arch/x86_64/vdso/voffset.h
+++ linux/arch/x86_64/vdso/voffset.h
@@ -1 +1 @@
-#define VDSO_TEXT_OFFSET 0x500
+#define VDSO_TEXT_OFFSET 0x600

2007-09-21 20:49:08

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [10/45] x86_64: x86_64 Sanitize user specified e820 memmap values


From: Vladimir Berezniker <[email protected]>

Sanitize user specified e820 memory ranges, using the same logic that
is applied to the values returned by the BIOS. This ensures
consistent handling regardless of the source of the memory mappings.

Allows overriding portions of the memory map without specifying one in
its entirety (memmap=exactmap).

E.g. marking a range of bad RAM as reserved with memmap=48M$528M

BIOS supplied range

BIOS-e820: 0000000000100000 - 000000007fe80000 (usable)

becomes

user: 0000000000100000 - 0000000021000000 (usable)
user: 0000000021000000 - 0000000024000000 (reserved)
user: 0000000024000000 - 000000007fe80000 (usable)

Previously this did not work, as the original BIOS range was left
untouched while the user defined range was appended to the end of the
memory map.

Signed-off-by: Vladimir Berezniker <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>


---
arch/x86_64/kernel/e820.c | 7 +++++++
1 file changed, 7 insertions(+)

Index: linux/arch/x86_64/kernel/e820.c
===================================================================
--- linux.orig/arch/x86_64/kernel/e820.c
+++ linux/arch/x86_64/kernel/e820.c
@@ -639,6 +639,8 @@ static int __init parse_memmap_opt(char
mem_size = memparse(p, &p);
if (p == oldp)
return -EINVAL;
+
+ userdef = 1;
if (*p == '@') {
start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_RAM);
@@ -658,6 +660,11 @@ early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)
{
if (userdef) {
+ char nr = e820.nr_map;
+ if (sanitize_e820_map(e820.map, &nr) < 0)
+ early_panic("Invalid user supplied memory map");
+ e820.nr_map = nr;
+
printk(KERN_INFO "user-defined physical RAM map:\n");
e820_print_map("user");
}

2007-09-21 20:49:33

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [14/45] x86: Create clflush() inline, remove hardcoded wbinvd


From: "H. Peter Anvin" <[email protected]>
Create an inline function for clflush(), with the proper arguments,
and use it instead of hard-coding the instruction.

This also removes one instance of hard-coded wbinvd, based on a patch
by Glauber de Oliveira Costa.

Cc: Andi Kleen <[email protected]>
Cc: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/mm/pageattr.c | 4 ++--
arch/x86_64/kernel/tce.c | 4 ++--
arch/x86_64/mm/pageattr.c | 2 +-
drivers/char/agp/efficeon-agp.c | 11 ++++++-----
include/asm-i386/system.h | 4 ++++
include/asm-x86_64/system.h | 5 +++++
6 files changed, 20 insertions(+), 10 deletions(-)

Index: linux/arch/i386/mm/pageattr.c
===================================================================
--- linux.orig/arch/i386/mm/pageattr.c
+++ linux/arch/i386/mm/pageattr.c
@@ -70,10 +70,10 @@ static struct page *split_large_page(uns

static void cache_flush_page(struct page *p)
{
- unsigned long adr = (unsigned long)page_address(p);
+ void *adr = page_address(p);
int i;
for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (adr + i));
+ clflush(adr+i);
}

static void flush_kernel_map(void *arg)
Index: linux/arch/x86_64/kernel/tce.c
===================================================================
--- linux.orig/arch/x86_64/kernel/tce.c
+++ linux/arch/x86_64/kernel/tce.c
@@ -40,9 +40,9 @@ static inline void flush_tce(void* tcead
{
/* a single tce can't cross a cache line */
if (cpu_has_clflush)
- asm volatile("clflush (%0)" :: "r" (tceaddr));
+ clflush(tceaddr);
else
- asm volatile("wbinvd":::"memory");
+ wbinvd();
}

void tce_build(struct iommu_table *tbl, unsigned long index,
Index: linux/arch/x86_64/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86_64/mm/pageattr.c
+++ linux/arch/x86_64/mm/pageattr.c
@@ -65,7 +65,7 @@ static void cache_flush_page(void *adr)
{
int i;
for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (adr + i));
+ clflush(adr+i);
}

static void flush_kernel_map(void *arg)
Index: linux/drivers/char/agp/efficeon-agp.c
===================================================================
--- linux.orig/drivers/char/agp/efficeon-agp.c
+++ linux/drivers/char/agp/efficeon-agp.c
@@ -221,7 +221,7 @@ static int efficeon_create_gatt_table(st
SetPageReserved(virt_to_page((char *)page));

for (offset = 0; offset < PAGE_SIZE; offset += clflush_chunk)
- asm volatile("clflush %0" : : "m" (*(char *)(page+offset)));
+ clflush((char *)page+offset);

efficeon_private.l1_table[index] = page;

@@ -268,15 +268,16 @@ static int efficeon_insert_memory(struct
*page = insert;

/* clflush is slow, so don't clflush until we have to */
- if ( last_page &&
- ((unsigned long)page^(unsigned long)last_page) & clflush_mask )
- asm volatile("clflush %0" : : "m" (*last_page));
+ if (last_page &&
+ (((unsigned long)page^(unsigned long)last_page) &
+ clflush_mask))
+ clflush(last_page);

last_page = page;
}

if ( last_page )
- asm volatile("clflush %0" : : "m" (*last_page));
+ clflush(last_page);

agp_bridge->driver->tlb_flush(mem);
return 0;
Index: linux/include/asm-i386/system.h
===================================================================
--- linux.orig/include/asm-i386/system.h
+++ linux/include/asm-i386/system.h
@@ -160,6 +160,10 @@ static inline void native_wbinvd(void)
asm volatile("wbinvd": : :"memory");
}

+static inline void clflush(volatile void *__p)
+{
+ asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
Index: linux/include/asm-x86_64/system.h
===================================================================
--- linux.orig/include/asm-x86_64/system.h
+++ linux/include/asm-x86_64/system.h
@@ -137,6 +137,11 @@ static inline void write_cr8(unsigned lo

#endif /* __KERNEL__ */

+static inline void clflush(volatile void *__p)
+{
+ asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}
+
#define nop() __asm__ __volatile__ ("nop")

#ifdef CONFIG_SMP

2007-09-21 20:49:50

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [15/45] i386: i386 add AMD64 Barcelona PMU MSR definitions to msr.h


From: Stephane Eranian <[email protected]>

[i386] add AMD Barcelona PMU MSR definitions

AK: Not used right now, but will presumably at some point.

Signed-off-by: Stephane Eranian <[email protected]>
Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
include/asm-i386/msr-index.h | 36 +++++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 11 deletions(-)

Index: linux/include/asm-i386/msr-index.h
===================================================================
--- linux.orig/include/asm-i386/msr-index.h
+++ linux/include/asm-i386/msr-index.h
@@ -73,8 +73,32 @@
#define MSR_P6_EVNTSEL0 0x00000186
#define MSR_P6_EVNTSEL1 0x00000187

-/* K7/K8 MSRs. Not complete. See the architecture manual for a more
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
complete list. */
+
+#define MSR_AMD64_IBSFETCHCTL 0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSOPCTL 0xc0011033
+#define MSR_AMD64_IBSOPRIP 0xc0011034
+#define MSR_AMD64_IBSOPDATA 0xc0011035
+#define MSR_AMD64_IBSOPDATA2 0xc0011036
+#define MSR_AMD64_IBSOPDATA3 0xc0011037
+#define MSR_AMD64_IBSDCLINAD 0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSCTL 0xc001103a
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1 0xc001001a
+#define MSR_K8_TOP_MEM2 0xc001001d
+#define MSR_K8_SYSCFG 0xc0010010
+#define MSR_K8_HWCR 0xc0010015
+#define MSR_K8_ENABLE_C1E 0xc0010055
+#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
+
+/* K7 MSRs */
#define MSR_K7_EVNTSEL0 0xc0010000
#define MSR_K7_PERFCTR0 0xc0010004
#define MSR_K7_EVNTSEL1 0xc0010001
@@ -83,20 +107,10 @@
#define MSR_K7_PERFCTR2 0xc0010006
#define MSR_K7_EVNTSEL3 0xc0010003
#define MSR_K7_PERFCTR3 0xc0010007
-#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K7_CLK_CTL 0xc001001b
-#define MSR_K8_TOP_MEM2 0xc001001d
-#define MSR_K8_SYSCFG 0xc0010010
-
-#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
-#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
-#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
-
#define MSR_K7_HWCR 0xc0010015
-#define MSR_K8_HWCR 0xc0010015
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
-#define MSR_K8_ENABLE_C1E 0xc0010055

/* K6 MSRs */
#define MSR_K6_EFER 0xc0000080

2007-09-21 20:50:19

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [16/45] i386: do not BUG_ON() when MSR is unknown


From: Stephane Eranian <[email protected]>

Here is a small patch to change the behavior of the PMU msr allocator
to avoid BUG_ON() when the MSR is unknown. Instead, it now returns
ok, which means "I do not manage". The current allocator is not
yet managing the full set of PMU registers (e.g., GLOBAL_* on Core 2).

[watchdog] do not BUG_ON() in the MSR allocator if MSR is unknown, return ok
instead

Signed-off-by: Stephane Eranian <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/kernel/cpu/perfctr-watchdog.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)

Index: linux/arch/i386/kernel/cpu/perfctr-watchdog.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/perfctr-watchdog.c
+++ linux/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -120,7 +120,9 @@ int reserve_perfctr_nmi(unsigned int msr
unsigned int counter;

counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return 1;

if (!test_and_set_bit(counter, perfctr_nmi_owner))
return 1;
@@ -132,7 +134,9 @@ void release_perfctr_nmi(unsigned int ms
unsigned int counter;

counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return;

clear_bit(counter, perfctr_nmi_owner);
}
@@ -142,7 +146,9 @@ int reserve_evntsel_nmi(unsigned int msr
unsigned int counter;

counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return 1;

if (!test_and_set_bit(counter, evntsel_nmi_owner))
return 1;
@@ -154,7 +160,9 @@ void release_evntsel_nmi(unsigned int ms
unsigned int counter;

counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return;

clear_bit(counter, evntsel_nmi_owner);
}

2007-09-21 20:50:46

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [17/45] i386: make Oprofile call shutdown() only once per session


From: Stephane Eranian <[email protected]>

Oprofile: call model->shutdown() only once to avoid calling release_ev*()
multiple times

Signed-off-by: Stephane Eranian <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/oprofile/nmi_int.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/i386/oprofile/nmi_int.c
===================================================================
--- linux.orig/arch/i386/oprofile/nmi_int.c
+++ linux/arch/i386/oprofile/nmi_int.c
@@ -269,7 +269,6 @@ static void nmi_cpu_shutdown(void * dumm
apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
apic_write(APIC_LVTERR, v);
nmi_restore_registers(msrs);
- model->shutdown(msrs);
}


@@ -278,6 +277,7 @@ static void nmi_shutdown(void)
nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
unregister_die_notifier(&profile_exceptions_nb);
+ model->shutdown(cpu_msrs);
free_msrs();
}

2007-09-21 20:51:00

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [18/45] x86_64: 0 -> NULL, for arch/x86_64


From: Yoann Padioleau <[email protected]>

When comparing a pointer, it's clearer to compare it to NULL than to 0.

Signed-off-by: Yoann Padioleau <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---

arch/x86_64/mm/numa.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/x86_64/mm/numa.c
===================================================================
--- linux.orig/arch/x86_64/mm/numa.c
+++ linux/arch/x86_64/mm/numa.c
@@ -166,7 +166,7 @@ early_node_mem(int nodeid, unsigned long
return __va(mem);
ptr = __alloc_bootmem_nopanic(size,
SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
- if (ptr == 0) {
+ if (ptr == NULL) {
printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
size, nodeid);
return NULL;

2007-09-21 20:51:27

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [20/45] x86_64: Use 8 byte stack alignment when possible


Kernel doesn't use SSE2, so it doesn't need 16 byte alignment. Also
the stack can be already unaligned so letting the compiler align
is useless. This may make some stack frames smaller.
Only works with very recent gcc 4.3
Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/Makefile | 2 ++
1 file changed, 2 insertions(+)

Index: linux/arch/x86_64/Makefile
===================================================================
--- linux.orig/arch/x86_64/Makefile
+++ linux/arch/x86_64/Makefile
@@ -49,6 +49,8 @@ CFLAGS += $(call cc-option,-mno-sse -mno
# newer gccs do it by default
CFLAGS += -maccumulate-outgoing-args

+CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
+
# do binutils support CFI?
CFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)

2007-09-21 20:51:40

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [21/45] x86_64: Some cleanups for pci gart code


- Mark function static
- Clarify license

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/pci-gart.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

Index: linux/arch/x86_64/kernel/pci-gart.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-gart.c
+++ linux/arch/x86_64/kernel/pci-gart.c
@@ -8,6 +8,7 @@
* See Documentation/DMA-mapping.txt for the interface specification.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
+ * Subject to the GNU General Public License v2 only.
*/

#include <linux/types.h>
@@ -374,7 +375,8 @@ static inline int dma_map_cont(struct sc
* DMA map all entries in a scatterlist.
* Merge chunks that have page aligned sizes into a continuous mapping.
*/
-int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ int dir)
{
int i;
int out;

2007-09-21 20:51:55

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [19/45] x86_64: Always accumulate compiler options in CFLAGS


This way they are also used by the cc-option probes further down.
This avoids a problem with a probe not failing without -m64.
Analysis of original bug by Andy Whitcroft

TBD do for i386 too

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/Makefile | 62 +++++++++++++++++++++++++++------------------------
1 file changed, 33 insertions(+), 29 deletions(-)

Index: linux/arch/x86_64/Makefile
===================================================================
--- linux.orig/arch/x86_64/Makefile
+++ linux/arch/x86_64/Makefile
@@ -26,50 +26,54 @@ OBJCOPYFLAGS := -O binary -R .note -R .c
LDFLAGS_vmlinux :=
CHECKFLAGS += -D__x86_64__ -m64

-cflags-y :=
-cflags-kernel-y :=
-cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
-cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
-# will eventually. Use -mtune=generic as fallback
-cflags-$(CONFIG_MCORE2) += \
- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
-cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
-
-cflags-y += -m64
-cflags-y += -mno-red-zone
-cflags-y += -mcmodel=kernel
-cflags-y += -pipe
-cflags-y += -Wno-sign-compare
-cflags-y += -fno-asynchronous-unwind-tables
-ifneq ($(CONFIG_DEBUG_INFO),y)
+AFLAGS += -m64
+CFLAGS += -m64
+CFLAGS += -mno-red-zone
+CFLAGS += -mcmodel=kernel
+CFLAGS += -pipe
+CFLAGS += -Wno-sign-compare
+CFLAGS += -fno-asynchronous-unwind-tables
+#ifneq ($(CONFIG_DEBUG_INFO),y)
# -fweb shrinks the kernel a bit, but the difference is very small
# it also messes up debugging, so don't use it for now.
-#cflags-y += $(call cc-option,-fweb)
-endif
+#CFLAGS += $(call cc-option,-fweb)
+#endif
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
-cflags-y += $(call cc-option,-funit-at-a-time)
+CFLAGS += $(call cc-option,-funit-at-a-time)
# prevent gcc from generating any FP code by mistake
-cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
# this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default
-cflags-y += -maccumulate-outgoing-args
+CFLAGS += -maccumulate-outgoing-args

# do binutils support CFI?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+CFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)

# is .cfi_signal_frame supported too?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
+CFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
+CFLAGS_KERNEL += $(cflags-kernel-y)

-cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector )
-cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all )
+# CFLAGS is used by cc-option, so if combinations fail we notice

-CFLAGS += $(cflags-y)
-CFLAGS_KERNEL += $(cflags-kernel-y)
-AFLAGS += -m64
+cflags-k8-$(CONFIG_MK8) += $(call cc-option,-march=k8)
+CFLAGS += $(cflags-k8-y)
+
+cflags-nocona-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+CFLAGS += $(cflags-nocona-y)
+# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
+# will eventually. Use -mtune=generic as fallback
+cflags-core2-$(CONFIG_MCORE2) += \
+ $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
+CFLAGS += $(cflags-core2-y)
+cflags-generic-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+CFLAGS += $(cflags-generic-y)
+
+cflags-sp-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector )
+cflags-sp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all )
+CFLAGS += $(cflags-sp-y)

head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o

2007-09-21 20:52:20

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [22/45] x86_64: Enable iommu_merge by default


Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/pci-dma.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/x86_64/kernel/pci-dma.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pci-dma.c
+++ linux/arch/x86_64/kernel/pci-dma.c
@@ -11,7 +11,7 @@
#include <asm/iommu.h>
#include <asm/calgary.h>

-int iommu_merge __read_mostly = 0;
+int iommu_merge __read_mostly = 1;
EXPORT_SYMBOL(iommu_merge);

dma_addr_t bad_dma_address __read_mostly;

2007-09-21 20:52:36

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [23/45] i386: Make callgraph use dump_trace() on i386/x86_64


From: [email protected]
This patch improves oprofile callgraphs for i386/x86_64. The old backtracing
code was unable to produce even kernel backtraces if the kernel wasn't
compiled with framepointers. The code now uses dump_trace().

Signed-off-by: Jan Blunck <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/oprofile/backtrace.c | 104 ++++++++++++++---------------------------
1 file changed, 38 insertions(+), 66 deletions(-)

Index: linux/arch/i386/oprofile/backtrace.c
===================================================================
--- linux.orig/arch/i386/oprofile/backtrace.c
+++ linux/arch/i386/oprofile/backtrace.c
@@ -13,25 +13,45 @@
#include <linux/mm.h>
#include <asm/ptrace.h>
#include <asm/uaccess.h>
+#include <asm/stacktrace.h>

-struct frame_head {
- struct frame_head * ebp;
- unsigned long ret;
-} __attribute__((packed));
+static void backtrace_warning_symbol(void *data, char *msg,
+ unsigned long symbol)
+{
+ /* Ignore warnings */
+}

-static struct frame_head *
-dump_kernel_backtrace(struct frame_head * head)
+static void backtrace_warning(void *data, char *msg)
{
- oprofile_add_trace(head->ret);
+ /* Ignore warnings */
+}

- /* frame pointers should strictly progress back up the stack
- * (towards higher addresses) */
- if (head >= head->ebp)
- return NULL;
+static int backtrace_stack(void *data, char *name)
+{
+ /* Yes, we want all stacks */
+ return 0;
+}

- return head->ebp;
+static void backtrace_address(void *data, unsigned long addr)
+{
+ unsigned int *depth = data;
+
+ if ((*depth)--)
+ oprofile_add_trace(addr);
}

+static struct stacktrace_ops backtrace_ops = {
+ .warning = backtrace_warning,
+ .warning_symbol = backtrace_warning_symbol,
+ .stack = backtrace_stack,
+ .address = backtrace_address,
+};
+
+struct frame_head {
+ struct frame_head * ebp;
+ unsigned long ret;
+} __attribute__((packed));
+
static struct frame_head *
dump_user_backtrace(struct frame_head * head)
{
@@ -53,72 +73,24 @@ dump_user_backtrace(struct frame_head *
return bufhead[0].ebp;
}

-/*
- * | | /\ Higher addresses
- * | |
- * --------------- stack base (address of current_thread_info)
- * | thread info |
- * . .
- * | stack |
- * --------------- saved regs->ebp value if valid (frame_head address)
- * . .
- * --------------- saved regs->rsp value if x86_64
- * | |
- * --------------- struct pt_regs * stored on stack if 32-bit
- * | |
- * . .
- * | |
- * --------------- %esp
- * | |
- * | | \/ Lower addresses
- *
- * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the
- * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other
- * exceptions use special stacks, maintained by the interrupt stack table
- * (IST). These stacks are set up in trap_init() in
- * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point
- * to the kernel stack; instead, it points to some location on the NMI
- * stack. On the other hand, regs->rsp is the stack pointer saved when the
- * NMI occurred. (2) For 32-bit, regs->esp is not valid because the
- * processor does not save %esp on the kernel stack when interrupts occur
- * in the kernel mode.
- */
-#ifdef CONFIG_FRAME_POINTER
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
- unsigned long headaddr = (unsigned long)head;
-#ifdef CONFIG_X86_64
- unsigned long stack = (unsigned long)regs->rsp;
-#else
- unsigned long stack = (unsigned long)regs;
-#endif
- unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
-
- return headaddr > stack && headaddr < stack_base;
-}
-#else
-/* without fp, it's just junk */
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
- return 0;
-}
-#endif
-
-
void
x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
struct frame_head *head;
+ unsigned long stack;

#ifdef CONFIG_X86_64
head = (struct frame_head *)regs->rbp;
+ stack = regs->rsp;
#else
head = (struct frame_head *)regs->ebp;
+ stack = regs->esp;
#endif

if (!user_mode_vm(regs)) {
- while (depth-- && valid_kernel_stack(head, regs))
- head = dump_kernel_backtrace(head);
+ if (depth)
+ dump_trace(NULL, regs, (unsigned long *)stack,
+ &backtrace_ops, &depth);
return;
}

2007-09-21 20:52:51

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [24/45] x86: Introduce frame_pointer() and stack_pointer()


From: [email protected]
This patch defines frame_pointer() and stack_pointer() similar to the
already defined instruction_pointer(). Thus the oprofile code can be written
in a more readable fashion.

Signed-off-by: Jan Blunck <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/i386/oprofile/backtrace.c | 12 ++----------
include/asm-i386/ptrace.h | 2 ++
include/asm-x86_64/ptrace.h | 2 ++
3 files changed, 6 insertions(+), 10 deletions(-)

Index: linux/arch/i386/oprofile/backtrace.c
===================================================================
--- linux.orig/arch/i386/oprofile/backtrace.c
+++ linux/arch/i386/oprofile/backtrace.c
@@ -76,16 +76,8 @@ dump_user_backtrace(struct frame_head *
void
x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
- struct frame_head *head;
- unsigned long stack;
-
-#ifdef CONFIG_X86_64
- head = (struct frame_head *)regs->rbp;
- stack = regs->rsp;
-#else
- head = (struct frame_head *)regs->ebp;
- stack = regs->esp;
-#endif
+ struct frame_head *head = (struct frame_head *)frame_pointer(regs);
+ unsigned long stack = stack_pointer(regs);

if (!user_mode_vm(regs)) {
if (depth)
Index: linux/include/asm-i386/ptrace.h
===================================================================
--- linux.orig/include/asm-i386/ptrace.h
+++ linux/include/asm-i386/ptrace.h
@@ -55,6 +55,8 @@ static inline int v8086_mode(struct pt_r
}

#define instruction_pointer(regs) ((regs)->eip)
+#define frame_pointer(regs) ((regs)->ebp)
+#define stack_pointer(regs) ((regs)->esp)
#define regs_return_value(regs) ((regs)->eax)

extern unsigned long profile_pc(struct pt_regs *regs);
Index: linux/include/asm-x86_64/ptrace.h
===================================================================
--- linux.orig/include/asm-x86_64/ptrace.h
+++ linux/include/asm-x86_64/ptrace.h
@@ -40,6 +40,8 @@ struct pt_regs {
#define user_mode(regs) (!!((regs)->cs & 3))
#define user_mode_vm(regs) user_mode(regs)
#define instruction_pointer(regs) ((regs)->rip)
+#define frame_pointer(regs) ((regs)->rbp)
+#define stack_pointer(regs) ((regs)->rsp)
#define regs_return_value(regs) ((regs)->rax)

extern unsigned long profile_pc(struct pt_regs *regs);

2007-09-21 20:53:14

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [25/45] x86_64: remove sync_Arb_IDs


From: Yinghai Lu <[email protected]>
[PATCH] x86_64: remove sync_Arb_IDs

i386 code said
/*
* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
* needed on AMD.
*/
So we don't need sync_Arb_IDs for x86_64...

Signed-off-by: Yinghai Lu <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/apic.c | 17 -----------------
arch/x86_64/kernel/io_apic.c | 1 -
2 files changed, 18 deletions(-)

Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -283,23 +283,6 @@ int __init verify_local_APIC(void)
return 1;
}

-void __init sync_Arb_IDs(void)
-{
- /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
- unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (ver >= 0x14) /* P4 or higher */
- return;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
- apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
- | APIC_DM_INIT);
-}
-
/*
* An initial setup of the virtual wire mode.
*/
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -1784,7 +1784,6 @@ void __init setup_IO_APIC(void)

apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");

- sync_Arb_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
check_timer();

2007-09-21 20:53:34

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [26/45] x86_64: clear IO_APIC before enabling apic error vector.


From: Yinghai Lu <[email protected]>
[PATCH] x86_64: clear IO_APIC before enabling apic error vector.

Some systems that do APIC ID lifting (4-socket quad-core, 8-socket quad-core) also lift the APIC ID of the BSP.

But the IO-APIC registers for ExtINT still use 0 as the destination.

So when we enable the APIC error vector on the BSP, we get one APIC error:

CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
CPU 0/4 -> Node 0
CPU: Physical Processor ID: 1
CPU: Processor Core ID: 0
SMP alternatives: switching to UP code
ACPI: Core revision 20070126
enabled ExtINT on CPU#0
ESR value after enabling vector: 00000000, after 0000000c
APIC error on CPU0: 0c(08)
ENABLING IO-APIC IRQs
Synchronizing Arb IDs.

So move enable_IO_APIC from setup_IO_APIC into setup_local_APIC and call it
before enabling apic error vector.

Signed-off-by: Yinghai Lu <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/apic.c | 8 ++++++++
arch/x86_64/kernel/io_apic.c | 7 +++++--
include/asm-x86_64/hw_irq.h | 1 +
3 files changed, 14 insertions(+), 2 deletions(-)

Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -419,6 +419,14 @@ void __cpuinit setup_local_APIC (void)
value = APIC_DM_NMI | APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);

+ /*
+ * Now enable IO-APICs, actually call clear_IO_APIC
+ * We need clear_IO_APIC before enabling vector on BP
+ */
+ if (!smp_processor_id())
+ if (!skip_ioapic_setup && nr_ioapics)
+ enable_IO_APIC();
+
{
unsigned oldvalue;
maxlvt = get_maxlvt();
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -1166,7 +1166,7 @@ void __apicdebuginit print_PIC(void)

#endif /* 0 */

-static void __init enable_IO_APIC(void)
+void __init enable_IO_APIC(void)
{
union IO_APIC_reg_01 reg_01;
int i8259_apic, i8259_pin;
@@ -1775,7 +1775,10 @@ __setup("no_timer_check", notimercheck);

void __init setup_IO_APIC(void)
{
- enable_IO_APIC();
+
+ /*
+ * calling enable_IO_APIC() is moved to setup_local_APIC for BP
+ */

if (acpi_ioapic)
io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
Index: linux/include/asm-x86_64/hw_irq.h
===================================================================
--- linux.orig/include/asm-x86_64/hw_irq.h
+++ linux/include/asm-x86_64/hw_irq.h
@@ -135,6 +135,7 @@ extern void init_8259A(int aeoi);
extern void send_IPI_self(int vector);
extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
extern void disable_IO_APIC(void);
extern void print_IO_APIC(void);
extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);

2007-09-21 20:53:50

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [27/45] x86_64: convert mm_context_t semaphore to a mutex


From: "Luiz Fernando N. Capitulino" <[email protected]>

Signed-off-by: Luiz Fernando N. Capitulino <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86_64/kernel/ldt.c | 14 +++++++-------
arch/x86_64/kernel/ptrace.c | 4 ++--
include/asm-x86_64/mmu.h | 4 ++--
3 files changed, 11 insertions(+), 11 deletions(-)

Index: linux/arch/x86_64/kernel/ldt.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ldt.c
+++ linux/arch/x86_64/kernel/ldt.c
@@ -98,13 +98,13 @@ int init_new_context(struct task_struct
struct mm_struct * old_mm;
int retval = 0;

- init_MUTEX(&mm->context.sem);
+ mutex_init(&mm->context.lock);
mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
- down(&old_mm->context.sem);
+ mutex_lock(&old_mm->context.lock);
retval = copy_ldt(&mm->context, &old_mm->context);
- up(&old_mm->context.sem);
+ mutex_unlock(&old_mm->context.lock);
}
return retval;
}
@@ -135,7 +135,7 @@ static int read_ldt(void __user * ptr, u
if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;

- down(&mm->context.sem);
+ mutex_lock(&mm->context.lock);
size = mm->context.size*LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
@@ -143,7 +143,7 @@ static int read_ldt(void __user * ptr, u
err = 0;
if (copy_to_user(ptr, mm->context.ldt, size))
err = -EFAULT;
- up(&mm->context.sem);
+ mutex_unlock(&mm->context.lock);
if (err < 0)
goto error_return;
if (size != bytecount) {
@@ -195,7 +195,7 @@ static int write_ldt(void __user * ptr,
goto out;
}

- down(&mm->context.sem);
+ mutex_lock(&mm->context.lock);
if (ldt_info.entry_number >= (unsigned)mm->context.size) {
error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
if (error < 0)
@@ -225,7 +225,7 @@ install:
error = 0;

out_unlock:
- up(&mm->context.sem);
+ mutex_unlock(&mm->context.lock);
out:
return error;
}
Index: linux/arch/x86_64/kernel/ptrace.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ptrace.c
+++ linux/arch/x86_64/kernel/ptrace.c
@@ -104,7 +104,7 @@ unsigned long convert_rip_to_linear(stru

seg &= ~7UL;

- down(&child->mm->context.sem);
+ mutex_lock(&child->mm->context.lock);
if (unlikely((seg >> 3) >= child->mm->context.size))
addr = -1L; /* bogus selector, access would fault */
else {
@@ -118,7 +118,7 @@ unsigned long convert_rip_to_linear(stru
addr &= 0xffff;
addr += base;
}
- up(&child->mm->context.sem);
+ mutex_unlock(&child->mm->context.lock);
}

return addr;
Index: linux/include/asm-x86_64/mmu.h
===================================================================
--- linux.orig/include/asm-x86_64/mmu.h
+++ linux/include/asm-x86_64/mmu.h
@@ -2,7 +2,7 @@
#define __x86_64_MMU_H

#include <linux/spinlock.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>

/*
* The x86_64 doesn't have a mmu context, but
@@ -14,7 +14,7 @@ typedef struct {
void *ldt;
rwlock_t ldtlock;
int size;
- struct semaphore sem;
+ struct mutex lock;
void *vdso;
} mm_context_t;

2007-09-21 20:54:14

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [28/45] x86_64: clean up apicid_to_node declaration


From: Andrew Morton <[email protected]>

Use the correct #define in the declaration of apicid_to_node[], to match the
definition.

Cc: Andi Kleen <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

include/asm-x86_64/numa.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux/include/asm-x86_64/numa.h
===================================================================
--- linux.orig/include/asm-x86_64/numa.h
+++ linux/include/asm-x86_64/numa.h
@@ -2,6 +2,7 @@
#define _ASM_X8664_NUMA_H 1

#include <linux/nodemask.h>
+#include <asm/apicdef.h>

struct bootnode {
u64 start,end;
@@ -19,7 +20,7 @@ extern void numa_set_node(int cpu, int n
extern void srat_reserve_add_area(int nodeid);
extern int hotadd_percent;

-extern unsigned char apicid_to_node[256];
+extern unsigned char apicid_to_node[MAX_LOCAL_APIC];
#ifdef CONFIG_NUMA
extern void __init init_cpu_to_node(void);

2007-09-21 20:54:31

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [29/45] i386: Consolidate show_regs and show_registers for i386


From: Pavel Emelyanov <[email protected]>
Both functions printk the same information in a slightly different
manner, except that show_regs() additionally prints the CRx and debug
registers. So move the common code into one place. This is already
done for x86_64, so I think it's worth having the same on i386.

This saves 100 bytes of .rodata section :) ...
but only 8 from .text :(

Signed-off-by: Pavel Emelyanov <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

arch/i386/kernel/process.c | 56 ++++++++++++++++++++++++++++++++-------------
arch/i386/kernel/traps.c | 32 +++----------------------
include/asm-i386/system.h | 1
3 files changed, 45 insertions(+), 44 deletions(-)

Index: linux/arch/i386/kernel/process.c
===================================================================
--- linux.orig/arch/i386/kernel/process.c
+++ linux/arch/i386/kernel/process.c
@@ -297,34 +297,52 @@ static int __init idle_setup(char *str)
}
early_param("idle", idle_setup);

-void show_regs(struct pt_regs * regs)
+void __show_registers(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
+ unsigned long esp;
+ unsigned short ss, gs;
+
+ if (user_mode_vm(regs)) {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ savesegment(gs, gs);
+ } else {
+ esp = (unsigned long) (&regs->esp);
+ savesegment(ss, ss);
+ savesegment(gs, gs);
+ }

printk("\n");
- printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
+ printk("Pid: %d, comm: %.*s %s (%s %.*s)\n",
+ current->pid, TASK_COMM_LEN, current->comm,
+ print_tainted(), init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+
+ printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+ 0xffff & regs->xcs, regs->eip, regs->eflags,
+ smp_processor_id());
print_symbol("EIP is at %s\n", regs->eip);

- if (user_mode_vm(regs))
- printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
- printk(" EFLAGS: %08lx %s (%s %.*s)\n",
- regs->eflags, print_tainted(), init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
- regs->eax,regs->ebx,regs->ecx,regs->edx);
- printk("ESI: %08lx EDI: %08lx EBP: %08lx",
- regs->esi, regs->edi, regs->ebp);
- printk(" DS: %04x ES: %04x FS: %04x\n",
- 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff,
+ regs->xfs & 0xffff, gs, ss);
+
+ if (!all)
+ return;

cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4_safe();
- printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+ cr0, cr2, cr3, cr4);

get_debugreg(d0, 0);
get_debugreg(d1, 1);
@@ -332,10 +350,16 @@ void show_regs(struct pt_regs * regs)
get_debugreg(d3, 3);
printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
d0, d1, d2, d3);
+
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk("DR6: %08lx DR7: %08lx\n", d6, d7);
+ printk("DR6: %08lx DR7: %08lx\n",
+ d6, d7);
+}

+void show_regs(struct pt_regs * regs)
+{
+ __show_registers(regs, 1);
show_trace(NULL, regs, &regs->esp);
}

Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -358,33 +358,9 @@ EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
int i;
- int in_kernel = 1;
- unsigned long esp;
- unsigned short ss, gs;
-
- esp = (unsigned long) (&regs->esp);
- savesegment(ss, ss);
- savesegment(gs, gs);
- if (user_mode_vm(regs)) {
- in_kernel = 0;
- esp = regs->esp;
- ss = regs->xss & 0xffff;
- }
+
print_modules();
- printk(KERN_EMERG "CPU: %d\n"
- KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
- KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
- smp_processor_id(), 0xffff & regs->xcs, regs->eip,
- print_tainted(), regs->eflags, init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
- printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
- printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
- printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
+ __show_registers(regs, 0);
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
TASK_COMM_LEN, current->comm, current->pid,
current_thread_info(), current, task_thread_info(current));
@@ -392,14 +368,14 @@ void show_registers(struct pt_regs *regs
* When in-kernel, we also print out the stack and code at the
* time of the fault..
*/
- if (in_kernel) {
+ if (!user_mode_vm(regs)) {
u8 *eip;
unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes;
unsigned char c;

printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
+ show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);

printk(KERN_EMERG "Code: ");

Index: linux/include/asm-i386/system.h
===================================================================
--- linux.orig/include/asm-i386/system.h
+++ linux/include/asm-i386/system.h
@@ -318,5 +318,6 @@ extern unsigned long arch_align_stack(un
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);

void default_idle(void);
+void __show_registers(struct pt_regs *, int all);

#endif

2007-09-21 20:54:47

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [30/45] i386: Remove local CPU logic in MTRR call to smp_call_function_single


From: [email protected]

smp_call_function_single() now handles the local-CPU case correctly,
so there is no need to handle it in the caller.

Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

arch/i386/kernel/cpu/mtrr/main.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)

Index: linux/arch/i386/kernel/cpu/mtrr/main.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/mtrr/main.c
+++ linux/arch/i386/kernel/cpu/mtrr/main.c
@@ -738,13 +738,7 @@ void mtrr_ap_init(void)
*/
void mtrr_save_state(void)
{
- int cpu = get_cpu();
-
- if (cpu == 0)
- mtrr_save_fixed_ranges(NULL);
- else
- smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
- put_cpu();
+ smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
}

static int __init mtrr_init_finialize(void)

2007-09-21 20:55:08

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [31/45] i386: make struct apic_probe static


From: Adrian Bunk <[email protected]>

This patch makes the needlessly global struct apic_probe static.

Signed-off-by: Adrian Bunk <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

arch/i386/mach-generic/probe.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/i386/mach-generic/probe.c
===================================================================
--- linux.orig/arch/i386/mach-generic/probe.c
+++ linux/arch/i386/mach-generic/probe.c
@@ -22,7 +22,7 @@ extern struct genapic apic_default;

struct genapic *genapic = &apic_default;

-struct genapic *apic_probe[] __initdata = {
+static struct genapic *apic_probe[] __initdata = {
&apic_summit,
&apic_bigsmp,
&apic_es7000,

2007-09-21 20:55:45

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [32/45] x86_64: hide cond_syscall behind __KERNEL__


From: Mike Frysinger <[email protected]>

This brings x86_64 into line with all other architectures by only defining
cond_syscall() when __KERNEL__ is defined.

Signed-off-by: Mike Frysinger <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

include/asm-x86_64/unistd.h | 2 ++
1 file changed, 2 insertions(+)

Index: linux/include/asm-x86_64/unistd.h
===================================================================
--- linux.orig/include/asm-x86_64/unistd.h
+++ linux/include/asm-x86_64/unistd.h
@@ -676,6 +676,7 @@ asmlinkage long sys_rt_sigaction(int sig
#endif /* __KERNEL__ */
#endif /* __NO_STUBS */

+#ifdef __KERNEL__
/*
* "Conditional" syscalls
*
@@ -683,5 +684,6 @@ asmlinkage long sys_rt_sigaction(int sig
* but it doesn't work on all toolchains, so we just do it by hand
*/
#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+#endif /* __KERNEL__ */

#endif /* _ASM_X86_64_UNISTD_H_ */

2007-09-21 20:55:58

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [33/45] i386: es7000 minor cleanups


From: Adrian Bunk <[email protected]>

This patch contains the following cleanups:
- make some needlessly global functions static
- #if 0 the unused es7000_stop_cpu()

AK: actually removed es7000_stop_cpu
AK: fixed a non ISO prototype too


Signed-off-by: Adrian Bunk <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

arch/i386/mach-es7000/es7000plat.c | 32 ++++++--------------------------
1 file changed, 6 insertions(+), 26 deletions(-)

Index: linux/arch/i386/mach-es7000/es7000plat.c
===================================================================
--- linux.orig/arch/i386/mach-es7000/es7000plat.c
+++ linux/arch/i386/mach-es7000/es7000plat.c
@@ -46,11 +46,11 @@
* ES7000 Globals
*/

-volatile unsigned long *psai = NULL;
-struct mip_reg *mip_reg;
-struct mip_reg *host_reg;
-int mip_port;
-unsigned long mip_addr, host_addr;
+static volatile unsigned long *psai = NULL;
+static struct mip_reg *mip_reg;
+static struct mip_reg *host_reg;
+static int mip_port;
+static unsigned long mip_addr, host_addr;

/*
* GSI override for ES7000 platforms.
@@ -288,28 +288,8 @@ es7000_start_cpu(int cpu, unsigned long

}

-int
-es7000_stop_cpu(int cpu)
-{
- int startup;
-
- if (psai == NULL)
- return -1;
-
- startup= (0x1000000 | cpu);
-
- while ((*psai & 0xff00ffff) != startup)
- ;
-
- startup = (*psai & 0xff0000) >> 16;
- *psai &= 0xffffff;
-
- return 0;
-
-}
-
void __init
-es7000_sw_apic()
+es7000_sw_apic(void)
{
if (es7000_plat) {
int mip_status;

2007-09-21 20:56:23

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [34/45] i386: no need to make enable_cpu_hotplug a variable


From: Adrian Bunk <[email protected]>

As long as there's no write access to this variable there's no reason to let
gcc check it at runtime.

Signed-off-by: Adrian Bunk <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/kernel/topology.c | 2 --
include/asm-i386/cpu.h | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)

Index: linux/arch/i386/kernel/topology.c
===================================================================
--- linux.orig/arch/i386/kernel/topology.c
+++ linux/arch/i386/kernel/topology.c
@@ -51,8 +51,6 @@ int arch_register_cpu(int num)
}

#ifdef CONFIG_HOTPLUG_CPU
-int enable_cpu_hotplug = 1;
-
void arch_unregister_cpu(int num) {
return unregister_cpu(&cpu_devices[num].cpu);
}
Index: linux/include/asm-i386/cpu.h
===================================================================
--- linux.orig/include/asm-i386/cpu.h
+++ linux/include/asm-i386/cpu.h
@@ -13,7 +13,7 @@ struct i386_cpu {
extern int arch_register_cpu(int num);
#ifdef CONFIG_HOTPLUG_CPU
extern void arch_unregister_cpu(int);
-extern int enable_cpu_hotplug;
+#define enable_cpu_hotplug 1
#else
#define enable_cpu_hotplug 0
#endif

2007-09-21 20:56:36

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [35/45] i386: make some variables static


From: Adrian Bunk <[email protected]>

This patch makes some needlessly global variables static.

Signed-off-by: Adrian Bunk <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/mm/discontig.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

Index: linux/arch/i386/mm/discontig.c
===================================================================
--- linux.orig/arch/i386/mm/discontig.c
+++ linux/arch/i386/mm/discontig.c
@@ -103,14 +103,14 @@ extern unsigned long highend_pfn, highst

#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)

-unsigned long node_remap_start_pfn[MAX_NUMNODES];
+static unsigned long node_remap_start_pfn[MAX_NUMNODES];
unsigned long node_remap_size[MAX_NUMNODES];
-unsigned long node_remap_offset[MAX_NUMNODES];
-void *node_remap_start_vaddr[MAX_NUMNODES];
+static unsigned long node_remap_offset[MAX_NUMNODES];
+static void *node_remap_start_vaddr[MAX_NUMNODES];
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);

-void *node_remap_end_vaddr[MAX_NUMNODES];
-void *node_remap_alloc_vaddr[MAX_NUMNODES];
+static void *node_remap_end_vaddr[MAX_NUMNODES];
+static void *node_remap_alloc_vaddr[MAX_NUMNODES];
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
/*

2007-09-21 20:56:56

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [36/45] x86_64: kmalloc + memset conversion to kzalloc


From: Mariusz Kozlowski <[email protected]>

arch/x86_64/kernel/io_apic.c | 56080 -> 56038 (-42 bytes)

Signed-off-by: Mariusz Kozlowski <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

arch/x86_64/kernel/io_apic.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -1847,7 +1847,7 @@ static struct sysdev_class ioapic_sysdev
static int __init ioapic_init_sysfs(void)
{
struct sys_device * dev;
- int i, size, error = 0;
+ int i, size, error;

error = sysdev_class_register(&ioapic_sysdev_class);
if (error)
@@ -1856,12 +1856,11 @@ static int __init ioapic_init_sysfs(void
for (i = 0; i < nr_ioapics; i++ ) {
size = sizeof(struct sys_device) + nr_ioapic_registers[i]
* sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
if (!mp_ioapic_data[i]) {
printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
continue;
}
- memset(mp_ioapic_data[i], 0, size);
dev = &mp_ioapic_data[i]->dev;
dev->id = i;
dev->cls = &ioapic_sysdev_class;

2007-09-21 20:57:23

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [37/45] i386: remove -maccumulate-outgoing-args


From: Adrian Bunk <[email protected]>

Contrary to the comment "newer gccs do it by default", newer gcc versions
default to -maccumulate-outgoing-args only with CONFIG_CC_OPTIMIZE_FOR_SIZE=n,
and then only with some CPU settings.

Measured with an i386 defconfig, gcc 4.2.1 and kernel 2.6.23-rc1 ("orig" is
the plain kernel, "changed is with -maccumulate-outgoing-args removed):

$ ls -la vmlinux*
-rwxrwxr-x 1 bunk bunk 6269713 2007-07-24 22:19 vmlinux.changed
-rwxrwxr-x 1 bunk bunk 6425361 2007-07-24 22:19 vmlinux.orig
$ size vmlinux.*
text data bss dec hex filename
4493465 504108 614400 5611973 55a1c5 vmlinux.changed
4646160 504108 614400 5764668 57f63c vmlinux.orig
$

That's a 2.5% size increase that does for sure hurt small systems.

If the stack unwinder ever comes back and needs this as indicated in the
comment, adding it to the cflags when the user enabled the unwinder should be
a better option.

Signed-off-by: Adrian Bunk <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

arch/i386/Makefile | 4 ----
1 file changed, 4 deletions(-)

Index: linux/arch/i386/Makefile
===================================================================
--- linux.orig/arch/i386/Makefile
+++ linux/arch/i386/Makefile
@@ -46,10 +46,6 @@ cflags-y += -ffreestanding
# newer gccs do it by default
cflags-y += -maccumulate-outgoing-args

-# this works around some issues with generating unwind tables in older gccs
-# newer gccs do it by default
-cflags-y += -maccumulate-outgoing-args
-
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)

2007-09-21 20:57:53

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [38/45] i386: setup_trampoline() must be __cpuinit


From: Adrian Bunk <[email protected]>

WARNING: arch/i386/kernel/built-in.o(.text+0xf201): Section mismatch: reference to .init.data:trampoline_end (between 'setup_trampoline' and 'cpu_coregroup_map')
WARNING: arch/i386/kernel/built-in.o(.text+0xf207): Section mismatch: reference to .init.data:trampoline_data (between 'setup_trampoline' and 'cpu_coregroup_map')
WARNING: arch/i386/kernel/built-in.o(.text+0xf21a): Section mismatch: reference to .init.data:trampoline_data (between 'setup_trampoline' and 'cpu_coregroup_map')

Harmless but annoying warnings present when building an i386 SMP kernel
with CONFIG_HOTPLUG_CPU=n and gcc < 4.0 .

Signed-off-by: Adrian Bunk <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>

---

arch/i386/kernel/smpboot.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/i386/kernel/smpboot.c
===================================================================
--- linux.orig/arch/i386/kernel/smpboot.c
+++ linux/arch/i386/kernel/smpboot.c
@@ -118,7 +118,7 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
* has made sure it's suitably aligned.
*/

-static unsigned long __devinit setup_trampoline(void)
+static unsigned long __cpuinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(trampoline_base);

2007-09-21 20:58:15

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [39/45] x86_64: block irq balancing for timer


From: Venki Pallipadi <[email protected]>

Disable irq balancing on IRQ0. Several SIS chipsets lock up when you try to
change affinity of IRQ #0.

Signed-off-by: Venkatesh Pallipadi <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: john stultz <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/x86_64/kernel/time.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -360,7 +360,7 @@ void stop_timer_interrupt(void)

static struct irqaction irq0 = {
.handler = timer_interrupt,
- .flags = IRQF_DISABLED | IRQF_IRQPOLL,
+ .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING,
.mask = CPU_MASK_NONE,
.name = "timer"
};

2007-09-21 20:58:32

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [40/45] i386: deactivate the test for the dead CONFIG_DEBUG_PAGE_TYPE


From: "Robert P. J. Day" <[email protected]>

Signed-off-by: Robert P. J. Day <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Acked-by: Zachary Amsden <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/kernel/vmi.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux/arch/i386/kernel/vmi.c
===================================================================
--- linux.orig/arch/i386/kernel/vmi.c
+++ linux/arch/i386/kernel/vmi.c
@@ -242,7 +242,7 @@ static void vmi_nop(void)
{
}

-#ifdef CONFIG_DEBUG_PAGE_TYPE
+#if 0 /* debug page type */

#ifdef CONFIG_X86_PAE
#define MAX_BOOT_PTS (2048+4+1)
@@ -343,7 +343,7 @@ static void vmi_check_page_type(u32 pfn,
#else
#define vmi_set_page_type(p,t) do { } while (0)
#define vmi_check_page_type(p,t) do { } while (0)
-#endif
+#endif /* debug page type */

#ifdef CONFIG_HIGHPTE
static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)

2007-09-21 20:58:46

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [41/45] i386: remove unnecessary code


From: Ingo Molnar <[email protected]>

Oleg Nesterov pointed out that the set_fs() calls in setup_frame()
and setup_rt_frame() were superfluous.

Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Chuck Ebbert <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/kernel/signal.c | 2 --
1 file changed, 2 deletions(-)

Index: linux/arch/i386/kernel/signal.c
===================================================================
--- linux.orig/arch/i386/kernel/signal.c
+++ linux/arch/i386/kernel/signal.c
@@ -387,7 +387,6 @@ static int setup_frame(int sig, struct k
regs->edx = (unsigned long) 0;
regs->ecx = (unsigned long) 0;

- set_fs(USER_DS);
regs->xds = __USER_DS;
regs->xes = __USER_DS;
regs->xss = __USER_DS;
@@ -481,7 +480,6 @@ static int setup_rt_frame(int sig, struc
regs->edx = (unsigned long) &frame->info;
regs->ecx = (unsigned long) &frame->uc;

- set_fs(USER_DS);
regs->xds = __USER_DS;
regs->xes = __USER_DS;
regs->xss = __USER_DS;

2007-09-21 20:59:05

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [42/45] x86_64: use descriptor's functions instead of inline assembly


From: Glauber de Oliveira Costa <[email protected]>

This patch provides a new set of functions for managing the descriptor
tables that can be used instead of putting the raw assembly in .c files.

Remodeling of store_tr() suggested by Frederik Deweerdt.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/x86_64/kernel/head64.c | 2 +-
arch/x86_64/kernel/reboot.c | 3 ++-
arch/x86_64/kernel/setup64.c | 4 ++--
arch/x86_64/kernel/suspend.c | 11 ++++++-----
include/asm-x86_64/desc.h | 29 +++++++++++++++++++++++++++++
5 files changed, 40 insertions(+), 9 deletions(-)

Index: linux/arch/x86_64/kernel/head64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/head64.c
+++ linux/arch/x86_64/kernel/head64.c
@@ -70,7 +70,7 @@ void __init x86_64_start_kernel(char * r

for (i = 0; i < IDT_ENTRIES; i++)
set_intr_gate(i, early_idt_handler);
- asm volatile("lidt %0" :: "m" (idt_descr));
+ load_idt((const struct desc_ptr *)&idt_descr);

early_printk("Kernel alive\n");

Index: linux/arch/x86_64/kernel/reboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/reboot.c
+++ linux/arch/x86_64/kernel/reboot.c
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <asm/io.h>
#include <asm/delay.h>
+#include <asm/desc.h>
#include <asm/hw_irq.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -136,7 +137,7 @@ void machine_emergency_restart(void)
}

case BOOT_TRIPLE:
- __asm__ __volatile__("lidt (%0)": :"r" (&no_idt));
+ load_idt((const struct desc_ptr *)&no_idt);
__asm__ __volatile__("int3");

reboot_type = BOOT_KBD;
Index: linux/arch/x86_64/kernel/setup64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup64.c
+++ linux/arch/x86_64/kernel/setup64.c
@@ -224,8 +224,8 @@ void __cpuinit cpu_init (void)
memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);

cpu_gdt_descr[cpu].size = GDT_SIZE;
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
- asm volatile("lidt %0" :: "m" (idt_descr));
+ load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
+ load_idt((const struct desc_ptr *)&idt_descr);

memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
Index: linux/arch/x86_64/kernel/suspend.c
===================================================================
--- linux.orig/arch/x86_64/kernel/suspend.c
+++ linux/arch/x86_64/kernel/suspend.c
@@ -32,9 +32,9 @@ void __save_processor_state(struct saved
/*
* descriptor tables
*/
- asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
- asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
- asm volatile ("str %0" : "=m" (ctxt->tr));
+ store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
+ store_idt((struct desc_ptr *)&ctxt->idt_limit);
+ store_tr(ctxt->tr);

/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
/*
@@ -91,8 +91,9 @@ void __restore_processor_state(struct sa
* now restore the descriptor tables to their proper values
* ltr is done i fix_processor_context().
*/
- asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
- asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+ load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
+ load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+

/*
* segment registers
Index: linux/include/asm-x86_64/desc.h
===================================================================
--- linux.orig/include/asm-x86_64/desc.h
+++ linux/include/asm-x86_64/desc.h
@@ -20,6 +20,15 @@ extern struct desc_struct cpu_gdt_table[
#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
#define clear_LDT() asm volatile("lldt %w0"::"r" (0))

+static inline unsigned long __store_tr(void)
+{
+ unsigned long tr;
+ asm volatile ("str %w0":"=r" (tr));
+ return tr;
+}
+
+#define store_tr(tr) (tr) = __store_tr()
+
/*
* This is the ldt that every process will get unless we need
* something other than this.
@@ -31,6 +40,16 @@ extern struct desc_ptr cpu_gdt_descr[];
/* the cpu gdt accessor */
#define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address)

+static inline void load_gdt(const struct desc_ptr *ptr)
+{
+ asm volatile("lgdt %w0"::"m" (*ptr));
+}
+
+static inline void store_gdt(struct desc_ptr *ptr)
+{
+ asm ("sgdt %w0":"=m" (*ptr));
+}
+
static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)
{
struct gate_struct s;
@@ -71,6 +90,16 @@ static inline void set_system_gate_ist(i
_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
}

+static inline void load_idt(const struct desc_ptr *ptr)
+{
+ asm volatile("lidt %w0"::"m" (*ptr));
+}
+
+static inline void store_idt(struct desc_ptr *dtr)
+{
+ asm ("sidt %w0":"=m" (*dtr));
+}
+
static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type,
unsigned size)
{

2007-09-21 20:59:36

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [43/45] i386: Clean up duplicate includes in arch/i386/xen/


From: Jesper Juhl <[email protected]>

This patch cleans up duplicate includes in
arch/i386/xen/

Signed-off-by: Jesper Juhl <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/xen/enlighten.c | 1 -
arch/i386/xen/mmu.c | 1 -
2 files changed, 2 deletions(-)

Index: linux/arch/i386/xen/enlighten.c
===================================================================
--- linux.orig/arch/i386/xen/enlighten.c
+++ linux/arch/i386/xen/enlighten.c
@@ -25,7 +25,6 @@
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
-#include <linux/smp.h>

#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
Index: linux/arch/i386/xen/mmu.c
===================================================================
--- linux.orig/arch/i386/xen/mmu.c
+++ linux/arch/i386/xen/mmu.c
@@ -41,7 +41,6 @@
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/bug.h>
-#include <linux/sched.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

2007-09-21 20:59:51

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [44/45] i386: simplify smp_call_function_single() call sequence in cpuid


From: Avi Kivity <[email protected]>

smp_call_function_single() now knows how to call the function on the
current cpu.

Cc: H. Peter Anvin <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/kernel/cpuid.c | 22 +++-------------------
1 file changed, 3 insertions(+), 19 deletions(-)

Index: linux/arch/i386/kernel/cpuid.c
===================================================================
--- linux.orig/arch/i386/kernel/cpuid.c
+++ linux/arch/i386/kernel/cpuid.c
@@ -45,8 +45,6 @@

static struct class *cpuid_class;

-#ifdef CONFIG_SMP
-
struct cpuid_command {
u32 reg;
u32 *data;
@@ -64,25 +62,11 @@ static inline void do_cpuid(int cpu, u32
{
struct cpuid_command cmd;

- preempt_disable();
- if (cpu == smp_processor_id()) {
- cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
- } else {
- cmd.reg = reg;
- cmd.data = data;
+ cmd.reg = reg;
+ cmd.data = data;

- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
- }
- preempt_enable();
+ smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
}
-#else /* ! CONFIG_SMP */
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
- cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
-}
-
-#endif /* ! CONFIG_SMP */

static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
{

2007-09-21 21:00:21

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [45/45] i386: simplify smp_call_function_single() call sequence


From: Avi Kivity <[email protected]>

smp_call_function_single() now knows how to call the function on the
current cpu.

Cc: H. Peter Anvin <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/lib/msr-on-cpu.c | 62 +++++++++++++++------------------------------
1 file changed, 22 insertions(+), 40 deletions(-)

Index: linux/arch/i386/lib/msr-on-cpu.c
===================================================================
--- linux.orig/arch/i386/lib/msr-on-cpu.c
+++ linux/arch/i386/lib/msr-on-cpu.c
@@ -26,27 +26,18 @@ static void __rdmsr_safe_on_cpu(void *in
static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
{
int err = 0;
- preempt_disable();
- if (smp_processor_id() == cpu)
- if (safe)
- err = rdmsr_safe(msr_no, l, h);
- else
- rdmsr(msr_no, *l, *h);
- else {
- struct msr_info rv;
-
- rv.msr_no = msr_no;
- if (safe) {
- smp_call_function_single(cpu, __rdmsr_safe_on_cpu,
- &rv, 0, 1);
- err = rv.err;
- } else {
- smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1);
- }
- *l = rv.l;
- *h = rv.h;
+ struct msr_info rv;
+
+ rv.msr_no = msr_no;
+ if (safe) {
+ smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1);
+ err = rv.err;
+ } else {
+ smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1);
}
- preempt_enable();
+ *l = rv.l;
+ *h = rv.h;
+
return err;
}

@@ -67,27 +58,18 @@ static void __wrmsr_safe_on_cpu(void *in
static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
{
int err = 0;
- preempt_disable();
- if (smp_processor_id() == cpu)
- if (safe)
- err = wrmsr_safe(msr_no, l, h);
- else
- wrmsr(msr_no, l, h);
- else {
- struct msr_info rv;
-
- rv.msr_no = msr_no;
- rv.l = l;
- rv.h = h;
- if (safe) {
- smp_call_function_single(cpu, __wrmsr_safe_on_cpu,
- &rv, 0, 1);
- err = rv.err;
- } else {
- smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1);
- }
+ struct msr_info rv;
+
+ rv.msr_no = msr_no;
+ rv.l = l;
+ rv.h = h;
+ if (safe) {
+ smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1);
+ err = rv.err;
+ } else {
+ smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1);
}
- preempt_enable();
+
return err;
}

2007-09-21 21:08:33

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH] [12/45] i386: Fix arch/i386/kernel/nmi.c - 'unknown_nmi_panic_callback' declared 'static' but never defined warning

On Fri, Sep 21, 2007 at 10:44:53PM +0200, Andi Kleen wrote:
>
> I get this warning when CONFIG_SYSCTL is not set :
>
> ...
>
> arch/i386/kernel/nmi.c:52: warning: 'unknown_nmi_panic_callback' declared 'static' but never defined

If you move the function call after the declaration, you can do without
both the prototype, and the ifdef.

Signed-off-by: Dave Jones <[email protected]>

diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index c7227e2..9a8ff0b 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -48,9 +48,6 @@ static unsigned int nmi_hz = HZ;

static DEFINE_PER_CPU(short, wd_enabled);

-/* local prototypes */
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-
static int endflag __initdata = 0;

#ifdef CONFIG_SMP
@@ -389,15 +386,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
return rc;
}

-int do_nmi_callback(struct pt_regs * regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
- if (unknown_nmi_panic)
- return unknown_nmi_panic_callback(regs, cpu);
-#endif
- return 0;
-}
-
#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
@@ -451,6 +439,16 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,

#endif

+int do_nmi_callback(struct pt_regs * regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+ if (unknown_nmi_panic)
+ return unknown_nmi_panic_callback(regs, cpu);
+#endif
+ return 0;
+}
+
+
void __trigger_all_cpu_backtrace(void)
{
int i;

--
http://www.codemonkey.org.uk

2007-09-21 21:14:16

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH] [20/45] x86_64: Use 8 byte stack alignment when possible

On Fri, Sep 21, 2007 at 10:45:02PM +0200, Andi Kleen wrote:
>
> Kernel doesn't use SSE2, so it doesn't need 16 byte alignment. Also
> the stack can be already unaligned so letting the compiler align
> is useless. This may make some stack frames smaller.
> Only works with very recent gcc 4.3

My gcc 4.1.2 from Fedora 7 (with who knows what backported)
references this in its manpage. How was it broken before 4.3 ?
(I'm curious if I'll suddenly see unexpected behaviour with
this change with that compiler).

Dave

--
http://www.codemonkey.org.uk

2007-09-21 21:19:46

by Jakub Jelinek

[permalink] [raw]
Subject: Re: [PATCH] [20/45] x86_64: Use 8 byte stack alignment when possible

On Fri, Sep 21, 2007 at 10:45:02PM +0200, Andi Kleen wrote:
>
> Kernel doesn't use SSE2, so it doesn't need 16 byte alignment. Also
> the stack can be already unaligned so letting the compiler align
> is useless. This may make some stack frames smaller.

Shouldn't sources that are compiled into the VDSO or VSYSCALL pages
revert this to the default?

Jakub

2007-09-21 22:53:46

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [20/45] x86_64: Use 8 byte stack alignment when possible

On Friday 21 September 2007 23:13, Dave Jones wrote:
> On Fri, Sep 21, 2007 at 10:45:02PM +0200, Andi Kleen wrote:
> > Kernel doesn't use SSE2, so it doesn't need 16 byte alignment. Also
> > the stack can be already unaligned so letting the compiler align
> > is useless. This may make some stack frames smaller.
> > Only works with very recent gcc 4.3
>
> My gcc 4.1.2 from Fedora 7 (with who knows what backported)
> references this in its manpage. How was it broken before 4.3 ?

Try it. It is rejected by the compiler in 64bit mode.

-Andi

2007-09-21 22:58:46

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH] [20/45] x86_64: Use 8 byte stack alignment when possible

On Sat, Sep 22, 2007 at 12:34:31AM +0200, Andi Kleen wrote:
> On Friday 21 September 2007 23:13, Dave Jones wrote:
> > On Fri, Sep 21, 2007 at 10:45:02PM +0200, Andi Kleen wrote:
> > > Kernel doesn't use SSE2, so it doesn't need 16 byte alignment. Also
> > > the stack can be already unaligned so letting the compiler align
> > > is useless. This may make some stack frames smaller.
> > > Only works with very recent gcc 4.3
> >
> > My gcc 4.1.2 from Fedora 7 (with who knows what backported)
> > references this in its manpage. How was it broken before 4.3 ?
>
> Try it. It is rejected by the compiler in 64bit mode.

Ah yes, it fails if not between 4 & 12, but the call cc-option
catches that. Looks fine to me.

Dave

--
http://www.codemonkey.org.uk

2007-09-22 03:16:25

by Len Brown

[permalink] [raw]
Subject: Re: [PATCH] [11/45] x86_64: Remove rogue default m in drivers/video/Kconfig

Acked-by: Len Brown <[email protected]>

Sorry, i thought we fixed this earlier.

thanks,
-Len

On Friday 21 September 2007 16:44, Andi Kleen wrote:
>
> default m is near always wrong, like here. For some reason ACPI
> likes to reintroduce these and I like to immediately squash them again
> before they pollute too many .configs.
>
> Cc: [email protected]
> Cc: [email protected]
>
> Signed-off-by: Andi Kleen <[email protected]>
>
> ---
> drivers/video/Kconfig | 1 -
> 1 file changed, 1 deletion(-)
>
> Index: linux/drivers/video/Kconfig
> ===================================================================
> --- linux.orig/drivers/video/Kconfig
> +++ linux/drivers/video/Kconfig
> @@ -14,7 +14,6 @@ config VGASTATE
>
> config VIDEO_OUTPUT_CONTROL
> tristate "Lowlevel video output switch controls"
> - default m
> help
> This framework adds support for low-level control of the video
> output switch.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2007-09-22 09:26:24

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] [24/45] x86: Introduce frame_pointer() and stack_pointer()

On Fri, Sep 21, 2007 at 10:45:06PM +0200, Andi Kleen wrote:
> From: [email protected]
> This patch defines frame_pointer() and stack_pointer() similar to the
> already defined instruction_pointer(). Thus the oprofile code can be written
> in a more readable fashion.

Can we do these for all architectures, please? They're quite useful for
non-trivial kprobes.

2007-09-22 09:30:07

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [PATCH] [8/45] x86_64: Use string instruction memcpy on AMD Fam11h


On Sep 21 2007 22:44, Andi Kleen wrote:
>Subject: [PATCH] [8/45] x86_64: Use string instruction memcpy on AMD Fam11h
>
>--- linux.orig/arch/x86_64/kernel/setup.c
>+++ linux/arch/x86_64/kernel/setup.c
>@@ -575,7 +575,7 @@ static void __cpuinit init_amd(struct cp
> level = cpuid_eax(1);
> if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
> set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
>- if (c->x86 == 0x10)
>+ if (c->x86 == 0x10 || c->x86 == 0x11)
> set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);

Perhaps it can be assumed that all future CPU versions have this and
if (c->x86 >= 0x10)
could be used?

2007-09-22 09:30:32

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] [32/45] x86_64: hide cond_syscall behind __KERNEL__

On Fri, Sep 21, 2007 at 10:45:14PM +0200, Andi Kleen wrote:
> From: Mike Frysinger <[email protected]>
>
> This brings x86_64 into line with all other architectures by only defining
> cond_syscall() when __KERNEL__ is defined.
>
> Signed-off-by: Mike Frysinger <[email protected]>
> Signed-off-by: Andrew Morton <[email protected]>
> Signed-off-by: Andi Kleen <[email protected]>

> Index: linux/include/asm-x86_64/unistd.h
> ===================================================================
> --- linux.orig/include/asm-x86_64/unistd.h
> +++ linux/include/asm-x86_64/unistd.h
> @@ -676,6 +676,7 @@ asmlinkage long sys_rt_sigaction(int sig
> #endif /* __KERNEL__ */

This is the previous __KERNEL__ block.

> #endif /* __NO_STUBS */

And this one shouldn't extend over the declarations of sys_iopl
and sys_rt_sigaction, I think. If it should be there at all.

So please make this file at least semi-clean while you're at it instead
of much worse.

The __KERNEL__ block should also be extended over the various __ARCH_WANT_
definitions because they're not for userspace at all.

2007-09-30 22:08:18

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [20/45] x86_64: Use 8 byte stack alignment when possible

On Friday 21 September 2007 23:19:35 Jakub Jelinek wrote:
> On Fri, Sep 21, 2007 at 10:45:02PM +0200, Andi Kleen wrote:
> >
> > Kernel doesn't use SSE2, so it doesn't need 16 byte alignment. Also
> > the stack can be already unaligned so letting the compiler align
> > is useless. This may make some stack frames smaller.
>
> Shouldn't sources that are compiled into the VDSO or VSYSCALL pages
> revert this to the default?

I see no reason. vdso/vsyscall don't contain any SSE code and also don't
do any callbacks to other user code. Except for signals and signals
already align the stack by themselves.

-Andi

2007-10-01 16:48:48

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [24/45] x86: Introduce frame_pointer() and stack_pointer()

On Saturday 22 September 2007 11:26:14 Christoph Hellwig wrote:
> On Fri, Sep 21, 2007 at 10:45:06PM +0200, Andi Kleen wrote:
> > From: [email protected]
> > This patch defines frame_pointer() and stack_pointer() similar to the
> > already defined instruction_pointer(). Thus the oprofile code can be written
> > in a more readable fashion.
>
> Can we do these for all architectures, please? They're quite useful for
> non-trivial kprobes.

I'll leave that to the architecture maintainers. Besides many probably
don't have a frame pointer.

-Andi



2007-11-14 00:25:33

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [13/45] x86_64: Increase VDSO_TEXT_OFFSET for ancient binutils

On Wednesday 14 November 2007 01:21:07 CaT wrote:
> On Fri, Sep 21, 2007 at 10:44:54PM +0200, Andi Kleen wrote:
> > For some reason old binutils genertate larger headers so
> > increase the text offset of the vdso to avoid linker errors.
> >
> > Signed-off-by: Andi Kleen <[email protected]>
> >
> > ---
> > arch/x86_64/vdso/voffset.h | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > Index: linux/arch/x86_64/vdso/voffset.h
> > ===================================================================
> > --- linux.orig/arch/x86_64/vdso/voffset.h
> > +++ linux/arch/x86_64/vdso/voffset.h
> > @@ -1 +1 @@
> > -#define VDSO_TEXT_OFFSET 0x500
> > +#define VDSO_TEXT_OFFSET 0x600
>
> This still breaks under Debian sarge. There is this (german) forum
> thread about it:

iirc sarge was the one with the unfixable 2MB text pages, generating gigantic
vDSOs. At least from my side i essentially gave up on that one because
there was no way to generate a reasonable vDSO.

You'll need to either up- or downgrade your binutils. There was only a narrow
window where binutils were broken in this way, but that release unfortunately hit it.

-Andi

2007-11-14 00:51:13

by CaT

[permalink] [raw]
Subject: Re: [PATCH] [13/45] x86_64: Increase VDSO_TEXT_OFFSET for ancient binutils

On Fri, Sep 21, 2007 at 10:44:54PM +0200, Andi Kleen wrote:
> For some reason old binutils genertate larger headers so
> increase the text offset of the vdso to avoid linker errors.
>
> Signed-off-by: Andi Kleen <[email protected]>
>
> ---
> arch/x86_64/vdso/voffset.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> Index: linux/arch/x86_64/vdso/voffset.h
> ===================================================================
> --- linux.orig/arch/x86_64/vdso/voffset.h
> +++ linux/arch/x86_64/vdso/voffset.h
> @@ -1 +1 @@
> -#define VDSO_TEXT_OFFSET 0x500
> +#define VDSO_TEXT_OFFSET 0x600

This still breaks under Debian sarge. There is this (german) forum
thread about it:

http://www.debianforum.de/forum/viewtopic.php?p=570102

Robotic english translation:
http://translate.google.com/translate?u=http%3A%2F%2Fwww.debianforum.de%2Fforum%2Fviewtopic.php%3Fp%3D569756%26sid%3D20278bfd231c5dac45fa0f2763c7ec54%23569756&langpair=de%7Cen&hl=en&ie=UTF-8

And it indicates that 0x510 would be the better offset due to 0x600
making it clash with the data section beneath. Indeed when I change it
to 0x510 I can compile the kernel with binutils 2.15-6.

I'm hoping this won't cause the kernel to eat my grandma. :)

--
"To the extent that we overreact, we proffer the terrorists the
greatest tribute."
- High Court Judge Michael Kirby