2007-10-31 21:59:40

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 0/7] (Re-)introducing pvops for x86_64 - Consolidation part


Hi folks,

Here is the result of the latest work on the pvops front, after the x86
arch merge. From the functionality point of view, almost nothing was
changed, except for proper vsmp support - which was discussed, but not
implemented before - and the introduction of smp_ops in x86_64, which eased
the merging of the smp header.

Speaking of the merge, a significant part (although not the majority) of this
work is merging things that could not be merged before, due to the lack of
paravirt for x86_64. What was done, then, can be thought of as a
"lookahead-merge": it is normal code, but written taking into account that
we'll have support for paravirt_ops in x86_64 in the future. Note that for those
files, there may be room left for more integration. This is just the first,
not the final step.

For this reason, I'm splitting this series in two. The first part, which will
follow this message, consists of just the integration patches. Besides making
review even easier, we believe that those could get into the .24 time frame,
just like all the other merge patches that are already going in.

In a separate series, you'll get the actual pvops implementation.

That said, have fun!


2007-10-31 21:59:23

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 2/7] consolidate spinlock.h

The cli and sti instructions need to be replaced by paravirt hooks.
For the i386 architecture, this is already done. The code requirements
aren't much different from the x86_64 point of view, so this part is
consolidated in the common header.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Acked-by: Jeremy Fitzhardinge <[email protected]>
---
include/asm-x86/spinlock.h | 14 ++++++++++++++
include/asm-x86/spinlock_32.h | 9 ---------
include/asm-x86/spinlock_64.h | 8 +++++---
3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index d74d85e..e1d555a 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -1,5 +1,19 @@
+#ifndef _X86_SPINLOCK_H_
+#define _X86_SPINLOCK_H_
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define CLI_STRING "cli"
+#define STI_STRING "sti"
+#define CLI_STI_CLOBBERS
+#define CLI_STI_INPUT_ARGS
+#endif /* CONFIG_PARAVIRT */
+
#ifdef CONFIG_X86_32
# include "spinlock_32.h"
#else
# include "spinlock_64.h"
#endif
+
+#endif
diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h
index d3bcebe..ebbf371 100644
--- a/include/asm-x86/spinlock_32.h
+++ b/include/asm-x86/spinlock_32.h
@@ -7,15 +7,6 @@
#include <asm/processor.h>
#include <linux/compiler.h>

-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define CLI_STRING "cli"
-#define STI_STRING "sti"
-#define CLI_STI_CLOBBERS
-#define CLI_STI_INPUT_ARGS
-#endif /* CONFIG_PARAVIRT */
-
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
*
diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h
index 88bf981..e56b17e 100644
--- a/include/asm-x86/spinlock_64.h
+++ b/include/asm-x86/spinlock_64.h
@@ -48,12 +48,12 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
"jns 5f\n"
"testl $0x200, %1\n\t" /* interrupts were disabled? */
"jz 4f\n\t"
- "sti\n"
+ STI_STRING "\n"
"3:\t"
"rep;nop\n\t"
"cmpl $0, %0\n\t"
"jle 3b\n\t"
- "cli\n\t"
+ CLI_STRING "\n\t"
"jmp 1b\n"
"4:\t"
"rep;nop\n\t"
@@ -61,7 +61,9 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
"jg 1b\n\t"
"jmp 4b\n"
"5:\n\t"
- : "+m" (lock->slock) : "r" ((unsigned)flags) : "memory");
+ : "+m" (lock->slock)
+ : "r" ((unsigned)flags) CLI_STI_INPUT_ARGS
+ : "memory" CLI_STI_CLOBBERS);
}
#endif

--
1.4.4.2

2007-10-31 22:00:00

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 5/7] Add debugreg/load_rsp native hooks

This patch adds native hooks for the debugreg handling functions,
and for the native load_rsp0 function. The latter also has its
call sites patched. There is some room for consolidation in the
processor*.h headers, and it is done here for the paravirt-related functions.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Acked-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/process_64.c | 2 +-
arch/x86/kernel/smpboot_64.c | 2 +-
include/asm-x86/msr.h | 67 ------------------
include/asm-x86/processor.h | 146 ++++++++++++++++++++++++++++++++++++++++
include/asm-x86/processor_32.h | 138 +-------------------------------------
include/asm-x86/processor_64.h | 32 ++++-----
6 files changed, 165 insertions(+), 222 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6309b27..d7da3e6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -596,7 +596,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Reload esp0, LDT and the page table pointer:
*/
- tss->rsp0 = next->rsp0;
+ load_esp0(tss, next);

/*
* Switch DS and ES.
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 49036d0..ecd00f6 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -614,7 +614,7 @@ do_rest:
start_rip = setup_trampoline();

init_rsp = c_idle.idle->thread.rsp;
- per_cpu(init_tss,cpu).rsp0 = init_rsp;
+ load_esp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
initial_code = start_secondary;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index ba4b314..48f73c7 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -253,73 +253,6 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
: "=a" (low), "=d" (high) \
: "c" (counter))

-static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op));
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
- int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op), "c" (count));
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax;
-
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (op)
- : "bx", "cx", "dx");
- return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx;
-
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx)
- : "0" (op)
- : "cx", "dx" );
- return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ecx;
-
- __asm__("cpuid"
- : "=a" (eax), "=c" (ecx)
- : "0" (op)
- : "bx", "dx" );
- return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, edx;
-
- __asm__("cpuid"
- : "=a" (eax), "=d" (edx)
- : "0" (op)
- : "bx", "cx");
- return edx;
-}
-
#ifdef CONFIG_SMP
void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 46e1c04..a576a72 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -1,5 +1,151 @@
+#ifndef _X86_PROCESSOR_H_
+#define _X86_PROCESSOR_H_
+
+#include <linux/kernel.h>
+#include <asm/bug.h>
+
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+static inline unsigned long native_get_debugreg(int regno)
+{
+ unsigned long val = 0; /* Damn you, gcc! */
+
+ switch (regno) {
+ case 0:
+ asm volatile ("mov %%db0, %0" :"=r" (val)); break;
+ case 1:
+ asm volatile ("mov %%db1, %0" :"=r" (val)); break;
+ case 2:
+ asm volatile ("mov %%db2, %0" :"=r" (val)); break;
+ case 3:
+ asm volatile ("mov %%db3, %0" :"=r" (val)); break;
+ case 6:
+ asm volatile ("mov %%db6, %0" :"=r" (val)); break;
+ case 7:
+ asm volatile ("mov %%db7, %0" :"=r" (val)); break;
+ default:
+ WARN_ON(1);
+ }
+ return val;
+}
+
+static inline void native_set_debugreg(int regno, unsigned long value)
+{
+ switch (regno) {
+ case 0:
+ asm("mov %0,%%db0" : :"r" (value) : "memory");
+ break;
+ case 1:
+ asm("mov %0,%%db1" : :"r" (value) : "memory");
+ break;
+ case 2:
+ asm("mov %0,%%db2" : :"r" (value) : "memory");
+ break;
+ case 3:
+ asm("mov %0,%%db3" : :"r" (value) : "memory");
+ break;
+ case 6:
+ asm("mov %0,%%db6" : :"r" (value) : "memory");
+ break;
+ case 7:
+ asm("mov %0,%%db7" : :"r" (value) : "memory");
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
#ifdef CONFIG_X86_32
# include "processor_32.h"
#else
# include "processor_64.h"
#endif
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define paravirt_enabled() 0
+#define __cpuid native_cpuid
+#define SWAPGS swapgs
+
+static inline void load_esp0(struct tss_struct *tss,
+ struct thread_struct *thread)
+{
+ native_load_esp0(tss, thread);
+}
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register) \
+ (var) = native_get_debugreg(register)
+#define set_debugreg(value, register) \
+ native_set_debugreg(register, value)
+
+#define set_iopl_mask native_set_iopl_mask
+#endif /* CONFIG_PARAVIRT */
+
+/*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = 0;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, unsigned *eax,
+ unsigned *ebx, unsigned *ecx, unsigned *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return edx;
+}
+#endif
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 13976b0..a081689 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -134,17 +134,6 @@ extern void detect_ht(struct cpuinfo_x86 *c);
static inline void detect_ht(struct cpuinfo_x86 *c) {}
#endif

-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}

#define load_cr3(pgdir) write_cr3(__pa(pgdir))

@@ -508,56 +497,6 @@ static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct
}
}

-
-static inline unsigned long native_get_debugreg(int regno)
-{
- unsigned long val = 0; /* Damn you, gcc! */
-
- switch (regno) {
- case 0:
- asm("movl %%db0, %0" :"=r" (val)); break;
- case 1:
- asm("movl %%db1, %0" :"=r" (val)); break;
- case 2:
- asm("movl %%db2, %0" :"=r" (val)); break;
- case 3:
- asm("movl %%db3, %0" :"=r" (val)); break;
- case 6:
- asm("movl %%db6, %0" :"=r" (val)); break;
- case 7:
- asm("movl %%db7, %0" :"=r" (val)); break;
- default:
- BUG();
- }
- return val;
-}
-
-static inline void native_set_debugreg(int regno, unsigned long value)
-{
- switch (regno) {
- case 0:
- asm("movl %0,%%db0" : /* no output */ :"r" (value));
- break;
- case 1:
- asm("movl %0,%%db1" : /* no output */ :"r" (value));
- break;
- case 2:
- asm("movl %0,%%db2" : /* no output */ :"r" (value));
- break;
- case 3:
- asm("movl %0,%%db3" : /* no output */ :"r" (value));
- break;
- case 6:
- asm("movl %0,%%db6" : /* no output */ :"r" (value));
- break;
- case 7:
- asm("movl %0,%%db7" : /* no output */ :"r" (value));
- break;
- default:
- BUG();
- }
-}
-
/*
* Set IOPL bits in EFLAGS from given mask
*/
@@ -574,82 +513,9 @@ static inline void native_set_iopl_mask(unsigned mask)
: "i" (~X86_EFLAGS_IOPL), "r" (mask));
}

-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define paravirt_enabled() 0
-#define __cpuid native_cpuid
-
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
-{
- native_load_esp0(tss, thread);
-}
-
-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register) \
- (var) = native_get_debugreg(register)
-#define set_debugreg(value, register) \
- native_set_debugreg(register, value)
-
-#define set_iopl_mask native_set_iopl_mask
-#endif /* CONFIG_PARAVIRT */
-
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = 0;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
+/* We don't really have one */
+static inline void native_swapgs(void)
{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return edx;
}

/* generic versions from gas */
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index e4f1997..b1e6c5a 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -116,21 +116,13 @@ extern unsigned long mmu_cr4_features;
static inline void set_in_cr4 (unsigned long mask)
{
mmu_cr4_features |= mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "orq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
+ write_cr4(read_cr4() | mask);
}

static inline void clear_in_cr4 (unsigned long mask)
{
mmu_cr4_features &= ~mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "andq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (~mask)
- :"ax");
+ write_cr4(read_cr4() & ~mask);
}


@@ -251,6 +243,12 @@ struct thread_struct {
.rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
}

+static inline void native_load_esp0(struct tss_struct *tss,
+ struct thread_struct *thread)
+{
+ tss->rsp0 = thread->rsp0;
+}
+
#define INIT_MMAP \
{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }

@@ -266,13 +264,13 @@ struct thread_struct {
set_fs(USER_DS); \
} while(0)

-#define get_debugreg(var, register) \
- __asm__("movq %%db" #register ", %0" \
- :"=r" (var))
-#define set_debugreg(value, register) \
- __asm__("movq %0,%%db" #register \
- : /* no output */ \
- :"r" (value))
+extern void native_swapgs(void);
+
+/* We have it for symmetry with i386 code, but we don't really use it */
+static inline void native_set_iopl_mask(unsigned mask)
+{
+
+}

struct task_struct;
struct mm_struct;
--
1.4.4.2

2007-10-31 22:00:40

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 4/7] smp x86 consolidation

This patch consolidates some of the smp pieces for both architectures
(i386 and x86_64). It makes part of the calls go through smp_ops. Later on,
making the functions themselves share code is doable and a logical
next step.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Acked-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/smp_64.c | 23 ++++++++++----
arch/x86/kernel/smpboot_64.c | 8 ++--
include/asm-x86/smp.h | 66 ++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/smp_32.h | 58 ------------------------------------
include/asm-x86/smp_64.h | 4 --
5 files changed, 86 insertions(+), 73 deletions(-)

diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index ad063a6..9ddcb99 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -291,9 +291,9 @@ void flush_tlb_all(void)
* anything. Worst case is that we lose a reschedule ...
*/

-void smp_send_reschedule(int cpu)
+void native_smp_send_reschedule(int cpu)
{
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}

/*
@@ -388,7 +388,7 @@ __smp_call_function_mask(cpumask_t mask,
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-int smp_call_function_mask(cpumask_t mask,
+int native_smp_call_function_mask(cpumask_t mask,
void (*func)(void *), void *info,
int wait)
{
@@ -402,7 +402,6 @@ int smp_call_function_mask(cpumask_t mask,
spin_unlock(&call_lock);
return ret;
}
-EXPORT_SYMBOL(smp_call_function_mask);

/*
* smp_call_function_single - Run a function on a specific CPU
@@ -418,7 +417,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
*/

int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
+ int nonatomic, int wait)
{
/* prevent preemption and reschedule on another processor */
int ret;
@@ -458,7 +457,7 @@ EXPORT_SYMBOL(smp_call_function_single);
* Actually there are a few legal cases, like panic.
*/
int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
+ int wait)
{
return smp_call_function_mask(cpu_online_map, func, info, wait);
}
@@ -476,7 +475,7 @@ static void stop_this_cpu(void *dummy)
halt();
}

-void smp_send_stop(void)
+void native_smp_send_stop(void)
{
int nolock;
unsigned long flags;
@@ -532,3 +531,13 @@ asmlinkage void smp_call_function_interrupt(void)
}
}

+struct smp_ops smp_ops = {
+ .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
+ .smp_prepare_cpus = native_smp_prepare_cpus,
+ .cpu_up = native_cpu_up,
+ .smp_cpus_done = native_smp_cpus_done,
+
+ .smp_send_stop = native_smp_send_stop,
+ .smp_send_reschedule = native_smp_send_reschedule,
+ .smp_call_function_mask = native_smp_call_function_mask,
+};
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 500670c..49036d0 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -865,7 +865,7 @@ void __init smp_set_apicids(void)
* Prepare for SMP bootup. The MP table or ACPI has been read
* earlier. Just do some sanity checking here and enable APIC mode.
*/
-void __init smp_prepare_cpus(unsigned int max_cpus)
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
nmi_watchdog_default();
current_cpu_data = boot_cpu_data;
@@ -909,7 +909,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
/*
* Early setup to make printk work.
*/
-void __init smp_prepare_boot_cpu(void)
+void __init native_smp_prepare_boot_cpu(void)
{
int me = smp_processor_id();
cpu_set(me, cpu_online_map);
@@ -920,7 +920,7 @@ void __init smp_prepare_boot_cpu(void)
/*
* Entry point to boot a CPU.
*/
-int __cpuinit __cpu_up(unsigned int cpu)
+int __cpuinit native_cpu_up(unsigned int cpu)
{
int apicid = cpu_present_to_apicid(cpu);
unsigned long flags;
@@ -978,7 +978,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
/*
* Finish the SMP boot.
*/
-void __init smp_cpus_done(unsigned int max_cpus)
+void __init native_smp_cpus_done(unsigned int max_cpus)
{
smp_cleanup_boot();
setup_ioapic_dest();
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index f2e8319..b2f99df 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -1,5 +1,71 @@
+#ifndef _X86_SMP_H_
+#define _X86_SMP_H_
+
+#ifndef __ASSEMBLY__
+struct smp_ops
+{
+ void (*smp_prepare_boot_cpu)(void);
+ void (*smp_prepare_cpus)(unsigned max_cpus);
+ int (*cpu_up)(unsigned cpu);
+ void (*smp_cpus_done)(unsigned max_cpus);
+
+ void (*smp_send_stop)(void);
+ void (*smp_send_reschedule)(int cpu);
+ int (*smp_call_function_mask)(cpumask_t mask,
+ void (*func)(void *info), void *info,
+ int wait);
+};
+
+extern struct smp_ops smp_ops;
+
+static inline void smp_prepare_boot_cpu(void)
+{
+ smp_ops.smp_prepare_boot_cpu();
+}
+static inline void smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_ops.smp_prepare_cpus(max_cpus);
+}
+static inline int __cpu_up(unsigned int cpu)
+{
+ return smp_ops.cpu_up(cpu);
+}
+static inline void smp_cpus_done(unsigned int max_cpus)
+{
+ smp_ops.smp_cpus_done(max_cpus);
+}
+
+static inline void smp_send_stop(void)
+{
+ smp_ops.smp_send_stop();
+}
+static inline void smp_send_reschedule(int cpu)
+{
+ smp_ops.smp_send_reschedule(cpu);
+}
+
+static inline int smp_call_function_mask(cpumask_t mask,
+ void (*func) (void *info),
+ void *info, int wait)
+{
+ return smp_ops.smp_call_function_mask(mask, func, info, wait);
+}
+
+void native_smp_prepare_boot_cpu(void);
+void native_smp_prepare_cpus(unsigned int max_cpus);
+int native_cpu_up(unsigned int cpunum);
+void native_smp_cpus_done(unsigned int max_cpus);
+
+#ifndef CONFIG_PARAVIRT
+#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
+do { } while (0)
+#endif
+#endif /* __ASSEMBLY__ */
+
#ifdef CONFIG_X86_32
# include "smp_32.h"
#else
# include "smp_64.h"
#endif
+
+#endif
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index e10b7af..a53b03f 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -53,64 +53,6 @@ extern void cpu_uninit(void);
extern void remove_siblinginfo(int cpu);
#endif

-struct smp_ops
-{
- void (*smp_prepare_boot_cpu)(void);
- void (*smp_prepare_cpus)(unsigned max_cpus);
- int (*cpu_up)(unsigned cpu);
- void (*smp_cpus_done)(unsigned max_cpus);
-
- void (*smp_send_stop)(void);
- void (*smp_send_reschedule)(int cpu);
- int (*smp_call_function_mask)(cpumask_t mask,
- void (*func)(void *info), void *info,
- int wait);
-};
-
-extern struct smp_ops smp_ops;
-
-static inline void smp_prepare_boot_cpu(void)
-{
- smp_ops.smp_prepare_boot_cpu();
-}
-static inline void smp_prepare_cpus(unsigned int max_cpus)
-{
- smp_ops.smp_prepare_cpus(max_cpus);
-}
-static inline int __cpu_up(unsigned int cpu)
-{
- return smp_ops.cpu_up(cpu);
-}
-static inline void smp_cpus_done(unsigned int max_cpus)
-{
- smp_ops.smp_cpus_done(max_cpus);
-}
-
-static inline void smp_send_stop(void)
-{
- smp_ops.smp_send_stop();
-}
-static inline void smp_send_reschedule(int cpu)
-{
- smp_ops.smp_send_reschedule(cpu);
-}
-static inline int smp_call_function_mask(cpumask_t mask,
- void (*func) (void *info), void *info,
- int wait)
-{
- return smp_ops.smp_call_function_mask(mask, func, info, wait);
-}
-
-void native_smp_prepare_boot_cpu(void);
-void native_smp_prepare_cpus(unsigned int max_cpus);
-int native_cpu_up(unsigned int cpunum);
-void native_smp_cpus_done(unsigned int max_cpus);
-
-#ifndef CONFIG_PARAVIRT
-#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
-do { } while (0)
-#endif
-
/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index ab612b0..279ff92 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -36,9 +36,6 @@ extern volatile unsigned long smp_invalidate_needed;
extern void lock_ipi_call_lock(void);
extern void unlock_ipi_call_lock(void);
extern int smp_num_siblings;
-extern void smp_send_reschedule(int cpu);
-extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait);

/*
* cpu_sibling_map and cpu_core_map now live
@@ -127,4 +124,3 @@ extern unsigned int boot_cpu_id;
#define cpu_physical_id(cpu) boot_cpu_id
#endif /* !CONFIG_SMP */
#endif
-
--
1.4.4.2

2007-10-31 22:00:57

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 6/7] consolidate apic.h functions

This patch consolidates the apic.h functions for i386 and x86_64.
Notice that we use u32 to be explicit about the sizing requirements
between them.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Acked-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/smpboot_64.c | 10 ++++++++--
include/asm-x86/apic.h | 38 ++++++++++++++++++++++++++++++++++++++
include/asm-x86/apic_32.h | 31 -------------------------------
include/asm-x86/apic_64.h | 14 --------------
4 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index ecd00f6..53db29d 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -370,7 +370,7 @@ void __cpuinit start_secondary(void)

unlock_ipi_call_lock();

- setup_secondary_APIC_clock();
+ setup_secondary_clock();

cpu_idle();
}
@@ -462,6 +462,12 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
num_starts = 2;

/*
+ * Paravirt wants a startup IPI hook here to set up the
+ * target processor state.
+ */
+ startup_ipi_hook(phys_apicid, (unsigned long) start_rip,
+ (unsigned long) init_rsp);
+ /*
* Run STARTUP IPI loop.
*/
Dprintk("#startup loops: %d.\n", num_starts);
@@ -903,7 +909,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
* Set up local APIC timer on boot CPU.
*/

- setup_boot_APIC_clock();
+ setup_boot_clock();
}

/*
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 9fbcc0b..6f61672 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -1,5 +1,43 @@
+#ifndef _X86_APIC_H_
+#define _X86_APIC_H_
+#include <asm/apicdef.h>
+#include <asm/fixmap.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Basic functions accessing APICs.
+ */
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define apic_write(reg, v) native_apic_write(reg, v)
+#define apic_write_atomic(reg, v) native_apic_write_atomic(reg, v)
+#define apic_read(reg) native_apic_read(reg)
+#define setup_boot_clock setup_boot_APIC_clock
+#define setup_secondary_clock setup_secondary_APIC_clock
+#endif
+static __inline fastcall void native_apic_write(unsigned long reg,
+ u32 v)
+{
+ *((volatile u32 *)(APIC_BASE+reg)) = v;
+}
+
+static __inline fastcall void native_apic_write_atomic(unsigned long reg,
+ u32 v)
+{
+ xchg((volatile u32 *)(APIC_BASE+reg), v); /* same u32 width as native_apic_write/read; unsigned long would be 8 bytes on x86_64 */
+}
+
+static __inline fastcall u32 native_apic_read(unsigned long reg)
+{
+ return *((volatile u32 *)(APIC_BASE+reg));
+}
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
#ifdef CONFIG_X86_32
# include "apic_32.h"
#else
# include "apic_64.h"
#endif
+#endif
diff --git a/include/asm-x86/apic_32.h b/include/asm-x86/apic_32.h
index 4091b33..dbe51b4 100644
--- a/include/asm-x86/apic_32.h
+++ b/include/asm-x86/apic_32.h
@@ -34,37 +34,6 @@ extern int apic_verbosity;
extern void generic_apic_probe(void);

#ifdef CONFIG_X86_LOCAL_APIC
-
-/*
- * Basic functions accessing APICs.
- */
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define apic_write native_apic_write
-#define apic_write_atomic native_apic_write_atomic
-#define apic_read native_apic_read
-#define setup_boot_clock setup_boot_APIC_clock
-#define setup_secondary_clock setup_secondary_APIC_clock
-#endif
-
-static __inline fastcall void native_apic_write(unsigned long reg,
- unsigned long v)
-{
- *((volatile unsigned long *)(APIC_BASE+reg)) = v;
-}
-
-static __inline fastcall void native_apic_write_atomic(unsigned long reg,
- unsigned long v)
-{
- xchg((volatile unsigned long *)(APIC_BASE+reg), v);
-}
-
-static __inline fastcall unsigned long native_apic_read(unsigned long reg)
-{
- return *((volatile unsigned long *)(APIC_BASE+reg));
-}
-
void apic_wait_icr_idle(void);
unsigned long safe_apic_wait_icr_idle(void);
int get_physical_broadcast(void);
diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h
index 2747a11..aaba5af 100644
--- a/include/asm-x86/apic_64.h
+++ b/include/asm-x86/apic_64.h
@@ -34,20 +34,6 @@ extern int disable_apic_timer;

struct pt_regs;

-/*
- * Basic functions accessing APICs.
- */
-
-static __inline void apic_write(unsigned long reg, unsigned int v)
-{
- *((volatile unsigned int *)(APIC_BASE+reg)) = v;
-}
-
-static __inline unsigned int apic_read(unsigned long reg)
-{
- return *((volatile unsigned int *)(APIC_BASE+reg));
-}
-
extern void apic_wait_icr_idle(void);
extern unsigned int safe_apic_wait_icr_idle(void);

--
1.4.4.2

2007-10-31 22:01:24

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 1/7] irqflags consolidation

This patch consolidates the irqflags include files containing common
paravirt definitions. The native definitions for interrupt handling, halt,
and such, are the same for 32 and 64 bit, and they are kept in irqflags.h.
The differences are split into the arch-specific files.

The syscall return function, irq_enable_sysexit, has a very i386-specific name,
so it is renamed to the more general irq_enable_syscall_ret.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Acked-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/asm-offsets_32.c | 2 +-
arch/x86/kernel/entry_32.S | 8 ++--
arch/x86/kernel/paravirt_32.c | 10 ++--
arch/x86/kernel/vmi_32.c | 4 +-
arch/x86/xen/enlighten.c | 2 +-
include/asm-x86/irqflags.h | 107 ++++++++++++++++++++++++++++++++++++
include/asm-x86/irqflags_32.h | 103 +----------------------------------
include/asm-x86/irqflags_64.h | 112 +++++++-------------------------------
include/asm-x86/paravirt.h | 9 ++--
9 files changed, 147 insertions(+), 210 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 0e45981..c1ccfab 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -123,7 +123,7 @@ void foo(void)
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dc7f938..d63609d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -58,7 +58,7 @@
* for paravirtualization. The following will never clobber any registers:
* INTERRUPT_RETURN (aka. "iret")
* GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
*
* For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
* specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -351,7 +351,7 @@ sysenter_past_esp:
xorl %ebp,%ebp
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
- ENABLE_INTERRUPTS_SYSEXIT
+ ENABLE_INTERRUPTS_SYSCALL_RET
CFI_ENDPROC
.pushsection .fixup,"ax"
2: movl $0,PT_FS(%esp)
@@ -882,10 +882,10 @@ ENTRY(native_iret)
.previous
END(native_iret)

-ENTRY(native_irq_enable_sysexit)
+ENTRY(native_irq_enable_syscall_ret)
sti
sysexit
-END(native_irq_enable_sysexit)
+END(native_irq_enable_syscall_ret)
#endif

KPROBE_ENTRY(int3)
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index 6a80d67..04f51d0 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -60,7 +60,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -88,7 +88,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
SITE(pv_irq_ops, restore_fl);
SITE(pv_irq_ops, save_fl);
SITE(pv_cpu_ops, iret);
- SITE(pv_cpu_ops, irq_enable_sysexit);
+ SITE(pv_cpu_ops, irq_enable_syscall_ret);
SITE(pv_mmu_ops, read_cr2);
SITE(pv_mmu_ops, read_cr3);
SITE(pv_mmu_ops, write_cr3);
@@ -186,7 +186,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
/* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop();
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
- type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
+ type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
else
@@ -237,7 +237,7 @@ static void native_flush_tlb_single(unsigned long addr)

/* These are in entry.S */
extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
+extern void native_irq_enable_syscall_ret(void);

static int __init print_banner(void)
{
@@ -384,7 +384,7 @@ struct pv_cpu_ops pv_cpu_ops = {
.write_idt_entry = write_dt_entry,
.load_esp0 = native_load_esp0,

- .irq_enable_sysexit = native_irq_enable_sysexit,
+ .irq_enable_syscall_ret = native_irq_enable_syscall_ret,
.iret = native_iret,

.set_iopl_mask = native_set_iopl_mask,
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index f02bad6..aacce42 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -148,7 +148,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
insns, eip);
case PARAVIRT_PATCH(pv_cpu_ops.iret):
return patch_internal(VMI_CALL_IRET, len, insns, eip);
- case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
+ case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
default:
break;
@@ -870,7 +870,7 @@ static inline int __init activate_vmi(void)
* the backend. They are performance critical anyway, so requiring
* a patch is not a big problem.
*/
- pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
+ pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
pv_cpu_ops.iret = (void *)0xbadbab0;

#ifdef CONFIG_SMP
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 94c39aa..094b915 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -953,7 +953,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_pmc = native_read_pmc,

.iret = (void *)&hypercall_page[__HYPERVISOR_iret],
- .irq_enable_sysexit = NULL, /* never called */
+ .irq_enable_syscall_ret = NULL, /* never called */

.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 1b695ff..fd0df93 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -1,5 +1,112 @@
+#ifndef _X86_IRQFLAGS_H_
+#define _X86_IRQFLAGS_H_
+
+#include <asm/processor-flags.h>
+
#ifdef CONFIG_X86_32
# include "irqflags_32.h"
#else
# include "irqflags_64.h"
#endif
+
+#ifndef __ASSEMBLY__
+static inline void native_irq_disable(void)
+{
+ asm volatile("cli": : :"memory");
+}
+
+static inline void native_irq_enable(void)
+{
+ asm volatile("sti": : :"memory");
+}
+
+static inline void native_safe_halt(void)
+{
+ asm volatile("sti; hlt": : :"memory");
+}
+
+static inline void native_halt(void)
+{
+ asm volatile("hlt": : :"memory");
+}
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define ENABLE_INTERRUPTS(x) sti
+#define DISABLE_INTERRUPTS(x) cli
+#ifndef __ASSEMBLY__
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+ return native_save_fl();
+}
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+ native_restore_fl(flags);
+}
+
+static inline void raw_local_irq_disable(void)
+{
+ native_irq_disable();
+}
+
+static inline void raw_local_irq_enable(void)
+{
+ native_irq_enable();
+}
+
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static inline void raw_safe_halt(void)
+{
+ native_safe_halt();
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static inline void halt(void)
+{
+ native_halt();
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long __raw_local_irq_save(void)
+{
+ unsigned long flags = __raw_local_save_flags();
+
+ raw_local_irq_disable();
+
+ return flags;
+}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
+#ifndef __ASSEMBLY__
+#define raw_local_save_flags(flags) \
+ do { (flags) = __raw_local_save_flags(); } while (0)
+
+#define raw_local_irq_save(flags) \
+ do { (flags) = __raw_local_irq_save(); } while (0)
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & X86_EFLAGS_IF);
+}
+
+static inline int raw_irqs_disabled(void)
+{
+ unsigned long flags = __raw_local_save_flags();
+
+ return raw_irqs_disabled_flags(flags);
+}
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
index d058b04..11cfbdd 100644
--- a/include/asm-x86/irqflags_32.h
+++ b/include/asm-x86/irqflags_32.h
@@ -26,111 +26,12 @@ static inline void native_restore_fl(unsigned long f)
:"memory", "cc");
}

-static inline void native_irq_disable(void)
-{
- asm volatile("cli": : :"memory");
-}
-
-static inline void native_irq_enable(void)
-{
- asm volatile("sti": : :"memory");
-}
-
-static inline void native_safe_halt(void)
-{
- asm volatile("sti; hlt": : :"memory");
-}
-
-static inline void native_halt(void)
-{
- asm volatile("hlt": : :"memory");
-}
-#endif /* __ASSEMBLY__ */
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#ifndef __ASSEMBLY__
-
-static inline unsigned long __raw_local_save_flags(void)
-{
- return native_save_fl();
-}
-
-static inline void raw_local_irq_restore(unsigned long flags)
-{
- native_restore_fl(flags);
-}
-
-static inline void raw_local_irq_disable(void)
-{
- native_irq_disable();
-}
-
-static inline void raw_local_irq_enable(void)
-{
- native_irq_enable();
-}
-
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static inline void raw_safe_halt(void)
-{
- native_safe_halt();
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static inline void halt(void)
-{
- native_halt();
-}
-
-/*
- * For spinlocks, etc:
- */
-static inline unsigned long __raw_local_irq_save(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_disable();
-
- return flags;
-}
-
-#else
-#define DISABLE_INTERRUPTS(clobbers) cli
-#define ENABLE_INTERRUPTS(clobbers) sti
-#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
+#elif !defined(CONFIG_PARAVIRT) /* __ASSEMBLY__ */
+#define ENABLE_INTERRUPTS_SYSCALL_RET sti; sysexit
#define INTERRUPT_RETURN iret
#define GET_CR0_INTO_EAX movl %cr0, %eax
-#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */

-#ifndef __ASSEMBLY__
-#define raw_local_save_flags(flags) \
- do { (flags) = __raw_local_save_flags(); } while (0)
-
-#define raw_local_irq_save(flags) \
- do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & X86_EFLAGS_IF);
-}
-
-static inline int raw_irqs_disabled(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- return raw_irqs_disabled_flags(flags);
-}
-#endif /* __ASSEMBLY__ */
-
/*
* Do the CPU's IRQ-state tracing from assembly code. We call a
* C function, so save all the C-clobbered registers:
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
index 5341ea1..88eb53f 100644
--- a/include/asm-x86/irqflags_64.h
+++ b/include/asm-x86/irqflags_64.h
@@ -16,7 +16,7 @@
* Interrupt control:
*/

-static inline unsigned long __raw_local_save_flags(void)
+static inline unsigned long native_save_fl(void)
{
unsigned long flags;

@@ -31,10 +31,7 @@ static inline unsigned long __raw_local_save_flags(void)
return flags;
}

-#define raw_local_save_flags(flags) \
- do { (flags) = __raw_local_save_flags(); } while (0)
-
-static inline void raw_local_irq_restore(unsigned long flags)
+static inline void native_restore_fl(unsigned long flags)
{
__asm__ __volatile__(
"pushq %0 ; popfq"
@@ -43,93 +40,7 @@ static inline void raw_local_irq_restore(unsigned long flags)
:"memory", "cc"
);
}
-
-#ifdef CONFIG_X86_VSMP
-
-/*
- * Interrupt control for the VSMP architecture:
- */
-
-static inline void raw_local_irq_disable(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
-}
-
-static inline void raw_local_irq_enable(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
-}
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
-}
-
-#else /* CONFIG_X86_VSMP */
-
-static inline void raw_local_irq_disable(void)
-{
- __asm__ __volatile__("cli" : : : "memory");
-}
-
-static inline void raw_local_irq_enable(void)
-{
- __asm__ __volatile__("sti" : : : "memory");
-}
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
- return !(flags & X86_EFLAGS_IF);
-}
-
-#endif
-
-/*
- * For spinlocks, etc.:
- */
-
-static inline unsigned long __raw_local_irq_save(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- raw_local_irq_disable();
-
- return flags;
-}
-
-#define raw_local_irq_save(flags) \
- do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled(void)
-{
- unsigned long flags = __raw_local_save_flags();
-
- return raw_irqs_disabled_flags(flags);
-}
-
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static inline void raw_safe_halt(void)
-{
- __asm__ __volatile__("sti; hlt" : : : "memory");
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static inline void halt(void)
-{
- __asm__ __volatile__("hlt": : :"memory");
-}
-
-#else /* __ASSEMBLY__: */
+#else /* __ASSEMBLY__ */
# ifdef CONFIG_TRACE_IRQFLAGS
# define TRACE_IRQS_ON call trace_hardirqs_on_thunk
# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk
@@ -153,4 +64,21 @@ static inline void halt(void)
# endif
#endif

+#ifndef CONFIG_PARAVIRT
+#define INTERRUPT_RETURN iretq
+#define ENABLE_INTERRUPTS_SYSCALL_RET \
+ movq %gs:pda_oldrsp, %rsp; \
+ swapgs; \
+ sysretq;
+#endif
+/* Currently paravirt can't handle swapgs nicely when we
+ * don't have a stack we can rely on (such as a user space
+ * stack). So we either find a way around these or just fault
+ * and emulate if a guest tries to call swapgs directly.
+ *
+ * Either way, this is a good way to document that we don't
+ * have a reliable stack.
+ */
+#define SWAPGS_UNSAFE_STACK swapgs
+
#endif
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index f59d370..d81a361 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -121,7 +121,7 @@ struct pv_cpu_ops {
u64 (*read_pmc)(void);

/* These two are jmp to, not actually called. */
- void (*irq_enable_sysexit)(void);
+ void (*irq_enable_syscall_ret)(void);
void (*iret)(void);

struct pv_lazy_ops lazy_mode;
@@ -1138,9 +1138,10 @@ static inline unsigned long __raw_local_irq_save(void)
call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \
popl %edx; popl %ecx; popl %eax)

-#define ENABLE_INTERRUPTS_SYSEXIT \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\
- jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)
+#define ENABLE_INTERRUPTS_SYSCALL_RET \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+ CLBR_NONE, \
+ jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)

#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
--
1.4.4.2

2007-10-31 22:01:42

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 3/7] tlb functions consolidation

This patch consolidates part of the tlb handling functions for the x86
architecture. In this approach, we start with the parts actually used for
paravirt in i386.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Acked-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/kernel/smp_64.c | 5 ++-
include/asm-x86/tlbflush.h | 77 +++++++++++++++++++++++++++++++++++++++++
include/asm-x86/tlbflush_32.h | 77 -----------------------------------------
include/asm-x86/tlbflush_64.h | 43 +++--------------------
4 files changed, 85 insertions(+), 117 deletions(-)

diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 03fa6ed..ad063a6 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -166,11 +166,12 @@ out:
add_pda(irq_tlb_count, 1);
}

-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
- unsigned long va)
+void native_flush_tlb_others(const cpumask_t *cpumaskp,
+ struct mm_struct *mm, unsigned long va)
{
int sender;
union smp_flush_state *f;
+ cpumask_t cpumask = *cpumaskp;

/* Caller has disabled preemption */
sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h
index 9af4cc8..93283cf 100644
--- a/include/asm-x86/tlbflush.h
+++ b/include/asm-x86/tlbflush.h
@@ -1,5 +1,82 @@
+#ifndef _X86_TLBFLUSH_H_
+#define _X86_TLBFLUSH_H_
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define __flush_tlb() __native_flush_tlb()
+#define __flush_tlb_global() __native_flush_tlb_global()
+#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#endif
+
+static inline void __native_flush_tlb(void)
+{
+ write_cr3(read_cr3());
+}
+
+static inline void __native_flush_tlb_global(void)
+{
+ unsigned long cr4 = read_cr4();
+ write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */
+ write_cr4(cr4); /* write old PGE again and flush TLBs */
+}
+
+#define __native_flush_tlb_single(addr) \
+ __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
+
+#ifdef CONFIG_SMP
+
+#include <asm/smp.h>
+#include <linux/mm.h>
+
+#define local_flush_tlb() \
+ __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb() flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ flush_tlb_mm(vma->vm_mm);
+}
+
+void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
+ unsigned long va);
+
+#define TLBSTATE_OK 1
+#define TLBSTATE_LAZY 2
+
+#ifdef CONFIG_X86_64
+/* Roughly an IPI every 20MB with 4k pages for freeing page table
+ ranges. Cost is about 42k of memory for each CPU. */
+#define ARCH_FREE_PTE_NR 5350
+
+#else /* X86_64 */
+struct tlb_state
+{
+ struct mm_struct *active_mm;
+ int state;
+ char __cacheline_padding[L1_CACHE_BYTES-8];
+};
+DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+#endif /* X86_64 */
+
+#endif
+
+#ifndef CONFIG_PARAVIRT
+#define flush_tlb_others(mask, mm, va) \
+ native_flush_tlb_others(&mask, mm, va)
+#endif
+
#ifdef CONFIG_X86_32
# include "tlbflush_32.h"
#else
# include "tlbflush_64.h"
#endif
+
+#endif
diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h
index 2bd5b95..07eaf37 100644
--- a/include/asm-x86/tlbflush_32.h
+++ b/include/asm-x86/tlbflush_32.h
@@ -1,49 +1,8 @@
#ifndef _I386_TLBFLUSH_H
#define _I386_TLBFLUSH_H

-#include <linux/mm.h>
#include <asm/processor.h>

-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define __flush_tlb() __native_flush_tlb()
-#define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
-#endif
-
-#define __native_flush_tlb() \
- do { \
- unsigned int tmpreg; \
- \
- __asm__ __volatile__( \
- "movl %%cr3, %0; \n" \
- "movl %0, %%cr3; # flush TLB \n" \
- : "=r" (tmpreg) \
- :: "memory"); \
- } while (0)
-
-/*
- * Global pages have to be flushed a bit differently. Not a real
- * performance problem because this does not happen often.
- */
-#define __native_flush_tlb_global() \
- do { \
- unsigned int tmpreg, cr4, cr4_orig; \
- \
- __asm__ __volatile__( \
- "movl %%cr4, %2; # turn off PGE \n" \
- "movl %2, %1; \n" \
- "andl %3, %1; \n" \
- "movl %1, %%cr4; \n" \
- "movl %%cr3, %0; \n" \
- "movl %0, %%cr3; # flush TLB \n" \
- "movl %2, %%cr4; # turn PGE back on \n" \
- : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \
- : "i" (~X86_CR4_PGE) \
- : "memory"); \
- } while (0)
-
#define __native_flush_tlb_single(addr) \
__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")

@@ -120,44 +79,8 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask,
{
}

-#else /* SMP */
-
-#include <asm/smp.h>
-
-#define local_flush_tlb() \
- __flush_tlb()
-
-extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
-
-#define flush_tlb() flush_tlb_current_task()
-
-static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
-{
- flush_tlb_mm(vma->vm_mm);
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
- unsigned long va);
-
-#define TLBSTATE_OK 1
-#define TLBSTATE_LAZY 2
-
-struct tlb_state
-{
- struct mm_struct *active_mm;
- int state;
- char __cacheline_padding[L1_CACHE_BYTES-8];
-};
-DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
#endif /* SMP */

-#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va) \
- native_flush_tlb_others(&mask, mm, va)
-#endif

static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h
index 7731fd2..d1d1097 100644
--- a/include/asm-x86/tlbflush_64.h
+++ b/include/asm-x86/tlbflush_64.h
@@ -6,21 +6,8 @@
#include <asm/processor.h>
#include <asm/system.h>

-static inline void __flush_tlb(void)
-{
- write_cr3(read_cr3());
-}
-
-static inline void __flush_tlb_all(void)
-{
- unsigned long cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */
- write_cr4(cr4); /* write old PGE again and flush TLBs */
-}
-
-#define __flush_tlb_one(addr) \
- __asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory")
-
+#define __flush_tlb_one(addr) __flush_tlb_single(addr)
+#define __flush_tlb_all() __flush_tlb_global()

/*
* TLB flushing:
@@ -63,32 +50,12 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
__flush_tlb();
}

-#else
-
-#include <asm/smp.h>
-
-#define local_flush_tlb() \
- __flush_tlb()
-
-extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
-
-#define flush_tlb() flush_tlb_current_task()
-
-static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+static inline void native_flush_tlb_others(const cpumask_t *cpumask,
+ struct mm_struct *mm,
+ unsigned long va)
{
- flush_tlb_mm(vma->vm_mm);
}

-#define TLBSTATE_OK 1
-#define TLBSTATE_LAZY 2
-
-/* Roughly an IPI every 20MB with 4k pages for freeing page table
- ranges. Cost is about 42k of memory for each CPU. */
-#define ARCH_FREE_PTE_NR 5350
-
#endif

static inline void flush_tlb_kernel_range(unsigned long start,
--
1.4.4.2

2007-10-31 22:01:56

by Glauber Costa

[permalink] [raw]
Subject: [PATCH 7/7] consolidate msr.h

This patch goes one step forward in consolidating the msr.h header.
It shares code between i386 and x86_64, instead of duplicating the
code for tsc reading, msr reading/writing, etc.

Signed-off-by: Glauber de Oliveira Costa <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Acked-by: Jeremy Fitzhardinge <[email protected]>
---
arch/x86/ia32/syscall32.c | 2 +-
arch/x86/kernel/setup64.c | 6 +-
arch/x86/kernel/tsc_64.c | 17 +++-
arch/x86/kernel/vsyscall_64.c | 4 +-
arch/x86/vdso/vgetcpu.c | 4 +-
include/asm-x86/alternative_32.h | 17 +++-
include/asm-x86/alternative_64.h | 27 ++++-
include/asm-x86/msr.h | 225 ++++++++++----------------------------
include/asm-x86/tsc.h | 33 +++++-
9 files changed, 151 insertions(+), 184 deletions(-)

diff --git a/arch/x86/ia32/syscall32.c b/arch/x86/ia32/syscall32.c
index 15013ba..dd1b4a3 100644
--- a/arch/x86/ia32/syscall32.c
+++ b/arch/x86/ia32/syscall32.c
@@ -79,5 +79,5 @@ void syscall32_cpu_init(void)
checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);

- wrmsrl(MSR_CSTAR, ia32_cstar_target);
+ wrmsrl(MSR_CSTAR, (u64)ia32_cstar_target);
}
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 3558ac7..50b7514 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -122,7 +122,7 @@ void pda_init(int cpu)
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
/* Memory clobbers used to order PDA accessed */
mb();
- wrmsrl(MSR_GS_BASE, pda);
+ wrmsrl(MSR_GS_BASE, (u64)pda);
mb();

pda->cpunumber = cpu;
@@ -161,8 +161,8 @@ void syscall_init(void)
* but only a 32bit target. LSTAR sets the 64bit rip.
*/
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
- wrmsrl(MSR_LSTAR, system_call);
- wrmsrl(MSR_CSTAR, ignore_sysret);
+ wrmsrl(MSR_LSTAR, (u64)system_call);
+ wrmsrl(MSR_CSTAR, (u64)ignore_sysret);

#ifdef CONFIG_IA32_EMULATION
syscall32_cpu_init ();
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 9c70af4..4502539 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -30,7 +30,7 @@ static unsigned long long cycles_2_ns(unsigned long long cyc)
return (cyc * cyc2ns_scale) >> NS_SCALE;
}

-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
{
unsigned long a = 0;

@@ -44,6 +44,19 @@ unsigned long long sched_clock(void)
return cycles_2_ns(a);
}

+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#else
+unsigned long long
+sched_clock(void) __attribute__((alias("native_sched_clock")));
+#endif
+
+
static int tsc_unstable;

inline int check_tsc_unstable(void)
@@ -256,7 +269,7 @@ static cycle_t read_tsc(void)

static cycle_t __vsyscall_fn vread_tsc(void)
{
- cycle_t ret = (cycle_t)get_cycles_sync();
+ cycle_t ret = (cycle_t)vget_cycles_sync();
return ret;
}

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ad4005c..1425d02 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -190,7 +190,7 @@ time_t __vsyscall(1) vtime(time_t *t)
long __vsyscall(2)
vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
- unsigned int dummy, p;
+ unsigned int p;
unsigned long j = 0;

/* Fast cache - only recompute value once per jiffies and avoid
@@ -205,7 +205,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
p = tcache->blob[1];
} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
- rdtscp(dummy, dummy, p);
+ native_read_tscp(&p);
} else {
/* Load per CPU data from GDT */
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 91f6e85..61d0def 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -15,7 +15,7 @@

long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
- unsigned int dummy, p;
+ unsigned int p;
unsigned long j = 0;

/* Fast cache - only recompute value once per jiffies and avoid
@@ -30,7 +30,7 @@ long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
p = tcache->blob[1];
} else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
- rdtscp(dummy, dummy, p);
+ native_read_tscp(&p);
} else {
/* Load per CPU data from GDT */
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
diff --git a/include/asm-x86/alternative_32.h b/include/asm-x86/alternative_32.h
index bda6c81..1ed7708 100644
--- a/include/asm-x86/alternative_32.h
+++ b/include/asm-x86/alternative_32.h
@@ -101,7 +101,22 @@ static inline void alternatives_smp_switch(int smp) {}
* use this macro(s) if you need more than one output parameter
* in alternative_io
*/
-#define ASM_OUTPUT2(a, b) a, b
+#define ASM_OUTPUT2(a, b...) a, b
+
+#define fixup_section(code, fixup, output, input...) \
+ asm volatile("2: " code "\n" \
+ "1:\n\t" \
+ ".section .fixup,\"ax\"\n\t" \
+ "3: " fixup "\n\t" \
+ "jmp 1b\n\t" \
+ ".previous\n\t" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n\t" \
+ " .long 2b,3b\n\t" \
+ ".previous" \
+ : output \
+ : input)
+

/*
* Alternative inline assembly for SMP.
diff --git a/include/asm-x86/alternative_64.h b/include/asm-x86/alternative_64.h
index ab161e8..f080b69 100644
--- a/include/asm-x86/alternative_64.h
+++ b/include/asm-x86/alternative_64.h
@@ -141,14 +141,29 @@ static inline void alternatives_smp_switch(int smp) {}
* use this macro(s) if you need more than one output parameter
* in alternative_io
*/
-#define ASM_OUTPUT2(a, b) a, b
-
-struct paravirt_patch;
+#define ASM_OUTPUT2(a, b...) a, b
+
+#define fixup_section(code, fixup, output, input...) \
+ asm volatile("2: " code "\n" \
+ "1:\n\t" \
+ ".section .fixup,\"ax\"\n\t" \
+ "3: " fixup "\n\t" \
+ " jmp 1b\n\t" \
+ ".previous\n\t" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 8\n\t" \
+ " .quad 2b,3b\n\t" \
+ ".previous" \
+ : output \
+ : input)
+
+struct paravirt_patch_site;
#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
+void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end);
#else
-static inline void
-apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+static inline void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end)
{}
#define __parainstructions NULL
#define __parainstructions_end NULL
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 48f73c7..9171564 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -3,8 +3,6 @@

#include <asm/msr-index.h>

-#ifdef __i386__
-
#ifdef __KERNEL__
#ifndef __ASSEMBLY__

@@ -12,70 +10,66 @@

static inline unsigned long long native_read_msr(unsigned int msr)
{
- unsigned long long val;
-
- asm volatile("rdmsr" : "=A" (val) : "c" (msr));
- return val;
+ unsigned long a, d;
+ asm volatile("rdmsr" : "=a" (a), "=d" (d) : "c" (msr));
+ return a | ((u64)d << 32);
}

static inline unsigned long long native_read_msr_safe(unsigned int msr,
int *err)
{
- unsigned long long val;
-
- asm volatile("2: rdmsr ; xorl %0,%0\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: movl %3,%0 ; jmp 1b\n\t"
- ".previous\n\t"
- ".section __ex_table,\"a\"\n"
- " .align 4\n\t"
- " .long 2b,3b\n\t"
- ".previous"
- : "=r" (*err), "=A" (val)
- : "c" (msr), "i" (-EFAULT));
-
- return val;
+ unsigned long a, d;
+ fixup_section("rdmsr; xor %0, %0", "mov %4, %0",
+ ASM_OUTPUT2("=r" (*err), "=a"((a)), "=d"((d))),
+ "c"(msr), "i"(-EFAULT), "0"(0));
+ return a | ((u64)d << 32);
}

-static inline void native_write_msr(unsigned int msr, unsigned long long val)
+static inline void native_write_msr(unsigned int msr, unsigned low,
+ unsigned high)
{
- asm volatile("wrmsr" : : "c" (msr), "A"(val));
+ asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high));
}

static inline int native_write_msr_safe(unsigned int msr,
- unsigned long long val)
+ unsigned low, unsigned high)
{
int err;
- asm volatile("2: wrmsr ; xorl %0,%0\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: movl %4,%0 ; jmp 1b\n\t"
- ".previous\n\t"
- ".section __ex_table,\"a\"\n"
- " .align 4\n\t"
- " .long 2b,3b\n\t"
- ".previous"
- : "=a" (err)
- : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
- "i" (-EFAULT));
+ fixup_section("wrmsr; xor %0, %0", "mov %4, %0", "=a" (err),
+ "c" (msr), "0" (low), "d" (high),
+ "i" (-EFAULT));
return err;
}

static inline unsigned long long native_read_tsc(void)
{
- unsigned long long val;
- asm volatile("rdtsc" : "=A" (val));
- return val;
+ unsigned int low, high;
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+ return low | ((u64)(high) << 32);
}

-static inline unsigned long long native_read_pmc(void)
+static inline unsigned long long native_read_pmc(int counter)
{
- unsigned long long val;
- asm volatile("rdpmc" : "=A" (val));
- return val;
+ unsigned long low, high;
+ asm volatile ("rdpmc"
+ : "=a" (low), "=d" (high)
+ : "c" (counter));
+
+ return low | ((u64)high << 32);
}

+static inline unsigned long long native_read_tscp(int *aux)
+{
+ unsigned long low, high; /* EAX / EDX halves of the 64-bit counter */
+ asm volatile (".byte 0x0f,0x01,0xf9" /* rdtscp opcode; ECX is stored to *aux */
+ : "=a" (low), "=d" (high), "=c" (*aux));
+ return low | ((u64)high << 32); /* EDX is the upper 32 bits: shift left, not right */
+}
+
+#endif /* ! __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+#ifndef __ASSEMBLY__
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -93,20 +87,26 @@ static inline unsigned long long native_read_pmc(void)
(val2) = (u32)(__val >> 32); \
} while(0)

-static inline void wrmsr(u32 __msr, u32 __low, u32 __high)
+static inline void wrmsr(unsigned int msr, unsigned int low, unsigned int high)
{
- native_write_msr(__msr, ((u64)__high << 32) | __low);
+ native_write_msr(msr, low, high);
}

#define rdmsrl(msr,val) \
((val) = native_read_msr(msr))

-#define wrmsrl(msr,val) native_write_msr(msr, val)
+static inline void wrmsrl(unsigned int msr, unsigned long long val)
+{
+ unsigned long low, high;
+ low = (u32)val;
+ high = val >> 32;
+ native_write_msr(msr, low, high);
+}

/* wrmsr with exception handling */
-static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
+static inline int wrmsr_safe(int msr, int low, int high)
{
- return native_write_msr_safe(__msr, ((u64)__high << 32) | __low);
+ return native_write_msr_safe(msr, low, high);
}

/* rdmsr with exception handling */
@@ -129,130 +129,28 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)

#define rdpmc(counter,low,high) \
do { \
- u64 _l = native_read_pmc(); \
+ u64 _l = native_read_pmc(counter); \
(low) = (u32)_l; \
(high) = (u32)(_l >> 32); \
- } while(0)
-#endif /* !CONFIG_PARAVIRT */
-
-#ifdef CONFIG_SMP
-void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
-void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
-#else /* CONFIG_SMP */
-static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- rdmsr(msr_no, *l, *h);
-}
-static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- wrmsr(msr_no, l, h);
-}
-static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- return rdmsr_safe(msr_no, l, h);
-}
-static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- return wrmsr_safe(msr_no, l, h);
-}
-#endif /* CONFIG_SMP */
-#endif /* ! __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
-#else /* __i386__ */
-
-#ifndef __ASSEMBLY__
-#include <linux/errno.h>
-/*
- * Access to machine-specific registers (available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection), this allows gcc to optimize better
- */
+ } while (0)

-#define rdmsr(msr,val1,val2) \
- __asm__ __volatile__("rdmsr" \
- : "=a" (val1), "=d" (val2) \
- : "c" (msr))
-
-
-#define rdmsrl(msr,val) do { unsigned long a__,b__; \
- __asm__ __volatile__("rdmsr" \
- : "=a" (a__), "=d" (b__) \
- : "c" (msr)); \
- val = a__ | (b__<<32); \
-} while(0)
-
-#define wrmsr(msr,val1,val2) \
- __asm__ __volatile__("wrmsr" \
- : /* no outputs */ \
- : "c" (msr), "a" (val1), "d" (val2))
+#define rdtscp(low, high, aux) \
+ do { \
+ unsigned long long _val = native_read_tscp(&(aux)); \
+ (low) = (u32)_val; \
+ (high) = (u32)(_val >> 32); \
+ } while (0)

-#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32)
+#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))

-/* wrmsr with exception handling */
-#define wrmsr_safe(msr,a,b) ({ int ret__; \
- asm volatile("2: wrmsr ; xorl %0,%0\n" \
- "1:\n\t" \
- ".section .fixup,\"ax\"\n\t" \
- "3: movl %4,%0 ; jmp 1b\n\t" \
- ".previous\n\t" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n\t" \
- " .quad 2b,3b\n\t" \
- ".previous" \
- : "=a" (ret__) \
- : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \
- ret__; })
+#endif /* !CONFIG_PARAVIRT */

#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32))

-#define rdmsr_safe(msr,a,b) \
- ({ int ret__; \
- asm volatile ("1: rdmsr\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %4,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 1b,3b\n" \
- ".previous":"=&bDS" (ret__), "=a"(*(a)), "=d"(*(b)) \
- :"c"(msr), "i"(-EIO), "0"(0)); \
- ret__; })
-
-#define rdtsc(low,high) \
- __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
- __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
-
-#define rdtscp(low,high,aux) \
- asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
-
-#define rdtscll(val) do { \
- unsigned int __a,__d; \
- asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
- (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
-} while(0)
-
-#define rdtscpll(val, aux) do { \
- unsigned long __a, __d; \
- asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \
- (val) = (__d << 32) | __a; \
-} while (0)
-
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)

#define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)

-#define rdpmc(counter,low,high) \
- __asm__ __volatile__("rdpmc" \
- : "=a" (low), "=d" (high) \
- : "c" (counter))
-
#ifdef CONFIG_SMP
void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
@@ -275,9 +173,6 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
return wrmsr_safe(msr_no, l, h);
}
-#endif /* CONFIG_SMP */
-#endif /* __ASSEMBLY__ */
-
-#endif /* !__i386__ */
-
+#endif /* CONFIG_SMP */
+#endif /* ! __ASSEMBLY__ */
#endif
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 6baab30..f6460ba 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -33,7 +33,7 @@ static inline cycles_t get_cycles(void)
}

/* Like get_cycles, but make sure the CPU is synchronized. */
-static __always_inline cycles_t get_cycles_sync(void)
+static __always_inline cycles_t __get_cycles_sync(void)
{
unsigned long long ret;
unsigned eax, edx;
@@ -55,11 +55,40 @@ static __always_inline cycles_t get_cycles_sync(void)
*/
alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
"=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
- rdtscll(ret);

+ return 0;
+}
+
+static __always_inline cycles_t get_cycles_sync(void)
+{
+ unsigned long long ret;
+ ret = __get_cycles_sync();
+ if (!ret)
+ rdtscll(ret);
return ret;
}

+#ifdef CONFIG_PARAVIRT
+/*
+ * For paravirt guests, some functionalities are executed through function
+ * pointers in the various pvops structures.
+ * These function pointers exist inside the kernel and can not
+ * be accessed by user space. To avoid this, we make a copy of the
+ * get_cycles_sync (called in kernel) but force the use of native_read_tsc.
+ * Ideally, the guest should set up its own clock and vread
+ */
+static __always_inline long long vget_cycles_sync(void)
+{
+ unsigned long long ret;
+ ret = __get_cycles_sync();
+ if (!ret)
+ ret = native_read_tsc();
+ return ret;
+}
+#else
+# define vget_cycles_sync() get_cycles_sync()
+#endif
+
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
--
1.4.4.2