2006-10-27 03:38:51

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 1/4] Prep for paravirt: move pagetable includes.

Move header includes for the nopud / nopmd types to the location of the
actual pte / pgd type definitions. This allows generic 4-level page
type code to be written before the split 2/3 level page table headers are
included.

Signed-off-by: Zachary Amsden <[email protected]>
Signed-off-by: Rusty Russell <[email protected]>

===================================================================
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -52,6 +52,7 @@ typedef struct { unsigned long long pgpr
#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
#define __pmd(x) ((pmd_t) { (x) } )
#define HPAGE_SHIFT 21
+#include <asm-generic/pgtable-nopud.h>
#else
typedef struct { unsigned long pte_low; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
@@ -59,6 +60,7 @@ typedef struct { unsigned long pgprot; }
#define boot_pte_t pte_t /* or would you rather have a typedef */
#define pte_val(x) ((x).pte_low)
#define HPAGE_SHIFT 22
+#include <asm-generic/pgtable-nopmd.h>
#endif
#define PTE_MASK PAGE_MASK

===================================================================
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -1,7 +1,5 @@
#ifndef _I386_PGTABLE_2LEVEL_H
#define _I386_PGTABLE_2LEVEL_H
-
-#include <asm-generic/pgtable-nopmd.h>

#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
===================================================================
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -1,7 +1,5 @@
#ifndef _I386_PGTABLE_3LEVEL_H
#define _I386_PGTABLE_3LEVEL_H
-
-#include <asm-generic/pgtable-nopud.h>

/*
* Intel Physical Address Extension (PAE) Mode - three-level page



2006-10-27 03:42:17

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

(Andrew had already taken that last one, I meant to send this)

Subject: Be careful about touching BIOS address space

BIOS ROM areas may not be mapped into the guest address space, so be careful
when touching those addresses to make sure they appear to be mapped.

Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Rusty Russell <[email protected]>

===================================================================
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -270,7 +270,14 @@ static struct resource standard_io_resou
.flags = IORESOURCE_BUSY | IORESOURCE_IO
} };

-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+static inline int romsignature(const unsigned char *x)
+{
+ unsigned short sig;
+ int ret = 0;
+ if (__get_user(sig, (const unsigned short *)x) == 0)
+ ret = (sig == 0xaa55);
+ return ret;
+}

static int __init romchecksum(unsigned char *rom, unsigned long length)
{
===================================================================
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <asm/uaccess.h>
#include "pci.h"
#include "pci-functions.h"

@@ -301,7 +302,7 @@ static struct pci_raw_ops pci_bios_acces

static struct pci_raw_ops * __devinit pci_find_bios(void)
{
- union bios32 *check;
+ union bios32 *check, sig;
unsigned char sum;
int i, length;

@@ -314,6 +315,10 @@ static struct pci_raw_ops * __devinit pc
for (check = (union bios32 *) __va(0xe0000);
check <= (union bios32 *) __va(0xffff0);
++check) {
+ long sig;
+ if (__get_user(sig, &check->fields.signature))
+ continue;
+
if (check->fields.signature != BIOS32_SIGNATURE)
continue;
length = check->fields.length * 16;

--
ccontrol: http://ccontrol.ozlabs.org

2006-10-27 03:43:45

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 2/4] Prep for paravirt: cpu_detect extraction

Both lhype and Xen want to call the core of the x86 cpu detect code
before calling start_kernel.

Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Rusty Russell <[email protected]> (extracted from larger patch)

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal tmp1/arch/i386/kernel/cpu/common.c tmp2/arch/i386/kernel/cpu/common.c
--- tmp1/arch/i386/kernel/cpu/common.c 2006-10-27 13:16:53.000000000 +1000
+++ tmp2/arch/i386/kernel/cpu/common.c 2006-10-27 13:34:36.000000000 +1000
@@ -236,29 +236,14 @@ static int __cpuinit have_cpuid_p(void)
return flag_is_changeable_p(X86_EFLAGS_ID);
}

-/* Do minimum CPU detection early.
- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
- The others are not touched to avoid unwanted side effects.
-
- WARNING: this function is only called on the BP. Don't add code here
- that is supposed to run on all CPUs. */
-static void __init early_cpu_detect(void)
+void __init cpu_detect(struct cpuinfo_x86 *c)
{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- c->x86_cache_alignment = 32;
-
- if (!have_cpuid_p())
- return;
-
/* Get vendor name */
cpuid(0x00000000, &c->cpuid_level,
(int *)&c->x86_vendor_id[0],
(int *)&c->x86_vendor_id[8],
(int *)&c->x86_vendor_id[4]);

- get_cpu_vendor(c, 1);
-
c->x86 = 4;
if (c->cpuid_level >= 0x00000001) {
u32 junk, tfms, cap0, misc;
@@ -275,6 +260,26 @@ static void __init early_cpu_detect(void
}
}

+/* Do minimum CPU detection early.
+ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+ The others are not touched to avoid unwanted side effects.
+
+ WARNING: this function is only called on the BP. Don't add code here
+ that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ c->x86_cache_alignment = 32;
+
+ if (!have_cpuid_p())
+ return;
+
+ cpu_detect(c);
+
+ get_cpu_vendor(c, 1);
+}
+
static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal tmp1/include/asm-i386/processor.h tmp2/include/asm-i386/processor.h
--- tmp1/include/asm-i386/processor.h 2006-10-27 13:17:22.000000000 +1000
+++ tmp2/include/asm-i386/processor.h 2006-10-27 13:34:36.000000000 +1000
@@ -20,6 +20,7 @@
#include <linux/threads.h>
#include <asm/percpu.h>
#include <linux/cpumask.h>
+#include <linux/init.h>

/* flag for disabling the tsc */
extern int tsc_disable;
@@ -111,6 +112,8 @@ extern struct cpuinfo_x86 cpu_data[];
extern int cpu_llc_id[NR_CPUS];
extern char ignore_fpu_irq;

+void __init cpu_detect(struct cpuinfo_x86 *c);
+
extern void identify_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal tmp1/include/linux/start_kernel.h tmp2/include/linux/start_kernel.h
--- tmp1/include/linux/start_kernel.h 1970-01-01 10:00:00.000000000 +1000
+++ tmp2/include/linux/start_kernel.h 2006-10-27 13:34:36.000000000 +1000
@@ -0,0 +1,12 @@
+#ifndef _LINUX_START_KERNEL_H
+#define _LINUX_START_KERNEL_H
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+/* Define the prototype for start_kernel here, rather than cluttering
+ up something else. */
+
+extern asmlinkage void __init start_kernel(void);
+
+#endif /* _LINUX_START_KERNEL_H */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/dontdiff --minimal tmp1/init/main.c tmp2/init/main.c
--- tmp1/init/main.c 2006-10-27 13:17:26.000000000 +1000
+++ tmp2/init/main.c 2006-10-27 13:35:08.000000000 +1000
@@ -51,6 +51,7 @@
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/pid_namespace.h>
+#include <linux/start_kernel.h>

#include <asm/io.h>
#include <asm/bugs.h>


2006-10-27 03:45:31

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 3/4] Prep for paravirt: desc.h clearer parameter names, some code motion

Jeremy Fitzhardinge's patch to clarify arg names in asm/desc.h

"a" and "b" are better named "low" and "high"; Rusty mixed them up
once with amusing results. Also change __u32 to u32 while there (this
must be kernel-only code, given the DECLARE_PER_CPU in the file
above).

Also moves set_ldt up higher in header, in preparation for paravirt.

Signed-off-by: Rusty Russell <[email protected]>

===================================================================
--- a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -32,19 +32,19 @@ extern struct desc_struct idt_table[];
extern struct desc_struct idt_table[];
extern void set_intr_gate(unsigned int irq, void * addr);

-static inline void pack_descriptor(__u32 *a, __u32 *b,
+static inline void pack_descriptor(u32 *low, u32 *high,
unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
{
- *a = ((base & 0xffff) << 16) | (limit & 0xffff);
- *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
+ *low = ((base & 0xffff) << 16) | (limit & 0xffff);
+ *high = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
(limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
}

-static inline void pack_gate(__u32 *a, __u32 *b,
+static inline void pack_gate(u32 *low, u32 *high,
unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
{
- *a = (seg << 16) | (base & 0xffff);
- *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
+ *low = (seg << 16) | (base & 0xffff);
+ *high = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
}

#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
@@ -78,47 +78,47 @@ static inline void load_TLS(struct threa
#undef C
}

-static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
-{
- __u32 *lp = (__u32 *)((char *)dt + entry*8);
- *lp = entry_a;
- *(lp+1) = entry_b;
-}
-
-#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-
-static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
-{
- __u32 a, b;
- pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
- write_idt_entry(idt_table, gate, a, b);
-}
-
-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
-{
- __u32 a, b;
- pack_descriptor(&a, &b, (unsigned long)addr,
- offsetof(struct tss_struct, __cacheline_filler) - 1,
- DESCTYPE_TSS, 0);
- write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
-}
-
static inline void set_ldt(void *addr, unsigned int entries)
{
if (likely(entries == 0))
__asm__ __volatile__("lldt %w0"::"q" (0));
else {
unsigned cpu = smp_processor_id();
- __u32 a, b;
-
- pack_descriptor(&a, &b, (unsigned long)addr,
+ u32 low, high;
+
+ pack_descriptor(&low, &high, (unsigned long)addr,
entries * sizeof(struct desc_struct) - 1,
DESCTYPE_LDT, 0);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high);
__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
}
+}
+
+#define write_ldt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
+#define write_gdt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
+#define write_idt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
+
+static inline void write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
+{
+ u32 *lp = (u32 *)((char *)dt + entry*8);
+ lp[0] = entry_low;
+ lp[1] = entry_high;
+}
+
+static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
+{
+ u32 low, high;
+ pack_gate(&low, &high, (unsigned long)addr, seg, type, 0);
+ write_idt_entry(idt_table, gate, low, high);
+}
+
+static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
+{
+ u32 low, high;
+ pack_descriptor(&low, &high, (unsigned long)addr,
+ offsetof(struct tss_struct, __cacheline_filler) - 1,
+ DESCTYPE_TSS, 0);
+ write_gdt_entry(get_cpu_gdt_table(cpu), entry, low, high);
}

#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)


2006-10-27 03:46:44

by Rusty Russell

[permalink] [raw]
Subject: [PATCH 4/4] Prep for paravirt: rearrange processor.h

This patch simply moves processor.h functions around, to group all the
ones which paravirt will override together (for one big ifdef). No
code changes.

Signed-off-by: Rusty Russell <[email protected]>

===================================================================
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -145,58 +145,6 @@ static inline void detect_ht(struct cpui
: "0" (*eax), "2" (*ecx));
}

-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = 0;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
- int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
- return edx;
-}

#define load_cr3(pgdir) write_cr3(__pa(pgdir))

@@ -493,16 +428,6 @@ struct thread_struct {
.io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
}

-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
-{
- tss->esp0 = thread->esp0;
- /* This can only happen when SEP is enabled, no need to test "SEP"arately */
- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
- tss->ss1 = thread->sysenter_cs;
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
-}
-
#define start_thread(regs, new_eip, new_esp) do { \
__asm__("movl %0,%%fs": :"r" (0)); \
regs->xgs = 0; \
@@ -515,33 +440,6 @@ static inline void load_esp0(struct tss_
regs->esp = new_esp; \
} while (0)

-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register) \
- __asm__("movl %%db" #register ", %0" \
- :"=r" (var))
-#define set_debugreg(value, register) \
- __asm__("movl %0,%%db" #register \
- : /* no output */ \
- :"r" (value))
-
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void set_iopl_mask(unsigned mask)
-{
- unsigned int reg;
- __asm__ __volatile__ ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-}
-
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
@@ -632,6 +530,97 @@ static inline void rep_nop(void)
}

#define cpu_relax() rep_nop()
+
+static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+{
+ tss->esp0 = thread->esp0;
+
+ /* This can only happen when SEP is enabled, no need to test "SEP"arately */
+ if (unlikely(tss->ss1 != thread->sysenter_cs)) {
+ tss->ss1 = thread->sysenter_cs;
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ }
+}
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register) \
+ __asm__("movl %%db" #register ", %0" \
+ :"=r" (var))
+#define set_debugreg(value, register) \
+ __asm__("movl %0,%%db" #register \
+ : /* no output */ \
+ :"r" (value))
+
+/*
+ * Set IOPL bits in EFLAGS from given mask
+ */
+static inline void set_iopl_mask(unsigned mask)
+{
+ unsigned int reg;
+ __asm__ __volatile__ ("pushfl;"
+ "popl %0;"
+ "andl %1, %0;"
+ "orl %2, %0;"
+ "pushl %0;"
+ "popfl"
+ : "=&r" (reg)
+ : "i" (~X86_EFLAGS_IOPL), "r" (mask));
+}
+
+/*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = 0;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+ int *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return edx;
+}

/* generic versions from gas */
#define GENERIC_NOP1 ".byte 0x90\n"


2006-10-27 10:30:50

by Zachary Amsden

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: move pagetable includes.

Rusty Russell wrote:
> Move header includes for the nopud / nopmd types to the location of the
> actual pte / pgd type definitions. This allows generic 4-level page
> type code to be written before the split 2/3 level page table headers are
> included.
>
> Signed-off-by: Zachary Amsden <[email protected]>
> Signed-off-by: Rusty Russell <[email protected]>
>

I sent this one out previously .. I think it is already in a subsystem tree.

Zach

2006-10-27 11:30:22

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

Hi!

> (Andrew had already taken that last one, I meant to send this)
>
> Subject: Be careful about touching BIOS address space
>
> BIOS ROM areas may not be mapped into the guest address space, so be careful
> when touching those addresses to make sure they appear to be mapped.
>
> Signed-off-by: Jeremy Fitzhardinge <[email protected]>
> Signed-off-by: Rusty Russell <[email protected]>
>
> ===================================================================
> --- a/arch/i386/kernel/setup.c
> +++ b/arch/i386/kernel/setup.c
> @@ -270,7 +270,14 @@ static struct resource standard_io_resou
> .flags = IORESOURCE_BUSY | IORESOURCE_IO
> } };
>
> -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
> +static inline int romsignature(const unsigned char *x)
> +{
> + unsigned short sig;
> + int ret = 0;
> + if (__get_user(sig, (const unsigned short *)x) == 0)
> + ret = (sig == 0xaa55);

Indentation is b0rken here.

And... is get_user right primitive for accessing area that may not be
there?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2006-10-27 21:31:41

by Jeremy Fitzhardinge

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

Pavel Machek wrote:
> Indentation is b0rken here.
>

Oops. How strange.

> And... is get_user right primitive for accessing area that may not be
> there?

I'm pretty sure there's precedent for using __get_user in this way
(get_user is a different matter, since it cares about whether the
address is within the user part of the address space). Certainly in
arch/i386 code there shouldn't be a problem. Is there some other way to
achieve the same effect (without manually setting up an exception/fixup
block)?

J

2006-10-27 21:42:11

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

On Fri, 27 Oct 2006 14:31:41 -0700
Jeremy Fitzhardinge <[email protected]> wrote:

> Pavel Machek wrote:
> > Indentation is b0rken here.
> >
>
> Oops. How strange.
>
> > And... is get_user right primitive for accessing area that may not be
> > there?
>
> I'm pretty sure there's precedent for using __get_user in this way
> (get_user is a different matter, since it cares about whether the
> address is within the user part of the address space). Certainly in
> arch/i386 code there shouldn't be a problem. Is there some other way to
> achieve the same effect (without manually setting up an exception/fixup
> block)?

It'd be better to use include/linux/uaccess.h:probe_kernel_address() for
this operation.

2006-10-28 04:33:12

by Jeremy Fitzhardinge

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

Andrew Morton wrote:
> It'd be better to use include/linux/uaccess.h:probe_kernel_address() for
> this operation.
>
Ah, yes, that was the precedent I was thinking of, but I guess it would
be better to just use it directly. It's a relatively new interface,
isn't it?

J

2006-10-28 04:50:49

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

On Fri, 27 Oct 2006 21:33:08 -0700
Jeremy Fitzhardinge <[email protected]> wrote:

> Andrew Morton wrote:
> > It'd be better to use include/linux/uaccess.h:probe_kernel_address() for
> > this operation.
> >
> Ah, yes, that was the precedent I was thinking of,

We've done open-coded __get_user() in various places in the past. The difference with
probe_kernel_address() is that it doesn't get deadlocked on mmap_sem().

> but I guess it would
> be better to just use it directly. It's a relatively new interface,
> isn't it?

Yeah. New enough that nobody's tried using it on non-x86 ;) It needs
to do set_fs(KERNEL_DS).

2006-10-29 20:07:57

by Don Mullis

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

Rusty Russell wrote:
> @@ -301,7 +302,7 @@ static struct pci_raw_ops pci_bios_acces
>
> static struct pci_raw_ops * __devinit pci_find_bios(void)
> {
> - union bios32 *check;
> + union bios32 *check, sig;

This "sig" definition has no references, and is shadowed by the definition below.

> @@ -314,6 +315,10 @@ static struct pci_raw_ops * __devinit pc
> for (check = (union bios32 *) __va(0xe0000);
> check <= (union bios32 *) __va(0xffff0);
> ++check) {
> + long sig;
> + if (__get_user(sig, &check->fields.signature))

But, no complaint from gcc. Trying to elicit a complaint by configuring
CC_OPTIMIZE_FOR_SIZE='n' breaks the build with:

include/asm/desc.h: In function 'set_ldt':
include/asm/desc.h:92: error: implicit declaration of function 'write_gdt_entry'

See reply to "[PATCH 3/4]" for a fix.

DM

2006-10-29 20:08:04

by Don Mullis

[permalink] [raw]
Subject: [PATCH] Re: [PATCH 3/4] Prep for paravirt: desc.h clearer parameter names, some code motion

Fix build where CONFIG_CC_OPTIMIZE_FOR_SIZE is not set.

Tested by build and boot.

Signed-off-by: Don Mullis <[email protected]>

---
include/asm-i386/desc.h | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)

Index: linux-trees/include/asm-i386/desc.h
===================================================================
--- linux-trees.orig/include/asm-i386/desc.h
+++ linux-trees/include/asm-i386/desc.h
@@ -78,6 +78,17 @@ static inline void load_TLS(struct threa
#undef C
}

+static inline void write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
+{
+ u32 *lp = (u32 *)((char *)dt + entry*8);
+ lp[0] = entry_low;
+ lp[1] = entry_high;
+}
+
+#define write_ldt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
+#define write_gdt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
+#define write_idt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
+
static inline void set_ldt(void *addr, unsigned int entries)
{
if (likely(entries == 0))
@@ -94,17 +105,6 @@ static inline void set_ldt(void *addr, u
}
}

-#define write_ldt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
-#define write_gdt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
-#define write_idt_entry(dt, entry, low, high) write_dt_entry(dt,entry,low,high)
-
-static inline void write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
-{
- u32 *lp = (u32 *)((char *)dt + entry*8);
- lp[0] = entry_low;
- lp[1] = entry_high;
-}
-
static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
{
u32 low, high;


2006-10-29 21:07:12

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] Re: [PATCH 3/4] Prep for paravirt: desc.h clearer parameter names, some code motion

On Sunday 29 October 2006 12:01, Don Mullis wrote:
> Fix build where CONFIG_CC_OPTIMIZE_FOR_SIZE is not set.
>
> Tested by build and boot.

What error does that fix?

-Andi

2006-10-29 21:51:14

by Don Mullis

[permalink] [raw]
Subject: Re: [PATCH] Re: [PATCH 3/4] Prep for paravirt: desc.h clearer parameter names, some code motion

On Sun, 2006-10-29 at 13:06 -0800, Andi Kleen wrote:
> On Sunday 29 October 2006 12:01, Don Mullis wrote:
> > Fix build where CONFIG_CC_OPTIMIZE_FOR_SIZE is not set.
> >
> > Tested by build and boot.
>
> What error does that fix?

The build aborts with:

include/asm/desc.h: In function 'set_ldt':
include/asm/desc.h:92: error: implicit declaration of function 'write_gdt_entry'

The patch is a follow-on to my earlier reply to "[PATCH 1/4]".

DM

2006-10-30 00:05:54

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Re: [PATCH 3/4] Prep for paravirt: desc.h clearer parameter names, some code motion

On Sun, 2006-10-29 at 13:44 -0800, Don Mullis wrote:
> On Sun, 2006-10-29 at 13:06 -0800, Andi Kleen wrote:
> > On Sunday 29 October 2006 12:01, Don Mullis wrote:
> > > Fix build where CONFIG_CC_OPTIMIZE_FOR_SIZE is not set.
> > >
> > > Tested by build and boot.
> >
> > What error does that fix?
>
> The build aborts with:
>
> include/asm/desc.h: In function 'set_ldt':
> include/asm/desc.h:92: error: implicit declaration of function 'write_gdt_entry'
>
> The patch is a follow-on to my earlier reply to "[PATCH 1/4]".

Yes, I caught this immediately after I sent; I sent the fix straight to
akpm, as I didn't expect anyone else to be applying the patches.

The patch was the same as yours.

Thanks!
Rusty.


2006-10-30 02:35:07

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 1/4] Prep for paravirt: Be careful about touching BIOS address space

On Fri, 2006-10-27 at 21:50 -0700, Andrew Morton wrote:
> On Fri, 27 Oct 2006 21:33:08 -0700
> Jeremy Fitzhardinge <[email protected]> wrote:
>
> > Andrew Morton wrote:
> > > It'd be better to use include/linux/uaccess.h:probe_kernel_address() for
> > > this operation.
> > >
> > Ah, yes, that was the precedent I was thinking of,
>
> We've done open-coded __get_user() in various places in the past. The difference with
> probe_kernel_address() is that it doesn't get deadlocked on mmap_sem().
>
> > but I guess it would
> > be better to just use it directly. It's a relatively new interface,
> > isn't it?
>
> Yeah. New enough that nobody's tried using it on non-x86 ;) It needs
> to do set_fs(KERNEL_DS).

And the function name is misleading: it really does get a value, not
merely probe an address. And the arguments are reversed from
__get_user, just to add fun.

Andrew, please replace
prep-for-paravirt-be-careful-about-touching-bios-warning-fix.patch

Subject: Be careful about touching BIOS address space

BIOS ROM areas may not be mapped into the guest address space, so be careful
when touching those addresses to make sure they appear to be mapped.

At Andrew's request, fix probe_kernel_address for non-x86.

Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Rusty Russell <[email protected]> (modified)

diff -r 9a6c8ceba677 arch/i386/kernel/setup.c
--- a/arch/i386/kernel/setup.c Mon Oct 30 11:34:30 2006 +1100
+++ b/arch/i386/kernel/setup.c Mon Oct 30 13:15:33 2006 +1100
@@ -47,6 +47,7 @@
#include <linux/crash_dump.h>
#include <linux/dmi.h>
#include <linux/pfn.h>
+#include <linux/uaccess.h>

#include <video/edid.h>

@@ -270,7 +271,14 @@ static struct resource standard_io_resou
.flags = IORESOURCE_BUSY | IORESOURCE_IO
} };

-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+static inline int romsignature(const unsigned char *x)
+{
+ unsigned short sig;
+ int ret = 0;
+ if (probe_kernel_address((const unsigned short *)x, sig) == 0)
+ ret = (sig == 0xaa55);
+ return ret;
+}

static int __init romchecksum(unsigned char *rom, unsigned long length)
{
diff -r 9a6c8ceba677 arch/i386/pci/pcbios.c
--- a/arch/i386/pci/pcbios.c Mon Oct 30 11:34:30 2006 +1100
+++ b/arch/i386/pci/pcbios.c Mon Oct 30 13:15:02 2006 +1100
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include "pci.h"
#include "pci-functions.h"

@@ -314,6 +315,10 @@ static struct pci_raw_ops * __devinit pc
for (check = (union bios32 *) __va(0xe0000);
check <= (union bios32 *) __va(0xffff0);
++check) {
+ long sig;
+ if (probe_kernel_address(&check->fields.signature, sig))
+ continue;
+
if (check->fields.signature != BIOS32_SIGNATURE)
continue;
length = check->fields.length * 16;
diff -r 9a6c8ceba677 include/linux/uaccess.h
--- a/include/linux/uaccess.h Mon Oct 30 11:34:30 2006 +1100
+++ b/include/linux/uaccess.h Mon Oct 30 13:10:39 2006 +1100
@@ -34,10 +34,13 @@ static inline unsigned long __copy_from_
#define probe_kernel_address(addr, retval) \
({ \
long ret; \
+ mm_segment_t old_fs = get_fs(); \
\
+ set_fs(KERNEL_DS); \
inc_preempt_count(); \
ret = __get_user(retval, addr); \
dec_preempt_count(); \
+ set_fs(old_fs); \
ret; \
})