2006-05-09 08:54:53

by Chris Wright

[permalink] [raw]
Subject: [RFC PATCH 08/35] Add Xen-specific memory management definitions

Add extra memory management definitions used by Xen-specific
code. These allow conversion between 'pseudophysical' memory
addresses, which provide the illusion of a physically contiguous
memory map, and underlying real machine addresses. This conversion is
neceesary when interpreting and updating PTEs. Also support write
protection of page mappings, which is needed to allow successful
validation of page tables.

The current definitions are incomplete and only a stub implementation,
allowing us to re-use existing code (drivers) which references these
memory management code changes.

Signed-off-by: Ian Pratt <[email protected]>
Signed-off-by: Christian Limpach <[email protected]>
Signed-off-by: Chris Wright <[email protected]>
---
include/asm-i386/hypervisor.h | 3
include/asm-i386/mach-default/mach_page.h | 4 +
include/asm-i386/mach-xen/mach_page.h | 99 ++++++++++++++++++++++++++++++
include/asm-i386/page.h | 2
4 files changed, 108 insertions(+)

--- linus-2.6.orig/include/asm-i386/hypervisor.h
+++ linus-2.6/include/asm-i386/hypervisor.h
@@ -67,4 +67,7 @@ u64 jiffies_to_st(unsigned long jiffies)

#define xen_init() (0)

+#include <xen/interface/version.h>
+#include <xen/features.h>
+
#endif /* __HYPERVISOR_H__ */
--- linus-2.6.orig/include/asm-i386/page.h
+++ linus-2.6/include/asm-i386/page.h
@@ -82,6 +82,8 @@ typedef struct { unsigned long pgprot; }
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)

+#include <mach_page.h>
+
/*
* This handles the memory map.. We could make this a config
* option, but too many people screw it up, and too few need
--- /dev/null
+++ linus-2.6/include/asm-i386/mach-default/mach_page.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_MACH_PAGE_H
+#define __ASM_MACH_PAGE_H
+
+#endif /* __ASM_MACH_PAGE_H */
--- /dev/null
+++ linus-2.6/include/asm-i386/mach-xen/mach_page.h
@@ -0,0 +1,99 @@
+#ifndef __ASM_MACH_PAGE_H
+#define __ASM_MACH_PAGE_H
+
+#ifndef __ASSEMBLY__
+
+/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY (~0UL)
+
+static inline unsigned long pfn_to_mfn(unsigned long pfn)
+{
+#ifndef CONFIG_XEN_SHADOW_MODE
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return pfn;
+ return phys_to_machine_mapping[(unsigned int)(pfn)] &
+ ~FOREIGN_FRAME_BIT;
+#else
+ return pfn;
+#endif
+}
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+#ifndef CONFIG_XEN_SHADOW_MODE
+ unsigned long pfn;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
+
+ /*
+ * The array access can fail (e.g., device space beyond end of RAM).
+ * In such cases it doesn't matter what we return (we return garbage),
+ * but we must handle the fault without crashing!
+ */
+ asm (
+ "1: movl %1,%0\n"
+ "2:\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,2b\n"
+ ".previous"
+ : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
+
+ return pfn;
+#else
+ return mfn;
+#endif
+}
+
+/* VIRT <-> MACHINE conversion */
+#define virt_to_machine(v) (__pa(v))
+#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
+#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
+
+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
+#ifndef CONFIG_X86_PAE
+#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
+#else
+#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |\
+ (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)))
+#endif
+
+#define virt_to_ptep(__va) \
+({ \
+ pgd_t *__pgd = pgd_offset_k((unsigned long)(__va)); \
+ pud_t *__pud = pud_offset(__pgd, (unsigned long)(__va)); \
+ pmd_t *__pmd = pmd_offset(__pud, (unsigned long)(__va)); \
+ pte_offset_kernel(__pmd, (unsigned long)(__va)); \
+})
+
+#define arbitrary_virt_to_machine(__va) \
+({ \
+ maddr_t m = (maddr_t)pte_mfn(*virt_to_ptep(__va)) << PAGE_SHIFT;\
+ m | ((unsigned long)(__va) & (PAGE_SIZE-1)); \
+})
+
+#define make_lowmem_page_readonly(va, feature) do { \
+ pte_t *pte; \
+ int rc; \
+ \
+ if (xen_feature(feature)) \
+ return; \
+ \
+ pte = virt_to_ptep(va); \
+ rc = HYPERVISOR_update_va_mapping( \
+ (unsigned long)va, pte_wrprotect(*pte), 0); \
+ BUG_ON(rc); \
+} while (0)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_MACH_PAGE_H */

--


2006-05-09 14:50:19

by Martin Bligh

[permalink] [raw]
Subject: Re: [RFC PATCH 08/35] Add Xen-specific memory management definitions


> +#define virt_to_ptep(__va) \
> +({ \
> + pgd_t *__pgd = pgd_offset_k((unsigned long)(__va)); \
> + pud_t *__pud = pud_offset(__pgd, (unsigned long)(__va)); \
> + pmd_t *__pmd = pmd_offset(__pud, (unsigned long)(__va)); \
> + pte_offset_kernel(__pmd, (unsigned long)(__va)); \
> +})

Do we really need yet another function to do this?
Especially one in a mult-line #define instead of a real function call,
and that doesn't seem to error check at each step?

> +
> +#define arbitrary_virt_to_machine(__va) \
> +({ \
> + maddr_t m = (maddr_t)pte_mfn(*virt_to_ptep(__va)) << PAGE_SHIFT;\
> + m | ((unsigned long)(__va) & (PAGE_SIZE-1)); \
> +})
> +
> +#define make_lowmem_page_readonly(va, feature) do { \
> + pte_t *pte; \
> + int rc; \
> + \
> + if (xen_feature(feature)) \
> + return; \
> + \
> + pte = virt_to_ptep(va); \
> + rc = HYPERVISOR_update_va_mapping( \
> + (unsigned long)va, pte_wrprotect(*pte), 0); \
> + BUG_ON(rc); \
> +} while (0)

Things this long should definitely not be #defines.

2006-05-09 17:44:20

by Christian Limpach

[permalink] [raw]
Subject: Re: [RFC PATCH 08/35] Add Xen-specific memory management definitions

On Tue, May 09, 2006 at 07:49:45AM -0700, Martin J. Bligh wrote:
>
> >+#define virt_to_ptep(__va) \
> >+({ \
> >+ pgd_t *__pgd = pgd_offset_k((unsigned long)(__va)); \
> >+ pud_t *__pud = pud_offset(__pgd, (unsigned long)(__va)); \
> >+ pmd_t *__pmd = pmd_offset(__pud, (unsigned long)(__va)); \
> >+ pte_offset_kernel(__pmd, (unsigned long)(__va)); \
> >+})
>
> Do we really need yet another function to do this?
> Especially one in a mult-line #define instead of a real function call,
> and that doesn't seem to error check at each step?

Indeed, I'll use lookup_address instead.

> >+
> >+#define arbitrary_virt_to_machine(__va) \
> >+({ \
> >+ maddr_t m = (maddr_t)pte_mfn(*virt_to_ptep(__va)) << PAGE_SHIFT;\
> >+ m | ((unsigned long)(__va) & (PAGE_SIZE-1)); \
> >+})
> >+
> >+#define make_lowmem_page_readonly(va, feature) do { \
> >+ pte_t *pte; \
> >+ int rc; \
> >+ \
> >+ if (xen_feature(feature)) \
> >+ return; \
> >+ \
> >+ pte = virt_to_ptep(va); \
> >+ rc = HYPERVISOR_update_va_mapping( \
> >+ (unsigned long)va, pte_wrprotect(*pte), 0); \
> >+ BUG_ON(rc); \
> >+} while (0)
>
> Things this long should definitely not be #defines.

I've changed these to be functions and moved them into a .c file
under arch/i386/mach-xen.

christian

2006-05-15 06:45:54

by Pete Zaitcev

[permalink] [raw]
Subject: Re: [RFC PATCH 08/35] Add Xen-specific memory management definitions

On Tue, 09 May 2006 00:00:08 -0700, Chris Wright <[email protected]> wrote:

I'm a little concerned with the code below being entirely too smart:

> +static inline unsigned long mfn_to_pfn(unsigned long mfn)
> +{
> +#ifndef CONFIG_XEN_SHADOW_MODE
> + unsigned long pfn;
> +
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return mfn;
> +
> + /*
> + * The array access can fail (e.g., device space beyond end of RAM).
> + * In such cases it doesn't matter what we return (we return garbage),
> + * but we must handle the fault without crashing!
> + */
> + asm (
> + "1: movl %1,%0\n"
> + "2:\n"
> + ".section __ex_table,\"a\"\n"
> + " .align 4\n"
> + " .long 1b,2b\n"
> + ".previous"
> + : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
> +
> + return pfn;
> +#else
> + return mfn;
> +#endif
> +}

I found that if someone tries to use this too early, hypervisor terminates
the domain with a message like this:

(XEN) DOM0: (file=mm.c, line=486) Non-privileged attempt to map I/O space 000fec00

Why can't we use something like this, only a proper number of machine
pages:

diff -urp -X dontdiff linux-2.6.15-1.2054_FC5/include/asm-x86_64/mach-xen/asm/page.h linux-2.6.15-1.2054_FC5.z3/include/asm-x86_64/mach-xen/asm/page.h
--- linux-2.6.15-1.2054_FC5/include/asm-x86_64/mach-xen/asm/page.h 2006-03-17 17:52:38.000000000 -0800
+++ linux-2.6.15-1.2054_FC5.z3/include/asm-x86_64/mach-xen/asm/page.h 2006-05-11 20:36:16.000000000 -0700
@@ -101,26 +101,11 @@ static inline int phys_to_machine_mappin

static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
- unsigned long pfn;
-
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
-
- /*
- * The array access can fail (e.g., device space beyond end of RAM).
- * In such cases it doesn't matter what we return (we return garbage),
- * but we must handle the fault without crashing!
- */
- asm (
- "1: movq %1,%0\n"
- "2:\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 1b,2b\n"
- ".previous"
- : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
-
- return pfn;
+ if (mfn >= 1048576) /* 4GB _machine_ RAM. XXX How to find out? */
+ return ~0;
+ return machine_to_phys_mapping[mfn];
}

/*

I'm sure you considered this, but decided to be tricky. Why?
No way to find the safe number of machine pages in a guest?

-- Pete

2006-05-15 07:09:03

by Keir Fraser

[permalink] [raw]
Subject: Re: [RFC PATCH 08/35] Add Xen-specific memory management definitions


On 15 May 2006, at 07:44, Pete Zaitcev wrote:

> I'm sure you considered this, but decided to be tricky. Why?
> No way to find the safe number of machine pages in a guest?

We want to allow holes in the table if RAM is sparse. That code
shouldn't ever fail after the guest has installed a page-fault handler.
If you can make it do so (was it i386 or x86/64?) we're interested in
seeing the full crash output.

-- Keir

2006-05-15 08:19:49

by Christian Limpach

[permalink] [raw]
Subject: Re: [RFC PATCH 08/35] Add Xen-specific memory management definitions

On Sun, May 14, 2006 at 11:44:18PM -0700, Pete Zaitcev wrote:
> I'm sure you considered this, but decided to be tricky. Why?
> No way to find the safe number of machine pages in a guest?

In addition to wanting to support holes, the number of machine
pages will usually change when you move it to another machine.

christian

2006-05-17 16:08:26

by Pete Zaitcev

[permalink] [raw]
Subject: Re: [RFC PATCH 08/35] Add Xen-specific memory management definitions

On Tue, 09 May 2006 00:00:08 -0700, Chris Wright <[email protected]> wrote:

> +static inline unsigned long pfn_to_mfn(unsigned long pfn)
> +{
> +#ifndef CONFIG_XEN_SHADOW_MODE
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return pfn;
> + return phys_to_machine_mapping[(unsigned int)(pfn)] &
> + ~FOREIGN_FRAME_BIT;
> +#else
> + return pfn;
> +#endif
> +}

Why do we need several modes in Linux guests?

If a significant tradeoff exists (for example, between performance
and maximum addressable memory), then we need to think about the
real issue instead of throwing config options into the pot.

-- Pete

2006-05-18 07:39:40

by Chris Wright

[permalink] [raw]
Subject: Re: [RFC PATCH 08/35] Add Xen-specific memory management definitions

* Pete Zaitcev ([email protected]) wrote:
> On Tue, 09 May 2006 00:00:08 -0700, Chris Wright <[email protected]> wrote:
>
> > +static inline unsigned long pfn_to_mfn(unsigned long pfn)
> > +{
> > +#ifndef CONFIG_XEN_SHADOW_MODE
> > + if (xen_feature(XENFEAT_auto_translated_physmap))
> > + return pfn;
> > + return phys_to_machine_mapping[(unsigned int)(pfn)] &
> > + ~FOREIGN_FRAME_BIT;
> > +#else
> > + return pfn;
> > +#endif
> > +}
>
> Why do we need several modes in Linux guests?

This patchset only supports shadow translated mode, so the extra CONFIG is
just an artifact of this simplied patchset. Ultimately, this is not the
preferred mode (performance wise), but shadow mode is simpler to port to.