2017-06-12 11:54:02

by Jürgen Groß

[permalink] [raw]
Subject: [PATCH] xen: allocate page for shared info page from low memory

In a HVM guest the kernel allocates the page for mapping the shared
info structure via extend_brk() today. This will lead to a drop of
performance as the underlying EPT entry will have to be split up into
4kB entries as the single shared info page is located in hypervisor
memory.

The issue has been detected by using the libmicro munmap test:
unmapping 8kB of memory was faster by nearly a factor of two when no
pv interfaces were active in the HVM guest.

So instead of taking a page from memory which might be mapped via
large EPT entries use a page which is already mapped via a 4kB EPT
entry: we can take a page from the first 1MB of memory as the video
memory at 640kB disallows using larger EPT entries.

Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++-------
arch/x86/xen/enlighten_pv.c | 2 --
2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index a6d014f47e52..c19477b6e43a 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,5 +1,6 @@
#include <linux/cpu.h>
#include <linux/kexec.h>
+#include <linux/memblock.h>

#include <xen/features.h>
#include <xen/events.h>
@@ -10,9 +11,11 @@
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
+#include <asm/e820/api.h>

#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>

#include "xen-ops.h"
#include "mmu.h"
@@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page;
+ u64 pa;
+
+ if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
+ /*
+ * Search for a free page starting at 4kB physical address.
+ * Low memory is preferred to avoid an EPT large page split up
+ * by the mapping.
+ * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+ * the BIOS used for HVM guests is well behaved and won't
+ * clobber memory other than the first 4kB.
+ */
+ for (pa = PAGE_SIZE;
+ !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+ memblock_is_reserved(pa);
+ pa += PAGE_SIZE)
+ ;
+
+ memblock_reserve(pa, PAGE_SIZE);
+ HYPERVISOR_shared_info = __va(pa);
+ }

- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- extend_brk(PAGE_SIZE, PAGE_SIZE);
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+ xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();

- HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-
/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
* page, we use it in the event channel upcall and in some pvclock
* related functions. We don't need the vcpu_info placement
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f33eef4ebd12..a9a67ecf2c07 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -89,8 +89,6 @@

void *xen_initial_gdt;

-RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
-
static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);

--
2.12.3


2017-06-14 16:58:27

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH] xen: allocate page for shared info page from low memory

On 06/12/2017 07:53 AM, Juergen Gross wrote:
> In a HVM guest the kernel allocates the page for mapping the shared
> info structure via extend_brk() today. This will lead to a drop of
> performance as the underlying EPT entry will have to be split up into
> 4kB entries as the single shared info page is located in hypervisor
> memory.
>
> The issue has been detected by using the libmicro munmap test:
> unmapping 8kB of memory was faster by nearly a factor of two when no
> pv interfaces were active in the HVM guest.
>
> So instead of taking a page from memory which might be mapped via
> large EPT entries use a page which is already mapped via a 4kB EPT
> entry: we can take a page from the first 1MB of memory as the video
> memory at 640kB disallows using larger EPT entries.
>
> Signed-off-by: Juergen Gross <[email protected]>
> ---
> arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++-------
> arch/x86/xen/enlighten_pv.c | 2 --
> 2 files changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
> index a6d014f47e52..c19477b6e43a 100644
> --- a/arch/x86/xen/enlighten_hvm.c
> +++ b/arch/x86/xen/enlighten_hvm.c
> @@ -1,5 +1,6 @@
> #include <linux/cpu.h>
> #include <linux/kexec.h>
> +#include <linux/memblock.h>
>
> #include <xen/features.h>
> #include <xen/events.h>
> @@ -10,9 +11,11 @@
> #include <asm/reboot.h>
> #include <asm/setup.h>
> #include <asm/hypervisor.h>
> +#include <asm/e820/api.h>
>
> #include <asm/xen/cpuid.h>
> #include <asm/xen/hypervisor.h>
> +#include <asm/xen/page.h>
>
> #include "xen-ops.h"
> #include "mmu.h"
> @@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
> {
> int cpu;
> struct xen_add_to_physmap xatp;
> - static struct shared_info *shared_info_page;
> + u64 pa;
> +
> + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
> + /*
> + * Search for a free page starting at 4kB physical address.
> + * Low memory is preferred to avoid an EPT large page split up
> + * by the mapping.
> + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
> + * the BIOS used for HVM guests is well behaved and won't
> + * clobber memory other than the first 4kB.
> + */
> + for (pa = PAGE_SIZE;
> + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
> + memblock_is_reserved(pa);
> + pa += PAGE_SIZE)
> + ;

Is it possible to never find a page here?

-boris

> +
> + memblock_reserve(pa, PAGE_SIZE);
> + HYPERVISOR_shared_info = __va(pa);
> + }
>
> - if (!shared_info_page)
> - shared_info_page = (struct shared_info *)
> - extend_brk(PAGE_SIZE, PAGE_SIZE);
> xatp.domid = DOMID_SELF;
> xatp.idx = 0;
> xatp.space = XENMAPSPACE_shared_info;
> - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
> + xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
> if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
> BUG();
>
> - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
> -
> /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
> * page, we use it in the event channel upcall and in some pvclock
> * related functions. We don't need the vcpu_info placement
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index f33eef4ebd12..a9a67ecf2c07 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -89,8 +89,6 @@
>
> void *xen_initial_gdt;
>
> -RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
> -
> static int xen_cpu_up_prepare_pv(unsigned int cpu);
> static int xen_cpu_dead_pv(unsigned int cpu);
>

2017-06-14 17:11:59

by Jürgen Groß

[permalink] [raw]
Subject: Re: [PATCH] xen: allocate page for shared info page from low memory

On 14/06/17 18:58, Boris Ostrovsky wrote:
> On 06/12/2017 07:53 AM, Juergen Gross wrote:
>> In a HVM guest the kernel allocates the page for mapping the shared
>> info structure via extend_brk() today. This will lead to a drop of
>> performance as the underlying EPT entry will have to be split up into
>> 4kB entries as the single shared info page is located in hypervisor
>> memory.
>>
>> The issue has been detected by using the libmicro munmap test:
>> unmapping 8kB of memory was faster by nearly a factor of two when no
>> pv interfaces were active in the HVM guest.
>>
>> So instead of taking a page from memory which might be mapped via
>> large EPT entries use a page which is already mapped via a 4kB EPT
>> entry: we can take a page from the first 1MB of memory as the video
>> memory at 640kB disallows using larger EPT entries.
>>
>> Signed-off-by: Juergen Gross <[email protected]>
>> ---
>> arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++-------
>> arch/x86/xen/enlighten_pv.c | 2 --
>> 2 files changed, 24 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
>> index a6d014f47e52..c19477b6e43a 100644
>> --- a/arch/x86/xen/enlighten_hvm.c
>> +++ b/arch/x86/xen/enlighten_hvm.c
>> @@ -1,5 +1,6 @@
>> #include <linux/cpu.h>
>> #include <linux/kexec.h>
>> +#include <linux/memblock.h>
>>
>> #include <xen/features.h>
>> #include <xen/events.h>
>> @@ -10,9 +11,11 @@
>> #include <asm/reboot.h>
>> #include <asm/setup.h>
>> #include <asm/hypervisor.h>
>> +#include <asm/e820/api.h>
>>
>> #include <asm/xen/cpuid.h>
>> #include <asm/xen/hypervisor.h>
>> +#include <asm/xen/page.h>
>>
>> #include "xen-ops.h"
>> #include "mmu.h"
>> @@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
>> {
>> int cpu;
>> struct xen_add_to_physmap xatp;
>> - static struct shared_info *shared_info_page;
>> + u64 pa;
>> +
>> + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
>> + /*
>> + * Search for a free page starting at 4kB physical address.
>> + * Low memory is preferred to avoid an EPT large page split up
>> + * by the mapping.
>> + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
>> + * the BIOS used for HVM guests is well behaved and won't
>> + * clobber memory other than the first 4kB.
>> + */
>> + for (pa = PAGE_SIZE;
>> + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
>> + memblock_is_reserved(pa);
>> + pa += PAGE_SIZE)
>> + ;
>
> Is it possible to never find a page here?

Only if there is no memory available at all. :-)

TBH: I expect this to _always_ succeed at the first loop iteration.


Juergen

2017-06-14 17:30:18

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH] xen: allocate page for shared info page from low memory

On 06/14/2017 01:11 PM, Juergen Gross wrote:
> On 14/06/17 18:58, Boris Ostrovsky wrote:
>> On 06/12/2017 07:53 AM, Juergen Gross wrote:
>>> In a HVM guest the kernel allocates the page for mapping the shared
>>> info structure via extend_brk() today. This will lead to a drop of
>>> performance as the underlying EPT entry will have to be split up into
>>> 4kB entries as the single shared info page is located in hypervisor
>>> memory.
>>>
>>> The issue has been detected by using the libmicro munmap test:
>>> unmapping 8kB of memory was faster by nearly a factor of two when no
>>> pv interfaces were active in the HVM guest.
>>>
>>> So instead of taking a page from memory which might be mapped via
>>> large EPT entries use a page which is already mapped via a 4kB EPT
>>> entry: we can take a page from the first 1MB of memory as the video
>>> memory at 640kB disallows using larger EPT entries.
>>>
>>> Signed-off-by: Juergen Gross <[email protected]>
>>> ---
>>> arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++-------
>>> arch/x86/xen/enlighten_pv.c | 2 --
>>> 2 files changed, 24 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
>>> index a6d014f47e52..c19477b6e43a 100644
>>> --- a/arch/x86/xen/enlighten_hvm.c
>>> +++ b/arch/x86/xen/enlighten_hvm.c
>>> @@ -1,5 +1,6 @@
>>> #include <linux/cpu.h>
>>> #include <linux/kexec.h>
>>> +#include <linux/memblock.h>
>>>
>>> #include <xen/features.h>
>>> #include <xen/events.h>
>>> @@ -10,9 +11,11 @@
>>> #include <asm/reboot.h>
>>> #include <asm/setup.h>
>>> #include <asm/hypervisor.h>
>>> +#include <asm/e820/api.h>
>>>
>>> #include <asm/xen/cpuid.h>
>>> #include <asm/xen/hypervisor.h>
>>> +#include <asm/xen/page.h>
>>>
>>> #include "xen-ops.h"
>>> #include "mmu.h"
>>> @@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
>>> {
>>> int cpu;
>>> struct xen_add_to_physmap xatp;
>>> - static struct shared_info *shared_info_page;
>>> + u64 pa;
>>> +
>>> + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
>>> + /*
>>> + * Search for a free page starting at 4kB physical address.
>>> + * Low memory is preferred to avoid an EPT large page split up
>>> + * by the mapping.
>>> + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
>>> + * the BIOS used for HVM guests is well behaved and won't
>>> + * clobber memory other than the first 4kB.
>>> + */
>>> + for (pa = PAGE_SIZE;
>>> + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
>>> + memblock_is_reserved(pa);
>>> + pa += PAGE_SIZE)
>>> + ;
>> Is it possible to never find a page here?
> Only if there is no memory available at all. :-)
>
> TBH: I expect this to _always_ succeed at the first loop iteration.



Reviewed-by: Boris Ostrovsky <[email protected]>

2017-07-23 20:25:43

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH] xen: allocate page for shared info page from low memory



On 06/14/2017 01:11 PM, Juergen Gross wrote:
> On 14/06/17 18:58, Boris Ostrovsky wrote:
>> On 06/12/2017 07:53 AM, Juergen Gross wrote:
>>> In a HVM guest the kernel allocates the page for mapping the shared
>>> info structure via extend_brk() today. This will lead to a drop of
>>> performance as the underlying EPT entry will have to be split up into
>>> 4kB entries as the single shared info page is located in hypervisor
>>> memory.
>>>
>>> The issue has been detected by using the libmicro munmap test:
>>> unmapping 8kB of memory was faster by nearly a factor of two when no
>>> pv interfaces were active in the HVM guest.
>>>
>>> So instead of taking a page from memory which might be mapped via
>>> large EPT entries use a page which is already mapped via a 4kB EPT
>>> entry: we can take a page from the first 1MB of memory as the video
>>> memory at 640kB disallows using larger EPT entries.
>>>
>>> Signed-off-by: Juergen Gross <[email protected]>
>>> ---
>>> arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++-------
>>> arch/x86/xen/enlighten_pv.c | 2 --
>>> 2 files changed, 24 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
>>> index a6d014f47e52..c19477b6e43a 100644
>>> --- a/arch/x86/xen/enlighten_hvm.c
>>> +++ b/arch/x86/xen/enlighten_hvm.c
>>> @@ -1,5 +1,6 @@
>>> #include <linux/cpu.h>
>>> #include <linux/kexec.h>
>>> +#include <linux/memblock.h>
>>>
>>> #include <xen/features.h>
>>> #include <xen/events.h>
>>> @@ -10,9 +11,11 @@
>>> #include <asm/reboot.h>
>>> #include <asm/setup.h>
>>> #include <asm/hypervisor.h>
>>> +#include <asm/e820/api.h>
>>>
>>> #include <asm/xen/cpuid.h>
>>> #include <asm/xen/hypervisor.h>
>>> +#include <asm/xen/page.h>
>>>
>>> #include "xen-ops.h"
>>> #include "mmu.h"
>>> @@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
>>> {
>>> int cpu;
>>> struct xen_add_to_physmap xatp;
>>> - static struct shared_info *shared_info_page;
>>> + u64 pa;
>>> +
>>> + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
>>> + /*
>>> + * Search for a free page starting at 4kB physical address.
>>> + * Low memory is preferred to avoid an EPT large page split up
>>> + * by the mapping.
>>> + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
>>> + * the BIOS used for HVM guests is well behaved and won't
>>> + * clobber memory other than the first 4kB.
>>> + */
>>> + for (pa = PAGE_SIZE;
>>> + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
>>> + memblock_is_reserved(pa);
>>> + pa += PAGE_SIZE)
>>> + ;
>>
>> Is it possible to never find a page here?
>
> Only if there is no memory available at all. :-)
>
> TBH: I expect this to _always_ succeed at the first loop iteration.

This patch seems to break (64-bit only) guests on dumpdata here. No
problems on other machines.

So far all I know is that we did get the first page (0x1000) but not
much more. I will poke at this more on Monday.

-boris

2017-07-25 03:59:20

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH] xen: allocate page for shared info page from low memory



On 07/23/2017 04:25 PM, Boris Ostrovsky wrote:
>
>
> On 06/14/2017 01:11 PM, Juergen Gross wrote:
>> On 14/06/17 18:58, Boris Ostrovsky wrote:
>>> On 06/12/2017 07:53 AM, Juergen Gross wrote:
>>>> In a HVM guest the kernel allocates the page for mapping the shared
>>>> info structure via extend_brk() today. This will lead to a drop of
>>>> performance as the underlying EPT entry will have to be split up into
>>>> 4kB entries as the single shared info page is located in hypervisor
>>>> memory.
>>>>
>>>> The issue has been detected by using the libmicro munmap test:
>>>> unmapping 8kB of memory was faster by nearly a factor of two when no
>>>> pv interfaces were active in the HVM guest.
>>>>
>>>> So instead of taking a page from memory which might be mapped via
>>>> large EPT entries use a page which is already mapped via a 4kB EPT
>>>> entry: we can take a page from the first 1MB of memory as the video
>>>> memory at 640kB disallows using larger EPT entries.
>>>>
>>>> Signed-off-by: Juergen Gross <[email protected]>
>>>> ---
>>>> arch/x86/xen/enlighten_hvm.c | 31 ++++++++++++++++++++++++-------
>>>> arch/x86/xen/enlighten_pv.c | 2 --
>>>> 2 files changed, 24 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/arch/x86/xen/enlighten_hvm.c
>>>> b/arch/x86/xen/enlighten_hvm.c
>>>> index a6d014f47e52..c19477b6e43a 100644
>>>> --- a/arch/x86/xen/enlighten_hvm.c
>>>> +++ b/arch/x86/xen/enlighten_hvm.c
>>>> @@ -1,5 +1,6 @@
>>>> #include <linux/cpu.h>
>>>> #include <linux/kexec.h>
>>>> +#include <linux/memblock.h>
>>>> #include <xen/features.h>
>>>> #include <xen/events.h>
>>>> @@ -10,9 +11,11 @@
>>>> #include <asm/reboot.h>
>>>> #include <asm/setup.h>
>>>> #include <asm/hypervisor.h>
>>>> +#include <asm/e820/api.h>
>>>> #include <asm/xen/cpuid.h>
>>>> #include <asm/xen/hypervisor.h>
>>>> +#include <asm/xen/page.h>
>>>> #include "xen-ops.h"
>>>> #include "mmu.h"
>>>> @@ -22,20 +25,34 @@ void __ref xen_hvm_init_shared_info(void)
>>>> {
>>>> int cpu;
>>>> struct xen_add_to_physmap xatp;
>>>> - static struct shared_info *shared_info_page;
>>>> + u64 pa;
>>>> +
>>>> + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
>>>> + /*
>>>> + * Search for a free page starting at 4kB physical address.
>>>> + * Low memory is preferred to avoid an EPT large page split up
>>>> + * by the mapping.
>>>> + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
>>>> + * the BIOS used for HVM guests is well behaved and won't
>>>> + * clobber memory other than the first 4kB.
>>>> + */
>>>> + for (pa = PAGE_SIZE;
>>>> + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
>>>> + memblock_is_reserved(pa);
>>>> + pa += PAGE_SIZE)
>>>> + ;
>>>
>>> Is it possible to never find a page here?
>>
>> Only if there is no memory available at all. :-)
>>
>> TBH: I expect this to _always_ succeed at the first loop iteration.
>
> This patch seems to break (64-bit only) guests on dumpdata here. No
> problems on other machines.
>
> So far all I know is that we did get the first page (0x1000) but not
> much more. I will poke at this more on Monday.


So the problem is due to KASLR --- we can't use __va() before
kernel_randomize_memory() is called since it will change __PAGE_OFFSET.
(Setting CONFIG_RANDOMIZE_BASE will cause failure.)


-boris