The series improves the use of hotplug memory in the Xen balloon
driver.
- Reliably find a non-conflicting location for the hotplugged memory
(this fixes memory hotplug in a number of cases, particularly in
dom0).
- Use hotplugged memory for alloc_xenballooned_pages() (keeping more
memory available for the domain and reducing fragmentation of the
p2m).
Changes in v2:
- New BP_WAIT state to signal the balloon process to wait for
userspace to online the new memory.
- Preallocate P2M entries in alloc_xenballooned_pages() so they do not
need to be allocated later (in a context where GFP_KERNEL allocations
are not possible).
David
Add add_memory_resource() to add memory using an existing "System RAM"
resource. This is useful if the memory region is being located by
finding a free resource slot with allocate_resource().
Xen guests will make use of this in their balloon driver to hotplug
arbitrary amounts of memory in response to toolstack requests.
Signed-off-by: David Vrabel <[email protected]>
Cc: Andrew Morton <[email protected]>
---
include/linux/memory_hotplug.h | 2 ++
mm/memory_hotplug.c | 28 +++++++++++++++++++++-------
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 6ffa0ac..c76d371 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -11,6 +11,7 @@ struct zone;
struct pglist_data;
struct mem_section;
struct memory_block;
+struct resource;
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -266,6 +267,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
+extern int add_memory_resource(int nid, struct resource *resource);
extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
extern int arch_add_memory(int nid, u64 start, u64 size);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 26fbba7..460d0fe 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1216,23 +1216,21 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
-int __ref add_memory(int nid, u64 start, u64 size)
+int __ref add_memory_resource(int nid, struct resource *res)
{
+ u64 start, size;
pg_data_t *pgdat = NULL;
bool new_pgdat;
bool new_node;
- struct resource *res;
int ret;
+ start = res->start;
+ size = resource_size(res);
+
ret = check_hotplug_memory_range(start, size);
if (ret)
return ret;
- res = register_memory_resource(start, size);
- ret = -EEXIST;
- if (!res)
- return ret;
-
{ /* Stupid hack to suppress address-never-null warning */
void *p = NODE_DATA(nid);
new_pgdat = !p;
@@ -1282,6 +1280,22 @@ out:
mem_hotplug_done();
return ret;
}
+EXPORT_SYMBOL_GPL(add_memory_resource);
+
+int __ref add_memory(int nid, u64 start, u64 size)
+{
+ struct resource *res;
+ int ret;
+
+ res = register_memory_resource(start, size);
+ if (!res)
+ return -EEXIST;
+
+ ret = add_memory_resource(nid, res);
+ if (ret < 0)
+ release_memory_resource(res);
+ return ret;
+}
EXPORT_SYMBOL_GPL(add_memory);
#ifdef CONFIG_MEMORY_HOTREMOVE
--
2.1.4
Commit 0bb599fd30108883b00c7d4a226eeb49111e6932 (xen: remove scratch
frames for ballooned pages and m2p override) removed the use of the
scratch page for ballooned out pages.
Remove the leftover function declarations.
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
---
include/xen/balloon.h | 3 ---
1 file changed, 3 deletions(-)
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index a4c1c6a..cc2e1a7 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -29,9 +29,6 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages,
bool highmem);
void free_xenballooned_pages(int nr_pages, struct page **pages);
-struct page *get_balloon_scratch_page(void);
-void put_balloon_scratch_page(void);
-
struct device;
#ifdef CONFIG_XEN_SELFBALLOONING
extern int register_xen_selfballooning(struct device *dev);
--
2.1.4
During setup, discard RAM regions that are above the maximum
reservation (instead of marking them as E820_UNUSABLE). This allows
hotplug memory to be placed at these addresses.
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
---
arch/x86/xen/setup.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 55f388e..32910c5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -646,6 +646,7 @@ char * __init xen_memory_setup(void)
phys_addr_t addr = map[i].addr;
phys_addr_t size = map[i].size;
u32 type = map[i].type;
+ bool discard = false;
if (type == E820_RAM) {
if (addr < mem_end) {
@@ -656,10 +657,11 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(addr, size);
xen_max_p2m_pfn = PFN_DOWN(addr + size);
} else
- type = E820_UNUSABLE;
+ discard = true;
}
- xen_align_and_add_e820_region(addr, size, type);
+ if (!discard)
+ xen_align_and_add_e820_region(addr, size, type);
map[i].addr += size;
map[i].size -= size;
--
2.1.4
Instead of placing hotplugged memory at the end of RAM (which may
conflict with PCI devices or reserved regions) use allocate_resource()
to get a new, suitably aligned resource that does not conflict.
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
---
drivers/xen/balloon.c | 64 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 54 insertions(+), 10 deletions(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index fd93369..29aeb8f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -54,6 +54,7 @@
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/percpu-defs.h>
+#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -208,6 +209,43 @@ static bool balloon_is_inflated(void)
return false;
}
+static struct resource *additional_memory_resource(phys_addr_t size)
+{
+ struct resource *res;
+ int ret;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return NULL;
+
+ res->name = "System RAM";
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+ ret = allocate_resource(&iomem_resource, res,
+ size, 0, -1,
+ PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+ if (ret < 0) {
+ pr_err("Cannot allocate new System RAM resource\n");
+ kfree(res);
+ return NULL;
+ }
+
+ return res;
+}
+
+static void release_memory_resource(struct resource *resource)
+{
+ if (!resource)
+ return;
+
+ /*
+ * No need to reset region to identity mapped since we now
+ * know that no I/O can be in this region
+ */
+ release_resource(resource);
+ kfree(resource);
+}
+
/*
* reserve_additional_memory() adds memory region of size >= credit above
* max_pfn. New region is section aligned and size is modified to be multiple
@@ -221,13 +259,17 @@ static bool balloon_is_inflated(void)
static enum bp_state reserve_additional_memory(long credit)
{
+ struct resource *resource;
int nid, rc;
- u64 hotplug_start_paddr;
- unsigned long balloon_hotplug = credit;
+ unsigned long balloon_hotplug;
+
+ balloon_hotplug = round_up(credit, PAGES_PER_SECTION);
+
+ resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE);
+ if (!resource)
+ goto err;
- hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
- balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
- nid = memory_add_physaddr_to_nid(hotplug_start_paddr);
+ nid = memory_add_physaddr_to_nid(resource->start);
#ifdef CONFIG_XEN_HAVE_PVMMU
/*
@@ -242,21 +284,20 @@ static enum bp_state reserve_additional_memory(long credit)
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long pfn, i;
- pfn = PFN_DOWN(hotplug_start_paddr);
+ pfn = PFN_DOWN(resource->start);
for (i = 0; i < balloon_hotplug; i++) {
if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
pr_warn("set_phys_to_machine() failed, no memory added\n");
- return BP_ECANCELED;
+ goto err;
}
}
}
#endif
- rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);
-
+ rc = add_memory_resource(nid, resource);
if (rc) {
pr_warn("Cannot add additional memory (%i)\n", rc);
- return BP_ECANCELED;
+ goto err;
}
balloon_hotplug -= credit;
@@ -265,6 +306,9 @@ static enum bp_state reserve_additional_memory(long credit)
balloon_stats.balloon_hotplug = balloon_hotplug;
return BP_DONE;
+ err:
+ release_memory_resource(resource);
+ return BP_ECANCELED;
}
static void xen_online_page(struct page *page)
--
2.1.4
The stats used for memory hotplug make no sense and are fiddled with
in odd ways. Remove them and introduce total_pages to track the total
number of pages (both populated and unpopulated) including those within
hotplugged regions (note that this includes not yet onlined pages).
This will be used in the following commit when deciding whether
additional memory needs to be hotplugged.
Signed-off-by: David Vrabel <[email protected]>
---
drivers/xen/balloon.c | 75 +++++++++------------------------------------------
include/xen/balloon.h | 5 +---
2 files changed, 13 insertions(+), 67 deletions(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 29aeb8f..b5037b1 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -194,21 +194,6 @@ static enum bp_state update_schedule(enum bp_state state)
}
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-static long current_credit(void)
-{
- return balloon_stats.target_pages - balloon_stats.current_pages -
- balloon_stats.hotplug_pages;
-}
-
-static bool balloon_is_inflated(void)
-{
- if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
- balloon_stats.balloon_hotplug)
- return true;
- else
- return false;
-}
-
static struct resource *additional_memory_resource(phys_addr_t size)
{
struct resource *res;
@@ -300,10 +285,7 @@ static enum bp_state reserve_additional_memory(long credit)
goto err;
}
- balloon_hotplug -= credit;
-
- balloon_stats.hotplug_pages += credit;
- balloon_stats.balloon_hotplug = balloon_hotplug;
+ balloon_stats.total_pages += balloon_hotplug;
return BP_DONE;
err:
@@ -319,11 +301,6 @@ static void xen_online_page(struct page *page)
__balloon_append(page);
- if (balloon_stats.hotplug_pages)
- --balloon_stats.hotplug_pages;
- else
- --balloon_stats.balloon_hotplug;
-
mutex_unlock(&balloon_mutex);
}
@@ -340,32 +317,22 @@ static struct notifier_block xen_memory_nb = {
.priority = 0
};
#else
-static long current_credit(void)
+static enum bp_state reserve_additional_memory(long credit)
{
- unsigned long target = balloon_stats.target_pages;
-
- target = min(target,
- balloon_stats.current_pages +
- balloon_stats.balloon_low +
- balloon_stats.balloon_high);
-
- return target - balloon_stats.current_pages;
+ balloon_stats.target_pages = balloon_stats.current_pages;
+ return BP_DONE;
}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
-static bool balloon_is_inflated(void)
+static long current_credit(void)
{
- if (balloon_stats.balloon_low || balloon_stats.balloon_high)
- return true;
- else
- return false;
+ return balloon_stats.target_pages - balloon_stats.current_pages;
}
-static enum bp_state reserve_additional_memory(long credit)
+static bool balloon_is_inflated(void)
{
- balloon_stats.target_pages = balloon_stats.current_pages;
- return BP_DONE;
+ return balloon_stats.balloon_low || balloon_stats.balloon_high;
}
-#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
static enum bp_state increase_reservation(unsigned long nr_pages)
{
@@ -378,15 +345,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
.domid = DOMID_SELF
};
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
- if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
- nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
- balloon_stats.hotplug_pages += nr_pages;
- balloon_stats.balloon_hotplug -= nr_pages;
- return BP_DONE;
- }
-#endif
-
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
@@ -449,15 +407,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
.domid = DOMID_SELF
};
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
- if (balloon_stats.hotplug_pages) {
- nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
- balloon_stats.hotplug_pages -= nr_pages;
- balloon_stats.balloon_hotplug += nr_pages;
- return BP_DONE;
- }
-#endif
-
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
@@ -647,6 +596,8 @@ static void __init balloon_add_region(unsigned long start_pfn,
don't subtract from it. */
__balloon_append(page);
}
+
+ balloon_stats.total_pages += extra_pfn_end - start_pfn;
}
static int __init balloon_init(void)
@@ -664,6 +615,7 @@ static int __init balloon_init(void)
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
+ balloon_stats.total_pages = balloon_stats.current_pages;
balloon_stats.schedule_delay = 1;
balloon_stats.max_schedule_delay = 32;
@@ -671,9 +623,6 @@ static int __init balloon_init(void)
balloon_stats.max_retry_count = RETRY_UNLIMITED;
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
- balloon_stats.hotplug_pages = 0;
- balloon_stats.balloon_hotplug = 0;
-
set_online_page_callback(&xen_online_page);
register_memory_notifier(&xen_memory_nb);
#endif
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index cc2e1a7..c8aee7a 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -11,14 +11,11 @@ struct balloon_stats {
/* Number of pages in high- and low-memory balloons. */
unsigned long balloon_low;
unsigned long balloon_high;
+ unsigned long total_pages;
unsigned long schedule_delay;
unsigned long max_schedule_delay;
unsigned long retry_count;
unsigned long max_retry_count;
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
- unsigned long hotplug_pages;
- unsigned long balloon_hotplug;
-#endif
};
extern struct balloon_stats balloon_stats;
--
2.1.4
Now that we track the total number of pages (including hotplugged
regions), it is easy to determine if more memory needs to be
hotplugged.
Add a new BP_WAIT state to signal that the balloon process needs to
wait until kicked by the memory add notifier (when the new section is
onlined by userspace).
Signed-off-by: David Vrabel <[email protected]>
---
v2:
- New BP_WAIT status after adding new memory sections.
---
drivers/xen/balloon.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index b5037b1..ced34cd 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -75,12 +75,14 @@
* balloon_process() state:
*
* BP_DONE: done or nothing to do,
+ * BP_WAIT: wait to be rescheduled,
* BP_EAGAIN: error, go to sleep,
* BP_ECANCELED: error, balloon operation canceled.
*/
enum bp_state {
BP_DONE,
+ BP_WAIT,
BP_EAGAIN,
BP_ECANCELED
};
@@ -167,6 +169,9 @@ static struct page *balloon_next_page(struct page *page)
static enum bp_state update_schedule(enum bp_state state)
{
+ if (state == BP_WAIT)
+ return BP_WAIT;
+
if (state == BP_ECANCELED)
return BP_ECANCELED;
@@ -242,12 +247,22 @@ static void release_memory_resource(struct resource *resource)
* bit set). Real size of added memory is established at page onlining stage.
*/
-static enum bp_state reserve_additional_memory(long credit)
+static enum bp_state reserve_additional_memory(void)
{
+ long credit;
struct resource *resource;
int nid, rc;
unsigned long balloon_hotplug;
+ credit = balloon_stats.target_pages - balloon_stats.total_pages;
+
+ /*
+ * Already hotplugged enough pages? Wait for them to be
+ * onlined.
+ */
+ if (credit <= 0)
+ return BP_EAGAIN;
+
balloon_hotplug = round_up(credit, PAGES_PER_SECTION);
resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE);
@@ -287,7 +302,7 @@ static enum bp_state reserve_additional_memory(long credit)
balloon_stats.total_pages += balloon_hotplug;
- return BP_DONE;
+ return BP_WAIT;
err:
release_memory_resource(resource);
return BP_ECANCELED;
@@ -317,7 +332,7 @@ static struct notifier_block xen_memory_nb = {
.priority = 0
};
#else
-static enum bp_state reserve_additional_memory(long credit)
+static enum bp_state reserve_additional_memory(void)
{
balloon_stats.target_pages = balloon_stats.current_pages;
return BP_DONE;
@@ -484,7 +499,7 @@ static void balloon_process(struct work_struct *work)
if (balloon_is_inflated())
state = increase_reservation(credit);
else
- state = reserve_additional_memory(credit);
+ state = reserve_additional_memory();
}
if (credit < 0)
--
2.1.4
All users of alloc_xenballooned_pages() wanted low memory pages, so
remove the option for high memory.
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
---
arch/x86/xen/grant-table.c | 2 +-
drivers/xen/balloon.c | 21 ++++++++-------------
drivers/xen/grant-table.c | 2 +-
drivers/xen/privcmd.c | 2 +-
drivers/xen/xenbus/xenbus_client.c | 3 +--
include/xen/balloon.h | 3 +--
6 files changed, 13 insertions(+), 20 deletions(-)
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 1580e7a..e079500 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -133,7 +133,7 @@ static int __init xlated_setup_gnttab_pages(void)
kfree(pages);
return -ENOMEM;
}
- rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
+ rc = alloc_xenballooned_pages(nr_grant_frames, pages);
if (rc) {
pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
nr_grant_frames, rc);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index ced34cd..cc68a4d 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -136,17 +136,16 @@ static void balloon_append(struct page *page)
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
-static struct page *balloon_retrieve(bool prefer_highmem)
+static struct page *balloon_retrieve(bool require_lowmem)
{
struct page *page;
if (list_empty(&ballooned_pages))
return NULL;
- if (prefer_highmem)
- page = list_entry(ballooned_pages.prev, struct page, lru);
- else
- page = list_entry(ballooned_pages.next, struct page, lru);
+ page = list_entry(ballooned_pages.next, struct page, lru);
+ if (require_lowmem && PageHighMem(page))
+ return NULL;
list_del(&page->lru);
if (PageHighMem(page))
@@ -533,24 +532,20 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
* alloc_xenballooned_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get
* @pages: pages returned
- * @highmem: allow highmem pages
* @return 0 on success, error otherwise
*/
-int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages)
{
int pgno = 0;
struct page *page;
mutex_lock(&balloon_mutex);
while (pgno < nr_pages) {
- page = balloon_retrieve(highmem);
- if (page && (highmem || !PageHighMem(page))) {
+ page = balloon_retrieve(true);
+ if (page) {
pages[pgno++] = page;
} else {
enum bp_state st;
- if (page)
- balloon_append(page);
- st = decrease_reservation(nr_pages - pgno,
- highmem ? GFP_HIGHUSER : GFP_USER);
+ st = decrease_reservation(nr_pages - pgno, GFP_USER);
if (st != BP_DONE)
goto out_undo;
}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 62f591f..a4b702c 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -687,7 +687,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
int i;
int ret;
- ret = alloc_xenballooned_pages(nr_pages, pages, false);
+ ret = alloc_xenballooned_pages(nr_pages, pages);
if (ret < 0)
return ret;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 5a29616..59cfec9 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -401,7 +401,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
if (pages == NULL)
return -ENOMEM;
- rc = alloc_xenballooned_pages(numpgs, pages, 0);
+ rc = alloc_xenballooned_pages(numpgs, pages);
if (rc != 0) {
pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
numpgs, rc);
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 9ad3272..2a2da04 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -614,8 +614,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
if (!node)
return -ENOMEM;
- err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages,
- false /* lowmem */);
+ err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages);
if (err)
goto out_err;
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index c8aee7a..83efdeb 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -22,8 +22,7 @@ extern struct balloon_stats balloon_stats;
void balloon_set_new_target(unsigned long target);
-int alloc_xenballooned_pages(int nr_pages, struct page **pages,
- bool highmem);
+int alloc_xenballooned_pages(int nr_pages, struct page **pages);
void free_xenballooned_pages(int nr_pages, struct page **pages);
struct device;
--
2.1.4
alloc_xenballooned_pages() is used to get ballooned pages to back
foreign mappings etc. Instead of having to balloon out real pages,
use (if supported) hotplugged memory.
This makes more memory available to the guest and reduces
fragmentation in the p2m.
If userspace is lacking a udev rule (or similar) to online hotplugged
regions automatically, alloc_xenballooned_pages() will time out and
fall back to the old behaviour of ballooning out pages.
Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
---
drivers/xen/balloon.c | 32 ++++++++++++++++++++++++++------
include/xen/balloon.h | 1 +
2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index cc68a4d..fd6970f3 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -99,6 +99,7 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
+static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
/* Main work function, always executed in process context. */
static void balloon_process(struct work_struct *work);
@@ -127,6 +128,7 @@ static void __balloon_append(struct page *page)
list_add(&page->lru, &ballooned_pages);
balloon_stats.balloon_low++;
}
+ wake_up(&balloon_wq);
}
static void balloon_append(struct page *page)
@@ -253,7 +255,8 @@ static enum bp_state reserve_additional_memory(void)
int nid, rc;
unsigned long balloon_hotplug;
- credit = balloon_stats.target_pages - balloon_stats.total_pages;
+ credit = balloon_stats.target_pages + balloon_stats.target_unpopulated
+ - balloon_stats.total_pages;
/*
* Already hotplugged enough pages? Wait for them to be
@@ -334,7 +337,7 @@ static struct notifier_block xen_memory_nb = {
static enum bp_state reserve_additional_memory(void)
{
balloon_stats.target_pages = balloon_stats.current_pages;
- return BP_DONE;
+ return BP_ECANCELED;
}
#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
@@ -538,13 +541,31 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
{
int pgno = 0;
struct page *page;
+
mutex_lock(&balloon_mutex);
+
+ balloon_stats.target_unpopulated += nr_pages;
+
while (pgno < nr_pages) {
page = balloon_retrieve(true);
if (page) {
pages[pgno++] = page;
} else {
enum bp_state st;
+
+ st = reserve_additional_memory();
+ if (st != BP_ECANCELED) {
+ int ret;
+
+ mutex_unlock(&balloon_mutex);
+ ret = wait_event_timeout(balloon_wq,
+ !list_empty(&ballooned_pages),
+ msecs_to_jiffies(100));
+ mutex_lock(&balloon_mutex);
+ if (ret > 0)
+ continue;
+ }
+
st = decrease_reservation(nr_pages - pgno, GFP_USER);
if (st != BP_DONE)
goto out_undo;
@@ -553,11 +574,8 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
mutex_unlock(&balloon_mutex);
return 0;
out_undo:
- while (pgno)
- balloon_append(pages[--pgno]);
- /* Free the memory back to the kernel soon */
- schedule_delayed_work(&balloon_worker, 0);
mutex_unlock(&balloon_mutex);
+ free_xenballooned_pages(pgno, pages);
return -ENOMEM;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);
@@ -578,6 +596,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
balloon_append(pages[i]);
}
+ balloon_stats.target_unpopulated -= nr_pages;
+
/* The balloon may be too large now. Shrink it if needed. */
if (current_credit())
schedule_delayed_work(&balloon_worker, 0);
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 83efdeb..d1767df 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -8,6 +8,7 @@ struct balloon_stats {
/* We aim for 'current allocation' == 'target allocation'. */
unsigned long current_pages;
unsigned long target_pages;
+ unsigned long target_unpopulated;
/* Number of pages in high- and low-memory balloons. */
unsigned long balloon_low;
unsigned long balloon_high;
--
2.1.4
Rename alloc_p2m() to xen_alloc_p2m_entry() and export it.
This is useful for ensuring that a p2m entry is allocated (i.e., not a
shared missing or identity entry) so that subsequent set_phys_to_machine()
calls will require no further allocations.
Signed-off-by: David Vrabel <[email protected]>
---
arch/x86/include/asm/xen/page.h | 2 ++
arch/x86/xen/p2m.c | 16 ++++++++++------
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c44a5d5..960b380 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -45,6 +45,8 @@ extern unsigned long *xen_p2m_addr;
extern unsigned long xen_p2m_size;
extern unsigned long xen_max_p2m_pfn;
+extern int xen_alloc_p2m_entry(unsigned long pfn);
+
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8b7f18e..ef93ccf 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -503,7 +503,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
* the new pages are installed with cmpxchg; if we lose the race then
* simply free the page we allocated and use the one that's there.
*/
-static bool alloc_p2m(unsigned long pfn)
+int xen_alloc_p2m_entry(unsigned long pfn)
{
unsigned topidx, mididx;
unsigned long *top_mfn_p, *mid_mfn;
@@ -524,7 +524,7 @@ static bool alloc_p2m(unsigned long pfn)
/* PMD level is missing, allocate a new one */
ptep = alloc_p2m_pmd(addr, pte_pg);
if (!ptep)
- return false;
+ return -ENOMEM;
}
if (p2m_top_mfn) {
@@ -541,7 +541,7 @@ static bool alloc_p2m(unsigned long pfn)
mid_mfn = alloc_p2m_page();
if (!mid_mfn)
- return false;
+ return -ENOMEM;
p2m_mid_mfn_init(mid_mfn, p2m_missing);
@@ -567,7 +567,7 @@ static bool alloc_p2m(unsigned long pfn)
p2m = alloc_p2m_page();
if (!p2m)
- return false;
+ return -ENOMEM;
if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
p2m_init(p2m);
@@ -590,8 +590,9 @@ static bool alloc_p2m(unsigned long pfn)
free_p2m_page(p2m);
}
- return true;
+ return 0;
}
+EXPORT_SYMBOL(xen_alloc_p2m_entry);
unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e)
@@ -648,7 +649,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
- if (!alloc_p2m(pfn))
+ int ret;
+
+ ret = xen_alloc_p2m_entry(pfn);
+ if (ret < 0)
return false;
return __set_phys_to_machine(pfn, mfn);
--
2.1.4
Pages returned by alloc_xenballooned_pages() will be used for grant
mapping which will call set_phys_to_machine() (in PV guests).
Ballooned pages are set as INVALID_P2M_ENTRY in the p2m and thus may
be using the (shared) missing tables and a subsequent
set_phys_to_machine() will need to allocate new tables.
Since the grant mapping may be done from a context that cannot sleep,
the p2m entries must already be allocated.
Signed-off-by: David Vrabel <[email protected]>
---
drivers/xen/balloon.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index fd6970f3..8932d10 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -541,6 +541,7 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
{
int pgno = 0;
struct page *page;
+ int ret = -ENOMEM;
mutex_lock(&balloon_mutex);
@@ -550,6 +551,11 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
page = balloon_retrieve(true);
if (page) {
pages[pgno++] = page;
+#ifdef CONFIG_XEN_HAVE_PVMMU
+ ret = xen_alloc_p2m_entry(page_to_pfn(page));
+ if (ret < 0)
+ goto out_undo;
+#endif
} else {
enum bp_state st;
@@ -576,7 +582,7 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
out_undo:
mutex_unlock(&balloon_mutex);
free_xenballooned_pages(pgno, pages);
- return -ENOMEM;
+ return ret;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);
--
2.1.4
On Fri, Jul 24, 2015 at 12:47:47PM +0100, David Vrabel wrote:
> Rename alloc_p2m() to xen_alloc_p2m_entry() and export it.
>
> This is useful for ensuring that a p2m entry is allocated (i.e., not a
> shared missing or identity entry) so that subsequent set_phys_to_machine()
> calls will require no further allocations.
>
> Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/include/asm/xen/page.h | 2 ++
> arch/x86/xen/p2m.c | 16 ++++++++++------
> 2 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index c44a5d5..960b380 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -45,6 +45,8 @@ extern unsigned long *xen_p2m_addr;
> extern unsigned long xen_p2m_size;
> extern unsigned long xen_max_p2m_pfn;
>
> +extern int xen_alloc_p2m_entry(unsigned long pfn);
> +
> extern unsigned long get_phys_to_machine(unsigned long pfn);
> extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
> extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index 8b7f18e..ef93ccf 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -503,7 +503,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
> * the new pages are installed with cmpxchg; if we lose the race then
> * simply free the page we allocated and use the one that's there.
> */
> -static bool alloc_p2m(unsigned long pfn)
> +int xen_alloc_p2m_entry(unsigned long pfn)
> {
> unsigned topidx, mididx;
> unsigned long *top_mfn_p, *mid_mfn;
> @@ -524,7 +524,7 @@ static bool alloc_p2m(unsigned long pfn)
> /* PMD level is missing, allocate a new one */
> ptep = alloc_p2m_pmd(addr, pte_pg);
> if (!ptep)
> - return false;
> + return -ENOMEM;
> }
>
> if (p2m_top_mfn) {
> @@ -541,7 +541,7 @@ static bool alloc_p2m(unsigned long pfn)
>
> mid_mfn = alloc_p2m_page();
> if (!mid_mfn)
> - return false;
> + return -ENOMEM;
>
> p2m_mid_mfn_init(mid_mfn, p2m_missing);
>
> @@ -567,7 +567,7 @@ static bool alloc_p2m(unsigned long pfn)
>
> p2m = alloc_p2m_page();
> if (!p2m)
> - return false;
> + return -ENOMEM;
>
> if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
> p2m_init(p2m);
> @@ -590,8 +590,9 @@ static bool alloc_p2m(unsigned long pfn)
> free_p2m_page(p2m);
> }
>
> - return true;
> + return 0;
> }
> +EXPORT_SYMBOL(xen_alloc_p2m);
>
> unsigned long __init set_phys_range_identity(unsigned long pfn_s,
> unsigned long pfn_e)
> @@ -648,7 +649,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> {
> if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
> - if (!alloc_p2m(pfn))
> + int ret;
> +
> + ret = xen_alloc_p2m_entry(pfn);
> + if (ret < 0)
> return false;
>
> return __set_phys_to_machine(pfn, mfn);
> --
> 2.1.4
>
On Fri, Jul 24, 2015 at 12:47:43PM +0100, David Vrabel wrote:
> The stats used for memory hotplug make no sense and are fiddled with
> in odd ways. Remove them and introduce total_pages to track the total
> number of pages (both populated and unpopulated) including those within
> hotplugged regions (note that this includes not yet onlined pages).
>
> This will be used in the following commit when deciding whether
s/the following commit/"xen/balloon: only hotplug additional memory if required"
patch
> additional memory needs to be hotplugged.
>
> Signed-off-by: David Vrabel <[email protected]>
> ---
> drivers/xen/balloon.c | 75 +++++++++------------------------------------------
> include/xen/balloon.h | 5 +---
> 2 files changed, 13 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 29aeb8f..b5037b1 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -194,21 +194,6 @@ static enum bp_state update_schedule(enum bp_state state)
> }
>
> #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> -static long current_credit(void)
> -{
> - return balloon_stats.target_pages - balloon_stats.current_pages -
> - balloon_stats.hotplug_pages;
> -}
> -
> -static bool balloon_is_inflated(void)
> -{
> - if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
> - balloon_stats.balloon_hotplug)
> - return true;
> - else
> - return false;
> -}
> -
> static struct resource *additional_memory_resource(phys_addr_t size)
> {
> struct resource *res;
> @@ -300,10 +285,7 @@ static enum bp_state reserve_additional_memory(long credit)
> goto err;
> }
>
> - balloon_hotplug -= credit;
> -
> - balloon_stats.hotplug_pages += credit;
> - balloon_stats.balloon_hotplug = balloon_hotplug;
> + balloon_stats.total_pages += balloon_hotplug;
>
> return BP_DONE;
> err:
> @@ -319,11 +301,6 @@ static void xen_online_page(struct page *page)
>
> __balloon_append(page);
>
> - if (balloon_stats.hotplug_pages)
> - --balloon_stats.hotplug_pages;
> - else
> - --balloon_stats.balloon_hotplug;
> -
> mutex_unlock(&balloon_mutex);
> }
>
> @@ -340,32 +317,22 @@ static struct notifier_block xen_memory_nb = {
> .priority = 0
> };
> #else
> -static long current_credit(void)
> +static enum bp_state reserve_additional_memory(long credit)
> {
> - unsigned long target = balloon_stats.target_pages;
> -
> - target = min(target,
> - balloon_stats.current_pages +
> - balloon_stats.balloon_low +
> - balloon_stats.balloon_high);
> -
> - return target - balloon_stats.current_pages;
> + balloon_stats.target_pages = balloon_stats.current_pages;
> + return BP_DONE;
> }
> +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
>
> -static bool balloon_is_inflated(void)
> +static long current_credit(void)
> {
> - if (balloon_stats.balloon_low || balloon_stats.balloon_high)
> - return true;
> - else
> - return false;
> + return balloon_stats.target_pages - balloon_stats.current_pages;
> }
>
> -static enum bp_state reserve_additional_memory(long credit)
> +static bool balloon_is_inflated(void)
> {
> - balloon_stats.target_pages = balloon_stats.current_pages;
> - return BP_DONE;
> + return balloon_stats.balloon_low || balloon_stats.balloon_high;
> }
> -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
>
> static enum bp_state increase_reservation(unsigned long nr_pages)
> {
> @@ -378,15 +345,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
> .domid = DOMID_SELF
> };
>
> -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> - if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
> - nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
> - balloon_stats.hotplug_pages += nr_pages;
> - balloon_stats.balloon_hotplug -= nr_pages;
> - return BP_DONE;
> - }
> -#endif
> -
> if (nr_pages > ARRAY_SIZE(frame_list))
> nr_pages = ARRAY_SIZE(frame_list);
>
> @@ -449,15 +407,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
> .domid = DOMID_SELF
> };
>
> -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> - if (balloon_stats.hotplug_pages) {
> - nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
> - balloon_stats.hotplug_pages -= nr_pages;
> - balloon_stats.balloon_hotplug += nr_pages;
> - return BP_DONE;
> - }
> -#endif
> -
> if (nr_pages > ARRAY_SIZE(frame_list))
> nr_pages = ARRAY_SIZE(frame_list);
>
> @@ -647,6 +596,8 @@ static void __init balloon_add_region(unsigned long start_pfn,
> don't subtract from it. */
> __balloon_append(page);
> }
> +
> + balloon_stats.total_pages += extra_pfn_end - start_pfn;
> }
>
> static int __init balloon_init(void)
> @@ -664,6 +615,7 @@ static int __init balloon_init(void)
> balloon_stats.target_pages = balloon_stats.current_pages;
> balloon_stats.balloon_low = 0;
> balloon_stats.balloon_high = 0;
> + balloon_stats.total_pages = balloon_stats.current_pages;
>
> balloon_stats.schedule_delay = 1;
> balloon_stats.max_schedule_delay = 32;
> @@ -671,9 +623,6 @@ static int __init balloon_init(void)
> balloon_stats.max_retry_count = RETRY_UNLIMITED;
>
> #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> - balloon_stats.hotplug_pages = 0;
> - balloon_stats.balloon_hotplug = 0;
> -
> set_online_page_callback(&xen_online_page);
> register_memory_notifier(&xen_memory_nb);
> #endif
> diff --git a/include/xen/balloon.h b/include/xen/balloon.h
> index cc2e1a7..c8aee7a 100644
> --- a/include/xen/balloon.h
> +++ b/include/xen/balloon.h
> @@ -11,14 +11,11 @@ struct balloon_stats {
> /* Number of pages in high- and low-memory balloons. */
> unsigned long balloon_low;
> unsigned long balloon_high;
> + unsigned long total_pages;
> unsigned long schedule_delay;
> unsigned long max_schedule_delay;
> unsigned long retry_count;
> unsigned long max_retry_count;
> -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
> - unsigned long hotplug_pages;
> - unsigned long balloon_hotplug;
> -#endif
> };
>
> extern struct balloon_stats balloon_stats;
> --
> 2.1.4
>
On Fri, Jul 24, 2015 at 12:47:46PM +0100, David Vrabel wrote:
> alloc_xenballooned_pages() is used to get ballooned pages to back
> foreign mappings etc. Instead of having to balloon out real pages,
> use (if supported) hotplugged memory.
>
> This makes more memory available to the guest and reduces
> fragmentation in the p2m.
>
> If userspace is lacking a udev rule (or similar) to online hotplugged
Is that udev rule already in distros?
> regions automatically, alloc_xenballooned_pages() will timeout and
> fall back to the old behaviour of ballooning out pages.
>
> Signed-off-by: David Vrabel <[email protected]>
> Reviewed-by: Daniel Kiper <[email protected]>
> ---
> drivers/xen/balloon.c | 32 ++++++++++++++++++++++++++------
> include/xen/balloon.h | 1 +
> 2 files changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index cc68a4d..fd6970f3 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -99,6 +99,7 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
>
> /* List of ballooned pages, threaded through the mem_map array. */
> static LIST_HEAD(ballooned_pages);
> +static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
>
> /* Main work function, always executed in process context. */
> static void balloon_process(struct work_struct *work);
> @@ -127,6 +128,7 @@ static void __balloon_append(struct page *page)
> list_add(&page->lru, &ballooned_pages);
> balloon_stats.balloon_low++;
> }
> + wake_up(&balloon_wq);
> }
>
> static void balloon_append(struct page *page)
> @@ -253,7 +255,8 @@ static enum bp_state reserve_additional_memory(void)
> int nid, rc;
> unsigned long balloon_hotplug;
>
> - credit = balloon_stats.target_pages - balloon_stats.total_pages;
> + credit = balloon_stats.target_pages + balloon_stats.target_unpopulated
> + - balloon_stats.total_pages;
>
> /*
> * Already hotplugged enough pages? Wait for them to be
> @@ -334,7 +337,7 @@ static struct notifier_block xen_memory_nb = {
> static enum bp_state reserve_additional_memory(void)
> {
> balloon_stats.target_pages = balloon_stats.current_pages;
> - return BP_DONE;
> + return BP_ECANCELED;
> }
> #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
>
> @@ -538,13 +541,31 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
> {
> int pgno = 0;
> struct page *page;
> +
> mutex_lock(&balloon_mutex);
> +
> + balloon_stats.target_unpopulated += nr_pages;
> +
> while (pgno < nr_pages) {
> page = balloon_retrieve(true);
> if (page) {
> pages[pgno++] = page;
> } else {
> enum bp_state st;
> +
> + st = reserve_additional_memory();
> + if (st != BP_ECANCELED) {
> + int ret;
> +
> + mutex_unlock(&balloon_mutex);
> + ret = wait_event_timeout(balloon_wq,
> + !list_empty(&ballooned_pages),
> + msecs_to_jiffies(100));
> + mutex_lock(&balloon_mutex);
> + if (ret > 0)
> + continue;
> + }
> +
> st = decrease_reservation(nr_pages - pgno, GFP_USER);
> if (st != BP_DONE)
> goto out_undo;
> @@ -553,11 +574,8 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
> mutex_unlock(&balloon_mutex);
> return 0;
> out_undo:
> - while (pgno)
> - balloon_append(pages[--pgno]);
> - /* Free the memory back to the kernel soon */
> - schedule_delayed_work(&balloon_worker, 0);
> mutex_unlock(&balloon_mutex);
> + free_xenballooned_pages(pgno, pages);
> return -ENOMEM;
> }
> EXPORT_SYMBOL(alloc_xenballooned_pages);
> @@ -578,6 +596,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
> balloon_append(pages[i]);
> }
>
> + balloon_stats.target_unpopulated -= nr_pages;
> +
> /* The balloon may be too large now. Shrink it if needed. */
> if (current_credit())
> schedule_delayed_work(&balloon_worker, 0);
> diff --git a/include/xen/balloon.h b/include/xen/balloon.h
> index 83efdeb..d1767df 100644
> --- a/include/xen/balloon.h
> +++ b/include/xen/balloon.h
> @@ -8,6 +8,7 @@ struct balloon_stats {
> /* We aim for 'current allocation' == 'target allocation'. */
> unsigned long current_pages;
> unsigned long target_pages;
> + unsigned long target_unpopulated;
> /* Number of pages in high- and low-memory balloons. */
> unsigned long balloon_low;
> unsigned long balloon_high;
> --
> 2.1.4
>
Hi David,
On 24/07/2015 12:47, David Vrabel wrote:
> Pages returned by alloc_xenballooned_pages() will be used for grant
> mapping which will call set_phys_to_machine() (in PV guests).
>
> Ballooned pages are set as INVALID_P2M_ENTRY in the p2m and thus may
> be using the (shared) missing tables and a subsequent
> set_phys_to_machine() will need to allocate new tables.
>
> Since the grant mapping may be done from a context that cannot sleep,
> the p2m entries must already be allocated.
>
> Signed-off-by: David Vrabel <[email protected]>
> ---
> drivers/xen/balloon.c | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index fd6970f3..8932d10 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -541,6 +541,7 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
> {
> int pgno = 0;
> struct page *page;
> + int ret = -ENOMEM;
>
> mutex_lock(&balloon_mutex);
>
> @@ -550,6 +551,11 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
> page = balloon_retrieve(true);
> if (page) {
> pages[pgno++] = page;
> +#ifdef CONFIG_XEN_HAVE_PVMMU
> + ret = xen_alloc_p2m_entry(page_to_pfn(page));
Don't you want to call this function only when the guest is not using
auto-translated physmap?
> + if (ret < 0)
> + goto out_undo;
> +#endif
> } else {
> enum bp_state st;
>
> @@ -576,7 +582,7 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
> out_undo:
> mutex_unlock(&balloon_mutex);
> free_xenballooned_pages(pgno, pages);
> - return -ENOMEM;
> + return ret;
> }
> EXPORT_SYMBOL(alloc_xenballooned_pages);
>
>
Regards,
--
Julien Grall
On 25/07/15 00:21, Julien Grall wrote:
> On 24/07/2015 12:47, David Vrabel wrote:
>> @@ -550,6 +551,11 @@ int alloc_xenballooned_pages(int nr_pages, struct
>> page **pages)
>> page = balloon_retrieve(true);
>> if (page) {
>> pages[pgno++] = page;
>> +#ifdef CONFIG_XEN_HAVE_PVMMU
>> + ret = xen_alloc_p2m_entry(page_to_pfn(page));
>
> Don't you want to call this function only when the guest is not using
> auto-translated physmap?
xen_alloc_p2m_entry() is a nop in auto-xlate guests, so no need for an
additional check here.
David
On 24/07/15 19:55, Konrad Rzeszutek Wilk wrote:
> On Fri, Jul 24, 2015 at 12:47:46PM +0100, David Vrabel wrote:
>> alloc_xenballooned_pages() is used to get ballooned pages to back
>> foreign mappings etc. Instead of having to balloon out real pages,
>> use (if supported) hotplugged memory.
>>
>> This makes more memory available to the guest and reduces
>> fragmentation in the p2m.
>>
>> If userspace is lacking a udev rule (or similar) to online hotplugged
>
> Is that udev rule already in distros?
Not all, which makes me think that this behaviour should be enabled by
userspace (via a module parameter). This would also allow me to drop
the timeout and fallback path which I put in to handle the no udev rule
case.
David
On 27/07/15 10:30, David Vrabel wrote:
> On 25/07/15 00:21, Julien Grall wrote:
>> On 24/07/2015 12:47, David Vrabel wrote:
>>> @@ -550,6 +551,11 @@ int alloc_xenballooned_pages(int nr_pages, struct
>>> page **pages)
>>> page = balloon_retrieve(true);
>>> if (page) {
>>> pages[pgno++] = page;
>>> +#ifdef CONFIG_XEN_HAVE_PVMMU
>>> + ret = xen_alloc_p2m_entry(page_to_pfn(page));
>>
>> Don't you want to call this function only when the guest is not using
>> auto-translated physmap?
>
> xen_alloc_p2m_entry() is a nop in auto-xlate guests, so no need for an
> additional check here.
I don't have the impression it's the case or it's not obvious.
For instance, xen_p2m_addr, which is used within xen_alloc_p2m_entry() (old
name alloc_p2m()), is never set for auto-xlate guests. Therefore its value
is 0.
Same for p2m_identity and p2m_missing & co.
Regards,
--
Julien Grall
On 27/07/15 12:01, Julien Grall wrote:
> On 27/07/15 10:30, David Vrabel wrote:
>> On 25/07/15 00:21, Julien Grall wrote:
>>> On 24/07/2015 12:47, David Vrabel wrote:
>>>> @@ -550,6 +551,11 @@ int alloc_xenballooned_pages(int nr_pages, struct
>>>> page **pages)
>>>> page = balloon_retrieve(true);
>>>> if (page) {
>>>> pages[pgno++] = page;
>>>> +#ifdef CONFIG_XEN_HAVE_PVMMU
>>>> + ret = xen_alloc_p2m_entry(page_to_pfn(page));
>>>
>>> Don't you want to call this function only when the guest is not using
>>> auto-translated physmap?
>>
>> xen_alloc_p2m_entry() is a nop in auto-xlate guests, so no need for an
>> additional check here.
>
> I don't have the impression it's the case or it's not obvious.
Oops. You're right. I'll add a
if (xen_feature(XENFEAT_auto_translated_physmap))
return true;
check at the top.
David
On Fri, Jul 24, 2015 at 12:47:38PM +0100, David Vrabel wrote:
> The series improves the use of hotplug memory in the Xen balloon
> driver.
>
> - Reliably find a non-conflicting location for the hotplugged memory
> (this fixes memory hotplug in a number of cases, particularly in
> dom0).
>
> - Use hotplugged memory for alloc_xenballooned_pages() (keeping more
> memory available for the domain and reducing fragmentation of the
> p2m).
>
> Changes in v2:
> - New BP_WAIT state to signal the balloon process to wait for
> userspace to online the new memory.
> - Preallocate P2M entries in alloc_xenballooned_pages() so they do not
> need allocated later (in a context where GFP_KERNEL allocations are
> not possible).
Thanks! I will take a look at it this week.
Daniel
On Fri, Jul 24, 2015 at 12:47:39PM +0100, David Vrabel wrote:
> Add add_memory_resource() to add memory using an existing "System RAM"
> resource. This is useful if the memory region is being located by
> finding a free resource slot with allocate_resource().
>
> Xen guests will make use of this in their balloon driver to hotplug
> arbitrary amounts of memory in response to toolstack requests.
>
> Signed-off-by: David Vrabel <[email protected]>
> Cc: Andrew Morton <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
Daniel
On Fri, Jul 24, 2015 at 12:47:43PM +0100, David Vrabel wrote:
> The stats used for memory hotplug make no sense and are fiddled with
> in odd ways. Remove them and introduce total_pages to track the total
> number of pages (both populated and unpopulated) including those within
> hotplugged regions (note that this includes not yet onlined pages).
>
> This will be used in the following commit when deciding whether
> additional memory needs to be hotplugged.
>
> Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
Daniel
On Fri, Jul 24, 2015 at 12:47:44PM +0100, David Vrabel wrote:
> Now that we track the total number of pages (included hotplugged
> regions), it is easy to determine if more memory needs to be
> hotplugged.
>
> Add a new BP_WAIT state to signal that the balloon process needs to
> wait until kicked by the memory add notifier (when the new section is
> onlined by userspace).
>
> Signed-off-by: David Vrabel <[email protected]>
> ---
> v2:
> - New BP_WAIT status after adding new memory sections.
> ---
> drivers/xen/balloon.c | 23 +++++++++++++++++++----
> 1 file changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index b5037b1..ced34cd 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -75,12 +75,14 @@
> * balloon_process() state:
> *
> * BP_DONE: done or nothing to do,
> + * BP_WAIT: wait to be rescheduled,
BP_SLEEP? BP_WAIT suggests that balloon process waits for something in a loop.
> * BP_EAGAIN: error, go to sleep,
> * BP_ECANCELED: error, balloon operation canceled.
> */
>
> enum bp_state {
> BP_DONE,
> + BP_WAIT,
> BP_EAGAIN,
> BP_ECANCELED
> };
> @@ -167,6 +169,9 @@ static struct page *balloon_next_page(struct page *page)
>
> static enum bp_state update_schedule(enum bp_state state)
> {
> + if (state == BP_WAIT)
> + return BP_WAIT;
> +
> if (state == BP_ECANCELED)
> return BP_ECANCELED;
>
> @@ -242,12 +247,22 @@ static void release_memory_resource(struct resource *resource)
> * bit set). Real size of added memory is established at page onlining stage.
> */
Please update the comment above (only partially visible here) to match the current behaviour.
> -static enum bp_state reserve_additional_memory(long credit)
> +static enum bp_state reserve_additional_memory(void)
> {
> + long credit;
> struct resource *resource;
> int nid, rc;
> unsigned long balloon_hotplug;
>
> + credit = balloon_stats.target_pages - balloon_stats.total_pages;
> +
> + /*
> + * Already hotplugged enough pages? Wait for them to be
> + * onlined.
> + */
Please change this comment to something like this:
Already hotplugged enough pages? If yes then go to sleep.
> + if (credit <= 0)
> + return BP_EAGAIN;
No, this should be BP_WAIT (BP_SLEEP). Otherwise, when somebody
touches balloon_stats.target_pages, the balloon process will be
rescheduled unnecessarily until pages are onlined up to
balloon_stats.total_pages. We do not want that.
> +
> balloon_hotplug = round_up(credit, PAGES_PER_SECTION);
>
> resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE);
> @@ -287,7 +302,7 @@ static enum bp_state reserve_additional_memory(long credit)
>
> balloon_stats.total_pages += balloon_hotplug;
>
> - return BP_DONE;
> + return BP_WAIT;
> err:
Please add one empty line before the err label.
Daniel
On 29/07/15 16:55, Daniel Kiper wrote:
> On Fri, Jul 24, 2015 at 12:47:44PM +0100, David Vrabel wrote:
>> --- a/drivers/xen/balloon.c
>> +++ b/drivers/xen/balloon.c
>> @@ -75,12 +75,14 @@
>> * balloon_process() state:
>> *
>> * BP_DONE: done or nothing to do,
>> + * BP_WAIT: wait to be rescheduled,
>
> BP_SLEEP? BP_WAIT suggests that balloon process waits for something in a loop.
Waiting in a loop is what wait_event() does.
David
On Fri, Jul 24, 2015 at 12:47:47PM +0100, David Vrabel wrote:
> Rename alloc_p2m() to xen_alloc_p2m_entry() and export it.
>
> This is useful for ensuring that a p2m entry is allocated (i.e., not a
> shared missing or identity entry) so that subsequent set_phys_to_machine()
> calls will require no further allocations.
>
> Signed-off-by: David Vrabel <[email protected]>
Reviewed-by: Daniel Kiper <[email protected]>
... but please add a line to the commit message saying that the function
exported by this patch will be used by the next one.
Daniel