Memory hotplug support for the Xen balloon driver. Note that
hotplugged memory is not onlined automatically; it must be
onlined by the user through the standard sysfs interface.
There are a few prerequisite patches which fix some problems
found during work on the memory hotplug patch or add some features
which are needed by this patch. They are available here:
- https://lkml.org/lkml/2011/3/28/94,
- https://lkml.org/lkml/2011/3/28/98.
I have received notice that the previous series of patches broke
machine migration under Xen. I am going to confirm that and
solve that problem ASAP. I have not received any reports
of other problems so far.
Signed-off-by: Daniel Kiper <[email protected]>
---
drivers/xen/Kconfig | 10 +++
drivers/xen/balloon.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++++-
include/xen/balloon.h | 4 +
3 files changed, 160 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index e5ecae6..39df71b 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -9,6 +9,16 @@ config XEN_BALLOON
the system to expand the domain's memory allocation, or alternatively
return unneeded memory to the system.
+config XEN_BALLOON_MEMORY_HOTPLUG
+ bool "Memory hotplug support for Xen balloon driver"
+ default n
+ depends on XEN_BALLOON && MEMORY_HOTPLUG
+ help
+ Memory hotplug support for Xen balloon driver allows expanding memory
+ available for the system above limit declared at system startup.
+ It is very useful on critical systems which require long
+ run without rebooting.
+
config XEN_SCRUB_PAGES
bool "Scrub pages before returning them to system"
depends on XEN_BALLOON
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f54290b..189023e 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -4,6 +4,12 @@
* Copyright (c) 2003, B Dragovic
* Copyright (c) 2003-2004, M Williamson, K Fraser
* Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
+ *
+ * Memory hotplug support was written by Daniel Kiper. Work on
+ * it was sponsored by Google under Google Summer of Code 2010
+ * program. Jeremy Fitzhardinge from Xen.org was the mentor for
+ * this project.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
@@ -40,6 +46,9 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/gfp.h>
+#include <linux/notifier.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -194,6 +203,96 @@ static enum bp_state update_schedule(enum bp_state state)
return BP_EAGAIN;
}
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static long current_credit(void)
+{
+ return balloon_stats.target_pages - balloon_stats.current_pages -
+ balloon_stats.hotplug_pages;
+}
+
+static bool balloon_is_inflated(void)
+{
+ if (balloon_stats.balloon_low || balloon_stats.balloon_high ||
+ balloon_stats.balloon_hotplug)
+ return true;
+ else
+ return false;
+}
+
+/*
+ * reserve_additional_memory() adds memory region of size >= credit above
+ * max_pfn. New region is section aligned and size is modified to be multiple
+ * of section size. Those features allow optimal use of address space and
+ * establish proper alignment when this function is called first time after
+ * boot (last section not fully populated at boot time may contain unused
+ * memory pages with PG_reserved bit not set; online_pages() does not allow
+ * page onlining in whole section if first page does not have PG_reserved
+ * bit set). Real size of added memory is established at page onlining stage.
+ */
+
+static enum bp_state reserve_additional_memory(long credit)
+{
+ int nid, rc;
+ u64 start;
+ unsigned long balloon_hotplug = credit;
+
+ start = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
+ balloon_hotplug = (balloon_hotplug & PAGE_SECTION_MASK) + PAGES_PER_SECTION;
+ nid = memory_add_physaddr_to_nid(start);
+
+ rc = add_memory(nid, start, balloon_hotplug << PAGE_SHIFT);
+
+ if (rc) {
+ pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc);
+ return BP_EAGAIN;
+ }
+
+ balloon_hotplug -= credit;
+
+ balloon_stats.hotplug_pages += credit;
+ balloon_stats.balloon_hotplug = balloon_hotplug;
+
+ return BP_DONE;
+}
+
+static int xen_online_page_notifier(struct notifier_block *nb, unsigned long val, void *v)
+{
+ struct page *page = v;
+
+ __online_page_increment_counters(page, OP_DO_NOT_INCREMENT_TOTAL_COUNTERS);
+
+ mutex_lock(&balloon_mutex);
+
+ __balloon_append(page);
+
+ if (balloon_stats.hotplug_pages)
+ --balloon_stats.hotplug_pages;
+ else
+ --balloon_stats.balloon_hotplug;
+
+ mutex_unlock(&balloon_mutex);
+
+ return NOTIFY_STOP;
+}
+
+static struct notifier_block xen_online_page_nb = {
+ .notifier_call = xen_online_page_notifier,
+ .priority = 10
+};
+
+static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
+{
+ if (val == MEM_ONLINE)
+ schedule_delayed_work(&balloon_worker, 0);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block xen_memory_nb = {
+ .notifier_call = xen_memory_notifier,
+ .priority = 0
+};
+#else
static long current_credit(void)
{
unsigned long target = balloon_stats.target_pages;
@@ -206,6 +305,21 @@ static long current_credit(void)
return target - balloon_stats.current_pages;
}
+static int balloon_is_inflated(void)
+{
+ if (balloon_stats.balloon_low || balloon_stats.balloon_high)
+ return 1;
+ else
+ return 0;
+}
+
+static enum bp_state reserve_additional_memory(long credit)
+{
+ balloon_stats.target_pages = balloon_stats.current_pages;
+ return BP_DONE;
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
+
static enum bp_state increase_reservation(unsigned long nr_pages)
{
int rc;
@@ -217,6 +331,15 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
.domid = DOMID_SELF
};
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+ if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
+ nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
+ balloon_stats.hotplug_pages += nr_pages;
+ balloon_stats.balloon_hotplug -= nr_pages;
+ return BP_DONE;
+ }
+#endif
+
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
@@ -279,6 +402,15 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
.domid = DOMID_SELF
};
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+ if (balloon_stats.hotplug_pages) {
+ nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
+ balloon_stats.hotplug_pages -= nr_pages;
+ balloon_stats.balloon_hotplug += nr_pages;
+ return BP_DONE;
+ }
+#endif
+
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
@@ -340,8 +472,12 @@ static void balloon_process(struct work_struct *work)
do {
credit = current_credit();
- if (credit > 0)
- state = increase_reservation(credit);
+ if (credit > 0) {
+ if (balloon_is_inflated())
+ state = increase_reservation(credit);
+ else
+ state = reserve_additional_memory(credit);
+ }
if (credit < 0)
state = decrease_reservation(-credit, GFP_BALLOON);
@@ -448,6 +584,14 @@ static int __init balloon_init(void)
balloon_stats.retry_count = 1;
balloon_stats.max_retry_count = RETRY_UNLIMITED;
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+ balloon_stats.hotplug_pages = 0;
+ balloon_stats.balloon_hotplug = 0;
+
+ register_online_page_notifier(&xen_online_page_nb);
+ register_memory_notifier(&xen_memory_nb);
+#endif
+
/*
* Initialise the balloon with excess memory space. We need
* to make sure we don't add memory which doesn't exist or
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index a2b22f0..aeca6ae 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -15,6 +15,10 @@ struct balloon_stats {
unsigned long max_schedule_delay;
unsigned long retry_count;
unsigned long max_retry_count;
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+ unsigned long hotplug_pages;
+ unsigned long balloon_hotplug;
+#endif
};
extern struct balloon_stats balloon_stats;
--
1.5.6.5
On Mon, 2011-03-28 at 11:47 +0200, Daniel Kiper wrote:
>
> +static enum bp_state reserve_additional_memory(long credit)
> +{
> + int nid, rc;
> + u64 start;
> + unsigned long balloon_hotplug = credit;
> +
> + start = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
> + balloon_hotplug = (balloon_hotplug & PAGE_SECTION_MASK) + PAGES_PER_SECTION;
> + nid = memory_add_physaddr_to_nid(start);
Is the 'balloon_hotplug' calculation correct? I _think_ you're trying
to round up to the SECTION_SIZE_PAGES. But, if 'credit' was already
section-aligned I think you'll unnecessarily round up to the next
SECTION_SIZE_PAGES boundary. Should it just be:
balloon_hotplug = ALIGN(balloon_hotplug, PAGES_PER_SECTION);
You might also want to consider some nicer units for those suckers.
'start_paddr' is _much_ easier to grok than 'start', for instance.
-- Dave
On Mon, Mar 28, 2011 at 08:55:27AM -0700, Dave Hansen wrote:
> On Mon, 2011-03-28 at 11:47 +0200, Daniel Kiper wrote:
> >
> > +static enum bp_state reserve_additional_memory(long credit)
> > +{
> > + int nid, rc;
> > + u64 start;
> > + unsigned long balloon_hotplug = credit;
> > +
> > + start = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
> > + balloon_hotplug = (balloon_hotplug & PAGE_SECTION_MASK) + PAGES_PER_SECTION;
> > + nid = memory_add_physaddr_to_nid(start);
>
> Is the 'balloon_hotplug' calculation correct? I _think_ you're trying
> to round up to the SECTION_SIZE_PAGES. But, if 'credit' was already
> section-aligned I think you'll unnecessarily round up to the next
> SECTION_SIZE_PAGES boundary. Should it just be:
>
> balloon_hotplug = ALIGN(balloon_hotplug, PAGES_PER_SECTION);
Yes, you are right. I am wrong. I will correct that. However, as I said
earlier, I do not like ALIGN() in a size context. For me, ALIGN() is an operation
on an address which aligns this address to a specified boundary. That is
why I prefer to use an open-coded version here (I agree that it is equivalent
to ALIGN()). I think that a ROUND() macro would be better in a size context.
However, I am not a native English speaker, so if I missed something, please
correct me.
> You might also want to consider some nicer units for those suckers.
What do you mean? I think that in this context PAGES_PER_SECTION
is quite good.
> 'start_paddr' is _much_ easier to grok than 'start', for instance.
OK.
Daniel
On Tue, 2011-03-29 at 20:18 +0200, Daniel Kiper wrote:
> On Mon, Mar 28, 2011 at 08:55:27AM -0700, Dave Hansen wrote:
> > On Mon, 2011-03-28 at 11:47 +0200, Daniel Kiper wrote:
> > >
> > > +static enum bp_state reserve_additional_memory(long credit)
> > > +{
> > > + int nid, rc;
> > > + u64 start;
> > > + unsigned long balloon_hotplug = credit;
> > > +
> > > + start = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
> > > + balloon_hotplug = (balloon_hotplug & PAGE_SECTION_MASK) + PAGES_PER_SECTION;
> > > + nid = memory_add_physaddr_to_nid(start);
> >
> > Is the 'balloon_hotplug' calculation correct? I _think_ you're trying
> > to round up to the SECTION_SIZE_PAGES. But, if 'credit' was already
> > section-aligned I think you'll unnecessarily round up to the next
> > SECTION_SIZE_PAGES boundary. Should it just be:
> >
> > balloon_hotplug = ALIGN(balloon_hotplug, PAGES_PER_SECTION);
>
> Yes, you are right. I am wrong. I will correct that. However, as I said
> earlier, I do not like ALIGN() in a size context. For me, ALIGN() is an operation
> on an address which aligns this address to a specified boundary. That is
> why I prefer to use an open-coded version here (I agree that it is equivalent
> to ALIGN()). I think that a ROUND() macro would be better in a size context.
> However, I am not a native English speaker, so if I missed something, please
> correct me.
The only problem with open-coding it is that it's more likely to have
bugs. But, sure, ROUND() sounds OK, as long as it does what you intend.
I'm still not quite sure what your intent here is, or in which direction
you're trying to round and why.
> > You might also want to consider some nicer units for those suckers.
>
> What do you mean? I think that in this context PAGES_PER_SECTION
> is quite good.
Memory management code is tricky. We keep addresses in many forms:
virtual addresses, physical addresses, pfns, 'struct page', etc... I've
found it very useful in the past to ensure that I'm explicit about what
I'm dealing with among those.
In this case, PAGES_PER_SECTION says that "balloon_hotplug" is intended
to be either a physical address or a page count. But, that only says
what you're filling the variable with, not what you _intend_ it to
contain.
-- Dave