This patch applies to Linus' git tree, git commit 98b98d316349e9a028e632629fe813d07fa5afdd
(Merge branch 'drm-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6)
with a few prerequisite patches available at https://lkml.org/lkml/2011/5/2/296
and https://lkml.org/lkml/2011/5/17/408 (all prerequisite patches were included in -mm tree).
This patch contains online_page_callback and the appropriate functions for
registering/unregistering online page callbacks. It allows machine-specific
tasks to be performed during the page onlining stage, which is required
to implement memory hotplug in virtual machines. Currently this patch
is required by the latest memory hotplug support patch for the Xen balloon
driver, which will be posted soon.
Additionally, the original online_page() function was split into the
following functions doing "atomic" operations:
- __online_page_set_limits() - set new limits for memory management code,
- __online_page_increment_counters() - increment totalram_pages and totalhigh_pages,
- __online_page_free() - free page to allocator.
It was done to:
- not duplicate existing code,
- ease hotplug code development by use of a well-defined interface,
- avoid stupid bugs which are unavoidable when the same code
(by design) is developed in many places.
Signed-off-by: Daniel Kiper <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
---
include/linux/memory_hotplug.h | 11 +++++-
mm/memory_hotplug.c | 68 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 74 insertions(+), 5 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8122018..0b8e2a7 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -68,12 +68,19 @@ static inline void zone_seqlock_init(struct zone *zone)
extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
-/* need some defines for these for archs that don't support it */
-extern void online_page(struct page *page);
/* VM interface that may be used by firmware interface */
extern int online_pages(unsigned long, unsigned long);
extern void __offline_isolated_pages(unsigned long, unsigned long);
+typedef void (*online_page_callback_t)(struct page *page);
+
+extern int set_online_page_callback(online_page_callback_t callback);
+extern int restore_online_page_callback(online_page_callback_t callback);
+
+extern void __online_page_set_limits(struct page *page);
+extern void __online_page_increment_counters(struct page *page);
+extern void __online_page_free(struct page *page);
+
#ifdef CONFIG_MEMORY_HOTREMOVE
extern bool is_pageblock_removable_nolock(struct page *page);
#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a807ccb..9d47c39 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -34,6 +34,17 @@
#include "internal.h"
+/*
+ * online_page_callback contains pointer to current page onlining function.
+ * Initially it is generic_online_page(). If it is required it could be
+ * changed by calling set_online_page_callback() for callback registration
+ * and restore_online_page_callback() for generic callback restore.
+ */
+
+static void generic_online_page(struct page *page);
+
+static online_page_callback_t online_page_callback = generic_online_page;
+
DEFINE_MUTEX(mem_hotplug_mutex);
void lock_memory_hotplug(void)
@@ -361,23 +372,74 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
}
EXPORT_SYMBOL_GPL(__remove_pages);
-void online_page(struct page *page)
+int set_online_page_callback(online_page_callback_t callback)
+{
+ int rc = -EINVAL;
+
+ lock_memory_hotplug();
+
+ if (online_page_callback == generic_online_page) {
+ online_page_callback = callback;
+ rc = 0;
+ }
+
+ unlock_memory_hotplug();
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(set_online_page_callback);
+
+int restore_online_page_callback(online_page_callback_t callback)
+{
+ int rc = -EINVAL;
+
+ lock_memory_hotplug();
+
+ if (online_page_callback == callback) {
+ online_page_callback = generic_online_page;
+ rc = 0;
+ }
+
+ unlock_memory_hotplug();
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(restore_online_page_callback);
+
+void __online_page_set_limits(struct page *page)
{
unsigned long pfn = page_to_pfn(page);
- totalram_pages++;
if (pfn >= num_physpages)
num_physpages = pfn + 1;
+}
+EXPORT_SYMBOL_GPL(__online_page_set_limits);
+
+void __online_page_increment_counters(struct page *page)
+{
+ totalram_pages++;
#ifdef CONFIG_HIGHMEM
if (PageHighMem(page))
totalhigh_pages++;
#endif
+}
+EXPORT_SYMBOL_GPL(__online_page_increment_counters);
+void __online_page_free(struct page *page)
+{
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
}
+EXPORT_SYMBOL_GPL(__online_page_free);
+
+static void generic_online_page(struct page *page)
+{
+ __online_page_set_limits(page);
+ __online_page_increment_counters(page);
+ __online_page_free(page);
+}
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
@@ -388,7 +450,7 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
if (PageReserved(pfn_to_page(start_pfn)))
for (i = 0; i < nr_pages; i++) {
page = pfn_to_page(start_pfn + i);
- online_page(page);
+ online_page_callback(page);
onlined_pages++;
}
*(unsigned long *)arg = onlined_pages;
--
1.5.6.5
On Wed, 25 May 2011 00:27:33 +0200
Daniel Kiper <[email protected]> wrote:
> This patch applies to Linus' git tree, git commit 98b98d316349e9a028e632629fe813d07fa5afdd
> (Merge branch 'drm-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6)
> with a few prerequisite patches available at https://lkml.org/lkml/2011/5/2/296
> and https://lkml.org/lkml/2011/5/17/408 (all prerequisite patches were included in -mm tree).
>
> This patch contains online_page_callback and apropriate functions for
> registering/unregistering online page callbacks. It allows to do some
> machine specific tasks during online page stage which is required
> to implement memory hotplug in virtual machines. Currently this patch
> is required by latest memory hotplug support for Xen balloon driver
> patch which will be posted soon.
>
> Additionally, originial online_page() function was splited into
> following functions doing "atomic" operations:
> - __online_page_set_limits() - set new limits for memory management code,
> - __online_page_increment_counters() - increment totalram_pages and totalhigh_pages,
> - __online_page_free() - free page to allocator.
>
> It was done to:
> - not duplicate existing code,
> - ease hotplug code devolpment by usage of well defined interface,
> - avoid stupid bugs which are unavoidable when the same code
> (by design) is developed in many places.
I grabbed this and the xen patch. I assume that all prerequisites
are now in mainline?
Please give some thought to making this extra code Kconfigurable, and
selected by Xen? See if we can avoid a bit of bloat for other kernel
users.
What is missing from the patchset is an explanation of why we should
merge it ;) Why is this feature desirable? What value does it provide
to our users? Why should we bother? Answering these questions in a
form which can be pasted into the changelog would be convenient,
thanks.
Is there any prospect that the other virtualisation schemes will use
this facility? If not, why not?
>
> ...
>
> @@ -388,7 +450,7 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
> if (PageReserved(pfn_to_page(start_pfn)))
> for (i = 0; i < nr_pages; i++) {
> page = pfn_to_page(start_pfn + i);
> - online_page(page);
> + online_page_callback(page);
nit. I'll change this to
(*online_page_callback)(page);
because that syntax communicates some useful information to the reader.
On Thu, Jun 02, 2011 at 12:26:07PM -0700, Andrew Morton wrote:
> On Wed, 25 May 2011 00:27:33 +0200
> Daniel Kiper <[email protected]> wrote:
>
> > This patch applies to Linus' git tree, git commit 98b98d316349e9a028e632629fe813d07fa5afdd
> > (Merge branch 'drm-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6)
> > with a few prerequisite patches available at https://lkml.org/lkml/2011/5/2/296
> > and https://lkml.org/lkml/2011/5/17/408 (all prerequisite patches were included in -mm tree).
> >
> > This patch contains online_page_callback and apropriate functions for
> > registering/unregistering online page callbacks. It allows to do some
> > machine specific tasks during online page stage which is required
> > to implement memory hotplug in virtual machines. Currently this patch
> > is required by latest memory hotplug support for Xen balloon driver
> > patch which will be posted soon.
> >
> > Additionally, originial online_page() function was splited into
> > following functions doing "atomic" operations:
> > - __online_page_set_limits() - set new limits for memory management code,
> > - __online_page_increment_counters() - increment totalram_pages and totalhigh_pages,
> > - __online_page_free() - free page to allocator.
> >
> > It was done to:
> > - not duplicate existing code,
> > - ease hotplug code devolpment by usage of well defined interface,
> > - avoid stupid bugs which are unavoidable when the same code
> > (by design) is developed in many places.
>
> I grabbed this and the xen patch. I assume that all prerequisites
> are now in mainline?
Thank you. Yes, they are.
> Please give some thought to making this extra code Kconfigurable, and
> selected by Xen? See if we can avoid a bit of bloat for other kernel
> users.
If you mean the Xen part, it is Kconfigurable.
> What is missing from the patchset is an explanation of why we should
> merge it ;) Why is this feature desirable? What value does it provide
> to our users? Why should we bother? Answering these questions in a
> form which can be pasted into the changelog would be convenient,
> thanks.
Balloon driver for virtualized guest systems allows easy memory
allocation/deallocation from a hypervisor. It is utilized to improve
memory usage by memory deallocation from guests which have a lot of it
unused and allocation to systems under memory pressure. However, it is
not possible by design to allocate more memory for a given guest machine
than was allocated for it at startup. To overcome that limitation, memory
hotplug should be used. This patch contains memory hotplug implementation
for Xen balloon driver. It utilizes current memory hotplug infrastructure
with small modifications. This solution allows increasing guest machine
memory size without restart regardless of memory size set at startup.
It is very useful on critical systems which require long run
without rebooting.
Additionally, could you add
Tested-by: Konrad Rzeszutek Wilk <[email protected]>
to both patches. Here https://lkml.org/lkml/2011/5/31/416
is original e-mail asking for it.
> Is there any propsect that the other virtualisation schemes will use
> this facility? If not, why not?
I have thought about that. I even submitted a project proposal for GSoC 2011
(you can find more details here
http://www.google-melange.com/gsoc/proposal/review/google/gsoc2011/dkiper/1),
however, it was not accepted. Currently, I am working on kexec/kdump for
Xen (it was my second project proposal for GSoC 2011) and my PhD thesis.
That is why I could not devote my time to that project. However, I am going
to return to work on generic balloon implementation and memory hotplug
for other virtualisation schemes ASAP.
> > @@ -388,7 +450,7 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
> > if (PageReserved(pfn_to_page(start_pfn)))
> > for (i = 0; i < nr_pages; i++) {
> > page = pfn_to_page(start_pfn + i);
> > - online_page(page);
> > + online_page_callback(page);
>
> nit. I'll change this to
>
> (*online_page_callback)(page);
>
> because that syntax communicates some useful information to the reader.
OK.
Daniel
> > Is there any propsect that the other virtualisation schemes will use
> > this facility? If not, why not?
>
> I think about that. Even I put a project proposal for GSoC 2011 (you
> could find more details here
Plus .. I remember reading on LWN something about this year's Linux MMU conference
and Red Hat's guys wanting to leverage a generic implementation for the ballooning
and make it more "self-aware" for KVM.