From: Gerald Schaefer <[email protected]>
Use for_each_populated_zone() instead of for_each_zone() in hibernation
code. This fixes a bug on s390, where we allow both config options
HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
here. We only allow hibernation if no memory hotplug operation was
performed, so in fact both features can only be used exclusively, but
this way we don't need 2 differently configured (distribution) kernels.
If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
code iterates through all zones, not only the populated zones, in
several places. For example, swsusp_free() does for_each_zone() and
then checks for pfn_valid(), which is true even if the zone is not
populated, resulting in a BUG_ON() later because the pfn cannot be
found in the memory bitmap.
Replacing all occurences of for_each_zone() in hibernation code with
for_each_populated_zone() would fix this issue.
Signed-off-by: Gerald Schaefer <[email protected]>
---
kernel/power/snapshot.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
Index: linux-2.6-work/kernel/power/snapshot.c
===================================================================
--- linux-2.6-work.orig/kernel/power/snapshot.c
+++ linux-2.6-work/kernel/power/snapshot.c
@@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(
struct zone *zone;
unsigned int n = 0;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
unsigned long pfn, max_zone_pfn;
if (!is_highmem(zone))
@@ -916,7 +916,7 @@ static unsigned int count_data_pages(voi
unsigned long pfn, max_zone_pfn;
unsigned int n = 0;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
if (is_highmem(zone))
continue;
@@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *co
struct zone *zone;
unsigned long pfn;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
unsigned long max_zone_pfn;
mark_free_pages(zone);
@@ -1046,7 +1046,7 @@ void swsusp_free(void)
struct zone *zone;
unsigned long pfn, max_zone_pfn;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn)) {
@@ -1166,7 +1166,7 @@ static int enough_free_mem(unsigned int
struct zone *zone;
unsigned int free = 0, meta = 0;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
meta += snapshot_additional_pages(zone);
if (!is_highmem(zone))
free += zone_page_state(zone, NR_FREE_PAGES);
@@ -1474,7 +1474,7 @@ static int mark_unsafe_pages(struct memo
unsigned long pfn, max_zone_pfn;
/* Clear page flags */
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn))
On Monday 20 July 2009, Gerald Schaefer wrote:
> From: Gerald Schaefer <[email protected]>
>
> Use for_each_populated_zone() instead of for_each_zone() in hibernation
> code. This fixes a bug on s390, where we allow both config options
> HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> here. We only allow hibernation if no memory hotplug operation was
> performed, so in fact both features can only be used exclusively, but
> this way we don't need 2 differently configured (distribution) kernels.
>
> If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> code iterates through all zones, not only the populated zones, in
> several places. For example, swsusp_free() does for_each_zone() and
> then checks for pfn_valid(), which is true even if the zone is not
> populated, resulting in a BUG_ON() later because the pfn cannot be
> found in the memory bitmap.
>
> Replacing all occurences of for_each_zone() in hibernation code with
> for_each_populated_zone() would fix this issue.
Thanks for the patch, I'm going to add it to my 2.6.32 queue.
Best,
Rafael
> Signed-off-by: Gerald Schaefer <[email protected]>
> ---
> kernel/power/snapshot.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> Index: linux-2.6-work/kernel/power/snapshot.c
> ===================================================================
> --- linux-2.6-work.orig/kernel/power/snapshot.c
> +++ linux-2.6-work/kernel/power/snapshot.c
> @@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(
> struct zone *zone;
> unsigned int n = 0;
>
> - for_each_zone(zone) {
> + for_each_populated_zone(zone) {
> unsigned long pfn, max_zone_pfn;
>
> if (!is_highmem(zone))
> @@ -916,7 +916,7 @@ static unsigned int count_data_pages(voi
> unsigned long pfn, max_zone_pfn;
> unsigned int n = 0;
>
> - for_each_zone(zone) {
> + for_each_populated_zone(zone) {
> if (is_highmem(zone))
> continue;
>
> @@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *co
> struct zone *zone;
> unsigned long pfn;
>
> - for_each_zone(zone) {
> + for_each_populated_zone(zone) {
> unsigned long max_zone_pfn;
>
> mark_free_pages(zone);
> @@ -1046,7 +1046,7 @@ void swsusp_free(void)
> struct zone *zone;
> unsigned long pfn, max_zone_pfn;
>
> - for_each_zone(zone) {
> + for_each_populated_zone(zone) {
> max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
> for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
> if (pfn_valid(pfn)) {
> @@ -1166,7 +1166,7 @@ static int enough_free_mem(unsigned int
> struct zone *zone;
> unsigned int free = 0, meta = 0;
>
> - for_each_zone(zone) {
> + for_each_populated_zone(zone) {
> meta += snapshot_additional_pages(zone);
> if (!is_highmem(zone))
> free += zone_page_state(zone, NR_FREE_PAGES);
> @@ -1474,7 +1474,7 @@ static int mark_unsafe_pages(struct memo
> unsigned long pfn, max_zone_pfn;
>
> /* Clear page flags */
> - for_each_zone(zone) {
> + for_each_populated_zone(zone) {
> max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
> for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
> if (pfn_valid(pfn))
>
>
>
> From: Gerald Schaefer <[email protected]>
>
> Use for_each_populated_zone() instead of for_each_zone() in hibernation
> code. This fixes a bug on s390, where we allow both config options
> HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> here. We only allow hibernation if no memory hotplug operation was
> performed, so in fact both features can only be used exclusively, but
> this way we don't need 2 differently configured (distribution) kernels.
>
> If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> code iterates through all zones, not only the populated zones, in
> several places. For example, swsusp_free() does for_each_zone() and
> then checks for pfn_valid(), which is true even if the zone is not
> populated, resulting in a BUG_ON() later because the pfn cannot be
> found in the memory bitmap.
>
> Replacing all occurences of for_each_zone() in hibernation code with
> for_each_populated_zone() would fix this issue.
>
> Signed-off-by: Gerald Schaefer <[email protected]>
Thanks this effort.
Acked-by: KOSAKI Motohiro <[email protected]>
Hi.
Gerald Schaefer wrote:
> From: Gerald Schaefer <[email protected]>
>
> Use for_each_populated_zone() instead of for_each_zone() in hibernation
> code. This fixes a bug on s390, where we allow both config options
> HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> here. We only allow hibernation if no memory hotplug operation was
> performed, so in fact both features can only be used exclusively, but
> this way we don't need 2 differently configured (distribution) kernels.
>
> If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> code iterates through all zones, not only the populated zones, in
> several places. For example, swsusp_free() does for_each_zone() and
> then checks for pfn_valid(), which is true even if the zone is not
> populated, resulting in a BUG_ON() later because the pfn cannot be
> found in the memory bitmap.
I agree with your logic and patch, but doesn't this also imply that the
s390 implementation pfn_valid should be changed to return false for
those pages?
Regards,
Nigel
On Tue, Jul 21, 2009 at 07:29:58AM +1000, Nigel Cunningham wrote:
> Hi.
>
> Gerald Schaefer wrote:
> > From: Gerald Schaefer <[email protected]>
> >
> > Use for_each_populated_zone() instead of for_each_zone() in hibernation
> > code. This fixes a bug on s390, where we allow both config options
> > HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> > here. We only allow hibernation if no memory hotplug operation was
> > performed, so in fact both features can only be used exclusively, but
> > this way we don't need 2 differently configured (distribution) kernels.
> >
> > If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> > into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> > code iterates through all zones, not only the populated zones, in
> > several places. For example, swsusp_free() does for_each_zone() and
> > then checks for pfn_valid(), which is true even if the zone is not
> > populated, resulting in a BUG_ON() later because the pfn cannot be
> > found in the memory bitmap.
>
> I agree with your logic and patch, but doesn't this also imply that the
> s390 implementation pfn_valid should be changed to return false for
> those pages?
For CONFIG_SPARSEMEM, which s390 uses, there is no architecture specific
pfn_valid() implementation.
Also it looks like the semantics of pfn_valid() aren't clear.
At least for sparsemem it means nothing but "the memmap for the section
this page belongs to exists". So it just means the struct page for the
pfn exists.
We still have pfn_present() for CONFIG_SPARSEMEM. But that just means
"some pages in the section this pfn belongs to are present."
So it looks like checking for pfn_valid() and afterwards checking
for PG_Reserved (?) might give what one would expect.
Looks all a bit confusing to me.
Or maybe it's just me who is confused? :)
On Tue, Jul 21, 2009 at 09:15:08AM +0200, Heiko Carstens wrote:
> On Tue, Jul 21, 2009 at 07:29:58AM +1000, Nigel Cunningham wrote:
> > Hi.
> >
> > Gerald Schaefer wrote:
> > > From: Gerald Schaefer <[email protected]>
> > >
> > > Use for_each_populated_zone() instead of for_each_zone() in hibernation
> > > code. This fixes a bug on s390, where we allow both config options
> > > HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> > > here. We only allow hibernation if no memory hotplug operation was
> > > performed, so in fact both features can only be used exclusively, but
> > > this way we don't need 2 differently configured (distribution) kernels.
> > >
> > > If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> > > into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> > > code iterates through all zones, not only the populated zones, in
> > > several places. For example, swsusp_free() does for_each_zone() and
> > > then checks for pfn_valid(), which is true even if the zone is not
> > > populated, resulting in a BUG_ON() later because the pfn cannot be
> > > found in the memory bitmap.
> >
> > I agree with your logic and patch, but doesn't this also imply that the
> > s390 implementation pfn_valid should be changed to return false for
> > those pages?
>
> For CONFIG_SPARSEMEM, which s390 uses, there is no architecture specific
> pfn_valid() implementation.
> Also it looks like the semantics of pfn_valid() aren't clear.
> At least for sparsemem it means nothing but "the memmap for the section
> this page belongs to exists". So it just means the struct page for the
> pfn exists.
> We still have pfn_present() for CONFIG_SPARSEMEM. But that just means
> "some pages in the section this pfn belongs to are present."
> So it looks like checking for pfn_valid() and afterwards checking
> for PG_Reserved (?) might give what one would expect.
> Looks all a bit confusing to me.
> Or maybe it's just me who is confused? :)
It would be nice to remove PG_reserved (most architectures also set
it I think for kernel text and IIRC bootmem), it could then be used
as a PG_arch_2 bit, and we could ask architectures to impement
pfn_is_ram (or whatever's going to be most useful).
On Tue, 21 Jul 2009 09:15:08 +0200
Heiko Carstens <[email protected]> wrote:
> On Tue, Jul 21, 2009 at 07:29:58AM +1000, Nigel Cunningham wrote:
> > Hi.
> >
> > Gerald Schaefer wrote:
> > > From: Gerald Schaefer <[email protected]>
> > >
> > > Use for_each_populated_zone() instead of for_each_zone() in hibernation
> > > code. This fixes a bug on s390, where we allow both config options
> > > HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> > > here. We only allow hibernation if no memory hotplug operation was
> > > performed, so in fact both features can only be used exclusively, but
> > > this way we don't need 2 differently configured (distribution) kernels.
> > >
> > > If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> > > into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> > > code iterates through all zones, not only the populated zones, in
> > > several places. For example, swsusp_free() does for_each_zone() and
> > > then checks for pfn_valid(), which is true even if the zone is not
> > > populated, resulting in a BUG_ON() later because the pfn cannot be
> > > found in the memory bitmap.
> >
> > I agree with your logic and patch, but doesn't this also imply that the
> > s390 implementation pfn_valid should be changed to return false for
> > those pages?
>
> For CONFIG_SPARSEMEM, which s390 uses, there is no architecture specific
> pfn_valid() implementation.
> Also it looks like the semantics of pfn_valid() aren't clear.
> At least for sparsemem it means nothing but "the memmap for the section
> this page belongs to exists". So it just means the struct page for the
> pfn exists.
Historically, pfn_valid() just means "there is a memmap." no other meanings
in any configs/archs.
> We still have pfn_present() for CONFIG_SPARSEMEM. But that just means
> "some pages in the section this pfn belongs to are present."
It just exists for sparsemem internal purpose IIUC.
> So it looks like checking for pfn_valid() and afterwards checking
> for PG_Reserved (?) might give what one would expect.
I think so, too. If memory is offline, PG_reserved is always set.
In general, it's expected that "page is contiguous in MAX_ORDER range"
and no memory holes in MAX_ORDER. In most case, PG_reserved is checked
for skipping not-existing memory.
> Looks all a bit confusing to me.
> Or maybe it's just me who is confused? :)
>
IIRC, there are no generic interface to know whether there is a physical page.
pfn_valid() is only for memmap and people have used
if (pfn_valid(pfn) && !PageReserved(page))
check.
But, hmm, If hibernation have to save PG_reserved memory, general solution is
use copy_user_page() and handle fault.
Alternative is making use of walk_memory_resource() as memory hotplug does.
It checks resource information registered.
Thanks,
-Kame
On Tuesday 21 July 2009, KAMEZAWA Hiroyuki wrote:
> On Tue, 21 Jul 2009 09:15:08 +0200
> Heiko Carstens <[email protected]> wrote:
>
> > On Tue, Jul 21, 2009 at 07:29:58AM +1000, Nigel Cunningham wrote:
> > > Hi.
> > >
> > > Gerald Schaefer wrote:
> > > > From: Gerald Schaefer <[email protected]>
> > > >
> > > > Use for_each_populated_zone() instead of for_each_zone() in hibernation
> > > > code. This fixes a bug on s390, where we allow both config options
> > > > HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> > > > here. We only allow hibernation if no memory hotplug operation was
> > > > performed, so in fact both features can only be used exclusively, but
> > > > this way we don't need 2 differently configured (distribution) kernels.
> > > >
> > > > If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> > > > into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> > > > code iterates through all zones, not only the populated zones, in
> > > > several places. For example, swsusp_free() does for_each_zone() and
> > > > then checks for pfn_valid(), which is true even if the zone is not
> > > > populated, resulting in a BUG_ON() later because the pfn cannot be
> > > > found in the memory bitmap.
> > >
> > > I agree with your logic and patch, but doesn't this also imply that the
> > > s390 implementation pfn_valid should be changed to return false for
> > > those pages?
> >
> > For CONFIG_SPARSEMEM, which s390 uses, there is no architecture specific
> > pfn_valid() implementation.
> > Also it looks like the semantics of pfn_valid() aren't clear.
> > At least for sparsemem it means nothing but "the memmap for the section
> > this page belongs to exists". So it just means the struct page for the
> > pfn exists.
>
> Historically, pfn_valid() just means "there is a memmap." no other meanings
> in any configs/archs.
Is this documented anywhere actually?
> > We still have pfn_present() for CONFIG_SPARSEMEM. But that just means
> > "some pages in the section this pfn belongs to are present."
>
> It just exists for sparsemem internal purpose IIUC.
>
>
> > So it looks like checking for pfn_valid() and afterwards checking
> > for PG_Reserved (?) might give what one would expect.
> I think so, too. If memory is offline, PG_reserved is always set.
>
> In general, it's expected that "page is contiguous in MAX_ORDER range"
> and no memory holes in MAX_ORDER. In most case, PG_reserved is checked
> for skipping not-existing memory.
PG_reserved is also set for kernel text, at least on some architectures, and
for some other areas that we want to save.
> > Looks all a bit confusing to me.
> > Or maybe it's just me who is confused? :)
> >
> IIRC, there are no generic interface to know whether there is a physical page.
We need to know that for hibernation, though.
Well, there is a mechanism for marking making address ranges that are never
to be saved, but they need to be known during initialisation already.
> pfn_valid() is only for memmap and people have used
> if (pfn_valid(pfn) && !PageReserved(page))
> check.
> But, hmm, If hibernation have to save PG_reserved memory, general solution is
> use copy_user_page() and handle fault.
That's not exactly straightforward IMHO.
> Alternative is making use of walk_memory_resource() as memory hotplug does.
> It checks resource information registered.
I'd be fine with any _simple_ mechanism allowing us to check whether there's
a physical page frame for given page (or given PFN).
Best,
Rafael
On Tue, 21 Jul 2009 16:11:08 +0200
"Rafael J. Wysocki" <[email protected]> wrote:
> On Tuesday 21 July 2009, KAMEZAWA Hiroyuki wrote:
> > On Tue, 21 Jul 2009 09:15:08 +0200
> > Heiko Carstens <[email protected]> wrote:
> >
> > > On Tue, Jul 21, 2009 at 07:29:58AM +1000, Nigel Cunningham wrote:
> > > > Hi.
> > > >
> > > > Gerald Schaefer wrote:
> > > > > From: Gerald Schaefer <[email protected]>
> > > > >
> > > > > Use for_each_populated_zone() instead of for_each_zone() in hibernation
> > > > > code. This fixes a bug on s390, where we allow both config options
> > > > > HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE
> > > > > here. We only allow hibernation if no memory hotplug operation was
> > > > > performed, so in fact both features can only be used exclusively, but
> > > > > this way we don't need 2 differently configured (distribution) kernels.
> > > > >
> > > > > If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run
> > > > > into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation
> > > > > code iterates through all zones, not only the populated zones, in
> > > > > several places. For example, swsusp_free() does for_each_zone() and
> > > > > then checks for pfn_valid(), which is true even if the zone is not
> > > > > populated, resulting in a BUG_ON() later because the pfn cannot be
> > > > > found in the memory bitmap.
> > > >
> > > > I agree with your logic and patch, but doesn't this also imply that the
> > > > s390 implementation pfn_valid should be changed to return false for
> > > > those pages?
> > >
> > > For CONFIG_SPARSEMEM, which s390 uses, there is no architecture specific
> > > pfn_valid() implementation.
> > > Also it looks like the semantics of pfn_valid() aren't clear.
> > > At least for sparsemem it means nothing but "the memmap for the section
> > > this page belongs to exists". So it just means the struct page for the
> > > pfn exists.
> >
> > Historically, pfn_valid() just means "there is a memmap." no other meanings
> > in any configs/archs.
>
> Is this documented anywhere actually?
>
When I helped developping SPARSEMEM, I goodled, I found Linus said that ;)
But, from implementaion, it's a very clear fact. See CONFIG_FLATMEM, the simplest
implemenation of memmap. It use a coutinous mem_map regardless of memory holes
and pfn_valid() returns true if pfn < max_mapnr.
#define pfn_valid(pfn) ((pfn) < max_mapnr)
> > > We still have pfn_present() for CONFIG_SPARSEMEM. But that just means
> > > "some pages in the section this pfn belongs to are present."
> >
> > It just exists for sparsemem internal purpose IIUC.
> >
> >
> > > So it looks like checking for pfn_valid() and afterwards checking
> > > for PG_Reserved (?) might give what one would expect.
> > I think so, too. If memory is offline, PG_reserved is always set.
> >
> > In general, it's expected that "page is contiguous in MAX_ORDER range"
> > and no memory holes in MAX_ORDER. In most case, PG_reserved is checked
> > for skipping not-existing memory.
>
> PG_reserved is also set for kernel text, at least on some architectures, and
> for some other areas that we want to save.
>
yes.
> > > Looks all a bit confusing to me.
> > > Or maybe it's just me who is confused? :)
> > >
> > IIRC, there are no generic interface to know whether there is a physical page.
>
> We need to know that for hibernation, though.
>
> Well, there is a mechanism for marking making address ranges that are never
> to be saved, but they need to be known during initialisation already.
>
> > pfn_valid() is only for memmap and people have used
> > if (pfn_valid(pfn) && !PageReserved(page))
> > check.
> > But, hmm, If hibernation have to save PG_reserved memory, general solution is
> > use copy_user_page() and handle fault.
>
> That's not exactly straightforward IMHO.
>
See ia64's ia64_pfn_valid(). It uses get_user() very effectively.
(I think this cost cost is small in any arch...)
523 ia64_pfn_valid (unsigned long pfn)
524 {
525 char byte;
526 struct page *pg = pfn_to_page(pfn);
527
528 return (__get_user(byte, (char __user *) pg) == 0)
529 && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
530 || (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
531 }
Adding function like this is not very hard.
bool can_access_physmem(unsigned long pfn)
{
char byte;
char *pg = __va(pfn << PAGE_SHIFT);
return (__get_user(byte, pg) == 0)
}
and enough simple. But this may allow you to access remapped device's memory...
Then, some range check will be required anyway.
Can we detect io-remapped range from memmap or any ?
(I think we'll have to skip PG_reserved page...)
> > Alternative is making use of walk_memory_resource() as memory hotplug does.
> > It checks resource information registered.
>
> I'd be fine with any _simple_ mechanism allowing us to check whether there's
> a physical page frame for given page (or given PFN).
>
walk_memory_resource() is enough _simple_, IMHO.
Now, I'm removing #ifdef CONFIG_MEMORY_HOTPLUG for walk_memory_resource() to
rewrite /proc/kcore.
Thanks,
-Kame
On Wed, 22 Jul 2009 09:25:35 +0900
KAMEZAWA Hiroyuki <[email protected]> wrote:
> See ia64's ia64_pfn_valid(). It uses get_user() very effectively.
> (I think this cost cost is small in any arch...)
>
> 523 ia64_pfn_valid (unsigned long pfn)
> 524 {
> 525 char byte;
> 526 struct page *pg = pfn_to_page(pfn);
> 527
> 528 return (__get_user(byte, (char __user *) pg) == 0)
> 529 && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
> 530 || (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
> 531 }
>
Just an explanation. This code is for checking "there is memmap or not" for
CONFIG_VIRTUAL_MEMMAP+CONFIG_DISCONTIGMEM, which allocates memmap in virtually
contiguous area. Because ia64 tends to have very sparse memory map,
memmap cannot be allocated in continuous area and memmap has holes.
This code checkes first byte and last byte of "struct page" is valid.
Thanks,
-Kame
On Wednesday 22 July 2009, KAMEZAWA Hiroyuki wrote:
> On Tue, 21 Jul 2009 16:11:08 +0200
> "Rafael J. Wysocki" <[email protected]> wrote:
[...]
> Adding function like this is not very hard.
>
> bool can_access_physmem(unsigned long pfn)
> {
> char byte;
> char *pg = __va(pfn << PAGE_SHIFT);
> return (__get_user(byte, pg) == 0)
> }
Unfortunately we can't use __get_user() for hibernation, because it may sleep.
Some other mechanism would be necessary, it seems.
> and enough simple. But this may allow you to access remapped device's memory...
> Then, some range check will be required anyway.
> Can we detect io-remapped range from memmap or any ?
> (I think we'll have to skip PG_reserved page...)
>
> > > Alternative is making use of walk_memory_resource() as memory hotplug does.
> > > It checks resource information registered.
> >
> > I'd be fine with any _simple_ mechanism allowing us to check whether there's
> > a physical page frame for given page (or given PFN).
> >
>
> walk_memory_resource() is enough _simple_, IMHO.
> Now, I'm removing #ifdef CONFIG_MEMORY_HOTPLUG for walk_memory_resource() to
> rewrite /proc/kcore.
Hmm. Which architectures set CONFIG_ARCH_HAS_WALK_MEMORY ?
Best,
Rafael
On Wed, 22 Jul 2009 19:49:55 +0200
"Rafael J. Wysocki" <[email protected]> wrote:
> > and enough simple. But this may allow you to access remapped device's memory...
> > Then, some range check will be required anyway.
> > Can we detect io-remapped range from memmap or any ?
> > (I think we'll have to skip PG_reserved page...)
> >
> > > > Alternative is making use of walk_memory_resource() as memory hotplug does.
> > > > It checks resource information registered.
> > >
> > > I'd be fine with any _simple_ mechanism allowing us to check whether there's
> > > a physical page frame for given page (or given PFN).
> > >
> >
> > walk_memory_resource() is enough _simple_, IMHO.
> > Now, I'm removing #ifdef CONFIG_MEMORY_HOTPLUG for walk_memory_resource() to
> > rewrite /proc/kcore.
>
> Hmm. Which architectures set CONFIG_ARCH_HAS_WALK_MEMORY ?
>
ppc only. It has its own.
I'm now prepareing a patch to remove #ifdef CONFIG_MEMORY_HOTPLUG for /proc/kcore
and rename it to walk_system_ram_range(). plz see "kcore:...." patches currently
posted to lkml if you are interested in.
Thanks,
-Kame
Thanks,
-Kame
> Best,
> Rafael
Rafael J. Wysocki wrote:
>>> So it looks like checking for pfn_valid() and afterwards checking
>>> for PG_Reserved (?) might give what one would expect.
>> I think so, too. If memory is offline, PG_reserved is always set.
>>
>> In general, it's expected that "page is contiguous in MAX_ORDER range"
>> and no memory holes in MAX_ORDER. In most case, PG_reserved is checked
>> for skipping not-existing memory.
>
> PG_reserved is also set for kernel text, at least on some architectures, and
> for some other areas that we want to save.
How about checking for PG_reserved && ZONE_MOVABLE? I think we don't
have any special cases for PG_reserved inside ZONE_MOVABLE, but I'm not
sure if this is true for all architectures and NUMA systems.
If this would work, it could be a simple way to determine which hotplug
memory should be saved.
--
Regards,
Gerald