preallocate_pmds() continues to preallocate pmds even after an
allocation fails, and only frees them all afterwards if any allocation
failed. Fix this by stopping the preallocation as soon as an allocation
fails and going straight to the free path.
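With the change applied, the allocation path (reconstructed here from the
hunk below and the existing error path, shown only for readability) ends up
looking roughly like:

	for(i = 0; i < PREALLOCATED_PMDS; i++) {
		pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);

		if (pmd == NULL) {
			/* stop preallocating on the first failure */
			failed = true;
			break;
		}
		pmds[i] = pmd;
	}

	if (failed) {
		/* free whatever was preallocated before the failure */
		free_pmds(pmds);
		return -ENOMEM;
	}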
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/mm/pgtable.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index dfa537a..ad3397a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -200,8 +200,10 @@ static int preallocate_pmds(pmd_t *pmds[])
for(i = 0; i < PREALLOCATED_PMDS; i++) {
pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
- if (pmd == NULL)
+ if (pmd == NULL) {
failed = true;
+ break;
+ }
pmds[i] = pmd;
}
--
1.8.1.2
writeback_inodes_wb() is not used outside fs/fs-writeback.c, so it can be
made static and its declaration dropped from the header.
Signed-off-by: Wanpeng Li <[email protected]>
---
fs/fs-writeback.c | 2 +-
include/linux/writeback.h | 2 --
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 68851ff..a75951c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -723,7 +723,7 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
return wrote;
}
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
enum wb_reason reason)
{
struct wb_writeback_work work = {
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 4e198ca..021b8a3 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -98,8 +98,6 @@ int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
enum wb_reason reason);
void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
- enum wb_reason reason);
void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
void inode_wait_for_writeback(struct inode *inode);
--
1.8.1.2
Use the wrapper function get_vm_area_size() to calculate the size of a vm
area instead of open-coding it.
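For context, the wrapper (as defined in include/linux/vmalloc.h at this
point; quoted here from memory, so treat it as approximate) simply hides
the guard-page adjustment:

	static inline size_t get_vm_area_size(const struct vm_struct *area)
	{
		/* return actual size without the guard page */
		return area->size - PAGE_SIZE;
	}

so the computed value is unchanged; the expression is just no longer
open-coded.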
Signed-off-by: Wanpeng Li <[email protected]>
---
mm/vmalloc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93d3182..553368c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1553,7 +1553,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
unsigned int nr_pages, array_size, i;
gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
+ nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
--
1.8.1.2
After commit 9bdac91424075 ("sparsemem: Put mem map for one node together."),
the vmemmap for one node is allocated together, and its logic is similar to
the memory allocation for pageblock flags. This patch introduces
alloc_usemap_and_memmap() to extract the common logic of memory allocation
for pageblock flags and vmemmap.
Signed-off-by: Wanpeng Li <[email protected]>
---
mm/sparse.c | 136 +++++++++++++++++++++++++++---------------------------------
1 file changed, 62 insertions(+), 74 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 308d503..4e91df4 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -439,6 +439,14 @@ static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
map_count, nodeid);
}
#else
+
+static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
+ unsigned long pnum_begin,
+ unsigned long pnum_end,
+ unsigned long map_count, int nodeid)
+{
+}
+
static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
struct page *map;
@@ -460,6 +468,58 @@ void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
{
}
+
+static void alloc_usemap_and_memmap(unsigned long **map, bool use_map)
+{
+ unsigned long pnum;
+ unsigned long map_count;
+ int nodeid_begin = 0;
+ unsigned long pnum_begin = 0;
+
+ for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+ struct mem_section *ms;
+
+ if (!present_section_nr(pnum))
+ continue;
+ ms = __nr_to_section(pnum);
+ nodeid_begin = sparse_early_nid(ms);
+ pnum_begin = pnum;
+ break;
+ }
+ map_count = 1;
+ for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
+ struct mem_section *ms;
+ int nodeid;
+
+ if (!present_section_nr(pnum))
+ continue;
+ ms = __nr_to_section(pnum);
+ nodeid = sparse_early_nid(ms);
+ if (nodeid == nodeid_begin) {
+ map_count++;
+ continue;
+ }
+ /* ok, we need to take care of the chunk from pnum_begin to pnum - 1 */
+ if (use_map)
+ sparse_early_usemaps_alloc_node(map, pnum_begin, pnum,
+ map_count, nodeid_begin);
+ else
+ sparse_early_mem_maps_alloc_node((struct page **)map,
+ pnum_begin, pnum, map_count, nodeid_begin);
+ /* new start, update count etc*/
+ nodeid_begin = nodeid;
+ pnum_begin = pnum;
+ map_count = 1;
+ }
+ /* ok, last chunk */
+ if (use_map)
+ sparse_early_usemaps_alloc_node(map, pnum_begin,
+ NR_MEM_SECTIONS, map_count, nodeid_begin);
+ else
+ sparse_early_mem_maps_alloc_node((struct page **)map,
+ pnum_begin, NR_MEM_SECTIONS, map_count, nodeid_begin);
+}
+
/*
* Allocate the accumulated non-linear sections, allocate a mem_map
* for each and record the physical to section mapping.
@@ -471,11 +531,7 @@ void __init sparse_init(void)
unsigned long *usemap;
unsigned long **usemap_map;
int size;
- int nodeid_begin = 0;
- unsigned long pnum_begin = 0;
- unsigned long usemap_count;
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
- unsigned long map_count;
int size2;
struct page **map_map;
#endif
@@ -501,82 +557,14 @@ void __init sparse_init(void)
usemap_map = alloc_bootmem(size);
if (!usemap_map)
panic("can not allocate usemap_map\n");
-
- for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
- struct mem_section *ms;
-
- if (!present_section_nr(pnum))
- continue;
- ms = __nr_to_section(pnum);
- nodeid_begin = sparse_early_nid(ms);
- pnum_begin = pnum;
- break;
- }
- usemap_count = 1;
- for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
- struct mem_section *ms;
- int nodeid;
-
- if (!present_section_nr(pnum))
- continue;
- ms = __nr_to_section(pnum);
- nodeid = sparse_early_nid(ms);
- if (nodeid == nodeid_begin) {
- usemap_count++;
- continue;
- }
- /* ok, we need to take cake of from pnum_begin to pnum - 1*/
- sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
- usemap_count, nodeid_begin);
- /* new start, update count etc*/
- nodeid_begin = nodeid;
- pnum_begin = pnum;
- usemap_count = 1;
- }
- /* ok, last chunk */
- sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
- usemap_count, nodeid_begin);
+ alloc_usemap_and_memmap(usemap_map, true);
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
map_map = alloc_bootmem(size2);
if (!map_map)
panic("can not allocate map_map\n");
-
- for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
- struct mem_section *ms;
-
- if (!present_section_nr(pnum))
- continue;
- ms = __nr_to_section(pnum);
- nodeid_begin = sparse_early_nid(ms);
- pnum_begin = pnum;
- break;
- }
- map_count = 1;
- for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
- struct mem_section *ms;
- int nodeid;
-
- if (!present_section_nr(pnum))
- continue;
- ms = __nr_to_section(pnum);
- nodeid = sparse_early_nid(ms);
- if (nodeid == nodeid_begin) {
- map_count++;
- continue;
- }
- /* ok, we need to take cake of from pnum_begin to pnum - 1*/
- sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
- map_count, nodeid_begin);
- /* new start, update count etc*/
- nodeid_begin = nodeid;
- pnum_begin = pnum;
- map_count = 1;
- }
- /* ok, last chunk */
- sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
- map_count, nodeid_begin);
+ alloc_usemap_and_memmap((unsigned long **)map_map, false);
#endif
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
--
1.8.1.2
On 08/14/2013 05:31 PM, Wanpeng Li wrote:
> preallocate_pmds() continues to preallocate pmds even after an
> allocation fails, and only frees them all afterwards if any allocation
> failed. Fix this by stopping the preallocation as soon as an allocation
> fails and going straight to the free path.
I guess there are a billion ways to do this, but I'm not sure we even
need 'failed':
--- arch/x86/mm/pgtable.c.orig 2013-08-15 10:52:15.145615027 -0700
+++ arch/x86/mm/pgtable.c 2013-08-15 10:52:47.509614081 -0700
@@ -196,21 +196,18 @@
static int preallocate_pmds(pmd_t *pmds[])
{
int i;
- bool failed = false;
for(i = 0; i < PREALLOCATED_PMDS; i++) {
pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
if (pmd == NULL)
- failed = true;
+ goto err;
pmds[i] = pmd;
}
- if (failed) {
- free_pmds(pmds);
- return -ENOMEM;
- }
-
return 0;
+err:
+ free_pmds(pmds);
+ return -ENOMEM;
}
I don't have a problem with what you have, though. It's better than
what was there, so:
Reviewed-by: Dave Hansen <[email protected]>
On 08/14/2013 05:31 PM, Wanpeng Li wrote:
> After commit 9bdac91424075 ("sparsemem: Put mem map for one node together."),
> the vmemmap for one node is allocated together, and its logic is similar to
> the memory allocation for pageblock flags. This patch introduces
> alloc_usemap_and_memmap() to extract the common logic of memory allocation
> for pageblock flags and vmemmap.
Shame on whoever copy-n-pasted that in the first place.
> -
> - for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
> - struct mem_section *ms;
> -
> - if (!present_section_nr(pnum))
> - continue;
> - ms = __nr_to_section(pnum);
> - nodeid_begin = sparse_early_nid(ms);
> - pnum_begin = pnum;
> - break;
> - }
> - usemap_count = 1;
> - for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
> - struct mem_section *ms;
> - int nodeid;
> -
> - if (!present_section_nr(pnum))
> - continue;
> - ms = __nr_to_section(pnum);
> - nodeid = sparse_early_nid(ms);
> - if (nodeid == nodeid_begin) {
> - usemap_count++;
> - continue;
> - }
> - /* ok, we need to take cake of from pnum_begin to pnum - 1*/
> - sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
> - usemap_count, nodeid_begin);
> - /* new start, update count etc*/
> - nodeid_begin = nodeid;
> - pnum_begin = pnum;
> - usemap_count = 1;
> - }
> - /* ok, last chunk */
> - sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
> - usemap_count, nodeid_begin);
> + alloc_usemap_and_memmap(usemap_map, true);
...
> + alloc_usemap_and_memmap((unsigned long **)map_map, false);
> #endif
Why does alloc_usemap_and_memmap() take an 'unsigned long **'?
'unsigned long' is for the usemap and 'struct page' is for the memmap.
It's misleading to have it take an 'unsigned long **' and then just cast
it over to a 'struct page **' internally.
Also, what's the point of having a function that returns something in a
double-pointer, but that doesn't use its return value?
alloc_usemap_and_memmap() also needs a comment about what it's doing
with that pointer and its other argument.
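Something along these lines (an untested sketch, not a finished patch) would
keep alloc_usemap_and_memmap() type-agnostic: pass the per-node allocation
step in as a callback together with an opaque pointer, so the walker never
needs to know whether it is handing out usemaps or mem_maps:

	static void __init alloc_usemap_and_memmap(void (*alloc_func)(void *data,
					unsigned long pnum_begin,
					unsigned long pnum_end,
					unsigned long map_count, int nodeid),
					void *data)
	{
		unsigned long pnum;
		unsigned long map_count = 1;
		unsigned long pnum_begin = 0;
		int nodeid_begin = 0;

		/* find the first present section and remember its node */
		for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
			if (!present_section_nr(pnum))
				continue;
			nodeid_begin = sparse_early_nid(__nr_to_section(pnum));
			pnum_begin = pnum;
			break;
		}

		/* group the remaining present sections by node */
		for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
			int nodeid;

			if (!present_section_nr(pnum))
				continue;
			nodeid = sparse_early_nid(__nr_to_section(pnum));
			if (nodeid == nodeid_begin) {
				map_count++;
				continue;
			}
			/* hand the chunk [pnum_begin, pnum) to the caller */
			alloc_func(data, pnum_begin, pnum, map_count, nodeid_begin);
			nodeid_begin = nodeid;
			pnum_begin = pnum;
			map_count = 1;
		}
		/* last chunk */
		alloc_func(data, pnum_begin, NR_MEM_SECTIONS, map_count,
			   nodeid_begin);
	}

The two call sites would then pass small wrappers around
sparse_early_usemaps_alloc_node() and sparse_early_mem_maps_alloc_node()
with usemap_map/map_map as 'data', and both the 'use_map' flag and the
casts go away.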
On 08/14/2013 05:31 PM, Wanpeng Li wrote:
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 93d3182..553368c 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1553,7 +1553,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
> unsigned int nr_pages, array_size, i;
> gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
>
> - nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
> + nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
> array_size = (nr_pages * sizeof(struct page *));
I guess this is fine, but I do see this same kind of use in a couple of
other spots in the kernel. Was there a reason for doing this in this
one spot but ignoring the others?