2019-07-02 14:16:45

by Pengfei Li

Subject: [PATCH v2 0/5] mm/vmalloc.c: improve readability and rewrite vmap_area


v1 -> v2:
* patch 3: Rename __find_vmap_area to __search_va_in_busy_tree
instead of __search_va_from_busy_tree.
* patch 5: Add motivation and necessary test data to the commit
message.
* patch 5: Let va->flags use only some low bits of va_start
instead of completely overwriting va_start.


The current implementation of struct vmap_area wastes space: at any
given stage of a vmap area's lifetime, not all members of the
structure are in use.

To address this, this series places members that are never used at
the same time into unions, reducing the size of the structure.

Local test results show that this change does not hurt performance.

With this series applied, sizeof(struct vmap_area) is reduced from
11 words to 8 words.
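
For illustration, below is a minimal user-space sketch of the packing
idea (an approximation, not the kernel definition: the kernel types
are replaced by same-size stand-ins, and LP64 is assumed, so every
long and pointer is one 8-byte word):

<snip>
#include <stdio.h>

struct rb_node    { unsigned long parent_color; void *right, *left; }; /* 3 words */
struct list_head  { void *next, *prev; };                              /* 2 words */
struct llist_node { void *next; };                                     /* 1 word  */

struct vmap_area_before {               /* 11 words in total */
	unsigned long va_start;
	unsigned long va_end;
	unsigned long subtree_max_size;
	unsigned long flags;
	struct rb_node rb_node;
	struct list_head list;
	struct llist_node purge_list;
	void *vm;
};

struct vmap_area_after {                /* 8 words in total */
	union {
		unsigned long va_start;
		unsigned long flags;    /* flags live in the low bits */
	};
	unsigned long va_end;
	union {                         /* members never used together */
		unsigned long subtree_max_size;
		struct llist_node purge_list;
		void *vm;
	};
	struct rb_node rb_node;
	struct list_head list;
};

int main(void)
{
	/* Prints 11 and 8 on an LP64 system. */
	printf("before: %zu words\n", sizeof(struct vmap_area_before) / sizeof(long));
	printf("after:  %zu words\n", sizeof(struct vmap_area_after) / sizeof(long));
	return 0;
}
<snip>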

Pengfei Li (5):
mm/vmalloc.c: Introduce a wrapper function of insert_vmap_area()
mm/vmalloc.c: Introduce a wrapper function of
insert_vmap_area_augment()
mm/vmalloc.c: Rename function __find_vmap_area() for readability
mm/vmalloc.c: Modify function merge_or_add_vmap_area() for readability
mm/vmalloc.c: Rewrite struct vmap_area to reduce its size

include/linux/vmalloc.h | 28 +++++---
mm/vmalloc.c | 139 ++++++++++++++++++++++++++++------------
2 files changed, 118 insertions(+), 49 deletions(-)

--
2.21.0


2019-07-02 14:16:55

by Pengfei Li

Subject: [PATCH v2 1/5] mm/vmalloc.c: Introduce a wrapper function of insert_vmap_area()

The red-black tree whose root is vmap_area_root is called the
*BUSY* tree. Since insert_vmap_area() is only used to add a vmap
area to the *BUSY* tree, add a wrapper function,
insert_va_to_busy_tree(), for readability.

Also, rename insert_vmap_area() to __insert_vmap_area() to indicate
that it should not be called directly.

Signed-off-by: Pengfei Li <[email protected]>
---
mm/vmalloc.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0f76cca32a1c..0a46be76c63b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -641,7 +641,7 @@ augment_tree_propagate_from(struct vmap_area *va)
}

static void
-insert_vmap_area(struct vmap_area *va,
+__insert_vmap_area(struct vmap_area *va,
struct rb_root *root, struct list_head *head)
{
struct rb_node **link;
@@ -651,6 +651,12 @@ insert_vmap_area(struct vmap_area *va,
link_va(va, root, parent, link, head);
}

+static __always_inline void
+insert_va_to_busy_tree(struct vmap_area *va)
+{
+ __insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+}
+
static void
insert_vmap_area_augment(struct vmap_area *va,
struct rb_node *from, struct rb_root *root,
@@ -1070,7 +1076,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
va->va_start = addr;
va->va_end = addr + size;
va->flags = 0;
- insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+ insert_va_to_busy_tree(va);

spin_unlock(&vmap_area_lock);

@@ -1871,7 +1877,7 @@ void __init vmalloc_init(void)
va->va_start = (unsigned long)tmp->addr;
va->va_end = va->va_start + tmp->size;
va->vm = tmp;
- insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+ insert_va_to_busy_tree(va);
}

/*
@@ -3281,7 +3287,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
va->va_start = start;
va->va_end = start + size;

- insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+ insert_va_to_busy_tree(va);
}

spin_unlock(&vmap_area_lock);
--
2.21.0

2019-07-02 14:17:25

by Pengfei Li

Subject: [PATCH v2 2/5] mm/vmalloc.c: Introduce a wrapper function of insert_vmap_area_augment()

The red-black tree whose root is free_vmap_area_root is called the
*FREE* tree. As in the previous commit, add a wrapper function,
insert_va_to_free_tree(), and rename insert_vmap_area_augment() to
__insert_vmap_area_augment().

Signed-off-by: Pengfei Li <[email protected]>
---
mm/vmalloc.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0a46be76c63b..a5065fcb74d3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -658,7 +658,7 @@ insert_va_to_busy_tree(struct vmap_area *va)
}

static void
-insert_vmap_area_augment(struct vmap_area *va,
+__insert_vmap_area_augment(struct vmap_area *va,
struct rb_node *from, struct rb_root *root,
struct list_head *head)
{
@@ -674,6 +674,13 @@ insert_vmap_area_augment(struct vmap_area *va,
augment_tree_propagate_from(va);
}

+static __always_inline void
+insert_va_to_free_tree(struct vmap_area *va, struct rb_node *from)
+{
+ __insert_vmap_area_augment(va, from, &free_vmap_area_root,
+ &free_vmap_area_list);
+}
+
/*
* Merge de-allocated chunk of VA memory with previous
* and next free blocks. If coalesce is not done a new
@@ -979,8 +986,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
augment_tree_propagate_from(va);

if (lva) /* type == NE_FIT_TYPE */
- insert_vmap_area_augment(lva, &va->rb_node,
- &free_vmap_area_root, &free_vmap_area_list);
+ insert_va_to_free_tree(lva, &va->rb_node);
}

return 0;
@@ -1822,9 +1828,7 @@ static void vmap_init_free_space(void)
free->va_start = vmap_start;
free->va_end = busy->va_start;

- insert_vmap_area_augment(free, NULL,
- &free_vmap_area_root,
- &free_vmap_area_list);
+ insert_va_to_free_tree(free, NULL);
}
}

@@ -1837,9 +1841,7 @@ static void vmap_init_free_space(void)
free->va_start = vmap_start;
free->va_end = vmap_end;

- insert_vmap_area_augment(free, NULL,
- &free_vmap_area_root,
- &free_vmap_area_list);
+ insert_va_to_free_tree(free, NULL);
}
}
}
--
2.21.0

2019-07-02 14:17:41

by Pengfei Li

Subject: [PATCH v2 3/5] mm/vmalloc.c: Rename function __find_vmap_area() for readability

Rename __find_vmap_area() to __search_va_in_busy_tree() to indicate
that it searches the *BUSY* tree.

Signed-off-by: Pengfei Li <[email protected]>
---
mm/vmalloc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a5065fcb74d3..b6ea52d6e8f9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -399,7 +399,7 @@ static void purge_vmap_area_lazy(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
static unsigned long lazy_max_pages(void);

-static struct vmap_area *__find_vmap_area(unsigned long addr)
+static struct vmap_area *__search_va_in_busy_tree(unsigned long addr)
{
struct rb_node *n = vmap_area_root.rb_node;

@@ -1313,7 +1313,7 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
struct vmap_area *va;

spin_lock(&vmap_area_lock);
- va = __find_vmap_area(addr);
+ va = __search_va_in_busy_tree(addr);
spin_unlock(&vmap_area_lock);

return va;
--
2.21.0

2019-07-02 14:18:01

by Pengfei Li

Subject: [PATCH v2 4/5] mm/vmalloc.c: Modify function merge_or_add_vmap_area() for readability

Since merge_or_add_vmap_area() is only used to merge a vmap area
into, or add it to, the *FREE* tree, rename it to
merge_or_add_va_to_free_tree().

After the rename it is obvious that the function does not need the
root and head parameters, so remove them.

Signed-off-by: Pengfei Li <[email protected]>
---
mm/vmalloc.c | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b6ea52d6e8f9..ad117d16af34 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -688,8 +688,7 @@ insert_va_to_free_tree(struct vmap_area *va, struct rb_node *from)
* freed.
*/
static __always_inline void
-merge_or_add_vmap_area(struct vmap_area *va,
- struct rb_root *root, struct list_head *head)
+merge_or_add_va_to_free_tree(struct vmap_area *va)
{
struct vmap_area *sibling;
struct list_head *next;
@@ -701,7 +700,7 @@ merge_or_add_vmap_area(struct vmap_area *va,
* Find a place in the tree where VA potentially will be
* inserted, unless it is merged with its sibling/siblings.
*/
- link = find_va_links(va, root, NULL, &parent);
+ link = find_va_links(va, &free_vmap_area_root, NULL, &parent);

/*
* Get next node of VA to check if merging can be done.
@@ -717,7 +716,7 @@ merge_or_add_vmap_area(struct vmap_area *va,
* | |
* start end
*/
- if (next != head) {
+ if (next != &free_vmap_area_list) {
sibling = list_entry(next, struct vmap_area, list);
if (sibling->va_start == va->va_end) {
sibling->va_start = va->va_start;
@@ -725,9 +724,6 @@ merge_or_add_vmap_area(struct vmap_area *va,
/* Check and update the tree if needed. */
augment_tree_propagate_from(sibling);

- /* Remove this VA, it has been merged. */
- unlink_va(va, root);
-
/* Free vmap_area object. */
kmem_cache_free(vmap_area_cachep, va);

@@ -744,7 +740,7 @@ merge_or_add_vmap_area(struct vmap_area *va,
* | |
* start end
*/
- if (next->prev != head) {
+ if (next->prev != &free_vmap_area_list) {
sibling = list_entry(next->prev, struct vmap_area, list);
if (sibling->va_end == va->va_start) {
sibling->va_end = va->va_end;
@@ -753,7 +749,8 @@ merge_or_add_vmap_area(struct vmap_area *va,
augment_tree_propagate_from(sibling);

/* Remove this VA, it has been merged. */
- unlink_va(va, root);
+ if (merged)
+ unlink_va(va, &free_vmap_area_root);

/* Free vmap_area object. */
kmem_cache_free(vmap_area_cachep, va);
@@ -764,7 +761,8 @@ merge_or_add_vmap_area(struct vmap_area *va,

insert:
if (!merged) {
- link_va(va, root, parent, link, head);
+ link_va(va, &free_vmap_area_root, parent, link,
+ &free_vmap_area_list);
augment_tree_propagate_from(va);
}
}
@@ -1141,8 +1139,7 @@ static void __free_vmap_area(struct vmap_area *va)
/*
* Merge VA with its neighbors, otherwise just add it.
*/
- merge_or_add_vmap_area(va,
- &free_vmap_area_root, &free_vmap_area_list);
+ merge_or_add_va_to_free_tree(va);
}

/*
--
2.21.0

2019-07-02 14:18:50

by Pengfei Li

Subject: [PATCH v2 5/5] mm/vmalloc.c: Rewrite struct vmap_area to reduce its size

Objective
---------
The current implementation of struct vmap_area wastes space: at any
given stage of a vmap area's lifetime, not all members of the
structure are in use.

To address this, this commit places members that are never used at
the same time into unions, reducing the size of the structure.

Local test results show that this commit does not hurt performance.

After applying this commit, sizeof(struct vmap_area) is reduced from
11 words to 8 words.

Description
-----------
Not all members of struct vmap_area are used at the same time.
For example:
(1) members @flags, @purge_list, and @vm are unused when the vmap
area is in the *FREE* tree;

(2) members @subtree_max_size and @purge_list are unused when the
vmap area is in the *BUSY* tree and not in the purge list;

(3) members @subtree_max_size and @vm are unused when the vmap area
is in the *BUSY* tree and in the purge list.

Since @subtree_max_size, @purge_list, and @vm are never used at the
same time, they can be placed in a union to reduce the size of
struct vmap_area.

In addition, since PAGE_ALIGNED(va->va_start) is always true, @flags
and @va_start can also share a union, with @flags using only the bits
below PAGE_SHIFT. The true value of @va_start can then be recovered
by masking off the flag bits, as sketched below.
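
A rough user-space sketch of this low-bit tagging scheme (the macro
names and the sample address below are illustrative, not the
kernel's):

<snip>
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

#define MY_FLAG  0x1UL            /* must fit in the bits below PAGE_SHIFT */
#define LOW_BITS (PAGE_SIZE - 1)  /* bits usable for flags */

int main(void)
{
	/* A page-aligned address: its low PAGE_SHIFT bits are all zero. */
	unsigned long va_start = 0xffff888000400000UL;
	assert((va_start & LOW_BITS) == 0);

	va_start |= MY_FLAG;                         /* set a flag */

	unsigned long addr  = va_start & ~LOW_BITS;  /* recover the address */
	unsigned long flags = va_start &  LOW_BITS;  /* recover the flags */

	printf("addr=0x%lx flags=0x%lx\n", addr, flags);
	return 0;
}
<snip>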

Performance Testing
-------------------

Test procedure
--------------

Test result
-----------
(1) Base (5.2.0-rc7)
Test items round_1 round_2 round_3
fix_size_alloc_test 596666 usec 600600 usec 597993 usec
full_fit_alloc_test 632614 usec 623770 usec 633657 usec
long_busy_list_alloc_test 8139257 usec 8162904 usec 8234770 usec
random_size_alloc_test 4332230 usec 4335376 usec 4322771 usec
fix_align_alloc_test 1184827 usec 1133574 usec 1148328 usec
random_size_align_alloc_test 1459814 usec 1467915 usec 1465112 usec
pcpu_alloc_test 92053 usec 93290 usec 92439 usec

(2) Patched (5.2.0-rc7 + this series of patches)
Test items round_1 round_2 round_3
fix_size_alloc_test 607753 usec 602034 usec 584153 usec
full_fit_alloc_test 593415 usec 627681 usec 634853 usec
long_busy_list_alloc_test 8038797 usec 8012176 usec 8152911 usec
random_size_alloc_test 4304070 usec 4327863 usec 4588769 usec
fix_align_alloc_test 1122373 usec 1108628 usec 1145592 usec
random_size_align_alloc_test 1449265 usec 1475160 usec 1479629 usec
pcpu_alloc_test 93856 usec 91697 usec 92658 usec

(3) Average comparison
Test items Avg_base Avg_patched
fix_size_alloc_test 598419.6667 usec 597980.0000 usec
full_fit_alloc_test 630013.6667 usec 618649.6667 usec
long_busy_list_alloc_test 8178977.0000 usec 8067961.3333 usec
random_size_alloc_test 4330125.6667 usec 4406900.6667 usec
fix_align_alloc_test 1155576.3333 usec 1125531.0000 usec
random_size_align_alloc_test 1464280.3333 usec 1468018.0000 usec
pcpu_alloc_test 92594.0000 usec 92737.0000 usec

(4) Percentage difference
Test items (Avg_base - Avg_patched) / Avg_base
fix_size_alloc_test 0.0735%
full_fit_alloc_test 1.8038%
long_busy_list_alloc_test 1.3573%
random_size_alloc_test -1.7730%
fix_align_alloc_test 2.6000%
random_size_align_alloc_test -0.2553%
pcpu_alloc_test -0.1544%

Result analysis
---------------
The differences between the base and the patched kernel (roughly
-1.8% to +2.6% across the test items) are within normal run-to-run
variation, so this commit does not have a significant impact on
performance.

Signed-off-by: Pengfei Li <[email protected]>
---
include/linux/vmalloc.h | 28 ++++++++++----
mm/vmalloc.c | 82 +++++++++++++++++++++++++++++++++--------
2 files changed, 87 insertions(+), 23 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 51e131245379..bc23d59cf5ae 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -48,18 +48,30 @@ struct vm_struct {
};

struct vmap_area {
- unsigned long va_start;
+ union {
+ unsigned long va_start;
+ /*
+ * Only used when vmap area is in the *BUSY* tree
+ * and not in the purge_list.
+ */
+ unsigned long flags;
+ };
+
unsigned long va_end;

- /*
- * Largest available free size in subtree.
- */
- unsigned long subtree_max_size;
- unsigned long flags;
+ union {
+ /* Only used when vmap area is in the *FREE* tree */
+ unsigned long subtree_max_size;
+
+ /* Only used when vmap area is in the vmap_purge_list */
+ struct llist_node purge_list;
+
+ /* Only used when the VM_VM_AREA flag is set */
+ struct vm_struct *vm;
+ };
+
struct rb_node rb_node; /* address sorted rbtree */
struct list_head list; /* address sorted list */
- struct llist_node purge_list; /* "lazy purge" list */
- struct vm_struct *vm;
};

/*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ad117d16af34..c7e59bc54024 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -329,8 +329,50 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
#define DEBUG_AUGMENT_PROPAGATE_CHECK 0
#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0

-#define VM_LAZY_FREE 0x02
-#define VM_VM_AREA 0x04
+
+/*
+ * For vmap area in the *BUSY* tree, PAGE_ALIGNED(va->va_start)
+ * is always true.
+ * This is guaranteed by calling BUG_ON(offset_in_page(size))
+ * and BUG_ON(!PAGE_ALIGNED(addr)).
+ * So define MAX_VA_FLAGS_SHIFT as PAGE_SHIFT.
+ */
+#define MAX_VA_FLAGS_SHIFT PAGE_SHIFT
+
+enum VA_FLAGS_TYPE {
+ VM_VM_AREA = 0x1UL,
+
+ /*
+ * The value of newly added flags should be between
+ * VM_VM_AREA and LAST_VMAP_AREA_FLAGS.
+ */
+
+ LAST_VMAP_AREA_FLAGS = (1UL << MAX_VA_FLAGS_SHIFT)
+};
+
+#define VMAP_AREA_FLAGS_MASK (~(LAST_VMAP_AREA_FLAGS - 1))
+
+/*
+ * va->flags should be set with this helper function, rather than
+ * by modifying va->flags directly, to ensure that the flag being
+ * set is valid.
+ */
+static __always_inline void
set_va_flags(struct vmap_area *va, enum VA_FLAGS_TYPE flag)
+{
+ BUILD_BUG_ON(!is_power_of_2(flag));
+
+ BUILD_BUG_ON(flag >= LAST_VMAP_AREA_FLAGS);
+
+ va->flags |= flag;
+}
+
+/* Return the correct value of va->va_start by masking flags */
+static __always_inline unsigned long
+get_va_start(struct vmap_area *va)
+{
+ return (va->va_start & VMAP_AREA_FLAGS_MASK);
+}

static DEFINE_SPINLOCK(vmap_area_lock);
/* Export for kexec only */
@@ -399,6 +441,11 @@ static void purge_vmap_area_lazy(void);
static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
static unsigned long lazy_max_pages(void);

+/*
+ * The low bits of va->va_start may hold flag bits while the vmap
+ * area is in the *BUSY* tree, so use the helper get_va_start() to
+ * read the value of va_start instead of accessing it directly.
+ */
static struct vmap_area *__search_va_in_busy_tree(unsigned long addr)
{
struct rb_node *n = vmap_area_root.rb_node;
@@ -407,7 +454,7 @@ static struct vmap_area *__search_va_in_busy_tree(unsigned long addr)
struct vmap_area *va;

va = rb_entry(n, struct vmap_area, rb_node);
- if (addr < va->va_start)
+ if (addr < get_va_start(va))
n = n->rb_left;
else if (addr >= va->va_end)
n = n->rb_right;
@@ -454,9 +501,9 @@ find_va_links(struct vmap_area *va,
* or full overlaps.
*/
if (va->va_start < tmp_va->va_end &&
- va->va_end <= tmp_va->va_start)
+ va->va_end <= get_va_start(tmp_va))
link = &(*link)->rb_left;
- else if (va->va_end > tmp_va->va_start &&
+ else if (va->va_end > get_va_start(tmp_va) &&
va->va_start >= tmp_va->va_end)
link = &(*link)->rb_right;
else
@@ -1079,8 +1126,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,

va->va_start = addr;
va->va_end = addr + size;
- va->flags = 0;
insert_va_to_busy_tree(va);
+ va->vm = NULL;

spin_unlock(&vmap_area_lock);

@@ -1872,11 +1919,12 @@ void __init vmalloc_init(void)
if (WARN_ON_ONCE(!va))
continue;

- va->flags = VM_VM_AREA;
va->va_start = (unsigned long)tmp->addr;
va->va_end = va->va_start + tmp->size;
- va->vm = tmp;
insert_va_to_busy_tree(va);
+
+ set_va_flags(va, VM_VM_AREA);
+ va->vm = tmp;
}

/*
@@ -1969,8 +2017,10 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
vm->addr = (void *)va->va_start;
vm->size = va->va_end - va->va_start;
vm->caller = caller;
+
+ set_va_flags(va, VM_VM_AREA);
va->vm = vm;
- va->flags |= VM_VM_AREA;
+
spin_unlock(&vmap_area_lock);
}

@@ -2102,9 +2152,11 @@ struct vm_struct *remove_vm_area(const void *addr)
struct vm_struct *vm = va->vm;

spin_lock(&vmap_area_lock);
- va->vm = NULL;
- va->flags &= ~VM_VM_AREA;
- va->flags |= VM_LAZY_FREE;
+ /*
+ * Restore the value of va_start by masking flags
+ * before adding vmap area to the purge list.
+ */
+ va->va_start = get_va_start(va);
spin_unlock(&vmap_area_lock);

kasan_free_shadow(vm);
@@ -3412,9 +3464,9 @@ static int s_show(struct seq_file *m, void *p)
*/
if (!(va->flags & VM_VM_AREA)) {
seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
- (void *)va->va_start, (void *)va->va_end,
- va->va_end - va->va_start,
- va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
+ (void *)get_va_start(va), (void *)va->va_end,
+ va->va_end - get_va_start(va),
+ va->vm ? "unpurged vm_area" : "vm_map_ram");

return 0;
}
--
2.21.0

2019-07-03 19:31:29

by Uladzislau Rezki

Subject: Re: [PATCH v2 0/5] mm/vmalloc.c: improve readability and rewrite vmap_area

Hello, Li.

>
> v1 -> v2:
> * patch 3: Rename __find_vmap_area to __search_va_in_busy_tree
> instead of __search_va_from_busy_tree.
> * patch 5: Add motivation and necessary test data to the commit
> message.
> * patch 5: Let va->flags use only some low bits of va_start
> instead of completely overwriting va_start.
>
>
> The current implementation of struct vmap_area wastes space: at any
> given stage of a vmap area's lifetime, not all members of the
> structure are in use.
>
> To address this, this series places members that are never used at
> the same time into unions, reducing the size of the structure.
>
> Local test results show that this change does not hurt performance.
>
> With this series applied, sizeof(struct vmap_area) is reduced from
> 11 words to 8 words.
>
> Pengfei Li (5):
> mm/vmalloc.c: Introduce a wrapper function of insert_vmap_area()
> mm/vmalloc.c: Introduce a wrapper function of
> insert_vmap_area_augment()
> mm/vmalloc.c: Rename function __find_vmap_area() for readability
> mm/vmalloc.c: Modify function merge_or_add_vmap_area() for readability
> mm/vmalloc.c: Rewrite struct vmap_area to reduce its size
>
> include/linux/vmalloc.h | 28 +++++---
> mm/vmalloc.c | 139 ++++++++++++++++++++++++++++------------
> 2 files changed, 118 insertions(+), 49 deletions(-)
>
> --
> 2.21.0
>
I do not think it is worth reducing the struct size the way this
series does, i.e. the union around flags/va_start. Simply put, if we
need two variables, flags and va_start, let's have both. Otherwise
everybody has to think about which one is being accessed at any given
moment in time.

That would make it easier to make mistakes; also, the conversion looks
strange to me. That is IMHO.

If we want to reduce the size to one L1 cache line (64 bytes), I would
propose eliminating the "flags" variable from the structure. We could
do that by applying the patch below (as an example) on top of
https://lkml.org/lkml/2019/7/3/661:

<snip>
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 51e131245379..49bb82863d5b 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -51,15 +51,22 @@ struct vmap_area {
unsigned long va_start;
unsigned long va_end;

- /*
- * Largest available free size in subtree.
- */
- unsigned long subtree_max_size;
- unsigned long flags;
struct rb_node rb_node; /* address sorted rbtree */
struct list_head list; /* address sorted list */
- struct llist_node purge_list; /* "lazy purge" list */
- struct vm_struct *vm;
+
+ /*
+ * Below three variables can be packed, because vmap_area
+ * object can be only in one of the three different states:
+ *
+ * - when an object is in "free" tree only;
+ * - when an object is in "purge list" only;
+ * - when an object is in "busy" tree only.
+ */
+ union {
+ unsigned long subtree_max_size;
+ struct llist_node purge_list;
+ struct vm_struct *vm;
+ };
};

/*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6f1b6a188227..e389a6db222b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -329,8 +329,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
#define DEBUG_AUGMENT_PROPAGATE_CHECK 0
#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0

-#define VM_VM_AREA 0x04
-
static DEFINE_SPINLOCK(vmap_area_lock);
/* Export for kexec only */
LIST_HEAD(vmap_area_list);
@@ -1108,7 +1106,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,

va->va_start = addr;
va->va_end = addr + size;
- va->flags = 0;
+ va->vm = NULL;
insert_vmap_area(va, &vmap_area_root, &vmap_area_list);

spin_unlock(&vmap_area_lock);
@@ -1912,7 +1910,6 @@ void __init vmalloc_init(void)
if (WARN_ON_ONCE(!va))
continue;

- va->flags = VM_VM_AREA;
va->va_start = (unsigned long)tmp->addr;
va->va_end = va->va_start + tmp->size;
va->vm = tmp;
@@ -2010,7 +2007,6 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
vm->size = va->va_end - va->va_start;
vm->caller = caller;
va->vm = vm;
- va->flags |= VM_VM_AREA;
spin_unlock(&vmap_area_lock);
}

@@ -2115,7 +2111,7 @@ struct vm_struct *find_vm_area(const void *addr)
struct vmap_area *va;

va = find_vmap_area((unsigned long)addr);
- if (va && va->flags & VM_VM_AREA)
+ if (va && va->vm)
return va->vm;

return NULL;
@@ -2139,11 +2135,10 @@ struct vm_struct *remove_vm_area(const void *addr)

spin_lock(&vmap_area_lock);
va = __find_vmap_area((unsigned long)addr);
- if (va && va->flags & VM_VM_AREA) {
+ if (va && va->vm) {
struct vm_struct *vm = va->vm;

va->vm = NULL;
- va->flags &= ~VM_VM_AREA;
spin_unlock(&vmap_area_lock);

kasan_free_shadow(vm);
@@ -2854,7 +2849,7 @@ long vread(char *buf, char *addr, unsigned long count)
if (!count)
break;

- if (!(va->flags & VM_VM_AREA))
+ if (!va->vm)
continue;

vm = va->vm;
@@ -2934,7 +2929,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
if (!count)
break;

- if (!(va->flags & VM_VM_AREA))
+ if (!va->vm)
continue;

vm = va->vm;
@@ -3464,10 +3459,10 @@ static int s_show(struct seq_file *m, void *p)
va = list_entry(p, struct vmap_area, list);

/*
- * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
- * behalf of vmap area is being tear down or vm_map_ram allocation.
+ * s_show can encounter race with remove_vm_area, !vm on behalf
+ * of vmap area is being tear down or vm_map_ram allocation.
*/
- if (!(va->flags & VM_VM_AREA)) {
+ if (!va->vm) {
seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
(void *)va->va_start, (void *)va->va_end,
va->va_end - va->va_start);
<snip>

urezki@pc636:~/data/ssd/coding/linux-stable$ pahole -C vmap_area mm/vmalloc.o
die__process_function: tag not supported (INVALID)!
struct vmap_area {
long unsigned int va_start; /* 0 8 */
long unsigned int va_end; /* 8 8 */
struct rb_node rb_node; /* 16 24 */
struct list_head list; /* 40 16 */
union {
long unsigned int subtree_max_size; /* 8 */
struct llist_node purge_list; /* 8 */
struct vm_struct * vm; /* 8 */
}; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */

/* size: 64, cachelines: 1, members: 5 */
};
urezki@pc636:~/data/ssd/coding/linux-stable$

--
Vlad Rezki

2019-07-04 09:33:22

by Pengfei Li

Subject: Re: [PATCH v2 0/5] mm/vmalloc.c: improve readability and rewrite vmap_area

On Thu, Jul 4, 2019 at 3:30 AM Uladzislau Rezki <[email protected]> wrote:
>
> Hello, Li.
>
> I do not think it is worth reducing the struct size the way this
> series does, i.e. the union around flags/va_start. Simply put, if we
> need two variables, flags and va_start, let's have both. Otherwise
> everybody has to think about which one is being accessed at any given
> moment in time.
>
> That would make it easier to make mistakes; also, the conversion looks
> strange to me. That is IMHO.
>
> If we want to reduce the size to one L1 cache line (64 bytes), I would
> propose eliminating the "flags" variable from the structure. We could
> do that by applying the patch below (as an example) on top of
> https://lkml.org/lkml/2019/7/3/661:
>

Hi, Vlad

Thank you for your detailed comments!

What you said inspired me. I really have no reason to stubbornly
keep the "flags" in vmap_area since it can be eliminated.

I will eliminate the "flags" from vmap_area as you suggested, and
the next version will be based on top of your commit
https://lkml.org/lkml/2019/7/3/661.


--
Pengfei
