An earlier patch in this series disabled file_region coalescing in order
to hang the hugetlb_cgroup uncharge info on the file_region entries.
This patch re-adds support for coalescing of file_region entries.
Essentially every time we add an entry, we call a recursive function that
tries to coalesce the added region with the regions next to it. The
worst case call depth for this function is 3: one to coalesce with the
region next to it, one to coalesce to the region prev, and one to reach
the base case.
This is an important performance optimization as private mappings add
their entries page by page, and we could incur big performance costs for
large mappings with lots of file_region entries in their resv_map.
Signed-off-by: Mina Almasry <[email protected]>
---
Changes in v12:
- Changed logic for coalescing. Instead of checking inline to coalesce
with only the region on next or prev, we now have a recursive function
that takes care of coalescing in both directions.
- For testing this code I added a bunch of debug code that checks that
the entries in the resv_map are coalesced appropriately. This passes
with libhugetlbfs tests.
---
mm/hugetlb.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2d62dd35399db..45219cb58ac71 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -276,6 +276,86 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
#endif
}
+static bool has_same_uncharge_info(struct file_region *rg,
+ struct file_region *org)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+ return rg && org &&
+ rg->reservation_counter == org->reservation_counter &&
+ rg->css == org->css;
+
+#else
+ return true;
+#endif
+}
+
+#ifdef CONFIG_DEBUG_VM
+static void dump_resv_map(struct resv_map *resv)
+{
+ struct list_head *head = &resv->regions;
+ struct file_region *rg = NULL;
+
+ pr_err("--------- start print resv_map ---------\n");
+ list_for_each_entry(rg, head, link) {
+ pr_err("rg->from=%ld, rg->to=%ld, rg->reservation_counter=%px, rg->css=%px\n",
+ rg->from, rg->to, rg->reservation_counter, rg->css);
+ }
+ pr_err("--------- end print resv_map ---------\n");
+}
+
+/* Debug function to loop over the resv_map and make sure that coalescing is
+ * working.
+ */
+static void check_coalesce_bug(struct resv_map *resv)
+{
+ struct list_head *head = &resv->regions;
+ struct file_region *rg = NULL, *nrg = NULL;
+
+ list_for_each_entry(rg, head, link) {
+ nrg = list_next_entry(rg, link);
+
+ if (&nrg->link == head)
+ break;
+
+ if (nrg->reservation_counter && nrg->from == rg->to &&
+ nrg->reservation_counter == rg->reservation_counter &&
+ nrg->css == rg->css) {
+ dump_resv_map(resv);
+ VM_BUG_ON(true);
+ }
+ }
+}
+#endif
+
+static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
+{
+ struct file_region *nrg = NULL, *prg = NULL;
+
+ prg = list_prev_entry(rg, link);
+ if (&prg->link != &resv->regions && prg->to == rg->from &&
+ has_same_uncharge_info(prg, rg)) {
+ prg->to = rg->to;
+
+ list_del(&rg->link);
+ kfree(rg);
+
+ coalesce_file_region(resv, prg);
+ return;
+ }
+
+ nrg = list_next_entry(rg, link);
+ if (&nrg->link != &resv->regions && nrg->from == rg->to &&
+ has_same_uncharge_info(nrg, rg)) {
+ nrg->from = rg->from;
+
+ list_del(&rg->link);
+ kfree(rg);
+
+ coalesce_file_region(resv, nrg);
+ return;
+ }
+}
+
/* Must be called with resv->lock held. Calling this with count_only == true
* will count the number of pages to be added but will not modify the linked
* list. If regions_needed != NULL and count_only == true, then regions_needed
@@ -327,6 +407,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
record_hugetlb_cgroup_uncharge_info(h_cg, h,
resv, nrg);
list_add(&nrg->link, rg->link.prev);
+ coalesce_file_region(resv, nrg);
} else if (regions_needed)
*regions_needed += 1;
}
@@ -344,11 +425,15 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
resv, last_accounted_offset, t);
record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
list_add(&nrg->link, rg->link.prev);
+ coalesce_file_region(resv, nrg);
} else if (regions_needed)
*regions_needed += 1;
}
VM_BUG_ON(add < 0);
+#ifdef CONFIG_DEBUG_VM
+ check_coalesce_bug(resv);
+#endif
return add;
}
--
2.25.0.225.g125e21ebc7-goog
On Tue, 11 Feb 2020, Mina Almasry wrote:
> An earlier patch in this series disabled file_region coalescing in order
> to hang the hugetlb_cgroup uncharge info on the file_region entries.
>
> This patch re-adds support for coalescing of file_region entries.
> Essentially everytime we add an entry, we call a recursive function that
> tries to coalesce the added region with the regions next to it. The
> worst case call depth for this function is 3: one to coalesce with the
> region next to it, one to coalesce to the region prev, and one to reach
> the base case.
>
> This is an important performance optimization as private mappings add
> their entries page by page, and we could incur big performance costs for
> large mappings with lots of file_region entries in their resv_map.
>
> Signed-off-by: Mina Almasry <[email protected]>
Acked-by: David Rientjes <[email protected]>
On 2/11/20 1:31 PM, Mina Almasry wrote:
> An earlier patch in this series disabled file_region coalescing in order
> to hang the hugetlb_cgroup uncharge info on the file_region entries.
>
> This patch re-adds support for coalescing of file_region entries.
> Essentially everytime we add an entry, we call a recursive function that
> tries to coalesce the added region with the regions next to it. The
> worst case call depth for this function is 3: one to coalesce with the
> region next to it, one to coalesce to the region prev, and one to reach
> the base case.
>
> This is an important performance optimization as private mappings add
> their entries page by page, and we could incur big performance costs for
> large mappings with lots of file_region entries in their resv_map.
>
> Signed-off-by: Mina Almasry <[email protected]>
>
> ---
>
> Changes in v12:
> - Changed logic for coalescing. Instead of checking inline to coalesce
> with only the region on next or prev, we now have a recursive function
> that takes care of coalescing in both directions.
> - For testing this code I added a bunch of debug code that checks that
> the entries in the resv_map are coalesced appropriately. This passes
> with libhugetlbfs tests.
>
> ---
> mm/hugetlb.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 85 insertions(+)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 2d62dd35399db..45219cb58ac71 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -276,6 +276,86 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
> #endif
> }
>
> +static bool has_same_uncharge_info(struct file_region *rg,
> + struct file_region *org)
> +{
> +#ifdef CONFIG_CGROUP_HUGETLB
> + return rg && org &&
> + rg->reservation_counter == org->reservation_counter &&
> + rg->css == org->css;
> +
> +#else
> + return true;
> +#endif
> +}
> +
> +#ifdef CONFIG_DEBUG_VM
> +static void dump_resv_map(struct resv_map *resv)
> +{
> + struct list_head *head = &resv->regions;
> + struct file_region *rg = NULL;
> +
> + pr_err("--------- start print resv_map ---------\n");
> + list_for_each_entry(rg, head, link) {
> + pr_err("rg->from=%ld, rg->to=%ld, rg->reservation_counter=%px, rg->css=%px\n",
> + rg->from, rg->to, rg->reservation_counter, rg->css);
> + }
> + pr_err("--------- end print resv_map ---------\n");
> +}
> +
> +/* Debug function to loop over the resv_map and make sure that coalescing is
> + * working.
> + */
> +static void check_coalesce_bug(struct resv_map *resv)
> +{
> + struct list_head *head = &resv->regions;
> + struct file_region *rg = NULL, *nrg = NULL;
> +
> + list_for_each_entry(rg, head, link) {
> + nrg = list_next_entry(rg, link);
> +
> + if (&nrg->link == head)
> + break;
> +
> + if (nrg->reservation_counter && nrg->from == rg->to &&
> + nrg->reservation_counter == rg->reservation_counter &&
> + nrg->css == rg->css) {
> + dump_resv_map(resv);
> + VM_BUG_ON(true);
> + }
> + }
> +}
> +#endif
> +
> +static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
> +{
> + struct file_region *nrg = NULL, *prg = NULL;
> +
> + prg = list_prev_entry(rg, link);
> + if (&prg->link != &resv->regions && prg->to == rg->from &&
> + has_same_uncharge_info(prg, rg)) {
> + prg->to = rg->to;
> +
> + list_del(&rg->link);
> + kfree(rg);
> +
> + coalesce_file_region(resv, prg);
> + return;
> + }
> +
> + nrg = list_next_entry(rg, link);
> + if (&nrg->link != &resv->regions && nrg->from == rg->to &&
> + has_same_uncharge_info(nrg, rg)) {
> + nrg->from = rg->from;
> +
> + list_del(&rg->link);
> + kfree(rg);
> +
> + coalesce_file_region(resv, nrg);
> + return;
> + }
> +}
> +
> /* Must be called with resv->lock held. Calling this with count_only == true
> * will count the number of pages to be added but will not modify the linked
> * list. If regions_needed != NULL and count_only == true, then regions_needed
> @@ -327,6 +407,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
> record_hugetlb_cgroup_uncharge_info(h_cg, h,
> resv, nrg);
> list_add(&nrg->link, rg->link.prev);
> + coalesce_file_region(resv, nrg);
> } else if (regions_needed)
> *regions_needed += 1;
> }
> @@ -344,11 +425,15 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
> resv, last_accounted_offset, t);
> record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
> list_add(&nrg->link, rg->link.prev);
> + coalesce_file_region(resv, nrg);
> } else if (regions_needed)
> *regions_needed += 1;
> }
>
> VM_BUG_ON(add < 0);
> +#ifdef CONFIG_DEBUG_VM
> + check_coalesce_bug(resv);
> +#endif
Some distros have CONFIG_DEBUG_VM on in their default kernels. Fedora comes
to mind. Yes, this means 'VM_BUG_ON()' become 'BUG_ON()'. That is somewhat
accepted. I don't think we want this debug code behind CONFIG_DEBUG_VM and
called each time a file region is added. It may be overkill to make it a
debug option via the config system. Perhaps, just make it something like
CONFIG_DEBUG_HUGETLB_RSV_REGION and let hugetlb developers turn it on if
they would like?
Other than that, the code looks good.
--
Mike Kravetz
> return add;
> }
>
> --
> 2.25.0.225.g125e21ebc7-goog
>
On Tue, Feb 18, 2020 at 7:31 PM Mike Kravetz <[email protected]> wrote:
>
> On 2/11/20 1:31 PM, Mina Almasry wrote:
> > An earlier patch in this series disabled file_region coalescing in order
> > to hang the hugetlb_cgroup uncharge info on the file_region entries.
> >
> > This patch re-adds support for coalescing of file_region entries.
> > Essentially everytime we add an entry, we call a recursive function that
> > tries to coalesce the added region with the regions next to it. The
> > worst case call depth for this function is 3: one to coalesce with the
> > region next to it, one to coalesce to the region prev, and one to reach
> > the base case.
> >
> > This is an important performance optimization as private mappings add
> > their entries page by page, and we could incur big performance costs for
> > large mappings with lots of file_region entries in their resv_map.
> >
> > Signed-off-by: Mina Almasry <[email protected]>
> >
> > ---
> >
> > Changes in v12:
> > - Changed logic for coalescing. Instead of checking inline to coalesce
> > with only the region on next or prev, we now have a recursive function
> > that takes care of coalescing in both directions.
> > - For testing this code I added a bunch of debug code that checks that
> > the entries in the resv_map are coalesced appropriately. This passes
> > with libhugetlbfs tests.
> >
> > ---
> > mm/hugetlb.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 85 insertions(+)
> >
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 2d62dd35399db..45219cb58ac71 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -276,6 +276,86 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
> > #endif
> > }
> >
> > +static bool has_same_uncharge_info(struct file_region *rg,
> > + struct file_region *org)
> > +{
> > +#ifdef CONFIG_CGROUP_HUGETLB
> > + return rg && org &&
> > + rg->reservation_counter == org->reservation_counter &&
> > + rg->css == org->css;
> > +
> > +#else
> > + return true;
> > +#endif
> > +}
> > +
> > +#ifdef CONFIG_DEBUG_VM
> > +static void dump_resv_map(struct resv_map *resv)
> > +{
> > + struct list_head *head = &resv->regions;
> > + struct file_region *rg = NULL;
> > +
> > + pr_err("--------- start print resv_map ---------\n");
> > + list_for_each_entry(rg, head, link) {
> > + pr_err("rg->from=%ld, rg->to=%ld, rg->reservation_counter=%px, rg->css=%px\n",
> > + rg->from, rg->to, rg->reservation_counter, rg->css);
> > + }
> > + pr_err("--------- end print resv_map ---------\n");
> > +}
> > +
> > +/* Debug function to loop over the resv_map and make sure that coalescing is
> > + * working.
> > + */
> > +static void check_coalesce_bug(struct resv_map *resv)
> > +{
> > + struct list_head *head = &resv->regions;
> > + struct file_region *rg = NULL, *nrg = NULL;
> > +
> > + list_for_each_entry(rg, head, link) {
> > + nrg = list_next_entry(rg, link);
> > +
> > + if (&nrg->link == head)
> > + break;
> > +
> > + if (nrg->reservation_counter && nrg->from == rg->to &&
> > + nrg->reservation_counter == rg->reservation_counter &&
> > + nrg->css == rg->css) {
> > + dump_resv_map(resv);
> > + VM_BUG_ON(true);
> > + }
> > + }
> > +}
> > +#endif
> > +
> > +static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
> > +{
> > + struct file_region *nrg = NULL, *prg = NULL;
> > +
> > + prg = list_prev_entry(rg, link);
> > + if (&prg->link != &resv->regions && prg->to == rg->from &&
> > + has_same_uncharge_info(prg, rg)) {
> > + prg->to = rg->to;
> > +
> > + list_del(&rg->link);
> > + kfree(rg);
> > +
> > + coalesce_file_region(resv, prg);
> > + return;
> > + }
> > +
> > + nrg = list_next_entry(rg, link);
> > + if (&nrg->link != &resv->regions && nrg->from == rg->to &&
> > + has_same_uncharge_info(nrg, rg)) {
> > + nrg->from = rg->from;
> > +
> > + list_del(&rg->link);
> > + kfree(rg);
> > +
> > + coalesce_file_region(resv, nrg);
> > + return;
> > + }
> > +}
> > +
> > /* Must be called with resv->lock held. Calling this with count_only == true
> > * will count the number of pages to be added but will not modify the linked
> > * list. If regions_needed != NULL and count_only == true, then regions_needed
> > @@ -327,6 +407,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
> > record_hugetlb_cgroup_uncharge_info(h_cg, h,
> > resv, nrg);
> > list_add(&nrg->link, rg->link.prev);
> > + coalesce_file_region(resv, nrg);
> > } else if (regions_needed)
> > *regions_needed += 1;
> > }
> > @@ -344,11 +425,15 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
> > resv, last_accounted_offset, t);
> > record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
> > list_add(&nrg->link, rg->link.prev);
> > + coalesce_file_region(resv, nrg);
> > } else if (regions_needed)
> > *regions_needed += 1;
> > }
> >
> > VM_BUG_ON(add < 0);
> > +#ifdef CONFIG_DEBUG_VM
> > + check_coalesce_bug(resv);
> > +#endif
>
> Some distros have CONFIG_DEBUG_VM on in their default kernels. Fedora comes
> to mind. Yes, this means 'VM_BUG_ON()' become 'BUG_ON()'. That is somewhat
> accepted. I don't think we want this debug code behind CONFIG_DEBUG_VM and
> called each time a file region is added. It may be overkill to make it a
> debug option via the config system. Perhaps, just make it something like
> CONFIG_DEBUG_HUGETLB_RSV_REGION and let hugetlb developers turn it on if
> they would like?
>
Ah, I feel like adding a whole config to accommodate this function is
overkill and will end up being dead code anyway as no-one remembers to
turn it on. If it's the same to you I'll take it out of the patch and
leave it as a local change for me to test with.
> Other than that, the code looks good.
> --
> Mike Kravetz
>
> > return add;
> > }
> >
> > --
> > 2.25.0.225.g125e21ebc7-goog
> >
Commit b5f16a533ce8a ("hugetlb: support file_region coalescing
again") made changes to the resv_map code which are hard to test, so
it added debug code guarded by CONFIG_DEBUG_VM which conducts an
expensive operation that loops over the resv_map and checks it for
errors.
Unfortunately, some distros have CONFIG_DEBUG_VM on in their default
kernels, and we don't want this debug code behind CONFIG_DEBUG_VM
and called each time a file region is added. This patch removes this
debug code. I may look into making it a test or leave it for my local
testing.
Signed-off-by: Mina Almasry <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: [email protected]
Cc: [email protected]
Fixes: b5f16a533ce8a ("hugetlb: support file_region coalescing again")
---
mm/hugetlb.c | 43 -------------------------------------------
1 file changed, 43 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 94e27dfec0435..3febbbda3dc2b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -289,48 +289,6 @@ static bool has_same_uncharge_info(struct file_region *rg,
#endif
}
-#if defined(CONFIG_DEBUG_VM) && defined(CONFIG_CGROUP_HUGETLB)
-static void dump_resv_map(struct resv_map *resv)
-{
- struct list_head *head = &resv->regions;
- struct file_region *rg = NULL;
-
- pr_err("--------- start print resv_map ---------\n");
- list_for_each_entry(rg, head, link) {
- pr_err("rg->from=%ld, rg->to=%ld, rg->reservation_counter=%px, rg->css=%px\n",
- rg->from, rg->to, rg->reservation_counter, rg->css);
- }
- pr_err("--------- end print resv_map ---------\n");
-}
-
-/* Debug function to loop over the resv_map and make sure that coalescing is
- * working.
- */
-static void check_coalesce_bug(struct resv_map *resv)
-{
- struct list_head *head = &resv->regions;
- struct file_region *rg = NULL, *nrg = NULL;
-
- list_for_each_entry(rg, head, link) {
- nrg = list_next_entry(rg, link);
-
- if (&nrg->link == head)
- break;
-
- if (nrg->reservation_counter && nrg->from == rg->to &&
- nrg->reservation_counter == rg->reservation_counter &&
- nrg->css == rg->css) {
- dump_resv_map(resv);
- VM_BUG_ON(true);
- }
- }
-}
-#else
-static void check_coalesce_bug(struct resv_map *resv)
-{
-}
-#endif
-
static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
{
struct file_region *nrg = NULL, *prg = NULL;
@@ -435,7 +393,6 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
}
VM_BUG_ON(add < 0);
- check_coalesce_bug(resv);
return add;
}
--
2.25.0.265.gbab2e86ba0-goog
On 2/19/20 3:36 PM, Mina Almasry wrote:
> Commit b5f16a533ce8a ("hugetlb: support file_region coalescing
> again") made changes to the resv_map code which are hard to test, it
> so added debug code guarded by CONFIG_DEBUG_VM which conducts an
> expensive operation that loops over the resv_map and checks it for
> errors.
>
> Unfortunately, some distros have CONFIG_DEBUG_VM on in their default
> kernels, and we don't want this debug code behind CONFIG_DEBUG_VM
> and called each time a file region is added. This patch removes this
> debug code. I may look into making it a test or leave it for my local
> testing.
>
> Signed-off-by: Mina Almasry <[email protected]>
>
> Cc: David Rientjes <[email protected]>
> Cc: Mike Kravetz <[email protected]>
> Cc: Shakeel Butt <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
>
> Fixes: b5f16a533ce8a ("hugetlb: support file_region coalescing again")
Thanks!
Reviewed-by: Mike Kravetz <[email protected]>
The review also applies to the original patch with the removal of
this code.
--
Mike Kravetz