2019-11-21 01:23:56

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH -tip v3 0/4] x86,mm/pat: Move towards using generic interval tree

Changes from v2:
- Removed unnecessary goto error path in patch 1, per tglx.
- Added the corresponding Makefile change for patch 4, per mingo.
- Added tglx's review tags.

Changes from v1[0]:
- Got rid of more code in patch 1 by using the end - 1 for closed
intervals, instead of keeping the overlap-check.

- added an additional cleanup patch.

Hi,

I'm sending this series again in this format as the interval tree
node conversion will, at a minimum, take longer than hoped for
(ie: Jason still removing interval tree users for the mmu_notifier
rework[1]). There is also a chance this will never see be done.

As such, I'm resending this series (where patch 1 is the only
interesting one and which Ingo acked previously, with the exception
that the nodes remain fully closed). In the future, it would be
trivial to port pat tree to semi open nodes, but for now think that
it makes sense to just get the pat changes in.

Please consider for v5.5. Thanks!

[0] https://lore.kernel.org/lkml/[email protected]/
[1] https://marc.info/?l=linux-mm&m=157116340411211

Davidlohr Bueso (4):
x86/mm, pat: Convert pat tree to generic interval tree
x86/mm, pat: Cleanup some of the local memtype_rb_* calls
x86/mm, pat: Drop rbt prefix from external memtype calls
x86/mm, pat: Rename pat_rbtree.c to pat_interval.c

arch/x86/mm/Makefile | 2 +-
arch/x86/mm/pat.c | 8 +-
arch/x86/mm/pat_internal.h | 20 ++--
arch/x86/mm/pat_interval.c | 185 +++++++++++++++++++++++++++++++
arch/x86/mm/pat_rbtree.c | 268 ---------------------------------------------
5 files changed, 200 insertions(+), 283 deletions(-)
create mode 100644 arch/x86/mm/pat_interval.c
delete mode 100644 arch/x86/mm/pat_rbtree.c

--
2.16.4


2019-11-21 01:24:53

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 3/4] x86/mm, pat: Drop rbt prefix from external memtype calls

Drop the rbt_memtype_* calls (those used by pat.c) as we
no longer use an rbtree directly.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Davidlohr Bueso <[email protected]>
---
arch/x86/mm/pat.c | 8 ++++----
arch/x86/mm/pat_internal.h | 20 ++++++++++----------
arch/x86/mm/pat_rbtree.c | 12 ++++++------
3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d9fbd4f69920..2d758e19ef22 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,

spin_lock(&memtype_lock);

- err = rbt_memtype_check_insert(new, new_type);
+ err = memtype_check_insert(new, new_type);
if (err) {
pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
@@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end)
}

spin_lock(&memtype_lock);
- entry = rbt_memtype_erase(start, end);
+ entry = memtype_erase(start, end);
spin_unlock(&memtype_lock);

if (IS_ERR(entry)) {
@@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)

spin_lock(&memtype_lock);

- entry = rbt_memtype_lookup(paddr);
+ entry = memtype_lookup(paddr);
if (entry != NULL)
rettype = entry->type;
else
@@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
return NULL;

spin_lock(&memtype_lock);
- ret = rbt_memtype_copy_nth_element(print_entry, pos);
+ ret = memtype_copy_nth_element(print_entry, pos);
spin_unlock(&memtype_lock);

if (!ret) {
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
index eeb5caeb089b..79a06684349e 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat_internal.h
@@ -29,20 +29,20 @@ static inline char *cattr_name(enum page_cache_mode pcm)
}

#ifdef CONFIG_X86_PAT
-extern int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type);
-extern struct memtype *rbt_memtype_erase(u64 start, u64 end);
-extern struct memtype *rbt_memtype_lookup(u64 addr);
-extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
+extern int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type);
+extern struct memtype *memtype_erase(u64 start, u64 end);
+extern struct memtype *memtype_lookup(u64 addr);
+extern int memtype_copy_nth_element(struct memtype *out, loff_t pos);
#else
-static inline int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type)
+static inline int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type)
{ return 0; }
-static inline struct memtype *rbt_memtype_erase(u64 start, u64 end)
+static inline struct memtype *memtype_erase(u64 start, u64 end)
{ return NULL; }
-static inline struct memtype *rbt_memtype_lookup(u64 addr)
+static inline struct memtype *memtype_lookup(u64 addr)
{ return NULL; }
-static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{ return 0; }
#endif

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index d31ca773d4bb..47a1bf30748f 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -109,8 +109,8 @@ static int memtype_check_conflict(u64 start, u64 end,
return -EBUSY;
}

-int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
+int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *ret_type)
{
int err = 0;

@@ -125,13 +125,13 @@ int rbt_memtype_check_insert(struct memtype *new,
return 0;
}

-struct memtype *rbt_memtype_erase(u64 start, u64 end)
+struct memtype *memtype_erase(u64 start, u64 end)
{
struct memtype *data;

/*
* Since the memtype_rbroot tree allows overlapping ranges,
- * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
* a whole node for the munmap case. If no such entry is found,
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
@@ -157,14 +157,14 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
return data;
}

-struct memtype *rbt_memtype_lookup(u64 addr)
+struct memtype *memtype_lookup(u64 addr)
{
return memtype_interval_iter_first(&memtype_rbroot, addr,
addr + PAGE_SIZE);
}

#if defined(CONFIG_DEBUG_FS)
-int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{
struct memtype *match;
int i = 1;
--
2.16.4

2019-11-21 01:25:14

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 4/4] x86/mm, pat: Rename pat_rbtree.c to pat_interval.c

Considering the previous changes, this is a more proper name.

Signed-off-by: Davidlohr Bueso <[email protected]>
---
arch/x86/mm/Makefile | 2 +-
arch/x86/mm/{pat_rbtree.c => pat_interval.c} | 0
2 files changed, 1 insertion(+), 1 deletion(-)
rename arch/x86/mm/{pat_rbtree.c => pat_interval.c} (100%)

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc9b341..de403df8eadc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)

CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace

-obj-$(CONFIG_X86_PAT) += pat_rbtree.o
+obj-$(CONFIG_X86_PAT) += pat_interval.o

obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_interval.c
similarity index 100%
rename from arch/x86/mm/pat_rbtree.c
rename to arch/x86/mm/pat_interval.c
--
2.16.4

2019-11-21 01:25:58

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 1/4] x86/mm, pat: Convert pat tree to generic interval tree

With some considerations, the custom pat_rbtree implementation can be
simplified to use most of the generic interval_tree machinery.

o The tree inorder traversal can slightly differ when there are key
('start') collisions in the tree due to one going left and another right.
This, however, only affects the output of debugfs' pat_memtype_list file.

o Generic interval trees are now fully closed [a, b], for which we need
to adjust the last endpoint (ie: end - 1).

o Erasing logic must remain untouched as well.

In order for the types to remain u64, the 'memtype_interval' calls are
introduced, as opposed to simply using struct interval_tree.

In addition, pat tree might potentially also benefit by the fast overlap
detection for the insertion case when looking up the first overlapping node
in the tree.

Finally, I've tested this on various servers, via sanity warnings, running
side by side with the current version and so far see no differences in the
returned pointer node when doing memtype_rb_lowest_match() lookups.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Davidlohr Bueso <[email protected]>
---
arch/x86/mm/pat_rbtree.c | 162 ++++++++++++-----------------------------------
1 file changed, 42 insertions(+), 120 deletions(-)

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 65ebe4b88f7c..c3d119cd155d 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -5,14 +5,13 @@
* Authors: Venkatesh Pallipadi <[email protected]>
* Suresh B Siddha <[email protected]>
*
- * Interval tree (augmented rbtree) used to store the PAT memory type
- * reservations.
+ * Interval tree used to store the PAT memory type reservations.
*/

#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
-#include <linux/rbtree_augmented.h>
+#include <linux/interval_tree_generic.h>
#include <linux/sched.h>
#include <linux/gfp.h>

@@ -33,72 +32,33 @@
*
* memtype_lock protects the rbtree.
*/
-
-static struct rb_root memtype_rbroot = RB_ROOT;
-
-static int is_node_overlap(struct memtype *node, u64 start, u64 end)
+static inline u64 memtype_interval_start(struct memtype *memtype)
{
- if (node->start >= end || node->end <= start)
- return 0;
-
- return 1;
+ return memtype->start;
}

-static u64 get_subtree_max_end(struct rb_node *node)
+static inline u64 memtype_interval_end(struct memtype *memtype)
{
- u64 ret = 0;
- if (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
- ret = data->subtree_max_end;
- }
- return ret;
+ return memtype->end - 1;
}
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ memtype_interval_start, memtype_interval_end,
+ static, memtype_interval)

-#define NODE_END(node) ((node)->end)
-
-RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
- struct memtype, rb, u64, subtree_max_end, NODE_END)
-
-/* Find the first (lowest start addr) overlapping range from rb tree */
-static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
- u64 start, u64 end)
-{
- struct rb_node *node = root->rb_node;
- struct memtype *last_lower = NULL;
-
- while (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
-
- if (get_subtree_max_end(node->rb_left) > start) {
- /* Lowest overlap if any must be on left side */
- node = node->rb_left;
- } else if (is_node_overlap(data, start, end)) {
- last_lower = data;
- break;
- } else if (start >= data->start) {
- /* Lowest overlap if any must be on right side */
- node = node->rb_right;
- } else {
- break;
- }
- }
- return last_lower; /* Returns NULL if there is no overlap */
-}
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;

enum {
MEMTYPE_EXACT_MATCH = 0,
MEMTYPE_END_MATCH = 1
};

-static struct memtype *memtype_rb_match(struct rb_root *root,
- u64 start, u64 end, int match_type)
+static struct memtype *memtype_match(struct rb_root_cached *root,
+ u64 start, u64 end, int match_type)
{
struct memtype *match;

- match = memtype_rb_lowest_match(root, start, end);
+ match = memtype_interval_iter_first(root, start, end);
while (match != NULL && match->start < end) {
- struct rb_node *node;
-
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
return match;
@@ -107,26 +67,21 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
(match->start < start) && (match->end == end))
return match;

- node = rb_next(&match->rb);
- if (node)
- match = rb_entry(node, struct memtype, rb);
- else
- match = NULL;
+ match = memtype_interval_iter_next(match, start, end);
}

return NULL; /* Returns NULL if there is no match */
}

-static int memtype_rb_check_conflict(struct rb_root *root,
+static int memtype_rb_check_conflict(struct rb_root_cached *root,
u64 start, u64 end,
enum page_cache_mode reqtype,
enum page_cache_mode *newtype)
{
- struct rb_node *node;
struct memtype *match;
enum page_cache_mode found_type = reqtype;

- match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
if (match == NULL)
goto success;

@@ -136,19 +91,12 @@ static int memtype_rb_check_conflict(struct rb_root *root,
dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
found_type = match->type;

- node = rb_next(&match->rb);
- while (node) {
- match = rb_entry(node, struct memtype, rb);
-
- if (match->start >= end) /* Checked all possible matches */
- goto success;
-
- if (is_node_overlap(match, start, end) &&
- match->type != found_type) {
+ match = memtype_interval_iter_next(match, start, end);
+ while (match) {
+ if (match->type != found_type)
goto failure;
- }

- node = rb_next(&match->rb);
+ match = memtype_interval_iter_next(match, start, end);
}
success:
if (newtype)
@@ -163,44 +111,21 @@ static int memtype_rb_check_conflict(struct rb_root *root,
return -EBUSY;
}

-static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
-{
- struct rb_node **node = &(root->rb_node);
- struct rb_node *parent = NULL;
-
- while (*node) {
- struct memtype *data = rb_entry(*node, struct memtype, rb);
-
- parent = *node;
- if (data->subtree_max_end < newdata->end)
- data->subtree_max_end = newdata->end;
- if (newdata->start <= data->start)
- node = &((*node)->rb_left);
- else if (newdata->start > data->start)
- node = &((*node)->rb_right);
- }
-
- newdata->subtree_max_end = newdata->end;
- rb_link_node(&newdata->rb, parent, node);
- rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
-}
-
int rbt_memtype_check_insert(struct memtype *new,
enum page_cache_mode *ret_type)
{
int err = 0;

err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
- new->type, ret_type);
+ new->type, ret_type);
+ if (err)
+ return err;

- if (!err) {
- if (ret_type)
- new->type = *ret_type;
+ if (ret_type)
+ new->type = *ret_type;

- new->subtree_max_end = new->end;
- memtype_rb_insert(&memtype_rbroot, new);
- }
- return err;
+ memtype_interval_insert(new, &memtype_rbroot);
+ return 0;
}

struct memtype *rbt_memtype_erase(u64 start, u64 end)
@@ -214,26 +139,23 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_EXACT_MATCH);
+ data = memtype_match(&memtype_rbroot, start, end,
+ MEMTYPE_EXACT_MATCH);
if (!data) {
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_END_MATCH);
+ data = memtype_match(&memtype_rbroot, start, end,
+ MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}

if (data->start == start) {
/* munmap: erase this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
+ memtype_interval_remove(data, &memtype_rbroot);
} else {
/* mremap: update the end value of this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
+ memtype_interval_remove(data, &memtype_rbroot);
data->end = start;
- data->subtree_max_end = data->end;
- memtype_rb_insert(&memtype_rbroot, data);
+ memtype_interval_insert(data, &memtype_rbroot);
return NULL;
}

@@ -242,24 +164,24 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)

struct memtype *rbt_memtype_lookup(u64 addr)
{
- return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
+ return memtype_interval_iter_first(&memtype_rbroot, addr,
+ addr + PAGE_SIZE);
}

#if defined(CONFIG_DEBUG_FS)
int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
{
- struct rb_node *node;
+ struct memtype *match;
int i = 1;

- node = rb_first(&memtype_rbroot);
- while (node && pos != i) {
- node = rb_next(node);
+ match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+ while (match && pos != i) {
+ match = memtype_interval_iter_next(match, 0, ULONG_MAX);
i++;
}

- if (node) { /* pos == i */
- struct memtype *this = rb_entry(node, struct memtype, rb);
- *out = *this;
+ if (match) { /* pos == i */
+ *out = *match;
return 0;
} else {
return 1;
--
2.16.4

2019-11-21 01:26:33

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 2/4] x86/mm, pat: Cleanup some of the local memtype_rb_* calls

Cleanup by both getting rid of passing the rb_root down the helper
calls; there is only one. Secondly rename some of the calls from
memtype_rb_*.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Davidlohr Bueso <[email protected]>
---
arch/x86/mm/pat_rbtree.c | 21 ++++++++-------------
1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index c3d119cd155d..d31ca773d4bb 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -52,12 +52,11 @@ enum {
MEMTYPE_END_MATCH = 1
};

-static struct memtype *memtype_match(struct rb_root_cached *root,
- u64 start, u64 end, int match_type)
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
{
struct memtype *match;

- match = memtype_interval_iter_first(root, start, end);
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
while (match != NULL && match->start < end) {
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
@@ -73,10 +72,9 @@ static struct memtype *memtype_match(struct rb_root_cached *root,
return NULL; /* Returns NULL if there is no match */
}

-static int memtype_rb_check_conflict(struct rb_root_cached *root,
- u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
{
struct memtype *match;
enum page_cache_mode found_type = reqtype;
@@ -116,8 +114,7 @@ int rbt_memtype_check_insert(struct memtype *new,
{
int err = 0;

- err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
- new->type, ret_type);
+ err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
if (err)
return err;

@@ -139,11 +136,9 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
- data = memtype_match(&memtype_rbroot, start, end,
- MEMTYPE_EXACT_MATCH);
+ data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
if (!data) {
- data = memtype_match(&memtype_rbroot, start, end,
- MEMTYPE_END_MATCH);
+ data = memtype_match(start, end, MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}
--
2.16.4

2019-11-21 05:59:22

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH -tip v3 0/4] x86,mm/pat: Move towards using generic interval tree


* Davidlohr Bueso <[email protected]> wrote:

> Changes from v2:
> - Removed unnecessary goto error path in patch 1, per tglx.
> - Added the corresponding Makefile change for patch 4, per mingo.
> - Added tglx's review tags.
>
> Changes from v1[0]:
> - Got rid of more code in patch 1 by using the end - 1 for closed
> intervals, instead of keeping the overlap-check.
>
> - added an additional cleanup patch.
>
> Hi,
>
> I'm sending this series again in this format as the interval tree
> node conversion will, at a minimum, take longer than hoped for
> (ie: Jason still removing interval tree users for the mmu_notifier
> rework[1]). There is also a chance this will never see be done.
>
> As such, I'm resending this series (where patch 1 is the only
> interesting one and which Ingo acked previously, with the exception
> that the nodes remain fully closed). In the future, it would be
> trivial to port pat tree to semi open nodes, but for now think that
> it makes sense to just get the pat changes in.
>
> Please consider for v5.5. Thanks!
>
> [0] https://lore.kernel.org/lkml/[email protected]/
> [1] https://marc.info/?l=linux-mm&m=157116340411211
>
> Davidlohr Bueso (4):
> x86/mm, pat: Convert pat tree to generic interval tree
> x86/mm, pat: Cleanup some of the local memtype_rb_* calls
> x86/mm, pat: Drop rbt prefix from external memtype calls
> x86/mm, pat: Rename pat_rbtree.c to pat_interval.c
>
> arch/x86/mm/Makefile | 2 +-
> arch/x86/mm/pat.c | 8 +-
> arch/x86/mm/pat_internal.h | 20 ++--
> arch/x86/mm/pat_interval.c | 185 +++++++++++++++++++++++++++++++
> arch/x86/mm/pat_rbtree.c | 268 ---------------------------------------------
> 5 files changed, 200 insertions(+), 283 deletions(-)
> create mode 100644 arch/x86/mm/pat_interval.c
> delete mode 100644 arch/x86/mm/pat_rbtree.c

Thanks Davidlohr - this is a really nice cleanup of the logic and of the
tree data structure, and I've applied your earlier series to
tip:WIP.x86/mm already, with a few more work-in-progress patches from me
on top that tidy up this area of the code.

In particular I've done a bunch of changes to improve the hackability of
all pat/memtype/set_memory facilities, we've now got <asm/memtype.h>,
memtype.[ch], memtype_interval.c and set_memory.c in arch/x86/mm/pat/:

dagon:~/tip> ls -l arch/x86/mm/pat/
total 112
-rw-r--r-- 1 mingo mingo 5782 Nov 21 06:41 cpa-test.c
-rw-r--r-- 1 mingo mingo 117 Nov 21 06:41 Makefile
-rw-r--r-- 1 mingo mingo 32026 Nov 21 06:41 memtype.c
-rw-r--r-- 1 mingo mingo 1470 Nov 21 06:41 memtype.h
-rw-r--r-- 1 mingo mingo 5003 Nov 21 06:41 memtype_interval.c
-rw-r--r-- 1 mingo mingo 56668 Nov 21 06:41 set_memory.c

( Note: cpa-test.c should probable be renamed to set_memory_test.c, with
a few explicit set_memory() API tests added as well, not just the
internal change_page_attribute() tests. Will do this later. )

The memtype APIs are (rightside column):

reserve_memtype() => memtype_reserve()
free_memtype() => memtype_free()
kernel_map_sync_memtype() => memtype_kernel_map_sync()
io_reserve_memtype() => memtype_reserve_io()
io_free_memtype() => memtype_free_io()

memtype_check_insert() => memtype_check_insert()
memtype_erase() => memtype_erase()
memtype_lookup() => memtype_lookup()
memtype_copy_nth_element() => memtype_copy_nth_element()

But there's a lot more changes:

218bf1a8c73b: x86/mm/pat: Convert the PAT tree to a generic interval tree
3309be371c20: x86/mm/pat: Clean up some of the local memtype_rb_*() calls
b40805c214c5: x86/mm/pat: Drop the rbt_ prefix from external memtype function names
ee4e7b04b718: x86/mm/pat: Rename pat_rbtree.c to pat_interval.c

8afed68b3426: x86/mm/pat: Update the comments in pat.c and pat_interval.c and refresh the code a bit
bc8a3eed1241: x86/mm/pat: Disambiguate PAT-disabled boot messages
10ffd914266a: x86/mm/pat: Create fixed width output in /sys/kernel/debug/x86/pat_memtype_list, similar to the E820 debug printouts
37dfd5d60000: x86/mm/pat: Simplify the free_memtype() control flow
b686cd38771d: x86/mm/pat: Harmonize 'struct memtype *' local variable and function parameter use
7fa6ebcdfb73: x86/mm/pat: Clean up PAT initialization flags
a224f826be60: x86/mm/pat: Move the memtype related files to arch/x86/mm/pat/
acb2f580640a: x86/mm/pat: Standardize on memtype_*() prefix for APIs
393cac16e6b7: x86/mm/pat: Rename <asm/pat.h> => <asm/memtype.h>
22a6d30c44c7: x86/mm/pat: Clean up <asm/memtype.h> externs
56ca0be07c28: x86/mm/pat: Fix typo in the Kconfig help text
07fbea7b3be2: x86/mm: Tabulate the page table encoding definitions

I'll send them out separately as well once completed, but wanted to give
you a heads-up. My patches are in WIP state because neither the
changelogs nor the split-up is necessarily final.

These can all be accessed and followed under:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git WIP.x86/mm

What do you think about these changes? Anything else you'd like to see
happen?

In terms of upstreaming plans, the 4 commits from you I grouped
separately definitely look like v5.5 material to me - will merge them
into tip:x86/mm later today.

Thanks,

Ingo

Subject: [tip: x86/mm] x86/mm/pat: Rename pat_rbtree.c to pat_interval.c

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 820cac65197c2a5ff8eb5af658909ce0210a358b
Gitweb: https://git.kernel.org/tip/820cac65197c2a5ff8eb5af658909ce0210a358b
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:16:01 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 17:37:32 +01:00

x86/mm/pat: Rename pat_rbtree.c to pat_interval.c

Considering the previous changes, this is a more proper name.

Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Wanpeng Li <[email protected]>
Cc: Yauheni Kaliuta <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/Makefile | 2 +-
arch/x86/mm/pat_interval.c | 185 ++++++++++++++++++++++++++++++++++++-
arch/x86/mm/pat_rbtree.c | 185 +------------------------------------
3 files changed, 186 insertions(+), 186 deletions(-)
create mode 100644 arch/x86/mm/pat_interval.c
delete mode 100644 arch/x86/mm/pat_rbtree.c

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc..de403df 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)

CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace

-obj-$(CONFIG_X86_PAT) += pat_rbtree.o
+obj-$(CONFIG_X86_PAT) += pat_interval.o

obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o

diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c
new file mode 100644
index 0000000..47a1bf3
--- /dev/null
+++ b/arch/x86/mm/pat_interval.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <[email protected]>
+ * Suresh B Siddha <[email protected]>
+ *
+ * Interval tree used to store the PAT memory type reservations.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/pat.h>
+
+#include "pat_internal.h"
+
+/*
+ * The memtype tree keeps track of memory type for specific
+ * physical memory areas. Without proper tracking, conflicting memory
+ * types in different mappings can cause CPU cache corruption.
+ *
+ * The tree is an interval tree (augmented rbtree) with tree ordered
+ * on starting address. Tree can contain multiple entries for
+ * different regions which overlap. All the aliases have the same
+ * cache attributes of course.
+ *
+ * memtype_lock protects the rbtree.
+ */
+static inline u64 memtype_interval_start(struct memtype *memtype)
+{
+ return memtype->start;
+}
+
+static inline u64 memtype_interval_end(struct memtype *memtype)
+{
+ return memtype->end - 1;
+}
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ memtype_interval_start, memtype_interval_end,
+ static, memtype_interval)
+
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
+
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
+{
+ struct memtype *match;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+ while (match != NULL && match->start < end) {
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (match->start == start) && (match->end == end))
+ return match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (match->start < start) && (match->end == end))
+ return match;
+
+ match = memtype_interval_iter_next(match, start, end);
+ }
+
+ return NULL; /* Returns NULL if there is no match */
+}
+
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
+{
+ struct memtype *match;
+ enum page_cache_mode found_type = reqtype;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+ if (match == NULL)
+ goto success;
+
+ if (match->type != found_type && newtype == NULL)
+ goto failure;
+
+ dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
+ found_type = match->type;
+
+ match = memtype_interval_iter_next(match, start, end);
+ while (match) {
+ if (match->type != found_type)
+ goto failure;
+
+ match = memtype_interval_iter_next(match, start, end);
+ }
+success:
+ if (newtype)
+ *newtype = found_type;
+
+ return 0;
+
+failure:
+ pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid, start, end,
+ cattr_name(found_type), cattr_name(match->type));
+ return -EBUSY;
+}
+
+int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *ret_type)
+{
+ int err = 0;
+
+ err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
+ if (err)
+ return err;
+
+ if (ret_type)
+ new->type = *ret_type;
+
+ memtype_interval_insert(new, &memtype_rbroot);
+ return 0;
+}
+
+struct memtype *memtype_erase(u64 start, u64 end)
+{
+ struct memtype *data;
+
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrink the size of a node
+ * from the end for the mremap case.
+ */
+ data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
+ if (!data) {
+ data = memtype_match(start, end, MEMTYPE_END_MATCH);
+ if (!data)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (data->start == start) {
+ /* munmap: erase this node */
+ memtype_interval_remove(data, &memtype_rbroot);
+ } else {
+ /* mremap: update the end value of this node */
+ memtype_interval_remove(data, &memtype_rbroot);
+ data->end = start;
+ memtype_interval_insert(data, &memtype_rbroot);
+ return NULL;
+ }
+
+ return data;
+}
+
+struct memtype *memtype_lookup(u64 addr)
+{
+ return memtype_interval_iter_first(&memtype_rbroot, addr,
+ addr + PAGE_SIZE);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+int memtype_copy_nth_element(struct memtype *out, loff_t pos)
+{
+ struct memtype *match;
+ int i = 1;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+ while (match && pos != i) {
+ match = memtype_interval_iter_next(match, 0, ULONG_MAX);
+ i++;
+ }
+
+ if (match) { /* pos == i */
+ *out = *match;
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#endif
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
deleted file mode 100644
index 47a1bf3..0000000
--- a/arch/x86/mm/pat_rbtree.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Handle caching attributes in page tables (PAT)
- *
- * Authors: Venkatesh Pallipadi <[email protected]>
- * Suresh B Siddha <[email protected]>
- *
- * Interval tree used to store the PAT memory type reservations.
- */
-
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/interval_tree_generic.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-
-#include <asm/pgtable.h>
-#include <asm/pat.h>
-
-#include "pat_internal.h"
-
-/*
- * The memtype tree keeps track of memory type for specific
- * physical memory areas. Without proper tracking, conflicting memory
- * types in different mappings can cause CPU cache corruption.
- *
- * The tree is an interval tree (augmented rbtree) with tree ordered
- * on starting address. Tree can contain multiple entries for
- * different regions which overlap. All the aliases have the same
- * cache attributes of course.
- *
- * memtype_lock protects the rbtree.
- */
-static inline u64 memtype_interval_start(struct memtype *memtype)
-{
- return memtype->start;
-}
-
-static inline u64 memtype_interval_end(struct memtype *memtype)
-{
- return memtype->end - 1;
-}
-INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
- memtype_interval_start, memtype_interval_end,
- static, memtype_interval)
-
-static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
-
-enum {
- MEMTYPE_EXACT_MATCH = 0,
- MEMTYPE_END_MATCH = 1
-};
-
-static struct memtype *memtype_match(u64 start, u64 end, int match_type)
-{
- struct memtype *match;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end);
- while (match != NULL && match->start < end) {
- if ((match_type == MEMTYPE_EXACT_MATCH) &&
- (match->start == start) && (match->end == end))
- return match;
-
- if ((match_type == MEMTYPE_END_MATCH) &&
- (match->start < start) && (match->end == end))
- return match;
-
- match = memtype_interval_iter_next(match, start, end);
- }
-
- return NULL; /* Returns NULL if there is no match */
-}
-
-static int memtype_check_conflict(u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
-{
- struct memtype *match;
- enum page_cache_mode found_type = reqtype;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end);
- if (match == NULL)
- goto success;
-
- if (match->type != found_type && newtype == NULL)
- goto failure;
-
- dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
- found_type = match->type;
-
- match = memtype_interval_iter_next(match, start, end);
- while (match) {
- if (match->type != found_type)
- goto failure;
-
- match = memtype_interval_iter_next(match, start, end);
- }
-success:
- if (newtype)
- *newtype = found_type;
-
- return 0;
-
-failure:
- pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid, start, end,
- cattr_name(found_type), cattr_name(match->type));
- return -EBUSY;
-}
-
-int memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
-{
- int err = 0;
-
- err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
- if (err)
- return err;
-
- if (ret_type)
- new->type = *ret_type;
-
- memtype_interval_insert(new, &memtype_rbroot);
- return 0;
-}
-
-struct memtype *memtype_erase(u64 start, u64 end)
-{
- struct memtype *data;
-
- /*
- * Since the memtype_rbroot tree allows overlapping ranges,
- * memtype_erase() checks with EXACT_MATCH first, i.e. free
- * a whole node for the munmap case. If no such entry is found,
- * it then checks with END_MATCH, i.e. shrink the size of a node
- * from the end for the mremap case.
- */
- data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
- if (!data) {
- data = memtype_match(start, end, MEMTYPE_END_MATCH);
- if (!data)
- return ERR_PTR(-EINVAL);
- }
-
- if (data->start == start) {
- /* munmap: erase this node */
- memtype_interval_remove(data, &memtype_rbroot);
- } else {
- /* mremap: update the end value of this node */
- memtype_interval_remove(data, &memtype_rbroot);
- data->end = start;
- memtype_interval_insert(data, &memtype_rbroot);
- return NULL;
- }
-
- return data;
-}
-
-struct memtype *memtype_lookup(u64 addr)
-{
- return memtype_interval_iter_first(&memtype_rbroot, addr,
- addr + PAGE_SIZE);
-}
-
-#if defined(CONFIG_DEBUG_FS)
-int memtype_copy_nth_element(struct memtype *out, loff_t pos)
-{
- struct memtype *match;
- int i = 1;
-
- match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
- while (match && pos != i) {
- match = memtype_interval_iter_next(match, 0, ULONG_MAX);
- i++;
- }
-
- if (match) { /* pos == i */
- *out = *match;
- return 0;
- } else {
- return 1;
- }
-}
-#endif

Subject: [tip: x86/mm] x86/mm/pat: Do not pass 'rb_root' down the memtype tree helper functions

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: a2cb4c9af3150189b0e31039333d6fa0c54776e8
Gitweb: https://git.kernel.org/tip/a2cb4c9af3150189b0e31039333d6fa0c54776e8
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:15:59 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 17:37:31 +01:00

x86/mm/pat: Do not pass 'rb_root' down the memtype tree helper functions

Get rid of the passing the rb_root down the helper calls; there
is only one: &memtype_rbroot.

No change in functionality.

[ mingo: Fixed the changelog which described a different version of the patch. ]

Signed-off-by: Davidlohr Bueso <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Wanpeng Li <[email protected]>
Cc: Yauheni Kaliuta <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/pat_rbtree.c | 21 ++++++++-------------
1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index c3d119c..d31ca77 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -52,12 +52,11 @@ enum {
MEMTYPE_END_MATCH = 1
};

-static struct memtype *memtype_match(struct rb_root_cached *root,
- u64 start, u64 end, int match_type)
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
{
struct memtype *match;

- match = memtype_interval_iter_first(root, start, end);
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
while (match != NULL && match->start < end) {
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
@@ -73,10 +72,9 @@ static struct memtype *memtype_match(struct rb_root_cached *root,
return NULL; /* Returns NULL if there is no match */
}

-static int memtype_rb_check_conflict(struct rb_root_cached *root,
- u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
{
struct memtype *match;
enum page_cache_mode found_type = reqtype;
@@ -116,8 +114,7 @@ int rbt_memtype_check_insert(struct memtype *new,
{
int err = 0;

- err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
- new->type, ret_type);
+ err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
if (err)
return err;

@@ -139,11 +136,9 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
- data = memtype_match(&memtype_rbroot, start, end,
- MEMTYPE_EXACT_MATCH);
+ data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
if (!data) {
- data = memtype_match(&memtype_rbroot, start, end,
- MEMTYPE_END_MATCH);
+ data = memtype_match(start, end, MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}

Subject: [tip: x86/mm] x86/mm/pat: Drop the rbt_ prefix from external memtype calls

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 010ca1041da3d0384fbcbaf4e4f92203c8ea9b7b
Gitweb: https://git.kernel.org/tip/010ca1041da3d0384fbcbaf4e4f92203c8ea9b7b
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:16:00 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 17:37:31 +01:00

x86/mm/pat: Drop the rbt_ prefix from external memtype calls

Drop the rbt_memtype_*() call rbt_ prefix, as we no longer use
an rbtree directly.

Signed-off-by: Davidlohr Bueso <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Wanpeng Li <[email protected]>
Cc: Yauheni Kaliuta <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/pat.c | 8 ++++----
arch/x86/mm/pat_internal.h | 20 ++++++++++----------
arch/x86/mm/pat_rbtree.c | 12 ++++++------
3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d9fbd4f..2d758e1 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,

spin_lock(&memtype_lock);

- err = rbt_memtype_check_insert(new, new_type);
+ err = memtype_check_insert(new, new_type);
if (err) {
pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
@@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end)
}

spin_lock(&memtype_lock);
- entry = rbt_memtype_erase(start, end);
+ entry = memtype_erase(start, end);
spin_unlock(&memtype_lock);

if (IS_ERR(entry)) {
@@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)

spin_lock(&memtype_lock);

- entry = rbt_memtype_lookup(paddr);
+ entry = memtype_lookup(paddr);
if (entry != NULL)
rettype = entry->type;
else
@@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
return NULL;

spin_lock(&memtype_lock);
- ret = rbt_memtype_copy_nth_element(print_entry, pos);
+ ret = memtype_copy_nth_element(print_entry, pos);
spin_unlock(&memtype_lock);

if (!ret) {
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
index eeb5cae..79a0668 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat_internal.h
@@ -29,20 +29,20 @@ static inline char *cattr_name(enum page_cache_mode pcm)
}

#ifdef CONFIG_X86_PAT
-extern int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type);
-extern struct memtype *rbt_memtype_erase(u64 start, u64 end);
-extern struct memtype *rbt_memtype_lookup(u64 addr);
-extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
+extern int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type);
+extern struct memtype *memtype_erase(u64 start, u64 end);
+extern struct memtype *memtype_lookup(u64 addr);
+extern int memtype_copy_nth_element(struct memtype *out, loff_t pos);
#else
-static inline int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type)
+static inline int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type)
{ return 0; }
-static inline struct memtype *rbt_memtype_erase(u64 start, u64 end)
+static inline struct memtype *memtype_erase(u64 start, u64 end)
{ return NULL; }
-static inline struct memtype *rbt_memtype_lookup(u64 addr)
+static inline struct memtype *memtype_lookup(u64 addr)
{ return NULL; }
-static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{ return 0; }
#endif

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index d31ca77..47a1bf3 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -109,8 +109,8 @@ failure:
return -EBUSY;
}

-int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
+int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *ret_type)
{
int err = 0;

@@ -125,13 +125,13 @@ int rbt_memtype_check_insert(struct memtype *new,
return 0;
}

-struct memtype *rbt_memtype_erase(u64 start, u64 end)
+struct memtype *memtype_erase(u64 start, u64 end)
{
struct memtype *data;

/*
* Since the memtype_rbroot tree allows overlapping ranges,
- * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
* a whole node for the munmap case. If no such entry is found,
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
@@ -157,14 +157,14 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
return data;
}

-struct memtype *rbt_memtype_lookup(u64 addr)
+struct memtype *memtype_lookup(u64 addr)
{
return memtype_interval_iter_first(&memtype_rbroot, addr,
addr + PAGE_SIZE);
}

#if defined(CONFIG_DEBUG_FS)
-int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{
struct memtype *match;
int i = 1;

Subject: [tip: x86/mm] x86/mm/pat: Convert the PAT tree to a generic interval tree

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 2418ac70a9c1f083cb7919fd20c050e2a41c2d1e
Gitweb: https://git.kernel.org/tip/2418ac70a9c1f083cb7919fd20c050e2a41c2d1e
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:15:58 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 17:37:30 +01:00

x86/mm/pat: Convert the PAT tree to a generic interval tree

With some considerations, the custom pat_rbtree implementation can be
simplified to use most of the generic interval_tree machinery:

- The tree inorder traversal can slightly differ when there are key
('start') collisions in the tree due to one going left and another right.
This, however, only affects the output of debugfs' pat_memtype_list file.

- Generic interval trees are now fully closed [a, b], for which we need
to adjust the last endpoint (ie: end - 1).

- Erasing logic must remain untouched as well.

- In order for the types to remain u64, the 'memtype_interval' calls are
introduced, as opposed to simply using struct interval_tree.

In addition, the PAT tree might potentially also benefit by the fast overlap
detection for the insertion case when looking up the first overlapping node
in the tree.

No change in behavior is intended.

Finally, I've tested this on various servers, via sanity warnings, running
side by side with the current version and so far see no differences in the
returned pointer node when doing memtype_rb_lowest_match() lookups.

Signed-off-by: Davidlohr Bueso <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Wanpeng Li <[email protected]>
Cc: Yauheni Kaliuta <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/pat_rbtree.c | 162 +++++++++-----------------------------
1 file changed, 42 insertions(+), 120 deletions(-)

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 65ebe4b..c3d119c 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -5,14 +5,13 @@
* Authors: Venkatesh Pallipadi <[email protected]>
* Suresh B Siddha <[email protected]>
*
- * Interval tree (augmented rbtree) used to store the PAT memory type
- * reservations.
+ * Interval tree used to store the PAT memory type reservations.
*/

#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
-#include <linux/rbtree_augmented.h>
+#include <linux/interval_tree_generic.h>
#include <linux/sched.h>
#include <linux/gfp.h>

@@ -33,72 +32,33 @@
*
* memtype_lock protects the rbtree.
*/
-
-static struct rb_root memtype_rbroot = RB_ROOT;
-
-static int is_node_overlap(struct memtype *node, u64 start, u64 end)
+static inline u64 memtype_interval_start(struct memtype *memtype)
{
- if (node->start >= end || node->end <= start)
- return 0;
-
- return 1;
+ return memtype->start;
}

-static u64 get_subtree_max_end(struct rb_node *node)
+static inline u64 memtype_interval_end(struct memtype *memtype)
{
- u64 ret = 0;
- if (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
- ret = data->subtree_max_end;
- }
- return ret;
+ return memtype->end - 1;
}
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ memtype_interval_start, memtype_interval_end,
+ static, memtype_interval)

-#define NODE_END(node) ((node)->end)
-
-RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
- struct memtype, rb, u64, subtree_max_end, NODE_END)
-
-/* Find the first (lowest start addr) overlapping range from rb tree */
-static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
- u64 start, u64 end)
-{
- struct rb_node *node = root->rb_node;
- struct memtype *last_lower = NULL;
-
- while (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
-
- if (get_subtree_max_end(node->rb_left) > start) {
- /* Lowest overlap if any must be on left side */
- node = node->rb_left;
- } else if (is_node_overlap(data, start, end)) {
- last_lower = data;
- break;
- } else if (start >= data->start) {
- /* Lowest overlap if any must be on right side */
- node = node->rb_right;
- } else {
- break;
- }
- }
- return last_lower; /* Returns NULL if there is no overlap */
-}
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;

enum {
MEMTYPE_EXACT_MATCH = 0,
MEMTYPE_END_MATCH = 1
};

-static struct memtype *memtype_rb_match(struct rb_root *root,
- u64 start, u64 end, int match_type)
+static struct memtype *memtype_match(struct rb_root_cached *root,
+ u64 start, u64 end, int match_type)
{
struct memtype *match;

- match = memtype_rb_lowest_match(root, start, end);
+ match = memtype_interval_iter_first(root, start, end);
while (match != NULL && match->start < end) {
- struct rb_node *node;
-
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
return match;
@@ -107,26 +67,21 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
(match->start < start) && (match->end == end))
return match;

- node = rb_next(&match->rb);
- if (node)
- match = rb_entry(node, struct memtype, rb);
- else
- match = NULL;
+ match = memtype_interval_iter_next(match, start, end);
}

return NULL; /* Returns NULL if there is no match */
}

-static int memtype_rb_check_conflict(struct rb_root *root,
+static int memtype_rb_check_conflict(struct rb_root_cached *root,
u64 start, u64 end,
enum page_cache_mode reqtype,
enum page_cache_mode *newtype)
{
- struct rb_node *node;
struct memtype *match;
enum page_cache_mode found_type = reqtype;

- match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
if (match == NULL)
goto success;

@@ -136,19 +91,12 @@ static int memtype_rb_check_conflict(struct rb_root *root,
dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
found_type = match->type;

- node = rb_next(&match->rb);
- while (node) {
- match = rb_entry(node, struct memtype, rb);
-
- if (match->start >= end) /* Checked all possible matches */
- goto success;
-
- if (is_node_overlap(match, start, end) &&
- match->type != found_type) {
+ match = memtype_interval_iter_next(match, start, end);
+ while (match) {
+ if (match->type != found_type)
goto failure;
- }

- node = rb_next(&match->rb);
+ match = memtype_interval_iter_next(match, start, end);
}
success:
if (newtype)
@@ -163,44 +111,21 @@ failure:
return -EBUSY;
}

-static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
-{
- struct rb_node **node = &(root->rb_node);
- struct rb_node *parent = NULL;
-
- while (*node) {
- struct memtype *data = rb_entry(*node, struct memtype, rb);
-
- parent = *node;
- if (data->subtree_max_end < newdata->end)
- data->subtree_max_end = newdata->end;
- if (newdata->start <= data->start)
- node = &((*node)->rb_left);
- else if (newdata->start > data->start)
- node = &((*node)->rb_right);
- }
-
- newdata->subtree_max_end = newdata->end;
- rb_link_node(&newdata->rb, parent, node);
- rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
-}
-
int rbt_memtype_check_insert(struct memtype *new,
enum page_cache_mode *ret_type)
{
int err = 0;

err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
- new->type, ret_type);
+ new->type, ret_type);
+ if (err)
+ return err;

- if (!err) {
- if (ret_type)
- new->type = *ret_type;
+ if (ret_type)
+ new->type = *ret_type;

- new->subtree_max_end = new->end;
- memtype_rb_insert(&memtype_rbroot, new);
- }
- return err;
+ memtype_interval_insert(new, &memtype_rbroot);
+ return 0;
}

struct memtype *rbt_memtype_erase(u64 start, u64 end)
@@ -214,26 +139,23 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_EXACT_MATCH);
+ data = memtype_match(&memtype_rbroot, start, end,
+ MEMTYPE_EXACT_MATCH);
if (!data) {
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_END_MATCH);
+ data = memtype_match(&memtype_rbroot, start, end,
+ MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}

if (data->start == start) {
/* munmap: erase this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
+ memtype_interval_remove(data, &memtype_rbroot);
} else {
/* mremap: update the end value of this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
+ memtype_interval_remove(data, &memtype_rbroot);
data->end = start;
- data->subtree_max_end = data->end;
- memtype_rb_insert(&memtype_rbroot, data);
+ memtype_interval_insert(data, &memtype_rbroot);
return NULL;
}

@@ -242,24 +164,24 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)

struct memtype *rbt_memtype_lookup(u64 addr)
{
- return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
+ return memtype_interval_iter_first(&memtype_rbroot, addr,
+ addr + PAGE_SIZE);
}

#if defined(CONFIG_DEBUG_FS)
int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
{
- struct rb_node *node;
+ struct memtype *match;
int i = 1;

- node = rb_first(&memtype_rbroot);
- while (node && pos != i) {
- node = rb_next(node);
+ match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+ while (match && pos != i) {
+ match = memtype_interval_iter_next(match, 0, ULONG_MAX);
i++;
}

- if (node) { /* pos == i */
- struct memtype *this = rb_entry(node, struct memtype, rb);
- *out = *this;
+ if (match) { /* pos == i */
+ *out = *match;
return 0;
} else {
return 1;

2019-11-21 17:10:24

by Ingo Molnar

[permalink] [raw]
Subject: Re: [tip: x86/mm] x86/mm/pat: Rename pat_rbtree.c to pat_interval.c


* Linus Torvalds <[email protected]> wrote:

> On Wed, Nov 20, 2019, 22:03 tip-bot2 for Davidlohr Bueso <
> [email protected]> wrote:
>
> >
> > x86/mm/pat: Rename pat_rbtree.c to pat_interval.c
> >
> > Considering that we don't use an rbtree but an interval tree,
> > rename the main file accordingly.
> >
>
> Wouldn't it be even better to not make the same mistake all over again, and
> instead of naming the file by an implementation detail, it should be named
> by what it does?
>
> Maybe pat_memtype.c or just pat_manage.c or something?
>
> Or even just pat.c?

Yeah, so incidentally, just before you made this suggestion yesterday, I
rearranged the files quite a bit in tip:WIP.x86/mm, and the latest naming
scheme is:

dagon:~/tip> ls -l arch/x86/mm/pat/
total 112
-rw-r--r-- 1 mingo mingo 5782 Nov 21 06:41 cpa-test.c
-rw-r--r-- 1 mingo mingo 117 Nov 21 06:41 Makefile
-rw-r--r-- 1 mingo mingo 32026 Nov 21 06:41 memtype.c
-rw-r--r-- 1 mingo mingo 1470 Nov 21 06:41 memtype.h
-rw-r--r-- 1 mingo mingo 5003 Nov 21 06:41 memtype_interval.c
-rw-r--r-- 1 mingo mingo 56668 Nov 21 06:41 set_memory.c

I named most of the files based on the API families they are
implementing:

- memtype*.c for the <asm/memtype.h> APIs
- set_memory.c for the <asm/set_memory.h> APIs.

Is this close to what you had in mind?

( Note: cpa-test.c is a leftover that should probably be renamed to
set_memory_test.c, with a few explicit set_memory() API tests added as
well, not just the internal change_page_attribute() tests. )

I also started the process of tidying up the API namespace which is a bit
of a historical accident as well, and I'm done with most of the memtype
funtions, which are now:

reserve_memtype() => memtype_reserve()
free_memtype() => memtype_free()
kernel_map_sync_memtype() => memtype_kernel_map_sync()
io_reserve_memtype() => memtype_reserve_io()
io_free_memtype() => memtype_free_io()

memtype_check_insert() => memtype_check_insert()
memtype_erase() => memtype_erase()
memtype_lookup() => memtype_lookup()
memtype_copy_nth_element() => memtype_copy_nth_element()

This work is in WIP.x86/mm:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git WIP.x86/mm

f53ee099dfac: x86/mm: Tabulate the page table encoding definitions
2ab1a9a197f7: x86/mm/pat: Fix typo in the Kconfig help text
0d2a9498e4db: x86/mm/pat: Clean up <asm/memtype.h> externs
2e2ee215db87: x86/mm/pat: Rename <asm/pat.h> => <asm/memtype.h>
84285e92bb7a: x86/mm/pat: Standardize on memtype_*() prefix for APIs
b2c61e70ccca: x86/mm/pat: Move the memtype related files to arch/x86/mm/pat/
f54b639ad101: x86/mm/pat: Clean up PAT initialization flags
bca867e88012: x86/mm/pat: Harmonize 'struct memtype *' local variable and function parameter use
35459848e92f: x86/mm/pat: Simplify the free_memtype() control flow
a71fbb6061dc: x86/mm/pat: Create fixed width output in /sys/kernel/debug/x86/pat_memtype_list, similar to the E820 debug printouts
a252a95b6b91: x86/mm/pat: Disambiguate PAT-disabled boot messages
83d743db88c5: x86/mm/pat: Update the comments in pat.c and pat_interval.c and refresh the code a bit

820cac65197c: x86/mm/pat: Rename pat_rbtree.c to pat_interval.c
010ca1041da3: x86/mm/pat: Drop the rbt_ prefix from external memtype calls
a2cb4c9af315: x86/mm/pat: Do not pass 'rb_root' down the memtype tree helper functions
2418ac70a9c1: x86/mm/pat: Convert the PAT tree to a generic interval tree

But there's still quite some work left. I'll send out a series once I
think the end result is a coherent whole.

Davidlohr's four patches are intended for v5.5, the remaining patches
from me on top of his work will probably need a bit more testing.

Thanks,

Ingo

2019-11-21 17:12:09

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/4] x86/mm, pat: Cleanup some of the local memtype_rb_* calls


* Davidlohr Bueso <[email protected]> wrote:

> Cleanup by both getting rid of passing the rb_root down the helper
> calls; there is only one. Secondly rename some of the calls from
> memtype_rb_*.
>
> Reviewed-by: Thomas Gleixner <[email protected]>
> Signed-off-by: Davidlohr Bueso <[email protected]>

Note that the changelog doesn't match what the patch does - in reality
the renames are done in a separate patch.

I fixed up the changelog as you can see it from the tip-bot notification.

Thanks,

Ingo

Subject: [tip: x86/mm] x86/mm/pat: Do not pass 'rb_root' down the memtype tree helper functions

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 6a9930b1c50d83facfa0f78e4f2f9ba0364f43f3
Gitweb: https://git.kernel.org/tip/6a9930b1c50d83facfa0f78e4f2f9ba0364f43f3
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:15:59 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 18:47:59 +01:00

x86/mm/pat: Do not pass 'rb_root' down the memtype tree helper functions

Get rid of the passing the rb_root down the helper calls; there
is only one: &memtype_rbroot.

No change in functionality.

[ mingo: Fixed the changelog which described a different version of the patch. ]

Signed-off-by: Davidlohr Bueso <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/pat_rbtree.c | 21 ++++++++-------------
1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index c3d119c..d31ca77 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -52,12 +52,11 @@ enum {
MEMTYPE_END_MATCH = 1
};

-static struct memtype *memtype_match(struct rb_root_cached *root,
- u64 start, u64 end, int match_type)
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
{
struct memtype *match;

- match = memtype_interval_iter_first(root, start, end);
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
while (match != NULL && match->start < end) {
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
@@ -73,10 +72,9 @@ static struct memtype *memtype_match(struct rb_root_cached *root,
return NULL; /* Returns NULL if there is no match */
}

-static int memtype_rb_check_conflict(struct rb_root_cached *root,
- u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
{
struct memtype *match;
enum page_cache_mode found_type = reqtype;
@@ -116,8 +114,7 @@ int rbt_memtype_check_insert(struct memtype *new,
{
int err = 0;

- err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
- new->type, ret_type);
+ err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
if (err)
return err;

@@ -139,11 +136,9 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
- data = memtype_match(&memtype_rbroot, start, end,
- MEMTYPE_EXACT_MATCH);
+ data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
if (!data) {
- data = memtype_match(&memtype_rbroot, start, end,
- MEMTYPE_END_MATCH);
+ data = memtype_match(start, end, MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}

Subject: [tip: x86/mm] x86/mm/pat: Drop the rbt_ prefix from external memtype calls

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 511aaca834fe2dc0b652406bda6283842fdc70ce
Gitweb: https://git.kernel.org/tip/511aaca834fe2dc0b652406bda6283842fdc70ce
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:16:00 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 18:48:07 +01:00

x86/mm/pat: Drop the rbt_ prefix from external memtype calls

Drop the rbt_memtype_*() call rbt_ prefix, as we no longer use
an rbtree directly.

Signed-off-by: Davidlohr Bueso <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/pat.c | 8 ++++----
arch/x86/mm/pat_internal.h | 20 ++++++++++----------
arch/x86/mm/pat_rbtree.c | 12 ++++++------
3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d9fbd4f..2d758e1 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,

spin_lock(&memtype_lock);

- err = rbt_memtype_check_insert(new, new_type);
+ err = memtype_check_insert(new, new_type);
if (err) {
pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
@@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end)
}

spin_lock(&memtype_lock);
- entry = rbt_memtype_erase(start, end);
+ entry = memtype_erase(start, end);
spin_unlock(&memtype_lock);

if (IS_ERR(entry)) {
@@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)

spin_lock(&memtype_lock);

- entry = rbt_memtype_lookup(paddr);
+ entry = memtype_lookup(paddr);
if (entry != NULL)
rettype = entry->type;
else
@@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
return NULL;

spin_lock(&memtype_lock);
- ret = rbt_memtype_copy_nth_element(print_entry, pos);
+ ret = memtype_copy_nth_element(print_entry, pos);
spin_unlock(&memtype_lock);

if (!ret) {
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
index eeb5cae..79a0668 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat_internal.h
@@ -29,20 +29,20 @@ static inline char *cattr_name(enum page_cache_mode pcm)
}

#ifdef CONFIG_X86_PAT
-extern int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type);
-extern struct memtype *rbt_memtype_erase(u64 start, u64 end);
-extern struct memtype *rbt_memtype_lookup(u64 addr);
-extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
+extern int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type);
+extern struct memtype *memtype_erase(u64 start, u64 end);
+extern struct memtype *memtype_lookup(u64 addr);
+extern int memtype_copy_nth_element(struct memtype *out, loff_t pos);
#else
-static inline int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type)
+static inline int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type)
{ return 0; }
-static inline struct memtype *rbt_memtype_erase(u64 start, u64 end)
+static inline struct memtype *memtype_erase(u64 start, u64 end)
{ return NULL; }
-static inline struct memtype *rbt_memtype_lookup(u64 addr)
+static inline struct memtype *memtype_lookup(u64 addr)
{ return NULL; }
-static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{ return 0; }
#endif

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index d31ca77..47a1bf3 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -109,8 +109,8 @@ failure:
return -EBUSY;
}

-int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
+int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *ret_type)
{
int err = 0;

@@ -125,13 +125,13 @@ int rbt_memtype_check_insert(struct memtype *new,
return 0;
}

-struct memtype *rbt_memtype_erase(u64 start, u64 end)
+struct memtype *memtype_erase(u64 start, u64 end)
{
struct memtype *data;

/*
* Since the memtype_rbroot tree allows overlapping ranges,
- * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
* a whole node for the munmap case. If no such entry is found,
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
@@ -157,14 +157,14 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
return data;
}

-struct memtype *rbt_memtype_lookup(u64 addr)
+struct memtype *memtype_lookup(u64 addr)
{
return memtype_interval_iter_first(&memtype_rbroot, addr,
addr + PAGE_SIZE);
}

#if defined(CONFIG_DEBUG_FS)
-int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{
struct memtype *match;
int i = 1;

Subject: [tip: x86/mm] x86/mm/pat: Convert the PAT tree to a generic interval tree

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 8d04a5f97a5fa9d7afdf46eda3a5ceaa973a1bcc
Gitweb: https://git.kernel.org/tip/8d04a5f97a5fa9d7afdf46eda3a5ceaa973a1bcc
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:15:58 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 18:47:30 +01:00

x86/mm/pat: Convert the PAT tree to a generic interval tree

With some considerations, the custom pat_rbtree implementation can be
simplified to use most of the generic interval_tree machinery:

- The tree inorder traversal can slightly differ when there are key
('start') collisions in the tree due to one going left and another right.
This, however, only affects the output of debugfs' pat_memtype_list file.

- Generic interval trees are now fully closed [a, b], for which we need
to adjust the last endpoint (ie: end - 1).

- Erasing logic must remain untouched as well.

- In order for the types to remain u64, the 'memtype_interval' calls are
introduced, as opposed to simply using struct interval_tree.

In addition, the PAT tree might potentially also benefit by the fast overlap
detection for the insertion case when looking up the first overlapping node
in the tree.

No change in behavior is intended.

Finally, I've tested this on various servers, via sanity warnings, running
side by side with the current version and so far see no differences in the
returned pointer node when doing memtype_rb_lowest_match() lookups.

Signed-off-by: Davidlohr Bueso <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/pat_rbtree.c | 162 +++++++++-----------------------------
1 file changed, 42 insertions(+), 120 deletions(-)

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 65ebe4b..c3d119c 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -5,14 +5,13 @@
* Authors: Venkatesh Pallipadi <[email protected]>
* Suresh B Siddha <[email protected]>
*
- * Interval tree (augmented rbtree) used to store the PAT memory type
- * reservations.
+ * Interval tree used to store the PAT memory type reservations.
*/

#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
-#include <linux/rbtree_augmented.h>
+#include <linux/interval_tree_generic.h>
#include <linux/sched.h>
#include <linux/gfp.h>

@@ -33,72 +32,33 @@
*
* memtype_lock protects the rbtree.
*/
-
-static struct rb_root memtype_rbroot = RB_ROOT;
-
-static int is_node_overlap(struct memtype *node, u64 start, u64 end)
+static inline u64 memtype_interval_start(struct memtype *memtype)
{
- if (node->start >= end || node->end <= start)
- return 0;
-
- return 1;
+ return memtype->start;
}

-static u64 get_subtree_max_end(struct rb_node *node)
+static inline u64 memtype_interval_end(struct memtype *memtype)
{
- u64 ret = 0;
- if (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
- ret = data->subtree_max_end;
- }
- return ret;
+ return memtype->end - 1;
}
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ memtype_interval_start, memtype_interval_end,
+ static, memtype_interval)

-#define NODE_END(node) ((node)->end)
-
-RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
- struct memtype, rb, u64, subtree_max_end, NODE_END)
-
-/* Find the first (lowest start addr) overlapping range from rb tree */
-static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
- u64 start, u64 end)
-{
- struct rb_node *node = root->rb_node;
- struct memtype *last_lower = NULL;
-
- while (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
-
- if (get_subtree_max_end(node->rb_left) > start) {
- /* Lowest overlap if any must be on left side */
- node = node->rb_left;
- } else if (is_node_overlap(data, start, end)) {
- last_lower = data;
- break;
- } else if (start >= data->start) {
- /* Lowest overlap if any must be on right side */
- node = node->rb_right;
- } else {
- break;
- }
- }
- return last_lower; /* Returns NULL if there is no overlap */
-}
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;

enum {
MEMTYPE_EXACT_MATCH = 0,
MEMTYPE_END_MATCH = 1
};

-static struct memtype *memtype_rb_match(struct rb_root *root,
- u64 start, u64 end, int match_type)
+static struct memtype *memtype_match(struct rb_root_cached *root,
+ u64 start, u64 end, int match_type)
{
struct memtype *match;

- match = memtype_rb_lowest_match(root, start, end);
+ match = memtype_interval_iter_first(root, start, end);
while (match != NULL && match->start < end) {
- struct rb_node *node;
-
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
return match;
@@ -107,26 +67,21 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
(match->start < start) && (match->end == end))
return match;

- node = rb_next(&match->rb);
- if (node)
- match = rb_entry(node, struct memtype, rb);
- else
- match = NULL;
+ match = memtype_interval_iter_next(match, start, end);
}

return NULL; /* Returns NULL if there is no match */
}

-static int memtype_rb_check_conflict(struct rb_root *root,
+static int memtype_rb_check_conflict(struct rb_root_cached *root,
u64 start, u64 end,
enum page_cache_mode reqtype,
enum page_cache_mode *newtype)
{
- struct rb_node *node;
struct memtype *match;
enum page_cache_mode found_type = reqtype;

- match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
if (match == NULL)
goto success;

@@ -136,19 +91,12 @@ static int memtype_rb_check_conflict(struct rb_root *root,
dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
found_type = match->type;

- node = rb_next(&match->rb);
- while (node) {
- match = rb_entry(node, struct memtype, rb);
-
- if (match->start >= end) /* Checked all possible matches */
- goto success;
-
- if (is_node_overlap(match, start, end) &&
- match->type != found_type) {
+ match = memtype_interval_iter_next(match, start, end);
+ while (match) {
+ if (match->type != found_type)
goto failure;
- }

- node = rb_next(&match->rb);
+ match = memtype_interval_iter_next(match, start, end);
}
success:
if (newtype)
@@ -163,44 +111,21 @@ failure:
return -EBUSY;
}

-static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
-{
- struct rb_node **node = &(root->rb_node);
- struct rb_node *parent = NULL;
-
- while (*node) {
- struct memtype *data = rb_entry(*node, struct memtype, rb);
-
- parent = *node;
- if (data->subtree_max_end < newdata->end)
- data->subtree_max_end = newdata->end;
- if (newdata->start <= data->start)
- node = &((*node)->rb_left);
- else if (newdata->start > data->start)
- node = &((*node)->rb_right);
- }
-
- newdata->subtree_max_end = newdata->end;
- rb_link_node(&newdata->rb, parent, node);
- rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
-}
-
int rbt_memtype_check_insert(struct memtype *new,
enum page_cache_mode *ret_type)
{
int err = 0;

err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
- new->type, ret_type);
+ new->type, ret_type);
+ if (err)
+ return err;

- if (!err) {
- if (ret_type)
- new->type = *ret_type;
+ if (ret_type)
+ new->type = *ret_type;

- new->subtree_max_end = new->end;
- memtype_rb_insert(&memtype_rbroot, new);
- }
- return err;
+ memtype_interval_insert(new, &memtype_rbroot);
+ return 0;
}

struct memtype *rbt_memtype_erase(u64 start, u64 end)
@@ -214,26 +139,23 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_EXACT_MATCH);
+ data = memtype_match(&memtype_rbroot, start, end,
+ MEMTYPE_EXACT_MATCH);
if (!data) {
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_END_MATCH);
+ data = memtype_match(&memtype_rbroot, start, end,
+ MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}

if (data->start == start) {
/* munmap: erase this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
+ memtype_interval_remove(data, &memtype_rbroot);
} else {
/* mremap: update the end value of this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
+ memtype_interval_remove(data, &memtype_rbroot);
data->end = start;
- data->subtree_max_end = data->end;
- memtype_rb_insert(&memtype_rbroot, data);
+ memtype_interval_insert(data, &memtype_rbroot);
return NULL;
}

@@ -242,24 +164,24 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)

struct memtype *rbt_memtype_lookup(u64 addr)
{
- return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
+ return memtype_interval_iter_first(&memtype_rbroot, addr,
+ addr + PAGE_SIZE);
}

#if defined(CONFIG_DEBUG_FS)
int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
{
- struct rb_node *node;
+ struct memtype *match;
int i = 1;

- node = rb_first(&memtype_rbroot);
- while (node && pos != i) {
- node = rb_next(node);
+ match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+ while (match && pos != i) {
+ match = memtype_interval_iter_next(match, 0, ULONG_MAX);
i++;
}

- if (node) { /* pos == i */
- struct memtype *this = rb_entry(node, struct memtype, rb);
- *out = *this;
+ if (match) { /* pos == i */
+ *out = *match;
return 0;
} else {
return 1;

Subject: [tip: x86/mm] x86/mm/pat: Rename pat_rbtree.c to pat_interval.c

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 7f264dab5b60343358e788d4c939c166c22ea4a2
Gitweb: https://git.kernel.org/tip/7f264dab5b60343358e788d4c939c166c22ea4a2
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Wed, 20 Nov 2019 17:16:01 -08:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 21 Nov 2019 18:48:18 +01:00

x86/mm/pat: Rename pat_rbtree.c to pat_interval.c

Considering the previous changes, this is a more proper name.

Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/mm/Makefile | 2 +-
arch/x86/mm/pat_interval.c | 185 ++++++++++++++++++++++++++++++++++++-
arch/x86/mm/pat_rbtree.c | 185 +------------------------------------
3 files changed, 186 insertions(+), 186 deletions(-)
create mode 100644 arch/x86/mm/pat_interval.c
delete mode 100644 arch/x86/mm/pat_rbtree.c

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc..de403df 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)

CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace

-obj-$(CONFIG_X86_PAT) += pat_rbtree.o
+obj-$(CONFIG_X86_PAT) += pat_interval.o

obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o

diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c
new file mode 100644
index 0000000..47a1bf3
--- /dev/null
+++ b/arch/x86/mm/pat_interval.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <[email protected]>
+ * Suresh B Siddha <[email protected]>
+ *
+ * Interval tree used to store the PAT memory type reservations.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/pat.h>
+
+#include "pat_internal.h"
+
+/*
+ * The memtype tree keeps track of memory type for specific
+ * physical memory areas. Without proper tracking, conflicting memory
+ * types in different mappings can cause CPU cache corruption.
+ *
+ * The tree is an interval tree (augmented rbtree) with tree ordered
+ * on starting address. Tree can contain multiple entries for
+ * different regions which overlap. All the aliases have the same
+ * cache attributes of course.
+ *
+ * memtype_lock protects the rbtree.
+ */
+static inline u64 memtype_interval_start(struct memtype *memtype)
+{
+ return memtype->start;
+}
+
+static inline u64 memtype_interval_end(struct memtype *memtype)
+{
+ return memtype->end - 1;
+}
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ memtype_interval_start, memtype_interval_end,
+ static, memtype_interval)
+
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
+
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
+{
+ struct memtype *match;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+ while (match != NULL && match->start < end) {
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (match->start == start) && (match->end == end))
+ return match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (match->start < start) && (match->end == end))
+ return match;
+
+ match = memtype_interval_iter_next(match, start, end);
+ }
+
+ return NULL; /* Returns NULL if there is no match */
+}
+
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
+{
+ struct memtype *match;
+ enum page_cache_mode found_type = reqtype;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+ if (match == NULL)
+ goto success;
+
+ if (match->type != found_type && newtype == NULL)
+ goto failure;
+
+ dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
+ found_type = match->type;
+
+ match = memtype_interval_iter_next(match, start, end);
+ while (match) {
+ if (match->type != found_type)
+ goto failure;
+
+ match = memtype_interval_iter_next(match, start, end);
+ }
+success:
+ if (newtype)
+ *newtype = found_type;
+
+ return 0;
+
+failure:
+ pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid, start, end,
+ cattr_name(found_type), cattr_name(match->type));
+ return -EBUSY;
+}
+
+int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *ret_type)
+{
+ int err = 0;
+
+ err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
+ if (err)
+ return err;
+
+ if (ret_type)
+ new->type = *ret_type;
+
+ memtype_interval_insert(new, &memtype_rbroot);
+ return 0;
+}
+
+struct memtype *memtype_erase(u64 start, u64 end)
+{
+ struct memtype *data;
+
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrink the size of a node
+ * from the end for the mremap case.
+ */
+ data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
+ if (!data) {
+ data = memtype_match(start, end, MEMTYPE_END_MATCH);
+ if (!data)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (data->start == start) {
+ /* munmap: erase this node */
+ memtype_interval_remove(data, &memtype_rbroot);
+ } else {
+ /* mremap: update the end value of this node */
+ memtype_interval_remove(data, &memtype_rbroot);
+ data->end = start;
+ memtype_interval_insert(data, &memtype_rbroot);
+ return NULL;
+ }
+
+ return data;
+}
+
+struct memtype *memtype_lookup(u64 addr)
+{
+ return memtype_interval_iter_first(&memtype_rbroot, addr,
+ addr + PAGE_SIZE);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+int memtype_copy_nth_element(struct memtype *out, loff_t pos)
+{
+ struct memtype *match;
+ int i = 1;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+ while (match && pos != i) {
+ match = memtype_interval_iter_next(match, 0, ULONG_MAX);
+ i++;
+ }
+
+ if (match) { /* pos == i */
+ *out = *match;
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#endif
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
deleted file mode 100644
index 47a1bf3..0000000
--- a/arch/x86/mm/pat_rbtree.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Handle caching attributes in page tables (PAT)
- *
- * Authors: Venkatesh Pallipadi <[email protected]>
- * Suresh B Siddha <[email protected]>
- *
- * Interval tree used to store the PAT memory type reservations.
- */
-
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/interval_tree_generic.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-
-#include <asm/pgtable.h>
-#include <asm/pat.h>
-
-#include "pat_internal.h"
-
-/*
- * The memtype tree keeps track of memory type for specific
- * physical memory areas. Without proper tracking, conflicting memory
- * types in different mappings can cause CPU cache corruption.
- *
- * The tree is an interval tree (augmented rbtree) with tree ordered
- * on starting address. Tree can contain multiple entries for
- * different regions which overlap. All the aliases have the same
- * cache attributes of course.
- *
- * memtype_lock protects the rbtree.
- */
-static inline u64 memtype_interval_start(struct memtype *memtype)
-{
- return memtype->start;
-}
-
-static inline u64 memtype_interval_end(struct memtype *memtype)
-{
- return memtype->end - 1;
-}
-INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
- memtype_interval_start, memtype_interval_end,
- static, memtype_interval)
-
-static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
-
-enum {
- MEMTYPE_EXACT_MATCH = 0,
- MEMTYPE_END_MATCH = 1
-};
-
-static struct memtype *memtype_match(u64 start, u64 end, int match_type)
-{
- struct memtype *match;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end);
- while (match != NULL && match->start < end) {
- if ((match_type == MEMTYPE_EXACT_MATCH) &&
- (match->start == start) && (match->end == end))
- return match;
-
- if ((match_type == MEMTYPE_END_MATCH) &&
- (match->start < start) && (match->end == end))
- return match;
-
- match = memtype_interval_iter_next(match, start, end);
- }
-
- return NULL; /* Returns NULL if there is no match */
-}
-
-static int memtype_check_conflict(u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
-{
- struct memtype *match;
- enum page_cache_mode found_type = reqtype;
-
- match = memtype_interval_iter_first(&memtype_rbroot, start, end);
- if (match == NULL)
- goto success;
-
- if (match->type != found_type && newtype == NULL)
- goto failure;
-
- dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
- found_type = match->type;
-
- match = memtype_interval_iter_next(match, start, end);
- while (match) {
- if (match->type != found_type)
- goto failure;
-
- match = memtype_interval_iter_next(match, start, end);
- }
-success:
- if (newtype)
- *newtype = found_type;
-
- return 0;
-
-failure:
- pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid, start, end,
- cattr_name(found_type), cattr_name(match->type));
- return -EBUSY;
-}
-
-int memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
-{
- int err = 0;
-
- err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
- if (err)
- return err;
-
- if (ret_type)
- new->type = *ret_type;
-
- memtype_interval_insert(new, &memtype_rbroot);
- return 0;
-}
-
-struct memtype *memtype_erase(u64 start, u64 end)
-{
- struct memtype *data;
-
- /*
- * Since the memtype_rbroot tree allows overlapping ranges,
- * memtype_erase() checks with EXACT_MATCH first, i.e. free
- * a whole node for the munmap case. If no such entry is found,
- * it then checks with END_MATCH, i.e. shrink the size of a node
- * from the end for the mremap case.
- */
- data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
- if (!data) {
- data = memtype_match(start, end, MEMTYPE_END_MATCH);
- if (!data)
- return ERR_PTR(-EINVAL);
- }
-
- if (data->start == start) {
- /* munmap: erase this node */
- memtype_interval_remove(data, &memtype_rbroot);
- } else {
- /* mremap: update the end value of this node */
- memtype_interval_remove(data, &memtype_rbroot);
- data->end = start;
- memtype_interval_insert(data, &memtype_rbroot);
- return NULL;
- }
-
- return data;
-}
-
-struct memtype *memtype_lookup(u64 addr)
-{
- return memtype_interval_iter_first(&memtype_rbroot, addr,
- addr + PAGE_SIZE);
-}
-
-#if defined(CONFIG_DEBUG_FS)
-int memtype_copy_nth_element(struct memtype *out, loff_t pos)
-{
- struct memtype *match;
- int i = 1;
-
- match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
- while (match && pos != i) {
- match = memtype_interval_iter_next(match, 0, ULONG_MAX);
- i++;
- }
-
- if (match) { /* pos == i */
- *out = *match;
- return 0;
- } else {
- return 1;
- }
-}
-#endif