2015-11-05 18:57:10

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH] x86/mm: Skip the hypervisor range when walking PGD

The range between 0xffff800000000000 and 0xffff87ffffffffff is reserved
for hypervisor and therefore we should not try to follow PGD's indexes
corresponding to those addresses.

While this has always been a problem, with commit e1a58320a38d ("x86/mm:
Warn on W^X mappings") ptdump_walk_pgd_level_core() can now be called
during boot, causing a PV Xen guest to crash.

Reported-by: Sander Eikelenboom <[email protected]>
Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/mm/dump_pagetables.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 1bf417e..756c921 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -362,8 +362,13 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
{
#ifdef CONFIG_X86_64
+/* ffff800000000000 - ffff87ffffffffff is reserved for hypervisor */
+#define is_hypervisor_range(idx) (paravirt_enabled() && \
+ (((idx) >= pgd_index(__PAGE_OFFSET) - 16) && \
+ ((idx) < pgd_index(__PAGE_OFFSET))))
pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
+#define is_hypervisor_range(idx) 0
pgd_t *start = swapper_pg_dir;
#endif
pgprotval_t prot;
@@ -381,7 +386,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,

for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
- if (!pgd_none(*start)) {
+ if (!pgd_none(*start) && !is_hypervisor_range(i)) {
if (pgd_large(*start) || !pgd_present(*start)) {
prot = pgd_flags(*start);
note_page(m, &st, __pgprot(prot), 1);
--
1.9.3


2015-11-05 22:32:19

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] x86/mm: Skip the hypervisor range when walking PGD

On 11/05/15 10:56, Boris Ostrovsky wrote:
> The range between 0xffff800000000000 and 0xffff87ffffffffff is reserved
> for hypervisor and therefore we should not try to follow PGD's indexes
> corresponding to those addresses.
>
> While this has always been a problem, with commit e1a58320a38d ("x86/mm:
> Warn on W^X mappings") ptdump_walk_pgd_level_core() can now be called
> during boot, causing a PV Xen guest to crash.
>
> Reported-by: Sander Eikelenboom <[email protected]>
> Signed-off-by: Boris Ostrovsky <[email protected]>
> ---
> arch/x86/mm/dump_pagetables.c | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
> index 1bf417e..756c921 100644
> --- a/arch/x86/mm/dump_pagetables.c
> +++ b/arch/x86/mm/dump_pagetables.c
> @@ -362,8 +362,13 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
> bool checkwx)
> {
> #ifdef CONFIG_X86_64
> +/* ffff800000000000 - ffff87ffffffffff is reserved for hypervisor */
> +#define is_hypervisor_range(idx) (paravirt_enabled() && \
> + (((idx) >= pgd_index(__PAGE_OFFSET) - 16) && \
> + ((idx) < pgd_index(__PAGE_OFFSET))))
> pgd_t *start = (pgd_t *) &init_level4_pgt;
> #else
> +#define is_hypervisor_range(idx) 0
> pgd_t *start = swapper_pg_dir;
> #endif
> pgprotval_t prot;
> @@ -381,7 +386,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
>
> for (i = 0; i < PTRS_PER_PGD; i++) {
> st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
> - if (!pgd_none(*start)) {
> + if (!pgd_none(*start) && !is_hypervisor_range(i)) {
> if (pgd_large(*start) || !pgd_present(*start)) {
> prot = pgd_flags(*start);
> note_page(m, &st, __pgprot(prot), 1);
>

Maybe we could use the max_lines field in the address_markers[] array?
We really shouldn't be mapping anything in the hypervisor space even on
native.

-hpa

2015-11-06 03:39:17

by Boris Ostrovsky

[permalink] [raw]
Subject: Re: [PATCH] x86/mm: Skip the hypervisor range when walking PGD



On 11/05/2015 05:31 PM, H. Peter Anvin wrote:
> On 11/05/15 10:56, Boris Ostrovsky wrote:
>> The range between 0xffff800000000000 and 0xffff87ffffffffff is reserved
>> for hypervisor and therefore we should not try to follow PGD's indexes
>> corresponding to those addresses.
>>
>> While this has always been a problem, with commit e1a58320a38d ("x86/mm:
>> Warn on W^X mappings") ptdump_walk_pgd_level_core() can now be called
>> during boot, causing a PV Xen guest to crash.
>>
>> Reported-by: Sander Eikelenboom <[email protected]>
>> Signed-off-by: Boris Ostrovsky <[email protected]>
>> ---
>> arch/x86/mm/dump_pagetables.c | 7 ++++++-
>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
>> index 1bf417e..756c921 100644
>> --- a/arch/x86/mm/dump_pagetables.c
>> +++ b/arch/x86/mm/dump_pagetables.c
>> @@ -362,8 +362,13 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
>> bool checkwx)
>> {
>> #ifdef CONFIG_X86_64
>> +/* ffff800000000000 - ffff87ffffffffff is reserved for hypervisor */
>> +#define is_hypervisor_range(idx) (paravirt_enabled() && \
>> + (((idx) >= pgd_index(__PAGE_OFFSET) - 16) && \
>> + ((idx) < pgd_index(__PAGE_OFFSET))))
>> pgd_t *start = (pgd_t *) &init_level4_pgt;
>> #else
>> +#define is_hypervisor_range(idx) 0
>> pgd_t *start = swapper_pg_dir;
>> #endif
>> pgprotval_t prot;
>> @@ -381,7 +386,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
>>
>> for (i = 0; i < PTRS_PER_PGD; i++) {
>> st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
>> - if (!pgd_none(*start)) {
>> + if (!pgd_none(*start) && !is_hypervisor_range(i)) {
>> if (pgd_large(*start) || !pgd_present(*start)) {
>> prot = pgd_flags(*start);
>> note_page(m, &st, __pgprot(prot), 1);
>>
> Maybe we could use the max_lines field in the address_markers[] array?
> We really shouldn't be mapping anything in the hypervisor space even on
> native.

You mean overload max_lines with a value indicating that the range needs
to be skipped?

That would require checking the range on each loop iteration since we
update st.marker *after* we've walked a particular index. (And I think
it would need to be done on each level to be generic).

I could just drop paravirt_enabled() in is_hypervisor_range() but you
are thinking about avoiding the macro altogether, right?

(I do need to add hypervisor range to address_markers[])

-boris

Subject: [tip:x86/urgent] x86/mm: Skip the hypervisor range when walking PGD

Commit-ID: f4e342c87776884f0309942a3880ca7e835239f9
Gitweb: http://git.kernel.org/tip/f4e342c87776884f0309942a3880ca7e835239f9
Author: Boris Ostrovsky <[email protected]>
AuthorDate: Thu, 5 Nov 2015 13:56:35 -0500
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sat, 7 Nov 2015 10:39:39 +0100

x86/mm: Skip the hypervisor range when walking PGD

The range between 0xffff800000000000 and 0xffff87ffffffffff is reserved
for hypervisor and therefore we should not try to follow PGD's indexes
corresponding to those addresses.

While this has always been a problem, with the new W+X warning
mechanism ptdump_walk_pgd_level_core() can now be called during boot,
causing a PV Xen guest to crash.

[ tglx: Replaced the macro with a readable inline ]

Fixes: e1a58320a38d "x86/mm: Warn on W^X mappings"
Reported-by: Sander Eikelenboom <[email protected]>
Signed-off-by: Boris Ostrovsky <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/mm/dump_pagetables.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 1bf417e..a035c2a 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -358,6 +358,21 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif

+#ifdef CONFIG_X86_64
+static inline bool is_hypervisor_range(int idx)
+{
+ /*
+ * ffff800000000000 - ffff87ffffffffff is reserved for
+ * the hypervisor.
+ */
+ return paravirt_enabled() &&
+ (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+ (idx < pgd_index(__PAGE_OFFSET));
+}
+#else
+static inline bool is_hypervisor_range(int idx) { return false; }
+#endif
+
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
{
@@ -381,7 +396,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,

for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
- if (!pgd_none(*start)) {
+ if (!pgd_none(*start) && !is_hypervisor_range(i)) {
if (pgd_large(*start) || !pgd_present(*start)) {
prot = pgd_flags(*start);
note_page(m, &st, __pgprot(prot), 1);