2018-01-07 10:33:40

by Jike Song

[permalink] [raw]
Subject: [PATCH v2] x86/mm/pti: remove dead logic during user pagetable population

Look at one of the code snippets:

162 if (pgd_none(*pgd)) {
163 unsigned long new_p4d_page = __get_free_page(gfp);
164 if (!new_p4d_page)
165 return NULL;
166
167 if (pgd_none(*pgd)) {
168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169 new_p4d_page = 0;
170 }
171 if (new_p4d_page)
172 free_page(new_p4d_page);
173 }

There can't be any difference between two pgd_none(*pgd) at L162 and L167,
so it's always false at L171.

v2: add the commit message above.

Signed-off-by: Jike Song <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Alan Cox <[email protected]>
Cc: Jiri Koshina <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Andi Lutomirski <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Turner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Greg KH <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: [email protected]
Signed-off-by: Jike Song <[email protected]>
---
arch/x86/mm/pti.c | 28 ++++------------------------
1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 43d4a4a29037..dc611d039bd5 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
if (!new_p4d_page)
return NULL;

- if (pgd_none(*pgd)) {
- set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
- new_p4d_page = 0;
- }
- if (new_p4d_page)
- free_page(new_p4d_page);
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);

@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pud_page)
return NULL;

- if (p4d_none(*p4d)) {
- set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
- new_pud_page = 0;
- }
- if (new_pud_page)
- free_page(new_pud_page);
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
}

pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pmd_page)
return NULL;

- if (pud_none(*pud)) {
- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
- new_pmd_page = 0;
- }
- if (new_pmd_page)
- free_page(new_pmd_page);
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
}

return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
if (!new_pte_page)
return NULL;

- if (pmd_none(*pmd)) {
- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
- new_pte_page = 0;
- }
- if (new_pte_page)
- free_page(new_pte_page);
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
}

pte = pte_offset_kernel(pmd, address);
--
2.14.3


2018-01-07 11:37:21

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2] x86/mm/pti: remove dead logic during user pagetable population

On Sun, Jan 07, 2018 at 06:33:17PM +0800, Jike Song wrote:
> Look at one of the code snippets:
>
> 162 if (pgd_none(*pgd)) {
> 163 unsigned long new_p4d_page = __get_free_page(gfp);
> 164 if (!new_p4d_page)
> 165 return NULL;
> 166
> 167 if (pgd_none(*pgd)) {
> 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
> 169 new_p4d_page = 0;
> 170 }
> 171 if (new_p4d_page)
> 172 free_page(new_p4d_page);
> 173 }
>
> There can't be any difference between two pgd_none(*pgd) at L162 and L167,
> so it's always false at L171.

I think this is a remnant from the kaiser version which did this:

if (pud_none(*pud)) {
unsigned long new_pmd_page = __get_free_page(gfp);
if (!new_pmd_page)
return NULL;
spin_lock(&shadow_table_allocation_lock);
if (pud_none(*pud))
set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
else
free_page(new_pmd_page);
spin_unlock(&shadow_table_allocation_lock);
}

I was wondering too, why the duplicated checks.

Which has this explanation about the need for the locking:

/*
* At runtime, the only things we map are some things for CPU
* hotplug, and stacks for new processes. No two CPUs will ever
* be populating the same addresses, so we only need to ensure
* that we protect between two CPUs trying to allocate and
* populate the same page table page.
*
* Only take this lock when doing a set_p[4um]d(), but it is not
* needed for doing a set_pte(). We assume that only the *owner*
* of a given allocation will be doing this for _their_
* allocation.
*
* This ensures that once a system has been running for a while
* and there have been stacks all over and these page tables
* are fully populated, there will be no further acquisitions of
* this lock.
*/
static DEFINE_SPINLOCK(shadow_table_allocation_lock);

Now I have my suspicions why that's not needed anymore upstream but I'd
let tglx explain better.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2018-01-07 12:05:39

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v2] x86/mm/pti: remove dead logic during user pagetable population

On Sun, 7 Jan 2018, Borislav Petkov wrote:
> On Sun, Jan 07, 2018 at 06:33:17PM +0800, Jike Song wrote:
> > Look at one of the code snippets:
> >
> > 162 if (pgd_none(*pgd)) {
> > 163 unsigned long new_p4d_page = __get_free_page(gfp);
> > 164 if (!new_p4d_page)
> > 165 return NULL;
> > 166
> > 167 if (pgd_none(*pgd)) {
> > 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
> > 169 new_p4d_page = 0;
> > 170 }
> > 171 if (new_p4d_page)
> > 172 free_page(new_p4d_page);
> > 173 }
> >
> > There can't be any difference between two pgd_none(*pgd) at L162 and L167,
> > so it's always false at L171.
>
> I think this is a remnant from the kaiser version which did this:
>
> if (pud_none(*pud)) {
> unsigned long new_pmd_page = __get_free_page(gfp);
> if (!new_pmd_page)
> return NULL;
> spin_lock(&shadow_table_allocation_lock);
> if (pud_none(*pud))
> set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
> else
> free_page(new_pmd_page);
> spin_unlock(&shadow_table_allocation_lock);
> }
>
> I was wondering too, why the duplicated checks.
>
> Which has this explanation about the need for the locking:
>
> /*
> * At runtime, the only things we map are some things for CPU
> * hotplug, and stacks for new processes. No two CPUs will ever
> * be populating the same addresses, so we only need to ensure
> * that we protect between two CPUs trying to allocate and
> * populate the same page table page.
> *
> * Only take this lock when doing a set_p[4um]d(), but it is not
> * needed for doing a set_pte(). We assume that only the *owner*
> * of a given allocation will be doing this for _their_
> * allocation.
> *
> * This ensures that once a system has been running for a while
> * and there have been stacks all over and these page tables
> * are fully populated, there will be no further acquisitions of
> * this lock.
> */
> static DEFINE_SPINLOCK(shadow_table_allocation_lock);
>
> Now I have my suspicions why that's not needed anymore upstream but I'd
> let tglx explain better.

We got rid of all that runtime mapping stuff and the functions are only
called from pti_init(). So the locking and therefor the tests above are not
needed anymore. While at it we should mark all those function __init.

Thanks,

tglx

2018-01-07 18:34:14

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH v2] x86/mm/pti: remove dead logic during user pagetable population

On 01/07/2018 04:05 AM, Thomas Gleixner wrote:
>> static DEFINE_SPINLOCK(shadow_table_allocation_lock);
>>
>> Now I have my suspicions why that's not needed anymore upstream but I'd
>> let tglx explain better.
> We got rid of all that runtime mapping stuff and the functions are only
> called from pti_init(). So the locking and therefor the tests above are not
> needed anymore. While at it we should mark all those function __init.

Yes, the double-test was part of an optimization where we attempted to
avoid using a global spinlock in the fork() path. We would check for
unallocated mid-level page tables without the lock. The lock was only
taken it when we needed to *make* an entry to avoid collisions.

Now that it is all single-threaded, there is no chance of a collision,
no need for a lock, and no need for the re-check.

^^ Just in case someone wants to include a bit more first-hand
information about wtf that code was doing there.

2018-01-08 16:04:16

by Jike Song

[permalink] [raw]
Subject: [PATCH v3] x86/mm/pti: remove dead logic during user pagetable population

Look at one of the code snippets:

162 if (pgd_none(*pgd)) {
163 unsigned long new_p4d_page = __get_free_page(gfp);
164 if (!new_p4d_page)
165 return NULL;
166
167 if (pgd_none(*pgd)) {
168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169 new_p4d_page = 0;
170 }
171 if (new_p4d_page)
172 free_page(new_p4d_page);
173 }

There can't be any difference between two pgd_none(*pgd) at L162 and L167,
so it's always false at L171.

To quote Dave Hansen:

> Yes, the double-test was part of an optimization where we attempted to
> avoid using a global spinlock in the fork() path. We would check for
> unallocated mid-level page tables without the lock. The lock was only
> taken it when we needed to *make* an entry to avoid collisions.
>
> Now that it is all single-threaded, there is no chance of a collision,
> no need for a lock, and no need for the re-check.

v3: mark functions __init; add first-hand explanation
v2: add commit message.

Signed-off-by: Jike Song <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Alan Cox <[email protected]>
Cc: Jiri Koshina <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Andi Lutomirski <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Turner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Greg KH <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
---
arch/x86/mm/pti.c | 32 ++++++--------------------------
1 file changed, 6 insertions(+), 26 deletions(-)

diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 43d4a4a29037..ce38f165489b 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
*
* Returns a pointer to a P4D on success, or NULL on failure.
*/
-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
if (!new_p4d_page)
return NULL;

- if (pgd_none(*pgd)) {
- set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
- new_p4d_page = 0;
- }
- if (new_p4d_page)
- free_page(new_p4d_page);
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);

@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
*
* Returns a pointer to a PMD on success, or NULL on failure.
*/
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pud_page)
return NULL;

- if (p4d_none(*p4d)) {
- set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
- new_pud_page = 0;
- }
- if (new_pud_page)
- free_page(new_pud_page);
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
}

pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pmd_page)
return NULL;

- if (pud_none(*pud)) {
- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
- new_pmd_page = 0;
- }
- if (new_pmd_page)
- free_page(new_pmd_page);
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
}

return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
if (!new_pte_page)
return NULL;

- if (pmd_none(*pmd)) {
- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
- new_pte_page = 0;
- }
- if (new_pte_page)
- free_page(new_pte_page);
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
}

pte = pte_offset_kernel(pmd, address);
--
2.14.3

Subject: [tip:x86/pti] x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*()

Commit-ID: 8d56eff266f3e41a6c39926269c4c3f58f881a8e
Gitweb: https://git.kernel.org/tip/8d56eff266f3e41a6c39926269c4c3f58f881a8e
Author: Jike Song <[email protected]>
AuthorDate: Tue, 9 Jan 2018 00:03:41 +0800
Committer: Thomas Gleixner <[email protected]>
CommitDate: Mon, 8 Jan 2018 17:42:13 +0100

x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*()

The following code contains dead logic:

162 if (pgd_none(*pgd)) {
163 unsigned long new_p4d_page = __get_free_page(gfp);
164 if (!new_p4d_page)
165 return NULL;
166
167 if (pgd_none(*pgd)) {
168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
169 new_p4d_page = 0;
170 }
171 if (new_p4d_page)
172 free_page(new_p4d_page);
173 }

There can't be any difference between two pgd_none(*pgd) at L162 and L167,
so it's always false at L171.

Dave Hansen explained:

Yes, the double-test was part of an optimization where we attempted to
avoid using a global spinlock in the fork() path. We would check for
unallocated mid-level page tables without the lock. The lock was only
taken when we needed to *make* an entry to avoid collisions.

Now that it is all single-threaded, there is no chance of a collision,
no need for a lock, and no need for the re-check.

As all these functions are only called during init, mark them __init as
well.

Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs")
Signed-off-by: Jike Song <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Alan Cox <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Jiri Koshina <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Andi Lutomirski <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Greg KH <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Paul Turner <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/mm/pti.c | 32 ++++++--------------------------
1 file changed, 6 insertions(+), 26 deletions(-)

diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 43d4a4a..ce38f16 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
*
* Returns a pointer to a P4D on success, or NULL on failure.
*/
-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
if (!new_p4d_page)
return NULL;

- if (pgd_none(*pgd)) {
- set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
- new_p4d_page = 0;
- }
- if (new_p4d_page)
- free_page(new_p4d_page);
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);

@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
*
* Returns a pointer to a PMD on success, or NULL on failure.
*/
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pud_page)
return NULL;

- if (p4d_none(*p4d)) {
- set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
- new_pud_page = 0;
- }
- if (new_pud_page)
- free_page(new_pud_page);
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
}

pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pmd_page)
return NULL;

- if (pud_none(*pud)) {
- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
- new_pmd_page = 0;
- }
- if (new_pmd_page)
- free_page(new_pmd_page);
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
}

return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
if (!new_pte_page)
return NULL;

- if (pmd_none(*pmd)) {
- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
- new_pte_page = 0;
- }
- if (new_pte_page)
- free_page(new_pte_page);
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
}

pte = pte_offset_kernel(pmd, address);