2024-04-12 15:13:12

by Jürgen Groß

Subject: [PATCH v2 0/4] x86: correctly handle NX and RW bit testing

When the processor detects a set NX bit on any page table level
during a page table walk, the resulting page is not suitable for code
execution.

The RW bit is handled similarly: all page table levels need to have
the RW bit set for the resulting page to be writable.

Unfortunately, the kernel looks only at the leaf page table entry
when deciding whether e.g. a writable page is also executable.

Fix that by calculating the effective NX and RW bits over all page
table levels when doing a software address lookup, mimicking the
hardware behavior.
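
As a standalone illustration of this accumulation (a minimal sketch,
not kernel code; the flag values and the four-level walk are
simplified assumptions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define _PAGE_RW ((uint64_t)1 << 1)   /* bit 1: writable */
#define _PAGE_NX ((uint64_t)1 << 63)  /* bit 63: no-execute */

int main(void)
{
        /* Example flags of the levels walked for one address. */
        uint64_t levels[] = {
                _PAGE_RW,             /* PGD: writable, executable */
                _PAGE_RW,             /* P4D */
                _PAGE_RW | _PAGE_NX,  /* PUD: NX set on this level */
                _PAGE_RW,             /* PMD */
        };
        bool nx = false, rw = true;

        /* NX bits are OR-ed, RW bits are AND-ed across all levels. */
        for (size_t i = 0; i < sizeof(levels) / sizeof(levels[0]); i++) {
                nx = nx || (levels[i] & _PAGE_NX);
                rw = rw && (levels[i] & _PAGE_RW);
        }

        /* Prints "effective: writable, no-execute". */
        printf("effective: %s, %s\n", rw ? "writable" : "read-only",
               nx ? "no-execute" : "executable");
        return 0;
}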

Changes in V2:
- split the patch into multiple patches

Juergen Gross (4):
x86/pat: introduce lookup_address_in_pgd_attr()
x86/mm: use lookup_address_in_pgd_attr() in show_fault_oops()
x86/pat: restructure _lookup_address_cpa()
x86/pat: fix W^X violation false-positives when running as Xen PV
guest

arch/x86/include/asm/pgtable_types.h | 2 +
arch/x86/mm/fault.c | 7 +--
arch/x86/mm/pat/set_memory.c | 68 ++++++++++++++++++++++------
3 files changed, 60 insertions(+), 17 deletions(-)

--
2.35.3



2024-04-12 15:13:25

by Jürgen Groß

Subject: [PATCH v2 1/4] x86/pat: introduce lookup_address_in_pgd_attr()

Add lookup_address_in_pgd_attr(), which does the same as the existing
lookup_address_in_pgd(), but additionally returns the effective
settings of the NX and RW bits of all walked page table levels.

This will be needed in order to match hardware behavior when looking
for effective access rights, especially for detecting writable code
pages.

In order to avoid code duplication, let lookup_address_in_pgd() call
lookup_address_in_pgd_attr() with dummy parameters.
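
As a usage sketch (hypothetical caller, not part of the patch; the
helper name is made up for illustration and assumes a kernel context):

/* Hypothetical helper: effective rights of a kernel-space mapping. */
static bool addr_is_writable_executable(unsigned long address)
{
        unsigned int level;
        bool nx, rw;
        pte_t *pte;

        pte = lookup_address_in_pgd_attr(pgd_offset_k(address), address,
                                         &level, &nx, &rw);
        if (!pte || !pte_present(*pte))
                return false;

        /* nx/rw cover the walked levels above the returned entry. */
        return rw && pte_write(*pte) && !nx && pte_exec(*pte);
}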

Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- split off from V1 patch (Ingo Molnar)
- introduced new function (Ingo Molnar)
---
arch/x86/include/asm/pgtable_types.h | 2 ++
arch/x86/mm/pat/set_memory.c | 33 +++++++++++++++++++++++++---
2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0b748ee16b3d..dd05caeeeeaf 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -566,6 +566,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 80c9037ffadf..bfa0aae45d48 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -657,20 +657,26 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star

/*
* Lookup the page table entry for a virtual address in a specific pgd.
- * Return a pointer to the entry and the level of the mapping.
+ * Return a pointer to the entry, the level of the mapping, and the effective
+ * NX and RW bits of all page table levels.
*/
-pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
- unsigned int *level)
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw)
{
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;

*level = PG_LEVEL_NONE;
+ *nx = false;
+ *rw = true;

if (pgd_none(*pgd))
return NULL;

+ *nx |= pgd_flags(*pgd) & _PAGE_NX;
+ *rw &= pgd_flags(*pgd) & _PAGE_RW;
+
p4d = p4d_offset(pgd, address);
if (p4d_none(*p4d))
return NULL;
@@ -679,6 +685,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (p4d_leaf(*p4d) || !p4d_present(*p4d))
return (pte_t *)p4d;

+ *nx |= p4d_flags(*p4d) & _PAGE_NX;
+ *rw &= p4d_flags(*p4d) & _PAGE_RW;
+
pud = pud_offset(p4d, address);
if (pud_none(*pud))
return NULL;
@@ -687,6 +696,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pud_leaf(*pud) || !pud_present(*pud))
return (pte_t *)pud;

+ *nx |= pud_flags(*pud) & _PAGE_NX;
+ *rw &= pud_flags(*pud) & _PAGE_RW;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
@@ -695,11 +707,26 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pmd_leaf(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;

+ *nx |= pmd_flags(*pmd) & _PAGE_NX;
+ *rw &= pmd_flags(*pmd) & _PAGE_RW;
+
*level = PG_LEVEL_4K;

return pte_offset_kernel(pmd, address);
}

+/*
+ * Lookup the page table entry for a virtual address in a specific pgd.
+ * Return a pointer to the entry and the level of the mapping.
+ */
+pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+ unsigned int *level)
+{
+ bool nx, rw;
+
+ return lookup_address_in_pgd_attr(pgd, address, level, &nx, &rw);
+}
+
/*
* Lookup the page table entry for a virtual address. Return a pointer
* to the entry and the level of the mapping.
--
2.35.3


2024-04-12 15:13:39

by Jürgen Groß

Subject: [PATCH v2 2/4] x86/mm: use lookup_address_in_pgd_attr() in show_fault_oops()

Fix show_fault_oops() to no longer look only at the leaf PTE when
detecting NX violations, but to use the effective NX value returned by
lookup_address_in_pgd_attr() instead.
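
As an illustration of the case that was previously misclassified (a
minimal sketch with made-up values, not kernel code):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
        /* Leaf PTE allows execution, but an upper level carries NX. */
        bool leaf_exec = true;  /* pte_exec(*pte) on the leaf */
        bool nx = true;         /* effective NX of the upper levels */

        /* NX oops message: now fires (old check saw the leaf only). */
        printf("NX message: %d\n", !leaf_exec || nx);   /* prints 1 */

        /* SMEP message: suppressed, the page isn't executable. */
        printf("SMEP message: %d\n", leaf_exec && !nx); /* prints 0 */
        return 0;
}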

Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- split off from V1 patch (Ingo Molnar)
---
arch/x86/mm/fault.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 622d12ec7f08..6b2ca8ba75b8 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -514,18 +514,19 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad

if (error_code & X86_PF_INSTR) {
unsigned int level;
+ bool nx, rw;
pgd_t *pgd;
pte_t *pte;

pgd = __va(read_cr3_pa());
pgd += pgd_index(address);

- pte = lookup_address_in_pgd(pgd, address, &level);
+ pte = lookup_address_in_pgd_attr(pgd, address, &level, &nx, &rw);

- if (pte && pte_present(*pte) && !pte_exec(*pte))
+ if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx))
pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
from_kuid(&init_user_ns, current_uid()));
- if (pte && pte_present(*pte) && pte_exec(*pte) &&
+ if (pte && pte_present(*pte) && pte_exec(*pte) && !nx &&
(pgd_flags(*pgd) & _PAGE_USER) &&
(__read_cr4() & X86_CR4_SMEP))
pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",
--
2.35.3


2024-04-12 15:17:08

by Jürgen Groß

Subject: [PATCH v2 3/4] x86/pat: restructure _lookup_address_cpa()

Modify _lookup_address_cpa() to no longer use lookup_address(), but
only lookup_address_in_pgd().

This is done in preparation for using lookup_address_in_pgd_attr().

No functional change intended.
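
For context, lookup_address() itself is just the kernel-pgd special
case (its shape, paraphrased from the same file), so after this change
both branches end up in lookup_address_in_pgd():

pte_t *lookup_address(unsigned long address, unsigned int *level)
{
        return lookup_address_in_pgd(pgd_offset_k(address), address, level);
}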

Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- split off from V1 patch (Ingo Molnar)
---
arch/x86/mm/pat/set_memory.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index bfa0aae45d48..4ebccaf29bf2 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -744,11 +744,14 @@ EXPORT_SYMBOL_GPL(lookup_address);
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
unsigned int *level)
{
- if (cpa->pgd)
- return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
- address, level);
+ pgd_t *pgd;
+
+ if (!cpa->pgd)
+ pgd = pgd_offset_k(address);
+ else
+ pgd = cpa->pgd + pgd_index(address);

- return lookup_address(address, level);
+ return lookup_address_in_pgd(pgd, address, level);
}

/*
--
2.35.3


2024-04-12 15:17:48

by Jürgen Groß

Subject: [PATCH v2 4/4] x86/pat: fix W^X violation false-positives when running as Xen PV guest

When running as a Xen PV guest, W^X violation WARN()s have been
observed in some cases. Those WARN()s are produced by verify_rwx(),
which looks at the PTE to verify that writable kernel pages have the
NX bit set, in order to prevent rogue code from modifying the kernel.

Since the NX bits of all translation levels are OR-ed and the RW bits
of all levels are AND-ed, looking just at the PTE isn't enough to
decide whether a writable page is also executable.

When running as a Xen PV guest, the direct map PMDs and the kernel
high map PMDs share the same set of PTEs. Xen kernel initialization
will set the NX bit in the direct map PMD entries, but not in the
shared PTEs.
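
As an illustration of the false-positive (a minimal sketch with
made-up values, not kernel code):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
        /*
         * Hypothetical Xen PV layout: the shared leaf PTE is RW and
         * executable, while the direct map PMD above it carries NX.
         */
        bool leaf_rw = true, leaf_x = true;  /* shared PTE */
        bool upper_nx = true;                /* direct map PMD has NX */

        /* Old check (leaf only): RW + X looks like a W^X violation. */
        printf("old: violation = %d\n", leaf_rw && leaf_x);  /* 1 */

        /* New check: effective NX makes the page non-executable. */
        printf("new: violation = %d\n",
               leaf_rw && leaf_x && !upper_nx);              /* 0 */
        return 0;
}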

Fixes: 652c5bf380ad ("x86/mm: Refuse W^X violations")
Reported-by: Jason Andryuk <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- patch split (Ingo Molnar)
- commit message reworded (Jason Andryuk)
---
arch/x86/mm/pat/set_memory.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 4ebccaf29bf2..19fdfbb171ed 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -619,7 +619,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
* Validate strict W^X semantics.
*/
static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
- unsigned long pfn, unsigned long npg)
+ unsigned long pfn, unsigned long npg,
+ bool nx, bool rw)
{
unsigned long end;

@@ -641,6 +642,10 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
return new;

+ /* Non-leaf translation entries can disable writing or execution. */
+ if (!rw || nx)
+ return new;
+
end = start + npg * PAGE_SIZE - 1;
WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
(unsigned long long)pgprot_val(old),
@@ -742,7 +747,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
EXPORT_SYMBOL_GPL(lookup_address);

static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
- unsigned int *level)
+ unsigned int *level, bool *nx, bool *rw)
{
pgd_t *pgd;

@@ -751,7 +756,7 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
else
pgd = cpa->pgd + pgd_index(address);

- return lookup_address_in_pgd(pgd, address, level);
+ return lookup_address_in_pgd_attr(pgd, address, level, nx, rw);
}

/*
@@ -879,12 +884,13 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
pgprot_t old_prot, new_prot, req_prot, chk_prot;
pte_t new_pte, *tmp;
enum pg_level level;
+ bool nx, rw;

/*
* Check for races, another CPU might have split this page
* up already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte)
return 1;

@@ -995,7 +1001,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
psize, CPA_DETECT);

- new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+ new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages,
+ nx, rw);

/*
* If there is a conflict, split the large page.
@@ -1076,6 +1083,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
pte_t *pbase = (pte_t *)page_address(base);
unsigned int i, level;
pgprot_t ref_prot;
+ bool nx, rw;
pte_t *tmp;

spin_lock(&pgd_lock);
@@ -1083,7 +1091,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
* Check for races, another CPU might have split this page
* up for us already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte) {
spin_unlock(&pgd_lock);
return 1;
@@ -1624,10 +1632,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
int do_split, err;
unsigned int level;
pte_t *kpte, old_pte;
+ bool nx, rw;

address = __cpa_addr(cpa, cpa->curpage);
repeat:
- kpte = _lookup_address_cpa(cpa, address, &level);
+ kpte = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (!kpte)
return __cpa_process_fault(cpa, address, primary);

@@ -1649,7 +1658,8 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT);

- new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+ new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1,
+ nx, rw);

new_prot = pgprot_clear_protnone_bits(new_prot);

--
2.35.3


Subject: [tip: x86/mm] x86/pat: Restructure _lookup_address_cpa()

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 02eac06b820c3eae73e5736ae62f986d37fed991
Gitweb: https://git.kernel.org/tip/02eac06b820c3eae73e5736ae62f986d37fed991
Author: Juergen Gross <[email protected]>
AuthorDate: Fri, 12 Apr 2024 17:12:57 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Sun, 14 Apr 2024 22:16:28 +02:00

x86/pat: Restructure _lookup_address_cpa()

Modify _lookup_address_cpa() to no longer use lookup_address(), but
only lookup_address_in_pgd().

This is done in preparation for using lookup_address_in_pgd_attr().

No functional change intended.

Signed-off-by: Juergen Gross <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/mm/pat/set_memory.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index bfa0aae..4ebccaf 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -744,11 +744,14 @@ EXPORT_SYMBOL_GPL(lookup_address);
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
unsigned int *level)
{
- if (cpa->pgd)
- return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
- address, level);
+ pgd_t *pgd;
+
+ if (!cpa->pgd)
+ pgd = pgd_offset_k(address);
+ else
+ pgd = cpa->pgd + pgd_index(address);

- return lookup_address(address, level);
+ return lookup_address_in_pgd(pgd, address, level);
}

/*

Subject: [tip: x86/mm] x86/pat: Fix W^X violation false-positives when running as Xen PV guest

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: 5bc8b0f5dac04cd4ebe47f8090a5942f2f2647ef
Gitweb: https://git.kernel.org/tip/5bc8b0f5dac04cd4ebe47f8090a5942f2f2647ef
Author: Juergen Gross <[email protected]>
AuthorDate: Fri, 12 Apr 2024 17:12:58 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Sun, 14 Apr 2024 22:16:30 +02:00

x86/pat: Fix W^X violation false-positives when running as Xen PV guest

When running as a Xen PV guest, W^X violation WARN()s have been
observed in some cases. Those WARN()s are produced by verify_rwx(),
which looks at the PTE to verify that writable kernel pages have the
NX bit set, in order to prevent rogue code from modifying the kernel.

Since the NX bits of all translation levels are OR-ed and the RW bits
of all levels are AND-ed, looking just at the PTE isn't enough to
decide whether a writable page is also executable.

When running as a Xen PV guest, the direct map PMDs and the kernel
high map PMDs share the same set of PTEs. Xen kernel initialization
will set the NX bit in the direct map PMD entries, but not in the
shared PTEs.

Fixes: 652c5bf380ad ("x86/mm: Refuse W^X violations")
Reported-by: Jason Andryuk <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/mm/pat/set_memory.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 4ebccaf..19fdfbb 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -619,7 +619,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
* Validate strict W^X semantics.
*/
static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
- unsigned long pfn, unsigned long npg)
+ unsigned long pfn, unsigned long npg,
+ bool nx, bool rw)
{
unsigned long end;

@@ -641,6 +642,10 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
return new;

+ /* Non-leaf translation entries can disable writing or execution. */
+ if (!rw || nx)
+ return new;
+
end = start + npg * PAGE_SIZE - 1;
WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
(unsigned long long)pgprot_val(old),
@@ -742,7 +747,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
EXPORT_SYMBOL_GPL(lookup_address);

static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
- unsigned int *level)
+ unsigned int *level, bool *nx, bool *rw)
{
pgd_t *pgd;

@@ -751,7 +756,7 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
else
pgd = cpa->pgd + pgd_index(address);

- return lookup_address_in_pgd(pgd, address, level);
+ return lookup_address_in_pgd_attr(pgd, address, level, nx, rw);
}

/*
@@ -879,12 +884,13 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
pgprot_t old_prot, new_prot, req_prot, chk_prot;
pte_t new_pte, *tmp;
enum pg_level level;
+ bool nx, rw;

/*
* Check for races, another CPU might have split this page
* up already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte)
return 1;

@@ -995,7 +1001,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
psize, CPA_DETECT);

- new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+ new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages,
+ nx, rw);

/*
* If there is a conflict, split the large page.
@@ -1076,6 +1083,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
pte_t *pbase = (pte_t *)page_address(base);
unsigned int i, level;
pgprot_t ref_prot;
+ bool nx, rw;
pte_t *tmp;

spin_lock(&pgd_lock);
@@ -1083,7 +1091,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
* Check for races, another CPU might have split this page
* up for us already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte) {
spin_unlock(&pgd_lock);
return 1;
@@ -1624,10 +1632,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
int do_split, err;
unsigned int level;
pte_t *kpte, old_pte;
+ bool nx, rw;

address = __cpa_addr(cpa, cpa->curpage);
repeat:
- kpte = _lookup_address_cpa(cpa, address, &level);
+ kpte = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (!kpte)
return __cpa_process_fault(cpa, address, primary);

@@ -1649,7 +1658,8 @@ repeat:
new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT);

- new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+ new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1,
+ nx, rw);

new_prot = pgprot_clear_protnone_bits(new_prot);


Subject: [tip: x86/mm] x86/mm: Use lookup_address_in_pgd_attr() in show_fault_oops()

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: d29dc5177b7d011377ecf648551027c94d2b1386
Gitweb: https://git.kernel.org/tip/d29dc5177b7d011377ecf648551027c94d2b1386
Author: Juergen Gross <[email protected]>
AuthorDate: Fri, 12 Apr 2024 17:12:56 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Sun, 14 Apr 2024 22:16:27 +02:00

x86/mm: Use lookup_address_in_pgd_attr() in show_fault_oops()

Fix show_fault_oops() to no longer look only at the leaf PTE when
detecting NX violations, but to use the effective NX value returned by
lookup_address_in_pgd_attr() instead.

Signed-off-by: Juergen Gross <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/mm/fault.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 622d12e..6b2ca8b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -514,18 +514,19 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad

if (error_code & X86_PF_INSTR) {
unsigned int level;
+ bool nx, rw;
pgd_t *pgd;
pte_t *pte;

pgd = __va(read_cr3_pa());
pgd += pgd_index(address);

- pte = lookup_address_in_pgd(pgd, address, &level);
+ pte = lookup_address_in_pgd_attr(pgd, address, &level, &nx, &rw);

- if (pte && pte_present(*pte) && !pte_exec(*pte))
+ if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx))
pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
from_kuid(&init_user_ns, current_uid()));
- if (pte && pte_present(*pte) && pte_exec(*pte) &&
+ if (pte && pte_present(*pte) && pte_exec(*pte) && !nx &&
(pgd_flags(*pgd) & _PAGE_USER) &&
(__read_cr4() & X86_CR4_SMEP))
pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",

Subject: [tip: x86/mm] x86/pat: Introduce lookup_address_in_pgd_attr()

The following commit has been merged into the x86/mm branch of tip:

Commit-ID: ceb647b4b529fdeca9021cd34486f5a170746bda
Gitweb: https://git.kernel.org/tip/ceb647b4b529fdeca9021cd34486f5a170746bda
Author: Juergen Gross <[email protected]>
AuthorDate: Fri, 12 Apr 2024 17:12:55 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Sun, 14 Apr 2024 22:16:26 +02:00

x86/pat: Introduce lookup_address_in_pgd_attr()

Add lookup_address_in_pgd_attr(), which does the same as the existing
lookup_address_in_pgd(), but additionally returns the effective
settings of the NX and RW bits of all walked page table levels.

This will be needed in order to match hardware behavior when looking
for effective access rights, especially for detecting writable code
pages.

In order to avoid code duplication, let lookup_address_in_pgd() call
lookup_address_in_pgd_attr() with dummy parameters.

Signed-off-by: Juergen Gross <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/include/asm/pgtable_types.h | 2 ++
arch/x86/mm/pat/set_memory.c | 33 ++++++++++++++++++++++++---
2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0b748ee..dd05cae 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -566,6 +566,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 80c9037..bfa0aae 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -657,20 +657,26 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star

/*
* Lookup the page table entry for a virtual address in a specific pgd.
- * Return a pointer to the entry and the level of the mapping.
+ * Return a pointer to the entry, the level of the mapping, and the effective
+ * NX and RW bits of all page table levels.
*/
-pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
- unsigned int *level)
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw)
{
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;

*level = PG_LEVEL_NONE;
+ *nx = false;
+ *rw = true;

if (pgd_none(*pgd))
return NULL;

+ *nx |= pgd_flags(*pgd) & _PAGE_NX;
+ *rw &= pgd_flags(*pgd) & _PAGE_RW;
+
p4d = p4d_offset(pgd, address);
if (p4d_none(*p4d))
return NULL;
@@ -679,6 +685,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (p4d_leaf(*p4d) || !p4d_present(*p4d))
return (pte_t *)p4d;

+ *nx |= p4d_flags(*p4d) & _PAGE_NX;
+ *rw &= p4d_flags(*p4d) & _PAGE_RW;
+
pud = pud_offset(p4d, address);
if (pud_none(*pud))
return NULL;
@@ -687,6 +696,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pud_leaf(*pud) || !pud_present(*pud))
return (pte_t *)pud;

+ *nx |= pud_flags(*pud) & _PAGE_NX;
+ *rw &= pud_flags(*pud) & _PAGE_RW;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
@@ -695,12 +707,27 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pmd_leaf(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;

+ *nx |= pmd_flags(*pmd) & _PAGE_NX;
+ *rw &= pmd_flags(*pmd) & _PAGE_RW;
+
*level = PG_LEVEL_4K;

return pte_offset_kernel(pmd, address);
}

/*
+ * Lookup the page table entry for a virtual address in a specific pgd.
+ * Return a pointer to the entry and the level of the mapping.
+ */
+pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+ unsigned int *level)
+{
+ bool nx, rw;
+
+ return lookup_address_in_pgd_attr(pgd, address, level, &nx, &rw);
+}
+
+/*
* Lookup the page table entry for a virtual address. Return a pointer
* to the entry and the level of the mapping.
*