2008-02-08 13:28:01

by Andi Kleen

Subject: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot


There is a big difference between NX and RO. NX absolutely has to be cleared
or the kernel will fail, while RO merely can be set, but does not need to be.
And for a large page area, not setting NX when there is an area below it that
needs to be executable is essential, while making it RO is again optional.

This is needed for a followup patch which uses required_static_prot() for large
pages, where it is inconvenient to check all pages individually.

No behaviour change in this patch.

[Lines > 80 characters are changed in a followup patch]
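
For illustration, a minimal sketch of how the two checks are meant to compose
at a call site after the split (the wrapper name is made up for this example;
required_static_prot() and advised_static_prot() are the helpers introduced
below):

/*
 * Illustrative only: required bits (NX over kernel text and the BIOS
 * area) must always be enforced or the kernel breaks; advisory bits
 * (RO over .rodata) may be applied, but nothing depends on them.
 */
static pgprot_t apply_static_prot(pgprot_t prot, unsigned long address)
{
	prot = required_static_prot(prot, address);	/* mandatory */
	prot = advised_static_prot(prot, address);	/* optional */
	return prot;
}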

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -149,7 +149,7 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
+static inline pgprot_t required_static_prot(pgprot_t prot, unsigned long address)
{
pgprot_t forbidden = __pgprot(0);

@@ -173,21 +173,25 @@ static inline pgprot_t static_protection
pgprot_val(forbidden) |= _PAGE_NX;


+ prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+
+ return prot;
+}
+
+static inline pgprot_t advised_static_prot(pgprot_t prot, unsigned long address)
+{
#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
if (within(address, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
- pgprot_val(forbidden) |= _PAGE_RW;
+ pgprot_val(prot) &= ~_PAGE_RW;
/*
* Do the same for the x86-64 high kernel mapping
*/
if (within(address, virt_to_highmap(__start_rodata),
virt_to_highmap(__end_rodata)))
- pgprot_val(forbidden) |= _PAGE_RW;
+ pgprot_val(prot) &= ~_PAGE_RW;
#endif
-
- prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
-
return prot;
}

@@ -318,7 +322,8 @@ try_preserve_large_page(pte_t *kpte, uns

pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
- new_prot = static_protections(new_prot, address);
+ new_prot = required_static_prot(new_prot, address);
+ new_prot = advised_static_prot(new_prot, address);

/*
* If there are no changes, return. maxpages has been updated
@@ -456,7 +461,8 @@ repeat:
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

- new_prot = static_protections(new_prot, address);
+ new_prot = required_static_prot(new_prot, address);
+ new_prot = advised_static_prot(new_prot, address);

/*
* We need to keep the pfn from the existing PTE,
@@ -546,7 +552,7 @@ static int change_page_attr_addr(struct
* for the non obvious details.
*
* Note that NX and other required permissions are
- * checked in static_protections().
+ * checked in required_static_prot().
*/
address = phys_addr + HIGH_MAP_START - phys_base;


2008-02-08 13:28:26

by Andi Kleen

Subject: [PATCH] [3/5] CPA: Make advised protection check truly advisory


Only force RO in the advisory protection checks when all pages in the
range are RO. Previously the check would trigger when any page in the
range was RO.

I believe this will make try_preserve_large_page much safer to use.
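
What the described behaviour amounts to is a containment test rather than an
overlap test; a minimal sketch of that intent (this helper is illustrative
only and is not the within_range() helper used in the diff):

/*
 * Illustrative: true only when the whole [start, end] range lies
 * inside the read-only region, i.e. every page in the range can
 * safely lose _PAGE_RW.
 */
static int range_fully_readonly(unsigned long start, unsigned long end,
				unsigned long ro_start, unsigned long ro_end)
{
	return start >= ro_start && end < ro_end;
}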

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -191,14 +191,14 @@ advised_static_prot(pgprot_t prot, unsig
{
#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
- if (within_range(start, end, (unsigned long)__start_rodata,
- (unsigned long)__end_rodata))
+ if (within_range((unsigned long)__start_rodata,
+ (unsigned long)__end_rodata, start, end))
pgprot_val(prot) &= ~_PAGE_RW;
/*
* Do the same for the x86-64 high kernel mapping
*/
- if (within_range(start, end, virt_to_highmap(__start_rodata),
- virt_to_highmap(__end_rodata)))
+ if (within_range(virt_to_highmap(__start_rodata),
+ virt_to_highmap(__end_rodata), start, end))
pgprot_val(prot) &= ~_PAGE_RW;
#endif
return prot;

2008-02-08 13:28:39

by Andi Kleen

Subject: [PATCH] [4/5] Don't use inline for the protection checks


There are multiple call sites, and they are not time-critical.

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -156,7 +156,7 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static inline pgprot_t
+static pgprot_t
required_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);
@@ -188,7 +188,7 @@ required_static_prot(pgprot_t prot, unsi
return prot;
}

-static inline pgprot_t
+static pgprot_t
advised_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
#ifdef CONFIG_DEBUG_RODATA

2008-02-08 13:28:56

by Andi Kleen

Subject: [PATCH] [5/5] Switch i386 early boot page table initialization over to use required_static_prot()


This makes the early boot page table setup use the same tests as pageattr.c.

Does not check advisory protections yet because that is not needed at this point.
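
A rough sketch of the resulting pattern in the PMD loop (variable names follow
the diff below; the PMD_SIZE-based end address is only indicative of the range
covered by one large page, and assumes the range-based required_static_prot()
introduced earlier in the series):

pgprot_t prot = PAGE_KERNEL_LARGE;
/* last address mapped by this pmd entry */
unsigned long end = addr + PMD_SIZE - 1;

/* clears NX if any kernel text falls inside [addr, end],
   like the old is_kernel_text() check did */
prot = required_static_prot(prot, addr, end);
set_pmd(pmd, pfn_pmd(pfn, prot));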

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/init_32.c | 15 +++------------
arch/x86/mm/pageattr.c | 2 +-
include/asm-x86/cacheflush.h | 3 +++
3 files changed, 7 insertions(+), 13 deletions(-)

Index: linux/arch/x86/mm/init_32.c
===================================================================
--- linux.orig/arch/x86/mm/init_32.c
+++ linux/arch/x86/mm/init_32.c
@@ -140,13 +140,6 @@ page_table_range_init(unsigned long star
}
}

-static inline int is_kernel_text(unsigned long addr)
-{
- if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
- return 1;
- return 0;
-}
-
/*
* This maps the physical memory to kernel virtual address space, a total
* of max_low_pfn pages, by creating page tables starting from address
@@ -189,9 +182,7 @@ static void __init kernel_physical_mappi
addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
PAGE_OFFSET + PAGE_SIZE-1;

- if (is_kernel_text(addr) ||
- is_kernel_text(addr2))
- prot = PAGE_KERNEL_LARGE_EXEC;
+ prot = required_static_prot(prot, addr, addr2);

set_pmd(pmd, pfn_pmd(pfn, prot));

@@ -205,8 +196,8 @@ static void __init kernel_physical_mappi
pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
pgprot_t prot = PAGE_KERNEL;

- if (is_kernel_text(addr))
- prot = PAGE_KERNEL_EXEC;
+ prot = required_static_prot(prot, addr,
+ addr + PAGE_SIZE - 1);

set_pte(pte, pfn_pte(pfn, prot));
}
Index: linux/include/asm-x86/cacheflush.h
===================================================================
--- linux.orig/include/asm-x86/cacheflush.h
+++ linux/include/asm-x86/cacheflush.h
@@ -45,6 +45,9 @@ int set_memory_4k(unsigned long addr, in

void clflush_cache_range(void *addr, unsigned int size);

+pgprot_t required_static_prot(pgprot_t prot, unsigned long start,
+ unsigned long end);
+
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -156,7 +156,7 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static pgprot_t
+pgprot_t
required_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);

2008-02-08 13:29:31

by Andi Kleen

Subject: [PATCH] [2/5] Support range checking for required/advisory protections


Previously these checks would only check a single address, which is fine
for 4k pages, but not for large pages.

Needed for followup patches.
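
A rough illustration of why a single-address check is not enough for a large
page (the 2MB size is only an example, and it assumes _text is not aligned to
the large page boundary):

/*
 * A 2MB mapping whose first byte lies just below _text: the old
 * single-address check sees a non-text address and would allow NX,
 * even though part of the page is kernel text.
 */
unsigned long start = (unsigned long)_text & ~(PMD_SIZE - 1);
unsigned long end   = start + PMD_SIZE - 1;

/* within(start, _text, _etext)            -> 0  (misses the text) */
/* within_range(start, end, _text, _etext) -> 1  (text overlaps)   */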

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -35,6 +35,13 @@ within(unsigned long addr, unsigned long
return addr >= start && addr < end;
}

+static inline int
+within_range(unsigned long addr_start, unsigned long addr_end,
+ unsigned long start, unsigned long end)
+{
+ return addr_end >= start && addr_start < end;
+}
+
/*
* Flushing functions
*/
@@ -149,7 +156,8 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static inline pgprot_t required_static_prot(pgprot_t prot, unsigned long address)
+static inline pgprot_t
+required_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);

@@ -157,19 +165,21 @@ static inline pgprot_t required_static_p
* The BIOS area between 640k and 1Mb needs to be executable for
* PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
*/
- if (within(__pa(address), BIOS_BEGIN, BIOS_END))
+ if (within_range(__pa(start), __pa(end), BIOS_BEGIN, BIOS_END))
pgprot_val(forbidden) |= _PAGE_NX;

/*
* The kernel text needs to be executable for obvious reasons
* Does not cover __inittext since that is gone later on
*/
- if (within(address, (unsigned long)_text, (unsigned long)_etext))
+ if (within_range(start, end,
+ (unsigned long)_text, (unsigned long)_etext))
pgprot_val(forbidden) |= _PAGE_NX;
/*
* Do the same for the x86-64 high kernel mapping
*/
- if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+ if (within_range(start, end,
+ virt_to_highmap(_text), virt_to_highmap(_etext)))
pgprot_val(forbidden) |= _PAGE_NX;


@@ -178,17 +188,18 @@ static inline pgprot_t required_static_p
return prot;
}

-static inline pgprot_t advised_static_prot(pgprot_t prot, unsigned long address)
+static inline pgprot_t
+advised_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
- if (within(address, (unsigned long)__start_rodata,
+ if (within_range(start, end, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
pgprot_val(prot) &= ~_PAGE_RW;
/*
* Do the same for the x86-64 high kernel mapping
*/
- if (within(address, virt_to_highmap(__start_rodata),
+ if (within_range(start, end, virt_to_highmap(__start_rodata),
virt_to_highmap(__end_rodata)))
pgprot_val(prot) &= ~_PAGE_RW;
#endif
@@ -322,8 +333,8 @@ try_preserve_large_page(pte_t *kpte, uns

pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
- new_prot = required_static_prot(new_prot, address);
- new_prot = advised_static_prot(new_prot, address);
+ new_prot = required_static_prot(new_prot, address, address + psize - 1);
+ new_prot = advised_static_prot(new_prot, address, address + psize - 1);

/*
* If there are no changes, return. maxpages has been updated
@@ -447,6 +458,7 @@ repeat:
BUG_ON(PageCompound(kpte_page));

if (level == PG_LEVEL_4K) {
+ unsigned long end = address + PAGE_SIZE - 1;
pte_t new_pte, old_pte = *kpte;
pgprot_t new_prot = pte_pgprot(old_pte);

@@ -461,8 +473,8 @@ repeat:
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

- new_prot = required_static_prot(new_prot, address);
- new_prot = advised_static_prot(new_prot, address);
+ new_prot = required_static_prot(new_prot, address, end);
+ new_prot = advised_static_prot(new_prot, address, end);

/*
* We need to keep the pfn from the existing PTE,