2008-02-08 16:36:49

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [0/5] pageattr protection patchkit v2 for the latest kernel


There were some conflicts applying the previous patchkit
to the latest mainline tree; only difference is that I resolved
them.

-Andi


2008-02-08 16:36:32

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [2/5] Support range checking for required/advisory protections


Previously these checks would only check a single address, which is ok
for 4k pages, but not for large pages

Needed for followup patches

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 34 +++++++++++++++++++++++-----------
1 file changed, 23 insertions(+), 11 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -35,6 +35,13 @@ within(unsigned long addr, unsigned long
return addr >= start && addr < end;
}

+static inline int
+within_range(unsigned long addr_start, unsigned long addr_end,
+ unsigned long start, unsigned long end)
+{
+ return addr_end >= start && addr_start < end;
+}
+
/*
* Flushing functions
*/
@@ -149,7 +156,8 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static inline pgprot_t required_static_prot(pgprot_t prot, unsigned long address)
+static inline pgprot_t
+required_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);

@@ -157,19 +165,21 @@ static inline pgprot_t required_static_p
* The BIOS area between 640k and 1Mb needs to be executable for
* PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
*/
- if (within(__pa(address), BIOS_BEGIN, BIOS_END))
+ if (within_range(__pa(start), __pa(end), BIOS_BEGIN, BIOS_END))
pgprot_val(forbidden) |= _PAGE_NX;

/*
* The kernel text needs to be executable for obvious reasons
* Does not cover __inittext since that is gone later on
*/
- if (within(address, (unsigned long)_text, (unsigned long)_etext))
+ if (within_range(start, end,
+ (unsigned long)_text, (unsigned long)_etext))
pgprot_val(forbidden) |= _PAGE_NX;
/*
* Do the same for the x86-64 high kernel mapping
*/
- if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+ if (within_range(start, end,
+ virt_to_highmap(_text), virt_to_highmap(_etext)))
pgprot_val(forbidden) |= _PAGE_NX;

prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -177,18 +187,19 @@ static inline pgprot_t required_static_p
return prot;
}

-static inline pgprot_t advised_static_prot(pgprot_t prot, unsigned long address)
+static inline pgprot_t
+advised_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);

/* The .rodata section needs to be read-only */
- if (within(address, (unsigned long)__start_rodata,
+ if (within_range(start, end, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
pgprot_val(forbidden) |= _PAGE_RW;
/*
* Do the same for the x86-64 high kernel mapping
*/
- if (within(address, virt_to_highmap(__start_rodata),
+ if (within_range(start, end, virt_to_highmap(__start_rodata),
virt_to_highmap(__end_rodata)))
pgprot_val(forbidden) |= _PAGE_RW;

@@ -313,8 +324,8 @@ try_preserve_large_page(pte_t *kpte, uns

pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
- new_prot = required_static_prot(new_prot, address);
- new_prot = advised_static_prot(new_prot, address);
+ new_prot = required_static_prot(new_prot, address, address + psize - 1);
+ new_prot = advised_static_prot(new_prot, address, address + psize - 1);

/*
* If there are no changes, return. maxpages has been updated
@@ -438,6 +449,7 @@ repeat:
BUG_ON(PageCompound(kpte_page));

if (level == PG_LEVEL_4K) {
+ unsigned long end = address + PAGE_SIZE - 1;
pte_t new_pte, old_pte = *kpte;
pgprot_t new_prot = pte_pgprot(old_pte);

@@ -452,8 +464,8 @@ repeat:
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

- new_prot = required_static_prot(new_prot, address);
- new_prot = advised_static_prot(new_prot, address);
+ new_prot = required_static_prot(new_prot, address, end);
+ new_prot = advised_static_prot(new_prot, address, end);

/*
* We need to keep the pfn from the existing PTE,

2008-02-08 16:37:21

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [3/5] CPA: Make advised protection check truly advisory


Only force RO in the advisory protection checks when all pages in the
range are RO. Previously it would trigger when any page in the range
was ro.

I believe this will make try_preserve_large_page much safer to use.

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -193,14 +193,16 @@ advised_static_prot(pgprot_t prot, unsig
pgprot_t forbidden = __pgprot(0);

/* The .rodata section needs to be read-only */
- if (within_range(start, end, (unsigned long)__start_rodata,
- (unsigned long)__end_rodata))
+ if (within_range((unsigned long)__start_rodata,
+ (unsigned long)__end_rodata,
+ start, end))
pgprot_val(forbidden) |= _PAGE_RW;
/*
* Do the same for the x86-64 high kernel mapping
*/
- if (within_range(start, end, virt_to_highmap(__start_rodata),
- virt_to_highmap(__end_rodata)))
+ if (within_range(virt_to_highmap(__start_rodata),
+ virt_to_highmap(__end_rodata),
+ start, end))
pgprot_val(forbidden) |= _PAGE_RW;

prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));

2008-02-08 16:37:50

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [4/5] Don't use inline for the protection checks


There are multiple call sites and they are not time critical

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -156,7 +156,7 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static inline pgprot_t
+static pgprot_t
required_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);
@@ -187,7 +187,7 @@ required_static_prot(pgprot_t prot, unsi
return prot;
}

-static inline pgprot_t
+static pgprot_t
advised_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);

2008-02-08 16:38:15

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot


There is a big difference between NX and RO. NX absolutely has to be cleared
or the kernel will fail while RO just can be set, but does not need to.
And for a large page area not setting NX if there is a area below
it that needs it is essential, while making it ro is optional again.

This is needed for a followup patch who uses requred_static_prot() for large
pages where it is inconvenient to check all pages.

No behaviour change in this patch.

[Lines > 80 characters are changed in followup patch]

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/pageattr.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)

Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -149,7 +149,7 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
+static inline pgprot_t required_static_prot(pgprot_t prot, unsigned long address)
{
pgprot_t forbidden = __pgprot(0);

@@ -172,6 +172,15 @@ static inline pgprot_t static_protection
if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
pgprot_val(forbidden) |= _PAGE_NX;

+ prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+
+ return prot;
+}
+
+static inline pgprot_t advised_static_prot(pgprot_t prot, unsigned long address)
+{
+ pgprot_t forbidden = __pgprot(0);
+
/* The .rodata section needs to be read-only */
if (within(address, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
@@ -304,7 +313,8 @@ try_preserve_large_page(pte_t *kpte, uns

pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
- new_prot = static_protections(new_prot, address);
+ new_prot = required_static_prot(new_prot, address);
+ new_prot = advised_static_prot(new_prot, address);

/*
* If there are no changes, return. maxpages has been updated
@@ -442,7 +452,8 @@ repeat:
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

- new_prot = static_protections(new_prot, address);
+ new_prot = required_static_prot(new_prot, address);
+ new_prot = advised_static_prot(new_prot, address);

/*
* We need to keep the pfn from the existing PTE,
@@ -532,7 +543,7 @@ static int change_page_attr_addr(struct
* for the non obvious details.
*
* Note that NX and other required permissions are
- * checked in static_protections().
+ * checked in required_static_prot().
*/
address = phys_addr + HIGH_MAP_START - phys_base;

2008-02-08 16:38:43

by Andi Kleen

[permalink] [raw]
Subject: [PATCH] [5/5] Switch i386 early boot page table initialization over to use required_static_prot()


This makes it use the same tests for this as pageattr.

Does not check advisory protections yet because that is not needed yet.

Signed-off-by: Andi Kleen <[email protected]>

---
arch/x86/mm/init_32.c | 15 +++------------
arch/x86/mm/pageattr.c | 2 +-
include/asm-x86/cacheflush.h | 3 +++
3 files changed, 7 insertions(+), 13 deletions(-)

Index: linux/arch/x86/mm/init_32.c
===================================================================
--- linux.orig/arch/x86/mm/init_32.c
+++ linux/arch/x86/mm/init_32.c
@@ -140,13 +140,6 @@ page_table_range_init(unsigned long star
}
}

-static inline int is_kernel_text(unsigned long addr)
-{
- if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
- return 1;
- return 0;
-}
-
/*
* This maps the physical memory to kernel virtual address space, a total
* of max_low_pfn pages, by creating page tables starting from address
@@ -189,9 +182,7 @@ static void __init kernel_physical_mappi
addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
PAGE_OFFSET + PAGE_SIZE-1;

- if (is_kernel_text(addr) ||
- is_kernel_text(addr2))
- prot = PAGE_KERNEL_LARGE_EXEC;
+ prot = required_static_prot(prot, addr, addr2);

set_pmd(pmd, pfn_pmd(pfn, prot));

@@ -205,8 +196,8 @@ static void __init kernel_physical_mappi
pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
pgprot_t prot = PAGE_KERNEL;

- if (is_kernel_text(addr))
- prot = PAGE_KERNEL_EXEC;
+ prot = required_static_prot(prot, addr,
+ addr + PAGE_SIZE - 1);

set_pte(pte, pfn_pte(pfn, prot));
}
Index: linux/include/asm-x86/cacheflush.h
===================================================================
--- linux.orig/include/asm-x86/cacheflush.h
+++ linux/include/asm-x86/cacheflush.h
@@ -45,6 +45,9 @@ int set_memory_4k(unsigned long addr, in

void clflush_cache_range(void *addr, unsigned int size);

+pgprot_t required_static_prot(pgprot_t prot, unsigned long start,
+ unsigned long end);
+
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
Index: linux/arch/x86/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr.c
+++ linux/arch/x86/mm/pageattr.c
@@ -156,7 +156,7 @@ static unsigned long virt_to_highmap(voi
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static pgprot_t
+pgprot_t
required_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
pgprot_t forbidden = __pgprot(0);

2008-02-09 14:56:25

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot

On Fri, 8 Feb 2008, Andi Kleen wrote:
> There is a big difference between NX and RO. NX absolutely has to be cleared
> or the kernel will fail while RO just can be set, but does not need to.
> And for a large page area not setting NX if there is a area below
> it that needs it is essential, while making it ro is optional again.

No, it's not optional. Making the PMD RO will write protect all 4k
PTEs below independent of their setting. So there is the same
restriction as we have with NX.

> This is needed for a followup patch who uses requred_static_prot() for large
> pages where it is inconvenient to check all pages.

> No behaviour change in this patch.
>
> [Lines > 80 characters are changed in followup patch]

ROTFL.

tglx

2008-02-09 15:14:10

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot

On Sat, Feb 09, 2008 at 03:56:02PM +0100, Thomas Gleixner wrote:
> On Fri, 8 Feb 2008, Andi Kleen wrote:
> > There is a big difference between NX and RO. NX absolutely has to be cleared
> > or the kernel will fail while RO just can be set, but does not need to.
> > And for a large page area not setting NX if there is a area below
> > it that needs it is essential, while making it ro is optional again.

Optional as in it doesn't need to be forced.

>
> No, it's not optional. Making the PMD RO will write protect all 4k
> PTEs below independent of their setting. So there is the same
> restriction as we have with NX.

If there is a boundary between a RO area
and a RW area and you want to map it with 2MB pages then NX is required,
but RO is optional on the page and if you prefer TLB use minimalization
over debugging it is optional. Is it clear now?

Note the behaviour for pageattr and thus DEBUG_RODATA / debugging
sitations where you don't care about your TLB this
does not change, this makes only a difference for the initial init_32
direct mapping setup.

-Andi

2008-02-09 15:39:05

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] [3/5] CPA: Make advised protection check truly advisory

On Fri, 8 Feb 2008, Andi Kleen wrote:

>
> Only force RO in the advisory protection checks when all pages in the
> range are RO. Previously it would trigger when any page in the range
> was ro.
>
> I believe this will make try_preserve_large_page much safer to use.

It might be quite useful to know it for sure.

Thinking about the whole set of changes, you are right, that the
current check for the first page only is not correct, but I don't see
how your changes make it more correct.

The correct way to fix this is to check, whether all the small pages,
which fit in the range of the large page, which we want to preserve,
have the same resulting pgprot flags.

Thanks,
tglx

2008-02-09 15:50:37

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot

On Sat, 9 Feb 2008, Andi Kleen wrote:
> On Sat, Feb 09, 2008 at 03:56:02PM +0100, Thomas Gleixner wrote:
> > On Fri, 8 Feb 2008, Andi Kleen wrote:
> > > There is a big difference between NX and RO. NX absolutely has to be cleared
> > > or the kernel will fail while RO just can be set, but does not need to.
> > > And for a large page area not setting NX if there is a area below
> > > it that needs it is essential, while making it ro is optional again.
>
> Optional as in it doesn't need to be forced.
>
> >
> > No, it's not optional. Making the PMD RO will write protect all 4k
> > PTEs below independent of their setting. So there is the same
> > restriction as we have with NX.
>
> If there is a boundary between a RO area
> and a RW area and you want to map it with 2MB pages then NX is required,
> but RO is optional on the page and if you prefer TLB use minimalization
> over debugging it is optional. Is it clear now?

No, it is not clear at all. Why is there a requirement for NX, when I
have an overlapping area of RO and RW in a large page mapping? If I
want to preserve the large page mapping in such a case, then its
required to make the mapping RW and omit the protection of the RO
area, nothing else.

> Note the behaviour for pageattr and thus DEBUG_RODATA / debugging
> sitations where you don't care about your TLB this
> does not change, this makes only a difference for the initial init_32
> direct mapping setup.

Your patches do change the behaviour. The range checking breaks the
enforcement of some restrictions for the sake of keeping the large
page intact.

Thanks,
tglx

2008-02-09 16:04:01

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot

On Sat, Feb 09, 2008 at 04:50:14PM +0100, Thomas Gleixner wrote:
> On Sat, 9 Feb 2008, Andi Kleen wrote:
> > On Sat, Feb 09, 2008 at 03:56:02PM +0100, Thomas Gleixner wrote:
> > > On Fri, 8 Feb 2008, Andi Kleen wrote:
> > > > There is a big difference between NX and RO. NX absolutely has to be cleared
> > > > or the kernel will fail while RO just can be set, but does not need to.
> > > > And for a large page area not setting NX if there is a area below
> > > > it that needs it is essential, while making it ro is optional again.
> >
> > Optional as in it doesn't need to be forced.
> >
> > >
> > > No, it's not optional. Making the PMD RO will write protect all 4k
> > > PTEs below independent of their setting. So there is the same
> > > restriction as we have with NX.
> >
> > If there is a boundary between a RO area
> > and a RW area and you want to map it with 2MB pages then NX is required,
> > but RO is optional on the page and if you prefer TLB use minimalization
> > over debugging it is optional. Is it clear now?
>
> No, it is not clear at all. Why is there a requirement for NX, when I

Sorry the requirement is no NX, not NX. I left out the "no"
in the second sentence.

> have an overlapping area of RO and RW in a large page mapping? If I
> want to preserve the large page mapping in such a case, then its
> required to make the mapping RW and omit the protection of the RO
> area, nothing else.

Yes that is what my patch (or rather a followup) patch implements.

> > Note the behaviour for pageattr and thus DEBUG_RODATA / debugging
> > sitations where you don't care about your TLB this
> > does not change, this makes only a difference for the initial init_32
> > direct mapping setup.
>
> Your patches do change the behaviour. The range checking breaks the
> enforcement of some restrictions for the sake of keeping the large
> page intact.

You mean in try_preserve_large_page()?

No actually they were not completely enforced previously at all, because
it did only check the restrictions of the first page.

On the end of my patch series the enforcement is actually stricter
than it was before, although not 100%.

-Andi

2008-02-09 16:56:20

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [3/5] CPA: Make advised protection check truly advisory

On Saturday 09 February 2008 16:38:35 Thomas Gleixner wrote:
> On Fri, 8 Feb 2008, Andi Kleen wrote:
>
> >
> > Only force RO in the advisory protection checks when all pages in the
> > range are RO. Previously it would trigger when any page in the range
> > was ro.
> >
> > I believe this will make try_preserve_large_page much safer to use.
>
> It might be quite useful to know it for sure.

I wrote that originally when you still had the bogus "AMD fix"
in the tree. I think it was because I hoped it would fix that one,
but I wasn't sure because I couldn't reproduce it. But Hugh's
patch fixed that anyways.

> Thinking about the whole set of changes, you are right, that the
> current check for the first page only is not correct, but I don't see
> how your changes make it more correct.

With my patch at least NX should be always correct and that is
the more important one because it is default and has to be cleared
and things go horribly wrong when it is incorrect.

RO on the other hand defaults to off and is only sometimes forced.

> The correct way to fix this is to check, whether all the small pages,
> which fit in the range of the large page, which we want to preserve,
> have the same resulting pgprot flags.

Ok if you don't like the try_preserve_large_page change
feel free to drop it or implement it differently.

My main goal here was to clean up the 32bit direct mapping
anyways (last patch) and that does just require splitting out the
NX checks from the RO checks and having a range (first patches)

I think at least the range checks are definitely needed
for correctness though.

If I cared particularly I would probably implement two modi:
one with DEBUG_RODATA and another without. With DEBUG_RODATA
advisory protections should be forced (and large pages split),
without not.

-Andi

2008-02-09 17:09:47

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot

On Sat, 9 Feb 2008, Andi Kleen wrote:
> > > Note the behaviour for pageattr and thus DEBUG_RODATA / debugging
> > > sitations where you don't care about your TLB this
> > > does not change, this makes only a difference for the initial init_32
> > > direct mapping setup.
> >
> > Your patches do change the behaviour. The range checking breaks the
> > enforcement of some restrictions for the sake of keeping the large
> > page intact.
>
> You mean in try_preserve_large_page()?
>
> No actually they were not completely enforced previously at all, because
> it did only check the restrictions of the first page.

Right, you poked my nose to it. I did not think about it when I coded
it. It is wrong and needs to be fixed, but not by the range check you
introduced.

> On the end of my patch series the enforcement is actually stricter
> than it was before, although not 100%.

As far as I can tell it is more relaxed, as it will make overlapping
regions of rodata and rwdata completely rw instead of splitting it up.

Thanks,
tglx

2008-02-09 17:39:18

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] [3/5] CPA: Make advised protection check truly advisory

On Sat, 9 Feb 2008, Andi Kleen wrote:
> On Saturday 09 February 2008 16:38:35 Thomas Gleixner wrote:
> > On Fri, 8 Feb 2008, Andi Kleen wrote:
> >
> > >
> > > Only force RO in the advisory protection checks when all pages in the
> > > range are RO. Previously it would trigger when any page in the range
> > > was ro.
> > >
> > > I believe this will make try_preserve_large_page much safer to use.
> >
> > It might be quite useful to know it for sure.
>
> I wrote that originally when you still had the bogus "AMD fix"
> in the tree. I think it was because I hoped it would fix that one,
> but I wasn't sure because I couldn't reproduce it. But Hugh's
> patch fixed that anyways.

The AMD limiting was due to a testing failure on a 64bit X2 and had
nothing to do with the 32bit wreckage, which was fixed by Hugh.

The fix for the X2 was a missing check for large pmds/puds in the
spurious fault code.

> > Thinking about the whole set of changes, you are right, that the
> > current check for the first page only is not correct, but I don't see
> > how your changes make it more correct.
>
> With my patch at least NX should be always correct and that is
> the more important one because it is default and has to be cleared
> and things go horribly wrong when it is incorrect.
>
> RO on the other hand defaults to off and is only sometimes forced.
>
> > The correct way to fix this is to check, whether all the small pages,
> > which fit in the range of the large page, which we want to preserve,
> > have the same resulting pgprot flags.
>
> Ok if you don't like the try_preserve_large_page change
> feel free to drop it or implement it differently.
>
> My main goal here was to clean up the 32bit direct mapping
> anyways (last patch) and that does just require splitting out the
> NX checks from the RO checks and having a range (first patches)
>
> I think at least the range checks are definitely needed
> for correctness though.
>
> If I cared particularly I would probably implement two modi:
> one with DEBUG_RODATA and another without. With DEBUG_RODATA
> advisory protections should be forced (and large pages split),
> without not.

I can understand, what you want to achieve, but I really do not like
the result for following reasons:

1) If there is a bug in some code, then we fix the bug and do not
intermingle it with something else.

2) I care about RO as much as I care about the NX correctness. That's
the same logic and the same problem. If we have overlapping regions,
then we need to split large pages. Otherwise both protections are
useless to a certain degree.

3) For correctness reasons I even ponder to make the NX/RO mandatory.

Thanks,
tglx

2008-02-10 09:04:44

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot

On Sat, Feb 09, 2008 at 06:09:23PM +0100, Thomas Gleixner wrote:
> On Sat, 9 Feb 2008, Andi Kleen wrote:
> > > > Note the behaviour for pageattr and thus DEBUG_RODATA / debugging
> > > > sitations where you don't care about your TLB this
> > > > does not change, this makes only a difference for the initial init_32
> > > > direct mapping setup.
> > >
> > > Your patches do change the behaviour. The range checking breaks the
> > > enforcement of some restrictions for the sake of keeping the large
> > > page intact.
> >
> > You mean in try_preserve_large_page()?
> >
> > No actually they were not completely enforced previously at all, because
> > it did only check the restrictions of the first page.
>
> Right, you poked my nose to it. I did not think about it when I coded
> it. It is wrong and needs to be fixed, but not by the range check you
> introduced.

Well I need the range check for a different piece of code (init_memory_mapping())
For that a range check is definitely needed and the existing code there
also does an (although quite fishy) range check. The DEBUG_RODATA case
is also handled correctly there because DEBUG_RODATA is applied explicitely
using pageattr later.

You have not commented on that at all so I assume it's ok for you.

> > On the end of my patch series the enforcement is actually stricter
> > than it was before, although not 100%.
>
> As far as I can tell it is more relaxed, as it will make overlapping
> regions of rodata and rwdata completely rw instead of splitting it up.

In try_preserve_large_page()? No because it only checks the first page.

In all other cases (in the existing code; my patchkit adds a new case
in mm/init_32.c) it always only checks single 4K pages so the only
overlap case would be sub 4K. For that there can be no split up.

-Andi

2008-02-10 09:19:57

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] [3/5] CPA: Make advised protection check truly advisory

Thomas Gleixner <[email protected]> writes:

> 2) I care about RO as much as I care about the NX correctness. That's
> the same logic and the same problem. If we have overlapping regions,
> then we need to split large pages. Otherwise both protections are
> useless to a certain degree.

That's laudable of you if you care about that so deeply, but then please just
fix try_preserve_large_page() to do that correctly.

But I suspect you will need some sort of range check for this anyways
so applying my patchkit as the foundation for your fix is probably
not a bad idea. I hope we can agree on the simple fact that without
ranges large pages cannot be handled correctly.

> 3) For correctness reasons I even ponder to make the NX/RO mandatory.

That might make sense for cpa, but is not a good idea for
sharing these checks with init_memory_mapping() which was the main goal
for my patchkit. I plan to do some further changes in that
area, but that first requires sharing of that code.

There you don't want to get into the mess of handling 4K pages for these
boundaries because they are later split up anyways by cpa for the DEBUG_RODATA
case. You've previously complained about other code reimplementing
pageattr code and doing fine grained 4K splits in init_memory_mapping()
would be exactly that so I assume you wouldn't like that either.

That is why I only wanted to apply the required checks there,
to leave the exact work for later.

You have not previously commented on that aspect so I assume
it's ok for you.

-Andi

2008-02-10 16:50:33

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] [3/5] CPA: Make advised protection check truly advisory

On Sun, 10 Feb 2008, Andi Kleen wrote:
> Thomas Gleixner <[email protected]> writes:
>
> > 2) I care about RO as much as I care about the NX correctness. That's
> > the same logic and the same problem. If we have overlapping regions,
> > then we need to split large pages. Otherwise both protections are
> > useless to a certain degree.
>
> That's laudable of you if you care about that so deeply, but then please just
> fix try_preserve_large_page() to do that correctly.

Done already :)

Btw. it's not a question of laudability. It's a question of
correctness.

Writing a piece of code, which has a thinko in the first place, is not
correct, but we are all humans and miss a detail from time to time.

What you did is far more than incorrect:

You discovered the bug, you did not point it out upfront and you hid
an alleged fix in a bunch of changes, which happen to be around the
same area. And at the very end you ignore my objections against your
buggy fix by handwaving about your init_memory_mapping() cleanups.

> But I suspect you will need some sort of range check for this anyways
> so applying my patchkit as the foundation for your fix is probably
> not a bad idea. I hope we can agree on the simple fact that without
> ranges large pages cannot be handled correctly.

To fix the try_preserve_large_page() problem a range check is not
necessary at all. A range check, if done right, might optimize it.

For the proposed cleanup of init_memory_mapping() I tend to agree,
that a range check might be useful.

> > 3) For correctness reasons I even ponder to make the NX/RO mandatory.
>
> That might make sense for cpa, but is not a good idea for
> sharing these checks with init_memory_mapping() which was the main goal
> for my patchkit. I plan to do some further changes in that
> area, but that first requires sharing of that code.
>
> There you don't want to get into the mess of handling 4K pages for these
> boundaries because they are later split up anyways by cpa for the DEBUG_RODATA
> case. You've previously complained about other code reimplementing
> pageattr code and doing fine grained 4K splits in init_memory_mapping()
> would be exactly that so I assume you wouldn't like that either.
>
> That is why I only wanted to apply the required checks there,
> to leave the exact work for later.
>
> You have not previously commented on that aspect so I assume
> it's ok for you.

I can see the requirement for a range check for the kind of cleanup
you want to do and it's fine with me to modify static_protections() in
a way which allows us to do range checks and share that code.

It just needs to be correct, while your so called fix is not even
close to correct.

To verify the incorrectness, lets apply the first two patches:

[PATCH] [1/5] CPA: Split static_protections into required_static_prot and advised_static_prot
[PATCH] [2/5] Support range checking for required/advisory protections

we get:

static inline int
within_range(unsigned long addr_start, unsigned long addr_end,
unsigned long start, unsigned long end)
{
return addr_end >= start && addr_start < end;
}

advised_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
if (within_range(start, end, (unsigned long)__start_rodata,
(unsigned long)__end_rodata)
pgprot_val(forbidden) |= _PAGE_RW;

prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot;
}

Called with:

prot = advised_static_prot(prot | _PAGE_RW,
(unsigned long)__start_rodata - PAGE_SIZE,
(unsigned long)__end_rodata + PAGE_SIZE - 1);

ends up with the check for:

(__end_rodata + PAGE_SIZE - 1) >= __start_rodata &&
(__start_rodata - PAGE_SIZE) < __end_rodata

Which evaluates to true. So we write protect one page before the RO
section and one page after the RO section.

How does this fix the incorrectness exactly ?

The next patch
[PATCH] [3/5] CPA: Make advised protection check truly advisory

morphes advised_static_prot() into:

advised_static_prot(pgprot_t prot, unsigned long start, unsigned long end)
{
if (within_range((unsigned long)__start_rodata,
(unsigned long)__end_rodata,
start, end))
pgprot_val(forbidden) |= _PAGE_RW;

prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot;
}

Called with:

prot = advised_static_prot(prot | _PAGE_RW,
(unsigned long)__start_rodata - PAGE_SIZE,
(unsigned long)__end_rodata + PAGE_SIZE - 1);
This results in:

__end_rodata >= (__start_rodata - PAGE_SIZE) &&
__start_rodata < (__end_rodata + PAGE_SIZE - 1)

which evaluates to true as well. So we still write protect one page
before the RO section and one page after the RO section. Probably not
what we want either, right ?

>From the changelog:

"Only force RO in the advisory protection checks when all pages in the
range are RO. Previously it would trigger when any page in the range
was ro.
I believe this will make try_preserve_large_page much safer to use."

I had a reason to ask, whether you believe it or you are sure about
it. The changelog is so ludicrously detached from the code change,
that one needs to ask, whether you added it to the right patch.

The only change of this commit is:

(end_range >= start_test_range) && (start_range < end_test_range)

becomes

(end_test_range >= start_range) && (start_test_range < end_range)

which is the same as:

(end_range > start_test_range) && (start_range <= end_test_range)

So you just moved the "or equal" check to the other side, which does
not really change the behaviour of your code significantly, except for
one off corner cases.

Also the correctness of the order of arguments to within_range(), which is
btw. a horrible misleading function name, should have been clear in
the first place, when introducing it and converting the exisiting code
to use it.

Are you still convinced, that it would be a good idea to apply your
patches as a foundation for a fix, which takes 5 lines of code to be
correct ?

Thanks,

tglx