2009-06-13 13:28:45

by Marco Stornelli

[permalink] [raw]
Subject: [PATCH 13/14] Pramfs: Write protection

From: Marco Stornelli <[email protected]>

Write protection.

Signed-off-by: Marco Stornelli <[email protected]>
---

diff -uprN linux-2.6.30-orig/fs/pramfs/wprotect.c linux-2.6.30/fs/pramfs/wprotect.c
--- linux-2.6.30-orig/fs/pramfs/wprotect.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.30/fs/pramfs/wprotect.c 2009-06-13 12:54:16.000000000 +0200
@@ -0,0 +1,84 @@
+/*
+ * FILE NAME fs/pramfs/wprotect.c
+ *
+ * BRIEF DESCRIPTION
+ *
+ * Write protection for the filesystem pages.
+ *
+ * Copyright 2009 Marco Stornelli <[email protected]>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include "pram_fs.h"
+
+/* Make the kernel PTE mapping addr writable (rw != 0) or read-only (rw == 0). init_mm.page_table_lock must be held before calling! */
+static void pram_page_writeable(unsigned long addr, int rw)
+{
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(addr); /* start the walk at the kernel page directory */
+ if (!pgd_none(*pgdp)) { /* at every level: do nothing if the entry is empty */
+ pudp = pud_offset(pgdp, addr);
+ if (!pud_none(*pudp)) {
+ pmdp = pmd_offset(pudp, addr);
+ if (!pmd_none(*pmdp)) {
+ pte_t pte;
+ ptep = pte_offset_kernel(pmdp, addr);
+ pte = *ptep;
+ if (pte_present(pte)) { /* non-present entries are left untouched */
+ pte = rw ? pte_mkwrite(pte) :
+ pte_wrprotect(pte);
+ set_pte(ptep, pte); /* no TLB flush here; pram_writeable() flushes after its loop */
+ }
+ }
+ }
+ }
+}
+
+/* Make each page overlapping [vaddr, vaddr + size) writable (rw != 0) or read-only (rw == 0), then flush the kernel TLB for it. init_mm.page_table_lock must be held before calling! */
+void pram_writeable(void *vaddr, unsigned long size, int rw)
+{
+ unsigned long addr = (unsigned long)vaddr & PAGE_MASK; /* round down to the page start */
+ unsigned long end = (unsigned long)vaddr + size; /* exclusive end, not page-aligned */
+ unsigned long start = addr;
+
+ do { /* do-while: the page containing vaddr is always processed, even if size == 0 */
+ pram_page_writeable(addr, rw);
+ addr += PAGE_SIZE;
+ } while (addr && (addr < end)); /* addr == 0 means it wrapped past the top of the address space */
+
+
+ /*
+ * NOTE: we will always flush just one page (one TLB
+ * entry) except possibly in one case: when a new
+ * filesystem is initialized at mount time, when pram_read_super
+ * calls pram_lock_range to make the super block, inode
+ * table, and bitmap writeable.
+ */
+#if defined(CONFIG_ARM) || defined(CONFIG_M68K) || defined(CONFIG_H8300) || \
+ defined(CONFIG_BLACKFIN)
+ /*
+ * FIXME: so far only these archs have flush_tlb_kernel_page(),
+ * for the rest just use flush_tlb_kernel_range(). Not ideal
+ * to use _range() because many archs just flush the whole TLB.
+ */
+ if (end <= start + PAGE_SIZE) /* range fits in the single page at start */
+ flush_tlb_kernel_page(start);
+ else /* the else body is the flush_tlb_kernel_range() call after #endif */
+#endif
+ flush_tlb_kernel_range(start, end); /* unconditional when the #if block is compiled out */
+}


2009-06-17 02:35:34

by Jared Hulbert

[permalink] [raw]
Subject: Re: [PATCH 13/14] Pramfs: Write protection

> +/* init_mm.page_table_lock must be held before calling! */
> +static void pram_page_writeable(unsigned long addr, int rw)
> +{
> +       pgd_t *pgdp;
> +       pud_t *pudp;
> +       pmd_t *pmdp;
> +       pte_t *ptep;
> +
> +       pgdp = pgd_offset_k(addr);
> +       if (!pgd_none(*pgdp)) {
> +               pudp = pud_offset(pgdp, addr);
> +               if (!pud_none(*pudp)) {
> +                       pmdp = pmd_offset(pudp, addr);
> +                       if (!pmd_none(*pmdp)) {
> +                               pte_t pte;
> +                               ptep = pte_offset_kernel(pmdp, addr);
> +                               pte = *ptep;
> +                               if (pte_present(pte)) {
> +                                       pte = rw ? pte_mkwrite(pte) :
> +                                               pte_wrprotect(pte);
> +                                       set_pte(ptep, pte);
> +                               }
> +                       }
> +               }
> +       }
> +}

Wow. Don't we want to do this pte walking in mm/ someplace?

Do you really intend to protect just the PTE in question rather than
the entire physical page, regardless of which PTE is talking to it?
Maybe I'm missing something.

> +/* init_mm.page_table_lock must be held before calling! */
> +void pram_writeable(void *vaddr, unsigned long size, int rw)
> +{
> +       unsigned long addr = (unsigned long)vaddr & PAGE_MASK;
> +       unsigned long end = (unsigned long)vaddr + size;
> +       unsigned long start = addr;
> +
> +       do {
> +               pram_page_writeable(addr, rw);
> +               addr += PAGE_SIZE;
> +       } while (addr && (addr < end));
> +
> +
> +       /*
> +        * NOTE: we will always flush just one page (one TLB
> +        * entry) except possibly in one case: when a new
> +        * filesystem is initialized at mount time, when pram_read_super
> +        * calls pram_lock_range to make the super block, inode
> +        * table, and bitmap writeable.
> +        */
> +#if defined(CONFIG_ARM) || defined(CONFIG_M68K) || defined(CONFIG_H8300) || \
> +       defined(CONFIG_BLACKFIN)
> +       /*
> +        * FIXME: so far only these archs have flush_tlb_kernel_page(),
> +        * for the rest just use flush_tlb_kernel_range(). Not ideal
> +        * to use _range() because many archs just flush the whole TLB.
> +        */
> +       if (end <= start + PAGE_SIZE)
> +               flush_tlb_kernel_page(start);
> +       else
> +#endif
> +               flush_tlb_kernel_range(start, end);
> +}

Why not just fix flush_tlb_range()?

If an arch has a flush_tlb_kernel_page() that works then it stands to
reason that flush_tlb_kernel_range() should work with minimal
effort, no?

2009-06-17 07:08:26

by Paul Mundt

[permalink] [raw]
Subject: Re: [PATCH 13/14] Pramfs: Write protection

On Tue, Jun 16, 2009 at 07:35:24PM -0700, Jared Hulbert wrote:
> > +/* init_mm.page_table_lock must be held before calling! */
> > +static void pram_page_writeable(unsigned long addr, int rw)
> > +{
> > +       pgd_t *pgdp;
> > +       pud_t *pudp;
> > +       pmd_t *pmdp;
> > +       pte_t *ptep;
> > +
> > +       pgdp = pgd_offset_k(addr);
> > +       if (!pgd_none(*pgdp)) {
> > +               pudp = pud_offset(pgdp, addr);
> > +               if (!pud_none(*pudp)) {
> > +                       pmdp = pmd_offset(pudp, addr);
> > +                       if (!pmd_none(*pmdp)) {
> > +                               pte_t pte;
> > +                               ptep = pte_offset_kernel(pmdp, addr);
> > +                               pte = *ptep;
> > +                               if (pte_present(pte)) {
> > +                                       pte = rw ? pte_mkwrite(pte) :
> > +                                               pte_wrprotect(pte);
> > +                                       set_pte(ptep, pte);
> > +                               }
> > +                       }
> > +               }
> > +       }
> > +}
>
> Wow. Don't we want to do this pte walking in mm/ someplace?
>
> Do you really intend to protect just the PTE in question rather than
> the entire physical page, regardless of which PTE is talking to it?
> Maybe I'm missing something.
>
follow_pfn() ought to be fine for this, optionally follow_pte() could be
exported and used.

> > +#if defined(CONFIG_ARM) || defined(CONFIG_M68K) || defined(CONFIG_H8300) || \
> > +       defined(CONFIG_BLACKFIN)
> > +       /*
> > +        * FIXME: so far only these archs have flush_tlb_kernel_page(),
> > +        * for the rest just use flush_tlb_kernel_range(). Not ideal
> > +        * to use _range() because many archs just flush the whole TLB.
> > +        */
> > +       if (end <= start + PAGE_SIZE)
> > +               flush_tlb_kernel_page(start);
> > +       else
> > +#endif
> > +               flush_tlb_kernel_range(start, end);
> > +}
>
> Why not just fix flush_tlb_range()?
>
> If an arch has a flush_tlb_kernel_page() that works then it stands to
> reason that flush_tlb_kernel_range() should work with minimal
> effort, no?

flush_tlb_kernel_page() is a new one to me, it doesn't have any mention
in Documentation/cachetlb.txt anyways.

Many of the flush_tlb_kernel_range() implementations do ranged checks
with tunables to determine whether it is more expensive to selectively
flush vs just blowing the entire TLB away.

Likewise, there is no reason why those 4 architectures can not just shove
that if (end <= start + PAGE_SIZE) check in the beginning of their
flush_tlb_kernel_range() and fall back on flush_tlb_kernel_page() for
those cases. Hiding this in generic code is definitely not the way to go.