2005-02-22 09:53:08

by Jes Sorensen

[permalink] [raw]
Subject: [patch -mm series] ia64 specific /dev/mem handlers

Hi,

This patch introduces ia64 specific read/write handlers for /dev/mem
access which is needed to avoid uncached pages to be accessed through
the cached kernel window which can lead to random corruption. It also
introduces a new page-flag PG_uncached which will be used to mark the
uncached pages. I assume this may be useful to other architectures as
well where the CPU may use speculative reads which conflict with
uncached access. In addition I moved do_write_mem to be under
ARCH_HAS_DEV_MEM as it's only ever used if that is defined.

The patch is needed for the new ia64 special memory driver (mspec -
former fetchop).

Patch is relative to 2.6.11-rc3-mm2 and relies on the ARCH_HAS_DEV_MEM
flag which isn't in Linus' nor Tony's trees yet.

Cheers,
Jes

Signed-off-by: Jes Sorensen <[email protected]>

diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile 2005-02-16 11:20:19 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile 2005-02-16 11:58:35 -08:00
@@ -7,7 +7,7 @@
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
- unwind.o mca.o mca_asm.o topology.o
+ unwind.o mca.o mca_asm.o topology.o mem.o

obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c 1969-12-31 16:00:00 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c 2005-02-16 12:49:01 -08:00
@@ -0,0 +1,151 @@
+/*
+ * arch/ia64/kernel/mem.c
+ *
+ * IA64 specific portions of /dev/mem access, notably handling
+ * read/write from uncached memory
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2005 Jes Sorensen <[email protected]>
+ */
+
+
+#include <linux/mm.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+
+extern loff_t memory_lseek(struct file * file, loff_t offset, int orig);
+extern int mmap_kmem(struct file * file, struct vm_area_struct * vma);
+extern int open_port(struct inode * inode, struct file * filp);
+
+
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+ unsigned long cursor;
+
+ cursor = from >> PAGE_SHIFT;
+ while ((cursor << PAGE_SHIFT) < to) {
+ if (!devmem_is_allowed(cursor))
+ return 0;
+ cursor++;
+ }
+ return 1;
+}
+
+
+/*
+ * This funcion reads the *physical* memory. The f_pos points directly
+ * to the memory location.
+ */
+static ssize_t read_mem(struct file * file, char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ ssize_t read, sz;
+ struct page *page;
+ char *ptr;
+
+
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+ read = 0;
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ page = pfn_to_page(p >> PAGE_SHIFT);
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ if (page->flags & PG_uncached)
+ ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+ else
+ ptr = __va(p);
+ if (copy_to_user(buf, ptr, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+
+ *ppos += read;
+ return read;
+}
+
+
+static ssize_t write_mem(struct file * file, const char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ unsigned long copied;
+ ssize_t written, sz;
+ struct page *page;
+ char *ptr;
+
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+
+ written = 0;
+
+ if (!range_is_allowed(p, p + count))
+ return -EPERM;
+ /*
+ * Need virtual p below here
+ */
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ page = pfn_to_page(p >> PAGE_SHIFT);
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ if (page->flags & PG_uncached)
+ ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+ else
+ ptr = __va(p);
+
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
+}
+
+
+struct file_operations mem_fops = {
+ .llseek = memory_lseek,
+ .read = read_mem,
+ .write = write_mem,
+ .mmap = mmap_kmem,
+ .open = open_port,
+};
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c 2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2/drivers/char/mem.c 2005-02-16 11:58:35 -08:00
@@ -125,39 +125,7 @@
}
return 1;
}
-static ssize_t do_write_mem(void *p, unsigned long realp,
- const char __user * buf, size_t count, loff_t *ppos)
-{
- ssize_t written;
- unsigned long copied;
-
- written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (realp < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-realp;
- if (sz > count) sz = count;
- /* Hmm. Do something? */
- buf+=sz;
- p+=sz;
- count-=sz;
- written+=sz;
- }
-#endif
- if (!range_is_allowed(realp, realp+count))
- return -EPERM;
- copied = copy_from_user(p, buf, count);
- if (copied) {
- ssize_t ret = written + (count - copied);

- if (ret)
- return ret;
- return -EFAULT;
- }
- written += count;
- *ppos += written;
- return written;
-}

#ifndef ARCH_HAS_DEV_MEM
/*
@@ -196,6 +164,40 @@
return read;
}

+static ssize_t do_write_mem(void *p, unsigned long realp,
+ const char __user * buf, size_t count, loff_t *ppos)
+{
+ ssize_t written;
+ unsigned long copied;
+
+ written = 0;
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (realp < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE-realp;
+ if (sz > count) sz = count;
+ /* Hmm. Do something? */
+ buf+=sz;
+ p+=sz;
+ count-=sz;
+ written+=sz;
+ }
+#endif
+ if (!range_is_allowed(realp, realp+count))
+ return -EPERM;
+ copied = copy_from_user(p, buf, count);
+ if (copied) {
+ ssize_t ret = written + (count - copied);
+
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ written += count;
+ *ppos += written;
+ return written;
+}
+
static ssize_t write_mem(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
@@ -207,7 +209,8 @@
}
#endif

-static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+
+int mmap_kmem(struct file * file, struct vm_area_struct * vma)
{
#ifdef pgprot_noncached
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
@@ -553,7 +556,7 @@
* also note that seeking relative to the "end of file" isn't supported:
* it has no meaning, so it returns -EINVAL.
*/
-static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+loff_t memory_lseek(struct file * file, loff_t offset, int orig)
{
loff_t ret;

@@ -576,7 +579,7 @@
return ret;
}

-static int open_port(struct inode * inode, struct file * filp)
+int open_port(struct inode * inode, struct file * filp)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h linux-2.6.11-rc3-mm2/include/asm-ia64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2/include/asm-ia64/io.h 2005-02-16 11:58:35 -08:00
@@ -481,4 +481,6 @@
#define BIO_VMERGE_BOUNDARY (ia64_max_iommu_merge_mask + 1)
#endif

+#define ARCH_HAS_DEV_MEM
+
#endif /* _ASM_IA64_IO_H */
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h 2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2/include/linux/page-flags.h 2005-02-16 12:48:31 -08:00
@@ -76,7 +76,7 @@
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
#define PG_reclaim 18 /* To be reclaimed asap */
#define PG_nosave_free 19 /* Free, should not be written */
-
+#define PG_uncached 20 /* Page has been mapped as uncached */

/*
* Global page accounting. One instance per CPU. Only unsigned longs are


2005-02-22 10:03:42

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

[email protected] (Jes Sorensen) wrote:
>
> Hi,
>
> This patch introduces ia64 specific read/write handlers for /dev/mem
> access which is needed to avoid uncached pages to be accessed through
> the cached kernel window which can lead to random corruption. It also
> introduces a new page-flag PG_uncached which will be used to mark the
> uncached pages. I assume this may be useful to other architectures as
> well where the CPU may use speculative reads which conflict with
> uncached access. In addition I moved do_write_mem to be under
> ARCH_HAS_DEV_MEM as it's only ever used if that is defined.
>
> The patch is needed for the new ia64 special memory driver (mspec -
> former fetchop).
>
> Patch is relative to 2.6.11-rc3-mm2 and relies on the ARCH_HAS_DEV_MEM
> flag which isn't in Linus' nor Tony's trees yet.

Is it possible to avoid consuming a page flag?

If this is an ia64-only (or 64-bit-only) thing I guess we could use bit 32.

> + if (page->flags & PG_uncached)

dude. That ain't gonna work ;)

2005-02-22 12:08:53

by Andi Kleen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

Andrew Morton <[email protected]> writes:
>
> Is it possible to avoid consuming a page flag?
>
> If this is an ia64-only (or 64-bit-only) thing I guess we could use bit 32.

It's not. i386 essentially has the same problem. Other architectures
likely too. We definitely need a generic solution, especially when
we're going to support PAT on i386/x86-64 better (which I hope will
happen soon)

-Andi

2005-02-22 14:41:33

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Andrew" == Andrew Morton <[email protected]> writes:

Andrew> [email protected] (Jes Sorensen) wrote:
>> Hi,
>>
>> This patch introduces ia64 specific read/write handlers for
>> /dev/mem access which is needed to avoid uncached pages to be
>> accessed through the cached kernel window which can lead to random
>> corruption. It also introduces a new page-flag PG_uncached which
>> will be used to mark the uncached pages. I assume this may be
>> useful to other architectures as well where the CPU may use
>> speculative reads which conflict with uncached access. In addition
>> I moved do_write_mem to be under ARCH_HAS_DEV_MEM as it's only ever
>> used if that is defined.

Andrew> Is it possible to avoid consuming a page flag?

Andrew> If this is an ia64-only (or 64-bit-only) thing I guess we
Andrew> could use bit 32.

Hi Andrew,

I don't think we can get away with it, I think most modern
architectures have this problem. I've been trying to avoid it for
several iterations but I kept getting pushed towards this.

>> + if (page->flags & PG_uncached)

Andrew> dude. That ain't gonna work ;)

Pardon my lack of clue, but why not?

Maybe I should explain how I use it - in the mspec driver I pull a
chunk of pages out using alloc_pages and convert them all to uncached
(I need to do a chunk due to the speculation restrictions on ia64) and
then stick them into a special allocator and set page->PG_uncached
when doing so (the allocator is not tied to the uncached so I posted
the patch to Tony Luck). Ie. once the pages have been grabbed with
alloc_pages() and converted to uncached, they are never put back into
the generic pool.

It may be there are other places which needs to learn to respect the
flag?

Cheers,
Jes

2005-02-22 17:52:30

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
> >> + if (page->flags & PG_uncached)
>
> Andrew> dude. That ain't gonna work ;)
>
> Pardon my lack of clue, but why not?

I think you're supposed to always use test_bit() to check page flags

--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain

2005-02-22 18:08:40

by Luck, Tony

[permalink] [raw]
Subject: RE: [patch -mm series] ia64 specific /dev/mem handlers

>On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
>> >> + if (page->flags & PG_uncached)
>>
>> Andrew> dude. That ain't gonna work ;)
>>
>> Pardon my lack of clue, but why not?
>
>I think you're supposed to always use test_bit() to check page flags

You defined PG_uncached as "20" ... so either:

if (page->flags & (1<<PG_uncached))

or (much better) to use:

test_bit(PG_uncached, &page->flags).

-Tony

2005-02-22 19:25:36

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

Matthew Wilcox <[email protected]> wrote:
>
> On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
> > >> + if (page->flags & PG_uncached)
> >
> > Andrew> dude. That ain't gonna work ;)
> >
> > Pardon my lack of clue, but why not?

if (page->flags & (1<<PG_uncached))

would have been correcter.

> I think you're supposed to always use test_bit() to check page flags

Yup. Add PageUncached macros to page-flags.h.

2005-02-22 19:36:08

by Dave Hansen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

I was talking with Nigel Cunningham about doing something a little
different from the classic page flag bits when the number of users is
restricted and performance isn't ultra-critical. Would something like
this work for you, instead of using a real page->flags bit for
PG_cached?

http://marc.theaimsgroup.com/?l=linux-kernel&m=110878103926956&w=2

BTW, they're planning on using two of those dynamic flags for software
suspend, and I'll probably use at least another two in the memory
hotplug code.

-- Dave

2005-02-22 21:34:38

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Andrew" == Andrew Morton <[email protected]> writes:

Andrew> Matthew Wilcox <[email protected]> wrote:
>>
>> On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
>> > >> + if (page->flags & PG_uncached)
>> >
>> > Andrew> dude. That ain't gonna work ;)
>> >
>> > Pardon my lack of clue, but why not?

Andrew> if (page->flags & (1<<PG_uncached))

Andrew> would have been correcter.

DOH!

Desperately seeking a bulk supply of those brown paper bags!

>> I think you're supposed to always use test_bit() to check page
>> flags

Andrew> Yup. Add PageUncached macros to page-flags.h.

Mmmmm another butt ugly StUdLyCaPs macro coming soon.

Thanks,
Jes

2005-02-22 21:38:48

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Dave" == Dave Hansen <[email protected]> writes:

Dave> I was talking with Nigel Cunningham about doing something a
Dave> little different from the classic page flag bits when the number
Dave> of users is restricted and performance isn't ultra-critical.
Dave> Would something like this work for you, instead of using a real
Dave> page->flags bit for PG_cached?

Just took a quick look at this and it looks a bit heavy for our
use. We are only looking at a small number of pages. However I could
imagine future cases where performance may be more critical.

Cheers,
Jes

2005-02-22 22:04:09

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

On Tue, 2005-02-22 at 04:52 -0500, Jes Sorensen wrote:
> Hi,
>
> This patch introduces ia64 specific read/write handlers for /dev/mem
> access which is needed to avoid uncached pages to be accessed through
> the cached kernel window which can lead to random corruption. It also
> introduces a new page-flag PG_uncached which will be used to mark the
> uncached pages. I assume this may be useful to other architectures as
> well where the CPU may use speculative reads which conflict with
> uncached access. In addition I moved do_write_mem to be under
> ARCH_HAS_DEV_MEM as it's only ever used if that is defined.
>
> The patch is needed for the new ia64 special memory driver (mspec -
> former fetchop).


is there ANY valid reason to allow access to cached uses at all?
(eg kernel ram)

why not just disable any such ram access entirely...


2005-02-22 22:30:59

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Arjan" == Arjan van de Ven <[email protected]> writes:

Arjan> On Tue, 2005-02-22 at 04:52 -0500, Jes Sorensen wrote:
>> Hi,
>>
>> This patch introduces ia64 specific read/write handlers for
>> /dev/mem access which is needed to avoid uncached pages to be
>> accessed through the cached kernel window which can lead to random
>> corruption. It also introduces a new page-flag PG_uncached which
>> will be used to mark the uncached pages. I assume this may be
>> useful to other architectures as well where the CPU may use
>> speculative reads which conflict with uncached access. In addition
>> I moved do_write_mem to be under ARCH_HAS_DEV_MEM as it's only ever
>> used if that is defined.
>>
>> The patch is needed for the new ia64 special memory driver (mspec -
>> former fetchop).

Arjan> is there ANY valid reason to allow access to cached uses at
Arjan> all? (eg kernel ram)

Arjan> why not just disable any such ram access entirely...

You mean uncached?

For userspace it's used by some of the MPI type apps in userland.
Presumably there's cases where it gives better performance. For the
SN2 hardware there's also a special mode known as fetchop mode which
requires uncached memory, it's used quite heavily by the same types of
apps.

The problem is if you then have apps such as lcrash which may read
through all kernel memory. If a page is mapped uncached to userland
you can hit the memory corruption case if you access the same page
cached from within a kernel cached mapping. I suspect the suspend code
could hit similar problems, but I don't know that code well enough to
say if it's the case or not.

Cheers,
Jes

2005-02-22 22:41:53

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Andrew" == Andrew Morton <[email protected]> writes:

Andrew> Matthew Wilcox <[email protected]> wrote:
>>
>> On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
>> > >> + if (page->flags & PG_uncached)
>> >
>> > Andrew> dude. That ain't gonna work ;)
>> >
>> > Pardon my lack of clue, but why not?

Andrew> if (page->flags & (1<<PG_uncached))

Andrew> would have been correcter.

Andrew,

After applying the clue 2x4 to my head a couple of times, I came up
with this patch. Hopefully it will work a bit better ;-)

Cheers,
Jes

Signed-off-by: Jes Sorensen <[email protected]>

diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile 2005-02-16 11:20:19 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile 2005-02-16 11:58:35 -08:00
@@ -7,7 +7,7 @@
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
- unwind.o mca.o mca_asm.o topology.o
+ unwind.o mca.o mca_asm.o topology.o mem.o

obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c 1969-12-31 16:00:00 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c 2005-02-22 14:11:40 -08:00
@@ -0,0 +1,151 @@
+/*
+ * arch/ia64/kernel/mem.c
+ *
+ * IA64 specific portions of /dev/mem access, notably handling
+ * read/write from uncached memory
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2005 Jes Sorensen <[email protected]>
+ */
+
+
+#include <linux/mm.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+
+extern loff_t memory_lseek(struct file * file, loff_t offset, int orig);
+extern int mmap_kmem(struct file * file, struct vm_area_struct * vma);
+extern int open_port(struct inode * inode, struct file * filp);
+
+
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+ unsigned long cursor;
+
+ cursor = from >> PAGE_SHIFT;
+ while ((cursor << PAGE_SHIFT) < to) {
+ if (!devmem_is_allowed(cursor))
+ return 0;
+ cursor++;
+ }
+ return 1;
+}
+
+
+/*
+ * This funcion reads the *physical* memory. The f_pos points directly
+ * to the memory location.
+ */
+static ssize_t read_mem(struct file * file, char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ ssize_t read, sz;
+ struct page *page;
+ char *ptr;
+
+
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+ read = 0;
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ page = pfn_to_page(p >> PAGE_SHIFT);
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ if (PageUncached(page))
+ ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+ else
+ ptr = __va(p);
+ if (copy_to_user(buf, ptr, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+
+ *ppos += read;
+ return read;
+}
+
+
+static ssize_t write_mem(struct file * file, const char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long p = *ppos;
+ unsigned long copied;
+ ssize_t written, sz;
+ struct page *page;
+ char *ptr;
+
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+
+ written = 0;
+
+ if (!range_is_allowed(p, p + count))
+ return -EPERM;
+ /*
+ * Need virtual p below here
+ */
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ page = pfn_to_page(p >> PAGE_SHIFT);
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ if (PageUncached(page))
+ ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+ else
+ ptr = __va(p);
+
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
+}
+
+
+struct file_operations mem_fops = {
+ .llseek = memory_lseek,
+ .read = read_mem,
+ .write = write_mem,
+ .mmap = mmap_kmem,
+ .open = open_port,
+};
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c 2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2/drivers/char/mem.c 2005-02-16 11:58:35 -08:00
@@ -125,39 +125,7 @@
}
return 1;
}
-static ssize_t do_write_mem(void *p, unsigned long realp,
- const char __user * buf, size_t count, loff_t *ppos)
-{
- ssize_t written;
- unsigned long copied;
-
- written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (realp < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-realp;
- if (sz > count) sz = count;
- /* Hmm. Do something? */
- buf+=sz;
- p+=sz;
- count-=sz;
- written+=sz;
- }
-#endif
- if (!range_is_allowed(realp, realp+count))
- return -EPERM;
- copied = copy_from_user(p, buf, count);
- if (copied) {
- ssize_t ret = written + (count - copied);

- if (ret)
- return ret;
- return -EFAULT;
- }
- written += count;
- *ppos += written;
- return written;
-}

#ifndef ARCH_HAS_DEV_MEM
/*
@@ -196,6 +164,40 @@
return read;
}

+static ssize_t do_write_mem(void *p, unsigned long realp,
+ const char __user * buf, size_t count, loff_t *ppos)
+{
+ ssize_t written;
+ unsigned long copied;
+
+ written = 0;
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (realp < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE-realp;
+ if (sz > count) sz = count;
+ /* Hmm. Do something? */
+ buf+=sz;
+ p+=sz;
+ count-=sz;
+ written+=sz;
+ }
+#endif
+ if (!range_is_allowed(realp, realp+count))
+ return -EPERM;
+ copied = copy_from_user(p, buf, count);
+ if (copied) {
+ ssize_t ret = written + (count - copied);
+
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ written += count;
+ *ppos += written;
+ return written;
+}
+
static ssize_t write_mem(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
@@ -207,7 +209,8 @@
}
#endif

-static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+
+int mmap_kmem(struct file * file, struct vm_area_struct * vma)
{
#ifdef pgprot_noncached
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
@@ -553,7 +556,7 @@
* also note that seeking relative to the "end of file" isn't supported:
* it has no meaning, so it returns -EINVAL.
*/
-static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+loff_t memory_lseek(struct file * file, loff_t offset, int orig)
{
loff_t ret;

@@ -576,7 +579,7 @@
return ret;
}

-static int open_port(struct inode * inode, struct file * filp)
+int open_port(struct inode * inode, struct file * filp)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h linux-2.6.11-rc3-mm2/include/asm-ia64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2/include/asm-ia64/io.h 2005-02-16 11:58:35 -08:00
@@ -481,4 +481,6 @@
#define BIO_VMERGE_BOUNDARY (ia64_max_iommu_merge_mask + 1)
#endif

+#define ARCH_HAS_DEV_MEM
+
#endif /* _ASM_IA64_IO_H */
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h 2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2/include/linux/page-flags.h 2005-02-22 13:40:22 -08:00
@@ -76,7 +76,7 @@
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
#define PG_reclaim 18 /* To be reclaimed asap */
#define PG_nosave_free 19 /* Free, should not be written */
-
+#define PG_uncached 20 /* Page has been mapped as uncached */

/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -301,6 +301,10 @@
#else
#define PageSwapCache(page) 0
#endif
+
+#define PageUncached(page) test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)

struct page; /* forward declaration */

2005-02-22 22:42:36

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

On Tue, 2005-02-22 at 17:30 -0500, Jes Sorensen wrote:
>
> For userspace it's used by some of the MPI type apps in userland.

you got to be kidding. Why are these MPI apps accessing memory that the
kernel has mapped cached (eg ram) via /dev/mem?


(eg my proposal is to make /dev/mem to be just device memory not kernel
accessable ram; wouldn't that solve the entire issue cleanly ?)

2005-02-22 23:29:57

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

Jes Sorensen <[email protected]> wrote:
>
> After applying the clue 2x4 to my head a couple of times, I came up
> with this patch. Hopefully it will work a bit better ;-)
>

I know it's repetitious, but it's nice to maintain a changelog entry along
with the patch. Especially when seventy people have asked "wtf is this patch
for?".

Implementation-wise, do you really need to clone-and-own the mem.c
functions? Would it not be sufficient to do

ptr = arch_translate_mem_ptr(page, ptr);

inside mem.c?


> + * arch/ia64/kernel/mem.c
> ...
> +extern loff_t memory_lseek(struct file * file, loff_t offset, int orig);
> +extern int mmap_kmem(struct file * file, struct vm_area_struct * vma);
> +extern int open_port(struct inode * inode, struct file * filp);
> +

Please find a .h file for the function prototypes.

2005-02-23 00:48:57

by Dave Hansen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

On Tue, 2005-02-22 at 16:38 -0500, Jes Sorensen wrote:
> >>>>> "Dave" == Dave Hansen <[email protected]> writes:
>
> Dave> I was talking with Nigel Cunningham about doing something a
> Dave> little different from the classic page flag bits when the number
> Dave> of users is restricted and performance isn't ultra-critical.
> Dave> Would something like this work for you, instead of using a real
> Dave> page->flags bit for PG_cached?
>
> Just took a quick look at this and it looks a bit heavy for our
> use. We are only looking at a small number of pages. However I could
> imagine future cases where performance may be more critical.

If it's a quite small number (or range) of pages, perhaps a short
list_head list would suffice. It would sure beat consuming a page flag.

-- Dave

2005-02-23 08:12:15

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Arjan" == Arjan van de Ven <[email protected]> writes:

Arjan> On Tue, 2005-02-22 at 17:30 -0500, Jes Sorensen wrote:
>> For userspace it's used by some of the MPI type apps in userland.

Arjan> you got to be kidding. Why are these MPI apps accessing memory
Arjan> that the kernel has mapped cached (eg ram) via /dev/mem?

Oh sorry, I think we're misunderstanding eachother. /dev/mem is used
by lcrash.

Arjan> (eg my proposal is to make /dev/mem to be just device memory
Arjan> not kernel accessable ram; wouldn't that solve the entire issue
Arjan> cleanly ?)

It would kill lcrash support, but sure it would solve this specific
problem. However what happens if someone wants to share say some
texture ram between the kernel and a video card and that has to be
mapped uncached? Though up example here though.

Cheers,
Jes

2005-02-23 08:24:56

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

> However what happens if someone wants to share say some
> texture ram between the kernel and a video card and that has to be
> mapped uncached? Though up example here though.

also that's surely supposed to be controlled by some kernel driver,
which in turn can and should export it's own mmap instead with all the
proper attributes...


2005-02-23 10:02:16

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Andrew" == Andrew Morton <[email protected]> writes:

Andrew> Jes Sorensen <[email protected]> wrote:
>> After applying the clue 2x4 to my head a couple of times, I came
>> up with this patch. Hopefully it will work a bit better ;-)
>>

Andrew> I know it's repetitious, but it's nice to maintain a changelog
Andrew> entry along with the patch. Especially when seventy people
Andrew> have asked "wtf is this patch for?".

True, that got lost in the multi-iteration game. Added one now.

Andrew> Implementation-wise, do you really need to clone-and-own the
Andrew> mem.c functions? Would it not be sufficient to do

Andrew> ptr = arch_translate_mem_ptr(page, ptr);

Andrew> inside mem.c?

I tried to avoid making too many changes to drivers/char/mem.c but on
the long run this is probably cleaner. It makes it slightly more
complex on all architectures to do large read/write calls on /dev/mem
but it's not exactly a performance path.

>> + * arch/ia64/kernel/mem.c ... +extern loff_t memory_lseek(struct
>> file * file, loff_t offset, int orig); +extern int mmap_kmem(struct
>> file * file, struct vm_area_struct * vma); +extern int
>> open_port(struct inode * inode, struct file * filp); +

Andrew> Please find a .h file for the function prototypes.

All gone with the merge into drivers/char/mem.c

Note, I put the macros in include/asm-ia64/uaccess.h to avoid all hell
breaking lose in the ia64 build process which generates offsets.h. We
can introduce a new header file but it seems a bit overkill imho.

Cheers,
Jes

Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
available. Needed on ia64 for pages converted fo uncached mappings to
avoid it being accessed in cached mode after the conversion which can
lead to memory corruption. Introduces PG_uncached page flag for
marking pages uncached.

Also folds do_write_mem into write_mem as it was it's only user.

Signed-off-by: Jes Sorensen <[email protected]>


diff -urN -X /usr/people/jes/exclude-linux --exclude=shubio.h linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2-2/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c 2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2-2/drivers/char/mem.c 2005-02-23 01:51:54 -08:00
@@ -125,39 +125,6 @@
}
return 1;
}
-static ssize_t do_write_mem(void *p, unsigned long realp,
- const char __user * buf, size_t count, loff_t *ppos)
-{
- ssize_t written;
- unsigned long copied;
-
- written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (realp < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-realp;
- if (sz > count) sz = count;
- /* Hmm. Do something? */
- buf+=sz;
- p+=sz;
- count-=sz;
- written+=sz;
- }
-#endif
- if (!range_is_allowed(realp, realp+count))
- return -EPERM;
- copied = copy_from_user(p, buf, count);
- if (copied) {
- ssize_t ret = written + (count - copied);
-
- if (ret)
- return ret;
- return -EFAULT;
- }
- written += count;
- *ppos += written;
- return written;
-}

#ifndef ARCH_HAS_DEV_MEM
/*
@@ -168,7 +135,9 @@
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
+ ssize_t read, sz;
+ struct page *page;
+ char *ptr;

if (!valid_phys_addr_range(p, &count))
return -EFAULT;
@@ -176,7 +145,7 @@
#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
/* we don't have page 0 mapped on sparc and m68k.. */
if (p < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-p;
+ sz = PAGE_SIZE - p;
if (sz > count)
sz = count;
if (sz > 0) {
@@ -189,9 +158,35 @@
}
}
#endif
- if (copy_to_user(buf, __va(p), count))
- return -EFAULT;
- read += count;
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ page = pfn_to_page(p >> PAGE_SHIFT);
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+#ifdef ARCH_HAS_TRANSLATE_MEM_PTR
+ ptr = arch_translate_mem_ptr(page, p);
+#else
+ ptr = __va(p);
+#endif
+ if (copy_to_user(buf, ptr, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+
*ppos += read;
return read;
}
@@ -200,10 +195,70 @@
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
+ ssize_t written, sz;
+ unsigned long copied;
+ struct page *page;
+ void *ptr;

if (!valid_phys_addr_range(p, &count))
return -EFAULT;
- return do_write_mem(__va(p), p, buf, count, ppos);
+
+ if (!range_is_allowed(p, p + count))
+ return -EPERM;
+
+ written = 0;
+
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (p < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE - p;
+ if (sz > count)
+ sz = count;
+ /* Hmm. Do something? */
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+#endif
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ page = pfn_to_page(p >> PAGE_SHIFT);
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+#ifdef ARCH_HAS_TRANSLATE_MEM_PTR
+ ptr = arch_translate_mem_ptr(page, p);
+#else
+ ptr = __va(p);
+#endif
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
}
#endif

diff -urN -X /usr/people/jes/exclude-linux --exclude=shubio.h linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h linux-2.6.11-rc3-mm2-2/include/asm-ia64/uaccess.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-2/include/asm-ia64/uaccess.h 2005-02-23 00:42:12 -08:00
@@ -35,6 +35,8 @@
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h>

#include <asm/intrinsics.h>
#include <asm/pgtable.h>
@@ -365,6 +367,20 @@
return 1;
}
return 0;
+}
+
+#define ARCH_HAS_TRANSLATE_MEM_PTR 1
+static __inline__ char *
+arch_translate_mem_ptr (struct page *page, unsigned long p)
+{
+ char * ptr;
+
+ if (PageUncached(page))
+ ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+ else
+ ptr = __va(p);
+
+ return ptr;
}

#endif /* _ASM_IA64_UACCESS_H */
diff -urN -X /usr/people/jes/exclude-linux --exclude=shubio.h linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2-2/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h 2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2-2/include/linux/page-flags.h 2005-02-22 13:40:22 -08:00
@@ -76,7 +76,7 @@
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
#define PG_reclaim 18 /* To be reclaimed asap */
#define PG_nosave_free 19 /* Free, should not be written */
-
+#define PG_uncached 20 /* Page has been mapped as uncached */

/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -301,6 +301,10 @@
#else
#define PageSwapCache(page) 0
#endif
+
+#define PageUncached(page) test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)

struct page; /* forward declaration */

2005-02-23 22:34:48

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

> + page = pfn_to_page(p >> PAGE_SHIFT);
> + /*
> + * On ia64 if a page has been mapped somewhere as
> + * uncached, then it must also be accessed uncached
> + * by the kernel or data corruption may occur
> + */
> +#ifdef ARCH_HAS_TRANSLATE_MEM_PTR
> + ptr = arch_translate_mem_ptr(page, p);
> +#else
> + ptr = __va(p);
> +#endif

Please remove the ifdef by letting every architecture implement a
arch_translate_mem_ptr (and give it a saner name while you're at it).

Also shouldn't the pfn_to_page be done inside arch_translate_mem_ptr?
The struct page * isn't used anywhere else.

> + if (!range_is_allowed(p, p + count))

isn't the name a little too generic?

> +
> + written = 0;
> +
> +#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
> + /* we don't have page 0 mapped on sparc and m68k.. */
> + if (p < PAGE_SIZE) {
> + unsigned long sz = PAGE_SIZE - p;
> + if (sz > count)
> + sz = count;
> + /* Hmm. Do something? */
> + buf += sz;
> + p += sz;
> + count -= sz;
> + written += sz;
> + }
> +#endif

While you're at it replace the ifdef mania with a #ifdef
__HAVE_ARCH_PAGE_ZERO_MAPPED or something similar.

2005-02-24 16:19:52

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Christoph" == Christoph Hellwig <[email protected]> writes:

Christoph> Please remove the ifdef by letting every architecture
Christoph> implement a arch_translate_mem_ptr (and give it a saner
Christoph> name while you're at it).

Ok, I thought it was tidier the other way, but I am not biased.

Christoph> Also shouldn't the pfn_to_page be done inside
Christoph> arch_translate_mem_ptr? The struct page * isn't used
Christoph> anywhere else.

Done!

>> + if (!range_is_allowed(p, p + count))

Christoph> isn't the name a little too generic?

Actually, thats an old function, nothing I added - but I renamed it.

Christoph> While you're at it replace the ifdef mania with a #ifdef
Christoph> __HAVE_ARCH_PAGE_ZERO_MAPPED or something similar.

Using __ARCH_HAS_NO_PAGE_ZERO_MAPPED for it now.

Cheers,
Jes

Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
available. Needed on ia64 for pages converted fo uncached mappings to
avoid it being accessed in cached mode after the conversion which can
lead to memory corruption. Introduces PG_uncached page flag for
marking pages uncached.

Also folds do_write_mem into write_mem as it was it's only user.

Use __ARCH_HAS_NO_PAGE_ZERO_MAPPED for architectures to indicate they
require magic handling of the zero page (Sparc and m68k).

Signed-off-by: Jes Sorensen <[email protected]>


diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2-3/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c 2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2-3/drivers/char/mem.c 2005-02-24 04:45:34 -08:00
@@ -125,39 +125,6 @@
}
return 1;
}
-static ssize_t do_write_mem(void *p, unsigned long realp,
- const char __user * buf, size_t count, loff_t *ppos)
-{
- ssize_t written;
- unsigned long copied;
-
- written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (realp < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-realp;
- if (sz > count) sz = count;
- /* Hmm. Do something? */
- buf+=sz;
- p+=sz;
- count-=sz;
- written+=sz;
- }
-#endif
- if (!range_is_allowed(realp, realp+count))
- return -EPERM;
- copied = copy_from_user(p, buf, count);
- if (copied) {
- ssize_t ret = written + (count - copied);
-
- if (ret)
- return ret;
- return -EFAULT;
- }
- written += count;
- *ppos += written;
- return written;
-}

#ifndef ARCH_HAS_DEV_MEM
/*
@@ -168,15 +135,16 @@
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
+ ssize_t read, sz;
+ char *ptr;

if (!valid_phys_addr_range(p, &count))
return -EFAULT;
read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
if (p < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-p;
+ sz = PAGE_SIZE - p;
if (sz > count)
sz = count;
if (sz > 0) {
@@ -189,9 +157,31 @@
}
}
#endif
- if (copy_to_user(buf, __va(p), count))
- return -EFAULT;
- read += count;
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_mem_ptr(p);
+
+ if (copy_to_user(buf, ptr, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+
*ppos += read;
return read;
}
@@ -200,10 +190,65 @@
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
+ ssize_t written, sz;
+ unsigned long copied;
+ void *ptr;

if (!valid_phys_addr_range(p, &count))
return -EFAULT;
- return do_write_mem(__va(p), p, buf, count, ppos);
+
+ if (!range_is_allowed(p, p + count))
+ return -EPERM;
+
+ written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (p < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE - p;
+ if (sz > count)
+ sz = count;
+ /* Hmm. Do something? */
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+#endif
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = min(PAGE_SIZE, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_mem_ptr(p);
+
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
}
#endif

@@ -303,7 +348,7 @@
if (count > (unsigned long) high_memory - p)
read = (unsigned long) high_memory - p;

-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
if (p < PAGE_SIZE && read > 0) {
size_t tmp = PAGE_SIZE - p;
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-alpha/io.h linux-2.6.11-rc3-mm2-3/include/asm-alpha/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-alpha/io.h 2004-12-24 13:35:28 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-alpha/io.h 2005-02-24 04:41:25 -08:00
@@ -666,6 +666,12 @@
#define writeq writeq
#define readq readq

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif /* __ALPHA_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-arm/io.h linux-2.6.11-rc3-mm2-3/include/asm-arm/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-arm/io.h 2004-12-24 13:34:30 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-arm/io.h 2005-02-24 04:41:48 -08:00
@@ -271,5 +271,11 @@
#define BIOVEC_MERGEABLE(vec1, vec2) \
((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-arm26/io.h linux-2.6.11-rc3-mm2-3/include/asm-arm26/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-arm26/io.h 2004-12-24 13:35:00 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-arm26/io.h 2005-02-24 04:41:56 -08:00
@@ -420,5 +420,11 @@
#define BIOVEC_MERGEABLE(vec1, vec2) \
((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-cris/io.h linux-2.6.11-rc3-mm2-3/include/asm-cris/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-cris/io.h 2004-12-24 13:35:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-cris/io.h 2005-02-24 04:42:07 -08:00
@@ -86,4 +86,10 @@
#define outsw(x,y,z)
#define outsl(x,y,z)

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-frv/io.h linux-2.6.11-rc3-mm2-3/include/asm-frv/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-frv/io.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-frv/io.h 2005-02-24 04:42:19 -08:00
@@ -273,6 +273,13 @@
__asm__ __volatile__ ("membar" : : :"memory");
}

+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif /* _ASM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-h8300/io.h linux-2.6.11-rc3-mm2-3/include/asm-h8300/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-h8300/io.h 2004-12-24 13:34:58 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-h8300/io.h 2005-02-24 04:42:32 -08:00
@@ -317,6 +317,12 @@
#define virt_to_bus virt_to_phys
#define bus_to_virt phys_to_virt

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif /* _H8300_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-i386/io.h linux-2.6.11-rc3-mm2-3/include/asm-i386/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-i386/io.h 2004-12-24 13:35:40 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-i386/io.h 2005-02-24 04:41:07 -08:00
@@ -49,6 +49,12 @@

#include <linux/vmalloc.h>

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
/**
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h linux-2.6.11-rc3-mm2-3/include/asm-ia64/uaccess.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-ia64/uaccess.h 2005-02-24 04:47:01 -08:00
@@ -35,6 +35,8 @@
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h>

#include <asm/intrinsics.h>
#include <asm/pgtable.h>
@@ -365,6 +367,22 @@
return 1;
}
return 0;
+}
+
+#define ARCH_HAS_TRANSLATE_MEM_PTR 1
+static __inline__ char *
+xlate_dev_mem_ptr (unsigned long p)
+{
+ struct page *page;
+ char * ptr;
+
+ page = pfn_to_page(p >> PAGE_SHIFT);
+ if (PageUncached(page))
+ ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+ else
+ ptr = __va(p);
+
+ return ptr;
}

#endif /* _ASM_IA64_UACCESS_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-m32r/io.h linux-2.6.11-rc3-mm2-3/include/asm-m32r/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-m32r/io.h 2004-12-24 13:34:45 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-m32r/io.h 2005-02-24 04:43:02 -08:00
@@ -216,6 +216,12 @@
memcpy((void __force *) dst, src, count);
}

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif /* _ASM_M32R_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-m68k/io.h linux-2.6.11-rc3-mm2-3/include/asm-m68k/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-m68k/io.h 2004-12-24 13:35:00 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-m68k/io.h 2005-02-24 04:43:18 -08:00
@@ -359,4 +359,13 @@
#endif

#endif /* __KERNEL__ */
+
+#define __ARCH_HAS_NO_PAGE_ZERO_MAPPED 1
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* _IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-m68knommu/io.h linux-2.6.11-rc3-mm2-3/include/asm-m68knommu/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-m68knommu/io.h 2004-12-24 13:34:32 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-m68knommu/io.h 2005-02-24 04:43:31 -08:00
@@ -187,6 +187,12 @@
#define virt_to_bus virt_to_phys
#define bus_to_virt phys_to_virt

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif /* _M68KNOMMU_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-mips/io.h linux-2.6.11-rc3-mm2-3/include/asm-mips/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-mips/io.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-mips/io.h 2005-02-24 04:43:42 -08:00
@@ -616,4 +616,10 @@
#define csr_out32(v,a) (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST) = (v))
#define csr_in32(a) (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST))

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* _ASM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-parisc/io.h linux-2.6.11-rc3-mm2-3/include/asm-parisc/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-parisc/io.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-parisc/io.h 2005-02-24 04:43:53 -08:00
@@ -403,4 +403,10 @@

#include <asm-generic/iomap.h>

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc/io.h linux-2.6.11-rc3-mm2-3/include/asm-ppc/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc/io.h 2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-ppc/io.h 2005-02-24 04:44:04 -08:00
@@ -549,4 +549,10 @@
#include <asm/mpc8260_pci9.h>
#endif

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc64/io.h linux-2.6.11-rc3-mm2-3/include/asm-ppc64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc64/io.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-ppc64/io.h 2005-02-24 04:44:11 -08:00
@@ -442,6 +442,12 @@
extern int check_legacy_ioport(unsigned long base_port);


+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif /* _PPC64_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-s390/io.h linux-2.6.11-rc3-mm2-3/include/asm-s390/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-s390/io.h 2004-12-24 13:35:01 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-s390/io.h 2005-02-24 04:44:21 -08:00
@@ -107,6 +107,12 @@

#define mmiowb()

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc/io.h linux-2.6.11-rc3-mm2-3/include/asm-sparc/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc/io.h 2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-sparc/io.h 2005-02-24 04:44:28 -08:00
@@ -274,4 +274,12 @@

#endif

+#define __ARCH_HAS_NO_PAGE_ZERO_MAPPED 1
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* !(__SPARC_IO_H) */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc64/io.h linux-2.6.11-rc3-mm2-3/include/asm-sparc64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc64/io.h 2004-12-24 13:35:25 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-sparc64/io.h 2005-02-24 04:44:38 -08:00
@@ -485,6 +485,12 @@
#define dma_cache_wback(_start,_size) do { } while (0)
#define dma_cache_wback_inv(_start,_size) do { } while (0)

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif

#endif /* !(__SPARC64_IO_H) */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-um/io.h linux-2.6.11-rc3-mm2-3/include/asm-um/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-um/io.h 2004-12-24 13:35:28 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-um/io.h 2005-02-24 04:44:50 -08:00
@@ -22,4 +22,10 @@
return __va(address);
}

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-v850/io.h linux-2.6.11-rc3-mm2-3/include/asm-v850/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-v850/io.h 2004-12-24 13:34:00 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-v850/io.h 2005-02-24 04:44:59 -08:00
@@ -119,4 +119,10 @@
#define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
#define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __V850_IO_H__ */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-x86_64/io.h linux-2.6.11-rc3-mm2-3/include/asm-x86_64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-x86_64/io.h 2005-02-16 11:20:24 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-x86_64/io.h 2005-02-24 04:45:07 -08:00
@@ -329,6 +329,12 @@
extern int iommu_bio_merge;
#define BIO_VMERGE_BOUNDARY iommu_bio_merge

+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
#endif /* __KERNEL__ */

#endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2-3/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h 2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/linux/page-flags.h 2005-02-24 02:08:57 -08:00
@@ -76,7 +76,7 @@
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
#define PG_reclaim 18 /* To be reclaimed asap */
#define PG_nosave_free 19 /* Free, should not be written */
-
+#define PG_uncached 20 /* Page has been mapped as uncached */

/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -301,6 +301,10 @@
#else
#define PageSwapCache(page) 0
#endif
+
+#define PageUncached(page) test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)

struct page; /* forward declaration */

2005-03-03 11:44:35

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

[email protected] (Jes Sorensen) wrote:
>
> This patch introduces ia64 specific read/write handlers for /dev/mem
> access which is needed to avoid uncached pages to be accessed through
> the cached kernel window which can lead to random corruption.

This patch causes hiccups on my ia32e box.

linux:/home/akpm# /usr/sbin/hwscan --isapnp
zsh: 7528 segmentation fault /usr/sbin/hwscan --isapnp
linux:/home/akpm# /usr/sbin/hwscan --pci
zsh: 7529 segmentation fault /usr/sbin/hwscan --pci
linux:/home/akpm# /usr/sbin/hwscan --block
zsh: 7530 segmentation fault /usr/sbin/hwscan --block
linux:/home/akpm# /usr/sbin/hwscan --floppy
zsh: 7533 segmentation fault /usr/sbin/hwscan --floppy

strace ends with:

open("/proc/apm", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/dev/mem", O_RDONLY) = 3
lseek(3, 1024, SEEK_SET) = 1024
read(3, 0x50a080, 256) = -1 EFAULT (Bad address)
close(3) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++



2005-03-03 12:54:32

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Andrew" == Andrew Morton <[email protected]> writes:

Andrew> [email protected] (Jes Sorensen) wrote:
>> This patch introduces ia64 specific read/write handlers for
>> /dev/mem access which is needed to avoid uncached pages to be
>> accessed through the cached kernel window which can lead to random
>> corruption.

Andrew> This patch causes hiccups on my ia32e box.

Andrew> linux:/home/akpm# /usr/sbin/hwscan --isapnp zsh: 7528
Andrew> segmentation fault

Weird, I'll take a look.

Thanks,
Jes

2005-03-04 12:35:44

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Jes" == Jes Sorensen <[email protected]> writes:

>>>>> "Andrew" == Andrew Morton <[email protected]> writes:
Andrew> This patch causes hiccups on my ia32e box.

Andrew> linux:/home/akpm# /usr/sbin/hwscan --isapnp zsh: 7528
Andrew> segmentation fault

Jes> Weird, I'll take a look.

-EPROGRAMMERISANIDIOT ... try this.

Cheers,
Jes

/dev/mem handle read/write case where ppos+count < PAGE_SIZE

Signed-off-by: Jes Sorensen <[email protected]>


--- ../old/linux-2.6.11-rc5-mm1/drivers/char/mem.c 2005-03-04 07:20:51.000000000 -0500
+++ drivers/char/mem.c 2005-03-04 07:22:20.000000000 -0500
@@ -152,7 +152,9 @@
if (-p & (PAGE_SIZE - 1))
sz = -p & (PAGE_SIZE - 1);
else
- sz = min_t(unsigned long, PAGE_SIZE, count);
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);

/*
* On ia64 if a page has been mapped somewhere as
@@ -207,7 +209,9 @@
if (-p & (PAGE_SIZE - 1))
sz = -p & (PAGE_SIZE - 1);
else
- sz = min_t(unsigned long, PAGE_SIZE, count);
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);

/*
* On ia64 if a page has been mapped somewhere as
@@ -353,7 +357,9 @@
if (-p & (PAGE_SIZE - 1))
sz = -p & (PAGE_SIZE - 1);
else
- sz = min_t(unsigned long, PAGE_SIZE, count);
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);

/*
* On ia64 if a page has been mapped somewhere as
@@ -430,7 +436,9 @@
if (-realp & (PAGE_SIZE - 1))
sz = -realp & (PAGE_SIZE - 1);
else
- sz = min_t(unsigned long, PAGE_SIZE, count);
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);

/*
* On ia64 if a page has been mapped somewhere as

2005-03-10 06:56:15

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

Jes Sorensen <[email protected]> wrote:
>
> Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
> available. Needed on ia64 for pages converted fo uncached mappings to
> avoid it being accessed in cached mode after the conversion which can
> lead to memory corruption. Introduces PG_uncached page flag for
> marking pages uncached.

For some reason this patch still gives me the creeps. Maybe it's because
we lose a page flag for something so obscure.

Nothing ever clears PG_uncached. We'll end up with every page in the
machine marked as being uncached.

But then, nothing ever sets PG_uncached, either. Is there some patch which
you're hiding from me?

If a page is marked uncached then it'll remain marked as uncached even
after it's unmapped. Or will it? Would like to see the other patch, please.

We should add PG_uncached checks to the page allocator. Is this OK?


--- 25/mm/page_alloc.c~ia64-specific-dev-mem-handlers-checks 2005-03-09 22:53:12.000000000 -0800
+++ 25-akpm/mm/page_alloc.c 2005-03-09 22:53:44.000000000 -0800
@@ -108,6 +108,7 @@ static void bad_page(const char *functio
1 << PG_active |
1 << PG_dirty |
1 << PG_swapcache |
+ 1 << PG_uncached |
1 << PG_writeback);
set_page_count(page, 0);
reset_page_mapcount(page);
@@ -321,6 +322,7 @@ static inline void free_pages_check(cons
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
+ 1 << PG_uncached |
1 << PG_writeback )))
bad_page(function, page);
if (PageDirty(page))
@@ -446,6 +448,7 @@ static void prep_new_page(struct page *p
1 << PG_dirty |
1 << PG_reclaim |
1 << PG_swapcache |
+ 1 << PG_uncached |
1 << PG_writeback )))
bad_page(__FUNCTION__, page);

_

2005-03-10 13:49:39

by Jes Sorensen

[permalink] [raw]
Subject: Re: [patch -mm series] ia64 specific /dev/mem handlers

>>>>> "Andrew" == Andrew Morton <[email protected]> writes:

Andrew> Jes Sorensen <[email protected]> wrote:
>> Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
>> available. Needed on ia64 for pages converted fo uncached mappings
>> to avoid it being accessed in cached mode after the conversion
>> which can lead to memory corruption. Introduces PG_uncached page
>> flag for marking pages uncached.

Andrew> For some reason this patch still gives me the creeps. Maybe
Andrew> it's because we lose a page flag for something so obscure.

Andrew> Nothing ever clears PG_uncached. We'll end up with every page
Andrew> in the machine marked as being uncached.

Actually there's restrictions to how many pages are getting converted
as converting pages over from cached to uncached isn't trivial on ia64.

Andrew> But then, nothing ever sets PG_uncached, either. Is there
Andrew> some patch which you're hiding from me?

Actually I posted that earlier, but it must have gotten lost in the
noise. It's part of the genalloc/mspec patchset. I'll send it to you
directly.

Andrew> If a page is marked uncached then it'll remain marked as
Andrew> uncached even after it's unmapped. Or will it? Would like to
Andrew> see the other patch, please.

Coming your way in a jiffy.

Andrew> We should add PG_uncached checks to the page allocator. Is
Andrew> this OK?

I don't see any problems with that. The way it's meant to be used is
that once pages are converted over, they don't go back into the
allocator.

Cheers,
Jes