Following some discussion with Matt, Andrew and Chris, here is a recast
of the 32-bit MMIO patch I posted the other day. The routine is now
named memcpy_toio32, and is provided in generic and x86_64-optimised
forms.
I haven't added a memcpy_fromio32, or routines for other access sizes,
because our hardware doesn't need them. If someone wants them for
reasons of symmetry, I can introduce them.
Signed-off-by: Bryan O'Sullivan <[email protected]>
This routine is an arch-independent building block for memcpy_toio32.
It copies data to a memory-mapped I/O region, using 32-bit accesses.
This style of access is required by some devices.
Signed-off-by: Bryan O'Sullivan <[email protected]>
diff -r 789a24638663 -r 7b7b442a4d63 include/asm-generic/iomap.h
--- a/include/asm-generic/iomap.h Tue Dec 27 09:27:10 2005 +0800
+++ b/include/asm-generic/iomap.h Tue Dec 27 15:41:48 2005 -0800
@@ -56,6 +56,12 @@
extern void fastcall iowrite16_rep(void __iomem *port, const void *buf, unsigned long count);
extern void fastcall iowrite32_rep(void __iomem *port, const void *buf, unsigned long count);
+/*
+ * MMIO copy routines. These are guaranteed to operate in units denoted
+ * by their names. This style of operation is required by some devices.
+ */
+extern void fastcall __memcpy_toio32(volatile void __iomem *to, const void *from, size_t count);
+
/* Create a virtual mapping cookie for an IO port range */
extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
extern void ioport_unmap(void __iomem *);
diff -r 789a24638663 -r 7b7b442a4d63 lib/iomap.c
--- a/lib/iomap.c Tue Dec 27 09:27:10 2005 +0800
+++ b/lib/iomap.c Tue Dec 27 15:41:48 2005 -0800
@@ -187,6 +187,22 @@
EXPORT_SYMBOL(iowrite16_rep);
EXPORT_SYMBOL(iowrite32_rep);
+/*
+ * Copy data to an MMIO region. MMIO space accesses are performed
+ * in the sizes indicated in each function's name.
+ */
+void fastcall __memcpy_toio32(volatile void __iomem *d, const void *s, size_t count)
+{
+ volatile u32 __iomem *dst = d;
+ const u32 *src = s;
+
+ while (--count >= 0) {
+ __raw_writel(*src++, dst++);
+ }
+}
+
+EXPORT_SYMBOL_GPL(__memcpy_toio32);
+
/* Create a virtual mapping cookie for an IO port range */
void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
Introduce an x86_64-specific memcpy32 routine. The routine is similar
to memcpy, but is guaranteed to work in units of 32 bits at a time.
Signed-off-by: Bryan O'Sullivan <[email protected]>
diff -r 7b7b442a4d63 -r 042b7d9004ac arch/x86_64/kernel/x8664_ksyms.c
--- a/arch/x86_64/kernel/x8664_ksyms.c Tue Dec 27 15:41:48 2005 -0800
+++ b/arch/x86_64/kernel/x8664_ksyms.c Tue Dec 27 15:41:48 2005 -0800
@@ -150,6 +150,8 @@
extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * __memcpy(void *,const void *,__kernel_size_t);
+extern void memcpy32(void *,const void *,__kernel_size_t);
+
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(memmove);
@@ -164,6 +166,8 @@
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL_GPL(memcpy32);
+
#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
/* prototypes are wrong, these are assembly with custom calling functions */
extern void rwsem_down_read_failed_thunk(void);
diff -r 7b7b442a4d63 -r 042b7d9004ac arch/x86_64/lib/Makefile
--- a/arch/x86_64/lib/Makefile Tue Dec 27 15:41:48 2005 -0800
+++ b/arch/x86_64/lib/Makefile Tue Dec 27 15:41:48 2005 -0800
@@ -9,4 +9,4 @@
lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
usercopy.o getuser.o putuser.o \
thunk.o clear_page.o copy_page.o bitstr.o bitops.o
-lib-y += memcpy.o memmove.o memset.o copy_user.o
+lib-y += memcpy.o memcpy32.o memmove.o memset.o copy_user.o
diff -r 7b7b442a4d63 -r 042b7d9004ac include/asm-x86_64/string.h
--- a/include/asm-x86_64/string.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-x86_64/string.h Tue Dec 27 15:41:48 2005 -0800
@@ -45,6 +45,8 @@
#define __HAVE_ARCH_MEMMOVE
void * memmove(void * dest,const void *src,size_t count);
+void memcpy32(void *dst, const void *src, size_t count);
+
/* Use C out of line version for memcmp */
#define memcmp __builtin_memcmp
int memcmp(const void * cs,const void * ct,size_t count);
diff -r 7b7b442a4d63 -r 042b7d9004ac arch/x86_64/lib/memcpy32.S
--- /dev/null Thu Jan 1 00:00:00 1970 +0000
+++ b/arch/x86_64/lib/memcpy32.S Tue Dec 27 15:41:48 2005 -0800
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2003, 2004, 2005 PathScale, Inc.
+ */
+
+/*
+ * memcpy32 - Copy a memory block, 32 bits at a time.
+ *
+ * Count is number of dwords; it need not be a qword multiple.
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ */
+
+ .globl memcpy32
+memcpy32:
+ movl %edx,%ecx
+ shrl $1,%ecx
+ andl $1,%edx
+ rep
+ movsq
+ movl %edx,%ecx
+ rep
+ movsd
+ ret
Most arches use the generic __memcpy_toio32 routine, while x86_64
uses memcpy32, which is substantially faster.
Signed-off-by: Bryan O'Sullivan <[email protected]>
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-alpha/io.h
--- a/include/asm-alpha/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-alpha/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -504,6 +504,8 @@
extern void memcpy_toio(volatile void __iomem *, const void *, long);
extern void _memset_c_io(volatile void __iomem *, unsigned long, long);
+#define memcpy_toio32 __memcpy_toio32
+
static inline void memset_io(volatile void __iomem *addr, u8 c, long len)
{
_memset_c_io(addr, 0x0101010101010101UL * c, len);
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-arm/io.h
--- a/include/asm-arm/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-arm/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -184,6 +184,8 @@
#define memset_io(c,v,l) _memset_io(__mem_pci(c),(v),(l))
#define memcpy_fromio(a,c,l) _memcpy_fromio((a),__mem_pci(c),(l))
#define memcpy_toio(c,a,l) _memcpy_toio(__mem_pci(c),(a),(l))
+
+#define memcpy_toio32 __memcpy_toio32
#define eth_io_copy_and_sum(s,c,l,b) \
eth_copy_and_sum((s),__mem_pci(c),(l),(b))
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-cris/io.h
--- a/include/asm-cris/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-cris/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -121,6 +121,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#define memcpy_toio32 __memcpy_toio32
+
/*
* Again, CRIS does not require mem IO specific function.
*/
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-frv/io.h
--- a/include/asm-frv/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-frv/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -124,6 +124,8 @@
memcpy((void __force *) dst, src, count);
}
+#define memcpy_toio32 __memcpy_toio32
+
static inline uint8_t inb(unsigned long addr)
{
return __builtin_read8((void *)addr);
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-h8300/io.h
--- a/include/asm-h8300/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-h8300/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -209,6 +209,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#define memcpy_toio32 __memcpy_toio32
+
#define mmiowb()
#define inb(addr) ((h8300_buswidth(addr))?readw((addr) & ~1) & 0xff:readb(addr))
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-i386/io.h
--- a/include/asm-i386/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-i386/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -203,6 +203,9 @@
{
__memcpy((void __force *) dst, src, count);
}
+
+#define memcpy_toio32 __memcpy_toio32
+
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-ia64/io.h
--- a/include/asm-ia64/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-ia64/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -443,6 +443,8 @@
extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n);
extern void memset_io(volatile void __iomem *s, int c, long n);
+#define memcpy_toio32 __memcpy_toio32
+
#define dma_cache_inv(_start,_size) do { } while (0)
#define dma_cache_wback(_start,_size) do { } while (0)
#define dma_cache_wback_inv(_start,_size) do { } while (0)
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-m32r/io.h
--- a/include/asm-m32r/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-m32r/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -216,6 +216,8 @@
memcpy((void __force *) dst, src, count);
}
+#define memcpy_toio32 __memcpy_toio32
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-m68knommu/io.h
--- a/include/asm-m68knommu/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-m68knommu/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -113,6 +113,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#define memcpy_toio32 __memcpy_toio32
+
#define inb(addr) readb(addr)
#define inw(addr) readw(addr)
#define inl(addr) readl(addr)
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-mips/io.h
--- a/include/asm-mips/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-mips/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -534,6 +534,8 @@
memcpy((void __force *) dst, src, count);
}
+#define memcpy_toio32 __memcpy_toio32
+
/*
* Memory Mapped I/O
*/
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-parisc/io.h
--- a/include/asm-parisc/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-parisc/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -294,6 +294,8 @@
void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
+#define memcpy_toio32 __memcpy_toio32
+
/* Support old drivers which don't ioremap.
* NB this interface is scheduled to disappear in 2.5
*/
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-powerpc/io.h
--- a/include/asm-powerpc/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-powerpc/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -63,6 +63,8 @@
#define memcpy_fromio(a,b,c) iSeries_memcpy_fromio((a), (b), (c))
#define memcpy_toio(a,b,c) iSeries_memcpy_toio((a), (b), (c))
+#define memcpy_toio32 __memcpy_toio32
+
#define inb(addr) readb(((void __iomem *)(long)(addr)))
#define inw(addr) readw(((void __iomem *)(long)(addr)))
#define inl(addr) readl(((void __iomem *)(long)(addr)))
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-ppc/io.h
--- a/include/asm-ppc/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-ppc/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -367,6 +367,8 @@
}
#endif
+#define memcpy_toio32 __memcpy_toio32
+
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(void __iomem *)(b),(c),(d))
/*
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-s390/io.h
--- a/include/asm-s390/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-s390/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -99,6 +99,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
+#define memcpy_toio32 __memcpy_toio32
+
#define inb_p(addr) readb(addr)
#define inb(addr) readb(addr)
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-sh/io.h
--- a/include/asm-sh/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-sh/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -177,6 +177,8 @@
extern void memcpy_toio(unsigned long, const void *, unsigned long);
extern void memset_io(unsigned long, int, unsigned long);
+#define memcpy_toio32 __memcpy_toio32
+
/* SuperH on-chip I/O functions */
static __inline__ unsigned char ctrl_inb(unsigned long addr)
{
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-sh64/io.h
--- a/include/asm-sh64/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-sh64/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -125,6 +125,8 @@
void memcpy_toio(void __iomem *to, const void *from, long count);
void memcpy_fromio(void *to, void __iomem *from, long count);
+
+#define memcpy_toio32 __memcpy_toio32
#define mmiowb()
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-sparc/io.h
--- a/include/asm-sparc/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-sparc/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -239,6 +239,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+#define memcpy_toio32 __memcpy_toio32
+
#ifdef __KERNEL__
/*
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-sparc64/io.h
--- a/include/asm-sparc64/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-sparc64/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -440,6 +440,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+#define memcpy_toio32 __memcpy_toio32
+
static inline int check_signature(void __iomem *io_addr,
const unsigned char *signature,
int length)
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-v850/io.h
--- a/include/asm-v850/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-v850/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -130,6 +130,8 @@
#define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
#define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)
+#define memcpy_toio32 __memcpy_toio32
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-x86_64/io.h
--- a/include/asm-x86_64/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-x86_64/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -252,6 +252,13 @@
__memcpy_toio((unsigned long)to,from,len);
}
+#include <asm/string.h>
+
+static inline void memcpy_toio32(volatile void __iomem *dst, const void *src, size_t count)
+{
+ memcpy32((void *) dst, src, count);
+}
+
void memset_io(volatile void __iomem *a, int b, size_t c);
/*
diff -r 042b7d9004ac -r 0441e7525e4e include/asm-xtensa/io.h
--- a/include/asm-xtensa/io.h Tue Dec 27 15:41:48 2005 -0800
+++ b/include/asm-xtensa/io.h Tue Dec 27 15:41:48 2005 -0800
@@ -159,6 +159,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#define memcpy_toio32 __memcpy_toio32
+
/* At this point the Xtensa doesn't provide byte swap instructions */
#ifdef __XTENSA_EB__
A couple of comments here:
> +/*
> + * Copy data to an MMIO region. MMIO space accesses are performed
> + * in the sizes indicated in each function's name.
> + */
> +void fastcall __memcpy_toio32(volatile void __iomem *d, const void *s, size_t count)
> +{
> + volatile u32 __iomem *dst = d;
> + const u32 *src = s;
> +
> + while (--count >= 0) {
> + __raw_writel(*src++, dst++);
> + }
I think the principle of least surprise calls for memcpy_toio32 to be
ordered the same way memcpy_toio is. In other words there should be a
wmb() after the loop.
Also, no need for the { } for the while loop.
> +}
> +
> +EXPORT_SYMBOL_GPL(__memcpy_toio32);
You're adding this symbol and exporting it even if the arch will
supply its own version. So this is pure kernel .text bloat...
- R.
Oh yeah:
> +EXPORT_SYMBOL_GPL(__memcpy_toio32);
I think this is a sufficiently basic facility that it might as well be
plain EXPORT_SYMBOL(), although I don't mind making things harder on
non-GPL modules.
- R.
On Tue, Dec 27, 2005 at 03:41:55PM -0800, Bryan O'Sullivan wrote:
> +/*
> + * MMIO copy routines. These are guaranteed to operate in units denoted
> + * by their names. This style of operation is required by some devices.
> + */
Using kdoc style for new code is nice.
> +extern void fastcall __memcpy_toio32(volatile void __iomem *to, const void *from, size_t count);
> +
Minor rant: extern is always redundant for function prototypes in C.
I'd prefer that we adopt a standard of not using extern for functions,
as it would make use of extern for variables (almost always
inappropriate, especially in C files) stick out more.
While people claim this has some documentation value ("I _meant_ for
this to be exported"), I think it actually has a net negative effect,
as quite a number of people actually think the "extern" keyword does
some unspecified magic here and ignore the namespace pollution of their
theoretically "un-externed" but not explicitly static functions.
There isn't any sort of consensus on this point as far as I know, so
this is just me venting.
> /* Create a virtual mapping cookie for an IO port range */
> extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
> extern void ioport_unmap(void __iomem *);
> diff -r 789a24638663 -r 7b7b442a4d63 lib/iomap.c
> --- a/lib/iomap.c Tue Dec 27 09:27:10 2005 +0800
> +++ b/lib/iomap.c Tue Dec 27 15:41:48 2005 -0800
> @@ -187,6 +187,22 @@
> EXPORT_SYMBOL(iowrite16_rep);
> EXPORT_SYMBOL(iowrite32_rep);
>
> +/*
> + * Copy data to an MMIO region. MMIO space accesses are performed
> + * in the sizes indicated in each function's name.
> + */
> +void fastcall __memcpy_toio32(volatile void __iomem *d, const void *s, size_t count)
> +{
> + volatile u32 __iomem *dst = d;
> + const u32 *src = s;
> +
> + while (--count >= 0) {
> + __raw_writel(*src++, dst++);
> +}
Suspicious use of volatile - writel is doing the actual write, this
function never does a dereference. As you've already got private
copies of the pointers already in s and d, it's perfectily reasonable
and idiomatic to do:
while (--count >= 0)
__raw_writel(*s++, d++);
I'd personally write this as:
while (count--)
__raw_writel(*s++, d++);
And as you appear to be using the __raw.. version to avoid repeated
mb()s, you probably ought to tack one on at the end.
--
Mathematics is the supreme nostalgia of our time.
On Tue, Dec 27, 2005 at 05:11:50PM -0800, Roland Dreier wrote:
> Oh yeah:
>
> > +EXPORT_SYMBOL_GPL(__memcpy_toio32);
>
> I think this is a sufficiently basic facility that it might as well be
> plain EXPORT_SYMBOL(), although I don't mind making things harder on
> non-GPL modules.
This area is so murky, it verges on religion. Thus, I really think
this ought to be submitter's preference.
Any attempt to make a hard and fast rule of what's considered GPL or
not will potentially dilute any legal protection for exported symbols
predating the existence of the EXPORT_SYMBOL_GPL mechanism that are
nonetheless only reasonably used by GPL code (which by some
contributors' measures is all of them).
In other words, we must be careful not to directly or indirectly
construe EXPORT_SYMBOL as a carte blanche for linking GPL-incompatible
code.
--
Mathematics is the supreme nostalgia of our time.
On Tue, Dec 27, 2005 at 03:41:56PM -0800, Bryan O'Sullivan wrote:
> Introduce an x86_64-specific memcpy32 routine. The routine is similar
> to memcpy, but is guaranteed to work in units of 32 bits at a time.
>
> Signed-off-by: Bryan O'Sullivan <[email protected]>
>
> diff -r 7b7b442a4d63 -r 042b7d9004ac arch/x86_64/kernel/x8664_ksyms.c
> --- a/arch/x86_64/kernel/x8664_ksyms.c Tue Dec 27 15:41:48 2005 -0800
> +++ b/arch/x86_64/kernel/x8664_ksyms.c Tue Dec 27 15:41:48 2005 -0800
> @@ -150,6 +150,8 @@
> extern void * memcpy(void *,const void *,__kernel_size_t);
> extern void * __memcpy(void *,const void *,__kernel_size_t);
>
> +extern void memcpy32(void *,const void *,__kernel_size_t);
It's better to do an include here. Duplicating prototypes in .c files
is frowned upon (despite the fact that it's already done here).
> +
> EXPORT_SYMBOL(memset);
> EXPORT_SYMBOL(strlen);
> EXPORT_SYMBOL(memmove);
> @@ -164,6 +166,8 @@
> EXPORT_SYMBOL(memcpy);
> EXPORT_SYMBOL(__memcpy);
>
> +EXPORT_SYMBOL_GPL(memcpy32);
> +
We've been steadily moving towards grouping EXPORTs with function
definitions. Do *_ksyms.c exist solely to provide exports for
functions defined in assembly at this point? If so, perhaps we ought
to come up with a suitable export macro for asm files.
> diff -r 7b7b442a4d63 -r 042b7d9004ac arch/x86_64/lib/memcpy32.S
> --- /dev/null Thu Jan 1 00:00:00 1970 +0000
> +++ b/arch/x86_64/lib/memcpy32.S Tue Dec 27 15:41:48 2005 -0800
> @@ -0,0 +1,25 @@
> +/*
> + * Copyright (c) 2003, 2004, 2005 PathScale, Inc.
> + */
> +
> +/*
> + * memcpy32 - Copy a memory block, 32 bits at a time.
> + *
> + * Count is number of dwords; it need not be a qword multiple.
> + * Input:
> + * rdi destination
> + * rsi source
> + * rdx count
> + */
> +
> + .globl memcpy32
> +memcpy32:
> + movl %edx,%ecx
> + shrl $1,%ecx
> + andl $1,%edx
> + rep
> + movsq
> + movl %edx,%ecx
> + rep
> + movsd
> + ret
Any reason this needs its own .S file? One wonders if the
.p2align 4
in memcpy.S is appropriate here too. Splitting rep movsq across two
lines is a little weird to me too, but I see Andi did it too.
--
Mathematics is the supreme nostalgia of our time.
On Tue, Dec 27, 2005 at 10:03:32PM -0800, David Wagner wrote:
>
> In article <[email protected]> you write:
> >> +void fastcall __memcpy_toio32(volatile void __iomem *d, const void
> >*s, size_t count)
> >> +{
> >> + volatile u32 __iomem *dst = d;
> >> + const u32 *src = s;
> >> +
> >> + while (--count >= 0) {
> >> + __raw_writel(*src++, dst++);
> >> +}
> >
> >Suspicious use of volatile - writel is doing the actual write, this
> >function never does a dereference. As you've already got private
> >copies of the pointers already in s and d, it's perfectily reasonable
> >and idiomatic to do:
> >
> > while (--count >= 0)
> > __raw_writel(*s++, d++);
>
> I don't think *s++ or d++ is going to be accepted by the compiler,
> given that s and d are both "void *"'s. The posted code casts to
> "u32 *", which should make the dereference and autoincrement work
> correctly.
>
> [Not posted to linux-kernel, in case I'm totally confused.]
No, you're absolutely right. It's idiomatic for things like strcpy,
where the type is known, but not for memcpy.
However, it just so happens that GCC will happily treat math on void
pointers as if they were char pointers. So this in fact will
compile without errors:
void cpy(void *a, void *b, int count)
{
while (count--)
copybyte(a++, b++);
}
Linus thinks this is a feature but I don't, so I'm not recommending
it. In fact, given the whole point of this function is to copy dwords,
not bytes, this GCCism fails us for type-safety.
At any rate, the dereference on s above _is_ a compile error - it's void
type.
[cc:ed back to lkml to keep me honest]
--
Mathematics is the supreme nostalgia of our time.
> > +
> > + .globl memcpy32
> > +memcpy32:
> > + movl %edx,%ecx
> > + shrl $1,%ecx
> > + andl $1,%edx
> > + rep
> > + movsq
Does this one really do 32-bit stores?! I doubt so...
> > + movl %edx,%ecx
> > + rep
> > + movsd
> > + ret
>
> Any reason this needs its own .S file? One wonders if the
>
> .p2align 4
>
> in memcpy.S is appropriate here too. Splitting rep movsq across two
> lines is a little weird to me too, but I see Andi did it too.
--
vda
On Tue, 2005-12-27 at 17:10 -0800, Roland Dreier wrote:
> I think the principle of least surprise calls for memcpy_toio32 to be
> ordered the same way memcpy_toio is. In other words there should be a
> wmb() after the loop.
Will do.
> Also, no need for the { } for the while loop.
Fine. There doesn't seem to be much consistency in whether to use
curlies for single-line blocks.
> You're adding this symbol and exporting it even if the arch will
> supply its own version. So this is pure kernel .text bloat...
I don't know what you'd prefer, so let me enumerate a few alternatives,
and you can either tell me which you'd prefer, or point out something
I've missed that would be even better. I'm entirely flexible on this.
* Use the __HAVE_ARCH_* mechanism that include/asm-*/string.h
uses. Caveat: Linus has lately come out as hating this style.
It makes for the smallest patch, though.
* Define the generic code in lib/, and have each arch that really
uses it export it.
* Put generic code in include/asm-generic/algo-memcpy_toio32.h,
and have each arch that needs it #include it somewhere and use
it.
Have I missed anything?
<b
On Tue, 2005-12-27 at 21:52 -0600, Matt Mackall wrote:
> On Tue, Dec 27, 2005 at 03:41:55PM -0800, Bryan O'Sullivan wrote:
> > +/*
> > + * MMIO copy routines. These are guaranteed to operate in units denoted
> > + * by their names. This style of operation is required by some devices.
> > + */
>
> Using kdoc style for new code is nice.
OK, will do.
> > +extern void fastcall __memcpy_toio32(volatile void __iomem *to, const void *from, size_t count);
> > +
>
> Minor rant: extern is always redundant for function prototypes in C.
I know. My intent was to keep the prototype consistent with the
prevailing style of other declarations in that same routine. If you
think that cleanliness is more important, I'll be happy to change it.
> Suspicious use of volatile - writel is doing the actual write, this
> function never does a dereference.
Yeah. I lost the plot there a bit. I'll remove the volatiles.
> As you've already got private
> copies of the pointers already in s and d, it's perfectily reasonable
> and idiomatic to do:
>
> while (--count >= 0)
> __raw_writel(*s++, d++);
But pointer arithmetic is undefined on void pointers. gcc lets you do
it, but it treats sizeof(void) as 1, which gives entirely the wrong
results in this case.
> And as you appear to be using the __raw.. version to avoid repeated
> mb()s, you probably ought to tack one on at the end.
Well spotted, thanks.
<b
On Tue, 2005-12-27 at 22:22 -0600, Matt Mackall wrote:
> It's better to do an include here. Duplicating prototypes in .c files
> is frowned upon (despite the fact that it's already done here).
Yeah. I'm not thrilled about the existing style of that file, but I
don't want to weed-whack it as I go. That turns a small patch into a
case of mission creep.
> We've been steadily moving towards grouping EXPORTs with function
> definitions. Do *_ksyms.c exist solely to provide exports for
> functions defined in assembly at this point? If so, perhaps we ought
> to come up with a suitable export macro for asm files.
That might make sense, but it's also beyond the scope of what I'm trying
to do.
> Any reason this needs its own .S file?
Not really.
> One wonders if the
>
> .p2align 4
>
> in memcpy.S is appropriate here too.
It's not clear to me that it makes any difference either way. Both
routines obviously work :-) Perhaps Andi can indicate his opinion.
<b
On Wed, Dec 28, 2005 at 06:40:03AM -0800, Bryan O'Sullivan wrote:
> On Tue, 2005-12-27 at 17:10 -0800, Roland Dreier wrote:
>
> > I think the principle of least surprise calls for memcpy_toio32 to be
> > ordered the same way memcpy_toio is. In other words there should be a
> > wmb() after the loop.
>
> Will do.
>
> > Also, no need for the { } for the while loop.
>
> Fine. There doesn't seem to be much consistency in whether to use
> curlies for single-line blocks.
We've been very consistent in discouraging it in new code. Enforcement
of fine points of coding style is a post-2.5 phenomenon, so it hasn't
hit all the tree yet.
> > You're adding this symbol and exporting it even if the arch will
> > supply its own version. So this is pure kernel .text bloat...
>
> I don't know what you'd prefer, so let me enumerate a few alternatives,
> and you can either tell me which you'd prefer, or point out something
> I've missed that would be even better. I'm entirely flexible on this.
>
> * Use the __HAVE_ARCH_* mechanism that include/asm-*/string.h
> uses. Caveat: Linus has lately come out as hating this style.
> It makes for the smallest patch, though.
> * Define the generic code in lib/, and have each arch that really
> uses it export it.
I'd favor this, at least for this case. If it becomes more widely
used, we'll relocate the export.
> * Put generic code in include/asm-generic/algo-memcpy_toio32.h,
> and have each arch that needs it #include it somewhere and use
> it.
--
Mathematics is the supreme nostalgia of our time.
On Wed, Dec 28, 2005 at 06:47:42AM -0800, Bryan O'Sullivan wrote:
> > > +extern void fastcall __memcpy_toio32(volatile void __iomem *to, const void *from, size_t count);
> > > +
> >
> > Minor rant: extern is always redundant for function prototypes in C.
>
> I know. My intent was to keep the prototype consistent with the
> prevailing style of other declarations in that same routine. If you
> think that cleanliness is more important, I'll be happy to change it.
No, I'm actually just raising it for discussion. I personally don't
add them, even for consistency, as I'd like to eventually see them gone.
But if Christoph and Andrew won't care, my tiny crusade is probably lost.
--
Mathematics is the supreme nostalgia of our time.
On Tue, 27 Dec 2005, Matt Mackall wrote:
> On Tue, Dec 27, 2005 at 03:41:55PM -0800, Bryan O'Sullivan wrote:
> > /* Create a virtual mapping cookie for an IO port range */
> > extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
> > extern void ioport_unmap(void __iomem *);
> > diff -r 789a24638663 -r 7b7b442a4d63 lib/iomap.c
> > --- a/lib/iomap.c Tue Dec 27 09:27:10 2005 +0800
> > +++ b/lib/iomap.c Tue Dec 27 15:41:48 2005 -0800
> > @@ -187,6 +187,22 @@
> > EXPORT_SYMBOL(iowrite16_rep);
> > EXPORT_SYMBOL(iowrite32_rep);
> >
> > +/*
> > + * Copy data to an MMIO region. MMIO space accesses are performed
> > + * in the sizes indicated in each function's name.
> > + */
> > +void fastcall __memcpy_toio32(volatile void __iomem *d, const void *s, size_t count)
> > +{
> > + volatile u32 __iomem *dst = d;
> > + const u32 *src = s;
> > +
> > + while (--count >= 0) {
> > + __raw_writel(*src++, dst++);
> > +}
>
> Suspicious use of volatile - writel is doing the actual write, this
> function never does a dereference. As you've already got private
> copies of the pointers already in s and d, it's perfectily reasonable
> and idiomatic to do:
>
> while (--count >= 0)
> __raw_writel(*s++, d++);
>
> I'd personally write this as:
>
> while (count--)
> __raw_writel(*s++, d++);
Indeed.
BTW, does the original loop really work? Size size_t is unsigned, >= 0 is
always true and we have a nice infinite loop?
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On Wed, 2005-12-28 at 16:18 +0100, Geert Uytterhoeven wrote:
> BTW, does the original loop really work?
I had to turn it into a for loop shortly after I posted it, for exactly
the reason you pointed out.
<b
> > You're adding this symbol and exporting it even if the arch will
> > supply its own version. So this is pure kernel .text bloat...
> I don't know what you'd prefer, so let me enumerate a few alternatives,
> and you can either tell me which you'd prefer, or point out something
> I've missed that would be even better. I'm entirely flexible on this.
>
> * Use the __HAVE_ARCH_* mechanism that include/asm-*/string.h
> uses. Caveat: Linus has lately come out as hating this style.
> It makes for the smallest patch, though.
> * Define the generic code in lib/, and have each arch that really
> uses it export it.
> * Put generic code in include/asm-generic/algo-memcpy_toio32.h,
> and have each arch that needs it #include it somewhere and use
> it.
The middle alternative seems the cleanest, although I'm not sure where
the export really belongs.
I don't think I could really say the right way to do this without
thinking some more -- but I am positive that exporting a function that
will never ever be called is something we should work hard to avoid.
- R.
On Wed, Dec 28, 2005 at 08:51:14AM -0600, Matt Mackall wrote:
> On Wed, Dec 28, 2005 at 06:40:03AM -0800, Bryan O'Sullivan wrote:
> > On Tue, 2005-12-27 at 17:10 -0800, Roland Dreier wrote:
> >
> > > I think the principle of least surprise calls for memcpy_toio32 to be
> > > ordered the same way memcpy_toio is. In other words there should be a
> > > wmb() after the loop.
> >
> > Will do.
> >
> > > Also, no need for the { } for the while loop.
> >
> > Fine. There doesn't seem to be much consistency in whether to use
> > curlies for single-line blocks.
>
> We've been very consistent in discouraging it in new code. Enforcement
> of fine points of coding style is a post-2.5 phenomenon, so it hasn't
> hit all the tree yet.
>
> > > You're adding this symbol and exporting it even if the arch will
> > > supply its own version. So this is pure kernel .text bloat...
> >
> > I don't know what you'd prefer, so let me enumerate a few alternatives,
> > and you can either tell me which you'd prefer, or point out something
> > I've missed that would be even better. I'm entirely flexible on this.
> >
> > * Use the __HAVE_ARCH_* mechanism that include/asm-*/string.h
> > uses. Caveat: Linus has lately come out as hating this style.
> > It makes for the smallest patch, though.
> > * Define the generic code in lib/, and have each arch that really
> > uses it export it.
>
> I'd favor this, at least for this case. If it becomes more widely
> used, we'll relocate the export.
>...
I don't like this for two reasons:
- we are moving exports to the actual functions and steadily killing all
*syms* files
- the lib-y approach has the disadvantage of completely omitting the
function if it's used only in modules resulting in non-working
modules
Where's the problem with the __HAVE_ARCH_* mechanism?
cu
Adrian
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
On Sat, Dec 31, 2005 at 12:46:28AM +0100, Adrian Bunk wrote:
> On Wed, Dec 28, 2005 at 08:51:14AM -0600, Matt Mackall wrote:
> > On Wed, Dec 28, 2005 at 06:40:03AM -0800, Bryan O'Sullivan wrote:
> > > On Tue, 2005-12-27 at 17:10 -0800, Roland Dreier wrote:
> > >
> > > > I think the principle of least surprise calls for memcpy_toio32 to be
> > > > ordered the same way memcpy_toio is. In other words there should be a
> > > > wmb() after the loop.
> > >
> > > Will do.
> > >
> > > > Also, no need for the { } for the while loop.
> > >
> > > Fine. There doesn't seem to be much consistency in whether to use
> > > curlies for single-line blocks.
> >
> > We've been very consistent in discouraging it in new code. Enforcement
> > of fine points of coding style is a post-2.5 phenomenon, so it hasn't
> > hit all the tree yet.
> >
> > > > You're adding this symbol and exporting it even if the arch will
> > > > supply its own version. So this is pure kernel .text bloat...
> > >
> > > I don't know what you'd prefer, so let me enumerate a few alternatives,
> > > and you can either tell me which you'd prefer, or point out something
> > > I've missed that would be even better. I'm entirely flexible on this.
> > >
> > > * Use the __HAVE_ARCH_* mechanism that include/asm-*/string.h
> > > uses. Caveat: Linus has lately come out as hating this style.
> > > It makes for the smallest patch, though.
> > > * Define the generic code in lib/, and have each arch that really
> > > uses it export it.
> >
> > I'd favor this, at least for this case. If it becomes more widely
> > used, we'll relocate the export.
> >...
>
> I don't like this for two reasons:
> - we are moving exports to the actual functions and steadily killing all
> *syms* files
> - the lib-y approach has the disadvantage of completely omitting the
> function if it's used only in modules resulting in non-working
> modules
>
> Where's the problem with the __HAVE_ARCH_* mechanism?
The head penguin peed on it last week.
--
Mathematics is the supreme nostalgia of our time.
On Fri, 30 Dec 2005, Matt Mackall wrote:
> >
> > Where's the problem with the __HAVE_ARCH_* mechanism?
>
> The head penguin peed on it last week.
Actually "sprkinling with penguin pee" means that something is blessed
(it's like a kernel baptism). Maybe that's not very civilized, but hey,
penguins don't have thumbs, and are thus kind of limited in their actions.
Don't be speciest.
So the head penguin didn't pee on it, it turned its back in disgust, and
hoped that it would freeze to death in the arctic winter.
And no, I don't like the __HAVE_ARCH_xxx mechanisms at all. They are
pointless, and hard to follow. If an architecture wants to use a generic
mechanism, it should do one of the following (or a combination):
- use the config file mechanism, and use
obj-$(CONFIG_GENERIC_FOO) += generic-foo.c
in a Makefile to link in the generic version.
Examples: CONFIG_RWSEM_GENERIC_SPINLOCK.
- just include the generic header from its own header, eg just do a
#include <asm-generic/div64.h>
or similar.
Now, the latter in particular is very easy to follow: if you look into the
<asm/div64.h> file and see that it just includes <asm-generic/div64.h>,
it's very obvious what is going on and where to find the real
implementation. You never have to wonder what the indirection means.
Similarly, anybody that fixes the generic header file can _trivially_ grep
for its use. So the code stays clean, and there are absolutely zero
compile-time conditionals, and the linkages both ways are obvious. And
architectures that do _not_ use the generic routines are totally
unaffected by them, and they don't need to specify any flags like "I have
my own routines" to disable things.
Now, the CONFIG_GENERIC_FOO thing is a bit less obvious, and you may have
to know about that config option in order to realize that a particular
architecture is using a generic library routine, but at least with those
Kconfig options, the language to describe them is clean these days, and
it's _the_ standard way to express configuration information. So it may be
a bit subtler and more indirect, but once you get used to it, it too is
very clean.
In contrast, the __HAVE_ARCH_xxx thing has zero upsides. It just causes
#ifdef mess in C source files, and unnecessary noise in standard header
files. I know it's been there for a long time, but just grep for
__HAVE_ARCH_MEMCPY and cry. Why the hell should all the architectures that
have their own optimized memcpy() have to tell the rest of the world about
it?
[ Yeah, I know why: bad implementation choice. It could easily have been
done with the asm-generic approach or CONFIG_GENERIC_MEMCPY, but it
wasn't. Note that the __HAVE_ARCH_xxx thing isn't even a standard form:
sometimes it's the negation: __ARCH_WANT_xxx, and sometimes it's called
something else entirely, like USE_ELF_CORE_DUMP or HAVE_PCI_MMAP, or
ARCH_HAS_PREFETCH or HAVE_CSUM_COPY_USER. My point being that it's
totally ad-hoc and random. ]
So we do have tons of ugly stuff, I just am trying to argue for not making
more of it (I don't think it's a big enough deal that it would be worth it
trying to clean up old uses).
If you look at the mutex patches, for example, I think everybody will
agree that they look much _better_ after they moved to just using the
trivial "#include <asm-generic/mutex-xyzzy.h>" format. At least I don't
_think_ this is just a personal weird hang-up of mine. It's literally a
cleanliness issue.
Linus
>>
>> The head penguin peed on it last week.
>
>Actually "sprkinling with penguin pee" means that something is blessed
>(it's like a kernel baptism). Maybe that's not very civilized, but hey,
>penguins don't have thumbs, and are thus kind of limited in their actions.
>Don't be speciest.
>
At least they could have used water instead of pee.
Jan
--
On Sat, 31 Dec 2005, Jan Engelhardt wrote:
> >>
> >> The head penguin peed on it last week.
> >
> >Actually "sprinkling with penguin pee" means that something is blessed
> >(it's like a kernel baptism). Maybe that's not very civilized, but hey,
> >penguins don't have thumbs, and are thus kind of limited in their actions.
> >Don't be speciest.
>
> At least they could have used water instead of pee.
Hey, when you live at -40 deg C for long times, I challenge you to find
some liquid water to sprinkle around.
Antarctica is one of the driest places on earth - never mind that there's
tons of ice around.
You use what you have.
Linus
On Fri, Dec 30, 2005 at 04:23:46PM -0800, Linus Torvalds wrote:
>...
> > > Where's the problem with the __HAVE_ARCH_* mechanism?
>...
> And no, I don't like the __HAVE_ARCH_xxx mechanisms at all. They are
> pointless, and hard to follow. If an architecture wants to use a generic
> mechanism, it should do one of the following (or a combination):
>
> - use the config file mechanism, and use
>
> obj-$(CONFIG_GENERIC_FOO) += generic-foo.c
>
> in a Makefile to link in the generic version.
>
> Examples: CONFIG_RWSEM_GENERIC_SPINLOCK.
>
> - just include the generic header from its own header, eg just do a
>
> #include <asm-generic/div64.h>
>
> or similar.
>
> Now, the latter in particular is very easy to follow: if you look into the
> <asm/div64.h> file and see that it just includes <asm-generic/div64.h>,
> it's very obvious what is going on and where to find the real
> implementation. You never have to wonder what the indirection means.
>...
> Now, the CONFIG_GENERIC_FOO thing is a bit less obvious, and you may have
> to know about that config option in order to realize that a particular
> architecture is using a generic library routine, but at least with those
> Kconfig options, the language to describe them is clean these days, and
> it's _the_ standard way to express configuration information. So it may be
> a bit subtler and more indirect, but once you get used to it, it too is
> very clean.
>...
OK, this I don't have any problem with.
I'm not yet fully convinced that __HAVE_ARCH_xxx is really that bad, but
your proposed solution doesn't have the problems I had in mind.
What is OK:
obj-$(CONFIG_GENERIC_FOO) += generic-foo.o
What is not OK:
lib-y += generic-foo.o
The latter has the following disadvantages:
- it's non-obvious whether the object actually gets included in the
kernel
- if the contents of generic-foo.o is only used in modules,
generic-foo.o is _not_ included in the kernel resulting in an
obvious breakage
> Linus
cu
Adrian
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
On ?t 27-12-05 15:41:56, Bryan O'Sullivan wrote:
> Introduce an x86_64-specific memcpy32 routine. The routine is similar
> to memcpy, but is guaranteed to work in units of 32 bits at a time.
>
> Signed-off-by: Bryan O'Sullivan <[email protected]>
> diff -r 7b7b442a4d63 -r 042b7d9004ac arch/x86_64/lib/memcpy32.S
> --- /dev/null Thu Jan 1 00:00:00 1970 +0000
> +++ b/arch/x86_64/lib/memcpy32.S Tue Dec 27 15:41:48 2005 -0800
> @@ -0,0 +1,25 @@
> +/*
> + * Copyright (c) 2003, 2004, 2005 PathScale, Inc.
> + */
Did it really take 3 years to develop this? Anyway this contains
copyright but not GPL, not allowing us to distribute it.
Pavel
--
Thanks, Sharp!
On Fri, 2006-01-06 at 10:12 +0100, Pavel Machek wrote:
> Did it really take 3 years to develop this?
Each instruction is carefully aged in an oak barrel, in a
climate-controlled cave.
> Anyway this contains
> copyright but not GPL, not allowing us to distribute it.
I'll fix that, next round.
<b