2013-05-23 16:50:47

by Kim Phillips

[permalink] [raw]
Subject: [PATCH v7] arm: use built-in byte swap function

Enable the compiler intrinsic for byte swapping on arch ARM. This
allows the compiler to detect and be able to optimize out byte
swappings, and has a very modest benefit on vmlinux size (Linaro gcc
4.8):

text data bss dec hex filename
2840310 123932 61960 3026202 2e2d1a vmlinux-lart #orig
2840152 123932 61960 3026044 2e2c7c vmlinux-lart #builtin-bswap

6473120 314840 5616016 12403976 bd4508 vmlinux-mxs #orig
6472586 314848 5616016 12403450 bd42fa vmlinux-mxs #builtin-bswap

7419872 318372 379556 8117800 7bde28 vmlinux-imx_v6_v7 #orig
7419170 318364 379556 8117090 7bdb62 vmlinux-imx_v6_v7 #builtin-bswap

Signed-off-by: Kim Phillips <[email protected]>
Reviewed-by: Nicolas Pitre <[email protected]>
Acked-by: David Woodhouse <[email protected]>
---
resending as v6 appears to have fallen through the cracks. Russell?

v7: rebased onto next-20130521, re-ran above vmlinux sizes with
Linaro gcc 4.8, added Nicolas' Reviewed-by, and David's Acked-by.
v6 and prior version information:
https://lkml.org/lkml/2013/2/22/475

arch/arm/Kconfig | 1 +
arch/arm/boot/compressed/Makefile | 15 +++++++++++----
arch/arm/kernel/armksyms.c | 4 ++++
arch/arm/lib/Makefile | 2 +-
arch/arm/lib/bswapsdi2.S | 36 ++++++++++++++++++++++++++++++++++++
5 files changed, 53 insertions(+), 5 deletions(-)
create mode 100644 arch/arm/lib/bswapsdi2.S

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a7fc5ea..c2fe04d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -63,6 +63,7 @@ config ARM
select OLD_SIGSUSPEND3
select OLD_SIGACTION
select HAVE_CONTEXT_TRACKING
+ select ARCH_USE_BUILTIN_BSWAP
help
The ARM series is a line of low-power-consumption RISC chip designs
licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 198a4ad..bd8a176 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -112,12 +112,12 @@ endif

targets := vmlinux vmlinux.lds \
piggy.$(suffix_y) piggy.$(suffix_y).o \
- lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S \
- font.o font.c head.o misc.o $(OBJS)
+ lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \
+ bswapsdi2.S font.o font.c head.o misc.o $(OBJS)

# Make sure files are removed during clean
extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
- lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs)
+ lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs)

ifeq ($(CONFIG_FUNCTION_TRACER),y)
ORIG_CFLAGS := $(KBUILD_CFLAGS)
@@ -159,6 +159,12 @@ ashldi3 = $(obj)/ashldi3.o
$(obj)/ashldi3.S: $(srctree)/arch/$(SRCARCH)/lib/ashldi3.S
$(call cmd,shipped)

+# For __bswapsi2, __bswapdi2
+bswapsdi2 = $(obj)/bswapsdi2.o
+
+$(obj)/bswapsdi2.S: $(srctree)/arch/$(SRCARCH)/lib/bswapsdi2.S
+ $(call cmd,shipped)
+
# We need to prevent any GOTOFF relocs being used with references
# to symbols in the .bss section since we cannot relocate them
# independently from the rest at run time. This can be achieved by
@@ -180,7 +186,8 @@ if [ $(words $(ZRELADDR)) -gt 1 -a "$(CONFIG_AUTO_ZRELADDR)" = "" ]; then \
fi

$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
- $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) FORCE
+ $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \
+ $(bswapsdi2) FORCE
@$(check_for_multiple_zreladdr)
$(call if_changed,ld)
@$(check_for_bad_syms)
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 60d3b73..ba578f7 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -35,6 +35,8 @@ extern void __ucmpdi2(void);
extern void __udivsi3(void);
extern void __umodsi3(void);
extern void __do_div64(void);
+extern void __bswapsi2(void);
+extern void __bswapdi2(void);

extern void __aeabi_idiv(void);
extern void __aeabi_idivmod(void);
@@ -114,6 +116,8 @@ EXPORT_SYMBOL(__ucmpdi2);
EXPORT_SYMBOL(__udivsi3);
EXPORT_SYMBOL(__umodsi3);
EXPORT_SYMBOL(__do_div64);
+EXPORT_SYMBOL(__bswapsi2);
+EXPORT_SYMBOL(__bswapdi2);

#ifdef CONFIG_AEABI
EXPORT_SYMBOL(__aeabi_idiv);
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index af72969..5383df7 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -13,7 +13,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
ucmpdi2.o lib1funcs.o div64.o \
io-readsb.o io-writesb.o io-readsl.o io-writesl.o \
- call_with_stack.o
+ call_with_stack.o bswapsdi2.o

mmu-y := clear_user.o copy_page.o getuser.o putuser.o

diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
new file mode 100644
index 0000000..2ba43a0
--- /dev/null
+++ b/arch/arm/lib/bswapsdi2.S
@@ -0,0 +1,36 @@
+#include <linux/linkage.h>
+
+#if __LINUX_ARM_ARCH__ >= 6
+ENTRY(__bswapsi2)
+ rev r0, r0
+ bx lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ rev r3, r0
+ rev r0, r1
+ mov r1, r3
+ bx lr
+ENDPROC(__bswapdi2)
+#else
+ENTRY(__bswapsi2)
+ eor r3, r0, r0, ror #16
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ eor r0, r3, r0, ror #8
+ mov pc, lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ mov ip, r1
+ eor r3, ip, ip, ror #16
+ eor r1, r0, r0, ror #16
+ mov r1, r1, lsr #8
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ bic r1, r1, #0xff00
+ eor r1, r1, r0, ror #8
+ eor r0, r3, ip, ror #8
+ mov pc, lr
+ENDPROC(__bswapdi2)
+#endif
--
1.8.1.5


2013-05-23 20:09:45

by Nicolas Pitre

[permalink] [raw]
Subject: Re: [PATCH v7] arm: use built-in byte swap function

On Thu, 23 May 2013, Kim Phillips wrote:

> Enable the compiler intrinsic for byte swapping on arch ARM. This
> allows the compiler to detect and be able to optimize out byte
> swappings, and has a very modest benefit on vmlinux size (Linaro gcc
> 4.8):
>
> text data bss dec hex filename
> 2840310 123932 61960 3026202 2e2d1a vmlinux-lart #orig
> 2840152 123932 61960 3026044 2e2c7c vmlinux-lart #builtin-bswap
>
> 6473120 314840 5616016 12403976 bd4508 vmlinux-mxs #orig
> 6472586 314848 5616016 12403450 bd42fa vmlinux-mxs #builtin-bswap
>
> 7419872 318372 379556 8117800 7bde28 vmlinux-imx_v6_v7 #orig
> 7419170 318364 379556 8117090 7bdb62 vmlinux-imx_v6_v7 #builtin-bswap
>
> Signed-off-by: Kim Phillips <[email protected]>
> Reviewed-by: Nicolas Pitre <[email protected]>
> Acked-by: David Woodhouse <[email protected]>
> ---
> resending as v6 appears to have fallen though the cracks. Russell?

Please send your patch to Russell's patch system:

http://www.arm.linux.org.uk/developer/patches/


Nicolas

2013-05-23 23:14:52

by Russell King - ARM Linux

[permalink] [raw]
Subject: Re: [PATCH v7] arm: use built-in byte swap function

On Thu, May 23, 2013 at 11:46:54AM -0500, Kim Phillips wrote:
> Enable the compiler intrinsic for byte swapping on arch ARM. This
> allows the compiler to detect and be able to optimize out byte
> swappings, and has a very modest benefit on vmlinux size (Linaro gcc
> 4.8):
>
> text data bss dec hex filename
> 2840310 123932 61960 3026202 2e2d1a vmlinux-lart #orig
> 2840152 123932 61960 3026044 2e2c7c vmlinux-lart #builtin-bswap
>
> 6473120 314840 5616016 12403976 bd4508 vmlinux-mxs #orig
> 6472586 314848 5616016 12403450 bd42fa vmlinux-mxs #builtin-bswap
>
> 7419872 318372 379556 8117800 7bde28 vmlinux-imx_v6_v7 #orig
> 7419170 318364 379556 8117090 7bdb62 vmlinux-imx_v6_v7 #builtin-bswap
>
> Signed-off-by: Kim Phillips <[email protected]>
> Reviewed-by: Nicolas Pitre <[email protected]>
> Acked-by: David Woodhouse <[email protected]>
> ---
> resending as v6 appears to have fallen though the cracks. Russell?

Please put it in the patch system (otherwise I do drop patches.)

2013-06-06 22:13:22

by Russell King - ARM Linux

[permalink] [raw]
Subject: Re: [PATCH v7] arm: use built-in byte swap function

On Fri, May 24, 2013 at 12:13:36AM +0100, Russell King - ARM Linux wrote:
> On Thu, May 23, 2013 at 11:46:54AM -0500, Kim Phillips wrote:
> > Enable the compiler intrinsic for byte swapping on arch ARM. This
> > allows the compiler to detect and be able to optimize out byte
> > swappings, and has a very modest benefit on vmlinux size (Linaro gcc
> > 4.8):
> >
> > text data bss dec hex filename
> > 2840310 123932 61960 3026202 2e2d1a vmlinux-lart #orig
> > 2840152 123932 61960 3026044 2e2c7c vmlinux-lart #builtin-bswap
> >
> > 6473120 314840 5616016 12403976 bd4508 vmlinux-mxs #orig
> > 6472586 314848 5616016 12403450 bd42fa vmlinux-mxs #builtin-bswap
> >
> > 7419872 318372 379556 8117800 7bde28 vmlinux-imx_v6_v7 #orig
> > 7419170 318364 379556 8117090 7bdb62 vmlinux-imx_v6_v7 #builtin-bswap
> >
> > Signed-off-by: Kim Phillips <[email protected]>
> > Reviewed-by: Nicolas Pitre <[email protected]>
> > Acked-by: David Woodhouse <[email protected]>
> > ---
> > resending as v6 appears to have fallen though the cracks. Russell?
>
> Please put it in the patch system (otherwise I do drop patches.)

(Added Arnd/SFR in case they have comments.)

So, we have a problem here - the kind which appears when people stuff
things into the -next tree which aren't destined for the next merge
window. This is the relevant context from your patch, which is
against linux-next:

- lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S \
- font.o font.c head.o misc.o $(OBJS)
+ lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \
+ bswapsdi2.S font.o font.c head.o misc.o $(OBJS)

# Make sure files are removed during clean
extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
^^^^^^^^^
- lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs)
+ lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs)

the underlined bit - piggy.lz4 for those who read mail with proportional
fonts.

That is not in any kernel I have, and if it _is_ something that is
destined for the next merge window, it should be in my tree as it's
a core ARM feature, not in some random other tree.

Short of hand-editing and manually applying the patch, a solution would
be to rebase it on a mainline kernel version, like -rc4, and resubmit
that version instead. That will ultimately then give sfr a conflict
which should be trivial to resolve - and hopefully we'll find out who's
carrying the LZ4 patch and putting it into linux-next.

2013-06-06 22:23:38

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v7] arm: use built-in byte swap function

On Thu, Jun 06, 2013 at 11:12:34PM +0100, Russell King - ARM Linux wrote:
> That will ultimately then give sfr a conflict which should be trivial
> to resolve - and hopefully we'll find out who's carrying the LZ4 patch
> and putting it into linux-next.

That should be akpm:

http://ozlabs.org/~akpm/mmotm/broken-out/arm-add-support-for-lz4-compressed-kernel.patch

AFAICT.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2013-06-07 00:03:46

by Stephen Rothwell

[permalink] [raw]
Subject: Re: [PATCH v7] arm: use built-in byte swap function

Hi Russell,

On Thu, 6 Jun 2013 23:12:34 +0100 Russell King - ARM Linux <[email protected]> wrote:
>
> So, we have a problem here - the kind which appears when people stuff
> things into the -next tree which aren't destined for the next merge
> window. This is the relevant context from your patch, which is
> against linux-next:
>
> - lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S \
> - font.o font.c head.o misc.o $(OBJS)
> + lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \
> + bswapsdi2.S font.o font.c head.o misc.o $(OBJS)
>
> # Make sure files are removed during clean
> extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
> ^^^^^^^^^
> - lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs)
> + lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs)
>
> the underlined bit - piggy.lz4 for those who read mail with proportional
> fonts.
>
> That is not in any kernel I have, and if it _is_ something that is
> destined for the next merge window, it should be in my tree as it's
> a core ARM feature, not in some random other tree.

That is commit d8a6bf1b25bd ("arm: add support for LZ4-compressed
kernel") from next-20130606 from the akpm tree. (adding author cc) That
patch was cc'd to you, and is part of a series that adds LZ4 compression
to the kernel, so would not work on its own. The first patch in the
series is "decompressor: add LZ4 decompressor module".


> Short of hand-editing and manually applying the patch, a solution would
> be to rebase it on a mainline kernel version, like -rc4, and resubmit
> that version instead. That will ultimately then give sfr a conflict
> which should be trivial to resolve - and hopefully we'll find out who's
> carrying the LZ4 patch and putting it into linux-next.

People should *never, ever* submit patches based on linux-next (unless,
of course they are to me to help fix merge conflicts in linux-next, etc).
Patches submitted to a particular maintainer should be based on (an
ancestor of) that maintainer's current tree.

Sure, test new code before and after merging linux-next, but don't base
new code on it.
--
Cheers,
Stephen Rothwell [email protected]


Attachments:
(No filename) (2.28 kB)
(No filename) (836.00 B)
Download all attachments

2013-10-27 02:41:39

by Nicolas Pitre

[permalink] [raw]
Subject: Re: [PATCH v7] arm: use built-in byte swap function

On Thu, 23 May 2013, Kim Phillips wrote:

> Enable the compiler intrinsic for byte swapping on arch ARM. This
> allows the compiler to detect and be able to optimize out byte
> swappings, and has a very modest benefit on vmlinux size (Linaro gcc
> 4.8):
>
> text data bss dec hex filename
> 2840310 123932 61960 3026202 2e2d1a vmlinux-lart #orig
> 2840152 123932 61960 3026044 2e2c7c vmlinux-lart #builtin-bswap
>
> 6473120 314840 5616016 12403976 bd4508 vmlinux-mxs #orig
> 6472586 314848 5616016 12403450 bd42fa vmlinux-mxs #builtin-bswap
>
> 7419872 318372 379556 8117800 7bde28 vmlinux-imx_v6_v7 #orig
> 7419170 318364 379556 8117090 7bdb62 vmlinux-imx_v6_v7 #builtin-bswap
>
> Signed-off-by: Kim Phillips <[email protected]>
> Reviewed-by: Nicolas Pitre <[email protected]>
> Acked-by: David Woodhouse <[email protected]>

Did this ever go somewhere?

Russell suggested at the time to base it against a mainline kernel
(since it was patching files which apparently were already patched with
out-of-tree lz4 patches) and send it to his patch system.


> ---
> resending as v6 appears to have fallen though the cracks. Russell?
>
> v7: rebased onto next-20130521, re-ran above vmlinux sizes with
> Linaro gcc 4.8, added Nicolas' Reviewed-by, and David's Acked-by.
> v6 and prior version information:
> https://lkml.org/lkml/2013/2/22/475
>
> arch/arm/Kconfig | 1 +
> arch/arm/boot/compressed/Makefile | 15 +++++++++++----
> arch/arm/kernel/armksyms.c | 4 ++++
> arch/arm/lib/Makefile | 2 +-
> arch/arm/lib/bswapsdi2.S | 36 ++++++++++++++++++++++++++++++++++++
> 5 files changed, 53 insertions(+), 5 deletions(-)
> create mode 100644 arch/arm/lib/bswapsdi2.S
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index a7fc5ea..c2fe04d 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -63,6 +63,7 @@ config ARM
> select OLD_SIGSUSPEND3
> select OLD_SIGACTION
> select HAVE_CONTEXT_TRACKING
> + select ARCH_USE_BUILTIN_BSWAP
> help
> The ARM series is a line of low-power-consumption RISC chip designs
> licensed by ARM Ltd and targeted at embedded applications and
> diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
> index 198a4ad..bd8a176 100644
> --- a/arch/arm/boot/compressed/Makefile
> +++ b/arch/arm/boot/compressed/Makefile
> @@ -112,12 +112,12 @@ endif
>
> targets := vmlinux vmlinux.lds \
> piggy.$(suffix_y) piggy.$(suffix_y).o \
> - lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S \
> - font.o font.c head.o misc.o $(OBJS)
> + lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \
> + bswapsdi2.S font.o font.c head.o misc.o $(OBJS)
>
> # Make sure files are removed during clean
> extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
> - lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs)
> + lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs)
>
> ifeq ($(CONFIG_FUNCTION_TRACER),y)
> ORIG_CFLAGS := $(KBUILD_CFLAGS)
> @@ -159,6 +159,12 @@ ashldi3 = $(obj)/ashldi3.o
> $(obj)/ashldi3.S: $(srctree)/arch/$(SRCARCH)/lib/ashldi3.S
> $(call cmd,shipped)
>
> +# For __bswapsi2, __bswapdi2
> +bswapsdi2 = $(obj)/bswapsdi2.o
> +
> +$(obj)/bswapsdi2.S: $(srctree)/arch/$(SRCARCH)/lib/bswapsdi2.S
> + $(call cmd,shipped)
> +
> # We need to prevent any GOTOFF relocs being used with references
> # to symbols in the .bss section since we cannot relocate them
> # independently from the rest at run time. This can be achieved by
> @@ -180,7 +186,8 @@ if [ $(words $(ZRELADDR)) -gt 1 -a "$(CONFIG_AUTO_ZRELADDR)" = "" ]; then \
> fi
>
> $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
> - $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) FORCE
> + $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \
> + $(bswapsdi2) FORCE
> @$(check_for_multiple_zreladdr)
> $(call if_changed,ld)
> @$(check_for_bad_syms)
> diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
> index 60d3b73..ba578f7 100644
> --- a/arch/arm/kernel/armksyms.c
> +++ b/arch/arm/kernel/armksyms.c
> @@ -35,6 +35,8 @@ extern void __ucmpdi2(void);
> extern void __udivsi3(void);
> extern void __umodsi3(void);
> extern void __do_div64(void);
> +extern void __bswapsi2(void);
> +extern void __bswapdi2(void);
>
> extern void __aeabi_idiv(void);
> extern void __aeabi_idivmod(void);
> @@ -114,6 +116,8 @@ EXPORT_SYMBOL(__ucmpdi2);
> EXPORT_SYMBOL(__udivsi3);
> EXPORT_SYMBOL(__umodsi3);
> EXPORT_SYMBOL(__do_div64);
> +EXPORT_SYMBOL(__bswapsi2);
> +EXPORT_SYMBOL(__bswapdi2);
>
> #ifdef CONFIG_AEABI
> EXPORT_SYMBOL(__aeabi_idiv);
> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index af72969..5383df7 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -13,7 +13,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
> ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
> ucmpdi2.o lib1funcs.o div64.o \
> io-readsb.o io-writesb.o io-readsl.o io-writesl.o \
> - call_with_stack.o
> + call_with_stack.o bswapsdi2.o
>
> mmu-y := clear_user.o copy_page.o getuser.o putuser.o
>
> diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
> new file mode 100644
> index 0000000..2ba43a0
> --- /dev/null
> +++ b/arch/arm/lib/bswapsdi2.S
> @@ -0,0 +1,36 @@
> +#include <linux/linkage.h>
> +
> +#if __LINUX_ARM_ARCH__ >= 6
> +ENTRY(__bswapsi2)
> + rev r0, r0
> + bx lr
> +ENDPROC(__bswapsi2)
> +
> +ENTRY(__bswapdi2)
> + rev r3, r0
> + rev r0, r1
> + mov r1, r3
> + bx lr
> +ENDPROC(__bswapdi2)
> +#else
> +ENTRY(__bswapsi2)
> + eor r3, r0, r0, ror #16
> + mov r3, r3, lsr #8
> + bic r3, r3, #0xff00
> + eor r0, r3, r0, ror #8
> + mov pc, lr
> +ENDPROC(__bswapsi2)
> +
> +ENTRY(__bswapdi2)
> + mov ip, r1
> + eor r3, ip, ip, ror #16
> + eor r1, r0, r0, ror #16
> + mov r1, r1, lsr #8
> + mov r3, r3, lsr #8
> + bic r3, r3, #0xff00
> + bic r1, r1, #0xff00
> + eor r1, r1, r0, ror #8
> + eor r0, r3, ip, ror #8
> + mov pc, lr
> +ENDPROC(__bswapdi2)
> +#endif
> --
> 1.8.1.5
>