Add definitions of rmb() and cpu_relax() and include the ARM unistd.h
header. The kernel uses different definitions for rmb() depending on
the arch revision and whether the system is SMP or not. The lowest common
denominator is a compiler memory barrier so use that.
Signed-off-by: Jamie Iles <[email protected]>
Cc: Russell King <[email protected]>
Cc: Peter Zijlstra <[email protected]>
---
tools/perf/perf.h | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 454d5d5..4b8eac6 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -59,6 +59,12 @@
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#endif
+#ifdef __arm__
+#include "../../arch/arm/include/asm/unistd.h"
+#define rmb() asm volatile("":::"memory")
+#define cpu_relax() asm volatile("":::"memory")
+#endif
+
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
--
1.6.5.4
For embedded platforms, we want to be able to build the perf
tools on a build machine to run on a different arch. This patch
allows $CROSS_COMPILE to set the cross compiler. Additionally, if
NO_LIBPERL is set, then don't use perl include paths as they will
be for the host arch.
Signed-off-by: Jamie Iles <[email protected]>
CC: Peter Zijlstra <[email protected]>
---
tools/perf/Makefile | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 23ec660..e2ee3b5 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -237,8 +237,8 @@ lib = lib
export prefix bindir sharedir sysconfdir
-CC = gcc
-AR = ar
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
RM = rm -f
TAR = tar
FIND = find
@@ -492,8 +492,10 @@ else
LIB_OBJS += util/probe-finder.o
endif
+ifndef NO_LIBPERL
PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+endif
ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
BASIC_CFLAGS += -DNO_LIBPERL
--
1.6.5.4
* Jamie Iles <[email protected]> wrote:
> +#ifdef __arm__
> +#include "../../arch/arm/include/asm/unistd.h"
> +#define rmb() asm volatile("":::"memory")
> +#define cpu_relax() asm volatile("":::"memory")
> +#endif
cpu_relax() looks fine, but rmb() seems not to match the one that can be
found in arch/arm/:
arch/arm/include/asm/system.h:#define rmb() dmb()
arch/arm/include/asm/system.h:#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
arch/arm/include/asm/system.h:#define smp_rmb() rmb()
arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
Ingo
Commit-ID: cc835752ae3634acd2d487fdf5152f6075f45aef
Gitweb: http://git.kernel.org/tip/cc835752ae3634acd2d487fdf5152f6075f45aef
Author: Jamie Iles <[email protected]>
AuthorDate: Fri, 11 Dec 2009 09:21:00 +0000
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 11 Dec 2009 11:24:13 +0100
perf tools: Allow cross compiling
For embedded platforms, we want to be able to build the perf
tools on a build machine to run on a different arch. This patch
allows $CROSS_COMPILE to set the cross compiler.
Additionally, if NO_LIBPERL is set, then don't use perl include
paths as they will be for the host arch.
Signed-off-by: Jamie Iles <[email protected]>
Cc: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/Makefile | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 23ec660..e2ee3b5 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -237,8 +237,8 @@ lib = lib
export prefix bindir sharedir sysconfdir
-CC = gcc
-AR = ar
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
RM = rm -f
TAR = tar
FIND = find
@@ -492,8 +492,10 @@ else
LIB_OBJS += util/probe-finder.o
endif
+ifndef NO_LIBPERL
PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+endif
ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
BASIC_CFLAGS += -DNO_LIBPERL
On Fri, Dec 11, 2009 at 11:23:16AM +0100, Ingo Molnar wrote:
> cpu_relax() looks fine, but rmb() seems not to match the one that can be
> found in arch/arm/:
>
> arch/arm/include/asm/system.h:#define rmb() dmb()
> arch/arm/include/asm/system.h:#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
> arch/arm/include/asm/system.h:#define smp_rmb() rmb()
>
> arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
> arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
> arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
> arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
The implementation of the barriers depend on the CPU arch revision which is
defined in the kernel config. As the perf tools don't use the kernel config,
we don't know here what arch revision we're building for. Perhaps we need a
LINUX_ARM_ARCH parameter when building for ARM so we can pick the correct one.
Jamie
From: Ingo Molnar <[email protected]>
Date: Fri, 11 Dec 2009 11:23:16 +0100
>
> * Jamie Iles <[email protected]> wrote:
>
>> +#ifdef __arm__
>> +#include "../../arch/arm/include/asm/unistd.h"
>> +#define rmb() asm volatile("":::"memory")
>> +#define cpu_relax() asm volatile("":::"memory")
>> +#endif
>
> cpu_relax() looks fine, but rmb() seems not to match the one that can be
> found in arch/arm/:
I think he did it this way so it can compile in the meantime,
and that doing it right requires runtime cpu detection to
select which barrier instruction is even available on the
current ARM cpu.
* Jamie Iles <[email protected]> wrote:
> On Fri, Dec 11, 2009 at 11:23:16AM +0100, Ingo Molnar wrote:
> > cpu_relax() looks fine, but rmb() seems not to match the one that can be
> > found in arch/arm/:
> >
> > arch/arm/include/asm/system.h:#define rmb() dmb()
> > arch/arm/include/asm/system.h:#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
> > arch/arm/include/asm/system.h:#define smp_rmb() rmb()
> >
> > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
> > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
> > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
> > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
>
> The implementation of the barriers depend on the CPU arch revision
> which is defined in the kernel config. As the perf tools don't use the
> kernel config, we don't know here what arch revision we're building
> for. Perhaps we need a LINUX_ARM_ARCH parameter when building for ARM
> so we can pick the correct one.
rmb() is used in two places in perf:
tools/perf/builtin-record.c: rmb();
tools/perf/builtin-top.c: rmb();
to interact with the shared kernel/user ring-buffer. Getting a barrier
wrong there may cause hiccups in recording.
Could you tell me a bit more about this ARM instruction - is the 'DMB'
instruction used on all SMP ARM cores? Can it be used unconditionally,
or is the instruction undefined on certain versions? To get the ball
rolling we could use it unconditionally in the initial patch, but this
needs to be solved i suspect.
Ingo
* David Miller <[email protected]> wrote:
> From: Ingo Molnar <[email protected]>
> Date: Fri, 11 Dec 2009 11:23:16 +0100
>
> >
> > * Jamie Iles <[email protected]> wrote:
> >
> >> +#ifdef __arm__
> >> +#include "../../arch/arm/include/asm/unistd.h"
> >> +#define rmb() asm volatile("":::"memory")
> >> +#define cpu_relax() asm volatile("":::"memory")
> >> +#endif
> >
> > cpu_relax() looks fine, but rmb() seems not to match the one that can be
> > found in arch/arm/:
>
> I think he did it this way so it can compile in the meantime, and that
> doing it right requires runtime cpu detection to select which barrier
> instruction is even available on the current ARM cpu.
Yeah. We can merge a quick patch for it if runtime detection is
difficult - but if then such a patch should err on the side of using the
barrier instruction unconditionally - even if this causes perf to
segfault on certain (older? UP configured?) ARM cores.
Ingo
On Fri, Dec 11, 2009 at 11:38:48AM +0100, Ingo Molnar wrote:
>
> * Jamie Iles <[email protected]> wrote:
>
> > On Fri, Dec 11, 2009 at 11:23:16AM +0100, Ingo Molnar wrote:
> > > cpu_relax() looks fine, but rmb() seems not to match the one that can be
> > > found in arch/arm/:
> > >
> > > arch/arm/include/asm/system.h:#define rmb() dmb()
> > > arch/arm/include/asm/system.h:#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
> > > arch/arm/include/asm/system.h:#define smp_rmb() rmb()
> > >
> > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
> > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
> > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
> > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
> >
> > The implementation of the barriers depend on the CPU arch revision
> > which is defined in the kernel config. As the perf tools don't use the
> > kernel config, we don't know here what arch revision we're building
> > for. Perhaps we need a LINUX_ARM_ARCH parameter when building for ARM
> > so we can pick the correct one.
>
> rmb() is used in two places in perf:
>
> tools/perf/builtin-record.c: rmb();
> tools/perf/builtin-top.c: rmb();
>
> to interact with the shared kernel/user ring-buffer. Getting a barrier
> wrong there may cause hiccups in recording.
>
> Could you tell me a bit more about this ARM instruction - is the 'DMB'
> instruction used on all SMP ARM cores? Can it be used unconditionally,
> or is the instruction undefined on certain versions? To get the ball
> rolling we could use it unconditionally in the initial patch, but this
> needs to be solved i suspect.
There are a few cases we need to deal with:
- v7 SMP: DMB instruction
- v6 SMP: MCR coprocessor instruction
- v5 and earlier no instructions for barriers.
Looking at the TRM for a v7 core (cortex A9) the MCR instruction that v6 uses
is deprecated but still present. I suspect we could use this to cover the v6
and v7 cores but we wouldn't be able to do soft perf events on v5 or earlier
(which don't have hardware counters).
Jamie
* Will Deacon <[email protected]> wrote:
> > From: [email protected] [mailto:[email protected]]
> > On Behalf Of Jamie Iles
> > Sent: 11 December 2009 10:31
> >
> > The implementation of the barriers depend on the CPU arch revision
> > which is defined in the kernel config. As the perf tools don't use
> > the kernel config, we don't know here what arch revision we're
> > building for. Perhaps we need a LINUX_ARM_ARCH parameter when
> > building for ARM so we can pick the correct one.
>
> Hi Jamie, Ingo,
>
> Surely a better way to proceed with this would be to build the perf
> tool as a side effect of building the kernel? That way the relevant
> definitions from system.h could be included directly and there would
> be no need to duplicate the architectural conditionals in perf.h.
Might make sense.
> I'm also working on perf-events for ARM and am using:
>
> #define rmb() asm volatile("mcr p15, 0, %0, c7, c10, 5" :: "r" (0) : "memory")
>
> This will work on v6 and v7 [although the dmb instruction is preferred
> here] cores.
Note that the codepath where it's used isn't very performance sensitive
(we call it about once per batch of event processing), so we could use
the broadest instruction that works on as many cores as possible - to
keep things simple.
Ingo
On Fri, Dec 11, 2009 at 1:02 PM, Ingo Molnar <[email protected]> wrote:
>
> * Will Deacon <[email protected]> wrote:
>
>> > From: [email protected] [mailto:[email protected]]
>> > On Behalf Of Jamie Iles
>> > Sent: 11 December 2009 10:31
>> >
>> > The implementation of the barriers depend on the CPU arch revision
>> > which is defined in the kernel config. As the perf tools don't use
>> > the kernel config, we don't know here what arch revision we're
>> > building for. Perhaps we need a LINUX_ARM_ARCH parameter when
>> > building for ARM so we can pick the correct one.
>>
>> Hi Jamie, Ingo,
>>
>> Surely a better way to proceed with this would be to build the perf
>> tool as a side effect of building the kernel? That way the relevant
>> definitions from system.h could be included directly and there would
>> be no need to duplicate the architectural conditionals in perf.h.
>
> Might make sense.
>
>> I'm also working on perf-events for ARM and am using:
>>
>> #define rmb()           asm volatile("mcr p15, 0, %0, c7, c10, 5" :: "r" (0) : "memory")
>>
>> This will work on v6 and v7 [although the dmb instruction is preferred
>> here] cores.
>
> Note that the codepath where it's used isn't very performance sensitive
> (we call it about once per batch of event processing), so we could use
> the broadest instruction that works on as many cores as possible - to
> keep things simple.
How plausible is it to reuse the bits in
arch/arm/include/asm/cputype.h and do an version of rmb() that has
if-else for the v6 and v7 cases?
Jamie Iles writes:
> On Fri, Dec 11, 2009 at 11:38:48AM +0100, Ingo Molnar wrote:
> >
> > * Jamie Iles <[email protected]> wrote:
> >
> > > On Fri, Dec 11, 2009 at 11:23:16AM +0100, Ingo Molnar wrote:
> > > > cpu_relax() looks fine, but rmb() seems not to match the one that can be
> > > > found in arch/arm/:
> > > >
> > > > arch/arm/include/asm/system.h:#define rmb() dmb()
> > > > arch/arm/include/asm/system.h:#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
> > > > arch/arm/include/asm/system.h:#define smp_rmb() rmb()
> > > >
> > > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
> > > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
> > > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
> > > > arch/arm/include/asm/system.h:#define dmb() __asm__ __volatile__ ("" : : : "memory")
> > >
> > > The implementation of the barriers depend on the CPU arch revision
> > > which is defined in the kernel config. As the perf tools don't use the
> > > kernel config, we don't know here what arch revision we're building
> > > for. Perhaps we need a LINUX_ARM_ARCH parameter when building for ARM
> > > so we can pick the correct one.
> >
> > rmb() is used in two places in perf:
> >
> > tools/perf/builtin-record.c: rmb();
> > tools/perf/builtin-top.c: rmb();
> >
> > to interact with the shared kernel/user ring-buffer. Getting a barrier
> > wrong there may cause hiccups in recording.
> >
> > Could you tell me a bit more about this ARM instruction - is the 'DMB'
> > instruction used on all SMP ARM cores? Can it be used unconditionally,
> > or is the instruction undefined on certain versions? To get the ball
> > rolling we could use it unconditionally in the initial patch, but this
> > needs to be solved i suspect.
> There are a few cases we need to deal with:
> - v7 SMP: DMB instruction
> - v6 SMP: MCR coprocessor instruction
> - v5 and earlier no instructions for barriers.
>
> Looking at the TRM for a v7 core (cortex A9) the MCR instruction that v6 uses
> is deprecated but still present. I suspect we could use this to cover the v6
> and v7 cores but we wouldn't be able to do soft perf events on v5 or earlier
> (which don't have hardware counters).
The correct solution is to invoke a kernel-exported CPU-specific helper
function in the ARM kernel helper page.
I see a __kuser_memory_barrier entry there which maps to smp_dmb.
On Fri, Dec 11, 2009 at 11:41:26AM +0100, Ingo Molnar wrote:
> > I think he did it this way so it can compile in the meantime, and that
> > doing it right requires runtime cpu detection to select which barrier
> > instruction is even available on the current ARM cpu.
>
> Yeah. We can merge a quick patch for it if runtime detection is
> difficult - but if then such a patch should err on the side of using the
> barrier instruction unconditionally - even if this causes perf to
> segfault on certain (older? UP configured?) ARM cores.
Ok, unless anyone has any objections, I'll post a revised patch that uses the
MCR instruction so that we get the correct behaviour on v6/v7 SMP and UP
systems and an illegal instruction on v5 or earlier. I've had a quick look at
runtime detection and the ID registers are only accessible from privileged
modes so for long term, it might be better to define the rmb() at build time
from the kernel config.
Jamie
Add definitions of rmb() and cpu_relax() and include the ARM unistd.h
header. The __kuser_memory_barrier helper in the helper page is used to
provide the correct memory barrier depending on the CPU type.
Signed-off-by: Jamie Iles <[email protected]>
Cc: Russell King <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Mikael Pettersson <[email protected]>
---
tools/perf/perf.h | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 454d5d5..75f941b 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -59,6 +59,18 @@
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#endif
+#ifdef __arm__
+#include "../../arch/arm/include/asm/unistd.h"
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define rmb() asm volatile("mov r0, #0xffff0fff; mov lr, pc;" \
+ "sub pc, r0, #95" ::: "r0", "lr", "cc", \
+ "memory")
+#define cpu_relax() asm volatile("":::"memory")
+#endif
+
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
--
1.6.5.4
* Jamie Iles <[email protected]> wrote:
> On Fri, Dec 11, 2009 at 11:41:26AM +0100, Ingo Molnar wrote:
> > > I think he did it this way so it can compile in the meantime, and that
> > > doing it right requires runtime cpu detection to select which barrier
> > > instruction is even available on the current ARM cpu.
> >
> > Yeah. We can merge a quick patch for it if runtime detection is
> > difficult - but if then such a patch should err on the side of using the
> > barrier instruction unconditionally - even if this causes perf to
> > segfault on certain (older? UP configured?) ARM cores.
>
> Ok, unless anyone has any objections, I'll post a revised patch that
> uses the MCR instruction so that we get the correct behaviour on v6/v7
> SMP and UP systems and an illegal instruction on v5 or earlier. I've
> had a quick look at runtime detection and the ID registers are only
> accessible from privileged modes so for long term, it might be better
> to define the rmb() at build time from the kernel config.
Sounds good to me.
Ingo
Commit-ID: 58e9f94138c1d9c47f6a63632ca7a78fc6dcc15f
Gitweb: http://git.kernel.org/tip/58e9f94138c1d9c47f6a63632ca7a78fc6dcc15f
Author: Jamie Iles <[email protected]>
AuthorDate: Fri, 11 Dec 2009 12:20:09 +0000
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 11 Dec 2009 13:50:21 +0100
perf tools: Allow building for ARM
Add definitions of rmb() and cpu_relax() and include the ARM
unistd.h header. The __kuser_memory_barrier helper in the helper
page is used to provide the correct memory barrier depending on
the CPU type.
[ The rmb() will work on v6 and v7, segfault on v5. Dynamic
detection to add v5 support will be added later. ]
Signed-off-by: Jamie Iles <[email protected]>
Cc: Russell King <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Mikael Pettersson <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/perf/perf.h | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 454d5d5..75f941b 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -59,6 +59,18 @@
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#endif
+#ifdef __arm__
+#include "../../arch/arm/include/asm/unistd.h"
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define rmb() asm volatile("mov r0, #0xffff0fff; mov lr, pc;" \
+ "sub pc, r0, #95" ::: "r0", "lr", "cc", \
+ "memory")
+#define cpu_relax() asm volatile("":::"memory")
+#endif
+
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
On Fri, Dec 11, 2009 at 12:54:33PM +0000, tip-bot for Jamie Iles wrote:
> Commit-ID: 58e9f94138c1d9c47f6a63632ca7a78fc6dcc15f
> Gitweb: http://git.kernel.org/tip/58e9f94138c1d9c47f6a63632ca7a78fc6dcc15f
> Author: Jamie Iles <[email protected]>
> AuthorDate: Fri, 11 Dec 2009 12:20:09 +0000
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Fri, 11 Dec 2009 13:50:21 +0100
>
> perf tools: Allow building for ARM
>
> Add definitions of rmb() and cpu_relax() and include the ARM
> unistd.h header. The __kuser_memory_barrier helper in the helper
> page is used to provide the correct memory barrier depending on
> the CPU type.
>
> [ The rmb() will work on v6 and v7, segfault on v5. Dynamic
> detection to add v5 support will be added later. ]
Sorry Ingo, my comment probably wasn't clear enough. The helper that Mikael
suggested should work for _all_ ARM processors including v5 and earlier. It
will just be a branch to a nop.
Jamie
* Jamie Iles <[email protected]> wrote:
> On Fri, Dec 11, 2009 at 12:54:33PM +0000, tip-bot for Jamie Iles wrote:
> > Commit-ID: 58e9f94138c1d9c47f6a63632ca7a78fc6dcc15f
> > Gitweb: http://git.kernel.org/tip/58e9f94138c1d9c47f6a63632ca7a78fc6dcc15f
> > Author: Jamie Iles <[email protected]>
> > AuthorDate: Fri, 11 Dec 2009 12:20:09 +0000
> > Committer: Ingo Molnar <[email protected]>
> > CommitDate: Fri, 11 Dec 2009 13:50:21 +0100
> >
> > perf tools: Allow building for ARM
> >
> > Add definitions of rmb() and cpu_relax() and include the ARM
> > unistd.h header. The __kuser_memory_barrier helper in the helper
> > page is used to provide the correct memory barrier depending on
> > the CPU type.
> >
> > [ The rmb() will work on v6 and v7, segfault on v5. Dynamic
> > detection to add v5 support will be added later. ]
>
> Sorry Ingo, my comment probably wasn't clear enough. The helper that
> Mikael suggested should work for _all_ ARM processors including v5 and
> earlier. It will just be a branch to a nop.
oh - good! Too late to fix the comment, i already pushed it out - but as
long as the _code_ is fine it's a good tradeoff ;-)
Ingo
On Fri, Dec 11, 2009 at 02:38:08AM -0800, David Miller wrote:
> From: Ingo Molnar <[email protected]>
> Date: Fri, 11 Dec 2009 11:23:16 +0100
>
> >
> > * Jamie Iles <[email protected]> wrote:
> >
> >> +#ifdef __arm__
> >> +#include "../../arch/arm/include/asm/unistd.h"
> >> +#define rmb() asm volatile("":::"memory")
> >> +#define cpu_relax() asm volatile("":::"memory")
> >> +#endif
> >
> > cpu_relax() looks fine, but rmb() seems not to match the one that can be
> > found in arch/arm/:
>
> I think he did it this way so it can compile in the meantime,
> and that doing it right requires runtime cpu detection to
> select which barrier instruction is even available on the
> current ARM cpu.
We provide a way for userspace to be independent of the CPU for these
operations by providing code snippets up in the vector page for userspace
to call. The kernel places the correct code there according to the CPU
it's built for.
Look for __kuser_memory_barrier in arch/arm/kernel/entry-armv.S
On Fri, Dec 11, 2009 at 12:26:57PM +0100, Mikael Pettersson wrote:
> The correct solution is to invoke a kernel-exported CPU-specific helper
> function in the ARM kernel helper page.
>
> I see a __kuser_memory_barrier entry there which maps to smp_dmb.
That is indeed the correct way to do this - otherwise user programs will
be tied to particular architecture versions, and either an undefined
instruction fault will happen, or worse, the desired effect will not
happen.