2019-09-10 18:40:00

by Arnd Bergmann

Subject: [PATCH] arm64: fix unreachable code issue with cmpxchg

On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined
when CONFIG_OPTIMIZE_INLINING is set.
Clang then fails a compile-time assertion, because it cannot tell at
compile time what the size of the argument is:

mm/memcontrol.o: In function `__cmpxchg_mb':
memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'

Mark all of the cmpxchg() style functions as __always_inline to
ensure that the compiler can resolve the size argument at compile
time and optimize the assertion away.
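
The failure mode reduces to a pattern like the following standalone
sketch (a hypothetical reduction, not code from the tree): the extern
function is never defined, so the final link only succeeds if the
optimizer can prove the call dead, which requires the size dispatch
to be inlined into a caller with a constant size.

/* Hypothetical reduction of the BUILD_BUG()/inlining interaction. */
extern void assert_failed(void); /* never defined, like __compiletime_assert_175 */

static inline unsigned long do_op(void *ptr, int size)
{
	switch (size) {
	case 4: return *(unsigned int *)ptr;
	case 8: return *(unsigned long *)ptr;
	default:
		assert_failed();	/* must be optimized away */
		return 0;
	}
}

unsigned long caller(unsigned long *p)
{
	/* Inlined: size is the constant 8 and the default case is dead.
	 * Not inlined: do_op() keeps the call to assert_failed() and the
	 * link fails with an undefined reference.
	 */
	return do_op(p, sizeof(*p));
}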

Signed-off-by: Arnd Bergmann <[email protected]>
---
arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index a1398f2f9994..fd64dc8a235f 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -19,7 +19,7 @@
* acquire+release for the latter.
*/
#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
-static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
{ \
u##sz ret; \
unsigned long tmp; \
@@ -62,7 +62,7 @@ __XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
#undef __XCHG_CASE

#define __XCHG_GEN(sfx) \
-static inline unsigned long __xchg##sfx(unsigned long x, \
+static __always_inline unsigned long __xchg##sfx(unsigned long x, \
volatile void *ptr, \
int size) \
{ \
@@ -103,8 +103,9 @@ __XCHG_GEN(_mb)
#define arch_xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__)
#define arch_xchg(...) __xchg_wrapper( _mb, __VA_ARGS__)

-#define __CMPXCHG_CASE(name, sz) \
-static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
+#define __CMPXCHG_CASE(name, sz) \
+static __always_inline u##sz \
+__cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \
u##sz new) \
{ \
@@ -148,7 +149,7 @@ __CMPXCHG_DBL(_mb)
#undef __CMPXCHG_DBL

#define __CMPXCHG_GEN(sfx) \
-static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
+static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
unsigned long old, \
unsigned long new, \
int size) \
@@ -230,7 +231,7 @@ __CMPXCHG_GEN(_mb)
})

#define __CMPWAIT_CASE(w, sfx, sz) \
-static inline void __cmpwait_case_##sz(volatile void *ptr, \
+static __always_inline void __cmpwait_case_##sz(volatile void *ptr, \
unsigned long val) \
{ \
unsigned long tmp; \
@@ -255,7 +256,7 @@ __CMPWAIT_CASE( , , 64);
#undef __CMPWAIT_CASE

#define __CMPWAIT_GEN(sfx) \
-static inline void __cmpwait##sfx(volatile void *ptr, \
+static __always_inline void __cmpwait##sfx(volatile void *ptr, \
unsigned long val, \
int size) \
{ \
--
2.20.0


2019-09-10 18:40:44

by Nick Desaulniers

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Mon, Sep 9, 2019 at 1:21 PM Arnd Bergmann <[email protected]> wrote:
>
> On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined
> when CONFIG_OPTIMIZE_INLINING is set.
> Clang then fails a compile-time assertion, because it cannot tell at
> compile time what the size of the argument is:
>
> mm/memcontrol.o: In function `__cmpxchg_mb':
> memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
> memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'
>
> Mark all of the cmpxchg() style functions as __always_inline to
> ensure that the compiler can resolve the size argument at compile
> time and optimize the assertion away.

Acked-by: Nick Desaulniers <[email protected]>

>
> Signed-off-by: Arnd Bergmann <[email protected]>



--
Thanks,
~Nick Desaulniers

2019-09-10 18:41:18

by Nick Desaulniers

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Mon, Sep 9, 2019 at 2:06 PM Nick Desaulniers <[email protected]> wrote:
>
> On Mon, Sep 9, 2019 at 1:21 PM Arnd Bergmann <[email protected]> wrote:
> >
> > On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined
> > when CONFIG_OPTIMIZE_INLINING is set.
> > Clang then fails a compile-time assertion, because it cannot tell at
> > compile time what the size of the argument is:
> >
> > mm/memcontrol.o: In function `__cmpxchg_mb':
> > memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
> > memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'
> >
> > Mark all of the cmpxchg() style functions as __always_inline to
> > ensure that the compiler can resolve the size argument at compile
> > time and optimize the assertion away.
>
> Acked-by: Nick Desaulniers <[email protected]>

Also, I think a Link tag may be appropriate as I believe it fixes this report:

Reported-by: Nathan Chancellor <[email protected]>
Link: https://github.com/ClangBuiltLinux/linux/issues/648

>
> >
> > Signed-off-by: Arnd Bergmann <[email protected]>



--
Thanks,
~Nick Desaulniers

2019-09-10 18:50:13

by Will Deacon

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined
> when CONFIG_OPTIMIZE_INLINING is set.

Hmm. Given that CONFIG_OPTIMIZE_INLINING has also been shown to break
assignment of local 'register' variables on GCC, perhaps we should just
disable that option for arm64 (at least) since we don't have any toolchains
that seem to like it very much! I'd certainly prefer that over playing
whack-a-mole with __always_inline.

Will

2019-09-10 18:51:23

by Arnd Bergmann

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Tue, Sep 10, 2019 at 9:46 AM Will Deacon <[email protected]> wrote:
>
> On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> > On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined
> > when CONFIG_OPTIMIZE_INLINING is set.
>
> Hmm. Given that CONFIG_OPTIMIZE_INLINING has also been shown to break
> assignment of local 'register' variables on GCC, perhaps we should just
> disable that option for arm64 (at least) since we don't have any toolchains
> that seem to like it very much! I'd certainly prefer that over playing
> whack-a-mole with __always_inline.

Right, but I can also see good reasons to keep going:

- In theory, CONFIG_OPTIMIZE_INLINING is the right thing to do -- the compilers
also make some particularly bad decisions around inlining when each inline
turns into an __always_inline, as has been the case in Linux for a long time.
I think in most cases, we get better object code with CONFIG_OPTIMIZE_INLINING
and in the cases where this is worse, it may be better to fix the compiler.
The new "asm_inline" macro should also help with that.

- The x86 folks have apparently whacked most of the moles already, see this
commit from 2008

commit 3f9b5cc018566ad9562df0648395649aebdbc5e0
Author: Ingo Molnar <[email protected]>
Date: Fri Jul 18 16:30:05 2008 +0200

x86: re-enable OPTIMIZE_INLINING

re-enable OPTIMIZE_INLINING more widely. Jeff Dike fixed the remaining
outstanding issue in this commit:

| commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6
| Author: Jeff Dike <[email protected]>
| Date: Mon Jul 7 13:36:56 2008 -0400
|
| [UML] fix gcc ICEs and unresolved externs
[...]
| This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing
| back the possibility of Uli's crash. If that happens, we'll debug it.

it's still default-off and thus opt-in.

- The inlining decisions of gcc and clang are already very different, and
the bugs we are finding around that are much more common than
the difference between CONFIG_OPTIMIZE_INLINING=y/n on a
given compiler.
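
For reference, what the option changes is the kernel's definition of
'inline' itself; roughly paraphrased from include/linux/compiler_types.h
around this time (the exact attribute list varies by kernel version):

/* Approximate paraphrase of include/linux/compiler_types.h. */
#if !defined(CONFIG_OPTIMIZE_INLINING)
/* 'inline' is forced: every inline function must be inlined */
#define inline inline __attribute__((__always_inline__)) __gnu_inline \
	__inline_maybe_unused notrace
#else
/* 'inline' is only a hint; the compiler makes the final decision */
#define inline inline __gnu_inline __inline_maybe_unused notrace
#endif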

Arnd

2019-09-10 18:53:13

by Andrew Murray

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined
> when CONFIG_OPTIMIZE_INLINING is set.
> Clang then fails a compile-time assertion, because it cannot tell at
> compile time what the size of the argument is:
>
> mm/memcontrol.o: In function `__cmpxchg_mb':
> memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
> memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'
>
> Mark all of the cmpxchg() style functions as __always_inline to
> ensure that the compiler can resolve the size argument at compile
> time and optimize the assertion away.
>
> Signed-off-by: Arnd Bergmann <[email protected]>
> ---

I was able to reproduce this with the following:

$ git describe HEAD
next-20190904

$ clang --version
Android (5821526 based on r365631) clang version 9.0.6 (https://android.googlesource.com/toolchain/llvm-project 85305eaf1e90ff529d304abac8a979e1d967f0a2) (based on LLVM 9.0.6svn)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/andrewm/android-clang/clang-r365631/bin

$ make O=~/linux-build/ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- CC=clang -j56 allyesconfig Image

(I was unable to reproduce with defconfig+OPTIMIZE_INLINING).

However...

> arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> 1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> index a1398f2f9994..fd64dc8a235f 100644
> --- a/arch/arm64/include/asm/cmpxchg.h
> +++ b/arch/arm64/include/asm/cmpxchg.h
> @@ -19,7 +19,7 @@
> * acquire+release for the latter.
> */
> #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\

This hunk isn't needed, there is no BUILD_BUG here.


> { \
> u##sz ret; \
> unsigned long tmp; \
> @@ -62,7 +62,7 @@ __XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
> #undef __XCHG_CASE
>
> #define __XCHG_GEN(sfx) \
> -static inline unsigned long __xchg##sfx(unsigned long x, \
> +static __always_inline unsigned long __xchg##sfx(unsigned long x, \
> volatile void *ptr, \
> int size) \
> { \
> @@ -103,8 +103,9 @@ __XCHG_GEN(_mb)
> #define arch_xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__)
> #define arch_xchg(...) __xchg_wrapper( _mb, __VA_ARGS__)
>
> -#define __CMPXCHG_CASE(name, sz) \
> -static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
> +#define __CMPXCHG_CASE(name, sz) \
> +static __always_inline u##sz \
> +__cmpxchg_case_##name##sz(volatile void *ptr, \

This hunk isn't needed, there is no BUILD_BUG here.

> u##sz old, \
> u##sz new) \
> { \
> @@ -148,7 +149,7 @@ __CMPXCHG_DBL(_mb)
> #undef __CMPXCHG_DBL
>
> #define __CMPXCHG_GEN(sfx) \
> -static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
> +static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
> unsigned long old, \
> unsigned long new, \
> int size) \
> @@ -230,7 +231,7 @@ __CMPXCHG_GEN(_mb)
> })
>
> #define __CMPWAIT_CASE(w, sfx, sz) \
> -static inline void __cmpwait_case_##sz(volatile void *ptr, \
> +static __always_inline void __cmpwait_case_##sz(volatile void *ptr, \
> unsigned long val) \

This hunk isn't needed, there is no BUILD_BUG here.

> { \
> unsigned long tmp; \
> @@ -255,7 +256,7 @@ __CMPWAIT_CASE( , , 64);
> #undef __CMPWAIT_CASE
>
> #define __CMPWAIT_GEN(sfx) \
> -static inline void __cmpwait##sfx(volatile void *ptr, \
> +static __always_inline void __cmpwait##sfx(volatile void *ptr, \
> unsigned long val, \
> int size) \
> { \

Alternatively, is it possible to replace the BUILD_BUGs with something else?

I think that because we use BUILD_BUG at the end of a switch statement,
we assume that size is known at compile time; for this reason we should
ensure the function containing the BUILD_BUG is __always_inline.
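
For context, BUILD_BUG() bottoms out in roughly the mechanism below
(simplified from include/linux/compiler.h; details vary by version).
The extern function is never defined anywhere, so any call that
survives optimization becomes an undefined reference at link time --
exactly the __compiletime_assert_175 error quoted above:

/* Simplified sketch of the kernel's compile-time assertion. With gcc,
 * __compiletime_error() maps to __attribute__((error(msg))) and the
 * build fails during compilation; clang at the time ignored it, so
 * the stray call survived to the linker instead.
 */
#define __compiletime_assert(condition, msg, prefix, suffix)		\
	do {								\
		extern void prefix ## suffix(void)			\
			__compiletime_error(msg);			\
		if (!(condition))					\
			prefix ## suffix();				\
	} while (0)

#define _compiletime_assert(condition, msg, prefix, suffix) \
	__compiletime_assert(condition, msg, prefix, suffix)

#define compiletime_assert(condition, msg) \
	_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)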

Looking across the kernel where BUILD_BUG is used as a default in a switch
statement ($ git grep -B 3 BUILD_BUG\( | grep default), most instances are
within macros, but many are found in an __always_inline function:

arch/x86/kvm/cpuid.h
mm/kasan/generic.c

Though some are not:

include/linux/signal.h
arch/arm64/include/asm/arm_dsu_pmu.h

I wonder if there may be a latent mole ready to whack with pmu.h?

Anyway with just the three remaining hunks:

Reviewed-by: Andrew Murray <[email protected]>
Tested-by: Andrew Murray <[email protected]>

> --
> 2.20.0
>

2019-09-10 18:53:37

by Arnd Bergmann

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Tue, Sep 10, 2019 at 11:23 AM Andrew Murray <[email protected]> wrote:

>
> > arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> > 1 file changed, 8 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> > index a1398f2f9994..fd64dc8a235f 100644
> > --- a/arch/arm64/include/asm/cmpxchg.h
> > +++ b/arch/arm64/include/asm/cmpxchg.h
> > @@ -19,7 +19,7 @@
> > * acquire+release for the latter.
> > */
> > #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> > -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> > +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
>
> This hunk isn't needed, there is no BUILD_BUG here.

Right, I noticed this, but it seemed like a good idea regardless given the small
size of the function compared with the overhead of a function call. We clearly
want these to be inlined all the time.

Same for the others.

> Alternatively is it possible to replace the BUILD_BUG's with something else?
>
> I think because we use BUILD_BUG at the end of a switch statement, we make
> the assumption that size is known at compile time, for this reason we should
> ensure the function containing the BUILD_BUG is __always_inline.
>
> Looking across the kernel where BUILD_BUG is used as a default in a switch
> statement ($ git grep -B 3 BUILD_BUG\( | grep default), most instances are
> within macros, but many are found in an __always_inline function:
>
> arch/x86/kvm/cpuid.h
> mm/kasan/generic.c
>
> Though some are not:
>
> include/linux/signal.h
> arch/arm64/include/asm/arm_dsu_pmu.h
>
> I wonder if there may be a latent mole ready to whack with pmu.h?

Right, it can't hurt to annotate those as well. I actually have another
fixup for linux/signal.h that I would have to revisit at some point.
See https://bugs.llvm.org/show_bug.cgi?id=38789, I think this is
fixed with clang-9 now, but maybe not with clang-8.

Arnd

2019-09-10 18:54:33

by Masahiro Yamada

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Tue, Sep 10, 2019 at 6:38 PM Arnd Bergmann <[email protected]> wrote:
>
> On Tue, Sep 10, 2019 at 11:23 AM Andrew Murray <[email protected]> wrote:
>
> >
> > > arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> > > 1 file changed, 8 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> > > index a1398f2f9994..fd64dc8a235f 100644
> > > --- a/arch/arm64/include/asm/cmpxchg.h
> > > +++ b/arch/arm64/include/asm/cmpxchg.h
> > > @@ -19,7 +19,7 @@
> > > * acquire+release for the latter.
> > > */
> > > #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> > > -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> > > +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
> >
> > This hunk isn't needed, there is no BUILD_BUG here.
>
> Right, I noticed this, but it seemed like a good idea regardless given the small
> size of the function compared with the overhead of a function call. We clearly
> want these to be inlined all the time.


Generally speaking, this should be judged by the compiler, not by humans.
If the function is quite small compared with the cost of a function call,
the compiler will decide to inline it anyway.
(If the compiler's inlining heuristic is not good, we should fix the compiler.)

So, I personally agree with Andrew Murray.
We should use __always_inline only when we must do so.

Masahiro Yamada



--
Best Regards
Masahiro Yamada

2019-09-10 18:55:37

by Andrew Murray

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Tue, Sep 10, 2019 at 11:38:37AM +0200, Arnd Bergmann wrote:
> On Tue, Sep 10, 2019 at 11:23 AM Andrew Murray <[email protected]> wrote:
>
> >
> > > arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> > > 1 file changed, 8 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> > > index a1398f2f9994..fd64dc8a235f 100644
> > > --- a/arch/arm64/include/asm/cmpxchg.h
> > > +++ b/arch/arm64/include/asm/cmpxchg.h
> > > @@ -19,7 +19,7 @@
> > > * acquire+release for the latter.
> > > */
> > > #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> > > -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> > > +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
> >
> > This hunk isn't needed, there is no BUILD_BUG here.
>
> Right, I noticed this, but it seemed like a good idea regardless given the small
> size of the function compared with the overhead of a function call. We clearly
> want these to be inlined all the time.
>
> Same for the others.

I'm not so sure - isn't the point of something like OPTIMIZE_INLINING to give
more freedom to the tooling (and by virtue of the option - the user)?

Surely any decent optimising compiler will do the right thing by inlining small
trivial functions that are annotated with inline? And if not, the compiler
should be fixed not the kernel - unless of course it causes an issue - and then
we should fix those specific cases.

There must be dozens of trivial functions that are marked inline; I
don't think it would make sense to mark those as __always_inline. For
example, the atomics in atomic_lse.h are trivial but only marked inline.
We obviously want them inlined, though I don't think we should babysit
the compiler to do the right thing.

(Also, the commit message implies that all the hunks are required to fix
this particular issue, which they are not.)

Thanks,

Andrew Murray


2019-09-10 19:03:34

by Arnd Bergmann

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Tue, Sep 10, 2019 at 3:24 PM Will Deacon <[email protected]> wrote:
> On Tue, Sep 10, 2019 at 10:04:24AM +0200, Arnd Bergmann wrote:
> > On Tue, Sep 10, 2019 at 9:46 AM Will Deacon <[email protected]> wrote:
> > - In theory, CONFIG_OPTIMIZE_INLINING is the right thing to do -- the compilers
> > also make some particularly bad decisions around inlining when each inline
> > turns into an __always_inline, as has been the case in Linux for a long time.
> > I think in most cases, we get better object code with CONFIG_OPTIMIZE_INLINING
> > and in the cases where this is worse, it may be better to fix the compiler.
> > The new "asm_inline" macro should also help with that.
>
> Sure, in theory, but it looks like there isn't a single arm64 compiler out
> there which gets it right.

I don't see anything architecture specific in here. When the option was
made generic instead of x86 specific, I fixed a ton of bugs that showed
up all over the place. If we don't want it on arm64, I'd suggest making
it a per-architecture opt-in instead of an opt-out.

> >
> > | commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6
> > | Author: Jeff Dike <[email protected]>
> > | Date: Mon Jul 7 13:36:56 2008 -0400
> > |
> > | [UML] fix gcc ICEs and unresolved externs
> > [...]
> > | This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing
> > | back the possibility of Uli's crash. If that happens, we'll debug it.
> >
> > it's still default-off and thus opt-in.
>
> This appears to be fixing an ICE, whereas the issue reported recently for
> arm64 gcc was silent miscompilation of atomics in some cases. Unfortunately,
> I can't seem to find the thread :/ Mark, you were on that one too, right?

Sorry, that reference was unclear, I meant the text for commit 3f9b5cc01856,
which in turn contains a citation of the earlier 4f81c5350b44bc commit.

> > - The inlining decisions of gcc and clang are already very different, and
> > the bugs we are finding around that are much more common than
> > the difference between CONFIG_OPTIMIZE_INLINING=y/n on a
> > given compiler.
>
> Sorry, not sure that you're getting at here.
>
> Anyway, the second version of your patch looks fine, but I would still
> prefer to go the extra mile and disable CONFIG_OPTIMIZE_INLINING altogether
> given that I don't think it's a safe option to enable for us.

The point is that function inlining frequently causes all kinds of problems
when code was written in a way that is not entirely portable but
depends on the behavior of a particular implementation. I've fixed
lots of bugs based on any of these:

- gcc-4.0 and higher started ignoring 'inline' without
__attribute__((always_inline)), so a workaround got applied
in 2.6.26, and this turned into CONFIG_OPTIMIZE_INLINING=n
later
- gcc -O2 makes different decisions compared to -Os and -O3,
which is an endless source of "uninitialized variable" warnings
and similar problems (a sketch follows after this list)
- Some configuration options like KASAN grow the code to result
in less inlining
- clang and gcc behave completely differently
- gcc is traditionally bad at guessing the size of inline assembly
to make a good decision
- newer compilers tend to get better at identifying which functions
benefit from inlining, which changes the balance

CONFIG_OPTIMIZE_INLINING clearly adds to that mess, but it's
not the worst part. The only real solution tends to be to write
portable and correct code rather than making assumptions
about compiler behavior.

Arnd

2019-09-10 20:15:34

by Will Deacon

Subject: Re: [PATCH] arm64: fix unreachable code issue with cmpxchg

On Tue, Sep 10, 2019 at 10:04:24AM +0200, Arnd Bergmann wrote:
> On Tue, Sep 10, 2019 at 9:46 AM Will Deacon <[email protected]> wrote:
> >
> > On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> > > On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined
> > > when CONFIG_OPTIMIZE_INLINING is set.
> >
> > Hmm. Given that CONFIG_OPTIMIZE_INLINING has also been shown to break
> > assignment of local 'register' variables on GCC, perhaps we should just
> > disable that option for arm64 (at least) since we don't have any toolchains
> > that seem to like it very much! I'd certainly prefer that over playing
> > whack-a-mole with __always_inline.
>
> Right, but I can also see good reasons to keep going:
>
> - In theory, CONFIG_OPTIMIZE_INLINING is the right thing to do -- the compilers
> also make some particularly bad decisions around inlining when each inline
> turns into an __always_inline, as has been the case in Linux for a long time.
> I think in most cases, we get better object code with CONFIG_OPTIMIZE_INLINING
> and in the cases where this is worse, it may be better to fix the compiler.
> The new "asm_inline" macro should also help with that.

Sure, in theory, but it looks like there isn't a single arm64 compiler out
there which gets it right.

> - The x86 folks have apparently whacked most of the moles already, see this
> commit from 2008
>
> commit 3f9b5cc018566ad9562df0648395649aebdbc5e0
> Author: Ingo Molnar <[email protected]>
> Date: Fri Jul 18 16:30:05 2008 +0200
>
> x86: re-enable OPTIMIZE_INLINING
>
> re-enable OPTIMIZE_INLINING more widely. Jeff Dike fixed the remaining
> outstanding issue in this commit:
>
> | commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6
> | Author: Jeff Dike <[email protected]>
> | Date: Mon Jul 7 13:36:56 2008 -0400
> |
> | [UML] fix gcc ICEs and unresolved externs
> [...]
> | This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing
> | back the possibility of Uli's crash. If that happens, we'll debug it.
>
> it's still default-off and thus opt-in.

This appears to be fixing an ICE, whereas the issue reported recently for
arm64 gcc was silent miscompilation of atomics in some cases. Unfortunately,
I can't seem to find the thread :/ Mark, you were on that one too, right?

> - The inlining decisions of gcc and clang are already very different, and
> the bugs we are finding around that are much more common than
> the difference between CONFIG_OPTIMIZE_INLINING=y/n on a
> given compiler.

Sorry, not sure what you're getting at here.

Anyway, the second version of your patch looks fine, but I would still
prefer to go the extra mile and disable CONFIG_OPTIMIZE_INLINING altogether
given that I don't think it's a safe option to enable for us.

Will