2013-10-12 17:10:14

by Ingo Molnar

[permalink] [raw]
Subject: [GIT PULL] compiler/gcc4: Add quirk for 'asm goto' miscompilation bug

Linus,

Please pull the latest core-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-urgent-for-linus

HEAD: 3f0116c3238a96bc18ad4b4acefe4e7be32fa861 compiler/gcc4: Add quirk for 'asm goto' miscompilation bug

This is the fix for the GCC miscompilation discussed in the following lkml
thread:

[x86] BUG: unable to handle kernel paging request at 00740060

The bug in GCC has been fixed by Jakub and the fix will be part of the GCC
4.8.2 release expected to be released next week - so the quirk's version
test checks for <= 4.8.1.

The quirk is only added to compiler-gcc4.h and not to the higher level
compiler.h because all asm goto uses are behind a feature check.

Thanks,

Ingo

------------------>
Ingo Molnar (1):
compiler/gcc4: Add quirk for 'asm goto' miscompilation bug


arch/arm/include/asm/jump_label.h | 2 +-
arch/mips/include/asm/jump_label.h | 2 +-
arch/powerpc/include/asm/jump_label.h | 2 +-
arch/s390/include/asm/jump_label.h | 2 +-
arch/sparc/include/asm/jump_label.h | 2 +-
arch/x86/include/asm/cpufeature.h | 6 +++---
arch/x86/include/asm/jump_label.h | 2 +-
arch/x86/include/asm/mutex_64.h | 4 ++--
include/linux/compiler-gcc4.h | 15 +++++++++++++++
9 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index bfc198c..863c892 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -16,7 +16,7 @@

static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("1:\n\t"
+ asm_volatile_goto("1:\n\t"
JUMP_LABEL_NOP "\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 4d6d77e..e194f95 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -22,7 +22,7 @@

static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("1:\tnop\n\t"
+ asm_volatile_goto("1:\tnop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index ae098c4..f016bb6 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -19,7 +19,7 @@

static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("1:\n\t"
+ asm_volatile_goto("1:\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 6c32190..346b1c8 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -15,7 +15,7 @@

static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("0: brcl 0,0\n"
+ asm_volatile_goto("0: brcl 0,0\n"
".pushsection __jump_table, \"aw\"\n"
ASM_ALIGN "\n"
ASM_PTR " 0b, %l[label], %0\n"
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 5080d16..ec2e2e2 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -9,7 +9,7 @@

static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("1:\n\t"
+ asm_volatile_goto("1:\n\t"
"nop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d3f5c63..89270b4 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
* Catch too early usage of this before alternatives
* have run.
*/
- asm goto("1: jmp %l[t_warn]\n"
+ asm_volatile_goto("1: jmp %l[t_warn]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
@@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)

#endif

- asm goto("1: jmp %l[t_no]\n"
+ asm_volatile_goto("1: jmp %l[t_no]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
@@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
* have. Thus, we force the jump to the widest, 4-byte, signed relative
* offset even though the last would often fit in less bytes.
*/
- asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+ asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 64507f3..6a2cefb 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -18,7 +18,7 @@

static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("1:"
+ asm_volatile_goto("1:"
".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index e7e6751..07537a4 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -20,7 +20,7 @@
static inline void __mutex_fastpath_lock(atomic_t *v,
void (*fail_fn)(atomic_t *))
{
- asm volatile goto(LOCK_PREFIX " decl %0\n"
+ asm_volatile_goto(LOCK_PREFIX " decl %0\n"
" jns %l[exit]\n"
: : "m" (v->counter)
: "memory", "cc"
@@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
static inline void __mutex_fastpath_unlock(atomic_t *v,
void (*fail_fn)(atomic_t *))
{
- asm volatile goto(LOCK_PREFIX " incl %0\n"
+ asm_volatile_goto(LOCK_PREFIX " incl %0\n"
" jg %l[exit]\n"
: : "m" (v->counter)
: "memory", "cc"
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 842de22..ded4299 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -65,6 +65,21 @@
#define __visible __attribute__((externally_visible))
#endif

+/*
+ * GCC 'asm goto' miscompiles certain code sequences:
+ *
+ * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
+ * Fixed in GCC 4.8.2 and later versions.
+ *
+ * (asm goto is automatically volatile - the naming reflects this.)
+ */
+#if GCC_VERSION <= 40801
+# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
+#else
+# define asm_volatile_goto(x...) do { asm goto(x); } while (0)
+#endif

#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
#if GCC_VERSION >= 40400


2014-02-13 03:09:51

by Steven Noonan

[permalink] [raw]
Subject: Re: [GIT PULL] compiler/gcc4: Add quirk for 'asm goto' miscompilation bug

Resurrecting this thread, as I'm running with GCC 4.8.2 and am
encountering miscompiles without this quirk being enabled for my
compiler version. I'm having trouble pinning down the miscompilation
itself, but I have a problem that seems reliably reproducible in my
environment.

I noticed that when I launch/destroy a KVM guest, the guest memory is
staying mapped. i.e. host has 200MB used, guest launches, ~4300MB
used, guest terminates, still 4300MB used and no qemu-system-x86_64
process hanging around. Thus depending on the guest memory size I can
exhaust host memory after a few guest reboots.

If I change the GCC_VERSION check for the asm_volatile_goto quirk to
include 4.8.2, then KVM guests are properly cleaned up.

The test case provided in the 'asm goto' bugzilla entry doesn't fail
on my compiler: gcc.gnu.org/bugzilla/show_bug.cgi?id=58670

So is there some other 'asm goto' bug we haven't yet fully uncovered
and reported to GCC upstream? Anyone have any idea where to look for
the miscompilation? I started by looking at the
mmu_notifier_unregister() function, since that seems like the obvious
place for a guest memory unmap problem. mmu_notifier_unregister()
calls mmdrop(), which uses the atomic_dec_and_test macro to determine
whether or not to call __mmdrop(). The generated code looks correct to
me, but it's possibly not this callsite that's broken:

On Sat, Oct 12, 2013 at 10:10 AM, Ingo Molnar <[email protected]> wrote:
> Linus,
>
> Please pull the latest core-urgent-for-linus git tree from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-urgent-for-linus
>
> HEAD: 3f0116c3238a96bc18ad4b4acefe4e7be32fa861 compiler/gcc4: Add quirk for 'asm goto' miscompilation bug
>
> This is the fix for the GCC miscompilation discussed in the following lkml
> thread:
>
> [x86] BUG: unable to handle kernel paging request at 00740060
>
> The bug in GCC has been fixed by Jakub and the fix will be part of the GCC
> 4.8.2 release expected to be released next week - so the quirk's version
> test checks for <= 4.8.1.
>
> The quirk is only added to compiler-gcc4.h and not to the higher level
> compiler.h because all asm goto uses are behind a feature check.
>
> Thanks,
>
> Ingo
>
> ------------------>
> Ingo Molnar (1):
> compiler/gcc4: Add quirk for 'asm goto' miscompilation bug
>
>
> arch/arm/include/asm/jump_label.h | 2 +-
> arch/mips/include/asm/jump_label.h | 2 +-
> arch/powerpc/include/asm/jump_label.h | 2 +-
> arch/s390/include/asm/jump_label.h | 2 +-
> arch/sparc/include/asm/jump_label.h | 2 +-
> arch/x86/include/asm/cpufeature.h | 6 +++---
> arch/x86/include/asm/jump_label.h | 2 +-
> arch/x86/include/asm/mutex_64.h | 4 ++--
> include/linux/compiler-gcc4.h | 15 +++++++++++++++
> 9 files changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
> index bfc198c..863c892 100644
> --- a/arch/arm/include/asm/jump_label.h
> +++ b/arch/arm/include/asm/jump_label.h
> @@ -16,7 +16,7 @@
>
> static __always_inline bool arch_static_branch(struct static_key *key)
> {
> - asm goto("1:\n\t"
> + asm_volatile_goto("1:\n\t"
> JUMP_LABEL_NOP "\n\t"
> ".pushsection __jump_table, \"aw\"\n\t"
> ".word 1b, %l[l_yes], %c0\n\t"
> diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
> index 4d6d77e..e194f95 100644
> --- a/arch/mips/include/asm/jump_label.h
> +++ b/arch/mips/include/asm/jump_label.h
> @@ -22,7 +22,7 @@
>
> static __always_inline bool arch_static_branch(struct static_key *key)
> {
> - asm goto("1:\tnop\n\t"
> + asm_volatile_goto("1:\tnop\n\t"
> "nop\n\t"
> ".pushsection __jump_table, \"aw\"\n\t"
> WORD_INSN " 1b, %l[l_yes], %0\n\t"
> diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
> index ae098c4..f016bb6 100644
> --- a/arch/powerpc/include/asm/jump_label.h
> +++ b/arch/powerpc/include/asm/jump_label.h
> @@ -19,7 +19,7 @@
>
> static __always_inline bool arch_static_branch(struct static_key *key)
> {
> - asm goto("1:\n\t"
> + asm_volatile_goto("1:\n\t"
> "nop\n\t"
> ".pushsection __jump_table, \"aw\"\n\t"
> JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
> diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
> index 6c32190..346b1c8 100644
> --- a/arch/s390/include/asm/jump_label.h
> +++ b/arch/s390/include/asm/jump_label.h
> @@ -15,7 +15,7 @@
>
> static __always_inline bool arch_static_branch(struct static_key *key)
> {
> - asm goto("0: brcl 0,0\n"
> + asm_volatile_goto("0: brcl 0,0\n"
> ".pushsection __jump_table, \"aw\"\n"
> ASM_ALIGN "\n"
> ASM_PTR " 0b, %l[label], %0\n"
> diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
> index 5080d16..ec2e2e2 100644
> --- a/arch/sparc/include/asm/jump_label.h
> +++ b/arch/sparc/include/asm/jump_label.h
> @@ -9,7 +9,7 @@
>
> static __always_inline bool arch_static_branch(struct static_key *key)
> {
> - asm goto("1:\n\t"
> + asm_volatile_goto("1:\n\t"
> "nop\n\t"
> "nop\n\t"
> ".pushsection __jump_table, \"aw\"\n\t"
> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index d3f5c63..89270b4 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
> * Catch too early usage of this before alternatives
> * have run.
> */
> - asm goto("1: jmp %l[t_warn]\n"
> + asm_volatile_goto("1: jmp %l[t_warn]\n"
> "2:\n"
> ".section .altinstructions,\"a\"\n"
> " .long 1b - .\n"
> @@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
>
> #endif
>
> - asm goto("1: jmp %l[t_no]\n"
> + asm_volatile_goto("1: jmp %l[t_no]\n"
> "2:\n"
> ".section .altinstructions,\"a\"\n"
> " .long 1b - .\n"
> @@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
> * have. Thus, we force the jump to the widest, 4-byte, signed relative
> * offset even though the last would often fit in less bytes.
> */
> - asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
> + asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
> "2:\n"
> ".section .altinstructions,\"a\"\n"
> " .long 1b - .\n" /* src offset */
> diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
> index 64507f3..6a2cefb 100644
> --- a/arch/x86/include/asm/jump_label.h
> +++ b/arch/x86/include/asm/jump_label.h
> @@ -18,7 +18,7 @@
>
> static __always_inline bool arch_static_branch(struct static_key *key)
> {
> - asm goto("1:"
> + asm_volatile_goto("1:"
> ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
> ".pushsection __jump_table, \"aw\" \n\t"
> _ASM_ALIGN "\n\t"
> diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
> index e7e6751..07537a4 100644
> --- a/arch/x86/include/asm/mutex_64.h
> +++ b/arch/x86/include/asm/mutex_64.h
> @@ -20,7 +20,7 @@
> static inline void __mutex_fastpath_lock(atomic_t *v,
> void (*fail_fn)(atomic_t *))
> {
> - asm volatile goto(LOCK_PREFIX " decl %0\n"
> + asm_volatile_goto(LOCK_PREFIX " decl %0\n"
> " jns %l[exit]\n"
> : : "m" (v->counter)
> : "memory", "cc"
> @@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
> static inline void __mutex_fastpath_unlock(atomic_t *v,
> void (*fail_fn)(atomic_t *))
> {
> - asm volatile goto(LOCK_PREFIX " incl %0\n"
> + asm_volatile_goto(LOCK_PREFIX " incl %0\n"
> " jg %l[exit]\n"
> : : "m" (v->counter)
> : "memory", "cc"
> diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
> index 842de22..ded4299 100644
> --- a/include/linux/compiler-gcc4.h
> +++ b/include/linux/compiler-gcc4.h
> @@ -65,6 +65,21 @@
> #define __visible __attribute__((externally_visible))
> #endif
>
> +/*
> + * GCC 'asm goto' miscompiles certain code sequences:
> + *
> + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
> + *
> + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
> + * Fixed in GCC 4.8.2 and later versions.
> + *
> + * (asm goto is automatically volatile - the naming reflects this.)
> + */
> +#if GCC_VERSION <= 40801
> +# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
> +#else
> +# define asm_volatile_goto(x...) do { asm goto(x); } while (0)
> +#endif
>
> #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
> #if GCC_VERSION >= 40400
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2014-02-13 04:11:52

by Linus Torvalds

[permalink] [raw]
Subject: Re: [GIT PULL] compiler/gcc4: Add quirk for 'asm goto' miscompilation bug

On Wed, Feb 12, 2014 at 7:09 PM, Steven Noonan <[email protected]> wrote:
>
> If I change the GCC_VERSION check for the asm_volatile_goto quirk to
> include 4.8.2, then KVM guests are properly cleaned up.

Ok, I guess that means we should just make the quirk unconditional.

Ingo, do you want to do that or should I?

> So is there some other 'asm goto' bug we haven't yet fully uncovered
> and reported to GCC upstream?

Not to my knowledge. But I'm sure Jakub&co would love to have a test-case.

Sadly, gcc has that really annoying habit of making small changes
create *huge* changes in label numbers etc, and that's definitely the
case with the extra empty asm - it's basically impossible to compare
the generated asm with and without the workaround, because all the
label numbers change.

I have no idea how gcc people debug things like this, when the output
is so unstable.

Jakub, any suggestions to how Steven might be able to pinpoint where
the code generation problem lies?

Linus

2014-02-13 04:53:31

by Steven Noonan

[permalink] [raw]
Subject: [PATCH] compiler/gcc4: make quirk for asm_volatile_goto unconditional

I started noticing problems with KVM guest destruction on Linux 3.12+, where
guest memory wasn't being cleaned up. I bisected it down to the commit
introducing the new 'asm goto'-based atomics, and found this quirk was later
applied to those.

Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the known 'asm goto'
bug) I am still getting some kind of miscompilation. If I enable the
asm_volatile_goto quirk for my compiler, KVM guests are destroyed correctly and
the memory is cleaned up.

[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
Cc: Ingo Molnar <[email protected]>
Cc: Linus Torvalds <[email protected]>
Signed-off-by: Steven Noonan <[email protected]>
---
include/linux/compiler-gcc4.h | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index ded4299..2507fd2 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -75,11 +75,7 @@
*
* (asm goto is automatically volatile - the naming reflects this.)
*/
-#if GCC_VERSION <= 40801
-# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
-#else
-# define asm_volatile_goto(x...) do { asm goto(x); } while (0)
-#endif
+#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)

#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
#if GCC_VERSION >= 40400
--
1.8.5.4

2014-02-13 06:48:32

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH] compiler/gcc4: make quirk for asm_volatile_goto unconditional

On Wed, 2014-02-12 at 20:53 -0800, Steven Noonan wrote:
> I started noticing problems with KVM guest destruction on Linux 3.12+, where
> guest memory wasn't being cleaned up. I bisected it down to the commit
> introducing the new 'asm goto'-based atomics, and found this quirk was later
> applied to those.
>
> Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the known 'asm goto'
> bug) I am still getting some kind of miscompilation. If I enable the
> asm_volatile_goto quirk for my compiler, KVM guests are destroyed correctly and
> the memory is cleaned up.
>
> [1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
> Cc: Ingo Molnar <[email protected]>
> Cc: Linus Torvalds <[email protected]>

Cc: [email protected] v3.12+ ?

> Signed-off-by: Steven Noonan <[email protected]>
> ---
> include/linux/compiler-gcc4.h | 6 +-----
> 1 file changed, 1 insertion(+), 5 deletions(-)
>
> diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
> index ded4299..2507fd2 100644
> --- a/include/linux/compiler-gcc4.h
> +++ b/include/linux/compiler-gcc4.h
> @@ -75,11 +75,7 @@
> *
> * (asm goto is automatically volatile - the naming reflects this.)
> */
> -#if GCC_VERSION <= 40801
> -# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
> -#else
> -# define asm_volatile_goto(x...) do { asm goto(x); } while (0)
> -#endif
> +#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
>
> #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
> #if GCC_VERSION >= 40400

2014-02-13 07:01:27

by Steven Noonan

[permalink] [raw]
Subject: [PATCH v2] compiler/gcc4: make quirk for asm_volatile_goto unconditional

I started noticing problems with KVM guest destruction on Linux 3.12+, where
guest memory wasn't being cleaned up. I bisected it down to the commit
introducing the new 'asm goto'-based atomics, and found this quirk was later
applied to those.

Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the known 'asm goto'
bug) I am still getting some kind of miscompilation. If I enable the
asm_volatile_goto quirk for my compiler, KVM guests are destroyed correctly and
the memory is cleaned up.

[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
Cc: Ingo Molnar <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: [email protected]
Signed-off-by: Steven Noonan <[email protected]>
---

v2: Adding [email protected] to Cc.

include/linux/compiler-gcc4.h | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index ded4299..2507fd2 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -75,11 +75,7 @@
*
* (asm goto is automatically volatile - the naming reflects this.)
*/
-#if GCC_VERSION <= 40801
-# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
-#else
-# define asm_volatile_goto(x...) do { asm goto(x); } while (0)
-#endif
+#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)

#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
#if GCC_VERSION >= 40400
--
1.8.5.4

2014-02-13 07:48:41

by Jakub Jelinek

[permalink] [raw]
Subject: Re: [GIT PULL] compiler/gcc4: Add quirk for 'asm goto' miscompilation bug

On Wed, Feb 12, 2014 at 08:11:49PM -0800, Linus Torvalds wrote:
> Jakub, any suggestions to how Steven might be able to pinpoint where
> the code generation problem lies?

For a suspected wrong-code where you have no idea where the problem is from
debugging or oops etc., usually the best way is to bisect first at the *.o
level between (ABI compatible) objects built in a way that it works (in this
case supposedly the asm goto workaround you have in the kernel, but
generally it can be -O0 compilation, a different version of the compiler,
some other compiler flags that make the issue go away) and objects built in
a way that fails, once you narrow it down to (hopefully a single) object
where everything built the "bad" way but that single object works in the end
and everything built the "good" way but that single object fails, the next
step is to narrow it down to a single routine (unless there is e.g. just a
single asm goto left in the TU). For that one can try e.g. optimize
attribute on selected functions, or in this case supposedly preprocessing
the file and bisecting between portions of the preprocessed file from "good"
preprocessed file (with the asm goto workarounds applied) and "bad"
preprocessed file (without them) - count the number of asm gotos there and
bisect to find which asm goto matters. Or it is also possible to bisect
at the *.s level (easiest is to build with -fno-asynchronous-unwind-tables
-fno-exceptions -g0 if possible to decrease number of labels, build twice
("good" and "bad"), then rename the .LNNN labels in one or both files
such that they don't clash (say to .LXNNN in one file, .LYNNN in another
one) and then bisect at the function level - start with taking approximately
first half of functions from one file and second half from another file,
etc.). When you narrow it down to function level, eyeball the assembly and
try to find out where it is wrong, or try to create a self-contained
executable testcase out of the preprocessed source (cut out everything
unneeded from the preprocessed file, keep there just the problematic routine
and everything that is inlined into it and all symbols they need, supply
from another file? a new main that calls the routine with the parameters
that cause the problem and stub all the functions it calls with something
that still reproduces it), or even just file a http://gcc.gnu.org/bugzilla/
bug with the preprocessed source, compiler options and hopefully detailed
description what to look at.

Jakub

2014-02-13 09:43:59

by Ingo Molnar

[permalink] [raw]
Subject: Re: [GIT PULL] compiler/gcc4: Add quirk for 'asm goto' miscompilation bug


* Linus Torvalds <[email protected]> wrote:

> On Wed, Feb 12, 2014 at 7:09 PM, Steven Noonan <[email protected]> wrote:
> >
> > If I change the GCC_VERSION check for the asm_volatile_goto quirk to
> > include 4.8.2, then KVM guests are properly cleaned up.
>
> Ok, I guess that means we should just make the quirk unconditional.
>
> Ingo, do you want to do that or should I?

Yeah, will pick up Steven's patch later today unless you beat me at
it. If you pick it up yourself:

Acked-by: Ingo Molnar <[email protected]>

Thanks,

Ingo

Subject: [tip:x86/urgent] compiler/gcc4: Make quirk for asm_volatile_goto( ) unconditional

Commit-ID: a9f180345f5378ac87d80ed0bea55ba421d83859
Gitweb: http://git.kernel.org/tip/a9f180345f5378ac87d80ed0bea55ba421d83859
Author: Steven Noonan <[email protected]>
AuthorDate: Wed, 12 Feb 2014 23:01:07 -0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Thu, 13 Feb 2014 12:34:05 +0100

compiler/gcc4: Make quirk for asm_volatile_goto() unconditional

I started noticing problems with KVM guest destruction on Linux
3.12+, where guest memory wasn't being cleaned up. I bisected it
down to the commit introducing the new 'asm goto'-based atomics,
and found this quirk was later applied to those.

Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the
known 'asm goto' bug) I am still getting some kind of
miscompilation. If I enable the asm_volatile_goto quirk for my
compiler, KVM guests are destroyed correctly and the memory is
cleaned up.

So make the quirk unconditional for now, until bug is found
and fixed.

Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Steven Noonan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Jakub Jelinek <[email protected]>
Cc: Richard Henderson <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Link: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/compiler-gcc4.h | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index ded4299..2507fd2 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -75,11 +75,7 @@
*
* (asm goto is automatically volatile - the naming reflects this.)
*/
-#if GCC_VERSION <= 40801
-# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
-#else
-# define asm_volatile_goto(x...) do { asm goto(x); } while (0)
-#endif
+#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)

#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
#if GCC_VERSION >= 40400

2014-02-13 11:56:42

by Jakub Jelinek

[permalink] [raw]
Subject: Re: [tip:x86/urgent] compiler/gcc4: Make quirk for asm_volatile_goto( ) unconditional

On Thu, Feb 13, 2014 at 03:37:08AM -0800, tip-bot for Steven Noonan wrote:
> Commit-ID: a9f180345f5378ac87d80ed0bea55ba421d83859
> Gitweb: http://git.kernel.org/tip/a9f180345f5378ac87d80ed0bea55ba421d83859
> Author: Steven Noonan <[email protected]>
> AuthorDate: Wed, 12 Feb 2014 23:01:07 -0800
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Thu, 13 Feb 2014 12:34:05 +0100
>
> compiler/gcc4: Make quirk for asm_volatile_goto() unconditional
>
> I started noticing problems with KVM guest destruction on Linux
> 3.12+, where guest memory wasn't being cleaned up. I bisected it
> down to the commit introducing the new 'asm goto'-based atomics,
> and found this quirk was later applied to those.
>
> Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the
> known 'asm goto' bug) I am still getting some kind of
> miscompilation. If I enable the asm_volatile_goto quirk for my
> compiler, KVM guests are destroyed correctly and the memory is
> cleaned up.

BTW, which exact 4.8.2 were you using?
The last known asm goto bug has been fixed on October, 10th, 2013:
http://gcc.gnu.org/PR58670
so before the October, 16th, 2013 4.8.2 release. But already since
May 31th, 2013 the tip of the 4.8 GCC branch has been announcing itself
as 4.8.2 prerelease. While some distribution versions of GCC announce
themselves as the new version only starting from the release date,
i.e. snapshots in between 4.8.1 release and 4.8.2 release announce
themselves as 4.8.1, in other distributions or upstream it announces itself
as 4.8.2. So, if you are using the latter and a snapshot in between May
31th, 2013 and October, 10th, 2013, then you could see gcc patchlevel 2,
yet have a gcc with that bug unfixed.
So, if the kernel doesn't use a runtime test/configure test to check for
this issue, but instead just relies on the patchlevel version, the only
safe way would be to look for GCC >= 4.9 or GCC 4.8 with patchlevel > 2
rather than > 1.

Jakub

2014-02-13 13:44:09

by Steven Noonan

[permalink] [raw]
Subject: Re: [tip:x86/urgent] compiler/gcc4: Make quirk for asm_volatile_goto( ) unconditional

It's the current Arch Linux GCC package:

$ gcc -v
Using built-in specs.
COLLECT_GCC=/usr/bin/gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-unknown-linux-gnu/4.8.2/lto-wrapper
Target: x86_64-unknown-linux-gnu
Configured with: /build/gcc/src/gcc-4.8-20140206/configure
--prefix=/usr --libdir=/usr/lib --libexecdir=/usr/lib
--mandir=/usr/share/man --infodir=/usr/share/info
--with-bugurl=https://bugs.archlinux.org/
--enable-languages=c,c++,ada,fortran,go,lto,objc,obj-c++
--enable-shared --enable-threads=posix --with-system-zlib
--enable-__cxa_atexit --disable-libunwind-exceptions
--enable-clocale=gnu --disable-libstdcxx-pch --disable-libssp
--enable-gnu-unique-object --enable-linker-build-id
--enable-cloog-backend=isl --disable-cloog-version-check --enable-lto
--enable-plugin --enable-install-libiberty
--with-linker-hash-style=gnu --disable-multilib --disable-werror
--enable-checking=release
Thread model: posix
gcc version 4.8.2 20140206 (prerelease) (GCC)

https://projects.archlinux.org/svntogit/packages.git/tree/trunk/PKGBUILD?h=packages/gcc&id=53e7ca8e616ebf6530f5dc43e86381dff92f136c

(Resending this message because gmail keeps defaulting to HTML email
and vger.kernel.org keeps rejecting those... Thanks Google.)

On Thu, Feb 13, 2014 at 3:55 AM, Jakub Jelinek <[email protected]> wrote:
> On Thu, Feb 13, 2014 at 03:37:08AM -0800, tip-bot for Steven Noonan wrote:
>> Commit-ID: a9f180345f5378ac87d80ed0bea55ba421d83859
>> Gitweb: http://git.kernel.org/tip/a9f180345f5378ac87d80ed0bea55ba421d83859
>> Author: Steven Noonan <[email protected]>
>> AuthorDate: Wed, 12 Feb 2014 23:01:07 -0800
>> Committer: Ingo Molnar <[email protected]>
>> CommitDate: Thu, 13 Feb 2014 12:34:05 +0100
>>
>> compiler/gcc4: Make quirk for asm_volatile_goto() unconditional
>>
>> I started noticing problems with KVM guest destruction on Linux
>> 3.12+, where guest memory wasn't being cleaned up. I bisected it
>> down to the commit introducing the new 'asm goto'-based atomics,
>> and found this quirk was later applied to those.
>>
>> Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the
>> known 'asm goto' bug) I am still getting some kind of
>> miscompilation. If I enable the asm_volatile_goto quirk for my
>> compiler, KVM guests are destroyed correctly and the memory is
>> cleaned up.
>
> BTW, which exact 4.8.2 were you using?
> The last known asm goto bug has been fixed on October, 10th, 2013:
> http://gcc.gnu.org/PR58670
> so before the October, 16th, 2013 4.8.2 release. But already since
> May 31th, 2013 the tip of the 4.8 GCC branch has been announcing itself
> as 4.8.2 prerelease. While some distribution versions of GCC announce
> themselves as the new version only starting from the release date,
> i.e. snapshots in between 4.8.1 release and 4.8.2 release announce
> themselves as 4.8.1, in other distributions or upstream it announces itself
> as 4.8.2. So, if you are using the latter and a snapshot in between May
> 31th, 2013 and October, 10th, 2013, then you could see gcc patchlevel 2,
> yet have a gcc with that bug unfixed.
> So, if the kernel doesn't use a runtime test/configure test to check for
> this issue, but instead just relies on the patchlevel version, the only
> safe way would be to look for GCC >= 4.9 or GCC 4.8 with patchlevel > 2
> rather than > 1.
>
> Jakub

2014-07-10 15:00:11

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [tip:x86/urgent] compiler/gcc4: Make quirk for asm_volatile_goto( ) unconditional

On 02/13/2014 12:55 PM, Jakub Jelinek wrote:
> On Thu, Feb 13, 2014 at 03:37:08AM -0800, tip-bot for Steven Noonan wrote:
>> Commit-ID: a9f180345f5378ac87d80ed0bea55ba421d83859
>> Gitweb: http://git.kernel.org/tip/a9f180345f5378ac87d80ed0bea55ba421d83859
>> Author: Steven Noonan <[email protected]>
>> AuthorDate: Wed, 12 Feb 2014 23:01:07 -0800
>> Committer: Ingo Molnar <[email protected]>
>> CommitDate: Thu, 13 Feb 2014 12:34:05 +0100
>>
>> compiler/gcc4: Make quirk for asm_volatile_goto() unconditional
>>
>> I started noticing problems with KVM guest destruction on Linux
>> 3.12+, where guest memory wasn't being cleaned up. I bisected it
>> down to the commit introducing the new 'asm goto'-based atomics,
>> and found this quirk was later applied to those.
>>
>> Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the
>> known 'asm goto' bug) I am still getting some kind of
>> miscompilation. If I enable the asm_volatile_goto quirk for my
>> compiler, KVM guests are destroyed correctly and the memory is
>> cleaned up.
>
> BTW, which exact 4.8.2 were you using?
> The last known asm goto bug has been fixed on October, 10th, 2013:
> http://gcc.gnu.org/PR58670

FYI, we have hit a very similar kind of memory leak (orphaned THP pages staying on LRU
with elevated page_count) due to the quirk patch missing in a backport, and tracked the
problem down to put_compound_page() which contains this:

if (put_page_testzero(page_head))
VM_BUG_ON_PAGE(1, page_head);

The problem is that with DEBUG_VM disabled, the 'then' part of this 'if' is a no-op which
makes gcc optimize out the whole put_page_testzero operation. The quirk happens to prevent
this.

There is a new gcc bug filed for this:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61772

> so before the October, 16th, 2013 4.8.2 release. But already since
> May 31th, 2013 the tip of the 4.8 GCC branch has been announcing itself
> as 4.8.2 prerelease. While some distribution versions of GCC announce
> themselves as the new version only starting from the release date,
> i.e. snapshots in between 4.8.1 release and 4.8.2 release announce
> themselves as 4.8.1, in other distributions or upstream it announces itself
> as 4.8.2. So, if you are using the latter and a snapshot in between May
> 31th, 2013 and October, 10th, 2013, then you could see gcc patchlevel 2,
> yet have a gcc with that bug unfixed.
> So, if the kernel doesn't use a runtime test/configure test to check for
> this issue, but instead just relies on the patchlevel version, the only
> safe way would be to look for GCC >= 4.9 or GCC 4.8 with patchlevel > 2
> rather than > 1.
>
> Jakub
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>