2013-09-11 02:49:16

by H. Peter Anvin

[permalink] [raw]
Subject: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

Hi Linus,

One more x86 tree for this merge window. This tree improves the
handling of jump labels, so that most of the time we don't have to do
a massive initial patching run. Furthermore, we will error out of the
jump label is not what is expected, e.g. if it has been corrupted or
tampered with.

This tree does conflict with your top of tree; the resolution should be
reasonably straightforward but let me know if you want a merged tree.

The following changes since commit ad81f0545ef01ea651886dddac4bef6cec930092:

Linux 3.11-rc1 (2013-07-14 15:18:27 -0700)

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-jumplabel-for-linus

for you to fetch changes up to fb40d7a8994a3cc7a1e1c1f3258ea8662a366916:

x86/jump-label: Show where and what was wrong on errors (2013-08-06 21:54:33 -0400)

----------------------------------------------------------------
Steven Rostedt (4):
x86/jump-label: Use best default nops for inital jump label calls
x86/jump-label: Do not bother updating nops if they are correct
x86/jump-label: Add safety checks to jump label conversions
x86/jump-label: Show where and what was wrong on errors

arch/x86/include/asm/jump_label.h | 9 +++--
arch/x86/kernel/jump_label.c | 70 ++++++++++++++++++++++++++++++++++++---
2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 3a16c14..64507f3 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -3,18 +3,23 @@

#ifdef __KERNEL__

+#include <linux/stringify.h>
#include <linux/types.h>
#include <asm/nops.h>
#include <asm/asm.h>

#define JUMP_LABEL_NOP_SIZE 5

-#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#ifdef CONFIG_X86_64
+# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC
+#else
+# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
+#endif

static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:"
- STATIC_KEY_INITIAL_NOP
+ ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 2889b3d..912a528 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,18 +24,57 @@ union jump_code_union {
} __attribute__((packed));
};

+static void bug_at(unsigned char *ip, int line)
+{
+ /*
+ * The location is not an op that we were expecting.
+ * Something went wrong. Crash the box, as something could be
+ * corrupting the kernel.
+ */
+ pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
+ ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
+ BUG();
+}
+
static void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
- void *(*poker)(void *, const void *, size_t))
+ void *(*poker)(void *, const void *, size_t),
+ int init)
{
union jump_code_union code;
+ const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];

if (type == JUMP_LABEL_ENABLE) {
+ /*
+ * We are enabling this jump label. If it is not a nop
+ * then something must have gone wrong.
+ */
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+
code.jump = 0xe9;
code.offset = entry->target -
(entry->code + JUMP_LABEL_NOP_SIZE);
- } else
+ } else {
+ /*
+ * We are disabling this jump label. If it is not what
+ * we think it is, then something must have gone wrong.
+ * If this is the first initialization call, then we
+ * are converting the default nop to the ideal nop.
+ */
+ if (init) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ } else {
+ code.jump = 0xe9;
+ code.offset = entry->target -
+ (entry->code + JUMP_LABEL_NOP_SIZE);
+ if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }
memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+ }

(*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
}
@@ -45,15 +84,38 @@ void arch_jump_label_transform(struct jump_entry *entry,
{
get_online_cpus();
mutex_lock(&text_mutex);
- __jump_label_transform(entry, type, text_poke_smp);
+ __jump_label_transform(entry, type, text_poke_smp, 0);
mutex_unlock(&text_mutex);
put_online_cpus();
}

+static enum {
+ JL_STATE_START,
+ JL_STATE_NO_UPDATE,
+ JL_STATE_UPDATE,
+} jlstate __initdata_or_module = JL_STATE_START;
+
__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
- __jump_label_transform(entry, type, text_poke_early);
+ /*
+ * This function is called at boot up and when modules are
+ * first loaded. Check if the default nop, the one that is
+ * inserted at compile time, is the ideal nop. If it is, then
+ * we do not need to update the nop, and we can leave it as is.
+ * If it is not, then we need to update the nop to the ideal nop.
+ */
+ if (jlstate == JL_STATE_START) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
+
+ if (memcmp(ideal_nop, default_nop, 5) != 0)
+ jlstate = JL_STATE_UPDATE;
+ else
+ jlstate = JL_STATE_NO_UPDATE;
+ }
+ if (jlstate == JL_STATE_UPDATE)
+ __jump_label_transform(entry, type, text_poke_early, 1);
}

#endif


2013-09-11 13:47:49

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Tue, Sep 10, 2013 at 07:48:44PM -0700, H. Peter Anvin wrote:
> Hi Linus,
>
> One more x86 tree for this merge window. This tree improves the
> handling of jump labels, so that most of the time we don't have to do
> a massive initial patching run. Furthermore, we will error out of the
> jump label is not what is expected, e.g. if it has been corrupted or
> tampered with.
>
> This tree does conflict with your top of tree; the resolution should be
> reasonably straightforward but let me know if you want a merged tree.
>
> The following changes since commit ad81f0545ef01ea651886dddac4bef6cec930092:
>
> Linux 3.11-rc1 (2013-07-14 15:18:27 -0700)
>
> are available in the git repository at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-jumplabel-for-linus
>
> for you to fetch changes up to fb40d7a8994a3cc7a1e1c1f3258ea8662a366916:
>
> x86/jump-label: Show where and what was wrong on errors (2013-08-06 21:54:33 -0400)

This triggers BUG when booting a Xen guest with PV ticketlocks enabled (which
are by default enabled). If I revert this merge it boots, or if I provide 'xen_nopvspin'..

With some modifications (pasted-in-at-the-end) I see:

about to get started...
Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00) /home/build/linux-konrad/arch/x86/kernel/jumpn VCPU 0 [ec=0000]
(XEN) domain_crash_sync called from entry.S
(XEN) Domain 0 (vcpu#0) crashed on cpu#0:
(XEN) ----[ Xen-4.2.2-pre x86_64 debug=n Not tainted ]----
(XEN) CPU: 0
(XEN) RIP: e033:[<ffffffff81051e3d>]
(XEN) RFLAGS: 0000000000000292 EM: 1 CONTEXT: pv guest
(XEN) rax: 0000000000000000 rbx: ffffffff81eaaec0 rcx: 0000000000000001
(XEN) rdx: ffffffff81fac0a0 rsi: 000000000000008c rdi: 0000000000000000
(XEN) rbp: ffffffff81c01e88 rsp: ffffffff81c01e08 r8: 000000000000fffa
(XEN) r9: 0000000000000002 r10: 0000000000000000 r11: 000000000000fffd
(XEN) r12: ffffffff81ca8598 r13: ffffffff81eaaea0 r14: 0000000000000000
(XEN) r15: 0000000000000000 cr0: 0000000080050033 cr4: 00000000000426f0
(XEN) cr3: 0000000231c0c000 cr2: 0000000000000000
(XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033
(XEN) Guest stack trace from rsp=ffffffff81c01e08:
(XEN) 0000000000000001 000000000000fffd ffffffff81051e3d 000000010000e030
(XEN) 0000000000010092 ffffffff81c01e48 000000000000e02b ffffffff81051e3d
(XEN) ffffffff00000000 0000000000000000 ffffffff81952c18 0000000000000035
(XEN) 0000000000441f0f 0000000000000018 ffffff9066666666 ffffffffffffffff
(XEN) ffffffff81c01ea8 ffffffff81051eb5 0000000000441f0f 0000000000000000
(XEN) ffffffff81c01ed8 ffffffff81cfbbfb ffffffff81d6b900 ffffffffffffffff
(XEN) ffffffff81d6b900 ffffffff81d742e0 ffffffff81c01f28 ffffffff81cd3e3c
(XEN) ffffffff81cd3af2 ffffffff82051000 ffffffff82052000 ffffffff81d742e0
(XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) ffffffff81c01f38 ffffffff81cd35f3 ffffffff81c01ff8 ffffffff81cd833a
(XEN) 0300000100000032 0000000000000005 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 9d9822831fc9cbf5 000206a700100800
(XEN) 0000000000000001 0000000000000000 0000000000000000 0f00000060c0c748
(XEN) ccccccccccccc305 cccccccccccccccc cccccccccccccccc cccccccccccccccc
(XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
(XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
(XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
(XEN) Domain 0 crashed: rebooting machine in 5 seconds.
(XEN) Resetting with ACPI MEMORY or I/O RESET_REG.

I can boot it with 'xen_nopvspin' which leads me to believe that it is due
to:

262 void __init xen_init_spinlocks(void)
263 {
264
265 if (!xen_pvspin) {
266 printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
267 return;
268 }
269
270 static_key_slow_inc(&paravirt_ticketlocks_enabled); <====

Which means that all of the arch_spin_unlock (which are inlined) and such
will now be patched over.

But perhaps they are not suppose to be enabled in the .smp_prepare_boot_cpu
function chain? But that seems the best place - as you need to enable
this before the spinlocks are used on SMP.

And the IPs are all NOPs.

Steven, ideas?


diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ee11b7d..e37a2bb 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -23,7 +23,7 @@ union jump_code_union {
int offset;
} __attribute__((packed));
};
-
+#include <xen/hvc-console.h>
static void bug_at(unsigned char *ip, int line)
{
/*
@@ -31,7 +31,7 @@ static void bug_at(unsigned char *ip, int line)
* Something went wrong. Crash the box, as something could be
* corrupting the kernel.
*/
- pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
+ xen_raw_printk("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
BUG();
}

Let me modify the bug_at so that the 'line' can been seen as it seems to have been
truncated.

2013-09-11 13:57:38

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 09:47:17 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:

> On Tue, Sep 10, 2013 at 07:48:44PM -0700, H. Peter Anvin wrote:

> Which means that all of the arch_spin_unlock (which are inlined) and such
> will now be patched over.
>
> But perhaps they are not suppose to be enabled in the .smp_prepare_boot_cpu
> function chain? But that seems the best place - as you need to enable
> this before the spinlocks are used on SMP.
>
> And the IPs are all NOPs.
>

Note, there was a major conflict with the merge. I need to examine if it
was done correctly. I'm not saying that the bug was caused by the
conflict, it could be because of the change the patch made.

Let me look at the conflict first to verify that it still does what was
intended, then if that looks fine, I'll look into this bug too.

Thanks,

-- Steve

2013-09-11 13:58:17

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, Sep 11, 2013 at 09:47:17AM -0400, Konrad Rzeszutek Wilk wrote:
> On Tue, Sep 10, 2013 at 07:48:44PM -0700, H. Peter Anvin wrote:
> > Hi Linus,
> >
> > One more x86 tree for this merge window. This tree improves the
> > handling of jump labels, so that most of the time we don't have to do
> > a massive initial patching run. Furthermore, we will error out of the
> > jump label is not what is expected, e.g. if it has been corrupted or
> > tampered with.
> >
> > This tree does conflict with your top of tree; the resolution should be
> > reasonably straightforward but let me know if you want a merged tree.
> >
> > The following changes since commit ad81f0545ef01ea651886dddac4bef6cec930092:
> >
> > Linux 3.11-rc1 (2013-07-14 15:18:27 -0700)
> >
> > are available in the git repository at:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-jumplabel-for-linus
> >
> > for you to fetch changes up to fb40d7a8994a3cc7a1e1c1f3258ea8662a366916:
> >
> > x86/jump-label: Show where and what was wrong on errors (2013-08-06 21:54:33 -0400)
>
> This triggers BUG when booting a Xen guest with PV ticketlocks enabled (which
> are by default enabled). If I revert this merge it boots, or if I provide 'xen_nopvspin'..
>
> With some modifications (pasted-in-at-the-end) I see:
>
> about to get started...
> Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00) /home/build/linux-konrad/arch/x86/kernel/jumpn VCPU 0 [ec=0000]
> (XEN) domain_crash_sync called from entry.S
> (XEN) Domain 0 (vcpu#0) crashed on cpu#0:
> (XEN) ----[ Xen-4.2.2-pre x86_64 debug=n Not tainted ]----
> (XEN) CPU: 0
> (XEN) RIP: e033:[<ffffffff81051e3d>]
> (XEN) RFLAGS: 0000000000000292 EM: 1 CONTEXT: pv guest
> (XEN) rax: 0000000000000000 rbx: ffffffff81eaaec0 rcx: 0000000000000001
> (XEN) rdx: ffffffff81fac0a0 rsi: 000000000000008c rdi: 0000000000000000
> (XEN) rbp: ffffffff81c01e88 rsp: ffffffff81c01e08 r8: 000000000000fffa
> (XEN) r9: 0000000000000002 r10: 0000000000000000 r11: 000000000000fffd
> (XEN) r12: ffffffff81ca8598 r13: ffffffff81eaaea0 r14: 0000000000000000
> (XEN) r15: 0000000000000000 cr0: 0000000080050033 cr4: 00000000000426f0
> (XEN) cr3: 0000000231c0c000 cr2: 0000000000000000
> (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033
> (XEN) Guest stack trace from rsp=ffffffff81c01e08:
> (XEN) 0000000000000001 000000000000fffd ffffffff81051e3d 000000010000e030
> (XEN) 0000000000010092 ffffffff81c01e48 000000000000e02b ffffffff81051e3d
> (XEN) ffffffff00000000 0000000000000000 ffffffff81952c18 0000000000000035
> (XEN) 0000000000441f0f 0000000000000018 ffffff9066666666 ffffffffffffffff
> (XEN) ffffffff81c01ea8 ffffffff81051eb5 0000000000441f0f 0000000000000000
> (XEN) ffffffff81c01ed8 ffffffff81cfbbfb ffffffff81d6b900 ffffffffffffffff
> (XEN) ffffffff81d6b900 ffffffff81d742e0 ffffffff81c01f28 ffffffff81cd3e3c
> (XEN) ffffffff81cd3af2 ffffffff82051000 ffffffff82052000 ffffffff81d742e0
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) ffffffff81c01f38 ffffffff81cd35f3 ffffffff81c01ff8 ffffffff81cd833a
> (XEN) 0300000100000032 0000000000000005 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 9d9822831fc9cbf5 000206a700100800
> (XEN) 0000000000000001 0000000000000000 0000000000000000 0f00000060c0c748
> (XEN) ccccccccccccc305 cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) Domain 0 crashed: rebooting machine in 5 seconds.
> (XEN) Resetting with ACPI MEMORY or I/O RESET_REG.
>
> I can boot it with 'xen_nopvspin' which leads me to believe that it is due
> to:
>
> 262 void __init xen_init_spinlocks(void)
> 263 {
> 264
> 265 if (!xen_pvspin) {
> 266 printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
> 267 return;
> 268 }
> 269
> 270 static_key_slow_inc(&paravirt_ticketlocks_enabled); <====
>
> Which means that all of the arch_spin_unlock (which are inlined) and such
> will now be patched over.
>
> But perhaps they are not suppose to be enabled in the .smp_prepare_boot_cpu
> function chain? But that seems the best place - as you need to enable
> this before the spinlocks are used on SMP.
>
> And the IPs are all NOPs.
>
> Steven, ideas?
>
>
> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index ee11b7d..e37a2bb 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -23,7 +23,7 @@ union jump_code_union {
> int offset;
> } __attribute__((packed));
> };
> -
> +#include <xen/hvc-console.h>
> static void bug_at(unsigned char *ip, int line)
> {
> /*
> @@ -31,7 +31,7 @@ static void bug_at(unsigned char *ip, int line)
> * Something went wrong. Crash the box, as something could be
> * corrupting the kernel.
> */
> - pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
> + xen_raw_printk("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
> ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
> BUG();
> }
>
> Let me modify the bug_at so that the 'line' can been seen as it seems to have been
> truncated.

It seems to imply line 53 is the originating bug, so that would be:

47 if (type == JUMP_LABEL_ENABLE) {
48 /*
49 * We are enabling this jump label. If it is not a nop
50 * then something must have gone wrong.
51 */
52 if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
53 bug_at((void *)entry->code, __LINE__);

But it is a NOP isn't it? The code is

Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00) 53

Perhaps the ideal_nop has not been set yet?

2013-09-11 14:26:17

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

> It seems to imply line 53 is the originating bug, so that would be:
>
> 47 if (type == JUMP_LABEL_ENABLE) {
> 48 /*
> 49 * We are enabling this jump label. If it is not a nop
> 50 * then something must have gone wrong.
> 51 */
> 52 if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> 53 bug_at((void *)entry->code, __LINE__);
>
> But it is a NOP isn't it? The code is
>
> Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00) 53
>
> Perhaps the ideal_nop has not been set yet?
>

And this looks to fix it for me.

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ee11b7d..d688348 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -44,13 +44,20 @@ static void __jump_label_transform(struct jump_entry *entry,
union jump_code_union code;
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];

+ if (init) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }
if (type == JUMP_LABEL_ENABLE) {
/*
* We are enabling this jump label. If it is not a nop
* then something must have gone wrong.
*/
- if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
+ if (!init) {
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }

code.jump = 0xe9;
code.offset = entry->target -
@@ -62,11 +69,7 @@ static void __jump_label_transform(struct jump_entry *entry,
* If this is the first initialization call, then we
* are converting the default nop to the ideal nop.
*/
- if (init) {
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
- } else {
+ if (!init) {
code.jump = 0xe9;
code.offset = entry->target -
(entry->code + JUMP_LABEL_NOP_SIZE);

2013-09-11 14:38:48

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 09:47:17 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:


The merge conflict resolution looks good. Now to look at this bug.


> On Tue, Sep 10, 2013 at 07:48:44PM -0700, H. Peter Anvin wrote:
> > Hi Linus,
> >
> > One more x86 tree for this merge window. This tree improves the
> > handling of jump labels, so that most of the time we don't have to do
> > a massive initial patching run. Furthermore, we will error out of the
> > jump label is not what is expected, e.g. if it has been corrupted or
> > tampered with.
> >
> > This tree does conflict with your top of tree; the resolution should be
> > reasonably straightforward but let me know if you want a merged tree.
> >
> > The following changes since commit ad81f0545ef01ea651886dddac4bef6cec930092:
> >
> > Linux 3.11-rc1 (2013-07-14 15:18:27 -0700)
> >
> > are available in the git repository at:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-jumplabel-for-linus
> >
> > for you to fetch changes up to fb40d7a8994a3cc7a1e1c1f3258ea8662a366916:
> >
> > x86/jump-label: Show where and what was wrong on errors (2013-08-06 21:54:33 -0400)
>
> This triggers BUG when booting a Xen guest with PV ticketlocks enabled (which
> are by default enabled). If I revert this merge it boots, or if I provide 'xen_nopvspin'..
>
> With some modifications (pasted-in-at-the-end) I see:
>
> about to get started...
> Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00) /home/build/linux-konrad/arch/x86/kernel/jumpn VCPU 0 [ec=0000]

Hmm, we lost the line number, so I don't know which "bug_at()" was
called.

> (XEN) domain_crash_sync called from entry.S
> (XEN) Domain 0 (vcpu#0) crashed on cpu#0:
> (XEN) ----[ Xen-4.2.2-pre x86_64 debug=n Not tainted ]----
> (XEN) CPU: 0
> (XEN) RIP: e033:[<ffffffff81051e3d>]
> (XEN) RFLAGS: 0000000000000292 EM: 1 CONTEXT: pv guest
> (XEN) rax: 0000000000000000 rbx: ffffffff81eaaec0 rcx: 0000000000000001
> (XEN) rdx: ffffffff81fac0a0 rsi: 000000000000008c rdi: 0000000000000000
> (XEN) rbp: ffffffff81c01e88 rsp: ffffffff81c01e08 r8: 000000000000fffa
> (XEN) r9: 0000000000000002 r10: 0000000000000000 r11: 000000000000fffd
> (XEN) r12: ffffffff81ca8598 r13: ffffffff81eaaea0 r14: 0000000000000000
> (XEN) r15: 0000000000000000 cr0: 0000000080050033 cr4: 00000000000426f0
> (XEN) cr3: 0000000231c0c000 cr2: 0000000000000000
> (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033
> (XEN) Guest stack trace from rsp=ffffffff81c01e08:
> (XEN) 0000000000000001 000000000000fffd ffffffff81051e3d 000000010000e030
> (XEN) 0000000000010092 ffffffff81c01e48 000000000000e02b ffffffff81051e3d
> (XEN) ffffffff00000000 0000000000000000 ffffffff81952c18 0000000000000035
> (XEN) 0000000000441f0f 0000000000000018 ffffff9066666666 ffffffffffffffff
> (XEN) ffffffff81c01ea8 ffffffff81051eb5 0000000000441f0f 0000000000000000
> (XEN) ffffffff81c01ed8 ffffffff81cfbbfb ffffffff81d6b900 ffffffffffffffff
> (XEN) ffffffff81d6b900 ffffffff81d742e0 ffffffff81c01f28 ffffffff81cd3e3c
> (XEN) ffffffff81cd3af2 ffffffff82051000 ffffffff82052000 ffffffff81d742e0
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) ffffffff81c01f38 ffffffff81cd35f3 ffffffff81c01ff8 ffffffff81cd833a
> (XEN) 0300000100000032 0000000000000005 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN) 0000000000000000 0000000000000000 9d9822831fc9cbf5 000206a700100800
> (XEN) 0000000000000001 0000000000000000 0000000000000000 0f00000060c0c748
> (XEN) ccccccccccccc305 cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) cccccccccccccccc cccccccccccccccc cccccccccccccccc cccccccccccccccc
> (XEN) Domain 0 crashed: rebooting machine in 5 seconds.
> (XEN) Resetting with ACPI MEMORY or I/O RESET_REG.
>
> I can boot it with 'xen_nopvspin' which leads me to believe that it is due
> to:
>
> 262 void __init xen_init_spinlocks(void)
> 263 {
> 264
> 265 if (!xen_pvspin) {
> 266 printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
> 267 return;
> 268 }
> 269
> 270 static_key_slow_inc(&paravirt_ticketlocks_enabled); <====
>
> Which means that all of the arch_spin_unlock (which are inlined) and such
> will now be patched over.
>
> But perhaps they are not suppose to be enabled in the .smp_prepare_boot_cpu
> function chain? But that seems the best place - as you need to enable
> this before the spinlocks are used on SMP.

You are correct that this is where it crashes. As
smp_prepare_boot_cpu() is called just before jump_label_init().

Now, if this just needs to be done before smp is enabled, then you have
plenty of time. There's even a "do_pre_smp_init_calls()".

>
> And the IPs are all NOPs.
>
> Steven, ideas?

I'm thinking that you can delay where you do that update.

-- Steve

>
>
> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index ee11b7d..e37a2bb 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -23,7 +23,7 @@ union jump_code_union {
> int offset;
> } __attribute__((packed));
> };
> -
> +#include <xen/hvc-console.h>
> static void bug_at(unsigned char *ip, int line)
> {
> /*
> @@ -31,7 +31,7 @@ static void bug_at(unsigned char *ip, int line)
> * Something went wrong. Crash the box, as something could be
> * corrupting the kernel.
> */
> - pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
> + xen_raw_printk("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
> ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
> BUG();
> }
>
> Let me modify the bug_at so that the 'line' can been seen as it seems to have been
> truncated.

2013-09-11 14:56:37

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 10:25:45 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:

> > It seems to imply line 53 is the originating bug, so that would be:
> >
> > 47 if (type == JUMP_LABEL_ENABLE) {
> > 48 /*
> > 49 * We are enabling this jump label. If it is not a nop
> > 50 * then something must have gone wrong.
> > 51 */
> > 52 if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> > 53 bug_at((void *)entry->code, __LINE__);
> >
> > But it is a NOP isn't it? The code is
> >
> > Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00) 53
> >
> > Perhaps the ideal_nop has not been set yet?
> >
>
> And this looks to fix it for me.

I'm trying to understand how this will fix it for you. Are you sure you
removed 'xen_nopvspin'?

If you are calling static_key_slow_inc() before jump_label_init(), then
it should still fail. The static_key_slow_inc() eventually calls
arch_jump_label_transform(), which calls __jump_label_transform() with
init == 0.

The below code looks to me that it would still compare the contents
with the ideal_nop, which hasn't been set yet.

-- Steve

>
> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index ee11b7d..d688348 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -44,13 +44,20 @@ static void __jump_label_transform(struct jump_entry *entry,
> union jump_code_union code;
> const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
>
> + if (init) {
> + const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> + if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
> + bug_at((void *)entry->code, __LINE__);
> + }
> if (type == JUMP_LABEL_ENABLE) {
> /*
> * We are enabling this jump label. If it is not a nop
> * then something must have gone wrong.
> */
> - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> - bug_at((void *)entry->code, __LINE__);
> + if (!init) {
> + if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> + bug_at((void *)entry->code, __LINE__);
> + }
>
> code.jump = 0xe9;
> code.offset = entry->target -
> @@ -62,11 +69,7 @@ static void __jump_label_transform(struct jump_entry *entry,
> * If this is the first initialization call, then we
> * are converting the default nop to the ideal nop.
> */
> - if (init) {
> - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> - if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
> - bug_at((void *)entry->code, __LINE__);
> - } else {
> + if (!init) {
> code.jump = 0xe9;
> code.offset = entry->target -
> (entry->code + JUMP_LABEL_NOP_SIZE);

2013-09-11 15:22:21

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, Sep 11, 2013 at 10:56:33AM -0400, Steven Rostedt wrote:
> On Wed, 11 Sep 2013 10:25:45 -0400
> Konrad Rzeszutek Wilk <[email protected]> wrote:
>
> > > It seems to imply line 53 is the originating bug, so that would be:
> > >
> > > 47 if (type == JUMP_LABEL_ENABLE) {
> > > 48 /*
> > > 49 * We are enabling this jump label. If it is not a nop
> > > 50 * then something must have gone wrong.
> > > 51 */
> > > 52 if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> > > 53 bug_at((void *)entry->code, __LINE__);
> > >
> > > But it is a NOP isn't it? The code is
> > >
> > > Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00) 53
> > >
> > > Perhaps the ideal_nop has not been set yet?
> > >
> >
> > And this looks to fix it for me.
>
> I'm trying to understand how this will fix it for you. Are you sure you
> removed 'xen_nopvspin'?

Yes.
>
> If you are calling static_key_slow_inc() before jump_label_init(), then
> it should still fail. The static_key_slow_inc() eventually calls
> arch_jump_label_transform(), which calls __jump_label_transform() with
> init == 0.

Perhaps I am misreading the code, but I believe init is set to one.
That is due to us calling:

arch_jump_label_transform (.., JUMP_LABEL_ENABLE)

which calls __jump_label_transform(.., 1)
?

Perhaps the 'init' and 'enable' parameters have different meanings?

>
> The below code looks to me that it would still compare the contents
> with the ideal_nop, which hasn't been set yet.

In the !init case - sure.

In the init case - just with default_nop.

>
> -- Steve
>
> >
> > diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> > index ee11b7d..d688348 100644
> > --- a/arch/x86/kernel/jump_label.c
> > +++ b/arch/x86/kernel/jump_label.c
> > @@ -44,13 +44,20 @@ static void __jump_label_transform(struct jump_entry *entry,
> > union jump_code_union code;
> > const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
> >
> > + if (init) {
> > + const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> > + if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
> > + bug_at((void *)entry->code, __LINE__);
> > + }
> > if (type == JUMP_LABEL_ENABLE) {
> > /*
> > * We are enabling this jump label. If it is not a nop
> > * then something must have gone wrong.
> > */
> > - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> > - bug_at((void *)entry->code, __LINE__);
> > + if (!init) {
> > + if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> > + bug_at((void *)entry->code, __LINE__);
> > + }
> >
> > code.jump = 0xe9;
> > code.offset = entry->target -
> > @@ -62,11 +69,7 @@ static void __jump_label_transform(struct jump_entry *entry,
> > * If this is the first initialization call, then we
> > * are converting the default nop to the ideal nop.
> > */
> > - if (init) {
> > - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> > - if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
> > - bug_at((void *)entry->code, __LINE__);
> > - } else {
> > + if (!init) {
> > code.jump = 0xe9;
> > code.offset = entry->target -
> > (entry->code + JUMP_LABEL_NOP_SIZE);
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2013-09-11 15:47:12

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 11:21:49 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:
> >
> > I'm trying to understand how this will fix it for you. Are you sure you
> > removed 'xen_nopvspin'?
>
> Yes.
> >
> > If you are calling static_key_slow_inc() before jump_label_init(), then
> > it should still fail. The static_key_slow_inc() eventually calls
> > arch_jump_label_transform(), which calls __jump_label_transform() with
> > init == 0.
>
> Perhaps I am misreading the code, but I believe init is set to one.
> That is due to us calling:
>
> arch_jump_label_transform (.., JUMP_LABEL_ENABLE)
>
> which calls __jump_label_transform(.., 1)
> ?

>From what I'm looking at, only arch_jump_label_transform_static() calls
__jump_label_transform() with a 1 for init. arch_jump_label_transform()
calls it with 0 for init, which is what eventually gets called by
xen_init_spinlocks().

>
> Perhaps the 'init' and 'enable' parameters have different meanings?

Yes they do.

-- Steve

>
> >
> > The below code looks to me that it would still compare the contents
> > with the ideal_nop, which hasn't been set yet.
>
> In the !init case - sure.
>
> In the init case - just with default_nop.
>

2013-09-11 16:18:22

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, Sep 11, 2013 at 11:47:08AM -0400, Steven Rostedt wrote:
> On Wed, 11 Sep 2013 11:21:49 -0400
> Konrad Rzeszutek Wilk <[email protected]> wrote:
> > >
> > > I'm trying to understand how this will fix it for you. Are you sure you
> > > removed 'xen_nopvspin'?
> >
> > Yes.
> > >
> > > If you are calling static_key_slow_inc() before jump_label_init(), then
> > > it should still fail. The static_key_slow_inc() eventually calls
> > > arch_jump_label_transform(), which calls __jump_label_transform() with
> > > init == 0.
> >
> > Perhaps I am misreading the code, but I believe init is set to one.
> > That is due to us calling:
> >
> > arch_jump_label_transform (.., JUMP_LABEL_ENABLE)
> >
> > which calls __jump_label_transform(.., 1)
> > ?
>
> From what I'm looking at, only arch_jump_label_transform_static() calls
> __jump_label_transform() with a 1 for init. arch_jump_label_transform()
> calls it with 0 for init, which is what eventually gets called by
> xen_init_spinlocks().
>
> >
> > Perhaps the 'init' and 'enable' parameters have different meanings?
>
> Yes they do.
>
> -- Steve

Adding an stack_dump() gets this:

[ 0.000000] Initializing cgroup subsys cpuset
[ 0.000000] Initializing cgroup subsys cpu
[ 0.000000] Initializing cgroup subsys cpuacct
[ 0.000000] Linux version 3.11.0upstream-09031-ga22a0fd-dirty ([email protected]) (gcc version 4.4.4 20100503 (Red Hat 4.4.4-2) (GCC) ) #1 SMP Wed Sep 11 11:42:38 EDT 2013
[ 0.000000] Command line: debug selinux=0 earlyprintk=xen console=hvc0 xencons=hvc0 loglevel=10 pci=resource_alignment=00:13.2 xen-pciback.hide=(08:07.0)(08:06.0)(00:12.0)(00:12.1)(00:12.2)(00:13.0)(00:13.1)(00:13.2)(00:14.5) xen-pciback.passthrough=0
[ 0.000000] Freeing 9f-100 pfn range: 97 pages freed
[ 0.000000] 1-1 mapping on 9f->100
[ 0.000000] 1-1 mapping on cfe90->100000
[ 0.000000] Released 97 pages of unused memory
[ 0.000000] Set 197073 page(s) to 1-1 mapping
[ 0.000000] Populating 40000-40061 pfn range: 97 pages added
[ 0.000000] e820: BIOS-provided physical RAM map:
[ 0.000000] Xen: [mem 0x0000000000000000-0x000000000009efff] usable
[ 0.000000] Xen: [mem 0x000000000009f400-0x00000000000fffff] reserved
[ 0.000000] Xen: [mem 0x0000000000100000-0x0000000040060fff] usable
[ 0.000000] Xen: [mem 0x0000000040061000-0x00000000cfe8ffff] unusable
[ 0.000000] Xen: [mem 0x00000000cfe90000-0x00000000cfe9dfff] ACPI data
[ 0.000000] Xen: [mem 0x00000000cfe9e000-0x00000000cfedffff] ACPI NVS
[ 0.000000] Xen: [mem 0x00000000cfee0000-0x00000000cfeedfff] reserved
[ 0.000000] Xen: [mem 0x00000000cfef0000-0x00000000cfefffff] reserved
[ 0.000000] Xen: [mem 0x00000000e0000000-0x00000000efffffff] reserved
[ 0.000000] Xen: [mem 0x00000000fec00000-0x00000000fec00fff] reserved
[ 0.000000] Xen: [mem 0x00000000fee00000-0x00000000fee00fff] reserved
[ 0.000000] Xen: [mem 0x00000000ff700000-0x00000000ffffffff] reserved
[ 0.000000] Xen: [mem 0x0000000100000000-0x000000021fffffff] unusable
[ 0.000000] ERROR: earlyprintk= xenboot already used
[ 0.000000] NX (Execute Disable) protection: active
[ 0.000000] SMBIOS 2.5 present.
[ 0.000000] DMI: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014 07/18/2008
[ 0.000000] e820: update [mem 0x00000000-0x00000fff] usable ==> reserved
[ 0.000000] e820: remove [mem 0x000a0000-0x000fffff] usable
[ 0.000000] No AGP bridge found
[ 0.000000] e820: last_pfn = 0x40061 max_arch_pfn = 0x400000000
[ 0.000000] Scanning 1 areas for low memory corruption
[ 0.000000] Base memory trampoline at [ffff880000099000] 99000 size 24576
[ 0.000000] init_memory_mapping: [mem 0x00000000-0x000fffff]
[ 0.000000] [mem 0x00000000-0x000fffff] page 4k
[ 0.000000] init_memory_mapping: [mem 0x3fe00000-0x3fffffff]
[ 0.000000] [mem 0x3fe00000-0x3fffffff] page 4k
[ 0.000000] BRK [0x0205a000, 0x0205afff] PGTABLE
[ 0.000000] init_memory_mapping: [mem 0x3c000000-0x3fdfffff]
[ 0.000000] [mem 0x3c000000-0x3fdfffff] page 4k
[ 0.000000] BRK [0x0205b000, 0x0205bfff] PGTABLE
[ 0.000000] BRK [0x0205c000, 0x0205cfff] PGTABLE
[ 0.000000] BRK [0x0205d000, 0x0205dfff] PGTABLE
[ 0.000000] BRK [0x0205e000, 0x0205efff] PGTABLE
[ 0.000000] BRK [0x0205f000, 0x0205ffff] PGTABLE
[ 0.000000] init_memory_mapping: [mem 0x00100000-0x3bffffff]
[ 0.000000] [mem 0x00100000-0x3bffffff] page 4k
[ 0.000000] init_memory_mapping: [mem 0x40000000-0x40060fff]
[ 0.000000] [mem 0x40000000-0x40060fff] page 4k
[ 0.000000] RAMDISK: [mem 0x02469000-0x06eeefff]
[ 0.000000] ACPI: RSDP 00000000000fa930 00024 (v02 ACPIAM)
[ 0.000000] ACPI: XSDT 00000000cfe90100 00054 (v01 071808 XSDT0330 20080718 MSFT 00000097)
[ 0.000000] ACPI: FACP 00000000cfe90290 000F4 (v04 071808 FACP0330 20080718 MSFT 00000097)
[ 0.000000] ACPI BIOS Warning (bug): Optional FADT field Pm2ControlBlock has zero address or length: 0x0000000000000000/0x1 (20130725/tbfadt-603)
[ 0.000000] ACPI: DSDT 00000000cfe90490 097CC (v02 MTLAX MTLAX2-0 00000000 INTL 20051117)
[ 0.000000] ACPI: FACS 00000000cfe9e000 00040
[ 0.000000] ACPI: APIC 00000000cfe90390 0006C (v02 071808 APIC0330 20080718 MSFT 00000097)
[ 0.000000] ACPI: MCFG 00000000cfe90400 0003C (v01 071808 OEMMCFG 20080718 MSFT 00000097)
[ 0.000000] ACPI: OEMB 00000000cfe9e040 00072 (v01 071808 OEMB0330 20080718 MSFT 00000097)
[ 0.000000] ACPI: HPET 00000000cfe99c60 00038 (v01 071808 OEMHPET 20080718 MSFT 00000097)
[ 0.000000] ACPI: SSDT 00000000cfe99de0 0088C (v01 A M I POWERNOW 00000001 AMD 00000001)
[ 0.000000] ACPI: Local APIC address 0xfee00000
[ 0.000000] NUMA turned off
[ 0.000000] Faking a node at [mem 0x0000000000000000-0x0000000040060fff]
[ 0.000000] Initmem setup node 0 [mem 0x00000000-0x40060fff]
[ 0.000000] NODE_DATA [mem 0x4005d000-0x40060fff]
[ 0.000000] Zone ranges:
[ 0.000000] DMA [mem 0x00001000-0x00ffffff]
[ 0.000000] DMA32 [mem 0x01000000-0xffffffff]
[ 0.000000] Normal empty
[ 0.000000] Movable zone start for each node
[ 0.000000] Early memory node ranges
[ 0.000000] node 0: [mem 0x00001000-0x0009efff]
[ 0.000000] node 0: [mem 0x00100000-0x40060fff]
[ 0.000000] On node 0 totalpages: 262143
[ 0.000000] DMA zone: 56 pages used for memmap
[ 0.000000] DMA zone: 21 pages reserved
[ 0.000000] DMA zone: 3998 pages, LIFO batch:0
[ 0.000000] DMA32 zone: 3530 pages used for memmap
[ 0.000000] DMA32 zone: 258145 pages, LIFO batch:31
[ 0.000000] ACPI: PM-Timer IO Port: 0x808
[ 0.000000] ACPI: Local APIC address 0xfee00000
[ 0.000000] ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
[ 0.000000] ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
[ 0.000000] ACPI: LAPIC (acpi_id[0x03] lapic_id[0x02] enabled)
[ 0.000000] ACPI: LAPIC (acpi_id[0x04] lapic_id[0x03] enabled)
[ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0])
[ 0.000000] IOAPIC[0]: apic_id 4, version 33, address 0xfec00000, GSI 0-23
[ 0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[ 0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
[ 0.000000] ACPI: IRQ0 used by override.
[ 0.000000] ACPI: IRQ2 used by override.
[ 0.000000] ACPI: IRQ9 used by override.
[ 0.000000] Using ACPI (MADT) for SMP configuration information
[ 0.000000] ACPI: HPET id: 0x8300 base: 0xfed00000
[ 0.000000] smpboot: Allowing 4 CPUs, 0 hotplug CPUs
[ 0.000000] nr_irqs_gsi: 40
[ 0.000000] PM: Registered nosave memory: [mem 0x0009f000-0x0009ffff]
[ 0.000000] PM: Registered nosave memory: [mem 0x000a0000-0x000fffff]
[ 0.000000] e820: [mem 0xcff00000-0xdfffffff] available for PCI devices
[ 0.000000] Booting paravirtualized kernel on Xen
[ 0.000000] Xen version: 4.2.2-pre (preserve-AD)
[ 0.000000] setup_percpu: NR_CPUS:512 nr_cpumask_bits:512 nr_cpu_ids:4 nr_node_ids:1
[ 0.000000] PERCPU: Embedded 28 pages/cpu @ffff88003fa00000 s85312 r8192 d21184 u524288
[ 0.000000] pcpu-alloc: s85312 r8192 d21184 u524288 alloc=1*2097152
[ 0.000000] pcpu-alloc: [0] 0 1 2 3
[ 4.966096] Built 1 zonelists in Node order, mobility grouping on. Total pages: 258536
[ 4.966098] Policy zone: DMA32
[ 4.966101] Kernel command line: debug selinux=0 earlyprintk=xen console=hvc0 xencons=hvc0 loglevel=10 pci=resource_alignment=00:13.2 xen-pciback.hide=(08:07.0)(08:06.0)(00:12.0)(00:12.1)(00:12.2)(00:13.0)(00:13.1)(00:13.2)(00:14.5) xen-pciback.passthrough=0
[ 4.966892] op trace_clock_global+0x6b/0x120
[ 4.966895] CPU: 0 PID: 0 Comm: swapper Not tainted 3.11.0upstream-09031-ga22a0fd-dirty #1
[ 4.966897] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014 07/18/2008
[ 4.966899] ffffffff810542e0 ffffffff81c01e28 ffffffff816a0cf3 0000000000000001
[ 4.966903] ffffffff81ca8598 ffffffff81c01e88 ffffffff81051e0a ffffffe8ffffffe8
[ 4.966905] 0000001800000000 ffffffff81162980 0000000000000018 ffffff0000441f0f
[ 4.966907] Call Trace:
[ 4.966912] [<ffffffff810542e0>] ? poke_int3_handler+0x40/0x40
[ 4.966916] [<ffffffff816a0cf3>] dump_stack+0x59/0x7b
[ 4.966920] [<ffffffff81051e0a>] __jump_label_transform+0x18a/0x230
[ 4.966923] [<ffffffff81162980>] ? fire_user_return_notifiers+0x70/0x70
[ 4.966926] [<ffffffff81051f15>] arch_jump_label_transform_static+0x65/0x90
[ 4.966930] [<ffffffff81cfbbfb>] jump_label_init+0x75/0xa3
[ 4.966932] [<ffffffff81cd3e3c>] start_kernel+0x168/0x3ff
[ 4.966934] [<ffffffff81cd3af2>] ? repair_env_string+0x5b/0x5b
[ 4.966938] [<ffffffff81cd35f3>] x86_64_start_reservations+0x2a/0x2c
[ 4.966941] [<ffffffff81cd833a>] xen_start_kernel+0x594/0x596
[ 4.967072] PID hash table entries: 4096 (order: 3, 32768 bytes)
[ 5.009945] software IO TLB [mem 0x3a400000-0x3e400000] (64MB) mapped at [ffff88003a400000-ffff88003e3fffff]
[ 5.013794] Memory: 868480K/1048572K available (6860K kernel code, 752K rwdata, 2140K rodata, 1708K init, 1876K bss, 180092K reserved)
[ 5.014212] Hierarchical RCU implementation.
[ 5.014214] RCU restricting CPUs from NR_CPUS=512 to nr_cpu_ids=4.
[ 5.014229] NR_IRQS:33024 nr_irqs:712 16
[ 5.014370] xen: sci override: global_irq=9 trigger=0 polarity=1

.... snip.

And here is the patch:

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ee11b7d..e3a41a0 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -44,13 +44,31 @@ static void __jump_label_transform(struct jump_entry *entry,
union jump_code_union code;
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];

+ if (init) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }
if (type == JUMP_LABEL_ENABLE) {
/*
* We are enabling this jump label. If it is not a nop
* then something must have gone wrong.
*/
- if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
+ if (init) {
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0)) {
+ static int log = 0;
+
+ if (log == 0) {
+ pr_warning("op %pS\n", (void *)entry->code);
+ dump_stack();
+ }
+ log++;
+ }
+ }
+ if (!init) {
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }

code.jump = 0xe9;
code.offset = entry->target -
@@ -62,11 +80,7 @@ static void __jump_label_transform(struct jump_entry *entry,
* If this is the first initialization call, then we
* are converting the default nop to the ideal nop.
*/
- if (init) {
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
- } else {
+ if (!init) {
code.jump = 0xe9;
code.offset = entry->target -
(entry->code + JUMP_LABEL_NOP_SIZE);

2013-09-11 17:05:11

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1


[ Fixed Jason Baron's email so that he can join the conversation ]

On Wed, 11 Sep 2013 12:17:45 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:

> On Wed, Sep 11, 2013 at 11:47:08AM -0400, Steven Rostedt wrote:

> [ 4.966101] Kernel command line: debug selinux=0 earlyprintk=xen console=hvc0 xencons=hvc0 loglevel=10 pci=resource_alignment=00:13.2 xen-pciback.hide=(08:07.0)(08:06.0)(00:12.0)(00:12.1)(00:12.2)(00:13.0)(00:13.1)(00:13.2)(00:14.5) xen-pciback.passthrough=0
> [ 4.966892] op trace_clock_global+0x6b/0x120
> [ 4.966895] CPU: 0 PID: 0 Comm: swapper Not tainted 3.11.0upstream-09031-ga22a0fd-dirty #1
> [ 4.966897] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014 07/18/2008
> [ 4.966899] ffffffff810542e0 ffffffff81c01e28 ffffffff816a0cf3 0000000000000001
> [ 4.966903] ffffffff81ca8598 ffffffff81c01e88 ffffffff81051e0a ffffffe8ffffffe8
> [ 4.966905] 0000001800000000 ffffffff81162980 0000000000000018 ffffff0000441f0f
> [ 4.966907] Call Trace:
> [ 4.966912] [<ffffffff810542e0>] ? poke_int3_handler+0x40/0x40
> [ 4.966916] [<ffffffff816a0cf3>] dump_stack+0x59/0x7b
> [ 4.966920] [<ffffffff81051e0a>] __jump_label_transform+0x18a/0x230
> [ 4.966923] [<ffffffff81162980>] ? fire_user_return_notifiers+0x70/0x70
> [ 4.966926] [<ffffffff81051f15>] arch_jump_label_transform_static+0x65/0x90
> [ 4.966930] [<ffffffff81cfbbfb>] jump_label_init+0x75/0xa3
> [ 4.966932] [<ffffffff81cd3e3c>] start_kernel+0x168/0x3ff
> [ 4.966934] [<ffffffff81cd3af2>] ? repair_env_string+0x5b/0x5b
> [ 4.966938] [<ffffffff81cd35f3>] x86_64_start_reservations+0x2a/0x2c
> [ 4.966941] [<ffffffff81cd833a>] xen_start_kernel+0x594/0x596
> [ 4.967072] PID hash table entries: 4096 (order: 3, 32768 bytes)
> [ 5.009945] software IO TLB [mem 0x3a400000-0x3e400000] (64MB) mapped at [ffff88003a400000-ffff88003e3fffff]
> [ 5.013794] Memory: 868480K/1048572K available (6860K kernel code, 752K rwdata, 2140K rodata, 1708K init, 1876K bss, 180092K reserved)
> [ 5.014212] Hierarchical RCU implementation.
> [ 5.014214] RCU restricting CPUs from NR_CPUS=512 to nr_cpu_ids=4.
> [ 5.014229] NR_IRQS:33024 nr_irqs:712 16
> [ 5.014370] xen: sci override: global_irq=9 trigger=0 polarity=1
>
> .... snip.
>
> And here is the patch:
>
> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index ee11b7d..e3a41a0 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -44,13 +44,31 @@ static void __jump_label_transform(struct jump_entry *entry,
> union jump_code_union code;
> const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
>
> + if (init) {
> + const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> + if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
> + bug_at((void *)entry->code, __LINE__);
> + }
> if (type == JUMP_LABEL_ENABLE) {
> /*
> * We are enabling this jump label. If it is not a nop
> * then something must have gone wrong.
> */
> - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> - bug_at((void *)entry->code, __LINE__);
> + if (init) {
> + if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0)) {
> + static int log = 0;
> +
> + if (log == 0) {
> + pr_warning("op %pS\n", (void *)entry->code);
> + dump_stack();

OK, I think I understand the problem, and this may or may not be a real
bug depending on what the jump label infrastructure expects.

Jason,

How safe is it to use static_key_slow_inc() before jump_label_init() is
called?

What happened here is that the xen code called by
smp_prepare_boot_cpu() checks boot parameters and may do a
static_key_slow_inc() if xen_nopvspin is not set. Which basically
enables a jump label. The issues is that because jump_labels have not
been initialized yet, it just ups the "enable" count and does not
modify anything because key->entries is still NULL.

When jump_label_init() is called, it sees that the branch is enabled
and then converts it to being enabled, but here's where the current
check fails. It does not expect a jump label to be already enabled when
it gets here.

Now, if it is fine to enable a jump label before jump_label_init() then
I will agree that this patch is the proper fix. But before I give my
Ack, I want to know if the jump label infrastructure was designed to
allow enabling of jump labels at boot up before jump_label_init() is
run.

-- Steve




> + }
> + log++;
> + }
> + }
> + if (!init) {
> + if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> + bug_at((void *)entry->code, __LINE__);
> + }
>
> code.jump = 0xe9;
> code.offset = entry->target -
> @@ -62,11 +80,7 @@ static void __jump_label_transform(struct jump_entry *entry,
> * If this is the first initialization call, then we
> * are converting the default nop to the ideal nop.
> */
> - if (init) {
> - const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> - if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
> - bug_at((void *)entry->code, __LINE__);
> - } else {
> + if (!init) {
> code.jump = 0xe9;
> code.offset = entry->target -
> (entry->code + JUMP_LABEL_NOP_SIZE);

2013-09-11 17:26:23

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, Sep 11, 2013 at 01:05:07PM -0400, Steven Rostedt wrote:
>
> [ Fixed Jason Baron's email so that he can join the conversation ]
>
> On Wed, 11 Sep 2013 12:17:45 -0400
> Konrad Rzeszutek Wilk <[email protected]> wrote:
>
> > On Wed, Sep 11, 2013 at 11:47:08AM -0400, Steven Rostedt wrote:
>
> > [ 4.966101] Kernel command line: debug selinux=0 earlyprintk=xen console=hvc0 xencons=hvc0 loglevel=10 pci=resource_alignment=00:13.2 xen-pciback.hide=(08:07.0)(08:06.0)(00:12.0)(00:12.1)(00:12.2)(00:13.0)(00:13.1)(00:13.2)(00:14.5) xen-pciback.passthrough=0
> > [ 4.966892] op trace_clock_global+0x6b/0x120
> > [ 4.966895] CPU: 0 PID: 0 Comm: swapper Not tainted 3.11.0upstream-09031-ga22a0fd-dirty #1
> > [ 4.966897] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014 07/18/2008
> > [ 4.966899] ffffffff810542e0 ffffffff81c01e28 ffffffff816a0cf3 0000000000000001
> > [ 4.966903] ffffffff81ca8598 ffffffff81c01e88 ffffffff81051e0a ffffffe8ffffffe8
> > [ 4.966905] 0000001800000000 ffffffff81162980 0000000000000018 ffffff0000441f0f
> > [ 4.966907] Call Trace:
> > [ 4.966912] [<ffffffff810542e0>] ? poke_int3_handler+0x40/0x40
> > [ 4.966916] [<ffffffff816a0cf3>] dump_stack+0x59/0x7b
> > [ 4.966920] [<ffffffff81051e0a>] __jump_label_transform+0x18a/0x230
> > [ 4.966923] [<ffffffff81162980>] ? fire_user_return_notifiers+0x70/0x70
> > [ 4.966926] [<ffffffff81051f15>] arch_jump_label_transform_static+0x65/0x90
> > [ 4.966930] [<ffffffff81cfbbfb>] jump_label_init+0x75/0xa3
> > [ 4.966932] [<ffffffff81cd3e3c>] start_kernel+0x168/0x3ff
> > [ 4.966934] [<ffffffff81cd3af2>] ? repair_env_string+0x5b/0x5b
> > [ 4.966938] [<ffffffff81cd35f3>] x86_64_start_reservations+0x2a/0x2c
> > [ 4.966941] [<ffffffff81cd833a>] xen_start_kernel+0x594/0x596
> > [ 4.967072] PID hash table entries: 4096 (order: 3, 32768 bytes)
> > [ 5.009945] software IO TLB [mem 0x3a400000-0x3e400000] (64MB) mapped at [ffff88003a400000-ffff88003e3fffff]
> > [ 5.013794] Memory: 868480K/1048572K available (6860K kernel code, 752K rwdata, 2140K rodata, 1708K init, 1876K bss, 180092K reserved)
> > [ 5.014212] Hierarchical RCU implementation.
> > [ 5.014214] RCU restricting CPUs from NR_CPUS=512 to nr_cpu_ids=4.
> > [ 5.014229] NR_IRQS:33024 nr_irqs:712 16
> > [ 5.014370] xen: sci override: global_irq=9 trigger=0 polarity=1
> >
> > .... snip.
> >
> > And here is the patch:
> >
> > diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> > index ee11b7d..e3a41a0 100644
> > --- a/arch/x86/kernel/jump_label.c
> > +++ b/arch/x86/kernel/jump_label.c
> > @@ -44,13 +44,31 @@ static void __jump_label_transform(struct jump_entry *entry,
> > union jump_code_union code;
> > const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
> >
> > + if (init) {
> > + const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
> > + if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
> > + bug_at((void *)entry->code, __LINE__);
> > + }
> > if (type == JUMP_LABEL_ENABLE) {
> > /*
> > * We are enabling this jump label. If it is not a nop
> > * then something must have gone wrong.
> > */
> > - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> > - bug_at((void *)entry->code, __LINE__);
> > + if (init) {
> > + if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0)) {
> > + static int log = 0;
> > +
> > + if (log == 0) {
> > + pr_warning("op %pS\n", (void *)entry->code);
> > + dump_stack();
>
> OK, I think I understand the problem, and this may or may not be a real
> bug depending on what the jump label infrastructure expects.
>
> Jason,
>
> How safe is it to use static_key_slow_inc() before jump_label_init() is
> called?
>
> What happened here is that the xen code called by
> smp_prepare_boot_cpu() checks boot parameters and may do a
> static_key_slow_inc() if xen_nopvspin is not set. Which basically
> enables a jump label. The issues is that because jump_labels have not
> been initialized yet, it just ups the "enable" count and does not
> modify anything because key->entries is still NULL.
>
> When jump_label_init() is called, it sees that the branch is enabled
> and then converts it to being enabled, but here's where the current
> check fails. It does not expect a jump label to be already enabled when
> it gets here.
>
> Now, if it is fine to enable a jump label before jump_label_init() then
> I will agree that this patch is the proper fix. But before I give my
> Ack, I want to know if the jump label infrastructure was designed to
> allow enabling of jump labels at boot up before jump_label_init() is
> run.

This patch:

commit 97ce2c88f9ad42e3c60a9beb9fca87abf3639faa
Author: Jeremy Fitzhardinge <[email protected]>
Date: Wed Oct 12 16:17:54 2011 -0700

jump-label: initialize jump-label subsystem much earlier

Initialize jump_labels much, much earlier, so they're available for use
during system setup.

Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>


implies that yes.

2013-09-11 17:52:40

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 13:25:52 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:


> commit 97ce2c88f9ad42e3c60a9beb9fca87abf3639faa
> Author: Jeremy Fitzhardinge <[email protected]>
> Date: Wed Oct 12 16:17:54 2011 -0700
>
> jump-label: initialize jump-label subsystem much earlier
>
> Initialize jump_labels much, much earlier, so they're available for use
> during system setup.
>
> Signed-off-by: Jeremy Fitzhardinge <[email protected]>
> Acked-by: Peter Zijlstra <[email protected]>
>
>
> implies that yes.

Unfortunately it does not. All that patch did was move
jump_label_init() up more. If anything, it implies "no".

The question is, can we enable jump_labels before jump_label_init()?

Note, we may still be able to (as it seems to work), the thing is, the
only thing that static_key_slow_inc() does is to tell jump_label_init()
to enable it. Before jump_label_init() is called, nothing has changed.
No code modification, all users of paravirt_ticketlocks_enabled are
still off.

-- Steve

2013-09-11 18:01:45

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, Sep 11, 2013 at 01:52:37PM -0400, Steven Rostedt wrote:
> On Wed, 11 Sep 2013 13:25:52 -0400
> Konrad Rzeszutek Wilk <[email protected]> wrote:
>
>
> > commit 97ce2c88f9ad42e3c60a9beb9fca87abf3639faa
> > Author: Jeremy Fitzhardinge <[email protected]>
> > Date: Wed Oct 12 16:17:54 2011 -0700
> >
> > jump-label: initialize jump-label subsystem much earlier
> >
> > Initialize jump_labels much, much earlier, so they're available for use
> > during system setup.
> >
> > Signed-off-by: Jeremy Fitzhardinge <[email protected]>
> > Acked-by: Peter Zijlstra <[email protected]>
> >
> >
> > implies that yes.
>
> Unfortunately it does not. All that patch did was move
> jump_label_init() up more. If anything, it implies "no".
>
> The question is, can we enable jump_labels before jump_label_init()?
>
> Note, we may still be able to (as it seems to work), the thing is, the
> only thing that static_key_slow_inc() does is to tell jump_label_init()
> to enable it. Before jump_label_init() is called, nothing has changed.
> No code modification, all users of paravirt_ticketlocks_enabled are
> still off.

<confused>

I am thins would still work:


47 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
148 {
149 if (TICKET_SLOWPATH_FLAG &&
150 static_key_false(&paravirt_ticketlocks_enabled)) {

(from arch/x86/include/asm/spinlock.h) as the static_key_false
would check the key->enabled. Which had been incremented?

Granted there are no patching done yet, but that should still allow
this code path to be taken?

2013-09-11 18:27:30

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 14:01:13 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:


> <confused>
>
> I am thins would still work:
>
>
> 47 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
> 148 {
> 149 if (TICKET_SLOWPATH_FLAG &&
> 150 static_key_false(&paravirt_ticketlocks_enabled)) {
>
> (from arch/x86/include/asm/spinlock.h) as the static_key_false
> would check the key->enabled. Which had been incremented?
>
> Granted there are no patching done yet, but that should still allow
> this code path to be taken?

Lets look at static_key_false():

If jump labels is not enabled, you are correct. It simply looks like
this:

static __always_inline bool static_key_false(struct static_key *key)
{
if (unlikely(atomic_read(&key->enabled)) > 0)
return true;
return false;
}


But that's not the case here. Here we have code modifying jump labels,
where static_key_false() looks like this:

static __always_inline bool static_key_false(struct static_key *key)
{
return arch_static_branch(key);
}

static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:"
".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
".popsection \n\t"
: : "i" (key) : : l_yes);
return false;
l_yes:
return true;
}




Look in that assembly. That "STATIC_KEY_INIT_NOP" is the byte code for
a nop, and until we modify it, arch_static_branch() will always return
false, no matter what "key->enable" is.


In fact, your call trace you posted earlier proves my point!

[ 4.966912] [<ffffffff810542e0>] ? poke_int3_handler+0x40/0x40
[ 4.966916] [<ffffffff816a0cf3>] dump_stack+0x59/0x7b
[ 4.966920] [<ffffffff81051e0a>] __jump_label_transform+0x18a/0x230
[ 4.966923] [<ffffffff81162980>] ? fire_user_return_notifiers+0x70/0x70
[ 4.966926] [<ffffffff81051f15>] arch_jump_label_transform_static+0x65/0x90
[ 4.966930] [<ffffffff81cfbbfb>] jump_label_init+0x75/0xa3
[ 4.966932] [<ffffffff81cd3e3c>] start_kernel+0x168/0x3ff
[ 4.966934] [<ffffffff81cd3af2>] ? repair_env_string+0x5b/0x5b
[ 4.966938] [<ffffffff81cd35f3>] x86_64_start_reservations+0x2a/0x2c
[ 4.966941] [<ffffffff81cd833a>] xen_start_kernel+0x594/0x596

This blew up in your patch:

if (type == JUMP_LABEL_ENABLE) {
/*
* We are enabling this jump label. If it is not a nop
* then something must have gone wrong.
*/
- if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
+ if (init) {
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0)) {
+ static int log = 0;
+
+ if (log == 0) {
+ pr_warning("op %pS\n", (void *)entry->code);
+ dump_stack();
+ }
+ log++;
+ }
+ }


It was expecting to have the ideal nop, because on boot up it didn't
expect to have something already marked for enable. It only thought this
to be the case after initialization. This explains your origin error
message:

Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00)

The NOP was still the default nop, but it was expecting the ideal nop
because it normally only gets into this path after the init was already
done.

My point is, it wasn't until jump_label_init() where it did the
conversion from nop to calling the label.

I'm looking to NAK your patch because it is obvious that the jump label
code isn't doing what you expect it to be doing. And it wasn't until my
checks were in place for you to notice.

-- Steve

2013-09-11 18:57:34

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, Sep 11, 2013 at 02:26:44PM -0400, Steven Rostedt wrote:
> On Wed, 11 Sep 2013 14:01:13 -0400
> Konrad Rzeszutek Wilk <[email protected]> wrote:
>
>
> > <confused>
> >
> > I am thins would still work:
> >
> >
> > 47 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
> > 148 {
> > 149 if (TICKET_SLOWPATH_FLAG &&
> > 150 static_key_false(&paravirt_ticketlocks_enabled)) {
> >
> > (from arch/x86/include/asm/spinlock.h) as the static_key_false
> > would check the key->enabled. Which had been incremented?
> >
> > Granted there are no patching done yet, but that should still allow
> > this code path to be taken?
>
> Lets look at static_key_false():
>
> If jump labels is not enabled, you are correct. It simply looks like
> this:
>
> static __always_inline bool static_key_false(struct static_key *key)
> {
> if (unlikely(atomic_read(&key->enabled)) > 0)
> return true;
> return false;
> }
>
>
> But that's not the case here. Here we have code modifying jump labels,
> where static_key_false() looks like this:
>
> static __always_inline bool static_key_false(struct static_key *key)
> {
> return arch_static_branch(key);
> }
>
> static __always_inline bool arch_static_branch(struct static_key *key)
> {
> asm goto("1:"
> ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
> ".pushsection __jump_table, \"aw\" \n\t"
> _ASM_ALIGN "\n\t"
> _ASM_PTR "1b, %l[l_yes], %c0 \n\t"
> ".popsection \n\t"
> : : "i" (key) : : l_yes);
> return false;
> l_yes:
> return true;
> }
>
>
>
>
> Look in that assembly. That "STATIC_KEY_INIT_NOP" is the byte code for
> a nop, and until we modify it, arch_static_branch() will always return
> false, no matter what "key->enable" is.
>
>
> In fact, your call trace you posted earlier proves my point!
>
> [ 4.966912] [<ffffffff810542e0>] ? poke_int3_handler+0x40/0x40
> [ 4.966916] [<ffffffff816a0cf3>] dump_stack+0x59/0x7b
> [ 4.966920] [<ffffffff81051e0a>] __jump_label_transform+0x18a/0x230
> [ 4.966923] [<ffffffff81162980>] ? fire_user_return_notifiers+0x70/0x70
> [ 4.966926] [<ffffffff81051f15>] arch_jump_label_transform_static+0x65/0x90
> [ 4.966930] [<ffffffff81cfbbfb>] jump_label_init+0x75/0xa3
> [ 4.966932] [<ffffffff81cd3e3c>] start_kernel+0x168/0x3ff
> [ 4.966934] [<ffffffff81cd3af2>] ? repair_env_string+0x5b/0x5b
> [ 4.966938] [<ffffffff81cd35f3>] x86_64_start_reservations+0x2a/0x2c
> [ 4.966941] [<ffffffff81cd833a>] xen_start_kernel+0x594/0x596
>
> This blew up in your patch:
>
> if (type == JUMP_LABEL_ENABLE) {
> /*
> * We are enabling this jump label. If it is not a nop
> * then something must have gone wrong.
> */
> - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
> - bug_at((void *)entry->code, __LINE__);
> + if (init) {
> + if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0)) {
> + static int log = 0;
> +
> + if (log == 0) {
> + pr_warning("op %pS\n", (void *)entry->code);
> + dump_stack();
> + }
> + log++;
> + }
> + }
>
>
> It was expecting to have the ideal nop, because on boot up it didn't
> expect to have something already marked for enable. It only thought this
> to be the case after initialization. This explains your origin error
> message:
>
> Unexpected op at trace_clock_global+0x6b/0x120 [ffffffff8113a21b] (0f 1f 44 00 00)
>
> The NOP was still the default nop, but it was expecting the ideal nop
> because it normally only gets into this path after the init was already
> done.
>
> My point is, it wasn't until jump_label_init() where it did the
> conversion from nop to calling the label.
>
> I'm looking to NAK your patch because it is obvious that the jump label
> code isn't doing what you expect it to be doing. And it wasn't until my

Actually it is OK. They need to be enabled before the SMP code kicks in.

> checks were in place for you to notice.

Any suggestion on how to resolve the crash?

The PV spinlock code is OK (I think, I need to think hard about this) until
the spinlocks start being used by multiple CPUs. At that point the
jump_lables have to be in place - otherwise you will end with a spinlock
going in a slowpath (patched over) and an kicker not using the slowpath
and never kicking the waiter. Which ends with a hanged system.

Or simple said - jump labels have to be setup before we boot
the other CPUs.

This would affect the KVM guests as well, I think if the slowpath
waiter was blocking on the VCPU (which I think it is doing now, but
not entirely sure?)

P.S.
I am out on vacation tomorrow for a week. Boris (CC-ed here) can help.

2013-09-11 19:14:56

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 14:56:54 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:


> > I'm looking to NAK your patch because it is obvious that the jump label
> > code isn't doing what you expect it to be doing. And it wasn't until my
>
> Actually it is OK. They need to be enabled before the SMP code kicks in.
>
> > checks were in place for you to notice.
>
> Any suggestion on how to resolve the crash?
>
> The PV spinlock code is OK (I think, I need to think hard about this) until
> the spinlocks start being used by multiple CPUs. At that point the
> jump_lables have to be in place - otherwise you will end with a spinlock
> going in a slowpath (patched over) and an kicker not using the slowpath
> and never kicking the waiter. Which ends with a hanged system.

Note, a simple early_initcall() could do the trick. SMP isn't set up
until much further in the boot process.

>
> Or simple said - jump labels have to be setup before we boot
> the other CPUs.

Right, and initcalls() can easily serve that purpose.

>
> This would affect the KVM guests as well, I think if the slowpath
> waiter was blocking on the VCPU (which I think it is doing now, but
> not entirely sure?)
>
> P.S.
> I am out on vacation tomorrow for a week. Boris (CC-ed here) can help.

Your patch isn't wrong per say, but I'm hesitant to apply it because it
the result is different depending on whether JUMP_LABEL is configured
or not. Using any jump_label() calls before jump_label_init() is
called, is entering a gray area, and I think it should be avoided.

This patch should solve it for you:

xen: Do not enable spinlocks before jump_label_init()

The static_key paravirt_ticketlocks_enabled does not need to be
initialized before jump_label_init(), as that will cause an
inconsistent result between JUMP_LABEL being configured or not. The
static key update will not take place at the time of the
static_key_slow_inc() but instead at the time of jump_label_init(), if
CONFIG_JUMP_LABEL is configured, otherwise it happens at the time of
the static_key_slow_inc() call.

The updates to the spinlocks need to happen before other processors are
initialized, which happens much later in boot up. A simple use of
early_initcall() will do the trick, as that too is called before other
processors are enabled and after jump_label_init() is called.

Reported-by: Konrad Rzeszutek Wilk <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 9235842..4214bde 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,7 +279,6 @@ static void __init xen_smp_prepare_boot_cpu(void)

xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
- xen_init_spinlocks();
}

static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 0438b93..52582fd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -285,25 +285,28 @@ void xen_uninit_lock_cpu(int cpu)

static bool xen_pvspin __initdata = true;

-void __init xen_init_spinlocks(void)
+static __init int xen_init_spinlocks(void)
{
/*
* See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
* (xen: disable PV spinlocks on HVM)
*/
if (xen_hvm_domain())
- return;
+ return 0;

if (!xen_pvspin) {
printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
- return;
+ return 0;
}

static_key_slow_inc(&paravirt_ticketlocks_enabled);

pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
+
+ return 0;
}
+early_initcall(xen_init_spinlocks);

static __init int xen_parse_nopvspin(char *arg)
{
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 95f8c61..7609eb1 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,13 +72,9 @@ static inline void xen_hvm_smp_init(void) {}
#endif

#ifdef CONFIG_PARAVIRT_SPINLOCKS
-void __init xen_init_spinlocks(void);
void xen_init_lock_cpu(int cpu);
void xen_uninit_lock_cpu(int cpu);
#else
-static inline void xen_init_spinlocks(void)
-{
-}
static inline void xen_init_lock_cpu(int cpu)
{
}

2013-09-11 19:56:32

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, Sep 11, 2013 at 03:14:52PM -0400, Steven Rostedt wrote:
> On Wed, 11 Sep 2013 14:56:54 -0400
> Konrad Rzeszutek Wilk <[email protected]> wrote:
>
>
> > > I'm looking to NAK your patch because it is obvious that the jump label
> > > code isn't doing what you expect it to be doing. And it wasn't until my
> >
> > Actually it is OK. They need to be enabled before the SMP code kicks in.
> >
> > > checks were in place for you to notice.
> >
> > Any suggestion on how to resolve the crash?
> >
> > The PV spinlock code is OK (I think, I need to think hard about this) until
> > the spinlocks start being used by multiple CPUs. At that point the
> > jump_lables have to be in place - otherwise you will end with a spinlock
> > going in a slowpath (patched over) and an kicker not using the slowpath
> > and never kicking the waiter. Which ends with a hanged system.
>
> Note, a simple early_initcall() could do the trick. SMP isn't set up
> until much further in the boot process.
>
> >
> > Or simple said - jump labels have to be setup before we boot
> > the other CPUs.
>
> Right, and initcalls() can easily serve that purpose.
>
> >
> > This would affect the KVM guests as well, I think if the slowpath
> > waiter was blocking on the VCPU (which I think it is doing now, but
> > not entirely sure?)
> >
> > P.S.
> > I am out on vacation tomorrow for a week. Boris (CC-ed here) can help.
>
> Your patch isn't wrong per say, but I'm hesitant to apply it because it
> the result is different depending on whether JUMP_LABEL is configured
> or not. Using any jump_label() calls before jump_label_init() is
> called, is entering a gray area, and I think it should be avoided.
>
> This patch should solve it for you:

And also the pv_lock_ops need to be set before alternative_asm
code is called :-) (Called from check_bugs()).

Otherwise you end up with some code still using the native slowpath
kicker/waiter while the modules might be using the Xen variant.

I knew that I forgot to mention something ..

With that in mind and your patch I made this one:

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 253f63f..d90628d 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -267,11 +267,18 @@ void __init xen_init_spinlocks(void)
return;
}

- static_key_slow_inc(&paravirt_ticketlocks_enabled);
-
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
}
+static __init int xen_init_spinlocks_jump(void)
+{
+ if (!xen_pvspin)
+ return 0;
+
+ static_key_slow_inc(&paravirt_ticketlocks_enabled);
+ return 0;
+}
+early_initcall(xen_init_spinlocks_jump);

static __init int xen_parse_nopvspin(char *arg)
{

which seem to work.

2013-09-12 16:13:25

by Steven Rostedt

[permalink] [raw]
Subject: Re: Regression :-) Re: [GIT PULL RESEND] x86/jumpmplabel changes for v3.12-rc1

On Wed, 11 Sep 2013 15:55:50 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:

> On Wed, Sep 11, 2013 at 03:14:52PM -0400, Steven Rostedt wrote:
> > On Wed, 11 Sep 2013 14:56:54 -0400
> > Konrad Rzeszutek Wilk <[email protected]> wrote:
> >
> >
> > > > I'm looking to NAK your patch because it is obvious that the jump label
> > > > code isn't doing what you expect it to be doing. And it wasn't until my
> > >
> > > Actually it is OK. They need to be enabled before the SMP code kicks in.
> > >
> > > > checks were in place for you to notice.
> > >
> > > Any suggestion on how to resolve the crash?
> > >
> > > The PV spinlock code is OK (I think, I need to think hard about this) until
> > > the spinlocks start being used by multiple CPUs. At that point the
> > > jump_lables have to be in place - otherwise you will end with a spinlock
> > > going in a slowpath (patched over) and an kicker not using the slowpath
> > > and never kicking the waiter. Which ends with a hanged system.
> >
> > Note, a simple early_initcall() could do the trick. SMP isn't set up
> > until much further in the boot process.
> >
> > >
> > > Or simple said - jump labels have to be setup before we boot
> > > the other CPUs.
> >
> > Right, and initcalls() can easily serve that purpose.
> >
> > >
> > > This would affect the KVM guests as well, I think if the slowpath
> > > waiter was blocking on the VCPU (which I think it is doing now, but
> > > not entirely sure?)
> > >
> > > P.S.
> > > I am out on vacation tomorrow for a week. Boris (CC-ed here) can help.
> >
> > Your patch isn't wrong per say, but I'm hesitant to apply it because it
> > the result is different depending on whether JUMP_LABEL is configured
> > or not. Using any jump_label() calls before jump_label_init() is
> > called, is entering a gray area, and I think it should be avoided.
> >
> > This patch should solve it for you:
>
> And also the pv_lock_ops need to be set before alternative_asm
> code is called :-) (Called from check_bugs()).
>
> Otherwise you end up with some code still using the native slowpath
> kicker/waiter while the modules might be using the Xen variant.
>
> I knew that I forgot to mention something ..
>
> With that in mind and your patch I made this one:
>
> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
> index 253f63f..d90628d 100644
> --- a/arch/x86/xen/spinlock.c
> +++ b/arch/x86/xen/spinlock.c
> @@ -267,11 +267,18 @@ void __init xen_init_spinlocks(void)
> return;
> }
>
> - static_key_slow_inc(&paravirt_ticketlocks_enabled);
> -
> pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
> pv_lock_ops.unlock_kick = xen_unlock_kick;
> }
> +static __init int xen_init_spinlocks_jump(void)
> +{
> + if (!xen_pvspin)
> + return 0;
> +
> + static_key_slow_inc(&paravirt_ticketlocks_enabled);
> + return 0;
> +}
> +early_initcall(xen_init_spinlocks_jump);

Can you write up a nice change log for this (include our discussion)
and then send it as a formal patch.

If it works for you, I'll give it an ack, and we can have hpa pull it
in and send it off to Linus.

Thanks!

-- Steve

>
> static __init int xen_parse_nopvspin(char *arg)
> {
>
> which seem to work.