2020-11-20 11:48:53

by Juergen Gross

[permalink] [raw]
Subject: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

"popf" is a rather expensive operation, so don't use it for restoring
irq flags. Instead test whether interrupts are enabled in the flags
parameter and enable interrupts via "sti" in that case.

This results in the restore_fl paravirt op no longer being needed.

Suggested-by: Andy Lutomirski <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/include/asm/irqflags.h | 20 ++++++-------------
arch/x86/include/asm/paravirt.h | 5 -----
arch/x86/include/asm/paravirt_types.h | 7 ++-----
arch/x86/kernel/irqflags.S | 11 -----------
arch/x86/kernel/paravirt.c | 1 -
arch/x86/kernel/paravirt_patch.c | 3 ---
arch/x86/xen/enlighten_pv.c | 2 --
arch/x86/xen/irq.c | 23 ----------------------
arch/x86/xen/xen-asm.S | 28 ---------------------------
arch/x86/xen/xen-ops.h | 1 -
10 files changed, 8 insertions(+), 93 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index e585a4705b8d..144d70ea4393 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -35,15 +35,6 @@ extern __always_inline unsigned long native_save_fl(void)
return flags;
}

-extern inline void native_restore_fl(unsigned long flags);
-extern inline void native_restore_fl(unsigned long flags)
-{
- asm volatile("push %0 ; popf"
- : /* no output */
- :"g" (flags)
- :"memory", "cc");
-}
-
static __always_inline void native_irq_disable(void)
{
asm volatile("cli": : :"memory");
@@ -79,11 +70,6 @@ static __always_inline unsigned long arch_local_save_flags(void)
return native_save_fl();
}

-static __always_inline void arch_local_irq_restore(unsigned long flags)
-{
- native_restore_fl(flags);
-}
-
static __always_inline void arch_local_irq_disable(void)
{
native_irq_disable();
@@ -152,6 +138,12 @@ static __always_inline int arch_irqs_disabled(void)

return arch_irqs_disabled_flags(flags);
}
+
+static __always_inline void arch_local_irq_restore(unsigned long flags)
+{
+ if (!arch_irqs_disabled_flags(flags))
+ arch_local_irq_enable();
+}
#else
#ifdef CONFIG_X86_64
#ifdef CONFIG_XEN_PV
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8121cf9b8d81..ce2b8c5aecde 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -648,11 +648,6 @@ static inline notrace unsigned long arch_local_save_flags(void)
return PVOP_CALLEE0(unsigned long, irq.save_fl);
}

-static inline notrace void arch_local_irq_restore(unsigned long f)
-{
- PVOP_VCALLEE1(irq.restore_fl, f);
-}
-
static inline notrace void arch_local_irq_disable(void)
{
PVOP_VCALLEE0(irq.irq_disable);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 55d8b7950e61..2031631160d0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -169,16 +169,13 @@ struct pv_cpu_ops {
struct pv_irq_ops {
#ifdef CONFIG_PARAVIRT_XXL
/*
- * Get/set interrupt state. save_fl and restore_fl are only
- * expected to use X86_EFLAGS_IF; all other bits
- * returned from save_fl are undefined, and may be ignored by
- * restore_fl.
+ * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF;
+ * all other bits returned from save_fl are undefined.
*
* NOTE: These functions callers expect the callee to preserve
* more registers than the standard C calling convention.
*/
struct paravirt_callee_save save_fl;
- struct paravirt_callee_save restore_fl;
struct paravirt_callee_save irq_disable;
struct paravirt_callee_save irq_enable;

diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
index 0db0375235b4..8ef35063964b 100644
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -13,14 +13,3 @@ SYM_FUNC_START(native_save_fl)
ret
SYM_FUNC_END(native_save_fl)
EXPORT_SYMBOL(native_save_fl)
-
-/*
- * void native_restore_fl(unsigned long flags)
- * %eax/%rdi: flags
- */
-SYM_FUNC_START(native_restore_fl)
- push %_ASM_ARG1
- popf
- ret
-SYM_FUNC_END(native_restore_fl)
-EXPORT_SYMBOL(native_restore_fl)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 18560b71e717..c60222ab8ab9 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -320,7 +320,6 @@ struct paravirt_patch_template pv_ops = {

/* Irq ops. */
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
- .irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.irq.safe_halt = native_safe_halt,
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index 2fada2c347c9..abd27ec67397 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -25,7 +25,6 @@ struct patch_xxl {
const unsigned char mmu_read_cr2[3];
const unsigned char mmu_read_cr3[3];
const unsigned char mmu_write_cr3[3];
- const unsigned char irq_restore_fl[2];
const unsigned char cpu_wbinvd[2];
const unsigned char mov64[3];
};
@@ -37,7 +36,6 @@ static const struct patch_xxl patch_data_xxl = {
.mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
.mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
.mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
- .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
.cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
.mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
};
@@ -71,7 +69,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
switch (type) {

#ifdef CONFIG_PARAVIRT_XXL
- PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5476423fc6d0..32b295cc2716 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1022,8 +1022,6 @@ void __init xen_setup_vcpu_info_placement(void)
*/
if (xen_have_vcpu_info_placement) {
pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
- pv_ops.irq.restore_fl =
- __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
pv_ops.irq.irq_disable =
__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
pv_ops.irq.irq_enable =
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 850c93f346c7..dfa091d79c2e 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -42,28 +42,6 @@ asmlinkage __visible unsigned long xen_save_fl(void)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);

-__visible void xen_restore_fl(unsigned long flags)
-{
- struct vcpu_info *vcpu;
-
- /* convert from IF type flag */
- flags = !(flags & X86_EFLAGS_IF);
-
- /* See xen_irq_enable() for why preemption must be disabled. */
- preempt_disable();
- vcpu = this_cpu_read(xen_vcpu);
- vcpu->evtchn_upcall_mask = flags;
-
- if (flags == 0) {
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- xen_force_evtchn_callback();
- preempt_enable();
- } else
- preempt_enable_no_resched();
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
-
asmlinkage __visible void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
@@ -118,7 +96,6 @@ static void xen_halt(void)

static const struct pv_irq_ops xen_irq_ops __initconst = {
.save_fl = PV_CALLEE_SAVE(xen_save_fl),
- .restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
.irq_enable = PV_CALLEE_SAVE(xen_irq_enable),

diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index c0630fd9f44e..1ea7e41044b5 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -72,34 +72,6 @@ SYM_FUNC_START(xen_save_fl_direct)
ret
SYM_FUNC_END(xen_save_fl_direct)

-
-/*
- * In principle the caller should be passing us a value return from
- * xen_save_fl_direct, but for robustness sake we test only the
- * X86_EFLAGS_IF flag rather than the whole byte. After setting the
- * interrupt mask state, it checks for unmasked pending events and
- * enters the hypervisor to get them delivered if so.
- */
-SYM_FUNC_START(xen_restore_fl_direct)
- FRAME_BEGIN
- testw $X86_EFLAGS_IF, %di
- setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
- /*
- * Preempt here doesn't matter because that will deal with any
- * pending interrupts. The pending check may end up being run
- * on the wrong CPU, but that doesn't hurt.
- */
-
- /* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
- jnz 1f
- call check_events
-1:
- FRAME_END
- ret
-SYM_FUNC_END(xen_restore_fl_direct)
-
-
/*
* Force an event check by making a hypercall, but preserve regs
* before making the call.
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b2fd80a01a36..8d7ec49a35fb 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -131,7 +131,6 @@ static inline void __init xen_efi_init(struct boot_params *boot_params)
__visible void xen_irq_enable_direct(void);
__visible void xen_irq_disable_direct(void);
__visible unsigned long xen_save_fl_direct(void);
-__visible void xen_restore_fl_direct(unsigned long);

__visible unsigned long xen_read_cr2(void);
__visible unsigned long xen_read_cr2_direct(void);
--
2.26.2


2020-11-20 12:03:35

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
> +static __always_inline void arch_local_irq_restore(unsigned long flags)
> +{
> + if (!arch_irqs_disabled_flags(flags))
> + arch_local_irq_enable();
> +}

If someone were to write horrible code like:

local_irq_disable();
local_irq_save(flags);
local_irq_enable();
local_irq_restore(flags);

we'd be up some creek without a paddle... now I don't _think_ we have
genius code like that, but I'd feel safer if we can haz an assertion in
genius code like that, but I'd feel saver if we can haz an assertion in
there somewhere...

Maybe something like:

#ifdef CONFIG_DEBUG_ENTRY // for lack of something saner
WARN_ON_ONCE((arch_local_save_flags() ^ flags) & X86_EFLAGS_IF);
#endif

At the end?

2020-11-20 12:08:17

by Juergen Gross

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On 20.11.20 12:59, Peter Zijlstra wrote:
> On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
>> +static __always_inline void arch_local_irq_restore(unsigned long flags)
>> +{
>> + if (!arch_irqs_disabled_flags(flags))
>> + arch_local_irq_enable();
>> +}
>
> If someone were to write horrible code like:
>
> local_irq_disable();
> local_irq_save(flags);
> local_irq_enable();
> local_irq_restore(flags);
>
> we'd be up some creek without a paddle... now I don't _think_ we have
> genius code like that, but I'd feel saver if we can haz an assertion in
> there somewhere...
>
> Maybe something like:
>
> #ifdef CONFIG_DEBUG_ENTRY // for lack of something saner
> WARN_ON_ONCE((arch_local_save_flags() ^ flags) & X86_EFLAGS_IF);
> #endif
>
> At the end?
>

I'd be fine with that. I didn't add something like that because I
couldn't find a suitable CONFIG_ :-)


Juergen


Attachments:
OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature
Download all attachments

2020-11-22 07:12:59

by Juergen Gross

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On 20.11.20 12:59, Peter Zijlstra wrote:
> On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
>> +static __always_inline void arch_local_irq_restore(unsigned long flags)
>> +{
>> + if (!arch_irqs_disabled_flags(flags))
>> + arch_local_irq_enable();
>> +}
>
> If someone were to write horrible code like:
>
> local_irq_disable();
> local_irq_save(flags);
> local_irq_enable();
> local_irq_restore(flags);
>
> we'd be up some creek without a paddle... now I don't _think_ we have
> genius code like that, but I'd feel saver if we can haz an assertion in
> there somewhere...
>
> Maybe something like:
>
> #ifdef CONFIG_DEBUG_ENTRY // for lack of something saner
> WARN_ON_ONCE((arch_local_save_flags() ^ flags) & X86_EFLAGS_IF);
> #endif
>
> At the end?

I'd like to, but using WARN_ON_ONCE() in include/asm/irqflags.h sounds
like a perfect recipe for include dependency hell.

We could use a plain asm("ud2") instead.


Juergen


Attachments:
OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature
Download all attachments

2020-11-22 21:53:09

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On Sat, Nov 21, 2020 at 10:55 PM Jürgen Groß <[email protected]> wrote:
>
> On 20.11.20 12:59, Peter Zijlstra wrote:
> > On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
> >> +static __always_inline void arch_local_irq_restore(unsigned long flags)
> >> +{
> >> + if (!arch_irqs_disabled_flags(flags))
> >> + arch_local_irq_enable();
> >> +}
> >
> > If someone were to write horrible code like:
> >
> > local_irq_disable();
> > local_irq_save(flags);
> > local_irq_enable();
> > local_irq_restore(flags);
> >
> > we'd be up some creek without a paddle... now I don't _think_ we have
> > genius code like that, but I'd feel saver if we can haz an assertion in
> > there somewhere...
> >
> > Maybe something like:
> >
> > #ifdef CONFIG_DEBUG_ENTRY // for lack of something saner
> > WARN_ON_ONCE((arch_local_save_flags() ^ flags) & X86_EFLAGS_IF);
> > #endif
> >
> > At the end?
>
> I'd like to, but using WARN_ON_ONCE() in include/asm/irqflags.h sounds
> like a perfect receipt for include dependency hell.
>
> We could use a plain asm("ud2") instead.

How about out-of-lining it:

#ifdef CONFIG_DEBUG_ENTRY
extern void warn_bogus_irqrestore();
#endif

static __always_inline void arch_local_irq_restore(unsigned long flags)
{
if (!arch_irqs_disabled_flags(flags)) {
arch_local_irq_enable();
} else {
#ifdef CONFIG_DEBUG_ENTRY
if (unlikely(arch_local_irq_save() & X86_EFLAGS_IF))
warn_bogus_irqrestore();
#endif
}

2020-11-23 07:17:20

by Juergen Gross

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On 22.11.20 22:44, Andy Lutomirski wrote:
> On Sat, Nov 21, 2020 at 10:55 PM Jürgen Groß <[email protected]> wrote:
>>
>> On 20.11.20 12:59, Peter Zijlstra wrote:
>>> On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
>>>> +static __always_inline void arch_local_irq_restore(unsigned long flags)
>>>> +{
>>>> + if (!arch_irqs_disabled_flags(flags))
>>>> + arch_local_irq_enable();
>>>> +}
>>>
>>> If someone were to write horrible code like:
>>>
>>> local_irq_disable();
>>> local_irq_save(flags);
>>> local_irq_enable();
>>> local_irq_restore(flags);
>>>
>>> we'd be up some creek without a paddle... now I don't _think_ we have
>>> genius code like that, but I'd feel saver if we can haz an assertion in
>>> there somewhere...
>>>
>>> Maybe something like:
>>>
>>> #ifdef CONFIG_DEBUG_ENTRY // for lack of something saner
>>> WARN_ON_ONCE((arch_local_save_flags() ^ flags) & X86_EFLAGS_IF);
>>> #endif
>>>
>>> At the end?
>>
>> I'd like to, but using WARN_ON_ONCE() in include/asm/irqflags.h sounds
>> like a perfect receipt for include dependency hell.
>>
>> We could use a plain asm("ud2") instead.
>
> How about out-of-lining it:
>
> #ifdef CONFIG_DEBUG_ENTRY
> extern void warn_bogus_irqrestore();
> #endif
>
> static __always_inline void arch_local_irq_restore(unsigned long flags)
> {
> if (!arch_irqs_disabled_flags(flags)) {
> arch_local_irq_enable();
> } else {
> #ifdef CONFIG_DEBUG_ENTRY
> if (unlikely(arch_local_irq_save() & X86_EFLAGS_IF))
> warn_bogus_irqrestore();
> #endif
> }
>

This couldn't be a WARN_ON_ONCE() then (or it would be a catch all).
Another approach might be to open-code the WARN_ON_ONCE(), like:

#ifdef CONFIG_DEBUG_ENTRY
extern void warn_bogus_irqrestore(bool *once);
#endif

static __always_inline void arch_local_irq_restore(unsigned long flags)
{
if (!arch_irqs_disabled_flags(flags))
arch_local_irq_enable();
#ifdef CONFIG_DEBUG_ENTRY
{
static bool once;

if (unlikely(arch_local_irq_save() & X86_EFLAGS_IF))
warn_bogus_irqrestore(&once);
}
#endif
}


Juergen


Attachments:
OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature
Download all attachments

2020-11-23 15:18:48

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf





> On Nov 22, 2020, at 9:22 PM, Jürgen Groß <[email protected]> wrote:
>
> On 22.11.20 22:44, Andy Lutomirski wrote:
>>> On Sat, Nov 21, 2020 at 10:55 PM Jürgen Groß <[email protected]> wrote:
>>>
>>> On 20.11.20 12:59, Peter Zijlstra wrote:
>>>> On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
>>>>> +static __always_inline void arch_local_irq_restore(unsigned long flags)
>>>>> +{
>>>>> + if (!arch_irqs_disabled_flags(flags))
>>>>> + arch_local_irq_enable();
>>>>> +}
>>>>
>>>> If someone were to write horrible code like:
>>>>
>>>> local_irq_disable();
>>>> local_irq_save(flags);
>>>> local_irq_enable();
>>>> local_irq_restore(flags);
>>>>
>>>> we'd be up some creek without a paddle... now I don't _think_ we have
>>>> genius code like that, but I'd feel saver if we can haz an assertion in
>>>> there somewhere...
>>>>
>>>> Maybe something like:
>>>>
>>>> #ifdef CONFIG_DEBUG_ENTRY // for lack of something saner
>>>> WARN_ON_ONCE((arch_local_save_flags() ^ flags) & X86_EFLAGS_IF);
>>>> #endif
>>>>
>>>> At the end?
>>>
>>> I'd like to, but using WARN_ON_ONCE() in include/asm/irqflags.h sounds
>>> like a perfect receipt for include dependency hell.
>>>
>>> We could use a plain asm("ud2") instead.
>> How about out-of-lining it:
>> #ifdef CONFIG_DEBUG_ENTRY
>> extern void warn_bogus_irqrestore();
>> #endif
>> static __always_inline void arch_local_irq_restore(unsigned long flags)
>> {
>> if (!arch_irqs_disabled_flags(flags)) {
>> arch_local_irq_enable();
>> } else {
>> #ifdef CONFIG_DEBUG_ENTRY
>> if (unlikely(arch_local_irq_save() & X86_EFLAGS_IF))
>> warn_bogus_irqrestore();
>> #endif
>> }
>
> This couldn't be a WARN_ON_ONCE() then (or it would be a catch all).

If you put the WARN_ON_ONCE in the out-of-line helper, it should work reasonably well.

> Another approach might be to open-code the WARN_ON_ONCE(), like:
>
> #ifdef CONFIG_DEBUG_ENTRY
> extern void warn_bogus_irqrestore(bool *once);
> #endif
>
> static __always_inline void arch_local_irq_restore(unsigned long flags)
> {
> if (!arch_irqs_disabled_flags(flags))
> arch_local_irq_enable();
> #ifdef CONFIG_DEBUG_ENTRY
> {
> static bool once;
>
> if (unlikely(arch_local_irq_save() & X86_EFLAGS_IF))
> warn_bogus_irqrestore(&once);
> }
> #endif
> }
>

I don’t know precisely what a static variable in an __always_inline function will do, but I imagine it will be, at best, erratic, especially when modules are involved.

>
> Juergen
> <OpenPGP_0xB0DE9DD628BF132F.asc>

2020-11-27 08:50:24

by Oliver Sang

[permalink] [raw]
Subject: [x86] 97e8f0134a: fio.write_iops 8.6% improvement


Greeting,

FYI, we noticed a 8.6% improvement of fio.write_iops due to commit:


commit: 97e8f0134a2bb794e4885f642724a50979b84f89 ("x86: rework arch_local_irq_restore() to not use popf")
url: https://github.com/0day-ci/linux/commits/Juergen-Gross/x86-major-paravirt-cleanup/20201120-194934


in testcase: fio-basic
on test machine: 96 threads Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz with 256G memory
with following parameters:

disk: 2pmem
fs: xfs
mount_option: dax
runtime: 200s
nr_task: 50%
time_based: tb
rw: randwrite
bs: 4k
ioengine: sync
test_size: 200G
cpufreq_governor: performance
ucode: 0x5003003

test-description: Fio is a tool that will spawn a number of threads or processes doing a particular type of I/O action as specified by the user.
test-url: https://github.com/axboe/fio

In addition to that, the commit also has significant impact on the following tests:

+------------------+---------------------------------------------------------------------------+
| testcase: change | will-it-scale: will-it-scale.per_process_ops 5.2% improvement |
| test machine | 192 threads Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz with 192G memory |
| test parameters | cpufreq_governor=performance |
| | mode=process |
| | nr_task=50% |
| | test=futex1 |
| | ucode=0x5003003 |
+------------------+---------------------------------------------------------------------------+




Details are as below:
-------------------------------------------------------------------------------------------------->


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml

=========================================================================================
bs/compiler/cpufreq_governor/disk/fs/ioengine/kconfig/mount_option/nr_task/rootfs/runtime/rw/tbox_group/test_size/testcase/time_based/ucode:
4k/gcc-9/performance/2pmem/xfs/sync/x86_64-rhel-8.3/dax/50%/debian-10.4-x86_64-20200603.cgz/200s/randwrite/lkp-csl-2sp6/200G/fio-basic/tb/0x5003003

commit:
d625d30a28 ("x86/xen: drop USERGS_SYSRET64 paravirt call")
97e8f0134a ("x86: rework arch_local_irq_restore() to not use popf")

d625d30a28a4c7a3 97e8f0134a2bb794e4885f64272
---------------- ---------------------------
%stddev %change %stddev
\ | \
0.12 ? 5% -0.0 0.09 ? 13% fio.latency_1000us%
0.32 -0.0 0.31 fio.latency_100ms%
18.23 -1.8 16.42 ? 2% fio.latency_100us%
19.93 +2.4 22.31 ? 4% fio.latency_20us%
20.34 -3.5 16.85 fio.latency_250us%
0.04 ? 4% -0.0 0.03 ? 5% fio.latency_2ms%
2.78 ? 2% -0.3 2.45 ? 2% fio.latency_500us%
0.26 -0.0 0.21 fio.latency_50ms%
37.13 +3.5 40.61 ? 2% fio.latency_50us%
0.61 ? 4% -0.1 0.52 ? 7% fio.latency_750us%
600.25 -4.7% 572.00 fio.time.percent_of_cpu_this_job_got
1169 -4.8% 1112 fio.time.system_time
8540510 +2.4% 8746174 fio.time.voluntary_context_switches
26105864 +8.6% 28354900 fio.workload
509.80 +8.6% 553.74 fio.write_bw_MBps
153600 -12.7% 134144 fio.write_clat_90%_us
220672 -9.5% 199680 fio.write_clat_95%_us
612352 -9.4% 555008 ? 2% fio.write_clat_99%_us
366788 -7.9% 337679 fio.write_clat_mean_us
3896686 -2.5% 3799973 fio.write_clat_stddev
130510 +8.6% 141756 fio.write_iops
15.54 -1.3% 15.35 boot-time.dhcp
3739 ? 76% +84.2% 6886 ? 30% numa-meminfo.node1.PageTables
934.75 ? 76% +84.2% 1721 ? 31% numa-vmstat.node1.nr_page_table_pages
1128546 +7.6% 1214673 vmstat.io.bo
254919 +5.4% 268773 vmstat.system.cs
158457 +1.5% 160806 proc-vmstat.nr_slab_unreclaimable
1694299 +5.6% 1789326 proc-vmstat.numa_hit
1662873 +5.7% 1757835 proc-vmstat.numa_local
6648063 +9.2% 7260774 proc-vmstat.pgalloc_normal
6403869 +9.8% 7028415 proc-vmstat.pgfree
2.313e+08 +7.5% 2.487e+08 proc-vmstat.pgpgout
36071 ? 5% -10.1% 32430 ? 3% softirqs.CPU36.RCU
36623 ? 5% -9.0% 33322 ? 4% softirqs.CPU41.RCU
36414 ? 5% -8.8% 33202 ? 3% softirqs.CPU47.RCU
34248 -9.6% 30945 softirqs.CPU73.RCU
33882 -9.4% 30687 softirqs.CPU74.RCU
36889 ? 4% -12.9% 32132 ? 2% softirqs.CPU82.RCU
35183 ? 4% -8.5% 32189 ? 4% softirqs.CPU85.RCU
36146 ? 2% -9.3% 32785 ? 3% softirqs.CPU87.RCU
36673 ? 3% -9.9% 33030 ? 3% softirqs.CPU95.RCU
5942 -30.4% 4138 ? 21% sched_debug.cfs_rq:/.exec_clock.avg
4314 ? 2% -33.6% 2863 ? 25% sched_debug.cfs_rq:/.exec_clock.min
773.71 ? 8% -16.7% 644.43 ? 6% sched_debug.cfs_rq:/.exec_clock.stddev
241.46 ? 11% -27.0% 176.38 ? 26% sched_debug.cfs_rq:/.load_avg.avg
27925 ? 5% -21.0% 22049 ? 17% sched_debug.cfs_rq:/.min_vruntime.min
116399 -20.2% 92862 ? 13% sched_debug.cpu.clock.avg
116404 -20.2% 92867 ? 13% sched_debug.cpu.clock.max
116393 -20.2% 92856 ? 13% sched_debug.cpu.clock.min
114940 -20.2% 91709 ? 13% sched_debug.cpu.clock_task.avg
115124 -20.2% 91877 ? 13% sched_debug.cpu.clock_task.max
110309 -20.8% 87321 ? 14% sched_debug.cpu.clock_task.min
5029 -11.6% 4444 ? 7% sched_debug.cpu.curr->pid.max
150621 ? 6% -31.1% 103723 ? 26% sched_debug.cpu.nr_switches.min
148865 ? 6% -31.1% 102577 ? 26% sched_debug.cpu.sched_count.min
36388 -27.7% 26315 ? 22% sched_debug.cpu.sched_goidle.avg
23703 ? 3% -35.2% 15364 ? 25% sched_debug.cpu.sched_goidle.min
74196 ? 5% -30.8% 51361 ? 26% sched_debug.cpu.ttwu_count.min
116395 -20.2% 92858 ? 13% sched_debug.cpu_clk
115878 -20.3% 92361 ? 13% sched_debug.ktime
116767 -20.2% 93217 ? 13% sched_debug.sched_clk
1.668e+09 +3.3% 1.724e+09 perf-stat.i.branch-instructions
28584168 ? 9% +8.4% 30994854 ? 8% perf-stat.i.branch-misses
33324787 +7.5% 35813189 perf-stat.i.cache-misses
83826973 ? 12% +12.7% 94503492 ? 13% perf-stat.i.cache-references
258508 +5.4% 272477 perf-stat.i.context-switches
2.60 ? 2% -5.6% 2.45 ? 2% perf-stat.i.cpi
674.12 ? 2% -7.8% 621.30 ? 3% perf-stat.i.cycles-between-cache-misses
2.429e+09 +3.9% 2.524e+09 perf-stat.i.dTLB-loads
1.259e+09 +5.3% 1.326e+09 perf-stat.i.dTLB-stores
53.99 +2.1 56.12 perf-stat.i.iTLB-load-miss-rate%
9728741 +5.2% 10234658 ? 3% perf-stat.i.iTLB-load-misses
8.601e+09 +3.8% 8.927e+09 perf-stat.i.instructions
0.39 ? 2% +5.7% 0.41 ? 2% perf-stat.i.ipc
56.92 +4.2% 59.31 perf-stat.i.metric.M/sec
9006030 +7.1% 9648264 perf-stat.i.node-load-misses
3237763 ? 2% +10.6% 3581907 ? 2% perf-stat.i.node-loads
2918052 ? 2% +6.4% 3106057 perf-stat.i.node-store-misses
582042 ? 3% +11.5% 649247 ? 4% perf-stat.i.node-stores
2.53 ? 2% -5.2% 2.40 ? 2% perf-stat.overall.cpi
656.32 ? 2% -8.5% 600.78 ? 2% perf-stat.overall.cycles-between-cache-misses
53.81 +2.0 55.85 perf-stat.overall.iTLB-load-miss-rate%
0.39 ? 2% +5.5% 0.42 ? 2% perf-stat.overall.ipc
66759 -4.6% 63713 perf-stat.overall.path-length
1.666e+09 +3.3% 1.721e+09 perf-stat.ps.branch-instructions
28522187 ? 9% +8.4% 30911250 ? 8% perf-stat.ps.branch-misses
33163222 +7.5% 35640249 perf-stat.ps.cache-misses
83518487 ? 12% +12.7% 94131115 ? 12% perf-stat.ps.cache-references
257076 +5.4% 270958 perf-stat.ps.context-switches
2.425e+09 +3.9% 2.52e+09 perf-stat.ps.dTLB-loads
1.256e+09 +5.3% 1.323e+09 perf-stat.ps.dTLB-stores
9695548 +5.2% 10200116 ? 3% perf-stat.ps.iTLB-load-misses
8.591e+09 +3.8% 8.916e+09 perf-stat.ps.instructions
8959319 +7.1% 9599418 perf-stat.ps.node-load-misses
3223144 ? 2% +10.6% 3564824 ? 2% perf-stat.ps.node-loads
2903781 ? 2% +6.5% 3091553 perf-stat.ps.node-store-misses
580260 ? 3% +11.5% 646965 ? 4% perf-stat.ps.node-stores
1.743e+12 +3.7% 1.807e+12 perf-stat.total.instructions
1131 ?164% -99.8% 2.25 ?173% interrupts.79:PCI-MSI.31981612-edge.i40e-eth0-TxRx-43
714498 ? 2% +14.7% 819498 ? 2% interrupts.CAL:Function_call_interrupts
6256 ? 8% +31.9% 8251 ? 6% interrupts.CPU0.CAL:Function_call_interrupts
306.50 ? 12% +31.2% 402.00 ? 7% interrupts.CPU0.RES:Rescheduling_interrupts
6860 ? 11% +39.6% 9576 ? 7% interrupts.CPU1.CAL:Function_call_interrupts
291.25 ? 16% +23.7% 360.25 ? 7% interrupts.CPU1.RES:Rescheduling_interrupts
6531 ? 4% +23.6% 8073 ? 12% interrupts.CPU11.CAL:Function_call_interrupts
6598 ? 9% +22.4% 8078 ? 7% interrupts.CPU12.CAL:Function_call_interrupts
6250 ? 8% +34.0% 8372 ? 9% interrupts.CPU13.CAL:Function_call_interrupts
6748 ? 11% +25.4% 8460 ? 10% interrupts.CPU14.CAL:Function_call_interrupts
6387 ? 10% +33.1% 8498 ? 10% interrupts.CPU16.CAL:Function_call_interrupts
6562 ? 12% +21.9% 7996 ? 9% interrupts.CPU19.CAL:Function_call_interrupts
78.75 ? 36% -64.8% 27.75 ? 39% interrupts.CPU19.TLB:TLB_shootdowns
7255 ? 4% +23.5% 8959 ? 7% interrupts.CPU2.CAL:Function_call_interrupts
6542 ? 9% +34.1% 8770 ? 4% interrupts.CPU20.CAL:Function_call_interrupts
6278 ? 9% +37.5% 8635 ? 5% interrupts.CPU21.CAL:Function_call_interrupts
243.00 ? 13% +24.4% 302.25 ? 6% interrupts.CPU21.RES:Rescheduling_interrupts
397.25 ? 12% -21.1% 313.50 ? 18% interrupts.CPU24.RES:Rescheduling_interrupts
540.25 ? 26% +68.0% 907.50 ? 23% interrupts.CPU26.NMI:Non-maskable_interrupts
540.25 ? 26% +68.0% 907.50 ? 23% interrupts.CPU26.PMI:Performance_monitoring_interrupts
336.50 ? 11% -26.4% 247.75 ? 8% interrupts.CPU29.RES:Rescheduling_interrupts
7506 ? 3% +25.4% 9410 ? 11% interrupts.CPU3.CAL:Function_call_interrupts
6747 ? 11% +17.7% 7944 ? 5% interrupts.CPU39.CAL:Function_call_interrupts
6517 ? 10% +31.9% 8593 ? 8% interrupts.CPU4.CAL:Function_call_interrupts
1131 ?164% -99.8% 2.00 ?173% interrupts.CPU43.79:PCI-MSI.31981612-edge.i40e-eth0-TxRx-43
1009 ? 10% -37.5% 631.25 ? 27% interrupts.CPU43.NMI:Non-maskable_interrupts
1009 ? 10% -37.5% 631.25 ? 27% interrupts.CPU43.PMI:Performance_monitoring_interrupts
6214 ? 17% +26.6% 7866 ? 5% interrupts.CPU48.CAL:Function_call_interrupts
95.00 ? 34% -64.5% 33.75 ? 46% interrupts.CPU49.TLB:TLB_shootdowns
7046 ? 12% +31.1% 9235 ? 10% interrupts.CPU5.CAL:Function_call_interrupts
7146 ? 9% +22.1% 8727 ? 2% interrupts.CPU50.CAL:Function_call_interrupts
7306 ? 8% +18.5% 8654 ? 14% interrupts.CPU51.CAL:Function_call_interrupts
7609 ? 11% +26.2% 9601 ? 4% interrupts.CPU53.CAL:Function_call_interrupts
7626 ? 2% +15.1% 8774 ? 9% interrupts.CPU55.CAL:Function_call_interrupts
7280 ? 4% +27.3% 9264 ? 4% interrupts.CPU57.CAL:Function_call_interrupts
491.75 ? 22% +26.5% 622.00 ? 29% interrupts.CPU57.NMI:Non-maskable_interrupts
491.75 ? 22% +26.5% 622.00 ? 29% interrupts.CPU57.PMI:Performance_monitoring_interrupts
8002 ? 10% +13.9% 9117 ? 6% interrupts.CPU60.CAL:Function_call_interrupts
7373 ? 8% +32.7% 9786 ? 4% interrupts.CPU62.CAL:Function_call_interrupts
7193 ? 5% +33.4% 9593 ? 4% interrupts.CPU63.CAL:Function_call_interrupts
272.50 ? 12% +29.0% 351.50 ? 14% interrupts.CPU63.RES:Rescheduling_interrupts
7818 ? 6% +21.6% 9507 ? 5% interrupts.CPU64.CAL:Function_call_interrupts
7044 ? 11% +34.5% 9474 ? 10% interrupts.CPU65.CAL:Function_call_interrupts
7602 ? 9% +21.2% 9216 ? 6% interrupts.CPU66.CAL:Function_call_interrupts
7413 ? 4% +29.0% 9561 ? 6% interrupts.CPU67.CAL:Function_call_interrupts
282.00 ? 17% +23.7% 348.75 ? 5% interrupts.CPU67.RES:Rescheduling_interrupts
7334 ? 6% +24.8% 9155 ? 8% interrupts.CPU68.CAL:Function_call_interrupts
748.50 ? 24% +32.8% 994.25 ? 8% interrupts.CPU69.NMI:Non-maskable_interrupts
748.50 ? 24% +32.8% 994.25 ? 8% interrupts.CPU69.PMI:Performance_monitoring_interrupts
6745 ? 8% +24.7% 8412 ? 9% interrupts.CPU7.CAL:Function_call_interrupts
7765 ? 6% +25.8% 9769 ? 6% interrupts.CPU70.CAL:Function_call_interrupts
281.75 ? 8% +18.4% 333.50 ? 12% interrupts.CPU70.RES:Rescheduling_interrupts
7299 ? 6% +33.6% 9749 ? 12% interrupts.CPU71.CAL:Function_call_interrupts
1046 ? 8% -37.0% 659.50 ? 17% interrupts.CPU73.NMI:Non-maskable_interrupts
1046 ? 8% -37.0% 659.50 ? 17% interrupts.CPU73.PMI:Performance_monitoring_interrupts
970.50 ? 10% -26.4% 714.00 ? 22% interrupts.CPU75.NMI:Non-maskable_interrupts
970.50 ? 10% -26.4% 714.00 ? 22% interrupts.CPU75.PMI:Performance_monitoring_interrupts
854.25 ? 19% -39.6% 515.75 ? 27% interrupts.CPU76.NMI:Non-maskable_interrupts
854.25 ? 19% -39.6% 515.75 ? 27% interrupts.CPU76.PMI:Performance_monitoring_interrupts
978.00 ? 24% -46.2% 525.75 ? 40% interrupts.CPU77.NMI:Non-maskable_interrupts
978.00 ? 24% -46.2% 525.75 ? 40% interrupts.CPU77.PMI:Performance_monitoring_interrupts
7666 ? 2% +16.3% 8914 ? 7% interrupts.CPU78.CAL:Function_call_interrupts
888.25 ? 20% -28.4% 636.00 ? 25% interrupts.CPU78.NMI:Non-maskable_interrupts
888.25 ? 20% -28.4% 636.00 ? 25% interrupts.CPU78.PMI:Performance_monitoring_interrupts
829.00 ? 13% -17.2% 686.75 ? 20% interrupts.CPU79.NMI:Non-maskable_interrupts
829.00 ? 13% -17.2% 686.75 ? 20% interrupts.CPU79.PMI:Performance_monitoring_interrupts
314.75 ? 10% -12.4% 275.75 ? 5% interrupts.CPU85.RES:Rescheduling_interrupts
341.50 ? 13% -17.9% 280.25 ? 13% interrupts.CPU92.RES:Rescheduling_interrupts
350.25 ? 11% -22.6% 271.25 ? 9% interrupts.CPU94.RES:Rescheduling_interrupts
15.97 ± 4% -1.3 14.63 ± 3% perf-profile.calltrace.cycles-pp.xlog_grant_head_check.xfs_log_reserve.xfs_trans_reserve.xfs_trans_alloc.xfs_iomap_write_direct
16.73 ± 4% -1.2 15.49 ± 2% perf-profile.calltrace.cycles-pp.xfs_log_reserve.xfs_trans_reserve.xfs_trans_alloc.xfs_iomap_write_direct.xfs_direct_write_iomap_begin
16.78 ± 4% -1.2 15.55 ± 3% perf-profile.calltrace.cycles-pp.xfs_trans_reserve.xfs_trans_alloc.xfs_iomap_write_direct.xfs_direct_write_iomap_begin.iomap_apply
16.87 ± 4% -1.2 15.65 ± 2% perf-profile.calltrace.cycles-pp.xfs_trans_alloc.xfs_iomap_write_direct.xfs_direct_write_iomap_begin.iomap_apply.dax_iomap_rw
7.83 ± 3% -1.1 6.68 ± 4% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.xlog_grant_head_wait.xlog_grant_head_check.xfs_log_reserve
7.89 ± 3% -1.1 6.76 ± 4% perf-profile.calltrace.cycles-pp._raw_spin_lock.xlog_grant_head_wait.xlog_grant_head_check.xfs_log_reserve.xfs_trans_reserve
8.81 ± 3% -1.1 7.74 ± 4% perf-profile.calltrace.cycles-pp.xlog_grant_head_wait.xlog_grant_head_check.xfs_log_reserve.xfs_trans_reserve.xfs_trans_alloc
9.79 ± 4% -1.1 8.71 ± 3% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.xfs_log_space_wake.xfs_log_ticket_ungrant.xfs_log_commit_cil
9.86 ± 4% -1.1 8.80 ± 3% perf-profile.calltrace.cycles-pp._raw_spin_lock.xfs_log_space_wake.xfs_log_ticket_ungrant.xfs_log_commit_cil.__xfs_trans_commit
10.16 ± 3% -1.0 9.15 ± 3% perf-profile.calltrace.cycles-pp.xfs_log_space_wake.xfs_log_ticket_ungrant.xfs_log_commit_cil.__xfs_trans_commit.xfs_iomap_write_direct
10.83 ± 3% -0.8 9.99 ± 3% perf-profile.calltrace.cycles-pp.xfs_log_ticket_ungrant.xfs_log_commit_cil.__xfs_trans_commit.xfs_iomap_write_direct.xfs_direct_write_iomap_begin
3.89 ± 7% -0.4 3.45 ± 4% perf-profile.calltrace.cycles-pp.file_update_time.xfs_file_aio_write_checks.xfs_file_dax_write.new_sync_write.vfs_write
3.97 ± 7% -0.4 3.54 ± 4% perf-profile.calltrace.cycles-pp.xfs_file_aio_write_checks.xfs_file_dax_write.new_sync_write.vfs_write.ksys_write
3.84 ± 6% -0.4 3.42 ± 4% perf-profile.calltrace.cycles-pp.xfs_vn_update_time.file_update_time.xfs_file_aio_write_checks.xfs_file_dax_write.new_sync_write
2.31 ± 7% -0.4 1.91 ± 6% perf-profile.calltrace.cycles-pp.xlog_grant_head_check.xfs_log_reserve.xfs_trans_reserve.xfs_trans_alloc.xfs_vn_update_time
2.40 ± 7% -0.4 1.99 ± 5% perf-profile.calltrace.cycles-pp.xfs_log_reserve.xfs_trans_reserve.xfs_trans_alloc.xfs_vn_update_time.file_update_time
2.40 ± 6% -0.4 2.00 ± 5% perf-profile.calltrace.cycles-pp.xfs_trans_reserve.xfs_trans_alloc.xfs_vn_update_time.file_update_time.xfs_file_aio_write_checks
2.40 ± 7% -0.4 2.01 ± 5% perf-profile.calltrace.cycles-pp.xfs_trans_alloc.xfs_vn_update_time.file_update_time.xfs_file_aio_write_checks.xfs_file_dax_write
0.96 ± 9% -0.1 0.83 ± 5% perf-profile.calltrace.cycles-pp.xfs_log_space_wake.xfs_log_ticket_ungrant.xfs_log_commit_cil.__xfs_trans_commit.xfs_vn_update_time
0.89 ± 2% -0.1 0.83 ± 3% perf-profile.calltrace.cycles-pp.xfs_trans_committed_bulk.xlog_cil_committed.xlog_cil_process_committed.xlog_state_do_callback.xlog_ioend_work
0.56 ± 4% +0.1 0.63 ± 3% perf-profile.calltrace.cycles-pp.xfs_bmbt_init_key_from_rec.xfs_lookup_get_search_key.xfs_btree_lookup.xfs_bmap_add_extent_unwritten_real.xfs_bmapi_convert_unwritten
0.67 ± 4% +0.1 0.75 ± 2% perf-profile.calltrace.cycles-pp.xfs_lookup_get_search_key.xfs_btree_lookup.xfs_bmap_add_extent_unwritten_real.xfs_bmapi_convert_unwritten.xfs_bmapi_write
0.80 ± 7% +0.1 0.89 ± 6% perf-profile.calltrace.cycles-pp.xfs_buf_item_format.xlog_cil_insert_items.xfs_log_commit_cil.__xfs_trans_commit.xfs_iomap_write_direct
0.69 ± 6% +0.1 0.78 ± 5% perf-profile.calltrace.cycles-pp.xlog_state_release_iclog.xlog_write.xlog_cil_push_work.process_one_work.worker_thread
0.67 ± 4% +0.1 0.77 ± 8% perf-profile.calltrace.cycles-pp.pmem_submit_bio.submit_bio_noacct.submit_bio._xfs_buf_ioapply.__xfs_buf_submit
0.73 ± 4% +0.1 0.85 ± 7% perf-profile.calltrace.cycles-pp.submit_bio_noacct.submit_bio._xfs_buf_ioapply.__xfs_buf_submit.xfs_buf_delwri_submit_buffers
0.73 ± 4% +0.1 0.86 ± 7% perf-profile.calltrace.cycles-pp.submit_bio._xfs_buf_ioapply.__xfs_buf_submit.xfs_buf_delwri_submit_buffers.xfsaild_push
0.69 ± 7% +0.1 0.82 ± 8% perf-profile.calltrace.cycles-pp.xfs_iext_lookup_extent.xfs_bmapi_read.xfs_direct_write_iomap_begin.iomap_apply.dax_iomap_rw
0.74 ± 7% +0.1 0.88 ± 8% perf-profile.calltrace.cycles-pp.xfs_bmapi_read.xfs_direct_write_iomap_begin.iomap_apply.dax_iomap_rw.xfs_file_dax_write
1.21 ± 4% +0.2 1.36 ± 6% perf-profile.calltrace.cycles-pp.pmem_submit_bio.submit_bio_noacct.submit_bio.submit_bio_wait.blkdev_issue_zeroout
1.74 ± 6% +0.2 1.92 ± 4% perf-profile.calltrace.cycles-pp.blkdev_issue_zeroout.xfs_bmapi_convert_unwritten.xfs_bmapi_write.xfs_iomap_write_direct.xfs_direct_write_iomap_begin
1.26 ± 4% +0.2 1.46 ± 4% perf-profile.calltrace.cycles-pp.xlog_write.xlog_cil_push_work.process_one_work.worker_thread.kthread
1.76 ± 4% +0.2 1.96 ± 4% perf-profile.calltrace.cycles-pp.xfs_btree_insrec.xfs_btree_insert.xfs_bmap_add_extent_unwritten_real.xfs_bmapi_convert_unwritten.xfs_bmapi_write
0.42 ± 57% +0.2 0.62 ± 6% perf-profile.calltrace.cycles-pp.xfs_cil_prepare_item.xlog_cil_insert_items.xfs_log_commit_cil.__xfs_trans_commit.xfs_iomap_write_direct
1.80 ± 4% +0.2 2.02 ± 4% perf-profile.calltrace.cycles-pp.xfs_btree_insert.xfs_bmap_add_extent_unwritten_real.xfs_bmapi_convert_unwritten.xfs_bmapi_write.xfs_iomap_write_direct
2.75 ± 3% +0.2 2.97 ± 5% perf-profile.calltrace.cycles-pp.xfsaild.kthread.ret_from_fork
2.75 ± 3% +0.2 2.97 ± 5% perf-profile.calltrace.cycles-pp.xfsaild_push.xfsaild.kthread.ret_from_fork
1.25 ± 5% +0.2 1.48 ± 2% perf-profile.calltrace.cycles-pp.dax_iomap_actor.iomap_apply.dax_iomap_rw.xfs_file_dax_write.new_sync_write
1.39 ± 3% +0.2 1.63 ± 3% perf-profile.calltrace.cycles-pp.xlog_cil_push_work.process_one_work.worker_thread.kthread.ret_from_fork
1.32 ± 4% +0.3 1.60 ± 5% perf-profile.calltrace.cycles-pp._xfs_buf_ioapply.__xfs_buf_submit.xfs_buf_delwri_submit_buffers.xfsaild_push.xfsaild
2.99 +0.3 3.30 ± 2% perf-profile.calltrace.cycles-pp.process_one_work.worker_thread.kthread.ret_from_fork
1.96 ± 4% +0.3 2.29 ± 5% perf-profile.calltrace.cycles-pp.__xfs_buf_submit.xfs_buf_delwri_submit_buffers.xfsaild_push.xfsaild.kthread
2.30 ± 4% +0.3 2.63 ± 5% perf-profile.calltrace.cycles-pp.xfs_buf_delwri_submit_buffers.xfsaild_push.xfsaild.kthread.ret_from_fork
3.31 +0.4 3.69 ± 3% perf-profile.calltrace.cycles-pp.worker_thread.kthread.ret_from_fork
6.09 ± 2% +0.6 6.70 ± 4% perf-profile.calltrace.cycles-pp.ret_from_fork
6.09 ± 2% +0.6 6.70 ± 4% perf-profile.calltrace.cycles-pp.kthread.ret_from_fork
6.92 ± 4% +0.8 7.72 ± 3% perf-profile.calltrace.cycles-pp.xfs_bmap_add_extent_unwritten_real.xfs_bmapi_convert_unwritten.xfs_bmapi_write.xfs_iomap_write_direct.xfs_direct_write_iomap_begin
8.79 ± 4% +1.0 9.78 ± 3% perf-profile.calltrace.cycles-pp.xfs_bmapi_convert_unwritten.xfs_bmapi_write.xfs_iomap_write_direct.xfs_direct_write_iomap_begin.iomap_apply
9.29 ± 4% +1.0 10.33 ± 3% perf-profile.calltrace.cycles-pp.xfs_bmapi_write.xfs_iomap_write_direct.xfs_direct_write_iomap_begin.iomap_apply.dax_iomap_rw
2.87 ± 5% +1.4 4.24 ± 7% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.xlog_cil_insert_items.xfs_log_commit_cil.__xfs_trans_commit
3.03 ± 5% +1.4 4.45 ± 7% perf-profile.calltrace.cycles-pp._raw_spin_lock.xlog_cil_insert_items.xfs_log_commit_cil.__xfs_trans_commit.xfs_iomap_write_direct
18.13 ± 4% +1.7 19.80 ± 5% perf-profile.calltrace.cycles-pp.__xfs_trans_commit.xfs_iomap_write_direct.xfs_direct_write_iomap_begin.iomap_apply.dax_iomap_rw
18.04 ± 4% +1.7 19.71 ± 5% perf-profile.calltrace.cycles-pp.xfs_log_commit_cil.__xfs_trans_commit.xfs_iomap_write_direct.xfs_direct_write_iomap_begin.iomap_apply
4.89 ± 5% +1.7 6.59 ± 4% perf-profile.calltrace.cycles-pp.xlog_cil_insert_items.xfs_log_commit_cil.__xfs_trans_commit.xfs_iomap_write_direct.xfs_direct_write_iomap_begin
18.29 ± 4% -1.8 16.54 ± 3% perf-profile.children.cycles-pp.xlog_grant_head_check
19.12 ± 4% -1.6 17.49 ± 2% perf-profile.children.cycles-pp.xfs_log_reserve
19.18 ± 4% -1.6 17.55 ± 3% perf-profile.children.cycles-pp.xfs_trans_reserve
19.28 ± 4% -1.6 17.66 ± 2% perf-profile.children.cycles-pp.xfs_trans_alloc
11.15 ± 4% -1.1 10.02 ± 3% perf-profile.children.cycles-pp.xfs_log_space_wake
8.81 ± 3% -1.1 7.74 ± 4% perf-profile.children.cycles-pp.xlog_grant_head_wait
11.88 ± 3% -1.0 10.92 ± 3% perf-profile.children.cycles-pp.xfs_log_ticket_ungrant
3.89 ± 7% -0.4 3.45 ± 4% perf-profile.children.cycles-pp.file_update_time
3.97 ± 7% -0.4 3.54 ± 4% perf-profile.children.cycles-pp.xfs_file_aio_write_checks
3.84 ± 6% -0.4 3.42 ± 4% perf-profile.children.cycles-pp.xfs_vn_update_time
0.36 ± 6% -0.3 0.10 ± 11% perf-profile.children.cycles-pp._raw_spin_unlock_irqrestore
1.70 ± 3% -0.2 1.52 ± 5% perf-profile.children.cycles-pp.xlog_grant_head_wake
0.30 ± 5% -0.1 0.23 ± 10% perf-profile.children.cycles-pp.xfs_buf_item_push
0.89 ± 2% -0.1 0.83 ± 3% perf-profile.children.cycles-pp.xfs_trans_committed_bulk
0.11 ± 15% -0.0 0.08 ± 19% perf-profile.children.cycles-pp.get_next_timer_interrupt
0.17 ± 2% -0.0 0.15 ± 5% perf-profile.children.cycles-pp.lapic_next_deadline
0.09 +0.0 0.11 ± 6% perf-profile.children.cycles-pp.xfs_btree_ptr_addr
0.11 ± 6% +0.0 0.13 ± 12% perf-profile.children.cycles-pp.crc_128
0.16 ± 10% +0.0 0.19 ± 6% perf-profile.children.cycles-pp.xfs_trans_ail_update_bulk
0.10 ± 12% +0.0 0.13 ± 6% perf-profile.children.cycles-pp.xfs_trans_dirty_buf
0.13 ± 6% +0.0 0.16 perf-profile.children.cycles-pp.irqtime_account_irq
0.09 +0.0 0.12 ± 3% perf-profile.children.cycles-pp.xfs_btree_log_block
0.04 ± 58% +0.0 0.07 ± 6% perf-profile.children.cycles-pp.percpu_counter_add_batch
0.04 ± 58% +0.0 0.07 ± 12% perf-profile.children.cycles-pp.xfs_verify_fsbno
0.04 ± 58% +0.0 0.07 ± 10% perf-profile.children.cycles-pp.cpumask_next_and
0.11 ± 9% +0.0 0.14 ± 15% perf-profile.children.cycles-pp.__bio_add_page
0.14 ± 11% +0.0 0.17 ± 10% perf-profile.children.cycles-pp.bio_add_page
0.10 ± 7% +0.0 0.13 ± 14% perf-profile.children.cycles-pp.sysvec_call_function_single
0.07 ± 6% +0.0 0.10 ± 7% perf-profile.children.cycles-pp.xfs_ail_delete_one
0.14 ± 10% +0.0 0.18 ± 4% perf-profile.children.cycles-pp.finish_task_switch
0.16 ± 5% +0.0 0.19 ± 9% perf-profile.children.cycles-pp.xfs_perag_put
0.09 ± 13% +0.0 0.12 ± 14% perf-profile.children.cycles-pp.xfs_ail_check
0.17 ± 7% +0.0 0.21 ± 6% perf-profile.children.cycles-pp.xfs_buf_unlock
0.15 ± 5% +0.0 0.20 ± 5% perf-profile.children.cycles-pp.__slab_free
0.12 ± 8% +0.0 0.16 ± 4% perf-profile.children.cycles-pp.xfs_trans_log_buf
0.11 ± 10% +0.0 0.15 ± 8% perf-profile.children.cycles-pp.asm_sysvec_call_function_single
0.24 ± 9% +0.0 0.28 ± 5% perf-profile.children.cycles-pp.xfs_btree_update
0.28 ± 7% +0.0 0.32 ± 5% perf-profile.children.cycles-pp.orc_find
0.26 ± 6% +0.0 0.30 ± 6% perf-profile.children.cycles-pp.xfs_buf_bio_end_io
0.33 ± 6% +0.0 0.38 ± 3% perf-profile.children.cycles-pp.bio_alloc_bioset
0.06 ± 7% +0.1 0.11 ± 10% perf-profile.children.cycles-pp.xfs_btree_lblock_calc_crc
0.01 ±173% +0.1 0.07 ± 25% perf-profile.children.cycles-pp.timerqueue_del
0.19 ± 7% +0.1 0.24 ± 3% perf-profile.children.cycles-pp.preempt_schedule_common
0.00 +0.1 0.05 ± 8% perf-profile.children.cycles-pp._find_next_bit
0.26 ± 7% +0.1 0.32 ± 3% perf-profile.children.cycles-pp._cond_resched
0.51 ± 4% +0.1 0.56 ± 5% perf-profile.children.cycles-pp._xfs_trans_bjoin
0.14 ± 11% +0.1 0.20 ± 4% perf-profile.children.cycles-pp.xfs_buf_item_done
0.14 ± 10% +0.1 0.20 ± 5% perf-profile.children.cycles-pp.xfs_trans_ail_delete
0.23 ± 9% +0.1 0.29 ± 5% perf-profile.children.cycles-pp.__orc_find
0.39 ± 9% +0.1 0.45 ± 4% perf-profile.children.cycles-pp.xfs_buf_item_pin
0.45 ± 8% +0.1 0.51 ± 2% perf-profile.children.cycles-pp.update_sd_lb_stats
0.46 ± 8% +0.1 0.52 ± 2% perf-profile.children.cycles-pp.find_busiest_group
0.24 ± 8% +0.1 0.31 ± 7% perf-profile.children.cycles-pp.__kmalloc
0.30 ± 11% +0.1 0.37 ± 7% perf-profile.children.cycles-pp.schedule_idle
0.38 ± 6% +0.1 0.45 ± 6% perf-profile.children.cycles-pp.down_read
0.60 ± 6% +0.1 0.67 ± 4% perf-profile.children.cycles-pp.xfs_perag_get
0.54 +0.1 0.61 ± 3% perf-profile.children.cycles-pp.memmove
0.46 ± 7% +0.1 0.53 ± 4% perf-profile.children.cycles-pp.newidle_balance
0.57 ± 3% +0.1 0.65 ± 2% perf-profile.children.cycles-pp.xfs_bmbt_init_key_from_rec
0.54 ± 10% +0.1 0.61 ± 2% perf-profile.children.cycles-pp.load_balance
0.83 ± 7% +0.1 0.91 ± 6% perf-profile.children.cycles-pp.xfs_buf_item_format
0.11 ± 7% +0.1 0.19 ± 2% perf-profile.children.cycles-pp.up
0.68 ± 3% +0.1 0.76 ± 2% perf-profile.children.cycles-pp.xfs_lookup_get_search_key
0.20 ± 5% +0.1 0.28 ± 2% perf-profile.children.cycles-pp.__wake_up_common_lock
0.28 ± 3% +0.1 0.37 perf-profile.children.cycles-pp.xfs_buf_ioend
0.54 ± 8% +0.1 0.63 ± 5% perf-profile.children.cycles-pp.xfs_cil_prepare_item
0.69 ± 6% +0.1 0.78 ± 5% perf-profile.children.cycles-pp.xlog_state_release_iclog
0.57 ± 8% +0.1 0.67 ± 5% perf-profile.children.cycles-pp.unwind_next_frame
0.89 ± 5% +0.1 1.01 ± 7% perf-profile.children.cycles-pp.memcpy_erms
0.43 ± 6% +0.1 0.56 ± 7% perf-profile.children.cycles-pp.__srcu_read_unlock
0.68 ± 5% +0.1 0.82 ± 4% perf-profile.children.cycles-pp.pick_next_task_fair
0.74 ± 7% +0.1 0.89 ± 8% perf-profile.children.cycles-pp.xfs_bmapi_read
1.09 ± 6% +0.1 1.23 ± 3% perf-profile.children.cycles-pp.schedule
0.81 ± 8% +0.2 0.97 ± 4% perf-profile.children.cycles-pp.arch_stack_walk
0.88 ± 7% +0.2 1.03 ± 5% perf-profile.children.cycles-pp.stack_trace_save_tsk
0.82 ± 7% +0.2 0.98 ± 7% perf-profile.children.cycles-pp.xfs_iext_lookup_extent
1.13 ± 6% +0.2 1.29 ± 4% perf-profile.children.cycles-pp.__account_scheduler_latency
1.46 ± 5% +0.2 1.63 ± 4% perf-profile.children.cycles-pp.ttwu_do_activate
1.45 ± 5% +0.2 1.62 ± 4% perf-profile.children.cycles-pp.enqueue_task_fair
1.74 ± 6% +0.2 1.92 ± 4% perf-profile.children.cycles-pp.blkdev_issue_zeroout
1.38 ± 5% +0.2 1.56 ± 4% perf-profile.children.cycles-pp.enqueue_entity
1.26 ± 4% +0.2 1.46 ± 3% perf-profile.children.cycles-pp.xlog_write
1.76 ± 4% +0.2 1.97 ± 4% perf-profile.children.cycles-pp.xfs_btree_insrec
1.81 ± 4% +0.2 2.02 ± 4% perf-profile.children.cycles-pp.xfs_btree_insert
2.75 ± 3% +0.2 2.97 ± 5% perf-profile.children.cycles-pp.xfsaild
2.75 ± 3% +0.2 2.97 ± 5% perf-profile.children.cycles-pp.xfsaild_push
1.25 ± 5% +0.2 1.48 ± 2% perf-profile.children.cycles-pp.dax_iomap_actor
1.39 ± 3% +0.2 1.63 ± 3% perf-profile.children.cycles-pp.xlog_cil_push_work
1.54 ± 5% +0.3 1.79 ± 3% perf-profile.children.cycles-pp.__schedule
1.32 ± 4% +0.3 1.60 ± 5% perf-profile.children.cycles-pp._xfs_buf_ioapply
2.99 +0.3 3.30 ± 2% perf-profile.children.cycles-pp.process_one_work
1.96 ± 3% +0.3 2.29 ± 5% perf-profile.children.cycles-pp.__xfs_buf_submit
2.43 ± 4% +0.3 2.75 ± 6% perf-profile.children.cycles-pp.pmem_submit_bio
2.30 ± 4% +0.3 2.63 ± 5% perf-profile.children.cycles-pp.xfs_buf_delwri_submit_buffers
2.73 ± 4% +0.4 3.08 ± 5% perf-profile.children.cycles-pp.submit_bio_noacct
2.75 ± 4% +0.4 3.10 ± 5% perf-profile.children.cycles-pp.submit_bio
1.52 ± 6% +0.4 1.89 ± 4% perf-profile.children.cycles-pp._raw_spin_lock_irqsave
3.31 +0.4 3.69 ± 3% perf-profile.children.cycles-pp.worker_thread
6.10 ± 2% +0.6 6.70 ± 4% perf-profile.children.cycles-pp.ret_from_fork
6.09 ± 2% +0.6 6.70 ± 4% perf-profile.children.cycles-pp.kthread
6.92 ± 4% +0.8 7.73 ± 3% perf-profile.children.cycles-pp.xfs_bmap_add_extent_unwritten_real
8.79 ± 4% +1.0 9.78 ± 3% perf-profile.children.cycles-pp.xfs_bmapi_convert_unwritten
9.29 ± 4% +1.0 10.33 ± 3% perf-profile.children.cycles-pp.xfs_bmapi_write
19.55 ± 4% +1.6 21.20 ± 4% perf-profile.children.cycles-pp.__xfs_trans_commit
19.46 ± 4% +1.6 21.11 ± 4% perf-profile.children.cycles-pp.xfs_log_commit_cil
5.16 ± 5% +1.8 6.94 ± 4% perf-profile.children.cycles-pp.xlog_cil_insert_items
0.31 ± 7% -0.2 0.08 ± 8% perf-profile.self.cycles-pp._raw_spin_unlock_irqrestore
0.35 ± 5% -0.2 0.14 ± 10% perf-profile.self.cycles-pp.xlog_grant_head_wake
0.20 ± 5% -0.1 0.08 ± 13% perf-profile.self.cycles-pp.xfs_buf_item_unpin
0.11 ± 6% -0.0 0.09 ± 10% perf-profile.self.cycles-pp.try_to_wake_up
0.17 -0.0 0.15 ± 5% perf-profile.self.cycles-pp.lapic_next_deadline
0.11 ± 3% +0.0 0.13 ± 5% perf-profile.self.cycles-pp.xfs_bmbt_key_diff
0.10 ± 4% +0.0 0.12 ± 6% perf-profile.self.cycles-pp._xfs_trans_bjoin
0.07 ± 10% +0.0 0.09 ± 8% perf-profile.self.cycles-pp.irqtime_account_irq
0.06 ± 11% +0.0 0.08 ± 15% perf-profile.self.cycles-pp.pmem_submit_bio
0.11 ± 6% +0.0 0.13 ± 12% perf-profile.self.cycles-pp.crc_128
0.08 ± 8% +0.0 0.10 ± 10% perf-profile.self.cycles-pp.list_sort
0.10 ± 12% +0.0 0.12 ± 8% perf-profile.self.cycles-pp.xfs_trans_dirty_buf
0.08 ± 15% +0.0 0.10 ± 10% perf-profile.self.cycles-pp.xfs_trans_committed_bulk
0.08 ± 8% +0.0 0.11 ± 4% perf-profile.self.cycles-pp.tick_nohz_next_event
0.14 ± 5% +0.0 0.17 ± 9% perf-profile.self.cycles-pp.xfs_cil_prepare_item
0.11 ± 9% +0.0 0.14 ± 15% perf-profile.self.cycles-pp.__bio_add_page
0.16 ± 7% +0.0 0.19 ± 8% perf-profile.self.cycles-pp.xfs_perag_put
0.08 ± 10% +0.0 0.12 ± 13% perf-profile.self.cycles-pp.xfs_ail_check
0.17 ± 2% +0.0 0.21 ± 8% perf-profile.self.cycles-pp.xlog_write
0.15 ± 5% +0.0 0.19 ± 3% perf-profile.self.cycles-pp.__slab_free
0.25 ± 10% +0.0 0.29 ± 5% perf-profile.self.cycles-pp.xfs_buf_bio_end_io
0.06 ± 9% +0.0 0.10 ± 8% perf-profile.self.cycles-pp.xfs_btree_lblock_calc_crc
0.03 ±100% +0.1 0.08 ± 10% perf-profile.self.cycles-pp._xfs_buf_ioapply
0.23 ± 9% +0.1 0.29 ± 5% perf-profile.self.cycles-pp.__orc_find
0.56 ± 4% +0.1 0.62 ± 3% perf-profile.self.cycles-pp.xfs_bmbt_init_key_from_rec
0.35 ± 8% +0.1 0.42 ± 6% perf-profile.self.cycles-pp.down_read
0.55 ± 7% +0.1 0.62 ± 3% perf-profile.self.cycles-pp.xfs_perag_get
0.54 +0.1 0.61 ± 3% perf-profile.self.cycles-pp.memmove
0.49 ± 2% +0.1 0.57 ± 5% perf-profile.self.cycles-pp.xfs_btree_lookup
0.53 ± 9% +0.1 0.63 ± 4% perf-profile.self.cycles-pp.xfs_log_commit_cil
0.43 ± 6% +0.1 0.55 ± 7% perf-profile.self.cycles-pp.__srcu_read_unlock
0.88 ± 5% +0.1 1.00 ± 8% perf-profile.self.cycles-pp.memcpy_erms
0.81 ± 7% +0.1 0.96 ± 7% perf-profile.self.cycles-pp.xfs_iext_lookup_extent
0.75 ± 10% +0.2 0.92 ± 5% perf-profile.self.cycles-pp.xfs_log_ticket_ungrant
1.47 ± 6% +0.4 1.84 ± 3% perf-profile.self.cycles-pp._raw_spin_lock_irqsave



fio.write_bw_MBps

600 +---------------------------------------------------------------------+
| O O O O O O O O O O O O O O O O O O O O O O O |
500 |.+.+.+.+.+.+.+.+..+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+..+.+.+.+.+.+.+.+.|
| |
| |
400 |-+ |
| |
300 |-+ |
| |
200 |-+ |
| |
| |
100 |-+ |
| |
0 +---------------------------------------------------------------------+


fio.write_iops

160000 +------------------------------------------------------------------+
| O O |
140000 |-+.+.O.+.O.+.O.+.+.O O O O O O O.+O O O.+. .+.O .+. .+.+.+. |
120000 |.+ + + + +.+.+.+.+.+.+ +.+.+ + +.+ + +.+.+.|
| |
100000 |-+ |
| |
80000 |-+ |
| |
60000 |-+ |
40000 |-+ |
| |
20000 |-+ |
| |
0 +------------------------------------------------------------------+


fio.write_clat_mean_us

400000 +------------------------------------------------------------------+
|.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.++.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.|
350000 |-+ O O O O O O O O O O O O O O O OO O O O O O O |
| |
300000 |-+ |
250000 |-+ |
| |
200000 |-+ |
| |
150000 |-+ |
100000 |-+ |
| |
50000 |-+ |
| |
0 +------------------------------------------------------------------+


fio.write_clat_90__us

160000 +------------------------------------------------------------------+
| + ++ + |
140000 |-+ O O O O O O O O O O O O O O O OO O O O O O O |
120000 |-+ |
| |
100000 |-+ |
| |
80000 |-+ |
| |
60000 |-+ |
40000 |-+ |
| |
20000 |-+ |
| |
0 +------------------------------------------------------------------+


fio.latency_250us_

25 +----------------------------------------------------------------------+
| |
|.+. .+. .+. .+. .+. .+.+. .+.+.+. .+.. .+.|
20 |-+ +.+ +. + +.+.+.+ +.+.+..+.+ +.+ +.+ +.+.+.+ |
| O O O O O O O O O O O O O O |
| O O O O O O O O O |
15 |-+ |
| |
10 |-+ |
| |
| |
5 |-+ |
| |
| |
0 +----------------------------------------------------------------------+


fio.workload

3e+07 +-----------------------------------------------------------------+
| O O O O O O OO O O O O O O O O O O O O O O O |
2.5e+07 |.+.+.+.+.+.+.+.++.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.++.+.+.+.+.+.+.+.|
| |
| |
2e+07 |-+ |
| |
1.5e+07 |-+ |
| |
1e+07 |-+ |
| |
| |
5e+06 |-+ |
| |
0 +-----------------------------------------------------------------+


[*] bisect-good sample
[O] bisect-bad sample

***************************************************************************************************
lkp-csl-2ap2: 192 threads Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz with 192G memory




Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


Thanks,
Oliver Sang


Attachments:
(No filename) (48.32 kB)
config-5.10.0-rc4-00005-g97e8f0134a2b (172.76 kB)
job-script (8.47 kB)
job.yaml (5.86 kB)
reproduce (0.98 kB)
Download all attachments

2020-12-09 13:32:19

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On Sun, Nov 22, 2020 at 01:44:53PM -0800, Andy Lutomirski wrote:
> On Sat, Nov 21, 2020 at 10:55 PM Jürgen Groß <[email protected]> wrote:
> >
> > On 20.11.20 12:59, Peter Zijlstra wrote:
> > > On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
> > >> +static __always_inline void arch_local_irq_restore(unsigned long flags)
> > >> +{
> > >> + if (!arch_irqs_disabled_flags(flags))
> > >> + arch_local_irq_enable();
> > >> +}
> > >
> > > If someone were to write horrible code like:
> > >
> > > local_irq_disable();
> > > local_irq_save(flags);
> > > local_irq_enable();
> > > local_irq_restore(flags);
> > >
> > > we'd be up some creek without a paddle... now I don't _think_ we have
> > > genius code like that, but I'd feel saver if we can haz an assertion in
> > > there somewhere...
> > >
> > > Maybe something like:
> > >
> > > #ifdef CONFIG_DEBUG_ENTRY // for lack of something saner
> > > WARN_ON_ONCE((arch_local_save_flags() ^ flags) & X86_EFLAGS_IF);
> > > #endif
> > >
> > > At the end?
> >
> > I'd like to, but using WARN_ON_ONCE() in include/asm/irqflags.h sounds
> > like a perfect receipt for include dependency hell.
> >
> > We could use a plain asm("ud2") instead.
>
> How about out-of-lining it:
>
> #ifdef CONFIG_DEBUG_ENTRY
> extern void warn_bogus_irqrestore();
> #endif
>
> static __always_inline void arch_local_irq_restore(unsigned long flags)
> {
> if (!arch_irqs_disabled_flags(flags)) {
> arch_local_irq_enable();
> } else {
> #ifdef CONFIG_DEBUG_ENTRY
> if (unlikely(arch_local_irq_save() & X86_EFLAGS_IF))
> warn_bogus_irqrestore();
> #endif
> }

I was just talking to Peter on IRC about implementing the same thing for
arm64, so could we put this in the generic irqflags code? IIUC we can
use raw_irqs_disabled() to do the check.

As this isn't really entry specific (and IIUC the cases this should
catch would break lockdep today), maybe we should add a new
DEBUG_IRQFLAGS for this, that DEBUG_LOCKDEP can also select?

Something like:

#define local_irq_restore(flags) \
do { \
if (!raw_irqs_disabled_flags(flags)) { \
trace_hardirqs_on(); \
} else if (IS_ENABLED(CONFIG_DEBUG_IRQFLAGS) { \
if (unlikely(raw_irqs_disabled()) \
warn_bogus_irqrestore(); \
} \
raw_local_irq_restore(flags); \
} while (0)

... perhaps? (ignoring however we deal with once-ness).

Thanks,
Mark.

2020-12-09 15:09:50

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On Wed, Dec 09, 2020 at 01:27:10PM +0000, Mark Rutland wrote:
> On Sun, Nov 22, 2020 at 01:44:53PM -0800, Andy Lutomirski wrote:
> > On Sat, Nov 21, 2020 at 10:55 PM Jürgen Groß <[email protected]> wrote:
> > > On 20.11.20 12:59, Peter Zijlstra wrote:
> > > > If someone were to write horrible code like:
> > > >
> > > > local_irq_disable();
> > > > local_irq_save(flags);
> > > > local_irq_enable();
> > > > local_irq_restore(flags);
> > > >
> > > > we'd be up some creek without a paddle... now I don't _think_ we have
> > > > genius code like that, but I'd feel saver if we can haz an assertion in
> > > > there somewhere...

> I was just talking to Peter on IRC about implementing the same thing for
> arm64, so could we put this in the generic irqflags code? IIUC we can
> use raw_irqs_disabled() to do the check.
>
> As this isn't really entry specific (and IIUC the cases this should
> catch would break lockdep today), maybe we should add a new
> DEBUG_IRQFLAGS for this, that DEBUG_LOCKDEP can also select?
>
> Something like:
>
> #define local_irq_restore(flags) \
> do { \
> if (!raw_irqs_disabled_flags(flags)) { \
> trace_hardirqs_on(); \
> } else if (IS_ENABLED(CONFIG_DEBUG_IRQFLAGS) { \
> if (unlikely(raw_irqs_disabled()) \

Whoops; that should be !raw_irqs_disabled().

> warn_bogus_irqrestore(); \
> } \
> raw_local_irq_restore(flags); \
> } while (0)
>
> ... perhaps? (ignoring however we deal with once-ness).

If no-one shouts in the next day or two I'll spin this as its own patch.

Mark.

2020-12-09 20:20:51

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On Fri, Nov 20, 2020 at 12:59:43PM +0100, Peter Zijlstra wrote:
> On Fri, Nov 20, 2020 at 12:46:23PM +0100, Juergen Gross wrote:
> > +static __always_inline void arch_local_irq_restore(unsigned long flags)
> > +{
> > + if (!arch_irqs_disabled_flags(flags))
> > + arch_local_irq_enable();
> > +}
>
> If someone were to write horrible code like:
>
> local_irq_disable();
> local_irq_save(flags);
> local_irq_enable();
> local_irq_restore(flags);
>
> we'd be up some creek without a paddle... now I don't _think_ we have
> genius code like that, but I'd feel saver if we can haz an assertion in
> there somewhere...

I've cobbled that together locally (i'll post it momentarily), and gave it a
spin on both arm64 and x86, whereupon it exploded at boot time on x86.

In arch/x86/kernel/apic/io_apic.c's timer_irq_works() we do:

local_irq_save(flags);
local_irq_enable();

[ trigger an IRQ here ]

local_irq_restore(flags);

... and in check_timer() we call that a number of times after either a
local_irq_save() or local_irq_disable(), eventually trailing with a
local_irq_disable() that will balance things up before calling
local_irq_restore().

I guess that timer_irq_works() should instead do:

local_irq_save(flags);
local_irq_enable();
...
local_irq_disable();
local_irq_restore(flags);

... assuming we consider that legitimate?

With that, and all the calls to local_irq_disable() in check_timer() removed
(diff below) I get a clean boot under QEMU with the assertion hacked in and
DEBUG_LOCKDEP enabled.

Thanks
Mark.

---->8----
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7b3c7e0d4a09..e79e665a3aeb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1631,6 +1631,7 @@ static int __init timer_irq_works(void)
else
delay_without_tsc();

+ local_irq_disable();
local_irq_restore(flags);

/*
@@ -2191,7 +2192,6 @@ static inline void __init check_timer(void)
goto out;
}
panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
- local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2215,7 +2215,6 @@ static inline void __init check_timer(void)
/*
* Cleanup, just in case ...
*/
- local_irq_disable();
legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -2232,7 +2231,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -2251,7 +2249,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
if (apic_is_x2apic_enabled())
apic_printk(APIC_QUIET, KERN_INFO

2020-12-09 20:23:49

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On Wed, Dec 09 2020 at 18:15, Mark Rutland wrote:
> In arch/x86/kernel/apic/io_apic.c's timer_irq_works() we do:
>
> local_irq_save(flags);
> local_irq_enable();
>
> [ trigger an IRQ here ]
>
> local_irq_restore(flags);
>
> ... and in check_timer() we call that a number of times after either a
> local_irq_save() or local_irq_disable(), eventually trailing with a
> local_irq_disable() that will balance things up before calling
> local_irq_restore().
>
> I guess that timer_irq_works() should instead do:
>
> local_irq_save(flags);
> local_irq_enable();
> ...
> local_irq_disable();
> local_irq_restore(flags);
>
> ... assuming we consider that legitimate?

Nah. That's old and insane gunk.

Thanks,

tglx
---
arch/x86/kernel/apic/io_apic.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)

--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1618,21 +1618,16 @@ static void __init delay_without_tsc(voi
static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
- unsigned long flags;

if (no_timer_check)
return 1;

- local_save_flags(flags);
local_irq_enable();
-
if (boot_cpu_has(X86_FEATURE_TSC))
delay_with_tsc();
else
delay_without_tsc();

- local_irq_restore(flags);
-
/*
* Expect a few ticks at least, to be sure some possible
* glue logic does not lock up after one or two first
@@ -1641,10 +1636,10 @@ static int __init timer_irq_works(void)
* least one tick may be lost due to delays.
*/

- /* jiffies wrap? */
- if (time_after(jiffies, t1 + 4))
- return 1;
- return 0;
+ local_irq_disable();
+
+ /* Did jiffies advance? */
+ return time_after(jiffies, t1 + 4);
}

/*
@@ -2117,13 +2112,12 @@ static inline void __init check_timer(vo
struct irq_cfg *cfg = irqd_cfg(irq_data);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
- unsigned long flags;
int no_pin1 = 0;

if (!global_clock_event)
return;

- local_irq_save(flags);
+ local_irq_disable();

/*
* get/set the timer IRQ vector:
@@ -2191,7 +2185,6 @@ static inline void __init check_timer(vo
goto out;
}
panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
- local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2215,7 +2208,6 @@ static inline void __init check_timer(vo
/*
* Cleanup, just in case ...
*/
- local_irq_disable();
legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -2232,7 +2224,6 @@ static inline void __init check_timer(vo
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -2251,7 +2242,6 @@ static inline void __init check_timer(vo
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
if (apic_is_x2apic_enabled())
apic_printk(APIC_QUIET, KERN_INFO
@@ -2260,7 +2250,7 @@ static inline void __init check_timer(vo
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out:
- local_irq_restore(flags);
+ local_irq_enable();
}

/*

2020-12-10 01:42:49

by Juergen Gross

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On 09.12.20 15:02, Mark Rutland wrote:
> On Wed, Dec 09, 2020 at 01:27:10PM +0000, Mark Rutland wrote:
>> On Sun, Nov 22, 2020 at 01:44:53PM -0800, Andy Lutomirski wrote:
>>> On Sat, Nov 21, 2020 at 10:55 PM Jürgen Groß <[email protected]> wrote:
>>>> On 20.11.20 12:59, Peter Zijlstra wrote:
>>>>> If someone were to write horrible code like:
>>>>>
>>>>> local_irq_disable();
>>>>> local_irq_save(flags);
>>>>> local_irq_enable();
>>>>> local_irq_restore(flags);
>>>>>
>>>>> we'd be up some creek without a paddle... now I don't _think_ we have
>>>>> genius code like that, but I'd feel safer if we can haz an assertion in
>>>>> there somewhere...
>
>> I was just talking to Peter on IRC about implementing the same thing for
>> arm64, so could we put this in the generic irqflags code? IIUC we can
>> use raw_irqs_disabled() to do the check.
>>
>> As this isn't really entry specific (and IIUC the cases this should
>> catch would break lockdep today), maybe we should add a new
>> DEBUG_IRQFLAGS for this, that DEBUG_LOCKDEP can also select?
>>
>> Something like:
>>
>> #define local_irq_restore(flags) \
>> do { \
>> if (!raw_irqs_disabled_flags(flags)) { \
>> trace_hardirqs_on(); \
>> } else if (IS_ENABLED(CONFIG_DEBUG_IRQFLAGS) { \
>> if (unlikely(raw_irqs_disabled()) \
>
> Whoops; that should be !raw_irqs_disabled().
>
>> warn_bogus_irqrestore(); \
>> } \
>> raw_local_irq_restore(flags); \
>> } while (0)
>>
>> ... perhaps? (ignoring however we deal with once-ness).
>
> If no-one shouts in the next day or two I'll spin this as its own patch.

Fine with me. So I'll just ignore a potential error case in my patch.

Thanks,


Juergen


Attachments:
OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature
Download all attachments

2020-12-10 14:33:31

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 05/12] x86: rework arch_local_irq_restore() to not use popf

On Wed, Dec 09, 2020 at 07:54:26PM +0100, Thomas Gleixner wrote:
> On Wed, Dec 09 2020 at 18:15, Mark Rutland wrote:
> > In arch/x86/kernel/apic/io_apic.c's timer_irq_works() we do:
> >
> > local_irq_save(flags);
> > local_irq_enable();
> >
> > [ trigger an IRQ here ]
> >
> > local_irq_restore(flags);
> >
> > ... and in check_timer() we call that a number of times after either a
> > local_irq_save() or local_irq_disable(), eventually trailing with a
> > local_irq_disable() that will balance things up before calling
> > local_irq_restore().

I gave the patchlet below a spin with my debug patch, and it boots
cleanly for me under QEMU. If you spin it as a real patch, feel free to
add:

Tested-by: Mark Rutland <[email protected]>

Mark.

> ---
> arch/x86/kernel/apic/io_apic.c | 22 ++++++----------------
> 1 file changed, 6 insertions(+), 16 deletions(-)
>
> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -1618,21 +1618,16 @@ static void __init delay_without_tsc(voi
> static int __init timer_irq_works(void)
> {
> unsigned long t1 = jiffies;
> - unsigned long flags;
>
> if (no_timer_check)
> return 1;
>
> - local_save_flags(flags);
> local_irq_enable();
> -
> if (boot_cpu_has(X86_FEATURE_TSC))
> delay_with_tsc();
> else
> delay_without_tsc();
>
> - local_irq_restore(flags);
> -
> /*
> * Expect a few ticks at least, to be sure some possible
> * glue logic does not lock up after one or two first
> @@ -1641,10 +1636,10 @@ static int __init timer_irq_works(void)
> * least one tick may be lost due to delays.
> */
>
> - /* jiffies wrap? */
> - if (time_after(jiffies, t1 + 4))
> - return 1;
> - return 0;
> + local_irq_disable();
> +
> + /* Did jiffies advance? */
> + return time_after(jiffies, t1 + 4);
> }
>
> /*
> @@ -2117,13 +2112,12 @@ static inline void __init check_timer(vo
> struct irq_cfg *cfg = irqd_cfg(irq_data);
> int node = cpu_to_node(0);
> int apic1, pin1, apic2, pin2;
> - unsigned long flags;
> int no_pin1 = 0;
>
> if (!global_clock_event)
> return;
>
> - local_irq_save(flags);
> + local_irq_disable();
>
> /*
> * get/set the timer IRQ vector:
> @@ -2191,7 +2185,6 @@ static inline void __init check_timer(vo
> goto out;
> }
> panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
> - local_irq_disable();
> clear_IO_APIC_pin(apic1, pin1);
> if (!no_pin1)
> apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
> @@ -2215,7 +2208,6 @@ static inline void __init check_timer(vo
> /*
> * Cleanup, just in case ...
> */
> - local_irq_disable();
> legacy_pic->mask(0);
> clear_IO_APIC_pin(apic2, pin2);
> apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
> @@ -2232,7 +2224,6 @@ static inline void __init check_timer(vo
> apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
> goto out;
> }
> - local_irq_disable();
> legacy_pic->mask(0);
> apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
> apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
> @@ -2251,7 +2242,6 @@ static inline void __init check_timer(vo
> apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
> goto out;
> }
> - local_irq_disable();
> apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
> if (apic_is_x2apic_enabled())
> apic_printk(APIC_QUIET, KERN_INFO
> @@ -2260,7 +2250,7 @@ static inline void __init check_timer(vo
> panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
> "report. Then try booting with the 'noapic' option.\n");
> out:
> - local_irq_restore(flags);
> + local_irq_enable();
> }
>
> /*

2020-12-10 20:20:23

by Thomas Gleixner

[permalink] [raw]
Subject: x86/ioapic: Cleanup the timer_works() irqflags mess

Mark tripped over the creative irqflags handling in the IO-APIC timer
delivery check which ends up doing:

local_irq_save(flags);
local_irq_enable();
local_irq_restore(flags);

which triggered a new consistency check he's working on, required for
replacing the POPF-based restore with a conditional STI.

That code is a historical mess and none of this is needed. Make it
use local_irq_disable()/enable() directly, as that is all that is
required. It is invoked from interrupt-enabled code nowadays.

Reported-by: Mark Rutland <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Mark Rutland <[email protected]>
---
arch/x86/kernel/apic/io_apic.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)

--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1618,21 +1618,16 @@ static void __init delay_without_tsc(voi
static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
- unsigned long flags;

if (no_timer_check)
return 1;

- local_save_flags(flags);
local_irq_enable();
-
if (boot_cpu_has(X86_FEATURE_TSC))
delay_with_tsc();
else
delay_without_tsc();

- local_irq_restore(flags);
-
/*
* Expect a few ticks at least, to be sure some possible
* glue logic does not lock up after one or two first
@@ -1641,10 +1636,10 @@ static int __init timer_irq_works(void)
* least one tick may be lost due to delays.
*/

- /* jiffies wrap? */
- if (time_after(jiffies, t1 + 4))
- return 1;
- return 0;
+ local_irq_disable();
+
+ /* Did jiffies advance? */
+ return time_after(jiffies, t1 + 4);
}

/*
@@ -2117,13 +2112,12 @@ static inline void __init check_timer(vo
struct irq_cfg *cfg = irqd_cfg(irq_data);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
- unsigned long flags;
int no_pin1 = 0;

if (!global_clock_event)
return;

- local_irq_save(flags);
+ local_irq_disable();

/*
* get/set the timer IRQ vector:
@@ -2191,7 +2185,6 @@ static inline void __init check_timer(vo
goto out;
}
panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
- local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2215,7 +2208,6 @@ static inline void __init check_timer(vo
/*
* Cleanup, just in case ...
*/
- local_irq_disable();
legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -2232,7 +2224,6 @@ static inline void __init check_timer(vo
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -2251,7 +2242,6 @@ static inline void __init check_timer(vo
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
if (apic_is_x2apic_enabled())
apic_printk(APIC_QUIET, KERN_INFO
@@ -2260,7 +2250,7 @@ static inline void __init check_timer(vo
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out:
- local_irq_restore(flags);
+ local_irq_enable();
}

/*

2020-12-11 11:58:49

by Juergen Gross

[permalink] [raw]
Subject: Re: x86/ioapic: Cleanup the timer_works() irqflags mess

On 10.12.20 21:15, Thomas Gleixner wrote:
> Mark tripped over the creative irqflags handling in the IO-APIC timer
> delivery check which ends up doing:
>
> local_irq_save(flags);
> local_irq_enable();
> local_irq_restore(flags);
>
> which triggered a new consistency check he's working on required for
> replacing the POPF based restore with a conditional STI.
>
> That code is a historical mess and none of this is needed. Make it
> straightforward use local_irq_disable()/enable() as that's all what is
> required. It is invoked from interrupt enabled code nowadays.
>
> Reported-by: Mark Rutland <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Tested-by: Mark Rutland <[email protected]>

Reviewed-by: Juergen Gross <[email protected]>


Juergen


Attachments:
OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature
Download all attachments

2020-12-11 22:21:15

by tip-bot2 for Jacob Pan

[permalink] [raw]
Subject: [tip: x86/apic] x86/ioapic: Cleanup the timer_works() irqflags mess

The following commit has been merged into the x86/apic branch of tip:

Commit-ID: 058df195c23403f91acc028e39ca2ad599d0af52
Gitweb: https://git.kernel.org/tip/058df195c23403f91acc028e39ca2ad599d0af52
Author: Thomas Gleixner <[email protected]>
AuthorDate: Thu, 10 Dec 2020 21:15:04 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Thu, 10 Dec 2020 23:02:31 +01:00

x86/ioapic: Cleanup the timer_works() irqflags mess

Mark tripped over the creative irqflags handling in the IO-APIC timer
delivery check which ends up doing:

local_irq_save(flags);
local_irq_enable();
local_irq_restore(flags);

which triggered a new consistency check he's working on, required for
replacing the POPF-based restore with a conditional STI.

That code is a historical mess and none of this is needed. Make it
use local_irq_disable()/enable() directly, as that is all that is
required. It is invoked from interrupt-enabled code nowadays.

Reported-by: Mark Rutland <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Mark Rutland <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
arch/x86/kernel/apic/io_apic.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 089e755..e4ab480 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1620,21 +1620,16 @@ static void __init delay_without_tsc(void)
static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
- unsigned long flags;

if (no_timer_check)
return 1;

- local_save_flags(flags);
local_irq_enable();
-
if (boot_cpu_has(X86_FEATURE_TSC))
delay_with_tsc();
else
delay_without_tsc();

- local_irq_restore(flags);
-
/*
* Expect a few ticks at least, to be sure some possible
* glue logic does not lock up after one or two first
@@ -1643,10 +1638,10 @@ static int __init timer_irq_works(void)
* least one tick may be lost due to delays.
*/

- /* jiffies wrap? */
- if (time_after(jiffies, t1 + 4))
- return 1;
- return 0;
+ local_irq_disable();
+
+ /* Did jiffies advance? */
+ return time_after(jiffies, t1 + 4);
}

/*
@@ -2163,13 +2158,12 @@ static inline void __init check_timer(void)
struct irq_cfg *cfg = irqd_cfg(irq_data);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
- unsigned long flags;
int no_pin1 = 0;

if (!global_clock_event)
return;

- local_irq_save(flags);
+ local_irq_disable();

/*
* get/set the timer IRQ vector:
@@ -2237,7 +2231,6 @@ static inline void __init check_timer(void)
goto out;
}
panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
- local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2261,7 +2254,6 @@ static inline void __init check_timer(void)
/*
* Cleanup, just in case ...
*/
- local_irq_disable();
legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -2278,7 +2270,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -2297,7 +2288,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
if (apic_is_x2apic_enabled())
apic_printk(APIC_QUIET, KERN_INFO
@@ -2306,7 +2296,7 @@ static inline void __init check_timer(void)
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out:
- local_irq_restore(flags);
+ local_irq_enable();
}

/*