2023-12-05 13:31:18

by Sven Schnelle

[permalink] [raw]
Subject: [PATCH 0/3] entry: inline syscall enter/exit functions

Hi List,

looking into the performance of syscall entry/exit after s390 switched
to generic entry showed that there's quite some overhead calling some
of the entry/exit work functions even when there's nothing to do.
This patchset moves the entry and exit function to entry-common.h, so
non inlined code gets only called when there is some work pending.

I wrote a small program that just issues invalid syscalls in a loop.
On an s390 machine, this results in the following numbers:

without this series:

# ./syscall 1000000000
runtime: 94.886581s / per-syscall 9.488658e-08s

with this series:

./syscall 1000000000
runtime: 84.732391s / per-syscall 8.473239e-08s

so the time required for one syscall dropped from 94.8ns to
84.7ns, which is a drop of about 11%.

Sven Schnelle (3):
entry: move exit to usermode functions to header file
move enter_from_user_mode() to header file
entry: move syscall_enter_from_user_mode() to header file

include/linux/entry-common.h | 137 ++++++++++++++++++++++++++++++++-
kernel/entry/common.c | 145 ++---------------------------------
2 files changed, 138 insertions(+), 144 deletions(-)

--
2.40.1


2023-12-05 13:31:28

by Sven Schnelle

[permalink] [raw]
Subject: [PATCH 1/3] entry: move exit to usermode functions to header file

To allow inlining, move exit_to_user_mode() and
exit_to_user_mode_loop() to entry-common.h.

Signed-off-by: Sven Schnelle <[email protected]>
---
include/linux/entry-common.h | 95 +++++++++++++++++++++++++++++++++++-
kernel/entry/common.c | 89 +--------------------------------
2 files changed, 96 insertions(+), 88 deletions(-)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index d95ab85f96ba..f0f1a26dc638 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -7,6 +7,10 @@
#include <linux/syscalls.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
+#include <linux/context_tracking.h>
+#include <linux/livepatch.h>
+#include <linux/resume_user_mode.h>
+#include <linux/tick.h>

#include <asm/entry-common.h>

@@ -258,6 +262,85 @@ static __always_inline void arch_exit_to_user_mode(void) { }
*/
void arch_do_signal_or_restart(struct pt_regs *regs);

+/**
+ * exit_to_user_mode_loop - do any pending work before leaving to user space
+ */
+static __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ /*
+ * Before returning to user space ensure that all pending work
+ * items have been completed.
+ */
+ while (ti_work & EXIT_TO_USER_MODE_WORK) {
+
+ local_irq_enable_exit_to_user(ti_work);
+
+ if (ti_work & _TIF_NEED_RESCHED)
+ schedule();
+
+ if (ti_work & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
+ if (ti_work & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
+
+ if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
+ arch_do_signal_or_restart(regs);
+
+ if (ti_work & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
+
+ /* Architecture specific TIF work */
+ arch_exit_to_user_mode_work(regs, ti_work);
+
+ /*
+ * Disable interrupts and reevaluate the work flags as they
+ * might have changed while interrupts and preemption was
+ * enabled above.
+ */
+ local_irq_disable_exit_to_user();
+
+ /* Check if any of the above work has queued a deferred wakeup */
+ tick_nohz_user_enter_prepare();
+
+ ti_work = read_thread_flags();
+ }
+
+ /* Return the latest work state for arch_exit_to_user_mode() */
+ return ti_work;
+}
+
+/**
+ * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ *
+ * 1) check that interrupts are disabled
+ * 2) call tick_nohz_user_enter_prepare()
+ * 3) call exit_to_user_mode_loop() if any flags from
+ * EXIT_TO_USER_MODE_WORK are set
+ * 4) check that interrupts are still disabled
+ */
+static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ unsigned long ti_work;
+
+ lockdep_assert_irqs_disabled();
+
+ /* Flush pending rcuog wakeup before the last need_resched() check */
+ tick_nohz_user_enter_prepare();
+
+ ti_work = read_thread_flags();
+ if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
+ ti_work = exit_to_user_mode_loop(regs, ti_work);
+
+ arch_exit_to_user_mode_prepare(regs, ti_work);
+
+ /* Ensure that kernel state is sane for a return to userspace */
+ kmap_assert_nomap();
+ lockdep_assert_irqs_disabled();
+ lockdep_sys_exit();
+}
+
/**
* exit_to_user_mode - Fixup state when exiting to user mode
*
@@ -276,7 +359,17 @@ void arch_do_signal_or_restart(struct pt_regs *regs);
* non-instrumentable.
* The caller has to invoke syscall_exit_to_user_mode_work() before this.
*/
-void exit_to_user_mode(void);
+static __always_inline void exit_to_user_mode(void)
+{
+ instrumentation_begin();
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+
+ user_enter_irqoff();
+ arch_exit_to_user_mode();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}

/**
* syscall_exit_to_user_mode_work - Handle work before returning to user mode
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index d7ee4bc3f2ba..6ba2bcfbe32c 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -123,94 +123,9 @@ noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
instrumentation_end();
}

-/* See comment for exit_to_user_mode() in entry-common.h */
-static __always_inline void __exit_to_user_mode(void)
-{
- instrumentation_begin();
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- instrumentation_end();
-
- user_enter_irqoff();
- arch_exit_to_user_mode();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
-void noinstr exit_to_user_mode(void)
-{
- __exit_to_user_mode();
-}
-
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }

-static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
- unsigned long ti_work)
-{
- /*
- * Before returning to user space ensure that all pending work
- * items have been completed.
- */
- while (ti_work & EXIT_TO_USER_MODE_WORK) {
-
- local_irq_enable_exit_to_user(ti_work);
-
- if (ti_work & _TIF_NEED_RESCHED)
- schedule();
-
- if (ti_work & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (ti_work & _TIF_PATCH_PENDING)
- klp_update_patch_state(current);
-
- if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
- arch_do_signal_or_restart(regs);
-
- if (ti_work & _TIF_NOTIFY_RESUME)
- resume_user_mode_work(regs);
-
- /* Architecture specific TIF work */
- arch_exit_to_user_mode_work(regs, ti_work);
-
- /*
- * Disable interrupts and reevaluate the work flags as they
- * might have changed while interrupts and preemption was
- * enabled above.
- */
- local_irq_disable_exit_to_user();
-
- /* Check if any of the above work has queued a deferred wakeup */
- tick_nohz_user_enter_prepare();
-
- ti_work = read_thread_flags();
- }
-
- /* Return the latest work state for arch_exit_to_user_mode() */
- return ti_work;
-}
-
-static void exit_to_user_mode_prepare(struct pt_regs *regs)
-{
- unsigned long ti_work;
-
- lockdep_assert_irqs_disabled();
-
- /* Flush pending rcuog wakeup before the last need_resched() check */
- tick_nohz_user_enter_prepare();
-
- ti_work = read_thread_flags();
- if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
- ti_work = exit_to_user_mode_loop(regs, ti_work);
-
- arch_exit_to_user_mode_prepare(regs, ti_work);
-
- /* Ensure that kernel state is sane for a return to userspace */
- kmap_assert_nomap();
- lockdep_assert_irqs_disabled();
- lockdep_sys_exit();
-}
-
/*
* If SYSCALL_EMU is set, then the only reason to report is when
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
@@ -295,7 +210,7 @@ __visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
instrumentation_begin();
__syscall_exit_to_user_mode_work(regs);
instrumentation_end();
- __exit_to_user_mode();
+ exit_to_user_mode();
}

noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
@@ -308,7 +223,7 @@ noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
instrumentation_begin();
exit_to_user_mode_prepare(regs);
instrumentation_end();
- __exit_to_user_mode();
+ exit_to_user_mode();
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
--
2.40.1

2023-12-06 11:04:32

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 0/3] entry: inline syscall enter/exit functions

On Tue, Dec 05, 2023 at 02:30:12PM +0100, Sven Schnelle wrote:
> Hi List,
>
> looking into the performance of syscall entry/exit after s390 switched
> to generic entry showed that there's quite some overhead calling some
> of the entry/exit work functions even when there's nothing to do.
> This patchset moves the entry and exit function to entry-common.h, so
> non inlined code gets only called when there is some work pending.

So per that logic you wouldn't need to inline exit_to_user_mode_loop()
for example, that's only called when there is a EXIT_TO_USER_MODE_WORK
bit set.

That is, I'm just being pedantic here and pointing out that your
justification doesn't cover the extent of the changes.

> I wrote a small program that just issues invalid syscalls in a loop.
> On an s390 machine, this results in the following numbers:
>
> without this series:
>
> # ./syscall 1000000000
> runtime: 94.886581s / per-syscall 9.488658e-08s
>
> with this series:
>
> ./syscall 1000000000
> runtime: 84.732391s / per-syscall 8.473239e-08s
>
> so the time required for one syscall dropped from 94.8ns to
> 84.7ns, which is a drop of about 11%.

That is obviously very nice, and I don't immediately see anything wrong
with moving the lot to header based inlines.

Thomas?

2023-12-14 08:24:57

by Sven Schnelle

[permalink] [raw]
Subject: Re: [PATCH 0/3] entry: inline syscall enter/exit functions

Peter Zijlstra <[email protected]> writes:

> On Tue, Dec 05, 2023 at 02:30:12PM +0100, Sven Schnelle wrote:
>> Hi List,
>>
>> looking into the performance of syscall entry/exit after s390 switched
>> to generic entry showed that there's quite some overhead calling some
>> of the entry/exit work functions even when there's nothing to do.
>> This patchset moves the entry and exit function to entry-common.h, so
>> non inlined code gets only called when there is some work pending.
>
> So per that logic you wouldn't need to inline exit_to_user_mode_loop()
> for example, that's only called when there is a EXIT_TO_USER_MODE_WORK
> bit set.
>
> That is, I'm just being pedantic here and pointing out that your
> justification doesn't cover the extent of the changes.
>
>> I wrote a small program that just issues invalid syscalls in a loop.
>> On an s390 machine, this results in the following numbers:
>>
>> without this series:
>>
>> # ./syscall 1000000000
>> runtime: 94.886581s / per-syscall 9.488658e-08s
>>
>> with this series:
>>
>> ./syscall 1000000000
>> runtime: 84.732391s / per-syscall 8.473239e-08s
>>
>> so the time required for one syscall dropped from 94.8ns to
>> 84.7ns, which is a drop of about 11%.
>
> That is obviously very nice, and I don't immediately see anything wrong
> with moving the lot to header based inlines.
>
> Thomas?

Thomas, any opinion on this change?

2023-12-15 19:09:39

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/3] entry: move exit to usermode functions to header file

On Tue, Dec 05 2023 at 14:30, Sven Schnelle wrote:
> +/**
> + * exit_to_user_mode_loop - do any pending work before leaving to user space
> + */
> +static __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
> + unsigned long ti_work)
> +{
> + /*
> + * Before returning to user space ensure that all pending work
> + * items have been completed.
> + */
> + while (ti_work & EXIT_TO_USER_MODE_WORK) {
> +
> + local_irq_enable_exit_to_user(ti_work);
> +
> + if (ti_work & _TIF_NEED_RESCHED)
> + schedule();
> +
> + if (ti_work & _TIF_UPROBE)
> + uprobe_notify_resume(regs);
> +
> + if (ti_work & _TIF_PATCH_PENDING)
> + klp_update_patch_state(current);
> +
> + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
> + arch_do_signal_or_restart(regs);
> +
> + if (ti_work & _TIF_NOTIFY_RESUME)
> + resume_user_mode_work(regs);
> +
> + /* Architecture specific TIF work */
> + arch_exit_to_user_mode_work(regs, ti_work);
> +
> + /*
> + * Disable interrupts and reevaluate the work flags as they
> + * might have changed while interrupts and preemption was
> + * enabled above.
> + */
> + local_irq_disable_exit_to_user();
> +
> + /* Check if any of the above work has queued a deferred wakeup */
> + tick_nohz_user_enter_prepare();
> +
> + ti_work = read_thread_flags();
> + }
> +
> + /* Return the latest work state for arch_exit_to_user_mode() */
> + return ti_work;
> +}

I'm not really sure about this part. exit_to_user_mode_loop() is the
slowpath when a TIF work flag is set. I can see the benefit on the
fastpath functions which are way smaller.

Thanks,

tglx


2023-12-15 19:14:28

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 0/3] entry: inline syscall enter/exit functions

On Thu, Dec 14 2023 at 09:24, Sven Schnelle wrote:
> Peter Zijlstra <[email protected]> writes:
>>> so the time required for one syscall dropped from 94.8ns to
>>> 84.7ns, which is a drop of about 11%.
>>
>> That is obviously very nice, and I don't immediately see anything wrong
>> with moving the lot to header based inlines.
>>
>> Thomas?

No objections in principle. Let me look at the lot

2023-12-18 07:48:59

by Sven Schnelle

[permalink] [raw]
Subject: Re: [PATCH 1/3] entry: move exit to usermode functions to header file

Hi Thomas,

Thomas Gleixner <[email protected]> writes:

> On Tue, Dec 05 2023 at 14:30, Sven Schnelle wrote:
>> +/**
>> + * exit_to_user_mode_loop - do any pending work before leaving to user space
>> + */
>> +static __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
>> + unsigned long ti_work)
>> +{
>> + /*
>> + * Before returning to user space ensure that all pending work
>> + * items have been completed.
>> + */
>> + while (ti_work & EXIT_TO_USER_MODE_WORK) {
>> +
>> + local_irq_enable_exit_to_user(ti_work);
>> +
>> + if (ti_work & _TIF_NEED_RESCHED)
>> + schedule();
>> +
>> + if (ti_work & _TIF_UPROBE)
>> + uprobe_notify_resume(regs);
>> +
>> + if (ti_work & _TIF_PATCH_PENDING)
>> + klp_update_patch_state(current);
>> +
>> + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
>> + arch_do_signal_or_restart(regs);
>> +
>> + if (ti_work & _TIF_NOTIFY_RESUME)
>> + resume_user_mode_work(regs);
>> +
>> + /* Architecture specific TIF work */
>> + arch_exit_to_user_mode_work(regs, ti_work);
>> +
>> + /*
>> + * Disable interrupts and reevaluate the work flags as they
>> + * might have changed while interrupts and preemption was
>> + * enabled above.
>> + */
>> + local_irq_disable_exit_to_user();
>> +
>> + /* Check if any of the above work has queued a deferred wakeup */
>> + tick_nohz_user_enter_prepare();
>> +
>> + ti_work = read_thread_flags();
>> + }
>> +
>> + /* Return the latest work state for arch_exit_to_user_mode() */
>> + return ti_work;
>> +}
>
> I'm not really sure about this part. exit_to_user_mode_loop() is the
> slowpath when a TIF work flag is set. I can see the benefit on the
> fastpath functions which are way smaller.

Indeed, the main performance improvement comes from inlining the small
functions. Since Peter made the same point, I sent out a v2 which doesn't
move exit_to_user_mode_loop().

Thanks!
Sven