2020-07-22 22:12:36

by Thomas Gleixner

[permalink] [raw]
Subject: [patch V5 07/15] x86/entry: Consolidate 32/64 bit syscall entry

From: Thomas Gleixner <[email protected]>

The 64-bit and 32-bit entry code has the same open-coded syscall entry handling
after the bitwidth-specific bits.

Move it to a helper function and share the code.

Signed-off-by: Thomas Gleixner <[email protected]>

---
arch/x86/entry/common.c | 93 +++++++++++++++++++++---------------------------
1 file changed, 41 insertions(+), 52 deletions(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -366,8 +366,7 @@ static void __syscall_return_slowpath(st
exit_to_user_mode();
}

-#ifdef CONFIG_X86_64
-__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+static noinstr long syscall_enter(struct pt_regs *regs, unsigned long nr)
{
struct thread_info *ti;

@@ -379,6 +378,16 @@ static void __syscall_return_slowpath(st
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);

+ instrumentation_end();
+ return nr;
+}
+
+#ifdef CONFIG_X86_64
+__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+{
+ nr = syscall_enter(regs, nr);
+
+ instrumentation_begin();
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
@@ -390,64 +399,53 @@ static void __syscall_return_slowpath(st
regs->ax = x32_sys_call_table[nr](regs);
#endif
}
- __syscall_return_slowpath(regs);
-
instrumentation_end();
- exit_to_user_mode();
+ syscall_return_slowpath(regs);
}
#endif

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_IA32_EMULATION))
+ current_thread_info()->status |= TS_COMPAT;
+ /*
+ * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+ * orig_ax, the unsigned int return value truncates it. This may
+ * or may not be necessary, but it matches the old asm behavior.
+ */
+ return syscall_enter(regs, (unsigned int)regs->orig_ax);
+}
+
/*
- * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
- * all entry and exit work and returns with IRQs off. This function is
- * extremely hot in workloads that use it, and it's usually called from
- * do_fast_syscall_32, so forcibly inline it to improve performance.
+ * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
*/
-static void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
+ unsigned int nr)
{
- struct thread_info *ti = current_thread_info();
- unsigned int nr = (unsigned int)regs->orig_ax;
-
-#ifdef CONFIG_IA32_EMULATION
- ti->status |= TS_COMPAT;
-#endif
-
- if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
- /*
- * Subtlety here: if ptrace pokes something larger than
- * 2^32-1 into orig_ax, this truncates it. This may or
- * may not be necessary, but it matches the old asm
- * behavior.
- */
- nr = syscall_trace_enter(regs);
- }
-
if (likely(nr < IA32_NR_syscalls)) {
+ instrumentation_begin();
nr = array_index_nospec(nr, IA32_NR_syscalls);
regs->ax = ia32_sys_call_table[nr](regs);
+ instrumentation_end();
}
-
- __syscall_return_slowpath(regs);
}

/* Handles int $0x80 */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
- instrumentation_begin();
-
- local_irq_enable();
- do_syscall_32_irqs_on(regs);
+ unsigned int nr = syscall_32_enter(regs);

- instrumentation_end();
- exit_to_user_mode();
+ do_syscall_32_irqs_on(regs, nr);
+ syscall_return_slowpath(regs);
}

-static bool __do_fast_syscall_32(struct pt_regs *regs)
+static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
+ unsigned int nr = syscall_32_enter(regs);
int res;

+ instrumentation_begin();
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
@@ -460,17 +458,18 @@ static bool __do_fast_syscall_32(struct
res = get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp);
}
+ instrumentation_end();

if (res) {
/* User code screwed up. */
regs->ax = -EFAULT;
- local_irq_disable();
- __prepare_exit_to_usermode(regs);
+ syscall_return_slowpath(regs);
return false;
}

/* Now this is just like a normal syscall. */
- do_syscall_32_irqs_on(regs);
+ do_syscall_32_irqs_on(regs, nr);
+ syscall_return_slowpath(regs);
return true;
}

@@ -483,7 +482,6 @@ static bool __do_fast_syscall_32(struct
*/
unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
vdso_image_32.sym_int80_landing_pad;
- bool success;

/*
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@@ -492,17 +490,8 @@ static bool __do_fast_syscall_32(struct
*/
regs->ip = landing_pad;

- enter_from_user_mode(regs);
- instrumentation_begin();
-
- local_irq_enable();
- success = __do_fast_syscall_32(regs);
-
- instrumentation_end();
- exit_to_user_mode();
-
- /* If it failed, keep it simple: use IRET. */
- if (!success)
+ /* Invoke the syscall. If it failed, keep it simple: use IRET. */
+ if (!__do_fast_syscall_32(regs))
return 0;

#ifdef CONFIG_X86_64




Subject: [tip: x86/entry] x86/entry: Consolidate 32/64 bit syscall entry

The following commit has been merged into the x86/entry branch of tip:

Commit-ID: 0b085e68f4072024ecaa3889aeeaab5f6c8eba5c
Gitweb: https://git.kernel.org/tip/0b085e68f4072024ecaa3889aeeaab5f6c8eba5c
Author: Thomas Gleixner <[email protected]>
AuthorDate: Thu, 23 Jul 2020 00:00:01 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 24 Jul 2020 15:04:58 +02:00

x86/entry: Consolidate 32/64 bit syscall entry

The 64-bit and 32-bit entry code has the same open-coded syscall entry handling
after the bitwidth-specific bits.

Move it to a helper function and share the code.

Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]


---
arch/x86/entry/common.c | 93 +++++++++++++++++-----------------------
1 file changed, 41 insertions(+), 52 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ab6cb86..68d5c86 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -366,8 +366,7 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
exit_to_user_mode();
}

-#ifdef CONFIG_X86_64
-__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+static noinstr long syscall_enter(struct pt_regs *regs, unsigned long nr)
{
struct thread_info *ti;

@@ -379,6 +378,16 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);

+ instrumentation_end();
+ return nr;
+}
+
+#ifdef CONFIG_X86_64
+__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+{
+ nr = syscall_enter(regs, nr);
+
+ instrumentation_begin();
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
@@ -390,64 +399,53 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
regs->ax = x32_sys_call_table[nr](regs);
#endif
}
- __syscall_return_slowpath(regs);
-
instrumentation_end();
- exit_to_user_mode();
+ syscall_return_slowpath(regs);
}
#endif

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_IA32_EMULATION))
+ current_thread_info()->status |= TS_COMPAT;
+ /*
+ * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+ * orig_ax, the unsigned int return value truncates it. This may
+ * or may not be necessary, but it matches the old asm behavior.
+ */
+ return syscall_enter(regs, (unsigned int)regs->orig_ax);
+}
+
/*
- * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
- * all entry and exit work and returns with IRQs off. This function is
- * extremely hot in workloads that use it, and it's usually called from
- * do_fast_syscall_32, so forcibly inline it to improve performance.
+ * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
*/
-static void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
+ unsigned int nr)
{
- struct thread_info *ti = current_thread_info();
- unsigned int nr = (unsigned int)regs->orig_ax;
-
-#ifdef CONFIG_IA32_EMULATION
- ti->status |= TS_COMPAT;
-#endif
-
- if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
- /*
- * Subtlety here: if ptrace pokes something larger than
- * 2^32-1 into orig_ax, this truncates it. This may or
- * may not be necessary, but it matches the old asm
- * behavior.
- */
- nr = syscall_trace_enter(regs);
- }
-
if (likely(nr < IA32_NR_syscalls)) {
+ instrumentation_begin();
nr = array_index_nospec(nr, IA32_NR_syscalls);
regs->ax = ia32_sys_call_table[nr](regs);
+ instrumentation_end();
}
-
- __syscall_return_slowpath(regs);
}

/* Handles int $0x80 */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
- instrumentation_begin();
+ unsigned int nr = syscall_32_enter(regs);

- local_irq_enable();
- do_syscall_32_irqs_on(regs);
-
- instrumentation_end();
- exit_to_user_mode();
+ do_syscall_32_irqs_on(regs, nr);
+ syscall_return_slowpath(regs);
}

-static bool __do_fast_syscall_32(struct pt_regs *regs)
+static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
+ unsigned int nr = syscall_32_enter(regs);
int res;

+ instrumentation_begin();
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
@@ -460,17 +458,18 @@ static bool __do_fast_syscall_32(struct pt_regs *regs)
res = get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp);
}
+ instrumentation_end();

if (res) {
/* User code screwed up. */
regs->ax = -EFAULT;
- local_irq_disable();
- __prepare_exit_to_usermode(regs);
+ syscall_return_slowpath(regs);
return false;
}

/* Now this is just like a normal syscall. */
- do_syscall_32_irqs_on(regs);
+ do_syscall_32_irqs_on(regs, nr);
+ syscall_return_slowpath(regs);
return true;
}

@@ -483,7 +482,6 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
*/
unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
vdso_image_32.sym_int80_landing_pad;
- bool success;

/*
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@@ -492,17 +490,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;

- enter_from_user_mode(regs);
- instrumentation_begin();
-
- local_irq_enable();
- success = __do_fast_syscall_32(regs);
-
- instrumentation_end();
- exit_to_user_mode();
-
- /* If it failed, keep it simple: use IRET. */
- if (!success)
+ /* Invoke the syscall. If it failed, keep it simple: use IRET. */
+ if (!__do_fast_syscall_32(regs))
return 0;

#ifdef CONFIG_X86_64

2020-07-26 18:35:02

by Brian Gerst

[permalink] [raw]
Subject: Re: [tip: x86/entry] x86/entry: Consolidate 32/64 bit syscall entry

On Fri, Jul 24, 2020 at 4:14 PM tip-bot2 for Thomas Gleixner
<[email protected]> wrote:
>
> The following commit has been merged into the x86/entry branch of tip:
>
> Commit-ID: 0b085e68f4072024ecaa3889aeeaab5f6c8eba5c
> Gitweb: https://git.kernel.org/tip/0b085e68f4072024ecaa3889aeeaab5f6c8eba5c
> Author: Thomas Gleixner <[email protected]>
> AuthorDate: Thu, 23 Jul 2020 00:00:01 +02:00
> Committer: Thomas Gleixner <[email protected]>
> CommitterDate: Fri, 24 Jul 2020 15:04:58 +02:00
>
> x86/entry: Consolidate 32/64 bit syscall entry
>
> 64bit and 32bit entry code have the same open coded syscall entry handling
> after the bitwidth specific bits.
>
> Move it to a helper function and share the code.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Link: https://lkml.kernel.org/r/[email protected]
>
>
> ---
> arch/x86/entry/common.c | 93 +++++++++++++++++-----------------------
> 1 file changed, 41 insertions(+), 52 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index ab6cb86..68d5c86 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -366,8 +366,7 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
> exit_to_user_mode();
> }
>
> -#ifdef CONFIG_X86_64
> -__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
> +static noinstr long syscall_enter(struct pt_regs *regs, unsigned long nr)
> {
> struct thread_info *ti;
>
> @@ -379,6 +378,16 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
> if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
> nr = syscall_trace_enter(regs);
>
> + instrumentation_end();
> + return nr;
> +}
> +
> +#ifdef CONFIG_X86_64
> +__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
> +{
> + nr = syscall_enter(regs, nr);
> +
> + instrumentation_begin();
> if (likely(nr < NR_syscalls)) {
> nr = array_index_nospec(nr, NR_syscalls);
> regs->ax = sys_call_table[nr](regs);
> @@ -390,64 +399,53 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
> regs->ax = x32_sys_call_table[nr](regs);
> #endif
> }
> - __syscall_return_slowpath(regs);
> -
> instrumentation_end();
> - exit_to_user_mode();
> + syscall_return_slowpath(regs);
> }
> #endif
>
> #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
> +static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
> +{
> + if (IS_ENABLED(CONFIG_IA32_EMULATION))
> + current_thread_info()->status |= TS_COMPAT;
> + /*
> + * Subtlety here: if ptrace pokes something larger than 2^32-1 into
> + * orig_ax, the unsigned int return value truncates it. This may
> + * or may not be necessary, but it matches the old asm behavior.
> + */
> + return syscall_enter(regs, (unsigned int)regs->orig_ax);
> +}
> +
> /*
> - * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
> - * all entry and exit work and returns with IRQs off. This function is
> - * extremely hot in workloads that use it, and it's usually called from
> - * do_fast_syscall_32, so forcibly inline it to improve performance.
> + * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
> */
> -static void do_syscall_32_irqs_on(struct pt_regs *regs)
> +static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
> + unsigned int nr)
> {
> - struct thread_info *ti = current_thread_info();
> - unsigned int nr = (unsigned int)regs->orig_ax;
> -
> -#ifdef CONFIG_IA32_EMULATION
> - ti->status |= TS_COMPAT;
> -#endif
> -
> - if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
> - /*
> - * Subtlety here: if ptrace pokes something larger than
> - * 2^32-1 into orig_ax, this truncates it. This may or
> - * may not be necessary, but it matches the old asm
> - * behavior.
> - */
> - nr = syscall_trace_enter(regs);
> - }
> -
> if (likely(nr < IA32_NR_syscalls)) {
> + instrumentation_begin();
> nr = array_index_nospec(nr, IA32_NR_syscalls);
> regs->ax = ia32_sys_call_table[nr](regs);
> + instrumentation_end();
> }
> -
> - __syscall_return_slowpath(regs);
> }
>
> /* Handles int $0x80 */
> __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
> {
> - enter_from_user_mode(regs);
> - instrumentation_begin();
> + unsigned int nr = syscall_32_enter(regs);
>
> - local_irq_enable();
> - do_syscall_32_irqs_on(regs);
> -
> - instrumentation_end();
> - exit_to_user_mode();
> + do_syscall_32_irqs_on(regs, nr);
> + syscall_return_slowpath(regs);
> }
>
> -static bool __do_fast_syscall_32(struct pt_regs *regs)
> +static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)

Can __do_fast_syscall_32() be merged back into do_fast_syscall_32()
now that both are marked noinstr?

--
Brian Gerst

2020-07-27 13:56:16

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [tip: x86/entry] x86/entry: Consolidate 32/64 bit syscall entry

Brian Gerst <[email protected]> writes:
> On Fri, Jul 24, 2020 at 4:14 PM tip-bot2 for Thomas Gleixner
>>
>> -static bool __do_fast_syscall_32(struct pt_regs *regs)
>> +static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
>
> Can __do_fast_syscall_32() be merged back into do_fast_syscall_32()
> now that both are marked noinstr?

It could.