2023-03-22 04:01:03

by Josh Poimboeuf

Subject: [PATCH v2 00/11] static_call: Improve NULL/ret0 handling

Peter pointed out that v1 had CFI violations on arm64 with
CONFIG_CFI_CLANG. Then I realized there are already CFI violations
today in the existing code.

So this ended up turning into a complete rewrite of v1.

Highlights include:

- Several cleanups
- Fix arm64 CFI violations
- Make NULL pointer behavior consistent across configs
- Merge NULL and RET0 into a single concept


v1 can be found here:

https://lkml.kernel.org/lkml/[email protected]

Josh Poimboeuf (11):
static_call: Improve key type abstraction
static_call: Flip key type union bit
static_call: Remove static_call_mod_init() declaration
static_call: Remove static_call.h dependency on cpu.h
static_call: Make ARCH_ADD_TRAMP_KEY() generic
static_call: "EXPORT_STATIC_CALL_TRAMP" -> "EXPORT_STATIC_CALL_RO"
static_call: Reorganize static call headers
arm64/static_call: Fix static call CFI violations
static_call: Make NULL static calls consistent
static_call: Remove static_call_cond()
static_call: Remove DEFINE_STATIC_CALL_RET0()

arch/Kconfig | 4 +
arch/arm/include/asm/paravirt.h | 2 +-
arch/arm64/include/asm/paravirt.h | 2 +-
arch/arm64/include/asm/static_call.h | 29 ++
arch/powerpc/include/asm/static_call.h | 1 -
arch/powerpc/kernel/irq.c | 2 +-
arch/powerpc/kernel/static_call.c | 6 +-
arch/x86/events/amd/brs.c | 2 +-
arch/x86/events/amd/core.c | 2 +-
arch/x86/events/core.c | 29 +-
arch/x86/include/asm/kvm-x86-ops.h | 6 +-
arch/x86/include/asm/kvm-x86-pmu-ops.h | 3 +-
arch/x86/include/asm/kvm_host.h | 4 +-
arch/x86/include/asm/paravirt.h | 2 +-
arch/x86/include/asm/perf_event.h | 2 +-
arch/x86/include/asm/preempt.h | 6 +-
arch/x86/include/asm/static_call.h | 22 +-
arch/x86/kernel/alternative.c | 6 -
arch/x86/kernel/paravirt.c | 1 +
arch/x86/kernel/static_call.c | 89 +-----
arch/x86/kvm/irq.c | 2 +-
arch/x86/kvm/lapic.c | 22 +-
arch/x86/kvm/pmu.c | 4 +-
arch/x86/kvm/x86.c | 28 +-
block/bio.c | 1 +
include/linux/entry-common.h | 2 +-
include/linux/entry-kvm.h | 2 +-
include/linux/kernel.h | 4 +-
include/linux/module.h | 2 +-
include/linux/sched.h | 2 +-
include/linux/static_call.h | 369 ++++++++++------------
include/linux/static_call_types.h | 74 +----
kernel/Makefile | 2 +-
kernel/cgroup/cgroup.c | 1 +
kernel/events/core.c | 15 +-
kernel/sched/core.c | 18 +-
kernel/static_call.c | 13 +
kernel/static_call_inline.c | 64 +++-
security/keys/trusted-keys/trusted_core.c | 2 +-
sound/soc/intel/avs/trace.c | 1 +
tools/include/linux/static_call_types.h | 74 +----
41 files changed, 369 insertions(+), 553 deletions(-)
create mode 100644 arch/arm64/include/asm/static_call.h

--
2.39.2


2023-03-22 04:01:08

by Josh Poimboeuf

Subject: [PATCH v2 01/11] static_call: Improve key type abstraction

Make the static_call_key union less fragile by abstracting all knowledge
about the type bit into helper functions.
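
For illustration, here's a minimal user-space sketch (hypothetical names,
not kernel code) of the tagged-pointer scheme the helpers encapsulate:
bit 0 of the shared word records which union member is live, and only the
accessors know about it:

#include <assert.h>
#include <stdbool.h>

struct mod { int m; };

struct key {
	union {
		unsigned long type;	/* bit 0 is the tag */
		struct mod *_mods;
	};
};

static bool key_has_mods(struct key *k)
{
	return !!(k->type & 1);
}

static void key_set_mods(struct key *k, struct mod *mods)
{
	k->_mods = mods;
	k->type |= 1;			/* stash the tag in the low pointer bit */
}

static struct mod *key_mods(struct key *k)
{
	if (!key_has_mods(k))
		return NULL;

	return (struct mod *)(k->type & ~1UL);	/* strip the tag */
}

int main(void)
{
	static struct mod m;		/* pointer-aligned, so bit 0 is free */
	struct key k = { .type = 0 };

	key_set_mods(&k, &m);
	assert(key_has_mods(&k) && key_mods(&k) == &m);

	return 0;
}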

Signed-off-by: Josh Poimboeuf <[email protected]>
---
include/linux/static_call_types.h | 4 +-
kernel/static_call_inline.c | 51 +++++++++++++++++--------
tools/include/linux/static_call_types.h | 4 +-
3 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 5a00b8b2cf9f..87c3598609e8 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -63,8 +63,8 @@ struct static_call_key {
union {
/* bit 0: 0 = mods, 1 = sites */
unsigned long type;
- struct static_call_mod *mods;
- struct static_call_site *sites;
+ struct static_call_mod *_mods;
+ struct static_call_site *_sites;
};
};

diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index 639397b5491c..41f6bda6773a 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -112,15 +112,21 @@ static inline void static_call_sort_entries(struct static_call_site *start,

static inline bool static_call_key_has_mods(struct static_call_key *key)
{
- return !(key->type & 1);
+ return !!(key->type & 1);
}

-static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
+static inline struct static_call_mod *static_call_key_mods(struct static_call_key *key)
{
if (!static_call_key_has_mods(key))
return NULL;

- return key->mods;
+ return (struct static_call_mod *)(key->type & ~1);
+}
+
+static inline void static_call_key_set_mods(struct static_call_key *key, struct static_call_mod *mods)
+{
+ key->_mods = mods;
+ key->type |= 1;
}

static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
@@ -128,7 +134,12 @@ static inline struct static_call_site *static_call_key_sites(struct static_call_
if (static_call_key_has_mods(key))
return NULL;

- return (struct static_call_site *)(key->type & ~1);
+ return key->_sites;
+}
+
+static inline void static_call_key_set_sites(struct static_call_key *key, struct static_call_site *sites)
+{
+ key->_sites = sites;
}

void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -154,7 +165,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
goto done;

first = (struct static_call_mod){
- .next = static_call_key_next(key),
+ .next = static_call_key_mods(key),
.mod = NULL,
.sites = static_call_key_sites(key),
};
@@ -250,8 +261,7 @@ static int __static_call_init(struct module *mod,
* static_call_init() before memory allocation works.
*/
if (!mod) {
- key->sites = site;
- key->type |= 1;
+ static_call_key_set_sites(key, site);
goto do_transform;
}

@@ -266,10 +276,10 @@ static int __static_call_init(struct module *mod,
*/
if (static_call_key_sites(key)) {
site_mod->mod = NULL;
- site_mod->next = NULL;
site_mod->sites = static_call_key_sites(key);
+ site_mod->next = NULL;

- key->mods = site_mod;
+ static_call_key_set_mods(key, site_mod);

site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
if (!site_mod)
@@ -278,8 +288,9 @@ static int __static_call_init(struct module *mod,

site_mod->mod = mod;
site_mod->sites = site;
- site_mod->next = static_call_key_next(key);
- key->mods = site_mod;
+ site_mod->next = static_call_key_mods(key);
+
+ static_call_key_set_mods(key, site_mod);
}

do_transform:
@@ -406,7 +417,7 @@ static void static_call_del_module(struct module *mod)
struct static_call_site *stop = mod->static_call_sites +
mod->num_static_call_sites;
struct static_call_key *key, *prev_key = NULL;
- struct static_call_mod *site_mod, **prev;
+ struct static_call_mod *site_mod, *prev;
struct static_call_site *site;

for (site = start; site < stop; site++) {
@@ -416,15 +427,25 @@ static void static_call_del_module(struct module *mod)

prev_key = key;

- for (prev = &key->mods, site_mod = key->mods;
+ site_mod = static_call_key_mods(key);
+ if (!site_mod)
+ continue;
+
+ if (site_mod->mod == mod) {
+ static_call_key_set_mods(key, site_mod->next);
+ kfree(site_mod);
+ continue;
+ }
+
+ for (prev = site_mod, site_mod = site_mod->next;
site_mod && site_mod->mod != mod;
- prev = &site_mod->next, site_mod = site_mod->next)
+ prev = site_mod, site_mod = site_mod->next)
;

if (!site_mod)
continue;

- *prev = site_mod->next;
+ prev->next = site_mod->next;
kfree(site_mod);
}
}
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 5a00b8b2cf9f..87c3598609e8 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -63,8 +63,8 @@ struct static_call_key {
union {
/* bit 0: 0 = mods, 1 = sites */
unsigned long type;
- struct static_call_mod *mods;
- struct static_call_site *sites;
+ struct static_call_mod *_mods;
+ struct static_call_site *_sites;
};
};

--
2.39.2

2023-03-22 04:01:11

by Josh Poimboeuf

Subject: [PATCH v2 02/11] static_call: Flip key type union bit

Flip the meaning of the key->type union field. This will make it easier
to converge some of the DEFINE_STATIC_CALL() macros.
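
For illustration, a small user-space sketch (hypothetical names) of why
the flip helps: with "0 = sites" in bit 0, an all-zero union already
encodes "sites == NULL", so an initializer that only sets .func leaves
the key in a valid state and the explicit .type = 1 can go away:

#include <assert.h>

struct static_call_site;
struct static_call_mod;

struct key {
	void *func;
	union {
		unsigned long type;	/* bit 0: 0 = sites, 1 = mods */
		struct static_call_site *_sites;
		struct static_call_mod *_mods;
	};
};

static long ret0(void) { return 0; }

int main(void)
{
	/* No .type = 1 needed: the zeroed union already means "no sites". */
	struct key k = { .func = (void *)ret0 };

	assert(!(k.type & 1));		/* tagged as "sites"... */
	assert(!k._sites);		/* ...and NULL, as intended */

	return 0;
}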

Signed-off-by: Josh Poimboeuf <[email protected]>
---
include/linux/static_call.h | 3 ---
include/linux/static_call_types.h | 4 ++--
tools/include/linux/static_call_types.h | 4 ++--
3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 141e6b176a1b..f984b8f6d974 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -186,7 +186,6 @@ extern long __static_call_return0(void);
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = _func, \
- .type = 1, \
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

@@ -194,7 +193,6 @@ extern long __static_call_return0(void);
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \
- .type = 1, \
}; \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

@@ -202,7 +200,6 @@ extern long __static_call_return0(void);
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = __static_call_return0, \
- .type = 1, \
}; \
ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)

diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 87c3598609e8..c4c4efb6f6fa 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -61,10 +61,10 @@ struct static_call_site {
struct static_call_key {
void *func;
union {
- /* bit 0: 0 = mods, 1 = sites */
+ /* bit 0: 0 = sites, 1 = mods */
unsigned long type;
- struct static_call_mod *_mods;
struct static_call_site *_sites;
+ struct static_call_mod *_mods;
};
};

diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 87c3598609e8..c4c4efb6f6fa 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -61,10 +61,10 @@ struct static_call_site {
struct static_call_key {
void *func;
union {
- /* bit 0: 0 = mods, 1 = sites */
+ /* bit 0: 0 = sites, 1 = mods */
unsigned long type;
- struct static_call_mod *_mods;
struct static_call_site *_sites;
+ struct static_call_mod *_mods;
};
};

--
2.39.2

2023-03-22 04:01:15

by Josh Poimboeuf

Subject: [PATCH v2 03/11] static_call: Remove static_call_mod_init() declaration

This function doesn't exist (and never did).

Signed-off-by: Josh Poimboeuf <[email protected]>
---
include/linux/static_call.h | 1 -
1 file changed, 1 deletion(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index f984b8f6d974..890ddc0c3190 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -177,7 +177,6 @@ struct static_call_tramp_key {
};

extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);
-extern int static_call_mod_init(struct module *mod);
extern int static_call_text_reserved(void *start, void *end);

extern long __static_call_return0(void);
--
2.39.2

2023-03-22 04:01:25

by Josh Poimboeuf

Subject: [PATCH v2 05/11] static_call: Make ARCH_ADD_TRAMP_KEY() generic

There's nothing arch-specific about ARCH_ADD_TRAMP_KEY(). Move it to
the generic static_call.h.
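
For illustration, the .pushsection trick builds a link-time table: every
user emits one entry into .static_call_tramp_key, and the kernel walks
the array between the section's start/stop symbols. A user-space
analogue of the same pattern (hypothetical names; relies on GNU ld
generating __start_/__stop_ symbols for sections with C-identifier
names):

#include <stdio.h>

struct tramp_key { long tramp, key; };

#define ADD_TRAMP_KEY(id, t, k)					\
	static const struct tramp_key				\
	__attribute__((used, section("tramp_keys")))		\
	entry_##id = { t, k }

ADD_TRAMP_KEY(a, 0x1000, 0x2000);
ADD_TRAMP_KEY(b, 0x1010, 0x2040);

extern const struct tramp_key __start_tramp_keys[], __stop_tramp_keys[];

int main(void)
{
	const struct tramp_key *e;

	for (e = __start_tramp_keys; e < __stop_tramp_keys; e++)
		printf("tramp %#lx -> key %#lx\n", e->tramp, e->key);

	return 0;
}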

Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/x86/include/asm/static_call.h | 6 ------
include/linux/static_call.h | 11 +++++++++--
2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 343b722ccaf2..52abbdfd6106 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -57,12 +57,6 @@
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)

-#define ARCH_ADD_TRAMP_KEY(name) \
- asm(".pushsection .static_call_tramp_key, \"a\" \n" \
- ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
- ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
- ".popsection \n")
-
extern bool __static_call_fixup(void *tramp, u8 op, void *dest);

#endif /* _ASM_STATIC_CALL_H */
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index abce40166039..013022a8611d 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -213,10 +213,17 @@ extern long __static_call_return0(void);
/* Leave the key unexported, so modules can't change static call targets: */
#define EXPORT_STATIC_CALL_TRAMP(name) \
EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)); \
- ARCH_ADD_TRAMP_KEY(name)
+ __STATIC_CALL_ADD_TRAMP_KEY(name)
#define EXPORT_STATIC_CALL_TRAMP_GPL(name) \
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)); \
- ARCH_ADD_TRAMP_KEY(name)
+ __STATIC_CALL_ADD_TRAMP_KEY(name)
+
+/* Unexported key lookup table */
+#define __STATIC_CALL_ADD_TRAMP_KEY(name) \
+ asm(".pushsection .static_call_tramp_key, \"a\" \n" \
+ ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
+ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
+ ".popsection \n")

#elif defined(CONFIG_HAVE_STATIC_CALL)

--
2.39.2

2023-03-22 04:01:28

by Josh Poimboeuf

Subject: [PATCH v2 04/11] static_call: Remove static_call.h dependency on cpu.h

Uninline __static_call_update() to remove static_call.h's dependency on
cpu.h. This will make it much easier to include static_call.h in common
header files like <linux/kernel.h>.
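
For illustration, a single-file user-space sketch (hypothetical names) of
the pattern: the "header" half is a bare prototype that needs no heavy
includes, and only the "source" half, which takes the lock, pulls in the
stand-in for cpu.h:

/* foo.h equivalent: prototype only, no heavy includes required */
extern void foo_update(long *key, long func);

/* foo.c equivalent: the only place that needs the "heavy" header */
#include <pthread.h>			/* stand-in for <linux/cpu.h> */

static pthread_mutex_t foo_lock = PTHREAD_MUTEX_INITIALIZER;

void foo_update(long *key, long func)
{
	pthread_mutex_lock(&foo_lock);	/* cf. cpus_read_lock() */
	*key = func;			/* cf. WRITE_ONCE(key->func, func) */
	pthread_mutex_unlock(&foo_lock);	/* cf. cpus_read_unlock() */
}

int main(void)
{
	long key = 0;

	foo_update(&key, 42);

	return key != 42;
}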

Signed-off-by: Josh Poimboeuf <[email protected]>
---
block/bio.c | 1 +
include/linux/static_call.h | 10 +---------
kernel/cgroup/cgroup.c | 1 +
kernel/static_call.c | 12 ++++++++++++
sound/soc/intel/avs/trace.c | 1 +
5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index fd11614bba4d..a2ca0680fd18 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -19,6 +19,7 @@
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
#include <linux/xarray.h>
+#include <linux/cpu.h>

#include <trace/events/block.h>
#include "blk.h"
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 890ddc0c3190..abce40166039 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -132,7 +132,6 @@
*/

#include <linux/types.h>
-#include <linux/cpu.h>
#include <linux/static_call_types.h>

#ifdef CONFIG_HAVE_STATIC_CALL
@@ -246,14 +245,7 @@ static inline int static_call_init(void) { return 0; }

#define static_call_cond(name) (void)__static_call(name)

-static inline
-void __static_call_update(struct static_call_key *key, void *tramp, void *func)
-{
- cpus_read_lock();
- WRITE_ONCE(key->func, func);
- arch_static_call_transform(NULL, tramp, func, false);
- cpus_read_unlock();
-}
+extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);

static inline int static_call_text_reserved(void *start, void *end)
{
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 935e8121b21e..4f29f509d9ce 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -58,6 +58,7 @@
#include <linux/fs_parser.h>
#include <linux/sched/cputime.h>
#include <linux/psi.h>
+#include <linux/cpu.h>
#include <net/sock.h>

#define CREATE_TRACE_POINTS
diff --git a/kernel/static_call.c b/kernel/static_call.c
index e9c3e69f3837..63486995fd82 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -1,8 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/static_call.h>
+#include <linux/cpu.h>

long __static_call_return0(void)
{
return 0;
}
EXPORT_SYMBOL_GPL(__static_call_return0);
+
+#ifndef CONFIG_HAVE_STATIC_CALL_INLINE
+void __static_call_update(struct static_call_key *key, void *tramp, void *func)
+{
+ cpus_read_lock();
+ WRITE_ONCE(key->func, func);
+ arch_static_call_transform(NULL, tramp, func, false);
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__static_call_update);
+#endif
diff --git a/sound/soc/intel/avs/trace.c b/sound/soc/intel/avs/trace.c
index c63eea909b5e..b033b560e6d2 100644
--- a/sound/soc/intel/avs/trace.c
+++ b/sound/soc/intel/avs/trace.c
@@ -7,6 +7,7 @@
//

#include <linux/types.h>
+#include <asm/page.h>

#define CREATE_TRACE_POINTS
#include "trace.h"
--
2.39.2

2023-03-22 04:01:31

by Josh Poimboeuf

Subject: [PATCH v2 08/11] arm64/static_call: Fix static call CFI violations

On arm64, with CONFIG_CFI_CLANG, it's trivial to trigger CFI violations
by running "perf record -e sched:sched_switch -a":

CFI failure at perf_misc_flags+0x34/0x70 (target: __static_call_return0+0x0/0xc; expected type: 0x837de525)
WARNING: CPU: 3 PID: 32 at perf_misc_flags+0x34/0x70
CPU: 3 PID: 32 Comm: ksoftirqd/3 Kdump: loaded Tainted: P 6.3.0-rc2 #8
Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
pstate: 904000c5 (NzcV daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : perf_misc_flags+0x34/0x70
lr : perf_event_output_forward+0x74/0xf0
sp : ffff80000a98b970
x29: ffff80000a98b970 x28: ffff00077bd34d00 x27: ffff8000097d2d00
x26: fffffbffeff6a360 x25: ffff800009835a30 x24: ffff0000c2e8dca0
x23: 0000000000000000 x22: 0000000000000080 x21: ffff00077bd31610
x20: ffff0000c2e8dca0 x19: ffff00077bd31610 x18: ffff800008cd52f0
x17: 00000000837de525 x16: 0000000072923c8f x15: 000000000000b67e
x14: 000000000178797d x13: 0000000000000004 x12: 0000000070b5b3a8
x11: 0000000000000015 x10: 0000000000000048 x9 : ffff80000829e2b4
x8 : ffff80000829c6f0 x7 : 0000000000000000 x6 : 0000000000000000
x5 : fffffbffeff6a340 x4 : ffff00077bd31610 x3 : ffff00077bd31610
x2 : ffff800009833400 x1 : 0000000000000000 x0 : ffff00077bd31610
Call trace:
perf_misc_flags+0x34/0x70
perf_event_output_forward+0x74/0xf0
__perf_event_overflow+0x12c/0x1e8
perf_swevent_event+0x98/0x1a0
perf_tp_event+0x140/0x558
perf_trace_run_bpf_submit+0x88/0xc8
perf_trace_sched_switch+0x160/0x19c
__schedule+0xabc/0x153c
dynamic_cond_resched+0x48/0x68
run_ksoftirqd+0x3c/0x138
smpboot_thread_fn+0x26c/0x2f8
kthread+0x108/0x1c4
ret_from_fork+0x10/0x20

The problem is that the __perf_guest_state() static call does an
indirect branch to __static_call_return0(), which isn't CFI-compliant.

Fix that by generating custom CFI-compliant ret0 functions for each
defined static call key.
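
For background, clang's kCFI tags every address-taken function with a
type hash and makes each indirect call site check the target's hash
first, so a single shared __static_call_return0() can only ever match
one call signature. A user-space sketch of the idea (hypothetical names;
the generated __SCR__<name> stubs play the role of ret0_flags/ret0_state):

#include <stdio.h>

typedef long (*flags_fn)(void);
typedef long (*state_fn)(int cpu);

/* One shared ret0 carries exactly one type... */
static long shared_ret0(void) { return 0; }

/* ...so each signature gets its own correctly-typed stub instead: */
static long ret0_flags(void) { return 0; }
static long ret0_state(int cpu) { (void)cpu; return 0; }

int main(void)
{
	flags_fn f = ret0_flags;	/* types match: CFI-clean */
	state_fn s = ret0_state;	/* types match: CFI-clean */
	/* state_fn bad = (state_fn)shared_ret0;  -- hash mismatch: CFI trap */

	printf("%ld %ld\n", f(), s(0));

	return 0;
}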

Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/Kconfig | 4 ++
arch/arm64/include/asm/static_call.h | 29 +++++++++++
include/linux/static_call.h | 64 +++++++++++++++++++++----
include/linux/static_call_types.h | 4 ++
kernel/Makefile | 2 +-
kernel/static_call.c | 2 +-
tools/include/linux/static_call_types.h | 4 ++
7 files changed, 97 insertions(+), 12 deletions(-)
create mode 100644 arch/arm64/include/asm/static_call.h

diff --git a/arch/Kconfig b/arch/Kconfig
index e3511afbb7f2..8800fe80a0f9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1348,6 +1348,10 @@ config HAVE_STATIC_CALL_INLINE
depends on HAVE_STATIC_CALL
select OBJTOOL

+config CFI_WITHOUT_STATIC_CALL
+ def_bool y
+ depends on CFI_CLANG && !HAVE_STATIC_CALL
+
config HAVE_PREEMPT_DYNAMIC
bool

diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
new file mode 100644
index 000000000000..b3489cac7742
--- /dev/null
+++ b/arch/arm64/include/asm/static_call.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_STATIC_CALL_H
+#define _ASM_ARM64_STATIC_CALL_H
+
+/*
+ * Make a dummy reference to a function pointer in C to force the compiler to
+ * emit a __kcfi_typeid_ symbol for asm to use.
+ */
+#define GEN_CFI_SYM(func) \
+ static typeof(func) __used __section(".discard.cfi") *__UNIQUE_ID(cfi) = func
+
+
+/* Generate a CFI-compliant static call NOP function */
+#define __ARCH_DEFINE_STATIC_CALL_CFI(name, insns) \
+ asm(".align 4 \n" \
+ ".word __kcfi_typeid_" name " \n" \
+ ".globl " name " \n" \
+ name ": \n" \
+ "bti c \n" \
+ insns " \n" \
+ "ret \n" \
+ ".type " name ", @function \n" \
+ ".size " name ", . - " name " \n")
+
+#define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name) \
+ GEN_CFI_SYM(STATIC_CALL_RET0_CFI(name)); \
+ __ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_RET0_CFI_STR(name), "mov x0, xzr")
+
+#endif /* _ASM_ARM64_STATIC_CALL_H */
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 650bda9a3367..50ad928afeb8 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -147,15 +147,19 @@ struct static_call_key {
#endif
};

+extern long __static_call_return0(void);
+
#define DECLARE_STATIC_CALL(name, func) \
extern struct static_call_key STATIC_CALL_KEY(name); \
- extern typeof(func) STATIC_CALL_TRAMP(name);
+ extern typeof(func) STATIC_CALL_TRAMP(name); \
+ __DECLARE_STATIC_CALL_CFI(name, func)

#define __DEFINE_STATIC_CALL(name, type, _func) \
DECLARE_STATIC_CALL(name, type); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = _func, \
- }
+ }; \
+ __DEFINE_STATIC_CALL_CFI(name)

#define DEFINE_STATIC_CALL(name, func) \
__DEFINE_STATIC_CALL(name, func, func); \
@@ -166,15 +170,18 @@ struct static_call_key {
__DEFINE_STATIC_CALL_NULL_TRAMP(name)

#define DEFINE_STATIC_CALL_RET0(name, type) \
- __DEFINE_STATIC_CALL(name, type, __static_call_return0); \
+ __DEFINE_STATIC_CALL(name, type, __STATIC_CALL_RET0(name)); \
__DEFINE_STATIC_CALL_RET0_TRAMP(name)

#define EXPORT_STATIC_CALL(name) \
EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
- __EXPORT_STATIC_CALL_TRAMP(name)
+ __EXPORT_STATIC_CALL_TRAMP(name); \
+ __EXPORT_STATIC_CALL_CFI(name)
+
#define EXPORT_STATIC_CALL_GPL(name) \
EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \
- __EXPORT_STATIC_CALL_TRAMP_GPL(name)
+ __EXPORT_STATIC_CALL_TRAMP_GPL(name); \
+ __EXPORT_STATIC_CALL_CFI_GPL(name)

/*
* Read-only exports: export the trampoline but not the key, so modules can't
@@ -184,9 +191,12 @@ struct static_call_key {
*/
#define EXPORT_STATIC_CALL_RO(name) \
__EXPORT_STATIC_CALL_TRAMP(name); \
+ __EXPORT_STATIC_CALL_CFI(name) \
__STATIC_CALL_ADD_TRAMP_KEY(name)
-#define EXPORT_STATIC_CALL_RO_GPL(name) \
+
+#define EXPORT_STATIC_CALL_RO_GPL(name) \
__EXPORT_STATIC_CALL_TRAMP_GPL(name); \
+ __EXPORT_STATIC_CALL_CFI_GPL(name) \
__STATIC_CALL_ADD_TRAMP_KEY(name)

/*
@@ -218,12 +228,19 @@ struct static_call_key {
#define static_call_update(name, func) \
({ \
typeof(&STATIC_CALL_TRAMP(name)) __F = (func); \
+ if (__F == (void *)__static_call_return0) \
+ __F = __STATIC_CALL_RET0(name); \
__static_call_update(&STATIC_CALL_KEY(name), \
STATIC_CALL_TRAMP_ADDR(name), __F); \
})

-#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func))
-
+#define static_call_query(name) \
+({ \
+ void *__F = (READ_ONCE(STATIC_CALL_KEY(name).func)); \
+ if (__F == __STATIC_CALL_RET0(name)) \
+ __F = __static_call_return0; \
+ __F; \
+})

#ifdef CONFIG_HAVE_STATIC_CALL

@@ -249,7 +266,6 @@ struct static_call_key {

#define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)

-extern long __static_call_return0(void);
extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);

/*
@@ -291,8 +307,6 @@ static inline void __static_call_nop(void) { }

#define STATIC_CALL_TRAMP_ADDR(name) NULL

-static inline long __static_call_return0(void) { return 0; }
-
static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
@@ -324,4 +338,34 @@ static inline void static_call_force_reinit(void) {}

#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */

+
+#ifdef CONFIG_CFI_WITHOUT_STATIC_CALL
+
+#include <asm/static_call.h>
+
+#define __STATIC_CALL_RET0(name) STATIC_CALL_RET0_CFI(name)
+
+#define __DECLARE_STATIC_CALL_CFI(name, func) \
+ extern typeof(func) STATIC_CALL_RET0_CFI(name)
+
+#define __DEFINE_STATIC_CALL_CFI(name) \
+ __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name)
+
+#define __EXPORT_STATIC_CALL_CFI(name) \
+ EXPORT_SYMBOL(STATIC_CALL_RET0_CFI(name))
+
+#define __EXPORT_STATIC_CALL_CFI_GPL(name) \
+ EXPORT_SYMBOL_GPL(STATIC_CALL_RET0_CFI(name))
+
+#else /* ! CONFIG_CFI_WITHOUT_STATIC_CALL */
+
+#define __STATIC_CALL_RET0(name) (void *)__static_call_return0
+
+#define __DECLARE_STATIC_CALL_CFI(name, func)
+#define __DEFINE_STATIC_CALL_CFI(name)
+#define __EXPORT_STATIC_CALL_CFI(name)
+#define __EXPORT_STATIC_CALL_CFI_GPL(name)
+
+#endif /* CONFIG_CFI_WITHOUT_STATIC_CALL */
+
#endif /* _LINUX_STATIC_CALL_H */
diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 8b349fe39e45..72732af51cba 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -22,6 +22,10 @@
#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))

+#define STATIC_CALL_RET0_CFI_PREFIX __SCR__
+#define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
+#define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))
+
/*
* Flags in the low bits of static_call_site::key.
*/
diff --git a/kernel/Makefile b/kernel/Makefile
index 10ef068f598d..59b062b1c8f7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -110,7 +110,7 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
obj-$(CONFIG_KCSAN) += kcsan/
obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
-obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
+obj-y += static_call.o
obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
obj-$(CONFIG_CFI_CLANG) += cfi.o

diff --git a/kernel/static_call.c b/kernel/static_call.c
index e5fc33d05015..090ecf5d34b4 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -9,7 +9,7 @@ long __static_call_return0(void)
}
EXPORT_SYMBOL_GPL(__static_call_return0);

-#ifndef CONFIG_HAVE_STATIC_CALL_INLINE
+#if defined(CONFIG_HAVE_STATIC_CALL) && !defined(CONFIG_HAVE_STATIC_CALL_INLINE)
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
cpus_read_lock();
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 8b349fe39e45..72732af51cba 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -22,6 +22,10 @@
#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))

+#define STATIC_CALL_RET0_CFI_PREFIX __SCR__
+#define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
+#define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))
+
/*
* Flags in the low bits of static_call_site::key.
*/
--
2.39.2

2023-03-22 04:01:42

by Josh Poimboeuf

Subject: [PATCH v2 07/11] static_call: Reorganize static call headers

Move all the extra gunk out of static_call_types.h, which is for sharing
types with objtool.

While at it, de-spaghettify static_call.h, with user-visible interfaces
at the top, and implementation differences more clearly separated.

Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/arm/include/asm/paravirt.h | 2 +-
arch/arm64/include/asm/paravirt.h | 2 +-
arch/x86/include/asm/paravirt.h | 2 +-
arch/x86/include/asm/preempt.h | 2 +-
arch/x86/include/asm/static_call.h | 3 +-
arch/x86/kernel/paravirt.c | 1 +
include/linux/entry-common.h | 2 +-
include/linux/entry-kvm.h | 2 +-
include/linux/kernel.h | 2 +-
include/linux/module.h | 2 +-
include/linux/static_call.h | 250 +++++++++++-------------
include/linux/static_call_types.h | 70 +------
kernel/static_call.c | 1 +
kernel/static_call_inline.c | 13 ++
tools/include/linux/static_call_types.h | 70 +------
15 files changed, 148 insertions(+), 276 deletions(-)

diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index 95d5b0d625cd..37a723b653f5 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -3,7 +3,7 @@
#define _ASM_ARM_PARAVIRT_H

#ifdef CONFIG_PARAVIRT
-#include <linux/static_call_types.h>
+#include <linux/static_call.h>

struct static_key;
extern struct static_key paravirt_steal_enabled;
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 9aa193e0e8f2..f59cb310d8ef 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -3,7 +3,7 @@
#define _ASM_ARM64_PARAVIRT_H

#ifdef CONFIG_PARAVIRT
-#include <linux/static_call_types.h>
+#include <linux/static_call.h>

struct static_key;
extern struct static_key paravirt_steal_enabled;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cf40e813b3d7..25d7696be801 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -15,7 +15,7 @@
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>
-#include <linux/static_call_types.h>
+#include <linux/static_call.h>
#include <asm/frame.h>

u64 dummy_steal_clock(int cpu);
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 65028c346709..0879ec504b31 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -7,7 +7,7 @@
#include <asm/current.h>

#include <linux/thread_info.h>
-#include <linux/static_call_types.h>
+#include <linux/static_call.h>

/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 52abbdfd6106..14c6b1862e2e 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -2,7 +2,8 @@
#ifndef _ASM_STATIC_CALL_H
#define _ASM_STATIC_CALL_H

-#include <asm/text-patching.h>
+#include <linux/objtool.h>
+#include <linux/static_call_types.h>

/*
* For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 42e182868873..378aaa2925ad 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -33,6 +33,7 @@
#include <asm/tlb.h>
#include <asm/io_bitmap.h>
#include <asm/gsseg.h>
+#include <asm/text-patching.h>

/*
* nop stub, which must not clobber anything *including the stack* to
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index d95ab85f96ba..c89b08e6a029 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -2,7 +2,7 @@
#ifndef __LINUX_ENTRYCOMMON_H
#define __LINUX_ENTRYCOMMON_H

-#include <linux/static_call_types.h>
+#include <linux/static_call.h>
#include <linux/ptrace.h>
#include <linux/syscalls.h>
#include <linux/seccomp.h>
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 6813171afccb..2f3e56062e3e 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -2,7 +2,7 @@
#ifndef __LINUX_ENTRYKVM_H
#define __LINUX_ENTRYKVM_H

-#include <linux/static_call_types.h>
+#include <linux/static_call.h>
#include <linux/resume_user_mode.h>
#include <linux/syscalls.h>
#include <linux/seccomp.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5c857c3acbc0..90bc8932c4e3 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -28,7 +28,7 @@
#include <linux/panic.h>
#include <linux/printk.h>
#include <linux/build_bug.h>
-#include <linux/static_call_types.h>
+#include <linux/static_call.h>
#include <linux/instruction_pointer.h>
#include <asm/byteorder.h>

diff --git a/include/linux/module.h b/include/linux/module.h
index 4435ad9439ab..a933ec51817d 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -26,7 +26,7 @@
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
#include <linux/srcu.h>
-#include <linux/static_call_types.h>
+#include <linux/static_call.h>

#include <linux/percpu.h>
#include <asm/module.h>
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 74f089a5955b..650bda9a3367 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -134,178 +134,141 @@
#include <linux/types.h>
#include <linux/static_call_types.h>

-#ifdef CONFIG_HAVE_STATIC_CALL
-#include <asm/static_call.h>
-
-/*
- * Either @site or @tramp can be NULL.
- */
-extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail);
-
-#define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)
-
-#else
-#define STATIC_CALL_TRAMP_ADDR(name) NULL
-#endif
-
-#define static_call_update(name, func) \
-({ \
- typeof(&STATIC_CALL_TRAMP(name)) __F = (func); \
- __static_call_update(&STATIC_CALL_KEY(name), \
- STATIC_CALL_TRAMP_ADDR(name), __F); \
-})
-
-#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func))
-
+struct static_call_mod;
+struct static_call_key {
+ void *func;
#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
-
-extern int __init static_call_init(void);
-
-extern void static_call_force_reinit(void);
-
-struct static_call_mod {
- struct static_call_mod *next;
- struct module *mod; /* for vmlinux, mod == NULL */
- struct static_call_site *sites;
+ union {
+ /* bit 0: 0 = sites, 1 = mods */
+ unsigned long type;
+ struct static_call_site *_sites;
+ struct static_call_mod *_mods;
+ };
+#endif
};

-/* For finding the key associated with a trampoline */
-struct static_call_tramp_key {
- s32 tramp;
- s32 key;
-};
+#define DECLARE_STATIC_CALL(name, func) \
+ extern struct static_call_key STATIC_CALL_KEY(name); \
+ extern typeof(func) STATIC_CALL_TRAMP(name);

-extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);
-extern int static_call_text_reserved(void *start, void *end);
-
-extern long __static_call_return0(void);
-
-#define DEFINE_STATIC_CALL(name, _func) \
- DECLARE_STATIC_CALL(name, _func); \
+#define __DEFINE_STATIC_CALL(name, type, _func) \
+ DECLARE_STATIC_CALL(name, type); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = _func, \
- }; \
- ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
+ }

-#define DEFINE_STATIC_CALL_NULL(name, _func) \
- DECLARE_STATIC_CALL(name, _func); \
- struct static_call_key STATIC_CALL_KEY(name) = { \
- .func = NULL, \
- }; \
- ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
+#define DEFINE_STATIC_CALL(name, func) \
+ __DEFINE_STATIC_CALL(name, func, func); \
+ __DEFINE_STATIC_CALL_TRAMP(name, func)

-#define DEFINE_STATIC_CALL_RET0(name, _func) \
- DECLARE_STATIC_CALL(name, _func); \
- struct static_call_key STATIC_CALL_KEY(name) = { \
- .func = __static_call_return0, \
- }; \
- ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
+#define DEFINE_STATIC_CALL_NULL(name, type) \
+ __DEFINE_STATIC_CALL(name, type, NULL); \
+ __DEFINE_STATIC_CALL_NULL_TRAMP(name)

-#define static_call_cond(name) (void)__static_call(name)
+#define DEFINE_STATIC_CALL_RET0(name, type) \
+ __DEFINE_STATIC_CALL(name, type, __static_call_return0); \
+ __DEFINE_STATIC_CALL_RET0_TRAMP(name)

#define EXPORT_STATIC_CALL(name) \
EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
- EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
+ __EXPORT_STATIC_CALL_TRAMP(name)
#define EXPORT_STATIC_CALL_GPL(name) \
EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \
- EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
+ __EXPORT_STATIC_CALL_TRAMP_GPL(name)

/*
* Read-only exports: export the trampoline but not the key, so modules can't
* change call targets.
+ *
+ * These are called via static_call_ro().
*/
#define EXPORT_STATIC_CALL_RO(name) \
- EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)); \
+ __EXPORT_STATIC_CALL_TRAMP(name); \
__STATIC_CALL_ADD_TRAMP_KEY(name)
-#define EXPORT_STATIC_CALL_RO_GPL(name) \
- EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)); \
+#define EXPORT_STATIC_CALL_RO_GPL(name) \
+ __EXPORT_STATIC_CALL_TRAMP_GPL(name); \
__STATIC_CALL_ADD_TRAMP_KEY(name)

-/* Unexported key lookup table */
-#define __STATIC_CALL_ADD_TRAMP_KEY(name) \
- asm(".pushsection .static_call_tramp_key, \"a\" \n" \
- ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
- ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
- ".popsection \n")
+/*
+ * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
+ * the symbol table so that objtool can reference it when it generates the
+ * .static_call_sites section.
+ */
+#define __STATIC_CALL_ADDRESSABLE(name) __ADDRESSABLE(STATIC_CALL_KEY(name))

-#elif defined(CONFIG_HAVE_STATIC_CALL)
+#define static_call(name) \
+({ \
+ __STATIC_CALL_ADDRESSABLE(name); \
+ __static_call(name); \
+})

-static inline int static_call_init(void) { return 0; }
+#define static_call_cond(name) (void)__static_call_cond(name)

-#define DEFINE_STATIC_CALL(name, _func) \
- DECLARE_STATIC_CALL(name, _func); \
- struct static_call_key STATIC_CALL_KEY(name) = { \
- .func = _func, \
- }; \
- ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
+/* Use static_call_ro() to call a read-only-exported static call. */
+#define static_call_ro(name) __static_call_ro(name)

-#define DEFINE_STATIC_CALL_NULL(name, _func) \
- DECLARE_STATIC_CALL(name, _func); \
- struct static_call_key STATIC_CALL_KEY(name) = { \
- .func = NULL, \
- }; \
- ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
+#if defined(MODULE) || !defined(CONFIG_HAVE_STATIC_CALL_INLINE)
+#define __STATIC_CALL_RO_ADDRESSABLE(name)
+#define __static_call_ro(name) __static_call(name)
+#else
+#define __STATIC_CALL_RO_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
+#define __static_call_ro(name) static_call(name)
+#endif

-#define DEFINE_STATIC_CALL_RET0(name, _func) \
- DECLARE_STATIC_CALL(name, _func); \
- struct static_call_key STATIC_CALL_KEY(name) = { \
- .func = __static_call_return0, \
- }; \
- ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
+#define static_call_update(name, func) \
+({ \
+ typeof(&STATIC_CALL_TRAMP(name)) __F = (func); \
+ __static_call_update(&STATIC_CALL_KEY(name), \
+ STATIC_CALL_TRAMP_ADDR(name), __F); \
+})

-#define static_call_cond(name) (void)__static_call(name)
+#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func))

-extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);

-static inline int static_call_text_reserved(void *start, void *end)
-{
- return 0;
-}
+#ifdef CONFIG_HAVE_STATIC_CALL

-extern long __static_call_return0(void);
+#include <asm/static_call.h>

-#define EXPORT_STATIC_CALL(name) \
- EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
- EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
-#define EXPORT_STATIC_CALL_GPL(name) \
- EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \
- EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
+#define __DEFINE_STATIC_CALL_TRAMP(name, func) \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)

-/*
- * Read-only exports: export the trampoline but not the key, so modules can't
- * change call targets.
- */
-#define EXPORT_STATIC_CALL_RO(name) \
+#define __DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
+
+#define __DEFINE_STATIC_CALL_RET0_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
+
+#define __EXPORT_STATIC_CALL_TRAMP(name) \
EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
-#define EXPORT_STATIC_CALL_RO_GPL(name) \
+
+#define __EXPORT_STATIC_CALL_TRAMP_GPL(name) \
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))

-#else /* Generic implementation */
+#define __static_call(name) (&STATIC_CALL_TRAMP(name))
+#define __static_call_cond __static_call

-static inline int static_call_init(void) { return 0; }
+#define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)

-static inline long __static_call_return0(void)
-{
- return 0;
-}
+extern long __static_call_return0(void);
+extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);

-#define __DEFINE_STATIC_CALL(name, _func, _func_init) \
- DECLARE_STATIC_CALL(name, _func); \
- struct static_call_key STATIC_CALL_KEY(name) = { \
- .func = _func_init, \
- }
+/*
+ * Either @site or @tramp can be NULL.
+ */
+extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail);

-#define DEFINE_STATIC_CALL(name, _func) \
- __DEFINE_STATIC_CALL(name, _func, _func)
+#else /* !CONFIG_HAVE_STATIC_CALL */

-#define DEFINE_STATIC_CALL_NULL(name, _func) \
- __DEFINE_STATIC_CALL(name, _func, NULL)
+#define __DEFINE_STATIC_CALL_TRAMP(name, func)
+#define __DEFINE_STATIC_CALL_NULL_TRAMP(name)
+#define __DEFINE_STATIC_CALL_RET0_TRAMP(name)
+#define __EXPORT_STATIC_CALL_TRAMP(name)
+#define __EXPORT_STATIC_CALL_TRAMP_GPL(name)

-#define DEFINE_STATIC_CALL_RET0(name, _func) \
- __DEFINE_STATIC_CALL(name, _func, __static_call_return0)
+#define __static_call(name) \
+ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))

static inline void __static_call_nop(void) { }
-
/*
* This horrific hack takes care of two things:
*
@@ -326,7 +289,9 @@ static inline void __static_call_nop(void) { }
(typeof(STATIC_CALL_TRAMP(name))*)func; \
})

-#define static_call_cond(name) (void)__static_call_cond(name)
+#define STATIC_CALL_TRAMP_ADDR(name) NULL
+
+static inline long __static_call_return0(void) { return 0; }

static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -334,14 +299,29 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
WRITE_ONCE(key->func, func);
}

-static inline int static_call_text_reserved(void *start, void *end)
-{
- return 0;
-}
+#endif /* CONFIG_HAVE_STATIC_CALL */

-#define EXPORT_STATIC_CALL(name) EXPORT_SYMBOL(STATIC_CALL_KEY(name))
-#define EXPORT_STATIC_CALL_GPL(name) EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name))

-#endif /* CONFIG_HAVE_STATIC_CALL */
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
+/* Unexported key lookup table */
+#define __STATIC_CALL_ADD_TRAMP_KEY(name) \
+ asm(".pushsection .static_call_tramp_key, \"a\" \n" \
+ ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
+ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
+ ".popsection \n")
+
+extern int static_call_init(void);
+extern int static_call_text_reserved(void *start, void *end);
+extern void static_call_force_reinit(void);
+
+#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
+
+#define __STATIC_CALL_ADD_TRAMP_KEY(name)
+static inline int static_call_init(void) { return 0; }
+static inline int static_call_text_reserved(void *start, void *end) { return 0; }
+static inline void static_call_force_reinit(void) {}
+
+#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */

#endif /* _LINUX_STATIC_CALL_H */
diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 06293067424f..8b349fe39e45 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -2,6 +2,10 @@
#ifndef _STATIC_CALL_TYPES_H
#define _STATIC_CALL_TYPES_H

+/*
+ * Static call types for sharing with objtool
+ */
+
#include <linux/types.h>
#include <linux/stringify.h>
#include <linux/compiler.h>
@@ -34,70 +38,4 @@ struct static_call_site {
s32 key;
};

-#define DECLARE_STATIC_CALL(name, func) \
- extern struct static_call_key STATIC_CALL_KEY(name); \
- extern typeof(func) STATIC_CALL_TRAMP(name);
-
-#ifdef CONFIG_HAVE_STATIC_CALL
-
-#define __raw_static_call(name) (&STATIC_CALL_TRAMP(name))
-
-#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
-
-/*
- * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
- * the symbol table so that objtool can reference it when it generates the
- * .static_call_sites section.
- */
-#define __STATIC_CALL_ADDRESSABLE(name) \
- __ADDRESSABLE(STATIC_CALL_KEY(name))
-
-#define __static_call(name) \
-({ \
- __STATIC_CALL_ADDRESSABLE(name); \
- __raw_static_call(name); \
-})
-
-struct static_call_key {
- void *func;
- union {
- /* bit 0: 0 = sites, 1 = mods */
- unsigned long type;
- struct static_call_site *_sites;
- struct static_call_mod *_mods;
- };
-};
-
-#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
-
-#define __STATIC_CALL_ADDRESSABLE(name)
-#define __static_call(name) __raw_static_call(name)
-
-struct static_call_key {
- void *func;
-};
-
-#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
-
-#ifdef MODULE
-#define __STATIC_CALL_RO_ADDRESSABLE(name)
-#define static_call_ro(name) __raw_static_call(name)
-#else
-#define __STATIC_CALL_RO_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
-#define static_call_ro(name) __static_call(name)
-#endif
-
-#define static_call(name) __static_call(name)
-
-#else
-
-struct static_call_key {
- void *func;
-};
-
-#define static_call(name) \
- ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
-
-#endif /* CONFIG_HAVE_STATIC_CALL */
-
#endif /* _STATIC_CALL_TYPES_H */
diff --git a/kernel/static_call.c b/kernel/static_call.c
index 63486995fd82..e5fc33d05015 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
#include <linux/static_call.h>
#include <linux/cpu.h>

diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index 41f6bda6773a..b4f4a9eaa6d8 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -9,6 +9,19 @@
#include <linux/cpu.h>
#include <linux/processor.h>
#include <asm/sections.h>
+#include <asm/text-patching.h>
+
+/* For finding the key associated with a trampoline */
+struct static_call_tramp_key {
+ s32 tramp;
+ s32 key;
+};
+
+struct static_call_mod {
+ struct static_call_mod *next;
+ struct module *mod; /* for vmlinux, mod == NULL */
+ struct static_call_site *sites;
+};

extern struct static_call_site __start_static_call_sites[],
__stop_static_call_sites[];
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 06293067424f..8b349fe39e45 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -2,6 +2,10 @@
#ifndef _STATIC_CALL_TYPES_H
#define _STATIC_CALL_TYPES_H

+/*
+ * Static call types for sharing with objtool
+ */
+
#include <linux/types.h>
#include <linux/stringify.h>
#include <linux/compiler.h>
@@ -34,70 +38,4 @@ struct static_call_site {
s32 key;
};

-#define DECLARE_STATIC_CALL(name, func) \
- extern struct static_call_key STATIC_CALL_KEY(name); \
- extern typeof(func) STATIC_CALL_TRAMP(name);
-
-#ifdef CONFIG_HAVE_STATIC_CALL
-
-#define __raw_static_call(name) (&STATIC_CALL_TRAMP(name))
-
-#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
-
-/*
- * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
- * the symbol table so that objtool can reference it when it generates the
- * .static_call_sites section.
- */
-#define __STATIC_CALL_ADDRESSABLE(name) \
- __ADDRESSABLE(STATIC_CALL_KEY(name))
-
-#define __static_call(name) \
-({ \
- __STATIC_CALL_ADDRESSABLE(name); \
- __raw_static_call(name); \
-})
-
-struct static_call_key {
- void *func;
- union {
- /* bit 0: 0 = sites, 1 = mods */
- unsigned long type;
- struct static_call_site *_sites;
- struct static_call_mod *_mods;
- };
-};
-
-#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
-
-#define __STATIC_CALL_ADDRESSABLE(name)
-#define __static_call(name) __raw_static_call(name)
-
-struct static_call_key {
- void *func;
-};
-
-#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
-
-#ifdef MODULE
-#define __STATIC_CALL_RO_ADDRESSABLE(name)
-#define static_call_ro(name) __raw_static_call(name)
-#else
-#define __STATIC_CALL_RO_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
-#define static_call_ro(name) __static_call(name)
-#endif
-
-#define static_call(name) __static_call(name)
-
-#else
-
-struct static_call_key {
- void *func;
-};
-
-#define static_call(name) \
- ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
-
-#endif /* CONFIG_HAVE_STATIC_CALL */
-
#endif /* _STATIC_CALL_TYPES_H */
--
2.39.2

2023-03-22 04:02:32

by Josh Poimboeuf

Subject: [PATCH v2 10/11] static_call: Remove static_call_cond()

A static call to a NULL pointer is now a NOP for all configs. There's
no longer a need for an explicit static_call_cond(). Remove it and
convert its usages to plain static_call().
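
For illustration, a user-space sketch (hypothetical names) of the
resulting semantics: the updater maps a NULL target to a do-nothing
function, so call sites can stay unconditional instead of wrapping
themselves in static_call_cond():

#include <stdio.h>

typedef void (*hook_fn)(int);

static void do_nop(int x) { (void)x; }	/* cf. __static_call_nop() */

static hook_fn hook;

static void hook_update(hook_fn fn)
{
	hook = fn ? fn : do_nop;	/* NULL becomes a harmless nop */
}

static void greet(int x)
{
	printf("hook called with %d\n", x);
}

int main(void)
{
	hook_update(NULL);
	hook(1);		/* NULL target: does nothing */

	hook_update(greet);
	hook(2);		/* prints */

	return 0;
}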

Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/x86/events/core.c | 24 +++++++++++------------
arch/x86/include/asm/kvm-x86-ops.h | 3 +--
arch/x86/include/asm/kvm-x86-pmu-ops.h | 3 +--
arch/x86/include/asm/kvm_host.h | 4 ++--
arch/x86/kvm/irq.c | 2 +-
arch/x86/kvm/lapic.c | 22 ++++++++++-----------
arch/x86/kvm/pmu.c | 4 ++--
arch/x86/kvm/x86.c | 24 +++++++++++------------
include/linux/static_call.h | 5 +----
security/keys/trusted-keys/trusted_core.c | 2 +-
10 files changed, 44 insertions(+), 49 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d096b04bf80e..c94537501091 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -995,7 +995,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
n0 -= cpuc->n_txn;

- static_call_cond(x86_pmu_start_scheduling)(cpuc);
+ static_call(x86_pmu_start_scheduling)(cpuc);

for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = cpuc->event_constraint[i];
@@ -1094,7 +1094,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
*/
if (!unsched && assign) {
for (i = 0; i < n; i++)
- static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
+ static_call(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
} else {
for (i = n0; i < n; i++) {
e = cpuc->event_list[i];
@@ -1102,13 +1102,13 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/*
* release events that failed scheduling
*/
- static_call_cond(x86_pmu_put_event_constraints)(cpuc, e);
+ static_call(x86_pmu_put_event_constraints)(cpuc, e);

cpuc->event_constraint[i] = NULL;
}
}

- static_call_cond(x86_pmu_stop_scheduling)(cpuc);
+ static_call(x86_pmu_stop_scheduling)(cpuc);

return unsched ? -EINVAL : 0;
}
@@ -1221,7 +1221,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];

- static_call_cond(x86_pmu_assign)(event, idx);
+ static_call(x86_pmu_assign)(event, idx);

switch (hwc->idx) {
case INTEL_PMC_IDX_FIXED_BTS:
@@ -1399,7 +1399,7 @@ int x86_perf_event_set_period(struct perf_event *event)
if (left > x86_pmu.max_period)
left = x86_pmu.max_period;

- static_call_cond(x86_pmu_limit_period)(event, &left);
+ static_call(x86_pmu_limit_period)(event, &left);

this_cpu_write(pmc_prev_left[idx], left);

@@ -1487,7 +1487,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
* This is before x86_pmu_enable() will call x86_pmu_start(),
* so we enable LBRs before an event needs them etc..
*/
- static_call_cond(x86_pmu_add)(event);
+ static_call(x86_pmu_add)(event);

ret = 0;
out:
@@ -1640,7 +1640,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;

- static_call_cond(x86_pmu_put_event_constraints)(cpuc, event);
+ static_call(x86_pmu_put_event_constraints)(cpuc, event);

/* Delete the array entry. */
while (++i < cpuc->n_events) {
@@ -1660,7 +1660,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
* This is after x86_pmu_stop(); so we disable LBRs after any
* event can need them etc..
*/
- static_call_cond(x86_pmu_del)(event);
+ static_call(x86_pmu_del)(event);
}

int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -2627,13 +2627,13 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {

static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
- static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
+ static_call(x86_pmu_sched_task)(pmu_ctx, sched_in);
}

static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc)
{
- static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
+ static_call(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
}

void perf_check_microcode(void)
@@ -2672,7 +2672,7 @@ static bool x86_pmu_filter(struct pmu *pmu, int cpu)
{
bool ret = false;

- static_call_cond(x86_pmu_filter)(pmu, cpu, &ret);
+ static_call(x86_pmu_filter)(pmu, cpu, &ret);

return ret;
}
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 8dc345cc6318..2f0bfd910637 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -9,8 +9,7 @@ BUILD_BUG_ON(1)
* "static_call_update()" calls.
*
* KVM_X86_OP_OPTIONAL() can be used for those functions that can have
- * a NULL definition, for example if "static_call_cond()" will be used
- * at the call sites. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
+ * a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
* to make a definition optional, but in this case the default will
* be __static_call_return0.
*/
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index c17e3e96fc1d..6815319c4ff3 100644
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -9,8 +9,7 @@ BUILD_BUG_ON(1)
* "static_call_update()" calls.
*
* KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have
- * a NULL definition, for example if "static_call_cond()" will be used
- * at the call sites.
+ * a NULL definition.
*/
KVM_X86_PMU_OP(hw_event_available)
KVM_X86_PMU_OP(pmc_is_enabled)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 808c292ad3f4..1dfba499d3e5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2172,12 +2172,12 @@ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)

static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
+ static_call(kvm_x86_vcpu_blocking)(vcpu);
}

static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
+ static_call(kvm_x86_vcpu_unblocking)(vcpu);
}

static inline int kvm_cpu_get_apicid(int mps_cpu)
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index b2c397dd2bc6..4f9e090c9d42 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -155,7 +155,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
- static_call_cond(kvm_x86_migrate_timers)(vcpu);
+ static_call(kvm_x86_migrate_timers)(vcpu);
}

bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e542cf285b51..d5f7e829d975 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -681,8 +681,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
if (unlikely(apic->apicv_active)) {
/* need to update RVI */
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
- static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu,
- apic_find_highest_irr(apic));
+ static_call(kvm_x86_hwapic_irr_update)(apic->vcpu,
+ apic_find_highest_irr(apic));
} else {
apic->irr_pending = false;
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
@@ -708,7 +708,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
* just set SVI.
*/
if (unlikely(apic->apicv_active))
- static_call_cond(kvm_x86_hwapic_isr_update)(vec);
+ static_call(kvm_x86_hwapic_isr_update)(vec);
else {
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -753,7 +753,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
* and must be left alone.
*/
if (unlikely(apic->apicv_active))
- static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
+ static_call(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
else {
--apic->isr_count;
BUG_ON(apic->isr_count < 0);
@@ -2519,7 +2519,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)

if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
- static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
+ static_call(kvm_x86_set_virtual_apic_mode)(vcpu);
}

apic->base_address = apic->vcpu->arch.apic_base &
@@ -2682,9 +2682,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.pv_eoi.msr_val = 0;
apic_update_ppr(apic);
if (apic->apicv_active) {
- static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
- static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
- static_call_cond(kvm_x86_hwapic_isr_update)(-1);
+ static_call(kvm_x86_apicv_post_state_restore)(vcpu);
+ static_call(kvm_x86_hwapic_irr_update)(vcpu, -1);
+ static_call(kvm_x86_hwapic_isr_update)(-1);
}

vcpu->arch.apic_arb_prio = 0;
@@ -2961,9 +2961,9 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
kvm_apic_update_apicv(vcpu);
if (apic->apicv_active) {
- static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
- static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
- static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
+ static_call(kvm_x86_apicv_post_state_restore)(vcpu);
+ static_call(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
+ static_call(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
if (ioapic_in_kernel(vcpu->kvm))
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 612e6c70ce2e..6accb46295a3 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -552,7 +552,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
if (lapic_in_kernel(vcpu)) {
- static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu);
+ static_call(kvm_x86_pmu_deliver_pmi)(vcpu);
kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}
}
@@ -632,7 +632,7 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
pmc_stop_counter(pmc);
}

- static_call_cond(kvm_x86_pmu_cleanup)(vcpu);
+ static_call(kvm_x86_pmu_cleanup)(vcpu);

bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7713420abab0..fcf845fc5770 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4845,7 +4845,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);

return kvm_apic_get_state(vcpu, s);
}
@@ -8948,7 +8948,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
kvm_rip_write(vcpu, ctxt->eip);
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
r = kvm_vcpu_do_singlestep(vcpu);
- static_call_cond(kvm_x86_update_emulated_instruction)(vcpu);
+ static_call(kvm_x86_update_emulated_instruction)(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}

@@ -10307,7 +10307,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
- static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@@ -10329,11 +10329,11 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
bitmap_or((ulong *)eoi_exit_bitmap,
vcpu->arch.ioapic_handled_vectors,
to_hv_synic(vcpu)->vec_bitmap, 256);
- static_call_cond(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
return;
}

- static_call_cond(kvm_x86_load_eoi_exitmap)(
+ static_call(kvm_x86_load_eoi_exitmap)(
vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
}

@@ -10353,7 +10353,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,

void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
- static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
+ static_call(kvm_x86_guest_memory_reclaimed)(kvm);
}

static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
@@ -10361,7 +10361,7 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;

- static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
+ static_call(kvm_x86_set_apic_access_page_addr)(vcpu);
}

void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -10603,7 +10603,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* i.e. they can post interrupts even if APICv is temporarily disabled.
*/
if (kvm_lapic_enabled(vcpu))
- static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);

if (kvm_vcpu_exit_request(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -10654,7 +10654,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
break;

if (kvm_lapic_enabled(vcpu))
- static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);

if (unlikely(kvm_vcpu_exit_request(vcpu))) {
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
@@ -11392,7 +11392,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
*mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
- static_call_cond(kvm_x86_post_set_cr3)(vcpu, sregs->cr3);
+ static_call(kvm_x86_post_set_cr3)(vcpu, sregs->cr3);

kvm_set_cr8(vcpu, sregs->cr8);

@@ -12361,7 +12361,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
kvm_unload_vcpu_mmus(kvm);
- static_call_cond(kvm_x86_vm_destroy)(kvm);
+ static_call(kvm_x86_vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
@@ -13049,7 +13049,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
void kvm_arch_start_assignment(struct kvm *kvm)
{
if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
- static_call_cond(kvm_x86_pi_start_assignment)(kvm);
+ static_call(kvm_x86_pi_start_assignment)(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 65ac01179993..d5254107ccf4 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -22,7 +22,6 @@
* __static_call_return0;
*
* static_call(name)(args...);
- * static_call_cond(name)(args...);
* static_call_ro(name)(args...);
* static_call_update(name, func);
* static_call_query(name);
@@ -92,7 +91,7 @@
*
* DEFINE_STATIC_CALL_RET0 / __static_call_return0:
*
- * Just like how DEFINE_STATIC_CALL_NULL() / static_call_cond() optimize the
+ * Just like how DEFINE_STATIC_CALL_NULL() optimizes the
* conditional void function call, DEFINE_STATIC_CALL_RET0 /
* __static_call_return0 optimize the do nothing return 0 function.
*
@@ -198,8 +197,6 @@ extern long __static_call_return0(void);
__static_call(name); \
})

-#define static_call_cond(name) (void)static_call(name)
-
/* Use static_call_ro() to call a read-only-exported static call. */
#define static_call_ro(name) __static_call_ro(name)

diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c
index c6fc50d67214..b7920482ebcb 100644
--- a/security/keys/trusted-keys/trusted_core.c
+++ b/security/keys/trusted-keys/trusted_core.c
@@ -388,7 +388,7 @@ static int __init init_trusted(void)

static void __exit cleanup_trusted(void)
{
- static_call_cond(trusted_key_exit)();
+ static_call(trusted_key_exit)();
}

late_initcall(init_trusted);
--
2.39.2

2023-03-22 04:02:53

by Josh Poimboeuf

[permalink] [raw]
Subject: [PATCH v2 06/11] static_call: "EXPORT_STATIC_CALL_TRAMP" -> "EXPORT_STATIC_CALL_RO"

EXPORT_STATIC_CALL_TRAMP() effectively creates a read-only export of the
static call: modules can call it, but can't change its target. Make that
clearer by renaming it to EXPORT_STATIC_CALL_RO().
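
To illustrate the intent (a sketch with invented names — my_call, my_func,
other_func — not part of this patch):

    /* core kernel: define the call, export only the trampoline */
    DEFINE_STATIC_CALL(my_call, my_func);
    EXPORT_STATIC_CALL_RO(my_call);

    /* module: may invoke it ... */
    static_call_ro(my_call)(arg);

    /* ... but can't retarget it: the key isn't exported, so
     * static_call_update(my_call, other_func) fails to link.
     */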

Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/x86/events/amd/brs.c | 2 +-
arch/x86/include/asm/perf_event.h | 2 +-
arch/x86/include/asm/preempt.h | 4 ++--
include/linux/kernel.h | 2 +-
include/linux/sched.h | 2 +-
include/linux/static_call.h | 28 +++++++++++++++----------
include/linux/static_call_types.h | 8 +++----
kernel/sched/core.c | 8 +++----
tools/include/linux/static_call_types.h | 8 +++----
9 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index ed308719236c..961be770aa24 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -423,7 +423,7 @@ void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
-EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);
+EXPORT_STATIC_CALL_RO_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc15ed5e60b..43eb95db4cc9 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -588,7 +588,7 @@ DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);

static __always_inline void perf_lopwr_cb(bool lopwr_in)
{
- static_call_mod(perf_lopwr_cb)(lopwr_in);
+ static_call_ro(perf_lopwr_cb)(lopwr_in);
}

#endif /* PERF_NEEDS_LOPWR_CB */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 2d13f25b1bd8..65028c346709 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -124,7 +124,7 @@ DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);

#define __preempt_schedule() \
do { \
- __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \
+ __STATIC_CALL_RO_ADDRESSABLE(preempt_schedule); \
asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
} while (0)

@@ -132,7 +132,7 @@ DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_e

#define __preempt_schedule_notrace() \
do { \
- __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \
+ __STATIC_CALL_RO_ADDRESSABLE(preempt_schedule_notrace); \
asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \
} while (0)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 40bce7495af8..5c857c3acbc0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -107,7 +107,7 @@ DECLARE_STATIC_CALL(might_resched, __cond_resched);

static __always_inline void might_resched(void)
{
- static_call_mod(might_resched)();
+ static_call_ro(might_resched)();
}

#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63d242164b1a..13b17ff4ad22 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2074,7 +2074,7 @@ DECLARE_STATIC_CALL(cond_resched, __cond_resched);

static __always_inline int _cond_resched(void)
{
- return static_call_mod(cond_resched)();
+ return static_call_ro(cond_resched)();
}

#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 013022a8611d..74f089a5955b 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -23,6 +23,7 @@
*
* static_call(name)(args...);
* static_call_cond(name)(args...);
+ * static_call_ro(name)(args...);
* static_call_update(name, func);
* static_call_query(name);
*
@@ -123,12 +124,11 @@
* Notably argument setup is unconditional.
*
*
- * EXPORT_STATIC_CALL() vs EXPORT_STATIC_CALL_TRAMP():
- *
- * The difference is that the _TRAMP variant tries to only export the
- * trampoline with the result that a module can use static_call{,_cond}() but
- * not static_call_update().
+ * EXPORT_STATIC_CALL() vs EXPORT_STATIC_CALL_RO():
*
+ * The difference is the read-only variant exports the trampoline but not the
+ * key, so a module can call it via static_call_ro() but can't update the
+ * target via static_call_update().
*/

#include <linux/types.h>
@@ -210,11 +210,14 @@ extern long __static_call_return0(void);
EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))

-/* Leave the key unexported, so modules can't change static call targets: */
-#define EXPORT_STATIC_CALL_TRAMP(name) \
+/*
+ * Read-only exports: export the trampoline but not the key, so modules can't
+ * change call targets.
+ */
+#define EXPORT_STATIC_CALL_RO(name) \
EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)); \
__STATIC_CALL_ADD_TRAMP_KEY(name)
-#define EXPORT_STATIC_CALL_TRAMP_GPL(name) \
+#define EXPORT_STATIC_CALL_RO_GPL(name) \
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)); \
__STATIC_CALL_ADD_TRAMP_KEY(name)

@@ -268,10 +271,13 @@ extern long __static_call_return0(void);
EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))

-/* Leave the key unexported, so modules can't change static call targets: */
-#define EXPORT_STATIC_CALL_TRAMP(name) \
+/*
+ * Read-only exports: export the trampoline but not the key, so modules can't
+ * change call targets.
+ */
+#define EXPORT_STATIC_CALL_RO(name) \
EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
-#define EXPORT_STATIC_CALL_TRAMP_GPL(name) \
+#define EXPORT_STATIC_CALL_RO_GPL(name) \
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))

#else /* Generic implementation */
diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index c4c4efb6f6fa..06293067424f 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -80,11 +80,11 @@ struct static_call_key {
#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */

#ifdef MODULE
-#define __STATIC_CALL_MOD_ADDRESSABLE(name)
-#define static_call_mod(name) __raw_static_call(name)
+#define __STATIC_CALL_RO_ADDRESSABLE(name)
+#define static_call_ro(name) __raw_static_call(name)
#else
-#define __STATIC_CALL_MOD_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
-#define static_call_mod(name) __static_call(name)
+#define __STATIC_CALL_RO_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
+#define static_call_ro(name) __static_call(name)
#endif

#define static_call(name) __static_call(name)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index af017e038b48..a89de2a2d8f8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6824,7 +6824,7 @@ EXPORT_SYMBOL(preempt_schedule);
#define preempt_schedule_dynamic_disabled NULL
#endif
DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
-EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
+EXPORT_STATIC_CALL_RO(preempt_schedule);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule);
void __sched notrace dynamic_preempt_schedule(void)
@@ -6897,7 +6897,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
#define preempt_schedule_notrace_dynamic_disabled NULL
#endif
DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
-EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace);
+EXPORT_STATIC_CALL_RO(preempt_schedule_notrace);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace);
void __sched notrace dynamic_preempt_schedule_notrace(void)
@@ -8493,12 +8493,12 @@ EXPORT_SYMBOL(__cond_resched);
#define cond_resched_dynamic_enabled __cond_resched
#define cond_resched_dynamic_disabled ((void *)&__static_call_return0)
DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
-EXPORT_STATIC_CALL_TRAMP(cond_resched);
+EXPORT_STATIC_CALL_RO(cond_resched);

#define might_resched_dynamic_enabled __cond_resched
#define might_resched_dynamic_disabled ((void *)&__static_call_return0)
DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
-EXPORT_STATIC_CALL_TRAMP(might_resched);
+EXPORT_STATIC_CALL_RO(might_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
int __sched dynamic_cond_resched(void)
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index c4c4efb6f6fa..06293067424f 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -80,11 +80,11 @@ struct static_call_key {
#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */

#ifdef MODULE
-#define __STATIC_CALL_MOD_ADDRESSABLE(name)
-#define static_call_mod(name) __raw_static_call(name)
+#define __STATIC_CALL_RO_ADDRESSABLE(name)
+#define static_call_ro(name) __raw_static_call(name)
#else
-#define __STATIC_CALL_MOD_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
-#define static_call_mod(name) __static_call(name)
+#define __STATIC_CALL_RO_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
+#define static_call_ro(name) __static_call(name)
#endif

#define static_call(name) __static_call(name)
--
2.39.2

2023-03-22 04:03:11

by Josh Poimboeuf

[permalink] [raw]
Subject: [PATCH v2 09/11] static_call: Make NULL static calls consistent

NULL static calls have inconsistent behavior. With HAVE_STATIC_CALL=y
they're a NOP, but with HAVE_STATIC_CALL=n they go boom.

That's guaranteed to cause subtle bugs. Make the behavior consistent by
making NULL static calls a NOP with HAVE_STATIC_CALL=n as well.

This is probably easier than doing the reverse (making NULL static calls
panic with HAVE_STATIC_CALL=y). And it seems to match the current use
cases better: there are several call sites which rely on the NOP
behavior, whereas no call sites rely on the crashing behavior.
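
To illustrate with an invented example (my_hook is made up):

    DEFINE_STATIC_CALL_NULL(my_hook, void (*)(int));

    /* Previously a NOP with HAVE_STATIC_CALL=y but a NULL dereference
     * with HAVE_STATIC_CALL=n; now a NOP in both configs.  Argument
     * evaluation stays unconditional either way.
     */
    static_call(my_hook)(compute_arg());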

Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/arm64/include/asm/static_call.h | 4 ++
arch/powerpc/include/asm/static_call.h | 2 +-
arch/powerpc/kernel/static_call.c | 5 +-
arch/x86/include/asm/static_call.h | 4 +-
arch/x86/kernel/static_call.c | 14 +++--
include/linux/static_call.h | 78 +++++++++----------------
include/linux/static_call_types.h | 4 ++
kernel/static_call.c | 5 ++
tools/include/linux/static_call_types.h | 4 ++
9 files changed, 57 insertions(+), 63 deletions(-)

diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
index b3489cac7742..02693b404afc 100644
--- a/arch/arm64/include/asm/static_call.h
+++ b/arch/arm64/include/asm/static_call.h
@@ -22,6 +22,10 @@
".type " name ", @function \n" \
".size " name ", . - " name " \n")

+#define __ARCH_DEFINE_STATIC_CALL_NOP_CFI(name) \
+ GEN_CFI_SYM(STATIC_CALL_NOP_CFI(name)); \
+ __ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_NOP_CFI_STR(name), "")
+
#define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name) \
GEN_CFI_SYM(STATIC_CALL_RET0_CFI(name)); \
__ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_RET0_CFI_STR(name), "mov x0, xzr")
diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
index de1018cc522b..744435127574 100644
--- a/arch/powerpc/include/asm/static_call.h
+++ b/arch/powerpc/include/asm/static_call.h
@@ -23,7 +23,7 @@
#define PPC_SCT_DATA 28 /* Offset of label 2 */

#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func)
-#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr")
+#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) __PPC_SCT(name, "blr")
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20")

#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
index 863a7aa24650..8bfe46654e01 100644
--- a/arch/powerpc/kernel/static_call.c
+++ b/arch/powerpc/kernel/static_call.c
@@ -8,6 +8,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
{
int err;
bool is_ret0 = (func == __static_call_return0);
+ bool is_nop = (func == __static_call_nop);
unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func);
bool is_short = is_offset_in_branch_range((long)target - (long)tramp);

@@ -16,13 +17,13 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)

mutex_lock(&text_mutex);

- if (func && !is_short) {
+ if (!is_nop && !is_short) {
err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
if (err)
goto out;
}

- if (!func)
+ if (is_nop)
err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
else if (is_short)
err = patch_branch(tramp, target, 0);
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 14c6b1862e2e..afea6ceeed23 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -48,10 +48,10 @@
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")

#ifdef CONFIG_RETHUNK
-#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
#else
-#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
#endif

diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index b70670a98597..27c095c7fc96 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -89,7 +89,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,

case JCC:
if (!func) {
- func = __static_call_return;
+ func = __static_call_return; //FIXME use __static_call_nop()?
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
func = x86_return_thunk;
}
@@ -139,33 +139,35 @@ static void __static_call_validate(u8 *insn, bool tail, bool tramp)
BUG();
}

-static inline enum insn_type __sc_insn(bool null, bool tail)
+static inline enum insn_type __sc_insn(bool nop, bool tail)
{
/*
* Encode the following table without branches:
*
- * tail null insn
+ * tail nop insn
* -----+-------+------
* 0 | 0 | CALL
* 0 | 1 | NOP
* 1 | 0 | JMP
* 1 | 1 | RET
*/
- return 2*tail + null;
+ return 2*tail + nop;
}

void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
{
+ bool nop = (func == __static_call_nop);
+
mutex_lock(&text_mutex);

if (tramp) {
__static_call_validate(tramp, true, true);
- __static_call_transform(tramp, __sc_insn(!func, true), func, false);
+ __static_call_transform(tramp, __sc_insn(nop, true), func, false);
}

if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
__static_call_validate(site, tail, false);
- __static_call_transform(site, __sc_insn(!func, tail), func, false);
+ __static_call_transform(site, __sc_insn(nop, tail), func, false);
}

mutex_unlock(&text_mutex);
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 50ad928afeb8..65ac01179993 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -66,24 +66,16 @@
*
* Notes on NULL function pointers:
*
- * Static_call()s support NULL functions, with many of the caveats that
- * regular function pointers have.
+ * A static_call() to a NULL function pointer is a NOP.
*
- * Clearly calling a NULL function pointer is 'BAD', so too for
- * static_call()s (although when HAVE_STATIC_CALL it might not be immediately
- * fatal). A NULL static_call can be the result of:
+ * A NULL static call can be the result of:
*
* DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int));
*
- * which is equivalent to declaring a NULL function pointer with just a
- * typename:
- *
- * void (*my_func_ptr)(int arg1) = NULL;
- *
- * or using static_call_update() with a NULL function. In both cases the
- * HAVE_STATIC_CALL implementation will patch the trampoline with a RET
- * instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
- * architectures can patch the trampoline call to a NOP.
+ * or using static_call_update() with a NULL function pointer. In both cases
+ * the HAVE_STATIC_CALL implementation will patch the trampoline with a RET
+ * instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
+ * architectures can patch the trampoline call to a NOP.
*
* In all cases, any argument evaluation is unconditional. Unlike a regular
* conditional function pointer call:
@@ -91,14 +83,7 @@
* if (my_func_ptr)
* my_func_ptr(arg1)
*
- * where the argument evaludation also depends on the pointer value.
- *
- * When calling a static_call that can be NULL, use:
- *
- * static_call_cond(name)(arg1);
- *
- * which will include the required value tests to avoid NULL-pointer
- * dereferences.
+ * where the argument evaluation also depends on the pointer value.
*
* To query which function is currently set to be called, use:
*
@@ -147,6 +132,7 @@ struct static_call_key {
#endif
};

+extern void __static_call_nop(void);
extern long __static_call_return0(void);

#define DECLARE_STATIC_CALL(name, func) \
@@ -166,8 +152,8 @@ extern long __static_call_return0(void);
__DEFINE_STATIC_CALL_TRAMP(name, func)

#define DEFINE_STATIC_CALL_NULL(name, type) \
- __DEFINE_STATIC_CALL(name, type, NULL); \
- __DEFINE_STATIC_CALL_NULL_TRAMP(name)
+ __DEFINE_STATIC_CALL(name, type, __STATIC_CALL_NOP(name)); \
+ __DEFINE_STATIC_CALL_NOP_TRAMP(name)

#define DEFINE_STATIC_CALL_RET0(name, type) \
__DEFINE_STATIC_CALL(name, type, __STATIC_CALL_RET0(name)); \
@@ -212,7 +198,7 @@ extern long __static_call_return0(void);
__static_call(name); \
})

-#define static_call_cond(name) (void)__static_call_cond(name)
+#define static_call_cond(name) (void)static_call(name)

/* Use static_call_ro() to call a read-only-exported static call. */
#define static_call_ro(name) __static_call_ro(name)
@@ -228,7 +214,9 @@ extern long __static_call_return0(void);
#define static_call_update(name, func) \
({ \
typeof(&STATIC_CALL_TRAMP(name)) __F = (func); \
- if (__F == (void *)__static_call_return0) \
+ if (!__F) \
+ __F = __STATIC_CALL_NOP(name); \
+ else if (__F == (void *)__static_call_return0) \
__F = __STATIC_CALL_RET0(name); \
__static_call_update(&STATIC_CALL_KEY(name), \
STATIC_CALL_TRAMP_ADDR(name), __F); \
@@ -237,7 +225,9 @@ extern long __static_call_return0(void);
#define static_call_query(name) \
({ \
void *__F = (READ_ONCE(STATIC_CALL_KEY(name).func)); \
- if (__F == __STATIC_CALL_RET0(name)) \
+ if (__F == __STATIC_CALL_NOP(name)) \
+ __F = NULL; \
+ else if (__F == __STATIC_CALL_RET0(name)) \
__F = __static_call_return0; \
__F; \
})
@@ -249,8 +239,8 @@ extern long __static_call_return0(void);
#define __DEFINE_STATIC_CALL_TRAMP(name, func) \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)

-#define __DEFINE_STATIC_CALL_NULL_TRAMP(name) \
- ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
+#define __DEFINE_STATIC_CALL_NOP_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name)

#define __DEFINE_STATIC_CALL_RET0_TRAMP(name) \
ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
@@ -262,7 +252,6 @@ extern long __static_call_return0(void);
EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))

#define __static_call(name) (&STATIC_CALL_TRAMP(name))
-#define __static_call_cond __static_call

#define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)

@@ -276,7 +265,7 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
#else /* !CONFIG_HAVE_STATIC_CALL */

#define __DEFINE_STATIC_CALL_TRAMP(name, func)
-#define __DEFINE_STATIC_CALL_NULL_TRAMP(name)
+#define __DEFINE_STATIC_CALL_NOP_TRAMP(name)
#define __DEFINE_STATIC_CALL_RET0_TRAMP(name)
#define __EXPORT_STATIC_CALL_TRAMP(name)
#define __EXPORT_STATIC_CALL_TRAMP_GPL(name)
@@ -284,27 +273,6 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
#define __static_call(name) \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))

-static inline void __static_call_nop(void) { }
-/*
- * This horrific hack takes care of two things:
- *
- * - it ensures the compiler will only load the function pointer ONCE,
- * which avoids a reload race.
- *
- * - it ensures the argument evaluation is unconditional, similar
- * to the HAVE_STATIC_CALL variant.
- *
- * Sadly current GCC/Clang (10 for both) do not optimize this properly
- * and will emit an indirect call for the NULL case :-(
- */
-#define __static_call_cond(name) \
-({ \
- void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \
- if (!func) \
- func = &__static_call_nop; \
- (typeof(STATIC_CALL_TRAMP(name))*)func; \
-})
-
#define STATIC_CALL_TRAMP_ADDR(name) NULL

static inline
@@ -343,22 +311,28 @@ static inline void static_call_force_reinit(void) {}

#include <asm/static_call.h>

+#define __STATIC_CALL_NOP(name) STATIC_CALL_NOP_CFI(name)
#define __STATIC_CALL_RET0(name) STATIC_CALL_RET0_CFI(name)

#define __DECLARE_STATIC_CALL_CFI(name, func) \
+ extern typeof(func) STATIC_CALL_NOP_CFI(name); \
extern typeof(func) STATIC_CALL_RET0_CFI(name)

#define __DEFINE_STATIC_CALL_CFI(name) \
+ __ARCH_DEFINE_STATIC_CALL_NOP_CFI(name); \
__ARCH_DEFINE_STATIC_CALL_RET0_CFI(name)

#define __EXPORT_STATIC_CALL_CFI(name) \
+ EXPORT_SYMBOL(STATIC_CALL_NOP_CFI(name)); \
EXPORT_SYMBOL(STATIC_CALL_RET0_CFI(name))

#define __EXPORT_STATIC_CALL_CFI_GPL(name) \
+ EXPORT_SYMBOL_GPL(STATIC_CALL_NOP_CFI(name)); \
EXPORT_SYMBOL_GPL(STATIC_CALL_RET0_CFI(name))

#else /* ! CONFIG_CFI_WITHOUT_STATIC_CALL */

+#define __STATIC_CALL_NOP(name) (void *)__static_call_nop
#define __STATIC_CALL_RET0(name) (void *)__static_call_return0

#define __DECLARE_STATIC_CALL_CFI(name, func)
diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 72732af51cba..2e2481c3f54e 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -22,6 +22,10 @@
#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))

+#define STATIC_CALL_NOP_CFI_PREFIX __SCN__
+#define STATIC_CALL_NOP_CFI(name) __PASTE(STATIC_CALL_NOP_CFI_PREFIX, name)
+#define STATIC_CALL_NOP_CFI_STR(name) __stringify(STATIC_CALL_NOP_CFI(name))
+
#define STATIC_CALL_RET0_CFI_PREFIX __SCR__
#define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
#define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))
diff --git a/kernel/static_call.c b/kernel/static_call.c
index 090ecf5d34b4..20bf34bc3e2a 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -9,6 +9,11 @@ long __static_call_return0(void)
}
EXPORT_SYMBOL_GPL(__static_call_return0);

+void __static_call_nop(void)
+{
+}
+EXPORT_SYMBOL_GPL(__static_call_nop);
+
#if defined(CONFIG_HAVE_STATIC_CALL) && !defined(CONFIG_HAVE_STATIC_CALL_INLINE)
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 72732af51cba..2e2481c3f54e 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -22,6 +22,10 @@
#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))

+#define STATIC_CALL_NOP_CFI_PREFIX __SCN__
+#define STATIC_CALL_NOP_CFI(name) __PASTE(STATIC_CALL_NOP_CFI_PREFIX, name)
+#define STATIC_CALL_NOP_CFI_STR(name) __stringify(STATIC_CALL_NOP_CFI(name))
+
#define STATIC_CALL_RET0_CFI_PREFIX __SCR__
#define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
#define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))
--
2.39.2

2023-03-22 04:03:58

by Josh Poimboeuf

[permalink] [raw]
Subject: [PATCH v2 11/11] static_call: Remove DEFINE_STATIC_CALL_RET0()

NULL and RET0 static calls are both slightly different ways of nopping a
static call. A not-insignificant amount of code and complexity is spent
maintaining them separately. It's also somewhat tricky for the user, who
has to remember which one to use for a given function type.

Simplify things all around by just combining them, such that NULL static
calls always return 0.

While it doesn't necessarily make sense for void-return functions to
return 0, it's pretty much harmless. The return value register is
already callee-clobbered, and an extra "xor %eax, %eax" shouldn't affect
performance (knock on wood).

This "do nothing return 0" default should work for the vast majority of
NULL cases. Otherwise it can be easily overridden with a user-specified
function which panics or returns 0xdeadbeef or does whatever one wants.

This simplifies the static call code and also tends to help simplify
users' code as well.
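
An invented example of the new semantics (my_hook and my_panic_stub are
made-up names):

    DEFINE_STATIC_CALL_NULL(my_hook, int (*)(void));

    /* With no target installed, this is now "do nothing, return 0": */
    int ret = static_call(my_hook)();	/* ret == 0 */

    /* A caller wanting different NULL semantics installs its own stub,
     * e.g. one that panics:
     */
    static_call_update(my_hook, my_panic_stub);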

Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/arm64/include/asm/static_call.h | 4 --
arch/powerpc/include/asm/static_call.h | 1 -
arch/powerpc/kernel/irq.c | 2 +-
arch/powerpc/kernel/static_call.c | 7 +-
arch/x86/events/amd/core.c | 2 +-
arch/x86/events/core.c | 5 +-
arch/x86/include/asm/kvm-x86-ops.h | 3 +-
arch/x86/include/asm/static_call.h | 13 +---
arch/x86/kernel/alternative.c | 6 --
arch/x86/kernel/static_call.c | 89 ++-----------------------
arch/x86/kvm/x86.c | 4 +-
include/linux/static_call.h | 65 +++++-------------
include/linux/static_call_types.h | 4 --
kernel/events/core.c | 15 ++---
kernel/sched/core.c | 10 +--
kernel/static_call.c | 5 --
tools/include/linux/static_call_types.h | 4 --
17 files changed, 39 insertions(+), 200 deletions(-)

diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
index 02693b404afc..b3489cac7742 100644
--- a/arch/arm64/include/asm/static_call.h
+++ b/arch/arm64/include/asm/static_call.h
@@ -22,10 +22,6 @@
".type " name ", @function \n" \
".size " name ", . - " name " \n")

-#define __ARCH_DEFINE_STATIC_CALL_NOP_CFI(name) \
- GEN_CFI_SYM(STATIC_CALL_NOP_CFI(name)); \
- __ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_NOP_CFI_STR(name), "")
-
#define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name) \
GEN_CFI_SYM(STATIC_CALL_RET0_CFI(name)); \
__ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_RET0_CFI_STR(name), "mov x0, xzr")
diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
index 744435127574..0b17fc551157 100644
--- a/arch/powerpc/include/asm/static_call.h
+++ b/arch/powerpc/include/asm/static_call.h
@@ -23,7 +23,6 @@
#define PPC_SCT_DATA 28 /* Offset of label 2 */

#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func)
-#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) __PPC_SCT(name, "blr")
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20")

#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c9535f2760b5..320e1a41abd6 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -220,7 +220,7 @@ static __always_inline void call_do_softirq(const void *sp)
}
#endif

-DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq);
+DEFINE_STATIC_CALL_NULL(ppc_get_irq, *ppc_md.get_irq);

static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
{
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
index 8bfe46654e01..db3116b2d8a8 100644
--- a/arch/powerpc/kernel/static_call.c
+++ b/arch/powerpc/kernel/static_call.c
@@ -8,7 +8,6 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
{
int err;
bool is_ret0 = (func == __static_call_return0);
- bool is_nop = (func == __static_call_nop);
unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func);
bool is_short = is_offset_in_branch_range((long)target - (long)tramp);

@@ -17,15 +16,13 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)

mutex_lock(&text_mutex);

- if (!is_nop && !is_short) {
+ if (!is_short) {
err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
if (err)
goto out;
}

- if (is_nop)
- err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
- else if (is_short)
+ if (is_short)
err = patch_branch(tramp, target, 0);
else
err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 8c45b198b62f..3c545595bfeb 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -330,7 +330,7 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
}
}

-DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
+DEFINE_STATIC_CALL_NULL(amd_pmu_branch_hw_config, *x86_pmu.hw_config);

static int amd_core_hw_config(struct perf_event *event)
{
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index c94537501091..dfeaeee34acf 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -96,7 +96,7 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
*/
-DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
+DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);

u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
@@ -2125,9 +2125,6 @@ static int __init init_hw_perf_events(void)
if (!x86_pmu.read)
x86_pmu.read = _x86_pmu_read;

- if (!x86_pmu.guest_get_msrs)
- x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
-
if (!x86_pmu.set_period)
x86_pmu.set_period = x86_perf_event_set_period;

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 2f0bfd910637..6e1259ed1014 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -10,8 +10,7 @@ BUILD_BUG_ON(1)
*
* KVM_X86_OP_OPTIONAL() can be used for those functions that can have
* a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
- * to make a definition optional, but in this case the default will
- * be __static_call_return0.
+ * to make a definition optional.
*/
KVM_X86_OP(check_processor_compatibility)
KVM_X86_OP(hardware_enable)
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index afea6ceeed23..21ad48988f6e 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -29,8 +29,7 @@
* ud1 %esp, %ecx
*
* That trailing #UD provides both a speculation stop and serves as a unique
- * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
- * and __static_call_fixup().
+ * 3 byte signature identifying static call trampolines. Also see tramp_ud[].
*/
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
asm(".pushsection .static_call.text, \"ax\" \n" \
@@ -47,17 +46,7 @@
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")

-#ifdef CONFIG_RETHUNK
-#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) \
- __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
-#else
-#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) \
- __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
-#endif
-
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)

-extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
-
#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f615e0cb6d93..4388dc9942ca 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -624,12 +624,6 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
if (op == JMP32_INSN_OPCODE)
dest = addr + insn.length + insn.immediate.value;

- if (__static_call_fixup(addr, op, dest) ||
- WARN_ONCE(dest != &__x86_return_thunk,
- "missing return thunk: %pS-%pS: %*ph",
- addr, dest, 5, addr))
- continue;
-
DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
addr, addr, insn.length,
addr + insn.length + insn.immediate.value);
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 27c095c7fc96..d914167fbb4e 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -6,10 +6,8 @@

enum insn_type {
CALL = 0, /* site call */
- NOP = 1, /* site cond-call */
- JMP = 2, /* tramp / site tail-call */
- RET = 3, /* tramp / site cond-tail-call */
- JCC = 4,
+ JMP = 1, /* tramp / site tail-call */
+ JCC = 2,
};

/*
@@ -24,8 +22,6 @@ static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
*/
static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };

-static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
-
static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */
{
u8 ret = 0;
@@ -39,17 +35,6 @@ static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */
return ret;
}

-extern void __static_call_return(void);
-
-asm (".global __static_call_return\n\t"
- ".type __static_call_return, @function\n\t"
- ASM_FUNC_ALIGN "\n\t"
- "__static_call_return:\n\t"
- ANNOTATE_NOENDBR
- ANNOTATE_RETPOLINE_SAFE
- "ret; int3\n\t"
- ".size __static_call_return, . - __static_call_return \n\t");
-
static void __ref __static_call_transform(void *insn, enum insn_type type,
void *func, bool modinit)
{
@@ -58,7 +43,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
const void *code;
u8 op, buf[6];

- if ((type == JMP || type == RET) && (op = __is_Jcc(insn)))
+ if (type == JMP && (op = __is_Jcc(insn)))
type = JCC;

switch (type) {
@@ -72,28 +57,11 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,

break;

- case NOP:
- code = x86_nops[5];
- break;
-
case JMP:
code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
break;

- case RET:
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
- else
- code = &retinsn;
- break;
-
case JCC:
- if (!func) {
- func = __static_call_return; //FIXME use __static_call_nop()?
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- func = x86_return_thunk;
- }
-
buf[0] = 0x0f;
__text_gen_insn(buf+1, op, insn+1, func, 5);
code = buf;
@@ -122,12 +90,10 @@ static void __static_call_validate(u8 *insn, bool tail, bool tramp)

if (tail) {
if (opcode == JMP32_INSN_OPCODE ||
- opcode == RET_INSN_OPCODE ||
__is_Jcc(insn))
return;
} else {
if (opcode == CALL_INSN_OPCODE ||
- !memcmp(insn, x86_nops[5], 5) ||
!memcmp(insn, xor5rax, 5))
return;
}
@@ -139,65 +105,22 @@ static void __static_call_validate(u8 *insn, bool tail, bool tramp)
BUG();
}

-static inline enum insn_type __sc_insn(bool nop, bool tail)
-{
- /*
- * Encode the following table without branches:
- *
- * tail nop insn
- * -----+-------+------
- * 0 | 0 | CALL
- * 0 | 1 | NOP
- * 1 | 0 | JMP
- * 1 | 1 | RET
- */
- return 2*tail + nop;
-}
-
void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
{
- bool nop = (func == __static_call_nop);
+ enum insn_type insn = tail ? JMP : CALL;

mutex_lock(&text_mutex);

if (tramp) {
__static_call_validate(tramp, true, true);
- __static_call_transform(tramp, __sc_insn(nop, true), func, false);
+ __static_call_transform(tramp, insn, func, false);
}

if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
__static_call_validate(site, tail, false);
- __static_call_transform(site, __sc_insn(nop, tail), func, false);
+ __static_call_transform(site, insn, func, false);
}

mutex_unlock(&text_mutex);
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
-
-#ifdef CONFIG_RETHUNK
-/*
- * This is called by apply_returns() to fix up static call trampolines,
- * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
- * having a return trampoline.
- *
- * The problem is that static_call() is available before determining
- * X86_FEATURE_RETHUNK and, by implication, running alternatives.
- *
- * This means that __static_call_transform() above can have overwritten the
- * return trampoline and we now need to fix things up to be consistent.
- */
-bool __static_call_fixup(void *tramp, u8 op, void *dest)
-{
- if (memcmp(tramp+5, tramp_ud, 3)) {
- /* Not a trampoline site, not our problem. */
- return false;
- }
-
- mutex_lock(&text_mutex);
- if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
- __static_call_transform(tramp, RET, NULL, true);
- mutex_unlock(&text_mutex);
-
- return true;
-}
-#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fcf845fc5770..324676d738c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9321,9 +9321,7 @@ static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
#define KVM_X86_OP(func) \
WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
#define KVM_X86_OP_OPTIONAL __KVM_X86_OP
-#define KVM_X86_OP_OPTIONAL_RET0(func) \
- static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
- (void *)__static_call_return0);
+#define KVM_X86_OP_OPTIONAL_RET0(func) __KVM_X86_OP
#include <asm/kvm-x86-ops.h>
#undef __KVM_X86_OP

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index d5254107ccf4..625b3217480f 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -17,9 +17,6 @@
* DECLARE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL_NULL(name, typename);
- * DEFINE_STATIC_CALL_RET0(name, typename);
- *
- * __static_call_return0;
*
* static_call(name)(args...);
* static_call_ro(name)(args...);
@@ -65,19 +62,26 @@
*
* Notes on NULL function pointers:
*
- * A static_call() to a NULL function pointer is a NOP.
+ * A static_call() to a NULL function pointer is equivalent to a call to a
+ * "do nothing return 0" function.
*
* A NULL static call can be the result of:
*
* DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int));
*
- * or using static_call_update() with a NULL function pointer. In both cases
- * the HAVE_STATIC_CALL implementation will patch the trampoline with a RET
- * instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
- * architectures can patch the trampoline call to a NOP.
+ * or using static_call_update() with a NULL function pointer.
+ *
+ * The "return 0" feature is strictly UB per the C standard (since it casts a
+ * function pointer to a different signature) and relies on the architecture
+ * ABI to make things work. In particular it relies on the return value
+ * register being callee-clobbered for all function calls.
+ *
+ * Notably, the x86_64 implementation of HAVE_STATIC_CALL_INLINE
+ * replaces the 5 byte CALL instruction at the callsite with a 5 byte clear
+ * of the RAX register, completely eliding any function call overhead.
*
- * In all cases, any argument evaluation is unconditional. Unlike a regular
- * conditional function pointer call:
+ * Any argument evaluation is unconditional. Unlike a regular conditional
+ * function pointer call:
*
* if (my_func_ptr)
* my_func_ptr(arg1)
@@ -88,26 +92,6 @@
*
* func = static_call_query(name);
*
- *
- * DEFINE_STATIC_CALL_RET0 / __static_call_return0:
- *
- * Just like how DEFINE_STATIC_CALL_NULL() optimizes the
- * conditional void function call, DEFINE_STATIC_CALL_RET0 /
- * __static_call_return0 optimize the do nothing return 0 function.
- *
- * This feature is strictly UB per the C standard (since it casts a function
- * pointer to a different signature) and relies on the architecture ABI to
- * make things work. In particular it relies on Caller Stack-cleanup and the
- * whole return register being clobbered for short return values. All normal
- * CDECL style ABIs conform.
- *
- * In particular the x86_64 implementation replaces the 5 byte CALL
- * instruction at the callsite with a 5 byte clear of the RAX register,
- * completely eliding any function call overhead.
- *
- * Notably argument setup is unconditional.
- *
- *
* EXPORT_STATIC_CALL() vs EXPORT_STATIC_CALL_RO():
*
* The difference is the read-only variant exports the trampoline but not the
@@ -131,7 +115,6 @@ struct static_call_key {
#endif
};

-extern void __static_call_nop(void);
extern long __static_call_return0(void);

#define DECLARE_STATIC_CALL(name, func) \
@@ -151,10 +134,6 @@ extern long __static_call_return0(void);
__DEFINE_STATIC_CALL_TRAMP(name, func)

#define DEFINE_STATIC_CALL_NULL(name, type) \
- __DEFINE_STATIC_CALL(name, type, __STATIC_CALL_NOP(name)); \
- __DEFINE_STATIC_CALL_NOP_TRAMP(name)
-
-#define DEFINE_STATIC_CALL_RET0(name, type) \
__DEFINE_STATIC_CALL(name, type, __STATIC_CALL_RET0(name)); \
__DEFINE_STATIC_CALL_RET0_TRAMP(name)

@@ -212,8 +191,6 @@ extern long __static_call_return0(void);
({ \
typeof(&STATIC_CALL_TRAMP(name)) __F = (func); \
if (!__F) \
- __F = __STATIC_CALL_NOP(name); \
- else if (__F == (void *)__static_call_return0) \
__F = __STATIC_CALL_RET0(name); \
__static_call_update(&STATIC_CALL_KEY(name), \
STATIC_CALL_TRAMP_ADDR(name), __F); \
@@ -222,10 +199,8 @@ extern long __static_call_return0(void);
#define static_call_query(name) \
({ \
void *__F = (READ_ONCE(STATIC_CALL_KEY(name).func)); \
- if (__F == __STATIC_CALL_NOP(name)) \
+ if (__F == __STATIC_CALL_RET0(name)) \
__F = NULL; \
- else if (__F == __STATIC_CALL_RET0(name)) \
- __F = __static_call_return0; \
__F; \
})

@@ -236,9 +211,6 @@ extern long __static_call_return0(void);
#define __DEFINE_STATIC_CALL_TRAMP(name, func) \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)

-#define __DEFINE_STATIC_CALL_NOP_TRAMP(name) \
- ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name)
-
#define __DEFINE_STATIC_CALL_RET0_TRAMP(name) \
ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)

@@ -262,7 +234,6 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
#else /* !CONFIG_HAVE_STATIC_CALL */

#define __DEFINE_STATIC_CALL_TRAMP(name, func)
-#define __DEFINE_STATIC_CALL_NOP_TRAMP(name)
#define __DEFINE_STATIC_CALL_RET0_TRAMP(name)
#define __EXPORT_STATIC_CALL_TRAMP(name)
#define __EXPORT_STATIC_CALL_TRAMP_GPL(name)
@@ -308,28 +279,22 @@ static inline void static_call_force_reinit(void) {}

#include <asm/static_call.h>

-#define __STATIC_CALL_NOP(name) STATIC_CALL_NOP_CFI(name)
#define __STATIC_CALL_RET0(name) STATIC_CALL_RET0_CFI(name)

#define __DECLARE_STATIC_CALL_CFI(name, func) \
- extern typeof(func) STATIC_CALL_NOP_CFI(name); \
extern typeof(func) STATIC_CALL_RET0_CFI(name)

#define __DEFINE_STATIC_CALL_CFI(name) \
- __ARCH_DEFINE_STATIC_CALL_NOP_CFI(name); \
__ARCH_DEFINE_STATIC_CALL_RET0_CFI(name)

#define __EXPORT_STATIC_CALL_CFI(name) \
- EXPORT_SYMBOL(STATIC_CALL_NOP_CFI(name)); \
EXPORT_SYMBOL(STATIC_CALL_RET0_CFI(name))

#define __EXPORT_STATIC_CALL_CFI_GPL(name) \
- EXPORT_SYMBOL_GPL(STATIC_CALL_NOP_CFI(name)); \
EXPORT_SYMBOL_GPL(STATIC_CALL_RET0_CFI(name))

#else /* ! CONFIG_CFI_WITHOUT_STATIC_CALL */

-#define __STATIC_CALL_NOP(name) (void *)__static_call_nop
#define __STATIC_CALL_RET0(name) (void *)__static_call_return0

#define __DECLARE_STATIC_CALL_CFI(name, func)
diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 2e2481c3f54e..72732af51cba 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -22,10 +22,6 @@
#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))

-#define STATIC_CALL_NOP_CFI_PREFIX __SCN__
-#define STATIC_CALL_NOP_CFI(name) __PASTE(STATIC_CALL_NOP_CFI_PREFIX, name)
-#define STATIC_CALL_NOP_CFI_STR(name) __stringify(STATIC_CALL_NOP_CFI(name))
-
#define STATIC_CALL_RET0_CFI_PREFIX __SCR__
#define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
#define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f79fd8b87f75..52f1edb8128c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6757,9 +6757,9 @@ static void perf_pending_task(struct callback_head *head)
#ifdef CONFIG_GUEST_PERF_EVENTS
struct perf_guest_info_callbacks __rcu *perf_guest_cbs;

-DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
-DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
-DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+DEFINE_STATIC_CALL_NULL(__perf_guest_state, *perf_guest_cbs->state);
+DEFINE_STATIC_CALL_NULL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
+DEFINE_STATIC_CALL_NULL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);

void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
@@ -6783,10 +6783,9 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
return;

rcu_assign_pointer(perf_guest_cbs, NULL);
- static_call_update(__perf_guest_state, (void *)&__static_call_return0);
- static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
- static_call_update(__perf_guest_handle_intel_pt_intr,
- (void *)&__static_call_return0);
+ static_call_update(__perf_guest_state, NULL);
+ static_call_update(__perf_guest_get_ip, NULL);
+ static_call_update(__perf_guest_handle_intel_pt_intr, NULL);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
@@ -13766,4 +13765,4 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
};
#endif /* CONFIG_CGROUP_PERF */

-DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
+DEFINE_STATIC_CALL_NULL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a89de2a2d8f8..e69543a8b098 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6821,7 +6821,6 @@ EXPORT_SYMBOL(preempt_schedule);
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#ifndef preempt_schedule_dynamic_enabled
#define preempt_schedule_dynamic_enabled preempt_schedule
-#define preempt_schedule_dynamic_disabled NULL
#endif
DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
EXPORT_STATIC_CALL_RO(preempt_schedule);
@@ -6894,7 +6893,6 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#ifndef preempt_schedule_notrace_dynamic_enabled
#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace
-#define preempt_schedule_notrace_dynamic_disabled NULL
#endif
DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
EXPORT_STATIC_CALL_RO(preempt_schedule_notrace);
@@ -8491,13 +8489,11 @@ EXPORT_SYMBOL(__cond_resched);
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define cond_resched_dynamic_enabled __cond_resched
-#define cond_resched_dynamic_disabled ((void *)&__static_call_return0)
-DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
+DEFINE_STATIC_CALL_NULL(cond_resched, __cond_resched);
EXPORT_STATIC_CALL_RO(cond_resched);

#define might_resched_dynamic_enabled __cond_resched
-#define might_resched_dynamic_disabled ((void *)&__static_call_return0)
-DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
+DEFINE_STATIC_CALL_NULL(might_resched, __cond_resched);
EXPORT_STATIC_CALL_RO(might_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
@@ -8643,7 +8639,7 @@ int sched_dynamic_mode(const char *str)

#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
-#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
+#define preempt_dynamic_disable(f) static_call_update(f, NULL)
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
diff --git a/kernel/static_call.c b/kernel/static_call.c
index 20bf34bc3e2a..090ecf5d34b4 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -9,11 +9,6 @@ long __static_call_return0(void)
}
EXPORT_SYMBOL_GPL(__static_call_return0);

-void __static_call_nop(void)
-{
-}
-EXPORT_SYMBOL_GPL(__static_call_nop);
-
#if defined(CONFIG_HAVE_STATIC_CALL) && !defined(CONFIG_HAVE_STATIC_CALL_INLINE)
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 2e2481c3f54e..72732af51cba 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -22,10 +22,6 @@
#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))

-#define STATIC_CALL_NOP_CFI_PREFIX __SCN__
-#define STATIC_CALL_NOP_CFI(name) __PASTE(STATIC_CALL_NOP_CFI_PREFIX, name)
-#define STATIC_CALL_NOP_CFI_STR(name) __stringify(STATIC_CALL_NOP_CFI(name))
-
#define STATIC_CALL_RET0_CFI_PREFIX __SCR__
#define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
#define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))
--
2.39.2

2023-03-22 04:59:01

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 01/11] static_call: Improve key type abstraction

On Tue, Mar 21, 2023 at 09:00:07PM -0700, Josh Poimboeuf wrote:
> diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
> index 639397b5491c..41f6bda6773a 100644
> --- a/kernel/static_call_inline.c
> +++ b/kernel/static_call_inline.c
> @@ -112,15 +112,21 @@ static inline void static_call_sort_entries(struct static_call_site *start,
>
> static inline bool static_call_key_has_mods(struct static_call_key *key)
> {
> - return !(key->type & 1);
> + return !!(key->type & 1);
> }
>
> -static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
> +static inline struct static_call_mod *static_call_key_mods(struct static_call_key *key)
> {
> if (!static_call_key_has_mods(key))
> return NULL;
>
> - return key->mods;
> + return (struct static_call_mod *)(key->type & ~1);
> +}
> +

Oops, these parts belonged in the next patch.

--
Josh

2023-03-22 05:12:22

by Josh Poimboeuf

[permalink] [raw]
Subject: [PATCH v2.1 01/11] static_call: Improve key type abstraction

Make the static_call_key union less fragile by abstracting all knowledge
about the type bit into helper functions.
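
The union is effectively a tagged pointer; in miniature (a simplified
sketch, not the actual helpers added below):

    /* bit 0 of 'type' tags which member is live: 0 = mods, 1 = sites */
    if (key->type & 1)
        sites = (struct static_call_site *)(key->type & ~1UL);
    else
        mods = key->_mods;	/* bit 0 clear, pointer usable as-is */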

Signed-off-by: Josh Poimboeuf <[email protected]>
---
include/linux/static_call_types.h | 4 +--
kernel/static_call_inline.c | 47 ++++++++++++++++++-------
tools/include/linux/static_call_types.h | 4 +--
3 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 5a00b8b2cf9f..87c3598609e8 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -63,8 +63,8 @@ struct static_call_key {
union {
/* bit 0: 0 = mods, 1 = sites */
unsigned long type;
- struct static_call_mod *mods;
- struct static_call_site *sites;
+ struct static_call_mod *_mods;
+ struct static_call_site *_sites;
};
};

diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index 639397b5491c..1328370d3cf6 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -115,12 +115,17 @@ static inline bool static_call_key_has_mods(struct static_call_key *key)
return !(key->type & 1);
}

-static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
+static inline struct static_call_mod *static_call_key_mods(struct static_call_key *key)
{
if (!static_call_key_has_mods(key))
return NULL;

- return key->mods;
+ return key->_mods;
+}
+
+static inline void static_call_key_set_mods(struct static_call_key *key, struct static_call_mod *mods)
+{
+ key->_mods = mods;
}

static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
@@ -131,6 +136,12 @@ static inline struct static_call_site *static_call_key_sites(struct static_call_
return (struct static_call_site *)(key->type & ~1);
}

+static inline void static_call_key_set_sites(struct static_call_key *key, struct static_call_site *sites)
+{
+ key->_sites = sites;
+ key->type |= 1;
+}
+
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
struct static_call_site *site, *stop;
@@ -154,7 +165,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
goto done;

first = (struct static_call_mod){
- .next = static_call_key_next(key),
+ .next = static_call_key_mods(key),
.mod = NULL,
.sites = static_call_key_sites(key),
};
@@ -250,8 +261,7 @@ static int __static_call_init(struct module *mod,
* static_call_init() before memory allocation works.
*/
if (!mod) {
- key->sites = site;
- key->type |= 1;
+ static_call_key_set_sites(key, site);
goto do_transform;
}

@@ -266,10 +276,10 @@ static int __static_call_init(struct module *mod,
*/
if (static_call_key_sites(key)) {
site_mod->mod = NULL;
- site_mod->next = NULL;
site_mod->sites = static_call_key_sites(key);
+ site_mod->next = NULL;

- key->mods = site_mod;
+ static_call_key_set_mods(key, site_mod);

site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
if (!site_mod)
@@ -278,8 +288,9 @@ static int __static_call_init(struct module *mod,

site_mod->mod = mod;
site_mod->sites = site;
- site_mod->next = static_call_key_next(key);
- key->mods = site_mod;
+ site_mod->next = static_call_key_mods(key);
+
+ static_call_key_set_mods(key, site_mod);
}

do_transform:
@@ -406,7 +417,7 @@ static void static_call_del_module(struct module *mod)
struct static_call_site *stop = mod->static_call_sites +
mod->num_static_call_sites;
struct static_call_key *key, *prev_key = NULL;
- struct static_call_mod *site_mod, **prev;
+ struct static_call_mod *site_mod, *prev;
struct static_call_site *site;

for (site = start; site < stop; site++) {
@@ -416,15 +427,25 @@ static void static_call_del_module(struct module *mod)

prev_key = key;

- for (prev = &key->mods, site_mod = key->mods;
+ site_mod = static_call_key_mods(key);
+ if (!site_mod)
+ continue;
+
+ if (site_mod->mod == mod) {
+ static_call_key_set_mods(key, site_mod->next);
+ kfree(site_mod);
+ continue;
+ }
+
+ for (prev = site_mod, site_mod = site_mod->next;
site_mod && site_mod->mod != mod;
- prev = &site_mod->next, site_mod = site_mod->next)
+ prev = site_mod, site_mod = site_mod->next)
;

if (!site_mod)
continue;

- *prev = site_mod->next;
+ prev->next = site_mod->next;
kfree(site_mod);
}
}
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 5a00b8b2cf9f..87c3598609e8 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -63,8 +63,8 @@ struct static_call_key {
union {
/* bit 0: 0 = mods, 1 = sites */
unsigned long type;
- struct static_call_mod *mods;
- struct static_call_site *sites;
+ struct static_call_mod *_mods;
+ struct static_call_site *_sites;
};
};

--
2.39.2

2023-03-22 05:14:14

by Josh Poimboeuf

[permalink] [raw]
Subject: [PATCH v2.1 02/11] static_call: Flip key type union bit

Flip the meaning of the key->type union field. This will make it easier
to converge some of the DECLARE_STATIC_CALL() macros.

Signed-off-by: Josh Poimboeuf <[email protected]>
---
include/linux/static_call.h | 3 ---
include/linux/static_call_types.h | 4 ++--
kernel/static_call_inline.c | 8 ++++----
tools/include/linux/static_call_types.h | 4 ++--
4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 141e6b176a1b..f984b8f6d974 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -186,7 +186,6 @@ extern long __static_call_return0(void);
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = _func, \
- .type = 1, \
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

@@ -194,7 +193,6 @@ extern long __static_call_return0(void);
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \
- .type = 1, \
}; \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

@@ -202,7 +200,6 @@ extern long __static_call_return0(void);
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = __static_call_return0, \
- .type = 1, \
}; \
ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)

diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 87c3598609e8..c4c4efb6f6fa 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -61,10 +61,10 @@ struct static_call_site {
struct static_call_key {
void *func;
union {
- /* bit 0: 0 = mods, 1 = sites */
+ /* bit 0: 0 = sites, 1 = mods */
unsigned long type;
- struct static_call_mod *_mods;
struct static_call_site *_sites;
+ struct static_call_mod *_mods;
};
};

diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index 1328370d3cf6..41f6bda6773a 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -112,7 +112,7 @@ static inline void static_call_sort_entries(struct static_call_site *start,

static inline bool static_call_key_has_mods(struct static_call_key *key)
{
- return !(key->type & 1);
+ return !!(key->type & 1);
}

static inline struct static_call_mod *static_call_key_mods(struct static_call_key *key)
@@ -120,12 +120,13 @@ static inline struct static_call_mod *static_call_key_mods(struct static_call_ke
if (!static_call_key_has_mods(key))
return NULL;

- return key->_mods;
+ return (struct static_call_mod *)(key->type & ~1);
}

static inline void static_call_key_set_mods(struct static_call_key *key, struct static_call_mod *mods)
{
key->_mods = mods;
+ key->type |= 1;
}

static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
@@ -133,13 +134,12 @@ static inline struct static_call_site *static_call_key_sites(struct static_call_
if (static_call_key_has_mods(key))
return NULL;

- return (struct static_call_site *)(key->type & ~1);
+ return key->_sites;
}

static inline void static_call_key_set_sites(struct static_call_key *key, struct static_call_site *sites)
{
key->_sites = sites;
- key->type |= 1;
}

void __static_call_update(struct static_call_key *key, void *tramp, void *func)
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 87c3598609e8..c4c4efb6f6fa 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -61,10 +61,10 @@ struct static_call_site {
struct static_call_key {
void *func;
union {
- /* bit 0: 0 = mods, 1 = sites */
+ /* bit 0: 0 = sites, 1 = mods */
unsigned long type;
- struct static_call_mod *_mods;
struct static_call_site *_sites;
+ struct static_call_mod *_mods;
};
};

--
2.39.2

2023-03-22 12:28:30

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 08/11] arm64/static_call: Fix static call CFI violations

On Tue, Mar 21, 2023 at 09:00:14PM -0700, Josh Poimboeuf wrote:
> On arm64, with CONFIG_CFI_CLANG, it's trivial to trigger CFI violations
> by running "perf record -e sched:sched_switch -a":
>
> CFI failure at perf_misc_flags+0x34/0x70 (target: __static_call_return0+0x0/0xc; expected type: 0x837de525)
> WARNING: CPU: 3 PID: 32 at perf_misc_flags+0x34/0x70
> CPU: 3 PID: 32 Comm: ksoftirqd/3 Kdump: loaded Tainted: P 6.3.0-rc2 #8
> Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
> pstate: 904000c5 (NzcV daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : perf_misc_flags+0x34/0x70
> lr : perf_event_output_forward+0x74/0xf0
> sp : ffff80000a98b970
> x29: ffff80000a98b970 x28: ffff00077bd34d00 x27: ffff8000097d2d00
> x26: fffffbffeff6a360 x25: ffff800009835a30 x24: ffff0000c2e8dca0
> x23: 0000000000000000 x22: 0000000000000080 x21: ffff00077bd31610
> x20: ffff0000c2e8dca0 x19: ffff00077bd31610 x18: ffff800008cd52f0
> x17: 00000000837de525 x16: 0000000072923c8f x15: 000000000000b67e
> x14: 000000000178797d x13: 0000000000000004 x12: 0000000070b5b3a8
> x11: 0000000000000015 x10: 0000000000000048 x9 : ffff80000829e2b4
> x8 : ffff80000829c6f0 x7 : 0000000000000000 x6 : 0000000000000000
> x5 : fffffbffeff6a340 x4 : ffff00077bd31610 x3 : ffff00077bd31610
> x2 : ffff800009833400 x1 : 0000000000000000 x0 : ffff00077bd31610
> Call trace:
> perf_misc_flags+0x34/0x70
> perf_event_output_forward+0x74/0xf0
> __perf_event_overflow+0x12c/0x1e8
> perf_swevent_event+0x98/0x1a0
> perf_tp_event+0x140/0x558
> perf_trace_run_bpf_submit+0x88/0xc8
> perf_trace_sched_switch+0x160/0x19c
> __schedule+0xabc/0x153c
> dynamic_cond_resched+0x48/0x68
> run_ksoftirqd+0x3c/0x138
> smpboot_thread_fn+0x26c/0x2f8
> kthread+0x108/0x1c4
> ret_from_fork+0x10/0x20
>
> The problem is that the __perf_guest_state() static call does an
> indirect branch to __static_call_return0(), which isn't CFI-compliant.

IIUC that'd be broken even with the old CFI mechanism, since commit:

87b940a0675e2526 ("perf/core: Use static_call to optimize perf_guest_info_callbacks")

If so, we probably want a Fixes tag?

> Fix that by generating custom CFI-compliant ret0 functions for each
> defined static key.
>
> Signed-off-by: Josh Poimboeuf <[email protected]>
> ---
> arch/Kconfig | 4 ++
> arch/arm64/include/asm/static_call.h | 29 +++++++++++
> include/linux/static_call.h | 64 +++++++++++++++++++++----
> include/linux/static_call_types.h | 4 ++
> kernel/Makefile | 2 +-
> kernel/static_call.c | 2 +-
> tools/include/linux/static_call_types.h | 4 ++
> 7 files changed, 97 insertions(+), 12 deletions(-)
> create mode 100644 arch/arm64/include/asm/static_call.h
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index e3511afbb7f2..8800fe80a0f9 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1348,6 +1348,10 @@ config HAVE_STATIC_CALL_INLINE
> depends on HAVE_STATIC_CALL
> select OBJTOOL
>
> +config CFI_WITHOUT_STATIC_CALL
> + def_bool y
> + depends on CFI_CLANG && !HAVE_STATIC_CALL
> +
> config HAVE_PREEMPT_DYNAMIC
> bool
>
> diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
> new file mode 100644
> index 000000000000..b3489cac7742
> --- /dev/null
> +++ b/arch/arm64/include/asm/static_call.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_ARM64_STATIC_CALL_H
> +#define _ASM_ARM64_STATIC_CALL_H
> +
> +/*
> + * Make a dummy reference to a function pointer in C to force the compiler to
> + * emit a __kcfi_typeid_ symbol for asm to use.
> + */
> +#define GEN_CFI_SYM(func) \
> + static typeof(func) __used __section(".discard.cfi") *__UNIQUE_ID(cfi) = func
> +
> +
> +/* Generate a CFI-compliant static call NOP function */
> +#define __ARCH_DEFINE_STATIC_CALL_CFI(name, insns) \
> + asm(".align 4 \n" \
> + ".word __kcfi_typeid_" name " \n" \
> + ".globl " name " \n" \
> + name ": \n" \
> + "bti c \n" \
> + insns " \n" \
> + "ret \n" \
> + ".type " name ", @function \n" \
> + ".size " name ", . - " name " \n")
> +
> +#define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name) \
> + GEN_CFI_SYM(STATIC_CALL_RET0_CFI(name)); \
> + __ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_RET0_CFI_STR(name), "mov x0, xzr")

This looks correct, but given we're generating regular functions it's
unfortunate we can't have the compiler generate the actual code with something
like:

#define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(rettype, name, args...) \
rettype name(args) \
{ \
return (rettype)0; \
}

... but I guess passing the rettype and args around is painful.
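
A sketch of the pain point, with a made-up name: every definition site
would then have to respell the full prototype,

	__ARCH_DEFINE_STATIC_CALL_RET0_CFI(u64, __SCR__my_call, void);

rather than deriving it from the type already given to
DECLARE_STATIC_CALL().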

Regardless, I gave this a spin atop v6.3-rc3 using LLVM 16.0.0 and CFI_CLANG,
and it does seem to work, so:

Tested-by: Mark Rutland <[email protected]>

Thanks,
Mark.

2023-03-22 14:29:26

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 08/11] arm64/static_call: Fix static call CFI violations

On Wed, Mar 22, 2023 at 12:22:07PM +0000, Mark Rutland wrote:
> On Tue, Mar 21, 2023 at 09:00:14PM -0700, Josh Poimboeuf wrote:

> > +++ b/arch/arm64/include/asm/static_call.h
> > @@ -0,0 +1,29 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef _ASM_ARM64_STATIC_CALL_H
> > +#define _ASM_ARM64_STATIC_CALL_H
> > +
> > +/*
> > + * Make a dummy reference to a function pointer in C to force the compiler to
> > + * emit a __kcfi_typeid_ symbol for asm to use.
> > + */
> > +#define GEN_CFI_SYM(func) \
> > + static typeof(func) __used __section(".discard.cfi") *__UNIQUE_ID(cfi) = func
> > +
> > +
> > +/* Generate a CFI-compliant static call NOP function */
> > +#define __ARCH_DEFINE_STATIC_CALL_CFI(name, insns) \
> > + asm(".align 4 \n" \

Should this ^ be:

__ALIGN_STR "\n" \

> > + ".word __kcfi_typeid_" name " \n" \
> > + ".globl " name " \n" \
> > + name ": \n" \
> > + "bti c \n" \
> > + insns " \n" \
> > + "ret \n" \
> > + ".type " name ", @function \n" \
> > + ".size " name ", . - " name " \n")

Also, Mark, I think you can remove __ALIGN and __ALIGN_STR from
arch/arm64/include/linkage.h; they're very similar to the generic ones.

2023-03-22 14:54:00

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 09/11] static_call: Make NULL static calls consistent

On Tue, Mar 21, 2023 at 09:00:15PM -0700, Josh Poimboeuf wrote:
> + * or using static_call_update() with a NULL function pointer. In both cases
> + * the HAVE_STATIC_CALL implementation will patch the trampoline with a RET
> +* instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
> +* architectures can patch the trampoline call to a NOP.
> *
> * In all cases, any argument evaluation is unconditional. Unlike a regular
> * conditional function pointer call:

you wrecked the indent there ;-)

2023-03-22 15:09:54

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 09/11] static_call: Make NULL static calls consistent

On Tue, Mar 21, 2023 at 09:00:15PM -0700, Josh Poimboeuf wrote:
> +void __static_call_nop(void)
> +{
> +}
> +EXPORT_SYMBOL_GPL(__static_call_nop);

Kees, is this a ROP target? The above is basically ENDBR;RET, push
something on the stack, jump there and you're in business or so.
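
That is, the compiled body is more or less just:

	__static_call_nop:
		endbr64
		ret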

2023-03-22 15:13:58

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 11/11] static_call: Remove DEFINE_STATIC_CALL_RET0()



On 22/03/2023 at 05:00, Josh Poimboeuf wrote:
> NULL and RET0 static calls are both slightly different ways of nopping a
> static call. A not-insignificant amount of code and complexity is spent
> maintaining them separately. It's also somewhat tricky for the user who
> has to try to remember to use the correct one for the given function
> type.
>
> Simplify things all around by just combining them, such that NULL static
> calls always return 0.
>
> While it doesn't necessarily make sense for void-return functions to
> return 0, it's pretty much harmless. The return value register is
> already callee-clobbered, and an extra "xor %eax, %eax" shouldn't affect
> performance (knock on wood).

In the case of powerpc, which implements out-of-line static calls for
now, it is more than just an extra instruction. It requires a jump to
the couple of instructions that clear the return register and return.
For the 8xx it also means a cache miss, as the cache lines are 16 bytes.
So what was just a one-cycle return instruction becomes 3 cycles + 1
cache miss. It is not a show-stopper for that change, but I think it was
worth mentioning.
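
Roughly, assuming r3 as the return register, the trampoline goes from a
single

	blr			/* 1 cycle */

to something like

	b	1f		/* branch, possibly to another cache line */
	...
1:	li	r3, 0		/* clear the return value */
	blr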

>
> This "do nothing return 0" default should work for the vast majority of
> NULL cases. Otherwise it can be easily overridden with a user-specified
> function which panics or returns 0xdeadbeef or does whatever one wants.
>
> This simplifies the static call code and also tends to help simplify
> users' code as well.

I'd have expected DEFINE_STATIC_CALL_RET0() to remain, to make it clear
that it returns 0. As you explained, it doesn't matter what NULL
returns, but returning 0 is vital for RET0 cases. So I would have
dropped DEFINE_STATIC_CALL_NULL() and retained DEFINE_STATIC_CALL_RET0().

>
> Signed-off-by: Josh Poimboeuf <[email protected]>
> ---
> arch/arm64/include/asm/static_call.h | 4 --
> arch/powerpc/include/asm/static_call.h | 1 -
> arch/powerpc/kernel/irq.c | 2 +-
> arch/powerpc/kernel/static_call.c | 7 +-
> arch/x86/events/amd/core.c | 2 +-
> arch/x86/events/core.c | 5 +-
> arch/x86/include/asm/kvm-x86-ops.h | 3 +-
> arch/x86/include/asm/static_call.h | 13 +---
> arch/x86/kernel/alternative.c | 6 --
> arch/x86/kernel/static_call.c | 89 ++-----------------------
> arch/x86/kvm/x86.c | 4 +-
> include/linux/static_call.h | 65 +++++-------------
> include/linux/static_call_types.h | 4 --
> kernel/events/core.c | 15 ++---
> kernel/sched/core.c | 10 +--
> kernel/static_call.c | 5 --
> tools/include/linux/static_call_types.h | 4 --
> 17 files changed, 39 insertions(+), 200 deletions(-)
>
> diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
> index 02693b404afc..b3489cac7742 100644
> --- a/arch/arm64/include/asm/static_call.h
> +++ b/arch/arm64/include/asm/static_call.h
> @@ -22,10 +22,6 @@
> ".type " name ", @function \n" \
> ".size " name ", . - " name " \n")
>
> -#define __ARCH_DEFINE_STATIC_CALL_NOP_CFI(name) \
> - GEN_CFI_SYM(STATIC_CALL_NOP_CFI(name)); \
> - __ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_NOP_CFI_STR(name), "")
> -
> #define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name) \
> GEN_CFI_SYM(STATIC_CALL_RET0_CFI(name)); \
> __ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_RET0_CFI_STR(name), "mov x0, xzr")
> diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
> index 744435127574..0b17fc551157 100644
> --- a/arch/powerpc/include/asm/static_call.h
> +++ b/arch/powerpc/include/asm/static_call.h
> @@ -23,7 +23,6 @@
> #define PPC_SCT_DATA 28 /* Offset of label 2 */
>
> #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func)
> -#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) __PPC_SCT(name, "blr")
> #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20")
>
> #endif /* _ASM_POWERPC_STATIC_CALL_H */
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index c9535f2760b5..320e1a41abd6 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -220,7 +220,7 @@ static __always_inline void call_do_softirq(const void *sp)
> }
> #endif
>
> -DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq);
> +DEFINE_STATIC_CALL_NULL(ppc_get_irq, *ppc_md.get_irq);
>
> static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
> {
> diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
> index 8bfe46654e01..db3116b2d8a8 100644
> --- a/arch/powerpc/kernel/static_call.c
> +++ b/arch/powerpc/kernel/static_call.c
> @@ -8,7 +8,6 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
> {
> int err;
> bool is_ret0 = (func == __static_call_return0);
> - bool is_nop = (func == __static_call_nop);
> unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func);
> bool is_short = is_offset_in_branch_range((long)target - (long)tramp);
>
> @@ -17,15 +16,13 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
>
> mutex_lock(&text_mutex);
>
> - if (!is_nop && !is_short) {
> + if (!is_short) {
> err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
> if (err)
> goto out;
> }
>
> - if (is_nop)
> - err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
> - else if (is_short)
> + if (is_short)
> err = patch_branch(tramp, target, 0);
> else
> err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
> diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
> index 8c45b198b62f..3c545595bfeb 100644
> --- a/arch/x86/events/amd/core.c
> +++ b/arch/x86/events/amd/core.c
> @@ -330,7 +330,7 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
> }
> }
>
> -DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
> +DEFINE_STATIC_CALL_NULL(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
>
> static int amd_core_hw_config(struct perf_event *event)
> {
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index c94537501091..dfeaeee34acf 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -96,7 +96,7 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
> * This one is magic, it will get called even when PMU init fails (because
> * there is no PMU), in which case it should simply return NULL.
> */
> -DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
> +DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
>
> u64 __read_mostly hw_cache_event_ids
> [PERF_COUNT_HW_CACHE_MAX]
> @@ -2125,9 +2125,6 @@ static int __init init_hw_perf_events(void)
> if (!x86_pmu.read)
> x86_pmu.read = _x86_pmu_read;
>
> - if (!x86_pmu.guest_get_msrs)
> - x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
> -
> if (!x86_pmu.set_period)
> x86_pmu.set_period = x86_perf_event_set_period;
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index 2f0bfd910637..6e1259ed1014 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -10,8 +10,7 @@ BUILD_BUG_ON(1)
> *
> * KVM_X86_OP_OPTIONAL() can be used for those functions that can have
> * a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
> - * to make a definition optional, but in this case the default will
> - * be __static_call_return0.
> + * to make a definition optional.
> */
> KVM_X86_OP(check_processor_compatibility)
> KVM_X86_OP(hardware_enable)
> diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
> index afea6ceeed23..21ad48988f6e 100644
> --- a/arch/x86/include/asm/static_call.h
> +++ b/arch/x86/include/asm/static_call.h
> @@ -29,8 +29,7 @@
> * ud1 %esp, %ecx
> *
> * That trailing #UD provides both a speculation stop and serves as a unique
> - * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
> - * and __static_call_fixup().
> + * 3 byte signature identifying static call trampolines. Also see tramp_ud[].
> */
> #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
> asm(".pushsection .static_call.text, \"ax\" \n" \
> @@ -47,17 +46,7 @@
> #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
> __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
>
> -#ifdef CONFIG_RETHUNK
> -#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) \
> - __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
> -#else
> -#define ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name) \
> - __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
> -#endif
> -
> #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
> ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
>
> -extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
> -
> #endif /* _ASM_STATIC_CALL_H */
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index f615e0cb6d93..4388dc9942ca 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -624,12 +624,6 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
> if (op == JMP32_INSN_OPCODE)
> dest = addr + insn.length + insn.immediate.value;
>
> - if (__static_call_fixup(addr, op, dest) ||
> - WARN_ONCE(dest != &__x86_return_thunk,
> - "missing return thunk: %pS-%pS: %*ph",
> - addr, dest, 5, addr))
> - continue;
> -
> DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
> addr, addr, insn.length,
> addr + insn.length + insn.immediate.value);
> diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
> index 27c095c7fc96..d914167fbb4e 100644
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -6,10 +6,8 @@
>
> enum insn_type {
> CALL = 0, /* site call */
> - NOP = 1, /* site cond-call */
> - JMP = 2, /* tramp / site tail-call */
> - RET = 3, /* tramp / site cond-tail-call */
> - JCC = 4,
> + JMP = 1, /* tramp / site tail-call */
> + JCC = 2,
> };
>
> /*
> @@ -24,8 +22,6 @@ static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
> */
> static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
>
> -static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
> -
> static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */
> {
> u8 ret = 0;
> @@ -39,17 +35,6 @@ static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */
> return ret;
> }
>
> -extern void __static_call_return(void);
> -
> -asm (".global __static_call_return\n\t"
> - ".type __static_call_return, @function\n\t"
> - ASM_FUNC_ALIGN "\n\t"
> - "__static_call_return:\n\t"
> - ANNOTATE_NOENDBR
> - ANNOTATE_RETPOLINE_SAFE
> - "ret; int3\n\t"
> - ".size __static_call_return, . - __static_call_return \n\t");
> -
> static void __ref __static_call_transform(void *insn, enum insn_type type,
> void *func, bool modinit)
> {
> @@ -58,7 +43,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
> const void *code;
> u8 op, buf[6];
>
> - if ((type == JMP || type == RET) && (op = __is_Jcc(insn)))
> + if (type == JMP && (op = __is_Jcc(insn)))
> type = JCC;
>
> switch (type) {
> @@ -72,28 +57,11 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
>
> break;
>
> - case NOP:
> - code = x86_nops[5];
> - break;
> -
> case JMP:
> code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
> break;
>
> - case RET:
> - if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
> - code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
> - else
> - code = &retinsn;
> - break;
> -
> case JCC:
> - if (!func) {
> - func = __static_call_return; //FIXME use __static_call_nop()?
> - if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
> - func = x86_return_thunk;
> - }
> -
> buf[0] = 0x0f;
> __text_gen_insn(buf+1, op, insn+1, func, 5);
> code = buf;
> @@ -122,12 +90,10 @@ static void __static_call_validate(u8 *insn, bool tail, bool tramp)
>
> if (tail) {
> if (opcode == JMP32_INSN_OPCODE ||
> - opcode == RET_INSN_OPCODE ||
> __is_Jcc(insn))
> return;
> } else {
> if (opcode == CALL_INSN_OPCODE ||
> - !memcmp(insn, x86_nops[5], 5) ||
> !memcmp(insn, xor5rax, 5))
> return;
> }
> @@ -139,65 +105,22 @@ static void __static_call_validate(u8 *insn, bool tail, bool tramp)
> BUG();
> }
>
> -static inline enum insn_type __sc_insn(bool nop, bool tail)
> -{
> - /*
> - * Encode the following table without branches:
> - *
> - * tail nop insn
> - * -----+-------+------
> - * 0 | 0 | CALL
> - * 0 | 1 | NOP
> - * 1 | 0 | JMP
> - * 1 | 1 | RET
> - */
> - return 2*tail + nop;
> -}
> -
> void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
> {
> - bool nop = (func == __static_call_nop);
> + enum insn_type insn = tail ? JMP : CALL;
>
> mutex_lock(&text_mutex);
>
> if (tramp) {
> __static_call_validate(tramp, true, true);
> - __static_call_transform(tramp, __sc_insn(nop, true), func, false);
> + __static_call_transform(tramp, insn, func, false);
> }
>
> if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
> __static_call_validate(site, tail, false);
> - __static_call_transform(site, __sc_insn(nop, tail), func, false);
> + __static_call_transform(site, insn, func, false);
> }
>
> mutex_unlock(&text_mutex);
> }
> EXPORT_SYMBOL_GPL(arch_static_call_transform);
> -
> -#ifdef CONFIG_RETHUNK
> -/*
> - * This is called by apply_returns() to fix up static call trampolines,
> - * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
> - * having a return trampoline.
> - *
> - * The problem is that static_call() is available before determining
> - * X86_FEATURE_RETHUNK and, by implication, running alternatives.
> - *
> - * This means that __static_call_transform() above can have overwritten the
> - * return trampoline and we now need to fix things up to be consistent.
> - */
> -bool __static_call_fixup(void *tramp, u8 op, void *dest)
> -{
> - if (memcmp(tramp+5, tramp_ud, 3)) {
> - /* Not a trampoline site, not our problem. */
> - return false;
> - }
> -
> - mutex_lock(&text_mutex);
> - if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
> - __static_call_transform(tramp, RET, NULL, true);
> - mutex_unlock(&text_mutex);
> -
> - return true;
> -}
> -#endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fcf845fc5770..324676d738c0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9321,9 +9321,7 @@ static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
> #define KVM_X86_OP(func) \
> WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
> #define KVM_X86_OP_OPTIONAL __KVM_X86_OP
> -#define KVM_X86_OP_OPTIONAL_RET0(func) \
> - static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
> - (void *)__static_call_return0);
> +#define KVM_X86_OP_OPTIONAL_RET0(func) __KVM_X86_OP
> #include <asm/kvm-x86-ops.h>
> #undef __KVM_X86_OP
>
> diff --git a/include/linux/static_call.h b/include/linux/static_call.h
> index d5254107ccf4..625b3217480f 100644
> --- a/include/linux/static_call.h
> +++ b/include/linux/static_call.h
> @@ -17,9 +17,6 @@
> * DECLARE_STATIC_CALL(name, func);
> * DEFINE_STATIC_CALL(name, func);
> * DEFINE_STATIC_CALL_NULL(name, typename);
> - * DEFINE_STATIC_CALL_RET0(name, typename);
> - *
> - * __static_call_return0;
> *
> * static_call(name)(args...);
> * static_call_ro(name)(args...);
> @@ -65,19 +62,26 @@
> *
> * Notes on NULL function pointers:
> *
> - * A static_call() to a NULL function pointer is a NOP.
> + * A static_call() to a NULL function pointer is equivalent to a call to a
> + * "do nothing return 0" function.
> *
> * A NULL static call can be the result of:
> *
> * DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int));
> *
> - * or using static_call_update() with a NULL function pointer. In both cases
> - * the HAVE_STATIC_CALL implementation will patch the trampoline with a RET
> -* instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
> -* architectures can patch the trampoline call to a NOP.
> + * or using static_call_update() with a NULL function pointer.
> + *
> + * The "return 0" feature is strictly UB per the C standard (since it casts a
> + * function pointer to a different signature) and relies on the architecture
> + * ABI to make things work. In particular it relies on the return value
> + * register being callee-clobbered for all function calls.
> + *
> + * In particular, the x86_64 implementation of HAVE_STATIC_CALL_INLINE
> + * replaces the 5 byte CALL instruction at the callsite with a 5 byte clear
> + * of the RAX register, completely eliding any function call overhead.
> *
> - * In all cases, any argument evaluation is unconditional. Unlike a regular
> - * conditional function pointer call:
> + * Any argument evaluation is unconditional. Unlike a regular conditional
> + * function pointer call:
> *
> * if (my_func_ptr)
> * my_func_ptr(arg1)
> @@ -88,26 +92,6 @@
> *
> * func = static_call_query(name);
> *
> - *
> - * DEFINE_STATIC_CALL_RET0 / __static_call_return0:
> - *
> - * Just like how DEFINE_STATIC_CALL_NULL() optimizes the
> - * conditional void function call, DEFINE_STATIC_CALL_RET0 /
> - * __static_call_return0 optimize the do nothing return 0 function.
> - *
> - * This feature is strictly UB per the C standard (since it casts a function
> - * pointer to a different signature) and relies on the architecture ABI to
> - * make things work. In particular it relies on Caller Stack-cleanup and the
> - * whole return register being clobbered for short return values. All normal
> - * CDECL style ABIs conform.
> - *
> - * In particular the x86_64 implementation replaces the 5 byte CALL
> - * instruction at the callsite with a 5 byte clear of the RAX register,
> - * completely eliding any function call overhead.
> - *
> - * Notably argument setup is unconditional.
> - *
> - *
> * EXPORT_STATIC_CALL() vs EXPORT_STATIC_CALL_RO():
> *
> * The difference is the read-only variant exports the trampoline but not the
> @@ -131,7 +115,6 @@ struct static_call_key {
> #endif
> };
>
> -extern void __static_call_nop(void);
> extern long __static_call_return0(void);
>
> #define DECLARE_STATIC_CALL(name, func) \
> @@ -151,10 +134,6 @@ extern long __static_call_return0(void);
> __DEFINE_STATIC_CALL_TRAMP(name, func)
>
> #define DEFINE_STATIC_CALL_NULL(name, type) \
> - __DEFINE_STATIC_CALL(name, type, __STATIC_CALL_NOP(name)); \
> - __DEFINE_STATIC_CALL_NOP_TRAMP(name)
> -
> -#define DEFINE_STATIC_CALL_RET0(name, type) \
> __DEFINE_STATIC_CALL(name, type, __STATIC_CALL_RET0(name)); \
> __DEFINE_STATIC_CALL_RET0_TRAMP(name)
>
> @@ -212,8 +191,6 @@ extern long __static_call_return0(void);
> ({ \
> typeof(&STATIC_CALL_TRAMP(name)) __F = (func); \
> if (!__F) \
> - __F = __STATIC_CALL_NOP(name); \
> - else if (__F == (void *)__static_call_return0) \
> __F = __STATIC_CALL_RET0(name); \
> __static_call_update(&STATIC_CALL_KEY(name), \
> STATIC_CALL_TRAMP_ADDR(name), __F); \
> @@ -222,10 +199,8 @@ extern long __static_call_return0(void);
> #define static_call_query(name) \
> ({ \
> void *__F = (READ_ONCE(STATIC_CALL_KEY(name).func)); \
> - if (__F == __STATIC_CALL_NOP(name)) \
> + if (__F == __STATIC_CALL_RET0(name)) \
> __F = NULL; \
> - else if (__F == __STATIC_CALL_RET0(name)) \
> - __F = __static_call_return0; \
> __F; \
> })
>
> @@ -236,9 +211,6 @@ extern long __static_call_return0(void);
> #define __DEFINE_STATIC_CALL_TRAMP(name, func) \
> ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)
>
> -#define __DEFINE_STATIC_CALL_NOP_TRAMP(name) \
> - ARCH_DEFINE_STATIC_CALL_NOP_TRAMP(name)
> -
> #define __DEFINE_STATIC_CALL_RET0_TRAMP(name) \
> ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
>
> @@ -262,7 +234,6 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
> #else /* !CONFIG_HAVE_STATIC_CALL */
>
> #define __DEFINE_STATIC_CALL_TRAMP(name, func)
> -#define __DEFINE_STATIC_CALL_NOP_TRAMP(name)
> #define __DEFINE_STATIC_CALL_RET0_TRAMP(name)
> #define __EXPORT_STATIC_CALL_TRAMP(name)
> #define __EXPORT_STATIC_CALL_TRAMP_GPL(name)
> @@ -308,28 +279,22 @@ static inline void static_call_force_reinit(void) {}
>
> #include <asm/static_call.h>
>
> -#define __STATIC_CALL_NOP(name) STATIC_CALL_NOP_CFI(name)
> #define __STATIC_CALL_RET0(name) STATIC_CALL_RET0_CFI(name)
>
> #define __DECLARE_STATIC_CALL_CFI(name, func) \
> - extern typeof(func) STATIC_CALL_NOP_CFI(name); \
> extern typeof(func) STATIC_CALL_RET0_CFI(name)
>
> #define __DEFINE_STATIC_CALL_CFI(name) \
> - __ARCH_DEFINE_STATIC_CALL_NOP_CFI(name); \
> __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name)
>
> #define __EXPORT_STATIC_CALL_CFI(name) \
> - EXPORT_SYMBOL(STATIC_CALL_NOP_CFI(name)); \
> EXPORT_SYMBOL(STATIC_CALL_RET0_CFI(name))
>
> #define __EXPORT_STATIC_CALL_CFI_GPL(name) \
> - EXPORT_SYMBOL_GPL(STATIC_CALL_NOP_CFI(name)); \
> EXPORT_SYMBOL_GPL(STATIC_CALL_RET0_CFI(name))
>
> #else /* ! CONFIG_CFI_WITHOUT_STATIC_CALL */
>
> -#define __STATIC_CALL_NOP(name) (void *)__static_call_nop
> #define __STATIC_CALL_RET0(name) (void *)__static_call_return0
>
> #define __DECLARE_STATIC_CALL_CFI(name, func)
> diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
> index 2e2481c3f54e..72732af51cba 100644
> --- a/include/linux/static_call_types.h
> +++ b/include/linux/static_call_types.h
> @@ -22,10 +22,6 @@
> #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
> #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))
>
> -#define STATIC_CALL_NOP_CFI_PREFIX __SCN__
> -#define STATIC_CALL_NOP_CFI(name) __PASTE(STATIC_CALL_NOP_CFI_PREFIX, name)
> -#define STATIC_CALL_NOP_CFI_STR(name) __stringify(STATIC_CALL_NOP_CFI(name))
> -
> #define STATIC_CALL_RET0_CFI_PREFIX __SCR__
> #define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
> #define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index f79fd8b87f75..52f1edb8128c 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6757,9 +6757,9 @@ static void perf_pending_task(struct callback_head *head)
> #ifdef CONFIG_GUEST_PERF_EVENTS
> struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
>
> -DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
> -DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
> -DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
> +DEFINE_STATIC_CALL_NULL(__perf_guest_state, *perf_guest_cbs->state);
> +DEFINE_STATIC_CALL_NULL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
> +DEFINE_STATIC_CALL_NULL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
>
> void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
> {
> @@ -6783,10 +6783,9 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
> return;
>
> rcu_assign_pointer(perf_guest_cbs, NULL);
> - static_call_update(__perf_guest_state, (void *)&__static_call_return0);
> - static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
> - static_call_update(__perf_guest_handle_intel_pt_intr,
> - (void *)&__static_call_return0);
> + static_call_update(__perf_guest_state, NULL);
> + static_call_update(__perf_guest_get_ip, NULL);
> + static_call_update(__perf_guest_handle_intel_pt_intr, NULL);
> synchronize_rcu();
> }
> EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
> @@ -13766,4 +13765,4 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
> };
> #endif /* CONFIG_CGROUP_PERF */
>
> -DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
> +DEFINE_STATIC_CALL_NULL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a89de2a2d8f8..e69543a8b098 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6821,7 +6821,6 @@ EXPORT_SYMBOL(preempt_schedule);
> #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> #ifndef preempt_schedule_dynamic_enabled
> #define preempt_schedule_dynamic_enabled preempt_schedule
> -#define preempt_schedule_dynamic_disabled NULL
> #endif
> DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
> EXPORT_STATIC_CALL_RO(preempt_schedule);
> @@ -6894,7 +6893,6 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
> #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> #ifndef preempt_schedule_notrace_dynamic_enabled
> #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace
> -#define preempt_schedule_notrace_dynamic_disabled NULL
> #endif
> DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
> EXPORT_STATIC_CALL_RO(preempt_schedule_notrace);
> @@ -8491,13 +8489,11 @@ EXPORT_SYMBOL(__cond_resched);
> #ifdef CONFIG_PREEMPT_DYNAMIC
> #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> #define cond_resched_dynamic_enabled __cond_resched
> -#define cond_resched_dynamic_disabled ((void *)&__static_call_return0)
> -DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
> +DEFINE_STATIC_CALL_NULL(cond_resched, __cond_resched);
> EXPORT_STATIC_CALL_RO(cond_resched);
>
> #define might_resched_dynamic_enabled __cond_resched
> -#define might_resched_dynamic_disabled ((void *)&__static_call_return0)
> -DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
> +DEFINE_STATIC_CALL_NULL(might_resched, __cond_resched);
> EXPORT_STATIC_CALL_RO(might_resched);
> #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
> static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
> @@ -8643,7 +8639,7 @@ int sched_dynamic_mode(const char *str)
>
> #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
> #define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
> -#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
> +#define preempt_dynamic_disable(f) static_call_update(f, NULL)
> #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
> #define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
> #define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
> diff --git a/kernel/static_call.c b/kernel/static_call.c
> index 20bf34bc3e2a..090ecf5d34b4 100644
> --- a/kernel/static_call.c
> +++ b/kernel/static_call.c
> @@ -9,11 +9,6 @@ long __static_call_return0(void)
> }
> EXPORT_SYMBOL_GPL(__static_call_return0);
>
> -void __static_call_nop(void)
> -{
> -}
> -EXPORT_SYMBOL_GPL(__static_call_nop);
> -
> #if defined(CONFIG_HAVE_STATIC_CALL) && !defined(CONFIG_HAVE_STATIC_CALL_INLINE)
> void __static_call_update(struct static_call_key *key, void *tramp, void *func)
> {
> diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
> index 2e2481c3f54e..72732af51cba 100644
> --- a/tools/include/linux/static_call_types.h
> +++ b/tools/include/linux/static_call_types.h
> @@ -22,10 +22,6 @@
> #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
> #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))
>
> -#define STATIC_CALL_NOP_CFI_PREFIX __SCN__
> -#define STATIC_CALL_NOP_CFI(name) __PASTE(STATIC_CALL_NOP_CFI_PREFIX, name)
> -#define STATIC_CALL_NOP_CFI_STR(name) __stringify(STATIC_CALL_NOP_CFI(name))
> -
> #define STATIC_CALL_RET0_CFI_PREFIX __SCR__
> #define STATIC_CALL_RET0_CFI(name) __PASTE(STATIC_CALL_RET0_CFI_PREFIX, name)
> #define STATIC_CALL_RET0_CFI_STR(name) __stringify(STATIC_CALL_RET0_CFI(name))

2023-03-22 15:14:21

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH v2 09/11] static_call: Make NULL static calls consistent



On 22/03/2023 at 05:00, Josh Poimboeuf wrote:
> NULL static calls have inconsistent behavior. With HAVE_STATIC_CALL=y
> they're a NOP, but with HAVE_STATIC_CALL=n they go boom.
>
> That's guaranteed to cause subtle bugs. Make the behavior consistent by
> making NULL static calls a NOP with HAVE_STATIC_CALL=n.
>
> This is probably easier than doing the reverse (making NULL static calls
> panic with HAVE_STATIC_CALL=y). And it seems to match the current use
> cases better: there are several call sites which rely on the NOP
> behavior, whereas no call sites rely on the crashing behavior.
>
> Signed-off-by: Josh Poimboeuf <[email protected]>
> ---

> diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
> index b70670a98597..27c095c7fc96 100644
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -89,7 +89,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
>
> case JCC:
> if (!func) {
> - func = __static_call_return;
> + func = __static_call_return; //FIXME use __static_call_nop()?
> if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
> func = x86_return_thunk;
> }
> @@ -139,33 +139,35 @@ static void __static_call_validate(u8 *insn, bool tail, bool tramp)
> BUG();
> }
>
> -static inline enum insn_type __sc_insn(bool null, bool tail)
> +static inline enum insn_type __sc_insn(bool nop, bool tail)
> {
> /*
> * Encode the following table without branches:
> *
> - * tail null insn
> + * tail nop insn
> * -----+-------+------
> * 0 | 0 | CALL
> * 0 | 1 | NOP
> * 1 | 0 | JMP
> * 1 | 1 | RET
> */
> - return 2*tail + null;
> + return 2*tail + nop;
> }
>
> void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
> {
> + bool nop = (func == __static_call_nop);
> +

Would be clearer to call it 'is_nop', wouldn't it?

> mutex_lock(&text_mutex);
>
> if (tramp) {
> __static_call_validate(tramp, true, true);
> - __static_call_transform(tramp, __sc_insn(!func, true), func, false);
> + __static_call_transform(tramp, __sc_insn(nop, true), func, false);
> }
>
> if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
> __static_call_validate(site, tail, false);
> - __static_call_transform(site, __sc_insn(!func, tail), func, false);
> + __static_call_transform(site, __sc_insn(nop, tail), func, false);
> }
>
> mutex_unlock(&text_mutex);

Christophe

2023-03-22 15:25:50

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 11/11] static_call: Remove DEFINE_STATIC_CALL_RET0()

On Tue, Mar 21, 2023 at 09:00:17PM -0700, Josh Poimboeuf wrote:
> NULL and RET0 static calls are both slightly different ways of nopping a
> static call. A not-insignificant amount of code and complexity is spent
> maintaining them separately. It's also somewhat tricky for the user who
> has to try to remember to use the correct one for the given function
> type.

Well, I have very little sympathy for that argument. The return type
should be a big frigging clue.

> Simplify things all around by just combining them, such that NULL static
> calls always return 0.
>
> While it doesn't necessarily make sense for void-return functions to
> return 0, it's pretty much harmless. The return value register is
> already callee-clobbered, and an extra "xor %eax, %eax" shouldn't affect
> performance (knock on wood).

Urgh.. OTOH I do like the lines removes.

> This "do nothing return 0" default should work for the vast majority of
> NULL cases. Otherwise it can be easily overridden with a user-specified
> function which panics or returns 0xdeadbeef or does whatever one wants.
>
> This simplifies the static call code and also tends to help simplify
> users' code as well.

Can we at least keep the DEFINE_STATIC_CALL_RET0() and
__static_call_return0 as aliases? It reads really daft to use _NULL or
__static_call_nop for non-void functions.

2023-03-22 18:09:37

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 08/11] arm64/static_call: Fix static call CFI violations

On Tue, Mar 21, 2023 at 9:00 PM Josh Poimboeuf <[email protected]> wrote:
>
> On arm64, with CONFIG_CFI_CLANG, it's trivial to trigger CFI violations
> by running "perf record -e sched:sched_switch -a":
>
> CFI failure at perf_misc_flags+0x34/0x70 (target: __static_call_return0+0x0/0xc; expected type: 0x837de525)
> WARNING: CPU: 3 PID: 32 at perf_misc_flags+0x34/0x70
> CPU: 3 PID: 32 Comm: ksoftirqd/3 Kdump: loaded Tainted: P 6.3.0-rc2 #8
> Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
> pstate: 904000c5 (NzcV daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : perf_misc_flags+0x34/0x70
> lr : perf_event_output_forward+0x74/0xf0
> sp : ffff80000a98b970
> x29: ffff80000a98b970 x28: ffff00077bd34d00 x27: ffff8000097d2d00
> x26: fffffbffeff6a360 x25: ffff800009835a30 x24: ffff0000c2e8dca0
> x23: 0000000000000000 x22: 0000000000000080 x21: ffff00077bd31610
> x20: ffff0000c2e8dca0 x19: ffff00077bd31610 x18: ffff800008cd52f0
> x17: 00000000837de525 x16: 0000000072923c8f x15: 000000000000b67e
> x14: 000000000178797d x13: 0000000000000004 x12: 0000000070b5b3a8
> x11: 0000000000000015 x10: 0000000000000048 x9 : ffff80000829e2b4
> x8 : ffff80000829c6f0 x7 : 0000000000000000 x6 : 0000000000000000
> x5 : fffffbffeff6a340 x4 : ffff00077bd31610 x3 : ffff00077bd31610
> x2 : ffff800009833400 x1 : 0000000000000000 x0 : ffff00077bd31610
> Call trace:
> perf_misc_flags+0x34/0x70
> perf_event_output_forward+0x74/0xf0
> __perf_event_overflow+0x12c/0x1e8
> perf_swevent_event+0x98/0x1a0
> perf_tp_event+0x140/0x558
> perf_trace_run_bpf_submit+0x88/0xc8
> perf_trace_sched_switch+0x160/0x19c
> __schedule+0xabc/0x153c
> dynamic_cond_resched+0x48/0x68
> run_ksoftirqd+0x3c/0x138
> smpboot_thread_fn+0x26c/0x2f8
> kthread+0x108/0x1c4
> ret_from_fork+0x10/0x20
>
> The problem is that the __perf_guest_state() static call does an
> indirect branch to __static_call_return0(), which isn't CFI-compliant.
>
> Fix that by generating custom CFI-compliant ret0 functions for each
> defined static key.
>
> Signed-off-by: Josh Poimboeuf <[email protected]>
> ---
> arch/Kconfig | 4 ++
> arch/arm64/include/asm/static_call.h | 29 +++++++++++
> include/linux/static_call.h | 64 +++++++++++++++++++++----
> include/linux/static_call_types.h | 4 ++
> kernel/Makefile | 2 +-
> kernel/static_call.c | 2 +-
> tools/include/linux/static_call_types.h | 4 ++
> 7 files changed, 97 insertions(+), 12 deletions(-)
> create mode 100644 arch/arm64/include/asm/static_call.h
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index e3511afbb7f2..8800fe80a0f9 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1348,6 +1348,10 @@ config HAVE_STATIC_CALL_INLINE
> depends on HAVE_STATIC_CALL
> select OBJTOOL
>
> +config CFI_WITHOUT_STATIC_CALL
> + def_bool y
> + depends on CFI_CLANG && !HAVE_STATIC_CALL
> +
> config HAVE_PREEMPT_DYNAMIC
> bool
>
> diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
> new file mode 100644
> index 000000000000..b3489cac7742
> --- /dev/null
> +++ b/arch/arm64/include/asm/static_call.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_ARM64_STATIC_CALL_H
> +#define _ASM_ARM64_STATIC_CALL_H
> +
> +/*
> + * Make a dummy reference to a function pointer in C to force the compiler to
> + * emit a __kcfi_typeid_ symbol for asm to use.
> + */
> +#define GEN_CFI_SYM(func) \
> + static typeof(func) __used __section(".discard.cfi") *__UNIQUE_ID(cfi) = func

Couldn't we just use __ADDRESSABLE instead of adding a separate macro?
The type of the variable shouldn't matter here, as long as we take the
address of func.
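
For reference, __ADDRESSABLE in include/linux/compiler.h is roughly:

	#define __ADDRESSABLE(sym) \
		static void * __section(".discard.addressable") __used \
			__UNIQUE_ID(__PASTE(__addressable_, sym)) = (void *)&sym;

i.e. only the address is kept; the pointed-to type is irrelevant.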

Sami

2023-03-22 18:15:02

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 08/11] arm64/static_call: Fix static call CFI violations

On Wed, Mar 22, 2023 at 12:22:07PM +0000, Mark Rutland wrote:
> > The problem is that the __perf_guest_state() static call does an
> > indirect branch to __static_call_return0(), which isn't CFI-compliant.
>
> IIUC that'd be broken even with the old CFI mechanism, since commit:
>
> 87b940a0675e2526 ("perf/core: Use static_call to optimize perf_guest_info_callbacks")
>
> If so, we probably want a Fixes tag?

Yeah, it should definitely get a Fixes tag. I wasn't quite sure if this
bug started with the above commit or with the CFI_CLANG switch to kcfi.
And then I forgot to investigate.

> > +/* Generate a CFI-compliant static call NOP function */
> > +#define __ARCH_DEFINE_STATIC_CALL_CFI(name, insns) \
> > + asm(".align 4 \n" \
> > + ".word __kcfi_typeid_" name " \n" \
> > + ".globl " name " \n" \
> > + name ": \n" \
> > + "bti c \n" \
> > + insns " \n" \
> > + "ret \n" \
> > + ".type " name ", @function \n" \
> > + ".size " name ", . - " name " \n")
> > +
> > +#define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(name) \
> > + GEN_CFI_SYM(STATIC_CALL_RET0_CFI(name)); \
> > + __ARCH_DEFINE_STATIC_CALL_CFI(STATIC_CALL_RET0_CFI_STR(name), "mov x0, xzr")
>
> This looks correct, but given we're generating regular functions it's
> unfortunate we can't have the compiler generate the actual code with something
> like:
>
> #define __ARCH_DEFINE_STATIC_CALL_RET0_CFI(rettype, name, args...) \
> rettype name(args) \
> { \
> return (rettype)0; \
> }
>
> ... but I guess passing the rettype and args around is painful.

Hm, I hadn't considered that. I'll play around with it.

> Regardless, I gave this a spin atop v6.3-rc3 using LLVM 16.0.0 and CFI_CLANG,
> and it does seem to work, so:
>
> Tested-by: Mark Rutland <[email protected]>

Thanks!

--
Josh

2023-03-22 18:27:19

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 08/11] arm64/static_call: Fix static call CFI violations

On Wed, Mar 22, 2023 at 03:19:33PM +0100, Peter Zijlstra wrote:
> On Wed, Mar 22, 2023 at 12:22:07PM +0000, Mark Rutland wrote:
> > On Tue, Mar 21, 2023 at 09:00:14PM -0700, Josh Poimboeuf wrote:
>
> > > +++ b/arch/arm64/include/asm/static_call.h
> > > @@ -0,0 +1,29 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef _ASM_ARM64_STATIC_CALL_H
> > > +#define _ASM_ARM64_STATIC_CALL_H
> > > +
> > > +/*
> > > + * Make a dummy reference to a function pointer in C to force the compiler to
> > > + * emit a __kcfi_typeid_ symbol for asm to use.
> > > + */
> > > +#define GEN_CFI_SYM(func) \
> > > + static typeof(func) __used __section(".discard.cfi") *__UNIQUE_ID(cfi) = func
> > > +
> > > +
> > > +/* Generate a CFI-compliant static call NOP function */
> > > +#define __ARCH_DEFINE_STATIC_CALL_CFI(name, insns) \
> > > + asm(".align 4 \n" \
>
> Should this ^ be:
>
> __ALIGN_STR "\n" \

Ah right, you did mention that before. Will do.

--
Josh

2023-03-22 18:42:48

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 08/11] arm64/static_call: Fix static call CFI violations

On Wed, Mar 22, 2023 at 11:07:21AM -0700, Sami Tolvanen wrote:
> > +/*
> > + * Make a dummy reference to a function pointer in C to force the compiler to
> > + * emit a __kcfi_typeid_ symbol for asm to use.
> > + */
> > +#define GEN_CFI_SYM(func) \
> > + static typeof(func) __used __section(".discard.cfi") *__UNIQUE_ID(cfi) = func
>
> Couldn't we just use __ADDRESSABLE instead of adding a separate macro?
> The type of the variable shouldn't matter here, as long as we take the
> address of func.

Oh, I did pretty much reimplement __ADDRESSABLE, didn't I ;-)

--
Josh

2023-03-22 18:43:30

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 09/11] static_call: Make NULL static calls consistent

On Wed, Mar 22, 2023 at 03:59:18PM +0100, Peter Zijlstra wrote:
> On Tue, Mar 21, 2023 at 09:00:15PM -0700, Josh Poimboeuf wrote:
> > +void __static_call_nop(void)
> > +{
> > +}
> > +EXPORT_SYMBOL_GPL(__static_call_nop);
>
> Kees, is this a ROP target? The above is basically ENDBR;RET, push
> something on the stack, jump there and you're in business or so.

I could add __noendbr, except for the !HAVE_STATIC_CALL + !CFI_CLANG
case which actually needs to indirect call it.
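
i.e. a sketch of the annotated version, which would then have to be
conditional on those configs:

	void __noendbr __static_call_nop(void)
	{
	}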

--
Josh

2023-03-22 18:46:55

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 11/11] static_call: Remove DEFINE_STATIC_CALL_RET0()

On Wed, Mar 22, 2023 at 04:15:32PM +0100, Peter Zijlstra wrote:
> On Tue, Mar 21, 2023 at 09:00:17PM -0700, Josh Poimboeuf wrote:
> > NULL and RET0 static calls are both slightly different ways of nopping a
> > static call. A not-insignificant amount of code and complexity is spent
> > maintaining them separately. It's also somewhat tricky for the user who
> > has to try to remember to use the correct one for the given function
> > type.
>
> Well, I have very little sympathy for that argument. The return type
> should be a big frigging clue.
>
> > Simplify things all around by just combining them, such that NULL static
> > calls always return 0.
> >
> > While it doesn't necessarily make sense for void-return functions to
> > return 0, it's pretty much harmless. The return value register is
> > already callee-clobbered, and an extra "xor %eax, %eax" shouldn't affect
> > performance (knock on wood).
>
> Urgh.. OTOH I do like the lines removes.

So this patch is more of an RFC than the others. I'm not fully
convinced myself, but I very much liked the removed lines and simpler
interface.

> > This "do nothing return 0" default should work for the vast majority of
> > NULL cases. Otherwise it can be easily overridden with a user-specified
> > function which panics or returns 0xdeadbeef or does whatever one wants.
> >
> > This simplifies the static call code and also tends to help simplify
> > users' code as well.
>
> Can we at least keep the DEFINE_STATIC_CALL_RET0() and
> __static_call_return0 as aliases? It reads really daft to use _NULL or
> __static_call_nop for non-void functions.

I disagree; to me NULL means "nop the function (including any return
value)". Nice and easy.

Keeping those other ret0 defines around would negate some of the cool
deletions.

--
Josh

2023-03-22 18:55:31

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 11/11] static_call: Remove DEFINE_STATIC_CALL_RET0()

On Wed, Mar 22, 2023 at 03:04:31PM +0000, Christophe Leroy wrote:
On 22/03/2023 at 05:00, Josh Poimboeuf wrote:
> > NULL and RET0 static calls are both slightly different ways of nopping a
> > static call. A not-insignificant amount of code and complexity is spent
> > maintaining them separately. It's also somewhat tricky for the user who
> > has to try to remember to use the correct one for the given function
> > type.
> >
> > Simplify things all around by just combining them, such that NULL static
> > calls always return 0.
> >
> > While it doesn't necessarily make sense for void-return functions to
> > return 0, it's pretty much harmless. The return value register is
> > already callee-clobbered, and an extra "xor %eax, %eax" shouldn't affect
> > performance (knock on wood).
>
> In the case of powerpc, which implements out-of-line static calls for
> now, it is more than just an extra instruction. It requires a jump to
> the couple of instructions that clear the return register and return.
> For the 8xx it also means a cache miss, as the cache lines are 16 bytes.
> So what was just a one-cycle return instruction becomes 3 cycles + 1
> cache miss. It is not a show-stopper for that change, but I think it was
> worth mentioning.

Good point. I should mention that (if we keep the patch).

> > This "do nothing return 0" default should work for the vast majority of
> > NULL cases. Otherwise it can be easily overridden with a user-specified
> > function which panics or returns 0xdeadbeef or does whatever one wants.
> >
> > This simplifies the static call code and also tends to help simplify
> > users' code as well.
>
> I'd have expected DEFINE_STATIC_CALL_RET0() to remain, to make it clear
> that it returns 0. As you explained, it doesn't matter what NULL
> returns, but returning 0 is vital for RET0 cases. So I would have
> dropped DEFINE_STATIC_CALL_NULL() and retained DEFINE_STATIC_CALL_RET0().

The issue is static_call_update(). It takes NULL as an input, which
comes in handy for many static call users. So it makes sense to have
NULL mean "nop (and return 0 if needed)".
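
i.e. the common pattern, as a sketch with a made-up name:

	static_call_update(my_call, NULL);
	/* ... later, at a call site: */
	ret = static_call(my_call)(arg);	/* no-op, ret == 0 */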

IMO it becomes more confusing to have two interfaces (NULL and RET0)
meaning the same thing.

--
Josh