2020-05-01 20:33:23

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH v4 14/18] static_call: Add static_cond_call()

Extend the static_call infrastructure to optimize the following common
pattern:

if (func_ptr)
func_ptr(args...)

For the trampoline (which is in effect a tail-call), we patch the
JMP.d32 into a RET, which then directly consumes the trampoline call.

For the in-line sites we replace the CALL with a NOP5.

NOTE: this is 'obviously' limited to functions with a 'void' return type.

NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed
to a full function.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/include/asm/static_call.h | 10 ++++++++
arch/x86/kernel/static_call.c | 42 ++++++++++++++++++++++++++++---------
include/linux/static_call.h | 29 +++++++++++++++++++++++++
3 files changed, 71 insertions(+), 10 deletions(-)

--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -30,4 +30,14 @@
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")

+#define ARCH_DEFINE_STATIC_CALL_RETTRAMP(name) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ " ret; nop; nop; nop; nop; \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
#endif /* _ASM_STATIC_CALL_H */
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -4,19 +4,41 @@
#include <linux/bug.h>
#include <asm/text-patching.h>

-static void __static_call_transform(void *insn, u8 opcode, void *func)
+enum insn_type {
+ call = 0, /* site call */
+ nop = 1, /* site cond-call */
+ jmp = 2, /* tramp / site tail-call */
+ ret = 3, /* tramp / site cond-tail-call */
+};
+
+static void __static_call_transform(void *insn, enum insn_type type, void *func)
{
- const void *code = text_gen_insn(opcode, insn, func);
+ int size = CALL_INSN_SIZE;
+ const void *code;

- if (WARN_ONCE(*(u8 *)insn != opcode,
- "unexpected static call insn opcode 0x%x at %pS\n",
- opcode, insn))
- return;
+ switch (type) {
+ case call:
+ code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
+ break;
+
+ case nop:
+ code = ideal_nops[NOP_ATOMIC5];
+ break;
+
+ case jmp:
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
+ break;
+
+ case ret:
+ code = text_gen_insn(RET_INSN_OPCODE, insn, func);
+ size = RET_INSN_SIZE;
+ break;
+ }

- if (memcmp(insn, code, CALL_INSN_SIZE) == 0)
+ if (memcmp(insn, code, size) == 0)
return;

- text_poke_bp(insn, code, CALL_INSN_SIZE, NULL);
+ text_poke_bp(insn, code, size, NULL);
}

void arch_static_call_transform(void *site, void *tramp, void *func)
@@ -24,10 +46,10 @@ void arch_static_call_transform(void *si
mutex_lock(&text_mutex);

if (tramp)
- __static_call_transform(tramp, JMP32_INSN_OPCODE, func);
+ __static_call_transform(tramp, jmp + !func, func);

if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
- __static_call_transform(site, CALL_INSN_OPCODE, func);
+ __static_call_transform(site, !func, func);

mutex_unlock(&text_mutex);
}
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -16,7 +16,9 @@
*
* DECLARE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL(name, func);
+ * DEFINE_STATIC_COND_CALL(name, typename);
* static_call(name)(args...);
+ * static_cond_call(name)(args...)
* static_call_update(name, func);
*
* Usage example:
@@ -120,7 +122,16 @@ extern int static_call_text_reserved(voi
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

+#define DEFINE_STATIC_COND_CALL(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = NULL, \
+ .type = 1, \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_RETTRAMP(name)
+
#define static_call(name) __static_call(name)
+#define static_cond_call(name) (void)__static_call(name)

#define EXPORT_STATIC_CALL(name) \
EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
@@ -143,7 +154,15 @@ struct static_call_key {
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

+#define DEFINE_STATIC_COND_CALL(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = NULL, \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_RETTRAMP(name)
+
#define static_call(name) __static_call(name)
+#define static_cond_call(name) (void)__static_call(name)

static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -179,9 +198,19 @@ struct static_call_key {
.func = _func, \
}

+#define DEFINE_STATIC_COND_CALL(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = NULL, \
+ }
+
#define static_call(name) \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))

+#define static_cond_call(name) \
+ if (STATIC_CALL_KEY(name).func) \
+ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
+
static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{



2020-05-02 13:10:14

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On 01/05/2020 22.29, Peter Zijlstra wrote:
> Extend the static_call infrastructure to optimize the following common
> pattern:
>
> if (func_ptr)
> func_ptr(args...)
>
> +
> #define static_call(name) __static_call(name)
> +#define static_cond_call(name) (void)__static_call(name)
>
> +
> #define static_call(name) __static_call(name)
> +#define static_cond_call(name) (void)__static_call(name)
>

> +#define static_cond_call(name) \
> + if (STATIC_CALL_KEY(name).func) \
> + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
> +

This addresses neither the READ_ONCE issue nor the fact that, AFAICT,
the semantics of

static_cond_call(foo)(i++)

will depend on CONFIG_HAVE_STATIC_CALL. Also, I'd have appreciated being
cc'ed on new revisions instead of stumbling on it by chance.

Rasmus

2020-05-03 13:01:28

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Sat, May 02, 2020 at 03:08:00PM +0200, Rasmus Villemoes wrote:
> On 01/05/2020 22.29, Peter Zijlstra wrote:
> > Extend the static_call infrastructure to optimize the following common
> > pattern:
> >
> > if (func_ptr)
> > func_ptr(args...)
> >
> > +
> > #define static_call(name) __static_call(name)
> > +#define static_cond_call(name) (void)__static_call(name)
> >
> > +
> > #define static_call(name) __static_call(name)
> > +#define static_cond_call(name) (void)__static_call(name)
> >
>
> > +#define static_cond_call(name) \
> > + if (STATIC_CALL_KEY(name).func) \
> > + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
> > +
>
> This addresses neither the READ_ONCE issue nor the fact that,

> AFAICT,
> the semantics of
>
> static_cond_call(foo)(i++)
>
> will depend on CONFIG_HAVE_STATIC_CALL.

True.

So there is something utterly terrible we can do to address both:


void __static_call_nop(void)
{
}

#define __static_cond_call(name) \
({ \
void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \
if (!func) \
func = &__static_call_nop; \
(typeof(STATIC_CALL_TRAMP(name))*)func; \
})

#define static_cond_call(name) (void)__static_cond_call(name)


This gets us into Undefined Behaviour territory, but it ought to work.

It adds the READ_ONCE(), and it cures the argument evaluation issue.

> Also, I'd have appreciated being
> cc'ed on new revisions instead of stumbling on it by chance.

Sorry, my bad, I forgot about that :-/. I rushed to repost, and simply
forgot a few things.

2020-05-04 07:23:04

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On 03/05/2020 14.58, Peter Zijlstra wrote:
> On Sat, May 02, 2020 at 03:08:00PM +0200, Rasmus Villemoes wrote:
>> On 01/05/2020 22.29, Peter Zijlstra wrote:

>>> +#define static_cond_call(name) \
>>> + if (STATIC_CALL_KEY(name).func) \
>>> + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
>>> +
>>
>> This addresses neither the READ_ONCE issue nor the fact that,
>> AFAICT, the semantics of
>>
>> static_cond_call(foo)(i++)
>>
>> will depend on CONFIG_HAVE_STATIC_CALL.
>
> True.
>
> So there is something utterly terrible we can do to address both:
>
> void __static_call_nop(void)
> {
> }
>
> #define __static_cond_call(name) \
> ({ \
> void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \
> if (!func) \
> func = &__static_call_nop; \
> (typeof(STATIC_CALL_TRAMP(name))*)func; \
> })
>
> #define static_cond_call(name) (void)__static_cond_call(name)
>
> This gets us into Undefined Behaviour territory, but it ought to work.
>
> It adds the READ_ONCE(), and it cures the argument evaluation issue.

Indeed, that is horrible. And it "fixes" the argument evaluation by
changing the !HAVE_STATIC_CALL case to match the HAVE_STATIC_CALL, not
the other way around, which means that it is not a direct equivalent to the

if (foo)
foo(a, b, c)

[which pattern of course has the READ_ONCE issue, but each individual
existing site with that may be ok for various reasons].

Is gcc smart enough to change the if (!func) to a jump across the
function call (but still evaluting side effects in args), or is
__static_call_nop actually emitted and called? If the latter, then one
might as well patch the write-side to do "WRITE_ONCE(foo, func ? :
__static_call_nop)" and elide the test from __static_cond_call() - in
fact, that just becomes a single READ_ONCE. [There's probably some
annoying issue with making sure static initialization of foo points at
__static_call_nop].

And that brings me to the other issue I raised - do you have a few
examples of call sites that could use this, so we can see disassembly
before/after? I'm still concerned that, even if there are no
side-effects in the arguments, you still force the compiler to
spill/shuffle registers for call/restore unconditionally, whereas with a
good'ol if(), all that work is guarded by the load+test.

Rasmus

2020-05-04 20:19:31

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Mon, May 04, 2020 at 09:20:03AM +0200, Rasmus Villemoes wrote:

> > So there is something utterly terrible we can do to address both:
> >
> > void __static_call_nop(void)
> > {
> > }
> >
> > #define __static_cond_call(name) \
> > ({ \
> > void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \
> > if (!func) \
> > func = &__static_call_nop; \
> > (typeof(STATIC_CALL_TRAMP(name))*)func; \
> > })
> >
> > #define static_cond_call(name) (void)__static_cond_call(name)
> >
> > This gets us into Undefined Behaviour territory, but it ought to work.
> >
> > It adds the READ_ONCE(), and it cures the argument evaluation issue.
>
> Indeed, that is horrible. And it "fixes" the argument evaluation by
> changing the !HAVE_STATIC_CALL case to match the HAVE_STATIC_CALL, not
> the other way around,

Correct; making it the other way is far more 'interesting'. It would
basically mean combining the static_branch and static_call, but that
would also make it less optimal for simple forwarding cases.

> which means that it is not a direct equivalent to the
>
> if (foo)
> foo(a, b, c)
>
> [which pattern of course has the READ_ONCE issue, but each individual
> existing site with that may be ok for various reasons].
>
> Is gcc smart enough to change the if (!func) to a jump across the
> function call (but still evaluting side effects in args), or is
> __static_call_nop actually emitted and called?

I was hoping it would be clever, but I just tried (find below) and it is
not -- although there's always hoping a newer version / clang might be
smarter.

It does indeed emit the nop function :/

> If the latter, then one
> might as well patch the write-side to do "WRITE_ONCE(foo, func ? :
> __static_call_nop)" and elide the test from __static_cond_call() - in
> fact, that just becomes a single READ_ONCE. [There's probably some
> annoying issue with making sure static initialization of foo points at
> __static_call_nop].

But that would not give a more clever compiler the ability to do the
'right' thing here..

> And that brings me to the other issue I raised - do you have a few
> examples of call sites that could use this, so we can see disassembly
> before/after?

Patch 17 has a few -- which is why I wrote the support in the first
place. Obviously those will never ever hit the !HAVE_STATIC_BRANCH case.

> I'm still concerned that, even if there are no
> side-effects in the arguments, you still force the compiler to
> spill/shuffle registers for call/restore unconditionally, whereas with a
> good'ol if(), all that work is guarded by the load+test.

https://godbolt.org/z/SDRG2q

---
#include <stddef.h>


#define READ_ONCE(var) (*((volatile typeof(var) *)&(var)))
#define WRITE_ONCE(var, val) (*((volatile typeof(var) *)&(var)) = (val))

struct static_call_key {
void *func;
};

#define DECLARE_STATIC_CALL(name, func) \
extern struct static_call_key name; \
extern typeof(func) __SCT__##name;

#define DEFINE_STATIC_COND_CALL(name, _func) \
DECLARE_STATIC_CALL(name, _func) \
struct static_call_key name = { \
.func = NULL, \
}

static void __static_call_nop(void)
{
}

#define __static_cond_call(name) \
({ \
void *func = READ_ONCE(name.func); \
if (!func) \
func = &__static_call_nop; \
(typeof(__SCT__##name)*)func; \
})

#define static_cond_call(name) (void)__static_cond_call(name)

static void inline static_call_update(struct static_call_key *call, void *func)
{
WRITE_ONCE(call->func, func);
}

volatile int _x;

void bar(int x)
{
_x = x;
}

DEFINE_STATIC_COND_CALL(foo, bar);

void ponies(int x)
{
static_cond_call(foo)(x);
}

2020-05-05 07:54:18

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On 04/05/2020 22.14, Peter Zijlstra wrote:
> On Mon, May 04, 2020 at 09:20:03AM +0200, Rasmus Villemoes wrote:
>
>>
>> Indeed, that is horrible. And it "fixes" the argument evaluation by
>> changing the !HAVE_STATIC_CALL case to match the HAVE_STATIC_CALL, not
>> the other way around,
>
> Correct; making it the other way is far more 'interesting'. It would
> basically mean combining the static_branch and static_call, but that
> would also make it less optimal for simple forwarding cases.

Yes, I can see how implementing that would be quite hard.

>> which means that it is not a direct equivalent to the
>>
>> if (foo)
>> foo(a, b, c)
>>
>> [which pattern of course has the READ_ONCE issue, but each individual
>> existing site with that may be ok for various reasons].
>>
>> Is gcc smart enough to change the if (!func) to a jump across the
>> function call (but still evaluting side effects in args), or is
>> __static_call_nop actually emitted and called?
>
> I was hoping it would be clever, but I just tried (find below) and it is
> not -- although there's always hoping a newer version / clang might be
> smarter.
>
> It does indeed emit the nop function :/

Hm.

>> If the latter, then one
>> might as well patch the write-side to do "WRITE_ONCE(foo, func ? :
>> __static_call_nop)" and elide the test from __static_cond_call() - in
>> fact, that just becomes a single READ_ONCE. [There's probably some
>> annoying issue with making sure static initialization of foo points at
>> __static_call_nop].
>
> But that would not give a more clever compiler the ability to do the
> 'right' thing here..

True. However, I think it's unlikely we'll see a compiler being that
clever anytime soon.

Anyway, it's hard to judge what version of the !HAVE_STATIC_CALL
implementation is best when there's no !HAVE_STATIC_CALL use cases to
look at. I just want to ensure that whatever limitations or gotchas
(e.g., "arguments are evaluated regardless of NULLness of func", or
alternatively "arguments must not have side effects") it ends up with
get documented.

>
> #define __static_cond_call(name) \
> ({ \
> void *func = READ_ONCE(name.func); \
> if (!func) \
> func = &__static_call_nop; \
> (typeof(__SCT__##name)*)func; \
> })

I think you can just make it

#define __static_cond_call(name) \
( \
(typeof(__SCT__##name)*) ((void *)name.func ? : (void *)__static_call_nop) \
)

but that simplification is not enough to make gcc change its mind about
how to compile it :( But I'm guessing that various sanitizers or static
checkers might complain about the UB.

Rasmus

2020-05-05 09:41:30

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()


HJ, Nick,

Any chance any of you can see a way to make your respective compilers
not emit utter junk for this?

On Mon, May 04, 2020 at 10:14:45PM +0200, Peter Zijlstra wrote:

> https://godbolt.org/z/SDRG2q
>
> ---
> #include <stddef.h>
>
>
> #define READ_ONCE(var) (*((volatile typeof(var) *)&(var)))
> #define WRITE_ONCE(var, val) (*((volatile typeof(var) *)&(var)) = (val))
>
> struct static_call_key {
> void *func;
> };
>
> #define DECLARE_STATIC_CALL(name, func) \
> extern struct static_call_key name; \
> extern typeof(func) __SCT__##name;
>
> #define DEFINE_STATIC_COND_CALL(name, _func) \
> DECLARE_STATIC_CALL(name, _func) \
> struct static_call_key name = { \
> .func = NULL, \
> }
>
> static void __static_call_nop(void)
> {
> }
>
> #define __static_cond_call(name) \
> ({ \
> void *func = READ_ONCE(name.func); \
> if (!func) \
> func = &__static_call_nop; \
> (typeof(__SCT__##name)*)func; \
> })
>
> #define static_cond_call(name) (void)__static_cond_call(name)
>
> static void inline static_call_update(struct static_call_key *call, void *func)
> {
> WRITE_ONCE(call->func, func);
> }
>
> volatile int _x;
>
> void bar(int x)
> {
> _x = x;
> }
>
> DEFINE_STATIC_COND_CALL(foo, bar);
>
> void ponies(int x)
> {
> static_cond_call(foo)(x);
> }

2020-05-05 09:41:52

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Tue, May 05, 2020 at 09:50:26AM +0200, Rasmus Villemoes wrote:
> On 04/05/2020 22.14, Peter Zijlstra wrote:

> Anyway, it's hard to judge what version of the !HAVE_STATIC_CALL
> implementation is best when there's no !HAVE_STATIC_CALL use cases to
> look at. I just want to ensure that whatever limitations or gotchas
> (e.g., "arguments are evaluated regardless of NULLness of func", or
> alternatively "arguments must not have side effects") it ends up with
> get documented.

I can certainly try and write a better comment for it.

> > #define __static_cond_call(name) \
> > ({ \
> > void *func = READ_ONCE(name.func); \
> > if (!func) \
> > func = &__static_call_nop; \
> > (typeof(__SCT__##name)*)func; \
> > })
>
> I think you can just make it
>
> #define __static_cond_call(name) \
> ( \
> (typeof(__SCT__##name)*) ((void *)name.func ? : (void *)__static_call_nop) \
> )

I _think_ the compiler sees the two as exactly the same, but then, I've
not seen the inside of a modern compiler.

> but that simplification is not enough to make gcc change its mind about
> how to compile it :( But I'm guessing that various sanitizers or static
> checkers might complain about the UB.

Yeah, we'll see.

2020-05-05 18:18:27

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Tue, May 5, 2020 at 2:36 AM Peter Zijlstra <[email protected]> wrote:
>
>
> HJ, Nick,
>
> Any chance any of you can see a way to make your respective compilers
> not emit utter junk for this?
>
> On Mon, May 04, 2020 at 10:14:45PM +0200, Peter Zijlstra wrote:
>
> > https://godbolt.org/z/SDRG2q

Woah, a godbolt link! Now we're speaking the same language. What were
you expecting? Us to remove the conditional check that a volatile read
wasn't NULL? (Not using READ_ONCE, produces the direct tail call I
suspect you're looking for, but am unsure if that's what you meant,
and understand that's not a solution). I am simultaneously impressed
and disgusted by this btw, cool stuff.

i.e.
void *func = &name.func; \
rather than
void *func = READ_ONCE(name.func); \
(I'm surprised that `&name.func;` and `name.func;` also produce
different results).

> >
> > ---
> > #include <stddef.h>
> >
> >
> > #define READ_ONCE(var) (*((volatile typeof(var) *)&(var)))
> > #define WRITE_ONCE(var, val) (*((volatile typeof(var) *)&(var)) = (val))
> >
> > struct static_call_key {
> > void *func;
> > };
> >
> > #define DECLARE_STATIC_CALL(name, func) \
> > extern struct static_call_key name; \
> > extern typeof(func) __SCT__##name;
> >
> > #define DEFINE_STATIC_COND_CALL(name, _func) \
> > DECLARE_STATIC_CALL(name, _func) \
> > struct static_call_key name = { \
> > .func = NULL, \
> > }
> >
> > static void __static_call_nop(void)
> > {
> > }
> >
> > #define __static_cond_call(name) \
> > ({ \
> > void *func = READ_ONCE(name.func); \
> > if (!func) \
> > func = &__static_call_nop; \
> > (typeof(__SCT__##name)*)func; \
> > })
> >
> > #define static_cond_call(name) (void)__static_cond_call(name)
> >
> > static void inline static_call_update(struct static_call_key *call, void *func)
> > {
> > WRITE_ONCE(call->func, func);
> > }
> >
> > volatile int _x;
> >
> > void bar(int x)
> > {
> > _x = x;
> > }
> >
> > DEFINE_STATIC_COND_CALL(foo, bar);
> >
> > void ponies(int x)
> > {
> > static_cond_call(foo)(x);
> > }



--
Thanks,
~Nick Desaulniers

2020-05-05 18:30:12

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Tue, May 5, 2020 at 11:13 AM Nick Desaulniers
<[email protected]> wrote:
>
> On Tue, May 5, 2020 at 2:36 AM Peter Zijlstra <[email protected]> wrote:
> >
> >
> > HJ, Nick,
> >
> > Any chance any of you can see a way to make your respective compilers
> > not emit utter junk for this?
> >
> > On Mon, May 04, 2020 at 10:14:45PM +0200, Peter Zijlstra wrote:
> >
> > > https://godbolt.org/z/SDRG2q
>
> Woah, a godbolt link! Now we're speaking the same language. What were
> you expecting? Us to remove the conditional check that a volatile read
> wasn't NULL? (Not using READ_ONCE, produces the direct tail call I
> suspect you're looking for, but am unsure if that's what you meant,
> and understand that's not a solution). I am simultaneously impressed
> and disgusted by this btw, cool stuff.
>
> i.e.
> void *func = &name.func; \
> rather than
> void *func = READ_ONCE(name.func); \

Changing
void *func = READ_ONCE(name.func); \
to
void *func = &READ_ONCE(name.func); \
produces the tail call. Not sure if that's relevant/what you were
looking for/even correct (haven't thought to hard about the
implications of that change; juggling other stuff ATM)

> (I'm surprised that `&name.func;` and `name.func;` also produce
> different results).
>
> > >
> > > ---
> > > #include <stddef.h>
> > >
> > >
> > > #define READ_ONCE(var) (*((volatile typeof(var) *)&(var)))
> > > #define WRITE_ONCE(var, val) (*((volatile typeof(var) *)&(var)) = (val))
> > >
> > > struct static_call_key {
> > > void *func;
> > > };
> > >
> > > #define DECLARE_STATIC_CALL(name, func) \
> > > extern struct static_call_key name; \
> > > extern typeof(func) __SCT__##name;
> > >
> > > #define DEFINE_STATIC_COND_CALL(name, _func) \
> > > DECLARE_STATIC_CALL(name, _func) \
> > > struct static_call_key name = { \
> > > .func = NULL, \
> > > }
> > >
> > > static void __static_call_nop(void)
> > > {
> > > }
> > >
> > > #define __static_cond_call(name) \
> > > ({ \
> > > void *func = READ_ONCE(name.func); \
> > > if (!func) \
> > > func = &__static_call_nop; \
> > > (typeof(__SCT__##name)*)func; \
> > > })
> > >
> > > #define static_cond_call(name) (void)__static_cond_call(name)
> > >
> > > static void inline static_call_update(struct static_call_key *call, void *func)
> > > {
> > > WRITE_ONCE(call->func, func);
> > > }
> > >
> > > volatile int _x;
> > >
> > > void bar(int x)
> > > {
> > > _x = x;
> > > }
> > >
> > > DEFINE_STATIC_COND_CALL(foo, bar);
> > >
> > > void ponies(int x)
> > > {
> > > static_cond_call(foo)(x);
> > > }
>
>
>
> --
> Thanks,
> ~Nick Desaulniers



--
Thanks,
~Nick Desaulniers

2020-05-05 18:58:34

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Tue, May 5, 2020 at 11:28 AM Nick Desaulniers
<[email protected]> wrote:
>
> Changing
> void *func = READ_ONCE(name.func); \
> to
> void *func = &READ_ONCE(name.func); \

What? That makes no sense.

Yes,

void *func = foo;

and

void *func = &foo;

are the same thing, _if_ "foo" is an actual function, because then
"foo" degrades from a function to a pointer to a function as part of C
type semantics.

But that's not the case here. READ_ONCE(name.func) isn't a function -
it's a pointer to a function. So it doesn't degrade to anything at
all, and adding a '&' in front ot it completely changes the meaning of
the expression. So now the '&' changes it from "pointer to a function"
to "pointer to a pointer to a function", and the end result is not the
same thing any more.

Without the "&" it will call the function "bar" (which is the function
pointer that was assigned).

With the "&", it will not not call a function at all, it will do a
call to an address that is actually data of type "struct
static_call_key".

That's also why the NULL pointer check goes away: now the pointer is a
pointer to static data, which can never be NULL.

That said, I found it interesting that the volatile read also goes
away. That struck me as strange. But then I thought about it somem
more, and realized that the '&' basically just peels off the '*', so
now there isn't any actual volatile access any more, which is why the
read went away too.

Anyway, adding that '&' completely changes the meaning of the test.

Your initial reaction that "you can't compile away the read and the
test of NULL" was correct, I think.

Linus

2020-05-05 19:03:16

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

----- On May 5, 2020, at 2:48 PM, Linus Torvalds [email protected] wrote:
[...]
>
> Your initial reaction that "you can't compile away the read and the
> test of NULL" was correct, I think.

I suspect this pattern of "if (func != NULL) func(...)" could be semantically
changed to just invoking an empty function which effectively does nothing.
This would remove the need to do a pointer check in the first place. But maybe
I'm missing something subtle about why it has not been done in this context.

Thanks,

Mathieu

--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

2020-05-05 20:01:20

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Tue, May 5, 2020 at 12:00 PM Mathieu Desnoyers
<[email protected]> wrote:
>
> ----- On May 5, 2020, at 2:48 PM, Linus Torvalds [email protected] wrote:
> [...]
> >
> > Your initial reaction that "you can't compile away the read and the
> > test of NULL" was correct, I think.
>
> I suspect this pattern of "if (func != NULL) func(...)" could be semantically
> changed to just invoking an empty function which effectively does nothing.
> This would remove the need to do a pointer check in the first place. But maybe
> I'm missing something subtle about why it has not been done in this context.

Good idea, this eliminates the check: https://godbolt.org/z/Xugo9w
but you still have an indirect tail call (I think a direct tail call
is the desired solution?)
--
Thanks,
~Nick Desaulniers

2020-05-05 20:30:20

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

----- On May 5, 2020, at 3:57 PM, ndesaulniers [email protected] wrote:

> On Tue, May 5, 2020 at 12:00 PM Mathieu Desnoyers
> <[email protected]> wrote:
>>
>> ----- On May 5, 2020, at 2:48 PM, Linus Torvalds [email protected]
>> wrote:
>> [...]
>> >
>> > Your initial reaction that "you can't compile away the read and the
>> > test of NULL" was correct, I think.
>>
>> I suspect this pattern of "if (func != NULL) func(...)" could be semantically
>> changed to just invoking an empty function which effectively does nothing.
>> This would remove the need to do a pointer check in the first place. But maybe
>> I'm missing something subtle about why it has not been done in this context.
>
> Good idea, this eliminates the check: https://godbolt.org/z/Xugo9w
> but you still have an indirect tail call (I think a direct tail call
> is the desired solution?)

Actually, if the goal is to do code patching of the call, I wonder
what makes it OK to "guess" all the call patterns generated by the compiler ?
AFAIU this is not an ABI in any way. For instance, a new compiler version could
choose to add some no-op instructions within this pattern just because it feels
like it.

For static jumps, we worked with the compiler people to add "asm goto ()" so
we could express a jump in assembly which would branch outside of the asm.
Emitting the jump in assembly allows us to control the exact code pattern,
which can then be patched, and the asm goto operands allow the compiler to
be in control of all the allowed branch targets.

I'm again possibly missing something, but it looks like this proposal of static_call()
(especially the static_cond_call part) is trying to just assume the common call
patterns generated by the compilers, and patch those. What is the expected behavior
if a compiler ends up generating unknown code patterns in future versions ?

I would think a more robust approach would be to, again, work with the compiler people
and introduce something like:

asm call ("asm goes here" : : : funcA, funcB, funcC )

which would allow patching the call emitted _in assembly_ between the various
available targets. Bonus points if the compiler can let the asm know whether
it's a standard call or tail-call.

Then once we have that, we can start doing fun stuff like adding a conditional
within the assembly, but I don't see why the conditional should be the same
variable as the actual function pointer: each can be changed independently as
long as the function pointer always points to a "valid" function (not NULL).

Thanks,

Mathieu

--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

2020-05-06 13:58:30

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Tue, May 05, 2020 at 04:27:44PM -0400, Mathieu Desnoyers wrote:
> Actually, if the goal is to do code patching of the call, I wonder
> what makes it OK to "guess" all the call patterns generated by the compiler ?

We're not guessing, have have objtool read the compiler output and
record the location for us. The compiler can generate whatever it likes.

2020-05-06 14:05:10

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

----- On May 6, 2020, at 9:55 AM, Peter Zijlstra [email protected] wrote:

> On Tue, May 05, 2020 at 04:27:44PM -0400, Mathieu Desnoyers wrote:
>> Actually, if the goal is to do code patching of the call, I wonder
>> what makes it OK to "guess" all the call patterns generated by the compiler ?
>
> We're not guessing, have have objtool read the compiler output and
> record the location for us. The compiler can generate whatever it likes.

So is the plan to adapt objtool if future compilers change the generated
instruction patterns ?

Thanks,

Mathieu


--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

2020-05-06 16:05:35

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Wed, May 06, 2020 at 03:51:28PM +0200, Peter Zijlstra wrote:
> On Tue, May 05, 2020 at 11:13:53AM -0700, Nick Desaulniers wrote:
> > On Tue, May 5, 2020 at 2:36 AM Peter Zijlstra <[email protected]> wrote:
> > >
> > >
> > > HJ, Nick,
> > >
> > > Any chance any of you can see a way to make your respective compilers
> > > not emit utter junk for this?
> > >
> > > On Mon, May 04, 2020 at 10:14:45PM +0200, Peter Zijlstra wrote:
> > >
> > > > https://godbolt.org/z/SDRG2q
> >
> > Woah, a godbolt link! Now we're speaking the same language. What were
> > you expecting?
>
> Given the output for x86-64 clang (trunk)
>
> bar: # @bar
> movl %edi, .L_x$local(%rip)
> retq
> ponies: # @ponies
> movq .Lfoo$local(%rip), %rax
> testq %rax, %rax
> movl $__static_call_nop, %ecx
> cmovneq %rax, %rcx
> jmpq *%rcx # TAILCALL
> __static_call_nop: # @__static_call_nop
> retq
> _x:
> .L_x$local:
> .long 0 # 0x0
>
> foo:
> .Lfoo$local:
> .zero 8
>
>
> I was hoping for:
>
> bar: # @bar
> movl %edi, .L_x$local(%rip)
> retq
> ponies: # @ponies
> movq .Lfoo$local(%rip), %rax
> testq %rax, %rax
> jz 1f
> jmpq *%rcx # TAILCALL

Obviously this then wants to be *%rax.

> 1:
> retq
>
> That avoids the indirect call (possible retpoline) and does an immediate
> return.
>
> So it does 2 things different:
>
> - it realizes the NULL case is a constant and uses an
> immediate call and avoids the indirect call/jmp.
>
> - it realizes __static_call_nop() is a no-op and avoids the call
> entirely and does an immediate return.

IOW, have it inline __static_call_nop().

2020-05-06 16:22:27

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Wed, May 06, 2020 at 10:01:12AM -0400, Mathieu Desnoyers wrote:
> ----- On May 6, 2020, at 9:55 AM, Peter Zijlstra [email protected] wrote:
>
> > On Tue, May 05, 2020 at 04:27:44PM -0400, Mathieu Desnoyers wrote:
> >> Actually, if the goal is to do code patching of the call, I wonder
> >> what makes it OK to "guess" all the call patterns generated by the compiler ?
> >
> > We're not guessing, have have objtool read the compiler output and
> > record the location for us. The compiler can generate whatever it likes.
>
> So is the plan to adapt objtool if future compilers change the generated
> instruction patterns ?

If needed, sure. I don't really see what a compiler can do differently
though.

objtool looks for:

JMP/CALL __SCT__##foo

and writes a .static_call_sites table entry for it. Anything else we
can't rewrite anyway and will have to keep relying on the trampoline
working (which we also update, so that's fine).

2020-05-06 17:29:38

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Fri, May 01, 2020 at 10:29:03PM +0200, Peter Zijlstra wrote:
> +++ b/arch/x86/include/asm/static_call.h
> @@ -30,4 +30,14 @@
> ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> ".popsection \n")
>
> +#define ARCH_DEFINE_STATIC_CALL_RETTRAMP(name) \
> + asm(".pushsection .static_call.text, \"ax\" \n" \
> + ".align 4 \n" \
> + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
> + STATIC_CALL_TRAMP_STR(name) ": \n" \
> + " ret; nop; nop; nop; nop; \n" \
> + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
> + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> + ".popsection \n")
> +

The boilerplate in these two trampoline macros is identical except for
the actual instructions, maybe there could be a shared
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) macro which does most of
the dirty work.

> #endif /* _ASM_STATIC_CALL_H */
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -4,19 +4,41 @@
> #include <linux/bug.h>
> #include <asm/text-patching.h>
>
> -static void __static_call_transform(void *insn, u8 opcode, void *func)
> +enum insn_type {
> + call = 0, /* site call */
> + nop = 1, /* site cond-call */
> + jmp = 2, /* tramp / site tail-call */
> + ret = 3, /* tramp / site cond-tail-call */
> +};

The lowercase enums threw me for a loop, I thought they were variables a
few times. Starting a new enum trend? :-)

> void arch_static_call_transform(void *site, void *tramp, void *func)
> @@ -24,10 +46,10 @@ void arch_static_call_transform(void *si
> mutex_lock(&text_mutex);
>
> if (tramp)
> - __static_call_transform(tramp, JMP32_INSN_OPCODE, func);
> + __static_call_transform(tramp, jmp + !func, func);
>
> if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
> - __static_call_transform(site, CALL_INSN_OPCODE, func);
> + __static_call_transform(site, !func, func);

Clever enum math, but probably more robust to be ignorant of the values:

if (tramp)
__static_call_transform(tramp, func ? jmp : ret, func);

if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
__static_call_transform(site, func ? call : nop, func);


> +++ b/include/linux/static_call.h
> @@ -16,7 +16,9 @@
> *
> * DECLARE_STATIC_CALL(name, func);
> * DEFINE_STATIC_CALL(name, func);
> + * DEFINE_STATIC_COND_CALL(name, typename);
> * static_call(name)(args...);
> + * static_cond_call(name)(args...)
> * static_call_update(name, func);

Missing semicolon, also an updated description/example would be useful.

On that note, what do you think about tweaking the naming from

DEFINE_STATIC_COND_CALL(name, typename);
static_cond_call(name)(args...);

to

DEFINE_STATIC_CALL_NO_FUNC(name, typename);
static_call_if_func(name)(args...);

?

Seems clearer to me. They're still STATIC_CALLs, so it seems logical to
keep those two words together. And NO_FUNC clarifies the initialized
value.

Similarly RETTRAMP could be ARCH_DEFINE_STATIC_CALL_NO_FUNC.

--
Josh

2020-05-06 18:01:28

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Wed, May 06, 2020 at 12:24:55PM -0500, Josh Poimboeuf wrote:
> On Fri, May 01, 2020 at 10:29:03PM +0200, Peter Zijlstra wrote:
> > +++ b/arch/x86/include/asm/static_call.h
> > @@ -30,4 +30,14 @@
> > ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> > ".popsection \n")
> >
> > +#define ARCH_DEFINE_STATIC_CALL_RETTRAMP(name) \
> > + asm(".pushsection .static_call.text, \"ax\" \n" \
> > + ".align 4 \n" \
> > + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
> > + STATIC_CALL_TRAMP_STR(name) ": \n" \
> > + " ret; nop; nop; nop; nop; \n" \
> > + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
> > + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> > + ".popsection \n")
> > +
>
> The boilerplate in these two trampoline macros is identical except for
> the actual instructions, maybe there could be a shared
> __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) macro which does most of
> the dirty work.

I'm afraid that'll just make it less readable :/

> > #endif /* _ASM_STATIC_CALL_H */
> > --- a/arch/x86/kernel/static_call.c
> > +++ b/arch/x86/kernel/static_call.c
> > @@ -4,19 +4,41 @@
> > #include <linux/bug.h>
> > #include <asm/text-patching.h>
> >
> > -static void __static_call_transform(void *insn, u8 opcode, void *func)
> > +enum insn_type {
> > + call = 0, /* site call */
> > + nop = 1, /* site cond-call */
> > + jmp = 2, /* tramp / site tail-call */
> > + ret = 3, /* tramp / site cond-tail-call */
> > +};
>
> The lowercase enums threw me for a loop, I thought they were variables a
> few times. Starting a new enum trend? :-)

I can UPPERCASE them I suppose, not sure where this came from.

> > void arch_static_call_transform(void *site, void *tramp, void *func)
> > @@ -24,10 +46,10 @@ void arch_static_call_transform(void *si
> > mutex_lock(&text_mutex);
> >
> > if (tramp)
> > - __static_call_transform(tramp, JMP32_INSN_OPCODE, func);
> > + __static_call_transform(tramp, jmp + !func, func);
> >
> > if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
> > - __static_call_transform(site, CALL_INSN_OPCODE, func);
> > + __static_call_transform(site, !func, func);
>
> Clever enum math, but probably more robust to be ignorant of the values:
>
> if (tramp)
> __static_call_transform(tramp, func ? jmp : ret, func);
>
> if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
> __static_call_transform(site, func ? call : nop, func);
>

That is more readable, and I checked, GCC is clever enough to not
actually emit branches for that, so w00t.

> > +++ b/include/linux/static_call.h
> > @@ -16,7 +16,9 @@
> > *
> > * DECLARE_STATIC_CALL(name, func);
> > * DEFINE_STATIC_CALL(name, func);
> > + * DEFINE_STATIC_COND_CALL(name, typename);
> > * static_call(name)(args...);
> > + * static_cond_call(name)(args...)
> > * static_call_update(name, func);
>
> Missing semicolon, also an updated description/example would be useful.

Yes, I already promised Rasmus more documentation.

> On that note, what do you think about tweaking the naming from
>
> DEFINE_STATIC_COND_CALL(name, typename);
> static_cond_call(name)(args...);
>
> to
>
> DEFINE_STATIC_CALL_NO_FUNC(name, typename);
> static_call_if_func(name)(args...);
>
> ?
>
> Seems clearer to me. They're still STATIC_CALLs, so it seems logical to
> keep those two words together. And NO_FUNC clarifies the initialized
> value.
>
> Similarly RETTRAMP could be ARCH_DEFINE_STATIC_CALL_NO_FUNC.

What can I say, I'm sorta used to the old naming by now, but sure, any
other opinions before I edit things?

2020-05-06 18:14:33

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Wed, May 06, 2020 at 07:58:52PM +0200, Peter Zijlstra wrote:
> On Wed, May 06, 2020 at 12:24:55PM -0500, Josh Poimboeuf wrote:
> > On Fri, May 01, 2020 at 10:29:03PM +0200, Peter Zijlstra wrote:
> > > +++ b/arch/x86/include/asm/static_call.h
> > > @@ -30,4 +30,14 @@
> > > ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> > > ".popsection \n")
> > >
> > > +#define ARCH_DEFINE_STATIC_CALL_RETTRAMP(name) \
> > > + asm(".pushsection .static_call.text, \"ax\" \n" \
> > > + ".align 4 \n" \
> > > + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
> > > + STATIC_CALL_TRAMP_STR(name) ": \n" \
> > > + " ret; nop; nop; nop; nop; \n" \
> > > + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
> > > + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> > > + ".popsection \n")
> > > +
> >
> > The boilerplate in these two trampoline macros is identical except for
> > the actual instructions, maybe there could be a shared
> > __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) macro which does most of
> > the dirty work.
>
> I'm afraid that'll just make it less readable :/

#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
asm(".pushsection .static_call.text, \"ax\" \n" \
".align 4 \n" \
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
insns " \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")

#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp.d32 " # func)

#define ARCH_DEFINE_STATIC_CALL_RETTRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")

I like it. Makes it easy to see the differences between the tramps.

> > > #endif /* _ASM_STATIC_CALL_H */
> > > --- a/arch/x86/kernel/static_call.c
> > > +++ b/arch/x86/kernel/static_call.c
> > > @@ -4,19 +4,41 @@
> > > #include <linux/bug.h>
> > > #include <asm/text-patching.h>
> > >
> > > -static void __static_call_transform(void *insn, u8 opcode, void *func)
> > > +enum insn_type {
> > > + call = 0, /* site call */
> > > + nop = 1, /* site cond-call */
> > > + jmp = 2, /* tramp / site tail-call */
> > > + ret = 3, /* tramp / site cond-tail-call */
> > > +};
> >
> > The lowercase enums threw me for a loop, I thought they were variables a
> > few times. Starting a new enum trend? :-)
>
> I can UPPERCASE them I suppose, not sure where this came from.

Just thought UPPERCASE was the standard... lowercase looks confusingly
like variables when referenced.

--
Josh

2020-05-06 18:20:42

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Wed, May 06, 2020 at 01:09:24PM -0500, Josh Poimboeuf wrote:

> #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
> asm(".pushsection .static_call.text, \"ax\" \n" \
> ".align 4 \n" \
> ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
> STATIC_CALL_TRAMP_STR(name) ": \n" \
> insns " \n" \
> ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
> ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> ".popsection \n")
>
> #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
> __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp.d32 " # func)

Note that this one is now:

.byte 0xe9; .long #func - (. + 4);

due to clang not actually understanding jmp.d32 :-(

> #define ARCH_DEFINE_STATIC_CALL_RETTRAMP(name, func) \
> __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
>
> I like it. Makes it easy to see the differences between the tramps.

OK, ok.. changed :-)

2020-05-06 19:07:38

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Tue, May 05, 2020 at 11:13:53AM -0700, Nick Desaulniers wrote:
> On Tue, May 5, 2020 at 2:36 AM Peter Zijlstra <[email protected]> wrote:
> >
> >
> > HJ, Nick,
> >
> > Any chance any of you can see a way to make your respective compilers
> > not emit utter junk for this?
> >
> > On Mon, May 04, 2020 at 10:14:45PM +0200, Peter Zijlstra wrote:
> >
> > > https://godbolt.org/z/SDRG2q
>
> Woah, a godbolt link! Now we're speaking the same language. What were
> you expecting?

Given the output for x86-64 clang (trunk)

bar: # @bar
movl %edi, .L_x$local(%rip)
retq
ponies: # @ponies
movq .Lfoo$local(%rip), %rax
testq %rax, %rax
movl $__static_call_nop, %ecx
cmovneq %rax, %rcx
jmpq *%rcx # TAILCALL
__static_call_nop: # @__static_call_nop
retq
_x:
.L_x$local:
.long 0 # 0x0

foo:
.Lfoo$local:
.zero 8


I was hoping for:

bar: # @bar
movl %edi, .L_x$local(%rip)
retq
ponies: # @ponies
movq .Lfoo$local(%rip), %rax
testq %rax, %rax
jz 1f
jmpq *%rcx # TAILCALL
1:
retq

That avoids the indirect call (possible retpoline) and does an immediate
return.

So it does 2 things different:

- it realizes the NULL case is a constant and uses an
immediate call and avoids the indirect call/jmp.

- it realizes __static_call_nop() is a no-op and avoids the call
entirely and does an immediate return.

> Us to remove the conditional check that a volatile read
> wasn't NULL?

No, obviously the load is required, and the READ_ONCE() is so that the
compiler will not emit 2 different loads (just for giggles).

That is:

tmp1 = name.func;
if (!tmp) {
tmp2 = name.func;
tmp2(args);
}

is a valid translation of:

if (!name.func)
name.func(args)

and allows for a NULL dereference (as noted by Rasmus).

What I did do want, per the above, is to avoid the indirect (tail) call.
Because indirect jmp/call are evil and expensive.

> I am simultaneously impressed
> and disgusted by this btw, cool stuff.

Yes, it's nasty, esp the casting of a function pointer like that is
gruesome.

2020-05-06 19:46:22

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Wed, May 6, 2020 at 6:51 AM Peter Zijlstra <[email protected]> wrote:
>
> I was hoping for:
>
> bar: # @bar
> movl %edi, .L_x$local(%rip)
> retq
> ponies: # @ponies
> movq .Lfoo$local(%rip), %rax
> testq %rax, %rax
> jz 1f
> jmpq *%rcx # TAILCALL
> 1:
> retq

If you want to just avoid the 'cmov', the best way to do that is to
insert a barrier() on one side of the if-statement.

That breaks the ability to turn the conditional jump into a cmov.

HOWEVER.

It looks like noth clang and gcc will move the indirect jump to the
conditional sites, but then neither of them is smart enough to just
turn the indirect jump into one direct jump.

Strange. So you still get an indirect call for just the "ret" case.
The code looks actively stupid with

gcc:
.L7:
movl $__static_call_nop, %eax
jmp *%rax

clang:
.LBB1_1:
mov eax, offset __static_call_nop
jmp rax # TAILCALL

despite the barrier not being between those two points. The only
difference is the assembler syntax.

Odd. That's such a trivial and obvious optimization. But presumably
it's a pattern that just doesn't happen normally.

Linus

2020-05-06 20:24:17

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()



> On May 6, 2020, at 10:23 AM, Linus Torvalds <[email protected]> wrote:
>
> On Wed, May 6, 2020 at 6:51 AM Peter Zijlstra <[email protected]> wrote:
>>
>> I was hoping for:
>>
>> bar: # @bar
>> movl %edi, .L_x$local(%rip)
>> retq
>> ponies: # @ponies
>> movq .Lfoo$local(%rip), %rax
>> testq %rax, %rax
>> jz 1f
>> jmpq *%rcx # TAILCALL
>> 1:
>> retq
>
> If you want to just avoid the 'cmov', the best way to do that is to
> insert a barrier() on one side of the if-statement.
>
> That breaks the ability to turn the conditional jump into a cmov.

Having done this in the past, you can get potentially better code with asm volatile (“”); than with a barrier and its memory clobber. I don’t know if that will make a difference in this particular case.

2020-05-08 15:32:39

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Wed, May 06, 2020 at 12:24:55PM -0500, Josh Poimboeuf wrote:

> On that note, what do you think about tweaking the naming from
>
> DEFINE_STATIC_COND_CALL(name, typename);
> static_cond_call(name)(args...);
>
> to
>
> DEFINE_STATIC_CALL_NO_FUNC(name, typename);
> static_call_if_func(name)(args...);
>
> ?
>
> Seems clearer to me. They're still STATIC_CALLs, so it seems logical to
> keep those two words together. And NO_FUNC clarifies the initialized
> value.
>
> Similarly RETTRAMP could be ARCH_DEFINE_STATIC_CALL_NO_FUNC.

So I dislike static_call_if_func(), that's so much typing. Also, I
prefer ARCH_*_RETTRAMP as it clearly describes what the thing is.

How is something like this?

---
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -16,7 +16,7 @@
*
* DECLARE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL(name, func);
- * DEFINE_STATIC_COND_CALL(name, typename);
+ * DEFINE_STATIC_CALL_NULL(name, typename);
* static_call(name)(args...);
* static_cond_call(name)(args...);
* static_call_update(name, func);
@@ -54,6 +54,43 @@
* rather than calling through the trampoline. This requires objtool or a
* compiler plugin to detect all the static_call() sites and annotate them
* in the .static_call_sites section.
+ *
+ *
+ * Notes on NULL function pointers:
+ *
+ * Static_call()s support NULL functions, with many of the caveats that
+ * regular function pointers have.
+ *
+ * Clearly calling a NULL function pointer is 'BAD', so too for
+ * static_call()s (although when HAVE_STATIC_CALL it might not be immediately
+ * fatal). A NULL static_call can be the result of:
+ *
+ * DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int));
+ *
+ * which is equivalent to declaring a NULL function pointer with just a
+ * typename:
+ *
+ * void (*my_func_ptr)(int arg1) = NULL;
+ *
+ * or using static_call_update() with a NULL function. In both cases the
+ * HAVE_STATIC_CALL implementation will patch the trampoline with a RET
+ * instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
+ * architectures can patch the trampoline call to a NOP.
+ *
+ * In all cases, any argument evaluation is unconditional. Unlike a regular
+ * conditional function pointer call:
+ *
+ * if (my_func_ptr)
+ * my_func_ptr(arg1)
+ *
+ * where the argument evaludation also depends on the pointer value.
+ *
+ * When calling a static_call that can be NULL, use:
+ *
+ * static_cond_call(name)(arg1);
+ *
+ * which will include the required value tests to avoid NULL-pointer
+ * dereferences.
*/

#include <linux/types.h>
@@ -122,7 +159,7 @@ extern int static_call_text_reserved(voi
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

-#define DEFINE_STATIC_COND_CALL(name, _func) \
+#define DEFINE_STATIC_CALL_NULL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \
@@ -154,7 +191,7 @@ struct static_call_key {
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

-#define DEFINE_STATIC_COND_CALL(name, _func) \
+#define DEFINE_STATIC_CALL_NULL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \
@@ -198,7 +235,7 @@ struct static_call_key {
.func = _func, \
}

-#define DEFINE_STATIC_COND_CALL(name, _func) \
+#define DEFINE_STATIC_CALL_NULL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \
struct static_call_key STATIC_CALL_KEY(name) = { \
.func = NULL, \

2020-05-08 15:49:41

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Fri, May 08, 2020 at 05:27:30PM +0200, Peter Zijlstra wrote:
> On Wed, May 06, 2020 at 12:24:55PM -0500, Josh Poimboeuf wrote:
>
> > On that note, what do you think about tweaking the naming from
> >
> > DEFINE_STATIC_COND_CALL(name, typename);
> > static_cond_call(name)(args...);
> >
> > to
> >
> > DEFINE_STATIC_CALL_NO_FUNC(name, typename);
> > static_call_if_func(name)(args...);
> >
> > ?
> >
> > Seems clearer to me. They're still STATIC_CALLs, so it seems logical to
> > keep those two words together. And NO_FUNC clarifies the initialized
> > value.
> >
> > Similarly RETTRAMP could be ARCH_DEFINE_STATIC_CALL_NO_FUNC.
>
> So I dislike static_call_if_func(), that's so much typing. Also, I
> prefer ARCH_*_RETTRAMP as it clearly describes what the thing is.
>
> How is something like this?

I like DEFINE_STATIC_CALL_NULL. I also like the new comment.

And if you're calling it

DEFINE_STATIC_CALL_NULL

then it seems like

ARCH_DEFINE_STATIC_CALL_NULL

would be the logical name rather than RETTRAMP?

Still not crazy about static_cond_call(), though I think at least
changing it to static_call_cond() would be better for namespacing
reasons.

--
Josh

2020-05-08 16:21:45

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v4 14/18] static_call: Add static_cond_call()

On Fri, May 08, 2020 at 10:47:04AM -0500, Josh Poimboeuf wrote:

> And if you're calling it
>
> DEFINE_STATIC_CALL_NULL
>
> then it seems like
>
> ARCH_DEFINE_STATIC_CALL_NULL
>
> would be the logical name rather than RETTRAMP?

But that looses the TRAMP, and ARCH_DEFINE_STATIC_CALL_NULL_TRAMP so
damn long, then again, it's only 2 chars more than what I have now,
sure, lets do it.

> Still not crazy about static_cond_call(), though I think at least
> changing it to static_call_cond() would be better for namespacing
> reasons.

Done.