2021-11-05 17:29:08

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

Rewrite load_unaligned_zeropad() to not require .fixup text.

This is easiest done using asm_goto_output, where we can stick a C
label in the exception table entry. The fallback version isn't nearly
so nice but should work.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/include/asm/word-at-a-time.h | 67 ++++++++++++++++++++++++----------
1 file changed, 48 insertions(+), 19 deletions(-)

--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -77,30 +77,59 @@ static inline unsigned long find_zero(un
* and the next page not being mapped, take the exception and
* return zeroes in the non-existing part.
*/
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long offset, data;
+ unsigned long ret;
+
+ asm_volatile_goto(
+ "1: mov %[mem], %[ret]\n"
+
+ _ASM_EXTABLE(1b, %l[do_exception])
+
+ : [ret] "=&r" (ret)
+ : [mem] "m" (*(unsigned long *)addr)
+ : : do_exception);
+
+out:
+ return ret;
+
+do_exception: __cold;
+
+ offset = (unsigned long)addr & (sizeof(long) - 1);
+ addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
+ data = *(unsigned long *)addr;
+ ret = data >> offset * 8;
+ goto out;
+}
+
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
- unsigned long ret, dummy;
+ unsigned long offset, data;
+ unsigned long ret, err = 0;

- asm(
- "1:\tmov %2,%0\n"
+ asm( "1: mov %[mem], %[ret]\n"
"2:\n"
- ".section .fixup,\"ax\"\n"
- "3:\t"
- "lea %2,%1\n\t"
- "and %3,%1\n\t"
- "mov (%1),%0\n\t"
- "leal %2,%%ecx\n\t"
- "andl %4,%%ecx\n\t"
- "shll $3,%%ecx\n\t"
- "shr %%cl,%0\n\t"
- "jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- :"=&r" (ret),"=&c" (dummy)
- :"m" (*(unsigned long *)addr),
- "i" (-sizeof(unsigned long)),
- "i" (sizeof(unsigned long)-1));
+
+ _ASM_EXTABLE_FAULT(1b, 2b)
+
+ : [ret] "=&r" (ret), "+a" (err)
+ : [mem] "m" (*(unsigned long *)addr));
+
+ if (unlikely(err)) {
+ offset = (unsigned long)addr & (sizeof(long) - 1);
+ addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
+ data = *(unsigned long *)addr;
+ ret = data >> offset * 8;
+ }
+
return ret;
}

+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
#endif /* _ASM_WORD_AT_A_TIME_H */



2021-11-05 19:31:10

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
> +
> +static inline unsigned long load_unaligned_zeropad(const void *addr)
> +{
> + unsigned long offset, data;
> + unsigned long ret;
> +
> + asm_volatile_goto(
> + "1: mov %[mem], %[ret]\n"
> +
> + _ASM_EXTABLE(1b, %l[do_exception])
> +
> + : [ret] "=&r" (ret)
> + : [mem] "m" (*(unsigned long *)addr)
> + : : do_exception);
> +
> +out:
> + return ret;
> +
> +do_exception: __cold;
> +
> + offset = (unsigned long)addr & (sizeof(long) - 1);
> + addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
> + data = *(unsigned long *)addr;
> + ret = data >> offset * 8;
> + goto out;

Superfluous goto, can just return here?

> static inline unsigned long load_unaligned_zeropad(const void *addr)
> {
> - unsigned long ret, dummy;
> + unsigned long offset, data;
> + unsigned long ret, err = 0;
>
> - asm(
> - "1:\tmov %2,%0\n"
> + asm( "1: mov %[mem], %[ret]\n"
> "2:\n"
> - ".section .fixup,\"ax\"\n"
> - "3:\t"
> - "lea %2,%1\n\t"
> - "and %3,%1\n\t"
> - "mov (%1),%0\n\t"
> - "leal %2,%%ecx\n\t"
> - "andl %4,%%ecx\n\t"
> - "shll $3,%%ecx\n\t"
> - "shr %%cl,%0\n\t"
> - "jmp 2b\n"
> - ".previous\n"
> - _ASM_EXTABLE(1b, 3b)
> - :"=&r" (ret),"=&c" (dummy)
> - :"m" (*(unsigned long *)addr),
> - "i" (-sizeof(unsigned long)),
> - "i" (sizeof(unsigned long)-1));
> +
> + _ASM_EXTABLE_FAULT(1b, 2b)
> +
> + : [ret] "=&r" (ret), "+a" (err)
> + : [mem] "m" (*(unsigned long *)addr));
> +
> + if (unlikely(err)) {
> + offset = (unsigned long)addr & (sizeof(long) - 1);
> + addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
> + data = *(unsigned long *)addr;
> + ret = data >> offset * 8;
> + }
> +
> return ret;

This adds a (normally not taken) conditional jump, would a straight jmp
over the fixup not be better?

i.e.

1: mov %[mem], %[ret]
jmp 2
... fixup code ...
2:

--
Josh

2021-11-05 19:34:00

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Fri, Nov 05, 2021 at 11:01:52AM -0700, Josh Poimboeuf wrote:
> On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> > +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
> > +
> > +static inline unsigned long load_unaligned_zeropad(const void *addr)
> > +{
> > + unsigned long offset, data;
> > + unsigned long ret;
> > +
> > + asm_volatile_goto(
> > + "1: mov %[mem], %[ret]\n"
> > +
> > + _ASM_EXTABLE(1b, %l[do_exception])
> > +
> > + : [ret] "=&r" (ret)
> > + : [mem] "m" (*(unsigned long *)addr)
> > + : : do_exception);
> > +
> > +out:
> > + return ret;
> > +
> > +do_exception: __cold;
> > +
> > + offset = (unsigned long)addr & (sizeof(long) - 1);
> > + addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
> > + data = *(unsigned long *)addr;
> > + ret = data >> offset * 8;
> > + goto out;
>
> Superfluous goto, can just return here?

Can do I suppose.

> > static inline unsigned long load_unaligned_zeropad(const void *addr)
> > {
> > - unsigned long ret, dummy;
> > + unsigned long offset, data;
> > + unsigned long ret, err = 0;
> >
> > - asm(
> > - "1:\tmov %2,%0\n"
> > + asm( "1: mov %[mem], %[ret]\n"
> > "2:\n"
> > - ".section .fixup,\"ax\"\n"
> > - "3:\t"
> > - "lea %2,%1\n\t"
> > - "and %3,%1\n\t"
> > - "mov (%1),%0\n\t"
> > - "leal %2,%%ecx\n\t"
> > - "andl %4,%%ecx\n\t"
> > - "shll $3,%%ecx\n\t"
> > - "shr %%cl,%0\n\t"
> > - "jmp 2b\n"
> > - ".previous\n"
> > - _ASM_EXTABLE(1b, 3b)
> > - :"=&r" (ret),"=&c" (dummy)
> > - :"m" (*(unsigned long *)addr),
> > - "i" (-sizeof(unsigned long)),
> > - "i" (sizeof(unsigned long)-1));
> > +
> > + _ASM_EXTABLE_FAULT(1b, 2b)
> > +
> > + : [ret] "=&r" (ret), "+a" (err)
> > + : [mem] "m" (*(unsigned long *)addr));
> > +
> > + if (unlikely(err)) {
> > + offset = (unsigned long)addr & (sizeof(long) - 1);
> > + addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
> > + data = *(unsigned long *)addr;
> > + ret = data >> offset * 8;
> > + }
> > +
> > return ret;
>
> This adds a (normally not taken) conditional jump, would a straight jmp
> over the fixup not be better?
>
> i.e.
>
> 1: mov %[mem], %[ret]
> jmp 2
> ... fixup code ...
> 2:

I really don't know, this way we can get the bulk of the text
out-of-line.

2021-11-08 21:54:18

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> +static inline unsigned long load_unaligned_zeropad(const void *addr)
> +{
> + unsigned long offset, data;
> + unsigned long ret;
> +
> + asm_volatile_goto(
> + "1: mov %[mem], %[ret]\n"
> +
> + _ASM_EXTABLE(1b, %l[do_exception])
> +
> + : [ret] "=&r" (ret)
> + : [mem] "m" (*(unsigned long *)addr)
> + : : do_exception);
> +
> +out:
> + return ret;
> +
> +do_exception: __cold;

Clang doesn't approve of this label annotation:

In file included from fs/dcache.c:186:
./arch/x86/include/asm/word-at-a-time.h:99:15: warning: '__cold__' attribute only applies to functions [-Wignored-attributes]
do_exception: __cold;

--
Josh

2021-11-09 02:27:39

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Mon, Nov 08, 2021 at 08:47:11AM -0800, Josh Poimboeuf wrote:
> On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> > +static inline unsigned long load_unaligned_zeropad(const void *addr)
> > +{
> > + unsigned long offset, data;
> > + unsigned long ret;
> > +
> > + asm_volatile_goto(
> > + "1: mov %[mem], %[ret]\n"
> > +
> > + _ASM_EXTABLE(1b, %l[do_exception])
> > +
> > + : [ret] "=&r" (ret)
> > + : [mem] "m" (*(unsigned long *)addr)
> > + : : do_exception);
> > +
> > +out:
> > + return ret;
> > +
> > +do_exception: __cold;
>
> Clang doesn't approve of this label annotation:
>
> In file included from fs/dcache.c:186:
> ./arch/x86/include/asm/word-at-a-time.h:99:15: warning: '__cold__' attribute only applies to functions [-Wignored-attributes]
> do_exception: __cold;

/me mutters something best left unsaid these days...

Nick, how come?

2021-11-09 02:29:26

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Mon, Nov 8, 2021 at 10:29 AM Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Nov 08, 2021 at 08:47:11AM -0800, Josh Poimboeuf wrote:
> > On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> > > +static inline unsigned long load_unaligned_zeropad(const void *addr)
> > > +{
> > > + unsigned long offset, data;
> > > + unsigned long ret;
> > > +
> > > + asm_volatile_goto(
> > > + "1: mov %[mem], %[ret]\n"
> > > +
> > > + _ASM_EXTABLE(1b, %l[do_exception])
> > > +
> > > + : [ret] "=&r" (ret)
> > > + : [mem] "m" (*(unsigned long *)addr)
> > > + : : do_exception);
> > > +
> > > +out:
> > > + return ret;
> > > +
> > > +do_exception: __cold;
> >
> > Clang doesn't approve of this label annotation:
> >
> > In file included from fs/dcache.c:186:
> > ./arch/x86/include/asm/word-at-a-time.h:99:15: warning: '__cold__' attribute only applies to functions [-Wignored-attributes]
> > do_exception: __cold;
>
> /me mutters something best left unsaid these days...
>
> Nick, how come?

Looks like https://bugs.llvm.org/show_bug.cgi?id=47487.
--
Thanks,
~Nick Desaulniers

2021-11-09 17:07:36

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Mon, Nov 08, 2021 at 10:53:31AM -0800, Nick Desaulniers wrote:
> On Mon, Nov 8, 2021 at 10:29 AM Peter Zijlstra <[email protected]> wrote:
> >
> > On Mon, Nov 08, 2021 at 08:47:11AM -0800, Josh Poimboeuf wrote:
> > > On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> > > > +static inline unsigned long load_unaligned_zeropad(const void *addr)
> > > > +{
> > > > + unsigned long offset, data;
> > > > + unsigned long ret;
> > > > +
> > > > + asm_volatile_goto(
> > > > + "1: mov %[mem], %[ret]\n"
> > > > +
> > > > + _ASM_EXTABLE(1b, %l[do_exception])
> > > > +
> > > > + : [ret] "=&r" (ret)
> > > > + : [mem] "m" (*(unsigned long *)addr)
> > > > + : : do_exception);
> > > > +
> > > > +out:
> > > > + return ret;
> > > > +
> > > > +do_exception: __cold;
> > >
> > > Clang doesn't approve of this label annotation:
> > >
> > > In file included from fs/dcache.c:186:
> > > ./arch/x86/include/asm/word-at-a-time.h:99:15: warning: '__cold__' attribute only applies to functions [-Wignored-attributes]
> > > do_exception: __cold;
> >
> > /me mutters something best left unsaid these days...
> >
> > Nick, how come?
>
> Looks like https://bugs.llvm.org/show_bug.cgi?id=47487.

Indeed it does. So what do we do? Keep the attribute and ignore the warn
on clang for now? Even if techinically useless, I do like it's
descriptive nature.

2021-11-10 00:14:15

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 9, 2021 at 12:23 AM Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Nov 08, 2021 at 10:53:31AM -0800, Nick Desaulniers wrote:
> > On Mon, Nov 8, 2021 at 10:29 AM Peter Zijlstra <[email protected]> wrote:
> > >
> > > On Mon, Nov 08, 2021 at 08:47:11AM -0800, Josh Poimboeuf wrote:
> > > > On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> > > > > +static inline unsigned long load_unaligned_zeropad(const void *addr)
> > > > > +{
> > > > > + unsigned long offset, data;
> > > > > + unsigned long ret;
> > > > > +
> > > > > + asm_volatile_goto(
> > > > > + "1: mov %[mem], %[ret]\n"
> > > > > +
> > > > > + _ASM_EXTABLE(1b, %l[do_exception])
> > > > > +
> > > > > + : [ret] "=&r" (ret)
> > > > > + : [mem] "m" (*(unsigned long *)addr)
> > > > > + : : do_exception);
> > > > > +
> > > > > +out:
> > > > > + return ret;
> > > > > +
> > > > > +do_exception: __cold;
> > > >
> > > > Clang doesn't approve of this label annotation:
> > > >
> > > > In file included from fs/dcache.c:186:
> > > > ./arch/x86/include/asm/word-at-a-time.h:99:15: warning: '__cold__' attribute only applies to functions [-Wignored-attributes]
> > > > do_exception: __cold;
> > >
> > > /me mutters something best left unsaid these days...
> > >
> > > Nick, how come?
> >
> > Looks like https://bugs.llvm.org/show_bug.cgi?id=47487.
>
> Indeed it does. So what do we do? Keep the attribute and ignore the warn
> on clang for now? Even if techinically useless, I do like it's
> descriptive nature.

I think the feature of label-attributes is generally useful, for asm
goto (without outputs) and probably computed-goto, so I think we
should implement support for these in clang. I suspect the machinery
for hot/cold labels was added to clang and LLVM before asm goto was;
LLVM likely has all the machinery needed and we probably just need to
relax or adjust clang's semantic analysis of where the attribute may
occur.

With the above patch, we'd still have issues though with released
versions of clang, and -Werror would complicate things further.

I think the use of this feature (label-attributes) here isn't
necessary though; because of the use of outputs, the "fallthrough"
basic block needs to be placed immediately after the basic block
terminated by the asm goto, at least in LLVM. Was different ordering
of basic blocks observed with GCC without this label attribute?

_Without_ outputs, I can see being able to specify which target of an
asm-goto with multiple labels is relatively hot as useful, but _with_
outputs I suspect specifying the indirect targets as cold provides
little to no utility. Unless the cold attribute is helping move
("shrink-wrap"?) the basic block to a whole other section
(.text.cold.)?
--
Thanks,
~Nick Desaulniers

2021-11-10 00:21:00

by Bill Wendling

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 9, 2021 at 11:22 AM Nick Desaulniers
<[email protected]> wrote:
>
> On Tue, Nov 9, 2021 at 12:23 AM Peter Zijlstra <[email protected]> wrote:
> >
> > On Mon, Nov 08, 2021 at 10:53:31AM -0800, Nick Desaulniers wrote:
> > > On Mon, Nov 8, 2021 at 10:29 AM Peter Zijlstra <[email protected]> wrote:
> > > >
> > > > On Mon, Nov 08, 2021 at 08:47:11AM -0800, Josh Poimboeuf wrote:
> > > > > On Fri, Nov 05, 2021 at 06:10:43PM +0100, Peter Zijlstra wrote:
> > > > > > +static inline unsigned long load_unaligned_zeropad(const void *addr)
> > > > > > +{
> > > > > > + unsigned long offset, data;
> > > > > > + unsigned long ret;
> > > > > > +
> > > > > > + asm_volatile_goto(
> > > > > > + "1: mov %[mem], %[ret]\n"
> > > > > > +
> > > > > > + _ASM_EXTABLE(1b, %l[do_exception])
> > > > > > +
> > > > > > + : [ret] "=&r" (ret)
> > > > > > + : [mem] "m" (*(unsigned long *)addr)
> > > > > > + : : do_exception);
> > > > > > +
> > > > > > +out:
> > > > > > + return ret;
> > > > > > +
> > > > > > +do_exception: __cold;
> > > > >
> > > > > Clang doesn't approve of this label annotation:
> > > > >
> > > > > In file included from fs/dcache.c:186:
> > > > > ./arch/x86/include/asm/word-at-a-time.h:99:15: warning: '__cold__' attribute only applies to functions [-Wignored-attributes]
> > > > > do_exception: __cold;
> > > >
> > > > /me mutters something best left unsaid these days...
> > > >
> > > > Nick, how come?
> > >
> > > Looks like https://bugs.llvm.org/show_bug.cgi?id=47487.
> >
> > Indeed it does. So what do we do? Keep the attribute and ignore the warn
> > on clang for now? Even if techinically useless, I do like it's
> > descriptive nature.
>
> I think the feature of label-attributes is generally useful, for asm
> goto (without outputs) and probably computed-goto, so I think we
> should implement support for these in clang. I suspect the machinery
> for hot/cold labels was added to clang and LLVM before asm goto was;
> LLVM likely has all the machinery needed and we probably just need to
> relax or adjust clang's semantic analysis of where the attribute may
> occur.
>
> With the above patch, we'd still have issues though with released
> versions of clang, and -Werror would complicate things further.
>
> I think the use of this feature (label-attributes) here isn't
> necessary though; because of the use of outputs, the "fallthrough"
> basic block needs to be placed immediately after the basic block
> terminated by the asm goto, at least in LLVM. Was different ordering
> of basic blocks observed with GCC without this label attribute?
>
> _Without_ outputs, I can see being able to specify which target of an
> asm-goto with multiple labels is relatively hot as useful, but _with_
> outputs I suspect specifying the indirect targets as cold provides
> little to no utility. Unless the cold attribute is helping move
> ("shrink-wrap"?) the basic block to a whole other section
> (.text.cold.)?

Adding attributes to labels shouldn't be difficult, as you mention. In
the case of cold/hot, it's adjusting some of the metadata that already
exists on some basic blocks. It might be enough to allow the normal
block placement algorithms to move the hot and cold blocks around for
us. The question becomes how many attributes does GCC allow on labels?

-bw

2021-11-10 00:21:09

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 9, 2021 at 1:21 PM Peter Zijlstra <[email protected]> wrote:
>
> On Tue, Nov 09, 2021 at 12:59:12PM -0800, Bill Wendling wrote:
> >
> > Adding attributes to labels shouldn't be difficult, as you mention. In
> > the case of cold/hot, it's adjusting some of the metadata that already
> > exists on some basic blocks. It might be enough to allow the normal
> > block placement algorithms to move the hot and cold blocks around for
> > us. The question becomes how many attributes does GCC allow on labels?
>
> I'm aware of 3: unused, hot, cold. Also:
>
> https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html

Re: unused:
Being able to selectively disable -Wunused-label via
__attribute__((unused)); seems useful, too.
--
Thanks,
~Nick Desaulniers

2021-11-10 00:21:12

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 09, 2021 at 11:22:44AM -0800, Nick Desaulniers wrote:

> I think the use of this feature (label-attributes) here isn't
> necessary though; because of the use of outputs, the "fallthrough"
> basic block needs to be placed immediately after the basic block
> terminated by the asm goto, at least in LLVM. Was different ordering
> of basic blocks observed with GCC without this label attribute?

GCC does the same, but I wanted to have the exception stuff be in
.text.cold, but alas it doesn't do that. I left the attribute because of
it's descriptive value.

> Unless the cold attribute is helping move
> ("shrink-wrap"?) the basic block to a whole other section
> (.text.cold.)?

I was hoping it would do that, but it doesn't on gcc-11.

2021-11-10 00:21:59

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 09, 2021 at 12:59:12PM -0800, Bill Wendling wrote:
>
> Adding attributes to labels shouldn't be difficult, as you mention. In
> the case of cold/hot, it's adjusting some of the metadata that already
> exists on some basic blocks. It might be enough to allow the normal
> block placement algorithms to move the hot and cold blocks around for
> us. The question becomes how many attributes does GCC allow on labels?

I'm aware of 3: unused, hot, cold. Also:

https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html

2021-11-10 00:25:30

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 9, 2021 at 2:11 PM Peter Zijlstra <[email protected]> wrote:
>
> On Tue, Nov 09, 2021 at 01:25:47PM -0800, Nick Desaulniers wrote:
> > On Tue, Nov 9, 2021 at 1:21 PM Peter Zijlstra <[email protected]> wrote:
> > >
> > > On Tue, Nov 09, 2021 at 12:59:12PM -0800, Bill Wendling wrote:
> > > >
> > > > Adding attributes to labels shouldn't be difficult, as you mention. In
> > > > the case of cold/hot, it's adjusting some of the metadata that already
> > > > exists on some basic blocks. It might be enough to allow the normal
> > > > block placement algorithms to move the hot and cold blocks around for
> > > > us. The question becomes how many attributes does GCC allow on labels?
> > >
> > > I'm aware of 3: unused, hot, cold. Also:
> > >
> > > https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html
> >
> > Re: unused:
> > Being able to selectively disable -Wunused-label via
> > __attribute__((unused)); seems useful, too.
>
> kernel/sched/fair.c:done: __maybe_unused;
>
> Yes it is ;-)

Ah, that's already supported: https://godbolt.org/z/aa76aexnv, so it's
just hot/cold that could be implemented next.
--
Thanks,
~Nick Desaulniers

2021-11-10 00:25:58

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 09, 2021 at 01:25:47PM -0800, Nick Desaulniers wrote:
> On Tue, Nov 9, 2021 at 1:21 PM Peter Zijlstra <[email protected]> wrote:
> >
> > On Tue, Nov 09, 2021 at 12:59:12PM -0800, Bill Wendling wrote:
> > >
> > > Adding attributes to labels shouldn't be difficult, as you mention. In
> > > the case of cold/hot, it's adjusting some of the metadata that already
> > > exists on some basic blocks. It might be enough to allow the normal
> > > block placement algorithms to move the hot and cold blocks around for
> > > us. The question becomes how many attributes does GCC allow on labels?
> >
> > I'm aware of 3: unused, hot, cold. Also:
> >
> > https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html
>
> Re: unused:
> Being able to selectively disable -Wunused-label via
> __attribute__((unused)); seems useful, too.

kernel/sched/fair.c:done: __maybe_unused;

Yes it is ;-)

2021-11-10 10:21:24

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, Nov 09, 2021 at 10:07:36PM +0100, Peter Zijlstra wrote:
> On Tue, Nov 09, 2021 at 11:22:44AM -0800, Nick Desaulniers wrote:
>
> > I think the use of this feature (label-attributes) here isn't
> > necessary though; because of the use of outputs, the "fallthrough"
> > basic block needs to be placed immediately after the basic block
> > terminated by the asm goto, at least in LLVM. Was different ordering
> > of basic blocks observed with GCC without this label attribute?
>
> GCC does the same, but I wanted to have the exception stuff be in
> .text.cold, but alas it doesn't do that. I left the attribute because of
> it's descriptive value.
>
> > Unless the cold attribute is helping move
> > ("shrink-wrap"?) the basic block to a whole other section
> > (.text.cold.)?
>
> I was hoping it would do that, but it doesn't on gcc-11.

I've removed the __cold on labels in the latest posting.

https://lkml.kernel.org/r/[email protected]

2021-11-10 10:51:09

by David Laight

[permalink] [raw]
Subject: RE: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

From: Peter Zijlstra
> Sent: 09 November 2021 21:08
...
>
> GCC does the same, but I wanted to have the exception stuff be in
> .text.cold, but alas it doesn't do that. I left the attribute because of
> it's descriptive value.
>
> > Unless the cold attribute is helping move
> > ("shrink-wrap"?) the basic block to a whole other section
> > (.text.cold.)?
>
> I was hoping it would do that, but it doesn't on gcc-11.

Wouldn't moving part of a function to .text.cold (or .text.unlikely)
generate the same problems with the stack backtrace code as the
.text.fixup section you are removing had??

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2021-11-10 11:11:29

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Wed, Nov 10, 2021 at 10:46:42AM +0000, David Laight wrote:
> From: Peter Zijlstra
> > Sent: 09 November 2021 21:08
> ...
> >
> > GCC does the same, but I wanted to have the exception stuff be in
> > .text.cold, but alas it doesn't do that. I left the attribute because of
> > it's descriptive value.
> >
> > > Unless the cold attribute is helping move
> > > ("shrink-wrap"?) the basic block to a whole other section
> > > (.text.cold.)?
> >
> > I was hoping it would do that, but it doesn't on gcc-11.
>
> Wouldn't moving part of a function to .text.cold (or .text.unlikely)
> generate the same problems with the stack backtrace code as the
> .text.fixup section you are removing had??

GCC can already split a function into func and func.cold today (or
worse: func, func.isra.N, func.cold, func.isra.N.cold etc..).

I'm assuming reliable unwind and livepatch know how to deal with this.

2021-11-10 12:21:09

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

Hi!

On Tue, Nov 09, 2021 at 10:07:36PM +0100, Peter Zijlstra wrote:
> On Tue, Nov 09, 2021 at 11:22:44AM -0800, Nick Desaulniers wrote:
> > I think the use of this feature (label-attributes) here isn't
> > necessary though; because of the use of outputs, the "fallthrough"
> > basic block needs to be placed immediately after the basic block
> > terminated by the asm goto, at least in LLVM. Was different ordering
> > of basic blocks observed with GCC without this label attribute?
>
> GCC does the same, but I wanted to have the exception stuff be in
> .text.cold, but alas it doesn't do that. I left the attribute because of
> it's descriptive value.
>
> > Unless the cold attribute is helping move
> > ("shrink-wrap"?)

Shrink-wrapping is something else entirely.

>> the basic block to a whole other section
> > (.text.cold.)?
>
> I was hoping it would do that, but it doesn't on gcc-11.

A cold basic block can never dominate a non-cold basic block. GCC will
fix things up when it notices this property is violated, so marking
random blocks as "cold" will not be very effective.


Segher

2021-11-10 12:23:41

by David Laight

[permalink] [raw]
Subject: RE: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

From: Peter Zijlstra
> Sent: 10 November 2021 11:10
>
> On Wed, Nov 10, 2021 at 10:46:42AM +0000, David Laight wrote:
> > From: Peter Zijlstra
> > > Sent: 09 November 2021 21:08
> > ...
> > >
> > > GCC does the same, but I wanted to have the exception stuff be in
> > > .text.cold, but alas it doesn't do that. I left the attribute because of
> > > it's descriptive value.
> > >
> > > > Unless the cold attribute is helping move
> > > > ("shrink-wrap"?) the basic block to a whole other section
> > > > (.text.cold.)?
> > >
> > > I was hoping it would do that, but it doesn't on gcc-11.
> >
> > Wouldn't moving part of a function to .text.cold (or .text.unlikely)
> > generate the same problems with the stack backtrace code as the
> > .text.fixup section you are removing had??
>
> GCC can already split a function into func and func.cold today (or
> worse: func, func.isra.N, func.cold, func.isra.N.cold etc..).
>
> I'm assuming reliable unwind and livepatch know how to deal with this.

They'll have 'proper' function labels at the top - so backtrace
stands a chance.
Indeed you (probably) want it to output "func.irsa.n.cold" rather
than just "func" to help show which copy it is in.

I guess that livepatch will need separate patches for each
version of the function - which might be 'interesting' if
all the copies actually need patching at the same time.
You'd certainly want a warning if there seemed to be multiple
copies of the function.

I'm waiting for the side-channel attack caused by detecting
timing differences caused by TLB misses when speculatively
executing code in the .cold/.unlikely sections.
ISTR recent x86 cpu speculate unknown conditional branches
'randomly' - rather than (say) assuming backwards taken.
So you can't (easily) stop speculative execution into the 'cold'
text.

I don't know if speculative execution will load TLB,
it would speed a lot of code up - with the same downsides
as evicting code from the L1-cache.
A 'half-way house' would be to do the page table walk, but
hold the read value 'pending' the code being needed.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2021-11-12 01:50:12

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Wed, Nov 10, 2021 at 12:20:47PM +0000, David Laight wrote:
> > > Wouldn't moving part of a function to .text.cold (or .text.unlikely)
> > > generate the same problems with the stack backtrace code as the
> > > .text.fixup section you are removing had??
> >
> > GCC can already split a function into func and func.cold today (or
> > worse: func, func.isra.N, func.cold, func.isra.N.cold etc..).
> >
> > I'm assuming reliable unwind and livepatch know how to deal with this.
>
> They'll have 'proper' function labels at the top - so backtrace
> stands a chance.
> Indeed you (probably) want it to output "func.irsa.n.cold" rather
> than just "func" to help show which copy it is in. >
> I guess that livepatch will need separate patches for each
> version of the function - which might be 'interesting' if
> all the copies actually need patching at the same time.
> You'd certainly want a warning if there seemed to be multiple
> copies of the function.

Hm, I think there is actually a livepatch problem here.

If the .cold (aka "child") function actually had a fentry hook then we'd
be fine. Then we could just patch both "parent" and "child" functions
at the same time. We already have the ability to patch multiple
functions having dependent interface changes.

But there's no fentry hook in the child, so we can only patch the
parent.

If the child schedules out, and then the parent gets patched, things can
go off-script if the child later jumps back to the unpatched version of
the parent, and then for example the old parent tries to call another
patched function with a since-changed ABI.

Granted, it's like three nested edge cases, so it may not be all that
likely to happen.

Some ideas to fix:

a) Add a field to 'klp_func' which allows the patch module to specify a
function's .cold counterpart?

b) Detect such cold counterparts in klp_enable_patch()? Presumably it
would require searching kallsyms for "<func>.cold", which is somewhat
problematic as there might be duplicates.

c) Update the reliable stacktrace code to mark the stack unreliable if
it has a function with ".cold" in the name?

d) Don't patch functions with .cold counterparts? (Probably not a viable
long-term solution, there are a ton of .cold functions because calls
to printk are marked cold)

e) Disable .cold optimization?

f) Add fentry hooks to .cold functions?


I'm thinking a) seems do-able, and less disruptive / more precise than
most others, but it requires more due diligence on behalf of the patch
creation. It sounds be pretty easy for kpatch-build to handle at least.

--
Josh


2021-11-12 09:34:03

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Thu, Nov 11, 2021 at 05:50:03PM -0800, Josh Poimboeuf wrote:

> Hm, I think there is actually a livepatch problem here.

I suspected as much, because I couldn't find any code dealing with it
when I looked in a hurry.. :/

> Some ideas to fix:

> c) Update the reliable stacktrace code to mark the stack unreliable if
> it has a function with ".cold" in the name?

Why not simply match func.cold as func in the transition thing? Then
func won't get patched as long as it (or it's .cold part) is in use.
This seems like the natural thing to do.

If there are enough .cold functions, always reporting stacktrace as
unreliable will make progress hard, even though it might be perfectly
safe.

> e) Disable .cold optimization?

Yeah, lets not do that. That'll have me lobbying to kill KLP again
because it generates crap code.

2021-11-13 05:35:11

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Fri, Nov 12, 2021 at 10:33:36AM +0100, Peter Zijlstra wrote:
> On Thu, Nov 11, 2021 at 05:50:03PM -0800, Josh Poimboeuf wrote:
>
> > Hm, I think there is actually a livepatch problem here.
>
> I suspected as much, because I couldn't find any code dealing with it
> when I looked in a hurry.. :/
>
> > Some ideas to fix:
>
> > c) Update the reliable stacktrace code to mark the stack unreliable if
> > it has a function with ".cold" in the name?
>
> Why not simply match func.cold as func in the transition thing? Then
> func won't get patched as long as it (or it's .cold part) is in use.
> This seems like the natural thing to do.

Well yes, you're basically hinting at my first two options a and b:

a) Add a field to 'klp_func' which allows the patch module to specify a
function's .cold counterpart?

b) Detect such cold counterparts in klp_enable_patch()? Presumably it
would require searching kallsyms for "<func>.cold", which is somewhat
problematic as there might be duplicates.

It's basically a two-step process: 1) match func to .cold if it exists;
2) check for both in klp_check_stack_func(). The above two options are
proposals for the 1st step. The 2nd step was implied.

I think something like that is probably the way to go, but the question
is where to match func to .cold:

- patch creation;
- klp_init_object_loaded(); or
- klp_check_stack_func().

I think the main problem with matching them in the kernel is that you
can't disambiguate duplicate ".cold" symbols without disassembling the
function. Duplicates are rare but they do exist.

Matching them at patch creation time (option a) may work best. At least
with kpatch-build, the tooling already knows about .cold functions, so
explicitly matching them in new klp_func.{cold_name,cold_sympos} fields
would be trivial.

I don't know about other patch creation tooling, but I'd imagine they
also need to know about .cold functions, unless they have that
optimization disabled. Because the func and its .cold counterpart
always need to be patched together.

--
Josh


2021-11-15 12:37:02

by Miroslav Benes

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Fri, 12 Nov 2021, Josh Poimboeuf wrote:

> On Fri, Nov 12, 2021 at 10:33:36AM +0100, Peter Zijlstra wrote:
> > On Thu, Nov 11, 2021 at 05:50:03PM -0800, Josh Poimboeuf wrote:
> >
> > > Hm, I think there is actually a livepatch problem here.
> >
> > I suspected as much, because I couldn't find any code dealing with it
> > when I looked in a hurry.. :/
> >
> > > Some ideas to fix:
> >
> > > c) Update the reliable stacktrace code to mark the stack unreliable if
> > > it has a function with ".cold" in the name?
> >
> > Why not simply match func.cold as func in the transition thing? Then
> > func won't get patched as long as it (or it's .cold part) is in use.
> > This seems like the natural thing to do.
>
> Well yes, you're basically hinting at my first two options a and b:
>
> a) Add a field to 'klp_func' which allows the patch module to specify a
> function's .cold counterpart?
>
> b) Detect such cold counterparts in klp_enable_patch()? Presumably it
> would require searching kallsyms for "<func>.cold", which is somewhat
> problematic as there might be duplicates.
>
> It's basically a two-step process: 1) match func to .cold if it exists;
> 2) check for both in klp_check_stack_func(). The above two options are
> proposals for the 1st step. The 2nd step was implied.

This reminded me... one of the things I have on my todo list for a long
time is to add an option for a live patch creator to specify functions
which are not contained in the live patch but their presence on stacks
should be checked for. It could save some space in the final live patch
when one would add functions (callers) just because the consistency
requires it.

I took as a convenience feature with a low priority and forgot about it.
The problem above changes it. So should we take the opportunity and
implement both in one step? I wanted to include a list of functions in
on a patch level (klp_patch structure) and klp_check_stack() would just
have more to check.

Miroslav

2021-11-15 12:59:56

by Miroslav Benes

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Fri, 12 Nov 2021, Josh Poimboeuf wrote:

> If the child schedules out, and then the parent gets patched, things can
> go off-script if the child later jumps back to the unpatched version of
> the parent, and then for example the old parent tries to call another
> patched function with a since-changed ABI.

...

> I don't know about other patch creation tooling, but I'd imagine they
> also need to know about .cold functions, unless they have that
> optimization disabled. Because the func and its .cold counterpart
> always need to be patched together.

We, at SUSE, solve the issue differently... the new patched parent would
call that another patched function with a changed ABI statically in a live
patch. So in that example, .cold child would jump back to the unpatched
parent which would then call, also, the unpatched function.

The situation would change if we ever were to have some notion of global
consistency. Then it would be really fragile, so it is worth of improving
this, I think.

Miroslav

2021-11-15 13:02:35

by Joe Lawrence

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On 11/15/21 7:36 AM, Miroslav Benes wrote:
> On Fri, 12 Nov 2021, Josh Poimboeuf wrote:
>
>> On Fri, Nov 12, 2021 at 10:33:36AM +0100, Peter Zijlstra wrote:
>>> On Thu, Nov 11, 2021 at 05:50:03PM -0800, Josh Poimboeuf wrote:
>>>
>>>> Hm, I think there is actually a livepatch problem here.
>>>
>>> I suspected as much, because I couldn't find any code dealing with it
>>> when I looked in a hurry.. :/
>>>
>>>> Some ideas to fix:
>>>
>>>> c) Update the reliable stacktrace code to mark the stack unreliable if
>>>> it has a function with ".cold" in the name?
>>>
>>> Why not simply match func.cold as func in the transition thing? Then
>>> func won't get patched as long as it (or it's .cold part) is in use.
>>> This seems like the natural thing to do.
>>
>> Well yes, you're basically hinting at my first two options a and b:
>>
>> a) Add a field to 'klp_func' which allows the patch module to specify a
>> function's .cold counterpart?
>>
>> b) Detect such cold counterparts in klp_enable_patch()? Presumably it
>> would require searching kallsyms for "<func>.cold", which is somewhat
>> problematic as there might be duplicates.
>>
>> It's basically a two-step process: 1) match func to .cold if it exists;
>> 2) check for both in klp_check_stack_func(). The above two options are
>> proposals for the 1st step. The 2nd step was implied.
>
> This reminded me... one of the things I have on my todo list for a long
> time is to add an option for a live patch creator to specify functions
> which are not contained in the live patch but their presence on stacks
> should be checked for. It could save some space in the final live patch
> when one would add functions (callers) just because the consistency
> requires it.
>

Yea, I've used this technique once (adding a nop to a function so
kpatch-build would detect and include in klp_funcs[]) to make a set of
changes safer with respect to the consistency model. Making it simpler
to for the livepatch author to say, "I'm not changing foo(), but I don't
want it doing anything while patching a task" sounds reasonable.

> I took as a convenience feature with a low priority and forgot about it.
> The problem above changes it. So should we take the opportunity and
> implement both in one step? I wanted to include a list of functions in
> on a patch level (klp_patch structure) and klp_check_stack() would just
> have more to check.
>

As far as I read the original problem, I think it would solve for that,
too, so I would say go for it.

--
Joe


2021-11-16 00:16:04

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Mon, Nov 15, 2021 at 08:01:13AM -0500, Joe Lawrence wrote:
> > This reminded me... one of the things I have on my todo list for a long
> > time is to add an option for a live patch creator to specify functions
> > which are not contained in the live patch but their presence on stacks
> > should be checked for. It could save some space in the final live patch
> > when one would add functions (callers) just because the consistency
> > requires it.
> >
>
> Yea, I've used this technique once (adding a nop to a function so
> kpatch-build would detect and include in klp_funcs[]) to make a set of
> changes safer with respect to the consistency model. Making it simpler
> to for the livepatch author to say, "I'm not changing foo(), but I don't
> want it doing anything while patching a task" sounds reasonable.
>
> > I took as a convenience feature with a low priority and forgot about it.
> > The problem above changes it. So should we take the opportunity and
> > implement both in one step? I wanted to include a list of functions in
> > on a patch level (klp_patch structure) and klp_check_stack() would just
> > have more to check.
> >
>
> As far as I read the original problem, I think it would solve for that,
> too, so I would say go for it.

Sounds good to me.

Miroslav, do I understand correctly that you're volunteering to make
this change? ;-)

--
Josh


2021-11-16 07:26:10

by Miroslav Benes

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Mon, 15 Nov 2021, Josh Poimboeuf wrote:

> On Mon, Nov 15, 2021 at 08:01:13AM -0500, Joe Lawrence wrote:
> > > This reminded me... one of the things I have on my todo list for a long
> > > time is to add an option for a live patch creator to specify functions
> > > which are not contained in the live patch but their presence on stacks
> > > should be checked for. It could save some space in the final live patch
> > > when one would add functions (callers) just because the consistency
> > > requires it.
> > >
> >
> > Yea, I've used this technique once (adding a nop to a function so
> > kpatch-build would detect and include in klp_funcs[]) to make a set of
> > changes safer with respect to the consistency model. Making it simpler
> > to for the livepatch author to say, "I'm not changing foo(), but I don't
> > want it doing anything while patching a task" sounds reasonable.
> >
> > > I took as a convenience feature with a low priority and forgot about it.
> > > The problem above changes it. So should we take the opportunity and
> > > implement both in one step? I wanted to include a list of functions in
> > > on a patch level (klp_patch structure) and klp_check_stack() would just
> > > have more to check.
> > >
> >
> > As far as I read the original problem, I think it would solve for that,
> > too, so I would say go for it.
>
> Sounds good to me.
>
> Miroslav, do I understand correctly that you're volunteering to make
> this change? ;-)

Yes, you do. I am not superfast peterz, so it will take some time, but
I'll be happy to do it :).

Miroslav

2021-11-16 21:27:56

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Mon, Nov 15, 2021 at 01:59:36PM +0100, Miroslav Benes wrote:
> On Fri, 12 Nov 2021, Josh Poimboeuf wrote:
>
> > If the child schedules out, and then the parent gets patched, things can
> > go off-script if the child later jumps back to the unpatched version of
> > the parent, and then for example the old parent tries to call another
> > patched function with a since-changed ABI.
>
> ...
>
> > I don't know about other patch creation tooling, but I'd imagine they
> > also need to know about .cold functions, unless they have that
> > optimization disabled. Because the func and its .cold counterpart
> > always need to be patched together.
>
> We, at SUSE, solve the issue differently... the new patched parent would
> call that another patched function with a changed ABI statically in a live
> patch. So in that example, .cold child would jump back to the unpatched
> parent which would then call, also, the unpatched function.

So if I understand correctly, if a function's ABI changes then you don't
patch it directly, but only patch its callers to call the replacement?

Is there a reason for that?

--
Josh


2021-11-18 07:15:58

by Miroslav Benes

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Tue, 16 Nov 2021, Josh Poimboeuf wrote:

> On Mon, Nov 15, 2021 at 01:59:36PM +0100, Miroslav Benes wrote:
> > On Fri, 12 Nov 2021, Josh Poimboeuf wrote:
> >
> > > If the child schedules out, and then the parent gets patched, things can
> > > go off-script if the child later jumps back to the unpatched version of
> > > the parent, and then for example the old parent tries to call another
> > > patched function with a since-changed ABI.
> >
> > ...
> >
> > > I don't know about other patch creation tooling, but I'd imagine they
> > > also need to know about .cold functions, unless they have that
> > > optimization disabled. Because the func and its .cold counterpart
> > > always need to be patched together.
> >
> > We, at SUSE, solve the issue differently... the new patched parent would
> > call that another patched function with a changed ABI statically in a live
> > patch. So in that example, .cold child would jump back to the unpatched
> > parent which would then call, also, the unpatched function.
>
> So if I understand correctly, if a function's ABI changes then you don't
> patch it directly, but only patch its callers to call the replacement?

Yes.

> Is there a reason for that?

Not really. There were a couple of cases in the past where this was safer
to implement and then it became a habbit, I guess.

[ Nicolai CCed, if he wants to add more details ]

Miroslav

2021-11-22 17:46:57

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Thu 2021-11-11 17:50:03, Josh Poimboeuf wrote:
> On Wed, Nov 10, 2021 at 12:20:47PM +0000, David Laight wrote:
> > > > Wouldn't moving part of a function to .text.cold (or .text.unlikely)
> > > > generate the same problems with the stack backtrace code as the
> > > > .text.fixup section you are removing had??
> > >
> > > GCC can already split a function into func and func.cold today (or
> > > worse: func, func.isra.N, func.cold, func.isra.N.cold etc..).
> > >
> > > I'm assuming reliable unwind and livepatch know how to deal with this.
> >
> > They'll have 'proper' function labels at the top - so backtrace
> > stands a chance.
> > Indeed you (probably) want it to output "func.irsa.n.cold" rather
> > than just "func" to help show which copy it is in. >
> > I guess that livepatch will need separate patches for each
> > version of the function - which might be 'interesting' if
> > all the copies actually need patching at the same time.
> > You'd certainly want a warning if there seemed to be multiple
> > copies of the function.
>
> Hm, I think there is actually a livepatch problem here.
>
> If the .cold (aka "child") function actually had a fentry hook then we'd
> be fine. Then we could just patch both "parent" and "child" functions
> at the same time. We already have the ability to patch multiple
> functions having dependent interface changes.
>
> But there's no fentry hook in the child, so we can only patch the
> parent.
>
> If the child schedules out, and then the parent gets patched, things can
> go off-script if the child later jumps back to the unpatched version of
> the parent, and then for example the old parent tries to call another
> patched function with a since-changed ABI.

This thread seems to be motivation for the patchset
https://lore.kernel.org/all/[email protected]/
I am trying to understand the problem here, first. And I am
a bit lost.

How exactly is child called in the above scenario, please?
How could parent get livepatched when child is sleeping?

I imagine it the following way:

parent_func()
fentry

/* some parent code */
jmp child
/* child code */
jmp back_to_parent
/* more parent code */
ret

In the above example, parent_func() would be on stack and could not
get livepatched even when the process is sleeping in the child code.

The livepatching is done via ftrace. Only code with fentry could be
livepatched. And code called via fentry must be visible on stack.


Anyway, this looks to me like yet another compiler optimization where
we need to livepatch the callers. The compiler might produce completely
different optimizations for the new code. I do not see a reasonable
way how to create compatible func, func.isra.N, func.cold,
func.isra.N.cold variants.

Best Regards,
Petr

2021-11-24 17:42:29

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Mon, Nov 22, 2021 at 06:46:44PM +0100, Petr Mladek wrote:
> On Thu 2021-11-11 17:50:03, Josh Poimboeuf wrote:
> > On Wed, Nov 10, 2021 at 12:20:47PM +0000, David Laight wrote:
> > > > > Wouldn't moving part of a function to .text.cold (or .text.unlikely)
> > > > > generate the same problems with the stack backtrace code as the
> > > > > .text.fixup section you are removing had??
> > > >
> > > > GCC can already split a function into func and func.cold today (or
> > > > worse: func, func.isra.N, func.cold, func.isra.N.cold etc..).
> > > >
> > > > I'm assuming reliable unwind and livepatch know how to deal with this.
> > >
> > > They'll have 'proper' function labels at the top - so backtrace
> > > stands a chance.
> > > Indeed you (probably) want it to output "func.irsa.n.cold" rather
> > > than just "func" to help show which copy it is in. >
> > > I guess that livepatch will need separate patches for each
> > > version of the function - which might be 'interesting' if
> > > all the copies actually need patching at the same time.
> > > You'd certainly want a warning if there seemed to be multiple
> > > copies of the function.
> >
> > Hm, I think there is actually a livepatch problem here.
> >
> > If the .cold (aka "child") function actually had a fentry hook then we'd
> > be fine. Then we could just patch both "parent" and "child" functions
> > at the same time. We already have the ability to patch multiple
> > functions having dependent interface changes.
> >
> > But there's no fentry hook in the child, so we can only patch the
> > parent.
> >
> > If the child schedules out, and then the parent gets patched, things can
> > go off-script if the child later jumps back to the unpatched version of
> > the parent, and then for example the old parent tries to call another
> > patched function with a since-changed ABI.
>
> This thread seems to be motivation for the patchset
> https://lore.kernel.org/all/[email protected]/
> I am trying to understand the problem here, first. And I am
> a bit lost.
>
> How exactly is child called in the above scenario, please?
> How could parent get livepatched when child is sleeping?
>
> I imagine it the following way:
>
> parent_func()
> fentry
>
> /* some parent code */
> jmp child
> /* child code */
> jmp back_to_parent
> /* more parent code */
> ret

Right.

> In the above example, parent_func() would be on stack and could not
> get livepatched even when the process is sleeping in the child code.
>
> The livepatching is done via ftrace. Only code with fentry could be
> livepatched. And code called via fentry must be visible on stack.

How would parent_func() be on the stack? If it jumps to the child then
it leaves no trace on the stack.

--
Josh


2021-11-25 08:20:14

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH 20/22] x86,word-at-a-time: Remove .fixup usage

On Wed 2021-11-24 09:42:13, Josh Poimboeuf wrote:
> On Mon, Nov 22, 2021 at 06:46:44PM +0100, Petr Mladek wrote:
> > On Thu 2021-11-11 17:50:03, Josh Poimboeuf wrote:
> > > On Wed, Nov 10, 2021 at 12:20:47PM +0000, David Laight wrote:
> > > > > > Wouldn't moving part of a function to .text.cold (or .text.unlikely)
> > > > > > generate the same problems with the stack backtrace code as the
> > > > > > .text.fixup section you are removing had??
> > > > >
> > > > > GCC can already split a function into func and func.cold today (or
> > > > > worse: func, func.isra.N, func.cold, func.isra.N.cold etc..).
> > > > >
> > > > > I'm assuming reliable unwind and livepatch know how to deal with this.
> > > >
> > > > They'll have 'proper' function labels at the top - so backtrace
> > > > stands a chance.
> > > > Indeed you (probably) want it to output "func.irsa.n.cold" rather
> > > > than just "func" to help show which copy it is in. >
> > > > I guess that livepatch will need separate patches for each
> > > > version of the function - which might be 'interesting' if
> > > > all the copies actually need patching at the same time.
> > > > You'd certainly want a warning if there seemed to be multiple
> > > > copies of the function.
> > >
> > > Hm, I think there is actually a livepatch problem here.
> > >
> > > If the .cold (aka "child") function actually had a fentry hook then we'd
> > > be fine. Then we could just patch both "parent" and "child" functions
> > > at the same time. We already have the ability to patch multiple
> > > functions having dependent interface changes.
> > >
> > > But there's no fentry hook in the child, so we can only patch the
> > > parent.
> > >
> > > If the child schedules out, and then the parent gets patched, things can
> > > go off-script if the child later jumps back to the unpatched version of
> > > the parent, and then for example the old parent tries to call another
> > > patched function with a since-changed ABI.
> >
> > This thread seems to be motivation for the patchset
> > https://lore.kernel.org/all/[email protected]/
> > I am trying to understand the problem here, first. And I am
> > a bit lost.
> >
> > How exactly is child called in the above scenario, please?
> > How could parent get livepatched when child is sleeping?
> >
> > I imagine it the following way:
> >
> > parent_func()
> > fentry
> >
> > /* some parent code */
> > jmp child
> > /* child code */
> > jmp back_to_parent
> > /* more parent code */
> > ret
>
> Right.
>
> > In the above example, parent_func() would be on stack and could not
> > get livepatched even when the process is sleeping in the child code.
> >
> > The livepatching is done via ftrace. Only code with fentry could be
> > livepatched. And code called via fentry must be visible on stack.
>
> How would parent_func() be on the stack? If it jumps to the child then
> it leaves no trace on the stack.

Grr, sure. It was off-by-one error on my side. /o\

Thanks for explanation.

Best Regards,
Petr