2021-06-28 23:41:27

by Nick Desaulniers

[permalink] [raw]
Subject: [PATCH v3] kallsyms: strip LTO suffixes from static functions

Similar to:
commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
functions")

It's very common for compilers to modify the symbol name for static
functions as part of optimizing transformations. That makes hooking
static functions (that weren't inlined or DCE'd) with kprobes difficult.

LLVM has yet another name mangling scheme used by thin LTO. Strip off
these suffixes so that we can continue to hook such static functions.

Reported-by: KE.LI(Lieke) <[email protected]>
Signed-off-by: Nick Desaulniers <[email protected]>
---
Changes v2 -> v3:
* Un-nest preprocessor checks, as per Nathan.

Changes v1 -> v2:
* Both mangling schemes can occur for thinLTO + CFI, this new scheme can
also occur for thinLTO without CFI. Split cleanup_symbol_name() into
two function calls.
* Drop KE.LI's Tested-by tag.
* Do not carry Fangrui's Reviewed-by tag.
* Drop the inline keyword; it is meaningless.
kernel/kallsyms.c | 32 +++++++++++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 4067564ec59f..143c69e7e75d 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -171,6 +171,26 @@ static unsigned long kallsyms_sym_address(int idx)
return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
}

+#ifdef CONFIG_LTO_CLANG_THIN
+/*
+ * LLVM appends a suffix for local variables that must be promoted to global
+ * scope as part of thin LTO. foo() becomes foo.llvm.974640843467629774. This
+ * can break hooking of static functions with kprobes.
+ */
+static bool cleanup_symbol_name_thinlto(char *s)
+{
+ char *res;
+
+ res = strstr(s, ".llvm.");
+ if (res)
+ *res = '\0';
+
+ return res != NULL;
+}
+#else
+static bool cleanup_symbol_name_thinlto(char *s) { return false; }
+#endif /* CONFIG_LTO_CLANG_THIN */
+
#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
/*
* LLVM appends a hash to static function names when ThinLTO and CFI are
@@ -178,7 +198,7 @@ static unsigned long kallsyms_sym_address(int idx)
* This causes confusion and potentially breaks user space tools, so we
* strip the suffix from expanded symbol names.
*/
-static inline bool cleanup_symbol_name(char *s)
+static bool cleanup_symbol_name_thinlto_cfi(char *s)
{
char *res;

@@ -189,8 +209,14 @@ static inline bool cleanup_symbol_name(char *s)
return res != NULL;
}
#else
-static inline bool cleanup_symbol_name(char *s) { return false; }
-#endif
+static bool cleanup_symbol_name_thinlto_cfi(char *s) { return false; }
+#endif /* CONFIG_CFI_CLANG && CONFIG_LTO_CLANG_THIN */
+
+static bool cleanup_symbol_name(char *s)
+{
+ return cleanup_symbol_name_thinlto(s) &&
+ cleanup_symbol_name_thinlto_cfi(s);
+}

/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)
--
2.32.0.93.g670b81a890-goog


2021-06-29 00:32:32

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH v3] kallsyms: strip LTO suffixes from static functions

On Mon, Jun 28, 2021 at 01:31:06PM -0700, Nick Desaulniers wrote:
> Similar to:
> commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
> functions")
>
> It's very common for compilers to modify the symbol name for static
> functions as part of optimizing transformations. That makes hooking
> static functions (that weren't inlined or DCE'd) with kprobes difficult.
>
> LLVM has yet another name mangling scheme used by thin LTO. Strip off
> these suffixes so that we can continue to hook such static functions.
>
> Reported-by: KE.LI(Lieke) <[email protected]>
> Signed-off-by: Nick Desaulniers <[email protected]>
> ---
> Changes v2 -> V3:
> * Un-nest preprocessor checks, as per Nathan.
>
> Changes v1 -> v2:
> * Both mangling schemes can occur for thinLTO + CFI, this new scheme can
> also occur for thinLTO without CFI. Split cleanup_symbol_name() into
> two function calls.
> * Drop KE.LI's tested by tag.
> * Do not carry Fangrui's Reviewed by tag.
> * Drop the inline keyword; it is meaningless.
> kernel/kallsyms.c | 32 +++++++++++++++++++++++++++++---
> 1 file changed, 29 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 4067564ec59f..143c69e7e75d 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -171,6 +171,26 @@ static unsigned long kallsyms_sym_address(int idx)
> return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
> }
>
> +#ifdef CONFIG_LTO_CLANG_THIN
> +/*
> + * LLVM appends a suffix for local variables that must be promoted to global
> + * scope as part of thin LTO. foo() becomes foo.llvm.974640843467629774. This
> + * can break hooking of static functions with kprobes.
> + */
> +static bool cleanup_symbol_name_thinlto(char *s)
> +{
> + char *res;
> +
> + res = strstr(s, ".llvm.");
> + if (res)
> + *res = '\0';
> +
> + return res != NULL;
> +}
> +#else
> +static bool cleanup_symbol_name_thinlto(char *s) { return false; }
> +#endif /* CONFIG_LTO_CLANG_THIN */
> +
> #if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
> /*
> * LLVM appends a hash to static function names when ThinLTO and CFI are
> @@ -178,7 +198,7 @@ static unsigned long kallsyms_sym_address(int idx)
> * This causes confusion and potentially breaks user space tools, so we
> * strip the suffix from expanded symbol names.
> */
> -static inline bool cleanup_symbol_name(char *s)
> +static bool cleanup_symbol_name_thinlto_cfi(char *s)
> {
> char *res;
>
> @@ -189,8 +209,14 @@ static inline bool cleanup_symbol_name(char *s)
> return res != NULL;
> }
> #else
> -static inline bool cleanup_symbol_name(char *s) { return false; }
> -#endif
> +static bool cleanup_symbol_name_thinlto_cfi(char *s) { return false; }
> +#endif /* CONFIG_CFI_CLANG && CONFIG_LTO_CLANG_THIN */
> +
> +static bool cleanup_symbol_name(char *s)
> +{
> + return cleanup_symbol_name_thinlto(s) &&
> + cleanup_symbol_name_thinlto_cfi(s);
> +}
>
> /* Lookup the address for this symbol. Returns 0 if not found. */
> unsigned long kallsyms_lookup_name(const char *name)
> --
> 2.32.0.93.g670b81a890-goog
>

Is there any reason that we cannot eliminate the stubs and combine the
functions, or am I missing something? Completely untested diff.

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index c851ca0ed357..014b59ad68a3 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -161,26 +161,36 @@ static unsigned long kallsyms_sym_address(int idx)
return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
}

-#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
-/*
- * LLVM appends a hash to static function names when ThinLTO and CFI are
- * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
- * This causes confusion and potentially breaks user space tools, so we
- * strip the suffix from expanded symbol names.
- */
-static inline bool cleanup_symbol_name(char *s)
+static inline bool cleanup_symbol_name_thinlto(char *s)
{
char *res;

- res = strrchr(s, '$');
+ if (!IS_ENABLED(CONFIG_LTO_CLANG_THIN))
+ return false;
+
+ /*
+ * LLVM appends a suffix for local variables that must be promoted to global
+ * scope as part of ThinLTO. foo() becomes foo.llvm.974640843467629774. This
+ * can break hooking of static functions with kprobes.
+ */
+ res = strstr(s, ".llvm.");
if (res)
*res = '\0';

+ /*
+ * LLVM appends a hash to static function names when ThinLTO and CFI are
+ * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
+ * This causes confusion and potentially breaks user space tools, so we
+ * strip the suffix from expanded symbol names.
+ */
+ if (IS_ENABLED(CONFIG_CFI_CLANG)) {
+ res = strrchr(s, '$');
+ if (res)
+ *res = '\0';
+ }
+
return res != NULL;
}
-#else
-static inline bool cleanup_symbol_name(char *s) { return false; }
-#endif

/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)
@@ -195,7 +205,7 @@ unsigned long kallsyms_lookup_name(const char *name)
if (strcmp(namebuf, name) == 0)
return kallsyms_sym_address(i);

- if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0)
+ if (cleanup_symbol_name_thinlto(namebuf) && strcmp(namebuf, name) == 0)
return kallsyms_sym_address(i);
}
return module_kallsyms_lookup_name(name);

2021-06-29 00:36:43

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH v3] kallsyms: strip LTO suffixes from static functions

On 6/28/2021 3:01 PM, Nick Desaulniers wrote:
> On Mon, Jun 28, 2021 at 2:20 PM Nathan Chancellor <[email protected]> wrote:
>>
>> On Mon, Jun 28, 2021 at 01:31:06PM -0700, Nick Desaulniers wrote:
>>> Similar to:
>>> commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
>>> functions")
>>>
>>> It's very common for compilers to modify the symbol name for static
>>> functions as part of optimizing transformations. That makes hooking
>>> static functions (that weren't inlined or DCE'd) with kprobes difficult.
>>>
>>> LLVM has yet another name mangling scheme used by thin LTO. Strip off
>>> these suffixes so that we can continue to hook such static functions.
>>>
>>> Reported-by: KE.LI(Lieke) <[email protected]>
>>> Signed-off-by: Nick Desaulniers <[email protected]>
>>> ---
>>> Changes v2 -> V3:
>>> * Un-nest preprocessor checks, as per Nathan.
>>>
>>> Changes v1 -> v2:
>>> * Both mangling schemes can occur for thinLTO + CFI, this new scheme can
>>> also occur for thinLTO without CFI. Split cleanup_symbol_name() into
>>> two function calls.
>>> * Drop KE.LI's tested by tag.
>>> * Do not carry Fangrui's Reviewed by tag.
>>> * Drop the inline keyword; it is meaningless.
>>> kernel/kallsyms.c | 32 +++++++++++++++++++++++++++++---
>>> 1 file changed, 29 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
>>> index 4067564ec59f..143c69e7e75d 100644
>>> --- a/kernel/kallsyms.c
>>> +++ b/kernel/kallsyms.c
>>> @@ -171,6 +171,26 @@ static unsigned long kallsyms_sym_address(int idx)
>>> return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
>>> }
>>>
>>> +#ifdef CONFIG_LTO_CLANG_THIN
>>> +/*
>>> + * LLVM appends a suffix for local variables that must be promoted to global
>>> + * scope as part of thin LTO. foo() becomes foo.llvm.974640843467629774. This
>>> + * can break hooking of static functions with kprobes.
>>> + */
>>> +static bool cleanup_symbol_name_thinlto(char *s)
>>> +{
>>> + char *res;
>>> +
>>> + res = strstr(s, ".llvm.");
>>> + if (res)
>>> + *res = '\0';
>>> +
>>> + return res != NULL;
>>> +}
>>> +#else
>>> +static bool cleanup_symbol_name_thinlto(char *s) { return false; }
>>> +#endif /* CONFIG_LTO_CLANG_THIN */
>>> +
>>> #if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
>>> /*
>>> * LLVM appends a hash to static function names when ThinLTO and CFI are
>>> @@ -178,7 +198,7 @@ static unsigned long kallsyms_sym_address(int idx)
>>> * This causes confusion and potentially breaks user space tools, so we
>>> * strip the suffix from expanded symbol names.
>>> */
>>> -static inline bool cleanup_symbol_name(char *s)
>>> +static bool cleanup_symbol_name_thinlto_cfi(char *s)
>>> {
>>> char *res;
>>>
>>> @@ -189,8 +209,14 @@ static inline bool cleanup_symbol_name(char *s)
>>> return res != NULL;
>>> }
>>> #else
>>> -static inline bool cleanup_symbol_name(char *s) { return false; }
>>> -#endif
>>> +static bool cleanup_symbol_name_thinlto_cfi(char *s) { return false; }
>>> +#endif /* CONFIG_CFI_CLANG && CONFIG_LTO_CLANG_THIN */
>>> +
>>> +static bool cleanup_symbol_name(char *s)
>>> +{
>>> + return cleanup_symbol_name_thinlto(s) &&
>>> + cleanup_symbol_name_thinlto_cfi(s);
>>> +}
>>>
>>> /* Lookup the address for this symbol. Returns 0 if not found. */
>>> unsigned long kallsyms_lookup_name(const char *name)
>>> --
>>> 2.32.0.93.g670b81a890-goog
>>>
>>
>> Is there any reason that we cannot eliminate the stubs and combine the
>> functions, or am I missing something? Completely untested diff.
>>
>> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
>> index c851ca0ed357..014b59ad68a3 100644
>> --- a/kernel/kallsyms.c
>> +++ b/kernel/kallsyms.c
>> @@ -161,26 +161,36 @@ static unsigned long kallsyms_sym_address(int idx)
>> return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
>> }
>>
>> -#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
>> -/*
>> - * LLVM appends a hash to static function names when ThinLTO and CFI are
>> - * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
>> - * This causes confusion and potentially breaks user space tools, so we
>> - * strip the suffix from expanded symbol names.
>> - */
>> -static inline bool cleanup_symbol_name(char *s)
>> +static inline bool cleanup_symbol_name_thinlto(char *s)
>
> Drop inline while touching this line.

Sure.

>> {
>> char *res;
>>
>> - res = strrchr(s, '$');
>> + if (!IS_ENABLED(CONFIG_LTO_CLANG_THIN))
>> + return false;
>> +
>> + /*
>> + * LLVM appends a suffix for local variables that must be promoted to global
>> + * scope as part of ThinLTO. foo() becomes foo.llvm.974640843467629774. This
>> + * can break hooking of static functions with kprobes.
>> + */
>> + res = strstr(s, ".llvm.");
>> if (res)
>> *res = '\0';
>
> Sure, this is nicer though within the true block we should `return
> true;` early rather than additionally check the $ mangling, I suppose.

I am not sure I follow? Are you talking about moving this into an else
block in the if statement below this?

This should probably be two separate patches, with the first one
eliminating the stub, dropping the inline, and renaming the function,
and the second one doing what this patch does, but I do not have a
strong opinion.

I do not mind if you take ownership of this diff, with or without
attribution.

>>
>> + /*
>> + * LLVM appends a hash to static function names when ThinLTO and CFI are
>> + * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
>> + * This causes confusion and potentially breaks user space tools, so we
>> + * strip the suffix from expanded symbol names.
>> + */
>> + if (IS_ENABLED(CONFIG_CFI_CLANG)) {
>> + res = strrchr(s, '$');
>> + if (res)
>> + *res = '\0';
>> + }
>> +
>> return res != NULL;
>> }
>> -#else
>> -static inline bool cleanup_symbol_name(char *s) { return false; }
>> -#endif
>>
>> /* Lookup the address for this symbol. Returns 0 if not found. */
>> unsigned long kallsyms_lookup_name(const char *name)
>> @@ -195,7 +205,7 @@ unsigned long kallsyms_lookup_name(const char *name)
>> if (strcmp(namebuf, name) == 0)
>> return kallsyms_sym_address(i);
>>
>> - if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0)
>> + if (cleanup_symbol_name_thinlto(namebuf) && strcmp(namebuf, name) == 0)
>> return kallsyms_sym_address(i);
>> }
>> return module_kallsyms_lookup_name(name);
>
>
>

2021-06-29 00:36:55

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v3] kallsyms: strip LTO suffixes from static functions

On Mon, Jun 28, 2021 at 2:20 PM Nathan Chancellor <[email protected]> wrote:
>
> On Mon, Jun 28, 2021 at 01:31:06PM -0700, Nick Desaulniers wrote:
> > Similar to:
> > commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
> > functions")
> >
> > It's very common for compilers to modify the symbol name for static
> > functions as part of optimizing transformations. That makes hooking
> > static functions (that weren't inlined or DCE'd) with kprobes difficult.
> >
> > LLVM has yet another name mangling scheme used by thin LTO. Strip off
> > these suffixes so that we can continue to hook such static functions.
> >
> > Reported-by: KE.LI(Lieke) <[email protected]>
> > Signed-off-by: Nick Desaulniers <[email protected]>
> > ---
> > Changes v2 -> V3:
> > * Un-nest preprocessor checks, as per Nathan.
> >
> > Changes v1 -> v2:
> > * Both mangling schemes can occur for thinLTO + CFI, this new scheme can
> > also occur for thinLTO without CFI. Split cleanup_symbol_name() into
> > two function calls.
> > * Drop KE.LI's tested by tag.
> > * Do not carry Fangrui's Reviewed by tag.
> > * Drop the inline keyword; it is meaningless.
> > kernel/kallsyms.c | 32 +++++++++++++++++++++++++++++---
> > 1 file changed, 29 insertions(+), 3 deletions(-)
> >
> > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> > index 4067564ec59f..143c69e7e75d 100644
> > --- a/kernel/kallsyms.c
> > +++ b/kernel/kallsyms.c
> > @@ -171,6 +171,26 @@ static unsigned long kallsyms_sym_address(int idx)
> > return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
> > }
> >
> > +#ifdef CONFIG_LTO_CLANG_THIN
> > +/*
> > + * LLVM appends a suffix for local variables that must be promoted to global
> > + * scope as part of thin LTO. foo() becomes foo.llvm.974640843467629774. This
> > + * can break hooking of static functions with kprobes.
> > + */
> > +static bool cleanup_symbol_name_thinlto(char *s)
> > +{
> > + char *res;
> > +
> > + res = strstr(s, ".llvm.");
> > + if (res)
> > + *res = '\0';
> > +
> > + return res != NULL;
> > +}
> > +#else
> > +static bool cleanup_symbol_name_thinlto(char *s) { return false; }
> > +#endif /* CONFIG_LTO_CLANG_THIN */
> > +
> > #if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
> > /*
> > * LLVM appends a hash to static function names when ThinLTO and CFI are
> > @@ -178,7 +198,7 @@ static unsigned long kallsyms_sym_address(int idx)
> > * This causes confusion and potentially breaks user space tools, so we
> > * strip the suffix from expanded symbol names.
> > */
> > -static inline bool cleanup_symbol_name(char *s)
> > +static bool cleanup_symbol_name_thinlto_cfi(char *s)
> > {
> > char *res;
> >
> > @@ -189,8 +209,14 @@ static inline bool cleanup_symbol_name(char *s)
> > return res != NULL;
> > }
> > #else
> > -static inline bool cleanup_symbol_name(char *s) { return false; }
> > -#endif
> > +static bool cleanup_symbol_name_thinlto_cfi(char *s) { return false; }
> > +#endif /* CONFIG_CFI_CLANG && CONFIG_LTO_CLANG_THIN */
> > +
> > +static bool cleanup_symbol_name(char *s)
> > +{
> > + return cleanup_symbol_name_thinlto(s) &&
> > + cleanup_symbol_name_thinlto_cfi(s);
> > +}
> >
> > /* Lookup the address for this symbol. Returns 0 if not found. */
> > unsigned long kallsyms_lookup_name(const char *name)
> > --
> > 2.32.0.93.g670b81a890-goog
> >
>
> Is there any reason that we cannot eliminate the stubs and combine the
> functions, or am I missing something? Completely untested diff.
>
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index c851ca0ed357..014b59ad68a3 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -161,26 +161,36 @@ static unsigned long kallsyms_sym_address(int idx)
> return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
> }
>
> -#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
> -/*
> - * LLVM appends a hash to static function names when ThinLTO and CFI are
> - * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
> - * This causes confusion and potentially breaks user space tools, so we
> - * strip the suffix from expanded symbol names.
> - */
> -static inline bool cleanup_symbol_name(char *s)
> +static inline bool cleanup_symbol_name_thinlto(char *s)

Drop inline while touching this line.

> {
> char *res;
>
> - res = strrchr(s, '$');
> + if (!IS_ENABLED(CONFIG_LTO_CLANG_THIN))
> + return false;
> +
> + /*
> + * LLVM appends a suffix for local variables that must be promoted to global
> + * scope as part of ThinLTO. foo() becomes foo.llvm.974640843467629774. This
> + * can break hooking of static functions with kprobes.
> + */
> + res = strstr(s, ".llvm.");
> if (res)
> *res = '\0';

Sure, this is nicer, though within the true block we should `return
true;` early rather than additionally checking the $ mangling, I suppose.

>
> + /*
> + * LLVM appends a hash to static function names when ThinLTO and CFI are
> + * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
> + * This causes confusion and potentially breaks user space tools, so we
> + * strip the suffix from expanded symbol names.
> + */
> + if (IS_ENABLED(CONFIG_CFI_CLANG)) {
> + res = strrchr(s, '$');
> + if (res)
> + *res = '\0';
> + }
> +
> return res != NULL;
> }
> -#else
> -static inline bool cleanup_symbol_name(char *s) { return false; }
> -#endif
>
> /* Lookup the address for this symbol. Returns 0 if not found. */
> unsigned long kallsyms_lookup_name(const char *name)
> @@ -195,7 +205,7 @@ unsigned long kallsyms_lookup_name(const char *name)
> if (strcmp(namebuf, name) == 0)
> return kallsyms_sym_address(i);
>
> - if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0)
> + if (cleanup_symbol_name_thinlto(namebuf) && strcmp(namebuf, name) == 0)
> return kallsyms_sym_address(i);
> }
> return module_kallsyms_lookup_name(name);



--
Thanks,
~Nick Desaulniers

2021-07-07 18:24:08

by Nick Desaulniers

[permalink] [raw]
Subject: [PATCH v4] kallsyms: strip LTO suffixes from static functions

Similar to:
commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
functions")

It's very common for compilers to modify the symbol name for static
functions as part of optimizing transformations. That makes hooking
static functions (that weren't inlined or DCE'd) with kprobes difficult.

LLVM has yet another name mangling scheme used by thin LTO. Strip off
these suffixes so that we can continue to hook such static functions.

Reported-by: KE.LI(Lieke) <[email protected]>
Suggested-by: Nathan Chancellor <[email protected]>
Signed-off-by: Nick Desaulniers <[email protected]>
---
Changes v3 -> v4:
* Convert this function to use IS_ENABLED rather than provide multiple
definitions based on preprocessor checks.
* Add Nathan's suggested-by.

Changes v2 -> v3:
* Un-nest preprocessor checks, as per Nathan.

Changes v1 -> v2:
* Both mangling schemes can occur for thinLTO + CFI, this new scheme can
also occur for thinLTO without CFI. Split cleanup_symbol_name() into
two function calls.
* Drop KE.LI's Tested-by tag.
* Do not carry Fangrui's Reviewed-by tag.
* Drop the inline keyword; it is meaningless.

kernel/kallsyms.c | 43 ++++++++++++++++++++++++++++++-------------
1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 4067564ec59f..a10dab216f4f 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -171,26 +171,43 @@ static unsigned long kallsyms_sym_address(int idx)
return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
}

-#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
-/*
- * LLVM appends a hash to static function names when ThinLTO and CFI are
- * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
- * This causes confusion and potentially breaks user space tools, so we
- * strip the suffix from expanded symbol names.
- */
-static inline bool cleanup_symbol_name(char *s)
+static bool cleanup_symbol_name(char *s)
{
char *res;

+ /*
+ * LLVM appends a suffix for local variables that must be promoted to
+ * global scope as part of ThinLTO. foo() becomes
+ * foo.llvm.974640843467629774. This can break hooking of static
+ * functions with kprobes.
+ */
+ if (!IS_ENABLED(CONFIG_LTO_CLANG_THIN))
+ return false;
+
+ res = strstr(s, ".llvm.");
+ if (res) {
+ *res = '\0';
+ return true;
+ }
+
+ /*
+ * LLVM appends a hash to static function names when ThinLTO and CFI
+ * are both enabled, i.e. foo() becomes
+ * foo$707af9a22804d33c81801f27dcfe489b. This causes confusion and
+ * potentially breaks user space tools, so we strip the suffix from
+ * expanded symbol names.
+ */
+ if (!IS_ENABLED(CONFIG_CFI_CLANG))
+ return false;
+
res = strrchr(s, '$');
- if (res)
+ if (res) {
*res = '\0';
+ return true;
+ }

- return res != NULL;
+ return false;
}
-#else
-static inline bool cleanup_symbol_name(char *s) { return false; }
-#endif

/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)
--
2.32.0.93.g670b81a890-goog

2021-07-07 18:36:50

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH v4] kallsyms: strip LTO suffixes from static functions

On 7/7/2021 11:18 AM, Nick Desaulniers wrote:
> Similar to:
> commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
> functions")
>
> It's very common for compilers to modify the symbol name for static
> functions as part of optimizing transformations. That makes hooking
> static functions (that weren't inlined or DCE'd) with kprobes difficult.
>
> LLVM has yet another name mangling scheme used by thin LTO. Strip off
> these suffixes so that we can continue to hook such static functions.
>
> Reported-by: KE.LI(Lieke) <[email protected]>
> Suggested-by: Nathan Chancellor <[email protected]>
> Signed-off-by: Nick Desaulniers <[email protected]>

Code looks fine, small comment about a comment below.

Reviewed-by: Nathan Chancellor <[email protected]>

> ---
> Changes v3 -> v4:
> * Convert this function to use IS_ENABLED rather than provide multiple
> definitions based on preprocessor checks.
> * Add Nathan's suggested-by.
>
> Changes v2 -> v3:
> * Un-nest preprocessor checks, as per Nathan.
>
> Changes v1 -> v2:
> * Both mangling schemes can occur for thinLTO + CFI, this new scheme can
> also occur for thinLTO without CFI. Split cleanup_symbol_name() into
> two function calls.
> * Drop KE.LI's tested by tag.
> * Do not carry Fangrui's Reviewed by tag.
> * Drop the inline keyword; it is meaningless.
>
> kernel/kallsyms.c | 43 ++++++++++++++++++++++++++++++-------------
> 1 file changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 4067564ec59f..a10dab216f4f 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -171,26 +171,43 @@ static unsigned long kallsyms_sym_address(int idx)
> return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
> }
>
> -#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
> -/*
> - * LLVM appends a hash to static function names when ThinLTO and CFI are
> - * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
> - * This causes confusion and potentially breaks user space tools, so we
> - * strip the suffix from expanded symbol names.
> - */
> -static inline bool cleanup_symbol_name(char *s)
> +static bool cleanup_symbol_name(char *s)
> {
> char *res;
>
> + /*
> + * LLVM appends a suffix for local variables that must be promoted to

This says local variables but the example uses a function? Is that correct?

> + * global scope as part of ThinLTO. foo() becomes
> + * foo.llvm.974640843467629774. This can break hooking of static
> + * functions with kprobes.
> + */
> + if (!IS_ENABLED(CONFIG_LTO_CLANG_THIN))
> + return false;
> +
> + res = strstr(s, ".llvm.");
> + if (res) {
> + *res = '\0';
> + return true;
> + }
> +
> + /*
> + * LLVM appends a hash to static function names when ThinLTO and CFI
> + * are both enabled, i.e. foo() becomes
> + * foo$707af9a22804d33c81801f27dcfe489b. This causes confusion and
> + * potentially breaks user space tools, so we strip the suffix from
> + * expanded symbol names.
> + */
> + if (!IS_ENABLED(CONFIG_CFI_CLANG))
> + return false;
> +
> res = strrchr(s, '$');
> - if (res)
> + if (res) {
> *res = '\0';
> + return true;
> + }
>
> - return res != NULL;
> + return false;
> }
> -#else
> -static inline bool cleanup_symbol_name(char *s) { return false; }
> -#endif
>
> /* Lookup the address for this symbol. Returns 0 if not found. */
> unsigned long kallsyms_lookup_name(const char *name)
>

2021-07-07 21:05:33

by Fangrui Song

[permalink] [raw]
Subject: Re: [PATCH v4] kallsyms: strip LTO suffixes from static functions

On Wed, Jul 7, 2021 at 11:34 AM Nathan Chancellor <[email protected]> wrote:
>
> On 7/7/2021 11:18 AM, Nick Desaulniers wrote:
> > Similar to:
> > commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
> > functions")
> >
> > It's very common for compilers to modify the symbol name for static
> > functions as part of optimizing transformations. That makes hooking
> > static functions (that weren't inlined or DCE'd) with kprobes difficult.
> >
> > LLVM has yet another name mangling scheme used by thin LTO. Strip off
> > these suffixes so that we can continue to hook such static functions.
> >
> > Reported-by: KE.LI(Lieke) <[email protected]>
> > Suggested-by: Nathan Chancellor <[email protected]>
> > Signed-off-by: Nick Desaulniers <[email protected]>
>
> Code looks fine, small comment about a comment below.
>
> Reviewed-by: Nathan Chancellor <[email protected]>
>
> > ---
> > Changes v3 -> v4:
> > * Convert this function to use IS_ENABLED rather than provide multiple
> > definitions based on preprocessor checks.
> > * Add Nathan's suggested-by.
> >
> > Changes v2 -> v3:
> > * Un-nest preprocessor checks, as per Nathan.
> >
> > Changes v1 -> v2:
> > * Both mangling schemes can occur for thinLTO + CFI, this new scheme can
> > also occur for thinLTO without CFI. Split cleanup_symbol_name() into
> > two function calls.
> > * Drop KE.LI's tested by tag.
> > * Do not carry Fangrui's Reviewed by tag.
> > * Drop the inline keyword; it is meaningless.
> >
> > kernel/kallsyms.c | 43 ++++++++++++++++++++++++++++++-------------
> > 1 file changed, 30 insertions(+), 13 deletions(-)
> >
> > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> > index 4067564ec59f..a10dab216f4f 100644
> > --- a/kernel/kallsyms.c
> > +++ b/kernel/kallsyms.c
> > @@ -171,26 +171,43 @@ static unsigned long kallsyms_sym_address(int idx)
> > return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
> > }
> >
> > -#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
> > -/*
> > - * LLVM appends a hash to static function names when ThinLTO and CFI are
> > - * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
> > - * This causes confusion and potentially breaks user space tools, so we
> > - * strip the suffix from expanded symbol names.
> > - */
> > -static inline bool cleanup_symbol_name(char *s)
> > +static bool cleanup_symbol_name(char *s)
> > {
> > char *res;
> >
> > + /*
> > + * LLVM appends a suffix for local variables that must be promoted to
>
> This says local variables but the example uses a function? Is that correct?

local functions/variables.

Both functions and variables can have a .llvm.[0-9]+ suffix.


Aside from this, the updated description looks good to me.

Reviewed-by: Fangrui Song <[email protected]>

> > + * global scope as part of ThinLTO. foo() becomes
> > + * foo.llvm.974640843467629774. This can break hooking of static
> > + * functions with kprobes.
> > + */
> > + if (!IS_ENABLED(CONFIG_LTO_CLANG_THIN))
> > + return false;
> > +
> > + res = strstr(s, ".llvm.");
> > + if (res) {
> > + *res = '\0';
> > + return true;
> > + }
> > +
> > + /*
> > + * LLVM appends a hash to static function names when ThinLTO and CFI
> > + * are both enabled, i.e. foo() becomes
> > + * foo$707af9a22804d33c81801f27dcfe489b. This causes confusion and
> > + * potentially breaks user space tools, so we strip the suffix from
> > + * expanded symbol names.
> > + */
> > + if (!IS_ENABLED(CONFIG_CFI_CLANG))
> > + return false;
> > +
> > res = strrchr(s, '$');
> > - if (res)
> > + if (res) {
> > *res = '\0';
> > + return true;
> > + }
> >
> > - return res != NULL;
> > + return false;
> > }
> > -#else
> > -static inline bool cleanup_symbol_name(char *s) { return false; }
> > -#endif
> >
> > /* Lookup the address for this symbol. Returns 0 if not found. */
> > unsigned long kallsyms_lookup_name(const char *name)
> >

2021-08-06 23:52:54

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4] kallsyms: strip LTO suffixes from static functions

On Wed, Jul 7, 2021 at 11:18 AM Nick Desaulniers
<[email protected]> wrote:
>
> Similar to:
> commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
> functions")
>
> It's very common for compilers to modify the symbol name for static
> functions as part of optimizing transformations. That makes hooking
> static functions (that weren't inlined or DCE'd) with kprobes difficult.
>
> LLVM has yet another name mangling scheme used by thin LTO. Strip off
> these suffixes so that we can continue to hook such static functions.
>
> Reported-by: KE.LI(Lieke) <[email protected]>
> Suggested-by: Nathan Chancellor <[email protected]>
> Signed-off-by: Nick Desaulniers <[email protected]>
> ---
> Changes v3 -> v4:
> * Convert this function to use IS_ENABLED rather than provide multiple
> definitions based on preprocessor checks.
> * Add Nathan's suggested-by.
>
> Changes v2 -> v3:
> * Un-nest preprocessor checks, as per Nathan.
>
> Changes v1 -> v2:
> * Both mangling schemes can occur for thinLTO + CFI, this new scheme can
> also occur for thinLTO without CFI. Split cleanup_symbol_name() into
> two function calls.
> * Drop KE.LI's tested by tag.
> * Do not carry Fangrui's Reviewed by tag.
> * Drop the inline keyword; it is meaningless.
>
> kernel/kallsyms.c | 43 ++++++++++++++++++++++++++++++-------------
> 1 file changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 4067564ec59f..a10dab216f4f 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -171,26 +171,43 @@ static unsigned long kallsyms_sym_address(int idx)
> return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
> }
>
> -#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
> -/*
> - * LLVM appends a hash to static function names when ThinLTO and CFI are
> - * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
> - * This causes confusion and potentially breaks user space tools, so we
> - * strip the suffix from expanded symbol names.
> - */
> -static inline bool cleanup_symbol_name(char *s)
> +static bool cleanup_symbol_name(char *s)
> {
> char *res;
>
> + /*
> + * LLVM appends a suffix for local variables that must be promoted to
> + * global scope as part of ThinLTO. foo() becomes
> + * foo.llvm.974640843467629774. This can break hooking of static
> + * functions with kprobes.
> + */
> + if (!IS_ENABLED(CONFIG_LTO_CLANG_THIN))
> + return false;
> +
> + res = strstr(s, ".llvm.");
> + if (res) {
> + *res = '\0';
> + return true;
> + }
> +
> + /*
> + * LLVM appends a hash to static function names when ThinLTO and CFI
> + * are both enabled, i.e. foo() becomes
> + * foo$707af9a22804d33c81801f27dcfe489b. This causes confusion and
> + * potentially breaks user space tools, so we strip the suffix from
> + * expanded symbol names.
> + */
> + if (!IS_ENABLED(CONFIG_CFI_CLANG))
> + return false;
> +
> res = strrchr(s, '$');
> - if (res)
> + if (res) {
> *res = '\0';
> + return true;
> + }

Note that starting with https://reviews.llvm.org/D97484, the hash
separator is '.' instead of '$'. It looks like this change will be in
Clang 13.

Sami

2021-10-01 21:00:07

by Nick Desaulniers

[permalink] [raw]
Subject: [PATCH v5] kallsyms: strip LTO suffixes from static functions

Similar to:
commit 8b8e6b5d3b01 ("kallsyms: strip ThinLTO hashes from static
functions")

It's very common for compilers to modify the symbol name for static
functions as part of optimizing transformations. That makes hooking
static functions (that weren't inlined or DCE'd) with kprobes difficult.

LLVM has yet another name mangling scheme used by thin LTO.

Combine handling of the various schemes by truncating at the first
'.'. Strip off these suffixes so that we can continue to hook such
static functions. Clang releases prior to clang-13 would use '$'
instead of '.' for the hash suffix added with ThinLTO + CFI.

Link: https://reviews.llvm.org/rGc6e5c4654bd5045fe22a1a52779e48e2038a404c
Reported-by: KE.LI(Lieke) <[email protected]>
Suggested-by: Fangrui Song <[email protected]>
Suggested-by: Nathan Chancellor <[email protected]>
Suggested-by: Padmanabha Srinivasaiah <[email protected]>
Suggested-by: Sami Tolvanen <[email protected]>
Signed-off-by: Nick Desaulniers <[email protected]>
---
Changes v4 -> v5:
* Absorb Padmanabha Srinivasaiah's patch from
https://lore.kernel.org/lkml/[email protected]/.
* Add Padmanabha's Suggested-by tag.
* Rewrite the patch to truncate after first '.', as per Sami's comment
from
https://lore.kernel.org/lkml/CABCJKue5Ay6_+8sibzh5wRh3gPzV1g72gJ9m2ot4E1ezj8bpHA@mail.gmail.com/.
* Add Sami's Suggested-by tag.
* Verify that the '$' delimiter only appears for
thin LTO + CFI + clang <= 12, use __clang_major__ to check.
* Update comments as per Nathan + Fangrui, add their Suggested-by tags.
* While Nathan + Fangrui did review v4, v5 is too different IMO to carry
those tags forward.

Changes v3 -> v4:
* Convert this function to use IS_ENABLED rather than provide multiple
definitions based on preprocessor checks.
* Add Nathan's suggested-by.

Changes v2 -> v3:
* Un-nest preprocessor checks, as per Nathan.

Changes v1 -> v2:
* Both mangling schemes can occur for thinLTO + CFI; this new scheme can
also occur for thinLTO without CFI. Split cleanup_symbol_name() into
two function calls.
* Drop KE.LI's tested by tag.
* Do not carry Fangrui's Reviewed by tag.
* Drop the inline keyword; it is meaningless.

kernel/kallsyms.c | 45 ++++++++++++++++++++++++++++++++-------------
1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 0ba87982d017..3e4766204b07 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -164,26 +164,45 @@ static unsigned long kallsyms_sym_address(int idx)
return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
}

-#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_LTO_CLANG_THIN)
-/*
- * LLVM appends a hash to static function names when ThinLTO and CFI are
- * both enabled, i.e. foo() becomes foo$707af9a22804d33c81801f27dcfe489b.
- * This causes confusion and potentially breaks user space tools, so we
- * strip the suffix from expanded symbol names.
- */
-static inline bool cleanup_symbol_name(char *s)
+static bool cleanup_symbol_name(char *s)
{
char *res;

+ if (!IS_ENABLED(CONFIG_LTO_CLANG))
+ return false;
+
+ /*
+ * LLVM appends various suffixes for local functions and variables that must
+ * be promoted to global scope as part of LTO. This can break hooking of
+ * static functions with kprobes. '.' is not a valid character in an
+ * identifier in C. Suffixes observed:
+ * - foo.llvm.[0-9a-f]+
+ * - foo.[0-9a-f]+
+ * - foo.[0-9a-f]+.cfi_jt
+ */
+ res = strchr(s, '.');
+ if (res) {
+ *res = '\0';
+ return true;
+ }
+
+ if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_LTO_CLANG_THIN) ||
+ __clang_major__ >= 13)
+ return false;
+
+ /*
+ * Prior to LLVM 13, the following suffixes were observed when thinLTO
+ * and CFI are both enabled:
+ * - foo$[0-9]+
+ */
res = strrchr(s, '$');
- if (res)
+ if (res) {
*res = '\0';
+ return true;
+ }

- return res != NULL;
+ return false;
}
-#else
-static inline bool cleanup_symbol_name(char *s) { return false; }
-#endif

/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)

base-commit: 4de593fb965fc2bd11a0b767e0c65ff43540a6e4
--
2.33.0.800.g4c38ced690-goog