2019-11-20 14:58:32

by Jessica Yu

[permalink] [raw]
Subject: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

Commit c3a6cf19e695 ("export: avoid code duplication in
include/linux/export.h") refactors export.h quite nicely, but introduces
a slight increase in memory usage due to using the empty string ""
instead of NULL to indicate that an exported symbol has no namespace. As
mentioned in that commit, this meant an increase of 1 byte per exported
symbol without a namespace. For example, if a kernel configuration has
about 10k exported symbols, this would mean that the size of
__ksymtab_strings would increase by roughly 10kB.

We can alleviate this situation by utilizing the SHF_MERGE and
SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
that the data in the section are null-terminated strings that can be
merged to eliminate duplication. More specifically, from the binutils
documentation - "for sections with both M and S, a string which is a
suffix of a larger string is considered a duplicate. Thus "def" will be
merged with "abcdef"; A reference to the first "def" will be changed to
a reference to "abcdef"+3". Thus, all the empty strings would be merged
as well as any strings that can be merged according to the cited method
above. For example, "memset" and "__memset" would be merged to just
"__memset" in __ksymtab_strings.

As of v5.4-rc5, the following statistics were gathered with x86
defconfig with approximately 10.7k exported symbols.

Size of __ksymtab_strings in vmlinux:
-------------------------------------
v5.4-rc5: 213834 bytes
v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
v5.4-rc5 with this patch: 205759 bytes

So, we already see memory savings of ~8kB compared to vanilla -rc5 and
savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.

Unfortunately, as of this writing, strings will not get deduplicated for
kernel modules, as ld does not do the deduplication for
SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
kernel modules are. A patch for ld is currently being worked on to
hopefully allow for string deduplication in relocatable files in the
future.

Suggested-by: Rasmus Villemoes <[email protected]>
Signed-off-by: Jessica Yu <[email protected]>
---
include/asm-generic/export.h | 13 ++++++++++---
include/linux/export.h | 28 ++++++++++++++++++++++------
2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index fa577978fbbd..d0704f2602f4 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -26,9 +26,16 @@
.endm

/*
- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
- * since we immediately emit into those sections anyway.
+ * note on .section use: we specify @progbits vs %progbits since usage of
+ * "M" (SHF_MERGE) section flag requires it.
*/
+
+#ifdef CONFIG_ARM
+#define ARCH_PROGBITS %progbits
+#else
+#define ARCH_PROGBITS @progbits
+#endif
+
.macro ___EXPORT_SYMBOL name,val,sec
#ifdef CONFIG_MODULES
.globl __ksymtab_\name
@@ -37,7 +44,7 @@
__ksymtab_\name:
__put \val, __kstrtab_\name
.previous
- .section __ksymtab_strings,"a"
+ .section __ksymtab_strings,"aMS",ARCH_PROGBITS,1
__kstrtab_\name:
.asciz "\name"
.previous
diff --git a/include/linux/export.h b/include/linux/export.h
index 201262793369..ab325a8e6bee 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -81,16 +81,32 @@ struct kernel_symbol {

#else

+#ifdef CONFIG_ARM
+#define ARCH_PROGBITS "%progbits"
+#else
+#define ARCH_PROGBITS "@progbits"
+#endif
+
+#define __KSTRTAB_ENTRY(sym) \
+ asm(" .section \"__ksymtab_strings\",\"aMS\","ARCH_PROGBITS",1\n" \
+ "__kstrtab_" #sym ": \n" \
+ " .asciz \"" #sym "\" \n" \
+ " .previous \n")
+
+#define __KSTRTAB_NS_ENTRY(sym, ns) \
+ asm(" .section \"__ksymtab_strings\",\"aMS\","ARCH_PROGBITS",1\n" \
+ "__kstrtabns_" #sym ": \n" \
+ " .asciz " #ns " \n" \
+ " .previous \n")
+
/* For every exported symbol, place a struct in the __ksymtab section */
#define ___EXPORT_SYMBOL(sym, sec, ns) \
extern typeof(sym) sym; \
+ extern const char __kstrtab_##sym[]; \
+ extern const char __kstrtabns_##sym[]; \
__CRC_SYMBOL(sym, sec); \
- static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = #sym; \
- static const char __kstrtabns_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = ns; \
+ __KSTRTAB_ENTRY(sym); \
+ __KSTRTAB_NS_ENTRY(sym, ns); \
__KSYMTAB_ENTRY(sym, sec)

#endif
--
2.16.4



2019-11-20 15:53:24

by Jessica Yu

[permalink] [raw]
Subject: Re: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

+++ Jessica Yu [20/11/19 15:51 +0100]:
>Commit c3a6cf19e695 ("export: avoid code duplication in
>include/linux/export.h") refactors export.h quite nicely, but introduces
>a slight increase in memory usage due to using the empty string ""
>instead of NULL to indicate that an exported symbol has no namespace. As
>mentioned in that commit, this meant an increase of 1 byte per exported
>symbol without a namespace. For example, if a kernel configuration has
>about 10k exported symbols, this would mean that the size of
>__ksymtab_strings would increase by roughly 10kB.
>
>We can alleviate this situation by utilizing the SHF_MERGE and
>SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>that the data in the section are null-terminated strings that can be
>merged to eliminate duplication. More specifically, from the binutils
>documentation - "for sections with both M and S, a string which is a
>suffix of a larger string is considered a duplicate. Thus "def" will be
>merged with "abcdef"; A reference to the first "def" will be changed to
>a reference to "abcdef"+3". Thus, all the empty strings would be merged
>as well as any strings that can be merged according to the cited method
>above. For example, "memset" and "__memset" would be merged to just
>"__memset" in __ksymtab_strings.
>
>As of v5.4-rc5, the following statistics were gathered with x86
>defconfig with approximately 10.7k exported symbols.
>
>Size of __ksymtab_strings in vmlinux:
>-------------------------------------
>v5.4-rc5: 213834 bytes
>v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>v5.4-rc5 with this patch: 205759 bytes
>
>So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>
>Unfortunately, as of this writing, strings will not get deduplicated for
>kernel modules, as ld does not do the deduplication for
>SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>kernel modules are. A patch for ld is currently being worked on to
>hopefully allow for string deduplication in relocatable files in the
>future.
>
>Suggested-by: Rasmus Villemoes <[email protected]>
>Signed-off-by: Jessica Yu <[email protected]>
>---
> include/asm-generic/export.h | 13 ++++++++++---
> include/linux/export.h | 28 ++++++++++++++++++++++------
> 2 files changed, 32 insertions(+), 9 deletions(-)
>
>diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>index fa577978fbbd..d0704f2602f4 100644
>--- a/include/asm-generic/export.h
>+++ b/include/asm-generic/export.h
>@@ -26,9 +26,16 @@
> .endm
>
> /*
>- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>- * since we immediately emit into those sections anyway.
>+ * note on .section use: we specify @progbits vs %progbits since usage of
>+ * "M" (SHF_MERGE) section flag requires it.
> */
>+
>+#ifdef CONFIG_ARM
>+#define ARCH_PROGBITS %progbits
>+#else
>+#define ARCH_PROGBITS @progbits
>+#endif
>+
> .macro ___EXPORT_SYMBOL name,val,sec
> #ifdef CONFIG_MODULES
> .globl __ksymtab_\name
>@@ -37,7 +44,7 @@
> __ksymtab_\name:
> __put \val, __kstrtab_\name
> .previous
>- .section __ksymtab_strings,"a"
>+ .section __ksymtab_strings,"aMS",ARCH_PROGBITS,1
> __kstrtab_\name:
> .asciz "\name"
> .previous
>diff --git a/include/linux/export.h b/include/linux/export.h
>index 201262793369..ab325a8e6bee 100644
>--- a/include/linux/export.h
>+++ b/include/linux/export.h
>@@ -81,16 +81,32 @@ struct kernel_symbol {
>
> #else
>
>+#ifdef CONFIG_ARM
>+#define ARCH_PROGBITS "%progbits"
>+#else
>+#define ARCH_PROGBITS "@progbits"
>+#endif

Just a comment, I don't like the duplication of this block, but I
wasn't sure where was the best place to have this define since
asm-generic/export.h and linux/export.h don't share includes, and
includes are generally kept to a minimum. If anyone has a better
idea, please let me know.


2019-11-21 10:53:12

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On 20/11/2019 15.51, Jessica Yu wrote:
>
> We can alleviate this situation by utilizing the SHF_MERGE and
> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
> that the data in the section are null-terminated strings

[pet peeve: nul, not null.]

> As of v5.4-rc5, the following statistics were gathered with x86
> defconfig with approximately 10.7k exported symbols.
>
> Size of __ksymtab_strings in vmlinux:
> -------------------------------------
> v5.4-rc5: 213834 bytes
> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
> v5.4-rc5 with this patch: 205759 bytes
>
> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.

Yippee :) Thanks for picking up the ball and sending this.

> /*
> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
> - * since we immediately emit into those sections anyway.
> + * note on .section use: we specify @progbits vs %progbits since usage of
> + * "M" (SHF_MERGE) section flag requires it.
> */
> +
> +#ifdef CONFIG_ARM
> +#define ARCH_PROGBITS %progbits
> +#else
> +#define ARCH_PROGBITS @progbits
> +#endif

Did you figure out a way to determine if ARM is the only odd one? I was
just going by gas' documentation which mentions ARM as an example, but
doesn't really provide a way to know what each arch uses. I suppose 0day
will tell us shortly.

If we want to avoid arch-ifdefs in these headers (and that could become
unwieldy if ARM is not the only one) I think one could add a
asm-generic/progbits.h, add progbits.h to mandatory-y in
include/asm-generic/Kbuild, and then just provide a progbits.h on ARM
(and the other exceptions) - then I think the kbuild logic automatically
makes "#include <asm/progbits.h>" pick up the right one. And the header
could define ARCH_PROGBITS with or without the double quotes depending
on __ASSEMBLY__.

OTOH, adding a whole new header just for this may be overkill.

Rasmus

2019-11-21 16:13:54

by Jessica Yu

[permalink] [raw]
Subject: Re: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

+++ Rasmus Villemoes [21/11/19 11:51 +0100]:
>On 20/11/2019 15.51, Jessica Yu wrote:
>>
>> We can alleviate this situation by utilizing the SHF_MERGE and
>> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>> that the data in the section are null-terminated strings
>
>[pet peeve: nul, not null.]

Ah, right you are!

>> As of v5.4-rc5, the following statistics were gathered with x86
>> defconfig with approximately 10.7k exported symbols.
>>
>> Size of __ksymtab_strings in vmlinux:
>> -------------------------------------
>> v5.4-rc5: 213834 bytes
>> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>> v5.4-rc5 with this patch: 205759 bytes
>>
>> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>
>Yippee :) Thanks for picking up the ball and sending this.

And thanks for the idea in the first place :-)

>> /*
>> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>> - * since we immediately emit into those sections anyway.
>> + * note on .section use: we specify @progbits vs %progbits since usage of
>> + * "M" (SHF_MERGE) section flag requires it.
>> */
>> +
>> +#ifdef CONFIG_ARM
>> +#define ARCH_PROGBITS %progbits
>> +#else
>> +#define ARCH_PROGBITS @progbits
>> +#endif
>
>Did you figure out a way to determine if ARM is the only odd one? I was
>just going by gas' documentation which mentions ARM as an example, but
>doesn't really provide a way to know what each arch uses. I suppose 0day
>will tell us shortly.

I *think* so. At least, I was going off of
drivers/base/firmware_loader/builtin/Makefile and
scripts/recordmcount.pl, which were the only other places that I found
that reference the %progbits vs @progbits oddity. They only use
%progbits in the case of CONFIG_ARM and @progbits for other
arches. I wasn't sure about arm64, but I looked at the assembly files
gcc produced and it looked like @progbits was used there. Added some
arm64 people to CC since they would know :-)

>If we want to avoid arch-ifdefs in these headers (and that could become
>unwieldy if ARM is not the only one) I think one could add a
>asm-generic/progbits.h, add progbits.h to mandatory-y in
>include/asm-generic/Kbuild, and then just provide a progbits.h on ARM
>(and the other exceptions) - then I think the kbuild logic automatically
>makes "#include <asm/progbits.h>" pick up the right one. And the header
>could define ARCH_PROGBITS with or without the double quotes depending
>on __ASSEMBLY__.

I think this would work, and it feels like the more correct solution
especially if arm isn't the only one with the odd progbits char. It
might be overkill if it's just arm that's affected though. I leave it
to Masahiro to see what he prefers.

Thanks!

Jessica


2019-11-21 16:55:06

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

Hi Jessica,

On Thu, Nov 21, 2019 at 05:09:20PM +0100, Jessica Yu wrote:
> +++ Rasmus Villemoes [21/11/19 11:51 +0100]:
> > On 20/11/2019 15.51, Jessica Yu wrote:
> > > /*
> > > - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
> > > - * since we immediately emit into those sections anyway.
> > > + * note on .section use: we specify @progbits vs %progbits since usage of
> > > + * "M" (SHF_MERGE) section flag requires it.
> > > */
> > > +
> > > +#ifdef CONFIG_ARM
> > > +#define ARCH_PROGBITS %progbits
> > > +#else
> > > +#define ARCH_PROGBITS @progbits
> > > +#endif
> >
> > Did you figure out a way to determine if ARM is the only odd one? I was
> > just going by gas' documentation which mentions ARM as an example, but
> > doesn't really provide a way to know what each arch uses. I suppose 0day
> > will tell us shortly.
>
> I *think* so. At least, I was going off of
> drivers/base/firmware_loader/builtin/Makefile and
> scripts/recordmcount.pl, which were the only other places that I found
> that reference the %progbits vs @progbits oddity. They only use
> %progbits in the case of CONFIG_ARM and @progbits for other
> arches. I wasn't sure about arm64, but I looked at the assembly files
> gcc produced and it looked like @progbits was used there. Added some
> arm64 people to CC since they would know :-)

The '@' character is a comment delimiter for 32-bit ARM assembly, so that's
why you end up having to use a different character there. This isn't the
case for arm64, where you need to use standard C/C++ comment delimiters
instead and so '@progbits' should work correctly.

Hope that helps,

Will

2019-11-22 11:46:22

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Fri, Nov 22, 2019 at 1:09 AM Jessica Yu <[email protected]> wrote:
>
> +++ Rasmus Villemoes [21/11/19 11:51 +0100]:
> >On 20/11/2019 15.51, Jessica Yu wrote:
> >>
> >> We can alleviate this situation by utilizing the SHF_MERGE and
> >> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
> >> that the data in the section are null-terminated strings
> >
> >[pet peeve: nul, not null.]
>
> Ah, right you are!
>
> >> As of v5.4-rc5, the following statistics were gathered with x86
> >> defconfig with approximately 10.7k exported symbols.
> >>
> >> Size of __ksymtab_strings in vmlinux:
> >> -------------------------------------
> >> v5.4-rc5: 213834 bytes
> >> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
> >> v5.4-rc5 with this patch: 205759 bytes
> >>
> >> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
> >> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
> >
> >Yippee :) Thanks for picking up the ball and sending this.
>
> And thanks for the idea in the first place :-)
>
> >> /*
> >> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
> >> - * since we immediately emit into those sections anyway.
> >> + * note on .section use: we specify @progbits vs %progbits since usage of
> >> + * "M" (SHF_MERGE) section flag requires it.
> >> */
> >> +
> >> +#ifdef CONFIG_ARM
> >> +#define ARCH_PROGBITS %progbits
> >> +#else
> >> +#define ARCH_PROGBITS @progbits
> >> +#endif
> >
> >Did you figure out a way to determine if ARM is the only odd one? I was
> >just going by gas' documentation which mentions ARM as an example, but
> >doesn't really provide a way to know what each arch uses. I suppose 0day
> >will tell us shortly.
>
> I *think* so. At least, I was going off of
> drivers/base/firmware_loader/builtin/Makefile and
> scripts/recordmcount.pl, which were the only other places that I found
> that reference the %progbits vs @progbits oddity. They only use
> %progbits in the case of CONFIG_ARM and @progbits for other
> arches. I wasn't sure about arm64, but I looked at the assembly files
> gcc produced and it looked like @progbits was used there. Added some
> arm64 people to CC since they would know :-)
>
> >If we want to avoid arch-ifdefs in these headers (and that could become
> >unwieldy if ARM is not the only one) I think one could add a
> >asm-generic/progbits.h, add progbits.h to mandatory-y in
> >include/asm-generic/Kbuild, and then just provide a progbits.h on ARM
> >(and the other exceptions) - then I think the kbuild logic automatically
> >makes "#include <asm/progbits.h>" pick up the right one. And the header
> >could define ARCH_PROGBITS with or without the double quotes depending
> >on __ASSEMBLY__.
>
> I think this would work, and it feels like the more correct solution
> especially if arm isn't the only one with the odd progbits char. It
> might be overkill if it's just arm that's affected though. I leave it
> to Masahiro to see what he prefers.
>


BTW, is there any reason why
not use %progbits all the time?


include/linux/init.h hard-codes %progbits

#define __INITDATA .section ".init.data","aw",%progbits
#define __INITRODATA .section ".init.rodata","a",%progbits


So, my understanding is '%' works for all architectures,
but it is better to ask 0-day bot to test it.



--
Best Regards
Masahiro Yamada

2019-11-25 09:33:19

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

cc += binutils ML

[on @progbits v %progbits in generic (data only) assembly code]

On 22/11/2019 12.44, Masahiro Yamada wrote:
> On Fri, Nov 22, 2019 at 1:09 AM Jessica Yu <[email protected]> wrote:
>>

>> I think this would work, and it feels like the more correct solution
>> especially if arm isn't the only one with the odd progbits char. It
>> might be overkill if it's just arm that's affected though. I leave it
>> to Masahiro to see what he prefers.
>>
>
>
> BTW, is there any reason why
> not use %progbits all the time?
>
>
> include/linux/init.h hard-codes %progbits
>
> #define __INITDATA .section ".init.data","aw",%progbits
> #define __INITRODATA .section ".init.rodata","a",%progbits
>
>
> So, my understanding is '%' works for all architectures,
> but it is better to ask 0-day bot to test it.

It seems that you're absolutely right. The binutils source has code like

+ if (*input_line_pointer == '@' || *input_line_pointer == '%')
+ {
+ ++input_line_pointer;
+ if (strncmp (input_line_pointer, "progbits",
+ sizeof "progbits" - 1) == 0)
+ {
+ type = SHT_PROGBITS;
+ input_line_pointer += sizeof "progbits" - 1;
+ }
+ else if (strncmp (input_line_pointer, "nobits",
+ sizeof "nobits" - 1) == 0)
+ {
+ type = SHT_NOBITS;
+ input_line_pointer += sizeof "nobits" - 1;
+ }


all the way back from

commit 252b5132c753830d5fd56823373aed85f2a0db63 (tag: binu_ss_19990502)
Author: Richard Henderson <[email protected]>
Date: Mon May 3 07:29:11 1999 +0000

19990502 sourceware import

So yes, let's just try unconditionally using %progbits, that makes
everything much simpler.

The only reason I thought one needed to do it differently on ARM is this
from the gas docs:

===
The optional TYPE argument may contain one of the following
constants:

'@progbits'
section contains data
...
Note on targets where the '@' character is the start of a comment (eg
ARM) then another character is used instead. For example the ARM port
uses the '%' character.
===

That doesn't suggest the possibility that % or some other character
might work on all architectures.

Jessica, can you resend using just %progbits to let kbuild chew on that?
Please include a comment about the misleading gas documentation.

Rasmus

2019-11-25 11:29:44

by Jessica Yu

[permalink] [raw]
Subject: Re: [PATCH] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

+++ Rasmus Villemoes [25/11/19 10:29 +0100]:
>cc += binutils ML
>
>[on @progbits v %progbits in generic (data only) assembly code]
>
>On 22/11/2019 12.44, Masahiro Yamada wrote:
>> On Fri, Nov 22, 2019 at 1:09 AM Jessica Yu <[email protected]> wrote:
>>>
>
>>> I think this would work, and it feels like the more correct solution
>>> especially if arm isn't the only one with the odd progbits char. It
>>> might be overkill if it's just arm that's affected though. I leave it
>>> to Masahiro to see what he prefers.
>>>
>>
>>
>> BTW, is there any reason why
>> not use %progbits all the time?
>>
>>
>> include/linux/init.h hard-codes %progbits
>>
>> #define __INITDATA .section ".init.data","aw",%progbits
>> #define __INITRODATA .section ".init.rodata","a",%progbits
>>
>>
>> So, my understanding is '%' works for all architectures,
>> but it is better to ask 0-day bot to test it.
>
>It seems that you're absolutely right. The binutils source has code like
>
>+ if (*input_line_pointer == '@' || *input_line_pointer == '%')
>+ {
>+ ++input_line_pointer;
>+ if (strncmp (input_line_pointer, "progbits",
>+ sizeof "progbits" - 1) == 0)
>+ {
>+ type = SHT_PROGBITS;
>+ input_line_pointer += sizeof "progbits" - 1;
>+ }
>+ else if (strncmp (input_line_pointer, "nobits",
>+ sizeof "nobits" - 1) == 0)
>+ {
>+ type = SHT_NOBITS;
>+ input_line_pointer += sizeof "nobits" - 1;
>+ }

Yeah, I saw this too while digging. I also came across this commit in
the binutils source:

commit ecc054c097e7ced281d02ef9632eb0261a410b96
Author: Thomas Preud'homme <[email protected]>
Date: Fri Mar 2 11:51:34 2018 +0000

[GDB/testsuite] Use %progbits in watch-loc.c

While using @progbits in .pushsection work on some targets, it does not
work on arm target where this introduces a comment. This patch replaces
its use in gdb.dlang/watch-loc.c and gdb.mi/dw2-ref-missing-frame-func.c
by %progbits which should work on all targets since it is used in
target-independent elf/section7.s GAS test.

2018-03-02 Thomas Preud'homme <[email protected]>

gdb/testsuite/
* gdb.dlang/watch-loc.c: Use %progbits instead of @progbits.
* gdb.mi/dw2-ref-missing-frame-func.c: Likewise.

So that seems to confirm this theory :-) I'm just surprised it isn't
documented anywhere it seems.

>The only reason I thought one needed to do it differently on ARM is this
>from the gas docs:
>
>===
> The optional TYPE argument may contain one of the following
>constants:
>
>'@progbits'
> section contains data
>...
> Note on targets where the '@' character is the start of a comment (eg
>ARM) then another character is used instead. For example the ARM port
>uses the '%' character.
>===
>
>That doesn't suggest the possibility that % or some other character
>might work on all architectures.
>
>Jessica, can you resend using just %progbits to let kbuild chew on that?
>Please include a comment about the misleading gas documentation.

Yup, sounds good. Thanks!

Jessica

2019-11-25 15:46:25

by Jessica Yu

[permalink] [raw]
Subject: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

Commit c3a6cf19e695 ("export: avoid code duplication in
include/linux/export.h") refactors export.h quite nicely, but introduces
a slight increase in memory usage due to using the empty string ""
instead of NULL to indicate that an exported symbol has no namespace. As
mentioned in that commit, this meant an increase of 1 byte per exported
symbol without a namespace. For example, if a kernel configuration has
about 10k exported symbols, this would mean that the size of
__ksymtab_strings would increase by roughly 10kB.

We can alleviate this situation by utilizing the SHF_MERGE and
SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
that the data in the section are null-terminated strings that can be
merged to eliminate duplication. More specifically, from the binutils
documentation - "for sections with both M and S, a string which is a
suffix of a larger string is considered a duplicate. Thus "def" will be
merged with "abcdef"; A reference to the first "def" will be changed to
a reference to "abcdef"+3". Thus, all the empty strings would be merged
as well as any strings that can be merged according to the cited method
above. For example, "memset" and "__memset" would be merged to just
"__memset" in __ksymtab_strings.

As of v5.4-rc5, the following statistics were gathered with x86
defconfig with approximately 10.7k exported symbols.

Size of __ksymtab_strings in vmlinux:
-------------------------------------
v5.4-rc5: 213834 bytes
v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
v5.4-rc5 with this patch: 205759 bytes

So, we already see memory savings of ~8kB compared to vanilla -rc5 and
savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.

Unfortunately, as of this writing, strings will not get deduplicated for
kernel modules, as ld does not do the deduplication for
SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
kernel modules are. A patch for ld is currently being worked on to
hopefully allow for string deduplication in relocatable files in the
future.

Suggested-by: Rasmus Villemoes <[email protected]>
Signed-off-by: Jessica Yu <[email protected]>
---

v2: use %progbits throughout and document the oddity in a comment.

include/asm-generic/export.h | 8 +++++---
include/linux/export.h | 27 +++++++++++++++++++++------
2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index fa577978fbbd..23bc98e97a66 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -26,9 +26,11 @@
.endm

/*
- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
- * since we immediately emit into those sections anyway.
+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
+ * former apparently works on all arches according to the binutils source.
*/
+
.macro ___EXPORT_SYMBOL name,val,sec
#ifdef CONFIG_MODULES
.globl __ksymtab_\name
@@ -37,7 +39,7 @@
__ksymtab_\name:
__put \val, __kstrtab_\name
.previous
- .section __ksymtab_strings,"a"
+ .section __ksymtab_strings,"aMS",%progbits,1
__kstrtab_\name:
.asciz "\name"
.previous
diff --git a/include/linux/export.h b/include/linux/export.h
index 201262793369..3d835ca34d33 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -81,16 +81,31 @@ struct kernel_symbol {

#else

+/*
+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
+ * former apparently works on all arches according to the binutils source.
+ */
+#define __KSTRTAB_ENTRY(sym) \
+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
+ "__kstrtab_" #sym ": \n" \
+ " .asciz \"" #sym "\" \n" \
+ " .previous \n")
+
+#define __KSTRTAB_NS_ENTRY(sym, ns) \
+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
+ "__kstrtabns_" #sym ": \n" \
+ " .asciz " #ns " \n" \
+ " .previous \n")
+
/* For every exported symbol, place a struct in the __ksymtab section */
#define ___EXPORT_SYMBOL(sym, sec, ns) \
extern typeof(sym) sym; \
+ extern const char __kstrtab_##sym[]; \
+ extern const char __kstrtabns_##sym[]; \
__CRC_SYMBOL(sym, sec); \
- static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = #sym; \
- static const char __kstrtabns_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = ns; \
+ __KSTRTAB_ENTRY(sym); \
+ __KSTRTAB_NS_ENTRY(sym, ns); \
__KSYMTAB_ENTRY(sym, sec)

#endif
--
2.16.4

2019-11-26 09:57:58

by Jessica Yu

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

+++ Masahiro Yamada [26/11/19 17:32 +0900]:
>On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
>>
>> Commit c3a6cf19e695 ("export: avoid code duplication in
>> include/linux/export.h") refactors export.h quite nicely, but introduces
>> a slight increase in memory usage due to using the empty string ""
>> instead of NULL to indicate that an exported symbol has no namespace. As
>> mentioned in that commit, this meant an increase of 1 byte per exported
>> symbol without a namespace. For example, if a kernel configuration has
>> about 10k exported symbols, this would mean that the size of
>> __ksymtab_strings would increase by roughly 10kB.
>>
>> We can alleviate this situation by utilizing the SHF_MERGE and
>> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>> that the data in the section are null-terminated strings that can be
>> merged to eliminate duplication. More specifically, from the binutils
>> documentation - "for sections with both M and S, a string which is a
>> suffix of a larger string is considered a duplicate. Thus "def" will be
>> merged with "abcdef"; A reference to the first "def" will be changed to
>> a reference to "abcdef"+3". Thus, all the empty strings would be merged
>> as well as any strings that can be merged according to the cited method
>> above. For example, "memset" and "__memset" would be merged to just
>> "__memset" in __ksymtab_strings.
>>
>> As of v5.4-rc5, the following statistics were gathered with x86
>> defconfig with approximately 10.7k exported symbols.
>>
>> Size of __ksymtab_strings in vmlinux:
>> -------------------------------------
>> v5.4-rc5: 213834 bytes
>> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>> v5.4-rc5 with this patch: 205759 bytes
>>
>> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>>
>> Unfortunately, as of this writing, strings will not get deduplicated for
>> kernel modules, as ld does not do the deduplication for
>> SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>> kernel modules are. A patch for ld is currently being worked on to
>> hopefully allow for string deduplication in relocatable files in the
>> future.
>>
>> Suggested-by: Rasmus Villemoes <[email protected]>
>> Signed-off-by: Jessica Yu <[email protected]>
>> ---
>>
>> v2: use %progbits throughout and document the oddity in a comment.
>>
>> include/asm-generic/export.h | 8 +++++---
>> include/linux/export.h | 27 +++++++++++++++++++++------
>> 2 files changed, 26 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>> index fa577978fbbd..23bc98e97a66 100644
>> --- a/include/asm-generic/export.h
>> +++ b/include/asm-generic/export.h
>> @@ -26,9 +26,11 @@
>> .endm
>>
>> /*
>> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>> - * since we immediately emit into those sections anyway.
>> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
>> + * former apparently works on all arches according to the binutils source.
>> */
>> +
>> .macro ___EXPORT_SYMBOL name,val,sec
>> #ifdef CONFIG_MODULES
>> .globl __ksymtab_\name
>> @@ -37,7 +39,7 @@
>> __ksymtab_\name:
>> __put \val, __kstrtab_\name
>> .previous
>> - .section __ksymtab_strings,"a"
>> + .section __ksymtab_strings,"aMS",%progbits,1
>> __kstrtab_\name:
>> .asciz "\name"
>> .previous
>> diff --git a/include/linux/export.h b/include/linux/export.h
>> index 201262793369..3d835ca34d33 100644
>> --- a/include/linux/export.h
>> +++ b/include/linux/export.h
>> @@ -81,16 +81,31 @@ struct kernel_symbol {
>>
>> #else
>>
>> +/*
>> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
>> + * former apparently works on all arches according to the binutils source.
>> + */
>> +#define __KSTRTAB_ENTRY(sym) \
>> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>> + "__kstrtab_" #sym ": \n" \
>> + " .asciz \"" #sym "\" \n" \
>> + " .previous \n")
>> +
>> +#define __KSTRTAB_NS_ENTRY(sym, ns) \
>> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>> + "__kstrtabns_" #sym ": \n" \
>> + " .asciz " #ns " \n" \
>
>
>Hmm, it took some time for me to how this code works.
>
>ns is already a C string, then you added # to it,
>then I was confused.
>
>Personally, I prefer this code:
>" .asciz \"" ns "\" \n"
>
>so it looks in the same way as __KSTRTAB_ENTRY().
>
>
>
>BTW, you duplicated \"aMS\",%progbits,1" and ".previous"
>
>
>I would write it shorter, like this:
>
>
>#define ___EXPORT_SYMBOL(sym, sec, ns) \
> extern typeof(sym) sym; \
> extern const char __kstrtab_##sym[]; \
> extern const char __kstrtabns_##sym[]; \
> __CRC_SYMBOL(sym, sec); \
> asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
> "__kstrtab_" #sym ": \n" \
> " .asciz \"" #sym "\" \n" \
> "__kstrtabns_" #sym ": \n" \
> " .asciz \"" ns "\" \n" \
> " .previous \n"); \
> __KSYMTAB_ENTRY(sym, sec)
>

Sure, I can change that. Just thought it'd be easier to read with the
separate macros. Thanks for your comments!

2019-11-26 11:15:34

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
>
> Commit c3a6cf19e695 ("export: avoid code duplication in
> include/linux/export.h") refactors export.h quite nicely, but introduces
> a slight increase in memory usage due to using the empty string ""
> instead of NULL to indicate that an exported symbol has no namespace. As
> mentioned in that commit, this meant an increase of 1 byte per exported
> symbol without a namespace. For example, if a kernel configuration has
> about 10k exported symbols, this would mean that the size of
> __ksymtab_strings would increase by roughly 10kB.
>
> We can alleviate this situation by utilizing the SHF_MERGE and
> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
> that the data in the section are null-terminated strings that can be
> merged to eliminate duplication. More specifically, from the binutils
> documentation - "for sections with both M and S, a string which is a
> suffix of a larger string is considered a duplicate. Thus "def" will be
> merged with "abcdef"; A reference to the first "def" will be changed to
> a reference to "abcdef"+3". Thus, all the empty strings would be merged
> as well as any strings that can be merged according to the cited method
> above. For example, "memset" and "__memset" would be merged to just
> "__memset" in __ksymtab_strings.
>
> As of v5.4-rc5, the following statistics were gathered with x86
> defconfig with approximately 10.7k exported symbols.
>
> Size of __ksymtab_strings in vmlinux:
> -------------------------------------
> v5.4-rc5: 213834 bytes
> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
> v5.4-rc5 with this patch: 205759 bytes
>
> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>
> Unfortunately, as of this writing, strings will not get deduplicated for
> kernel modules, as ld does not do the deduplication for
> SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
> kernel modules are. A patch for ld is currently being worked on to
> hopefully allow for string deduplication in relocatable files in the
> future.
>
> Suggested-by: Rasmus Villemoes <[email protected]>
> Signed-off-by: Jessica Yu <[email protected]>
> ---
>
> v2: use %progbits throughout and document the oddity in a comment.
>
> include/asm-generic/export.h | 8 +++++---
> include/linux/export.h | 27 +++++++++++++++++++++------
> 2 files changed, 26 insertions(+), 9 deletions(-)
>
> diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
> index fa577978fbbd..23bc98e97a66 100644
> --- a/include/asm-generic/export.h
> +++ b/include/asm-generic/export.h
> @@ -26,9 +26,11 @@
> .endm
>
> /*
> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
> - * since we immediately emit into those sections anyway.
> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
> + * former apparently works on all arches according to the binutils source.
> */
> +
> .macro ___EXPORT_SYMBOL name,val,sec
> #ifdef CONFIG_MODULES
> .globl __ksymtab_\name
> @@ -37,7 +39,7 @@
> __ksymtab_\name:
> __put \val, __kstrtab_\name
> .previous
> - .section __ksymtab_strings,"a"
> + .section __ksymtab_strings,"aMS",%progbits,1
> __kstrtab_\name:
> .asciz "\name"
> .previous
> diff --git a/include/linux/export.h b/include/linux/export.h
> index 201262793369..3d835ca34d33 100644
> --- a/include/linux/export.h
> +++ b/include/linux/export.h
> @@ -81,16 +81,31 @@ struct kernel_symbol {
>
> #else
>
> +/*
> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
> + * former apparently works on all arches according to the binutils source.
> + */
> +#define __KSTRTAB_ENTRY(sym) \
> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
> + "__kstrtab_" #sym ": \n" \
> + " .asciz \"" #sym "\" \n" \
> + " .previous \n")
> +
> +#define __KSTRTAB_NS_ENTRY(sym, ns) \
> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
> + "__kstrtabns_" #sym ": \n" \
> + " .asciz " #ns " \n" \


Hmm, it took some time for me to how this code works.

ns is already a C string, then you added # to it,
then I was confused.

Personally, I prefer this code:
" .asciz \"" ns "\" \n"

so it looks in the same way as __KSTRTAB_ENTRY().



BTW, you duplicated \"aMS\",%progbits,1" and ".previous"


I would write it shorter, like this:


#define ___EXPORT_SYMBOL(sym, sec, ns) \
extern typeof(sym) sym; \
extern const char __kstrtab_##sym[]; \
extern const char __kstrtabns_##sym[]; \
__CRC_SYMBOL(sym, sec); \
asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
"__kstrtab_" #sym ": \n" \
" .asciz \"" #sym "\" \n" \
"__kstrtabns_" #sym ": \n" \
" .asciz \"" ns "\" \n" \
" .previous \n"); \
__KSYMTAB_ENTRY(sym, sec)








> + " .previous \n")
> +
> /* For every exported symbol, place a struct in the __ksymtab section */
> #define ___EXPORT_SYMBOL(sym, sec, ns) \
> extern typeof(sym) sym; \
> + extern const char __kstrtab_##sym[]; \
> + extern const char __kstrtabns_##sym[]; \
> __CRC_SYMBOL(sym, sec); \
> - static const char __kstrtab_##sym[] \
> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> - = #sym; \
> - static const char __kstrtabns_##sym[] \
> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> - = ns; \
> + __KSTRTAB_ENTRY(sym); \
> + __KSTRTAB_NS_ENTRY(sym, ns); \
> __KSYMTAB_ENTRY(sym, sec)
>
> #endif
> --
> 2.16.4
>


--
Best Regards
Masahiro Yamada

2019-11-26 14:29:40

by Matthias Maennich

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Tue, Nov 26, 2019 at 05:32:59PM +0900, Masahiro Yamada wrote:
>On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
>>
>> Commit c3a6cf19e695 ("export: avoid code duplication in
>> include/linux/export.h") refactors export.h quite nicely, but introduces
>> a slight increase in memory usage due to using the empty string ""
>> instead of NULL to indicate that an exported symbol has no namespace. As
>> mentioned in that commit, this meant an increase of 1 byte per exported
>> symbol without a namespace. For example, if a kernel configuration has
>> about 10k exported symbols, this would mean that the size of
>> __ksymtab_strings would increase by roughly 10kB.
>>
>> We can alleviate this situation by utilizing the SHF_MERGE and
>> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>> that the data in the section are null-terminated strings that can be
>> merged to eliminate duplication. More specifically, from the binutils
>> documentation - "for sections with both M and S, a string which is a
>> suffix of a larger string is considered a duplicate. Thus "def" will be
>> merged with "abcdef"; A reference to the first "def" will be changed to
>> a reference to "abcdef"+3". Thus, all the empty strings would be merged
>> as well as any strings that can be merged according to the cited method
>> above. For example, "memset" and "__memset" would be merged to just
>> "__memset" in __ksymtab_strings.
>>
>> As of v5.4-rc5, the following statistics were gathered with x86
>> defconfig with approximately 10.7k exported symbols.
>>
>> Size of __ksymtab_strings in vmlinux:
>> -------------------------------------
>> v5.4-rc5: 213834 bytes
>> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>> v5.4-rc5 with this patch: 205759 bytes
>>
>> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>>
>> Unfortunately, as of this writing, strings will not get deduplicated for
>> kernel modules, as ld does not do the deduplication for
>> SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>> kernel modules are. A patch for ld is currently being worked on to
>> hopefully allow for string deduplication in relocatable files in the
>> future.
>>

Thanks for working on this!

>> Suggested-by: Rasmus Villemoes <[email protected]>
>> Signed-off-by: Jessica Yu <[email protected]>
>> ---
>>
>> v2: use %progbits throughout and document the oddity in a comment.
>>
>> include/asm-generic/export.h | 8 +++++---
>> include/linux/export.h | 27 +++++++++++++++++++++------
>> 2 files changed, 26 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>> index fa577978fbbd..23bc98e97a66 100644
>> --- a/include/asm-generic/export.h
>> +++ b/include/asm-generic/export.h
>> @@ -26,9 +26,11 @@
>> .endm
>>
>> /*
>> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>> - * since we immediately emit into those sections anyway.
>> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
>> + * former apparently works on all arches according to the binutils source.
>> */
>> +
>> .macro ___EXPORT_SYMBOL name,val,sec
>> #ifdef CONFIG_MODULES
>> .globl __ksymtab_\name
>> @@ -37,7 +39,7 @@
>> __ksymtab_\name:
>> __put \val, __kstrtab_\name
>> .previous
>> - .section __ksymtab_strings,"a"
>> + .section __ksymtab_strings,"aMS",%progbits,1
>> __kstrtab_\name:
>> .asciz "\name"
>> .previous
>> diff --git a/include/linux/export.h b/include/linux/export.h
>> index 201262793369..3d835ca34d33 100644
>> --- a/include/linux/export.h
>> +++ b/include/linux/export.h
>> @@ -81,16 +81,31 @@ struct kernel_symbol {
>>
>> #else
>>
>> +/*
>> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
>> + * former apparently works on all arches according to the binutils source.
>> + */
>> +#define __KSTRTAB_ENTRY(sym) \
>> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>> + "__kstrtab_" #sym ": \n" \
>> + " .asciz \"" #sym "\" \n" \
>> + " .previous \n")
>> +
>> +#define __KSTRTAB_NS_ENTRY(sym, ns) \
>> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>> + "__kstrtabns_" #sym ": \n" \
>> + " .asciz " #ns " \n" \
>
>
>Hmm, it took some time for me to how this code works.
>
>ns is already a C string, then you added # to it,
>then I was confused.
>
>Personally, I prefer this code:
>" .asciz \"" ns "\" \n"
>
>so it looks in the same way as __KSTRTAB_ENTRY().

I agree with this, these entries should be consistent.

>
>
>
>BTW, you duplicated \"aMS\",%progbits,1" and ".previous"
>
>
>I would write it shorter, like this:
>
>
>#define ___EXPORT_SYMBOL(sym, sec, ns) \
> extern typeof(sym) sym; \
> extern const char __kstrtab_##sym[]; \
> extern const char __kstrtabns_##sym[]; \
> __CRC_SYMBOL(sym, sec); \
> asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
> "__kstrtab_" #sym ": \n" \
> " .asciz \"" #sym "\" \n" \
> "__kstrtabns_" #sym ": \n" \
> " .asciz \"" ns "\" \n" \
> " .previous \n"); \
> __KSYMTAB_ENTRY(sym, sec)
>

I would prefer the separate macros though (as initially proposed) as I
find them much more readable. The code is already a bit tricky to reason
about and I don't think the shorter version is enough of a gain.

>
>
>
>
>
>
>
>> + " .previous \n")
>> +
>> /* For every exported symbol, place a struct in the __ksymtab section */
>> #define ___EXPORT_SYMBOL(sym, sec, ns) \
>> extern typeof(sym) sym; \
>> + extern const char __kstrtab_##sym[]; \
>> + extern const char __kstrtabns_##sym[]; \
>> __CRC_SYMBOL(sym, sec); \
>> - static const char __kstrtab_##sym[] \
>> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>> - = #sym; \

You could keep simplified versions of these statements as comment for
the above macros to increase readability.

>> - static const char __kstrtabns_##sym[] \
>> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>> - = ns; \
>> + __KSTRTAB_ENTRY(sym); \
>> + __KSTRTAB_NS_ENTRY(sym, ns); \
>> __KSYMTAB_ENTRY(sym, sec)
>>
>> #endif
>> --
>> 2.16.4
>>

With the above addressed, please feel free to add

Reviewed-by: Matthias Maennich <[email protected]>

Cheers,
Matthias

>
>
>--
>Best Regards
>Masahiro Yamada

2019-11-26 16:21:58

by Jessica Yu

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

+++ Matthias Maennich [26/11/19 13:56 +0000]:
>On Tue, Nov 26, 2019 at 05:32:59PM +0900, Masahiro Yamada wrote:
>>On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
>>>
>>>Commit c3a6cf19e695 ("export: avoid code duplication in
>>>include/linux/export.h") refactors export.h quite nicely, but introduces
>>>a slight increase in memory usage due to using the empty string ""
>>>instead of NULL to indicate that an exported symbol has no namespace. As
>>>mentioned in that commit, this meant an increase of 1 byte per exported
>>>symbol without a namespace. For example, if a kernel configuration has
>>>about 10k exported symbols, this would mean that the size of
>>>__ksymtab_strings would increase by roughly 10kB.
>>>
>>>We can alleviate this situation by utilizing the SHF_MERGE and
>>>SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>>>that the data in the section are null-terminated strings that can be
>>>merged to eliminate duplication. More specifically, from the binutils
>>>documentation - "for sections with both M and S, a string which is a
>>>suffix of a larger string is considered a duplicate. Thus "def" will be
>>>merged with "abcdef"; A reference to the first "def" will be changed to
>>>a reference to "abcdef"+3". Thus, all the empty strings would be merged
>>>as well as any strings that can be merged according to the cited method
>>>above. For example, "memset" and "__memset" would be merged to just
>>>"__memset" in __ksymtab_strings.
>>>
>>>As of v5.4-rc5, the following statistics were gathered with x86
>>>defconfig with approximately 10.7k exported symbols.
>>>
>>>Size of __ksymtab_strings in vmlinux:
>>>-------------------------------------
>>>v5.4-rc5: 213834 bytes
>>>v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>>>v5.4-rc5 with this patch: 205759 bytes
>>>
>>>So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>>>savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>>>
>>>Unfortunately, as of this writing, strings will not get deduplicated for
>>>kernel modules, as ld does not do the deduplication for
>>>SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>>>kernel modules are. A patch for ld is currently being worked on to
>>>hopefully allow for string deduplication in relocatable files in the
>>>future.
>>>
>
>Thanks for working on this!
>
>>>Suggested-by: Rasmus Villemoes <[email protected]>
>>>Signed-off-by: Jessica Yu <[email protected]>
>>>---
>>>
>>>v2: use %progbits throughout and document the oddity in a comment.
>>>
>>> include/asm-generic/export.h | 8 +++++---
>>> include/linux/export.h | 27 +++++++++++++++++++++------
>>> 2 files changed, 26 insertions(+), 9 deletions(-)
>>>
>>>diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>>>index fa577978fbbd..23bc98e97a66 100644
>>>--- a/include/asm-generic/export.h
>>>+++ b/include/asm-generic/export.h
>>>@@ -26,9 +26,11 @@
>>> .endm
>>>
>>> /*
>>>- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>>>- * since we immediately emit into those sections anyway.
>>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>>>+ * former apparently works on all arches according to the binutils source.
>>> */
>>>+
>>> .macro ___EXPORT_SYMBOL name,val,sec
>>> #ifdef CONFIG_MODULES
>>> .globl __ksymtab_\name
>>>@@ -37,7 +39,7 @@
>>> __ksymtab_\name:
>>> __put \val, __kstrtab_\name
>>> .previous
>>>- .section __ksymtab_strings,"a"
>>>+ .section __ksymtab_strings,"aMS",%progbits,1
>>> __kstrtab_\name:
>>> .asciz "\name"
>>> .previous
>>>diff --git a/include/linux/export.h b/include/linux/export.h
>>>index 201262793369..3d835ca34d33 100644
>>>--- a/include/linux/export.h
>>>+++ b/include/linux/export.h
>>>@@ -81,16 +81,31 @@ struct kernel_symbol {
>>>
>>> #else
>>>
>>>+/*
>>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>>>+ * former apparently works on all arches according to the binutils source.
>>>+ */
>>>+#define __KSTRTAB_ENTRY(sym) \
>>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>>>+ "__kstrtab_" #sym ": \n" \
>>>+ " .asciz \"" #sym "\" \n" \
>>>+ " .previous \n")
>>>+
>>>+#define __KSTRTAB_NS_ENTRY(sym, ns) \
>>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>>>+ "__kstrtabns_" #sym ": \n" \
>>>+ " .asciz " #ns " \n" \
>>
>>
>>Hmm, it took some time for me to how this code works.
>>
>>ns is already a C string, then you added # to it,
>>then I was confused.
>>
>>Personally, I prefer this code:
>>" .asciz \"" ns "\" \n"
>>
>>so it looks in the same way as __KSTRTAB_ENTRY().
>
>I agree with this, these entries should be consistent.
>
>>
>>
>>
>>BTW, you duplicated \"aMS\",%progbits,1" and ".previous"
>>
>>
>>I would write it shorter, like this:
>>
>>
>>#define ___EXPORT_SYMBOL(sym, sec, ns) \
>> extern typeof(sym) sym; \
>> extern const char __kstrtab_##sym[]; \
>> extern const char __kstrtabns_##sym[]; \
>> __CRC_SYMBOL(sym, sec); \
>> asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
>> "__kstrtab_" #sym ": \n" \
>> " .asciz \"" #sym "\" \n" \
>> "__kstrtabns_" #sym ": \n" \
>> " .asciz \"" ns "\" \n" \
>> " .previous \n"); \
>> __KSYMTAB_ENTRY(sym, sec)
>>
>
>I would prefer the separate macros though (as initially proposed) as I
>find them much more readable. The code is already a bit tricky to reason
>about and I don't think the shorter version is enough of a gain.

Yeah, the macros were more readable IMO. But I could just squash them into one
__KSTRTAB_ENTRY macro as a compromise for Masahiro maybe?

Is this any better?

diff --git a/include/linux/export.h b/include/linux/export.h
index 201262793369..f4a8fc798a1b 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -81,16 +81,30 @@ struct kernel_symbol {

#else

+/*
+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
+ * former apparently works on all arches according to the binutils source.
+ *
+ * This basically corresponds to:
+ * const char __kstrtab_##sym[] __attribute__((section("__ksymtab_strings")) = #sym;
+ * const char __kstrtabns_##sym[] __attribute__((section("__ksymtab_strings")) = ns;
+ */
+#define __KSTRTAB_ENTRY(sym, ns) \
+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
+ "__kstrtab_" #sym ": \n" \
+ " .asciz \"" #sym "\" \n" \
+ "__kstrtabns_" #sym ": \n" \
+ " .asciz \"" ns "\" \n" \
+ " .previous \n")
+
/* For every exported symbol, place a struct in the __ksymtab section */
#define ___EXPORT_SYMBOL(sym, sec, ns) \
extern typeof(sym) sym; \
+ extern const char __kstrtab_##sym[]; \
+ extern const char __kstrtabns_##sym[]; \
__CRC_SYMBOL(sym, sec); \
- static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = #sym; \
- static const char __kstrtabns_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = ns; \
+ __KSTRTAB_ENTRY(sym, ns); \
__KSYMTAB_ENTRY(sym, sec)

#endif

>>
>>
>>
>>
>>
>>
>>
>>>+ " .previous \n")
>>>+
>>> /* For every exported symbol, place a struct in the __ksymtab section */
>>> #define ___EXPORT_SYMBOL(sym, sec, ns) \
>>> extern typeof(sym) sym; \
>>>+ extern const char __kstrtab_##sym[]; \
>>>+ extern const char __kstrtabns_##sym[]; \
>>> __CRC_SYMBOL(sym, sec); \
>>>- static const char __kstrtab_##sym[] \
>>>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>>>- = #sym; \
>
>You could keep simplified versions of these statements as comment for
>the above macros to increase readability.
>
>>>- static const char __kstrtabns_##sym[] \
>>>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>>>- = ns; \
>>>+ __KSTRTAB_ENTRY(sym); \
>>>+ __KSTRTAB_NS_ENTRY(sym, ns); \
>>> __KSYMTAB_ENTRY(sym, sec)
>>>
>>> #endif
>>>--
>>>2.16.4
>>>
>
>With the above addressed, please feel free to add
>
>Reviewed-by: Matthias Maennich <[email protected]>
>
>Cheers,
>Matthias
>
>>
>>
>>--
>>Best Regards
>>Masahiro Yamada

2019-11-26 16:23:36

by Matthias Maennich

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Tue, Nov 26, 2019 at 04:31:54PM +0100, Jessica Yu wrote:
>+++ Matthias Maennich [26/11/19 13:56 +0000]:
>>On Tue, Nov 26, 2019 at 05:32:59PM +0900, Masahiro Yamada wrote:
>>>On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
>>>>
>>>>Commit c3a6cf19e695 ("export: avoid code duplication in
>>>>include/linux/export.h") refactors export.h quite nicely, but introduces
>>>>a slight increase in memory usage due to using the empty string ""
>>>>instead of NULL to indicate that an exported symbol has no namespace. As
>>>>mentioned in that commit, this meant an increase of 1 byte per exported
>>>>symbol without a namespace. For example, if a kernel configuration has
>>>>about 10k exported symbols, this would mean that the size of
>>>>__ksymtab_strings would increase by roughly 10kB.
>>>>
>>>>We can alleviate this situation by utilizing the SHF_MERGE and
>>>>SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>>>>that the data in the section are null-terminated strings that can be
>>>>merged to eliminate duplication. More specifically, from the binutils
>>>>documentation - "for sections with both M and S, a string which is a
>>>>suffix of a larger string is considered a duplicate. Thus "def" will be
>>>>merged with "abcdef"; A reference to the first "def" will be changed to
>>>>a reference to "abcdef"+3". Thus, all the empty strings would be merged
>>>>as well as any strings that can be merged according to the cited method
>>>>above. For example, "memset" and "__memset" would be merged to just
>>>>"__memset" in __ksymtab_strings.
>>>>
>>>>As of v5.4-rc5, the following statistics were gathered with x86
>>>>defconfig with approximately 10.7k exported symbols.
>>>>
>>>>Size of __ksymtab_strings in vmlinux:
>>>>-------------------------------------
>>>>v5.4-rc5: 213834 bytes
>>>>v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>>>>v5.4-rc5 with this patch: 205759 bytes
>>>>
>>>>So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>>>>savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>>>>
>>>>Unfortunately, as of this writing, strings will not get deduplicated for
>>>>kernel modules, as ld does not do the deduplication for
>>>>SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>>>>kernel modules are. A patch for ld is currently being worked on to
>>>>hopefully allow for string deduplication in relocatable files in the
>>>>future.
>>>>
>>
>>Thanks for working on this!
>>
>>>>Suggested-by: Rasmus Villemoes <[email protected]>
>>>>Signed-off-by: Jessica Yu <[email protected]>
>>>>---
>>>>
>>>>v2: use %progbits throughout and document the oddity in a comment.
>>>>
>>>>include/asm-generic/export.h | 8 +++++---
>>>>include/linux/export.h | 27 +++++++++++++++++++++------
>>>>2 files changed, 26 insertions(+), 9 deletions(-)
>>>>
>>>>diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>>>>index fa577978fbbd..23bc98e97a66 100644
>>>>--- a/include/asm-generic/export.h
>>>>+++ b/include/asm-generic/export.h
>>>>@@ -26,9 +26,11 @@
>>>>.endm
>>>>
>>>>/*
>>>>- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>>>>- * since we immediately emit into those sections anyway.
>>>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>>>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>>>>+ * former apparently works on all arches according to the binutils source.
>>>> */
>>>>+
>>>>.macro ___EXPORT_SYMBOL name,val,sec
>>>>#ifdef CONFIG_MODULES
>>>> .globl __ksymtab_\name
>>>>@@ -37,7 +39,7 @@
>>>>__ksymtab_\name:
>>>> __put \val, __kstrtab_\name
>>>> .previous
>>>>- .section __ksymtab_strings,"a"
>>>>+ .section __ksymtab_strings,"aMS",%progbits,1
>>>>__kstrtab_\name:
>>>> .asciz "\name"
>>>> .previous
>>>>diff --git a/include/linux/export.h b/include/linux/export.h
>>>>index 201262793369..3d835ca34d33 100644
>>>>--- a/include/linux/export.h
>>>>+++ b/include/linux/export.h
>>>>@@ -81,16 +81,31 @@ struct kernel_symbol {
>>>>
>>>>#else
>>>>
>>>>+/*
>>>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>>>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>>>>+ * former apparently works on all arches according to the binutils source.
>>>>+ */
>>>>+#define __KSTRTAB_ENTRY(sym) \
>>>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>>>>+ "__kstrtab_" #sym ": \n" \
>>>>+ " .asciz \"" #sym "\" \n" \
>>>>+ " .previous \n")
>>>>+
>>>>+#define __KSTRTAB_NS_ENTRY(sym, ns) \
>>>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>>>>+ "__kstrtabns_" #sym ": \n" \
>>>>+ " .asciz " #ns " \n" \
>>>
>>>
>>>Hmm, it took some time for me to how this code works.
>>>
>>>ns is already a C string, then you added # to it,
>>>then I was confused.
>>>
>>>Personally, I prefer this code:
>>>" .asciz \"" ns "\" \n"
>>>
>>>so it looks in the same way as __KSTRTAB_ENTRY().
>>
>>I agree with this, these entries should be consistent.
>>
>>>
>>>
>>>
>>>BTW, you duplicated \"aMS\",%progbits,1" and ".previous"
>>>
>>>
>>>I would write it shorter, like this:
>>>
>>>
>>>#define ___EXPORT_SYMBOL(sym, sec, ns) \
>>> extern typeof(sym) sym; \
>>> extern const char __kstrtab_##sym[]; \
>>> extern const char __kstrtabns_##sym[]; \
>>> __CRC_SYMBOL(sym, sec); \
>>> asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
>>> "__kstrtab_" #sym ": \n" \
>>> " .asciz \"" #sym "\" \n" \
>>> "__kstrtabns_" #sym ": \n" \
>>> " .asciz \"" ns "\" \n" \
>>> " .previous \n"); \
>>> __KSYMTAB_ENTRY(sym, sec)
>>>
>>
>>I would prefer the separate macros though (as initially proposed) as I
>>find them much more readable. The code is already a bit tricky to reason
>>about and I don't think the shorter version is enough of a gain.
>
>Yeah, the macros were more readable IMO. But I could just squash them into one
>__KSTRTAB_ENTRY macro as a compromise for Masahiro maybe?
>
>Is this any better?

Yes, I like this variant.

>
>diff --git a/include/linux/export.h b/include/linux/export.h
>index 201262793369..f4a8fc798a1b 100644
>--- a/include/linux/export.h
>+++ b/include/linux/export.h
>@@ -81,16 +81,30 @@ struct kernel_symbol {
>
>#else
>
>+/*
>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>+ * former apparently works on all arches according to the binutils source.
>+ *
>+ * This basically corresponds to:
>+ * const char __kstrtab_##sym[] __attribute__((section("__ksymtab_strings")) = #sym;
>+ * const char __kstrtabns_##sym[] __attribute__((section("__ksymtab_strings")) = ns;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You might want to drop the section attribute here to make the comment
shorter (even though it is not entirely correct than anymore).

Cheers,
Matthias

>+ */
>+#define __KSTRTAB_ENTRY(sym, ns) \
>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>+ "__kstrtab_" #sym ": \n" \
>+ " .asciz \"" #sym "\" \n" \
>+ "__kstrtabns_" #sym ": \n" \
>+ " .asciz \"" ns "\" \n" \
>+ " .previous \n")
>+
>/* For every exported symbol, place a struct in the __ksymtab section */
>#define ___EXPORT_SYMBOL(sym, sec, ns) \
> extern typeof(sym) sym; \
>+ extern const char __kstrtab_##sym[]; \
>+ extern const char __kstrtabns_##sym[]; \
> __CRC_SYMBOL(sym, sec); \
>- static const char __kstrtab_##sym[] \
>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>- = #sym; \
>- static const char __kstrtabns_##sym[] \
>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>- = ns; \
>+ __KSTRTAB_ENTRY(sym, ns); \
> __KSYMTAB_ENTRY(sym, sec)
>
>#endif
>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>>+ " .previous \n")
>>>>+
>>>>/* For every exported symbol, place a struct in the __ksymtab section */
>>>>#define ___EXPORT_SYMBOL(sym, sec, ns) \
>>>> extern typeof(sym) sym; \
>>>>+ extern const char __kstrtab_##sym[]; \
>>>>+ extern const char __kstrtabns_##sym[]; \
>>>> __CRC_SYMBOL(sym, sec); \
>>>>- static const char __kstrtab_##sym[] \
>>>>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>>>>- = #sym; \
>>
>>You could keep simplified versions of these statements as comment for
>>the above macros to increase readability.
>>
>>>>- static const char __kstrtabns_##sym[] \
>>>>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>>>>- = ns; \
>>>>+ __KSTRTAB_ENTRY(sym); \
>>>>+ __KSTRTAB_NS_ENTRY(sym, ns); \
>>>> __KSYMTAB_ENTRY(sym, sec)
>>>>
>>>>#endif
>>>>--
>>>>2.16.4
>>>>
>>
>>With the above addressed, please feel free to add
>>
>>Reviewed-by: Matthias Maennich <[email protected]>
>>
>>Cheers,
>>Matthias
>>
>>>
>>>
>>>--
>>>Best Regards
>>>Masahiro Yamada

2019-11-26 16:24:27

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Wed, Nov 27, 2019 at 12:32 AM Jessica Yu <[email protected]> wrote:
>
> +++ Matthias Maennich [26/11/19 13:56 +0000]:
> >On Tue, Nov 26, 2019 at 05:32:59PM +0900, Masahiro Yamada wrote:
> >>On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
> >>>
> >>>Commit c3a6cf19e695 ("export: avoid code duplication in
> >>>include/linux/export.h") refactors export.h quite nicely, but introduces
> >>>a slight increase in memory usage due to using the empty string ""
> >>>instead of NULL to indicate that an exported symbol has no namespace. As
> >>>mentioned in that commit, this meant an increase of 1 byte per exported
> >>>symbol without a namespace. For example, if a kernel configuration has
> >>>about 10k exported symbols, this would mean that the size of
> >>>__ksymtab_strings would increase by roughly 10kB.
> >>>
> >>>We can alleviate this situation by utilizing the SHF_MERGE and
> >>>SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
> >>>that the data in the section are null-terminated strings that can be
> >>>merged to eliminate duplication. More specifically, from the binutils
> >>>documentation - "for sections with both M and S, a string which is a
> >>>suffix of a larger string is considered a duplicate. Thus "def" will be
> >>>merged with "abcdef"; A reference to the first "def" will be changed to
> >>>a reference to "abcdef"+3". Thus, all the empty strings would be merged
> >>>as well as any strings that can be merged according to the cited method
> >>>above. For example, "memset" and "__memset" would be merged to just
> >>>"__memset" in __ksymtab_strings.
> >>>
> >>>As of v5.4-rc5, the following statistics were gathered with x86
> >>>defconfig with approximately 10.7k exported symbols.
> >>>
> >>>Size of __ksymtab_strings in vmlinux:
> >>>-------------------------------------
> >>>v5.4-rc5: 213834 bytes
> >>>v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
> >>>v5.4-rc5 with this patch: 205759 bytes
> >>>
> >>>So, we already see memory savings of ~8kB compared to vanilla -rc5 and
> >>>savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
> >>>
> >>>Unfortunately, as of this writing, strings will not get deduplicated for
> >>>kernel modules, as ld does not do the deduplication for
> >>>SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
> >>>kernel modules are. A patch for ld is currently being worked on to
> >>>hopefully allow for string deduplication in relocatable files in the
> >>>future.
> >>>
> >
> >Thanks for working on this!
> >
> >>>Suggested-by: Rasmus Villemoes <[email protected]>
> >>>Signed-off-by: Jessica Yu <[email protected]>
> >>>---
> >>>
> >>>v2: use %progbits throughout and document the oddity in a comment.
> >>>
> >>> include/asm-generic/export.h | 8 +++++---
> >>> include/linux/export.h | 27 +++++++++++++++++++++------
> >>> 2 files changed, 26 insertions(+), 9 deletions(-)
> >>>
> >>>diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
> >>>index fa577978fbbd..23bc98e97a66 100644
> >>>--- a/include/asm-generic/export.h
> >>>+++ b/include/asm-generic/export.h
> >>>@@ -26,9 +26,11 @@
> >>> .endm
> >>>
> >>> /*
> >>>- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
> >>>- * since we immediately emit into those sections anyway.
> >>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
> >>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
> >>>+ * former apparently works on all arches according to the binutils source.
> >>> */
> >>>+
> >>> .macro ___EXPORT_SYMBOL name,val,sec
> >>> #ifdef CONFIG_MODULES
> >>> .globl __ksymtab_\name
> >>>@@ -37,7 +39,7 @@
> >>> __ksymtab_\name:
> >>> __put \val, __kstrtab_\name
> >>> .previous
> >>>- .section __ksymtab_strings,"a"
> >>>+ .section __ksymtab_strings,"aMS",%progbits,1
> >>> __kstrtab_\name:
> >>> .asciz "\name"
> >>> .previous
> >>>diff --git a/include/linux/export.h b/include/linux/export.h
> >>>index 201262793369..3d835ca34d33 100644
> >>>--- a/include/linux/export.h
> >>>+++ b/include/linux/export.h
> >>>@@ -81,16 +81,31 @@ struct kernel_symbol {
> >>>
> >>> #else
> >>>
> >>>+/*
> >>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
> >>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
> >>>+ * former apparently works on all arches according to the binutils source.
> >>>+ */
> >>>+#define __KSTRTAB_ENTRY(sym) \
> >>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
> >>>+ "__kstrtab_" #sym ": \n" \
> >>>+ " .asciz \"" #sym "\" \n" \
> >>>+ " .previous \n")
> >>>+
> >>>+#define __KSTRTAB_NS_ENTRY(sym, ns) \
> >>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
> >>>+ "__kstrtabns_" #sym ": \n" \
> >>>+ " .asciz " #ns " \n" \
> >>
> >>
> >>Hmm, it took some time for me to how this code works.
> >>
> >>ns is already a C string, then you added # to it,
> >>then I was confused.
> >>
> >>Personally, I prefer this code:
> >>" .asciz \"" ns "\" \n"
> >>
> >>so it looks in the same way as __KSTRTAB_ENTRY().
> >
> >I agree with this, these entries should be consistent.
> >
> >>
> >>
> >>
> >>BTW, you duplicated \"aMS\",%progbits,1" and ".previous"
> >>
> >>
> >>I would write it shorter, like this:
> >>
> >>
> >>#define ___EXPORT_SYMBOL(sym, sec, ns) \
> >> extern typeof(sym) sym; \
> >> extern const char __kstrtab_##sym[]; \
> >> extern const char __kstrtabns_##sym[]; \
> >> __CRC_SYMBOL(sym, sec); \
> >> asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
> >> "__kstrtab_" #sym ": \n" \
> >> " .asciz \"" #sym "\" \n" \
> >> "__kstrtabns_" #sym ": \n" \
> >> " .asciz \"" ns "\" \n" \
> >> " .previous \n"); \
> >> __KSYMTAB_ENTRY(sym, sec)
> >>
> >
> >I would prefer the separate macros though (as initially proposed) as I
> >find them much more readable. The code is already a bit tricky to reason
> >about and I don't think the shorter version is enough of a gain.
>
> Yeah, the macros were more readable IMO. But I could just squash them into one
> __KSTRTAB_ENTRY macro as a compromise for Masahiro maybe?
>
> Is this any better?

I prefer opposite.


__CRC_SYMBOL() is macrofied because it is ifdef'ed by
CONFIG_MODVERSIONS and CONFIG_MOD_REL_CRCS.

__KSYMTAB_ENTRY() is macrofied because it is ifdef'ed by
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS.



__KSTRTAB_ENTRY() does not depend on any CONFIG option,
so it can be expanded in ___EXPORT_SYMBOL().


You need to check multiple locations
to understand how it works as a whole.
I do not understand why increasing macro-chains is readable.



Masahiro




>
> diff --git a/include/linux/export.h b/include/linux/export.h
> index 201262793369..f4a8fc798a1b 100644
> --- a/include/linux/export.h
> +++ b/include/linux/export.h
> @@ -81,16 +81,30 @@ struct kernel_symbol {
>
> #else
>
> +/*
> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
> + * former apparently works on all arches according to the binutils source.
> + *
> + * This basically corresponds to:
> + * const char __kstrtab_##sym[] __attribute__((section("__ksymtab_strings")) = #sym;
> + * const char __kstrtabns_##sym[] __attribute__((section("__ksymtab_strings")) = ns;
> + */
> +#define __KSTRTAB_ENTRY(sym, ns) \
> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
> + "__kstrtab_" #sym ": \n" \
> + " .asciz \"" #sym "\" \n" \
> + "__kstrtabns_" #sym ": \n" \
> + " .asciz \"" ns "\" \n" \
> + " .previous \n")
> +
> /* For every exported symbol, place a struct in the __ksymtab section */
> #define ___EXPORT_SYMBOL(sym, sec, ns) \
> extern typeof(sym) sym; \
> + extern const char __kstrtab_##sym[]; \
> + extern const char __kstrtabns_##sym[]; \
> __CRC_SYMBOL(sym, sec); \
> - static const char __kstrtab_##sym[] \
> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> - = #sym; \
> - static const char __kstrtabns_##sym[] \
> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> - = ns; \
> + __KSTRTAB_ENTRY(sym, ns); \
> __KSYMTAB_ENTRY(sym, sec)
>
> #endif
>
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> >>>+ " .previous \n")
> >>>+
> >>> /* For every exported symbol, place a struct in the __ksymtab section */
> >>> #define ___EXPORT_SYMBOL(sym, sec, ns) \
> >>> extern typeof(sym) sym; \
> >>>+ extern const char __kstrtab_##sym[]; \
> >>>+ extern const char __kstrtabns_##sym[]; \
> >>> __CRC_SYMBOL(sym, sec); \
> >>>- static const char __kstrtab_##sym[] \
> >>>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> >>>- = #sym; \
> >
> >You could keep simplified versions of these statements as comment for
> >the above macros to increase readability.
> >
> >>>- static const char __kstrtabns_##sym[] \
> >>>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> >>>- = ns; \
> >>>+ __KSTRTAB_ENTRY(sym); \
> >>>+ __KSTRTAB_NS_ENTRY(sym, ns); \
> >>> __KSYMTAB_ENTRY(sym, sec)
> >>>
> >>> #endif
> >>>--
> >>>2.16.4
> >>>
> >
> >With the above addressed, please feel free to add
> >
> >Reviewed-by: Matthias Maennich <[email protected]>
> >
> >Cheers,
> >Matthias
> >
> >>
> >>
> >>--
> >>Best Regards
> >>Masahiro Yamada



--
Best Regards
Masahiro Yamada

2019-11-26 16:51:40

by Jessica Yu

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

+++ Masahiro Yamada [27/11/19 01:12 +0900]:
>On Wed, Nov 27, 2019 at 12:32 AM Jessica Yu <[email protected]> wrote:
>>
>> +++ Matthias Maennich [26/11/19 13:56 +0000]:
>> >On Tue, Nov 26, 2019 at 05:32:59PM +0900, Masahiro Yamada wrote:
>> >>On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
>> >>>
>> >>>Commit c3a6cf19e695 ("export: avoid code duplication in
>> >>>include/linux/export.h") refactors export.h quite nicely, but introduces
>> >>>a slight increase in memory usage due to using the empty string ""
>> >>>instead of NULL to indicate that an exported symbol has no namespace. As
>> >>>mentioned in that commit, this meant an increase of 1 byte per exported
>> >>>symbol without a namespace. For example, if a kernel configuration has
>> >>>about 10k exported symbols, this would mean that the size of
>> >>>__ksymtab_strings would increase by roughly 10kB.
>> >>>
>> >>>We can alleviate this situation by utilizing the SHF_MERGE and
>> >>>SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>> >>>that the data in the section are null-terminated strings that can be
>> >>>merged to eliminate duplication. More specifically, from the binutils
>> >>>documentation - "for sections with both M and S, a string which is a
>> >>>suffix of a larger string is considered a duplicate. Thus "def" will be
>> >>>merged with "abcdef"; A reference to the first "def" will be changed to
>> >>>a reference to "abcdef"+3". Thus, all the empty strings would be merged
>> >>>as well as any strings that can be merged according to the cited method
>> >>>above. For example, "memset" and "__memset" would be merged to just
>> >>>"__memset" in __ksymtab_strings.
>> >>>
>> >>>As of v5.4-rc5, the following statistics were gathered with x86
>> >>>defconfig with approximately 10.7k exported symbols.
>> >>>
>> >>>Size of __ksymtab_strings in vmlinux:
>> >>>-------------------------------------
>> >>>v5.4-rc5: 213834 bytes
>> >>>v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>> >>>v5.4-rc5 with this patch: 205759 bytes
>> >>>
>> >>>So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>> >>>savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>> >>>
>> >>>Unfortunately, as of this writing, strings will not get deduplicated for
>> >>>kernel modules, as ld does not do the deduplication for
>> >>>SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>> >>>kernel modules are. A patch for ld is currently being worked on to
>> >>>hopefully allow for string deduplication in relocatable files in the
>> >>>future.
>> >>>
>> >
>> >Thanks for working on this!
>> >
>> >>>Suggested-by: Rasmus Villemoes <[email protected]>
>> >>>Signed-off-by: Jessica Yu <[email protected]>
>> >>>---
>> >>>
>> >>>v2: use %progbits throughout and document the oddity in a comment.
>> >>>
>> >>> include/asm-generic/export.h | 8 +++++---
>> >>> include/linux/export.h | 27 +++++++++++++++++++++------
>> >>> 2 files changed, 26 insertions(+), 9 deletions(-)
>> >>>
>> >>>diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>> >>>index fa577978fbbd..23bc98e97a66 100644
>> >>>--- a/include/asm-generic/export.h
>> >>>+++ b/include/asm-generic/export.h
>> >>>@@ -26,9 +26,11 @@
>> >>> .endm
>> >>>
>> >>> /*
>> >>>- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>> >>>- * since we immediately emit into those sections anyway.
>> >>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>> >>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>> >>>+ * former apparently works on all arches according to the binutils source.
>> >>> */
>> >>>+
>> >>> .macro ___EXPORT_SYMBOL name,val,sec
>> >>> #ifdef CONFIG_MODULES
>> >>> .globl __ksymtab_\name
>> >>>@@ -37,7 +39,7 @@
>> >>> __ksymtab_\name:
>> >>> __put \val, __kstrtab_\name
>> >>> .previous
>> >>>- .section __ksymtab_strings,"a"
>> >>>+ .section __ksymtab_strings,"aMS",%progbits,1
>> >>> __kstrtab_\name:
>> >>> .asciz "\name"
>> >>> .previous
>> >>>diff --git a/include/linux/export.h b/include/linux/export.h
>> >>>index 201262793369..3d835ca34d33 100644
>> >>>--- a/include/linux/export.h
>> >>>+++ b/include/linux/export.h
>> >>>@@ -81,16 +81,31 @@ struct kernel_symbol {
>> >>>
>> >>> #else
>> >>>
>> >>>+/*
>> >>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>> >>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>> >>>+ * former apparently works on all arches according to the binutils source.
>> >>>+ */
>> >>>+#define __KSTRTAB_ENTRY(sym) \
>> >>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>> >>>+ "__kstrtab_" #sym ": \n" \
>> >>>+ " .asciz \"" #sym "\" \n" \
>> >>>+ " .previous \n")
>> >>>+
>> >>>+#define __KSTRTAB_NS_ENTRY(sym, ns) \
>> >>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>> >>>+ "__kstrtabns_" #sym ": \n" \
>> >>>+ " .asciz " #ns " \n" \
>> >>
>> >>
>> >>Hmm, it took some time for me to how this code works.
>> >>
>> >>ns is already a C string, then you added # to it,
>> >>then I was confused.
>> >>
>> >>Personally, I prefer this code:
>> >>" .asciz \"" ns "\" \n"
>> >>
>> >>so it looks in the same way as __KSTRTAB_ENTRY().
>> >
>> >I agree with this, these entries should be consistent.
>> >
>> >>
>> >>
>> >>
>> >>BTW, you duplicated \"aMS\",%progbits,1" and ".previous"
>> >>
>> >>
>> >>I would write it shorter, like this:
>> >>
>> >>
>> >>#define ___EXPORT_SYMBOL(sym, sec, ns) \
>> >> extern typeof(sym) sym; \
>> >> extern const char __kstrtab_##sym[]; \
>> >> extern const char __kstrtabns_##sym[]; \
>> >> __CRC_SYMBOL(sym, sec); \
>> >> asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
>> >> "__kstrtab_" #sym ": \n" \
>> >> " .asciz \"" #sym "\" \n" \
>> >> "__kstrtabns_" #sym ": \n" \
>> >> " .asciz \"" ns "\" \n" \
>> >> " .previous \n"); \
>> >> __KSYMTAB_ENTRY(sym, sec)
>> >>
>> >
>> >I would prefer the separate macros though (as initially proposed) as I
>> >find them much more readable. The code is already a bit tricky to reason
>> >about and I don't think the shorter version is enough of a gain.
>>
>> Yeah, the macros were more readable IMO. But I could just squash them into one
>> __KSTRTAB_ENTRY macro as a compromise for Masahiro maybe?
>>
>> Is this any better?
>
>I prefer opposite.
>
>
>__CRC_SYMBOL() is macrofied because it is ifdef'ed by
>CONFIG_MODVERSIONS and CONFIG_MOD_REL_CRCS.
>
>__KSYMTAB_ENTRY() is macrofied because it is ifdef'ed by
>CONFIG_HAVE_ARCH_PREL32_RELOCATIONS.
>
>
>
>__KSTRTAB_ENTRY() does not depend on any CONFIG option,
>so it can be expanded in ___EXPORT_SYMBOL().
>
>
>You need to check multiple locations
>to understand how it works as a whole.
>I do not understand why increasing macro-chains is readable.

I think it is "readable" in the sense that when someone is reading
export.h for the first time, they know exactly what ___EXPORT_SYMBOL()
is supposed to do, without requiring them to read too deeply into the
asm and swim through the #ifdefs. __CRC_SYMBOL(), depending on
modversions, creates an entry in what would eventually be merged to
become the __kcrctab section, __KSTRTAB_ENTRY() creates entries in
__ksymtab_strings, and __KSYMTAB_ENTRY() creates an entry in (what
would eventually become) the __ksymtab{,_gpl} section. It gives a
general idea of what it's doing.

However, I do understand your reasoning behind not having an extra
macro. I'll wait a bit before respinning the patch to see if we can
get at a consensus..


2019-12-05 16:44:50

by Matthias Maennich

[permalink] [raw]
Subject: Re: [PATCH v2] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Tue, Nov 26, 2019 at 05:48:40PM +0100, Jessica Yu wrote:
>+++ Masahiro Yamada [27/11/19 01:12 +0900]:
>>On Wed, Nov 27, 2019 at 12:32 AM Jessica Yu <[email protected]> wrote:
>>>
>>>+++ Matthias Maennich [26/11/19 13:56 +0000]:
>>>>On Tue, Nov 26, 2019 at 05:32:59PM +0900, Masahiro Yamada wrote:
>>>>>On Tue, Nov 26, 2019 at 12:42 AM Jessica Yu <[email protected]> wrote:
>>>>>>
>>>>>>Commit c3a6cf19e695 ("export: avoid code duplication in
>>>>>>include/linux/export.h") refactors export.h quite nicely, but introduces
>>>>>>a slight increase in memory usage due to using the empty string ""
>>>>>>instead of NULL to indicate that an exported symbol has no namespace. As
>>>>>>mentioned in that commit, this meant an increase of 1 byte per exported
>>>>>>symbol without a namespace. For example, if a kernel configuration has
>>>>>>about 10k exported symbols, this would mean that the size of
>>>>>>__ksymtab_strings would increase by roughly 10kB.
>>>>>>
>>>>>>We can alleviate this situation by utilizing the SHF_MERGE and
>>>>>>SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>>>>>>that the data in the section are null-terminated strings that can be
>>>>>>merged to eliminate duplication. More specifically, from the binutils
>>>>>>documentation - "for sections with both M and S, a string which is a
>>>>>>suffix of a larger string is considered a duplicate. Thus "def" will be
>>>>>>merged with "abcdef"; A reference to the first "def" will be changed to
>>>>>>a reference to "abcdef"+3". Thus, all the empty strings would be merged
>>>>>>as well as any strings that can be merged according to the cited method
>>>>>>above. For example, "memset" and "__memset" would be merged to just
>>>>>>"__memset" in __ksymtab_strings.
>>>>>>
>>>>>>As of v5.4-rc5, the following statistics were gathered with x86
>>>>>>defconfig with approximately 10.7k exported symbols.
>>>>>>
>>>>>>Size of __ksymtab_strings in vmlinux:
>>>>>>-------------------------------------
>>>>>>v5.4-rc5: 213834 bytes
>>>>>>v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>>>>>>v5.4-rc5 with this patch: 205759 bytes
>>>>>>
>>>>>>So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>>>>>>savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>>>>>>
>>>>>>Unfortunately, as of this writing, strings will not get deduplicated for
>>>>>>kernel modules, as ld does not do the deduplication for
>>>>>>SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>>>>>>kernel modules are. A patch for ld is currently being worked on to
>>>>>>hopefully allow for string deduplication in relocatable files in the
>>>>>>future.
>>>>>>
>>>>
>>>>Thanks for working on this!
>>>>
>>>>>>Suggested-by: Rasmus Villemoes <[email protected]>
>>>>>>Signed-off-by: Jessica Yu <[email protected]>
>>>>>>---
>>>>>>
>>>>>>v2: use %progbits throughout and document the oddity in a comment.
>>>>>>
>>>>>> include/asm-generic/export.h | 8 +++++---
>>>>>> include/linux/export.h | 27 +++++++++++++++++++++------
>>>>>> 2 files changed, 26 insertions(+), 9 deletions(-)
>>>>>>
>>>>>>diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>>>>>>index fa577978fbbd..23bc98e97a66 100644
>>>>>>--- a/include/asm-generic/export.h
>>>>>>+++ b/include/asm-generic/export.h
>>>>>>@@ -26,9 +26,11 @@
>>>>>> .endm
>>>>>>
>>>>>> /*
>>>>>>- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>>>>>>- * since we immediately emit into those sections anyway.
>>>>>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>>>>>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>>>>>>+ * former apparently works on all arches according to the binutils source.
>>>>>> */
>>>>>>+
>>>>>> .macro ___EXPORT_SYMBOL name,val,sec
>>>>>> #ifdef CONFIG_MODULES
>>>>>> .globl __ksymtab_\name
>>>>>>@@ -37,7 +39,7 @@
>>>>>> __ksymtab_\name:
>>>>>> __put \val, __kstrtab_\name
>>>>>> .previous
>>>>>>- .section __ksymtab_strings,"a"
>>>>>>+ .section __ksymtab_strings,"aMS",%progbits,1
>>>>>> __kstrtab_\name:
>>>>>> .asciz "\name"
>>>>>> .previous
>>>>>>diff --git a/include/linux/export.h b/include/linux/export.h
>>>>>>index 201262793369..3d835ca34d33 100644
>>>>>>--- a/include/linux/export.h
>>>>>>+++ b/include/linux/export.h
>>>>>>@@ -81,16 +81,31 @@ struct kernel_symbol {
>>>>>>
>>>>>> #else
>>>>>>
>>>>>>+/*
>>>>>>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>>>>>>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>>>>>>+ * former apparently works on all arches according to the binutils source.
>>>>>>+ */
>>>>>>+#define __KSTRTAB_ENTRY(sym) \
>>>>>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>>>>>>+ "__kstrtab_" #sym ": \n" \
>>>>>>+ " .asciz \"" #sym "\" \n" \
>>>>>>+ " .previous \n")
>>>>>>+
>>>>>>+#define __KSTRTAB_NS_ENTRY(sym, ns) \
>>>>>>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
>>>>>>+ "__kstrtabns_" #sym ": \n" \
>>>>>>+ " .asciz " #ns " \n" \
>>>>>
>>>>>
>>>>>Hmm, it took some time for me to how this code works.
>>>>>
>>>>>ns is already a C string, then you added # to it,
>>>>>then I was confused.
>>>>>
>>>>>Personally, I prefer this code:
>>>>>" .asciz \"" ns "\" \n"
>>>>>
>>>>>so it looks in the same way as __KSTRTAB_ENTRY().
>>>>
>>>>I agree with this, these entries should be consistent.
>>>>
>>>>>
>>>>>
>>>>>
>>>>>BTW, you duplicated \"aMS\",%progbits,1" and ".previous"
>>>>>
>>>>>
>>>>>I would write it shorter, like this:
>>>>>
>>>>>
>>>>>#define ___EXPORT_SYMBOL(sym, sec, ns) \
>>>>> extern typeof(sym) sym; \
>>>>> extern const char __kstrtab_##sym[]; \
>>>>> extern const char __kstrtabns_##sym[]; \
>>>>> __CRC_SYMBOL(sym, sec); \
>>>>> asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
>>>>> "__kstrtab_" #sym ": \n" \
>>>>> " .asciz \"" #sym "\" \n" \
>>>>> "__kstrtabns_" #sym ": \n" \
>>>>> " .asciz \"" ns "\" \n" \
>>>>> " .previous \n"); \
>>>>> __KSYMTAB_ENTRY(sym, sec)
>>>>>
>>>>
>>>>I would prefer the separate macros though (as initially proposed) as I
>>>>find them much more readable. The code is already a bit tricky to reason
>>>>about and I don't think the shorter version is enough of a gain.
>>>
>>>Yeah, the macros were more readable IMO. But I could just squash them into one
>>>__KSTRTAB_ENTRY macro as a compromise for Masahiro maybe?
>>>
>>>Is this any better?
>>
>>I prefer opposite.
>>
>>
>>__CRC_SYMBOL() is macrofied because it is ifdef'ed by
>>CONFIG_MODVERSIONS and CONFIG_MOD_REL_CRCS.
>>
>>__KSYMTAB_ENTRY() is macrofied because it is ifdef'ed by
>>CONFIG_HAVE_ARCH_PREL32_RELOCATIONS.
>>
>>
>>
>>__KSTRTAB_ENTRY() does not depend on any CONFIG option,
>>so it can be expanded in ___EXPORT_SYMBOL().
>>
>>
>>You need to check multiple locations
>>to understand how it works as a whole.
>>I do not understand why increasing macro-chains is readable.
>
>I think it is "readable" in the sense that when someone is reading
>export.h for the first time, they know exactly what ___EXPORT_SYMBOL()
>is supposed to do, without requiring them to read too deeply into the
>asm and swim through the #ifdefs. __CRC_SYMBOL(), depending on
>modversions, creates an entry in what would eventually be merged to
>become the __kcrctab section, __KSTRTAB_ENTRY() creates entries in
>__ksymtab_strings, and __KSYMTAB_ENTRY() creates an entry in (what
>would eventually become) the __ksymtab{,_gpl} section. It gives a
>general idea of what it's doing.

I agree with that thought. One option would be, though, to put a proper
comment there to explain what happens in the code. Splitting the
functionality across several macros has the effect that the code feels
easier to reason about and has some implicit documentation through the
macro names. If we could restore that in Masahiro's version, I am ok
with that.

Cheers,
Matthias

>
>However, I do understand your reasoning behind not having an extra
>macro. I'll wait a bit before respinning the patch to see if we can
>get at a consensus..
>
>

2019-12-06 12:42:11

by Jessica Yu

[permalink] [raw]
Subject: [PATCH v3] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

Commit c3a6cf19e695 ("export: avoid code duplication in
include/linux/export.h") refactors export.h quite nicely, but introduces
a slight increase in memory usage due to using the empty string ""
instead of NULL to indicate that an exported symbol has no namespace. As
mentioned in that commit, this meant an increase of 1 byte per exported
symbol without a namespace. For example, if a kernel configuration has
about 10k exported symbols, this would mean that the size of
__ksymtab_strings would increase by roughly 10kB.

We can alleviate this situation by utilizing the SHF_MERGE and
SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
that the data in the section are null-terminated strings that can be
merged to eliminate duplication. More specifically, from the binutils
documentation - "for sections with both M and S, a string which is a
suffix of a larger string is considered a duplicate. Thus "def" will be
merged with "abcdef"; A reference to the first "def" will be changed to
a reference to "abcdef"+3". Thus, all the empty strings would be merged
as well as any strings that can be merged according to the cited method
above. For example, "memset" and "__memset" would be merged to just
"__memset" in __ksymtab_strings.

As of v5.4-rc5, the following statistics were gathered with x86
defconfig with approximately 10.7k exported symbols.

Size of __ksymtab_strings in vmlinux:
-------------------------------------
v5.4-rc5: 213834 bytes
v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
v5.4-rc5 with this patch: 205759 bytes

So, we already see memory savings of ~8kB compared to vanilla -rc5 and
savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.

Unfortunately, as of this writing, strings will not get deduplicated for
kernel modules, as ld does not do the deduplication for
SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
kernel modules are. A patch for ld is currently being worked on to
hopefully allow for string deduplication in relocatable files in the
future.

Suggested-by: Rasmus Villemoes <[email protected]>
Signed-off-by: Jessica Yu <[email protected]>
---
v3:
- remove __KSTRTAB_ENTRY macros in favor of just putting the asm directly
in ___EXPORT_SYMBOL
- Document more clearly what the ___EXPORT_SYMBOL macro does

include/asm-generic/export.h | 8 +++++---
include/linux/export.h | 27 ++++++++++++++++++++-------
2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index fa577978fbbd..23bc98e97a66 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -26,9 +26,11 @@
.endm

/*
- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
- * since we immediately emit into those sections anyway.
+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
+ * former apparently works on all arches according to the binutils source.
*/
+
.macro ___EXPORT_SYMBOL name,val,sec
#ifdef CONFIG_MODULES
.globl __ksymtab_\name
@@ -37,7 +39,7 @@
__ksymtab_\name:
__put \val, __kstrtab_\name
.previous
- .section __ksymtab_strings,"a"
+ .section __ksymtab_strings,"aMS",%progbits,1
__kstrtab_\name:
.asciz "\name"
.previous
diff --git a/include/linux/export.h b/include/linux/export.h
index 201262793369..18dcdcd118e7 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -81,16 +81,29 @@ struct kernel_symbol {

#else

-/* For every exported symbol, place a struct in the __ksymtab section */
+/*
+ * For every exported symbol, do the following:
+ *
+ * - If applicable, place an entry in the __kcrctab section.
+ * - Put the name of the symbol and namespace (empty string "" for none) in
+ * __ksymtab_strings.
+ * - Place an entry in the __ksymtab section.
+ *
+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
+ * former apparently works on all arches according to the binutils source.
+ */
#define ___EXPORT_SYMBOL(sym, sec, ns) \
extern typeof(sym) sym; \
+ extern const char __kstrtab_##sym[]; \
+ extern const char __kstrtabns_##sym[]; \
__CRC_SYMBOL(sym, sec); \
- static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = #sym; \
- static const char __kstrtabns_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = ns; \
+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
+ "__kstrtab_" #sym ": \n" \
+ " .asciz \"" #sym "\" \n" \
+ "__kstrtabns_" #sym ": \n" \
+ " .asciz \"" ns "\" \n" \
+ " .previous \n"); \
__KSYMTAB_ENTRY(sym, sec)

#endif
--
2.16.4

2019-12-12 06:25:10

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH v3] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Fri, Dec 6, 2019 at 9:41 PM Jessica Yu <[email protected]> wrote:
>
> Commit c3a6cf19e695 ("export: avoid code duplication in
> include/linux/export.h") refactors export.h quite nicely, but introduces
> a slight increase in memory usage due to using the empty string ""
> instead of NULL to indicate that an exported symbol has no namespace. As
> mentioned in that commit, this meant an increase of 1 byte per exported
> symbol without a namespace. For example, if a kernel configuration has
> about 10k exported symbols, this would mean that the size of
> __ksymtab_strings would increase by roughly 10kB.
>
> We can alleviate this situation by utilizing the SHF_MERGE and
> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
> that the data in the section are null-terminated strings that can be
> merged to eliminate duplication. More specifically, from the binutils
> documentation - "for sections with both M and S, a string which is a
> suffix of a larger string is considered a duplicate. Thus "def" will be
> merged with "abcdef"; A reference to the first "def" will be changed to
> a reference to "abcdef"+3". Thus, all the empty strings would be merged
> as well as any strings that can be merged according to the cited method
> above. For example, "memset" and "__memset" would be merged to just
> "__memset" in __ksymtab_strings.
>
> As of v5.4-rc5, the following statistics were gathered with x86
> defconfig with approximately 10.7k exported symbols.
>
> Size of __ksymtab_strings in vmlinux:
> -------------------------------------
> v5.4-rc5: 213834 bytes
> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
> v5.4-rc5 with this patch: 205759 bytes
>
> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>
> Unfortunately, as of this writing, strings will not get deduplicated for
> kernel modules, as ld does not do the deduplication for
> SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
> kernel modules are. A patch for ld is currently being worked on to
> hopefully allow for string deduplication in relocatable files in the
> future.
>
> Suggested-by: Rasmus Villemoes <[email protected]>
> Signed-off-by: Jessica Yu <[email protected]>
> ---
> v3:
> - remove __KSTRTAB_ENTRY macros in favor of just putting the asm directly
> in ___EXPORT_SYMBOL
> - Document more clearly what the ___EXPORT_SYMBOL macro does
>
> include/asm-generic/export.h | 8 +++++---
> include/linux/export.h | 27 ++++++++++++++++++++-------
> 2 files changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
> index fa577978fbbd..23bc98e97a66 100644
> --- a/include/asm-generic/export.h
> +++ b/include/asm-generic/export.h
> @@ -26,9 +26,11 @@
> .endm
>
> /*
> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
> - * since we immediately emit into those sections anyway.
> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
> + * former apparently works on all arches according to the binutils source.
> */
> +
> .macro ___EXPORT_SYMBOL name,val,sec
> #ifdef CONFIG_MODULES
> .globl __ksymtab_\name
> @@ -37,7 +39,7 @@
> __ksymtab_\name:
> __put \val, __kstrtab_\name
> .previous
> - .section __ksymtab_strings,"a"
> + .section __ksymtab_strings,"aMS",%progbits,1
> __kstrtab_\name:
> .asciz "\name"
> .previous
> diff --git a/include/linux/export.h b/include/linux/export.h
> index 201262793369..18dcdcd118e7 100644
> --- a/include/linux/export.h
> +++ b/include/linux/export.h
> @@ -81,16 +81,29 @@ struct kernel_symbol {
>
> #else
>
> -/* For every exported symbol, place a struct in the __ksymtab section */
> +/*
> + * For every exported symbol, do the following:

Just a nit.

You mention "an entry" twice
to talk about different classes of structures.

It might be better to be explicit about "which entry".


> + *
> + * - If applicable, place an entry in the __kcrctab section.

"place a CRC entry in the __kcrctab section"
might be clearer ?


> + * - Put the name of the symbol and namespace (empty string "" for none) in
> + * __ksymtab_strings.
> + * - Place an entry in the __ksymtab section.

"Place a struct kernel_symbol entry in __ksymtab section" ?
might be clearer ?


Other than that:
Reviewed-by: Masahiro Yamada <[email protected]>


> + *
> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
> + * former apparently works on all arches according to the binutils source.
> + */
> #define ___EXPORT_SYMBOL(sym, sec, ns) \
> extern typeof(sym) sym; \
> + extern const char __kstrtab_##sym[]; \
> + extern const char __kstrtabns_##sym[]; \
> __CRC_SYMBOL(sym, sec); \
> - static const char __kstrtab_##sym[] \
> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> - = #sym; \
> - static const char __kstrtabns_##sym[] \
> - __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
> - = ns; \
> + asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
> + "__kstrtab_" #sym ": \n" \
> + " .asciz \"" #sym "\" \n" \
> + "__kstrtabns_" #sym ": \n" \
> + " .asciz \"" ns "\" \n" \
> + " .previous \n"); \
> __KSYMTAB_ENTRY(sym, sec)
>
> #endif
> --
> 2.16.4
>


--
Best Regards
Masahiro Yamada

2019-12-12 10:37:02

by Jessica Yu

[permalink] [raw]
Subject: Re: [PATCH v3] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

+++ Masahiro Yamada [12/12/19 15:22 +0900]:
>On Fri, Dec 6, 2019 at 9:41 PM Jessica Yu <[email protected]> wrote:
>>
>> Commit c3a6cf19e695 ("export: avoid code duplication in
>> include/linux/export.h") refactors export.h quite nicely, but introduces
>> a slight increase in memory usage due to using the empty string ""
>> instead of NULL to indicate that an exported symbol has no namespace. As
>> mentioned in that commit, this meant an increase of 1 byte per exported
>> symbol without a namespace. For example, if a kernel configuration has
>> about 10k exported symbols, this would mean that the size of
>> __ksymtab_strings would increase by roughly 10kB.
>>
>> We can alleviate this situation by utilizing the SHF_MERGE and
>> SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>> that the data in the section are null-terminated strings that can be
>> merged to eliminate duplication. More specifically, from the binutils
>> documentation - "for sections with both M and S, a string which is a
>> suffix of a larger string is considered a duplicate. Thus "def" will be
>> merged with "abcdef"; A reference to the first "def" will be changed to
>> a reference to "abcdef"+3". Thus, all the empty strings would be merged
>> as well as any strings that can be merged according to the cited method
>> above. For example, "memset" and "__memset" would be merged to just
>> "__memset" in __ksymtab_strings.
>>
>> As of v5.4-rc5, the following statistics were gathered with x86
>> defconfig with approximately 10.7k exported symbols.
>>
>> Size of __ksymtab_strings in vmlinux:
>> -------------------------------------
>> v5.4-rc5: 213834 bytes
>> v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>> v5.4-rc5 with this patch: 205759 bytes
>>
>> So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>> savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>>
>> Unfortunately, as of this writing, strings will not get deduplicated for
>> kernel modules, as ld does not do the deduplication for
>> SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>> kernel modules are. A patch for ld is currently being worked on to
>> hopefully allow for string deduplication in relocatable files in the
>> future.
>>
>> Suggested-by: Rasmus Villemoes <[email protected]>
>> Signed-off-by: Jessica Yu <[email protected]>
>> ---
>> v3:
>> - remove __KSTRTAB_ENTRY macros in favor of just putting the asm directly
>> in ___EXPORT_SYMBOL
>> - Document more clearly what the ___EXPORT_SYMBOL macro does
>>
>> include/asm-generic/export.h | 8 +++++---
>> include/linux/export.h | 27 ++++++++++++++++++++-------
>> 2 files changed, 25 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>> index fa577978fbbd..23bc98e97a66 100644
>> --- a/include/asm-generic/export.h
>> +++ b/include/asm-generic/export.h
>> @@ -26,9 +26,11 @@
>> .endm
>>
>> /*
>> - * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>> - * since we immediately emit into those sections anyway.
>> + * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>> + * section flag requires it. Use '%progbits' instead of '@progbits' since the
>> + * former apparently works on all arches according to the binutils source.
>> */
>> +
>> .macro ___EXPORT_SYMBOL name,val,sec
>> #ifdef CONFIG_MODULES
>> .globl __ksymtab_\name
>> @@ -37,7 +39,7 @@
>> __ksymtab_\name:
>> __put \val, __kstrtab_\name
>> .previous
>> - .section __ksymtab_strings,"a"
>> + .section __ksymtab_strings,"aMS",%progbits,1
>> __kstrtab_\name:
>> .asciz "\name"
>> .previous
>> diff --git a/include/linux/export.h b/include/linux/export.h
>> index 201262793369..18dcdcd118e7 100644
>> --- a/include/linux/export.h
>> +++ b/include/linux/export.h
>> @@ -81,16 +81,29 @@ struct kernel_symbol {
>>
>> #else
>>
>> -/* For every exported symbol, place a struct in the __ksymtab section */
>> +/*
>> + * For every exported symbol, do the following:
>
>Just a nit.
>
>You mention "an entry" twice
>to talk about different classes of structures.
>
>It might be better to be explicit about "which entry".
>
>
>> + *
>> + * - If applicable, place an entry in the __kcrctab section.
>
> "place a CRC entry in the __kcrctab section"
>might be clearer ?
>
>
>> + * - Put the name of the symbol and namespace (empty string "" for none) in
>> + * __ksymtab_strings.
>> + * - Place an entry in the __ksymtab section.
>
> "Place a struct kernel_symbol entry in __ksymtab section" ?
>might be clearer ?
>
>
>Other than that:
>Reviewed-by: Masahiro Yamada <[email protected]>

Yeah, that is much clearer, will resend. Thanks!

Jessica

2019-12-12 14:19:51

by Jessica Yu

[permalink] [raw]
Subject: [PATCH v4] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

Commit c3a6cf19e695 ("export: avoid code duplication in
include/linux/export.h") refactors export.h quite nicely, but introduces
a slight increase in memory usage due to using the empty string ""
instead of NULL to indicate that an exported symbol has no namespace. As
mentioned in that commit, this meant an increase of 1 byte per exported
symbol without a namespace. For example, if a kernel configuration has
about 10k exported symbols, this would mean that the size of
__ksymtab_strings would increase by roughly 10kB.

We can alleviate this situation by utilizing the SHF_MERGE and
SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
that the data in the section are null-terminated strings that can be
merged to eliminate duplication. More specifically, from the binutils
documentation - "for sections with both M and S, a string which is a
suffix of a larger string is considered a duplicate. Thus "def" will be
merged with "abcdef"; A reference to the first "def" will be changed to
a reference to "abcdef"+3". Thus, all the empty strings would be merged
as well as any strings that can be merged according to the cited method
above. For example, "memset" and "__memset" would be merged to just
"__memset" in __ksymtab_strings.

As of v5.4-rc5, the following statistics were gathered with x86
defconfig with approximately 10.7k exported symbols.

Size of __ksymtab_strings in vmlinux:
-------------------------------------
v5.4-rc5: 213834 bytes
v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
v5.4-rc5 with this patch: 205759 bytes

So, we already see memory savings of ~8kB compared to vanilla -rc5 and
savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.

Unfortunately, as of this writing, strings will not get deduplicated for
kernel modules, as ld does not do the deduplication for
SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
kernel modules are. A patch for ld is currently being worked on to
hopefully allow for string deduplication in relocatable files in the
future.

Suggested-by: Rasmus Villemoes <[email protected]>
Reviewed-by: Masahiro Yamada <[email protected]>
Signed-off-by: Jessica Yu <[email protected]>
---
v4:
- fix the comment above ___EXPORT_SYMBOL to be more specific about what
entries are being placed in their respective sections.

include/asm-generic/export.h | 8 +++++---
include/linux/export.h | 27 ++++++++++++++++++++-------

2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index afddc5442e92..365345f9a9e3 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -27,9 +27,11 @@
.endm

/*
- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
- * since we immediately emit into those sections anyway.
+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
+ * former apparently works on all arches according to the binutils source.
*/
+
.macro ___EXPORT_SYMBOL name,val,sec
#ifdef CONFIG_MODULES
.section ___ksymtab\sec+\name,"a"
@@ -37,7 +39,7 @@
__ksymtab_\name:
__put \val, __kstrtab_\name
.previous
- .section __ksymtab_strings,"a"
+ .section __ksymtab_strings,"aMS",%progbits,1
__kstrtab_\name:
.asciz "\name"
.previous
diff --git a/include/linux/export.h b/include/linux/export.h
index 627841448293..c166d35e3d76 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -82,16 +82,29 @@ struct kernel_symbol {

#else

-/* For every exported symbol, place a struct in the __ksymtab section */
+/*
+ * For every exported symbol, do the following:
+ *
+ * - If applicable, place a CRC entry in the __kcrctab section.
+ * - Put the name of the symbol and namespace (empty string "" for none) in
+ * __ksymtab_strings.
+ * - Place a struct kernel_symbol entry in the __ksymtab section.
+ *
+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
+ * former apparently works on all arches according to the binutils source.
+ */
#define ___EXPORT_SYMBOL(sym, sec, ns) \
extern typeof(sym) sym; \
+ extern const char __kstrtab_##sym[]; \
+ extern const char __kstrtabns_##sym[]; \
__CRC_SYMBOL(sym, sec); \
- static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = #sym; \
- static const char __kstrtabns_##sym[] \
- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
- = ns; \
+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
+ "__kstrtab_" #sym ": \n" \
+ " .asciz \"" #sym "\" \n" \
+ "__kstrtabns_" #sym ": \n" \
+ " .asciz \"" ns "\" \n" \
+ " .previous \n"); \
__KSYMTAB_ENTRY(sym, sec)

#endif
--
2.16.4

2019-12-12 14:30:30

by Matthias Maennich

[permalink] [raw]
Subject: Re: [PATCH v4] export.h: reduce __ksymtab_strings string duplication by using "MS" section flags

On Thu, Dec 12, 2019 at 03:16:13PM +0100, Jessica Yu wrote:
>Commit c3a6cf19e695 ("export: avoid code duplication in
>include/linux/export.h") refactors export.h quite nicely, but introduces
>a slight increase in memory usage due to using the empty string ""
>instead of NULL to indicate that an exported symbol has no namespace. As
>mentioned in that commit, this meant an increase of 1 byte per exported
>symbol without a namespace. For example, if a kernel configuration has
>about 10k exported symbols, this would mean that the size of
>__ksymtab_strings would increase by roughly 10kB.
>
>We can alleviate this situation by utilizing the SHF_MERGE and
>SHF_STRING section flags. SHF_MERGE|SHF_STRING indicate to the linker
>that the data in the section are null-terminated strings that can be
>merged to eliminate duplication. More specifically, from the binutils
>documentation - "for sections with both M and S, a string which is a
>suffix of a larger string is considered a duplicate. Thus "def" will be
>merged with "abcdef"; A reference to the first "def" will be changed to
>a reference to "abcdef"+3". Thus, all the empty strings would be merged
>as well as any strings that can be merged according to the cited method
>above. For example, "memset" and "__memset" would be merged to just
>"__memset" in __ksymtab_strings.
>
>As of v5.4-rc5, the following statistics were gathered with x86
>defconfig with approximately 10.7k exported symbols.
>
>Size of __ksymtab_strings in vmlinux:
>-------------------------------------
>v5.4-rc5: 213834 bytes
>v5.4-rc5 with commit c3a6cf19e695: 224455 bytes
>v5.4-rc5 with this patch: 205759 bytes
>
>So, we already see memory savings of ~8kB compared to vanilla -rc5 and
>savings of nearly 18.7kB compared to -rc5 with commit c3a6cf19e695 on top.
>
>Unfortunately, as of this writing, strings will not get deduplicated for
>kernel modules, as ld does not do the deduplication for
>SHF_MERGE|SHF_STRINGS sections for relocatable files (ld -r), which
>kernel modules are. A patch for ld is currently being worked on to
>hopefully allow for string deduplication in relocatable files in the
>future.
>
>Suggested-by: Rasmus Villemoes <[email protected]>
>Reviewed-by: Masahiro Yamada <[email protected]>
>Signed-off-by: Jessica Yu <[email protected]>
>---
>v4:
> - fix the comment above ___EXPORT_SYMBOL to be more specific about what
> entries are being placed in their respective sections.
>
> include/asm-generic/export.h | 8 +++++---
> include/linux/export.h | 27 ++++++++++++++++++++-------
>
> 2 files changed, 25 insertions(+), 10 deletions(-)
>
>diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
>index afddc5442e92..365345f9a9e3 100644
>--- a/include/asm-generic/export.h
>+++ b/include/asm-generic/export.h
>@@ -27,9 +27,11 @@
> .endm
>
> /*
>- * note on .section use: @progbits vs %progbits nastiness doesn't matter,
>- * since we immediately emit into those sections anyway.
>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>+ * former apparently works on all arches according to the binutils source.
> */
>+
> .macro ___EXPORT_SYMBOL name,val,sec
> #ifdef CONFIG_MODULES
> .section ___ksymtab\sec+\name,"a"
>@@ -37,7 +39,7 @@
> __ksymtab_\name:
> __put \val, __kstrtab_\name
> .previous
>- .section __ksymtab_strings,"a"
>+ .section __ksymtab_strings,"aMS",%progbits,1
> __kstrtab_\name:
> .asciz "\name"
> .previous
>diff --git a/include/linux/export.h b/include/linux/export.h
>index 627841448293..c166d35e3d76 100644
>--- a/include/linux/export.h
>+++ b/include/linux/export.h
>@@ -82,16 +82,29 @@ struct kernel_symbol {
>
> #else
>
>-/* For every exported symbol, place a struct in the __ksymtab section */
>+/*
>+ * For every exported symbol, do the following:
>+ *
>+ * - If applicable, place a CRC entry in the __kcrctab section.
>+ * - Put the name of the symbol and namespace (empty string "" for none) in
>+ * __ksymtab_strings.
>+ * - Place a struct kernel_symbol entry in the __ksymtab section.
>+ *
>+ * note on .section use: we specify progbits since usage of the "M" (SHF_MERGE)
>+ * section flag requires it. Use '%progbits' instead of '@progbits' since the
>+ * former apparently works on all arches according to the binutils source.
>+ */
> #define ___EXPORT_SYMBOL(sym, sec, ns) \
> extern typeof(sym) sym; \
>+ extern const char __kstrtab_##sym[]; \
>+ extern const char __kstrtabns_##sym[]; \
> __CRC_SYMBOL(sym, sec); \
>- static const char __kstrtab_##sym[] \
>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>- = #sym; \
>- static const char __kstrtabns_##sym[] \
>- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
>- = ns; \
>+ asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1\n" \
>+ "__kstrtab_" #sym ": \n" \
>+ " .asciz \"" #sym "\" \n" \
>+ "__kstrtabns_" #sym ": \n" \
>+ " .asciz \"" ns "\" \n" \
>+ " .previous \n"); \

nit: You might want to align the newline characters up to the asm line.

Thanks for working on this!

Reviewed-by: Matthias Maennich <[email protected]>

Cheers,
Matthias


> __KSYMTAB_ENTRY(sym, sec)
>
> #endif
>--
>2.16.4
>