2014-02-08 08:02:08

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO

The VDSO does not play well with LTO, so just disable LTO for it.
Also pass a 32bit linker flag for the 32bit version.

Cc: [email protected]
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/vdso/Makefile | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index fd14be1..598f163 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -2,6 +2,8 @@
# Building vDSO images for x86.
#

+KBUILD_CFLAGS += ${DISABLE_LTO}
+
VDSO64-$(CONFIG_X86_64) := y
VDSOX32-$(CONFIG_X86_X32_ABI) := y
VDSO32-$(CONFIG_X86_32) := y
@@ -35,7 +37,8 @@ export CPPFLAGS_vdso.lds += -P -C

VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,--no-undefined \
- -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
+ $(DISABLE_LTO)

$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so

@@ -127,7 +130,7 @@ vdso32.so-$(VDSO32-y) += sysenter
vdso32-images = $(vdso32.so-y:%=vdso32-%.so)

CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1

# This makes sure the $(obj) subdirectory exists even though vdso32/
# is not a kbuild sub-make subdirectory.
@@ -181,7 +184,8 @@ quiet_cmd_vdso = VDSO $@
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'

-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+ ${LTO_CFLAGS}
GCOV_PROFILE := n

#
--
1.8.5.2


2014-02-08 08:02:17

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 09/17] Kbuild, lto: add ld-version and ld-ifversion macros

Add ld-version and ld-ifversion macros to check the linker version.
They are used by the LTO makefile.

Signed-off-by: Andi Kleen <[email protected]>
---
scripts/Kbuild.include | 9 +++++++++
scripts/ld-version.sh | 8 ++++++++
2 files changed, 17 insertions(+)
create mode 100755 scripts/ld-version.sh

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 547e15d..93a0da2 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -155,6 +155,15 @@ ld-option = $(call try-run,\
# Important: no spaces around options
ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2))

+# ld-version
+# Usage: $(call ld-version)
+# Note this is mainly for HJ Lu's 3 number binutil versions
+ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
+
+# ld-ifversion
+# Usage: $(call ld-ifversion, -ge, 22252, y)
+ld-ifversion = $(shell [ $(call ld-version) $(1) $(2) ] && echo $(3))
+
######

###
diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
new file mode 100755
index 0000000..198580d
--- /dev/null
+++ b/scripts/ld-version.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/awk -f
+# extract linker version number from stdin and turn into single number
+ {
+ gsub(".*)", "");
+ split($1,a, ".");
+ print a[1]*10000000 + a[2]*100000 + a[3]*10000 + a[4]*100 + a[5];
+ exit
+ }
--
1.8.5.2

2014-02-08 08:02:26

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 13/17] Kbuild, lto: Handle basic LTO in modpost

- Don't warn about LTO marker symbols. modpost runs before
the linker, so the module is not necessarily LTOed yet.
- Don't complain about .gnu.lto* sections

Signed-off-by: Andi Kleen <[email protected]>
---
scripts/mod/modpost.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index f91dd45..63804a1 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -623,7 +623,10 @@ static void handle_modversions(struct module *mod, struct elf_info *info,

switch (sym->st_shndx) {
case SHN_COMMON:
- warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name);
+ if (!strncmp(symname, "__gnu_lto_", sizeof("__gnu_lto_")-1)) {
+ /* Should warn here, but modpost runs before the linker */
+ } else
+ warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name);
break;
case SHN_UNDEF:
/* undefined symbol */
@@ -849,6 +852,7 @@ static const char *section_white_list[] =
".xt.lit", /* xtensa */
".arcextmap*", /* arc */
".gnu.linkonce.arcext*", /* arc : modules */
+ ".gnu.lto*",
NULL
};

--
1.8.5.2

2014-02-08 08:02:15

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2

The fancy x86 hweight uses different compiler options for the
hweight file. This does not work with LTO. Just disable the optimization
when building with LTO.

v2: Simplify Kconfig checks (Jan Beulich)
Cc: [email protected]
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/arch_hweight.h | 9 +++++++++
2 files changed, 10 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 940e50e..f125c5f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -255,6 +255,7 @@ config X86_32_LAZY_GS

config ARCH_HWEIGHT_CFLAGS
string
+ default "" if LTO
default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 9686c3d..ca80549 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -25,9 +25,14 @@ static inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res = 0;

+#ifdef CONFIG_LTO
+ res = __sw_hweight32(w);
+#else
+
asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
: "="REG_OUT (res)
: REG_IN (w));
+#endif

return res;
}
@@ -46,6 +51,9 @@ static inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res = 0;

+#ifdef CONFIG_LTO
+ res = __sw_hweight64(w);
+#else
#ifdef CONFIG_X86_32
return __arch_hweight32((u32)w) +
__arch_hweight32((u32)(w >> 32));
@@ -54,6 +62,7 @@ static inline unsigned long __arch_hweight64(__u64 w)
: "="REG_OUT (res)
: REG_IN (w));
#endif /* CONFIG_X86_32 */
+#endif

return res;
}
--
1.8.5.2

2014-02-08 08:02:58

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 15/17] Kbuild, lto: Add LTO build Documentation

Add build documentation for LTO.

Signed-off-by: Andi Kleen <[email protected]>
---
Documentation/lto-build | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 121 insertions(+)
create mode 100644 Documentation/lto-build

diff --git a/Documentation/lto-build b/Documentation/lto-build
new file mode 100644
index 0000000..4c8a713
--- /dev/null
+++ b/Documentation/lto-build
@@ -0,0 +1,121 @@
+Link time optimization (LTO) for the Linux kernel
+
+This is an experimental feature which still has various problems.
+
+Link Time Optimization allows the compiler to optimize the complete program
+instead of just each file. Link Time Optimization was a new feature in gcc 4.6,
+but only really works with gcc 4.7. The kernel LTO build also requires
+the Linux binutils (the normal FSF releases do not work at the moment)
+
+The compiler can inline functions between files and do some other global
+optimizations. It will also drop unused functions which can make the kernel
+image smaller in some circumstances. The binary gets somewhat larger.
+In return the resulting kernels (usually) have better performance.
+
+Build time and memory consumption at build time will increase.
+The build time penalty depends on the size of the vmlinux. A reasonably
+sized vmlinux takes about twice as long to build, much larger monolithic
+kernels like allyesconfig ~4x as long. Modular kernels are less affected.
+
+Normal "reasonable" builds work with less than 4GB of RAM, but very large
+configurations like allyesconfig may need more memory. The actual
+memory needed depends on the available memory (gcc sizes its garbage
+collector pools based on that or on the ulimit -m limits)
+
+Issues:
+- Various workarounds in kernel needed for toolchain problems.
+- A few kernel features are currently incompatible with LTO, in particular
+function tracing, because they require special compiler flags for
+specific files, which is not supported in LTO right now.
+- The build is faster with LTO_SLIM enabled, but this still triggers
+problems in some circumstances (currently disabled)
+- Jobserver control for -j does not work correctly for the final
+LTO phase. The makefile hardcodes -j<number of online cpus>.
+
+Configuration:
+- Enable CONFIG_LTO_MENU and then disable CONFIG_LTO_DISABLE.
+This is mainly to not have allyesconfig default to LTO.
+- FUNCTION_TRACER, STACK_TRACER, FUNCTION_GRAPH_TRACER have to be disabled
+because they are currently incompatible with LTO.
+- MODVERSIONS has to be disabled because it is not fixed for LTO
+yet.
+
+Requirements:
+- Enough memory: 4GB for a standard build, more for allyesconfig
+If you are tight on memory and use tmpfs as /tmp define TMPDIR and
+point it to a directory on disk. The peak memory usage
+happens single threaded (when lto-wpa merges types), so dialing
+back -j options will not help much.
+
+A 32bit compiler is unlikely to work due to the memory requirements.
+You can however build a kernel targeted at 32bit on a 64bit host.
+
+- Get the Linux binutils from
+http://www.kernel.org/pub/linux/devel/binutils/
+Sorry, standard binutils releases don't work.
+The kernel build has to use this linker, so if it is installed
+in a non standard location use LD=... on the make line.
+
+- gcc 4.7 built with plugin ld (--with-plugin-ld) also pointing to the
+linker from the Linux binutils and LTO
+
+If the gcc is not built with this option it may also work to put the correct
+binutils linker first in $PATH when building. I haven't tested
+this however.
+
+Example build procedure for the tool chain and kernel. This does not
+overwrite the standard compiler toolchain on the system. If you already
+have a suitable gcc 4.7+ compiler and linker the toolchain build can
+be skipped (note that a distribution gcc 4.7 is not necessarily
+correctly configured for LTO)
+
+Get the Linux binutils from http://www.kernel.org/pub/linux/devel/binutils/
+The standard binutils do not work at this point!
+
+Unpack binutils
+
+cd binutils-VERSION (or plain binutils in some versions)
+./configure --prefix=/opt/binutils-VERSION --enable-plugins
+nice -n20 make -j$(getconf _NPROCESSORS_ONLN)
+sudo make install
+sudo ln -sf /opt/binutils-VERSION/bin/ld /usr/local/bin/ld-plugin
+
+Unpack gcc-4.7
+
+mkdir obj-gcc
+# please don't skip this cd. the build will not work correctly in the
+# source dir, you have to use the separate object dir
+cd obj-gcc
+# make sure to install gmp-devel and mpfr-devel
+# and the 32bit glibc package if you have a multilib system
+# if mpc-devel is not there get it from
+# http://www.multiprecision.org/mpc/download/mpc-0.8.2.tar.gz
+# and install in gcc-4.7*/mpc
+../gcc-4.7*/configure --prefix=/opt/gcc-4.7 --enable-lto \
+--with-plugin-ld=/usr/local/bin/ld-plugin \
+--disable-nls --enable-languages=c,c++ \
+--disable-libstdcxx-pch
+nice -n20 make -j$(getconf _NPROCESSORS_ONLN)
+sudo make install-no-fixedincludes
+sudo ln -sf /opt/gcc-4.7/bin/gcc /usr/local/bin/gcc47
+sudo ln -sf /opt/gcc-4.7/bin/gcc-ar /usr/local/bin/gcc-ar47
+
+# get lto tree in linux-lto
+
+mkdir obj-lto
+cd obj-lto
+# copy a suitable kernel config file into .config
+make -C ../linux-lto O=$(pwd) oldconfig
+./source/scripts/config --disable function_tracer --disable function_graph_tracer \
+ --disable stack_tracer --enable lto_menu \
+ --disable lto_disable --disable lto_debug --disable lto_slim
+export TMPDIR=$(pwd)
+# this lowers memory usage with /tmp=tmpfs
+# note the special ar is only needed if CONFIG_LTO_SLIM is enabled
+# The PATH is that gcc-ar finds a plugin aware ar, if your standard
+# binutils doesn't support that. If the standard ar supports --plugin
+# it is not needed
+PATH=/opt/binutils-VERSION:$PATH nice -n20 make CC=gcc47 LD=ld-plugin AR=gcc-ar47 \
+-j $(getconf _NPROCESSORS_ONLN)
+
+Andi Kleen
--
1.8.5.2

2014-02-08 08:03:17

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 11/17] Kbuild, lto: Disable LTO for asm-offsets.c

The asm-offsets.c technique to fish data out of the assembler file
does not work with LTO. Just disable LTO for the asm-offsets.c build.

Signed-off-by: Andi Kleen <[email protected]>
---
scripts/Makefile.build | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index d5d859c..9f0ee22 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,7 +198,7 @@ $(multi-objs-y:.o=.s) : modname = $(modname-multi)
$(multi-objs-y:.o=.lst) : modname = $(modname-multi)

quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
-cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $<
+cmd_cc_s_c = $(CC) $(c_flags) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<

$(obj)/%.s: $(src)/%.c FORCE
$(call if_changed_dep,cc_s_c)
--
1.8.5.2

2014-02-08 08:03:37

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 07/17] Kbuild, lto, workaround: Don't warn for initcall_reference in modpost

This reference is discarded, but can cause warnings when it refers to
exit. Ignore for now.

This is a workaround and can be removed once we get rid of
-fno-toplevel-reorder

Signed-off-by: Andi Kleen <[email protected]>
---
scripts/mod/modpost.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 4061098..1f1b154 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1455,6 +1455,10 @@ static void check_section_mismatch(const char *modname, struct elf_info *elf,
to = find_elf_symbol(elf, r->r_addend, sym);
tosym = sym_name(elf, to);

+ if (!strncmp(fromsym, "reference___initcall",
+ sizeof("reference___initcall")-1))
+ return;
+
/* check whitelist - we may ignore it */
if (secref_whitelist(mismatch,
fromsec, fromsym, tosec, tosym)) {
--
1.8.5.2

2014-02-08 08:03:36

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO

LTO gcc puts a lot of data into $TMPDIR, essentially another copy
of the object directory to pass the repartitioned object files
to the code generation processes.

TMPDIR defaults to /tmp. With /tmp as tmpfs it's easy to drive systems
out of memory, because these files compete with the already high anonymous
memory consumption of the wpa LTO pass.

When LTO is set always set TMPDIR to the object directory. This could
be slightly slower, but is far safer and eliminates another parameter
the LTO user would need to set manually.

I made it conditional on LTO for now.

Signed-off-by: Andi Kleen <[email protected]>
---
Makefile | 8 ++++++++
1 file changed, 8 insertions(+)

diff --git a/Makefile b/Makefile
index 606ef7c..d1189ea 100644
--- a/Makefile
+++ b/Makefile
@@ -407,6 +407,14 @@ export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
export KBUILD_ARFLAGS

+ifdef CONFIG_LTO
+# LTO gcc creates a lot of files in TMPDIR, and with /tmp as tmpfs
+# it's easy to drive the machine OOM. Use the object directory
+# instead
+TMPDIR := ${objtree}
+export TMPDIR
+endif
+
# When compiling out-of-tree modules, put MODVERDIR in the module
# tree rather than in the kernel tree. The kernel tree might
# even be read-only.
--
1.8.5.2

2014-02-08 08:02:13

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 04/17] lto, workaround: Add workaround for initcall reordering

Work around a LTO gcc problem: when there is no reference to a variable
in a module it will be moved to the end of the program. This causes
reordering of initcalls which the kernel does not like.
Add a dummy reference function to avoid this. The function is
deleted by the linker.

This replaces a previous much slower workaround.

Thanks to Honza Hubicka for suggesting this technique.

Signed-off-by: Andi Kleen <[email protected]>
---
include/linux/init.h | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index e168880..a3ba270 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -163,6 +163,23 @@ extern bool initcall_debug;

#ifndef __ASSEMBLY__

+#ifdef CONFIG_LTO
+/* Work around a LTO gcc problem: when there is no reference to a variable
+ * in a module it will be moved to the end of the program. This causes
+ * reordering of initcalls which the kernel does not like.
+ * Add a dummy reference function to avoid this. The function is
+ * deleted by the linker.
+ */
+#define LTO_REFERENCE_INITCALL(x) \
+ ; /* yes this is needed */ \
+ static __used __exit void *reference_##x(void) \
+ { \
+ return &x; \
+ }
+#else
+#define LTO_REFERENCE_INITCALL(x)
+#endif
+
/* initcalls are now grouped by functionality into separate
* subsections. Ordering inside the subsections is determined
* by link order.
@@ -175,7 +192,8 @@ extern bool initcall_debug;

#define __define_initcall(fn, id) \
static initcall_t __initcall_##fn##id __used \
- __attribute__((__section__(".initcall" #id ".init"))) = fn
+ __attribute__((__section__(".initcall" #id ".init"))) = fn; \
+ LTO_REFERENCE_INITCALL(__initcall_##fn##id)

/*
* Early initcalls run before initializing SMP.
--
1.8.5.2

2014-02-08 08:04:14

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 06/17] lto: Disable LTO for sys_ni

The assembler alias code in cond_syscall does not work
when compiled for LTO. Just disable LTO for that file.

Signed-off-by: Andi Kleen <[email protected]>
---
kernel/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/kernel/Makefile b/kernel/Makefile
index bc010ee..31c26c6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -18,6 +18,9 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_irq_work.o = -pg
endif

+# cond_syscall is currently not LTO compatible
+CFLAGS_sys_ni.o = $(DISABLE_LTO)
+
obj-y += sched/
obj-y += locking/
obj-y += power/
--
1.8.5.2

2014-02-08 08:04:34

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed

When __gnu_lto_* symbols are present, that means the module has not been
link time optimized yet.

Cc: [email protected]
Signed-off-by: Andi Kleen <[email protected]>
---
kernel/module.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/module.c b/kernel/module.c
index b99e801..2052155 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1949,8 +1949,11 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
switch (sym[i].st_shndx) {
case SHN_COMMON:
/* Ignore common symbols */
- if (!strncmp(name, "__gnu_lto", 9))
+ if (!strncmp(name, "__gnu_lto", 9)) {
+ printk("%s: module not link time optimized\n",
+ mod->name);
break;
+ }

/* We compiled with -fno-common. These are not
supposed to happen. */
--
1.8.5.2

2014-02-08 08:04:52

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support

With LTO gcc will do whole program optimizations for
the whole kernel and each module. This increases compile time,
but can generate faster and smaller code and allows
the compiler to do global checking.

LTO allows gcc to inline functions between different files and
do various other optimization across the whole binary.

It might also trigger bugs due to more aggressive optimization.
It allows gcc to drop unused code. It also allows it to check
types over the whole program.

This adds the basic Kbuild plumbing for LTO:

- In Kbuild add a new scripts/Makefile.lto that checks
the tool chain (note the checks may not be fully bulletproof)
and when the tests pass sets the LTO options
Currently LTO is very finicky about the tool chain.
- Add a new LDFINAL variable that controls the final link
for vmlinux or module. In this case we call gcc-ld instead
of ld, to run the LTO step.
- For slim LTO builds (object files containing no backup
executable) force AR to gcc-ar
- Theoretically LTO should pass through compiler options from
the compiler to the link step, but this doesn't work for all options.
So the Makefile sets most of these options manually.
- Kconfigs:
Since LTO with allyesconfig needs more than 4G of memory (~8G)
and has the potential to make people's systems swap to death,
I used a nested config that ensures that a simple
allyesconfig disables LTO. It has to be explicitly
enabled.
- Some dependencies on other Kconfigs:
MODVERSIONS, GCOV, FUNCTION_TRACER, KALLSYMS_ALL, single chain WCHAN are
incompatible with LTO currently. MODVERSIONS should be fixable,
but the others require setting special compiler options
for specific files, which LTO currently doesn't support.
[MODVERSIONS should in principle work with gcc 4.9, but still disabled]
- I also disable strict copy user checks because they trigger
errors with LTO.
- I had to use a hack to support the single pass kallsyms,
as gcc-nm does not support static symbols currently
- modpost symbol checking is downgraded to a warning,
as in some cases modpost runs before the final link
and it cannot resolve LTO symbols at this point.

For more information see Documentation/lto-build

Thanks to HJ Lu, Joe Mario, Honza Hubicka, Richard Guenther,
Don Zickus, Changlong Xie who helped with this project
(and probably some more who I forgot, sorry)

Signed-off-by: Andi Kleen <[email protected]>
---
Makefile | 9 +++++-
arch/x86/Kconfig | 2 +-
init/Kconfig | 70 +++++++++++++++++++++++++++++++++++++++-
kernel/gcov/Kconfig | 2 +-
scripts/Makefile.lto | 84 ++++++++++++++++++++++++++++++++++++++++++++++++
scripts/Makefile.modpost | 7 ++--
scripts/gcc-ld | 1 +
scripts/kallsyms.c | 4 ++-
scripts/link-vmlinux.sh | 28 +++++++++++++---
9 files changed, 194 insertions(+), 13 deletions(-)
create mode 100644 scripts/Makefile.lto

diff --git a/Makefile b/Makefile
index d1189ea..68e1528 100644
--- a/Makefile
+++ b/Makefile
@@ -335,9 +335,14 @@ include $(srctree)/scripts/Kbuild.include

AS = $(CROSS_COMPILE)as
LD = $(CROSS_COMPILE)ld
+LDFINAL = $(LD)
CC = $(CROSS_COMPILE)gcc
CPP = $(CC) -E
+ifdef CONFIG_LTO_SLIM
+AR = $(CROSS_COMPILE)gcc-ar
+else
AR = $(CROSS_COMPILE)ar
+endif
NM = $(CROSS_COMPILE)nm
STRIP = $(CROSS_COMPILE)strip
OBJCOPY = $(CROSS_COMPILE)objcopy
@@ -396,7 +401,7 @@ KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(S

export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP
+export CPP AR NM STRIP OBJCOPY OBJDUMP LDFINAL
export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS

@@ -707,6 +712,8 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
endif

+include ${srctree}/scripts/Makefile.lto
+
# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
KBUILD_CPPFLAGS += $(KCPPFLAGS)
KBUILD_AFLAGS += $(KAFLAGS)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f125c5f..bba793f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -577,7 +577,7 @@ config X86_32_IRIS

config SCHED_OMIT_FRAME_POINTER
def_bool y
- prompt "Single-depth WCHAN output"
+ prompt "Single-depth WCHAN output" if !LTO && !FRAME_POINTER
depends on X86
---help---
Calculate simpler /proc/<PID>/wchan values. If this option
diff --git a/init/Kconfig b/init/Kconfig
index 009a797..9561935 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1241,6 +1241,70 @@ config CC_OPTIMIZE_FOR_SIZE

If unsure, say N.

+config LTO_MENU
+ bool "Enable gcc link time optimizations"
+ # Only tested on X86 for now. For other architectures you likely
+ # have to fix some things first, like adding asmlinkages etc.
+ depends on X86
+ # lto does not support excluding flags for specific files
+ # right now. Can be removed if that is fixed.
+ depends on !FUNCTION_TRACER
+ help
+ With this option gcc will do whole program optimizations for
+ the whole kernel and module. This increases compile time, but can
+ lead to better code. It allows gcc to inline functions between
+ different files. It might also trigger bugs due to more
+ aggressive optimization. It allows gcc to drop unused code.
+ With this option gcc will also do some global checking over
+ different source files.
+
+ This requires a gcc 4.8 or later compiler and
+ Linux binutils 2.22.51.0.1 or later. It does not currently
+ work with a FSF release of binutils or with gold.
+
+ On larger configurations this may need more than 4GB of RAM.
+ It will likely not work on those with a 32bit compiler. Also
+ /tmp in tmpfs may lead to faster running out of RAM
+ (in this case set the TMPDIR environment variable to a different
+ directory directly on disk)
+
+ When the toolchain support is not available this will (hopefully)
+ be automatically disabled.
+
+ For more information see Documentation/lto-build
+
+config LTO_DISABLE
+ bool "Disable LTO again"
+ depends on LTO_MENU
+ default n
+ help
+ This option is merely here so that allyesconfig or allmodconfig does
+ not enable LTO. If you want to actually use LTO do not enable.
+
+config LTO
+ bool
+ default y
+ depends on LTO_MENU && !LTO_DISABLE
+
+config LTO_DEBUG
+ bool "Enable LTO compile time debugging"
+ depends on LTO
+
+config LTO_CP_CLONE
+ bool "Allow aggressive cloning for constant specialization"
+ depends on LTO
+ help
+ Allow the compiler to clone functions for specific arguments.
+ Experimental. Will increase text size.
+
+config LTO_SLIM
+ #bool "Use slim lto"
+ def_bool y
+ depends on LTO
+ help
+ Do not generate all code twice. The object files will only contain
+ LTO information. This lowers build time.
+
config SYSCTL
bool

@@ -1317,7 +1381,10 @@ config KALLSYMS

config KALLSYMS_ALL
bool "Include all symbols in kallsyms"
- depends on DEBUG_KERNEL && KALLSYMS
+ # the method LTO uses to predict the symbol table
+ # only supports functions for now
+ # This can be removed once http://gcc.gnu.org/PR60016 is fixed
+ depends on DEBUG_KERNEL && KALLSYMS && !LTO
help
Normally kallsyms only contains the symbols of functions for nicer
OOPS messages and backtraces (i.e., symbols from the text and inittext
@@ -1712,6 +1779,7 @@ config MODULE_FORCE_UNLOAD

config MODVERSIONS
bool "Module versioning support"
+ depends on !LTO
help
Usually, you have to use modules compiled with your kernel.
Saying Y here makes it sometimes possible to use modules
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index d04ce8a..32f65b7 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -2,7 +2,7 @@ menu "GCOV-based kernel profiling"

config GCOV_KERNEL
bool "Enable gcov-based kernel profiling"
- depends on DEBUG_FS
+ depends on DEBUG_FS && !LTO
select CONSTRUCTORS if !UML
default n
---help---
diff --git a/scripts/Makefile.lto b/scripts/Makefile.lto
new file mode 100644
index 0000000..df1d8ea
--- /dev/null
+++ b/scripts/Makefile.lto
@@ -0,0 +1,84 @@
+#
+# Support for gcc link time optimization
+#
+
+DISABLE_LTO :=
+LTO_CFLAGS :=
+
+export DISABLE_LTO
+export LTO_CFLAGS
+
+ifdef CONFIG_LTO
+# 4.7 works mostly, but it sometimes loses symbols on large builds
+# This can be worked around by marking those symbols visible,
+# but that is fairly ugly and the problem is gone with 4.8
+# So only allow it with 4.8 for now.
+ifeq ($(call cc-ifversion, -ge, 0408,y),y)
+ifneq ($(call cc-option,${LTO_CFLAGS},n),n)
+# We need HJ Lu's Linux binutils because mainline binutils does not
+# support mixing assembler and LTO code in the same ld -r object.
+# XXX check if the gcc plugin ld is the expected one too
+# XXX some Fedora binutils should also support it. How to check for that?
+ifeq ($(call ld-ifversion,-ge,22710001,y),y)
+ LTO_CFLAGS := -flto -fno-toplevel-reorder
+ LTO_FINAL_CFLAGS := -fuse-linker-plugin
+
+# the -fno-toplevel-reorder is to preserve the order of initcalls
+# everything else should tolerate reordering
+ LTO_FINAL_CFLAGS +=-fno-toplevel-reorder
+
+# enable LTO and set the jobs used by the LTO phase
+# this should be -flto=jobserver to coordinate with the
+# parent make, but work around
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50639
+# use as many jobs as processors are online for now
+ LTO_FINAL_CFLAGS := -flto=$(shell getconf _NPROCESSORS_ONLN)
+ #LTO_FINAL_CFLAGS := -flto=jobserver
+
+ifdef CONFIG_LTO_SLIM
+ # requires plugin ar passed and very recent HJ binutils
+ LTO_CFLAGS += -fno-fat-lto-objects
+endif
+# Used to disable LTO for specific files (e.g. vdso)
+ DISABLE_LTO := -fno-lto
+
+ LTO_FINAL_CFLAGS += ${LTO_CFLAGS} -fwhole-program
+
+ifdef CONFIG_LTO_DEBUG
+ LTO_FINAL_CFLAGS += -dH -fdump-ipa-cgraph -fdump-ipa-inline-details
+ # -Wl,-plugin-save-temps -save-temps
+ LTO_CFLAGS +=
+endif
+ifdef CONFIG_LTO_CP_CLONE
+ LTO_FINAL_CFLAGS += -fipa-cp-clone
+ LTO_CFLAGS += -fipa-cp-clone
+endif
+
+ # In principle gcc should pass through options in the object files,
+ # but it doesn't always work. So do it here manually
+ # Note that special options for individual files does not
+ # work currently (except for some special cases that only
+ # affect the compiler frontend)
+ # The main offenders are FTRACE and GCOV -- we exclude
+ # those in the config.
+ LTO_FINAL_CFLAGS += $(filter -g%,${KBUILD_CFLAGS})
+ LTO_FINAL_CFLAGS += $(filter -O%,${KBUILD_CFLAGS})
+ LTO_FINAL_CFLAGS += $(filter -f%,${KBUILD_CFLAGS})
+ LTO_FINAL_CFLAGS += $(filter -m%,${KBUILD_CFLAGS})
+ LTO_FINAL_CFLAGS += $(filter -W%,${KBUILD_CFLAGS})
+
+ KBUILD_CFLAGS += ${LTO_CFLAGS}
+
+ LDFINAL := ${CONFIG_SHELL} ${srctree}/scripts/gcc-ld \
+ ${LTO_FINAL_CFLAGS}
+
+else
+ $(warning "WARNING: Too old linker version $(call ld-version) for kernel LTO. You need Linux binutils. CONFIG_LTO disabled.")
+endif
+else
+ $(warning "WARNING: Compiler/Linker does not support LTO/WHOPR with linker plugin. CONFIG_LTO disabled.")
+endif
+else
+ $(warning "WARNING: GCC $(call cc-version) too old for LTO/WHOPR. CONFIG_LTO disabled")
+endif
+endif
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 69f0a14..9c40dae 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -77,7 +77,8 @@ modpost = scripts/mod/modpost \
$(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \
$(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
$(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \
- $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
+ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \
+ $(if $(CONFIG_LTO),-w)

MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))

@@ -115,8 +116,8 @@ $(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
targets += $(modules:.ko=.mod.o)

# Step 6), final link of the modules
-quiet_cmd_ld_ko_o = LD [M] $@
- cmd_ld_ko_o = $(LD) -r $(LDFLAGS) \
+quiet_cmd_ld_ko_o = LDFINAL [M] $@
+ cmd_ld_ko_o = $(LDFINAL) -r $(LDFLAGS) \
$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
-o $@ $(filter-out FORCE,$^)

diff --git a/scripts/gcc-ld b/scripts/gcc-ld
index cadab9a..a9161da 100644
--- a/scripts/gcc-ld
+++ b/scripts/gcc-ld
@@ -18,6 +18,7 @@ while [ "$1" != "" ] ; do
-rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|\
--version-script|--dynamic-list|--version-exports-symbol|--wrap|-m)
A="$1" ; shift ; N="-Wl,$A,$1" ;;
+ --param) shift ; N="--param $1" ;;
-[m]*) N="$1" ;;
-*) N="-Wl,$1" ;;
*) N="$1" ;;
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index d0e2b56..e479076 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -247,11 +247,13 @@ static int symbol_valid(struct sym_entry *s)
* the kallsyms data are added. If these symbols move then
* they may get dropped in pass 2, which breaks the kallsyms
* rules.
+ * But don't do this for predicted fake symbols with 0 value.
*/
- if ((s->addr == text_range_text->end &&
+ if (((s->addr == text_range_text->end &&
strcmp((char *)s->sym + offset, text_range_text->etext)) ||
(s->addr == text_range_inittext->end &&
strcmp((char *)s->sym + offset, text_range_inittext->etext)))
+ && text_range_text->end != 0)
return 0;
}

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 0300047..4c5435f 100644
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -53,7 +53,7 @@ vmlinux_link()
local lds="${objtree}/${KBUILD_LDS}"

if [ "${SRCARCH}" != "um" ]; then
- ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
+ ${LDFINAL} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
-T ${lds} ${KBUILD_VMLINUX_INIT} \
--start-group ${KBUILD_VMLINUX_MAIN} --end-group ${1}
else
@@ -90,10 +90,28 @@ kallsyms()
local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \
${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}"

- ${NM} -n ${1} | \
- awk 'NF == 3 { print}' |
- scripts/kallsyms ${kallsymopt} | \
+ # workaround for slim LTO gcc-nm not outputing static symbols
+ # http://gcc.gnu.org/PR60016
+ # generate a fake symbol table based on the LTO function sections.
+ # This unfortunately "knows" about the internal LTO file format
+ # and only works for functions
+ # needs perl for now when building for LTO
+ (
+ if $OBJDUMP --section-headers ${1} | grep -q \.gnu\.lto_ ; then
+ ${OBJDUMP} --section-headers ${1} |
+ perl -ne '
+@n = split;
+next unless $n[1] =~ /\.gnu\.lto_([_a-zA-Z][^.]+)/;
+next if $n[1] eq $prev;
+$prev = $n[1];
+print "0 T ",$1,"\n"'
+ fi
+ ${NM} -n ${1} | awk 'NF == 3 { print }'
+ ) > ${2}_sym
+ # run without pipe to make kallsyms errors stop the script
+ ./scripts/kallsyms ${kallsymopt} < ${2}_sym |
${CC} ${aflags} -c -o ${2} -x assembler-with-cpp -
+
}

# Create map file with all symbols from ${1}
@@ -181,7 +199,7 @@ if [ -n "${CONFIG_KALLSYMS}" ] ; then
kallsymsso=.tmp_kallsyms1.o
fi

-info LD vmlinux
+info LDFINAL vmlinux
vmlinux_link "${kallsymsso}" vmlinux
if [ -n "${CONFIG_KALLSYMS}" ] ; then
# Now regenerate the kallsyms table and patch it into the
--
1.8.5.2

2014-02-08 08:04:51

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 16/17] lto: Mark spinlocks noinline when inline spinlocks are disabled

Otherwise LTO will inline them anyways

Cc: [email protected]
Signed-off-by: Andi Kleen <[email protected]>
---
kernel/locking/spinlock.c | 56 +++++++++++++++++++++++------------------------
1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 4b082b5..975bfe9 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -130,7 +130,7 @@ BUILD_LOCK_OPS(write, rwlock);
#endif

#ifndef CONFIG_INLINE_SPIN_TRYLOCK
-int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
+noinline int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
{
return __raw_spin_trylock(lock);
}
@@ -138,7 +138,7 @@ EXPORT_SYMBOL(_raw_spin_trylock);
#endif

#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
-int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
+noinline int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
{
return __raw_spin_trylock_bh(lock);
}
@@ -146,7 +146,7 @@ EXPORT_SYMBOL(_raw_spin_trylock_bh);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK
-void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
{
__raw_spin_lock(lock);
}
@@ -154,7 +154,7 @@ EXPORT_SYMBOL(_raw_spin_lock);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
+noinline unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
return __raw_spin_lock_irqsave(lock);
}
@@ -162,7 +162,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irqsave);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
-void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
{
__raw_spin_lock_irq(lock);
}
@@ -170,7 +170,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irq);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK_BH
-void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
{
__raw_spin_lock_bh(lock);
}
@@ -178,7 +178,7 @@ EXPORT_SYMBOL(_raw_spin_lock_bh);
#endif

#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
-void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
{
__raw_spin_unlock(lock);
}
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(_raw_spin_unlock);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
-void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
{
__raw_spin_unlock_irqrestore(lock, flags);
}
@@ -194,7 +194,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
-void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
{
__raw_spin_unlock_irq(lock);
}
@@ -202,7 +202,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irq);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
-void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
{
__raw_spin_unlock_bh(lock);
}
@@ -210,7 +210,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_bh);
#endif

#ifndef CONFIG_INLINE_READ_TRYLOCK
-int __lockfunc _raw_read_trylock(rwlock_t *lock)
+noinline int __lockfunc _raw_read_trylock(rwlock_t *lock)
{
return __raw_read_trylock(lock);
}
@@ -218,7 +218,7 @@ EXPORT_SYMBOL(_raw_read_trylock);
#endif

#ifndef CONFIG_INLINE_READ_LOCK
-void __lockfunc _raw_read_lock(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock(rwlock_t *lock)
{
__raw_read_lock(lock);
}
@@ -226,7 +226,7 @@ EXPORT_SYMBOL(_raw_read_lock);
#endif

#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
+noinline unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
{
return __raw_read_lock_irqsave(lock);
}
@@ -234,7 +234,7 @@ EXPORT_SYMBOL(_raw_read_lock_irqsave);
#endif

#ifndef CONFIG_INLINE_READ_LOCK_IRQ
-void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
{
__raw_read_lock_irq(lock);
}
@@ -242,7 +242,7 @@ EXPORT_SYMBOL(_raw_read_lock_irq);
#endif

#ifndef CONFIG_INLINE_READ_LOCK_BH
-void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
{
__raw_read_lock_bh(lock);
}
@@ -250,7 +250,7 @@ EXPORT_SYMBOL(_raw_read_lock_bh);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK
-void __lockfunc _raw_read_unlock(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock(rwlock_t *lock)
{
__raw_read_unlock(lock);
}
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(_raw_read_unlock);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
-void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__raw_read_unlock_irqrestore(lock, flags);
}
@@ -266,7 +266,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
-void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
{
__raw_read_unlock_irq(lock);
}
@@ -274,7 +274,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irq);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK_BH
-void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
{
__raw_read_unlock_bh(lock);
}
@@ -282,7 +282,7 @@ EXPORT_SYMBOL(_raw_read_unlock_bh);
#endif

#ifndef CONFIG_INLINE_WRITE_TRYLOCK
-int __lockfunc _raw_write_trylock(rwlock_t *lock)
+noinline int __lockfunc _raw_write_trylock(rwlock_t *lock)
{
return __raw_write_trylock(lock);
}
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(_raw_write_trylock);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK
-void __lockfunc _raw_write_lock(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock(rwlock_t *lock)
{
__raw_write_lock(lock);
}
@@ -298,7 +298,7 @@ EXPORT_SYMBOL(_raw_write_lock);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
+noinline unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
{
return __raw_write_lock_irqsave(lock);
}
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(_raw_write_lock_irqsave);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
-void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
{
__raw_write_lock_irq(lock);
}
@@ -314,7 +314,7 @@ EXPORT_SYMBOL(_raw_write_lock_irq);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK_BH
-void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
{
__raw_write_lock_bh(lock);
}
@@ -322,7 +322,7 @@ EXPORT_SYMBOL(_raw_write_lock_bh);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK
-void __lockfunc _raw_write_unlock(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock(rwlock_t *lock)
{
__raw_write_unlock(lock);
}
@@ -330,7 +330,7 @@ EXPORT_SYMBOL(_raw_write_unlock);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
-void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__raw_write_unlock_irqrestore(lock, flags);
}
@@ -338,7 +338,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
-void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
{
__raw_write_unlock_irq(lock);
}
@@ -346,7 +346,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irq);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
-void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
{
__raw_write_unlock_bh(lock);
}
--
1.8.5.2

2014-02-08 08:02:09

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 03/17] lto: Make asmlinkage __visible

Signed-off-by: Andi Kleen <[email protected]>
---
include/linux/linkage.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index a6a42dd..34a513a 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -12,9 +12,9 @@
#endif

#ifdef __cplusplus
-#define CPP_ASMLINKAGE extern "C"
+#define CPP_ASMLINKAGE extern "C" __visible
#else
-#define CPP_ASMLINKAGE
+#define CPP_ASMLINKAGE __visible
#endif

#ifndef asmlinkage
--
1.8.5.2

2014-02-08 08:05:57

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 10/17] Kbuild, lto: Add a gcc-ld script to let run gcc as ld

For LTO we need to run the link step with gcc, not ld.
Since there are a lot of linker options passed to it, add a gcc-ld wrapper
that wraps them as -Wl,

Signed-off-by: Andi Kleen <[email protected]>
---
scripts/gcc-ld | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
create mode 100644 scripts/gcc-ld

diff --git a/scripts/gcc-ld b/scripts/gcc-ld
new file mode 100644
index 0000000..cadab9a
--- /dev/null
+++ b/scripts/gcc-ld
@@ -0,0 +1,29 @@
+#!/bin/sh
+# run gcc with ld options
+# used as a wrapper to execute link time optimizations
+# yes virginia, this is not pretty
+
+ARGS="-nostdlib"
+
+while [ "$1" != "" ] ; do
+ case "$1" in
+ -save-temps|-m32|-m64) N="$1" ;;
+ -r) N="$1" ;;
+ -[Wg]*) N="$1" ;;
+ -[olv]|-[Ofd]*|-nostdlib) N="$1" ;;
+ --end-group|--start-group)
+ N="-Wl,$1" ;;
+ -[RTFGhIezcbyYu]*|\
+--script|--defsym|-init|-Map|--oformat|-rpath|\
+-rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|\
+--version-script|--dynamic-list|--version-exports-symbol|--wrap|-m)
+ A="$1" ; shift ; N="-Wl,$A,$1" ;;
+ -[m]*) N="$1" ;;
+ -*) N="-Wl,$1" ;;
+ *) N="$1" ;;
+ esac
+ ARGS="$ARGS $N"
+ shift
+done
+
+exec $CC $ARGS
--
1.8.5.2

2014-02-08 08:06:32

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 08/17] Kbuild, lto: Drop .number postfixes in modpost

LTO turns all global symbols effectively into statics. This
has the side effect that they all have a .NUMBER postfix to make
them unique. In modpost drop this postfix because it confuses
it.

Signed-off-by: Andi Kleen <[email protected]>
---
scripts/mod/modpost.c | 15 ++++++++++++++-
scripts/mod/modpost.h | 2 +-
2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 1f1b154..f91dd45 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1684,6 +1684,19 @@ static void check_sec_ref(struct module *mod, const char *modname,
}
}

+static char *remove_dot(char *s)
+{
+ char *end;
+ int n = strcspn(s, ".");
+
+ if (n > 0 && s[n] != 0) {
+ strtoul(s + n + 1, &end, 10);
+ if (end > s + n + 1 && (*end == '.' || *end == 0))
+ s[n] = 0;
+ }
+ return s;
+}
+
static void read_symbols(char *modname)
{
const char *symname;
@@ -1722,7 +1735,7 @@ static void read_symbols(char *modname)
}

for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
- symname = info.strtab + sym->st_name;
+ symname = remove_dot(info.strtab + sym->st_name);

handle_modversions(mod, &info, sym, symname);
handle_moddevtable(mod, &info, sym, symname);
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 51207e4..168b43d 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -127,7 +127,7 @@ struct elf_info {
Elf_Section export_gpl_sec;
Elf_Section export_unused_gpl_sec;
Elf_Section export_gpl_future_sec;
- const char *strtab;
+ char *strtab;
char *modinfo;
unsigned int modinfo_len;

--
1.8.5.2

2014-02-08 08:06:49

by Andi Kleen

[permalink] [raw]
Subject: [PATCH 05/17] lto: Handle LTO common symbols in module loader

From: Joe Mario <[email protected]>

Here is the workaround I made for having the kernel not reject modules
built with -flto. The clean solution would be to get the compiler to not
emit the symbol. Or if it has to emit the symbol, then emit it as
initialized data but put it into a comdat/linkonce section.

Minor tweaks by AK over Joe's patch.

Cc: [email protected]
Signed-off-by: Andi Kleen <[email protected]>
---
kernel/module.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/kernel/module.c b/kernel/module.c
index d24fcf2..b99e801 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1948,6 +1948,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)

switch (sym[i].st_shndx) {
case SHN_COMMON:
+ /* Ignore common symbols */
+ if (!strncmp(name, "__gnu_lto", 9))
+ break;
+
/* We compiled with -fno-common. These are not
supposed to happen. */
pr_debug("Common symbol: %s\n", name);
--
1.8.5.2

2014-02-08 18:52:34

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2

On 02/08/2014 12:01 AM, Andi Kleen wrote:
> The fancy x86 hweight uses different compiler options for the
> hweight file. This does not work with LTO. Just disable the optimization
> with LTO

No, I'm going to NAK this. This means not using the POPCNT instruction
if LTO is enabled, and that really isn't an acceptable option.

-hpa

2014-02-08 20:21:28

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2

On Sat, Feb 08, 2014 at 10:52:07AM -0800, H. Peter Anvin wrote:
> On 02/08/2014 12:01 AM, Andi Kleen wrote:
> > The fancy x86 hweight uses different compiler options for the
> > hweight file. This does not work with LTO. Just disable the optimization
> > with LTO
>
> No, I'm going to NAK this. This means not using the POPCNT instruction
> if LTO is enabled, and that really isn't an acceptable option.

I thought the use was obscure?

Ok, suppose can just disable LTO for the file.
The only drawback is that the functions will not be optimized away when
not used, as they'll need to be __visible.

-Andi
--
[email protected] -- Speaking for myself only

2014-02-08 21:44:20

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2

That's fine.

On February 8, 2014 12:21:24 PM PST, Andi Kleen <[email protected]> wrote:
>On Sat, Feb 08, 2014 at 10:52:07AM -0800, H. Peter Anvin wrote:
>> On 02/08/2014 12:01 AM, Andi Kleen wrote:
>> > The fancy x86 hweight uses different compiler options for the
>> > hweight file. This does not work with LTO. Just disable the
>optimization
>> > with LTO
>>
>> No, I'm going to NAK this. This means not using the POPCNT
>instruction
>> if LTO is enabled, and that really isn't an acceptable option.
>
>I thought the use was obscure?
>
>Ok, suppose can just disable LTO for the file.
>The only drawback is that the functions will not be optimized away when
>not used, as they'll need to be __visible.
>
>-Andi

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-02-13 01:23:55

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 05/17] lto: Handle LTO common symbols in module loader

Andi Kleen <[email protected]> writes:
> From: Joe Mario <[email protected]>
>
> Here is the workaround I made for having the kernel not reject modules
> built with -flto. The clean solution would be to get the compiler to not
> emit the symbol. Or if it has to emit the symbol, then emit it as
> initialized data but put it into a comdat/linkonce section.
>
> Minor tweaks by AK over Joe's patch.

Patch is fine, but what's with the comment?

> switch (sym[i].st_shndx) {
> case SHN_COMMON:
> + /* Ignore common symbols */
> + if (!strncmp(name, "__gnu_lto", 9))
> + break;
> +

You mean, "/* Ignore symbols from -flto */"?

Other than that, I'm happy for this to go via some other tree:

Acked-by: Rusty Russell <[email protected]>

Thanks,
Rusty.

2014-02-13 01:23:53

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed

Andi Kleen <[email protected]> writes:
> When __gnu_lto_* is present that means that the module hasn't run with
> LTO yet.

In practice, this means they didn't build their kernel properly, right?
It shouldn't break anything, but it seems really weird. And how many
times will the printk fire on a single module?

Seems like a job for pr_warn?

Thanks,
Rusty.

> ---
> kernel/module.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/module.c b/kernel/module.c
> index b99e801..2052155 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -1949,8 +1949,11 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
> switch (sym[i].st_shndx) {
> case SHN_COMMON:
> /* Ignore common symbols */
> - if (!strncmp(name, "__gnu_lto", 9))
> + if (!strncmp(name, "__gnu_lto", 9)) {
> + printk("%s: module not link time optimized\n",
> + mod->name);
> break;
> + }
>
> /* We compiled with -fno-common. These are not
> supposed to happen. */
> --
> 1.8.5.2

2014-02-14 04:27:01

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO

On 02/08/2014 12:01 AM, Andi Kleen wrote:
> LTO gcc puts a lot of data into $TMPDIR, essentially another copy
> of the object directory to pass the repartitioned object files
> to the code generation processes.
>
> TMPDIR defaults to /tmp. With /tmp as tmpfs it's easy to drive systems to
> out of memory, because they will compete with the already high anonymous
> memory consumption of the wpa LTO pass.
>
> When LTO is set always set TMPDIR to the object directory. This could
> be slightly slower, but is far safer and eliminates another parameter
> the LTO user would need to set manually.
>
> I made it conditional on LTO for now.

I think this really ought to use ?= so it doesn't override a TMPDIR
explicitly set by the user.

-hpa

2014-02-14 04:28:20

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support

I am about to commit the patches before this except 02/17 and 12/17 to
tip:x86/asmlinkage; however, I figure we need a new 02/17 before
committing the actual LTO patches to avoid build breakage.

-hpa

Subject: [tip:x86/asmlinkage] x86, lto: Disable LTO for the x86 VDSO

Commit-ID: 67424d5a22124fa2d115faa8f32d12506989628f
Gitweb: http://git.kernel.org/tip/67424d5a22124fa2d115faa8f32d12506989628f
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:05 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:21:57 -0800

x86, lto: Disable LTO for the x86 VDSO

The VDSO does not play well with LTO, so just disable LTO for it.
Also pass a 32bit linker flag for the 32bit version.

[ hpa: change braces to parens to match kernel Makefile style ]

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/x86/vdso/Makefile | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index fd14be1..9206ac7 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -2,6 +2,8 @@
# Building vDSO images for x86.
#

+KBUILD_CFLAGS += $(DISABLE_LTO)
+
VDSO64-$(CONFIG_X86_64) := y
VDSOX32-$(CONFIG_X86_X32_ABI) := y
VDSO32-$(CONFIG_X86_32) := y
@@ -35,7 +37,8 @@ export CPPFLAGS_vdso.lds += -P -C

VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,--no-undefined \
- -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
+ $(DISABLE_LTO)

$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so

@@ -127,7 +130,7 @@ vdso32.so-$(VDSO32-y) += sysenter
vdso32-images = $(vdso32.so-y:%=vdso32-%.so)

CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1

# This makes sure the $(obj) subdirectory exists even though vdso32/
# is not a kbuild sub-make subdirectory.
@@ -181,7 +184,8 @@ quiet_cmd_vdso = VDSO $@
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'

-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+ $(LTO_CFLAGS)
GCOV_PROFILE := n

#

Subject: [tip:x86/asmlinkage] lto: Make asmlinkage __visible

Commit-ID: 128ea04a9885af9629059e631ddf0cab4815b589
Gitweb: http://git.kernel.org/tip/128ea04a9885af9629059e631ddf0cab4815b589
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:07 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:21:59 -0800

lto: Make asmlinkage __visible

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
include/linux/linkage.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index a6a42dd..34a513a 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -12,9 +12,9 @@
#endif

#ifdef __cplusplus
-#define CPP_ASMLINKAGE extern "C"
+#define CPP_ASMLINKAGE extern "C" __visible
#else
-#define CPP_ASMLINKAGE
+#define CPP_ASMLINKAGE __visible
#endif

#ifndef asmlinkage

Subject: [tip:x86/asmlinkage] lto, workaround: Add workaround for initcall reordering

Commit-ID: ef1b893c29d0dba778f67ad97b554b37f9108dcc
Gitweb: http://git.kernel.org/tip/ef1b893c29d0dba778f67ad97b554b37f9108dcc
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:08 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:24:13 -0800

lto, workaround: Add workaround for initcall reordering

Work around a LTO gcc problem: when there is no reference to a variable
in a module it will be moved to the end of the program. This causes
reordering of initcalls which the kernel does not like.
Add a dummy reference function to avoid this. The function is
deleted by the linker.

This replaces a previous much slower workaround.

Thanks to Jan "Honza" Hubička for suggesting this technique.

Suggested-by: Jan Hubička <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
include/linux/init.h | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index e168880..a3ba270 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -163,6 +163,23 @@ extern bool initcall_debug;

#ifndef __ASSEMBLY__

+#ifdef CONFIG_LTO
+/* Work around a LTO gcc problem: when there is no reference to a variable
+ * in a module it will be moved to the end of the program. This causes
+ * reordering of initcalls which the kernel does not like.
+ * Add a dummy reference function to avoid this. The function is
+ * deleted by the linker.
+ */
+#define LTO_REFERENCE_INITCALL(x) \
+ ; /* yes this is needed */ \
+ static __used __exit void *reference_##x(void) \
+ { \
+ return &x; \
+ }
+#else
+#define LTO_REFERENCE_INITCALL(x)
+#endif
+
/* initcalls are now grouped by functionality into separate
* subsections. Ordering inside the subsections is determined
* by link order.
@@ -175,7 +192,8 @@ extern bool initcall_debug;

#define __define_initcall(fn, id) \
static initcall_t __initcall_##fn##id __used \
- __attribute__((__section__(".initcall" #id ".init"))) = fn
+ __attribute__((__section__(".initcall" #id ".init"))) = fn; \
+ LTO_REFERENCE_INITCALL(__initcall_##fn##id)

/*
* Early initcalls run before initializing SMP.

Subject: [tip:x86/asmlinkage] lto: Handle LTO common symbols in module loader

Commit-ID: 80375980f1608f43b47abc2671456b23ec68c434
Gitweb: http://git.kernel.org/tip/80375980f1608f43b47abc2671456b23ec68c434
Author: Joe Mario <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:09 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:24:50 -0800

lto: Handle LTO common symbols in module loader

Here is the workaround I made for having the kernel not reject modules
built with -flto. The clean solution would be to get the compiler to not
emit the symbol. Or if it has to emit the symbol, then emit it as
initialized data but put it into a comdat/linkonce section.

Minor tweaks by AK over Joe's patch.

Cc: Rusty Russell <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
kernel/module.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/kernel/module.c b/kernel/module.c
index d24fcf2..b99e801 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1948,6 +1948,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)

switch (sym[i].st_shndx) {
case SHN_COMMON:
+ /* Ignore common symbols */
+ if (!strncmp(name, "__gnu_lto", 9))
+ break;
+
/* We compiled with -fno-common. These are not
supposed to happen. */
pr_debug("Common symbol: %s\n", name);

Subject: [tip:x86/asmlinkage] lto: Disable LTO for sys_ni

Commit-ID: 58edae3aac9f2ccd1afb12ea08127e840a0a706c
Gitweb: http://git.kernel.org/tip/58edae3aac9f2ccd1afb12ea08127e840a0a706c
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:10 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:24:53 -0800

lto: Disable LTO for sys_ni

The assembler alias code in cond_syscall does not work
when compiled for LTO. Just disable LTO for that file.

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
kernel/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/kernel/Makefile b/kernel/Makefile
index bc010ee..31c26c6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -18,6 +18,9 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_irq_work.o = -pg
endif

+# cond_syscall is currently not LTO compatible
+CFLAGS_sys_ni.o = $(DISABLE_LTO)
+
obj-y += sched/
obj-y += locking/
obj-y += power/

Subject: [tip:x86/asmlinkage] Kbuild, lto, workaround: Don't warn for initcall_reference in modpost

Commit-ID: 77ab21adae509c5540956729e2d03bc1a59bc82a
Gitweb: http://git.kernel.org/tip/77ab21adae509c5540956729e2d03bc1a59bc82a
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:11 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:24:56 -0800

Kbuild, lto, workaround: Don't warn for initcall_reference in modpost

This reference is discarded, but can cause warnings when it refers to
exit. Ignore for now.

This is a workaround and can be removed once we get rid of
-fno-toplevel-reorder

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
scripts/mod/modpost.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 4061098..1f1b154 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1455,6 +1455,10 @@ static void check_section_mismatch(const char *modname, struct elf_info *elf,
to = find_elf_symbol(elf, r->r_addend, sym);
tosym = sym_name(elf, to);

+ if (!strncmp(fromsym, "reference___initcall",
+ sizeof("reference___initcall")-1))
+ return;
+
/* check whitelist - we may ignore it */
if (secref_whitelist(mismatch,
fromsec, fromsym, tosec, tosym)) {

Subject: [tip:x86/asmlinkage] Kbuild, lto: Drop .number postfixes in modpost

Commit-ID: 7d02b490e93c199a15b3c4bce1c393588c1300ca
Gitweb: http://git.kernel.org/tip/7d02b490e93c199a15b3c4bce1c393588c1300ca
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:12 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:24:58 -0800

Kbuild, lto: Drop .number postfixes in modpost

LTO turns all global symbols effectively into statics. This
has the side effect that they all have a .NUMBER postfix to make
them unique. In modpost drop this postfix because it confuses
it.

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
scripts/mod/modpost.c | 15 ++++++++++++++-
scripts/mod/modpost.h | 2 +-
2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 1f1b154..f91dd45 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1684,6 +1684,19 @@ static void check_sec_ref(struct module *mod, const char *modname,
}
}

+static char *remove_dot(char *s)
+{
+ char *end;
+ int n = strcspn(s, ".");
+
+ if (n > 0 && s[n] != 0) {
+ strtoul(s + n + 1, &end, 10);
+ if (end > s + n + 1 && (*end == '.' || *end == 0))
+ s[n] = 0;
+ }
+ return s;
+}
+
static void read_symbols(char *modname)
{
const char *symname;
@@ -1722,7 +1735,7 @@ static void read_symbols(char *modname)
}

for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
- symname = info.strtab + sym->st_name;
+ symname = remove_dot(info.strtab + sym->st_name);

handle_modversions(mod, &info, sym, symname);
handle_moddevtable(mod, &info, sym, symname);
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 51207e4..168b43d 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -127,7 +127,7 @@ struct elf_info {
Elf_Section export_gpl_sec;
Elf_Section export_unused_gpl_sec;
Elf_Section export_gpl_future_sec;
- const char *strtab;
+ char *strtab;
char *modinfo;
unsigned int modinfo_len;

Subject: [tip:x86/asmlinkage] Kbuild, lto: add ld-version and ld-ifversion macros

Commit-ID: ccbef1674a1579842c7dbdf554efca85d2cd245a
Gitweb: http://git.kernel.org/tip/ccbef1674a1579842c7dbdf554efca85d2cd245a
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:13 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:25:00 -0800

Kbuild, lto: add ld-version and ld-ifversion macros

To check the linker version. Used by the LTO makefile.

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
scripts/Kbuild.include | 9 +++++++++
scripts/ld-version.sh | 8 ++++++++
2 files changed, 17 insertions(+)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 547e15d..93a0da2 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -155,6 +155,15 @@ ld-option = $(call try-run,\
# Important: no spaces around options
ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2))

+# ld-version
+# Usage: $(call ld-version)
+# Note this is mainly for HJ Lu's 3 number binutil versions
+ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
+
+# ld-ifversion
+# Usage: $(call ld-ifversion, -ge, 22252, y)
+ld-ifversion = $(shell [ $(call ld-version) $(1) $(2) ] && echo $(3))
+
######

###
diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
new file mode 100755
index 0000000..198580d
--- /dev/null
+++ b/scripts/ld-version.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/awk -f
+# extract linker version number from stdin and turn into single number
+ {
+ gsub(".*)", "");
+ split($1,a, ".");
+ print a[1]*10000000 + a[2]*100000 + a[3]*10000 + a[4]*100 + a[5];
+ exit
+ }

Subject: [tip:x86/asmlinkage] Kbuild, lto: Add a gcc-ld script to let run gcc as ld

Commit-ID: 8564ed2b3888176ac323eefea1722003daeba3d3
Gitweb: http://git.kernel.org/tip/8564ed2b3888176ac323eefea1722003daeba3d3
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:14 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:25:02 -0800

Kbuild, lto: Add a gcc-ld script to let run gcc as ld

For LTO we need to run the link step with gcc, not ld.
Since there are a lot of linker options passed to it, add a gcc-ld wrapper
that passes them on to the linker as -Wl,<option>.

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
scripts/gcc-ld | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/scripts/gcc-ld b/scripts/gcc-ld
new file mode 100644
index 0000000..cadab9a
--- /dev/null
+++ b/scripts/gcc-ld
@@ -0,0 +1,29 @@
+#!/bin/sh
+# run gcc with ld options
+# used as a wrapper to execute link time optimizations
+# yes virginia, this is not pretty
+
+ARGS="-nostdlib"
+
+while [ "$1" != "" ] ; do
+ case "$1" in
+ -save-temps|-m32|-m64) N="$1" ;;
+ -r) N="$1" ;;
+ -[Wg]*) N="$1" ;;
+ -[olv]|-[Ofd]*|-nostdlib) N="$1" ;;
+ --end-group|--start-group)
+ N="-Wl,$1" ;;
+ -[RTFGhIezcbyYu]*|\
+--script|--defsym|-init|-Map|--oformat|-rpath|\
+-rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|\
+--version-script|--dynamic-list|--version-exports-symbol|--wrap|-m)
+ A="$1" ; shift ; N="-Wl,$A,$1" ;;
+ -[m]*) N="$1" ;;
+ -*) N="-Wl,$1" ;;
+ *) N="$1" ;;
+ esac
+ ARGS="$ARGS $N"
+ shift
+done
+
+exec $CC $ARGS

Subject: [tip:x86/asmlinkage] Kbuild, lto: Disable LTO for asm-offsets.c

Commit-ID: 1e64ff42ea3d8d2fc8aa71f9717b3c1cb6c2f893
Gitweb: http://git.kernel.org/tip/1e64ff42ea3d8d2fc8aa71f9717b3c1cb6c2f893
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:15 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:25:03 -0800

Kbuild, lto: Disable LTO for asm-offsets.c

The asm-offsets.c technique to fish data out of the assembler file
does not work with LTO. Just disable LTO for the asm-offsets.c build.

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
scripts/Makefile.build | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index d5d859c..9f0ee22 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,7 +198,7 @@ $(multi-objs-y:.o=.s) : modname = $(modname-multi)
$(multi-objs-y:.o=.lst) : modname = $(modname-multi)

quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
-cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $<
+cmd_cc_s_c = $(CC) $(c_flags) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<

$(obj)/%.s: $(src)/%.c FORCE
$(call if_changed_dep,cc_s_c)

Subject: [tip:x86/asmlinkage] Kbuild, lto: Handle basic LTO in modpost

Commit-ID: ef178f9238b142cc1020265e176b20d27fd02ba9
Gitweb: http://git.kernel.org/tip/ef178f9238b142cc1020265e176b20d27fd02ba9
Author: Andi Kleen <[email protected]>
AuthorDate: Sat, 8 Feb 2014 09:01:17 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 13 Feb 2014 20:25:05 -0800

Kbuild, lto: Handle basic LTO in modpost

- Don't warn about LTO marker symbols. modpost runs before
the linker, so the module is not necessarily LTOed yet.
- Don't complain about .gnu.lto* sections

Signed-off-by: Andi Kleen <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
scripts/mod/modpost.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index f91dd45..63804a1 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -623,7 +623,10 @@ static void handle_modversions(struct module *mod, struct elf_info *info,

switch (sym->st_shndx) {
case SHN_COMMON:
- warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name);
+ if (!strncmp(symname, "__gnu_lto_", sizeof("__gnu_lto_")-1)) {
+ /* Should warn here, but modpost runs before the linker */
+ } else
+ warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name);
break;
case SHN_UNDEF:
/* undefined symbol */
@@ -849,6 +852,7 @@ static const char *section_white_list[] =
".xt.lit", /* xtensa */
".arcextmap*", /* arc */
".gnu.linkonce.arcext*", /* arc : modules */
+ ".gnu.lto*",
NULL
};

2014-02-14 09:11:22

by Borislav Petkov

[permalink] [raw]
Subject: Re: [tip:x86/asmlinkage] lto: Make asmlinkage __visible

On Thu, Feb 13, 2014 at 08:30:37PM -0800, tip-bot for Andi Kleen wrote:
> Commit-ID: 128ea04a9885af9629059e631ddf0cab4815b589
> Gitweb: http://git.kernel.org/tip/128ea04a9885af9629059e631ddf0cab4815b589
> Author: Andi Kleen <[email protected]>
> AuthorDate: Sat, 8 Feb 2014 09:01:07 +0100
> Committer: H. Peter Anvin <[email protected]>
> CommitDate: Thu, 13 Feb 2014 20:21:59 -0800
>
> lto: Make asmlinkage __visible
>
> Signed-off-by: Andi Kleen <[email protected]>
> Link: http://lkml.kernel.org/r/[email protected]
> Signed-off-by: H. Peter Anvin <[email protected]>
> ---
> include/linux/linkage.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/linkage.h b/include/linux/linkage.h
> index a6a42dd..34a513a 100644
> --- a/include/linux/linkage.h
> +++ b/include/linux/linkage.h
> @@ -12,9 +12,9 @@
> #endif
>
> #ifdef __cplusplus
> -#define CPP_ASMLINKAGE extern "C"
> +#define CPP_ASMLINKAGE extern "C" __visible
> #else
> -#define CPP_ASMLINKAGE
> +#define CPP_ASMLINKAGE __visible
> #endif
>
> #ifndef asmlinkage
> --

arch/x86/crypto/sha256_ssse3_glue.c:56:1: warning: ‘externally_visible’ attribute have effect only on public objects [-Wattributes]
static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
^
arch/x86/crypto/sha512_ssse3_glue.c:55:1: warning: ‘externally_visible’ attribute have effect only on public objects [-Wattributes]
static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);


I guess it is trying to tell me that static function pointers cannot be
__visible:

static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);

and

static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2014-02-14 14:36:55

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support

On Thu, Feb 13, 2014 at 08:28:10PM -0800, H. Peter Anvin wrote:
> I am about to commit the patches before this except 02/17 and 12/17 to
> tip:x86/asmlinkage; however, I figure we need a new 02/17 before
> committing the actual LTO patches to avoid build breakage.

Thanks.

Yes I'll repost today.

I was assuming those would go through the kbuild tree.

-Andi

--
[email protected] -- Speaking for myself only

2014-02-14 14:38:26

by Andi Kleen

[permalink] [raw]
Subject: Re: [tip:x86/asmlinkage] lto: Make asmlinkage __visible

> I guess it is trying to tell me that static function pointers cannot be
> __visible:
>
> static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
>
> and
>
> static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);

Yes, there are a couple of such warnings now. static asmlinkage
does not make much sense. I can run a Coccinelle rule over the tree
later and will send patches.

-Andi

2014-02-14 15:38:11

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO

"H. Peter Anvin" <[email protected]> writes:
>
> I think this really ought to use ?= so it doesn't override a TMPDIR
> explicitly set by the user.

Done. Thanks.
-andi
--
[email protected] -- Speaking for myself only

2014-02-14 16:26:19

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support

On 02/14/2014 06:36 AM, Andi Kleen wrote:
> On Thu, Feb 13, 2014 at 08:28:10PM -0800, H. Peter Anvin wrote:
>> I am about to commit the patches before this except 02/17 and 12/17 to
>> tip:x86/asmlinkage; however, I figure we need a new 02/17 before
>> committing the actual LTO patches to avoid build breakage.
>
> Thanks.
>
> Yes I'll repost today.
>
> I was assuming those would go through the kbuild tree.
>

I'll check with Michal to see if he cares.

-hpa

2014-02-21 02:31:02

by Rusty Russell

[permalink] [raw]
Subject: Re: [tip:x86/asmlinkage] lto: Handle LTO common symbols in module loader

tip-bot for Joe Mario <[email protected]> writes:
> Commit-ID: 80375980f1608f43b47abc2671456b23ec68c434
> Gitweb: http://git.kernel.org/tip/80375980f1608f43b47abc2671456b23ec68c434
> Author: Joe Mario <[email protected]>
> AuthorDate: Sat, 8 Feb 2014 09:01:09 +0100
> Committer: H. Peter Anvin <[email protected]>
> CommitDate: Thu, 13 Feb 2014 20:24:50 -0800
>
> lto: Handle LTO common symbols in module loader
>
> Here is the workaround I made for having the kernel not reject modules
> built with -flto. The clean solution would be to get the compiler to not
> emit the symbol. Or if it has to emit the symbol, then emit it as
> initialized data but put it into a comdat/linkonce section.

Gah, as I said, fix the damn comment!

> case SHN_COMMON:
> + /* Ignore common symbols */
> + if (!strncmp(name, "__gnu_lto", 9))
> + break;
> +
> /* We compiled with -fno-common. These are not

/* Ignore common symbols */ is so bad, it's not even wrong.

Cheers,
Rusty.

2014-02-21 19:12:31

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [tip:x86/asmlinkage] lto: Handle LTO common symbols in module loader

On 02/20/2014 03:11 PM, Rusty Russell wrote:
> tip-bot for Joe Mario <[email protected]> writes:
>> Commit-ID: 80375980f1608f43b47abc2671456b23ec68c434
>> Gitweb: http://git.kernel.org/tip/80375980f1608f43b47abc2671456b23ec68c434
>> Author: Joe Mario <[email protected]>
>> AuthorDate: Sat, 8 Feb 2014 09:01:09 +0100
>> Committer: H. Peter Anvin <[email protected]>
>> CommitDate: Thu, 13 Feb 2014 20:24:50 -0800
>>
>> lto: Handle LTO common symbols in module loader
>>
>> Here is the workaround I made for having the kernel not reject modules
>> built with -flto. The clean solution would be to get the compiler to not
>> emit the symbol. Or if it has to emit the symbol, then emit it as
>> initialized data but put it into a comdat/linkonce section.
>
> Gah, as I said, fix the damn comment!
>
>> case SHN_COMMON:
>> + /* Ignore common symbols */
>> + if (!strncmp(name, "__gnu_lto", 9))
>> + break;
>> +
>> /* We compiled with -fno-common. These are not
>
> /* Ignore common symbols */ is so bad, it's not even wrong.
>

Joe, Andi, could one of you submit an incremental patch to clean up this
comment?

-hpa

2014-02-21 22:38:00

by Andi Kleen

[permalink] [raw]
Subject: Re: [tip:x86/asmlinkage] lto: Handle LTO common symbols in module loader

> >> case SHN_COMMON:
> >> + /* Ignore common symbols */
> >> + if (!strncmp(name, "__gnu_lto", 9))
> >> + break;
> >> +
> >> /* We compiled with -fno-common. These are not
> >
> > /* Ignore common symbols */ is so bad, it's not even wrong.
> >
>
> Joe, Andi, could one of you submit an incremental patch to clean up this
> comment?

You can just remove the patch. It was only needed for fat LTO,
which I stopped supporting.

-Andi