2017-12-22 00:28:07

by Andi Kleen

Subject: x86 cleanups from the LTO tree

These are all the fixes needed in the x86 tree for LTO. They are not strictly
needed without LTO, but I believe they can all be considered cleanups and
documentation improvements, and are valuable because of that.

The initconst/data fixes help generate correct section permissions in the
vmlinux ELF file.
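
As an aside, a minimal sketch (not part of the series) of why the annotation
matters: a const object annotated __initdata is placed in the writable
.init.data section, which either trips a gcc section type conflict or forces
read-only data into a writable mapping, while __initconst puts it in
.init.rodata where the permissions come out right:

	#include <linux/init.h>

	/* wrong: const data forced into the writable .init.data section */
	static const int bad_table[3] __initdata = { 1, 2, 3 };

	/* right: const init data lands in the read-only .init.rodata */
	static const int good_table[3] __initconst = { 1, 2, 3 };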


2017-12-22 00:27:14

by Andi Kleen

Subject: [PATCH 5/6] x86: Make exception handler functions visible

From: Andi Kleen <[email protected]>

Make the C exception handler functions that are directly called through
exception tables visible. LTO needs to know they are accessed from assembler.
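
For context, __visible on gcc of this era is a thin wrapper around the
externally_visible attribute (a sketch of the definition from
include/linux/compiler-gcc.h; verify against your tree):

	#define __visible __attribute__((externally_visible))

It tells the compiler a symbol is referenced from outside the code LTO can
see, so whole-program optimization must not localize or discard it.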

Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/mm/extable.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 9fe656c42aa5..8bd4b864e681 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -21,7 +21,7 @@ ex_fixup_handler(const struct exception_table_entry *x)
return (ex_handler_t)((unsigned long)&x->handler + x->handler);
}

-bool ex_handler_default(const struct exception_table_entry *fixup,
+__visible bool ex_handler_default(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
@@ -29,7 +29,7 @@ bool ex_handler_default(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_default);

-bool ex_handler_fault(const struct exception_table_entry *fixup,
+__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
@@ -42,7 +42,7 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
* Handler for UD0 exception following a failed test against the
* result of a refcount inc/dec/add/sub.
*/
-bool ex_handler_refcount(const struct exception_table_entry *fixup,
+__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
/* First unconditionally saturate the refcount. */
@@ -95,6 +95,7 @@ EXPORT_SYMBOL(ex_handler_refcount);
* of vulnerability by restoring from the initial state (essentially, zeroing
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
+__visible
bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
@@ -108,7 +109,7 @@ bool ex_handler_fprestore(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);

-bool ex_handler_ext(const struct exception_table_entry *fixup,
+__visible bool ex_handler_ext(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
/* Special hack for uaccess_err */
@@ -118,7 +119,7 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_ext);

-bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
@@ -133,7 +134,7 @@ bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);

-bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
@@ -147,7 +148,7 @@ bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);

-bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
@@ -157,7 +158,7 @@ bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);

-bool ex_has_fault_handler(unsigned long ip)
+__visible bool ex_has_fault_handler(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
--
2.15.0

2017-12-22 00:27:12

by Andi Kleen

Subject: [PATCH 6/6] x86/idt: Make const __initconst

From: Andi Kleen <[email protected]>

const variables must use __initconst, not __initdata. Fix this up
for the recently added IDT tables, which got it consistently wrong.

Fixes a whole range of commits between
16bc18d895ce ("x86/idt: Move 32-bit idt_descr to C code")
and
dc20b2d52653 ("x86/idt: Move interrupt gate initialization to IDT code")

Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/kernel/idt.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..56d99be3706a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
INTG(X86_TRAP_DB, debug),
SYSG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
* the traps which use them are reinitialized with IST after cpu_init() has
* set up TSS.
*/
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
INTG(X86_TRAP_DE, divide_error),
INTG(X86_TRAP_NMI, nmi),
INTG(X86_TRAP_BR, bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
/*
* The APIC and SMP idt entries
*/
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF, page_fault),
};

@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
* Override for the debug_idt. Same as the default, but with interrupt
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
*/
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
INTG(X86_TRAP_DB, debug),
INTG(X86_TRAP_BP, int3),
};
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* The exceptions which use Interrupt stacks. They are setup after
* cpu_init() when the TSS has been initialized.
*/
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
--
2.15.0

2017-12-22 00:27:53

by Andi Kleen

Subject: [PATCH 2/6] x86/xen: Mark pv stub assembler symbol visible

From: Andi Kleen <[email protected]>

With LTO any external assembler symbol has to be marked __visible.
Mark the generated asm PV stubs __visible to prevent a linker error.
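
As a sketch, for a hypothetical handler my_func the macro after this change
expands roughly to (register save/restore elided):

	extern __visible typeof(my_func) __raw_callee_save_my_func;
	extern __visible typeof(my_func) my_func;

	asm(".pushsection .text;"
	    ".globl __raw_callee_save_my_func;"
	    "__raw_callee_save_my_func:"
	    /* ... save caller regs, call my_func, restore regs, ret ... */
	    ".popsection");

Declaring both symbols __visible tells the compiler the asm blob references
them, so LTO keeps them around.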

Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/include/asm/paravirt.h | 3 ++-
drivers/xen/time.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 892df375b615..f03445fbbe2f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -745,7 +745,8 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu)
*/
#define PV_THUNK_NAME(func) "__raw_callee_save_" #func
#define PV_CALLEE_SAVE_REGS_THUNK(func) \
- extern typeof(func) __raw_callee_save_##func; \
+ extern __visible typeof(func) __raw_callee_save_##func; \
+ extern __visible typeof(func) func; \
\
asm(".pushsection .text;" \
".globl " PV_THUNK_NAME(func) ";" \
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 3e741cd1409c..708a00c337d7 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -144,7 +144,7 @@ void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
}

/* return true when a vcpu could run but has no real cpu to run on */
-bool xen_vcpu_stolen(int vcpu)
+__visible bool xen_vcpu_stolen(int vcpu)
{
return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}
--
2.15.0

2017-12-22 00:27:10

by Andi Kleen

Subject: [PATCH 1/6] x86/timer: Don't inline __const_udelay

From: Andi Kleen <[email protected]>

__const_udelay is marked inline, and LTO will happily inline it everywhere.
Dropping the inline saves ~44k of text in an LTO build.

    text    data     bss      dec     hex filename
13999560 1740864 1499136 17239560 1070e08 vmlinux-with-udelay-inline
13954764 1736768 1499136 17190668 1064f0c vmlinux-wo-udelay-inline

Even without LTO, I believe dropping the inline marker documents the intent correctly.
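
For reference, the reason it ends up everywhere: for constant arguments,
udelay() expands into a __const_udelay() call at every call site (a
simplified sketch of the asm-generic definition; the overflow check is
elided):

	/* include/asm-generic/delay.h, simplified */
	#define udelay(n)						\
		({							\
			if (__builtin_constant_p(n))			\
				__const_udelay((n) * 0x10c7ul); /* ~2^32/10^6 */ \
			else						\
				__udelay(n);				\
		})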

Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/lib/delay.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 4846eff7e4c8..f5b7f1b3b6d7 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -162,7 +162,7 @@ void __delay(unsigned long loops)
}
EXPORT_SYMBOL(__delay);

-inline void __const_udelay(unsigned long xloops)
+void __const_udelay(unsigned long xloops)
{
unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
int d0;
--
2.15.0

2017-12-22 00:28:13

by Andi Kleen

Subject: [PATCH 3/6] locking/spinlocks: Mark spinlocks noinline when inline spinlocks are disabled

From: Andi Kleen <[email protected]>

Otherwise LTO will inline them anyway and cause a large
kernel text increase.

Since the explicit intention here is to not inline them, marking
them noinline is good documentation even for the non-LTO case.
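
For context, a simplified sketch of the existing inline/out-of-line split
(from include/linux/spinlock_api_smp.h): when the INLINE_* option is set,
the header maps the API straight to the fast path and the copy in this file
is compiled out; when it is not set, this file is intended to be the only
copy, which under LTO only an explicit noinline guarantees:

	#ifdef CONFIG_INLINE_SPIN_LOCK
	#define _raw_spin_lock(lock) __raw_spin_lock(lock)
	#endif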

Signed-off-by: Andi Kleen <[email protected]>
---
kernel/locking/spinlock.c | 56 +++++++++++++++++++++++------------------------
1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 936f3d14dd6b..51031785e821 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -123,7 +123,7 @@ BUILD_LOCK_OPS(write, rwlock);
#endif

#ifndef CONFIG_INLINE_SPIN_TRYLOCK
-int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
+noinline int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
{
return __raw_spin_trylock(lock);
}
@@ -131,7 +131,7 @@ EXPORT_SYMBOL(_raw_spin_trylock);
#endif

#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
-int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
+noinline int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
{
return __raw_spin_trylock_bh(lock);
}
@@ -139,7 +139,7 @@ EXPORT_SYMBOL(_raw_spin_trylock_bh);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK
-void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
{
__raw_spin_lock(lock);
}
@@ -147,7 +147,7 @@ EXPORT_SYMBOL(_raw_spin_lock);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
+noinline unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
return __raw_spin_lock_irqsave(lock);
}
@@ -155,7 +155,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irqsave);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
-void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
{
__raw_spin_lock_irq(lock);
}
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irq);
#endif

#ifndef CONFIG_INLINE_SPIN_LOCK_BH
-void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
{
__raw_spin_lock_bh(lock);
}
@@ -171,7 +171,7 @@ EXPORT_SYMBOL(_raw_spin_lock_bh);
#endif

#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
-void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
{
__raw_spin_unlock(lock);
}
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(_raw_spin_unlock);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
-void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
{
__raw_spin_unlock_irqrestore(lock, flags);
}
@@ -187,7 +187,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
-void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
{
__raw_spin_unlock_irq(lock);
}
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irq);
#endif

#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
-void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
{
__raw_spin_unlock_bh(lock);
}
@@ -203,7 +203,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_bh);
#endif

#ifndef CONFIG_INLINE_READ_TRYLOCK
-int __lockfunc _raw_read_trylock(rwlock_t *lock)
+noinline int __lockfunc _raw_read_trylock(rwlock_t *lock)
{
return __raw_read_trylock(lock);
}
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(_raw_read_trylock);
#endif

#ifndef CONFIG_INLINE_READ_LOCK
-void __lockfunc _raw_read_lock(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock(rwlock_t *lock)
{
__raw_read_lock(lock);
}
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(_raw_read_lock);
#endif

#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
+noinline unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
{
return __raw_read_lock_irqsave(lock);
}
@@ -227,7 +227,7 @@ EXPORT_SYMBOL(_raw_read_lock_irqsave);
#endif

#ifndef CONFIG_INLINE_READ_LOCK_IRQ
-void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
{
__raw_read_lock_irq(lock);
}
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(_raw_read_lock_irq);
#endif

#ifndef CONFIG_INLINE_READ_LOCK_BH
-void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
{
__raw_read_lock_bh(lock);
}
@@ -243,7 +243,7 @@ EXPORT_SYMBOL(_raw_read_lock_bh);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK
-void __lockfunc _raw_read_unlock(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock(rwlock_t *lock)
{
__raw_read_unlock(lock);
}
@@ -251,7 +251,7 @@ EXPORT_SYMBOL(_raw_read_unlock);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
-void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__raw_read_unlock_irqrestore(lock, flags);
}
@@ -259,7 +259,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
-void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
{
__raw_read_unlock_irq(lock);
}
@@ -267,7 +267,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irq);
#endif

#ifndef CONFIG_INLINE_READ_UNLOCK_BH
-void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
{
__raw_read_unlock_bh(lock);
}
@@ -275,7 +275,7 @@ EXPORT_SYMBOL(_raw_read_unlock_bh);
#endif

#ifndef CONFIG_INLINE_WRITE_TRYLOCK
-int __lockfunc _raw_write_trylock(rwlock_t *lock)
+noinline int __lockfunc _raw_write_trylock(rwlock_t *lock)
{
return __raw_write_trylock(lock);
}
@@ -283,7 +283,7 @@ EXPORT_SYMBOL(_raw_write_trylock);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK
-void __lockfunc _raw_write_lock(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock(rwlock_t *lock)
{
__raw_write_lock(lock);
}
@@ -291,7 +291,7 @@ EXPORT_SYMBOL(_raw_write_lock);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
+noinline unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
{
return __raw_write_lock_irqsave(lock);
}
@@ -299,7 +299,7 @@ EXPORT_SYMBOL(_raw_write_lock_irqsave);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
-void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
{
__raw_write_lock_irq(lock);
}
@@ -307,7 +307,7 @@ EXPORT_SYMBOL(_raw_write_lock_irq);
#endif

#ifndef CONFIG_INLINE_WRITE_LOCK_BH
-void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
{
__raw_write_lock_bh(lock);
}
@@ -315,7 +315,7 @@ EXPORT_SYMBOL(_raw_write_lock_bh);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK
-void __lockfunc _raw_write_unlock(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock(rwlock_t *lock)
{
__raw_write_unlock(lock);
}
@@ -323,7 +323,7 @@ EXPORT_SYMBOL(_raw_write_unlock);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
-void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
__raw_write_unlock_irqrestore(lock, flags);
}
@@ -331,7 +331,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
-void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
{
__raw_write_unlock_irq(lock);
}
@@ -339,7 +339,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irq);
#endif

#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
-void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
{
__raw_write_unlock_bh(lock);
}
--
2.15.0

2017-12-22 00:28:42

by Andi Kleen

Subject: [PATCH 4/6] x86/kvm: Make steal_time visible

From: Andi Kleen <[email protected]>

This per-CPU variable is accessed from assembler code, so it needs
to be visible.
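
For context, the assembler reference is the hand-written vcpu_is_preempted()
fast path in the same file, which reads the preempted byte of steal_time by
name (abridged sketch of the 4.15-era code; check your tree for the exact
blob):

	asm(
	".pushsection .text;"
	".global __raw_callee_save___kvm_vcpu_is_preempted;"
	"__raw_callee_save___kvm_vcpu_is_preempted:"
	"movq	__per_cpu_offset(,%rdi,8), %rax;"
	"cmpb	$0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
	"setne	%al;"
	"ret;"
	".popsection");

With LTO, a static definition of steal_time could be renamed or dropped,
breaking that reference, hence the __visible, non-static definition.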

Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/kernel/kvm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b40ffbf156c1..8484e3e41d36 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -76,7 +76,7 @@ static int parse_no_kvmclock_vsyscall(char *arg)
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);

static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
-static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;

/*
--
2.15.0

2018-01-14 18:13:42

by Thomas Gleixner

Subject: Re: [PATCH 3/6] locking/spinlocks: Mark spinlocks noinline when inline spinlocks are disabled

On Thu, 21 Dec 2017, Andi Kleen wrote:

> From: Andi Kleen <[email protected]>
>
> Otherwise LTO will inline them anyway and cause a large
> kernel text increase.
>
> Since the explicit intention here is to not inline them, marking
> them noinline is good documentation even for the non-LTO case.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> kernel/locking/spinlock.c | 56 +++++++++++++++++++++++------------------------

How is that patch x86 specific?

Cc'ing the maintainers of that is not optional either.

Thanks,

tglx

2018-01-14 18:14:08

by Thomas Gleixner

Subject: Re: [PATCH 2/6] x86/xen: Mark pv stub assembler symbol visible

On Thu, 21 Dec 2017, Andi Kleen wrote:

> From: Andi Kleen <[email protected]>
>
> With LTO any external assembler symbol has to be marked __visible.
> Mark the generated asm PV stubs __visible to prevent a linker error.
>
> Signed-off-by: Andi Kleen <[email protected]>

Lacks cc of the Xen folks ....

2018-01-14 18:14:47

by Thomas Gleixner

Subject: Re: [PATCH 4/6] x86/kvm: Make steal_time visible

On Thu, 21 Dec 2017, Andi Kleen wrote:

> From: Andi Kleen <[email protected]>
>
> This per-CPU variable is accessed from assembler code, so it needs
> to be visible.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/kernel/kvm.c | 2 +-

KVM has maintainers ...

> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index b40ffbf156c1..8484e3e41d36 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -76,7 +76,7 @@ static int parse_no_kvmclock_vsyscall(char *arg)
> early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
>
> static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
> -static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
> +DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
> static int has_steal_clock = 0;
>
> /*
> --
> 2.15.0
>
>

Subject: [tip:x86/cleanups] x86/timer: Don't inline __const_udelay

Commit-ID: 7cf1aaa2ad3855bd5e95bef382a66fe122fc9b01
Gitweb: https://git.kernel.org/tip/7cf1aaa2ad3855bd5e95bef382a66fe122fc9b01
Author: Andi Kleen <[email protected]>
AuthorDate: Thu, 21 Dec 2017 16:18:16 -0800
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sun, 14 Jan 2018 20:03:49 +0100

x86/timer: Don't inline __const_udelay

__const_udelay is marked inline, and LTO will happily inline it everywhere.

Dropping the inline saves ~44k of text in an LTO build.

    text    data     bss      dec     hex filename
13999560 1740864 1499136 17239560 1070e08 vmlinux-with-udelay-inline
13954764 1736768 1499136 17190668 1064f0c vmlinux-wo-udelay-inline

Inlining it has no advantage in general, so it's the right thing to do.

Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/lib/delay.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd..09c83b2 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -162,7 +162,7 @@ void __delay(unsigned long loops)
}
EXPORT_SYMBOL(__delay);

-inline void __const_udelay(unsigned long xloops)
+void __const_udelay(unsigned long xloops)
{
unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
int d0;

Subject: [tip:x86/cleanups] x86/extable: Mark exception handler functions visible

Commit-ID: 80a3e3949b8f3a3efa853d8752fd7ed5ec02de2d
Gitweb: https://git.kernel.org/tip/80a3e3949b8f3a3efa853d8752fd7ed5ec02de2d
Author: Andi Kleen <[email protected]>
AuthorDate: Thu, 21 Dec 2017 16:18:20 -0800
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sun, 14 Jan 2018 20:04:16 +0100

x86/extable: Mark exception handler functions visible

Mark the C exception handler functions that are directly called through
exception tables visible. LTO needs to know they are accessed from assembler.

[ tglx: Mopped up the wrecked argument alignment. Sigh.... ]

Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/mm/extable.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 9fe656c..45f5d6c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -21,16 +21,16 @@ ex_fixup_handler(const struct exception_table_entry *x)
return (ex_handler_t)((unsigned long)&x->handler + x->handler);
}

-bool ex_handler_default(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_default(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
return true;
}
EXPORT_SYMBOL(ex_handler_default);

-bool ex_handler_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
@@ -42,8 +42,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
* Handler for UD0 exception following a failed test against the
* result of a refcount inc/dec/add/sub.
*/
-bool ex_handler_refcount(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
/* First unconditionally saturate the refcount. */
*(int *)regs->cx = INT_MIN / 2;
@@ -95,8 +95,8 @@ EXPORT_SYMBOL(ex_handler_refcount);
* of vulnerability by restoring from the initial state (essentially, zeroing
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
-bool ex_handler_fprestore(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);

@@ -108,8 +108,8 @@ bool ex_handler_fprestore(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);

-bool ex_handler_ext(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_ext(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
/* Special hack for uaccess_err */
current->thread.uaccess_err = 1;
@@ -118,8 +118,8 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_ext);

-bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
@@ -133,8 +133,8 @@ bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);

-bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
@@ -147,8 +147,8 @@ bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);

-bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr)
+__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
@@ -157,7 +157,7 @@ bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);

-bool ex_has_fault_handler(unsigned long ip)
+__visible bool ex_has_fault_handler(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;

Subject: [tip:x86/urgent] x86/idt: Mark IDT tables __initconst

Commit-ID: 327867faa4d66628fcd92a843adb3345736a5313
Gitweb: https://git.kernel.org/tip/327867faa4d66628fcd92a843adb3345736a5313
Author: Andi Kleen <[email protected]>
AuthorDate: Thu, 21 Dec 2017 16:18:21 -0800
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sun, 14 Jan 2018 20:09:45 +0100

x86/idt: Mark IDT tables __initconst

const variables must use __initconst, not __initdata.

Fix this up for the IDT tables, which got it consistently wrong.

Fixes: 16bc18d895ce ("x86/idt: Move 32-bit idt_descr to C code")
Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/kernel/idt.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef..56d99be 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
INTG(X86_TRAP_DB, debug),
SYSG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
* the traps which use them are reinitialized with IST after cpu_init() has
* set up TSS.
*/
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
INTG(X86_TRAP_DE, divide_error),
INTG(X86_TRAP_NMI, nmi),
INTG(X86_TRAP_BR, bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
/*
* The APIC and SMP idt entries
*/
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF, page_fault),
};

@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
* Override for the debug_idt. Same as the default, but with interrupt
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
*/
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
INTG(X86_TRAP_DB, debug),
INTG(X86_TRAP_BP, int3),
};
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* The exceptions which use Interrupt stacks. They are setup after
* cpu_init() when the TSS has been initialized.
*/
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),

2018-01-14 19:16:39

by Joe Perches

Subject: Re: [tip:x86/cleanups] x86/timer: Don't inline __const_udelay

On Sun, 2018-01-14 at 11:10 -0800, tip-bot for Andi Kleen wrote:
> Commit-ID: 7cf1aaa2ad3855bd5e95bef382a66fe122fc9b01
> Gitweb: https://git.kernel.org/tip/7cf1aaa2ad3855bd5e95bef382a66fe122fc9b01
> Author: Andi Kleen <[email protected]>
> AuthorDate: Thu, 21 Dec 2017 16:18:16 -0800
> Committer: Thomas Gleixner <[email protected]>
> CommitDate: Sun, 14 Jan 2018 20:03:49 +0100
>
> x86/timer: Don't inline __const_udelay

Perhaps update them all.

$ git grep __const_udelay|grep inline
arch/arm64/lib/delay.c:inline void __const_udelay(unsigned long xloops)
arch/metag/lib/delay.c:inline void __const_udelay(unsigned long xloops)
arch/openrisc/lib/delay.c:inline void __const_udelay(unsigned long xloops)
arch/sh/lib/delay.c:inline void __const_udelay(unsigned long xloops)
arch/x86/lib/delay.c:inline void __const_udelay(unsigned long xloops)
arch/x86/um/delay.c:inline void __const_udelay(unsigned long xloops)

2018-01-15 17:18:18

by Boris Ostrovsky

Subject: Re: [PATCH 2/6] x86/xen: Mark pv stub assembler symbol visible



On 12/21/2017 07:18 PM, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> With LTO any external assembler symbol has to be marked __visible.
> Mark the generated asm PV stubs __visible to prevent a linker error.
>
> Signed-off-by: Andi Kleen <[email protected]>

Reviewed-by: Boris Ostrovsky <[email protected]>


> ---
> arch/x86/include/asm/paravirt.h | 3 ++-
> drivers/xen/time.c | 2 +-
> 2 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 892df375b615..f03445fbbe2f 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -745,7 +745,8 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu)
> */
> #define PV_THUNK_NAME(func) "__raw_callee_save_" #func
> #define PV_CALLEE_SAVE_REGS_THUNK(func) \
> - extern typeof(func) __raw_callee_save_##func; \
> + extern __visible typeof(func) __raw_callee_save_##func; \
> + extern __visible typeof(func) func; \
> \
> asm(".pushsection .text;" \
> ".globl " PV_THUNK_NAME(func) ";" \
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> index 3e741cd1409c..708a00c337d7 100644
> --- a/drivers/xen/time.c
> +++ b/drivers/xen/time.c
> @@ -144,7 +144,7 @@ void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
> }
>
> /* return true when a vcpu could run but has no real cpu to run on */
> -bool xen_vcpu_stolen(int vcpu)
> +__visible bool xen_vcpu_stolen(int vcpu)
> {
> return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> }
>