2020-05-26 16:27:38

by Peter Zijlstra

[permalink] [raw]
Subject: [RFC][PATCH 7/7] sched: Replace rq::wake_list

The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
got smp_call_function_single_async() subtly wrong. Even though it will
return -EBUSY when trying to re-use a csd, that condition is not
atomic and still requires external serialization.

The change in ttwu_queue_remote() got this wrong.

While on first reading ttwu_queue_remote() has an atomic test-and-set
that appears to serialize the use, the matching 'release' is not in
the right place to actually guarantee this serialization.

The actual race is vs the sched_ttwu_pending() call in the idle loop;
that can run the wakeup-list without consuming the CSD.

Instead of trying to chain the lists, merge them.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
include/linux/sched.h | 1 +
include/linux/smp.h | 1 +
kernel/sched/core.c | 25 +++++++------------------
kernel/sched/idle.c | 1 -
kernel/sched/sched.h | 11 -----------
kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++++++-------
6 files changed, 49 insertions(+), 37 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -655,6 +655,7 @@ struct task_struct {

#ifdef CONFIG_SMP
struct llist_node wake_entry;
+ unsigned int wake_entry_type;
int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -25,6 +25,7 @@ enum {
CSD_TYPE_ASYNC = 0x00,
CSD_TYPE_SYNC = 0x10,
CSD_TYPE_IRQ_WORK = 0x20,
+ CSD_TYPE_TTWU = 0x30,
CSD_FLAG_TYPE_MASK = 0xF0,
};

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1538,7 +1538,7 @@ static int migration_cpu_stop(void *data
* __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
- sched_ttwu_pending();
+ flush_smp_call_function_from_idle();

raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
@@ -2272,14 +2272,13 @@ static int ttwu_remote(struct task_struc
}

#ifdef CONFIG_SMP
-void sched_ttwu_pending(void)
+void sched_ttwu_pending(void *arg)
{
+ struct llist_node *llist = arg;
struct rq *rq = this_rq();
- struct llist_node *llist;
struct task_struct *p, *t;
struct rq_flags rf;

- llist = llist_del_all(&rq->wake_list);
if (!llist)
return;

@@ -2299,11 +2298,6 @@ void sched_ttwu_pending(void)
rq_unlock_irqrestore(rq, &rf);
}

-static void wake_csd_func(void *info)
-{
- sched_ttwu_pending();
-}
-
void send_call_function_single_ipi(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -2321,12 +2315,7 @@ static void ttwu_queue_remote(struct tas
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);

WRITE_ONCE(rq->ttwu_pending, 1);
- if (llist_add(&p->wake_entry, &rq->wake_list)) {
- if (!set_nr_if_polling(rq->idle))
- smp_call_function_single_async(cpu, &rq->wake_csd);
- else
- trace_sched_wake_idle_without_ipi(cpu);
- }
+ __smp_call_single_queue(cpu, &p->wake_entry);
}

void wake_up_if_idle(int cpu)
@@ -2735,6 +2724,9 @@ static void __sched_fork(unsigned long c
p->capture_control = NULL;
#endif
init_numa_balancing(clone_flags, p);
+#ifdef CONFIG_SMP
+ p->wake_entry_type = CSD_TYPE_TTWU;
+#endif
}

DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -6527,7 +6519,6 @@ int sched_cpu_dying(unsigned int cpu)
struct rq_flags rf;

/* Handle pending wakeups and then migrate everything off */
- sched_ttwu_pending();
sched_tick_stop(cpu);

rq_lock_irqsave(rq, &rf);
@@ -6726,8 +6717,6 @@ void __init sched_init(void)
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq->max_idle_balance_cost = sysctl_sched_migration_cost;

- rq_csd_init(rq, &rq->wake_csd, wake_csd_func);
-
INIT_LIST_HEAD(&rq->cfs_tasks);

rq_attach_root(rq, &def_root_domain);
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -290,7 +290,6 @@ static void do_idle(void)
smp_mb__after_atomic();

flush_smp_call_function_from_idle();
- sched_ttwu_pending();
schedule_idle();

if (unlikely(klp_patch_pending(current)))
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1023,11 +1023,6 @@ struct rq {
unsigned int ttwu_local;
#endif

-#ifdef CONFIG_SMP
- call_single_data_t wake_csd;
- struct llist_head wake_list;
-#endif
-
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
@@ -1371,8 +1366,6 @@ queue_balance_callback(struct rq *rq,
rq->balance_callback = head;
}

-extern void sched_ttwu_pending(void);
-
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
lockdep_is_held(&sched_domains_mutex))
@@ -1510,10 +1503,6 @@ static inline void unregister_sched_doma

extern void flush_smp_call_function_from_idle(void);

-#else
-
-static inline void sched_ttwu_pending(void) { }
-
#endif /* CONFIG_SMP */

#include "stats.h"
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -196,6 +196,7 @@ void generic_smp_call_function_single_in
flush_smp_call_function_queue(true);
}

+extern void sched_ttwu_pending(void *);
extern void irq_work_single(void *);

/**
@@ -244,6 +245,10 @@ static void flush_smp_call_function_queu
csd->func);
break;

+ case CSD_TYPE_TTWU:
+ pr_warn("IPI task-wakeup sent to offline CPU\n");
+ break;
+
default:
pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
CSD_TYPE(csd));
@@ -275,22 +280,43 @@ static void flush_smp_call_function_queu
}
}

+ if (!entry)
+ return;
+
/*
* Second; run all !SYNC callbacks.
*/
+ prev = NULL;
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
int type = CSD_TYPE(csd);

- if (type == CSD_TYPE_ASYNC) {
- smp_call_func_t func = csd->func;
- void *info = csd->info;
+ if (type != CSD_TYPE_TTWU) {
+ if (prev) {
+ prev->next = &csd_next->llist;
+ } else {
+ entry = &csd_next->llist;
+ }

- csd_unlock(csd);
- func(info);
- } else if (type == CSD_TYPE_IRQ_WORK) {
- irq_work_single(csd);
+ if (type == CSD_TYPE_ASYNC) {
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+
+ csd_unlock(csd);
+ func(info);
+ } else if (type == CSD_TYPE_IRQ_WORK) {
+ irq_work_single(csd);
+ }
+
+ } else {
+ prev = &csd->llist;
}
}
+
+ /*
+ * Third; only CSD_TYPE_TTWU is left, issue those.
+ */
+ if (entry)
+ sched_ttwu_pending(entry);
}

void flush_smp_call_function_from_idle(void)
@@ -659,6 +685,13 @@ void __init smp_init(void)
BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
offsetof(struct __call_single_data, flags));

+ /*
+ * Assert the CSD_TYPE_TTWU layout is similar enough
+ * for task_struct to be on the @call_single_queue.
+ */
+ BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
+ offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
+
idle_threads_init();
cpuhp_threads_init();




2020-05-29 15:13:37

by Valdis Klētnieks

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, 26 May 2020 18:11:04 +0200, Peter Zijlstra said:
> The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> got smp_call_function_single_async() subtly wrong. Even though it will
> return -EBUSY when trying to re-use a csd, that condition is not
> atomic and still requires external serialization.

> kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++++++-------

> --- a/kernel/smp.c
> +++ b/kernel/smp.c

> @@ -659,6 +685,13 @@ void __init smp_init(void)
> BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
> offsetof(struct __call_single_data, flags));
>
> + /*
> + * Assert the CSD_TYPE_TTWU layout is similar enough
> + * for task_struct to be on the @call_single_queue.
> + */
> + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> +
> idle_threads_init();
> cpuhp_threads_init();

This blows up on a 32-bit ARM allmodconfig:

CC kernel/smp.o
kernel/smp.c:319:6: warning: no previous prototype for 'flush_smp_call_function_from_idle' [-Wmissing-prototypes]
void flush_smp_call_function_from_idle(void)
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from ./arch/arm/include/asm/atomic.h:11,
from ./include/linux/atomic.h:7,
from ./include/linux/llist.h:51,
from ./include/linux/irq_work.h:5,
from kernel/smp.c:10:
kernel/smp.c: In function 'smp_init':
./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_152' declared with attribute error: BUILD_BUG_ON failed: offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) != offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist)
_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
^
./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
^~~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:50:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
^~~~~~~~~~~~~~~~
kernel/smp.c:689:2: note: in expansion of macro 'BUILD_BUG_ON'
BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
^~~~~~~~~~~~


Attachments:
(No filename) (849.00 B)

2020-06-01 09:57:33

by tip-bot2 for Peter Zijlstra

[permalink] [raw]
Subject: [tip: sched/core] sched: Replace rq::wake_list

The following commit has been merged into the sched/core branch of tip:

Commit-ID: a148866489fbe243c936fe43e4525d8dbfa0318f
Gitweb: https://git.kernel.org/tip/a148866489fbe243c936fe43e4525d8dbfa0318f
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 26 May 2020 18:11:04 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 28 May 2020 10:54:16 +02:00

sched: Replace rq::wake_list

The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
got smp_call_function_single_async() subtly wrong. Even though it will
return -EBUSY when trying to re-use a csd, that condition is not
atomic and still requires external serialization.

The change in ttwu_queue_remote() got this wrong.

While on first reading ttwu_queue_remote() has an atomic test-and-set
that appears to serialize the use, the matching 'release' is not in
the right place to actually guarantee this serialization.

The actual race is vs the sched_ttwu_pending() call in the idle loop;
that can run the wakeup-list without consuming the CSD.

Instead of trying to chain the lists, merge them.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
include/linux/sched.h | 1 +-
include/linux/smp.h | 1 +-
kernel/sched/core.c | 25 ++++++----------------
kernel/sched/idle.c | 1 +-
kernel/sched/sched.h | 8 +-------
kernel/smp.c | 47 +++++++++++++++++++++++++++++++++++-------
6 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ebc6870..e0f5f41 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -654,6 +654,7 @@ struct task_struct {

#ifdef CONFIG_SMP
struct llist_node wake_entry;
+ unsigned int wake_entry_type;
int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 45ad6e3..84f90e2 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -25,6 +25,7 @@ enum {
CSD_TYPE_ASYNC = 0x00,
CSD_TYPE_SYNC = 0x10,
CSD_TYPE_IRQ_WORK = 0x20,
+ CSD_TYPE_TTWU = 0x30,
CSD_FLAG_TYPE_MASK = 0xF0,
};

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b71ed5e..b3c64c6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1538,7 +1538,7 @@ static int migration_cpu_stop(void *data)
* __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
- sched_ttwu_pending();
+ flush_smp_call_function_from_idle();

raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
@@ -2272,14 +2272,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
}

#ifdef CONFIG_SMP
-void sched_ttwu_pending(void)
+void sched_ttwu_pending(void *arg)
{
+ struct llist_node *llist = arg;
struct rq *rq = this_rq();
- struct llist_node *llist;
struct task_struct *p, *t;
struct rq_flags rf;

- llist = llist_del_all(&rq->wake_list);
if (!llist)
return;

@@ -2299,11 +2298,6 @@ void sched_ttwu_pending(void)
rq_unlock_irqrestore(rq, &rf);
}

-static void wake_csd_func(void *info)
-{
- sched_ttwu_pending();
-}
-
void send_call_function_single_ipi(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -2327,12 +2321,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);

WRITE_ONCE(rq->ttwu_pending, 1);
- if (llist_add(&p->wake_entry, &rq->wake_list)) {
- if (!set_nr_if_polling(rq->idle))
- smp_call_function_single_async(cpu, &rq->wake_csd);
- else
- trace_sched_wake_idle_without_ipi(cpu);
- }
+ __smp_call_single_queue(cpu, &p->wake_entry);
}

void wake_up_if_idle(int cpu)
@@ -2772,6 +2761,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->capture_control = NULL;
#endif
init_numa_balancing(clone_flags, p);
+#ifdef CONFIG_SMP
+ p->wake_entry_type = CSD_TYPE_TTWU;
+#endif
}

DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -6564,7 +6556,6 @@ int sched_cpu_dying(unsigned int cpu)
struct rq_flags rf;

/* Handle pending wakeups and then migrate everything off */
- sched_ttwu_pending();
sched_tick_stop(cpu);

rq_lock_irqsave(rq, &rf);
@@ -6763,8 +6754,6 @@ void __init sched_init(void)
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq->max_idle_balance_cost = sysctl_sched_migration_cost;

- rq_csd_init(rq, &rq->wake_csd, wake_csd_func);
-
INIT_LIST_HEAD(&rq->cfs_tasks);

rq_attach_root(rq, &def_root_domain);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 387fd75..05deb81 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -294,7 +294,6 @@ static void do_idle(void)
* critical section.
*/
flush_smp_call_function_from_idle();
- sched_ttwu_pending();
schedule_idle();

if (unlikely(klp_patch_pending(current)))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c86fc94..1d4e94c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1023,11 +1023,6 @@ struct rq {
unsigned int ttwu_local;
#endif

-#ifdef CONFIG_SMP
- call_single_data_t wake_csd;
- struct llist_head wake_list;
-#endif
-
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
@@ -1371,8 +1366,6 @@ queue_balance_callback(struct rq *rq,
rq->balance_callback = head;
}

-extern void sched_ttwu_pending(void);
-
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
lockdep_is_held(&sched_domains_mutex))
@@ -1512,7 +1505,6 @@ extern void flush_smp_call_function_from_idle(void);

#else /* !CONFIG_SMP: */
static inline void flush_smp_call_function_from_idle(void) { }
-static inline void sched_ttwu_pending(void) { }
#endif

#include "stats.h"
diff --git a/kernel/smp.c b/kernel/smp.c
index 856562b..0d61dc0 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -196,6 +196,7 @@ void generic_smp_call_function_single_interrupt(void)
flush_smp_call_function_queue(true);
}

+extern void sched_ttwu_pending(void *);
extern void irq_work_single(void *);

/**
@@ -244,6 +245,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
csd->func);
break;

+ case CSD_TYPE_TTWU:
+ pr_warn("IPI task-wakeup sent to offline CPU\n");
+ break;
+
default:
pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
CSD_TYPE(csd));
@@ -275,22 +280,43 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
}
}

+ if (!entry)
+ return;
+
/*
* Second; run all !SYNC callbacks.
*/
+ prev = NULL;
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
int type = CSD_TYPE(csd);

- if (type == CSD_TYPE_ASYNC) {
- smp_call_func_t func = csd->func;
- void *info = csd->info;
+ if (type != CSD_TYPE_TTWU) {
+ if (prev) {
+ prev->next = &csd_next->llist;
+ } else {
+ entry = &csd_next->llist;
+ }

- csd_unlock(csd);
- func(info);
- } else if (type == CSD_TYPE_IRQ_WORK) {
- irq_work_single(csd);
+ if (type == CSD_TYPE_ASYNC) {
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+
+ csd_unlock(csd);
+ func(info);
+ } else if (type == CSD_TYPE_IRQ_WORK) {
+ irq_work_single(csd);
+ }
+
+ } else {
+ prev = &csd->llist;
}
}
+
+ /*
+ * Third; only CSD_TYPE_TTWU is left, issue those.
+ */
+ if (entry)
+ sched_ttwu_pending(entry);
}

void flush_smp_call_function_from_idle(void)
@@ -659,6 +685,13 @@ void __init smp_init(void)
BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
offsetof(struct __call_single_data, flags));

+ /*
+ * Assert the CSD_TYPE_TTWU layout is similar enough
+ * for task_struct to be on the @call_single_queue.
+ */
+ BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
+ offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
+
idle_threads_init();
cpuhp_threads_init();

2020-06-02 15:18:39

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [tip: sched/core] sched: Replace rq::wake_list

On Mon, Jun 01, 2020 at 09:52:18AM -0000, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the sched/core branch of tip:
>
> Commit-ID: a148866489fbe243c936fe43e4525d8dbfa0318f
> Gitweb: https://git.kernel.org/tip/a148866489fbe243c936fe43e4525d8dbfa0318f
> Author: Peter Zijlstra <[email protected]>
> AuthorDate: Tue, 26 May 2020 18:11:04 +02:00
> Committer: Ingo Molnar <[email protected]>
> CommitterDate: Thu, 28 May 2020 10:54:16 +02:00
>
> sched: Replace rq::wake_list
>
> The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> got smp_call_function_single_async() subtly wrong. Even though it will
> return -EBUSY when trying to re-use a csd, that condition is not
> atomic and still requires external serialization.
>
> The change in ttwu_queue_remote() got this wrong.
>
> While on first reading ttwu_queue_remote() has an atomic test-and-set
> that appears to serialize the use, the matching 'release' is not in
> the right place to actually guarantee this serialization.
>
> The actual race is vs the sched_ttwu_pending() call in the idle loop;
> that can run the wakeup-list without consuming the CSD.
>
> Instead of trying to chain the lists, merge them.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Signed-off-by: Ingo Molnar <[email protected]>
> Link: https://lore.kernel.org/r/[email protected]

Looks good, thanks :)

2020-06-04 14:23:34

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
> The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> got smp_call_function_single_async() subtly wrong. Even though it will
> return -EBUSY when trying to re-use a csd, that condition is not
> atomic and still requires external serialization.
>
> The change in ttwu_queue_remote() got this wrong.
>
> While on first reading ttwu_queue_remote() has an atomic test-and-set
> that appears to serialize the use, the matching 'release' is not in
> the right place to actually guarantee this serialization.
>
> The actual race is vs the sched_ttwu_pending() call in the idle loop;
> that can run the wakeup-list without consuming the CSD.
>
> Instead of trying to chain the lists, merge them.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
...
> + /*
> + * Assert the CSD_TYPE_TTWU layout is similar enough
> + * for task_struct to be on the @call_single_queue.
> + */
> + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> +

There is no guarantee in C that

type1 a;
type2 b;

in two different data structures means that offsetof(b) - offsetof(a)
is the same in both data structures unless attributes such as
__attribute__((__packed__)) are used.

As a result, this does and will cause a variety of build errors depending
on the compiler version and compile flags.

Guenter

2020-06-05 00:27:01

by Eric Biggers

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
> On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
> > The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> > got smp_call_function_single_async() subtly wrong. Even though it will
> > return -EBUSY when trying to re-use a csd, that condition is not
> > atomic and still requires external serialization.
> >
> > The change in ttwu_queue_remote() got this wrong.
> >
> > While on first reading ttwu_queue_remote() has an atomic test-and-set
> > that appears to serialize the use, the matching 'release' is not in
> > the right place to actually guarantee this serialization.
> >
> > The actual race is vs the sched_ttwu_pending() call in the idle loop;
> > that can run the wakeup-list without consuming the CSD.
> >
> > Instead of trying to chain the lists, merge them.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > ---
> ...
> > + /*
> > + * Assert the CSD_TYPE_TTWU layout is similar enough
> > + * for task_struct to be on the @call_single_queue.
> > + */
> > + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> > + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> > +
>
> There is no guarantee in C that
>
> type1 a;
> type2 b;
>
> in two different data structures means that offsetof(b) - offsetof(a)
> is the same in both data structures unless attributes such as
> __attribute__((__packed__)) are used.
>
> As a result, this does and will cause a variety of build errors depending
> on the compiler version and compile flags.
>
> Guenter

Yep, this breaks the build for me.

CC kernel/smp.o
In file included from ./arch/x86/include/asm/atomic.h:5,
from ./include/linux/atomic.h:7,
from ./include/linux/llist.h:51,
from ./include/linux/irq_work.h:5,
from kernel/smp.c:10:
kernel/smp.c: In function ‘smp_init’:
./include/linux/compiler.h:403:38: error: call to ‘__compiletime_assert_68’ declared with attribute error: BUILD_BUG_ON failed: offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) != offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist)
403 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^
./include/linux/compiler.h:384:4: note: in definition of macro ‘__compiletime_assert’
384 | prefix ## suffix(); \
| ^~~~~~
./include/linux/compiler.h:403:2: note: in expansion of macro ‘_compiletime_assert’
403 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’
39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
| ^~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
50 | BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
| ^~~~~~~~~~~~~~~~
kernel/smp.c:687:2: note: in expansion of macro ‘BUILD_BUG_ON’
687 | BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
| ^~~~~~~~~~~~
make[1]: *** [scripts/Makefile.build:267: kernel/smp.o] Error 1
make: *** [Makefile:1735: kernel] Error 2

2020-06-05 07:44:58

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Thu, Jun 04, 2020 at 05:24:33PM -0700, Eric Biggers wrote:
> On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
> > On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
> > > The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> > > got smp_call_function_single_async() subtly wrong. Even though it will
> > > return -EBUSY when trying to re-use a csd, that condition is not
> > > atomic and still requires external serialization.
> > >
> > > The change in ttwu_queue_remote() got this wrong.
> > >
> > > While on first reading ttwu_queue_remote() has an atomic test-and-set
> > > that appears to serialize the use, the matching 'release' is not in
> > > the right place to actually guarantee this serialization.
> > >
> > > The actual race is vs the sched_ttwu_pending() call in the idle loop;
> > > that can run the wakeup-list without consuming the CSD.
> > >
> > > Instead of trying to chain the lists, merge them.
> > >
> > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > ---
> > ...
> > > + /*
> > > + * Assert the CSD_TYPE_TTWU layout is similar enough
> > > + * for task_struct to be on the @call_single_queue.
> > > + */
> > > + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> > > + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> > > +
> >
> > There is no guarantee in C that
> >
> > type1 a;
> > type2 b;
> >
> > in two different data structures means that offsetof(b) - offsetof(a)
> > is the same in both data structures unless attributes such as
> > __attribute__((__packed__)) are used.
> >
> > As a result, this does and will cause a variety of build errors depending
> > on the compiler version and compile flags.
> >
> > Guenter
>
> Yep, this breaks the build for me.

-ENOCONFIG

2020-06-05 08:15:35

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
> On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:

> > + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> > + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> > +
>
> There is no guarantee in C that
>
> type1 a;
> type2 b;
>
> in two different data structures means that offsetof(b) - offsetof(a)
> is the same in both data structures unless attributes such as
> __attribute__((__packed__)) are used.

Do tell more; the alignment requirements and size of the types remain
the same, so this resulting in a different layout is unlikely.

I found this excellent quote on Hacker News this morning:

"I think the attitude of compiler writers is a good reason to fix the
spec so they can't keep ratfucking developers trying to get work done."

> As a result, this does and will cause a variety of build errors depending
> on the compiler version and compile flags.

The only thing I can think of that's actually a problem is that retarded
struct randomization stuff.

Anyway, I'll move cleaning it up a little higher on the todo list.

2020-06-05 13:37:17

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On 6/5/20 1:10 AM, Peter Zijlstra wrote:
> On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
>> On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
>
>>> + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
>>> + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
>>> +
>>
>> There is no guarantee in C that
>>
>> type1 a;
>> type2 b;
>>
>> in two different data structures means that offsetof(b) - offsetof(a)
>> is the same in both data structures unless attributes such as
>> __attribute__((__packed__)) are used.
>
> Do tell more; the alignment requirements and size of the types remains
> the same, this resulting in different layout is unlikely.
>

I have not made the C standard. You point out yourself a possible explicit
culprit: struct randomization. That by itself shows that you can not rely
on two elements of different structures having the same alignment,
which is pretty much exactly what I said (and may explain why observing
the problem seemed to at least somewhat depend on the weather).

> I found this excellent quote on Hacker News this morning:
>
> "I think the attitude of compiler writers is a good reason to fix the
> spec so they can't keep ratfucking developers trying to get work done."
>

Qed.

Guenter

>> As result, this does and will cause a variety of build errors depending
>> on the compiler version and compile flags.
>
> The only thing I can think of that's actually a problem is that retarded
> struct randomization stuff.
>
> Anyway, I'll move cleaning it up a little higher on the todo list.
>

2020-06-05 14:12:55

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Fri, Jun 05, 2020 at 06:33:38AM -0700, Guenter Roeck wrote:

> I have not made the C standard. You point out yourself a possible explicit
> culprit: struct randomization.

The randomization crud is very much outside the C spec.

> That by itself shows that you can not rely
> on two elements of different structures having the same alignment,

Randomization does not change the alignment, if it were to do that it
would break all sorts. All it does is change the order of elements.

> which is pretty much exactly what I said (and may explain why observing
> the problem seemed to at least somewhat depend on the weather).

A normal C compiler will have deterministic layout, otherwise unions and
union based type punning would not be a thing.

2020-06-05 16:18:03

by Eric Biggers

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Fri, Jun 05, 2020 at 09:41:54AM +0200, Peter Zijlstra wrote:
> On Thu, Jun 04, 2020 at 05:24:33PM -0700, Eric Biggers wrote:
> > On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
> > > On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
> > > > The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> > > > got smp_call_function_single_async() subtly wrong. Even though it will
> > > > return -EBUSY when trying to re-use a csd, that condition is not
> > > > atomic and still requires external serialization.
> > > >
> > > > The change in ttwu_queue_remote() got this wrong.
> > > >
> > > > While on first reading ttwu_queue_remote() has an atomic test-and-set
> > > > that appears to serialize the use, the matching 'release' is not in
> > > > the right place to actually guarantee this serialization.
> > > >
> > > > The actual race is vs the sched_ttwu_pending() call in the idle loop;
> > > > that can run the wakeup-list without consuming the CSD.
> > > >
> > > > Instead of trying to chain the lists, merge them.
> > > >
> > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > ---
> > > ...
> > > > + /*
> > > > + * Assert the CSD_TYPE_TTWU layout is similar enough
> > > > + * for task_struct to be on the @call_single_queue.
> > > > + */
> > > > + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> > > > + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> > > > +
> > >
> > > There is no guarantee in C that
> > >
> > > type1 a;
> > > type2 b;
> > >
> > > in two different data structures means that offsetof(b) - offsetof(a)
> > > is the same in both data structures unless attributes such as
> > > __attribute__((__packed__)) are used.
> > >
> > > As result, this does and will cause a variety of build errors depending
> > > on the compiler version and compile flags.
> > >
> > > Guenter
> >
> > Yep, this breaks the build for me.
>
> -ENOCONFIG

For me, the problem seems to be randstruct. To reproduce, you can use
(on x86_64):

make defconfig
echo CONFIG_GCC_PLUGIN_RANDSTRUCT=y >> .config
make olddefconfig
make kernel/smp.o

2020-06-06 23:16:55

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On 6/5/20 9:15 AM, Eric Biggers wrote:
> On Fri, Jun 05, 2020 at 09:41:54AM +0200, Peter Zijlstra wrote:
>> On Thu, Jun 04, 2020 at 05:24:33PM -0700, Eric Biggers wrote:
>>> On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
>>>> On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
>>>>> The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
>>>>> got smp_call_function_single_async() subtly wrong. Even though it will
>>>>> return -EBUSY when trying to re-use a csd, that condition is not
>>>>> atomic and still requires external serialization.
>>>>>
>>>>> The change in ttwu_queue_remote() got this wrong.
>>>>>
>>>>> While on first reading ttwu_queue_remote() has an atomic test-and-set
>>>>> that appears to serialize the use, the matching 'release' is not in
>>>>> the right place to actually guarantee this serialization.
>>>>>
>>>>> The actual race is vs the sched_ttwu_pending() call in the idle loop;
>>>>> that can run the wakeup-list without consuming the CSD.
>>>>>
>>>>> Instead of trying to chain the lists, merge them.
>>>>>
>>>>> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
>>>>> ---
>>>> ...
>>>>> + /*
>>>>> + * Assert the CSD_TYPE_TTWU layout is similar enough
>>>>> + * for task_struct to be on the @call_single_queue.
>>>>> + */
>>>>> + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
>>>>> + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
>>>>> +
>>>>
>>>> There is no guarantee in C that
>>>>
>>>> type1 a;
>>>> type2 b;
>>>>
>>>> in two different data structures means that offsetof(b) - offsetof(a)
>>>> is the same in both data structures unless attributes such as
>>>> __attribute__((__packed__)) are used.
>>>>
>>>> As result, this does and will cause a variety of build errors depending
>>>> on the compiler version and compile flags.
>>>>
>>>> Guenter
>>>
>>> Yep, this breaks the build for me.
>>
>> -ENOCONFIG
>
> For me, the problem seems to be randstruct. To reproduce, you can use
> (on x86_64):
>
> make defconfig
> echo CONFIG_GCC_PLUGIN_RANDSTRUCT=y >> .config
> make olddefconfig
> make kernel/smp.o
>

I confirmed that disabling CONFIG_GCC_PLUGIN_RANDSTRUCT "fixes" the problem
in my test builds. Maybe it would make sense to mark that configuration option
for the time being as BROKEN.

Guenter

2020-06-09 20:41:47

by Eric Biggers

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Sat, Jun 06, 2020 at 04:13:33PM -0700, Guenter Roeck wrote:
> On 6/5/20 9:15 AM, Eric Biggers wrote:
> > On Fri, Jun 05, 2020 at 09:41:54AM +0200, Peter Zijlstra wrote:
> >> On Thu, Jun 04, 2020 at 05:24:33PM -0700, Eric Biggers wrote:
> >>> On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
> >>>> On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
> >>>>> The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> >>>>> got smp_call_function_single_async() subtly wrong. Even though it will
> >>>>> return -EBUSY when trying to re-use a csd, that condition is not
> >>>>> atomic and still requires external serialization.
> >>>>>
> >>>>> The change in ttwu_queue_remote() got this wrong.
> >>>>>
> >>>>> While on first reading ttwu_queue_remote() has an atomic test-and-set
> >>>>> that appears to serialize the use, the matching 'release' is not in
> >>>>> the right place to actually guarantee this serialization.
> >>>>>
> >>>>> The actual race is vs the sched_ttwu_pending() call in the idle loop;
> >>>>> that can run the wakeup-list without consuming the CSD.
> >>>>>
> >>>>> Instead of trying to chain the lists, merge them.
> >>>>>
> >>>>> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> >>>>> ---
> >>>> ...
> >>>>> + /*
> >>>>> + * Assert the CSD_TYPE_TTWU layout is similar enough
> >>>>> + * for task_struct to be on the @call_single_queue.
> >>>>> + */
> >>>>> + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> >>>>> + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> >>>>> +
> >>>>
> >>>> There is no guarantee in C that
> >>>>
> >>>> type1 a;
> >>>> type2 b;
> >>>>
> >>>> in two different data structures means that offsetof(b) - offsetof(a)
> >>>> is the same in both data structures unless attributes such as
> >>>> __attribute__((__packed__)) are used.
> >>>>
> >>>> As result, this does and will cause a variety of build errors depending
> >>>> on the compiler version and compile flags.
> >>>>
> >>>> Guenter
> >>>
> >>> Yep, this breaks the build for me.
> >>
> >> -ENOCONFIG
> >
> > For me, the problem seems to be randstruct. To reproduce, you can use
> > (on x86_64):
> >
> > make defconfig
> > echo CONFIG_GCC_PLUGIN_RANDSTRUCT=y >> .config
> > make olddefconfig
> > make kernel/smp.o
> >
>
> I confirmed that disabling CONFIG_GCC_PLUGIN_RANDSTRUCT "fixes" the problem
> in my test builds. Maybe it would make sense to mark that configuration option
> for the time being as BROKEN.
>

Still occurring on Linus' tree. This needs to be fixed. (And not by removing
support for randstruct; that's not a "fix"...)

Shouldn't the kbuild test robot have caught this?

- Eric

2020-06-09 21:28:48

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, Jun 09, 2020 at 01:21:34PM -0700, Eric Biggers wrote:
> On Sat, Jun 06, 2020 at 04:13:33PM -0700, Guenter Roeck wrote:
> > On 6/5/20 9:15 AM, Eric Biggers wrote:
> > > On Fri, Jun 05, 2020 at 09:41:54AM +0200, Peter Zijlstra wrote:
> > >> On Thu, Jun 04, 2020 at 05:24:33PM -0700, Eric Biggers wrote:
> > >>> On Thu, Jun 04, 2020 at 07:18:37AM -0700, Guenter Roeck wrote:
> > >>>> On Tue, May 26, 2020 at 06:11:04PM +0200, Peter Zijlstra wrote:
> > >>>>> The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> > >>>>> got smp_call_function_single_async() subtly wrong. Even though it will
> > >>>>> return -EBUSY when trying to re-use a csd, that condition is not
> > >>>>> atomic and still requires external serialization.
> > >>>>>
> > >>>>> The change in ttwu_queue_remote() got this wrong.
> > >>>>>
> > >>>>> While on first reading ttwu_queue_remote() has an atomic test-and-set
> > >>>>> that appears to serialize the use, the matching 'release' is not in
> > >>>>> the right place to actually guarantee this serialization.
> > >>>>>
> > >>>>> The actual race is vs the sched_ttwu_pending() call in the idle loop;
> > >>>>> that can run the wakeup-list without consuming the CSD.
> > >>>>>
> > >>>>> Instead of trying to chain the lists, merge them.
> > >>>>>
> > >>>>> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > >>>>> ---
> > >>>> ...
> > >>>>> + /*
> > >>>>> + * Assert the CSD_TYPE_TTWU layout is similar enough
> > >>>>> + * for task_struct to be on the @call_single_queue.
> > >>>>> + */
> > >>>>> + BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
> > >>>>> + offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
> > >>>>> +
> > >>>>
> > >>>> There is no guarantee in C that
> > >>>>
> > >>>> type1 a;
> > >>>> type2 b;
> > >>>>
> > >>>> in two different data structures means that offsetof(b) - offsetof(a)
> > >>>> is the same in both data structures unless attributes such as
> > >>>> __attribute__((__packed__)) are used.
> > >>>>
> > >>>> As result, this does and will cause a variety of build errors depending
> > >>>> on the compiler version and compile flags.
> > >>>>
> > >>>> Guenter
> > >>>
> > >>> Yep, this breaks the build for me.
> > >>
> > >> -ENOCONFIG
> > >
> > > For me, the problem seems to be randstruct. To reproduce, you can use
> > > (on x86_64):
> > >
> > > make defconfig
> > > echo CONFIG_GCC_PLUGIN_RANDSTRUCT=y >> .config
> > > make olddefconfig
> > > make kernel/smp.o
> > >
> >
> > I confirmed that disabling CONFIG_GCC_PLUGIN_RANDSTRUCT "fixes" the problem
> > in my test builds. Maybe it would make sense to mark that configuration option
> > for the time being as BROKEN.
> >
>
> Still occurring on Linus' tree. This needs to be fixed. (And not by removing
> support for randstruct; that's not a "fix"...)
>

How about the hack below ?

Guenter

---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d96e3e7fff..df1cbb04f9b3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -629,6 +629,15 @@ struct wake_q_node {
struct wake_q_node *next;
};

+/*
+ * Hack around assumption that wake_entry_type follows wake_entry even with
+ * CONFIG_GCC_PLUGIN_RANDSTRUCT=y.
+ */
+struct _wake_entry {
+ struct llist_node wake_entry;
+ unsigned int wake_entry_type;
+};
+
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
@@ -653,8 +662,9 @@ struct task_struct {
unsigned int ptrace;

#ifdef CONFIG_SMP
- struct llist_node wake_entry;
- unsigned int wake_entry_type;
+ struct _wake_entry _we;
+#define wake_entry _we.wake_entry
+#define wake_entry_type _we.wake_entry_type
int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */

2020-06-09 21:48:08

by Eric Biggers

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, Jun 09, 2020 at 02:25:09PM -0700, Guenter Roeck wrote:
> >
> > Still occurring on Linus' tree. This needs to be fixed. (And not by removing
> > support for randstruct; that's not a "fix"...)
> >
>
> How about the hack below ?
>
> Guenter
>
> ---
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index c5d96e3e7fff..df1cbb04f9b3 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -629,6 +629,15 @@ struct wake_q_node {
> struct wake_q_node *next;
> };
>
> +/*
> + * Hack around assumption that wake_entry_type follows wake_entry even with
> + * CONFIG_GCC_PLUGIN_RANDSTRUCT=y.
> + */
> +struct _wake_entry {
> + struct llist_node wake_entry;
> + unsigned int wake_entry_type;
> +};
> +
> struct task_struct {
> #ifdef CONFIG_THREAD_INFO_IN_TASK
> /*
> @@ -653,8 +662,9 @@ struct task_struct {
> unsigned int ptrace;
>
> #ifdef CONFIG_SMP
> - struct llist_node wake_entry;
> - unsigned int wake_entry_type;
> + struct _wake_entry _we;
> +#define wake_entry _we.wake_entry
> +#define wake_entry_type _we.wake_entry_type
> int on_cpu;
> #ifdef CONFIG_THREAD_INFO_IN_TASK
> /* Current CPU: */

Does the struct actually have to be named? How about:

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d96e3e7fff42..14ca25cda19150 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -653,8 +653,14 @@ struct task_struct {
unsigned int ptrace;

#ifdef CONFIG_SMP
- struct llist_node wake_entry;
- unsigned int wake_entry_type;
+ /*
+ * wake_entry_type must follow wake_entry, even when
+ * CONFIG_GCC_PLUGIN_RANDSTRUCT=y.
+ */
+ struct {
+ struct llist_node wake_entry;
+ unsigned int wake_entry_type;
+ };
int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */


However, it would be preferable to not rely on different structs sharing the
same field order, but rather write proper C code that uses the same struct
everywhere to encapsulate these 2 fields...

- Eric

2020-06-09 22:08:28

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, Jun 09, 2020 at 02:38:29PM -0700, Eric Biggers wrote:
> Does the struct actually have to be named? How about:
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index c5d96e3e7fff42..14ca25cda19150 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -653,8 +653,14 @@ struct task_struct {
> unsigned int ptrace;
>
> #ifdef CONFIG_SMP
> - struct llist_node wake_entry;
> - unsigned int wake_entry_type;
> + /*
> + * wake_entry_type must follow wake_entry, even when
> + * CONFIG_GCC_PLUGIN_RANDSTRUCT=y.
> + */
> + struct {
> + struct llist_node wake_entry;
> + unsigned int wake_entry_type;
> + };
> int on_cpu;
> #ifdef CONFIG_THREAD_INFO_IN_TASK
> /* Current CPU: */
>
>
> However, it would be preferable to not rely on different structs sharing the
> same field order, but rather write proper C code that uses the same struct
> everywhere to encapsulate these 2 fields...

https://lkml.kernel.org/r/[email protected]

And I have more patches on top to clean up some of the anonymous union
stuff, but that's quite a lot of frobbing.


2020-06-09 22:10:31

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, Jun 09, 2020 at 01:21:34PM -0700, Eric Biggers wrote:
> Still occurring on Linus' tree. This needs to be fixed. (And not by removing
> support for randstruct; that's not a "fix"...)
>
> Shouldn't the kbuild test robot have caught this?

Should probably, but the thing has been rather spotty of late.

2020-06-09 23:05:57

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On 6/9/20 3:06 PM, Peter Zijlstra wrote:
> On Tue, Jun 09, 2020 at 02:38:29PM -0700, Eric Biggers wrote:
>> Does the struct actually have to be named? How about:
>>
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index c5d96e3e7fff42..14ca25cda19150 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -653,8 +653,14 @@ struct task_struct {
>> unsigned int ptrace;
>>
>> #ifdef CONFIG_SMP
>> - struct llist_node wake_entry;
>> - unsigned int wake_entry_type;
>> + /*
>> + * wake_entry_type must follow wake_entry, even when
>> + * CONFIG_GCC_PLUGIN_RANDSTRUCT=y.
>> + */
>> + struct {
>> + struct llist_node wake_entry;
>> + unsigned int wake_entry_type;
>> + };
>> int on_cpu;
>> #ifdef CONFIG_THREAD_INFO_IN_TASK
>> /* Current CPU: */
>>
>>
>> However, it would be preferable to not rely on different structs sharing the
>> same field order, but rather write proper C code that uses the same struct
>> everywhere to encapsulate these 2 fields...
>
> https://lkml.kernel.org/r/[email protected]
>
> And I have more patches on top to clean up some of the anonymous union
> stuff, that that's quite a lot of frobbing.
>

That is why I tried to keep it simple, as a hackish fixup patch.

Guenter

2020-06-10 09:16:04

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, Jun 09, 2020 at 04:03:19PM -0700, Guenter Roeck wrote:
> On 6/9/20 3:06 PM, Peter Zijlstra wrote:
> > On Tue, Jun 09, 2020 at 02:38:29PM -0700, Eric Biggers wrote:
> >> Does the struct actually have to be named? How about:
> >>
> >> diff --git a/include/linux/sched.h b/include/linux/sched.h
> >> index c5d96e3e7fff42..14ca25cda19150 100644
> >> --- a/include/linux/sched.h
> >> +++ b/include/linux/sched.h
> >> @@ -653,8 +653,14 @@ struct task_struct {
> >> unsigned int ptrace;
> >>
> >> #ifdef CONFIG_SMP
> >> - struct llist_node wake_entry;
> >> - unsigned int wake_entry_type;
> >> + /*
> >> + * wake_entry_type must follow wake_entry, even when
> >> + * CONFIG_GCC_PLUGIN_RANDSTRUCT=y.
> >> + */
> >> + struct {
> >> + struct llist_node wake_entry;
> >> + unsigned int wake_entry_type;
> >> + };
> >> int on_cpu;
> >> #ifdef CONFIG_THREAD_INFO_IN_TASK
> >> /* Current CPU: */
> >>
> >>
> >> However, it would be preferable to not rely on different structs sharing the
> >> same field order, but rather write proper C code that uses the same struct
> >> everywhere to encapsulate these 2 fields...
> >
> > https://lkml.kernel.org/r/[email protected]
> >
> > And I have more patches on top to clean up some of the anonymous union
> > stuff, that that's quite a lot of frobbing.
> >
>
> That is why I tried to keep it simple as hackish fixup patch.

Fair enough; I'll try and get the above variant merged to address the
build fail. Then I can chase down Paul's bug and finish the cleanup.

2020-06-18 18:55:38

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Tue, Jun 09, 2020 at 02:25:09PM -0700, Guenter Roeck wrote:
> >
> > Still occurring on Linus' tree. This needs to be fixed. (And not by removing
> > support for randstruct; that's not a "fix"...)
> >
>
> How about the hack below ?

My test suite failed due to this bug (on my allmodconfig test).

Your hack appears to fix it. I've applied it to my "fixes" patches applied to
my test suite before building my kernels.

Thanks!

-- Steve


>
> Guenter
>
> ---
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index c5d96e3e7fff..df1cbb04f9b3 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -629,6 +629,15 @@ struct wake_q_node {
> struct wake_q_node *next;
> };
>
> +/*
> + * Hack around assumption that wake_entry_type follows wake_entry even with
> + * CONFIG_GCC_PLUGIN_RANDSTRUCT=y.
> + */
> +struct _wake_entry {
> + struct llist_node wake_entry;
> + unsigned int wake_entry_type;
> +};
> +
> struct task_struct {
> #ifdef CONFIG_THREAD_INFO_IN_TASK
> /*
> @@ -653,8 +662,9 @@ struct task_struct {
> unsigned int ptrace;
>
> #ifdef CONFIG_SMP
> - struct llist_node wake_entry;
> - unsigned int wake_entry_type;
> + struct _wake_entry _we;
> +#define wake_entry _we.wake_entry
> +#define wake_entry_type _we.wake_entry_type
> int on_cpu;
> #ifdef CONFIG_THREAD_INFO_IN_TASK
> /* Current CPU: */

2020-06-18 21:54:47

by Guenter Roeck

[permalink] [raw]
Subject: Re: [RFC][PATCH 7/7] sched: Replace rq::wake_list

On Thu, Jun 18, 2020 at 01:57:33PM -0400, Steven Rostedt wrote:
> On Tue, Jun 09, 2020 at 02:25:09PM -0700, Guenter Roeck wrote:
> > >
> > > Still occurring on Linus' tree. This needs to be fixed. (And not by removing
> > > support for randstruct; that's not a "fix"...)
> > >
> >
> > How about the hack below ?
>
> My test suite failed due to this bug (on my allmodconfig test).
>
> Your hack appears to fix it. I've applied it to my "fixes" patches applied to
> my test suite before building my kernels.
>
Ah, I would have hoped that this was by now fixed upstream. Apparently that
is not the case. Guess Linus is using an old version of gcc (or maybe clang)
for his test builds, and everyone doing test builds is now using this or a
similar hack. For my part, I disabled CONFIG_GCC_PLUGIN_RANDSTRUCT.

Guenter