2019-12-19 21:47:48

by Steven Rostedt

[permalink] [raw]
Subject: [RFC][PATCH 3/4] sched: Remove struct sched_class next field

From: "Steven Rostedt (VMware)" <[email protected]>

Now that the sched_class descriptors are defined in order via the linker
script vmlinux.lds.h, there's no reason to have a "next" pointer to the
previous priority structure. The order of the structures can be aligned as
an array, and used to index and find the next sched_class descriptor.

Signed-off-by: Steven Rostedt (VMware) <[email protected]>
---
include/asm-generic/vmlinux.lds.h | 1 +
kernel/sched/deadline.c | 1 -
kernel/sched/fair.c | 1 -
kernel/sched/idle.c | 1 -
kernel/sched/rt.c | 1 -
kernel/sched/sched.h | 6 +++---
kernel/sched/stop_task.c | 1 -
7 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 1c14c4ddf785..f4d480c4f7c6 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -128,6 +128,7 @@
*/
#define SCHED_DATA \
STRUCT_ALIGN(); \
+ __start_sched_classes = .; \
*(__idle_sched_class) \
*(__fair_sched_class) \
*(__rt_sched_class) \
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5abdbe569f93..9c232214fe63 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2430,7 +2430,6 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,

const struct sched_class dl_sched_class
__attribute__((section("__dl_sched_class"))) = {
- .next = &rt_sched_class,
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e745fe0e0cd3..52f2a7b06d9b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10747,7 +10747,6 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
*/
const struct sched_class fair_sched_class
__attribute__((section("__fair_sched_class"))) = {
- .next = &idle_sched_class,
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 700a9c826f0e..f0871a9b8c98 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -456,7 +456,6 @@ static void update_curr_idle(struct rq *rq)
*/
const struct sched_class idle_sched_class
__attribute__((section("__idle_sched_class"))) = {
- /* .next is NULL */
/* no enqueue/yield_task for idle tasks */

/* dequeue is not valid, we print a debug message there: */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5d3f9bcddaeb..d6b330b72c60 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2356,7 +2356,6 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)

const struct sched_class rt_sched_class
__attribute__((section("__rt_sched_class"))) = {
- .next = &fair_sched_class,
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0554c588ad85..30a4615cf480 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1700,7 +1700,6 @@ extern const u32 sched_prio_to_wmult[40];
#define RETRY_TASK ((void *)-1UL)

struct sched_class {
- const struct sched_class *next;

#ifdef CONFIG_UCLAMP_TASK
int uclamp_enabled;
@@ -1773,12 +1772,13 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)

/* Defined in include/asm-generic/vmlinux.lds.h */
extern struct sched_class sched_class_highest;
+extern struct sched_class __start_sched_classes;

#define for_class_range(class, _from, _to) \
- for (class = (_from); class != (_to); class = class->next)
+ for (class = (_from); class > (_to); class--)

#define for_each_class(class) \
- for_class_range(class, &sched_class_highest, NULL)
+ for_class_range(class, &sched_class_highest, (&__start_sched_classes) - 1)

extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 03bc7530ff75..0f88eec8d4da 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -117,7 +117,6 @@ static void update_curr_stop(struct rq *rq)
*/
const struct sched_class stop_sched_class
__attribute__((section("__stop_sched_class"))) = {
- .next = &dl_sched_class,

.enqueue_task = enqueue_task_stop,
.dequeue_task = dequeue_task_stop,
--
2.24.0



2019-12-20 12:15:40

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [RFC][PATCH 3/4] sched: Remove struct sched_class next field

On 19/12/2019 22.44, Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)" <[email protected]>
>
> Now that the sched_class descriptors are defined in order via the linker
> script vmlinux.lds.h, there's no reason to have a "next" pointer to the
> previous priority structure. The order of the structures can be aligned as
> an array, and used to index and find the next sched_class descriptor.
>
> Signed-off-by: Steven Rostedt (VMware) <[email protected]>
> ---
> include/asm-generic/vmlinux.lds.h | 1 +
> kernel/sched/deadline.c | 1 -
> kernel/sched/fair.c | 1 -
> kernel/sched/idle.c | 1 -
> kernel/sched/rt.c | 1 -
> kernel/sched/sched.h | 6 +++---
> kernel/sched/stop_task.c | 1 -
> 7 files changed, 4 insertions(+), 8 deletions(-)
>
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index 1c14c4ddf785..f4d480c4f7c6 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -128,6 +128,7 @@
> */
> #define SCHED_DATA \
> STRUCT_ALIGN(); \
> + __start_sched_classes = .; \
> *(__idle_sched_class) \
> *(__fair_sched_class) \
> *(__rt_sched_class) \

This is broken. It works by accident on a 64 bit SMP config, since you
start at a 32 byte boundary, then include four 8-byte aligned structs,
so the second STRUCT_ALIGN (not visible in this hunk, but comes from the
STOP_SCHED_CLASS) is a no-op, and stop_sched_class ends up at the right
offset from the previous one.

But, for example, a 32 bit non-smp kernel with CONFIG_FAIR_GROUP_SCHED=y
has sizeof(struct sched_class) == 68, and

$ nm -n vmlinux | grep sched_class
c0728660 D idle_sched_class
c0728660 D __start_sched_classes
c07286a4 D fair_sched_class
c07286e8 D rt_sched_class
c0728740 D dl_sched_class
c0728740 D sched_class_highest

Notice that dl_sched_class is 88 bytes beyond rt_sched_class, while the
others are properly 68-byte separated.

So just drop the second STRUCT_ALIGN (and maybe the first as well).
Maybe throw in some ASSERTs in the linker script, but since the linker
doesn't know sizeof(struct sched_class), the best one can do is perhaps
some kind of ASSERT(fair_sched_class - idle_sched_class ==
rt_sched_class - fair_sched_class). And/or include a BUG_ON that checks
that the sched_class elements actually constitute a proper "struct
sched_class[]" array.

Rasmus

Subject: [tip: sched/core] sched: Remove struct sched_class::next field

The following commit has been merged into the sched/core branch of tip:

Commit-ID: a87e749e8fa1aaef9b4db32e21c2795e69ce67bf
Gitweb: https://git.kernel.org/tip/a87e749e8fa1aaef9b4db32e21c2795e69ce67bf
Author: Steven Rostedt (VMware) <[email protected]>
AuthorDate: Thu, 19 Dec 2019 16:44:54 -05:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Thu, 25 Jun 2020 13:45:44 +02:00

sched: Remove struct sched_class::next field

Now that the sched_class descriptors are defined in order via the linker
script vmlinux.lds.h, there's no reason to have a "next" pointer to the
previous priority structure. The order of the structures can be aligned as
an array, and used to index and find the next sched_class descriptor.

Signed-off-by: Steven Rostedt (VMware) <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/sched/deadline.c | 1 -
kernel/sched/fair.c | 1 -
kernel/sched/idle.c | 1 -
kernel/sched/rt.c | 1 -
kernel/sched/sched.h | 1 -
kernel/sched/stop_task.c | 1 -
6 files changed, 6 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d9e7946..c9cc1d6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2481,7 +2481,6 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,

const struct sched_class dl_sched_class
__attribute__((section("__dl_sched_class"))) = {
- .next = &rt_sched_class,
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3365f6b..a63f400 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11124,7 +11124,6 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
*/
const struct sched_class fair_sched_class
__attribute__((section("__fair_sched_class"))) = {
- .next = &idle_sched_class,
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index f580629..336d478 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -455,7 +455,6 @@ static void update_curr_idle(struct rq *rq)
*/
const struct sched_class idle_sched_class
__attribute__((section("__idle_sched_class"))) = {
- /* .next is NULL */
/* no enqueue/yield_task for idle tasks */

/* dequeue is not valid, we print a debug message there: */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6543d44..f215eea 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2431,7 +2431,6 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)

const struct sched_class rt_sched_class
__attribute__((section("__rt_sched_class"))) = {
- .next = &fair_sched_class,
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4165c06..549e7e6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1754,7 +1754,6 @@ extern const u32 sched_prio_to_wmult[40];
#define RETRY_TASK ((void *)-1UL)

struct sched_class {
- const struct sched_class *next;

#ifdef CONFIG_UCLAMP_TASK
int uclamp_enabled;
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index f4bbd54..394bc81 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -111,7 +111,6 @@ static void update_curr_stop(struct rq *rq)
*/
const struct sched_class stop_sched_class
__attribute__((section("__stop_sched_class"))) = {
- .next = &dl_sched_class,

.enqueue_task = enqueue_task_stop,
.dequeue_task = dequeue_task_stop,