The sched_class structures are laid out in memory sorted by address (pointer value).
You can see it in the macro definition like this:
kernel/sched/sched.h
#define DEFINE_SCHED_CLASS(name) \
const struct sched_class name##_sched_class \
__aligned(__alignof__(struct sched_class)) \
__section("__" #name "_sched_class")
include/asm-generic/vmlinux.lds.h
#define SCHED_DATA \
STRUCT_ALIGN(); \
__sched_class_highest = .; \
*(__stop_sched_class) \
*(__dl_sched_class) \
*(__rt_sched_class) \
*(__fair_sched_class) \
*(__idle_sched_class) \
__sched_class_lowest = .;
And in the System.map file,
you can see that they are arranged in memory address order.
System.map
----------------------------------------------------------------
ffffffff8260d520 R __sched_class_highest
ffffffff8260d520 R stop_sched_class
ffffffff8260d5f0 R dl_sched_class
ffffffff8260d6c0 R rt_sched_class
ffffffff8260d790 R fair_sched_class
ffffffff8260d860 R idle_sched_class
ffffffff8260d930 R __sched_class_lowest
----------------------------------------------------------------
This address order matches the sched class priority order.
Therefore, in the prio_less() function in kernel/sched/core.c,
the relative priority of two tasks' classes can be determined by comparing the sched_class pointers, as follows.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f730b6fe94a7..7a64ac8ea3d8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -151,21 +151,6 @@ __read_mostly int scheduler_running;
DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
-/* kernel prio, less is more */
-static inline int __task_prio(struct task_struct *p)
-{
- if (p->sched_class == &stop_sched_class) /* trumps deadline */
- return -2;
-
- if (rt_prio(p->prio)) /* includes deadline */
- return p->prio; /* [-1, 99] */
-
- if (p->sched_class == &idle_sched_class)
- return MAX_RT_PRIO + NICE_WIDTH; /* 140 */
-
- return MAX_RT_PRIO + MAX_NICE; /* 120, squash fair */
-}
-
/*
* l(a,b)
* le(a,b) := !l(b,a)
@@ -176,22 +161,18 @@ static inline int __task_prio(struct task_struct *p)
/* real prio, less is less */
static inline bool prio_less(struct task_struct *a, struct
task_struct *b, bool in_fi)
{
+ int less = a->sched_class - b->sched_class;
- int pa = __task_prio(a), pb = __task_prio(b);
+ if (less == 0) {
+ if (a->sched_class == &dl_sched_class)
+ return !dl_time_before(a->dl.deadline, b->dl.deadline);
- if (-pa < -pb)
- return true;
-
- if (-pb < -pa)
- return false;
-
- if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
- return !dl_time_before(a->dl.deadline, b->dl.deadline);
-
- if (pa == MAX_RT_PRIO + MAX_NICE) /* fair */
- return cfs_prio_less(a, b, in_fi);
-
- return false;
+ else if (a->sched_class == &fair_sched_class)
+ return cfs_prio_less(a, b, in_fi);
+ else
+ return false;
+ } else
+ return (less > 0) ? true : false;
}
If the prio_less() function is modified as above, the __task_prio()
function is not required.
Please review.
Thanks,
From JaeJoon Jung.
On Mon, Feb 13, 2023 at 10:11:04AM +0900, JaeJoon Jung wrote:
> The sched_class structure is defined to be sorted by pointer size.
> @@ -176,22 +161,18 @@ static inline int __task_prio(struct task_struct *p)
> /* real prio, less is less */
> static inline bool prio_less(struct task_struct *a, struct
> task_struct *b, bool in_fi)
> {
> + int less = a->sched_class - b->sched_class;
>
> + if (less == 0) {
> + if (a->sched_class == &dl_sched_class)
> + return !dl_time_before(a->dl.deadline, b->dl.deadline);
>
> + else if (a->sched_class == &fair_sched_class)
> + return cfs_prio_less(a, b, in_fi);
> + else
> + return false;
> + } else
> + return (less > 0) ? true : false;
> }
>
> If the prio_less() function is modified as above, the __task_prio()
> function is not required.
Yeah, except your patch is whitespace mangled..
Thank you for your reply.
Will this change be incorporated into your patch?
JaeJoon Jung.
On Mon, 13 Feb 2023 at 19:08, Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Feb 13, 2023 at 10:11:04AM +0900, JaeJoon Jung wrote:
> > The sched_class structure is defined to be sorted by pointer size.
> > @@ -176,22 +161,18 @@ static inline int __task_prio(struct task_struct *p)
> > /* real prio, less is less */
> > static inline bool prio_less(struct task_struct *a, struct
> > task_struct *b, bool in_fi)
> > {
> > + int less = a->sched_class - b->sched_class;
> >
> > + if (less == 0) {
> > + if (a->sched_class == &dl_sched_class)
> > + return !dl_time_before(a->dl.deadline, b->dl.deadline);
> >
> > + else if (a->sched_class == &fair_sched_class)
> > + return cfs_prio_less(a, b, in_fi);
> > + else
> > + return false;
> > + } else
> > + return (less > 0) ? true : false;
> > }
> >
> > If the prio_less() function is modified as above, the __task_prio()
> > function is not required.
>
> Yeah, except your patch is whitespace mangled..