2024-06-04 14:51:52

by Qais Yousef

[permalink] [raw]
Subject: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

rt_task() checks if a task has RT priority. But depends on your
dictionary, this could mean it belongs to RT class, or is a 'realtime'
task, which includes RT and DL classes.

Since this has caused some confusion already on discussion [1], it
seemed a clean up is due.

I define the usage of rt_task() to be tasks that belong to RT class.
Make sure that it returns true only for RT class and audit the users and
replace the ones required the old behavior with the new realtime_task()
which returns true for RT and DL classes. Introduce similar
realtime_prio() to create similar distinction to rt_prio() and update
the users that required the old behavior to use the new function.

Move MAX_DL_PRIO to prio.h so it can be used in the new definitions.

Document the functions to make it more obvious what is the difference
between them. PI-boosted tasks is a factor that must be taken into
account when choosing which function to use.

Rename task_is_realtime() to realtime_task_policy() as the old name is
confusing against the new realtime_task().

No functional changes were intended.

[1] https://lore.kernel.org/lkml/[email protected]/

Reviewed-by: Phil Auld <[email protected]>
Reviewed-by: Steven Rostedt (Google) <[email protected]>
Signed-off-by: Qais Yousef <[email protected]>
---
fs/bcachefs/six.c | 2 +-
fs/select.c | 2 +-
include/linux/ioprio.h | 2 +-
include/linux/sched/deadline.h | 6 ++++--
include/linux/sched/prio.h | 1 +
include/linux/sched/rt.h | 27 ++++++++++++++++++++++++++-
kernel/locking/rtmutex.c | 4 ++--
kernel/locking/rwsem.c | 4 ++--
kernel/locking/ww_mutex.h | 2 +-
kernel/sched/core.c | 4 ++--
kernel/sched/syscalls.c | 2 +-
kernel/time/hrtimer.c | 6 +++---
kernel/trace/trace_sched_wakeup.c | 2 +-
mm/page-writeback.c | 4 ++--
mm/page_alloc.c | 2 +-
15 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 3a494c5d1247..b30870bf7e4a 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -335,7 +335,7 @@ static inline bool six_owner_running(struct six_lock *lock)
*/
rcu_read_lock();
struct task_struct *owner = READ_ONCE(lock->owner);
- bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
+ bool ret = owner ? owner_on_cpu(owner) : !realtime_task(current);
rcu_read_unlock();

return ret;
diff --git a/fs/select.c b/fs/select.c
index 9515c3fa1a03..8d5c1419416c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -82,7 +82,7 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
* Realtime tasks get a slack of 0 for obvious reasons.
*/

- if (rt_task(current))
+ if (realtime_task(current))
return 0;

ktime_get_ts64(&now);
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index db1249cd9692..75859b78d540 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task)
{
if (task->policy == SCHED_IDLE)
return IOPRIO_CLASS_IDLE;
- else if (task_is_realtime(task))
+ else if (realtime_task_policy(task))
return IOPRIO_CLASS_RT;
else
return IOPRIO_CLASS_BE;
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index df3aca89d4f5..5cb88b748ad6 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -10,8 +10,6 @@

#include <linux/sched.h>

-#define MAX_DL_PRIO 0
-
static inline int dl_prio(int prio)
{
if (unlikely(prio < MAX_DL_PRIO))
@@ -19,6 +17,10 @@ static inline int dl_prio(int prio)
return 0;
}

+/*
+ * Returns true if a task has a priority that belongs to DL class. PI-boosted
+ * tasks will return true. Use dl_policy() to ignore PI-boosted tasks.
+ */
static inline int dl_task(struct task_struct *p)
{
return dl_prio(p->prio);
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index ab83d85e1183..6ab43b4f72f9 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -14,6 +14,7 @@
*/

#define MAX_RT_PRIO 100
+#define MAX_DL_PRIO 0

#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index b2b9e6eb9683..a055dd68a77c 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -7,18 +7,43 @@
struct task_struct;

static inline int rt_prio(int prio)
+{
+ if (unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO))
+ return 1;
+ return 0;
+}
+
+static inline int realtime_prio(int prio)
{
if (unlikely(prio < MAX_RT_PRIO))
return 1;
return 0;
}

+/*
+ * Returns true if a task has a priority that belongs to RT class. PI-boosted
+ * tasks will return true. Use rt_policy() to ignore PI-boosted tasks.
+ */
static inline int rt_task(struct task_struct *p)
{
return rt_prio(p->prio);
}

-static inline bool task_is_realtime(struct task_struct *tsk)
+/*
+ * Returns true if a task has a priority that belongs to RT or DL classes.
+ * PI-boosted tasks will return true. Use realtime_task_policy() to ignore
+ * PI-boosted tasks.
+ */
+static inline int realtime_task(struct task_struct *p)
+{
+ return realtime_prio(p->prio);
+}
+
+/*
+ * Returns true if a task has a policy that belongs to RT or DL classes.
+ * PI-boosted tasks will return false.
+ */
+static inline bool realtime_task_policy(struct task_struct *tsk)
{
int policy = tsk->policy;

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 88d08eeb8bc0..55c9dab37f33 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -347,7 +347,7 @@ static __always_inline int __waiter_prio(struct task_struct *task)
{
int prio = task->prio;

- if (!rt_prio(prio))
+ if (!realtime_prio(prio))
return DEFAULT_PRIO;

return prio;
@@ -435,7 +435,7 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
* Note that RT tasks are excluded from same priority (lateral)
* steals to prevent the introduction of an unbounded latency.
*/
- if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
+ if (realtime_prio(waiter->tree.prio))
return false;

return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index c6d17aee4209..ad8d4438bc91 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -631,7 +631,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
* if it is an RT task or wait in the wait queue
* for too long.
*/
- if (has_handoff || (!rt_task(waiter->task) &&
+ if (has_handoff || (!realtime_task(waiter->task) &&
!time_after(jiffies, waiter->timeout)))
return false;

@@ -914,7 +914,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
if (owner_state != OWNER_WRITER) {
if (need_resched())
break;
- if (rt_task(current) &&
+ if (realtime_task(current) &&
(prev_owner_state != OWNER_WRITER))
break;
}
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 3ad2cc4823e5..fa4b416a1f62 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -237,7 +237,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
int a_prio = a->task->prio;
int b_prio = b->task->prio;

- if (rt_prio(a_prio) || rt_prio(b_prio)) {
+ if (realtime_prio(a_prio) || realtime_prio(b_prio)) {

if (a_prio > b_prio)
return true;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5d861b59d737..22c7efed83b4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -163,7 +163,7 @@ static inline int __task_prio(const struct task_struct *p)
if (p->sched_class == &stop_sched_class) /* trumps deadline */
return -2;

- if (rt_prio(p->prio)) /* includes deadline */
+ if (realtime_prio(p->prio)) /* includes deadline */
return p->prio; /* [-1, 99] */

if (p->sched_class == &idle_sched_class)
@@ -8522,7 +8522,7 @@ void normalize_rt_tasks(void)
schedstat_set(p->stats.sleep_start, 0);
schedstat_set(p->stats.block_start, 0);

- if (!dl_task(p) && !rt_task(p)) {
+ if (!realtime_task(p)) {
/*
* Renice negative nice level userspace
* tasks back to 0:
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index ae1b42775ef9..6d60326d73e4 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -57,7 +57,7 @@ static int effective_prio(struct task_struct *p)
* keep the priority unchanged. Otherwise, update priority
* to the normal priority:
*/
- if (!rt_prio(p->prio))
+ if (!realtime_prio(p->prio))
return p->normal_prio;
return p->prio;
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 492c14aac642..89d4da59059d 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1973,7 +1973,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
* expiry.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
- if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+ if (realtime_task_policy(current) && !(mode & HRTIMER_MODE_SOFT))
mode |= HRTIMER_MODE_HARD;
}

@@ -2073,7 +2073,7 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
u64 slack;

slack = current->timer_slack_ns;
- if (rt_task(current))
+ if (realtime_task(current))
slack = 0;

hrtimer_init_sleeper_on_stack(&t, clockid, mode);
@@ -2278,7 +2278,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
* Override any slack passed by the user if under
* rt contraints.
*/
- if (rt_task(current))
+ if (realtime_task(current))
delta = 0;

hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0469a04a355f..19d737742e29 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -545,7 +545,7 @@ probe_wakeup(void *ignore, struct task_struct *p)
* - wakeup_dl handles tasks belonging to sched_dl class only.
*/
if (tracing_dl || (wakeup_dl && !dl_task(p)) ||
- (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
+ (wakeup_rt && !realtime_task(p)) ||
(!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio)))
return;

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 12c9297ed4a7..d9464af1d992 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -418,7 +418,7 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
if (bg_thresh >= thresh)
bg_thresh = thresh / 2;
tsk = current;
- if (rt_task(tsk)) {
+ if (realtime_task(tsk)) {
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
}
@@ -468,7 +468,7 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat)
else
dirty = vm_dirty_ratio * node_memory / 100;

- if (rt_task(tsk))
+ if (realtime_task(tsk))
dirty += dirty / 4;

return dirty;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2e22ce5675ca..807dd6aa3edb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3962,7 +3962,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
*/
if (alloc_flags & ALLOC_MIN_RESERVE)
alloc_flags &= ~ALLOC_CPUSET;
- } else if (unlikely(rt_task(current)) && in_task())
+ } else if (unlikely(realtime_task(current)) && in_task())
alloc_flags |= ALLOC_MIN_RESERVE;

alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
--
2.34.1



Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On 6/4/24 16:42, Qais Yousef wrote:
> - (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
> + (wakeup_rt && !realtime_task(p)) ||

I do not like bikeshedding, and no hard feelings...

But rt is a shortened version of realtime, and so it is making *it less*
clear that we also have DL here.

I know we can always read the comments, but we can do without changes
as well...

I would suggest finding the plural version for realtime_task()... so
we know it is not about the "rt" scheduler, but rt and dl schedulers.

-- Daniel


2024-06-04 16:38:35

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On Tue, 4 Jun 2024 17:57:46 +0200
Daniel Bristot de Oliveira <[email protected]> wrote:

> On 6/4/24 16:42, Qais Yousef wrote:
> > - (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
> > + (wakeup_rt && !realtime_task(p)) ||
>
> I do not like bikeshedding, and no hard feelings...
>
> But rt is a shortened version of realtime, and so it is making *it less*
> clear that we also have DL here.
>
> I know we can always read the comments, but we can do without changes
> as well...
>
> I would suggest finding the plural version for realtime_task()... so
> we know it is not about the "rt" scheduler, but rt and dl schedulers.

priority_task() ?

Or should we go with royal purple and call it "royalty_task()" ? ;-)

-- Steve



Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On 6/4/24 18:37, Steven Rostedt wrote:
> On Tue, 4 Jun 2024 17:57:46 +0200
> Daniel Bristot de Oliveira <[email protected]> wrote:
>
>> On 6/4/24 16:42, Qais Yousef wrote:
>>> - (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
>>> + (wakeup_rt && !realtime_task(p)) ||
>>
>> I do not like bikeshedding, and no hard feelings...
>>
>> But rt is a shortened version of realtime, and so it is making *it less*
>> clear that we also have DL here.
>>
>> I know we can always read the comments, but we can do without changes
>> as well...
>>
>> I would suggest finding the plural version for realtime_task()... so
>> we know it is not about the "rt" scheduler, but rt and dl schedulers.
>
> priority_task() ?

rt_or_dl_task() ?
rt_schedulers_task() ?
higher_than_fair_task() ? (this is bad haha)

I am not good with names, and it is hard to find one, I know.... but something
to make it clear that dl is also there becase rt/realtime is ambiguous with
the rt.c.

-- Daniel


Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On 2024-06-04 17:57:46 [+0200], Daniel Bristot de Oliveira wrote:
> On 6/4/24 16:42, Qais Yousef wrote:
> > - (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
> > + (wakeup_rt && !realtime_task(p)) ||
>
> I do not like bikeshedding, and no hard feelings...
>
> But rt is a shortened version of realtime, and so it is making *it less*
> clear that we also have DL here.

Can SCHED_DL be considered a real-time scheduling class as in opposite
to SCHED_BATCH for instance? Due to its requirements it fits for a real
time scheduling class, right?
And RT (as in real time) already includes SCHED_RR and SCHED_FIFO.

> -- Daniel

Sebastian

2024-06-05 13:42:24

by Qais Yousef

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On 06/05/24 11:32, Sebastian Andrzej Siewior wrote:
> On 2024-06-04 17:57:46 [+0200], Daniel Bristot de Oliveira wrote:
> > On 6/4/24 16:42, Qais Yousef wrote:
> > > - (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
> > > + (wakeup_rt && !realtime_task(p)) ||
> >
> > I do not like bikeshedding, and no hard feelings...

No hard feelings :-)

> >
> > But rt is a shortened version of realtime, and so it is making *it less*
> > clear that we also have DL here.
>
> Can SCHED_DL be considered a real-time scheduling class as in opposite
> to SCHED_BATCH for instance? Due to its requirements it fits for a real
> time scheduling class, right?
> And RT (as in real time) already includes SCHED_RR and SCHED_FIFO.

Yeah I think the usage of realtime to cover both makes sense. I followed your
precedence with task_is_realtime().

Anyway. If people really find this confusing, what would make sense is to split
them and ask users to call rt_task() and dl_task() explicitly without this
wrapper. I personally like it better with the wrapper. But happy to follow the
crowd.

Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On 6/5/24 15:24, Qais Yousef wrote:
>>> But rt is a shortened version of realtime, and so it is making *it less*
>>> clear that we also have DL here.
>> Can SCHED_DL be considered a real-time scheduling class as in opposite
>> to SCHED_BATCH for instance? Due to its requirements it fits for a real
>> time scheduling class, right?
>> And RT (as in real time) already includes SCHED_RR and SCHED_FIFO.
> Yeah I think the usage of realtime to cover both makes sense. I followed your
> precedence with task_is_realtime().
>
> Anyway. If people really find this confusing, what would make sense is to split
> them and ask users to call rt_task() and dl_task() explicitly without this
> wrapper. I personally like it better with the wrapper. But happy to follow the
> crowd.

For me, doing dl_ things it is better to keep them separate, so I can
easily search for dl_ specific checks.

rt_or_dl_task(p);

would also make it clear that we have both.

-- Daniel


Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On 6/5/24 11:32, Sebastian Andrzej Siewior wrote:
> On 2024-06-04 17:57:46 [+0200], Daniel Bristot de Oliveira wrote:
>> On 6/4/24 16:42, Qais Yousef wrote:
>>> - (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
>>> + (wakeup_rt && !realtime_task(p)) ||
>>
>> I do not like bikeshedding, and no hard feelings...
>>
>> But rt is a shortened version of realtime, and so it is making *it less*
>> clear that we also have DL here.
>
> Can SCHED_DL be considered a real-time scheduling class as in opposite
> to SCHED_BATCH for instance? Due to its requirements it fits for a real
> time scheduling class, right?
> And RT (as in real time) already includes SCHED_RR and SCHED_FIFO.

It is a real-time scheduler, but the problem is that FIFO and RR are in rt.c and
they are called the "realtime" ones, so they are the first to come in mind.

-- Daniel

>> -- Daniel
>
> Sebastian
>


2024-06-10 19:22:39

by Qais Yousef

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] sched/rt: Clean up usage of rt_task()

On 06/05/24 16:07, Daniel Bristot de Oliveira wrote:
> On 6/5/24 15:24, Qais Yousef wrote:
> >>> But rt is a shortened version of realtime, and so it is making *it less*
> >>> clear that we also have DL here.
> >> Can SCHED_DL be considered a real-time scheduling class as in opposite
> >> to SCHED_BATCH for instance? Due to its requirements it fits for a real
> >> time scheduling class, right?
> >> And RT (as in real time) already includes SCHED_RR and SCHED_FIFO.
> > Yeah I think the usage of realtime to cover both makes sense. I followed your
> > precedence with task_is_realtime().
> >
> > Anyway. If people really find this confusing, what would make sense is to split
> > them and ask users to call rt_task() and dl_task() explicitly without this
> > wrapper. I personally like it better with the wrapper. But happy to follow the
> > crowd.
>
> For me, doing dl_ things it is better to keep them separate, so I can
> easily search for dl_ specific checks.
>
> rt_or_dl_task(p);

I posted a new version with this suggestion as the top patch so that it can be
shredded more :-)

Thanks for having a look.


Cheers

--
Qais Yousef