Hi Peter,
This patchset is version 2 about priority of sched. Please help to review.
Sorry for the late update about it, coming back from vacation.
v2:
* leave the task_prio() in kernel/sched/core.c
* remove macro TASK_NICE and implement it as static inline
function in include/linux/sched.h.
* remove #ifndef when include prio.h.
Dongsheng Yang (3):
sched: Move the priority specific bits into a new header file.
sched: Expose some macros related with priority.
sched: Implement task_nice as static inline function.
include/linux/sched.h | 12 +++++++++++-
include/linux/sched/prio.h | 40 ++++++++++++++++++++++++++++++++++++++++
include/linux/sched/rt.h | 19 +------------------
kernel/sched/core.c | 26 +++++++-------------------
kernel/sched/cputime.c | 4 ++--
kernel/sched/sched.h | 18 ------------------
6 files changed, 61 insertions(+), 58 deletions(-)
create mode 100644 include/linux/sched/prio.h
--
1.8.2.1
Some bits about priority are defined in linux/sched/rt.h, but
some of them are not only for rt scheduler, such as MAX_PRIO.
This patch move them all into a new header file, linux/sched/prio.h.
Signed-off-by: Dongsheng Yang <[email protected]>
---
include/linux/sched.h | 1 +
include/linux/sched/prio.h | 23 +++++++++++++++++++++++
include/linux/sched/rt.h | 19 +------------------
3 files changed, 25 insertions(+), 18 deletions(-)
create mode 100644 include/linux/sched/prio.h
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a781dec..5aa0329 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -54,6 +54,7 @@ struct sched_param {
#include <linux/llist.h>
#include <linux/uidgid.h>
#include <linux/gfp.h>
+#include <linux/sched/prio.h>
#include <asm/processor.h>
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
new file mode 100644
index 0000000..9382ba8
--- /dev/null
+++ b/include/linux/sched/prio.h
@@ -0,0 +1,23 @@
+#ifndef _SCHED_PRIO_H
+#define _SCHED_PRIO_H
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space. This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO 100
+#define MAX_RT_PRIO MAX_USER_RT_PRIO
+
+#define MAX_PRIO (MAX_RT_PRIO + 40)
+#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+
+#endif /* _SCHED_PRIO_H */
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index 34e4ebe..f7453d4 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -1,24 +1,7 @@
#ifndef _SCHED_RT_H
#define _SCHED_RT_H
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space. This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO 100
-#define MAX_RT_PRIO MAX_USER_RT_PRIO
-
-#define MAX_PRIO (MAX_RT_PRIO + 40)
-#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+#include <linux/sched/prio.h>
static inline int rt_prio(int prio)
{
--
1.8.2.1
As commit 0e0c0797 expose the priority related macros in linux/sched/prio.h,
we don't have to implement task_nice in kernel/sched/core.c any more.
This patch implement it in linux/sched/sched.h as static inline function,
saving the kernel stack and enhancing the performance.
Signed-off-by: Dongsheng Yang <[email protected]>
---
include/linux/sched.h | 11 ++++++++++-
include/linux/sched/prio.h | 1 -
kernel/sched/core.c | 26 +++++++-------------------
kernel/sched/cputime.c | 4 ++--
4 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5aa0329..65fcbae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2081,7 +2081,16 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { }
extern bool yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
-extern int task_nice(const struct task_struct *p);
+/**
+ * task_nice - return the nice value of a given task.
+ * @p: the task in question.
+ *
+ * Return: The nice value [ -20 ... 0 ... 19 ].
+ */
+static inline int task_nice(const struct task_struct *p)
+{
+ return PRIO_TO_NICE((p)->static_prio);
+}
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index 13216f1..410ccb7 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -27,7 +27,6 @@
*/
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
-#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
/*
* 'User priority' is the nice value converted to something we
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b46131e..8d45a47 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2998,7 +2998,7 @@ void set_user_nice(struct task_struct *p, long nice)
unsigned long flags;
struct rq *rq;
- if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
+ if (task_nice(p) == nice || nice < -20 || nice > 19)
return;
/*
* We have to be careful, if called from sys_setpriority(),
@@ -3076,7 +3076,7 @@ SYSCALL_DEFINE1(nice, int, increment)
if (increment > 40)
increment = 40;
- nice = TASK_NICE(current) + increment;
+ nice = task_nice(current) + increment;
if (nice < -20)
nice = -20;
if (nice > 19)
@@ -3109,18 +3109,6 @@ int task_prio(const struct task_struct *p)
}
/**
- * task_nice - return the nice value of a given task.
- * @p: the task in question.
- *
- * Return: The nice value [ -20 ... 0 ... 19 ].
- */
-int task_nice(const struct task_struct *p)
-{
- return TASK_NICE(p);
-}
-EXPORT_SYMBOL(task_nice);
-
-/**
* idle_cpu - is a given cpu idle currently?
* @cpu: the processor in question.
*
@@ -3319,7 +3307,7 @@ recheck:
*/
if (user && !capable(CAP_SYS_NICE)) {
if (fair_policy(policy)) {
- if (attr->sched_nice < TASK_NICE(p) &&
+ if (attr->sched_nice < task_nice(p) &&
!can_nice(p, attr->sched_nice))
return -EPERM;
}
@@ -3343,7 +3331,7 @@ recheck:
* SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
*/
if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
- if (!can_nice(p, TASK_NICE(p)))
+ if (!can_nice(p, task_nice(p)))
return -EPERM;
}
@@ -3383,7 +3371,7 @@ recheck:
* If not changing anything there's no need to proceed further:
*/
if (unlikely(policy == p->policy)) {
- if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p))
+ if (fair_policy(policy) && attr->sched_nice != task_nice(p))
goto change;
if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
goto change;
@@ -3835,7 +3823,7 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
else if (task_has_rt_policy(p))
attr.sched_priority = p->rt_priority;
else
- attr.sched_nice = TASK_NICE(p);
+ attr.sched_nice = task_nice(p);
rcu_read_unlock();
@@ -7008,7 +6996,7 @@ void normalize_rt_tasks(void)
* Renice negative nice level userspace
* tasks back to 0:
*/
- if (TASK_NICE(p) < 0 && p->mm)
+ if (task_nice(p) < 0 && p->mm)
set_user_nice(p, 0);
continue;
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 9994791..58624a6 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -142,7 +142,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
p->utimescaled += cputime_scaled;
account_group_user_time(p, cputime);
- index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
+ index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
/* Add user time to cpustat. */
task_group_account_field(p, index, (__force u64) cputime);
@@ -169,7 +169,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
p->gtime += cputime;
/* Add guest time to cpustat. */
- if (TASK_NICE(p) > 0) {
+ if (task_nice(p) > 0) {
cpustat[CPUTIME_NICE] += (__force u64) cputime;
cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
} else {
--
1.8.2.1
Some macros in kernel/sched/sched.h about priority are
private to kernel/sched. But they are useful to other
parts of the core kernel.
This patch move these macros from kernel/sched/sched.h to
include/linux/sched/prio.h so that they are available to
other subsystems.
Signed-off-by: Dongsheng Yang <[email protected]>
---
include/linux/sched/prio.h | 18 ++++++++++++++++++
kernel/sched/sched.h | 18 ------------------
2 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index 9382ba8..13216f1 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -20,4 +20,22 @@
#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+ * and back.
+ */
+#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
+#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
+#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
+
+/*
+ * 'User priority' is the nice value converted to something we
+ * can work with better when scaling various scheduler parameters,
+ * it's a [ 0 ... 39 ] range.
+ */
+#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
+#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
+#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
+
#endif /* _SCHED_PRIO_H */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c2119fd..b44720d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -24,24 +24,6 @@ extern long calc_load_fold_active(struct rq *this_rq);
extern void update_cpu_load_active(struct rq *this_rq);
/*
- * Convert user-nice values [ -20 ... 0 ... 19 ]
- * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
- * and back.
- */
-#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
-#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
-#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
-
-/*
- * 'User priority' is the nice value converted to something we
- * can work with better when scaling various scheduler parameters,
- * it's a [ 0 ... 39 ] range.
- */
-#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
-#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
-#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
-
-/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
--
1.8.2.1