Hi Ingo,
Have been watching and testing your changes as they have
evolved. Our group has a customer request for a scheduler that will give
them some tuneable parameters, and your changes have actually had
some parameters change thru the deltas you've made. It seemed like it
might be useful to take them and make them tweakable on a running
system. I am not 100% convinced of the goodness of this, but I wanted
to submit it for your consideration. The current code performs great btw,
thanks for all your hard work.
Regards,
--
Jack F. Vogel
IBM Linux Solutions
[email protected] (work)
[email protected] (home)
diff -Naur linux/include/linux/sched.h linux.jfv/include/linux/sched.h
--- linux/include/linux/sched.h Thu Jan 24 17:33:23 2002
+++ linux.jfv/include/linux/sched.h Thu Jan 24 17:24:37 2002
@@ -473,18 +473,31 @@
#define DEF_USER_NICE 0
/*
- * Default timeslice is 250 msecs, maximum is 500 msecs.
+ * Default timeslice is 250 msecs, maximum is 300 msecs.
* Minimum timeslice is 10 msecs.
*/
-#define MIN_TIMESLICE ( 10 * HZ / 1000)
-#define MAX_TIMESLICE (300 * HZ / 1000)
-#define CHILD_FORK_PENALTY 95
-#define PARENT_FORK_PENALTY 100
-#define PRIO_INTERACTIVE_RATIO 20
-#define PRIO_CPU_HOG_RATIO 60
-#define PRIO_BONUS_RATIO 70
-#define INTERACTIVE_DELTA 3
-
+#define DEFAULT_MIN_TIMESLICE ( 10 * HZ / 1000)
+#define DEFAULT_MAX_TIMESLICE (300 * HZ / 1000)
+#define DEFAULT_TIMESLICE (250 * HZ / 1000)
+#define DEFAULT_CHILD_FORK_PENALTY 95
+#define DEFAULT_PARENT_FORK_PENALTY 100
+#define DEFAULT_PRIO_INTERACTIVE_RATIO 20
+#define DEFAULT_PRIO_CPU_HOG_RATIO 60
+#define DEFAULT_PRIO_BONUS_RATIO 70
+#define DEFAULT_INTERACTIVE_DELTA 3
+/* Runtime-tuneable scheduler parameters; defined in kernel/sched.c. */
+extern int min_timeslice, max_timeslice, child_fork_penalty;
+extern int parent_fork_penalty, prio_interactive_ratio, prio_cpu_hog_ratio;
+extern int prio_bonus_ratio, interactive_delta;
+
+#define MIN_TIMESLICE (min_timeslice)
+#define MAX_TIMESLICE (max_timeslice)
+#define CHILD_FORK_PENALTY (child_fork_penalty)
+#define PARENT_FORK_PENALTY (parent_fork_penalty)
+#define PRIO_INTERACTIVE_RATIO (prio_interactive_ratio)
+#define PRIO_CPU_HOG_RATIO (prio_cpu_hog_ratio)
+#define PRIO_BONUS_RATIO (prio_bonus_ratio)
+#define INTERACTIVE_DELTA (interactive_delta)
#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
@@ -529,7 +542,7 @@
mm: NULL, \
active_mm: &init_mm, \
run_list: LIST_HEAD_INIT(tsk.run_list), \
- time_slice: NICE_TO_TIMESLICE(DEF_USER_NICE), \
+ time_slice: DEFAULT_TIMESLICE, \
next_task: &tsk, \
prev_task: &tsk, \
p_opptr: &tsk, \
diff -Naur linux/include/linux/sysctl.h linux.jfv/include/linux/sysctl.h
--- linux/include/linux/sysctl.h Mon Nov 26 05:29:17 2001
+++ linux.jfv/include/linux/sysctl.h Thu Jan 24 17:24:48 2002
@@ -63,7 +63,8 @@
CTL_DEV=7, /* Devices */
CTL_BUS=8, /* Busses */
CTL_ABI=9, /* Binary emulation */
- CTL_CPU=10 /* CPU stuff (speed scaling, etc) */
+ CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
+ CTL_SCHED=11 /* SCHED ctl (tuneable parameters) */
};
/* CTL_BUS names: */
@@ -72,6 +73,19 @@
BUS_ISA=1 /* ISA */
};
+/* CTL_SCHED names: */
+enum
+{
+ MAX_SLICE=1, /* Timeslice scaling */
+ MIN_SLICE=2,
+ CHILD_PENALTY=3,
+ PARENT_PENALTY=4,
+ INT_RATIO=5,
+ HOG_RATIO=6,
+ BONUS_RATIO=7,
+ INT_DELTA=8
+};
+
/* CTL_KERN names: */
enum
{
diff -Naur linux/kernel/sched.c linux.jfv/kernel/sched.c
--- linux/kernel/sched.c Thu Jan 24 17:33:23 2002
+++ linux.jfv/kernel/sched.c Thu Jan 24 17:09:55 2002
@@ -22,6 +22,19 @@
#define BITMAP_SIZE ((((MAX_PRIO+7)/8)+sizeof(long)-1)/sizeof(long))
+/*
+** Tuneable scheduler parameters,
+** brought out in /proc/sys/sched
+*/
+int max_timeslice = DEFAULT_MAX_TIMESLICE;
+int min_timeslice = DEFAULT_MIN_TIMESLICE;
+int child_fork_penalty = DEFAULT_CHILD_FORK_PENALTY;
+int parent_fork_penalty = DEFAULT_PARENT_FORK_PENALTY;
+int prio_interactive_ratio = DEFAULT_PRIO_INTERACTIVE_RATIO;
+int prio_cpu_hog_ratio = DEFAULT_PRIO_CPU_HOG_RATIO;
+int prio_bonus_ratio = DEFAULT_PRIO_BONUS_RATIO;
+int interactive_delta = DEFAULT_INTERACTIVE_DELTA;
+
typedef struct runqueue runqueue_t;
struct prio_array {
@@ -295,8 +308,8 @@
if (!rt_task(p)) {
p->sleep_avg = p->sleep_avg * CHILD_FORK_PENALTY / 100;
p->prio = effective_prio(p);
-
- current->sleep_avg = current->sleep_avg * PARENT_FORK_PENALTY / 100;
+ current->sleep_avg =
+ current->sleep_avg * PARENT_FORK_PENALTY / 100;
}
spin_lock_irq(&rq->lock);
p->cpu = smp_processor_id();
diff -Naur linux/kernel/sysctl.c linux.jfv/kernel/sysctl.c
--- linux/kernel/sysctl.c Fri Dec 21 09:42:04 2001
+++ linux.jfv/kernel/sysctl.c Thu Jan 24 17:09:55 2002
@@ -50,7 +50,8 @@
extern int sysrq_enabled;
extern int core_uses_pid;
extern int cad_pid;
-
+extern int child_fork_penalty, parent_fork_penalty,prio_interactive_ratio;
+extern int prio_cpu_hog_ratio, prio_bonus_ratio, interactive_delta;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID
*/
static int maxolduid = 65535;
static int minolduid;
@@ -109,6 +110,7 @@
static ctl_table kern_table[];
static ctl_table vm_table[];
+static ctl_table sched_table[];
#ifdef CONFIG_NET
extern ctl_table net_table[];
#endif
@@ -153,6 +155,7 @@
{CTL_FS, "fs", NULL, 0, 0555, fs_table},
{CTL_DEBUG, "debug", NULL, 0, 0555, debug_table},
{CTL_DEV, "dev", NULL, 0, 0555, dev_table},
+ {CTL_SCHED, "sched", NULL, 0, 0555, sched_table},
{0}
};
@@ -278,6 +281,27 @@
{0}
};
+/* Entries of the "sched" sysctl directory: one writable (0644) int per scheduler tunable. */
+static ctl_table sched_table[] = {
+ {MAX_SLICE, "max-timeslice",
+ &max_timeslice, sizeof(int), 0644, NULL, &proc_dointvec},
+ {MIN_SLICE, "min-timeslice",
+ &min_timeslice, sizeof(int), 0644, NULL, &proc_dointvec},
+ {CHILD_PENALTY, "child-fork-penalty",
+ &child_fork_penalty, sizeof(int), 0644, NULL, &proc_dointvec},
+ {PARENT_PENALTY, "parent-fork-penalty",
+ &parent_fork_penalty, sizeof(int), 0644, NULL, &proc_dointvec},
+ {INT_RATIO, "prio-interactive-ratio",
+ &prio_interactive_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {HOG_RATIO, "prio-cpu-hog-ratio",
+ &prio_cpu_hog_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {BONUS_RATIO, "prio-bonus-ratio",
+ &prio_bonus_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {INT_DELTA, "interactive-delta",
+ &interactive_delta, sizeof(int), 0644, NULL, &proc_dointvec},
+ {0}
+};
+
static ctl_table proc_table[] = {
{0}
};
On Thu, 24 Jan 2002, Jack F. Vogel wrote:
> Have been watching and testing your changes as they have
> evolved. Our group has a customer request for a scheduler that will
> give them some tuneable parameters, and your changes have actually had
> some parameters change thru the deltas you've made. It seemed like it
> might be useful to take them and make them tweakable on a running
> system. I am not 100% convinced of the goodness of this, but I wanted
> to submit it for your consideration. The current code performs great
> btw, thanks for all your hard work.
i'm using something like this, hence the structured extraction of all
relevant parameters in -J6. It's very useful for testing. We do not want
to slow down the scheduler with runtime parameters though (and it's just
way too easy to change fundamental behavior of the scheduler by changing
the parameters), so this should definitely remain a development-helper
patch.
Ingo
Jack,
On Thu, 24 Jan 2002, Jack F. Vogel wrote:
> Hi Ingo,
>
> Have been watching and testing your changes as they have
> evolved. Our group has a customer request for a scheduler that will
> give them some tuneable parameters, and your changes have actually had
> some parameters change thru the deltas you've made. It seemed like it
> might be useful to take them and make them tweakable on a running
> system. I am not 100% convinced of the goodness of this, but I wanted
> to submit it for your consideration. The current code performs great
> btw, thanks for all your hard work.
i think we could use your patch for development purposes as well, let's
merge the two efforts?
i'd suggest to name the /proc/sys/sched/ values the same way the constants
are called. Eg. /proc/sys/sched/CHILD_FORK_PENALTY. This makes it easier
to communicate suggested parameter changes.
i have a script that dumps the current sched-parameters state:
[root@mars root]# ./getsched
echo 95 > /proc/sys/kernel/CHILD_FORK_PENALTY
echo 3 > /proc/sys/kernel/EXIT_WEIGHT
echo 3 > /proc/sys/kernel/INTERACTIVE_DELTA
echo 200 > /proc/sys/kernel/MAX_SLEEP_AVG
echo 30 > /proc/sys/kernel/MAX_TIMESLICE
echo 100 > /proc/sys/kernel/PARENT_FORK_PENALTY
echo 70 > /proc/sys/kernel/PRIO_BONUS_RATIO
echo 60 > /proc/sys/kernel/PRIO_CPU_HOG_RATIO
echo 20 > /proc/sys/kernel/PRIO_INTERACTIVE_RATIO
echo 200 > /proc/sys/kernel/STARVATION_LIMIT
the script is very simple:
cd /proc/sys/kernel
for N in *[A-Z]*; do echo "echo "`cat $N`" > /proc/sys/kernel/$N"; done
otherwise our approach is identical. This patch would always stay
separate, but could be readily applied by people who want more control
over the scheduler for development or whatever other reasons.
Ingo
On Friday 25 January 2002 04:46 am, Ingo Molnar wrote:
> Jack,
>
>
> i think we could use your patch for development purposes as well, lets
> merge the two efforts?
Sounds good to me Ingo, thanks.
> i'd suggest to name the /proc/sys/sched/ values the same way the constants
> are called. Eg. /proc/sys/sched/CHILD_FORK_PENALTY. This makes it easier
> to communicate suggested parameter changes.
No problem. I can make those changes.
> i have a script that dumps the current sched-parameters state:
>
> [root@mars root]# ./getsched
> echo 95 > /proc/sys/kernel/CHILD_FORK_PENALTY
> echo 3 > /proc/sys/kernel/EXIT_WEIGHT
> echo 3 > /proc/sys/kernel/INTERACTIVE_DELTA
> echo 200 > /proc/sys/kernel/MAX_SLEEP_AVG
> echo 30 > /proc/sys/kernel/MAX_TIMESLICE
> echo 100 > /proc/sys/kernel/PARENT_FORK_PENALTY
> echo 70 > /proc/sys/kernel/PRIO_BONUS_RATIO
> echo 60 > /proc/sys/kernel/PRIO_CPU_HOG_RATIO
> echo 20 > /proc/sys/kernel/PRIO_INTERACTIVE_RATIO
> echo 200 > /proc/sys/kernel/STARVATION_LIMIT
>
> the script is very simple:
>
> cd /proc/sys/kernel
>
> for N in *[A-Z]*; do echo "echo "`cat $N`" > /proc/sys/kernel/$N"; done
>
> otherwise our approach is identical. This patch would always stay
> separate, but could be readily applied by people who want more control
> over the scheduler for development or whatever other reasons.
>
> Ingo
Great, so how and where do we maintain it?
Cheers,
--
Jack F. Vogel
IBM Linux Solutions
[email protected] (work)
[email protected] (home)