2006-05-02 06:17:58

by Balbir Singh

[permalink] [raw]
Subject: [Patch 3/8] cpu delay collection via schedstats


Changelog

Fixes comments by akpm
- comments about locking used in rq_sched_info_arrive/depart

No fix needed/possible
- redundant extern declaration of delayacct_on in sched.h
suggested location (delayacct.h) cannot be used as it includes sched.h
extern declaration moved to where it's needed
- move unlikely declaration inside sched_info_on
Function only returns constants. Cannot be done.
- removal of #if defined in sched_fork (Dave Hansen)
Refactoring suggested does not work if only SCHEDSTATS is configured

delayacct-schedstats.patch

Make the task-related schedstats functions
callable by delay accounting even if schedstats
collection isn't turned on. This removes the
dependency of delay accounting on schedstats.

Signed-off-by: Chandra Seetharaman <[email protected]>
Signed-off-by: Shailabh Nagar <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
---

include/linux/sched.h | 20 ++++++++++++++---
kernel/sched.c | 58 +++++++++++++++++++++++++++++++++++---------------
2 files changed, 58 insertions(+), 20 deletions(-)

diff -puN include/linux/sched.h~delayacct-schedstats include/linux/sched.h
--- linux-2.6.17-rc3/include/linux/sched.h~delayacct-schedstats 2006-05-02 07:31:18.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/sched.h 2006-05-02 07:34:27.000000000 +0530
@@ -521,7 +521,7 @@ typedef struct prio_array prio_array_t;
struct backing_dev_info;
struct reclaim_state;

-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info {
/* cumulative counters */
unsigned long cpu_time, /* time spent on the cpu */
@@ -532,9 +532,11 @@ struct sched_info {
unsigned long last_arrival, /* when we last ran on a cpu */
last_queued; /* when we were last queued to run */
};
+#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */

+#ifdef CONFIG_SCHEDSTATS
extern struct file_operations proc_schedstat_operations;
-#endif
+#endif /* CONFIG_SCHEDSTATS */

#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
@@ -557,7 +559,19 @@ struct task_delay_info {
u32 blkio_count;
u32 swapin_count;
};
+#endif /* CONFIG_TASK_DELAY_ACCT */
+
+static inline int sched_info_on(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+ return 1;
+#elif defined(CONFIG_TASK_DELAY_ACCT)
+ extern int delayacct_on;
+ return delayacct_on;
+#else
+ return 0;
#endif
+}

enum idle_type
{
@@ -744,7 +758,7 @@ struct task_struct {
cpumask_t cpus_allowed;
unsigned int time_slice, first_time_slice;

-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif

diff -puN kernel/sched.c~delayacct-schedstats kernel/sched.c
--- linux-2.6.17-rc3/kernel/sched.c~delayacct-schedstats 2006-05-02 07:31:18.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/sched.c 2006-05-02 07:31:18.000000000 +0530
@@ -469,9 +469,34 @@ struct file_operations proc_schedstat_op
.release = single_release,
};

+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void rq_sched_info_arrive(struct runqueue *rq,
+ unsigned long diff)
+{
+ if (rq) {
+ rq->rq_sched_info.run_delay += diff;
+ rq->rq_sched_info.pcnt++;
+ }
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void rq_sched_info_depart(struct runqueue *rq,
+ unsigned long diff)
+{
+ if (rq)
+ rq->rq_sched_info.cpu_time += diff;
+}
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
#else /* !CONFIG_SCHEDSTATS */
+static inline void rq_sched_info_arrive(struct runqueue *rq, unsigned long diff)
+{}
+static inline void rq_sched_info_depart(struct runqueue *rq, unsigned long diff)
+{}
# define schedstat_inc(rq, field) do { } while (0)
# define schedstat_add(rq, field, amt) do { } while (0)
#endif
@@ -491,7 +516,7 @@ static inline runqueue_t *this_rq_lock(v
return rq;
}

-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
/*
* Called when a process is dequeued from the active array and given
* the cpu. We should note that with the exception of interactive
@@ -520,7 +545,6 @@ static inline void sched_info_dequeued(t
static void sched_info_arrive(task_t *t)
{
unsigned long now = jiffies, diff = 0;
- struct runqueue *rq = task_rq(t);

if (t->sched_info.last_queued)
diff = now - t->sched_info.last_queued;
@@ -529,11 +553,7 @@ static void sched_info_arrive(task_t *t)
t->sched_info.last_arrival = now;
t->sched_info.pcnt++;

- if (!rq)
- return;
-
- rq->rq_sched_info.run_delay += diff;
- rq->rq_sched_info.pcnt++;
+ rq_sched_info_arrive(task_rq(t), diff);
}

/*
@@ -553,8 +573,9 @@ static void sched_info_arrive(task_t *t)
*/
static inline void sched_info_queued(task_t *t)
{
- if (!t->sched_info.last_queued)
- t->sched_info.last_queued = jiffies;
+ if (unlikely(sched_info_on()))
+ if (!t->sched_info.last_queued)
+ t->sched_info.last_queued = jiffies;
}

/*
@@ -563,13 +584,10 @@ static inline void sched_info_queued(tas
*/
static inline void sched_info_depart(task_t *t)
{
- struct runqueue *rq = task_rq(t);
unsigned long diff = jiffies - t->sched_info.last_arrival;

t->sched_info.cpu_time += diff;
-
- if (rq)
- rq->rq_sched_info.cpu_time += diff;
+ rq_sched_info_depart(task_rq(t), diff);
}

/*
@@ -577,7 +595,7 @@ static inline void sched_info_depart(tas
* their time slice. (This may also be called when switching to or from
* the idle task.) We are only called when prev != next.
*/
-static inline void sched_info_switch(task_t *prev, task_t *next)
+static inline void __sched_info_switch(task_t *prev, task_t *next)
{
struct runqueue *rq = task_rq(prev);

@@ -592,10 +610,15 @@ static inline void sched_info_switch(tas
if (next != rq->idle)
sched_info_arrive(next);
}
+static inline void sched_info_switch(task_t *prev, task_t *next)
+{
+ if (unlikely(sched_info_on()))
+ __sched_info_switch(prev, next);
+}
#else
#define sched_info_queued(t) do { } while (0)
#define sched_info_switch(t, next) do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */

/*
* Adding/removing a task to/from a priority array:
@@ -1393,8 +1416,9 @@ void fastcall sched_fork(task_t *p, int
p->state = TASK_RUNNING;
INIT_LIST_HEAD(&p->run_list);
p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
- memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+ if (unlikely(sched_info_on()))
+ memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
p->oncpu = 0;
_


2006-05-08 21:24:09

by Andrew Morton

[permalink] [raw]
Subject: Re: [Patch 3/8] cpu delay collection via schedstats

Balbir Singh <[email protected]> wrote:
>
> +/*
> + * Expects runqueue lock to be held for atomicity of update
> + */
> +static inline void rq_sched_info_arrive(struct runqueue *rq,
> + unsigned long diff)
> +{
> + if (rq) {
> + rq->rq_sched_info.run_delay += diff;
> + rq->rq_sched_info.pcnt++;
> + }
> +}
> +
> +/*
> + * Expects runqueue lock to be held for atomicity of update
> + */
> +static inline void rq_sched_info_depart(struct runqueue *rq,
> + unsigned long diff)
> +{
> + if (rq)
> + rq->rq_sched_info.cpu_time += diff;
> +}

The kernel has many different units of time - jiffies, cpu ticks, ns, us,
ms, etc. So the reader of these functions doesn't have a clue what "diff"
is.

A good way to remove all doubt in all cases is to include the units in the
variable's name. Something like delta_jiffies, perhaps.

2006-05-09 04:04:32

by Balbir Singh

[permalink] [raw]
Subject: Re: [Patch 3/8] cpu delay collection via schedstats

On Mon, May 08, 2006 at 02:26:40PM -0700, Andrew Morton wrote:
> Balbir Singh <[email protected]> wrote:
> >
> > +/*
> > + * Expects runqueue lock to be held for atomicity of update
> > + */
> > +static inline void rq_sched_info_arrive(struct runqueue *rq,
> > + unsigned long diff)
> > +{
> > + if (rq) {
> > + rq->rq_sched_info.run_delay += diff;
> > + rq->rq_sched_info.pcnt++;
> > + }
> > +}
> > +
> > +/*
> > + * Expects runqueue lock to be held for atomicity of update
> > + */
> > +static inline void rq_sched_info_depart(struct runqueue *rq,
> > + unsigned long diff)
> > +{
> > + if (rq)
> > + rq->rq_sched_info.cpu_time += diff;
> > +}
>
> The kernel has many different units of time - jiffies, cpu ticks, ns, us,
> ms, etc. So the reader of these functions doesn't have a clue what "diff"
> is.
>
> A good way to remove all doubt in all cases is to include the units in the
> variable's name. Something like delta_jiffies, perhaps.

Yes, that makes sense and enhances readability. We will fix the naming
convention; "diff" is indeed a delta measured in jiffies.


Thanks,
Balbir Singh,
Linux Technology Center,
IBM Software Labs

2006-05-10 10:28:05

by Balbir Singh

[permalink] [raw]
Subject: [PATCH][delayacct] Use better names in schedstats (was Re: [Patch 3/8] cpu delay collection via schedstats)

On Mon, May 08, 2006 at 02:26:40PM -0700, Andrew Morton wrote:
> Balbir Singh <[email protected]> wrote:
> >
> > +/*
> > + * Expects runqueue lock to be held for atomicity of update
> > + */
> > +static inline void rq_sched_info_arrive(struct runqueue *rq,
> > + unsigned long diff)
> > +{
> > + if (rq) {
> > + rq->rq_sched_info.run_delay += diff;
> > + rq->rq_sched_info.pcnt++;
> > + }
> > +}
> > +
> > +/*
> > + * Expects runqueue lock to be held for atomicity of update
> > + */
> > +static inline void rq_sched_info_depart(struct runqueue *rq,
> > + unsigned long diff)
> > +{
> > + if (rq)
> > + rq->rq_sched_info.cpu_time += diff;
> > +}
>
> The kernel has many different units of time - jiffies, cpu ticks, ns, us,
> ms, etc. So the reader of these functions doesn't have a clue what "diff"
> is.
>
> A good way to remove all doubt in all cases is to include the units in the
> variable's name. Something like delta_jiffies, perhaps.

Hi, Andrew

I have renamed every "diff" variable to "delta_jiffies" to make the code
easier to read, as suggested in the review comments.

Balbir Singh,
Linux Technology Center,
IBM Software Labs


Changelog
1. Clean up the usage of the names. Use names with units to make the code
easier to read

Signed-off-by: Balbir Singh <[email protected]>
---

kernel/sched.c | 28 +++++++++++++++-------------
1 files changed, 15 insertions(+), 13 deletions(-)

diff -puN kernel/sched.c~schedstats-use-better-names kernel/sched.c
--- linux-2.6.17-rc3/kernel/sched.c~schedstats-use-better-names 2006-05-10 14:48:54.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/sched.c 2006-05-10 14:56:09.000000000 +0530
@@ -473,10 +473,10 @@ struct file_operations proc_schedstat_op
* Expects runqueue lock to be held for atomicity of update
*/
static inline void rq_sched_info_arrive(struct runqueue *rq,
- unsigned long diff)
+ unsigned long delta_jiffies)
{
if (rq) {
- rq->rq_sched_info.run_delay += diff;
+ rq->rq_sched_info.run_delay += delta_jiffies;
rq->rq_sched_info.pcnt++;
}
}
@@ -485,17 +485,19 @@ static inline void rq_sched_info_arrive(
* Expects runqueue lock to be held for atomicity of update
*/
static inline void rq_sched_info_depart(struct runqueue *rq,
- unsigned long diff)
+ unsigned long delta_jiffies)
{
if (rq)
- rq->rq_sched_info.cpu_time += diff;
+ rq->rq_sched_info.cpu_time += delta_jiffies;
}
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
#else /* !CONFIG_SCHEDSTATS */
-static inline void rq_sched_info_arrive(struct runqueue *rq, unsigned long diff)
+static inline void rq_sched_info_arrive(struct runqueue *rq,
+ unsigned long delta_jiffies)
{}
-static inline void rq_sched_info_depart(struct runqueue *rq, unsigned long diff)
+static inline void rq_sched_info_depart(struct runqueue *rq,
+ unsigned long delta_jiffies)
{}
# define schedstat_inc(rq, field) do { } while (0)
# define schedstat_add(rq, field, amt) do { } while (0)
@@ -544,16 +546,16 @@ static inline void sched_info_dequeued(t
*/
static void sched_info_arrive(task_t *t)
{
- unsigned long now = jiffies, diff = 0;
+ unsigned long now = jiffies, delta_jiffies = 0;

if (t->sched_info.last_queued)
- diff = now - t->sched_info.last_queued;
+ delta_jiffies = now - t->sched_info.last_queued;
sched_info_dequeued(t);
- t->sched_info.run_delay += diff;
+ t->sched_info.run_delay += delta_jiffies;
t->sched_info.last_arrival = now;
t->sched_info.pcnt++;

- rq_sched_info_arrive(task_rq(t), diff);
+ rq_sched_info_arrive(task_rq(t), delta_jiffies);
}

/*
@@ -584,10 +586,10 @@ static inline void sched_info_queued(tas
*/
static inline void sched_info_depart(task_t *t)
{
- unsigned long diff = jiffies - t->sched_info.last_arrival;
+ unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;

- t->sched_info.cpu_time += diff;
- rq_sched_info_depart(task_rq(t), diff);
+ t->sched_info.cpu_time += delta_jiffies;
+ rq_sched_info_depart(task_rq(t), delta_jiffies);
}

/*
_