2013-08-19 16:15:03

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 04/10] sched, fair: Shrink sg_lb_stats and play memset games

We can shrink sg_lb_stats because rq::nr_running is an 'unsigned int'
and cpu numbers are 'int'

Before:
sgs: /* size: 72, cachelines: 2, members: 10 */
sds: /* size: 184, cachelines: 3, members: 7 */

After:
sgs: /* size: 56, cachelines: 1, members: 10 */
sds: /* size: 152, cachelines: 3, members: 7 */

Further we can avoid clearing all of sds since we do a total
clear/assignment of sg_stats in update_sg_lb_stats() with exception of
busiest_stat.avg_load which is referenced in update_sd_pick_busiest().

Signed-off-by: Peter Zijlstra <[email protected]>
---
kernel/sched/fair.c | 42 +++++++++++++++++++++++++++++++++++-------
1 file changed, 35 insertions(+), 7 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4282,12 +4282,12 @@ static unsigned long task_h_load(struct
struct sg_lb_stats {
unsigned long avg_load; /*Avg load across the CPUs of the group */
unsigned long group_load; /* Total load over the CPUs of the group */
- unsigned long sum_nr_running; /* Nr tasks running in the group */
unsigned long sum_weighted_load; /* Weighted load of group's tasks */
unsigned long load_per_task;
- unsigned long group_capacity;
- unsigned long idle_cpus;
- unsigned long group_weight;
+ unsigned int sum_nr_running; /* Nr tasks running in the group */
+ unsigned int group_capacity;
+ unsigned int idle_cpus;
+ unsigned int group_weight;
int group_imb; /* Is there an imbalance in the group ? */
int group_has_capacity; /* Is there extra capacity in the group? */
};
@@ -4303,10 +4303,38 @@ struct sd_lb_stats {
unsigned long total_pwr; /* Total power of all groups in sd */
unsigned long avg_load; /* Average load across all groups in sd */

- struct sg_lb_stats this_stat; /* Statistics of this group */
struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
+ struct sg_lb_stats this_stat; /* Statistics of this group */
};

+static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
+{
+ /*
+ * struct sd_lb_stats {
+ * struct sched_group * busiest; // 0 8
+ * struct sched_group * this; // 8 8
+ * long unsigned int total_load; // 16 8
+ * long unsigned int total_pwr; // 24 8
+ * long unsigned int avg_load; // 32 8
+ * struct sg_lb_stats {
+ * long unsigned int avg_load; // 40 8
+ * long unsigned int group_load; // 48 8
+ * ...
+ * } busiest_stat; // 40 56
+ * struct sg_lb_stats this_stat; // 96 56
+ *
+ * // size: 152, cachelines: 3, members: 7
+ * // last cacheline: 24 bytes
+ * };
+ *
+ * Skimp on the clearing to avoid duplicate work. We can avoid clearing
+ * this_stat because update_sg_lb_stats() does a full clear/assignment.
+ * We must however clear busiest_stat::avg_load because
+ * update_sd_pick_busiest() reads this before assignment.
+ */
+ memset(sds, 0, offsetof(struct sd_lb_stats, busiest_stat.group_load));
+}
+
/**
* get_sd_load_idx - Obtain the load index for a given sched domain.
* @sd: The sched_domain whose load_idx is to be obtained.
@@ -4665,7 +4693,7 @@ static inline void update_sd_lb_stats(st
*/
if (prefer_sibling && !local_group &&
sds->this && sds->this_stat.group_has_capacity)
- sgs->group_capacity = min(sgs->group_capacity, 1UL);
+ sgs->group_capacity = min(sgs->group_capacity, 1U);

/* Now, start updating sd_lb_stats */
sds->total_load += sgs->group_load;
@@ -4896,7 +4924,7 @@ static struct sched_group *find_busiest_
struct sg_lb_stats *this, *busiest;
struct sd_lb_stats sds;

- memset(&sds, 0, sizeof(sds));
+ init_sd_lb_stats(&sds);

/*
* Compute the various statistics relavent for load balancing at


2013-08-21 02:08:30

by Joonsoo Kim

[permalink] [raw]
Subject: Re: [PATCH 04/10] sched, fair: Shrink sg_lb_stats and play memset games

On Mon, Aug 19, 2013 at 06:01:02PM +0200, Peter Zijlstra wrote:
> +static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
> +{
> + /*
> + * struct sd_lb_stats {
> + * struct sched_group * busiest; // 0 8
> + * struct sched_group * this; // 8 8
> + * long unsigned int total_load; // 16 8
> + * long unsigned int total_pwr; // 24 8
> + * long unsigned int avg_load; // 32 8
> + * struct sg_lb_stats {
> + * long unsigned int avg_load; // 40 8
> + * long unsigned int group_load; // 48 8
> + * ...
> + * } busiest_stat; // 40 56
> + * struct sg_lb_stats this_stat; // 96 56
> + *
> + * // size: 152, cachelines: 3, members: 7
> + * // last cacheline: 24 bytes
> + * };
> + *
> + * Skimp on the clearing to avoid duplicate work. We can avoid clearing
> + * this_stat because update_sg_lb_stats() does a full clear/assignment.
> + * We must however clear busiest_stat::avg_load because
> + * update_sd_pick_busiest() reads this before assignment.
> + */
> + memset(sds, 0, offsetof(struct sd_lb_stats, busiest_stat.group_load));

Hello, Peter.

IMHO, this is so tricky.
With below change, we can simply do 'offsetof(struct sd_lb_stats, busiest_stat)'.

@@ -4546,7 +4546,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
struct sched_group *sg,
struct sg_lb_stats *sgs)
{
- if (sgs->avg_load <= sds->busiest_stat.avg_load)
+ if (!sds->busiest && sgs->avg_load <= sds->busiest_stat.avg_load)
return false;

if (sgs->sum_nr_running > sgs->group_capacity)

Thanks.

2013-08-21 02:20:04

by Joonsoo Kim

[permalink] [raw]
Subject: Re: [PATCH 04/10] sched, fair: Shrink sg_lb_stats and play memset games

On Wed, Aug 21, 2013 at 11:08:29AM +0900, Joonsoo Kim wrote:
> On Mon, Aug 19, 2013 at 06:01:02PM +0200, Peter Zijlstra wrote:
> > +static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
> > +{
> > + /*
> > + * struct sd_lb_stats {
> > + * struct sched_group * busiest; // 0 8
> > + * struct sched_group * this; // 8 8
> > + * long unsigned int total_load; // 16 8
> > + * long unsigned int total_pwr; // 24 8
> > + * long unsigned int avg_load; // 32 8
> > + * struct sg_lb_stats {
> > + * long unsigned int avg_load; // 40 8
> > + * long unsigned int group_load; // 48 8
> > + * ...
> > + * } busiest_stat; // 40 56
> > + * struct sg_lb_stats this_stat; // 96 56
> > + *
> > + * // size: 152, cachelines: 3, members: 7
> > + * // last cacheline: 24 bytes
> > + * };
> > + *
> > + * Skimp on the clearing to avoid duplicate work. We can avoid clearing
> > + * this_stat because update_sg_lb_stats() does a full clear/assignment.
> > + * We must however clear busiest_stat::avg_load because
> > + * update_sd_pick_busiest() reads this before assignment.
> > + */
> > + memset(sds, 0, offsetof(struct sd_lb_stats, busiest_stat.group_load));
>
> Hello, Peter.
>
> IMHO, this is so tricky.
> With below change, we can simply do 'offsetof(struct sd_lb_stats, busiest_stat)'.
>
> @@ -4546,7 +4546,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> struct sched_group *sg,
> struct sg_lb_stats *sgs)
> {
> - if (sgs->avg_load <= sds->busiest_stat.avg_load)
> + if (!sds->busiest && sgs->avg_load <= sds->busiest_stat.avg_load)
> return false;
>
> if (sgs->sum_nr_running > sgs->group_capacity)
>

Sorry, instead of !sds->busiest, it should be sds->busiest. :)

Thanks.

2013-08-21 08:36:05

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 04/10] sched, fair: Shrink sg_lb_stats and play memset games

On Wed, Aug 21, 2013 at 11:08:29AM +0900, Joonsoo Kim wrote:
> On Mon, Aug 19, 2013 at 06:01:02PM +0200, Peter Zijlstra wrote:
> > +static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
> > +{
> > + /*
> > + * struct sd_lb_stats {
> > + * struct sched_group * busiest; // 0 8
> > + * struct sched_group * this; // 8 8
> > + * long unsigned int total_load; // 16 8
> > + * long unsigned int total_pwr; // 24 8
> > + * long unsigned int avg_load; // 32 8
> > + * struct sg_lb_stats {
> > + * long unsigned int avg_load; // 40 8
> > + * long unsigned int group_load; // 48 8
> > + * ...
> > + * } busiest_stat; // 40 56
> > + * struct sg_lb_stats this_stat; // 96 56
> > + *
> > + * // size: 152, cachelines: 3, members: 7
> > + * // last cacheline: 24 bytes
> > + * };
> > + *
> > + * Skimp on the clearing to avoid duplicate work. We can avoid clearing
> > + * this_stat because update_sg_lb_stats() does a full clear/assignment.
> > + * We must however clear busiest_stat::avg_load because
> > + * update_sd_pick_busiest() reads this before assignment.
> > + */
> > + memset(sds, 0, offsetof(struct sd_lb_stats, busiest_stat.group_load));
>
> Hello, Peter.
>
> IMHO, this is so tricky.

Agreed, hence the somewhat elaborate comment :/

> With below change, we can simply do 'offsetof(struct sd_lb_stats, busiest_stat)'.
>
> @@ -4546,7 +4546,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> struct sched_group *sg,
> struct sg_lb_stats *sgs)
> {
> - if (sgs->avg_load <= sds->busiest_stat.avg_load)
> + if (!sds->busiest && sgs->avg_load <= sds->busiest_stat.avg_load)
> return false;
>
> if (sgs->sum_nr_running > sgs->group_capacity)

Indeed, thanks!

2013-08-21 08:38:13

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 04/10] sched, fair: Shrink sg_lb_stats and play memset games

On Wed, Aug 21, 2013 at 11:20:02AM +0900, Joonsoo Kim wrote:
> > With below change, we can simply do 'offsetof(struct sd_lb_stats, busiest_stat)'.
> >
> > @@ -4546,7 +4546,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> > struct sched_group *sg,
> > struct sg_lb_stats *sgs)
> > {
> > - if (sgs->avg_load <= sds->busiest_stat.avg_load)
> > + if (!sds->busiest && sgs->avg_load <= sds->busiest_stat.avg_load)
> > return false;
> >
> > if (sgs->sum_nr_running > sgs->group_capacity)
> >
>
> Sorry, instead of !sds->busiest, it should be sds->busiest. :)

Of course ;-) However I'm not sure which I prefer.. adding this extra
condition or having the initialization extra tricky. I'll sit on it a
little longer.

2013-08-24 10:16:29

by Paul Turner

[permalink] [raw]
Subject: Re: [PATCH 04/10] sched, fair: Shrink sg_lb_stats and play memset games

On Mon, Aug 19, 2013 at 9:01 AM, Peter Zijlstra <[email protected]> wrote:
> We can shrink sg_lb_stats because rq::nr_running is an 'unsigned int'
> and cpu numbers are 'int'
>
> Before:
> sgs: /* size: 72, cachelines: 2, members: 10 */
> sds: /* size: 184, cachelines: 3, members: 7 */
>
> After:
> sgs: /* size: 56, cachelines: 1, members: 10 */
> sds: /* size: 152, cachelines: 3, members: 7 */
>
> Further we can avoid clearing all of sds since we do a total
> clear/assignment of sg_stats in update_sg_lb_stats() with exception of
> busiest_stat.avg_load which is referenced in update_sd_pick_busiest().
>
> Signed-off-by: Peter Zijlstra <[email protected]>
> ---
> kernel/sched/fair.c | 42 +++++++++++++++++++++++++++++++++++-------
> 1 file changed, 35 insertions(+), 7 deletions(-)
>
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4282,12 +4282,12 @@ static unsigned long task_h_load(struct
> struct sg_lb_stats {
> unsigned long avg_load; /*Avg load across the CPUs of the group */
> unsigned long group_load; /* Total load over the CPUs of the group */
> - unsigned long sum_nr_running; /* Nr tasks running in the group */
> unsigned long sum_weighted_load; /* Weighted load of group's tasks */
> unsigned long load_per_task;
> - unsigned long group_capacity;
> - unsigned long idle_cpus;
> - unsigned long group_weight;
> + unsigned int sum_nr_running; /* Nr tasks running in the group */
> + unsigned int group_capacity;
> + unsigned int idle_cpus;
> + unsigned int group_weight;
> int group_imb; /* Is there an imbalance in the group ? */
> int group_has_capacity; /* Is there extra capacity in the group? */
> };
> @@ -4303,10 +4303,38 @@ struct sd_lb_stats {
> unsigned long total_pwr; /* Total power of all groups in sd */
> unsigned long avg_load; /* Average load across all groups in sd */
>
> - struct sg_lb_stats this_stat; /* Statistics of this group */
> struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
> + struct sg_lb_stats this_stat; /* Statistics of this group */
> };
>
> +static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
> +{
> + /*
> + * struct sd_lb_stats {
> + * struct sched_group * busiest; // 0 8
> + * struct sched_group * this; // 8 8
> + * long unsigned int total_load; // 16 8
> + * long unsigned int total_pwr; // 24 8
> + * long unsigned int avg_load; // 32 8
> + * struct sg_lb_stats {
> + * long unsigned int avg_load; // 40 8
> + * long unsigned int group_load; // 48 8
> + * ...
> + * } busiest_stat; // 40 56
> + * struct sg_lb_stats this_stat; // 96 56
> + *
> + * // size: 152, cachelines: 3, members: 7
> + * // last cacheline: 24 bytes
> + * };
> + *
> + * Skimp on the clearing to avoid duplicate work. We can avoid clearing
> + * this_stat because update_sg_lb_stats() does a full clear/assignment.
> + * We must however clear busiest_stat::avg_load because
> + * update_sd_pick_busiest() reads this before assignment.
> + */

Does gcc seriously not get this right if we just write:
sds->busiest = NULL;
sds->local = NULL;
....

etc.?

> + memset(sds, 0, offsetof(struct sd_lb_stats, busiest_stat.group_load));
> +}
> +
> /**
> * get_sd_load_idx - Obtain the load index for a given sched domain.
> * @sd: The sched_domain whose load_idx is to be obtained.
> @@ -4665,7 +4693,7 @@ static inline void update_sd_lb_stats(st
> */
> if (prefer_sibling && !local_group &&
> sds->this && sds->this_stat.group_has_capacity)
> - sgs->group_capacity = min(sgs->group_capacity, 1UL);
> + sgs->group_capacity = min(sgs->group_capacity, 1U);
>
> /* Now, start updating sd_lb_stats */
> sds->total_load += sgs->group_load;
> @@ -4896,7 +4924,7 @@ static struct sched_group *find_busiest_
> struct sg_lb_stats *this, *busiest;
> struct sd_lb_stats sds;
>
> - memset(&sds, 0, sizeof(sds));
> + init_sd_lb_stats(&sds);
>
> /*
> * Compute the various statistics relavent for load balancing at
>
>

2013-08-26 11:49:13

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 04/10] sched, fair: Shrink sg_lb_stats and play memset games

On Sat, Aug 24, 2013 at 03:15:57AM -0700, Paul Turner wrote:
> > +static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
> > +{
> > + /*
> > + * struct sd_lb_stats {
> > + * struct sched_group * busiest; // 0 8
> > + * struct sched_group * this; // 8 8
> > + * long unsigned int total_load; // 16 8
> > + * long unsigned int total_pwr; // 24 8
> > + * long unsigned int avg_load; // 32 8
> > + * struct sg_lb_stats {
> > + * long unsigned int avg_load; // 40 8
> > + * long unsigned int group_load; // 48 8
> > + * ...
> > + * } busiest_stat; // 40 56
> > + * struct sg_lb_stats this_stat; // 96 56
> > + *
> > + * // size: 152, cachelines: 3, members: 7
> > + * // last cacheline: 24 bytes
> > + * };
> > + *
> > + * Skimp on the clearing to avoid duplicate work. We can avoid clearing
> > + * this_stat because update_sg_lb_stats() does a full clear/assignment.
> > + * We must however clear busiest_stat::avg_load because
> > + * update_sd_pick_busiest() reads this before assignment.
> > + */
>
> Does gcc seriously not get this right if we just write:
> sds->busiest = NULL;
> sds->local = NULL;
> ....
>
> etc.?

There's a thought ;-) How about something like this?

sds = (struct sd_lb_stats){
.busiest = NULL,
.local = NULL,

.busiest_stat = {
.avg_load = 0,
},
};

C99 named initializers should ignore all unmentioned fields, and I think
we don't actually touch any of the other fields unless something is set.