2020-01-11 09:54:55

by Crystal Wood

Subject: [PATCH v2 0/2] Track loadavg changes during full nohz

v2: drop the first couple of patches, which appear unnecessary

Peter Zijlstra (Intel) (1):
timers/nohz: Update nohz load in remote tick

Scott Wood (1):
sched/core: Don't skip remote tick for idle cpus

 include/linux/sched/nohz.h |  2 ++
 kernel/sched/core.c        | 22 +++++++++++++---------
 kernel/sched/loadavg.c     | 33 +++++++++++++++++++++++----------
3 files changed, 38 insertions(+), 19 deletions(-)

--
1.8.3.1


2020-01-11 09:55:11

by Crystal Wood

Subject: [PATCH v2 1/2] sched/core: Don't skip remote tick for idle cpus

This will be used in the next patch to get a loadavg update from
nohz CPUs. The delta check is skipped for idle tasks, because
idle_sched_class doesn't update se.exec_start and the computed delta
would be bogus.

Signed-off-by: Scott Wood <[email protected]>
---
kernel/sched/core.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 90e4b00ace89..dfb8ea801700 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3668,22 +3668,24 @@ static void sched_tick_remote(struct work_struct *work)
 	 * statistics and checks timeslices in a time-independent way, regardless
 	 * of when exactly it is running.
 	 */
-	if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+	if (!tick_nohz_tick_stopped_cpu(cpu))
 		goto out_requeue;
 
 	rq_lock_irq(rq, &rf);
 	curr = rq->curr;
-	if (is_idle_task(curr) || cpu_is_offline(cpu))
+	if (cpu_is_offline(cpu))
 		goto out_unlock;
 
 	update_rq_clock(rq);
-	delta = rq_clock_task(rq) - curr->se.exec_start;
 
-	/*
-	 * Make sure the next tick runs within a reasonable
-	 * amount of time.
-	 */
-	WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+	if (!is_idle_task(curr)) {
+		/*
+		 * Make sure the next tick runs within a reasonable
+		 * amount of time.
+		 */
+		delta = rq_clock_task(rq) - curr->se.exec_start;
+		WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+	}
 	curr->sched_class->task_tick(rq, curr, 0);
 
 out_unlock:
--
1.8.3.1
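
For illustration, a minimal userspace sketch of the logic this patch
rearranges; struct task and its fields are made-up stand-ins, not the
kernel's task_struct. It shows why the staleness check only makes sense
for non-idle tasks: the idle task's exec_start is never refreshed, so
the computed delta would trip the warning spuriously.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct task {
	int is_idle;
	uint64_t exec_start;	/* refreshed on each tick for non-idle tasks */
};

static void remote_tick(uint64_t rq_clock, const struct task *curr)
{
	if (!curr->is_idle) {
		/* only meaningful while exec_start is being maintained */
		uint64_t delta = rq_clock - curr->exec_start;

		if (delta > 3 * NSEC_PER_SEC)
			fprintf(stderr, "WARN: remote tick %llu ns late\n",
				(unsigned long long)delta);
	}
	/* task_tick() would run here for idle and non-idle alike */
}

int main(void)
{
	struct task busy = { .is_idle = 0, .exec_start = 10 * NSEC_PER_SEC };
	struct task idle = { .is_idle = 1, .exec_start = 0 }; /* stale forever */

	remote_tick(11 * NSEC_PER_SEC, &busy);	/* 1s since last tick: quiet */
	remote_tick(11 * NSEC_PER_SEC, &idle);	/* check skipped: no false WARN */
	return 0;
}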

2020-01-11 09:55:47

by Crystal Wood

Subject: [PATCH v2 2/2] timers/nohz: Update nohz load in remote tick

From: "Peter Zijlstra (Intel)" <[email protected]>

The way loadavg is tracked during nohz only pays attention to the load
upon entering nohz; nothing is folded in afterwards. This can be
particularly noticeable if full nohz is entered while non-idle, and
then the CPU goes idle and stays that way for a long time: the stale
pre-idle load keeps being reported.

Use the remote tick to ensure that full nohz CPUs report their deltas
within a reasonable time.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
[swood: added changelog and removed recheck of stopped tick]
Signed-off-by: Scott Wood <[email protected]>
---
 include/linux/sched/nohz.h |  2 ++
 kernel/sched/core.c        |  4 +++-
 kernel/sched/loadavg.c     | 33 +++++++++++++++++++++++----------
3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 1abe91ff6e4a..6d67e9a5af6b 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { }

#ifdef CONFIG_NO_HZ_COMMON
void calc_load_nohz_start(void);
+void calc_load_nohz_remote(struct rq *rq);
void calc_load_nohz_stop(void);
#else
static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_remote(struct rq *rq) { }
static inline void calc_load_nohz_stop(void) { }
#endif /* CONFIG_NO_HZ_COMMON */

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dfb8ea801700..2e4a505e48af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3676,6 +3676,7 @@ static void sched_tick_remote(struct work_struct *work)
 	if (cpu_is_offline(cpu))
 		goto out_unlock;
 
+	curr = rq->curr;
 	update_rq_clock(rq);
 
 	if (!is_idle_task(curr)) {
@@ -3688,10 +3689,11 @@
 	}
 	curr->sched_class->task_tick(rq, curr, 0);
 
+	calc_load_nohz_remote(rq);
 out_unlock:
 	rq_unlock_irq(rq, &rf);
-
 out_requeue:
+
 	/*
 	 * Run the remote tick once per second (1Hz). This arbitrary
 	 * frequency is large enough to avoid overload but short enough
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 28a516575c18..de22da666ac7 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void)
 	return calc_load_idx & 1;
 }
 
-void calc_load_nohz_start(void)
+static void calc_load_nohz_fold(struct rq *rq)
 {
-	struct rq *this_rq = this_rq();
 	long delta;
 
-	/*
-	 * We're going into NO_HZ mode, if there's any pending delta, fold it
-	 * into the pending NO_HZ delta.
-	 */
-	delta = calc_load_fold_active(this_rq, 0);
+	delta = calc_load_fold_active(rq, 0);
 	if (delta) {
 		int idx = calc_load_write_idx();
 
@@ -248,6 +243,24 @@ void calc_load_nohz_start(void)
 	}
 }
 
+void calc_load_nohz_start(void)
+{
+	/*
+	 * We're going into NO_HZ mode, if there's any pending delta, fold it
+	 * into the pending NO_HZ delta.
+	 */
+	calc_load_nohz_fold(this_rq());
+}
+
+/*
+ * Keep track of the load for NOHZ_FULL, must be called between
+ * calc_load_nohz_{start,stop}().
+ */
+void calc_load_nohz_remote(struct rq *rq)
+{
+	calc_load_nohz_fold(rq);
+}
+
 void calc_load_nohz_stop(void)
 {
 	struct rq *this_rq = this_rq();
@@ -268,7 +281,7 @@ void calc_load_nohz_stop(void)
 	this_rq->calc_load_update += LOAD_FREQ;
 }
 
-static long calc_load_nohz_fold(void)
+static long calc_load_nohz_read(void)
 {
 	int idx = calc_load_read_idx();
 	long delta = 0;
@@ -323,7 +336,7 @@ static void calc_global_nohz(void)
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 
-static inline long calc_load_nohz_fold(void) { return 0; }
+static inline long calc_load_nohz_read(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks)
 	/*
	 * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
	 */
-	delta = calc_load_nohz_fold();
+	delta = calc_load_nohz_read();
 	if (delta)
 		atomic_long_add(delta, &calc_load_tasks);

--
1.8.3.1
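
Again for illustration, a single-buffer userspace sketch of the fold
that calc_load_nohz_remote() now performs from the remote tick. The
names here are made up, and the kernel actually keeps two delta slots
selected by calc_load_write_idx()/calc_load_read_idx() so that writers
never race the periodic reader; this collapses that into one variable.

#include <stdio.h>

struct rq_mock {
	long nr_running;	/* current load contribution of this CPU */
	long calc_load_active;	/* contribution reported at the last fold */
};

static long calc_load_nohz_delta;	/* pending NO_HZ delta (one slot) */
static long calc_load_tasks = 2;	/* the CPU's two tasks were counted earlier */

/* Report the change in this runqueue's active count since the last fold. */
static long fold_active(struct rq_mock *rq)
{
	long delta = rq->nr_running - rq->calc_load_active;

	rq->calc_load_active = rq->nr_running;
	return delta;
}

static void nohz_fold(struct rq_mock *rq)
{
	calc_load_nohz_delta += fold_active(rq);
}

int main(void)
{
	struct rq_mock rq = { .nr_running = 2, .calc_load_active = 2 };

	/* the tick stops while busy, then the CPU goes idle */
	rq.nr_running = 0;

	/* without the remote tick, this -2 stays unreported indefinitely */
	nohz_fold(&rq);

	/* the periodic loadavg update drains the pending delta */
	calc_load_tasks += calc_load_nohz_delta;
	calc_load_nohz_delta = 0;

	printf("calc_load_tasks is now %ld\n", calc_load_tasks);	/* 0 */
	return 0;
}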

2020-01-20 12:29:35

by Peter Zijlstra

Subject: Re: [PATCH v2 0/2] Track loadavg changes during full nohz

On Sat, Jan 11, 2020 at 04:53:37AM -0500, Scott Wood wrote:
> v2: drop the first couple of patches, which appear unnecessary
>
> Peter Zijlstra (Intel) (1):
> timers/nohz: Update nohz load in remote tick
>
> Scott Wood (1):
> sched/core: Don't skip remote tick for idle cpus
>
> include/linux/sched/nohz.h |  2 ++
> kernel/sched/core.c        | 22 +++++++++++++---------
> kernel/sched/loadavg.c     | 33 +++++++++++++++++++++++----------
> 3 files changed, 38 insertions(+), 19 deletions(-)

Thanks, I've queued them.

2020-01-29 11:34:36

by tip-bot2 for Scott Wood

Subject: [tip: sched/core] sched/core: Don't skip remote tick for idle CPUs

The following commit has been merged into the sched/core branch of tip:

Commit-ID: 488603b815a7514c7009e6fc339d74ed4a30f343
Gitweb: https://git.kernel.org/tip/488603b815a7514c7009e6fc339d74ed4a30f343
Author: Scott Wood <[email protected]>
AuthorDate: Sat, 11 Jan 2020 04:53:38 -05:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Tue, 28 Jan 2020 21:36:16 +01:00

sched/core: Don't skip remote tick for idle CPUs

This will be used in the next patch to get a loadavg update from
nohz CPUs. The delta check is skipped for idle tasks, because
idle_sched_class doesn't update se.exec_start and the computed delta
would be bogus.

Signed-off-by: Scott Wood <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/sched/core.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fc1dfc0..cf8b33d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3669,22 +3669,24 @@ static void sched_tick_remote(struct work_struct *work)
 	 * statistics and checks timeslices in a time-independent way, regardless
 	 * of when exactly it is running.
 	 */
-	if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+	if (!tick_nohz_tick_stopped_cpu(cpu))
 		goto out_requeue;
 
 	rq_lock_irq(rq, &rf);
 	curr = rq->curr;
-	if (is_idle_task(curr) || cpu_is_offline(cpu))
+	if (cpu_is_offline(cpu))
 		goto out_unlock;
 
 	update_rq_clock(rq);
-	delta = rq_clock_task(rq) - curr->se.exec_start;
 
-	/*
-	 * Make sure the next tick runs within a reasonable
-	 * amount of time.
-	 */
-	WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+	if (!is_idle_task(curr)) {
+		/*
+		 * Make sure the next tick runs within a reasonable
+		 * amount of time.
+		 */
+		delta = rq_clock_task(rq) - curr->se.exec_start;
+		WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+	}
 	curr->sched_class->task_tick(rq, curr, 0);
 
 out_unlock:

2020-01-29 11:36:16

by tip-bot2 for Peter Zijlstra (Intel)

Subject: [tip: sched/core] timers/nohz: Update NOHZ load in remote tick

The following commit has been merged into the sched/core branch of tip:

Commit-ID: ebc0f83c78a2d26384401ecf2d2fa48063c0ee27
Gitweb: https://git.kernel.org/tip/ebc0f83c78a2d26384401ecf2d2fa48063c0ee27
Author: Peter Zijlstra (Intel) <[email protected]>
AuthorDate: Sat, 11 Jan 2020 04:53:39 -05:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Tue, 28 Jan 2020 21:36:44 +01:00

timers/nohz: Update NOHZ load in remote tick

The way loadavg is tracked during nohz only pays attention to the load
upon entering nohz; nothing is folded in afterwards. This can be
particularly noticeable if full nohz is entered while non-idle, and
then the CPU goes idle and stays that way for a long time: the stale
pre-idle load keeps being reported.

Use the remote tick to ensure that full nohz CPUs report their deltas
within a reasonable time.

[ swood: Added changelog and removed recheck of stopped tick. ]

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Scott Wood <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
 include/linux/sched/nohz.h |  2 ++
 kernel/sched/core.c        |  4 +++-
 kernel/sched/loadavg.c     | 33 +++++++++++++++++++++++----------
3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 1abe91f..6d67e9a 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { }

#ifdef CONFIG_NO_HZ_COMMON
void calc_load_nohz_start(void);
+void calc_load_nohz_remote(struct rq *rq);
void calc_load_nohz_stop(void);
#else
static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_remote(struct rq *rq) { }
static inline void calc_load_nohz_stop(void) { }
#endif /* CONFIG_NO_HZ_COMMON */

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cf8b33d..4ff03c2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3677,6 +3677,7 @@ static void sched_tick_remote(struct work_struct *work)
 	if (cpu_is_offline(cpu))
 		goto out_unlock;
 
+	curr = rq->curr;
 	update_rq_clock(rq);
 
 	if (!is_idle_task(curr)) {
@@ -3689,10 +3690,11 @@
 	}
 	curr->sched_class->task_tick(rq, curr, 0);
 
+	calc_load_nohz_remote(rq);
 out_unlock:
 	rq_unlock_irq(rq, &rf);
-
 out_requeue:
+
 	/*
 	 * Run the remote tick once per second (1Hz). This arbitrary
 	 * frequency is large enough to avoid overload but short enough
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 28a5165..de22da6 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void)
 	return calc_load_idx & 1;
 }
 
-void calc_load_nohz_start(void)
+static void calc_load_nohz_fold(struct rq *rq)
 {
-	struct rq *this_rq = this_rq();
 	long delta;
 
-	/*
-	 * We're going into NO_HZ mode, if there's any pending delta, fold it
-	 * into the pending NO_HZ delta.
-	 */
-	delta = calc_load_fold_active(this_rq, 0);
+	delta = calc_load_fold_active(rq, 0);
 	if (delta) {
 		int idx = calc_load_write_idx();
 
@@ -248,6 +243,24 @@ void calc_load_nohz_start(void)
 	}
 }
 
+void calc_load_nohz_start(void)
+{
+	/*
+	 * We're going into NO_HZ mode, if there's any pending delta, fold it
+	 * into the pending NO_HZ delta.
+	 */
+	calc_load_nohz_fold(this_rq());
+}
+
+/*
+ * Keep track of the load for NOHZ_FULL, must be called between
+ * calc_load_nohz_{start,stop}().
+ */
+void calc_load_nohz_remote(struct rq *rq)
+{
+	calc_load_nohz_fold(rq);
+}
+
 void calc_load_nohz_stop(void)
 {
 	struct rq *this_rq = this_rq();
@@ -268,7 +281,7 @@ void calc_load_nohz_stop(void)
 	this_rq->calc_load_update += LOAD_FREQ;
 }
 
-static long calc_load_nohz_fold(void)
+static long calc_load_nohz_read(void)
 {
 	int idx = calc_load_read_idx();
 	long delta = 0;
@@ -323,7 +336,7 @@ static void calc_global_nohz(void)
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 
-static inline long calc_load_nohz_fold(void) { return 0; }
+static inline long calc_load_nohz_read(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks)
 	/*
	 * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
	 */
-	delta = calc_load_nohz_fold();
+	delta = calc_load_nohz_read();
 	if (delta)
 		atomic_long_add(delta, &calc_load_tasks);
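
Finally, a sketch of the two-slot buffering that makes the fold/read
split above safe. Slot selection is simplified here (a plain flag
instead of the kernel's jiffies/LOAD_FREQ window check), so treat it
as a model of calc_load_write_idx()/calc_load_read_idx(), not a copy:

#include <stdio.h>

static long calc_load_nohz[2];	/* pending NO_HZ deltas, double buffered */
static int calc_load_idx;	/* readers drain idx & 1; bumped after each drain */

static int write_idx(int fold_window_open)
{
	int idx = calc_load_idx;

	/*
	 * Once the fold window opens, writers move to the next slot so the
	 * reader can drain the current one without losing concurrent updates.
	 */
	if (fold_window_open)
		idx++;
	return idx & 1;
}

static int read_idx(void)
{
	return calc_load_idx & 1;
}

int main(void)
{
	calc_load_nohz[write_idx(0)] += -2;	/* CPU went idle, tick stopped */
	calc_load_nohz[write_idx(1)] += +1;	/* delta landing mid-fold */

	int idx = read_idx();
	long delta = calc_load_nohz[idx];	/* reader sees only the -2 */

	calc_load_nohz[idx] = 0;
	calc_load_idx++;			/* flip: the +1 is drained next time */

	printf("folded %ld now, %ld pending\n", delta, calc_load_nohz[read_idx()]);
	return 0;
}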