2021-05-12 23:44:55

by Frederic Weisbecker

[permalink] [raw]
Subject: [GIT PULL] tick/nohz updates v3

Ingo, Thomas,

Please pull the timers/nohz-v3 branch that can be found at:

git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
timers/nohz-v3

Changes since v2:

* Add Acks from Peter Zijlstra

* Only bother to fetch the task's CPU if the task is queued, in
"tick/nohz: Kick only _queued_ task whose tick dependency is updated"
(reported by Peter Zijlstra)

* Correctly indent comment in
"tick/nohz: Kick only _queued_ task whose tick dependency is updated"
(reported by Peter Zijlstra)

* Add Peter's SoB in "tick/nohz: Evaluate the CPU expression after the static key"

* Add "tick/nohz: Call tick_nohz_task_switch() with interrupts disabled"
(from Peter Zijlstra)

* Add "MAINTAINERS: Add myself as context tracking maintainer"

---
Summary:

* Further reduce ticks and IPIs in full dynticks mode.

* Optimize the static-key-based test for a CPU's nohz_full mode
and also the tick nohz probe on context switch.

* A few cleanups, Kconfig documentation and maintainership clarification.

HEAD: 380b68819eed62264ad8e54467481ca7003248c3

Thanks,
Frederic
---

Frederic Weisbecker (4):
tick/nohz: Remove superflous check for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
tick/nohz: Update nohz_full Kconfig help
tick/nohz: Only wakeup a single target cpu when kicking a task
MAINTAINERS: Add myself as context tracking maintainer

Marcelo Tosatti (2):
tick/nohz: Change signal tick dependency to wakeup CPUs of member tasks
tick/nohz: Kick only _queued_ task whose tick dependency is updated

Peter Zijlstra (2):
tick/nohz: Evaluate the CPU expression after the static key
tick/nohz: Call tick_nohz_task_switch() with interrupts disabled

Yunfeng Ye (2):
tick/nohz: Conditionally restart tick on idle exit
tick/nohz: Update idle_exittime on actual idle exit


MAINTAINERS | 6 ++
include/linux/sched.h | 2 +
include/linux/tick.h | 26 +++++----
kernel/sched/core.c | 7 ++-
kernel/time/Kconfig | 11 ++--
kernel/time/posix-cpu-timers.c | 4 +-
kernel/time/tick-sched.c | 129 ++++++++++++++++++++++++++++-------------
7 files changed, 125 insertions(+), 60 deletions(-)


2021-05-12 23:44:55

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 07/10] tick/nohz: Change signal tick dependency to wakeup CPUs of member tasks

From: Marcelo Tosatti <[email protected]>

Rather than waking up all nohz_full CPUs on the system, only wake up
the target CPUs of member threads of the signal.

Reduces interruptions to nohz_full CPUs.

Acked-by: Peter Zijlstra <[email protected]>
Signed-off-by: Marcelo Tosatti <[email protected]>
Cc: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/tick.h | 8 ++++----
kernel/time/posix-cpu-timers.c | 4 ++--
kernel/time/tick-sched.c | 15 +++++++++++++--
3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 2258984a0e8a..0bb80a7f05b9 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -211,7 +211,7 @@ extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
-extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
@@ -256,11 +256,11 @@ static inline void tick_dep_clear_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
- tick_nohz_dep_set_signal(signal, bit);
+ tick_nohz_dep_set_signal(tsk, bit);
}
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
@@ -288,7 +288,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 3bb96a8b49c9..29a5e54e6e10 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
if (CPUCLOCK_PERTHREAD(timer->it_clock))
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
else
- tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
+ tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
}

/*
@@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
if (*newval < *nextevt)
*nextevt = *newval;

- tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
+ tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1991adf5a922..800719ea4045 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -444,9 +444,20 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
-void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+void tick_nohz_dep_set_signal(struct task_struct *tsk,
+ enum tick_dep_bits bit)
{
- tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+ int prev;
+ struct signal_struct *sig = tsk->signal;
+
+ prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
+ if (!prev) {
+ struct task_struct *t;
+
+ lockdep_assert_held(&tsk->sighand->siglock);
+ __for_each_thread(sig, t)
+ tick_nohz_kick_task(t);
+ }
}

void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
--
2.25.1

2021-05-12 23:44:55

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 06/10] tick/nohz: Only wakeup a single target cpu when kicking a task

When adding a tick dependency to a task, it's necessary to
wake up the CPU where the task resides to reevaluate tick
dependencies on that CPU.

However the current code wakes up all nohz_full CPUs, which
is unnecessary.

Switch to waking up a single CPU, by using ordering of writes
to task->cpu and task->tick_dep_mask.

Suggested-by: Peter Zijlstra <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Marcelo Tosatti <[email protected]>
---
kernel/time/tick-sched.c | 40 +++++++++++++++++++++++++++-------------
1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89ec0abcd62b..1991adf5a922 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -322,6 +322,31 @@ void tick_nohz_full_kick_cpu(int cpu)
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}

+static void tick_nohz_kick_task(struct task_struct *tsk)
+{
+ int cpu = task_cpu(tsk);
+
+ /*
+ * If the task concurrently migrates to another cpu,
+ * we guarantee it sees the new tick dependency upon
+ * schedule.
+ *
+ *
+ * set_task_cpu(p, cpu);
+ * STORE p->cpu = @cpu
+ * __schedule() (switch to task 'p')
+ * LOCK rq->lock
+ * smp_mb__after_spin_lock() STORE p->tick_dep_mask
+ * tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
+ * LOAD p->tick_dep_mask LOAD p->cpu
+ */
+
+ preempt_disable();
+ if (cpu_online(cpu))
+ tick_nohz_full_kick_cpu(cpu);
+ preempt_enable();
+}
+
/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
@@ -404,19 +429,8 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
- if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
- if (tsk == current) {
- preempt_disable();
- tick_nohz_full_kick();
- preempt_enable();
- } else {
- /*
- * Some future tick_nohz_full_kick_task()
- * should optimize this.
- */
- tick_nohz_full_kick_all();
- }
- }
+ if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
+ tick_nohz_kick_task(tsk);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);

--
2.25.1

2021-05-12 23:44:57

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 01/10] tick/nohz: Evaluate the CPU expression after the static key

From: Peter Zijlstra <[email protected]>

When tick_nohz_full_cpu() is called with smp_processor_id(), the latter
is unconditionally evaluated whether the static key is on or off. It is
not necessary in the off-case though, so make sure the cpu expression
is executed at the last moment.

Illustrate with the following test function:

int tick_nohz_test(void)
{
return tick_nohz_full_cpu(smp_processor_id());
}

The resulting code before was:

mov %gs:0x7eea92d1(%rip),%eax # smp_processor_id() fetch
nopl 0x0(%rax,%rax,1)
xor %eax,%eax
retq
cmpb $0x0,0x29d393a(%rip) # <tick_nohz_full_running>
je tick_nohz_test+0x29 # jump to below eax clear
mov %eax,%eax
bt %rax,0x29d3936(%rip) # <tick_nohz_full_mask>
setb %al
movzbl %al,%eax
retq
xor %eax,%eax
retq

Now it becomes:

nopl 0x0(%rax,%rax,1)
xor %eax,%eax
retq
cmpb $0x0,0x29d3871(%rip) # <tick_nohz_full_running>
je tick_nohz_test+0x29 # jump to below eax clear
mov %gs:0x7eea91f0(%rip),%eax # smp_processor_id() fetch, after static key
mov %eax,%eax
bt %rax,0x29d3866(%rip) # <tick_nohz_full_mask>
setb %al
movzbl %al,%eax
retq
xor %eax,%eax
retq

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/tick.h | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613c7eff..2258984a0e8a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -185,13 +185,17 @@ static inline bool tick_nohz_full_enabled(void)
return tick_nohz_full_running;
}

-static inline bool tick_nohz_full_cpu(int cpu)
-{
- if (!tick_nohz_full_enabled())
- return false;
-
- return cpumask_test_cpu(cpu, tick_nohz_full_mask);
-}
+/*
+ * Check if a CPU is part of the nohz_full subset. Arrange for evaluating
+ * the cpu expression (typically smp_processor_id()) _after_ the static
+ * key.
+ */
+#define tick_nohz_full_cpu(_cpu) ({ \
+ bool __ret = false; \
+ if (tick_nohz_full_enabled()) \
+ __ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask); \
+ __ret; \
+})

static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
{
--
2.25.1

2021-05-12 23:44:57

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 02/10] tick/nohz: Conditionally restart tick on idle exit

From: Yunfeng Ye <[email protected]>

In nohz_full mode, switching from idle to a task will unconditionally
issue a tick restart. If the task is alone in the runqueue or is the
highest priority, the tick will fire once then eventually stop. But that
alone is still undesired noise.

Therefore, only restart the tick on idle exit when it's strictly
necessary.

Acked-by: Peter Zijlstra <[email protected]>
Signed-off-by: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 46 +++++++++++++++++++++++++---------------
1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 828b091501ca..05c1ce1034d6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -926,24 +926,30 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
tick_nohz_restart(ts, now);
}

-static void tick_nohz_full_update_tick(struct tick_sched *ts)
+static void __tick_nohz_full_update_tick(struct tick_sched *ts,
+ ktime_t now)
{
#ifdef CONFIG_NO_HZ_FULL
int cpu = smp_processor_id();

- if (!tick_nohz_full_cpu(cpu))
- return;
-
- if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
- return;
-
if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, cpu);
else if (ts->tick_stopped)
- tick_nohz_restart_sched_tick(ts, ktime_get());
+ tick_nohz_restart_sched_tick(ts, now);
#endif
}

+static void tick_nohz_full_update_tick(struct tick_sched *ts)
+{
+ if (!tick_nohz_full_cpu(smp_processor_id()))
+ return;
+
+ if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+ return;
+
+ __tick_nohz_full_update_tick(ts, ktime_get());
+}
+
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
/*
@@ -1209,18 +1215,24 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#endif
}

-static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
-{
- tick_nohz_restart_sched_tick(ts, now);
- tick_nohz_account_idle_ticks(ts);
-}
-
void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

- if (ts->tick_stopped)
- __tick_nohz_idle_restart_tick(ts, ktime_get());
+ if (ts->tick_stopped) {
+ tick_nohz_restart_sched_tick(ts, ktime_get());
+ tick_nohz_account_idle_ticks(ts);
+ }
+}
+
+static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
+{
+ if (tick_nohz_full_cpu(smp_processor_id()))
+ __tick_nohz_full_update_tick(ts, now);
+ else
+ tick_nohz_restart_sched_tick(ts, now);
+
+ tick_nohz_account_idle_ticks(ts);
}

/**
@@ -1252,7 +1264,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now);

if (tick_stopped)
- __tick_nohz_idle_restart_tick(ts, now);
+ tick_nohz_idle_update_tick(ts, now);

local_irq_enable();
}
--
2.25.1

2021-05-12 23:44:59

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 09/10] tick/nohz: Call tick_nohz_task_switch() with interrupts disabled

From: Peter Zijlstra <[email protected]>

Call tick_nohz_task_switch() slightly earlier after the context switch
to benefit from disabled IRQs. This way the function doesn't need to
disable them once more.

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/sched/core.c | 2 +-
kernel/time/tick-sched.c | 7 +------
2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78e480f7881a..8f86ac28877e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4212,6 +4212,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
vtime_task_switch(prev);
perf_event_task_sched_in(prev, current);
finish_task(prev);
+ tick_nohz_task_switch();
finish_lock_switch(rq);
finish_arch_post_lock_switch();
kcov_finish_switch(current);
@@ -4257,7 +4258,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
put_task_struct_rcu_user(prev);
}

- tick_nohz_task_switch();
return rq;
}

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d370a04deaa8..b748a71d8040 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -487,13 +487,10 @@ void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bi
*/
void __tick_nohz_task_switch(void)
{
- unsigned long flags;
struct tick_sched *ts;

- local_irq_save(flags);
-
if (!tick_nohz_full_cpu(smp_processor_id()))
- goto out;
+ return;

ts = this_cpu_ptr(&tick_cpu_sched);

@@ -502,8 +499,6 @@ void __tick_nohz_task_switch(void)
atomic_read(&current->signal->tick_dep_mask))
tick_nohz_full_kick();
}
-out:
- local_irq_restore(flags);
}

/* Get the boot-time nohz CPU list from the kernel parameters. */
--
2.25.1

2021-05-12 23:45:36

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 03/10] tick/nohz: Remove superflous check for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

The vtime_accounting_enabled_this_cpu() early check already turns what
follows into dead code in the case of CONFIG_VIRT_CPU_ACCOUNTING_NATIVE.
No need to keep the ifdeffery around.

Acked-by: Peter Zijlstra <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
---
kernel/time/tick-sched.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 05c1ce1034d6..1afa7595d1e0 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1196,7 +1196,6 @@ unsigned long tick_nohz_get_idle_calls(void)

static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;

if (vtime_accounting_enabled_this_cpu())
@@ -1212,7 +1211,6 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
*/
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
-#endif
}

void tick_nohz_idle_restart_tick(void)
--
2.25.1

2021-05-12 23:48:29

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 04/10] tick/nohz: Update idle_exittime on actual idle exit

From: Yunfeng Ye <[email protected]>

The idle_exittime field of tick_sched is used to record the time when
the idle state was left. However, idle_exittime is currently updated in
tick_nohz_restart_sched_tick(), which is not always called from the idle
state when nohz_full is configured:

tick_irq_exit
tick_nohz_irq_exit
tick_nohz_full_update_tick
tick_nohz_restart_sched_tick
ts->idle_exittime = now;

It's thus overwritten by mistake on nohz_full tick restart. Move the
update to the appropriate idle exit path instead.

Acked-by: Peter Zijlstra <[email protected]>
Signed-off-by: Yunfeng Ye <[email protected]>
Cc: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1afa7595d1e0..89ec0abcd62b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -921,8 +921,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
- ts->idle_exittime = now;
-
tick_nohz_restart(ts, now);
}

@@ -1194,10 +1192,13 @@ unsigned long tick_nohz_get_idle_calls(void)
return ts->idle_calls;
}

-static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
+static void tick_nohz_account_idle_time(struct tick_sched *ts,
+ ktime_t now)
{
unsigned long ticks;

+ ts->idle_exittime = now;
+
if (vtime_accounting_enabled_this_cpu())
return;
/*
@@ -1218,8 +1219,9 @@ void tick_nohz_idle_restart_tick(void)
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

if (ts->tick_stopped) {
- tick_nohz_restart_sched_tick(ts, ktime_get());
- tick_nohz_account_idle_ticks(ts);
+ ktime_t now = ktime_get();
+ tick_nohz_restart_sched_tick(ts, now);
+ tick_nohz_account_idle_time(ts, now);
}
}

@@ -1230,7 +1232,7 @@ static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
else
tick_nohz_restart_sched_tick(ts, now);

- tick_nohz_account_idle_ticks(ts);
+ tick_nohz_account_idle_time(ts, now);
}

/**
--
2.25.1

2021-05-12 23:52:35

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 08/10] tick/nohz: Kick only _queued_ task whose tick dependency is updated

From: Marcelo Tosatti <[email protected]>

When the tick dependency of a task is updated, we want it to acknowledge
the new state and restart the tick if needed. If the task is not
running, we don't need to kick it because it will observe the new
dependency upon scheduling in. But if the task is running, we may need
to send an IPI to it so that it gets notified.

Unfortunately we don't have the means to check if a task is running
in a race free way. Checking p->on_cpu in a synchronized way against
p->tick_dep_mask would imply adding a full barrier between
prepare_task_switch() and tick_nohz_task_switch(), which we want to
avoid in this fast-path.

Therefore we blindly fire an IPI to the task's CPU.

Meanwhile we can check if the task is queued on the CPU rq because
p->on_rq is always set to TASK_ON_RQ_QUEUED _before_ schedule() and its
full barrier that precedes tick_nohz_task_switch(). And if the task is
queued on a nohz_full CPU, it also has fair chances to be running as the
isolation constraints prescribe running single tasks on full dynticks
CPUs.

So use this as a trick to check if we can spare an IPI toward a
non-running task.

NOTE: For the ordering to be correct, it is assumed that we never
deactivate a task while it is running, the only exception being the task
deactivating itself while scheduling out.

Suggested-by: Peter Zijlstra <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Signed-off-by: Marcelo Tosatti <[email protected]>
Cc: Yunfeng Ye <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/sched.h | 2 ++
kernel/sched/core.c | 5 +++++
kernel/time/tick-sched.c | 19 +++++++++++++++++--
3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c881384517..3341ae2e8231 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2011,6 +2011,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)

#endif /* CONFIG_SMP */

+extern bool sched_task_on_rq(struct task_struct *p);
+
/*
* In order to reduce various lock holder preemption latencies provide an
* interface to see if a vCPU is currently running or not.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5226cc26a095..78e480f7881a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1590,6 +1590,11 @@ static inline void uclamp_post_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
#endif /* CONFIG_UCLAMP_TASK */

+bool sched_task_on_rq(struct task_struct *p)
+{
+ return task_on_rq_queued(p);
+}
+
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (!(flags & ENQUEUE_NOCLOCK))
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 800719ea4045..d370a04deaa8 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -324,14 +324,28 @@ void tick_nohz_full_kick_cpu(int cpu)

static void tick_nohz_kick_task(struct task_struct *tsk)
{
- int cpu = task_cpu(tsk);
+ int cpu;
+
+ /*
+ * If the task is not running, run_posix_cpu_timers
+ * has nothing to elapse, IPI can then be spared.
+ *
+ * activate_task() STORE p->tick_dep_mask
+ * STORE p->on_rq
+ * __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or())
+ * LOCK rq->lock LOAD p->on_rq
+ * smp_mb__after_spin_lock()
+ * tick_nohz_task_switch()
+ * LOAD p->tick_dep_mask
+ */
+ if (!sched_task_on_rq(tsk))
+ return;

/*
* If the task concurrently migrates to another cpu,
* we guarantee it sees the new tick dependency upon
* schedule.
*
- *
* set_task_cpu(p, cpu);
* STORE p->cpu = @cpu
* __schedule() (switch to task 'p')
@@ -340,6 +354,7 @@ static void tick_nohz_kick_task(struct task_struct *tsk)
* tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
* LOAD p->tick_dep_mask LOAD p->cpu
*/
+ cpu = task_cpu(tsk);

preempt_disable();
if (cpu_online(cpu))
--
2.25.1

2021-05-13 13:20:29

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Call tick_nohz_task_switch() with interrupts disabled

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: 0fdcccfafcffac70b452b3127cc3d981f0117655
Gitweb: https://git.kernel.org/tip/0fdcccfafcffac70b452b3127cc3d981f0117655
Author: Peter Zijlstra <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:23 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:23 +02:00

tick/nohz: Call tick_nohz_task_switch() with interrupts disabled

Call tick_nohz_task_switch() slightly earlier after the context switch
to benefit from disabled IRQs. This way the function doesn't need to
disable them once more.

Signed-off-by: Peter Zijlstra <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/sched/core.c | 2 +-
kernel/time/tick-sched.c | 7 +------
2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78e480f..8f86ac2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4212,6 +4212,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
vtime_task_switch(prev);
perf_event_task_sched_in(prev, current);
finish_task(prev);
+ tick_nohz_task_switch();
finish_lock_switch(rq);
finish_arch_post_lock_switch();
kcov_finish_switch(current);
@@ -4257,7 +4258,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
put_task_struct_rcu_user(prev);
}

- tick_nohz_task_switch();
return rq;
}

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 197a3bd..6ea619d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -487,13 +487,10 @@ void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bi
*/
void __tick_nohz_task_switch(void)
{
- unsigned long flags;
struct tick_sched *ts;

- local_irq_save(flags);
-
if (!tick_nohz_full_cpu(smp_processor_id()))
- goto out;
+ return;

ts = this_cpu_ptr(&tick_cpu_sched);

@@ -502,8 +499,6 @@ void __tick_nohz_task_switch(void)
atomic_read(&current->signal->tick_dep_mask))
tick_nohz_full_kick();
}
-out:
- local_irq_restore(flags);
}

/* Get the boot-time nohz CPU list from the kernel parameters. */

2021-05-13 13:21:01

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Kick only _queued_ task whose tick dependency is updated

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: a1dfb6311c7739e21e160bc4c5575a1b21b48c87
Gitweb: https://git.kernel.org/tip/a1dfb6311c7739e21e160bc4c5575a1b21b48c87
Author: Marcelo Tosatti <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:22 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:22 +02:00

tick/nohz: Kick only _queued_ task whose tick dependency is updated

When the tick dependency of a task is updated, we want it to acknowledge
the new state and restart the tick if needed. If the task is not
running, we don't need to kick it because it will observe the new
dependency upon scheduling in. But if the task is running, we may need
to send an IPI to it so that it gets notified.

Unfortunately we don't have the means to check if a task is running
in a race free way. Checking p->on_cpu in a synchronized way against
p->tick_dep_mask would imply adding a full barrier between
prepare_task_switch() and tick_nohz_task_switch(), which we want to
avoid in this fast-path.

Therefore we blindly fire an IPI to the task's CPU.

Meanwhile we can check if the task is queued on the CPU rq because
p->on_rq is always set to TASK_ON_RQ_QUEUED _before_ schedule() and its
full barrier that precedes tick_nohz_task_switch(). And if the task is
queued on a nohz_full CPU, it also has fair chances to be running as the
isolation constraints prescribe running single tasks on full dynticks
CPUs.

So use this as a trick to check if we can spare an IPI toward a
non-running task.

NOTE: For the ordering to be correct, it is assumed that we never
deactivate a task while it is running, the only exception being the task
deactivating itself while scheduling out.

Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Marcelo Tosatti <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
include/linux/sched.h | 2 ++
kernel/sched/core.c | 5 +++++
kernel/time/tick-sched.c | 19 +++++++++++++++++--
3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c8813..3341ae2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2011,6 +2011,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)

#endif /* CONFIG_SMP */

+extern bool sched_task_on_rq(struct task_struct *p);
+
/*
* In order to reduce various lock holder preemption latencies provide an
* interface to see if a vCPU is currently running or not.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5226cc2..78e480f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1590,6 +1590,11 @@ static inline void uclamp_post_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
#endif /* CONFIG_UCLAMP_TASK */

+bool sched_task_on_rq(struct task_struct *p)
+{
+ return task_on_rq_queued(p);
+}
+
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (!(flags & ENQUEUE_NOCLOCK))
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index acbe672..197a3bd 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -324,14 +324,28 @@ void tick_nohz_full_kick_cpu(int cpu)

static void tick_nohz_kick_task(struct task_struct *tsk)
{
- int cpu = task_cpu(tsk);
+ int cpu;
+
+ /*
+ * If the task is not running, run_posix_cpu_timers()
+ * has nothing to elapse, IPI can then be spared.
+ *
+ * activate_task() STORE p->tick_dep_mask
+ * STORE p->on_rq
+ * __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or())
+ * LOCK rq->lock LOAD p->on_rq
+ * smp_mb__after_spin_lock()
+ * tick_nohz_task_switch()
+ * LOAD p->tick_dep_mask
+ */
+ if (!sched_task_on_rq(tsk))
+ return;

/*
* If the task concurrently migrates to another CPU,
* we guarantee it sees the new tick dependency upon
* schedule.
*
- *
* set_task_cpu(p, cpu);
* STORE p->cpu = @cpu
* __schedule() (switch to task 'p')
@@ -340,6 +354,7 @@ static void tick_nohz_kick_task(struct task_struct *tsk)
* tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
* LOAD p->tick_dep_mask LOAD p->cpu
*/
+ cpu = task_cpu(tsk);

preempt_disable();
if (cpu_online(cpu))

2021-05-13 13:21:10

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Only wake up a single target cpu when kicking a task

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: 29721b859217b946bfc001c1644745ed4d7c26cb
Gitweb: https://git.kernel.org/tip/29721b859217b946bfc001c1644745ed4d7c26cb
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:20 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:22 +02:00

tick/nohz: Only wake up a single target cpu when kicking a task

When adding a tick dependency to a task, it's necessary to
wake up the CPU where the task resides to reevaluate tick
dependencies on that CPU.

However the current code wakes up all nohz_full CPUs, which
is unnecessary.

Switch to waking up a single CPU, by using ordering of writes
to task->cpu and task->tick_dep_mask.

[ mingo: Minor readability edit. ]

Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Marcelo Tosatti <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/time/tick-sched.c | 40 ++++++++++++++++++++++++++-------------
1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89ec0ab..b90ca66 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -322,6 +322,31 @@ void tick_nohz_full_kick_cpu(int cpu)
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}

+static void tick_nohz_kick_task(struct task_struct *tsk)
+{
+ int cpu = task_cpu(tsk);
+
+ /*
+ * If the task concurrently migrates to another CPU,
+ * we guarantee it sees the new tick dependency upon
+ * schedule.
+ *
+ *
+ * set_task_cpu(p, cpu);
+ * STORE p->cpu = @cpu
+ * __schedule() (switch to task 'p')
+ * LOCK rq->lock
+ * smp_mb__after_spin_lock() STORE p->tick_dep_mask
+ * tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
+ * LOAD p->tick_dep_mask LOAD p->cpu
+ */
+
+ preempt_disable();
+ if (cpu_online(cpu))
+ tick_nohz_full_kick_cpu(cpu);
+ preempt_enable();
+}
+
/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
@@ -404,19 +429,8 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
- if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
- if (tsk == current) {
- preempt_disable();
- tick_nohz_full_kick();
- preempt_enable();
- } else {
- /*
- * Some future tick_nohz_full_kick_task()
- * should optimize this.
- */
- tick_nohz_full_kick_all();
- }
- }
+ if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
+ tick_nohz_kick_task(tsk);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);


2021-05-13 13:22:21

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Conditionally restart tick on idle exit

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: a5183862e76fdc25f36b39c2489b816a5c66e2e5
Gitweb: https://git.kernel.org/tip/a5183862e76fdc25f36b39c2489b816a5c66e2e5
Author: Yunfeng Ye <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:16 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:21 +02:00

tick/nohz: Conditionally restart tick on idle exit

In nohz_full mode, switching from idle to a task will unconditionally
issue a tick restart. If the task is alone in the runqueue or has the
highest priority, the tick will fire once and then eventually stop. But
that alone is still undesired noise.

Therefore, only restart the tick on idle exit when it's strictly
necessary.

Signed-off-by: Yunfeng Ye <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/time/tick-sched.c | 42 +++++++++++++++++++++++++--------------
1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 828b091..05c1ce1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -926,22 +926,28 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
tick_nohz_restart(ts, now);
}

-static void tick_nohz_full_update_tick(struct tick_sched *ts)
+static void __tick_nohz_full_update_tick(struct tick_sched *ts,
+ ktime_t now)
{
#ifdef CONFIG_NO_HZ_FULL
int cpu = smp_processor_id();

- if (!tick_nohz_full_cpu(cpu))
+ if (can_stop_full_tick(cpu, ts))
+ tick_nohz_stop_sched_tick(ts, cpu);
+ else if (ts->tick_stopped)
+ tick_nohz_restart_sched_tick(ts, now);
+#endif
+}
+
+static void tick_nohz_full_update_tick(struct tick_sched *ts)
+{
+ if (!tick_nohz_full_cpu(smp_processor_id()))
return;

if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;

- if (can_stop_full_tick(cpu, ts))
- tick_nohz_stop_sched_tick(ts, cpu);
- else if (ts->tick_stopped)
- tick_nohz_restart_sched_tick(ts, ktime_get());
-#endif
+ __tick_nohz_full_update_tick(ts, ktime_get());
}

static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
@@ -1209,18 +1215,24 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#endif
}

-static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
+void tick_nohz_idle_restart_tick(void)
{
- tick_nohz_restart_sched_tick(ts, now);
- tick_nohz_account_idle_ticks(ts);
+ struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+ if (ts->tick_stopped) {
+ tick_nohz_restart_sched_tick(ts, ktime_get());
+ tick_nohz_account_idle_ticks(ts);
+ }
}

-void tick_nohz_idle_restart_tick(void)
+static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
{
- struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+ if (tick_nohz_full_cpu(smp_processor_id()))
+ __tick_nohz_full_update_tick(ts, now);
+ else
+ tick_nohz_restart_sched_tick(ts, now);

- if (ts->tick_stopped)
- __tick_nohz_idle_restart_tick(ts, ktime_get());
+ tick_nohz_account_idle_ticks(ts);
}

/**
@@ -1252,7 +1264,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now);

if (tick_stopped)
- __tick_nohz_idle_restart_tick(ts, now);
+ tick_nohz_idle_update_tick(ts, now);

local_irq_enable();
}

2021-05-13 19:38:18

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Change signal tick dependency to wake up CPUs of member tasks

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: 1e4ca26d367ae71743e25068e5cd8750ef3f5f7d
Gitweb: https://git.kernel.org/tip/1e4ca26d367ae71743e25068e5cd8750ef3f5f7d
Author: Marcelo Tosatti <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:21 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:22 +02:00

tick/nohz: Change signal tick dependency to wake up CPUs of member tasks

Rather than waking up all nohz_full CPUs on the system, only wake up
the target CPUs of member threads of the signal.

This reduces interruptions to nohz_full CPUs.

Signed-off-by: Marcelo Tosatti <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
include/linux/tick.h | 8 ++++----
kernel/time/posix-cpu-timers.c | 4 ++--
kernel/time/tick-sched.c | 15 +++++++++++++--
3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 2258984..0bb80a7 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -211,7 +211,7 @@ extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
-extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
@@ -256,11 +256,11 @@ static inline void tick_dep_clear_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
- tick_nohz_dep_set_signal(signal, bit);
+ tick_nohz_dep_set_signal(tsk, bit);
}
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
@@ -288,7 +288,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 3bb96a8..29a5e54 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
if (CPUCLOCK_PERTHREAD(timer->it_clock))
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
else
- tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
+ tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
}

/*
@@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
if (*newval < *nextevt)
*nextevt = *newval;

- tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
+ tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b90ca66..acbe672 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -444,9 +444,20 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
-void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+void tick_nohz_dep_set_signal(struct task_struct *tsk,
+ enum tick_dep_bits bit)
{
- tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+ int prev;
+ struct signal_struct *sig = tsk->signal;
+
+ prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
+ if (!prev) {
+ struct task_struct *t;
+
+ lockdep_assert_held(&tsk->sighand->siglock);
+ __for_each_thread(sig, t)
+ tick_nohz_kick_task(t);
+ }
}

void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)

2021-05-13 19:38:29

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Remove superfluous check for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: 3f624314b3f7c580aa5844a8930befd71e2a287c
Gitweb: https://git.kernel.org/tip/3f624314b3f7c580aa5844a8930befd71e2a287c
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:17 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:21 +02:00

tick/nohz: Remove superfluous check for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

The early vtime_accounting_enabled_this_cpu() check already turns what
follows into dead code in the case of CONFIG_VIRT_CPU_ACCOUNTING_NATIVE.
No need to keep the ifdeffery around.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/time/tick-sched.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 05c1ce1..1afa759 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1196,7 +1196,6 @@ unsigned long tick_nohz_get_idle_calls(void)

static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;

if (vtime_accounting_enabled_this_cpu())
@@ -1212,7 +1211,6 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
*/
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
-#endif
}

void tick_nohz_idle_restart_tick(void)

2021-05-13 20:23:55

by Ingo Molnar

[permalink] [raw]
Subject: Re: [GIT PULL] tick/nohz updates v3


* Frederic Weisbecker <[email protected]> wrote:

> Ingo, Thomas,
>
> Please pull the timers/nohz-v3 branch that can be found at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
> timers/nohz-v3
>
> Changes since v2:
>
> * Add Acks from Peter Zijlstra
>
> * Only bother to fetch task's CPU if the task is queued in
> "tick/nohz: Kick only _queued_ task whose tick dependency is updated"
> (reported by Peter Zijlstra)
>
> * Correctly indent comment in
> "tick/nohz: Kick only _queued_ task whose tick dependency is updated"
> (reported by Peter Zijlstra)
>
> * Add Peter's SoB in "tick/nohz: Evaluate the CPU expression after the static key"
>
> * Add "tick/nohz: Call tick_nohz_task_switch() with interrupts disabled"
> (from Peter Zijlstra)

Thanks - I've applied your patches to tip:timers/nohz.

I did a few cosmetic touch-ups, so applied it from email. Will push it out
after some testing.

> * Add "MAINTAINERS: Add myself as context tracking maintainer"

Ack. Thomas?

Thanks,

Ingo

2021-05-13 20:26:52

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Update idle_exittime on actual idle exit

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: 96c9b90396f9ab6caf13b4ebf00095818ac53b7f
Gitweb: https://git.kernel.org/tip/96c9b90396f9ab6caf13b4ebf00095818ac53b7f
Author: Yunfeng Ye <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:18 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:21 +02:00

tick/nohz: Update idle_exittime on actual idle exit

The idle_exittime field of tick_sched is used to record the time when
the idle state was left. However, idle_exittime is currently updated in
tick_nohz_restart_sched_tick(), which is not always called from the idle
state when nohz_full is configured:

tick_irq_exit
tick_nohz_irq_exit
tick_nohz_full_update_tick
tick_nohz_restart_sched_tick
ts->idle_exittime = now;

It's thus overwritten by mistake on nohz_full tick restart. Move the
update to the appropriate idle exit path instead.

Signed-off-by: Yunfeng Ye <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/time/tick-sched.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1afa759..89ec0ab 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -921,8 +921,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
- ts->idle_exittime = now;
-
tick_nohz_restart(ts, now);
}

@@ -1194,10 +1192,13 @@ unsigned long tick_nohz_get_idle_calls(void)
return ts->idle_calls;
}

-static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
+static void tick_nohz_account_idle_time(struct tick_sched *ts,
+ ktime_t now)
{
unsigned long ticks;

+ ts->idle_exittime = now;
+
if (vtime_accounting_enabled_this_cpu())
return;
/*
@@ -1218,8 +1219,9 @@ void tick_nohz_idle_restart_tick(void)
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

if (ts->tick_stopped) {
- tick_nohz_restart_sched_tick(ts, ktime_get());
- tick_nohz_account_idle_ticks(ts);
+ ktime_t now = ktime_get();
+ tick_nohz_restart_sched_tick(ts, now);
+ tick_nohz_account_idle_time(ts, now);
}
}

@@ -1230,7 +1232,7 @@ static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
else
tick_nohz_restart_sched_tick(ts, now);

- tick_nohz_account_idle_ticks(ts);
+ tick_nohz_account_idle_time(ts, now);
}

/**

2021-05-13 20:27:47

by tip-bot2 for Haifeng Xu

[permalink] [raw]
Subject: [tip: timers/nohz] tick/nohz: Evaluate the CPU expression after the static key

The following commit has been merged into the timers/nohz branch of tip:

Commit-ID: f105dfec0a951cd0d5bfbfe9dc067ea69f71ad5c
Gitweb: https://git.kernel.org/tip/f105dfec0a951cd0d5bfbfe9dc067ea69f71ad5c
Author: Peter Zijlstra <[email protected]>
AuthorDate: Thu, 13 May 2021 01:29:15 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 13 May 2021 14:21:20 +02:00

tick/nohz: Evaluate the CPU expression after the static key

When tick_nohz_full_cpu() is called with smp_processor_id(), the latter
is unconditionally evaluated whether the static key is on or off. It is
not necessary in the off-case though, so make sure the cpu expression
is executed at the last moment.

Illustrate with the following test function:

int tick_nohz_test(void)
{
return tick_nohz_full_cpu(smp_processor_id());
}

The resulting code before was:

mov %gs:0x7eea92d1(%rip),%eax # smp_processor_id() fetch
nopl 0x0(%rax,%rax,1)
xor %eax,%eax
retq
cmpb $0x0,0x29d393a(%rip) # <tick_nohz_full_running>
je tick_nohz_test+0x29 # jump to below eax clear
mov %eax,%eax
bt %rax,0x29d3936(%rip) # <tick_nohz_full_mask>
setb %al
movzbl %al,%eax
retq
xor %eax,%eax
retq

Now it becomes:

nopl 0x0(%rax,%rax,1)
xor %eax,%eax
retq
cmpb $0x0,0x29d3871(%rip) # <tick_nohz_full_running>
je tick_nohz_test+0x29 # jump to below eax clear
mov %gs:0x7eea91f0(%rip),%eax # smp_processor_id() fetch, after static key
mov %eax,%eax
bt %rax,0x29d3866(%rip) # <tick_nohz_full_mask>
setb %al
movzbl %al,%eax
retq
xor %eax,%eax
retq

Signed-off-by: Peter Zijlstra <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
include/linux/tick.h | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613..2258984 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -185,13 +185,17 @@ static inline bool tick_nohz_full_enabled(void)
return tick_nohz_full_running;
}

-static inline bool tick_nohz_full_cpu(int cpu)
-{
- if (!tick_nohz_full_enabled())
- return false;
-
- return cpumask_test_cpu(cpu, tick_nohz_full_mask);
-}
+/*
+ * Check if a CPU is part of the nohz_full subset. Arrange for evaluating
+ * the cpu expression (typically smp_processor_id()) _after_ the static
+ * key.
+ */
+#define tick_nohz_full_cpu(_cpu) ({ \
+ bool __ret = false; \
+ if (tick_nohz_full_enabled()) \
+ __ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask); \
+ __ret; \
+})

static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
{