Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753716AbbGWQmw (ORCPT ); Thu, 23 Jul 2015 12:42:52 -0400 Received: from mail-wi0-f171.google.com ([209.85.212.171]:38574 "EHLO mail-wi0-f171.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754015AbbGWQmd (ORCPT ); Thu, 23 Jul 2015 12:42:33 -0400 From: Frederic Weisbecker To: LKML Cc: Frederic Weisbecker , Peter Zijlstra , Thomas Gleixner , Preeti U Murthy , Christoph Lameter , Ingo Molnar , Viresh Kumar , Rik van Riel Subject: [PATCH 05/10] nohz: New tick dependency mask Date: Thu, 23 Jul 2015 18:42:10 +0200 Message-Id: <1437669735-8786-6-git-send-email-fweisbec@gmail.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1437669735-8786-1-git-send-email-fweisbec@gmail.com> References: <1437669735-8786-1-git-send-email-fweisbec@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8857 Lines: 287 The tick dependency is evaluated on every IRQ. This is a batch of checks which determine whether it is safe to stop the tick or not. These checks are often split into many details: posix cpu timers, scheduler, sched clock, perf events. Each of these is made of smaller details: posix cpu timers involve checking process wide timers then thread wide timers. Perf involves checking freq events then more per cpu details. Checking these details asynchronously every time we update the full dynticks state brings avoidable overhead and a messy layout. Let's introduce two tick dependency masks: one for system wide dependency and another for CPU wide dependency. The subsystems are responsible for setting and clearing their dependency through a set of APIs that will take care of concurrent dependency mask modifications and of triggering the IPIs that restart the relevant CPU tick whenever needed. This new dependency engine stays beside the old one until all subsystems having a tick dependency are converted to it. 
Suggested-by: Thomas Gleixner Suggested-by: Peter Zijlstra Cc: Christoph Lameter Cc: Ingo Molnar Cc: Preeti U Murthy Cc: Rik van Riel Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 20 +++++++ kernel/time/tick-sched.c | 142 +++++++++++++++++++++++++++++++++++++++++++++-- kernel/time/tick-sched.h | 1 + 3 files changed, 158 insertions(+), 5 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 48d901f..daafcce 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -97,6 +97,18 @@ static inline void tick_broadcast_exit(void) tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); } +enum tick_dependency_bit { + TICK_POSIX_TIMER_BIT = 0, + TICK_PERF_EVENTS_BIT = 1, + TICK_SCHED_BIT = 2, + TICK_CLOCK_UNSTABLE_BIT = 3 +}; + +#define TICK_POSIX_TIMER_MASK (1 << TICK_POSIX_TIMER_BIT) +#define TICK_PERF_EVENTS_MASK (1 << TICK_PERF_EVENTS_BIT) +#define TICK_SCHED_MASK (1 << TICK_SCHED_BIT) +#define TICK_CLOCK_UNSTABLE_MASK (1 << TICK_CLOCK_UNSTABLE_BIT) + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); @@ -147,6 +159,14 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } +extern void tick_nohz_set_tick_dependency(enum tick_dependency_bit bit); +extern void tick_nohz_set_tick_dependency_delayed(enum tick_dependency_bit bit); +extern void tick_nohz_clear_tick_dependency(enum tick_dependency_bit bit); +extern void tick_nohz_set_tick_dependency_cpu(enum tick_dependency_bit bit, int cpu); +extern void tick_nohz_clear_tick_dependency_cpu(enum tick_dependency_bit bit, int cpu); +extern void tick_nohz_set_tick_dependency_this_cpu(enum tick_dependency_bit bit); +extern void tick_nohz_clear_tick_dependency_this_cpu(enum tick_dependency_bit bit); + extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void 
tick_nohz_full_kick_all(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3319e16..a64646e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -156,11 +156,43 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) cpumask_var_t tick_nohz_full_mask; cpumask_var_t housekeeping_mask; bool tick_nohz_full_running; +static unsigned long tick_dependency; -static bool can_stop_full_tick(void) +static void trace_tick_dependency(unsigned long dep) +{ + if (dep & TICK_POSIX_TIMER_MASK) { + trace_tick_stop(0, "posix timers running\n"); + return; + } + + if (dep & TICK_PERF_EVENTS_MASK) { + trace_tick_stop(0, "perf events running\n"); + return; + } + + if (dep & TICK_SCHED_MASK) { + trace_tick_stop(0, "more than 1 task in runqueue\n"); + return; + } + + if (dep & TICK_CLOCK_UNSTABLE_MASK) + trace_tick_stop(0, "unstable sched clock\n"); +} + +static bool can_stop_full_tick(struct tick_sched *ts) { WARN_ON_ONCE(!irqs_disabled()); + if (tick_dependency) { + trace_tick_dependency(tick_dependency); + return false; + } + + if (ts->tick_dependency) { + trace_tick_dependency(ts->tick_dependency); + return false; + } + if (!sched_can_stop_tick()) { trace_tick_stop(0, "more than 1 task in runqueue\n"); return false; @@ -176,9 +208,10 @@ static bool can_stop_full_tick(void) return false; } - /* sched_clock_tick() needs us? */ #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK /* + * sched_clock_tick() needs us? + * * TODO: kick full dynticks CPUs when * sched_clock_stable is set. 
*/ @@ -253,6 +286,103 @@ void tick_nohz_full_kick_all(void) preempt_enable(); } +unsigned long __tick_nohz_set_tick_dependency(enum tick_dependency_bit bit, + unsigned long *dep) +{ + unsigned long prev; + unsigned long old = *dep; + unsigned long mask = BIT_MASK(bit); + + while ((prev = cmpxchg(dep, old, old | mask)) != old) { + old = prev; + cpu_relax(); + } + + return prev; +} + +void __tick_nohz_clear_tick_dependency(enum tick_dependency_bit bit, + unsigned long *dep) +{ + clear_bit(bit, dep); +} + +void tick_nohz_set_tick_dependency(enum tick_dependency_bit bit) +{ + unsigned long prev; + + prev = __tick_nohz_set_tick_dependency(bit, &tick_dependency); + if (!prev) + tick_nohz_full_kick_all(); +} + +static void kick_all_work_fn(struct work_struct *work) +{ + tick_nohz_full_kick_all(); +} +static DECLARE_WORK(kick_all_work, kick_all_work_fn); + +void tick_nohz_set_tick_dependency_delayed(enum tick_dependency_bit bit) +{ + unsigned long prev; + + prev = __tick_nohz_set_tick_dependency(bit, &tick_dependency); + if (!prev) { + /* + * We need the IPIs to be sent from sane process context. + * The posix cpu timers are always set with irqs disabled. + */ + schedule_work(&kick_all_work); + } +} + +void tick_nohz_clear_tick_dependency(enum tick_dependency_bit bit) +{ + __tick_nohz_clear_tick_dependency(bit, &tick_dependency); +} + +void tick_nohz_set_tick_dependency_cpu(enum tick_dependency_bit bit, int cpu) +{ + unsigned long prev; + struct tick_sched *ts; + + ts = per_cpu_ptr(&tick_cpu_sched, cpu); + + prev = __tick_nohz_set_tick_dependency(bit, &ts->tick_dependency); + if (!prev) + tick_nohz_full_kick_cpu(cpu); +} + +void tick_nohz_clear_tick_dependency_cpu(enum tick_dependency_bit bit, int cpu) +{ + struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); + + __tick_nohz_clear_tick_dependency(bit, &ts->tick_dependency); +} + +/* + * Local dependency must have its own flavour due to NMI-safe requirement + * on perf. 
+ */ +void tick_nohz_set_tick_dependency_this_cpu(enum tick_dependency_bit bit) +{ + unsigned long prev; + struct tick_sched *ts; + + ts = this_cpu_ptr(&tick_cpu_sched); + + prev = __tick_nohz_set_tick_dependency(bit, &ts->tick_dependency); + if (!prev) + tick_nohz_full_kick(); +} + +void tick_nohz_clear_tick_dependency_this_cpu(enum tick_dependency_bit bit) +{ + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); + + __tick_nohz_clear_tick_dependency(bit, &ts->tick_dependency); +} + /* * Re-evaluate the need for the tick as we switch the current task. * It might need the tick due to per task/process properties: @@ -261,15 +391,17 @@ void tick_nohz_full_kick_all(void) void __tick_nohz_task_switch(void) { unsigned long flags; + struct tick_sched *ts; local_irq_save(flags); if (!tick_nohz_full_cpu(smp_processor_id())) goto out; - if (tick_nohz_tick_stopped() && !can_stop_full_tick()) + ts = this_cpu_ptr(&tick_cpu_sched); + + if (ts->tick_stopped && !can_stop_full_tick(ts)) tick_nohz_full_kick(); - out: local_irq_restore(flags); } @@ -715,7 +847,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) return; - if (can_stop_full_tick()) + if (can_stop_full_tick(ts)) tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); else if (ts->tick_stopped) tick_nohz_restart_sched_tick(ts, ktime_get()); diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index a4a8d4e..d327f70 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -60,6 +60,7 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; int do_timer_last; + unsigned long tick_dependency; }; extern struct tick_sched *tick_get_tick_sched(int cpu); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/