From: Frederic Weisbecker <frederic@kernel.org>
To: LKML
Cc: Frederic Weisbecker, Peter Zijlstra, Chris Metcalf, Thomas Gleixner,
    Luiz Capitulino, Christoph Lameter, "Paul E. McKenney", Ingo Molnar,
    Wanpeng Li, Mike Galbraith, Rik van Riel
Subject: [PATCH 4/5] sched/isolation: Residual 1Hz scheduler tick offload
Date: Tue, 19 Dec 2017 04:23:57 +0100
Message-Id: <1513653838-31314-5-git-send-email-frederic@kernel.org>
In-Reply-To: <1513653838-31314-1-git-send-email-frederic@kernel.org>
References: <1513653838-31314-1-git-send-email-frederic@kernel.org>

When a CPU runs in full dynticks mode, a 1Hz tick remains in order to
keep the scheduler stats alive. However this residual tick is a burden
for Real-Time tasks that can't bear any interruption at all.

Adding the boot parameter "isolcpus=nohz_offload" now outsources these
scheduler ticks to the global workqueue so that a housekeeping CPU
handles them remotely.

Note it is still up to the user to affine the global workqueues to the
housekeeping CPUs through /sys/devices/virtual/workqueue/cpumask or
domains isolation.
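For instance, on an 8-CPU machine running RT work on CPUs 1-7, one
could boot with (illustrative values; the flag/cpulist syntax follows
the isolcpus flags handling introduced earlier in this series):

	isolcpus=nohz_offload,1-7

and then confine the global workqueues to the remaining housekeeping
CPU 0:

	# echo 1 > /sys/devices/virtual/workqueue/cpumask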
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Chris Metcalf
Cc: Christoph Lameter
Cc: Luiz Capitulino
Cc: Mike Galbraith
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Cc: Wanpeng Li
Cc: Ingo Molnar
---
 kernel/sched/core.c      |  2 ++
 kernel/sched/isolation.c |  4 +++
 kernel/sched/sched.h     |  6 ++++
 kernel/sched/tick.c      | 79 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 88 insertions(+), 3 deletions(-)
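The core mechanism in the tick.c hunk below is a per-CPU delayed work
that re-arms itself on system_unbound_wq every HZ jiffies, i.e. roughly
once per second. A minimal out-of-tree sketch of that self-requeueing
pattern, with a made-up module name and printout (illustrative only,
not part of the patch):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/smp.h>

static struct delayed_work demo_work;

static void demo_tick(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	/* Stand-in for the remote task_tick() call */
	pr_info("remote tick ran on CPU %d\n", raw_smp_processor_id());

	/* Re-arm: fires again in HZ jiffies, i.e. roughly 1Hz */
	queue_delayed_work(system_unbound_wq, dwork, HZ);
}

static int __init demo_init(void)
{
	INIT_DELAYED_WORK(&demo_work, demo_tick);
	queue_delayed_work(system_unbound_wq, &demo_work, HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Like sched_tick_stop(): wait for the last run, then stop */
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");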
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b6f74c8..f50ba18 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5531,6 +5531,7 @@ int sched_cpu_starting(unsigned int cpu)
 {
 	set_cpu_rq_start_time(cpu);
 	sched_rq_cpu_starting(cpu);
+	sched_tick_start(cpu);
 	return 0;
 }
 
@@ -5542,6 +5543,7 @@ int sched_cpu_dying(unsigned int cpu)
 
 	/* Handle pending wakeups and then migrate everything off */
 	sched_ttwu_pending();
+	sched_tick_stop(cpu);
 
 	rq_lock_irqsave(rq, &rf);
 	if (rq->rd) {
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 264ddcd..c5e7e90a 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include "sched.h"
 
 DEFINE_STATIC_KEY_FALSE(housekeeping_overriden);
 EXPORT_SYMBOL_GPL(housekeeping_overriden);
@@ -60,6 +61,9 @@ void __init housekeeping_init(void)
 
 	static_branch_enable(&housekeeping_overriden);
 
+	if (housekeeping_flags & HK_FLAG_TICK_SCHED)
+		sched_tick_offload_init();
+
 	/* We need at least one CPU to handle housekeeping work */
 	WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 16eef0c..57821c9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1587,6 +1587,9 @@ extern void post_init_entity_util_avg(struct sched_entity *se);
 
 #ifdef CONFIG_NO_HZ_FULL
 extern bool sched_can_stop_tick(struct rq *rq);
+extern void sched_tick_start(int cpu);
+extern void sched_tick_stop(int cpu);
+extern int __init sched_tick_offload_init(void);
 
 /*
  * Tick may be needed by tasks in the runqueue depending on their policy and
@@ -1611,6 +1614,9 @@ static inline void sched_update_tick_dependency(struct rq *rq)
 	tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
 }
 #else
+static inline void sched_tick_start(int cpu) { }
+static inline void sched_tick_stop(int cpu) { }
+static inline int sched_tick_offload_init(void) { return 0; }
 static inline void sched_update_tick_dependency(struct rq *rq) { }
 #endif
 
diff --git a/kernel/sched/tick.c b/kernel/sched/tick.c
index 5eabfe3..fc31f9e 100644
--- a/kernel/sched/tick.c
+++ b/kernel/sched/tick.c
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include
 #include "sched.h"
 
@@ -50,9 +51,14 @@ void scheduler_tick(void)
  */
 u64 scheduler_tick_max_deferment(void)
 {
-	struct rq *rq = this_rq();
-	unsigned long next, now = READ_ONCE(jiffies);
+	struct rq *rq;
+	unsigned long next, now;
+	if (!housekeeping_cpu(smp_processor_id(), HK_FLAG_TICK_SCHED))
+		return ktime_to_ns(KTIME_MAX);
+
+	rq = this_rq();
+	now = READ_ONCE(jiffies);
 
 	next = rq->last_sched_tick + HZ;
 
 	if (time_before_eq(next, now))
@@ -60,7 +66,74 @@ u64 scheduler_tick_max_deferment(void)
 
 	return jiffies_to_nsecs(next - now);
 }
-#endif
+
+struct tick_work {
+	int cpu;
+	struct delayed_work work;
+};
+
+static struct tick_work __percpu *tick_work_cpu;
+
+static void sched_tick_remote(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tick_work *twork = container_of(dwork, struct tick_work, work);
+	struct rq *rq = cpu_rq(twork->cpu);
+	struct rq_flags rf;
+
+	rq_lock_irq(rq, &rf);
+	update_rq_clock(rq);
+	rq->curr->sched_class->task_tick(rq, rq->curr, 0);
+	rq_unlock_irq(rq, &rf);
+
+	queue_delayed_work(system_unbound_wq, dwork, HZ);
+}
+
+void sched_tick_start(int cpu)
+{
+	struct tick_work *twork;
+
+	if (housekeeping_cpu(cpu, HK_FLAG_TICK_SCHED))
+		return;
+
+	WARN_ON_ONCE(!tick_work_cpu);
+
+	twork = per_cpu_ptr(tick_work_cpu, cpu);
+	twork->cpu = cpu;
+	INIT_DELAYED_WORK(&twork->work, sched_tick_remote);
+	queue_delayed_work(system_unbound_wq, &twork->work, HZ);
+
+	return;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void sched_tick_stop(int cpu)
+{
+	struct tick_work *twork;
+
+	if (housekeeping_cpu(cpu, HK_FLAG_TICK_SCHED))
+		return;
+
+	WARN_ON_ONCE(!tick_work_cpu);
+
+	twork = per_cpu_ptr(tick_work_cpu, cpu);
+	cancel_delayed_work_sync(&twork->work);
+
+	return;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __init sched_tick_offload_init(void)
+{
+	tick_work_cpu = alloc_percpu(struct tick_work);
+	if (!tick_work_cpu) {
+		pr_err("Can't allocate remote tick struct\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_NO_HZ_FULL */
 
 #ifdef CONFIG_SCHED_HRTICK
 /*
-- 
2.7.4