Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751458AbeAPElN (ORCPT + 1 other); Mon, 15 Jan 2018 23:41:13 -0500 Received: from mail.kernel.org ([198.145.29.99]:42244 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751376AbeAPElI (ORCPT ); Mon, 15 Jan 2018 23:41:08 -0500 DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 3F6B62178B Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=kernel.org Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=frederic@kernel.org From: Frederic Weisbecker To: LKML Cc: Frederic Weisbecker , Levin Alexander , Peter Zijlstra , Mauro Carvalho Chehab , Linus Torvalds , Hannes Frederic Sowa , "Paul E . McKenney" , Wanpeng Li , Dmitry Safonov , Thomas Gleixner , Eric Dumazet , Radu Rendec , Ingo Molnar , Stanislaw Gruszka , Paolo Abeni , Rik van Riel , Andrew Morton , David Miller Subject: [RFC PATCH 4/5] softirq: Replace ksoftirqd with workqueues entirely Date: Tue, 16 Jan 2018 05:40:39 +0100 Message-Id: <1516077640-19718-5-git-send-email-frederic@kernel.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1516077640-19718-1-git-send-email-frederic@kernel.org> References: <1516077640-19718-1-git-send-email-frederic@kernel.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: Ksoftirqd only remains to implement threaded IRQs. Convert it to existing per-vector workqueues to avoid code duplication. Suggested-by: Linus Torvalds Suggested-by: Paolo Abeni Signed-off-by: Frederic Weisbecker Cc: Dmitry Safonov Cc: Eric Dumazet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Andrew Morton Cc: David Miller Cc: Hannes Frederic Sowa Cc: Ingo Molnar Cc: Levin Alexander Cc: Paolo Abeni Cc: Paul E. 
McKenney Cc: Radu Rendec Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Mauro Carvalho Chehab --- Documentation/RCU/stallwarn.txt | 4 +- include/linux/interrupt.h | 7 ---- kernel/sched/cputime.c | 13 +++--- kernel/sched/sched.h | 4 +- kernel/softirq.c | 87 +++++++++-------------------------------- net/ipv4/tcp_output.c | 4 +- 6 files changed, 31 insertions(+), 88 deletions(-) diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index a08f928..ea3a8de 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -17,8 +17,8 @@ o A CPU looping in an RCU read-side critical section. o A CPU looping with interrupts disabled. o A CPU looping with preemption disabled. This condition can - result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh - stalls. + result in RCU-sched stalls and, if softirq workqueue is in use, + RCU-bh stalls. o A CPU looping with bottom halves disabled. This condition can result in RCU-sched and RCU-bh stalls. diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 92d044d..680f620 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -507,13 +507,6 @@ extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); -DECLARE_PER_CPU(struct task_struct *, ksoftirqd); - -static inline struct task_struct *this_cpu_ksoftirqd(void) -{ - return this_cpu_read(ksoftirqd); -} - extern int softirq_serving_workqueue(void); /* Tasklets --- multithreaded analogue of BHs. diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 30f70e5..c5b8dbd 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -64,15 +64,14 @@ void irqtime_account_irq(struct task_struct *curr) irqtime->irq_start_time += delta; /* - * We do not account for softirq time from ksoftirqd here. 
- * We want to continue accounting softirq time to ksoftirqd thread + * We do not account for softirq time from workqueue here. + * We want to continue accounting softirq time to workqueue thread * in that case, so as not to confuse scheduler with a special task * that do not consume any time, but still wants to run. */ if (hardirq_count()) irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); - else if (in_serving_softirq() && curr != this_cpu_ksoftirqd() && - !softirq_serving_workqueue()) + else if (in_serving_softirq() && !softirq_serving_workqueue()) irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); } EXPORT_SYMBOL_GPL(irqtime_account_irq); @@ -376,11 +375,11 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, cputime -= other; - if (this_cpu_ksoftirqd() == p || softirq_serving_workqueue()) { + if (softirq_serving_workqueue()) { /* - * ksoftirqd time do not get accounted in cpu_softirq_time. + * Softirq wq time does not get accounted in cpu_softirq_time. * So, we have to handle it separately here. - * Also, p->stime needs to be updated for ksoftirqd. + * Also, p->stime needs to be updated for workqueue. */ account_system_index_time(p, cputime, CPUTIME_SOFTIRQ); } else if (user_tick) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b19552a2..5d481f1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2061,8 +2061,8 @@ struct irqtime { DECLARE_PER_CPU(struct irqtime, cpu_irqtime); /* - * Returns the irqtime minus the softirq time computed by ksoftirqd. - * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime + * Returns the irqtime minus the softirq time computed by workqueue. + * Otherwise workqueue's sum_exec_runtime is subtracted its own runtime * and never move forward. 
*/ static inline u64 irq_time_read(int cpu) diff --git a/kernel/softirq.c b/kernel/softirq.c index 441e654..b2a5384 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -55,8 +55,6 @@ EXPORT_SYMBOL(irq_stat); static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; -DEFINE_PER_CPU(struct task_struct *, ksoftirqd); - const char * const softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" @@ -78,32 +76,6 @@ struct softirq { static DEFINE_PER_CPU(struct softirq, softirq_cpu); /* - * we cannot loop indefinitely here to avoid userspace starvation, - * but we also don't want to introduce a worst case 1/HZ latency - * to the pending events, so lets the scheduler to balance - * the softirq load for us. - */ -static void wakeup_softirqd(void) -{ - /* Interrupts are disabled: no need to stop preemption */ - struct task_struct *tsk = __this_cpu_read(ksoftirqd); - - if (tsk && tsk->state != TASK_RUNNING) - wake_up_process(tsk); -} - -/* - * If ksoftirqd is scheduled, we do not want to process pending softirqs - * right now. Let ksoftirqd handle this at its own rate, to get fairness. - */ -static bool ksoftirqd_running(void) -{ - struct task_struct *tsk = __this_cpu_read(ksoftirqd); - - return tsk && (tsk->state == TASK_RUNNING); -} - -/* * preempt_count and SOFTIRQ_OFFSET usage: * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving * softirq processing. 
@@ -408,7 +380,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) asmlinkage __visible void do_softirq(void) { - __u32 pending; + __u32 pending, pending_work; unsigned long flags; if (in_interrupt()) @@ -417,8 +389,9 @@ asmlinkage __visible void do_softirq(void) local_irq_save(flags); pending = local_softirq_pending(); + pending_work = __this_cpu_read(softirq_cpu.pending_work_mask); - if (pending && !ksoftirqd_running()) + if (pending & ~pending_work) do_softirq_own_stack(); local_irq_restore(flags); @@ -432,7 +405,7 @@ void irq_enter(void) rcu_irq_enter(); if (is_idle_task(current) && !in_interrupt()) { /* - * Prevent raise_softirq from needlessly waking up ksoftirqd + * Prevent raise_softirq from needlessly waking up workqueue * here, as softirq will be serviced on return from interrupt. */ local_bh_disable(); @@ -445,7 +418,15 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (ksoftirqd_running()) + unsigned int pending_work, pending = local_softirq_pending(); + + if (!pending) + return; + + pending_work = __this_cpu_read(softirq_cpu.pending_work_mask); + pending &= ~pending_work; + + if (!pending) return; if (!force_irqthreads) { @@ -465,7 +446,7 @@ static inline void invoke_softirq(void) do_softirq_own_stack(); #endif } else { - wakeup_softirqd(); + do_softirq_workqueue(pending); } } @@ -494,7 +475,7 @@ void irq_exit(void) #endif account_irq_exit_time(current); preempt_count_sub(HARDIRQ_OFFSET); - if (!in_interrupt() && local_softirq_pending()) + if (!in_interrupt()) invoke_softirq(); tick_irq_exit(); @@ -515,11 +496,11 @@ inline void raise_softirq_irqoff(unsigned int nr) * actually run the softirq once we return from * the irq or softirq. * - * Otherwise we wake up ksoftirqd to make sure we + * Otherwise we wake up workqueue to make sure we * schedule the softirq soon. 
*/ if (!in_interrupt()) - wakeup_softirqd(); + do_softirq_workqueue(BIT(nr)); } void raise_softirq(unsigned int nr) @@ -758,27 +739,6 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action); } -static int ksoftirqd_should_run(unsigned int cpu) -{ - return local_softirq_pending(); -} - -static void run_ksoftirqd(unsigned int cpu) -{ - local_irq_disable(); - if (local_softirq_pending()) { - /* - * We can safely run softirq on inline stack, as we are not deep - * in the task stack here. - */ - __do_softirq(); - local_irq_enable(); - cond_resched_rcu_qs(); - return; - } - local_irq_enable(); -} - #ifdef CONFIG_HOTPLUG_CPU /* * tasklet_kill_immediate is called to remove a tasklet which can already be @@ -841,22 +801,13 @@ static int takeover_tasklets(unsigned int cpu) #define takeover_tasklets NULL #endif /* CONFIG_HOTPLUG_CPU */ -static struct smp_hotplug_thread softirq_threads = { - .store = &ksoftirqd, - .thread_should_run = ksoftirqd_should_run, - .thread_fn = run_ksoftirqd, - .thread_comm = "ksoftirqd/%u", -}; - -static __init int spawn_ksoftirqd(void) +static __init int tasklet_set_takeover(void) { cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, takeover_tasklets); - BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); - return 0; } -early_initcall(spawn_ksoftirqd); +early_initcall(tasklet_set_takeover); /* * [ These __weak aliases are kept in a separate compilation unit, so that diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4e4160..3b4811e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -912,7 +912,7 @@ void tcp_wfree(struct sk_buff *skb) */ WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc)); - /* If this softirq is serviced by ksoftirqd, we are likely under stress. + /* If this softirq is serviced by workqueue, we are likely under stress. * Wait until our queues (qdisc + devices) are drained. 
* This gives : * - less callbacks to tcp_write_xmit(), reducing stress (batches) @@ -920,7 +920,7 @@ void tcp_wfree(struct sk_buff *skb) * to migrate this flow (skb->ooo_okay will be eventually set) */ if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && - (this_cpu_ksoftirqd() == current || softirq_serving_workqueue())) + softirq_serving_workqueue()) goto out; for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { -- 2.7.4