Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754581AbbFJPbc (ORCPT ); Wed, 10 Jun 2015 11:31:32 -0400 Received: from ip4-83-240-67-251.cust.nbox.cz ([83.240.67.251]:36075 "EHLO ip4-83-240-18-248.cust.nbox.cz" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S933828AbbFJP1n (ORCPT ); Wed, 10 Jun 2015 11:27:43 -0400 From: Jiri Slaby To: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org, Jens Axboe , Jiri Slaby Subject: [PATCH 3.12 010/111] aio: fix serial draining in exit_aio() Date: Wed, 10 Jun 2015 17:25:59 +0200 Message-Id: <0b8b97704fbe442b3bba7a2c7eba9113122abafd.1433943052.git.jslaby@suse.cz> X-Mailer: git-send-email 2.4.2 In-Reply-To: <93091169a673f49c2574cddf1ef858cf0704f646.1433943052.git.jslaby@suse.cz> References: <93091169a673f49c2574cddf1ef858cf0704f646.1433943052.git.jslaby@suse.cz> In-Reply-To: References: Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4750 Lines: 164 From: Jens Axboe 3.12-stable review patch. If anyone has any objections, please let me know. =============== commit dc48e56d761610da4ea1088d1bea0a030b8e3e43 upstream. exit_aio() currently serializes killing io contexts. Each context killing ends up having to do percpu_ref_kill(), which in turns has to wait for an RCU grace period. This can take a long time, depending on the number of contexts. And there's no point in doing them serially, when we could be waiting for all of them in one fell swoop. This patches makes my fio thread offload test case exit 0.2s instead of almost 6s. Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe Signed-off-by: Jiri Slaby --- fs/aio.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index aafb96629f49..329e6c1f3a43 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -77,6 +77,11 @@ struct kioctx_cpu { unsigned reqs_available; }; +struct ctx_rq_wait { + struct completion comp; + atomic_t count; +}; + struct kioctx { struct percpu_ref users; atomic_t dead; @@ -115,7 +120,7 @@ struct kioctx { /* * signals when all in-flight requests are done */ - struct completion *requests_done; + struct ctx_rq_wait *rq_wait; struct { /* @@ -521,8 +526,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) struct kioctx *ctx = container_of(ref, struct kioctx, reqs); /* At this point we know that there are no any in-flight requests */ - if (ctx->requests_done) - complete(ctx->requests_done); + if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) + complete(&ctx->rq_wait->comp); INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); @@ -738,7 +743,7 @@ err: * the rapid destruction of the kioctx. */ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, - struct completion *requests_done) + struct ctx_rq_wait *wait) { if (!atomic_xchg(&ctx->dead, 1)) { struct kioctx_table *table; @@ -767,11 +772,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); - ctx->requests_done = requests_done; + ctx->rq_wait = wait; percpu_ref_kill(&ctx->users); } else { - if (requests_done) - complete(requests_done); + if (wait && atomic_dec_and_test(&wait->count)) + complete(&wait->comp); } } @@ -802,18 +807,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb); void exit_aio(struct mm_struct *mm) { struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table); - int i; + struct ctx_rq_wait wait; + int i, skipped; if (!table) return; + atomic_set(&wait.count, table->nr); + init_completion(&wait.comp); + + skipped = 0; for (i = 0; i < table->nr; ++i) { struct kioctx *ctx = table->table[i]; - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); - if (!ctx) + if (!ctx) { + skipped++; continue; + } + /* * We don't need to bother with munmap() here - exit_mmap(mm) * is coming and it'll unmap everything. And we simply can't, @@ -823,10 +834,12 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx, &requests_done); + kill_ioctx(mm, ctx, &wait); + } + if (!atomic_sub_and_test(skipped, &wait.count)) { /* Wait until all IO for the context are done. */ - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); } RCU_INIT_POINTER(mm->ioctx_table, NULL); @@ -1238,21 +1251,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); + struct ctx_rq_wait wait; + + init_completion(&wait.comp); + atomic_set(&wait.count, 1); /* Pass requests_done to kill_ioctx() where it can be set * in a thread-safe way. If we try to set it here then we have * a race condition if two io_destroy() called simultaneously. */ - kill_ioctx(current->mm, ioctx, &requests_done); + kill_ioctx(current->mm, ioctx, &wait); percpu_ref_put(&ioctx->users); /* Wait until all IO for the context are done. Otherwise kernel * keep using user-space buffers even if user thinks the context * is destroyed. */ - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); return 0; } -- 2.4.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/