Date: Sun, 1 Feb 2015 09:40:58 -0500
From: Benjamin LaHaise
To: Linus Torvalds
Cc: linux-aio@kvack.org, Linux Kernel
Subject: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
Message-ID: <20150201144058.GM2974@kvack.org>

Hello Linus,

The following changes since commit 5f785de588735306ec4d7c875caf9d28481c8b21:

  aio: Skip timer for io_getevents if timeout=0 (2014-12-13 17:50:20 -0500)

are available in the git repository at:

  git://git.kvack.org/~bcrl/aio-fixes.git master

for you to fetch changes up to f84249f4cfef5ffa8a3e6d588a1d185f3f1fef45:

  fs/aio: fix sleeping while TASK_INTERRUPTIBLE (2015-01-30 11:18:36 -0500)

----------------------------------------------------------------
Chris Mason (1):
      fs/aio: fix sleeping while TASK_INTERRUPTIBLE

 fs/aio.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 93 insertions(+), 24 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 1b7893e..71b192a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1131,6 +1131,8 @@ EXPORT_SYMBOL(aio_complete);
 /* aio_read_events_ring
  * Pull an event off of the ioctx's event ring.  Returns the number of
  * events fetched
+ *
+ * must be called with ctx->ring locked
  */
 static long aio_read_events_ring(struct kioctx *ctx,
                                  struct io_event __user *event, long nr)
@@ -1139,8 +1141,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
         unsigned head, tail, pos;
         long ret = 0;
         int copy_ret;
-
-        mutex_lock(&ctx->ring_lock);
+        int running = current->state == TASK_RUNNING;
 
         /* Access to ->ring_pages here is protected by ctx->ring_lock. */
         ring = kmap_atomic(ctx->ring_pages[0]);
@@ -1179,6 +1180,17 @@ static long aio_read_events_ring(struct kioctx *ctx,
                 page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
                 pos %= AIO_EVENTS_PER_PAGE;
 
+                /* we're called after calling prepare_to_wait, which means
+                 * our current state might not be TASK_RUNNING.  Set it
+                 * to running here so sleeps in kmap or copy_to_user don't
+                 * trigger warnings.  If we don't copy enough events out, we'll
+                 * loop through schedule() one time before sleeping.
+                 */
+                if (!running) {
+                        __set_current_state(TASK_RUNNING);
+                        running = 1;
+                }
+
                 ev = kmap(page);
                 copy_ret = copy_to_user(event + ret, ev + pos,
                                         sizeof(*ev) * avail);
@@ -1201,11 +1213,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
 
         pr_debug("%li h%u t%u\n", ret, head, tail);
 out:
-        mutex_unlock(&ctx->ring_lock);
-
         return ret;
 }
 
+/* must be called with ctx->ring locked */
 static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
                             struct io_event __user *event, long *i)
 {
@@ -1223,6 +1234,73 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
         return ret < 0 || *i >= min_nr;
 }
 
+/*
+ * called without ctx->ring_lock held
+ */
+static long aio_wait_events(struct kioctx *ctx, long min_nr, long nr,
+                            struct io_event __user *event,
+                            long *i_ret, ktime_t until)
+{
+        struct hrtimer_sleeper t;
+        long ret;
+        DEFINE_WAIT(wait);
+
+        mutex_lock(&ctx->ring_lock);
+
+        /*
+         * this is an open coding of wait_event_interruptible_hrtimeout
+         * so we can deal with the ctx->mutex nicely.  It starts with the
+         * fast path for existing events:
+         */
+        ret = aio_read_events(ctx, min_nr, nr, event, i_ret);
+        if (ret) {
+                mutex_unlock(&ctx->ring_lock);
+                return ret;
+        }
+
+        hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+        hrtimer_init_sleeper(&t, current);
+        if (until.tv64 != KTIME_MAX) {
+                hrtimer_start_range_ns(&t.timer, until, current->timer_slack_ns,
+                                       HRTIMER_MODE_REL);
+        }
+
+        while (1) {
+                ret = prepare_to_wait_event(&ctx->wait, &wait,
+                                            TASK_INTERRUPTIBLE);
+                if (ret)
+                        break;
+
+                ret = aio_read_events(ctx, min_nr, nr, event, i_ret);
+                if (ret)
+                        break;
+
+                /* make sure we didn't timeout taking the mutex */
+                if (!t.task) {
+                        ret = -ETIME;
+                        break;
+                }
+
+                mutex_unlock(&ctx->ring_lock);
+                schedule();
+
+                if (!t.task) {
+                        ret = -ETIME;
+                        goto out;
+                }
+                mutex_lock(&ctx->ring_lock);
+
+        }
+        mutex_unlock(&ctx->ring_lock);
+
+out:
+        finish_wait(&ctx->wait, &wait);
+        hrtimer_cancel(&t.timer);
+        destroy_hrtimer_on_stack(&t.timer);
+        return ret;
+
+}
+
 static long read_events(struct kioctx *ctx, long min_nr, long nr,
                         struct io_event __user *event,
                         struct timespec __user *timeout)
@@ -1239,27 +1317,18 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
                 until = timespec_to_ktime(ts);
         }
 
-        /*
-         * Note that aio_read_events() is being called as the conditional - i.e.
-         * we're calling it after prepare_to_wait() has set task state to
-         * TASK_INTERRUPTIBLE.
-         *
-         * But aio_read_events() can block, and if it blocks it's going to flip
-         * the task state back to TASK_RUNNING.
-         *
-         * This should be ok, provided it doesn't flip the state back to
-         * TASK_RUNNING and return 0 too much - that causes us to spin. That
-         * will only happen if the mutex_lock() call blocks, and we then find
-         * the ringbuffer empty. So in practice we should be ok, but it's
-         * something to be aware of when touching this code.
-         */
-        if (until.tv64 == 0)
-                aio_read_events(ctx, min_nr, nr, event, &ret);
-        else
-                wait_event_interruptible_hrtimeout(ctx->wait,
-                                aio_read_events(ctx, min_nr, nr, event, &ret),
-                                until);
+        if (until.tv64 == 0) {
+                mutex_lock(&ctx->ring_lock);
+                aio_read_events(ctx, min_nr, nr, event, &ret);
+                mutex_unlock(&ctx->ring_lock);
+        } else {
+                /*
+                 * we're pitching the -ETIME and -ERESTARTSYS return values
+                 * here.  It'll turn into -EINTR?  ick
+                 */
+                aio_wait_events(ctx, min_nr, nr, event, &ret, until);
+        }
 
         if (!ret && signal_pending(current))
                 ret = -EINTR;
-- 
"Thought is the essence of where you are now."
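
Addendum (not part of the pull request): the shape of the problem, stripped of the aio details. Once prepare_to_wait() has put the task in TASK_INTERRUPTIBLE, anything in the wait loop that can itself sleep (mutex_lock(), kmap(), a faulting copy_to_user()) trips the "sleeping while TASK_INTERRUPTIBLE" debug warning and can clobber the wait state. The patch's answer is to flip the task back to TASK_RUNNING right before the sleeping work. Below is a minimal sketch of that pattern; item_wq, try_take_item() and copy_item_to_user() are hypothetical names for illustration, not fs/aio.c symbols.

/*
 * Illustrative sketch only; the helpers are made up.
 */
#include <linux/wait.h>
#include <linux/sched.h>

struct item;
extern wait_queue_head_t item_wq;                /* woken by the producer */
extern struct item *try_take_item(void);         /* non-blocking; NULL if empty */
extern long copy_item_to_user(struct item *it,
                              void __user *uptr); /* may fault and sleep */

static long wait_for_one_item(void __user *uptr)
{
        DEFINE_WAIT(wait);
        long ret;

        for (;;) {
                struct item *it;

                /* re-arm the wait and go TASK_INTERRUPTIBLE before every
                 * check; returns -ERESTARTSYS if a signal is pending */
                ret = prepare_to_wait_event(&item_wq, &wait,
                                            TASK_INTERRUPTIBLE);
                if (ret)
                        break;

                it = try_take_item();
                if (it) {
                        /* the copy may sleep, and sleeping while
                         * TASK_INTERRUPTIBLE is exactly what triggers the
                         * warning, so flip back to TASK_RUNNING first */
                        __set_current_state(TASK_RUNNING);
                        ret = copy_item_to_user(it, uptr);
                        break;
                }

                /* nothing queued: still TASK_INTERRUPTIBLE, really sleep */
                schedule();
        }
        finish_wait(&item_wq, &wait);
        return ret;
}

Because the wait is re-armed and the state reset at the top of every iteration, flipping to TASK_RUNNING for the copy cannot lose a wakeup; at worst the loop passes through schedule() once more before sleeping, which is the same trade-off the comment in aio_read_events_ring() describes.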