Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756817AbdD1Qud (ORCPT ); Fri, 28 Apr 2017 12:50:33 -0400 Received: from scorn.kernelslacker.org ([45.56.101.199]:36908 "EHLO scorn.kernelslacker.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753035AbdD1Qu0 (ORCPT ); Fri, 28 Apr 2017 12:50:26 -0400 Date: Fri, 28 Apr 2017 12:50:24 -0400 From: Dave Jones To: Al Viro Cc: Linux Kernel Subject: Re: iov_iter_pipe warning. Message-ID: <20170428165024.ofyl2atpjwohekqa@codemonkey.org.uk> Mail-Followup-To: Dave Jones , Al Viro , Linux Kernel References: <20170412022911.nhefjqlnyrk3n7rr@codemonkey.org.uk> <20170412025842.GO29622@ZenIV.linux.org.uk> <20170412143519.4hh36l3egozgdrll@codemonkey.org.uk> <20170412152600.GP29622@ZenIV.linux.org.uk> <20170412162709.bn5qfk4seues3yos@codemonkey.org.uk> <20170412170723.GQ29622@ZenIV.linux.org.uk> <20170412190318.srkkdytf2ebrjzrg@codemonkey.org.uk> <20170421175430.GT29622@ZenIV.linux.org.uk> <20170428152955.mafs3f22srmm34aw@codemonkey.org.uk> <20170428164313.GK29622@ZenIV.linux.org.uk> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20170428164313.GK29622@ZenIV.linux.org.uk> User-Agent: NeoMutt/20170306 (1.8.0) X-Spam-Note: SpamAssassin invocation failed Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7806 Lines: 249 On Fri, Apr 28, 2017 at 05:43:13PM +0100, Al Viro wrote: > On Fri, Apr 28, 2017 at 11:29:55AM -0400, Dave Jones wrote: > > On Fri, Apr 21, 2017 at 06:54:30PM +0100, Al Viro wrote: > > > On Wed, Apr 12, 2017 at 03:03:18PM -0400, Dave Jones wrote: > > > > > > > Well it's been running an hour without incident, which looks promising. > > > > I'll leave it run, but I'd say you're on the right track given how quick > > > > it reproduced so far. > > > > > > Could you try this and see if it works? What happens is that unlike > > > e.g. generic_file_read_iter/generic_file_write_iter, NFS O_DIRECT handling > > > does not make sure that iov_iter had been advanced by the amount > > > actually transferred - it is left advanced by the amount *requested*. > > > > Crap. So I never ran it long enough it seems. I can still hit that trace. > > I re-added one of your earlier debug patches, and got this.. > > Could you send me the diff against something from mainline (identified by > sha1, ideally)? currently running v4.11-rc8-75-gf83246089ca0 sunrpc bit is for the other unrelated problem I'm chasing. note also, I saw the backtrace without the fs/splice.c changes. diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index aab32fc3d6a8..c1b5fed7c863 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -537,7 +537,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, if (put_dreq(dreq)) nfs_direct_complete(dreq); - return 0; + return requested_bytes; } /** @@ -566,7 +566,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - ssize_t result = -EINVAL; + ssize_t result = -EINVAL, requested; size_t count = iov_iter_count(iter); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); @@ -600,14 +600,19 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; - result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); + requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); nfs_end_io_direct(inode); - if (!result) { + if (requested > 0) { result = nfs_direct_wait(dreq); - if (result > 0) + if (result > 0) { + requested -= result; iocb->ki_pos += result; + } + iov_iter_revert(iter, requested); + } else { + result = requested; } out_release: @@ -954,7 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, if (put_dreq(dreq)) nfs_direct_write_complete(dreq); - return 0; + return requested_bytes; } /** @@ -979,7 +984,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, */ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { - ssize_t result = -EINVAL; + ssize_t result = -EINVAL, requested; size_t count; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -1022,7 +1027,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) nfs_start_io_direct(inode); - result = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, @@ -1031,13 +1036,17 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) nfs_end_io_direct(inode); - if (!result) { + if (requested > 0) { result = nfs_direct_wait(dreq); if (result > 0) { + requested -= result; iocb->ki_pos = pos + result; /* XXX: should check the generic_write_sync retval */ generic_write_sync(iocb, result); } + iov_iter_revert(iter, requested); + } else { + result = requested; } out_release: nfs_direct_req_release(dreq); diff --git a/fs/splice.c b/fs/splice.c index 006ba50f4ece..0e67ddf8618d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -284,6 +284,43 @@ void splice_shrink_spd(struct splice_pipe_desc *spd) kfree(spd->partial); } +static bool test_it(struct pipe_inode_info *pipe, size_t len, long ret) +{ + int idx = pipe->curbuf; + int n = pipe->nrbufs; + size_t size = 0; + while (n--) { + size += pipe->bufs[idx++].len; + if (idx == pipe->buffers) + idx = 0; + } + if (WARN_ON(size != ret)) { + char c = '['; + printk(KERN_ERR "asked to read %zu, claims to have read %ld", + len, ret); + printk(KERN_CONT "actual size of data in pipe %zd ", size); + for (n = pipe->nrbufs, idx = pipe->curbuf; n--; ) { + printk(KERN_CONT "%c%d:%u", c, idx, + pipe->bufs[idx].len); + c = ','; + if (++idx == pipe->buffers) + idx = 0; + } + if (c != '[') + printk(KERN_CONT "]"); + return true; + } + return false; +} + +static inline bool insane_splice_read(struct pipe_inode_info *pipe, + size_t len, long ret) +{ + if (ret <= 0 || pipe != current->splice_pipe) + return false; + return test_it(pipe, len, ret); +} + /** * generic_file_splice_read - splice data from file to a pipe * @in: file to splice from @@ -311,8 +348,14 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, kiocb.ki_pos = *ppos; ret = call_read_iter(in, &kiocb, &to); if (ret > 0) { - *ppos = kiocb.ki_pos; file_accessed(in); + if (unlikely(insane_splice_read(pipe, len, ret))) { + printk(KERN_ERR "f_op: %p, f_flags: %d, pos: %lld/%lld, size: %lld", + in->f_op, in->f_flags, (long long)*ppos, + (long long)kiocb.ki_pos, + (long long)i_size_read(file_inode(in))); + } + *ppos = kiocb.ki_pos; } else if (ret < 0) { to.idx = idx; to.iov_offset = 0; @@ -394,7 +437,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct page **pages; unsigned int nr_pages; size_t offset, dummy, copied = 0; - ssize_t res; + ssize_t res, old_len = len; int i; if (pipe->nrbufs == pipe->buffers) @@ -448,6 +491,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, put_page(pages[i]); kvfree(pages); iov_iter_advance(&to, copied); /* truncates and discards */ + insane_splice_read(pipe, old_len, res); return res; } @@ -970,6 +1014,11 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, while (len) { size_t read_len; loff_t pos = sd->pos, prev_pos = pos; + if (WARN_ON(pipe->nrbufs)) { + printk(KERN_ERR "in->f_op = %p, ->splice_write = %p\n", + in->f_op, + sd->u.file->f_op->splice_write); + } ret = do_splice_to(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 7bfe1fb42add..eb5297573a8a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -927,6 +927,15 @@ int svc_send(struct svc_rqst *rqstp) if (!xprt) goto out; + if (WARN_ON(rqstp->rq_res.head[0].iov_len + + rqstp->rq_res.page_len + + rqstp->rq_res.tail[0].iov_len > rqstp->rq_reserved)) { + + printk("dbg: rqstp->rq_res.head[0].iov_len:%ld\n", rqstp->rq_res.head[0].iov_len); + printk("dbg: rqstp->rq_res.page_len:%d\n", rqstp->rq_res.page_len); + printk("dbg: rqstp->rq_reserved:%d\n", rqstp->rq_reserved); + } + /* release the receive skb before sending the reply */ rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); @@ -936,6 +945,8 @@ int svc_send(struct svc_rqst *rqstp) xb->page_len + xb->tail[0].iov_len; + WARN_ON(xb->len > rqstp->rq_reserved); + /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); if (test_bit(XPT_DEAD, &xprt->xpt_flags) @@ -944,6 +955,7 @@ int svc_send(struct svc_rqst *rqstp) else len = xprt->xpt_ops->xpo_sendto(rqstp); mutex_unlock(&xprt->xpt_mutex); + rpc_wake_up(&xprt->xpt_bc_pending); svc_xprt_release(rqstp);