Hi Al,
Greg reported a problem due to the fact that Android tests use procfs
files to test splice, which stopped working with 5.10-rc1. This series
adds read_iter support for seq_file, and uses those for various seq_files
in procfs to restore splice read support.
Chances since v1:
- only switch a subset of proc files to use seq_read_iter
iov_iter based variant for reading a seq_file. seq_read is
reimplemented on top of the iter variant.
Signed-off-by: Christoph Hellwig <[email protected]>
Tested-by: Greg Kroah-Hartman <[email protected]>
---
fs/seq_file.c | 45 ++++++++++++++++++++++++++++------------
include/linux/seq_file.h | 1 +
2 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 31219c1db17de3..3b20e21604e74a 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/string_helpers.h>
+#include <linux/uio.h>
#include <linux/uaccess.h>
#include <asm/page.h>
@@ -146,7 +147,28 @@ static int traverse(struct seq_file *m, loff_t offset)
*/
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
- struct seq_file *m = file->private_data;
+ struct iovec iov = { .iov_base = buf, .iov_len = size};
+ struct kiocb kiocb;
+ struct iov_iter iter;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, file);
+ iov_iter_init(&iter, READ, &iov, 1, size);
+
+ kiocb.ki_pos = *ppos;
+ ret = seq_read_iter(&kiocb, &iter);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+EXPORT_SYMBOL(seq_read);
+
+/*
+ * Ready-made ->f_op->read_iter()
+ */
+ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct seq_file *m = iocb->ki_filp->private_data;
+ size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
@@ -158,14 +180,14 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
* if request is to read from zero offset, reset iterator to first
* record as it might have been already advanced by previous requests
*/
- if (*ppos == 0) {
+ if (iocb->ki_pos == 0) {
m->index = 0;
m->count = 0;
}
- /* Don't assume *ppos is where we left it */
- if (unlikely(*ppos != m->read_pos)) {
- while ((err = traverse(m, *ppos)) == -EAGAIN)
+ /* Don't assume ki_pos is where we left it */
+ if (unlikely(iocb->ki_pos != m->read_pos)) {
+ while ((err = traverse(m, iocb->ki_pos)) == -EAGAIN)
;
if (err) {
/* With prejudice... */
@@ -174,7 +196,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
m->count = 0;
goto Done;
} else {
- m->read_pos = *ppos;
+ m->read_pos = iocb->ki_pos;
}
}
@@ -187,13 +209,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* if not empty - flush it first */
if (m->count) {
n = min(m->count, size);
- err = copy_to_user(buf, m->buf + m->from, n);
- if (err)
+ if (copy_to_iter(m->buf + m->from, n, iter) != n)
goto Efault;
m->count -= n;
m->from += n;
size -= n;
- buf += n;
copied += n;
if (!size)
goto Done;
@@ -254,8 +274,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
}
m->op->stop(m, p);
n = min(m->count, size);
- err = copy_to_user(buf, m->buf, n);
- if (err)
+ if (copy_to_iter(m->buf, n, iter) != n)
goto Efault;
copied += n;
m->count -= n;
@@ -264,7 +283,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
if (!copied)
copied = err;
else {
- *ppos += copied;
+ iocb->ki_pos += copied;
m->read_pos += copied;
}
mutex_unlock(&m->lock);
@@ -276,7 +295,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
err = -EFAULT;
goto Done;
}
-EXPORT_SYMBOL(seq_read);
+EXPORT_SYMBOL(seq_read_iter);
/**
* seq_lseek - ->llseek() method for sequential files.
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 813614d4b71fbc..b83b3ae3c877f3 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -107,6 +107,7 @@ void seq_pad(struct seq_file *m, char c);
char *mangle_path(char *s, const char *p, const char *esc);
int seq_open(struct file *, const struct seq_operations *);
ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
+ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter);
loff_t seq_lseek(struct file *, loff_t, int);
int seq_release(struct inode *, struct file *);
int seq_write(struct seq_file *seq, const void *data, size_t len);
--
2.28.0
Wire up generic_file_splice_read for the iter based proxy ops, so
that splice reads from them work.
Signed-off-by: Christoph Hellwig <[email protected]>
Tested-by: Greg Kroah-Hartman <[email protected]>
---
fs/proc/inode.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 58c075e2a452d6..bde6b6f69852d2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -597,6 +597,7 @@ static const struct file_operations proc_iter_file_ops = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
+ .splice_read = generic_file_splice_read,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
@@ -622,6 +623,7 @@ static const struct file_operations proc_reg_file_ops_compat = {
static const struct file_operations proc_iter_file_ops_compat = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
+ .splice_read = generic_file_splice_read,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
--
2.28.0
Implement ->read_iter so that the Android bionic test suite can use
this random proc file for its splice test case.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/proc/cpuinfo.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index d0989a443c77df..419760fd77bdd8 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -19,7 +19,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
static const struct proc_ops cpuinfo_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = cpuinfo_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
};
--
2.28.0
Implement ->read_iter so that splice can be used on this file.
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/proc/stat.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 46b3293015fe61..4695b6de315129 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -226,7 +226,7 @@ static int stat_open(struct inode *inode, struct file *file)
static const struct proc_ops stat_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = stat_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
--
2.28.0
From: Greg Kroah-Hartman <[email protected]>
Implement ->read_iter for all proc "single files" so that more bionic
tests cases can pass when they call splice() on other fun files like
/proc/version
Signed-off-by: Greg Kroah-Hartman <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/proc/generic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2f9fa179194d72..f81327673f4901 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -621,7 +621,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
static const struct proc_ops proc_single_ops = {
/* not permanent -- can call into arbitrary ->single_show */
.proc_open = proc_single_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
--
2.28.0
Implement ->read_iter for all proc "seq files" so that splice works on
them.
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/proc/generic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f81327673f4901..b84663252adda0 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -590,7 +590,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
static const struct proc_ops proc_seq_ops = {
/* not permanent -- can call into arbitrary seq_operations */
.proc_open = proc_seq_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = proc_seq_release,
};
--
2.28.0
On Wed, Nov 4, 2020 at 12:29 AM Christoph Hellwig <[email protected]> wrote:
>
> Greg reported a problem due to the fact that Android tests use procfs
> files to test splice, which stopped working with 5.10-rc1. This series
> adds read_iter support for seq_file, and uses those for various seq_files
> in procfs to restore splice read support.
Ack.
Al, do you want me to take these directly - we'll need this to avoid
the regression in 5.10? Or do you have other things pending and I'll
see them in a pull request.
Linus
On Wed, Nov 04, 2020 at 09:27:33AM +0100, Christoph Hellwig wrote:
> ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
> {
> - struct seq_file *m = file->private_data;
> + struct iovec iov = { .iov_base = buf, .iov_len = size};
> + struct kiocb kiocb;
> + struct iov_iter iter;
> + ssize_t ret;
> +
> + init_sync_kiocb(&kiocb, file);
> + iov_iter_init(&iter, READ, &iov, 1, size);
> +
> + kiocb.ki_pos = *ppos;
> + ret = seq_read_iter(&kiocb, &iter);
> + *ppos = kiocb.ki_pos;
> + return ret;
> +}
> +EXPORT_SYMBOL(seq_read);
This is basically an open-coded copy of new_sync_read()...
> if (m->count) {
> n = min(m->count, size);
> - err = copy_to_user(buf, m->buf + m->from, n);
> - if (err)
> + if (copy_to_iter(m->buf + m->from, n, iter) != n)
> goto Efault;
> m->count -= n;
> m->from += n;
> size -= n;
> - buf += n;
> copied += n;
> if (!size)
> goto Done;
> n = min(m->count, size);
> - err = copy_to_user(buf, m->buf, n);
> - if (err)
> + if (copy_to_iter(m->buf, n, iter) != n)
> goto Efault;
This is actually broken from generic_file_splice_read() POV; if you've
already emitted something, you will end up with more data spewed into
pipe than you report to caller. You want something similar to
copy_to_iter_full() here, with iterator _not_ advanced in case of
failure. The first call is not an issue (you have no data copied
yet, so you'll end up with -EFAULT, aka "discard everything you've
put there and return -EAGAIN"), but the second really is a problem.
BTW, other ->read_iter() instances might need to be careful with that
pattern as well; drivers/gpu/drm/drm_dp_aux_dev.c:auxdev_read_iter()
would appear to have the same problem.
<greps some more>
if (unlikely(iov_iter_is_pipe(iter))) {
void *addr = kmap_atomic(page);
written = copy_to_iter(addr, copy, iter);
kunmap_atomic(addr);
} else
in fs/cifs/file.c looks... interesting, considering the fact that
copy_to_iter() for pipe destination might very well have to do
allocations. With GFP_USER. Under kmap_atomic()...
Note that we have this:
static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
struct iov_iter *to)
{
int n;
n = copy_to_iter(skb->data + off, len, to);
if (n == len)
return 0;
iov_iter_revert(to, n);
return -EFAULT;
}
i.e. the same "do not advance on short copy" kind of thing.
AFAICS, not all callers want that semantics, but I think it's worth
a new primitive. I'm not saying it should be a prereq for your
series, but either that or an explicit iov_iter_revert() is needed.
On Tue, Nov 10, 2020 at 09:32:53PM +0000, Al Viro wrote:
> AFAICS, not all callers want that semantics, but I think it's worth
> a new primitive. I'm not saying it should be a prereq for your
> series, but either that or an explicit iov_iter_revert() is needed.
Seeing that it already went into mainline, it needs a followup fix.
And since it's not -stable fodder (AFAICS), I'd rather go with
adding a new primitive...
On Tue, Nov 10, 2020 at 09:35:11PM +0000, Al Viro wrote:
> On Tue, Nov 10, 2020 at 09:32:53PM +0000, Al Viro wrote:
>
> > AFAICS, not all callers want that semantics, but I think it's worth
> > a new primitive. I'm not saying it should be a prereq for your
> > series, but either that or an explicit iov_iter_revert() is needed.
>
> Seeing that it already went into mainline, it needs a followup fix.
> And since it's not -stable fodder (AFAICS), I'd rather go with
> adding a new primitive...
Any objections to the following?
Fix seq_read_iter() behaviour on full pipe
generic_file_splice_read() will purge what we'd left in pipe in case
of error; it will *not* do so in case of short write, so we must make
sure that reported amount of data stored by ->read_iter() matches the
reality.
It's not a rare situation (and we already have it open-coded in at least
one place), so let's introduce a new primitive - copy_to_iter_full().
Similar to copy_from_iter_full(), it returns true if we had been able
to copy everything we'd been asked to and false otherwise. Iterator
is advanced only on success.
Signed-off-by: Al Viro <[email protected]>
---
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..233d790ea301 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -209,7 +209,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
/* if not empty - flush it first */
if (m->count) {
n = min(m->count, size);
- if (copy_to_iter(m->buf + m->from, n, iter) != n)
+ if (!copy_to_iter_full(m->buf + m->from, n, iter))
goto Efault;
m->count -= n;
m->from += n;
@@ -274,7 +274,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
m->op->stop(m, p);
n = min(m->count, size);
- if (copy_to_iter(m->buf, n, iter) != n)
+ if (!copy_to_iter_full(m->buf, n, iter))
goto Efault;
copied += n;
m->count -= n;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 72d88566694e..388c05e371ad 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -138,6 +138,18 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
}
static __always_inline __must_check
+bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i)
+{
+ if (likely(check_copy_size(addr, bytes, true))) {
+ size_t n = _copy_to_iter(addr, bytes, i);
+ if (likely(n == bytes))
+ return true;
+ iov_iter_revert(i, n);
+ }
+ return false;
+}
+
+static __always_inline __must_check
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(!check_copy_size(addr, bytes, false)))
diff --git a/include/net/udp.h b/include/net/udp.h
index 295d52a73598..91d1a2998a2d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -390,14 +390,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb)
static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
struct iov_iter *to)
{
- int n;
-
- n = copy_to_iter(skb->data + off, len, to);
- if (n == len)
- return 0;
-
- iov_iter_revert(to, n);
- return -EFAULT;
+ return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT;
}
/*
On Tue, Nov 10, 2020 at 11:20:28PM +0000, Al Viro wrote:
> On Tue, Nov 10, 2020 at 09:35:11PM +0000, Al Viro wrote:
> > On Tue, Nov 10, 2020 at 09:32:53PM +0000, Al Viro wrote:
> >
> > > AFAICS, not all callers want that semantics, but I think it's worth
> > > a new primitive. I'm not saying it should be a prereq for your
> > > series, but either that or an explicit iov_iter_revert() is needed.
> >
> > Seeing that it already went into mainline, it needs a followup fix.
> > And since it's not -stable fodder (AFAICS), I'd rather go with
> > adding a new primitive...
>
> Any objections to the following?
>
> Fix seq_read_iter() behaviour on full pipe
>
> generic_file_splice_read() will purge what we'd left in pipe in case
> of error; it will *not* do so in case of short write, so we must make
> sure that reported amount of data stored by ->read_iter() matches the
> reality.
>
> It's not a rare situation (and we already have it open-coded in at least
> one place), so let's introduce a new primitive - copy_to_iter_full().
> Similar to copy_from_iter_full(), it returns true if we had been able
> to copy everything we'd been asked to and false otherwise. Iterator
> is advanced only on success.
>
> Signed-off-by: Al Viro <[email protected]>
Looks ok to me.
On Tue, Nov 10, 2020 at 3:20 PM Al Viro <[email protected]> wrote:
>
> Any objections to the following?
Well, I don't _object_, but I find it ugly.
And I think both the old and the "fixed" code is wrong when an EFAULT
happens in the middle.
Yes, we can just return EFAULT. But for read() and write() we really
try to do the proper partial returns in other places, why not here?
IOW, why isn't the proper fix just something like this:
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..ecc6909b71f5 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -209,7 +209,8 @@ ssize_t seq_read_iter(struct kiocb *iocb,
struct iov_iter *iter)
/* if not empty - flush it first */
if (m->count) {
n = min(m->count, size);
- if (copy_to_iter(m->buf + m->from, n, iter) != n)
+ n = copy_to_iter(m->buf + m->from, n, iter);
+ if (!n)
goto Efault;
m->count -= n;
m->from += n;
which should get the "efault in the middle" case roughly right (ie the
usual "exact byte alignment and page crosser" caveats apply).
Linus
On Wed, Nov 11, 2020 at 09:54:12AM -0800, Linus Torvalds wrote:
> On Tue, Nov 10, 2020 at 3:20 PM Al Viro <[email protected]> wrote:
> >
> > Any objections to the following?
>
> Well, I don't _object_, but I find it ugly.
>
> And I think both the old and the "fixed" code is wrong when an EFAULT
> happens in the middle.
>
> Yes, we can just return EFAULT. But for read() and write() we really
> try to do the proper partial returns in other places, why not here?
>
> IOW, why isn't the proper fix just something like this:
>
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index 3b20e21604e7..ecc6909b71f5 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -209,7 +209,8 @@ ssize_t seq_read_iter(struct kiocb *iocb,
> struct iov_iter *iter)
> /* if not empty - flush it first */
> if (m->count) {
> n = min(m->count, size);
> - if (copy_to_iter(m->buf + m->from, n, iter) != n)
> + n = copy_to_iter(m->buf + m->from, n, iter);
> + if (!n)
> goto Efault;
> m->count -= n;
> m->from += n;
>
> which should get the "efault in the middle" case roughly right (ie the
> usual "exact byte alignment and page crosser" caveats apply).
Look at the loop after that one, specifically the "it doesn't fit,
allocate a bigger one" part:
kvfree(m->buf);
m->count = 0;
m->buf = seq_buf_alloc(m->size <<= 1);
It really depends upon having m->buf empty at the beginning of
the loop. Your variant will lose the data, unless we copy the
"old" part of buffer (from before the ->show()) into the
larger one.
That can be done, but I would rather go with
n = copy_to_iter(m->buf + m->from, m->count, iter);
m->count -= n;
m->from += n;
copied += n;
if (!size)
goto Done;
if (m->count)
goto Efault;
if we do it that way. Let me see if I can cook something
reasonable along those lines...
On Wed, Nov 11, 2020 at 09:52:20PM +0000, Al Viro wrote:
> That can be done, but I would rather go with
> n = copy_to_iter(m->buf + m->from, m->count, iter);
> m->count -= n;
> m->from += n;
> copied += n;
> if (!size)
> goto Done;
> if (m->count)
> goto Efault;
> if we do it that way. Let me see if I can cook something
> reasonable along those lines...
Something like below (build-tested only):
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..07b33c1f34a9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -168,7 +168,6 @@ EXPORT_SYMBOL(seq_read);
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct seq_file *m = iocb->ki_filp->private_data;
- size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
@@ -208,14 +207,11 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
/* if not empty - flush it first */
if (m->count) {
- n = min(m->count, size);
- if (copy_to_iter(m->buf + m->from, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf + m->from, m->count, iter);
m->count -= n;
m->from += n;
- size -= n;
copied += n;
- if (!size)
+ if (!iov_iter_count(iter) || m->count)
goto Done;
}
/* we need at least one record in buffer */
@@ -249,6 +245,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto Done;
Fill:
/* they want more? let's try to get some more */
+ /* m->count is positive and there's space left in iter */
while (1) {
size_t offs = m->count;
loff_t pos = m->index;
@@ -263,7 +260,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
err = PTR_ERR(p);
break;
}
- if (m->count >= size)
+ if (m->count >= iov_iter_count(iter))
break;
err = m->op->show(m, p);
if (seq_has_overflowed(m) || err) {
@@ -273,16 +270,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
}
m->op->stop(m, p);
- n = min(m->count, size);
- if (copy_to_iter(m->buf, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf, m->count, iter);
copied += n;
m->count -= n;
m->from = n;
Done:
- if (!copied)
- copied = err;
- else {
+ if (unlikely(!copied)) {
+ copied = m->count ? -EFAULT : err;
+ } else {
iocb->ki_pos += copied;
m->read_pos += copied;
}
@@ -291,9 +286,6 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
Enomem:
err = -ENOMEM;
goto Done;
-Efault:
- err = -EFAULT;
- goto Done;
}
EXPORT_SYMBOL(seq_read_iter);
On Wed, Nov 11, 2020 at 02:27:02PM -0800, Linus Torvalds wrote:
> On Wed, Nov 11, 2020 at 2:21 PM Al Viro <[email protected]> wrote:
> >
> > Something like below (build-tested only):
>
> Apart from my usual "oh, Gods, the iter model really does confuse me"
> this looks more like what I expected, yes.
>
> Considering the original bug, I'm clearly not the only one confused by
> the iov_iter helper functions and the rules..
copy_to_iter() returns the amount it has actually copied, that's all;
the cause of that bug is not the primitives used, it's the rules for
->read_iter(). The rules are actually fairly simple - "->read_iter()
should not report less data than it has actually left there". For read(2)
it's a matter of QoI - if we hit an unmapped page, POSIX pretty much says
that all bets are off; read(fd, unmapped - 5, 8) might copy 5 bytes and
return 4. It is allowed (and read(2) on those files used to do just that),
but it's nicer not to do so. For generic_file_splice_read(), OTOH, it's
a bug - we end up with stray data spewed into pipe. So converting
to ->read_iter() needs some care. Probably something along those
lines should go into D/f/porting...
On Wed, Nov 11, 2020 at 2:21 PM Al Viro <[email protected]> wrote:
>
> Something like below (build-tested only):
Apart from my usual "oh, Gods, the iter model really does confuse me"
this looks more like what I expected, yes.
Considering the original bug, I'm clearly not the only one confused by
the iov_iter helper functions and the rules..
Linus
Hi Al,
On Wed, Nov 11, 2020 at 10:21:16PM +0000, Al Viro wrote:
> On Wed, Nov 11, 2020 at 09:52:20PM +0000, Al Viro wrote:
>
> > That can be done, but I would rather go with
> > n = copy_to_iter(m->buf + m->from, m->count, iter);
> > m->count -= n;
> > m->from += n;
> > copied += n;
> > if (!size)
> > goto Done;
> > if (m->count)
> > goto Efault;
> > if we do it that way. Let me see if I can cook something
> > reasonable along those lines...
>
> Something like below (build-tested only):
>
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index 3b20e21604e7..07b33c1f34a9 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -168,7 +168,6 @@ EXPORT_SYMBOL(seq_read);
> ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> {
> struct seq_file *m = iocb->ki_filp->private_data;
> - size_t size = iov_iter_count(iter);
> size_t copied = 0;
> size_t n;
> void *p;
> @@ -208,14 +207,11 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> }
> /* if not empty - flush it first */
> if (m->count) {
> - n = min(m->count, size);
> - if (copy_to_iter(m->buf + m->from, n, iter) != n)
> - goto Efault;
> + n = copy_to_iter(m->buf + m->from, m->count, iter);
> m->count -= n;
> m->from += n;
> - size -= n;
> copied += n;
> - if (!size)
> + if (!iov_iter_count(iter) || m->count)
> goto Done;
> }
> /* we need at least one record in buffer */
> @@ -249,6 +245,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> goto Done;
> Fill:
> /* they want more? let's try to get some more */
> + /* m->count is positive and there's space left in iter */
> while (1) {
> size_t offs = m->count;
> loff_t pos = m->index;
> @@ -263,7 +260,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> err = PTR_ERR(p);
> break;
> }
> - if (m->count >= size)
> + if (m->count >= iov_iter_count(iter))
> break;
> err = m->op->show(m, p);
> if (seq_has_overflowed(m) || err) {
> @@ -273,16 +270,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> }
> }
> m->op->stop(m, p);
> - n = min(m->count, size);
> - if (copy_to_iter(m->buf, n, iter) != n)
> - goto Efault;
> + n = copy_to_iter(m->buf, m->count, iter);
> copied += n;
> m->count -= n;
> m->from = n;
> Done:
> - if (!copied)
> - copied = err;
> - else {
> + if (unlikely(!copied)) {
> + copied = m->count ? -EFAULT : err;
> + } else {
> iocb->ki_pos += copied;
> m->read_pos += copied;
> }
> @@ -291,9 +286,6 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> Enomem:
> err = -ENOMEM;
> goto Done;
> -Efault:
> - err = -EFAULT;
> - goto Done;
> }
> EXPORT_SYMBOL(seq_read_iter);
>
This patch in -next (6a9f696d1627bacc91d1cebcfb177f474484e8ba) breaks
WSL2's interoperability feature, where Windows paths automatically get
added to PATH on start up so that Windows binaries can be accessed from
within Linux (such as clip.exe to pipe output to the clipboard). Before,
I would see a bunch of Linux + Windows folders in $PATH but after, I
only see the Linux folders (I can give you the actual PATH value if you
care but it is really long).
I am not at all familiar with the semantics of this patch or how
Microsoft would be using it to inject folders into PATH (they have some
documentation on it here:
https://docs.microsoft.com/en-us/windows/wsl/interop) and I am not sure
how to go about figuring that out to see why this patch breaks something
(unless you have an idea). I have added the Hyper-V maintainers and list
to CC in case they know someone who could help.
Cheers,
Nathan
On Fri, Nov 13, 2020 at 04:54:53PM -0700, Nathan Chancellor wrote:
> This patch in -next (6a9f696d1627bacc91d1cebcfb177f474484e8ba) breaks
> WSL2's interoperability feature, where Windows paths automatically get
> added to PATH on start up so that Windows binaries can be accessed from
> within Linux (such as clip.exe to pipe output to the clipboard). Before,
> I would see a bunch of Linux + Windows folders in $PATH but after, I
> only see the Linux folders (I can give you the actual PATH value if you
> care but it is really long).
>
> I am not at all familiar with the semantics of this patch or how
> Microsoft would be using it to inject folders into PATH (they have some
> documentation on it here:
> https://docs.microsoft.com/en-us/windows/wsl/interop) and I am not sure
> how to go about figuring that out to see why this patch breaks something
> (unless you have an idea). I have added the Hyper-V maintainers and list
> to CC in case they know someone who could help.
Out of curiosity: could you slap WARN_ON(!iov_iter_count(iter)); right in
the beginning of seq_read_iter() and see if that triggers?
On Sat, Nov 14, 2020 at 01:17:54AM +0000, Al Viro wrote:
> On Fri, Nov 13, 2020 at 04:54:53PM -0700, Nathan Chancellor wrote:
>
> > This patch in -next (6a9f696d1627bacc91d1cebcfb177f474484e8ba) breaks
> > WSL2's interoperability feature, where Windows paths automatically get
> > added to PATH on start up so that Windows binaries can be accessed from
> > within Linux (such as clip.exe to pipe output to the clipboard). Before,
> > I would see a bunch of Linux + Windows folders in $PATH but after, I
> > only see the Linux folders (I can give you the actual PATH value if you
> > care but it is really long).
> >
> > I am not at all familiar with the semantics of this patch or how
> > Microsoft would be using it to inject folders into PATH (they have some
> > documentation on it here:
> > https://docs.microsoft.com/en-us/windows/wsl/interop) and I am not sure
> > how to go about figuring that out to see why this patch breaks something
> > (unless you have an idea). I have added the Hyper-V maintainers and list
> > to CC in case they know someone who could help.
>
> Out of curiosity: could you slap WARN_ON(!iov_iter_count(iter)); right in
> the beginning of seq_read_iter() and see if that triggers?
Sure thing, it does trigger.
[ 0.235058] ------------[ cut here ]------------
[ 0.235062] WARNING: CPU: 15 PID: 237 at fs/seq_file.c:176 seq_read_iter+0x3b3/0x3f0
[ 0.235064] CPU: 15 PID: 237 Comm: localhost Not tainted 5.10.0-rc2-microsoft-cbl-00002-g6a9f696d1627-dirty #15
[ 0.235065] RIP: 0010:seq_read_iter+0x3b3/0x3f0
[ 0.235066] Code: ba 01 00 00 00 e8 6d d2 fc ff 4c 89 e7 48 89 ee 48 8b 54 24 10 e8 ad 8b 45 00 49 01 c5 48 29 43 18 48 89 43 10 e9 61 fe ff ff <0f> 0b e9 6f fc ff ff 0f 0b 45 31 ed e9 0d fd ff ff 48 c7 43 18 00
[ 0.235067] RSP: 0018:ffff9c774063bd08 EFLAGS: 00010246
[ 0.235068] RAX: ffff91a77ac01f00 RBX: ffff91a50133c348 RCX: 0000000000000001
[ 0.235069] RDX: ffff9c774063bdb8 RSI: ffff9c774063bd60 RDI: ffff9c774063bd88
[ 0.235069] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff91a50058b768
[ 0.235070] R10: ffff91a7f79f0000 R11: ffffffffbc2c2030 R12: ffff9c774063bd88
[ 0.235070] R13: ffff9c774063bd60 R14: ffff9c774063be48 R15: ffff91a77af58900
[ 0.235072] FS: 000000000029c800(0000) GS:ffff91a7f7bc0000(0000) knlGS:0000000000000000
[ 0.235073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.235073] CR2: 00007ab6c1fabad0 CR3: 000000037a004000 CR4: 0000000000350ea0
[ 0.235074] Call Trace:
[ 0.235077] seq_read+0x127/0x150
[ 0.235078] proc_reg_read+0x42/0xa0
[ 0.235080] do_iter_read+0x14c/0x1e0
[ 0.235081] do_readv+0x18d/0x240
[ 0.235083] do_syscall_64+0x33/0x70
[ 0.235085] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 0.235086] RIP: 0033:0x22c483
[ 0.235086] Code: 4e 66 48 0f 7e c8 48 83 f8 01 48 89 d0 48 83 d0 ff 48 89 46 08 66 0f 7f 46 10 48 63 7f 78 b8 13 00 00 00 ba 02 00 00 00 0f 05 <48> 89 c7 e8 15 bb ff ff 48 85 c0 7e 34 48 89 c1 48 2b 4c 24 08 76
[ 0.235087] RSP: 002b:00007ffca2245ca0 EFLAGS: 00000257 ORIG_RAX: 0000000000000013
[ 0.235088] RAX: ffffffffffffffda RBX: 0000000000a58120 RCX: 000000000022c483
[ 0.235088] RDX: 0000000000000002 RSI: 00007ffca2245ca0 RDI: 0000000000000005
[ 0.235089] RBP: 00000000ffffffff R08: fefefefefefefeff R09: 8080808080808080
[ 0.235089] R10: 00007ab6c1fabb20 R11: 0000000000000257 R12: 0000000000a58120
[ 0.235089] R13: 00007ffca2245d90 R14: 0000000000000001 R15: 00007ffca2245ce7
[ 0.235091] CPU: 15 PID: 237 Comm: localhost Not tainted 5.10.0-rc2-microsoft-cbl-00002-g6a9f696d1627-dirty #15
[ 0.235091] Call Trace:
[ 0.235092] dump_stack+0xa1/0xfb
[ 0.235094] __warn+0x7f/0x120
[ 0.235095] ? seq_read_iter+0x3b3/0x3f0
[ 0.235096] report_bug+0xb1/0x110
[ 0.235097] handle_bug+0x3d/0x70
[ 0.235098] exc_invalid_op+0x18/0xb0
[ 0.235098] asm_exc_invalid_op+0x12/0x20
[ 0.235100] RIP: 0010:seq_read_iter+0x3b3/0x3f0
[ 0.235100] Code: ba 01 00 00 00 e8 6d d2 fc ff 4c 89 e7 48 89 ee 48 8b 54 24 10 e8 ad 8b 45 00 49 01 c5 48 29 43 18 48 89 43 10 e9 61 fe ff ff <0f> 0b e9 6f fc ff ff 0f 0b 45 31 ed e9 0d fd ff ff 48 c7 43 18 00
[ 0.235101] RSP: 0018:ffff9c774063bd08 EFLAGS: 00010246
[ 0.235101] RAX: ffff91a77ac01f00 RBX: ffff91a50133c348 RCX: 0000000000000001
[ 0.235102] RDX: ffff9c774063bdb8 RSI: ffff9c774063bd60 RDI: ffff9c774063bd88
[ 0.235102] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff91a50058b768
[ 0.235103] R10: ffff91a7f79f0000 R11: ffffffffbc2c2030 R12: ffff9c774063bd88
[ 0.235103] R13: ffff9c774063bd60 R14: ffff9c774063be48 R15: ffff91a77af58900
[ 0.235104] ? seq_open+0x70/0x70
[ 0.235105] ? path_openat+0xbc0/0xc40
[ 0.235106] seq_read+0x127/0x150
[ 0.235107] proc_reg_read+0x42/0xa0
[ 0.235108] do_iter_read+0x14c/0x1e0
[ 0.235109] do_readv+0x18d/0x240
[ 0.235109] do_syscall_64+0x33/0x70
[ 0.235110] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 0.235111] RIP: 0033:0x22c483
[ 0.235111] Code: 4e 66 48 0f 7e c8 48 83 f8 01 48 89 d0 48 83 d0 ff 48 89 46 08 66 0f 7f 46 10 48 63 7f 78 b8 13 00 00 00 ba 02 00 00 00 0f 05 <48> 89 c7 e8 15 bb ff ff 48 85 c0 7e 34 48 89 c1 48 2b 4c 24 08 76
[ 0.235112] RSP: 002b:00007ffca2245ca0 EFLAGS: 00000257 ORIG_RAX: 0000000000000013
[ 0.235113] RAX: ffffffffffffffda RBX: 0000000000a58120 RCX: 000000000022c483
[ 0.235113] RDX: 0000000000000002 RSI: 00007ffca2245ca0 RDI: 0000000000000005
[ 0.235113] RBP: 00000000ffffffff R08: fefefefefefefeff R09: 8080808080808080
[ 0.235114] R10: 00007ab6c1fabb20 R11: 0000000000000257 R12: 0000000000a58120
[ 0.235114] R13: 00007ffca2245d90 R14: 0000000000000001 R15: 00007ffca2245ce7
[ 0.235115] ---[ end trace 92966dbcf1e9cae5 ]---
Cheers,
Nathan
On Fri, Nov 13, 2020 at 08:01:24PM -0700, Nathan Chancellor wrote:
> Sure thing, it does trigger.
>
> [ 0.235058] ------------[ cut here ]------------
> [ 0.235062] WARNING: CPU: 15 PID: 237 at fs/seq_file.c:176 seq_read_iter+0x3b3/0x3f0
> [ 0.235064] CPU: 15 PID: 237 Comm: localhost Not tainted 5.10.0-rc2-microsoft-cbl-00002-g6a9f696d1627-dirty #15
> [ 0.235065] RIP: 0010:seq_read_iter+0x3b3/0x3f0
> [ 0.235066] Code: ba 01 00 00 00 e8 6d d2 fc ff 4c 89 e7 48 89 ee 48 8b 54 24 10 e8 ad 8b 45 00 49 01 c5 48 29 43 18 48 89 43 10 e9 61 fe ff ff <0f> 0b e9 6f fc ff ff 0f 0b 45 31 ed e9 0d fd ff ff 48 c7 43 18 00
> [ 0.235067] RSP: 0018:ffff9c774063bd08 EFLAGS: 00010246
> [ 0.235068] RAX: ffff91a77ac01f00 RBX: ffff91a50133c348 RCX: 0000000000000001
> [ 0.235069] RDX: ffff9c774063bdb8 RSI: ffff9c774063bd60 RDI: ffff9c774063bd88
> [ 0.235069] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff91a50058b768
> [ 0.235070] R10: ffff91a7f79f0000 R11: ffffffffbc2c2030 R12: ffff9c774063bd88
> [ 0.235070] R13: ffff9c774063bd60 R14: ffff9c774063be48 R15: ffff91a77af58900
> [ 0.235072] FS: 000000000029c800(0000) GS:ffff91a7f7bc0000(0000) knlGS:0000000000000000
> [ 0.235073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 0.235073] CR2: 00007ab6c1fabad0 CR3: 000000037a004000 CR4: 0000000000350ea0
> [ 0.235074] Call Trace:
> [ 0.235077] seq_read+0x127/0x150
> [ 0.235078] proc_reg_read+0x42/0xa0
> [ 0.235080] do_iter_read+0x14c/0x1e0
> [ 0.235081] do_readv+0x18d/0x240
> [ 0.235083] do_syscall_64+0x33/0x70
> [ 0.235085] entry_SYSCALL_64_after_hwframe+0x44/0xa9
*blink*
Lovely... For one thing, it did *not* go through
proc_reg_read_iter(). For another, it has hit proc_reg_read() with
zero length, which must've been an iovec with zero ->iov_len in
readv(2) arguments. I wonder if we should use that kind of
pathology (readv() with zero-length segment in the middle of
iovec array) for regression tests...
OK... First of all, since that kind of crap can happen,
let's do this (incremental to be folded); then (and that's
a separate patch) we ought to switch the proc_ops with ->proc_read
equal to seq_read to ->proc_read_iter = seq_read_iter, so that
those guys would not mess with seq_read() wrapper at all.
Finally, is there any point having do_loop_readv_writev()
call any methods for zero-length segments?
In any case, the following should be folded into
"fix return values of seq_read_iter()"; could you check if that
fixes the problem you are seeing?
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 07b33c1f34a9..e66d6b8bae23 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -211,9 +211,9 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
m->count -= n;
m->from += n;
copied += n;
- if (!iov_iter_count(iter) || m->count)
- goto Done;
}
+ if (m->count || !iov_iter_count(iter))
+ goto Done;
/* we need at least one record in buffer */
m->from = 0;
p = m->op->start(m, &m->index);
On Sat, Nov 14, 2020 at 03:54:53AM +0000, Al Viro wrote:
> On Fri, Nov 13, 2020 at 08:01:24PM -0700, Nathan Chancellor wrote:
> > Sure thing, it does trigger.
> >
> > [ 0.235058] ------------[ cut here ]------------
> > [ 0.235062] WARNING: CPU: 15 PID: 237 at fs/seq_file.c:176 seq_read_iter+0x3b3/0x3f0
> > [ 0.235064] CPU: 15 PID: 237 Comm: localhost Not tainted 5.10.0-rc2-microsoft-cbl-00002-g6a9f696d1627-dirty #15
> > [ 0.235065] RIP: 0010:seq_read_iter+0x3b3/0x3f0
> > [ 0.235066] Code: ba 01 00 00 00 e8 6d d2 fc ff 4c 89 e7 48 89 ee 48 8b 54 24 10 e8 ad 8b 45 00 49 01 c5 48 29 43 18 48 89 43 10 e9 61 fe ff ff <0f> 0b e9 6f fc ff ff 0f 0b 45 31 ed e9 0d fd ff ff 48 c7 43 18 00
> > [ 0.235067] RSP: 0018:ffff9c774063bd08 EFLAGS: 00010246
> > [ 0.235068] RAX: ffff91a77ac01f00 RBX: ffff91a50133c348 RCX: 0000000000000001
> > [ 0.235069] RDX: ffff9c774063bdb8 RSI: ffff9c774063bd60 RDI: ffff9c774063bd88
> > [ 0.235069] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff91a50058b768
> > [ 0.235070] R10: ffff91a7f79f0000 R11: ffffffffbc2c2030 R12: ffff9c774063bd88
> > [ 0.235070] R13: ffff9c774063bd60 R14: ffff9c774063be48 R15: ffff91a77af58900
> > [ 0.235072] FS: 000000000029c800(0000) GS:ffff91a7f7bc0000(0000) knlGS:0000000000000000
> > [ 0.235073] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 0.235073] CR2: 00007ab6c1fabad0 CR3: 000000037a004000 CR4: 0000000000350ea0
> > [ 0.235074] Call Trace:
> > [ 0.235077] seq_read+0x127/0x150
> > [ 0.235078] proc_reg_read+0x42/0xa0
> > [ 0.235080] do_iter_read+0x14c/0x1e0
> > [ 0.235081] do_readv+0x18d/0x240
> > [ 0.235083] do_syscall_64+0x33/0x70
> > [ 0.235085] entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> *blink*
>
> Lovely... For one thing, it did *not* go through
> proc_reg_read_iter(). For another, it has hit proc_reg_read() with
> zero length, which must've been an iovec with zero ->iov_len in
> readv(2) arguments. I wonder if we should use that kind of
> pathology (readv() with zero-length segment in the middle of
> iovec array) for regression tests...
>
> OK... First of all, since that kind of crap can happen,
> let's do this (incremental to be folded); then (and that's
> a separate patch) we ought to switch the proc_ops with ->proc_read
> equal to seq_read to ->proc_read_iter = seq_read_iter, so that
> those guys would not mess with seq_read() wrapper at all.
>
> Finally, is there any point having do_loop_readv_writev()
> call any methods for zero-length segments?
>
> In any case, the following should be folded into
> "fix return values of seq_read_iter()"; could you check if that
> fixes the problem you are seeing?
>
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index 07b33c1f34a9..e66d6b8bae23 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -211,9 +211,9 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> m->count -= n;
> m->from += n;
> copied += n;
> - if (!iov_iter_count(iter) || m->count)
> - goto Done;
> }
> + if (m->count || !iov_iter_count(iter))
> + goto Done;
> /* we need at least one record in buffer */
> m->from = 0;
> p = m->op->start(m, &m->index);
Unfortunately that patch does not solve my issue. Is there any other
debugging I should add?
Cheers,
Nathan
On Fri, Nov 13, 2020 at 09:14:20PM -0700, Nathan Chancellor wrote:
> Unfortunately that patch does not solve my issue. Is there any other
> debugging I should add?
Hmm... I wonder which file it is; how about
if (WARN_ON(!iovec.iov_len))
printk(KERN_ERR "odd readv on %pd4\n", file);
in the loop in fs/read_write.c:do_loop_readv_writev()?
On Sat, Nov 14, 2020 at 05:50:48AM +0000, Al Viro wrote:
> On Fri, Nov 13, 2020 at 09:14:20PM -0700, Nathan Chancellor wrote:
>
> > Unfortunately that patch does not solve my issue. Is there any other
> > debugging I should add?
>
> Hmm... I wonder which file it is; how about
> if (WARN_ON(!iovec.iov_len))
> printk(KERN_ERR "odd readv on %pd4\n", file);
> in the loop in fs/read_write.c:do_loop_readv_writev()?
Assuming you mean this?
diff --git a/fs/read_write.c b/fs/read_write.c
index 75f764b43418..91dc07074a3f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -757,6 +757,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
+ if (WARN_ON(!iovec.iov_len))
+ printk(KERN_ERR "odd readv on %pd4\n", filp);
+
if (type == READ) {
nr = filp->f_op->read(filp, iovec.iov_base,
iovec.iov_len, ppos);
---
Assuming so, I have attached the output both with and without the
WARN_ON. Looks like mountinfo is what is causing the error?
Cheers,
Nathan
On Fri, Nov 13, 2020 at 11:19:34PM -0700, Nathan Chancellor wrote:
> Assuming so, I have attached the output both with and without the
> WARN_ON. Looks like mountinfo is what is causing the error?
Cute... FWIW, on #origin + that commit with fix folded in I don't
see anything unusual in reads from mountinfo ;-/ OTOH, they'd
obviously been... creative with readv(2) arguments, so it would
be very interesting to see what it is they are passing to it.
I'm half-asleep right now; will try to cook something to gather
that information tomorrow morning. 'Later...
On Sat, Nov 14, 2020 at 07:00:25AM +0000, Al Viro wrote:
> On Fri, Nov 13, 2020 at 11:19:34PM -0700, Nathan Chancellor wrote:
>
> > Assuming so, I have attached the output both with and without the
> > WARN_ON. Looks like mountinfo is what is causing the error?
>
> Cute... FWIW, on #origin + that commit with fix folded in I don't
> see anything unusual in reads from mountinfo ;-/ OTOH, they'd
> obviously been... creative with readv(2) arguments, so it would
> be very interesting to see what it is they are passing to it.
>
> I'm half-asleep right now; will try to cook something to gather
> that information tomorrow morning. 'Later...
OK, so let's do this: fix in seq_read_iter() + in do_loop_readv_writev()
(on entry) the following (racy as hell, but will do for debugging):
bool weird = false;
if (unlikely(memcmp(file->f_path.dentry->d_name.name, "mountinfo", 10))) {
int i;
for (i = 0; i < iter->nr_segs; i++)
if (!iter->iov[i].iov_len)
weird = true;
if (weird) {
printk(KERN_ERR "[%s]: weird readv on %p4D (%ld) ",
current->comm, filp, (long)filp->f_pos);
for (i = 0; i < iter->nr_segs; i++)
printk(KERN_CONT "%c%zd", i ? ':' : '<',
iter->iov[i].iov_len);
printk(KERN_CONT "> ");
}
}
and in the end (just before return)
if (weird)
printk(KERN_CONT "-> %zd\n", ret);
Preferably along with the results of cat /proc/<whatever it is>/mountinfo both
on that and on the working kernel...
On Fri, Nov 13, 2020 at 04:54:53PM -0700, Nathan Chancellor wrote:
> Hi Al,
>
> On Wed, Nov 11, 2020 at 10:21:16PM +0000, Al Viro wrote:
> > On Wed, Nov 11, 2020 at 09:52:20PM +0000, Al Viro wrote:
> >
> > > That can be done, but I would rather go with
> > > n = copy_to_iter(m->buf + m->from, m->count, iter);
> > > m->count -= n;
> > > m->from += n;
> > > copied += n;
> > > if (!size)
> > > goto Done;
> > > if (m->count)
> > > goto Efault;
> > > if we do it that way. Let me see if I can cook something
> > > reasonable along those lines...
> >
> > Something like below (build-tested only):
> >
> > diff --git a/fs/seq_file.c b/fs/seq_file.c
> > index 3b20e21604e7..07b33c1f34a9 100644
> > --- a/fs/seq_file.c
> > +++ b/fs/seq_file.c
> > @@ -168,7 +168,6 @@ EXPORT_SYMBOL(seq_read);
> > ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> > {
> > struct seq_file *m = iocb->ki_filp->private_data;
> > - size_t size = iov_iter_count(iter);
> > size_t copied = 0;
> > size_t n;
> > void *p;
> > @@ -208,14 +207,11 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> > }
> > /* if not empty - flush it first */
> > if (m->count) {
> > - n = min(m->count, size);
> > - if (copy_to_iter(m->buf + m->from, n, iter) != n)
> > - goto Efault;
> > + n = copy_to_iter(m->buf + m->from, m->count, iter);
> > m->count -= n;
> > m->from += n;
> > - size -= n;
> > copied += n;
> > - if (!size)
> > + if (!iov_iter_count(iter) || m->count)
> > goto Done;
> > }
> > /* we need at least one record in buffer */
> > @@ -249,6 +245,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> > goto Done;
> > Fill:
> > /* they want more? let's try to get some more */
> > + /* m->count is positive and there's space left in iter */
> > while (1) {
> > size_t offs = m->count;
> > loff_t pos = m->index;
> > @@ -263,7 +260,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> > err = PTR_ERR(p);
> > break;
> > }
> > - if (m->count >= size)
> > + if (m->count >= iov_iter_count(iter))
> > break;
> > err = m->op->show(m, p);
> > if (seq_has_overflowed(m) || err) {
> > @@ -273,16 +270,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> > }
> > }
> > m->op->stop(m, p);
> > - n = min(m->count, size);
> > - if (copy_to_iter(m->buf, n, iter) != n)
> > - goto Efault;
> > + n = copy_to_iter(m->buf, m->count, iter);
> > copied += n;
> > m->count -= n;
> > m->from = n;
> > Done:
> > - if (!copied)
> > - copied = err;
> > - else {
> > + if (unlikely(!copied)) {
> > + copied = m->count ? -EFAULT : err;
> > + } else {
> > iocb->ki_pos += copied;
> > m->read_pos += copied;
> > }
> > @@ -291,9 +286,6 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> > Enomem:
> > err = -ENOMEM;
> > goto Done;
> > -Efault:
> > - err = -EFAULT;
> > - goto Done;
> > }
> > EXPORT_SYMBOL(seq_read_iter);
> >
>
> This patch in -next (6a9f696d1627bacc91d1cebcfb177f474484e8ba) breaks
> WSL2's interoperability feature, where Windows paths automatically get
> added to PATH on start up so that Windows binaries can be accessed from
> within Linux (such as clip.exe to pipe output to the clipboard). Before,
> I would see a bunch of Linux + Windows folders in $PATH but after, I
> only see the Linux folders (I can give you the actual PATH value if you
> care but it is really long).
>
> I am not at all familiar with the semantics of this patch or how
> Microsoft would be using it to inject folders into PATH (they have some
> documentation on it here:
> https://docs.microsoft.com/en-us/windows/wsl/interop) and I am not sure
> how to go about figuring that out to see why this patch breaks something
> (unless you have an idea). I have added the Hyper-V maintainers and list
> to CC in case they know someone who could help.
FWIW, just to make sure:
1) does reverting just that commit recover the desired behaviour?
2) could you verify that your latest tests had been done with
the incremental I'd posted (shifting the if (....) goto Done; out of the if
body)?
3) does the build with that commit reverted produce any warnings
related to mountinfo?
4) your posted log with WARN_ON unfortunately starts *after*
the mountinfo accesses; could you check which process had been doing those?
The Comm: ... part in there, that is.
5) in the "I don't believe that could happen, but let's make sure"
department: turn the
/* m->count is positive and there's space left in iter */
comment in seq_read_iter() into an outright
BUG_ON(!m->count || !iov_iter_count(iter));
On Sat, Nov 14, 2020 at 08:50:00PM +0000, Al Viro wrote:
OK, I think I understand what's going on. Could you check if
reverting the variant in -next and applying the following instead
fixes what you are seeing?
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..35667112bbd1 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -168,7 +168,6 @@ EXPORT_SYMBOL(seq_read);
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct seq_file *m = iocb->ki_filp->private_data;
- size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
@@ -208,16 +207,15 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
/* if not empty - flush it first */
if (m->count) {
- n = min(m->count, size);
- if (copy_to_iter(m->buf + m->from, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf + m->from, m->count, iter);
m->count -= n;
m->from += n;
- size -= n;
copied += n;
- if (!size)
- goto Done;
+ if (m->count)
+ goto Efault;
}
+ if (!iov_iter_count(iter))
+ goto Done;
/* we need at least one record in buffer */
m->from = 0;
p = m->op->start(m, &m->index);
@@ -249,6 +247,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto Done;
Fill:
/* they want more? let's try to get some more */
+ /* m->count is positive and there's space left in iter */
while (1) {
size_t offs = m->count;
loff_t pos = m->index;
@@ -263,7 +262,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
err = PTR_ERR(p);
break;
}
- if (m->count >= size)
+ if (m->count >= iov_iter_count(iter))
break;
err = m->op->show(m, p);
if (seq_has_overflowed(m) || err) {
@@ -273,12 +272,12 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
}
m->op->stop(m, p);
- n = min(m->count, size);
- if (copy_to_iter(m->buf, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf, m->count, iter);
copied += n;
- m->count -= n;
m->from = n;
+ m->count -= n;
+ if (m->count)
+ goto Efault;
Done:
if (!copied)
copied = err;
On Sun, Nov 15, 2020 at 7:54 AM Al Viro <[email protected]> wrote:
>
> OK, I think I understand what's going on. Could you check if
> reverting the variant in -next and applying the following instead
> fixes what you are seeing?
Side note: if this ends up working, can you add a lot of comments
about this thing (both in the code and the commit message)? It
confused both Christoph and me, and clearly you were stumped too.
That's not a great sign.
Linus
Hi Al,
Apologies for the delay.
On Sun, Nov 15, 2020 at 03:53:55PM +0000, Al Viro wrote:
> On Sat, Nov 14, 2020 at 08:50:00PM +0000, Al Viro wrote:
>
> OK, I think I understand what's going on. Could you check if
> reverting the variant in -next and applying the following instead
> fixes what you are seeing?
The below diff on top of d4d50710a8b46082224376ef119a4dbb75b25c56 does
not fix my issue unfortunately.
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index 3b20e21604e7..35667112bbd1 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -168,7 +168,6 @@ EXPORT_SYMBOL(seq_read);
> ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> {
> struct seq_file *m = iocb->ki_filp->private_data;
> - size_t size = iov_iter_count(iter);
> size_t copied = 0;
> size_t n;
> void *p;
> @@ -208,16 +207,15 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> }
> /* if not empty - flush it first */
> if (m->count) {
> - n = min(m->count, size);
> - if (copy_to_iter(m->buf + m->from, n, iter) != n)
> - goto Efault;
> + n = copy_to_iter(m->buf + m->from, m->count, iter);
> m->count -= n;
> m->from += n;
> - size -= n;
> copied += n;
> - if (!size)
> - goto Done;
> + if (m->count)
> + goto Efault;
> }
> + if (!iov_iter_count(iter))
> + goto Done;
> /* we need at least one record in buffer */
> m->from = 0;
> p = m->op->start(m, &m->index);
> @@ -249,6 +247,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> goto Done;
> Fill:
> /* they want more? let's try to get some more */
> + /* m->count is positive and there's space left in iter */
> while (1) {
> size_t offs = m->count;
> loff_t pos = m->index;
> @@ -263,7 +262,7 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> err = PTR_ERR(p);
> break;
> }
> - if (m->count >= size)
> + if (m->count >= iov_iter_count(iter))
> break;
> err = m->op->show(m, p);
> if (seq_has_overflowed(m) || err) {
> @@ -273,12 +272,12 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> }
> }
> m->op->stop(m, p);
> - n = min(m->count, size);
> - if (copy_to_iter(m->buf, n, iter) != n)
> - goto Efault;
> + n = copy_to_iter(m->buf, m->count, iter);
> copied += n;
> - m->count -= n;
> m->from = n;
> + m->count -= n;
> + if (m->count)
> + goto Efault;
> Done:
> if (!copied)
> copied = err;
>
Replying to your two other messages here:
> FWIW, just to make sure:
> 1) does reverting just that commit recover the desired behaviour?
Yes, d4d50710a8b46082224376ef119a4dbb75b25c56 is fine,
6a9f696d1627bacc91d1cebcfb177f474484e8ba is broken.
$ uname -r
5.10.0-rc2-microsoft-00001-gd4d50710a8b4
$ echo $PATH
/home/nathan/usr/bin:/home/nathan/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/mnt/c/Windows/system32:...
$ uname -r
5.10.0-rc2-microsoft-00002-g6a9f696d1627
$ echo $PATH
/home/nathan/usr/bin:/home/nathan/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games
> 2) could you verify that your latest tests had been done with
> the incremental I'd posted (shifting the if (....) goto Done; out of the if
> body)?
> 3) does the build with that commit reverted produce any warnings
> related to mountinfo?
d4d50710a8b46082224376ef119a4dbb75b25c56:
$ dmesg -l err
[ 0.064825] PCI: Fatal: No config space access function found
[ 0.077436] kvm: no hardware support
[ 0.077438] kvm: no hardware support
[ 0.108227] hv_utils: cannot register PTP clock: 0
[ 2.518229] FS-Cache: Duplicate cookie detected
[ 2.518232] FS-Cache: O-cookie c=000000005d51d0cd [p=000000006bb17fa6 fl=222 nc=0 na=1]
[ 2.518232] FS-Cache: O-cookie d=0000000021f3f873 n=000000002fb0c46e
[ 2.518233] FS-Cache: O-key=[10] '34323934393337353435'
[ 2.518236] FS-Cache: N-cookie c=000000002e0fa15c [p=000000006bb17fa6 fl=2 nc=0 na=1]
[ 2.518236] FS-Cache: N-cookie d=0000000021f3f873 n=00000000a3943d77
[ 2.518236] FS-Cache: N-key=[10] '34323934393337353435'
6a9f696d1627bacc91d1cebcfb177f474484e8ba:
$ dmesg -l err
[ 0.064266] PCI: Fatal: No config space access function found
[ 0.077833] kvm: no hardware support
[ 0.077835] kvm: no hardware support
[ 0.107284] hv_utils: cannot register PTP clock: 0
[ 0.221703] init: (235) ERROR: LogException:36: LOCALHOST: Could not start localhost port scanner.
[ 2.428629] FS-Cache: Duplicate cookie detected
[ 2.428631] FS-Cache: O-cookie c=000000008dc18c92 [p=00000000e8a82afe fl=222 nc=0 na=1]
[ 2.428631] FS-Cache: O-cookie d=000000007254ca01 n=0000000009bd1860
[ 2.428632] FS-Cache: O-key=[10] '34323934393337353336'
[ 2.428635] FS-Cache: N-cookie c=000000005941e9ee [p=00000000e8a82afe fl=2 nc=0 na=1]
[ 2.428635] FS-Cache: N-cookie d=000000007254ca01 n=0000000095ef7a9f
[ 2.428636] FS-Cache: N-key=[10] '34323934393337353336'
[ 2.438529] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.438535] init: (9) ERROR: CreateProcessParseCommon:849: Failed to translate C:\Users\natec
[ 2.438550] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.438552] init: (9) ERROR: UtilTranslatePathList:2373: Failed to translate C:\Windows\system32
[ 2.438558] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.438559] init: (9) ERROR: UtilTranslatePathList:2373: Failed to translate C:\Windows
[ 2.438565] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
6a9f696d1627bacc91d1cebcfb177f474484e8ba + shifting the if (....) goto Done; out of the if body:
$ echo $PATH
/home/nathan/usr/bin:/home/nathan/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games
$ dmesg -l err
[ 0.059315] PCI: Fatal: No config space access function found
[ 0.077335] kvm: no hardware support
[ 0.077336] kvm: no hardware support
[ 0.107422] hv_utils: cannot register PTP clock: 0
[ 2.437321] FS-Cache: Duplicate cookie detected
[ 2.437323] FS-Cache: O-cookie c=0000000061ae161f [p=000000005e0c26a4 fl=222 nc=0 na=1]
[ 2.437323] FS-Cache: O-cookie d=000000006e487749 n=00000000d6f2b7cc
[ 2.437324] FS-Cache: O-key=[10] '34323934393337353337'
[ 2.437327] FS-Cache: N-cookie c=00000000c08b3ba9 [p=000000005e0c26a4 fl=2 nc=0 na=1]
[ 2.437327] FS-Cache: N-cookie d=000000006e487749 n=0000000026c46bbc
[ 2.437328] FS-Cache: N-key=[10] '34323934393337353337'
[ 2.447874] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.447878] init: (9) ERROR: CreateProcessParseCommon:849: Failed to translate C:\Users\natec
[ 2.447905] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.447906] init: (9) ERROR: UtilTranslatePathList:2373: Failed to translate C:\Windows\system32
[ 2.447923] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.447924] init: (9) ERROR: UtilTranslatePathList:2373: Failed to translate C:\Windows
[ 2.447940] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
Your latest diff on top of d4d50710a8b46082224376ef119a4dbb75b25c56:
$ echo $PATH
/home/nathan/usr/bin:/home/nathan/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games
$ dmesg -l err
[ 0.064514] PCI: Fatal: No config space access function found
[ 0.078288] kvm: no hardware support
[ 0.078290] kvm: no hardware support
[ 0.108100] hv_utils: cannot register PTP clock: 0
[ 2.428126] FS-Cache: Duplicate cookie detected
[ 2.428128] FS-Cache: O-cookie c=00000000f90a28fc [p=000000008eaf59d5 fl=222 nc=0 na=1]
[ 2.428128] FS-Cache: O-cookie d=00000000e1e04d39 n=00000000109665e8
[ 2.428129] FS-Cache: O-key=[10] '34323934393337353336'
[ 2.428132] FS-Cache: N-cookie c=00000000c32f0eb9 [p=000000008eaf59d5 fl=2 nc=0 na=1]
[ 2.428132] FS-Cache: N-cookie d=00000000e1e04d39 n=000000003fef461f
[ 2.428132] FS-Cache: N-key=[10] '34323934393337353336'
[ 2.439541] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.439545] init: (9) ERROR: CreateProcessParseCommon:849: Failed to translate C:\Users\natec
[ 2.439571] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.439573] init: (9) ERROR: UtilTranslatePathList:2373: Failed to translate C:\Windows\system32
[ 2.439590] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
[ 2.439591] init: (9) ERROR: UtilTranslatePathList:2373: Failed to translate C:\Windows
[ 2.439607] init: (9) ERROR: UtilFindMount:700: Invalid mountinfo line 14
> 4) your posted log with WARN_ON unfortunately starts *after*
> the mountinfo accesses; could you check which process had been doing those?
> The Comm: ... part in there, that is.
init it looks like. Attached is the output of
---
diff --git a/fs/read_write.c b/fs/read_write.c
index 75f764b43418..37708555cb8d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -757,6 +757,10 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
+ if (unlikely(!memcmp(filp->f_path.dentry->d_name.name, "mountinfo", 10)) &&
+ WARN_ON(!iovec.iov_len))
+ printk(KERN_ERR "odd readv on %pd4\n", filp);
+
if (type == READ) {
nr = filp->f_op->read(filp, iovec.iov_base,
iovec.iov_len, ppos);
---
against your latest diff on top of d4d50710a8b46082224376ef119a4dbb75b25c56.
> 5) in the "I don't believe that could happen, but let's make sure"
> department: turn the
> /* m->count is positive and there's space left in iter */
> comment in seq_read_iter() into an outright
> BUG_ON(!m->count || !iov_iter_count(iter));
Does not look like it triggers against your latest diff.
> OK, so let's do this: fix in seq_read_iter() + in do_loop_readv_writev()
> (on entry) the following (racy as hell, but will do for debugging):
>
> bool weird = false;
>
> if (unlikely(memcmp(file->f_path.dentry->d_name.name, "mountinfo", 10))) {
> int i;
>
> for (i = 0; i < iter->nr_segs; i++)
> if (!iter->iov[i].iov_len)
> weird = true;
> if (weird) {
> printk(KERN_ERR "[%s]: weird readv on %p4D (%ld) ",
> current->comm, filp, (long)filp->f_pos);
> for (i = 0; i < iter->nr_segs; i++)
> printk(KERN_CONT "%c%zd", i ? ':' : '<',
> iter->iov[i].iov_len);
> printk(KERN_CONT "> ");
> }
> }
> and in the end (just before return)
> if (weird)
> printk(KERN_CONT "-> %zd\n", ret);
>
> Preferably along with the results of cat /proc/<whatever it is>/mountinfo both
> on that and on the working kernel...
I applied this against your latest diff, attached as weird_readv.
Good (d4d50710a8b46082224376ef119a4dbb75b25c56):
$ cat /proc/9/mountinfo
29 21 8:0 / / rw,relatime - ext4 /dev/sda rw,discard,no_fc,errors=remount-ro,data=ordered
30 29 0:15 / /mnt/wsl rw,relatime shared:1 - tmpfs tmpfs rw
31 29 0:16 /init /init ro,relatime - 9p tools ro,dirsync,aname=tools;fmask=022,loose,access=client,trans=fd,rfd=6,wfd=6
32 29 0:5 / /dev rw,nosuid,relatime - devtmpfs none rw,size=8176740k,nr_inodes=2044185,mode=755
33 29 0:14 / /sys rw,nosuid,nodev,noexec,noatime - sysfs sysfs rw
34 29 0:19 / /proc rw,nosuid,nodev,noexec,noatime - proc proc rw
35 32 0:20 / /dev/pts rw,nosuid,noexec,noatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
36 29 0:21 / /run rw,nosuid,noexec,noatime - tmpfs none rw,mode=755
37 36 0:22 / /run/lock rw,nosuid,nodev,noexec,noatime - tmpfs none rw
38 36 0:23 / /run/shm rw,nosuid,nodev,noatime - tmpfs none rw
39 36 0:24 / /run/user rw,nosuid,nodev,noexec,noatime - tmpfs none rw,mode=755
40 34 0:17 / /proc/sys/fs/binfmt_misc rw,relatime - binfmt_misc binfmt_misc rw
41 33 0:25 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
42 41 0:26 / /sys/fs/cgroup/unified rw,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw,nsdelegate
43 41 0:27 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset
44 41 0:28 / /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu
45 41 0:29 / /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuacct
46 41 0:30 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
47 41 0:31 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
48 41 0:32 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
49 41 0:33 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
50 41 0:34 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls
51 41 0:35 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
52 41 0:36 / /sys/fs/cgroup/net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_prio
53 41 0:37 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
54 41 0:38 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,pids
55 41 0:39 / /sys/fs/cgroup/rdma rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,rdma
90 29 0:40 / /mnt/c rw,noatime - 9p C:\134 rw,dirsync,aname=drvfs;path=C:\;uid=1000;gid=1000;symlinkroot=/mnt/,mmap,access=client,msize=65536,trans=fd,rfd=8,wfd=8
91 29 0:41 / /mnt/d rw,noatime - 9p D:\134 rw,dirsync,aname=drvfs;path=D:\;uid=1000;gid=1000;symlinkroot=/mnt/,mmap,access=client,msize=65536,trans=fd,rfd=8,wfd=8
Bad (your latest diff on top of d4d50710a8b46082224376ef119a4dbb75b25c56
$ cat /proc/9/mountinfo
29 21 8:0 / / rw,relatime - ext4 /dev/sda rw,discard,no_fc,errors=remount-ro,data=ordered
30 29 0:15 / /mnt/wsl rw,relatime shared:1 - tmpfs tmpfs rw
31 29 0:16 /init /init ro,relatime - 9p tools ro,dirsync,aname=tools;fmask=022,loose,access=client,trans=fd,rfd=6,wfd=6
32 29 0:5 / /dev rw,nosuid,relatime - devtmpfs none rw,size=8176740k,nr_inodes=2044185,mode=755
33 29 0:14 / /sys rw,nosuid,nodev,noexec,noatime - sysfs sysfs rw
34 29 0:19 / /proc rw,nosuid,nodev,noexec,noatime - proc proc rw
35 32 0:20 / /dev/pts rw,nosuid,noexec,noatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
36 29 0:21 / /run rw,nosuid,noexec,noatime - tmpfs none rw,mode=755
37 36 0:22 / /run/lock rw,nosuid,nodev,noexec,noatime - tmpfs none rw
38 36 0:23 / /run/shm rw,nosuid,nodev,noatime - tmpfs none rw
39 36 0:24 / /run/user rw,nosuid,nodev,noexec,noatime - tmpfs none rw,mode=755
40 34 0:17 / /proc/sys/fs/binfmt_misc rw,relatime - binfmt_misc binfmt_misc rw
41 33 0:25 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
42 41 0:26 / /sys/fs/cgroup/unified rw,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw,nsdelegate
43 41 0:27 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset
44 41 0:28 / /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu
45 41 0:29 / /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuacct
46 41 0:30 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
47 41 0:31 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
48 41 0:32 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
49 41 0:33 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
50 41 0:34 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls
51 41 0:35 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
52 41 0:36 / /sys/fs/cgroup/net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_prio
53 41 0:37 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
54 41 0:38 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,pids
55 41 0:39 / /sys/fs/cgroup/rdma rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,rdma
90 29 0:40 / /mnt/c rw,noatime - 9p C:\134 rw,dirsync,aname=drvfs;path=C:\;uid=1000;gid=1000;symlinkroot=/mnt/,mmap,access=client,msize=65536,trans=fd,rfd=8,wfd=8
91 29 0:41 / /mnt/d rw,noatime - 9p D:\134 rw,dirsync,aname=drvfs;path=D:\;uid=1000;gid=1000;symlinkroot=/mnt/,mmap,access=client,msize=65536,trans=fd,rfd=8,wfd=8
If you need any more information, please let me know!
Cheers,
Nathan
On Sun, Nov 15, 2020 at 02:41:25PM -0700, Nathan Chancellor wrote:
> Hi Al,
>
> Apologies for the delay.
>
> On Sun, Nov 15, 2020 at 03:53:55PM +0000, Al Viro wrote:
> > On Sat, Nov 14, 2020 at 08:50:00PM +0000, Al Viro wrote:
> >
> > OK, I think I understand what's going on. Could you check if
> > reverting the variant in -next and applying the following instead
> > fixes what you are seeing?
>
> The below diff on top of d4d50710a8b46082224376ef119a4dbb75b25c56 does
> not fix my issue unfortunately.
OK... Now that I have a reproducer[1], I think I've sorted it out.
And yes, it had been too subtle for its own good ;-/
[1] I still wonder what the hell in the userland has come up with the
idea of reading through a file with readv(), each time with 2-element
iovec array, the first element covering 0 bytes and the second one - 1024.
AFAICS, nothing is systemd git appears to be _that_ weird... Makes for
a useful testcase, though...
Anyway, could you test this replacement?
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..c0dfe2861b35 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read);
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct seq_file *m = iocb->ki_filp->private_data;
- size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
int err = 0;
+ if (!iov_iter_count(iter))
+ return 0;
+
mutex_lock(&m->lock);
/*
@@ -208,34 +210,32 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
/* if not empty - flush it first */
if (m->count) {
- n = min(m->count, size);
- if (copy_to_iter(m->buf + m->from, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf + m->from, m->count, iter);
m->count -= n;
m->from += n;
- size -= n;
copied += n;
- if (!size)
+ if (m->count) // hadn't managed to copy everything
goto Done;
}
- /* we need at least one record in buffer */
+ /* we need at least one non-empty record in the buffer */
m->from = 0;
p = m->op->start(m, &m->index);
while (1) {
err = PTR_ERR(p);
- if (!p || IS_ERR(p))
+ if (!p || IS_ERR(p)) // EOF or an error
break;
err = m->op->show(m, p);
- if (err < 0)
+ if (err < 0) // hard error
break;
- if (unlikely(err))
+ if (unlikely(err)) // ->show() says "skip it"
m->count = 0;
- if (unlikely(!m->count)) {
+ if (unlikely(!m->count)) { // empty record
p = m->op->next(m, p, &m->index);
continue;
}
- if (m->count < m->size)
+ if (!seq_has_overflowed(m)) // got it
goto Fill;
+ // need a bigger buffer
m->op->stop(m, p);
kvfree(m->buf);
m->count = 0;
@@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto Enomem;
p = m->op->start(m, &m->index);
}
+ // EOF or an error
m->op->stop(m, p);
m->count = 0;
goto Done;
Fill:
- /* they want more? let's try to get some more */
+ // one non-empty record is in the buffer; if they want more,
+ // try to fit more in, but in any case we need to advance
+ // the iterator at least once.
while (1) {
size_t offs = m->count;
loff_t pos = m->index;
@@ -259,11 +262,9 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
m->op->next);
m->index++;
}
- if (!p || IS_ERR(p)) {
- err = PTR_ERR(p);
+ if (!p || IS_ERR(p)) // no next record for us
break;
- }
- if (m->count >= size)
+ if (m->count >= iov_iter_count(iter))
break;
err = m->op->show(m, p);
if (seq_has_overflowed(m) || err) {
@@ -273,16 +274,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
}
m->op->stop(m, p);
- n = min(m->count, size);
- if (copy_to_iter(m->buf, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf, m->count, iter);
copied += n;
m->count -= n;
m->from = n;
Done:
- if (!copied)
- copied = err;
- else {
+ if (unlikely(!copied)) {
+ copied = m->count ? -EFAULT : err;
+ } else {
iocb->ki_pos += copied;
m->read_pos += copied;
}
@@ -291,9 +290,6 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
Enomem:
err = -ENOMEM;
goto Done;
-Efault:
- err = -EFAULT;
- goto Done;
}
EXPORT_SYMBOL(seq_read_iter);
On Sun, Nov 15, 2020 at 11:38:14PM +0000, Al Viro wrote:
> On Sun, Nov 15, 2020 at 02:41:25PM -0700, Nathan Chancellor wrote:
> > Hi Al,
> >
> > Apologies for the delay.
> >
> > On Sun, Nov 15, 2020 at 03:53:55PM +0000, Al Viro wrote:
> > > On Sat, Nov 14, 2020 at 08:50:00PM +0000, Al Viro wrote:
> > >
> > > OK, I think I understand what's going on. Could you check if
> > > reverting the variant in -next and applying the following instead
> > > fixes what you are seeing?
> >
> > The below diff on top of d4d50710a8b46082224376ef119a4dbb75b25c56 does
> > not fix my issue unfortunately.
>
> OK... Now that I have a reproducer[1], I think I've sorted it out.
> And yes, it had been too subtle for its own good ;-/
>
> [1] I still wonder what the hell in the userland has come up with the
> idea of reading through a file with readv(), each time with 2-element
> iovec array, the first element covering 0 bytes and the second one - 1024.
> AFAICS, nothing is systemd git appears to be _that_ weird... Makes for
> a useful testcase, though...
>
> Anyway, could you test this replacement?
Looks good to me on top of d4d50710a8b46082224376ef119a4dbb75b25c56,
thanks for quickly looking into this!
Tested-by: Nathan Chancellor <[email protected]>
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index 3b20e21604e7..c0dfe2861b35 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read);
> ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> {
> struct seq_file *m = iocb->ki_filp->private_data;
> - size_t size = iov_iter_count(iter);
> size_t copied = 0;
> size_t n;
> void *p;
> int err = 0;
>
> + if (!iov_iter_count(iter))
> + return 0;
> +
> mutex_lock(&m->lock);
>
> /*
> @@ -208,34 +210,32 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> }
> /* if not empty - flush it first */
> if (m->count) {
> - n = min(m->count, size);
> - if (copy_to_iter(m->buf + m->from, n, iter) != n)
> - goto Efault;
> + n = copy_to_iter(m->buf + m->from, m->count, iter);
> m->count -= n;
> m->from += n;
> - size -= n;
> copied += n;
> - if (!size)
> + if (m->count) // hadn't managed to copy everything
> goto Done;
> }
> - /* we need at least one record in buffer */
> + /* we need at least one non-empty record in the buffer */
> m->from = 0;
> p = m->op->start(m, &m->index);
> while (1) {
> err = PTR_ERR(p);
> - if (!p || IS_ERR(p))
> + if (!p || IS_ERR(p)) // EOF or an error
> break;
> err = m->op->show(m, p);
> - if (err < 0)
> + if (err < 0) // hard error
> break;
> - if (unlikely(err))
> + if (unlikely(err)) // ->show() says "skip it"
> m->count = 0;
> - if (unlikely(!m->count)) {
> + if (unlikely(!m->count)) { // empty record
> p = m->op->next(m, p, &m->index);
> continue;
> }
> - if (m->count < m->size)
> + if (!seq_has_overflowed(m)) // got it
> goto Fill;
> + // need a bigger buffer
> m->op->stop(m, p);
> kvfree(m->buf);
> m->count = 0;
> @@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> goto Enomem;
> p = m->op->start(m, &m->index);
> }
> + // EOF or an error
> m->op->stop(m, p);
> m->count = 0;
> goto Done;
> Fill:
> - /* they want more? let's try to get some more */
> + // one non-empty record is in the buffer; if they want more,
> + // try to fit more in, but in any case we need to advance
> + // the iterator at least once.
> while (1) {
> size_t offs = m->count;
> loff_t pos = m->index;
> @@ -259,11 +262,9 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> m->op->next);
> m->index++;
> }
> - if (!p || IS_ERR(p)) {
> - err = PTR_ERR(p);
> + if (!p || IS_ERR(p)) // no next record for us
> break;
> - }
> - if (m->count >= size)
> + if (m->count >= iov_iter_count(iter))
> break;
> err = m->op->show(m, p);
> if (seq_has_overflowed(m) || err) {
> @@ -273,16 +274,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> }
> }
> m->op->stop(m, p);
> - n = min(m->count, size);
> - if (copy_to_iter(m->buf, n, iter) != n)
> - goto Efault;
> + n = copy_to_iter(m->buf, m->count, iter);
> copied += n;
> m->count -= n;
> m->from = n;
> Done:
> - if (!copied)
> - copied = err;
> - else {
> + if (unlikely(!copied)) {
> + copied = m->count ? -EFAULT : err;
> + } else {
> iocb->ki_pos += copied;
> m->read_pos += copied;
> }
> @@ -291,9 +290,6 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> Enomem:
> err = -ENOMEM;
> goto Done;
> -Efault:
> - err = -EFAULT;
> - goto Done;
> }
> EXPORT_SYMBOL(seq_read_iter);
>
On Sun, Nov 15, 2020 at 04:51:49PM -0700, Nathan Chancellor wrote:
> Looks good to me on top of d4d50710a8b46082224376ef119a4dbb75b25c56,
> thanks for quickly looking into this!
>
> Tested-by: Nathan Chancellor <[email protected]>
OK... a variant with (hopefully) better comments and cleaned up
logics in the second loop (
if (seq_has_overflowed(m) || err) {
m->count = offs;
if (likely(err <= 0))
break;
}
replaced with
if (err > 0) { // ->show() says "skip it"
m->count = offs;
} else if (err || seq_has_overflowed(m)) {
m->count = offs;
break;
}
) follows. I'm quite certain that it is an equivalent transformation
(seq_has_overflowed() has no side effects) and IMO it's slightly
more readable that way. Survives local beating; could you check if
it's still OK with your testcase? Equivalent transformation or not,
I'd rather not slap anyone's Tested-by: on a modified variant of
patch...
BTW, is that call of readv() really coming from init? And if it
is, what version of init are you using?
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..03a369ccd28c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read);
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct seq_file *m = iocb->ki_filp->private_data;
- size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
int err = 0;
+ if (!iov_iter_count(iter))
+ return 0;
+
mutex_lock(&m->lock);
/*
@@ -206,36 +208,34 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!m->buf)
goto Enomem;
}
- /* if not empty - flush it first */
+ // something left in the buffer - copy it out first
if (m->count) {
- n = min(m->count, size);
- if (copy_to_iter(m->buf + m->from, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf + m->from, m->count, iter);
m->count -= n;
m->from += n;
- size -= n;
copied += n;
- if (!size)
+ if (m->count) // hadn't managed to copy everything
goto Done;
}
- /* we need at least one record in buffer */
+ // get a non-empty record in the buffer
m->from = 0;
p = m->op->start(m, &m->index);
while (1) {
err = PTR_ERR(p);
- if (!p || IS_ERR(p))
+ if (!p || IS_ERR(p)) // EOF or an error
break;
err = m->op->show(m, p);
- if (err < 0)
+ if (err < 0) // hard error
break;
- if (unlikely(err))
+ if (unlikely(err)) // ->show() says "skip it"
m->count = 0;
- if (unlikely(!m->count)) {
+ if (unlikely(!m->count)) { // empty record
p = m->op->next(m, p, &m->index);
continue;
}
- if (m->count < m->size)
+ if (!seq_has_overflowed(m)) // got it
goto Fill;
+ // need a bigger buffer
m->op->stop(m, p);
kvfree(m->buf);
m->count = 0;
@@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto Enomem;
p = m->op->start(m, &m->index);
}
+ // EOF or an error
m->op->stop(m, p);
m->count = 0;
goto Done;
Fill:
- /* they want more? let's try to get some more */
+ // one non-empty record is in the buffer; if they want more,
+ // try to fit more in, but in any case we need to advance
+ // the iterator once for every record shown.
while (1) {
size_t offs = m->count;
loff_t pos = m->index;
@@ -259,30 +262,27 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
m->op->next);
m->index++;
}
- if (!p || IS_ERR(p)) {
- err = PTR_ERR(p);
+ if (!p || IS_ERR(p)) // no next record for us
break;
- }
- if (m->count >= size)
+ if (m->count >= iov_iter_count(iter))
break;
err = m->op->show(m, p);
- if (seq_has_overflowed(m) || err) {
+ if (err > 0) { // ->show() says "skip it"
m->count = offs;
- if (likely(err <= 0))
- break;
+ } else if (err || seq_has_overflowed(m)) {
+ m->count = offs;
+ break;
}
}
m->op->stop(m, p);
- n = min(m->count, size);
- if (copy_to_iter(m->buf, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf, m->count, iter);
copied += n;
m->count -= n;
m->from = n;
Done:
- if (!copied)
- copied = err;
- else {
+ if (unlikely(!copied)) {
+ copied = m->count ? -EFAULT : err;
+ } else {
iocb->ki_pos += copied;
m->read_pos += copied;
}
@@ -291,9 +291,6 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
Enomem:
err = -ENOMEM;
goto Done;
-Efault:
- err = -EFAULT;
- goto Done;
}
EXPORT_SYMBOL(seq_read_iter);
On Mon, Nov 16, 2020 at 12:25:13AM +0000, Al Viro wrote:
> On Sun, Nov 15, 2020 at 04:51:49PM -0700, Nathan Chancellor wrote:
> > Looks good to me on top of d4d50710a8b46082224376ef119a4dbb75b25c56,
> > thanks for quickly looking into this!
> >
> > Tested-by: Nathan Chancellor <[email protected]>
>
> OK... a variant with (hopefully) better comments and cleaned up
> logics in the second loop (
> if (seq_has_overflowed(m) || err) {
> m->count = offs;
> if (likely(err <= 0))
> break;
> }
> replaced with
> if (err > 0) { // ->show() says "skip it"
> m->count = offs;
> } else if (err || seq_has_overflowed(m)) {
> m->count = offs;
> break;
> }
> ) follows. I'm quite certain that it is an equivalent transformation
> (seq_has_overflowed() has no side effects) and IMO it's slightly
> more readable that way. Survives local beating; could you check if
> it's still OK with your testcase? Equivalent transformation or not,
> I'd rather not slap anyone's Tested-by: on a modified variant of
> patch...
Still good.
Tested-by: Nathan Chancellor <[email protected]>
> BTW, is that call of readv() really coming from init? And if it
> is, what version of init are you using?
I believe that it is but since this is WSL2, I believe that /init is a
proprietary Microsoft implementation, rather than systemd or another
init system:
https://wiki.ubuntu.com/WSL#Keeping_Ubuntu_Updated_in_WSL
So I am not sure how possible it is to see exactly what is going on or
getting it improved.
Cheers,
Nathan
On Sun, Nov 15, 2020 at 05:34:16PM -0700, Nathan Chancellor wrote:
> Still good.
>
> Tested-by: Nathan Chancellor <[email protected]>
Pushed into #fixes
> > BTW, is that call of readv() really coming from init? And if it
> > is, what version of init are you using?
>
> I believe that it is but since this is WSL2, I believe that /init is a
> proprietary Microsoft implementation, rather than systemd or another
> init system:
>
> https://wiki.ubuntu.com/WSL#Keeping_Ubuntu_Updated_in_WSL
>
> So I am not sure how possible it is to see exactly what is going on or
> getting it improved.
Oh, well... Anyway, as a regression test it's interesting:
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
main()
{
static char s[1024];
static struct iovec v[2] = {{NULL, 0}, {s, 1024}};
for(;;) {
ssize_t n = readv(0, v, 2), m, w;
if (n < 0) {
perror("readv");
return -1;
}
if (!n)
return 0;
for (m = 0; m < n; m += w) {
w = write(1, s + m, n - m);
if (w < 0)
perror("write");
}
}
}
which ought to copy stdin to stdout; with this bug it would (on sufficiently
large seq_file-based files) fail with "readv: Bad address" (-EFAULT, that is).
On Mon, Nov 16, 2020 at 03:29:42AM +0000, Al Viro wrote:
> > Still good.
> >
> > Tested-by: Nathan Chancellor <[email protected]>
>
> Pushed into #fixes
Shouldn't this go to Linus before v5.10 is released?
On Fri, Nov 27, 2020 at 05:29:02PM +0100, Christoph Hellwig wrote:
> On Mon, Nov 16, 2020 at 03:29:42AM +0000, Al Viro wrote:
> > > Still good.
> > >
> > > Tested-by: Nathan Chancellor <[email protected]>
> >
> > Pushed into #fixes
>
> Shouldn't this go to Linus before v5.10 is released?
ping?
On Tue, Dec 8, 2020 at 12:53 PM Al Viro <[email protected]> wrote:
>
> Umm... I've got
> fs: Handle I_DONTCACHE in iput_final() instead of generic_drop_inode()
> and
> fs: Kill DCACHE_DONTCACHE dentry even if DCACHE_REFERENCED is set
> in "to apply" pile; if that's what you are talking about,
Yup, those were the ones.
> I don't
> think they are anywhere critical enough for 5.10-final, but I might
> be missing something...
No, I agree, no hurry with them. I just wanted to make sure you're
aware of them.
Linus
On Tue, Dec 8, 2020 at 8:35 AM Christoph Hellwig <[email protected]> wrote:
> >
> > Shouldn't this go to Linus before v5.10 is released?
>
> ping?
So by now I'm a bit worried about this series, because the early fixes
caused more problems than the current state.
So considering the timing and Al having been spotty, I think this is
post-5.10 and marked for stable.
Al, I'm willing to be convinced otherwise, but you need to respond..
Linus
On Tue, Dec 08, 2020 at 10:34:45AM -0800, Linus Torvalds wrote:
> On Tue, Dec 8, 2020 at 8:35 AM Christoph Hellwig <[email protected]> wrote:
> > >
> > > Shouldn't this go to Linus before v5.10 is released?
> >
> > ping?
>
> So by now I'm a bit worried about this series, because the early fixes
> caused more problems than the current state.
>
> So considering the timing and Al having been spotty, I think this is
> post-5.10 and marked for stable.
If you want some sort of "do these really work" validation, these have
been running for a while now in the android 5.10-rc kernels just fine,
as I cherry-picked the patches there to get past their testing issues.
But if you want to wait until after 5.10 is out, that's fine with me
too, it's up to Al.
thanks,
greg k-h
On Tue, Dec 8, 2020 at 11:49 AM Al Viro <[email protected]> wrote:
>
> Said that, it does appear to survive all beating, and it does fix
> a regression introduced in this cycle, so, provided that amount of
> comments in there is OK with you...
Ok, considering Greg's note, I've pulled it. It's early in the last
week, if something comes up we can still fix it.
That said, considering that I think the only use-case was that odd
/proc splice use, and the really special WSL2 thing, and both of those
are verified, it does sound safe to pull.
Famous last words...
Al, since you're around, would you mind looking at the two
DCACHE_DONTCACHE patches too? Honestly, since they seem to be an issue
only for DAX, and only for DAX policy changes, I don't consider them
critical for 5.10, but they've been around for a while now.
Linus
On Tue, Dec 08, 2020 at 12:25:55PM -0800, Linus Torvalds wrote:
> On Tue, Dec 8, 2020 at 11:49 AM Al Viro <[email protected]> wrote:
> >
> > Said that, it does appear to survive all beating, and it does fix
> > a regression introduced in this cycle, so, provided that amount of
> > comments in there is OK with you...
>
> Ok, considering Greg's note, I've pulled it. It's early in the last
> week, if something comes up we can still fix it.
>
> That said, considering that I think the only use-case was that odd
> /proc splice use, and the really special WSL2 thing, and both of those
> are verified, it does sound safe to pull.
>
> Famous last words...
>
> Al, since you're around, would you mind looking at the two
> DCACHE_DONTCACHE patches too? Honestly, since they seem to be an issue
> only for DAX, and only for DAX policy changes, I don't consider them
> critical for 5.10, but they've been around for a while now.
Umm... I've got
fs: Handle I_DONTCACHE in iput_final() instead of generic_drop_inode()
and
fs: Kill DCACHE_DONTCACHE dentry even if DCACHE_REFERENCED is set
in "to apply" pile; if that's what you are talking about, I don't
think they are anywhere critical enough for 5.10-final, but I might
be missing something...
Al, still buried under piles of email ;-/
On Tue, Dec 08, 2020 at 10:34:45AM -0800, Linus Torvalds wrote:
> On Tue, Dec 8, 2020 at 8:35 AM Christoph Hellwig <[email protected]> wrote:
> > >
> > > Shouldn't this go to Linus before v5.10 is released?
> >
> > ping?
>
> So by now I'm a bit worried about this series, because the early fixes
> caused more problems than the current state.
*nod*
Said that, it does appear to survive all beating, and it does fix
a regression introduced in this cycle, so, provided that amount of
comments in there is OK with you...
The following changes since commit d4d50710a8b46082224376ef119a4dbb75b25c56:
seq_file: add seq_read_iter (2020-11-06 10:05:18 -0800)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git fixes
for you to fetch changes up to 4bbf439b09c5ac3f8b3e9584fe080375d8d0ad2d:
fix return values of seq_read_iter() (2020-11-15 22:12:53 -0500)
----------------------------------------------------------------
Al Viro (1):
fix return values of seq_read_iter()
fs/seq_file.c | 57 +++++++++++++++++++++++++++------------------------------
1 file changed, 27 insertions(+), 30 deletions(-)