2014-10-16 13:34:20

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH v11 06/21] vfs: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()

On 25-Sep-2014 04:33:23 PM, Matthew Wilcox wrote:
> From: Matthew Wilcox <[email protected]>
>
> For DAX, we want to be able to copy between iovecs and kernel addresses
> that don't necessarily have a struct page. This is a fairly simple
> rearrangement for bvec iters to kmap the pages outside and pass them in,
> but for user iovecs it gets more complicated because we might try various
> different ways to kmap the memory. Duplicating the existing logic works
> out best in this case.
>
> We need to be able to write zeroes to an iovec for reads from unwritten
> ranges in a file. This is performed by the new iov_iter_zero() function,
> again patterned after the existing code that handles iovec iterators.
>
> Signed-off-by: Matthew Wilcox <[email protected]>
> ---
> include/linux/uio.h | 3 +
> mm/iov_iter.c | 237 ++++++++++++++++++++++++++++++++++++++++++++++++----
> 2 files changed, 226 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 48d64e6..1863ddd 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -80,6 +80,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
> struct iov_iter *i);
> size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> struct iov_iter *i);
> +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i);
> +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
> +size_t iov_iter_zero(size_t bytes, struct iov_iter *);
> unsigned long iov_iter_alignment(const struct iov_iter *i);
> void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
> unsigned long nr_segs, size_t count);
> diff --git a/mm/iov_iter.c b/mm/iov_iter.c
> index ab88dc0..d481fd8 100644
> --- a/mm/iov_iter.c
> +++ b/mm/iov_iter.c
> @@ -4,6 +4,96 @@
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
>
> +static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
> +{
> + size_t skip, copy, left, wanted;
> + const struct iovec *iov;
> + char __user *buf;
> +
> + if (unlikely(bytes > i->count))
> + bytes = i->count;
> +
> + if (unlikely(!bytes))
> + return 0;
> +
> + wanted = bytes;
> + iov = i->iov;
> + skip = i->iov_offset;
> + buf = iov->iov_base + skip;
> + copy = min(bytes, iov->iov_len - skip);
> +
> + left = __copy_to_user(buf, from, copy);

How comes this function uses __copy_to_user without any access_ok()
check ? This has security implications.

> + copy -= left;
> + skip += copy;
> + from += copy;
> + bytes -= copy;
> + while (unlikely(!left && bytes)) {
> + iov++;
> + buf = iov->iov_base;
> + copy = min(bytes, iov->iov_len);
> + left = __copy_to_user(buf, from, copy);

same here.

> + copy -= left;
> + skip = copy;
> + from += copy;
> + bytes -= copy;
> + }
> +
> + if (skip == iov->iov_len) {
> + iov++;
> + skip = 0;
> + }
> + i->count -= wanted - bytes;
> + i->nr_segs -= iov - i->iov;
> + i->iov = iov;
> + i->iov_offset = skip;
> + return wanted - bytes;
> +}
> +
> +static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i)
> +{
> + size_t skip, copy, left, wanted;
> + const struct iovec *iov;
> + char __user *buf;
> +
> + if (unlikely(bytes > i->count))
> + bytes = i->count;
> +
> + if (unlikely(!bytes))
> + return 0;
> +
> + wanted = bytes;
> + iov = i->iov;
> + skip = i->iov_offset;
> + buf = iov->iov_base + skip;
> + copy = min(bytes, iov->iov_len - skip);
> +
> + left = __copy_from_user(to, buf, copy);

same here.

> + copy -= left;
> + skip += copy;
> + to += copy;
> + bytes -= copy;
> + while (unlikely(!left && bytes)) {
> + iov++;
> + buf = iov->iov_base;
> + copy = min(bytes, iov->iov_len);
> + left = __copy_from_user(to, buf, copy);

same.

> + copy -= left;
> + skip = copy;
> + to += copy;
> + bytes -= copy;
> + }
> +
> + if (skip == iov->iov_len) {
> + iov++;
> + skip = 0;
> + }
> + i->count -= wanted - bytes;
> + i->nr_segs -= iov - i->iov;
> + i->iov = iov;
> + i->iov_offset = skip;
> + return wanted - bytes;
> +}
> +
> static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
> struct iov_iter *i)
> {
> @@ -166,6 +256,50 @@ done:
> return wanted - bytes;
> }
>
> +static size_t zero_iovec(size_t bytes, struct iov_iter *i)
> +{
> + size_t skip, copy, left, wanted;
> + const struct iovec *iov;
> + char __user *buf;
> +
> + if (unlikely(bytes > i->count))
> + bytes = i->count;
> +
> + if (unlikely(!bytes))
> + return 0;
> +
> + wanted = bytes;
> + iov = i->iov;
> + skip = i->iov_offset;
> + buf = iov->iov_base + skip;
> + copy = min(bytes, iov->iov_len - skip);
> +
> + left = __clear_user(buf, copy);

I would guess an access_ok() would be needed here too.

> + copy -= left;
> + skip += copy;
> + bytes -= copy;
> +
> + while (unlikely(!left && bytes)) {
> + iov++;
> + buf = iov->iov_base;
> + copy = min(bytes, iov->iov_len);
> + left = __clear_user(buf, copy);

Same.

> + copy -= left;
> + skip = copy;
> + bytes -= copy;
> + }
> +
> + if (skip == iov->iov_len) {
> + iov++;
> + skip = 0;
> + }
> + i->count -= wanted - bytes;
> + i->nr_segs -= iov - i->iov;
> + i->iov = iov;
> + i->iov_offset = skip;
> + return wanted - bytes;
> +}
> +
> static size_t __iovec_copy_from_user_inatomic(char *vaddr,
> const struct iovec *iov, size_t base, size_t bytes)
> {
> @@ -412,12 +546,17 @@ static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t
> kunmap_atomic(to);
> }
>
> -static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
> - struct iov_iter *i)
> +static void memzero_page(struct page *page, size_t offset, size_t len)
> +{
> + char *addr = kmap_atomic(page);
> + memset(addr + offset, 0, len);
> + kunmap_atomic(addr);
> +}
> +
> +static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i)
> {
> size_t skip, copy, wanted;
> const struct bio_vec *bvec;
> - void *kaddr, *from;
>
> if (unlikely(bytes > i->count))
> bytes = i->count;
> @@ -430,8 +569,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
> skip = i->iov_offset;
> copy = min_t(size_t, bytes, bvec->bv_len - skip);
>
> - kaddr = kmap_atomic(page);
> - from = kaddr + offset;
> memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
> skip += copy;
> from += copy;
> @@ -444,7 +581,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
> from += copy;
> bytes -= copy;
> }
> - kunmap_atomic(kaddr);
> if (skip == bvec->bv_len) {
> bvec++;
> skip = 0;
> @@ -456,12 +592,10 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by
> return wanted - bytes;
> }
>
> -static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
> - struct iov_iter *i)
> +static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i)
> {
> size_t skip, copy, wanted;
> const struct bio_vec *bvec;
> - void *kaddr, *to;
>
> if (unlikely(bytes > i->count))
> bytes = i->count;
> @@ -473,10 +607,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
> bvec = i->bvec;
> skip = i->iov_offset;
>
> - kaddr = kmap_atomic(page);
> -
> - to = kaddr + offset;
> -
> copy = min(bytes, bvec->bv_len - skip);
>
> memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
> @@ -493,7 +623,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
> to += copy;
> bytes -= copy;
> }
> - kunmap_atomic(kaddr);
> if (skip == bvec->bv_len) {
> bvec++;
> skip = 0;
> @@ -505,6 +634,61 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t
> return wanted;
> }
>
> +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
> + size_t bytes, struct iov_iter *i)
> +{
> + void *kaddr = kmap_atomic(page);
> + size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);

missing newline.

> + kunmap_atomic(kaddr);
> + return wanted;
> +}
> +
> +static size_t copy_page_from_iter_bvec(struct page *page, size_t offset,
> + size_t bytes, struct iov_iter *i)
> +{
> + void *kaddr = kmap_atomic(page);
> + size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i);

missing newline.

Thanks,

Mathieu

> + kunmap_atomic(kaddr);
> + return wanted;
> +}
> +
> +static size_t zero_bvec(size_t bytes, struct iov_iter *i)
> +{
> + size_t skip, copy, wanted;
> + const struct bio_vec *bvec;
> +
> + if (unlikely(bytes > i->count))
> + bytes = i->count;
> +
> + if (unlikely(!bytes))
> + return 0;
> +
> + wanted = bytes;
> + bvec = i->bvec;
> + skip = i->iov_offset;
> + copy = min_t(size_t, bytes, bvec->bv_len - skip);
> +
> + memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy);
> + skip += copy;
> + bytes -= copy;
> + while (bytes) {
> + bvec++;
> + copy = min(bytes, (size_t)bvec->bv_len);
> + memzero_page(bvec->bv_page, bvec->bv_offset, copy);
> + skip = copy;
> + bytes -= copy;
> + }
> + if (skip == bvec->bv_len) {
> + bvec++;
> + skip = 0;
> + }
> + i->count -= wanted - bytes;
> + i->nr_segs -= bvec - i->bvec;
> + i->bvec = bvec;
> + i->iov_offset = skip;
> + return wanted - bytes;
> +}
> +
> static size_t copy_from_user_bvec(struct page *page,
> struct iov_iter *i, unsigned long offset, size_t bytes)
> {
> @@ -668,6 +852,31 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> }
> EXPORT_SYMBOL(copy_page_from_iter);
>
> +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
> +{
> + if (i->type & ITER_BVEC)
> + return copy_to_iter_bvec(addr, bytes, i);
> + else
> + return copy_to_iter_iovec(addr, bytes, i);
> +}
> +
> +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
> +{
> + if (i->type & ITER_BVEC)
> + return copy_from_iter_bvec(addr, bytes, i);
> + else
> + return copy_from_iter_iovec(addr, bytes, i);
> +}
> +
> +size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
> +{
> + if (i->type & ITER_BVEC) {
> + return zero_bvec(bytes, i);
> + } else {
> + return zero_iovec(bytes, i);
> + }
> +}
> +
> size_t iov_iter_copy_from_user_atomic(struct page *page,
> struct iov_iter *i, unsigned long offset, size_t bytes)
> {
> --
> 2.1.0
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [email protected]. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"[email protected]"> [email protected] </a>
>
>

--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
Key fingerprint: 2A0B 4ED9 15F2 D3FA 45F5 B162 1728 0A97 8118 6ACF


2014-10-16 13:59:10

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v11 06/21] vfs: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()

On Thu, Oct 16, 2014 at 03:33:55PM +0200, Mathieu Desnoyers wrote:
> > +static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
> > +{
[...]
> > + left = __copy_to_user(buf, from, copy);
>
> How comes this function uses __copy_to_user without any access_ok()
> check ? This has security implications.

The access_ok() check is done higher up the call-chain if it's appropriate.
These functions can be (intentionally) called to access kernel addresses,
so it wouldn't be appropriate to do that here.

> > +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
> > + size_t bytes, struct iov_iter *i)
> > +{
> > + void *kaddr = kmap_atomic(page);
> > + size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);
>
> missing newline.
>
> > + kunmap_atomic(kaddr);
> > + return wanted;
> > +}

Are you seriously suggesting that:

static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
size_t bytes, struct iov_iter *i)
{
void *kaddr = kmap_atomic(page);
size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);

kunmap_atomic(kaddr);
return wanted;
}

is more readable than without the newline? I can see the point of the
rule for functions with a lot of variables, or a lot of lines, but I
don't see the point of it for such a small function.

In any case, this patch is now upstream, so I shan't be proposing any
stylistic changes for it.

2014-10-16 14:12:16

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH v11 06/21] vfs: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()

----- Original Message -----
> From: "Matthew Wilcox" <[email protected]>
> To: "Mathieu Desnoyers" <[email protected]>
> Cc: "Matthew Wilcox" <[email protected]>, [email protected], [email protected],
> [email protected], "Matthew Wilcox" <[email protected]>
> Sent: Thursday, October 16, 2014 3:59:03 PM
> Subject: Re: [PATCH v11 06/21] vfs: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()
>
> On Thu, Oct 16, 2014 at 03:33:55PM +0200, Mathieu Desnoyers wrote:
> > > +static size_t copy_to_iter_iovec(void *from, size_t bytes, struct
> > > iov_iter *i)
> > > +{
> [...]
> > > + left = __copy_to_user(buf, from, copy);
> >
> > How comes this function uses __copy_to_user without any access_ok()
> > check ? This has security implications.
>
> The access_ok() check is done higher up the call-chain if it's appropriate.
> These functions can be (intentionally) called to access kernel addresses,
> so it wouldn't be appropriate to do that here.

If the access_ok() are expected to be already done higher in the call-chain,
we might want to rename e.g. copy_to_iter_iovec to
__copy_to_iter_iovec(). It helps clarifying the check expectations for the
caller.

>
> > > +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
> > > + size_t bytes, struct iov_iter *i)
> > > +{
> > > + void *kaddr = kmap_atomic(page);
> > > + size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);
> >
> > missing newline.
> >
> > > + kunmap_atomic(kaddr);
> > > + return wanted;
> > > +}
>
> Are you seriously suggesting that:
>
> static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
> size_t bytes, struct iov_iter *i)
> {
> void *kaddr = kmap_atomic(page);
> size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);
>
> kunmap_atomic(kaddr);
> return wanted;
> }
>
> is more readable than without the newline? I can see the point of the
> rule for functions with a lot of variables, or a lot of lines, but I
> don't see the point of it for such a small function.

I usually find it easier to read when variables and code are split,
but I don't feel strongly about this in this particular case.

>
> In any case, this patch is now upstream, so I shan't be proposing any
> stylistic changes for it.

The leading __ prefix before the function names appears to be important
enough though, since it allows future changes of this code to take into
account the specific check expectations of those functions.

Thanks,

Mathieu


--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

2014-10-17 07:02:32

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v11 06/21] vfs: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()

On Thu, Oct 16, 2014 at 02:12:06PM +0000, Mathieu Desnoyers wrote:
> > The access_ok() check is done higher up the call-chain if it's appropriate.
> > These functions can be (intentionally) called to access kernel addresses,
> > so it wouldn't be appropriate to do that here.
>
> If the access_ok() are expected to be already done higher in the call-chain,
> we might want to rename e.g. copy_to_iter_iovec to
> __copy_to_iter_iovec(). It helps clarifying the check expectations for the
> caller.

I'm following the existing convention in this file; it already had
copy_page_to_iter() and copy_page_from_iter() as exported symbols. I
just added copy_to_iter() and copy_from_iter().

2014-10-17 15:39:52

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH v11 06/21] vfs: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()

----- Original Message -----
> From: "Matthew Wilcox" <[email protected]>
> To: "Mathieu Desnoyers" <[email protected]>
> Cc: "Matthew Wilcox" <[email protected]>, "Matthew Wilcox" <[email protected]>,
> [email protected], [email protected], [email protected]
> Sent: Friday, October 17, 2014 12:21:46 AM
> Subject: Re: [PATCH v11 06/21] vfs: Add copy_to_iter(), copy_from_iter() and iov_iter_zero()
>
> On Thu, Oct 16, 2014 at 02:12:06PM +0000, Mathieu Desnoyers wrote:
> > > The access_ok() check is done higher up the call-chain if it's
> > > appropriate.
> > > These functions can be (intentionally) called to access kernel addresses,
> > > so it wouldn't be appropriate to do that here.
> >
> > If the access_ok() are expected to be already done higher in the
> > call-chain,
> > we might want to rename e.g. copy_to_iter_iovec to
> > __copy_to_iter_iovec(). It helps clarifying the check expectations for the
> > caller.
>
> I'm following the existing convention in this file; it already had
> copy_page_to_iter() and copy_page_from_iter() as exported symbols. I
> just added copy_to_iter() and copy_from_iter().
>

I understand you follow the local style. However, since these style
nits have been known to let security issues creep into the kernel
in the past,it would be good to change the style of this file to add
those also to the pre-existing functions, perhaps in a separate patch.

Thanks,

Mathieu

--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com