2022-07-07 06:09:08

by Alexander Gordeev

[permalink] [raw]
Subject: [PATCH v2 0/1] s390/crash: allow multi-segment iterators

Unlike other architectures s390 can not use copyout() and memcopy() for
accessing memory and thus using copy_to_iter() now is not possible. But
a fix is needed, since 'cp' routine as 'core_collector' for kdump service
initiates multi-segment iterator.

The reason iterate_iovec() and __iterate_and_advance() macros copied from
lib/iov_iter.c (thus introducing redundancy) is to avoid custom iterator-
treating in s390 code. I doubt these macros could be turned public (i.e
with a follow-up patch), so the intention is to do it like _copy_to_iter()
does.

Changes since v1:
- number of bytes left to copy on fail fixed;

Alexander Gordeev (1):
s390/crash: allow multi-segment iterators

arch/s390/kernel/crash_dump.c | 65 +++++++++++++++++++++++++++--------
1 file changed, 50 insertions(+), 15 deletions(-)

--
2.34.1


2022-07-07 06:19:27

by Alexander Gordeev

[permalink] [raw]
Subject: [PATCH v2 1/1] s390/crash: allow multi-segment iterators

Rework copy_oldmem_page() to allow multi-segment iterators.
Reuse existing iterate_iovec macro as is and only relevant
bits from __iterate_and_advance macro.

Fixes: 49b11524d648 ("s390/crash: add missing iterator advance in copy_oldmem_page())
Signed-off-by: Alexander Gordeev <[email protected]>
---
arch/s390/kernel/crash_dump.c | 65 +++++++++++++++++++++++++++--------
1 file changed, 50 insertions(+), 15 deletions(-)

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 28124d0fa1d5..cc1586d64ce2 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -210,6 +210,52 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
return 0;
}

+#define iterate_iovec(i, n, base, len, off, __p, STEP) { \
+ size_t off = 0; \
+ size_t skip = i->iov_offset; \
+ do { \
+ len = min(n, __p->iov_len - skip); \
+ if (likely(len)) { \
+ base = __p->iov_base + skip; \
+ len -= (STEP); \
+ off += len; \
+ skip += len; \
+ n -= len; \
+ if (skip < __p->iov_len) \
+ break; \
+ } \
+ __p++; \
+ skip = 0; \
+ } while (n); \
+ i->iov_offset = skip; \
+ n = off; \
+}
+
+#define __iterate_and_advance(i, n, base, len, off, I, K) { \
+ if (unlikely(i->count < n)) \
+ n = i->count; \
+ if (likely(n)) { \
+ if (likely(iter_is_iovec(i))) { \
+ const struct iovec *iov = i->iov; \
+ void __user *base; \
+ size_t len; \
+ iterate_iovec(i, n, base, len, off, \
+ iov, (I)) \
+ i->nr_segs -= iov - i->iov; \
+ i->iov = iov; \
+ } else if (iov_iter_is_kvec(i)) { \
+ const struct kvec *kvec = i->kvec; \
+ void *base; \
+ size_t len; \
+ iterate_iovec(i, n, base, len, off, \
+ kvec, (K)) \
+ i->nr_segs -= kvec - i->kvec; \
+ i->kvec = kvec; \
+ } \
+ i->count -= n; \
+ } \
+}
+
/*
* Copy one page from "oldmem"
*/
@@ -217,25 +263,14 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
unsigned long offset)
{
unsigned long src;
- int rc;

if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
return -EINVAL;
- /* Multi-segment iterators are not supported */
- if (iter->nr_segs > 1)
- return -EINVAL;
- if (!csize)
- return 0;
src = pfn_to_phys(pfn) + offset;
-
- /* XXX: pass the iov_iter down to a common function */
- if (iter_is_iovec(iter))
- rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
- else
- rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
- if (rc < 0)
- return rc;
- iov_iter_advance(iter, csize);
+ __iterate_and_advance(iter, csize, base, len, off,
+ ({ copy_oldmem_user(base, src + off, len) < 0 ? len : 0; }),
+ ({ copy_oldmem_kernel(base, src + off, len) < 0 ? len : 0; })
+ )
return csize;
}

--
2.34.1

2022-07-07 12:52:19

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] s390/crash: allow multi-segment iterators

On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> Rework copy_oldmem_page() to allow multi-segment iterators.
> Reuse existing iterate_iovec macro as is and only relevant
> bits from __iterate_and_advance macro.
>
> Fixes: 49b11524d648 ("s390/crash: add missing iterator advance in copy_oldmem_page())
> Signed-off-by: Alexander Gordeev <[email protected]>

To be followed by duplication of every change ever done to those macros?
Hell, no. Do it the same way it's done for that _copy_from_iter_flushcache()
thing - i.e. an ifdef in lib/iov_iter.c and no duplication of that forest of
macros.

2022-07-07 13:19:50

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] s390/crash: allow multi-segment iterators

On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> Rework copy_oldmem_page() to allow multi-segment iterators.
> Reuse existing iterate_iovec macro as is and only relevant
> bits from __iterate_and_advance macro.

Or do it properly?

You should probably put a mutex around all of this because if you have two
threads accessing the hsa at the same time, they'll use the same buffer.
But that's a pre-existing problem. I also fixed the pre-existing bug
where you were using 'count' when you meant to use 'len'.

Uncompiled. You might need to include <linux/uio.h> somewhere.

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 236b34b75ddb..d8b4c526e0f0 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -143,7 +143,7 @@ int sclp_ap_configure(u32 apid);
int sclp_ap_deconfigure(u32 apid);
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
+int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
void sclp_ocf_cpc_name_copy(char *dst);

static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 28124d0fa1d5..6e4dde377f8e 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -130,53 +130,11 @@ static inline void *load_real_addr(void *addr)
return (void *)real_addr;
}

-/*
- * Copy memory of the old, dumped system to a kernel space virtual address
- */
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
-{
- unsigned long len;
- void *ra;
- int rc;
-
- while (count) {
- if (!oldmem_data.start && src < sclp.hsa_size) {
- /* Copy from zfcp/nvme dump HSA area */
- len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_kernel(dst, src, len);
- if (rc)
- return rc;
- } else {
- /* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
- src -= oldmem_data.start;
- len = min(count, oldmem_data.size - src);
- } else if (oldmem_data.start && src < oldmem_data.size) {
- len = min(count, oldmem_data.size - src);
- src += oldmem_data.start;
- } else {
- len = count;
- }
- if (is_vmalloc_or_module_addr(dst)) {
- ra = load_real_addr(dst);
- len = min(PAGE_SIZE - offset_in_page(ra), len);
- } else {
- ra = dst;
- }
- if (memcpy_real(ra, src, len))
- return -EFAULT;
- }
- dst += len;
- src += len;
- count -= len;
- }
- return 0;
-}
-
/*
* Copy memory of the old, dumped system to a user space virtual address
*/
-static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
+static int copy_oldmem_iter(struct iov_iter *iter, unsigned long src,
+ size_t count)
{
unsigned long len;
int rc;
@@ -185,7 +143,7 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
if (!oldmem_data.start && src < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_user(dst, src, len);
+ rc = memcpy_hsa_iter(iter, src, len);
if (rc)
return rc;
} else {
@@ -199,8 +157,8 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
} else {
len = count;
}
- rc = copy_to_user_real(dst, src, count);
- if (rc)
+ rc = copy_to_iter(iter, src, len);
+ if (rc != len)
return rc;
}
dst += len;
@@ -219,23 +177,13 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
unsigned long src;
int rc;

- if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
- return -EINVAL;
- /* Multi-segment iterators are not supported */
- if (iter->nr_segs > 1)
- return -EINVAL;
if (!csize)
return 0;
src = pfn_to_phys(pfn) + offset;

- /* XXX: pass the iov_iter down to a common function */
- if (iter_is_iovec(iter))
- rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
- else
- rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
+ rc = copy_oldmem_iter(iter, src, csize);
if (rc < 0)
return rc;
- iov_iter_advance(iter, csize);
return csize;
}

diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 516783ba950f..26125718f3e0 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -59,7 +59,7 @@ static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
* @src: Start address within HSA where data should be copied
* @count: Size of buffer, which should be copied
*/
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
+int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
unsigned long offset, bytes;

@@ -73,10 +73,9 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
}
offset = src % PAGE_SIZE;
bytes = min(PAGE_SIZE - offset, count);
- if (copy_to_user(dest, hsa_buf + offset, bytes))
+ if (copy_to_iter(hsa_buf + offset, bytes, iter) != bytes)
return -EFAULT;
src += bytes;
- dest += bytes;
count -= bytes;
}
return 0;

2022-07-07 13:56:45

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] s390/crash: allow multi-segment iterators

On Thu, Jul 07, 2022 at 01:54:22PM +0100, Matthew Wilcox wrote:
> On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> > Rework copy_oldmem_page() to allow multi-segment iterators.
> > Reuse existing iterate_iovec macro as is and only relevant
> > bits from __iterate_and_advance macro.
>
> Or do it properly?

Or that...

> You should probably put a mutex around all of this because if you have two
> threads accessing the hsa at the same time, they'll use the same buffer.
> But that's a pre-existing problem. I also fixed the pre-existing bug
> where you were using 'count' when you meant to use 'len'.
>
> Uncompiled. You might need to include <linux/uio.h> somewhere.
>
> diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
> index 236b34b75ddb..d8b4c526e0f0 100644
> --- a/arch/s390/include/asm/sclp.h
> +++ b/arch/s390/include/asm/sclp.h
> @@ -143,7 +143,7 @@ int sclp_ap_configure(u32 apid);
> int sclp_ap_deconfigure(u32 apid);
> int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
> int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
> -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
> +int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
> void sclp_ocf_cpc_name_copy(char *dst);
>
> static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
> diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
> index 28124d0fa1d5..6e4dde377f8e 100644
> --- a/arch/s390/kernel/crash_dump.c
> +++ b/arch/s390/kernel/crash_dump.c
> @@ -130,53 +130,11 @@ static inline void *load_real_addr(void *addr)
> return (void *)real_addr;
> }
>
> -/*
> - * Copy memory of the old, dumped system to a kernel space virtual address
> - */
> -int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
> -{
> - unsigned long len;
> - void *ra;
> - int rc;
> -
> - while (count) {
> - if (!oldmem_data.start && src < sclp.hsa_size) {
> - /* Copy from zfcp/nvme dump HSA area */
> - len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_kernel(dst, src, len);
> - if (rc)
> - return rc;
> - } else {
> - /* Check for swapped kdump oldmem areas */
> - if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
> - src -= oldmem_data.start;
> - len = min(count, oldmem_data.size - src);
> - } else if (oldmem_data.start && src < oldmem_data.size) {
> - len = min(count, oldmem_data.size - src);
> - src += oldmem_data.start;
> - } else {
> - len = count;
> - }
> - if (is_vmalloc_or_module_addr(dst)) {
> - ra = load_real_addr(dst);
> - len = min(PAGE_SIZE - offset_in_page(ra), len);
> - } else {
> - ra = dst;
> - }
> - if (memcpy_real(ra, src, len))
> - return -EFAULT;
> - }
> - dst += len;
> - src += len;
> - count -= len;
> - }
> - return 0;
> -}
> -
> /*
> * Copy memory of the old, dumped system to a user space virtual address
> */
> -static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> +static int copy_oldmem_iter(struct iov_iter *iter, unsigned long src,
> + size_t count)
> {
> unsigned long len;
> int rc;
> @@ -185,7 +143,7 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> if (!oldmem_data.start && src < sclp.hsa_size) {
> /* Copy from zfcp/nvme dump HSA area */
> len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_user(dst, src, len);
> + rc = memcpy_hsa_iter(iter, src, len);
> if (rc)
> return rc;
> } else {
> @@ -199,8 +157,8 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> } else {
> len = count;
> }
> - rc = copy_to_user_real(dst, src, count);
> - if (rc)
> + rc = copy_to_iter(iter, src, len);
> + if (rc != len)
> return rc;
> }
> dst += len;
> @@ -219,23 +177,13 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
> unsigned long src;
> int rc;
>
> - if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
> - return -EINVAL;
> - /* Multi-segment iterators are not supported */
> - if (iter->nr_segs > 1)
> - return -EINVAL;
> if (!csize)
> return 0;
> src = pfn_to_phys(pfn) + offset;
>
> - /* XXX: pass the iov_iter down to a common function */
> - if (iter_is_iovec(iter))
> - rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
> - else
> - rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
> + rc = copy_oldmem_iter(iter, src, csize);
> if (rc < 0)
> return rc;
> - iov_iter_advance(iter, csize);
> return csize;
> }
>
> diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
> index 516783ba950f..26125718f3e0 100644
> --- a/drivers/s390/char/zcore.c
> +++ b/drivers/s390/char/zcore.c
> @@ -59,7 +59,7 @@ static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
> * @src: Start address within HSA where data should be copied
> * @count: Size of buffer, which should be copied
> */
> -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
> +int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count)
> {
> unsigned long offset, bytes;
>
> @@ -73,10 +73,9 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
> }
> offset = src % PAGE_SIZE;
> bytes = min(PAGE_SIZE - offset, count);
> - if (copy_to_user(dest, hsa_buf + offset, bytes))
> + if (copy_to_iter(hsa_buf + offset, bytes, iter) != bytes)
> return -EFAULT;

Umm... Then you want iov_iter_revert() on short copy...

2022-07-07 19:10:17

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] s390/crash: allow multi-segment iterators

On Thu, Jul 07, 2022 at 02:31:44PM +0100, Al Viro wrote:
> > @@ -73,10 +73,9 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
> > }
> > offset = src % PAGE_SIZE;
> > bytes = min(PAGE_SIZE - offset, count);
> > - if (copy_to_user(dest, hsa_buf + offset, bytes))
> > + if (copy_to_iter(hsa_buf + offset, bytes, iter) != bytes)
> > return -EFAULT;
>
> Umm... Then you want iov_iter_revert() on short copy...

... maybe better to change the calling convention to return the short
write and have the caller do it if they care?

2022-07-13 11:26:00

by Alexander Gordeev

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] s390/crash: allow multi-segment iterators

On Thu, Jul 07, 2022 at 01:54:22PM +0100, Matthew Wilcox wrote:
> On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> > Rework copy_oldmem_page() to allow multi-segment iterators.
> > Reuse existing iterate_iovec macro as is and only relevant
> > bits from __iterate_and_advance macro.

> Or do it properly?
>
> You should probably put a mutex around all of this because if you have two
> threads accessing the hsa at the same time, they'll use the same buffer.
> But that's a pre-existing problem. I also fixed the pre-existing bug
> where you were using 'count' when you meant to use 'len'.

Thank you, Matthew!

Would you mind being added with Suggested-by to the fix(es)?

> Uncompiled. You might need to include <linux/uio.h> somewhere.

The problem with your suggestion is memcpy()/copyout() might fail
since pages to be copied are not guaranteed to be mapped on s390.
For that we use s390-specific memcpy_real() routine that only takes
physical addresses and bypasses paging altogether (I added some
comments to the removed code, just in case you are interested).

Yet, I am still going to reuse and extend your approach and
hopefully come up with something soon.

As a side note - there is an intention to make things more like
others, but not just right now.

> diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
> index 236b34b75ddb..d8b4c526e0f0 100644
> --- a/arch/s390/include/asm/sclp.h
> +++ b/arch/s390/include/asm/sclp.h
> @@ -143,7 +143,7 @@ int sclp_ap_configure(u32 apid);
> int sclp_ap_deconfigure(u32 apid);
> int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
> int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
> -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
> +int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
> void sclp_ocf_cpc_name_copy(char *dst);
>
> static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
> diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
> index 28124d0fa1d5..6e4dde377f8e 100644
> --- a/arch/s390/kernel/crash_dump.c
> +++ b/arch/s390/kernel/crash_dump.c
> @@ -130,53 +130,11 @@ static inline void *load_real_addr(void *addr)
> return (void *)real_addr;
> }
>
> -/*
> - * Copy memory of the old, dumped system to a kernel space virtual address
> - */
> -int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
> -{
> - unsigned long len;
> - void *ra;
> - int rc;
> -
> - while (count) {
> - if (!oldmem_data.start && src < sclp.hsa_size) {
> - /* Copy from zfcp/nvme dump HSA area */
> - len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_kernel(dst, src, len);
> - if (rc)
> - return rc;
> - } else {
> - /* Check for swapped kdump oldmem areas */
> - if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
> - src -= oldmem_data.start;
> - len = min(count, oldmem_data.size - src);
> - } else if (oldmem_data.start && src < oldmem_data.size) {
> - len = min(count, oldmem_data.size - src);
> - src += oldmem_data.start;
> - } else {
> - len = count;
> - }
> - if (is_vmalloc_or_module_addr(dst)) {

There is no 1:1 match between vmalloc/module addresses.

> - ra = load_real_addr(dst);

load_real_addr() obtains the physical addresses from a virtual one to be
able passing it to memcpy_real().

> - len = min(PAGE_SIZE - offset_in_page(ra), len);
> - } else {
> - ra = dst;

Here a virtual address matches the physical one and thus - good to go.

> - }
> - if (memcpy_real(ra, src, len))
> - return -EFAULT;

As the source address might be unmapped, copy_to_iter()->memcpy()
would not be safe to use here.

> - }
> - dst += len;
> - src += len;
> - count -= len;
> - }
> - return 0;
> -}
> -
> /*
> * Copy memory of the old, dumped system to a user space virtual address
> */
> -static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> +static int copy_oldmem_iter(struct iov_iter *iter, unsigned long src,
> + size_t count)
> {
> unsigned long len;
> int rc;
> @@ -185,7 +143,7 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> if (!oldmem_data.start && src < sclp.hsa_size) {
> /* Copy from zfcp/nvme dump HSA area */
> len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_user(dst, src, len);
> + rc = memcpy_hsa_iter(iter, src, len);
> if (rc)
> return rc;
> } else {
> @@ -199,8 +157,8 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> } else {
> len = count;
> }
> - rc = copy_to_user_real(dst, src, count);
> - if (rc)
> + rc = copy_to_iter(iter, src, len);

As the source address might be unmapped, copy_to_iter()->raw_copy_to_user()
is not safe to use here.

> + if (rc != len)
> return rc;
> }
> dst += len;

Thanks!