Unlike other architectures, s390 can not use copyout() and memcpy() for
accessing the dumped system's memory and therefore can not simply switch
to copy_to_iter(). A fix is still needed, since using the 'cp' utility as
the 'core_collector' for the kdump service produces a multi-segment
iterator.
The iterate_iovec() and __iterate_and_advance() macros are copied from
lib/iov_iter.c (introducing redundancy) in order to avoid custom iterator
handling in the s390 code. I doubt these macros could be made public
(e.g. with a follow-up patch), so the intention is to do it the way
_copy_to_iter() does.
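For reference, this is roughly the shape being followed: _copy_to_iter()
in lib/iov_iter.c dispatches per-segment copy steps through the same
macros (simplified here, details such as the pipe handling omitted):
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	/* pipe and might_fault() handling omitted for brevity */
	iterate_and_advance(i, bytes, base, len, off,
		copyout(base, addr + off, len),	/* ITER_IOVEC segments */
		memcpy(base, addr + off, len)	/* ITER_KVEC segments */
	)
	return bytes;
}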
Changes since v1:
- fixed the number of bytes left to copy on failure;
Alexander Gordeev (1):
s390/crash: allow multi-segment iterators
arch/s390/kernel/crash_dump.c | 65 +++++++++++++++++++++++++++--------
1 file changed, 50 insertions(+), 15 deletions(-)
--
2.34.1
Rework copy_oldmem_page() to allow multi-segment iterators.
Reuse existing iterate_iovec macro as is and only relevant
bits from __iterate_and_advance macro.
Fixes: 49b11524d648 ("s390/crash: add missing iterator advance in copy_oldmem_page()")
Signed-off-by: Alexander Gordeev <[email protected]>
---
arch/s390/kernel/crash_dump.c | 65 +++++++++++++++++++++++++++--------
1 file changed, 50 insertions(+), 15 deletions(-)
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 28124d0fa1d5..cc1586d64ce2 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -210,6 +210,52 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
return 0;
}
+#define iterate_iovec(i, n, base, len, off, __p, STEP) { \
+ size_t off = 0; \
+ size_t skip = i->iov_offset; \
+ do { \
+ len = min(n, __p->iov_len - skip); \
+ if (likely(len)) { \
+ base = __p->iov_base + skip; \
+ len -= (STEP); \
+ off += len; \
+ skip += len; \
+ n -= len; \
+ if (skip < __p->iov_len) \
+ break; \
+ } \
+ __p++; \
+ skip = 0; \
+ } while (n); \
+ i->iov_offset = skip; \
+ n = off; \
+}
+
+#define __iterate_and_advance(i, n, base, len, off, I, K) { \
+ if (unlikely(i->count < n)) \
+ n = i->count; \
+ if (likely(n)) { \
+ if (likely(iter_is_iovec(i))) { \
+ const struct iovec *iov = i->iov; \
+ void __user *base; \
+ size_t len; \
+ iterate_iovec(i, n, base, len, off, \
+ iov, (I)) \
+ i->nr_segs -= iov - i->iov; \
+ i->iov = iov; \
+ } else if (iov_iter_is_kvec(i)) { \
+ const struct kvec *kvec = i->kvec; \
+ void *base; \
+ size_t len; \
+ iterate_iovec(i, n, base, len, off, \
+ kvec, (K)) \
+ i->nr_segs -= kvec - i->kvec; \
+ i->kvec = kvec; \
+ } \
+ i->count -= n; \
+ } \
+}
+
/*
* Copy one page from "oldmem"
*/
@@ -217,25 +263,14 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
unsigned long offset)
{
unsigned long src;
- int rc;
if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
return -EINVAL;
- /* Multi-segment iterators are not supported */
- if (iter->nr_segs > 1)
- return -EINVAL;
- if (!csize)
- return 0;
src = pfn_to_phys(pfn) + offset;
-
- /* XXX: pass the iov_iter down to a common function */
- if (iter_is_iovec(iter))
- rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
- else
- rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
- if (rc < 0)
- return rc;
- iov_iter_advance(iter, csize);
+ __iterate_and_advance(iter, csize, base, len, off,
+ ({ copy_oldmem_user(base, src + off, len) < 0 ? len : 0; }),
+ ({ copy_oldmem_kernel(base, src + off, len) < 0 ? len : 0; })
+ )
return csize;
}
--
2.34.1
On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> Rework copy_oldmem_page() to allow multi-segment iterators.
> Reuse existing iterate_iovec macro as is and only relevant
> bits from __iterate_and_advance macro.
>
> Fixes: 49b11524d648 ("s390/crash: add missing iterator advance in copy_oldmem_page()")
> Signed-off-by: Alexander Gordeev <[email protected]>
To be followed by duplication of every change ever done to those macros?
Hell, no. Do it the same way it's done for that _copy_from_iter_flushcache()
thing - i.e. an ifdef in lib/iov_iter.c and no duplication of that forest of
macros.
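For illustration only, that direction could look roughly like the sketch
below. The Kconfig symbol, helper name and arch hooks are made up, and
the architecture would have to expose its per-segment copy routines
through a header; this is a sketch of the suggestion, not a tested patch:
/* lib/iov_iter.c: hypothetical helper, guarded the same way
 * _copy_from_iter_flushcache() is, so the iterate_* macros stay
 * private to this file. */
#ifdef CONFIG_ARCH_HAS_COPY_OLDMEM_TO_ITER	/* made-up symbol */
size_t copy_oldmem_to_iter(unsigned long src, size_t bytes, struct iov_iter *i)
{
	/* both arch hooks are assumed to return the number of bytes
	 * that could not be copied */
	__iterate_and_advance(i, bytes, base, len, off,
		arch_copy_oldmem_user(base, src + off, len),
		arch_copy_oldmem_kernel(base, src + off, len)
	)
	return bytes;
}
EXPORT_SYMBOL_GPL(copy_oldmem_to_iter);
#endif
That would keep the macro forest in one place while still giving s390 a
multi-segment-capable primitive.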
On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> Rework copy_oldmem_page() to allow multi-segment iterators.
> Reuse existing iterate_iovec macro as is and only relevant
> bits from __iterate_and_advance macro.
Or do it properly?
You should probably put a mutex around all of this because if you have two
threads accessing the hsa at the same time, they'll use the same buffer.
But that's a pre-existing problem. I also fixed the pre-existing bug
where you were using 'count' when you meant to use 'len'.
Uncompiled. You might need to include <linux/uio.h> somewhere.
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 236b34b75ddb..d8b4c526e0f0 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -143,7 +143,7 @@ int sclp_ap_configure(u32 apid);
int sclp_ap_deconfigure(u32 apid);
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
+int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
void sclp_ocf_cpc_name_copy(char *dst);
static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 28124d0fa1d5..6e4dde377f8e 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -130,53 +130,11 @@ static inline void *load_real_addr(void *addr)
return (void *)real_addr;
}
-/*
- * Copy memory of the old, dumped system to a kernel space virtual address
- */
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
-{
- unsigned long len;
- void *ra;
- int rc;
-
- while (count) {
- if (!oldmem_data.start && src < sclp.hsa_size) {
- /* Copy from zfcp/nvme dump HSA area */
- len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_kernel(dst, src, len);
- if (rc)
- return rc;
- } else {
- /* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
- src -= oldmem_data.start;
- len = min(count, oldmem_data.size - src);
- } else if (oldmem_data.start && src < oldmem_data.size) {
- len = min(count, oldmem_data.size - src);
- src += oldmem_data.start;
- } else {
- len = count;
- }
- if (is_vmalloc_or_module_addr(dst)) {
- ra = load_real_addr(dst);
- len = min(PAGE_SIZE - offset_in_page(ra), len);
- } else {
- ra = dst;
- }
- if (memcpy_real(ra, src, len))
- return -EFAULT;
- }
- dst += len;
- src += len;
- count -= len;
- }
- return 0;
-}
-
/*
* Copy memory of the old, dumped system to a user space virtual address
*/
-static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
+static int copy_oldmem_iter(struct iov_iter *iter, unsigned long src,
+ size_t count)
{
unsigned long len;
int rc;
@@ -185,7 +143,7 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
if (!oldmem_data.start && src < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_user(dst, src, len);
+ rc = memcpy_hsa_iter(iter, src, len);
if (rc)
return rc;
} else {
@@ -199,8 +157,8 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
} else {
len = count;
}
- rc = copy_to_user_real(dst, src, count);
- if (rc)
+ rc = copy_to_iter(iter, src, len);
+ if (rc != len)
return rc;
}
dst += len;
@@ -219,23 +177,13 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
unsigned long src;
int rc;
- if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
- return -EINVAL;
- /* Multi-segment iterators are not supported */
- if (iter->nr_segs > 1)
- return -EINVAL;
if (!csize)
return 0;
src = pfn_to_phys(pfn) + offset;
- /* XXX: pass the iov_iter down to a common function */
- if (iter_is_iovec(iter))
- rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
- else
- rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
+ rc = copy_oldmem_iter(iter, src, csize);
if (rc < 0)
return rc;
- iov_iter_advance(iter, csize);
return csize;
}
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 516783ba950f..26125718f3e0 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -59,7 +59,7 @@ static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
* @src: Start address within HSA where data should be copied
* @count: Size of buffer, which should be copied
*/
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
+int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
unsigned long offset, bytes;
@@ -73,10 +73,9 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
}
offset = src % PAGE_SIZE;
bytes = min(PAGE_SIZE - offset, count);
- if (copy_to_user(dest, hsa_buf + offset, bytes))
+ if (copy_to_iter(hsa_buf + offset, bytes, iter) != bytes)
return -EFAULT;
src += bytes;
- dest += bytes;
count -= bytes;
}
return 0;
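As a sketch of the serialization suggested above (not part of the posted
diff), the shared hsa_buf bounce buffer could be guarded like this; the
lock name and the split-out helper are made up:
/* drivers/s390/char/zcore.c: hsa_buf is a single shared bounce buffer,
 * so concurrent readers must not use it at the same time. */
static DEFINE_MUTEX(hsa_buf_mutex);

int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
	int ret;

	mutex_lock(&hsa_buf_mutex);
	/* the per-page copy loop via hsa_buf shown above, moved into a
	 * hypothetical helper */
	ret = do_memcpy_hsa_iter(iter, src, count);
	mutex_unlock(&hsa_buf_mutex);
	return ret;
}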
On Thu, Jul 07, 2022 at 01:54:22PM +0100, Matthew Wilcox wrote:
> On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> > Rework copy_oldmem_page() to allow multi-segment iterators.
> > Reuse existing iterate_iovec macro as is and only relevant
> > bits from __iterate_and_advance macro.
>
> Or do it properly?
Or that...
> You should probably put a mutex around all of this because if you have two
> threads accessing the hsa at the same time, they'll use the same buffer.
> But that's a pre-existing problem. I also fixed the pre-existing bug
> where you were using 'count' when you meant to use 'len'.
>
> Uncompiled. You might need to include <linux/uio.h> somewhere.
>
> diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
> index 236b34b75ddb..d8b4c526e0f0 100644
> --- a/arch/s390/include/asm/sclp.h
> +++ b/arch/s390/include/asm/sclp.h
> @@ -143,7 +143,7 @@ int sclp_ap_configure(u32 apid);
> int sclp_ap_deconfigure(u32 apid);
> int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
> int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
> -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
> +int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
> void sclp_ocf_cpc_name_copy(char *dst);
>
> static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
> diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
> index 28124d0fa1d5..6e4dde377f8e 100644
> --- a/arch/s390/kernel/crash_dump.c
> +++ b/arch/s390/kernel/crash_dump.c
> @@ -130,53 +130,11 @@ static inline void *load_real_addr(void *addr)
> return (void *)real_addr;
> }
>
> -/*
> - * Copy memory of the old, dumped system to a kernel space virtual address
> - */
> -int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
> -{
> - unsigned long len;
> - void *ra;
> - int rc;
> -
> - while (count) {
> - if (!oldmem_data.start && src < sclp.hsa_size) {
> - /* Copy from zfcp/nvme dump HSA area */
> - len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_kernel(dst, src, len);
> - if (rc)
> - return rc;
> - } else {
> - /* Check for swapped kdump oldmem areas */
> - if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
> - src -= oldmem_data.start;
> - len = min(count, oldmem_data.size - src);
> - } else if (oldmem_data.start && src < oldmem_data.size) {
> - len = min(count, oldmem_data.size - src);
> - src += oldmem_data.start;
> - } else {
> - len = count;
> - }
> - if (is_vmalloc_or_module_addr(dst)) {
> - ra = load_real_addr(dst);
> - len = min(PAGE_SIZE - offset_in_page(ra), len);
> - } else {
> - ra = dst;
> - }
> - if (memcpy_real(ra, src, len))
> - return -EFAULT;
> - }
> - dst += len;
> - src += len;
> - count -= len;
> - }
> - return 0;
> -}
> -
> /*
> * Copy memory of the old, dumped system to a user space virtual address
> */
> -static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> +static int copy_oldmem_iter(struct iov_iter *iter, unsigned long src,
> + size_t count)
> {
> unsigned long len;
> int rc;
> @@ -185,7 +143,7 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> if (!oldmem_data.start && src < sclp.hsa_size) {
> /* Copy from zfcp/nvme dump HSA area */
> len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_user(dst, src, len);
> + rc = memcpy_hsa_iter(iter, src, len);
> if (rc)
> return rc;
> } else {
> @@ -199,8 +157,8 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> } else {
> len = count;
> }
> - rc = copy_to_user_real(dst, src, count);
> - if (rc)
> + rc = copy_to_iter(iter, src, len);
> + if (rc != len)
> return rc;
> }
> dst += len;
> @@ -219,23 +177,13 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
> unsigned long src;
> int rc;
>
> - if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
> - return -EINVAL;
> - /* Multi-segment iterators are not supported */
> - if (iter->nr_segs > 1)
> - return -EINVAL;
> if (!csize)
> return 0;
> src = pfn_to_phys(pfn) + offset;
>
> - /* XXX: pass the iov_iter down to a common function */
> - if (iter_is_iovec(iter))
> - rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
> - else
> - rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
> + rc = copy_oldmem_iter(iter, src, csize);
> if (rc < 0)
> return rc;
> - iov_iter_advance(iter, csize);
> return csize;
> }
>
> diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
> index 516783ba950f..26125718f3e0 100644
> --- a/drivers/s390/char/zcore.c
> +++ b/drivers/s390/char/zcore.c
> @@ -59,7 +59,7 @@ static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
> * @src: Start address within HSA where data should be copied
> * @count: Size of buffer, which should be copied
> */
> -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
> +int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count)
> {
> unsigned long offset, bytes;
>
> @@ -73,10 +73,9 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
> }
> offset = src % PAGE_SIZE;
> bytes = min(PAGE_SIZE - offset, count);
> - if (copy_to_user(dest, hsa_buf + offset, bytes))
> + if (copy_to_iter(hsa_buf + offset, bytes, iter) != bytes)
> return -EFAULT;
Umm... Then you want iov_iter_revert() on short copy...
On Thu, Jul 07, 2022 at 02:31:44PM +0100, Al Viro wrote:
> > @@ -73,10 +73,9 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
> > }
> > offset = src % PAGE_SIZE;
> > bytes = min(PAGE_SIZE - offset, count);
> > - if (copy_to_user(dest, hsa_buf + offset, bytes))
> > + if (copy_to_iter(hsa_buf + offset, bytes, iter) != bytes)
> > return -EFAULT;
>
> Umm... Then you want iov_iter_revert() on short copy...
... maybe better to change the calling convention to return the short
write and have the caller do it if they care?
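For illustration, the iov_iter_revert() variant could be folded into a
small helper along these lines (sketch only; the helper name is made up
and hsa_buf/offset/bytes are the names used in the diff above):
/* copy one chunk out of hsa_buf; on a short copy, undo the partial
 * advance so the iterator is left where it was before the failing
 * segment */
static int hsa_copy_to_iter(struct iov_iter *iter, unsigned long offset,
			    size_t bytes)
{
	size_t copied = copy_to_iter(hsa_buf + offset, bytes, iter);

	if (copied != bytes) {
		iov_iter_revert(iter, copied);
		return -EFAULT;
	}
	return 0;
}
The alternative calling convention would instead return the short count
and leave any revert to the caller.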
On Thu, Jul 07, 2022 at 01:54:22PM +0100, Matthew Wilcox wrote:
> On Thu, Jul 07, 2022 at 08:01:15AM +0200, Alexander Gordeev wrote:
> > Rework copy_oldmem_page() to allow multi-segment iterators.
> > Reuse existing iterate_iovec macro as is and only relevant
> > bits from __iterate_and_advance macro.
> Or do it properly?
>
> You should probably put a mutex around all of this because if you have two
> threads accessing the hsa at the same time, they'll use the same buffer.
> But that's a pre-existing problem. I also fixed the pre-existing bug
> where you were using 'count' when you meant to use 'len'.
Thank you, Matthew!
Would you mind being credited with a Suggested-by tag in the fix(es)?
> Uncompiled. You might need to include <linux/uio.h> somewhere.
The problem with your suggestion is that memcpy()/copyout() might fail,
since the pages to be copied are not guaranteed to be mapped on s390.
That is why we use the s390-specific memcpy_real() routine, which takes
only physical addresses and bypasses paging altogether (I added some
comments to the removed code below, in case you are interested).
Yet, I am still going to reuse and extend your approach and
hopefully come up with something soon.
As a side note, there is an intention to make things more like other
architectures, just not right now.
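One way to combine the two, sketched under the assumption that a bounce
buffer is acceptable (the helper name is made up, and the static buffer
would need the same serialization discussed for hsa_buf):
/* read the possibly unmapped old-kernel memory through memcpy_real()
 * into a bounce buffer first, then hand it to copy_to_iter() */
static int copy_oldmem_chunk(struct iov_iter *iter, unsigned long src,
			     size_t count)
{
	static char buf[PAGE_SIZE];
	size_t len;

	while (count) {
		len = min_t(size_t, count, PAGE_SIZE);
		if (memcpy_real(buf, src, len))
			return -EFAULT;
		if (copy_to_iter(buf, len, iter) != len)
			return -EFAULT;
		src += len;
		count -= len;
	}
	return 0;
}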
> diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
> index 236b34b75ddb..d8b4c526e0f0 100644
> --- a/arch/s390/include/asm/sclp.h
> +++ b/arch/s390/include/asm/sclp.h
> @@ -143,7 +143,7 @@ int sclp_ap_configure(u32 apid);
> int sclp_ap_deconfigure(u32 apid);
> int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
> int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
> -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
> +int memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
> void sclp_ocf_cpc_name_copy(char *dst);
>
> static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
> diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
> index 28124d0fa1d5..6e4dde377f8e 100644
> --- a/arch/s390/kernel/crash_dump.c
> +++ b/arch/s390/kernel/crash_dump.c
> @@ -130,53 +130,11 @@ static inline void *load_real_addr(void *addr)
> return (void *)real_addr;
> }
>
> -/*
> - * Copy memory of the old, dumped system to a kernel space virtual address
> - */
> -int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
> -{
> - unsigned long len;
> - void *ra;
> - int rc;
> -
> - while (count) {
> - if (!oldmem_data.start && src < sclp.hsa_size) {
> - /* Copy from zfcp/nvme dump HSA area */
> - len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_kernel(dst, src, len);
> - if (rc)
> - return rc;
> - } else {
> - /* Check for swapped kdump oldmem areas */
> - if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
> - src -= oldmem_data.start;
> - len = min(count, oldmem_data.size - src);
> - } else if (oldmem_data.start && src < oldmem_data.size) {
> - len = min(count, oldmem_data.size - src);
> - src += oldmem_data.start;
> - } else {
> - len = count;
> - }
> - if (is_vmalloc_or_module_addr(dst)) {
For vmalloc/module addresses there is no 1:1 match between virtual and physical addresses.
> - ra = load_real_addr(dst);
load_real_addr() obtains the physical address from the virtual one so
that it can be passed to memcpy_real().
> - len = min(PAGE_SIZE - offset_in_page(ra), len);
> - } else {
> - ra = dst;
Here the virtual address matches the physical one, so it can be used as-is.
> - }
> - if (memcpy_real(ra, src, len))
> - return -EFAULT;
As the source address might be unmapped, copy_to_iter()->memcpy()
would not be safe to use here.
> - }
> - dst += len;
> - src += len;
> - count -= len;
> - }
> - return 0;
> -}
> -
> /*
> * Copy memory of the old, dumped system to a user space virtual address
> */
> -static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> +static int copy_oldmem_iter(struct iov_iter *iter, unsigned long src,
> + size_t count)
> {
> unsigned long len;
> int rc;
> @@ -185,7 +143,7 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> if (!oldmem_data.start && src < sclp.hsa_size) {
> /* Copy from zfcp/nvme dump HSA area */
> len = min(count, sclp.hsa_size - src);
> - rc = memcpy_hsa_user(dst, src, len);
> + rc = memcpy_hsa_iter(iter, src, len);
> if (rc)
> return rc;
> } else {
> @@ -199,8 +157,8 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
> } else {
> len = count;
> }
> - rc = copy_to_user_real(dst, src, count);
> - if (rc)
> + rc = copy_to_iter(iter, src, len);
As the source address might be unmapped, copy_to_iter()->raw_copy_to_user()
is not safe to use here.
> + if (rc != len)
> return rc;
> }
> dst += len;
Thanks!