2022-10-29 01:20:08

by Peilin Ye

[permalink] [raw]
Subject: [PATCH v1] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

From: Peilin Ye <[email protected]>

Currently, there is a copy for each page when dumping VMAs to pipe
handlers using dump_emit_page(). For example:

fs/binfmt_elf.c:elf_core_dump()
fs/coredump.c:dump_user_range()
:dump_emit_page()
fs/read_write.c:__kernel_write_iter()
fs/pipe.c:pipe_write()
lib/iov_iter.c:copy_page_from_iter()

Use vmsplice_to_pipe() instead of __kernel_write_iter() to avoid this
copy for pipe handlers.

Tested by dumping a 40-GByte core into a simple handler that splice()s
from stdin to disk in a loop, PIPE_DEF_BUFFERS (16) pages at a time.

Before After Improved by
Time to Completion 52.04 seconds 46.30 seconds 11.03%
CPU Usage 89.43% 84.90% 5.07%

Suggested-by: Cong Wang <[email protected]>
Signed-off-by: Peilin Ye <[email protected]>
---
fs/coredump.c | 7 ++++++-
fs/splice.c | 4 ++--
include/linux/splice.h | 2 ++
3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 7bad7785e8e6..a6ef406dee43 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/splice.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -854,7 +855,11 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 0;
pos = file->f_pos;
iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
- n = __kernel_write_iter(cprm->file, &iter, &pos);
+
+ n = vmsplice_to_pipe(file, &iter, 0);
+ if (n == -EBADF)
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+
if (n != PAGE_SIZE)
return 0;
file->f_pos = pos;
diff --git a/fs/splice.c b/fs/splice.c
index 0878b852b355..2051700cda79 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1234,8 +1234,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
- unsigned int flags)
+long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags)
{
struct pipe_inode_info *pipe;
long ret = 0;
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..0cd955cf5ee2 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -81,6 +81,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t *off_out,
size_t len, unsigned int flags);
+extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags);

extern long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
--
2.20.1



2022-10-29 07:47:28

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v1] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

Hi Peilin,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.1-rc2 next-20221028]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Peilin-Ye/coredump-Use-vmsplice_to_pipe-for-pipes-in-dump_emit_page/20221029-085307
patch link: https://lore.kernel.org/r/20221029005147.2553-1-yepeilin.cs%40gmail.com
patch subject: [PATCH v1] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()
config: x86_64-allyesconfig
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/49da2daf3c9dc6a687c0f64c047bec6199f232af
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Peilin-Ye/coredump-Use-vmsplice_to_pipe-for-pipes-in-dump_emit_page/20221029-085307
git checkout 49da2daf3c9dc6a687c0f64c047bec6199f232af
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash net/tls/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

In file included from net/tls/tls_sw.c:41:
>> include/linux/splice.h:84:56: warning: 'struct iov_iter' declared inside parameter list will not be visible outside of this definition or declaration
84 | extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
| ^~~~~~~~


vim +84 include/linux/splice.h

64
65 typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
66 struct splice_desc *);
67 typedef int (splice_direct_actor)(struct pipe_inode_info *,
68 struct splice_desc *);
69
70 extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
71 loff_t *, size_t, unsigned int,
72 splice_actor *);
73 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
74 struct splice_desc *, splice_actor *);
75 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
76 struct splice_pipe_desc *);
77 extern ssize_t add_to_pipe(struct pipe_inode_info *,
78 struct pipe_buffer *);
79 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
80 splice_direct_actor *);
81 extern long do_splice(struct file *in, loff_t *off_in,
82 struct file *out, loff_t *off_out,
83 size_t len, unsigned int flags);
> 84 extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
85 unsigned int flags);
86

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (3.01 kB)
config (297.65 kB)
Download all attachments

2022-10-29 09:31:55

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v1] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

Hi Peilin,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.1-rc2 next-20221028]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Peilin-Ye/coredump-Use-vmsplice_to_pipe-for-pipes-in-dump_emit_page/20221029-085307
patch link: https://lore.kernel.org/r/20221029005147.2553-1-yepeilin.cs%40gmail.com
patch subject: [PATCH v1] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()
config: hexagon-randconfig-r041-20221029
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 791a7ae1ba3efd6bca96338e10ffde557ba83920)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/49da2daf3c9dc6a687c0f64c047bec6199f232af
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Peilin-Ye/coredump-Use-vmsplice_to_pipe-for-pipes-in-dump_emit_page/20221029-085307
git checkout 49da2daf3c9dc6a687c0f64c047bec6199f232af
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash net/tls/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

In file included from net/tls/tls_sw.c:41:
>> include/linux/splice.h:84:56: warning: declaration of 'struct iov_iter' will not be visible outside of this function [-Wvisibility]
extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
^
In file included from net/tls/tls_sw.c:44:
In file included from include/net/strparser.h:11:
In file included from include/linux/skbuff.h:17:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __raw_readb(PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
#define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
^
In file included from net/tls/tls_sw.c:44:
In file included from include/net/strparser.h:11:
In file included from include/linux/skbuff.h:17:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
#define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
^
In file included from net/tls/tls_sw.c:44:
In file included from include/net/strparser.h:11:
In file included from include/linux/skbuff.h:17:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writeb(value, PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
7 warnings generated.


vim +84 include/linux/splice.h

64
65 typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
66 struct splice_desc *);
67 typedef int (splice_direct_actor)(struct pipe_inode_info *,
68 struct splice_desc *);
69
70 extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
71 loff_t *, size_t, unsigned int,
72 splice_actor *);
73 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
74 struct splice_desc *, splice_actor *);
75 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
76 struct splice_pipe_desc *);
77 extern ssize_t add_to_pipe(struct pipe_inode_info *,
78 struct pipe_buffer *);
79 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
80 splice_direct_actor *);
81 extern long do_splice(struct file *in, loff_t *off_in,
82 struct file *out, loff_t *off_out,
83 size_t len, unsigned int flags);
> 84 extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
85 unsigned int flags);
86

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (7.09 kB)
config (121.78 kB)
Download all attachments

2022-10-31 21:45:51

by Peilin Ye

[permalink] [raw]
Subject: [PATCH v2] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

From: Peilin Ye <[email protected]>

Currently, there is a copy for each page when dumping VMAs to pipe
handlers using dump_emit_page(). For example:

fs/binfmt_elf.c:elf_core_dump()
fs/coredump.c:dump_user_range()
:dump_emit_page()
fs/read_write.c:__kernel_write_iter()
fs/pipe.c:pipe_write()
lib/iov_iter.c:copy_page_from_iter()

Use vmsplice_to_pipe() instead of __kernel_write_iter() to avoid this
copy for pipe handlers.

Tested by dumping a 40-GByte core into a simple handler that splice()s
from stdin to disk in a loop, PIPE_DEF_BUFFERS (16) pages at a time.

Before After Improved by
Time to Completion 52.04 seconds 46.30 seconds 11.03%
CPU Usage 89.43% 84.90% 5.07%

Suggested-by: Cong Wang <[email protected]>
Signed-off-by: Peilin Ye <[email protected]>
---
change in v2:
- fix warning in net/tls/tls_sw.c (kernel test robot)

fs/coredump.c | 7 ++++++-
fs/splice.c | 4 ++--
include/linux/splice.h | 3 +++
3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index da0e9525c4e8..c0a8713d9971 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/splice.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -862,7 +863,11 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 0;
pos = file->f_pos;
iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
- n = __kernel_write_iter(cprm->file, &iter, &pos);
+
+ n = vmsplice_to_pipe(file, &iter, 0);
+ if (n == -EBADF)
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+
if (n != PAGE_SIZE)
return 0;
file->f_pos = pos;
diff --git a/fs/splice.c b/fs/splice.c
index 0878b852b355..2051700cda79 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1234,8 +1234,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
- unsigned int flags)
+long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags)
{
struct pipe_inode_info *pipe;
long ret = 0;
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..38b3560a318b 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -10,6 +10,7 @@
#define SPLICE_H

#include <linux/pipe_fs_i.h>
+#include <linux/uio.h>

/*
* Flags passed in from splice/tee/vmsplice
@@ -81,6 +82,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t *off_out,
size_t len, unsigned int flags);
+extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags);

extern long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
--
2.20.1


2022-11-19 01:46:23

by Peilin Ye

[permalink] [raw]
Subject: [PATCH v2 RESEND] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

From: Peilin Ye <[email protected]>

Currently, there is a copy for each page when dumping VMAs to pipe
handlers using dump_emit_page(). For example:

fs/binfmt_elf.c:elf_core_dump()
fs/coredump.c:dump_user_range()
:dump_emit_page()
fs/read_write.c:__kernel_write_iter()
fs/pipe.c:pipe_write()
lib/iov_iter.c:copy_page_from_iter()

Use vmsplice_to_pipe() instead of __kernel_write_iter() to avoid this
copy for pipe handlers.

Tested by dumping a 40-GByte core into a simple handler that splice()s
from stdin to disk in a loop, PIPE_DEF_BUFFERS (16) pages at a time.

Before After Improved by
Time to Completion 52.04 seconds 46.30 seconds 11.03%
CPU Usage 89.43% 84.90% 5.07%

Suggested-by: Cong Wang <[email protected]>
Signed-off-by: Peilin Ye <[email protected]>
---
Hi all,

Resending this after 2+ weeks. Any suggestions, testing would be much
appreciated, thanks!

change in v2:
- fix warning in net/tls/tls_sw.c (kernel test robot)

Peilin Ye

fs/coredump.c | 7 ++++++-
fs/splice.c | 4 ++--
include/linux/splice.h | 3 +++
3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index da0e9525c4e8..c0a8713d9971 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/splice.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -862,7 +863,11 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 0;
pos = file->f_pos;
iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
- n = __kernel_write_iter(cprm->file, &iter, &pos);
+
+ n = vmsplice_to_pipe(file, &iter, 0);
+ if (n == -EBADF)
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+
if (n != PAGE_SIZE)
return 0;
file->f_pos = pos;
diff --git a/fs/splice.c b/fs/splice.c
index 0878b852b355..2051700cda79 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1234,8 +1234,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
- unsigned int flags)
+long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags)
{
struct pipe_inode_info *pipe;
long ret = 0;
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..38b3560a318b 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -10,6 +10,7 @@
#define SPLICE_H

#include <linux/pipe_fs_i.h>
+#include <linux/uio.h>

/*
* Flags passed in from splice/tee/vmsplice
@@ -81,6 +82,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t *off_out,
size_t len, unsigned int flags);
+extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags);

extern long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
--
2.20.1


2022-11-19 05:06:02

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH v2] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

On Mon, Oct 31, 2022 at 02:03:49PM -0700, Peilin Ye wrote:

> + n = vmsplice_to_pipe(file, &iter, 0);
> + if (n == -EBADF)
> + n = __kernel_write_iter(cprm->file, &iter, &pos);

Yuck. If anything, I would rather put a flag into coredump_params
and check it instead; this check for -EBADF is both unidiomatic and
brittle. Suppose someday somebody looks at vmsplice(2) and
decides that it would make sense to lift the "is it a pipe" check
into e.g. vmsplice_type(). There's no obvious reasons not to,
unless one happens to know that coredump relies upon that check done
in vmsplice_to_pipe(). It's asking for trouble several years down
the road.

Make it explicit and independent from details of error checking
in vmsplice(2).

2022-11-30 04:00:14

by Peilin Ye

[permalink] [raw]
Subject: Re: [PATCH v2] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

On Sat, Nov 19, 2022 at 04:46:17AM +0000, Al Viro wrote:
> On Mon, Oct 31, 2022 at 02:03:49PM -0700, Peilin Ye wrote:
>
> > + n = vmsplice_to_pipe(file, &iter, 0);
> > + if (n == -EBADF)
> > + n = __kernel_write_iter(cprm->file, &iter, &pos);
>
> Yuck. If anything, I would rather put a flag into coredump_params
> and check it instead; this check for -EBADF is both unidiomatic and
> brittle. Suppose someday somebody looks at vmsplice(2) and
> decides that it would make sense to lift the "is it a pipe" check
> into e.g. vmsplice_type(). There's no obvious reasons not to,
> unless one happens to know that coredump relies upon that check done
> in vmsplice_to_pipe(). It's asking for trouble several years down
> the road.
>
> Make it explicit and independent from details of error checking
> in vmsplice(2).

Thanks for the review! I was a bit hesitant about introducing a new
field to coredump_params for this optimization. Will do it in v3.

Peilin Ye

2022-11-30 06:32:13

by Peilin Ye

[permalink] [raw]
Subject: [PATCH v3] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

From: Peilin Ye <[email protected]>

Currently, there is a copy for each page when dumping VMAs to pipe
handlers using dump_emit_page(). For example:

fs/binfmt_elf.c:elf_core_dump()
fs/coredump.c:dump_user_range()
:dump_emit_page()
fs/read_write.c:__kernel_write_iter()
fs/pipe.c:pipe_write()
lib/iov_iter.c:copy_page_from_iter()

Use vmsplice_to_pipe() instead of __kernel_write_iter() to avoid this
copy for pipe handlers.

Tested by dumping a 32-GByte core into a simple handler that splice()s
from stdin to disk in a loop, PIPE_DEF_BUFFERS (16) pages at a time.

Before After Improved by
Time to Completion 40.77 seconds 35.49 seconds 12.95%
CPU Usage 92.27% 86.40% 6.36%

Suggested-by: Cong Wang <[email protected]>
Signed-off-by: Peilin Ye <[email protected]>
---
change in v3:
- do not rely on error checking in vmsplice_to_pipe() (Al Viro)
- rebase onto linux-next

change in v2:
- fix warning in net/tls/tls_sw.c (kernel test robot)

fs/coredump.c | 10 +++++++++-
fs/splice.c | 4 ++--
include/linux/coredump.h | 3 +++
include/linux/splice.h | 3 +++
4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index de78bde2991b..7f0981d71881 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/splice.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -586,6 +587,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
goto fail_unlock;
}

+ set_bit(COREDUMP_USE_PIPE, &cprm.flags);
+
if (cprm.limit == 1) {
/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
*
@@ -861,7 +864,12 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 0;
pos = file->f_pos;
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
- n = __kernel_write_iter(cprm->file, &iter, &pos);
+
+ if (test_bit(COREDUMP_USE_PIPE, &cprm->flags))
+ n = vmsplice_to_pipe(file, &iter, 0);
+ else
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+
if (n != PAGE_SIZE)
return 0;
file->f_pos = pos;
diff --git a/fs/splice.c b/fs/splice.c
index 5969b7a1d353..c9be20f4115e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1234,8 +1234,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
- unsigned int flags)
+long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags)
{
struct pipe_inode_info *pipe;
long ret = 0;
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d3eba4360150..3e34009487bf 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -28,8 +28,11 @@ struct coredump_params {
int vma_count;
size_t vma_data_size;
struct core_vma_metadata *vma_meta;
+ unsigned long flags;
};

+#define COREDUMP_USE_PIPE 0
+
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..38b3560a318b 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -10,6 +10,7 @@
#define SPLICE_H

#include <linux/pipe_fs_i.h>
+#include <linux/uio.h>

/*
* Flags passed in from splice/tee/vmsplice
@@ -81,6 +82,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t *off_out,
size_t len, unsigned int flags);
+extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags);

extern long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
--
2.20.1

2023-01-12 23:06:10

by Peilin Ye

[permalink] [raw]
Subject: [PATCH v3 RESEND] coredump: Use vmsplice_to_pipe() for pipes in dump_emit_page()

From: Peilin Ye <[email protected]>

Currently, there is a copy for each page when dumping VMAs to pipe
handlers using dump_emit_page(). For example:

fs/binfmt_elf.c:elf_core_dump()
fs/coredump.c:dump_user_range()
:dump_emit_page()
fs/read_write.c:__kernel_write_iter()
fs/pipe.c:pipe_write()
lib/iov_iter.c:copy_page_from_iter()

Use vmsplice_to_pipe() instead of __kernel_write_iter() to avoid this
copy for pipe handlers.

Tested by dumping a 32-GByte core into a simple handler that splice()s
from stdin to disk in a loop, PIPE_DEF_BUFFERS (16) pages at a time.

Before After Improved by
Time to Completion 40.77 seconds 35.49 seconds 12.95%
CPU Usage 92.27% 86.40% 6.36%

Suggested-by: Cong Wang <[email protected]>
Signed-off-by: Peilin Ye <[email protected]>
---
fs/coredump.c | 10 +++++++++-
fs/splice.c | 4 ++--
include/linux/coredump.h | 3 +++
include/linux/splice.h | 3 +++
4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index f27d734f3102..4078069ede88 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/splice.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -586,6 +587,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
goto fail_unlock;
}

+ set_bit(COREDUMP_USE_PIPE, &cprm.flags);
+
if (cprm.limit == 1) {
/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
*
@@ -861,7 +864,12 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 0;
pos = file->f_pos;
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
- n = __kernel_write_iter(cprm->file, &iter, &pos);
+
+ if (test_bit(COREDUMP_USE_PIPE, &cprm->flags))
+ n = vmsplice_to_pipe(file, &iter, 0);
+ else
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+
if (n != PAGE_SIZE)
return 0;
file->f_pos = pos;
diff --git a/fs/splice.c b/fs/splice.c
index 5969b7a1d353..c9be20f4115e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1234,8 +1234,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
- unsigned int flags)
+long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags)
{
struct pipe_inode_info *pipe;
long ret = 0;
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d3eba4360150..3e34009487bf 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -28,8 +28,11 @@ struct coredump_params {
int vma_count;
size_t vma_data_size;
struct core_vma_metadata *vma_meta;
+ unsigned long flags;
};

+#define COREDUMP_USE_PIPE 0
+
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..38b3560a318b 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -10,6 +10,7 @@
#define SPLICE_H

#include <linux/pipe_fs_i.h>
+#include <linux/uio.h>

/*
* Flags passed in from splice/tee/vmsplice
@@ -81,6 +82,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t *off_out,
size_t len, unsigned int flags);
+extern long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
+ unsigned int flags);

extern long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
--
2.20.1