Hi Al and Andrew,
can one of you pick up this small series to avoid the set_fs
call in copy_strings_kernel?
Currently copy_string_kernel is just a wrapper around copy_strings that
simplifies the calling conventions and uses set_fs to allow passing a
kernel pointer. But due to the fact the we only need to handle a single
kernel argument pointer, the logic can be sigificantly simplified while
getting rid of the set_fs.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/exec.c | 43 ++++++++++++++++++++++++++++++++++---------
1 file changed, 34 insertions(+), 9 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index b2a77d5acedef..ea90af1fb2368 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -592,17 +592,42 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
*/
int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
{
- int r;
- mm_segment_t oldfs = get_fs();
- struct user_arg_ptr argv = {
- .ptr.native = (const char __user *const __user *)&arg,
- };
+ int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
+ unsigned long pos = bprm->p;
+
+ if (len == 0)
+ return -EFAULT;
+ if (!valid_arg_len(bprm, len))
+ return -E2BIG;
+
+ /* We're going to work our way backwards. */
+ arg += len;
+ bprm->p -= len;
+ if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
+ return -E2BIG;
+
+ while (len > 0) {
+ unsigned int bytes_to_copy = min_t(unsigned int, len,
+ min_not_zero(offset_in_page(pos), PAGE_SIZE));
+ struct page *page;
+ char *kaddr;
- set_fs(KERNEL_DS);
- r = copy_strings(1, argv, bprm);
- set_fs(oldfs);
+ pos -= bytes_to_copy;
+ arg -= bytes_to_copy;
+ len -= bytes_to_copy;
- return r;
+ page = get_arg_page(bprm, pos, 1);
+ if (!page)
+ return -E2BIG;
+ kaddr = kmap_atomic(page);
+ flush_arg_page(bprm, pos & PAGE_MASK, page);
+ memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
+ flush_kernel_dcache_page(page);
+ kunmap_atomic(kaddr);
+ put_arg_page(page);
+ }
+
+ return 0;
}
EXPORT_SYMBOL(copy_string_kernel);
--
2.26.2
copy_strings_kernel is always used with a single argument,
adjust the calling convention to that.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/binfmt_em86.c | 6 +++---
fs/binfmt_misc.c | 4 ++--
fs/binfmt_script.c | 6 +++---
fs/exec.c | 13 ++++++-------
include/linux/binfmts.h | 3 +--
5 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 466497860c626..f33fa668c91f5 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -68,15 +68,15 @@ static int load_em86(struct linux_binprm *bprm)
* user environment and arguments are stored.
*/
remove_arg_zero(bprm);
- retval = copy_strings_kernel(1, &bprm->filename, bprm);
+ retval = copy_string_kernel(bprm->filename, bprm);
if (retval < 0) return retval;
bprm->argc++;
if (i_arg) {
- retval = copy_strings_kernel(1, &i_arg, bprm);
+ retval = copy_string_kernel(i_arg, bprm);
if (retval < 0) return retval;
bprm->argc++;
}
- retval = copy_strings_kernel(1, &i_name, bprm);
+ retval = copy_string_kernel(i_name, bprm);
if (retval < 0) return retval;
bprm->argc++;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index cdb45829354d9..b15257d8ff5e4 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -190,13 +190,13 @@ static int load_misc_binary(struct linux_binprm *bprm)
bprm->file = NULL;
}
/* make argv[1] be the path to the binary */
- retval = copy_strings_kernel(1, &bprm->interp, bprm);
+ retval = copy_string_kernel(bprm->interp, bprm);
if (retval < 0)
goto error;
bprm->argc++;
/* add the interp as argv[0] */
- retval = copy_strings_kernel(1, &fmt->interpreter, bprm);
+ retval = copy_string_kernel(fmt->interpreter, bprm);
if (retval < 0)
goto error;
bprm->argc++;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index e9e6a6f4a35f5..c4fb7f52a46e5 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -117,17 +117,17 @@ static int load_script(struct linux_binprm *bprm)
retval = remove_arg_zero(bprm);
if (retval)
return retval;
- retval = copy_strings_kernel(1, &bprm->interp, bprm);
+ retval = copy_string_kernel(bprm->interp, bprm);
if (retval < 0)
return retval;
bprm->argc++;
if (i_arg) {
- retval = copy_strings_kernel(1, &i_arg, bprm);
+ retval = copy_string_kernel(i_arg, bprm);
if (retval < 0)
return retval;
bprm->argc++;
}
- retval = copy_strings_kernel(1, &i_name, bprm);
+ retval = copy_string_kernel(i_name, bprm);
if (retval)
return retval;
bprm->argc++;
diff --git a/fs/exec.c b/fs/exec.c
index 06b4c550af5d9..b2a77d5acedef 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -588,24 +588,23 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
}
/*
- * Like copy_strings, but get argv and its values from kernel memory.
+ * Copy and argument/environment string from the kernel to the processes stack.
*/
-int copy_strings_kernel(int argc, const char *const *__argv,
- struct linux_binprm *bprm)
+int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
{
int r;
mm_segment_t oldfs = get_fs();
struct user_arg_ptr argv = {
- .ptr.native = (const char __user *const __user *)__argv,
+ .ptr.native = (const char __user *const __user *)&arg,
};
set_fs(KERNEL_DS);
- r = copy_strings(argc, argv, bprm);
+ r = copy_strings(1, argv, bprm);
set_fs(oldfs);
return r;
}
-EXPORT_SYMBOL(copy_strings_kernel);
+EXPORT_SYMBOL(copy_string_kernel);
#ifdef CONFIG_MMU
@@ -1863,7 +1862,7 @@ static int __do_execve_file(int fd, struct filename *filename,
if (retval < 0)
goto out;
- retval = copy_strings_kernel(1, &bprm->filename, bprm);
+ retval = copy_string_kernel(bprm->filename, bprm);
if (retval < 0)
goto out;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a345d9fed3d8d..3d3afe094c97d 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -144,8 +144,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
extern int transfer_args_to_stack(struct linux_binprm *bprm,
unsigned long *sp_location);
extern int bprm_change_interp(const char *interp, struct linux_binprm *bprm);
-extern int copy_strings_kernel(int argc, const char *const *argv,
- struct linux_binprm *bprm);
+int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
extern void set_binfmt(struct linux_binfmt *new);
extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
--
2.26.2
On Fri, May 01, 2020 at 12:41:05PM +0200, Christoph Hellwig wrote:
> Currently copy_string_kernel is just a wrapper around copy_strings that
> simplifies the calling conventions and uses set_fs to allow passing a
> kernel pointer. But due to the fact the we only need to handle a single
> kernel argument pointer, the logic can be sigificantly simplified while
> getting rid of the set_fs.
I can live with that... BTW, why do we bother with flush_cache_page() (by
way of get_arg_page()) here and in copy_strings()? How could *anything*
have accessed that page by its address in new mm - what are we trying to
flush here?
On Fri, May 01, 2020 at 01:50:49PM +0100, Al Viro wrote:
> On Fri, May 01, 2020 at 12:41:05PM +0200, Christoph Hellwig wrote:
> > Currently copy_string_kernel is just a wrapper around copy_strings that
> > simplifies the calling conventions and uses set_fs to allow passing a
> > kernel pointer. But due to the fact the we only need to handle a single
> > kernel argument pointer, the logic can be sigificantly simplified while
> > getting rid of the set_fs.
>
> I can live with that... BTW, why do we bother with flush_cache_page() (by
> way of get_arg_page()) here and in copy_strings()? How could *anything*
> have accessed that page by its address in new mm - what are we trying to
> flush here?
s/get_arg_page/flush_arg_page/ ?
No idea, what the use case is, but this comes from:
commit b6a2fea39318e43fee84fa7b0b90d68bed92d2ba
Author: Ollie Wild <[email protected]>
Date: Thu Jul 19 01:48:16 2007 -0700
mm: variable length argument support
On Fri, 1 May 2020 12:41:05 +0200 Christoph Hellwig <[email protected]> wrote:
> Currently copy_string_kernel is just a wrapper around copy_strings that
> simplifies the calling conventions and uses set_fs to allow passing a
> kernel pointer. But due to the fact the we only need to handle a single
> kernel argument pointer, the logic can be sigificantly simplified while
> getting rid of the set_fs.
>
I don't get why this is better? copy_strings() is still there and
won't be going away - what's wrong with simply reusing it in this
fashion?
I guess set_fs() is a bit hacky, but there's the benefit of not having
to maintain two largely similar bits of code?
On Fri, May 01, 2020 at 02:19:03PM -0700, Andrew Morton wrote:
> On Fri, 1 May 2020 12:41:05 +0200 Christoph Hellwig <[email protected]> wrote:
>
> > Currently copy_string_kernel is just a wrapper around copy_strings that
> > simplifies the calling conventions and uses set_fs to allow passing a
> > kernel pointer. But due to the fact the we only need to handle a single
> > kernel argument pointer, the logic can be sigificantly simplified while
> > getting rid of the set_fs.
> >
>
> I don't get why this is better? copy_strings() is still there and
> won't be going away - what's wrong with simply reusing it in this
> fashion?
>
> I guess set_fs() is a bit hacky, but there's the benefit of not having
> to maintain two largely similar bits of code?
Killing set_fs() would be a very good thing...
On Fri, 1 May 2020 22:30:48 +0100 Al Viro <[email protected]> wrote:
> On Fri, May 01, 2020 at 02:19:03PM -0700, Andrew Morton wrote:
> > On Fri, 1 May 2020 12:41:05 +0200 Christoph Hellwig <[email protected]> wrote:
> >
> > > Currently copy_string_kernel is just a wrapper around copy_strings that
> > > simplifies the calling conventions and uses set_fs to allow passing a
> > > kernel pointer. But due to the fact the we only need to handle a single
> > > kernel argument pointer, the logic can be sigificantly simplified while
> > > getting rid of the set_fs.
> > >
> >
> > I don't get why this is better? copy_strings() is still there and
> > won't be going away - what's wrong with simply reusing it in this
> > fashion?
> >
> > I guess set_fs() is a bit hacky, but there's the benefit of not having
> > to maintain two largely similar bits of code?
>
> Killing set_fs() would be a very good thing...
Why is that? And is there a project afoot to do this?
On Fri, May 01, 2020 at 09:26:39PM +0200, Christoph Hellwig wrote:
> On Fri, May 01, 2020 at 01:50:49PM +0100, Al Viro wrote:
> > On Fri, May 01, 2020 at 12:41:05PM +0200, Christoph Hellwig wrote:
> > > Currently copy_string_kernel is just a wrapper around copy_strings that
> > > simplifies the calling conventions and uses set_fs to allow passing a
> > > kernel pointer. But due to the fact the we only need to handle a single
> > > kernel argument pointer, the logic can be sigificantly simplified while
> > > getting rid of the set_fs.
> >
> > I can live with that... BTW, why do we bother with flush_cache_page() (by
> > way of get_arg_page()) here and in copy_strings()? How could *anything*
> > have accessed that page by its address in new mm - what are we trying to
> > flush here?
>
> s/get_arg_page/flush_arg_page/ ?
of course - sorry...
> No idea, what the use case is, but this comes from:
>
> commit b6a2fea39318e43fee84fa7b0b90d68bed92d2ba
> Author: Ollie Wild <[email protected]>
> Date: Thu Jul 19 01:48:16 2007 -0700
>
> mm: variable length argument support
I know. And it comes with no explanations in there ;-/ AFAICS, back then
the situation hadn't been any different - mm we are inserting the arg pages
into is not active, so there shouldn't be anything in anyone's cache for
that virtual address in that vma...
On Fri, May 01, 2020 at 02:40:13PM -0700, Andrew Morton wrote:
> On Fri, 1 May 2020 22:30:48 +0100 Al Viro <[email protected]> wrote:
>
> > On Fri, May 01, 2020 at 02:19:03PM -0700, Andrew Morton wrote:
> > > On Fri, 1 May 2020 12:41:05 +0200 Christoph Hellwig <[email protected]> wrote:
> > >
> > > > Currently copy_string_kernel is just a wrapper around copy_strings that
> > > > simplifies the calling conventions and uses set_fs to allow passing a
> > > > kernel pointer. But due to the fact the we only need to handle a single
> > > > kernel argument pointer, the logic can be sigificantly simplified while
> > > > getting rid of the set_fs.
> > > >
> > >
> > > I don't get why this is better? copy_strings() is still there and
> > > won't be going away - what's wrong with simply reusing it in this
> > > fashion?
> > >
> > > I guess set_fs() is a bit hacky, but there's the benefit of not having
> > > to maintain two largely similar bits of code?
> >
> > Killing set_fs() would be a very good thing...
>
> Why is that? And is there a project afoot to do this?
Long story - basically, it's been a source of massive headache too many times.
No formal project, but there are several people (me, Arnd, Christoph) who'd
been reducing its use. For more than a decade now, I think...
FWIW, I doubt that it will be entirely killable; Christoph appears to be
more optimistic. In any case, its use has been greatly reduced and having
it narrowed down to even fewer places would be a good thing.
In the same direction: use_mm()/unuse_mm() regularization wrt set_fs(), getting
rid of it in coredump code, some movements towards killing ioctl_by_bdev();
not sure if I've spotted everything - Christoph?
On Fri, May 01, 2020 at 11:04:49PM +0100, Al Viro wrote:
> Long story - basically, it's been a source of massive headache too many times.
> No formal project, but there are several people (me, Arnd, Christoph) who'd
> been reducing its use. For more than a decade now, I think...
>
> FWIW, I doubt that it will be entirely killable; Christoph appears to be
> more optimistic. In any case, its use has been greatly reduced and having
> it narrowed down to even fewer places would be a good thing.
>
> In the same direction: use_mm()/unuse_mm() regularization wrt set_fs(), getting
> rid of it in coredump code, some movements towards killing ioctl_by_bdev();
> not sure if I've spotted everything - Christoph?
That's the big current projects out in the wild. I have a few more
growing.