The vhost driver will create a kthread when userspace does a
VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread.
We can then end up violating the userspace process's RLIMIT_NPROC. This
patchset allows drivers to pass in the user to charge/check.
The patches were made over Linus's current tree.
This allows kthread to pass copy_process the user we want to check for the
RLIMIT_NPROC limit for and also charge for the new process. It will be used
by vhost where userspace has that driver create threads but the kthreadd
thread is checked/charged.
Signed-off-by: Mike Christie <[email protected]>
---
include/linux/cred.h | 3 ++-
kernel/cred.c | 7 ++++---
kernel/fork.c | 12 +++++++-----
3 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 14971322e1a0..9a2c1398cdd4 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -153,7 +153,8 @@ struct cred {
extern void __put_cred(struct cred *);
extern void exit_creds(struct task_struct *);
-extern int copy_creds(struct task_struct *, unsigned long);
+extern int copy_creds(struct task_struct *, unsigned long,
+ struct user_struct *);
extern const struct cred *get_task_cred(struct task_struct *);
extern struct cred *cred_alloc_blank(void);
extern struct cred *prepare_creds(void);
diff --git a/kernel/cred.c b/kernel/cred.c
index e1d274cd741b..e006aafa8f05 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -330,7 +330,8 @@ struct cred *prepare_exec_creds(void)
* The new process gets the current process's subjective credentials as its
* objective and subjective credentials
*/
-int copy_creds(struct task_struct *p, unsigned long clone_flags)
+int copy_creds(struct task_struct *p, unsigned long clone_flags,
+ struct user_struct *user)
{
struct cred *new;
int ret;
@@ -351,7 +352,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
kdebug("share_creds(%p{%d,%d})",
p->cred, atomic_read(&p->cred->usage),
read_cred_subscribers(p->cred));
- atomic_inc(&p->cred->user->processes);
+ atomic_inc(&user->processes);
return 0;
}
@@ -384,7 +385,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
}
#endif
- atomic_inc(&new->user->processes);
+ atomic_inc(&user->processes);
p->cred = p->real_cred = get_cred(new);
alter_cred_subscribers(new, 2);
validate_creds(new);
diff --git a/kernel/fork.c b/kernel/fork.c
index dc06afd725cb..6389aea6d3eb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1860,6 +1860,7 @@ static __latent_entropy struct task_struct *copy_process(
struct file *pidfile = NULL;
u64 clone_flags = args->flags;
struct nsproxy *nsp = current->nsproxy;
+ struct user_struct *user = args->user;
/*
* Don't allow sharing the root directory with processes in a different
@@ -1976,16 +1977,17 @@ static __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_PROVE_LOCKING
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
+ if (!user)
+ user = p->real_cred->user;
retval = -EAGAIN;
- if (atomic_read(&p->real_cred->user->processes) >=
- task_rlimit(p, RLIMIT_NPROC)) {
- if (p->real_cred->user != INIT_USER &&
+ if (atomic_read(&user->processes) >= task_rlimit(p, RLIMIT_NPROC)) {
+ if (user != INIT_USER &&
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
goto bad_fork_free;
}
current->flags &= ~PF_NPROC_EXCEEDED;
- retval = copy_creds(p, clone_flags);
+ retval = copy_creds(p, clone_flags, user);
if (retval < 0)
goto bad_fork_free;
@@ -2385,7 +2387,7 @@ static __latent_entropy struct task_struct *copy_process(
#endif
delayacct_tsk_free(p);
bad_fork_cleanup_count:
- atomic_dec(&p->cred->user->processes);
+ atomic_dec(&user->processes);
exit_creds(p);
bad_fork_free:
p->state = TASK_DEAD;
--
2.25.1
This has vhost pass in the user to the kthread API, so the process doing
the ioctl has its RLIMIT_NPROC checked and its processes count
incremented.
Signed-off-by: Mike Christie <[email protected]>
---
drivers/vhost/vhost.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5ccb0705beae..141cca6fd50a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -595,8 +595,9 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
dev->kcov_handle = kcov_common_handle();
if (dev->use_worker) {
- worker = kthread_create(vhost_worker, dev,
- "vhost-%d", current->pid);
+ worker = kthread_create_for_user(vhost_worker, dev,
+ current->real_cred->user,
+ "vhost-%d", current->pid);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
goto err_worker;
--
2.25.1
Currently, the kthreadd's user_struct has its processes checked against
the RLIMIT_NPROC limit. In cases like for vhost where the driver is making
a thread for userspace, we want the userspace process to have its processes
count checked and incremented.
This patch allows the kthread code to take a user_struct and pass it to
copy_process. The next patches will then convert the fork/cred code.
Signed-off-by: Mike Christie <[email protected]>
---
include/linux/kthread.h | 5 ++++
include/linux/sched/task.h | 2 ++
kernel/kthread.c | 58 ++++++++++++++++++++++++++++++++------
3 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2484ed97e72f..3c64bd8bf34c 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -28,6 +28,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)
+struct task_struct *kthread_create_for_user(int (*threadfn)(void *data),
+ void *data,
+ struct user_struct *user,
+ const char namefmt[], ...);
+
struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
void *data,
unsigned int cpu,
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ef02be869cf2..357e95679e33 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -34,6 +34,8 @@ struct kernel_clone_args {
int io_thread;
struct cgroup *cgrp;
struct css_set *cset;
+ /* User to check RLIMIT_NPROC against */
+ struct user_struct *user;
};
/*
diff --git a/kernel/kthread.c b/kernel/kthread.c
index fe3f2a40d61e..9e7e4d04664f 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -41,6 +41,7 @@ struct kthread_create_info
int (*threadfn)(void *data);
void *data;
int node;
+ struct user_struct *user;
/* Result passed back to kthread_create() from kthreadd. */
struct task_struct *result;
@@ -327,13 +328,21 @@ int tsk_fork_get_node(struct task_struct *tsk)
static void create_kthread(struct kthread_create_info *create)
{
+ /* We want our own signal handler (we take no signals by default). */
+ struct kernel_clone_args clone_args = {
+ .flags = CLONE_FS | CLONE_FILES | CLONE_VM |
+ CLONE_UNTRACED,
+ .exit_signal = SIGCHLD,
+ .stack = (unsigned long)kthread,
+ .stack_size = (unsigned long)create,
+ .user = create->user,
+ };
int pid;
#ifdef CONFIG_NUMA
current->pref_node_fork = create->node;
#endif
- /* We want our own signal handler (we take no signals by default). */
- pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
+ pid = kernel_clone(&clone_args);
if (pid < 0) {
/* If user was SIGKILLed, I release the structure. */
struct completion *done = xchg(&create->done, NULL);
@@ -347,11 +356,11 @@ static void create_kthread(struct kthread_create_info *create)
}
}
-static __printf(4, 0)
+static __printf(5, 0)
struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
- void *data, int node,
- const char namefmt[],
- va_list args)
+ void *data, int node,
+ struct user_struct *user,
+ const char namefmt[], va_list args)
{
DECLARE_COMPLETION_ONSTACK(done);
struct task_struct *task;
@@ -364,6 +373,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
create->data = data;
create->node = node;
create->done = &done;
+ create->user = user;
spin_lock(&kthread_create_lock);
list_add_tail(&create->list, &kthread_create_list);
@@ -444,13 +454,43 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
va_list args;
va_start(args, namefmt);
- task = __kthread_create_on_node(threadfn, data, node, namefmt, args);
+ task = __kthread_create_on_node(threadfn, data, node, NULL, namefmt,
+ args);
va_end(args);
return task;
}
EXPORT_SYMBOL(kthread_create_on_node);
+/**
+ * kthread_create_for_user - create a kthread and check @user's RLIMIT_NPROC
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @user: user_struct that will have its RLIMIT_NPROC checked
+ * @namefmt: printf-style name for the thread.
+ *
+ * This will create a kthread on the current node, leaving it in the stopped
+ * state. This is just a helper for kthread_create_on_node() that will check
+ * @user's process count against its RLIMIT_NPROC. See the
+ * kthread_create_on_node() documentation for more details.
+ */
+struct task_struct *kthread_create_for_user(int (*threadfn)(void *data),
+ void *data,
+ struct user_struct *user,
+ const char namefmt[], ...)
+{
+ struct task_struct *task;
+ va_list args;
+
+ va_start(args, namefmt);
+ task = __kthread_create_on_node(threadfn, data, NUMA_NO_NODE, user,
+ namefmt, args);
+ va_end(args);
+
+ return task;
+}
+EXPORT_SYMBOL(kthread_create_for_user);
+
static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
{
unsigned long flags;
@@ -785,8 +825,8 @@ __kthread_create_worker(int cpu, unsigned int flags,
if (cpu >= 0)
node = cpu_to_node(cpu);
- task = __kthread_create_on_node(kthread_worker_fn, worker,
- node, namefmt, args);
+ task = __kthread_create_on_node(kthread_worker_fn, worker, node, NULL,
+ namefmt, args);
if (IS_ERR(task))
goto fail_task;
--
2.25.1
Hi Mike,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on vhost/linux-next]
[also build test WARNING on linux/master linus/master v5.13-rc7]
[cannot apply to next-20210623]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Mike-Christie/kthread-pass-in-user-and-check-RLIMIT_NPROC/20210624-110925
base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: um-x86_64_defconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/9b4a744e588ed25e06eed415174977e7533b24dc
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Mike-Christie/kthread-pass-in-user-and-check-RLIMIT_NPROC/20210624-110925
git checkout 9b4a744e588ed25e06eed415174977e7533b24dc
# save the attached .config to linux build tree
make W=1 ARCH=um SUBARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
cc1: warning: arch/um/include/uapi: No such file or directory [-Wmissing-include-dirs]
kernel/kthread.c: In function 'kthread_create_for_user':
>> kernel/kthread.c:466:6: warning: function 'kthread_create_for_user' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
466 | namefmt, args);
| ^~~~~~~
vim +466 kernel/kthread.c
443
444 /**
445 * kthread_create_for_user - create a kthread and check @user's RLIMIT_NPROC
446 * @threadfn: the function to run until signal_pending(current).
447 * @data: data ptr for @threadfn.
448 * @user: user_struct that will have its RLIMIT_NPROC checked
449 * @namefmt: printf-style name for the thread.
450 *
451 * This will create a kthread on the current node, leaving it in the stopped
452 * state. This is just a helper for kthread_create_on_node() that will check
453 * @user's process count against its RLIMIT_NPROC. See the
454 * kthread_create_on_node() documentation for more details.
455 */
456 struct task_struct *kthread_create_for_user(int (*threadfn)(void *data),
457 void *data,
458 struct user_struct *user,
459 const char namefmt[], ...)
460 {
461 struct task_struct *task;
462 va_list args;
463
464 va_start(args, namefmt);
465 task = __kthread_create_on_node(threadfn, data, NUMA_NO_NODE, user,
> 466 namefmt, args);
467 va_end(args);
468
469 return task;
470 }
471 EXPORT_SYMBOL(kthread_create_for_user);
472
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On Wed, Jun 23, 2021 at 10:08:01PM -0500, Mike Christie wrote:
> The vhost driver will create a kthread when userspace does a
> VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread.
> We can then end up violating the userspace process's RLIMIT_NPROC. This
> patchset allows drivers to pass in the user to charge/check.
>
> The patches were made over Linus's current tree.
>
Makes sense I guess.
Acked-by: Michael S. Tsirkin <[email protected]>
Hi Mike,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on vhost/linux-next]
[also build test WARNING on linux/master linus/master v5.13-rc7]
[cannot apply to next-20210623]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Mike-Christie/kthread-pass-in-user-and-check-RLIMIT_NPROC/20210624-110925
base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: riscv-randconfig-s032-20210622 (attached as .config)
compiler: riscv64-linux-gcc (GCC) 9.3.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.3-341-g8af24329-dirty
# https://github.com/0day-ci/linux/commit/daae0f4bb5ef7264d67cab20da37192754f885b8
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Mike-Christie/kthread-pass-in-user-and-check-RLIMIT_NPROC/20210624-110925
git checkout daae0f4bb5ef7264d67cab20da37192754f885b8
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=riscv
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
sparse warnings: (new ones prefixed by >>)
>> drivers/vhost/vhost.c:599:57: sparse: sparse: dereference of noderef expression
vim +599 drivers/vhost/vhost.c
581
582 /* Caller should have device mutex */
583 long vhost_dev_set_owner(struct vhost_dev *dev)
584 {
585 struct task_struct *worker;
586 int err;
587
588 /* Is there an owner already? */
589 if (vhost_dev_has_owner(dev)) {
590 err = -EBUSY;
591 goto err_mm;
592 }
593
594 vhost_attach_mm(dev);
595
596 dev->kcov_handle = kcov_common_handle();
597 if (dev->use_worker) {
598 worker = kthread_create_for_user(vhost_worker, dev,
> 599 current->real_cred->user,
600 "vhost-%d", current->pid);
601 if (IS_ERR(worker)) {
602 err = PTR_ERR(worker);
603 goto err_worker;
604 }
605
606 dev->worker = worker;
607 wake_up_process(worker); /* avoid contributing to loadavg */
608
609 err = vhost_attach_cgroups(dev);
610 if (err)
611 goto err_cgroup;
612 }
613
614 err = vhost_dev_alloc_iovecs(dev);
615 if (err)
616 goto err_cgroup;
617
618 return 0;
619 err_cgroup:
620 if (dev->worker) {
621 kthread_stop(dev->worker);
622 dev->worker = NULL;
623 }
624 err_worker:
625 vhost_detach_mm(dev);
626 dev->kcov_handle = 0;
627 err_mm:
628 return err;
629 }
630 EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
631
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On 6/24/21 3:26 AM, kernel test robot wrote:
>>> drivers/vhost/vhost.c:599:57: sparse: sparse: dereference of noderef expression
> vim +599 drivers/vhost/vhost.c
>
> 581
> 582 /* Caller should have device mutex */
> 583 long vhost_dev_set_owner(struct vhost_dev *dev)
> 584 {
> 585 struct task_struct *worker;
> 586 int err;
> 587
> 588 /* Is there an owner already? */
> 589 if (vhost_dev_has_owner(dev)) {
> 590 err = -EBUSY;
> 591 goto err_mm;
> 592 }
> 593
> 594 vhost_attach_mm(dev);
> 595
> 596 dev->kcov_handle = kcov_common_handle();
> 597 if (dev->use_worker) {
> 598 worker = kthread_create_for_user(vhost_worker, dev,
> > 599 current->real_cred->user,
> 600 "vhost-%d", current->pid);
It looks like I should be doing something like get_uid(current_user())
then a free_uid() when doing using the user_struct.
Will fix.
On 6/23/21 11:34 PM, kernel test robot wrote:
>>> kernel/kthread.c:466:6: warning: function 'kthread_create_for_user' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
> 466 | namefmt, args);
> | ^~~~~~~
>
Will add a __printf(4, 5).
On Wed, Jun 23, 2021 at 10:08:01PM -0500, Mike Christie wrote:
> The vhost driver will create a kthread when userspace does a
> VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread.
> We can then end up violating the userspace process's RLIMIT_NPROC. This
> patchset allows drivers to pass in the user to charge/check.
>
> The patches were made over Linus's current tree.
Makes sense from a vhost perspective and for future users, but I'm not
familiar with the kthread internals:
Acked-by: Stefan Hajnoczi <[email protected]>
On Wed, Jun 23, 2021 at 10:08:03PM -0500, Mike Christie wrote:
> This allows kthread to pass copy_process the user we want to check for the
> RLIMIT_NPROC limit for and also charge for the new process. It will be used
> by vhost where userspace has that driver create threads but the kthreadd
> thread is checked/charged.
>
> Signed-off-by: Mike Christie <[email protected]>
> ---
> include/linux/cred.h | 3 ++-
> kernel/cred.c | 7 ++++---
> kernel/fork.c | 12 +++++++-----
> 3 files changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/cred.h b/include/linux/cred.h
> index 14971322e1a0..9a2c1398cdd4 100644
> --- a/include/linux/cred.h
> +++ b/include/linux/cred.h
> @@ -153,7 +153,8 @@ struct cred {
>
> extern void __put_cred(struct cred *);
> extern void exit_creds(struct task_struct *);
> -extern int copy_creds(struct task_struct *, unsigned long);
> +extern int copy_creds(struct task_struct *, unsigned long,
> + struct user_struct *);
> extern const struct cred *get_task_cred(struct task_struct *);
> extern struct cred *cred_alloc_blank(void);
> extern struct cred *prepare_creds(void);
> diff --git a/kernel/cred.c b/kernel/cred.c
> index e1d274cd741b..e006aafa8f05 100644
> --- a/kernel/cred.c
> +++ b/kernel/cred.c
> @@ -330,7 +330,8 @@ struct cred *prepare_exec_creds(void)
> * The new process gets the current process's subjective credentials as its
> * objective and subjective credentials
> */
> -int copy_creds(struct task_struct *p, unsigned long clone_flags)
> +int copy_creds(struct task_struct *p, unsigned long clone_flags,
> + struct user_struct *user)
> {
> struct cred *new;
> int ret;
> @@ -351,7 +352,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
> kdebug("share_creds(%p{%d,%d})",
> p->cred, atomic_read(&p->cred->usage),
> read_cred_subscribers(p->cred));
> - atomic_inc(&p->cred->user->processes);
> + atomic_inc(&user->processes);
Hey Mike,
This won't work anymore since this has moved into ucounts. So in v5.14
atomic_inc(&p->cred->user->processes);
will have been replaced by
inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
From what I can see from your code vhost will always create this kthread
for current. So you could e.g. add an internal flag/bitfield entry to
struct kernel_clone_args that you can use to tell copy_creds() that you
want to charge this thread against current's process limit.
> return 0;
> }
>
> @@ -384,7 +385,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
> }
> #endif
>
> - atomic_inc(&new->user->processes);
> + atomic_inc(&user->processes);
> p->cred = p->real_cred = get_cred(new);
> alter_cred_subscribers(new, 2);
> validate_creds(new);
> diff --git a/kernel/fork.c b/kernel/fork.c
> index dc06afd725cb..6389aea6d3eb 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1860,6 +1860,7 @@ static __latent_entropy struct task_struct *copy_process(
> struct file *pidfile = NULL;
> u64 clone_flags = args->flags;
> struct nsproxy *nsp = current->nsproxy;
> + struct user_struct *user = args->user;
>
> /*
> * Don't allow sharing the root directory with processes in a different
> @@ -1976,16 +1977,17 @@ static __latent_entropy struct task_struct *copy_process(
> #ifdef CONFIG_PROVE_LOCKING
> DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
> #endif
> + if (!user)
> + user = p->real_cred->user;
> retval = -EAGAIN;
> - if (atomic_read(&p->real_cred->user->processes) >=
> - task_rlimit(p, RLIMIT_NPROC)) {
> - if (p->real_cred->user != INIT_USER &&
> + if (atomic_read(&user->processes) >= task_rlimit(p, RLIMIT_NPROC)) {
> + if (user != INIT_USER &&
> !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
> goto bad_fork_free;
> }
> current->flags &= ~PF_NPROC_EXCEEDED;
>
> - retval = copy_creds(p, clone_flags);
> + retval = copy_creds(p, clone_flags, user);
> if (retval < 0)
> goto bad_fork_free;
>
> @@ -2385,7 +2387,7 @@ static __latent_entropy struct task_struct *copy_process(
> #endif
> delayacct_tsk_free(p);
> bad_fork_cleanup_count:
> - atomic_dec(&p->cred->user->processes);
> + atomic_dec(&user->processes);
> exit_creds(p);
> bad_fork_free:
> p->state = TASK_DEAD;
> --
> 2.25.1
On 6/29/21 8:04 AM, Christian Brauner wrote:
> On Wed, Jun 23, 2021 at 10:08:03PM -0500, Mike Christie wrote:
>> This allows kthread to pass copy_process the user we want to check for the
>> RLIMIT_NPROC limit for and also charge for the new process. It will be used
>> by vhost where userspace has that driver create threads but the kthreadd
>> thread is checked/charged.
>>
>> Signed-off-by: Mike Christie <[email protected]>
>> ---
>> include/linux/cred.h | 3 ++-
>> kernel/cred.c | 7 ++++---
>> kernel/fork.c | 12 +++++++-----
>> 3 files changed, 13 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/cred.h b/include/linux/cred.h
>> index 14971322e1a0..9a2c1398cdd4 100644
>> --- a/include/linux/cred.h
>> +++ b/include/linux/cred.h
>> @@ -153,7 +153,8 @@ struct cred {
>>
>> extern void __put_cred(struct cred *);
>> extern void exit_creds(struct task_struct *);
>> -extern int copy_creds(struct task_struct *, unsigned long);
>> +extern int copy_creds(struct task_struct *, unsigned long,
>> + struct user_struct *);
>> extern const struct cred *get_task_cred(struct task_struct *);
>> extern struct cred *cred_alloc_blank(void);
>> extern struct cred *prepare_creds(void);
>> diff --git a/kernel/cred.c b/kernel/cred.c
>> index e1d274cd741b..e006aafa8f05 100644
>> --- a/kernel/cred.c
>> +++ b/kernel/cred.c
>> @@ -330,7 +330,8 @@ struct cred *prepare_exec_creds(void)
>> * The new process gets the current process's subjective credentials as its
>> * objective and subjective credentials
>> */
>> -int copy_creds(struct task_struct *p, unsigned long clone_flags)
>> +int copy_creds(struct task_struct *p, unsigned long clone_flags,
>> + struct user_struct *user)
>> {
>> struct cred *new;
>> int ret;
>> @@ -351,7 +352,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
>> kdebug("share_creds(%p{%d,%d})",
>> p->cred, atomic_read(&p->cred->usage),
>> read_cred_subscribers(p->cred));
>> - atomic_inc(&p->cred->user->processes);
>> + atomic_inc(&user->processes);
>
> Hey Mike,
>
> This won't work anymore since this has moved into ucounts. So in v5.14
> atomic_inc(&p->cred->user->processes);
> will have been replaced by
> inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
>
Will do.
> From what I can see from your code vhost will always create this kthread
> for current. So you could e.g. add an internal flag/bitfield entry to
> struct kernel_clone_args that you can use to tell copy_creds() that you
> want to charge this thread against current's process limit.
If I understood you, I don't think a flag/bit will work. When vhost does
a kthread call we do kthread_create -> __kthread_create_on_node. This creates
a tmp kthread_create_info struct and adds it to the kthread_create_list list.
It then wakes up the kthreadd thread. kthreadd will then loop over the list,
and do the:
kernel_thread -> kernel_clone -> copy_process -> copy_creds
So copy_creds sees current == kthreadd.
I think I would have to add a task_struct pointer to kernel_clone_args
and kthread_create_info. If copy_creds sees kernel_clone_args->user_task
then it would use that.
On 6/29/21 11:53 AM, Mike Christie wrote:
> On 6/29/21 8:04 AM, Christian Brauner wrote:
>> On Wed, Jun 23, 2021 at 10:08:03PM -0500, Mike Christie wrote:
>>> This allows kthread to pass copy_process the user we want to check for the
>>> RLIMIT_NPROC limit for and also charge for the new process. It will be used
>>> by vhost where userspace has that driver create threads but the kthreadd
>>> thread is checked/charged.
>>>
>>> Signed-off-by: Mike Christie <[email protected]>
>>> ---
>>> include/linux/cred.h | 3 ++-
>>> kernel/cred.c | 7 ++++---
>>> kernel/fork.c | 12 +++++++-----
>>> 3 files changed, 13 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/include/linux/cred.h b/include/linux/cred.h
>>> index 14971322e1a0..9a2c1398cdd4 100644
>>> --- a/include/linux/cred.h
>>> +++ b/include/linux/cred.h
>>> @@ -153,7 +153,8 @@ struct cred {
>>>
>>> extern void __put_cred(struct cred *);
>>> extern void exit_creds(struct task_struct *);
>>> -extern int copy_creds(struct task_struct *, unsigned long);
>>> +extern int copy_creds(struct task_struct *, unsigned long,
>>> + struct user_struct *);
>>> extern const struct cred *get_task_cred(struct task_struct *);
>>> extern struct cred *cred_alloc_blank(void);
>>> extern struct cred *prepare_creds(void);
>>> diff --git a/kernel/cred.c b/kernel/cred.c
>>> index e1d274cd741b..e006aafa8f05 100644
>>> --- a/kernel/cred.c
>>> +++ b/kernel/cred.c
>>> @@ -330,7 +330,8 @@ struct cred *prepare_exec_creds(void)
>>> * The new process gets the current process's subjective credentials as its
>>> * objective and subjective credentials
>>> */
>>> -int copy_creds(struct task_struct *p, unsigned long clone_flags)
>>> +int copy_creds(struct task_struct *p, unsigned long clone_flags,
>>> + struct user_struct *user)
>>> {
>>> struct cred *new;
>>> int ret;
>>> @@ -351,7 +352,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
>>> kdebug("share_creds(%p{%d,%d})",
>>> p->cred, atomic_read(&p->cred->usage),
>>> read_cred_subscribers(p->cred));
>>> - atomic_inc(&p->cred->user->processes);
>>> + atomic_inc(&user->processes);
>>
>> Hey Mike,
>>
>> This won't work anymore since this has moved into ucounts. So in v5.14
>> atomic_inc(&p->cred->user->processes);
>> will have been replaced by
>> inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
>>
> Will do.
>
>> From what I can see from your code vhost will always create this kthread
>> for current. So you could e.g. add an internal flag/bitfield entry to
>> struct kernel_clone_args that you can use to tell copy_creds() that you
>> want to charge this thread against current's process limit.
>
> If I understood you, I don't think a flag/bit will work. When vhost does
> a kthread call we do kthread_create -> __kthread_create_on_node. This creates
> a tmp kthread_create_info struct and adds it to the kthread_create_list list.
> It then wakes up the kthreadd thread. kthreadd will then loop over the list,
> and do the:
>
> kernel_thread -> kernel_clone -> copy_process -> copy_creds
>
> So copy_creds sees current == kthreadd.
>
> I think I would have to add a task_struct pointer to kernel_clone_args
> and kthread_create_info. If copy_creds sees kernel_clone_args->user_task
> then it would use that.
One question/clarification. For 5.14, I could pass in the struct task_struct
or struct ucounts (in a previous mail I wrote user_struct).
I could also just have vhost.c do inc_rlimit_ucounts and is_ucounts_overlimit
directly.