This is a kernel enhancement to configure the cpu affinity of kernel
threads via kernel boot option kthread_cpus=<cpulist>.
With kthread_cpus specified, the cpumask is immediately applied upon
thread launch. This does not affect kernel threads that specify cpu
and node.
This allows CPU isolation (that is not allowing certain threads
to execute on certain CPUs) without using the isolcpus= parameter,
making it possible to enable load balancing on such CPUs
during runtime.
Note-1: this is based on MontaVista's patch at
https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
Difference being that this patch is limited to modifying
kernel thread cpumask: Behaviour of other threads can
be controlled via cgroups or sched_setaffinity.
Note-2: MontaVista's patch was based off Christoph Lameter's patch at
https://lwn.net/Articles/565932/ with the only difference being
the kernel parameter changed from kthread to kthread_cpus.
Signed-off-by: Marcelo Tosatti <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 6 +++++
include/linux/cpumask.h | 5 ++++
init/main.c | 1
kernel/cpu.c | 26 ++++++++++++++++++++++++
kernel/kthread.c | 4 +--
5 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c07815d230bc..c434c7dac5e4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2103,6 +2103,12 @@
0: force disabled
1: force enabled
+ kthread_cpus= [KNL, SMP] Only run kernel threads on the specified
+ list of processors. The kernel will start threads
+ on the indicated processors only (unless there
+ are specific reasons to run a thread with
+ different affinities).
+
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
Default is 0 (don't ignore, but inject #GP)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d5cc88514aee..a0dc4b12e048 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -55,6 +55,7 @@ extern unsigned int nr_cpu_ids;
* cpu_present_mask - has bit 'cpu' set iff cpu is populated
* cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler
* cpu_active_mask - has bit 'cpu' set iff cpu available to migration
+ * cpu_kthread_mask - has bit 'cpu' set iff general kernel threads allowed
*
* If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
*
@@ -91,10 +92,12 @@ extern struct cpumask __cpu_possible_mask;
extern struct cpumask __cpu_online_mask;
extern struct cpumask __cpu_present_mask;
extern struct cpumask __cpu_active_mask;
+extern struct cpumask __cpu_kthread_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask)
#define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask)
#define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask)
+#define cpu_kthread_mask ((const struct cpumask *)&__cpu_kthread_mask)
extern atomic_t __num_online_cpus;
@@ -145,6 +148,8 @@ static inline unsigned int cpumask_check(unsigned int cpu)
return cpu;
}
+int __init init_kthread_cpumask(void);
+
#if NR_CPUS == 1
/* Uniprocessor. Assume all masks are "1". */
static inline unsigned int cpumask_first(const struct cpumask *srcp)
diff --git a/init/main.c b/init/main.c
index ee4947af823f..69f528ddc477 100644
--- a/init/main.c
+++ b/init/main.c
@@ -618,6 +618,7 @@ noinline void __ref rest_init(void)
int pid;
rcu_scheduler_starting();
+ init_kthread_cpumask();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9c706af713fb..c521ea82b76f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2311,9 +2311,35 @@ EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);
+struct cpumask __cpu_kthread_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_kthread_mask);
+
atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);
+static struct cpumask user_cpu_kthread_mask __read_mostly;
+static int user_cpu_kthread_mask_valid __read_mostly;
+
+int __init init_kthread_cpumask(void)
+{
+ if (user_cpu_kthread_mask_valid == 1)
+ cpumask_copy(&__cpu_kthread_mask, &user_cpu_kthread_mask);
+ else
+ cpumask_copy(&__cpu_kthread_mask, cpu_all_mask);
+
+ return 0;
+}
+
+static int __init kthread_setup(char *str)
+{
+ cpulist_parse(str, &user_cpu_kthread_mask);
+ if (!cpumask_empty(&user_cpu_kthread_mask))
+ user_cpu_kthread_mask_valid = 1;
+
+ return 1;
+}
+__setup("kthread_cpus=", kthread_setup);
+
void init_cpu_present(const struct cpumask *src)
{
cpumask_copy(&__cpu_present_mask, src);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..be9c8d53a986 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -347,7 +347,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
* The kernel thread should not inherit these properties.
*/
sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
- set_cpus_allowed_ptr(task, cpu_all_mask);
+ set_cpus_allowed_ptr(task, cpu_kthread_mask);
}
kfree(create);
return task;
@@ -572,7 +572,7 @@ int kthreadd(void *unused)
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
- set_cpus_allowed_ptr(tsk, cpu_all_mask);
+ set_cpus_allowed_ptr(tsk, cpu_kthread_mask);
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
On 3/23/2020 7:54 AM, Marcelo Tosatti wrote:
>
> This is a kernel enhancement to configure the cpu affinity of kernel
> threads via kernel boot option kthread_cpus=<cpulist>.
>
> With kthread_cpus specified, the cpumask is immediately applied upon
> thread launch. This does not affect kernel threads that specify cpu
> and node.
>
> This allows CPU isolation (that is not allowing certain threads
> to execute on certain CPUs) without using the isolcpus= parameter,
> making it possible to enable load balancing on such CPUs
> during runtime.
>
> Note-1: this is based off on MontaVista's patch at
> https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
It's Wind River, not MontaVista. :)
> Difference being that this patch is limited to modifying
> kernel thread cpumask: Behaviour of other threads can
> be controlled via cgroups or sched_setaffinity.
What cgroup would the usermode helpers called by the kernel end up in?
Same as init?
Assuming that's covered, I'm good with this patch.
<snip>
> +static struct cpumask user_cpu_kthread_mask __read_mostly;
> +static int user_cpu_kthread_mask_valid __read_mostly;
Would it be cleaner to get rid of user_cpu_kthread_mask_valid and just
move the "if (!cpumask_empty" check into init_kthread_cpumask()? I'm
not really opinionated, just thinking out loud.
> +int __init init_kthread_cpumask(void)
> +{
> + if (user_cpu_kthread_mask_valid == 1)
> + cpumask_copy(&__cpu_kthread_mask, &user_cpu_kthread_mask);
> + else
> + cpumask_copy(&__cpu_kthread_mask, cpu_all_mask);
> +
> + return 0;
> +}
> +
> +static int __init kthread_setup(char *str)
> +{
> + cpulist_parse(str, &user_cpu_kthread_mask);
> + if (!cpumask_empty(&user_cpu_kthread_mask))
> + user_cpu_kthread_mask_valid = 1;
> +
> + return 1;
> +}
Marcelo,
Marcelo Tosatti <[email protected]> writes:
the subject lacks a prefix and the CC list a few people.
> This is a kernel enhancement to configure the cpu affinity of kernel
> threads via kernel boot option kthread_cpus=<cpulist>.
>
> With kthread_cpus specified, the cpumask is immediately applied upon
> thread launch. This does not affect kernel threads that specify cpu
> and node.
>
> This allows CPU isolation (that is not allowing certain threads
> to execute on certain CPUs) without using the isolcpus= parameter,
> making it possible to enable load balancing on such CPUs
> during runtime.
I'm surely missing some background information, but that sentence does
not make any sense to me.
Thanks,
tglx
On 3/23/2020 10:22 AM, Thomas Gleixner wrote:
> Marcelo,
> Marcelo Tosatti <[email protected]> writes:
>
> the subject lacks a prefix and the CC list a few people.
>
>> This is a kernel enhancement to configure the cpu affinity of kernel
>> threads via kernel boot option kthread_cpus=<cpulist>.
>>
>> With kthread_cpus specified, the cpumask is immediately applied upon
>> thread launch. This does not affect kernel threads that specify cpu
>> and node.
>>
>> This allows CPU isolation (that is not allowing certain threads
>> to execute on certain CPUs) without using the isolcpus= parameter,
>> making it possible to enable load balancing on such CPUs
>> during runtime.
>
> I'm surely missing some background information, but that sentence does
> not make any sense to me.
>
> Thanks,
>
> tglx
>
The idea is to affine general kernel threads to specific "housekeeping"
CPUs, while still allowing load balancing of tasks.
The isolcpus= boot parameter would prevent kernel threads from running
on the isolated CPUs, but it disables load balancing on the isolated CPUs.
Chris
Chris,
Chris Friesen <[email protected]> writes:
> On 3/23/2020 10:22 AM, Thomas Gleixner wrote:
>> Marcelo Tosatti <[email protected]> writes:
>>> This allows CPU isolation (that is not allowing certain threads
>>> to execute on certain CPUs) without using the isolcpus= parameter,
>>> making it possible to enable load balancing on such CPUs
>>> during runtime.
>>
>> I'm surely missing some background information, but that sentence does
>> not make any sense to me.
>
> The idea is to affine general kernel threads to specific "housekeeping"
> CPUs, while still allowing load balancing of tasks.
>
> The isolcpus= boot parameter would prevent kernel threads from running
> on the isolated CPUs, but it disables load balancing on the isolated CPUs.
So why can't we just have an isolcpus mode which allows that instead of
adding more command line options which are slightly different?
We just added some magic for managed interrupts to isolcpus, which is
surely interesting for your scenario as well...
Thanks,
tglx
On Mon, Mar 23, 2020 at 09:31:59PM +0100, Thomas Gleixner wrote:
> Chris,
>
> Chris Friesen <[email protected]> writes:
> > On 3/23/2020 10:22 AM, Thomas Gleixner wrote:
> >> Marcelo Tosatti <[email protected]> writes:
> >>> This allows CPU isolation (that is not allowing certain threads
> >>> to execute on certain CPUs) without using the isolcpus= parameter,
> >>> making it possible to enable load balancing on such CPUs
> >>> during runtime.
> >>
> >> I'm surely missing some background information, but that sentence does
> >> not make any sense to me.
> >
> > The idea is to affine general kernel threads to specific "housekeeping"
> > CPUs, while still allowing load balancing of tasks.
> >
> > The isolcpus= boot parameter would prevent kernel threads from running
> > on the isolated CPUs, but it disables load balancing on the isolated CPUs.
>
> So why can't we just have a isolcpus mode which allows that instead of
> adding more command line options which are slightly different?
>
> We just added some magic for managed interrupts to isolcpus, which is
> surely interesting for your scenario as well...
>
> Thanks,
>
> tglx
Hi Thomas, Chris,
Works for me, will adjust and resend.
This is a kernel enhancement to configure the cpu affinity of kernel
threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
When this option is specified, the cpumask is immediately applied upon
thread launch. This does not affect kernel threads that specify cpu
and node.
This allows CPU isolation (that is not allowing certain threads
to execute on certain CPUs) without using the isolcpus=domain parameter,
making it possible to enable load balancing on such CPUs
during runtime (see
Note-1: this is based on Wind River's patch at
https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
Difference being that this patch is limited to modifying
kernel thread cpumask: Behaviour of other threads can
be controlled via cgroups or sched_setaffinity.
Note-2: MontaVista's patch was based off Christoph Lameter's patch at
https://lwn.net/Articles/565932/ with the only difference being
the kernel parameter changed from kthread to kthread_cpus.
Signed-off-by: Marcelo Tosatti <[email protected]>
---
v2: use isolcpus= subcommand (Thomas Gleixner)
Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++
include/linux/cpumask.h | 5 +++++
include/linux/sched/isolation.h | 1 +
init/main.c | 1 +
kernel/cpu.c | 13 +++++++++++++
kernel/kthread.c | 4 ++--
kernel/sched/isolation.c | 6 ++++++
7 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c07815d230bc..7318e3057383 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1959,6 +1959,14 @@
the CPU affinity syscalls or cpuset.
<cpu number> begins at 0 and the maximum value is
"number of CPUs in system - 1".
+ When using cpusets, use the isolcpus option no_kthreads
+ to avoid creation of kernel threads on isolated CPUs.
+
+ no_kthreads
+ Adjust the CPU affinity mask of unbound kernel threads to
+ not contain CPUs on the isolated list. This complements
+ the isolation provided by the cpusets mechanism described
+ above.
managed_irq
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d5cc88514aee..a0dc4b12e048 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -55,6 +55,7 @@ extern unsigned int nr_cpu_ids;
* cpu_present_mask - has bit 'cpu' set iff cpu is populated
* cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler
* cpu_active_mask - has bit 'cpu' set iff cpu available to migration
+ * cpu_kthread_mask - has bit 'cpu' set iff general kernel threads allowed
*
* If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
*
@@ -91,10 +92,12 @@ extern struct cpumask __cpu_possible_mask;
extern struct cpumask __cpu_online_mask;
extern struct cpumask __cpu_present_mask;
extern struct cpumask __cpu_active_mask;
+extern struct cpumask __cpu_kthread_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask)
#define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask)
#define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask)
+#define cpu_kthread_mask ((const struct cpumask *)&__cpu_kthread_mask)
extern atomic_t __num_online_cpus;
@@ -145,6 +148,8 @@ static inline unsigned int cpumask_check(unsigned int cpu)
return cpu;
}
+int __init init_kthread_cpumask(void);
+
#if NR_CPUS == 1
/* Uniprocessor. Assume all masks are "1". */
static inline unsigned int cpumask_first(const struct cpumask *srcp)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index 0fbcbacd1b29..d002332d00eb 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -14,6 +14,7 @@ enum hk_flags {
HK_FLAG_DOMAIN = (1 << 5),
HK_FLAG_WQ = (1 << 6),
HK_FLAG_MANAGED_IRQ = (1 << 7),
+ HK_FLAG_NO_KTHREADS = (1 << 8),
};
#ifdef CONFIG_CPU_ISOLATION
diff --git a/init/main.c b/init/main.c
index ee4947af823f..69f528ddc477 100644
--- a/init/main.c
+++ b/init/main.c
@@ -618,6 +618,7 @@ noinline void __ref rest_init(void)
int pid;
rcu_scheduler_starting();
+ init_kthread_cpumask();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9c706af713fb..c549ad8e6596 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2311,9 +2311,22 @@ EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);
+struct cpumask __cpu_kthread_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_kthread_mask);
+
atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);
+int __init init_kthread_cpumask(void)
+{
+ const struct cpumask *kthread_mask;
+
+ kthread_mask = housekeeping_cpumask(HK_FLAG_NO_KTHREADS);
+ cpumask_copy(&__cpu_kthread_mask, kthread_mask);
+
+ return 0;
+}
+
void init_cpu_present(const struct cpumask *src)
{
cpumask_copy(&__cpu_present_mask, src);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..be9c8d53a986 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -347,7 +347,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
* The kernel thread should not inherit these properties.
*/
sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
- set_cpus_allowed_ptr(task, cpu_all_mask);
+ set_cpus_allowed_ptr(task, cpu_kthread_mask);
}
kfree(create);
return task;
@@ -572,7 +572,7 @@ int kthreadd(void *unused)
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
- set_cpus_allowed_ptr(tsk, cpu_all_mask);
+ set_cpus_allowed_ptr(tsk, cpu_kthread_mask);
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 008d6ac2342b..e9d48729efd4 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -169,6 +169,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
continue;
}
+ if (!strncmp(str, "no_kthreads,", 12)) {
+ str += 12;
+ flags |= HK_FLAG_NO_KTHREADS;
+ continue;
+ }
+
pr_warn("isolcpus: Error, unknown flag\n");
return 0;
}
Hi Chris,
On Mon, Mar 23, 2020 at 09:29:23AM -0600, Chris Friesen wrote:
> On 3/23/2020 7:54 AM, Marcelo Tosatti wrote:
> >
> > This is a kernel enhancement to configure the cpu affinity of kernel
> > threads via kernel boot option kthread_cpus=<cpulist>.
> >
> > With kthread_cpus specified, the cpumask is immediately applied upon
> > thread launch. This does not affect kernel threads that specify cpu
> > and node.
> >
> > This allows CPU isolation (that is not allowing certain threads
> > to execute on certain CPUs) without using the isolcpus= parameter,
> > making it possible to enable load balancing on such CPUs
> > during runtime.
> >
> > Note-1: this is based off on MontaVista's patch at
> > https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
>
> It's Wind River, not MontaVista. :)
Doh.
> > Difference being that this patch is limited to modifying
> > kernel thread cpumask: Behaviour of other threads can
> > be controlled via cgroups or sched_setaffinity.
>
> What cgroup would the usermode helpers called by the kernel end up in?
> Same as init?
>
> Assuming that's covered, I'm good with this patch.
>
> <snip>
* Runs a user-space application. The application is started
* asynchronously if wait is not set, and runs as a child of system workqueues.
* (ie. it runs with full root capabilities and optimized affinity).
*/
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{
...
queue_work(system_unbound_wq, &sub_info->work);
And unbound workqueue workers cpumask are controllable:
static void worker_attach_to_pool(struct worker *worker,
struct worker_pool *pool)
{
mutex_lock(&wq_pool_attach_mutex);
/*
* set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
* online CPUs. It'll be re-applied when any of the CPUs come up.
*/
set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
>
> > +static struct cpumask user_cpu_kthread_mask __read_mostly;
> > +static int user_cpu_kthread_mask_valid __read_mostly;
>
> Would it be cleaner to get rid of user_cpu_kthread_mask_valid and just
> move the "if (!cpumask_empty" check into init_kthread_cpumask()? I'm
> not really opinionated, just thinking out loud.
Will get rid of this with Thomas's isolcpus= suggestion.
> > +int __init init_kthread_cpumask(void)
> > +{
> > + if (user_cpu_kthread_mask_valid == 1)
> > + cpumask_copy(&__cpu_kthread_mask, &user_cpu_kthread_mask);
> > + else
> > + cpumask_copy(&__cpu_kthread_mask, cpu_all_mask);
> > +
> > + return 0;
> > +}
> > +
> > +static int __init kthread_setup(char *str)
> > +{
> > + cpulist_parse(str, &user_cpu_kthread_mask);
> > + if (!cpumask_empty(&user_cpu_kthread_mask))
> > + user_cpu_kthread_mask_valid = 1;
> > +
> > + return 1;
> > +}
I hadn't been keeping up with all the changes to the "isolcpus" boot
arg. Given how it's been extended, I agree that it seems the logical
place to deal with this. Patch seems okay to me, but I've got a couple
of nits in the message portion.
If I want to specify both no_kthreads and managed_irq, then something
like "isolcpus=managed_irq,no_kthreads,2-16" would work?
On 3/24/2020 9:20 AM, Marcelo Tosatti wrote:
>
> This is a kernel enhancement to configure the cpu affinity of kernel
> threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt
says that "isolcpus" is deprecated. Are we un-deprecating it? Or is it
only really deprecated for the "domain" option?
> When this option is specified, the cpumask is immediately applied upon
> thread launch. This does not affect kernel threads that specify cpu
> and node.
>
> This allows CPU isolation (that is not allowing certain threads
> to execute on certain CPUs) without using the isolcpus=domain parameter,
> making it possible to enable load balancing on such CPUs
> during runtime (see
I think you're missing the rest of the sentence here.
> Note-1: this is based off on Wind River's patch at
> https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
>
> Difference being that this patch is limited to modifying
> kernel thread cpumask: Behaviour of other threads can
> be controlled via cgroups or sched_setaffinity.
>
> Note-2: MontaVista's patch was based off Christoph Lameter's patch at
> https://lwn.net/Articles/565932/ with the only difference being
> the kernel parameter changed from kthread to kthread_cpus.
Wind River, not MontaVista. I know all us embedded linux folks look the
same...
>
> Signed-off-by: Marcelo Tosatti <[email protected]>
>
> ---
>
> v2: use isolcpus= subcommand (Thomas Gleixner)
>
> Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++
> include/linux/cpumask.h | 5 +++++
> include/linux/sched/isolation.h | 1 +
> init/main.c | 1 +
> kernel/cpu.c | 13 +++++++++++++
> kernel/kthread.c | 4 ++--
> kernel/sched/isolation.c | 6 ++++++
> 7 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index c07815d230bc..7318e3057383 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -1959,6 +1959,14 @@
> the CPU affinity syscalls or cpuset.
> <cpu number> begins at 0 and the maximum value is
> "number of CPUs in system - 1".
> + When using cpusets, use the isolcpus option no_kthreads
> + to avoid creation of kernel threads on isolated CPUs.
> +
> + no_kthreads
> + Adjust the CPU affinity mask of unbound kernel threads to
> + not contain CPUs on the isolated list. This complements
> + the isolation provided by the cpusets mechanism described
> + above.
It also complements the "managed_irq" option below. In many cases I'd
expect the same set of CPUs to be isolated from both irqs and kernel
threads.
Chris
On Tue, Mar 24, 2020 at 09:56:26AM -0600, Chris Friesen wrote:
> I hadn't been keeping up with all the changes to the "isolcpus" boot arg.
> Given how it's been extended, I agree that it seems the logical place to
> deal with this. Patch seems okay to me, but I've got a couple of nits in
> the message portion.
>
> If I want to specify both no_kthreads and managed_irq it then something like
> "isolcpus=managed_irq,no_kthreads,2-16" would work?
Yes.
> On 3/24/2020 9:20 AM, Marcelo Tosatti wrote:
> >
> > This is a kernel enhancement to configure the cpu affinity of kernel
> > threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
>
> https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt
> says that "isolcpus" is deprecated. Are we un-deprecating it? Or is it
> only really deprecated for the "domain" option?
I don't think it's deprecated (see the recent inclusion of managed_irq,
and the suggestion from Thomas to extend it).
Will send another patch to remove that sentence.
> > When this option is specified, the cpumask is immediately applied upon
> > thread launch. This does not affect kernel threads that specify cpu
> > and node.
> >
> > This allows CPU isolation (that is not allowing certain threads
> > to execute on certain CPUs) without using the isolcpus=domain parameter,
> > making it possible to enable load balancing on such CPUs
> > during runtime (see
>
> I think you're missing the rest of the sentence here.
Right.
> > Note-1: this is based off on Wind River's patch at
> > https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
> >
> > Difference being that this patch is limited to modifying
> > kernel thread cpumask: Behaviour of other threads can
> > be controlled via cgroups or sched_setaffinity.
> >
> > Note-2: MontaVista's patch was based off Christoph Lameter's patch at
> > https://lwn.net/Articles/565932/ with the only difference being
> > the kernel parameter changed from kthread to kthread_cpus.
>
> Wind River, not MontaVista. I know all us embedded linux folks look the
> same...
Doh^2.
>
> >
> > Signed-off-by: Marcelo Tosatti <[email protected]>
> >
> > ---
> >
> > v2: use isolcpus= subcommand (Thomas Gleixner)
> >
> > Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++
> > include/linux/cpumask.h | 5 +++++
> > include/linux/sched/isolation.h | 1 +
> > init/main.c | 1 +
> > kernel/cpu.c | 13 +++++++++++++
> > kernel/kthread.c | 4 ++--
> > kernel/sched/isolation.c | 6 ++++++
> > 7 files changed, 36 insertions(+), 2 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index c07815d230bc..7318e3057383 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -1959,6 +1959,14 @@
> > the CPU affinity syscalls or cpuset.
> > <cpu number> begins at 0 and the maximum value is
> > "number of CPUs in system - 1".
> > + When using cpusets, use the isolcpus option no_kthreads
> > + to avoid creation of kernel threads on isolated CPUs.
> > +
> > + no_kthreads
> > + Adjust the CPU affinity mask of unbound kernel threads to
> > + not contain CPUs on the isolated list. This complements
> > + the isolation provided by the cpusets mechanism described
> > + above.
>
> It also complements the "managed_irq" option below. In many cases I'd
> expect the same set of CPUs to be isolated from both irqs and kernel
> threads.
>
>
> Chris
Agree, will fix in -v3.
On Tue, Mar 24, 2020 at 12:20:16PM -0300, Marcelo Tosatti wrote:
>
> This is a kernel enhancement to configure the cpu affinity of kernel
> threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
>
> When this option is specified, the cpumask is immediately applied upon
> thread launch. This does not affect kernel threads that specify cpu
> and node.
>
> This allows CPU isolation (that is not allowing certain threads
> to execute on certain CPUs) without using the isolcpus=domain parameter,
> making it possible to enable load balancing on such CPUs
> during runtime (see
>
> Note-1: this is based off on Wind River's patch at
> https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
>
> Difference being that this patch is limited to modifying
> kernel thread cpumask: Behaviour of other threads can
> be controlled via cgroups or sched_setaffinity.
>
> Note-2: MontaVista's patch was based off Christoph Lameter's patch at
> https://lwn.net/Articles/565932/ with the only difference being
> the kernel parameter changed from kthread to kthread_cpus.
>
> Signed-off-by: Marcelo Tosatti <[email protected]>
I'm wondering, why do you need such a boot shift at all when you
can actually affine kthreads at runtime?
>
> ---
>
> v2: use isolcpus= subcommand (Thomas Gleixner)
>
> Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++
> include/linux/cpumask.h | 5 +++++
> include/linux/sched/isolation.h | 1 +
> init/main.c | 1 +
> kernel/cpu.c | 13 +++++++++++++
> kernel/kthread.c | 4 ++--
> kernel/sched/isolation.c | 6 ++++++
> 7 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index c07815d230bc..7318e3057383 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -1959,6 +1959,14 @@
> the CPU affinity syscalls or cpuset.
> <cpu number> begins at 0 and the maximum value is
> "number of CPUs in system - 1".
> + When using cpusets, use the isolcpus option no_kthreads
> + to avoid creation of kernel threads on isolated CPUs.
> +
> + no_kthreads
> + Adjust the CPU affinity mask of unbound kernel threads to
> + not contain CPUs on the isolated list. This complements
> + the isolation provided by the cpusets mechanism described
> + above.
Actually that should be "kthread" instead of no_kthreads. A flag of isolcpus
describes what we want a set of CPUs to be isolated from. Well, at least that's
how we started with "domain" and "managed_irq".
>
> managed_irq
>
> diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
> index 0fbcbacd1b29..d002332d00eb 100644
> --- a/include/linux/sched/isolation.h
> +++ b/include/linux/sched/isolation.h
> @@ -14,6 +14,7 @@ enum hk_flags {
> HK_FLAG_DOMAIN = (1 << 5),
> HK_FLAG_WQ = (1 << 6),
> HK_FLAG_MANAGED_IRQ = (1 << 7),
> + HK_FLAG_NO_KTHREADS = (1 << 8),
Similarly that should be HK_FLAG_KTHREAD.
> };
>
> #ifdef CONFIG_CPU_ISOLATION
> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index b262f47046ca..be9c8d53a986 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -347,7 +347,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
> * The kernel thread should not inherit these properties.
> */
> sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
> - set_cpus_allowed_ptr(task, cpu_all_mask);
> + set_cpus_allowed_ptr(task, cpu_kthread_mask);
I'm wondering, why are we using cpu_all_mask and not cpu_possible_mask here?
If we used the latter, you wouldn't need to create cpu_kthread_mask and
you could directly rely on housekeeping_cpumask(HK_FLAG_KTHREAD).
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 008d6ac2342b..e9d48729efd4 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -169,6 +169,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
> continue;
> }
>
> + if (!strncmp(str, "no_kthreads,", 12)) {
> + str += 12;
> + flags |= HK_FLAG_NO_KTHREADS;
You will certainly want HK_FLAG_WQ as well since workqueue has its own
way to deal with unbound affinity.
> + continue;
> + }
> +
> pr_warn("isolcpus: Error, unknown flag\n");
> return 0;
> }
>
Thanks.
Hi Frederic,
On Wed, Mar 25, 2020 at 01:30:00AM +0100, Frederic Weisbecker wrote:
> On Tue, Mar 24, 2020 at 12:20:16PM -0300, Marcelo Tosatti wrote:
> >
> > This is a kernel enhancement to configure the cpu affinity of kernel
> > threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
> >
> > When this option is specified, the cpumask is immediately applied upon
> > thread launch. This does not affect kernel threads that specify cpu
> > and node.
> >
> > This allows CPU isolation (that is not allowing certain threads
> > to execute on certain CPUs) without using the isolcpus=domain parameter,
> > making it possible to enable load balancing on such CPUs
> > during runtime (see
> >
> > Note-1: this is based off on Wind River's patch at
> > https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
> >
> > Difference being that this patch is limited to modifying
> > kernel thread cpumask: Behaviour of other threads can
> > be controlled via cgroups or sched_setaffinity.
> >
> > Note-2: MontaVista's patch was based off Christoph Lameter's patch at
> > https://lwn.net/Articles/565932/ with the only difference being
> > the kernel parameter changed from kthread to kthread_cpus.
> >
> > Signed-off-by: Marcelo Tosatti <[email protected]>
>
> I'm wondering, why do you need such a boot shift at all when you
> can actually affine kthreads on runtime?
New, unbound kernel threads inherit the cpumask of kthreadd.
Therefore there is a race between kernel thread creation
and affine.
If you know of a solution to that problem, that can be used instead.
> >
> > ---
> >
> > v2: use isolcpus= subcommand (Thomas Gleixner)
> >
> > Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++
> > include/linux/cpumask.h | 5 +++++
> > include/linux/sched/isolation.h | 1 +
> > init/main.c | 1 +
> > kernel/cpu.c | 13 +++++++++++++
> > kernel/kthread.c | 4 ++--
> > kernel/sched/isolation.c | 6 ++++++
> > 7 files changed, 36 insertions(+), 2 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index c07815d230bc..7318e3057383 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -1959,6 +1959,14 @@
> > the CPU affinity syscalls or cpuset.
> > <cpu number> begins at 0 and the maximum value is
> > "number of CPUs in system - 1".
> > + When using cpusets, use the isolcpus option no_kthreads
> > + to avoid creation of kernel threads on isolated CPUs.
> > +
> > + no_kthreads
> > + Adjust the CPU affinity mask of unbound kernel threads to
> > + not contain CPUs on the isolated list. This complements
> > + the isolation provided by the cpusets mechanism described
> > + above.
>
> Actually that should be "kthread" instead of no_kthreads. A flag of isolcpus
> describes what we want a set of CPUs to be isolated from. Well, at least that's
> how we started with "domain" and "managed_irq".
Sure.
>
> >
> > managed_irq
> >
> > diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
> > index 0fbcbacd1b29..d002332d00eb 100644
> > --- a/include/linux/sched/isolation.h
> > +++ b/include/linux/sched/isolation.h
> > @@ -14,6 +14,7 @@ enum hk_flags {
> > HK_FLAG_DOMAIN = (1 << 5),
> > HK_FLAG_WQ = (1 << 6),
> > HK_FLAG_MANAGED_IRQ = (1 << 7),
> > + HK_FLAG_NO_KTHREADS = (1 << 8),
>
> Similarly that should be HK_FLAG_KTHREAD.
Sure.
>
> > };
> >
> > #ifdef CONFIG_CPU_ISOLATION
> > diff --git a/kernel/kthread.c b/kernel/kthread.c
> > index b262f47046ca..be9c8d53a986 100644
> > --- a/kernel/kthread.c
> > +++ b/kernel/kthread.c
> > @@ -347,7 +347,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
> > * The kernel thread should not inherit these properties.
> > */
> > sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
> > - set_cpus_allowed_ptr(task, cpu_all_mask);
> > + set_cpus_allowed_ptr(task, cpu_kthread_mask);
>
> I'm wondering, why are we using cpu_all_mask and not cpu_possible_mask here?
> If we used the latter, you wouldn't need to create cpu_kthread_mask and
> you could directly rely on housekeeping_cpumask(HK_FLAG_KTHREAD).
I suppose that either works: CPUs can only be online from
cpu_possible_mask (and is contained in cpu_possible_mask).
Nice cleanup, thanks.
>
> > diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> > index 008d6ac2342b..e9d48729efd4 100644
> > --- a/kernel/sched/isolation.c
> > +++ b/kernel/sched/isolation.c
> > @@ -169,6 +169,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
> > continue;
> > }
> >
> > + if (!strncmp(str, "no_kthreads,", 12)) {
> > + str += 12;
> > + flags |= HK_FLAG_NO_KTHREADS;
>
> You will certainly want HK_FLAG_WQ as well since workqueue has its own
> way to deal with unbound affinity.
Yep. HK_FLAG_WQ is simply a convenience so that the user does not have
to configure this separately: OK.
>
> > + continue;
> > + }
> > +
> > pr_warn("isolcpus: Error, unknown flag\n");
> > return 0;
> > }
> >
>
> Thanks.
From: Marcelo Tosatti
> Sent: 24 March 2020 15:20
>
> This is a kernel enhancement to configure the cpu affinity of kernel
> threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
>
> When this option is specified, the cpumask is immediately applied upon
> thread launch. This does not affect kernel threads that specify cpu
> and node.
>
> This allows CPU isolation (that is not allowing certain threads
> to execute on certain CPUs) without using the isolcpus=domain parameter,
> making it possible to enable load balancing on such CPUs
> during runtime
...
How about making it possible to change the default affinity
for new kthreads at run time?
Is it possible to change the affinity of existing threads?
Or maybe only those that didn't specify an explicit one??
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Wed, Mar 25, 2020 at 06:05:27PM +0000, David Laight wrote:
> From: Marcelo Tosatti
> > Sent: 24 March 2020 15:20
> >
> > This is a kernel enhancement to configure the cpu affinity of kernel
> > threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
> >
> > When this option is specified, the cpumask is immediately applied upon
> > thread launch. This does not affect kernel threads that specify cpu
> > and node.
> >
> > This allows CPU isolation (that is not allowing certain threads
> > to execute on certain CPUs) without using the isolcpus=domain parameter,
> > making it possible to enable load balancing on such CPUs
> > during runtime
> ...
>
> How about making it possible to change the default affinity
> for new kthreads at run time?
> Is it possible to change the affinity of existing threads?
> Or maybe only those that didn't specify an explicit one??
>
> David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
Hi David,
Problem with that approach is the window between kernel thread creation
and cpumask change.
On Wed, Mar 25, 2020 at 08:47:36AM -0300, Marcelo Tosatti wrote:
>
> Hi Frederic,
>
> On Wed, Mar 25, 2020 at 01:30:00AM +0100, Frederic Weisbecker wrote:
> > On Tue, Mar 24, 2020 at 12:20:16PM -0300, Marcelo Tosatti wrote:
> > >
> > > This is a kernel enhancement to configure the cpu affinity of kernel
> > > threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
> > >
> > > When this option is specified, the cpumask is immediately applied upon
> > > thread launch. This does not affect kernel threads that specify cpu
> > > and node.
> > >
> > > This allows CPU isolation (that is not allowing certain threads
> > > to execute on certain CPUs) without using the isolcpus=domain parameter,
> > > making it possible to enable load balancing on such CPUs
> > > during runtime (see
> > >
> > > Note-1: this is based off on Wind River's patch at
> > > https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
> > >
> > > Difference being that this patch is limited to modifying
> > > kernel thread cpumask: Behaviour of other threads can
> > > be controlled via cgroups or sched_setaffinity.
> > >
> > > Note-2: MontaVista's patch was based off Christoph Lameter's patch at
> > > https://lwn.net/Articles/565932/ with the only difference being
> > > the kernel parameter changed from kthread to kthread_cpus.
> > >
> > > Signed-off-by: Marcelo Tosatti <[email protected]>
> >
> > I'm wondering, why do you need such a boot shift at all when you
> > can actually affine kthreads on runtime?
>
> New, unbound kernel threads inherit the cpumask of kthreadd.
>
> Therefore there is a race between kernel thread creation
> and affine.
>
> If you know of a solution to that problem, that can be used instead.
Well, you could first set the affinity of kthreadd and only then the affinity
of the others. But I can still imagine some tiny races with fork().
> >
> > > };
> > >
> > > #ifdef CONFIG_CPU_ISOLATION
> > > diff --git a/kernel/kthread.c b/kernel/kthread.c
> > > index b262f47046ca..be9c8d53a986 100644
> > > --- a/kernel/kthread.c
> > > +++ b/kernel/kthread.c
> > > @@ -347,7 +347,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
> > > * The kernel thread should not inherit these properties.
> > > */
> > > > sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
> > > - set_cpus_allowed_ptr(task, cpu_all_mask);
> > > + set_cpus_allowed_ptr(task, cpu_kthread_mask);
> >
> > I'm wondering, why are we using cpu_all_mask and not cpu_possible_mask here?
> > If we used the latter, you wouldn't need to create cpu_kthread_mask and
> > you could directly rely on housekeeping_cpumask(HK_FLAG_KTHREAD).
>
> I suppose that either work: CPUs can only be online from
> cpu_possible_mask (and is contained in cpu_possible_mask).
>
> Nice cleanup, thanks.
But may I suggest you to do:
- set_cpus_allowed_ptr(task, cpu_all_mask);
+ set_cpus_allowed_ptr(task, cpu_possible_mask);
as a first step in its own patch in the series. I just want to make sure that change
isn't missed by reviewers or bisections, in case someone catches something we
overlooked.
>
> >
> > > diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> > > index 008d6ac2342b..e9d48729efd4 100644
> > > --- a/kernel/sched/isolation.c
> > > +++ b/kernel/sched/isolation.c
> > > @@ -169,6 +169,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
> > > continue;
> > > }
> > >
> > > + if (!strncmp(str, "no_kthreads,", 12)) {
> > > + str += 12;
> > > + flags |= HK_FLAG_NO_KTHREADS;
> >
> > You will certainly want HK_FLAG_WQ as well since workqueue has its own
> > way to deal with unbound affinity.
>
> Yep. HK_FLAG_WQ is simply a convenience so that the user does not have
> to configure this separately: OK.
Also, and that's a larger debate, are you interested in isolating kthreads
only or any kind of kernel unbound work that could be affine outside
a given CPU?
In case of all the unbound work, I may suggest an all-in-one "unbound"
flag that would do:
HK_FLAG_KTHREAD | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC
| HK_FLAG_SCHED
Otherwise we can stick with HK_FLAG_KTHREAD, but I'd be curious about your usecase.
Thanks.
On Wed, Mar 25, 2020 at 06:05:27PM +0000, David Laight wrote:
> From: Marcelo Tosatti
> > Sent: 24 March 2020 15:20
> >
> > This is a kernel enhancement to configure the cpu affinity of kernel
> > threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
> >
> > When this option is specified, the cpumask is immediately applied upon
> > thread launch. This does not affect kernel threads that specify cpu
> > and node.
> >
> > This allows CPU isolation (that is not allowing certain threads
> > to execute on certain CPUs) without using the isolcpus=domain parameter,
> > making it possible to enable load balancing on such CPUs
> > during runtime
> ...
>
> How about making it possible to change the default affinity
> for new kthreads at run time?
> Is it possible to change the affinity of existing threads?
> Or maybe only those that didn't specify an explicit one??
That's already possible yes, most unbound kthreads are accessible
through /proc including kthreadd from which new kthread will inherit
their CPU affinity.
>
> David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>
On 3/26/2020 10:22 AM, Frederic Weisbecker wrote:
> On Wed, Mar 25, 2020 at 06:05:27PM +0000, David Laight wrote:
>> How about making it possible to change the default affinity
>> for new kthreads at run time?
>> Is it possible to change the affinity of existing threads?
>> Or maybe only those that didn't specify an explicit one??
>
> That's already possible yes, most unbound kthreads are accessible
> through /proc including kthreadd from which new kthread will inherit
> their CPU affinity.
Are you sure that the new kthread will inherit the CPU affinity?
__kthread_create_on_node() explicitly sets the new thread as
SCHED_NORMAL with a mask of "cpu_all_mask".
Chris
On Thu, Mar 26, 2020 at 10:32:51AM -0600, Chris Friesen wrote:
> On 3/26/2020 10:22 AM, Frederic Weisbecker wrote:
> > On Wed, Mar 25, 2020 at 06:05:27PM +0000, David Laight wrote:
>
> > > How about making it possible to change the default affinity
> > > for new kthreads at run time?
> > > Is it possible to change the affinity of existing threads?
> > > Or maybe only those that didn't specify an explicit one??
> >
> > That's already possible yes, most unbound kthreads are accessible
> > through /proc including kthreadd from which new kthread will inherit
> > their CPU affinity.
>
> Are you sure that the new kthread will inherit the CPU affinity?
>
> __kthread_create_on_node() explicitly sets the new thread as SCHED_NORMAL
> with a mask of "cpu_all_mask".
Ah, ok that's the part I missed. What a weird behaviour...
Anyway, I'm working on making all those isolcpus options
controllable through cpusets. So that should be possible at some
point in the future.
On Thu, Mar 26, 2020 at 05:20:05PM +0100, Frederic Weisbecker wrote:
> On Wed, Mar 25, 2020 at 08:47:36AM -0300, Marcelo Tosatti wrote:
> >
> > Hi Frederic,
> >
> > On Wed, Mar 25, 2020 at 01:30:00AM +0100, Frederic Weisbecker wrote:
> > > On Tue, Mar 24, 2020 at 12:20:16PM -0300, Marcelo Tosatti wrote:
> > > >
> > > > This is a kernel enhancement to configure the cpu affinity of kernel
> > > > threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
> > > >
> > > > When this option is specified, the cpumask is immediately applied upon
> > > > thread launch. This does not affect kernel threads that specify cpu
> > > > and node.
> > > >
> > > > This allows CPU isolation (that is not allowing certain threads
> > > > to execute on certain CPUs) without using the isolcpus=domain parameter,
> > > > making it possible to enable load balancing on such CPUs
> > > > during runtime (see
> > > >
> > > > Note-1: this is based off on Wind River's patch at
> > > > https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
> > > >
> > > > Difference being that this patch is limited to modifying
> > > > kernel thread cpumask: Behaviour of other threads can
> > > > be controlled via cgroups or sched_setaffinity.
> > > >
> > > > Note-2: MontaVista's patch was based off Christoph Lameter's patch at
> > > > https://lwn.net/Articles/565932/ with the only difference being
> > > > the kernel parameter changed from kthread to kthread_cpus.
> > > >
> > > > Signed-off-by: Marcelo Tosatti <[email protected]>
> > >
> > > I'm wondering, why do you need such a boot shift at all when you
> > > can actually affine kthreads on runtime?
> >
> > New, unbound kernel threads inherit the cpumask of kthreadd.
> >
> > Therefore there is a race between kernel thread creation
> > and affine.
> >
> > If you know of a solution to that problem, that can be used instead.
>
> Well, you could first set the affinity of kthreadd and only then the affinity
> of the others. But I can still imagine some tiny races with fork().
Ah forget that, I missed the part in kthread_create_on_node().
Thanks.
On Thu, Mar 26, 2020 at 05:20:05PM +0100, Frederic Weisbecker wrote:
> On Wed, Mar 25, 2020 at 08:47:36AM -0300, Marcelo Tosatti wrote:
> >
> > Hi Frederic,
> >
> > On Wed, Mar 25, 2020 at 01:30:00AM +0100, Frederic Weisbecker wrote:
> > > On Tue, Mar 24, 2020 at 12:20:16PM -0300, Marcelo Tosatti wrote:
> > > >
> > > > This is a kernel enhancement to configure the cpu affinity of kernel
> > > > threads via kernel boot option isolcpus=no_kthreads,<isolcpus_params>,<cpulist>
> > > >
> > > > When this option is specified, the cpumask is immediately applied upon
> > > > thread launch. This does not affect kernel threads that specify cpu
> > > > and node.
> > > >
> > > > This allows CPU isolation (that is not allowing certain threads
> > > > to execute on certain CPUs) without using the isolcpus=domain parameter,
> > > > making it possible to enable load balancing on such CPUs
> > > > during runtime (see
> > > >
> > > > Note-1: this is based off on Wind River's patch at
> > > > https://github.com/starlingx-staging/stx-integ/blob/master/kernel/kernel-std/centos/patches/affine-compute-kernel-threads.patch
> > > >
> > > > Difference being that this patch is limited to modifying
> > > > kernel thread cpumask: Behaviour of other threads can
> > > > be controlled via cgroups or sched_setaffinity.
> > > >
> > > > Note-2: MontaVista's patch was based off Christoph Lameter's patch at
> > > > https://lwn.net/Articles/565932/ with the only difference being
> > > > the kernel parameter changed from kthread to kthread_cpus.
> > > >
> > > > Signed-off-by: Marcelo Tosatti <[email protected]>
> > >
> > > I'm wondering, why do you need such a boot shift at all when you
> > > can actually affine kthreads on runtime?
> >
> > New, unbound kernel threads inherit the cpumask of kthreadd.
> >
> > Therefore there is a race between kernel thread creation
> > and affine.
> >
> > If you know of a solution to that problem, that can be used instead.
>
> Well, you could first set the affinity of kthreadd and only then the affinity
> of the others. But I can still imagine some tiny races with fork().
>
> > >
> > > > };
> > > >
> > > > #ifdef CONFIG_CPU_ISOLATION
> > > > diff --git a/kernel/kthread.c b/kernel/kthread.c
> > > > index b262f47046ca..be9c8d53a986 100644
> > > > --- a/kernel/kthread.c
> > > > +++ b/kernel/kthread.c
> > > > @@ -347,7 +347,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
> > > > * The kernel thread should not inherit these properties.
> > > > */
> > > > > sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
> > > > - set_cpus_allowed_ptr(task, cpu_all_mask);
> > > > + set_cpus_allowed_ptr(task, cpu_kthread_mask);
> > >
> > > I'm wondering, why are we using cpu_all_mask and not cpu_possible_mask here?
> > > If we used the latter, you wouldn't need to create cpu_kthread_mask and
> > > you could directly rely on housekeeping_cpumask(HK_FLAG_KTHREAD).
> >
> > I suppose that either work: CPUs can only be online from
> > cpu_possible_mask (and is contained in cpu_possible_mask).
> >
> > Nice cleanup, thanks.
>
> But may I suggest you to do:
>
> - set_cpus_allowed_ptr(task, cpu_all_mask);
> + set_cpus_allowed_ptr(task, cpu_possible_mask);
>
> as a first step in its own patch in the series. I just want to make sure that change
> isn't missed by reviewers or bisections, in case someone catches something we
> overlooked.
>
> >
> > >
> > > > diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> > > > index 008d6ac2342b..e9d48729efd4 100644
> > > > --- a/kernel/sched/isolation.c
> > > > +++ b/kernel/sched/isolation.c
> > > > @@ -169,6 +169,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
> > > > continue;
> > > > }
> > > >
> > > > + if (!strncmp(str, "no_kthreads,", 12)) {
> > > > + str += 12;
> > > > + flags |= HK_FLAG_NO_KTHREADS;
> > >
> > > You will certainly want HK_FLAG_WQ as well since workqueue has its own
> > > way to deal with unbound affinity.
> >
> > Yep. HK_FLAG_WQ is simply a convenience so that the user does not have
> > to configure this separately: OK.
>
> Also, and that's a larger debate, are you interested in isolating kthreads
> only or any kind of kernel unbound work that could be affine outside
> a given CPU?
Any kind of kernel work.
> In case of all the unbound work, I may suggest an all-in-one "unbound"
> flag that would do:
>
> HK_FLAG_KTHREAD | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC
> | HK_FLAG_SCHED
>
> Otherwise we can stick with HK_FLAG_KTHREAD, but I'd be curious about your usecase.
>
> Thanks.
BTW HK_FLAG_SCHED is not settable at the moment.
Any reason why nohz_full= is not setting it ?
Thanks