2020-04-16 08:57:54

by Li RongQing

[permalink] [raw]
Subject: [PATCH] sched/isolation: allow isolcpus and nohz_full for different cpus

Currently, when both isolcpus= and nohz_full= are set, they must
specify the same CPUs. In fact, isolcpus and nohz_full are not
related, and some use cases need different CPUs for each: for
example, cores running polling threads need isolcpus, while cores
running dedicated threads need only nohz_full.

So define two housekeeping masks to store these two configurations
separately, and require the CPUs to match only when both nohz_full=
and isolcpus=nohz are passed to the kernel.

Also fix a build error when CONFIG_CPUMASK_OFFSTACK is not configured,
reported by the kbuild test robot <[email protected]>.

Signed-off-by: Li RongQing <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
---
kernel/sched/isolation.c | 80 ++++++++++++++++++++++++++++++++----------------
1 file changed, 54 insertions(+), 26 deletions(-)

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 008d6ac2342b..f2331c443121 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -11,7 +11,8 @@

DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
EXPORT_SYMBOL_GPL(housekeeping_overridden);
-static cpumask_var_t housekeeping_mask;
+static cpumask_var_t housekeeping_mask_isolcpus;
+static cpumask_var_t housekeeping_mask_nohz_full;
static unsigned int housekeeping_flags;

bool housekeeping_enabled(enum hk_flags flags)
@@ -20,12 +21,27 @@ bool housekeeping_enabled(enum hk_flags flags)
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);

+static struct cpumask *housekeeping_get_mask(enum hk_flags flags)
+{
+ if (flags & (HK_FLAG_DOMAIN | HK_FLAG_MANAGED_IRQ))
+ return housekeeping_mask_isolcpus;
+
+ /* set by isolcpus=nohz only */
+ if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_RCU))
+ return housekeeping_mask_isolcpus;
+
+ return housekeeping_mask_nohz_full;
+}
+
int housekeeping_any_cpu(enum hk_flags flags)
{
int cpu;

if (static_branch_unlikely(&housekeeping_overridden)) {
if (housekeeping_flags & flags) {
+ struct cpumask *housekeeping_mask;
+
+ housekeeping_mask = housekeeping_get_mask(flags);
cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
if (cpu < nr_cpu_ids)
return cpu;
@@ -41,7 +57,7 @@ const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
{
if (static_branch_unlikely(&housekeeping_overridden))
if (housekeeping_flags & flags)
- return housekeeping_mask;
+ return housekeeping_get_mask(flags);
return cpu_possible_mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
@@ -49,16 +65,24 @@ EXPORT_SYMBOL_GPL(housekeeping_cpumask);
void housekeeping_affine(struct task_struct *t, enum hk_flags flags)
{
if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
+ if (housekeeping_flags & flags) {
+ struct cpumask *housekeeping_mask;
+
+ housekeeping_mask = housekeeping_get_mask(flags);
set_cpus_allowed_ptr(t, housekeeping_mask);
+ }
}
EXPORT_SYMBOL_GPL(housekeeping_affine);

bool housekeeping_test_cpu(int cpu, enum hk_flags flags)
{
if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
+ if (housekeeping_flags & flags) {
+ struct cpumask *housekeeping_mask;
+
+ housekeeping_mask = housekeeping_get_mask(flags);
return cpumask_test_cpu(cpu, housekeeping_mask);
+ }
return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
@@ -74,10 +98,14 @@ void __init housekeeping_init(void)
sched_tick_offload_init();

/* We need at least one CPU to handle housekeeping work */
- WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
+ if (housekeeping_flags & (HK_FLAG_DOMAIN | HK_FLAG_MANAGED_IRQ))
+ WARN_ON_ONCE(cpumask_empty(housekeeping_mask_isolcpus));
+ if (housekeeping_flags & HK_FLAG_TICK)
+ WARN_ON_ONCE(cpumask_empty(housekeeping_mask_nohz_full));
}

-static int __init housekeeping_setup(char *str, enum hk_flags flags)
+static int __init housekeeping_setup(char *str, enum hk_flags flags,
+ cpumask_var_t *housekeeping_mask)
{
cpumask_var_t non_housekeeping_mask;
cpumask_var_t tmp;
@@ -92,25 +120,25 @@ static int __init housekeeping_setup(char *str, enum hk_flags flags)
}

alloc_bootmem_cpumask_var(&tmp);
- if (!housekeeping_flags) {
- alloc_bootmem_cpumask_var(&housekeeping_mask);
- cpumask_andnot(housekeeping_mask,
- cpu_possible_mask, non_housekeeping_mask);
-
- cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
- if (cpumask_empty(tmp)) {
- pr_warn("Housekeeping: must include one present CPU, "
+ alloc_bootmem_cpumask_var(housekeeping_mask);
+ cpumask_andnot(*housekeeping_mask,
+ cpu_possible_mask, non_housekeeping_mask);
+
+ cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
+ if (cpumask_empty(tmp)) {
+ pr_warn("Housekeeping: must include one present CPU, "
"using boot CPU:%d\n", smp_processor_id());
- __cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
- __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
- }
- } else {
- cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
- if (cpumask_empty(tmp))
- __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
- cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
- if (!cpumask_equal(tmp, housekeeping_mask)) {
- pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
+ __cpumask_set_cpu(smp_processor_id(), *housekeeping_mask);
+ __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
+ }
+
+ /* cpus should match when both nohz_full and isolcpus
+ * with nohz are passed into kernel
+ */
+ if (housekeeping_flags & flags & HK_FLAG_TICK) {
+ if (!cpumask_equal(housekeeping_mask_nohz_full,
+ housekeeping_mask_isolcpus)) {
+ pr_warn("Housekeeping: nohz_full= must match isolcpus=nohz\n");
free_bootmem_cpumask_var(tmp);
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
@@ -142,7 +170,7 @@ static int __init housekeeping_nohz_full_setup(char *str)

flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;

- return housekeeping_setup(str, flags);
+ return housekeeping_setup(str, flags, &housekeeping_mask_nohz_full);
}
__setup("nohz_full=", housekeeping_nohz_full_setup);

@@ -177,6 +205,6 @@ static int __init housekeeping_isolcpus_setup(char *str)
if (!flags)
flags |= HK_FLAG_DOMAIN;

- return housekeeping_setup(str, flags);
+ return housekeeping_setup(str, flags, &housekeeping_mask_isolcpus);
}
__setup("isolcpus=", housekeeping_isolcpus_setup);
--
2.16.2


2020-04-20 16:42:55

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH] sched/isolation: allow isolcpus and nohz_full for different cpus

On Thu, Apr 16, 2020 at 04:51:18PM +0800, Li RongQing wrote:
> when both isolcpus and nohz_full are set, their cpus must be
> same now, in fact isolcpus and nohz_full are not related, and
> different cpus are expected for some cases, for example, some
> cores for polling threads wants to isolcpus, and some cores for
> dedicated threads, only nohz_full is expected
>
> so define two housekeeping mask to save these two configuration
> separately and make cpus same only when both nohz_full and
> isolcpus with nohz are passed into kernel
>
> fix a build error when CONFIG_CPUMASK_OFFSTACK is not configured
> reported by kbuild test robot <[email protected]>

What is the use case in which you want to affine managed interrupts?
Do you only want to affine IRQs, or do you also want to affine
every unbound work, such as kthreads, workqueues, timers, etc.?

In the end I would like to group the isolation features that only
make sense together. So we could end up with three cpumasks, one
for "domains", one for "nohz" and one for all "unbound" works.

In fact "domains" should even disappear and become "unbound" +
"load_balance", as that's the desired outcome of having NULL domains.

I'm trying to prepare a suitable interface for all that in cpusets
where we already have the load_balance part.

Thanks.

2020-04-21 02:24:24

by Li RongQing

[permalink] [raw]
Subject: 答复: [PATCH] sched/isolation: allow isolcpus and nohz_full for different cpus



> -----邮件原件-----
> 发件人: Frederic Weisbecker [mailto:[email protected]]
> 发送时间: 2020年4月21日 0:34
> 收件人: Li,Rongqing <[email protected]>
> 抄送: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> 主题: Re: [PATCH] sched/isolation: allow isolcpus and nohz_full for different
> cpus
>
> On Thu, Apr 16, 2020 at 04:51:18PM +0800, Li RongQing wrote:
> > when both isolcpus and nohz_full are set, their cpus must be same now,
> > in fact isolcpus and nohz_full are not related, and different cpus are
> > expected for some cases, for example, some cores for polling threads
> > wants to isolcpus, and some cores for dedicated threads, only
> > nohz_full is expected
> >
> > so define two housekeeping mask to save these two configuration
> > separately and make cpus same only when both nohz_full and isolcpus
> > with nohz are passed into kernel
> >
> > fix a build error when CONFIG_CPUMASK_OFFSTACK is not configured
> > reported by kbuild test robot <[email protected]>
>
> What is the usecase when you want to affine managed interrupt?
> Do you only want to affine IRQ or do you also want to affine every unbound
> work, such as kthread, workqueues, timers, etc...?
>

On a cloud compute node, both qemu and ovs-dpdk run.

Some cores are dedicated to ovs-dpdk and should be isolated from any disturbance (isolcpus= nohz_full= rcu_nocbs=).

Other cores run KVM/qemu guests and should not be disturbed by hrtimers (nohz_full= only), using this feature: https://patchwork.kernel.org/cover/11033533/

Thanks

-LiRongQing

> In the end I would like to group the isolation features that only make sense
> together. So we could end up with three cpumasks, one for "domains", one for
> "nohz" and one for all "unbound" works.
>
> In fact "domains" should even disappear and become "unbound" +
> "load_balance", as that's the desired outcome of having NULL domains.
>
> I'm trying to prepare a suitable interface for all that in cpusets where we
> already have the load_balance part.
>
> Thanks.