LinuxLists.cc - [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

2020-12-10 19:52:48

Subject: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

All event channel setups bind the interrupt on CPU0 or the target CPU for
percpu interrupts and overwrite the affinity mask with the corresponding
cpumask. That does not make sense.

The XEN implementation of irqchip::irq_set_affinity() already picks a
single target CPU out of the affinity mask and the actual target is stored
in the effective CPU mask, so destroying the user chosen affinity mask
which might contain more than one CPU is wrong.

Change the implementation so that the channel is bound to CPU0 at the XEN
level and leave the affinity mask alone. At startup of the interrupt
affinity will be assigned out of the affinity mask and the XEN binding will
be updated. Only keep the enforcement for real percpu interrupts.

On resume the overwrite is not required either because info->cpu and the
affinity mask are still the same as at the time of suspend. Same for
rebind_evtchn_irq().

This also prepares for proper interrupt spreading.

Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Stefano Stabellini <[email protected]>
Cc: [email protected]
---
drivers/xen/events/events_base.c | 42 ++++++++++++++++++++++++++-------------
1 file changed, 28 insertions(+), 14 deletions(-)

--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -433,15 +433,20 @@ static bool pirq_needs_eoi_flag(unsigned
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}

-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ bool force_affinity)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info = info_for_irq(irq);

BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
-#endif
+
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+ cpumask_copy(irq_get_effective_affinity_mask(irq),
+ cpumask_of(cpu));
+ }
+
xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);

info->cpu = cpu;
@@ -788,7 +793,7 @@ static unsigned int __startup_pirq(unsig
goto err;

info->evtchn = evtchn;
- bind_evtchn_to_cpu(evtchn, 0);
+ bind_evtchn_to_cpu(evtchn, 0, false);

rc = xen_evtchn_port_setup(evtchn);
if (rc)
@@ -1107,8 +1112,8 @@ static int bind_evtchn_to_irq_chip(evtch
irq = ret;
goto out;
}
- /* New interdomain events are bound to VCPU 0. */
- bind_evtchn_to_cpu(evtchn, 0);
+ /* New interdomain events are initially bound to VCPU 0. */
+ bind_evtchn_to_cpu(evtchn, 0, false);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -1156,7 +1161,11 @@ static int bind_ipi_to_irq(unsigned int
irq = ret;
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask to the target CPU so proc shows
+ * the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, true);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_IPI);
@@ -1269,7 +1278,11 @@ int bind_virq_to_irq(unsigned int virq,
goto out;
}

- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask for percpu interrupts so proc
+ * shows the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, percpu);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_VIRQ);
@@ -1634,8 +1647,7 @@ void rebind_evtchn_irq(evtchn_port_t evt

mutex_unlock(&irq_mapping_update_lock);

- bind_evtchn_to_cpu(evtchn, info->cpu);
- irq_set_affinity(irq, cpumask_of(info->cpu));
+ bind_evtchn_to_cpu(evtchn, info->cpu, false);

/* Unmask the event channel. */
enable_irq(irq);
@@ -1669,7 +1681,7 @@ static int xen_rebind_evtchn_to_cpu(evtc
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu);
+ bind_evtchn_to_cpu(evtchn, tcpu, false);

if (!masked)
unmask_evtchn(evtchn);
@@ -1798,7 +1810,8 @@ static void restore_cpu_virqs(unsigned i

/* Record the new mapping. */
(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}

@@ -1823,7 +1836,8 @@ static void restore_cpu_ipis(unsigned in

/* Record the new mapping. */
(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}

2020-12-11 10:22:20

by Boris Ostrovsky

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On 12/10/20 2:26 PM, Thomas Gleixner wrote:
> All event channel setups bind the interrupt on CPU0 or the target CPU for
> percpu interrupts and overwrite the affinity mask with the corresponding
> cpumask. That does not make sense.
>
> The XEN implementation of irqchip::irq_set_affinity() already picks a
> single target CPU out of the affinity mask and the actual target is stored
> in the effective CPU mask, so destroying the user chosen affinity mask
> which might contain more than one CPU is wrong.
>
> Change the implementation so that the channel is bound to CPU0 at the XEN
> level and leave the affinity mask alone. At startup of the interrupt
> affinity will be assigned out of the affinity mask and the XEN binding will
> be updated.

If that's the case then I wonder whether we need this call at all and instead bind at startup time.

-boris

> Only keep the enforcement for real percpu interrupts.

2020-12-11 10:31:00

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On Thu, Dec 10 2020 at 18:20, boris ostrovsky wrote:
> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>> All event channel setups bind the interrupt on CPU0 or the target CPU for
>> percpu interrupts and overwrite the affinity mask with the corresponding
>> cpumask. That does not make sense.
>>
>> The XEN implementation of irqchip::irq_set_affinity() already picks a
>> single target CPU out of the affinity mask and the actual target is stored
>> in the effective CPU mask, so destroying the user chosen affinity mask
>> which might contain more than one CPU is wrong.
>>
>> Change the implementation so that the channel is bound to CPU0 at the XEN
>> level and leave the affinity mask alone. At startup of the interrupt
>> affinity will be assigned out of the affinity mask and the XEN binding will
>> be updated.
>
> If that's the case then I wonder whether we need this call at all and
> instead bind at startup time.

I was wondering about that, but my knowledge about the Xen internal
requirements is pretty limited. The current set at least survived basic
testing by Jürgen.

Thanks,

tglx

2020-12-11 13:34:00

by Jürgen Groß

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On 11.12.20 11:13, Thomas Gleixner wrote:
> On Fri, Dec 11 2020 at 07:17, Jürgen Groß wrote:
>> On 11.12.20 00:20, [email protected] wrote:
>>>
>>> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>>>> All event channel setups bind the interrupt on CPU0 or the target CPU for
>>>> percpu interrupts and overwrite the affinity mask with the corresponding
>>>> cpumask. That does not make sense.
>>>>
>>>> The XEN implementation of irqchip::irq_set_affinity() already picks a
>>>> single target CPU out of the affinity mask and the actual target is stored
>>>> in the effective CPU mask, so destroying the user chosen affinity mask
>>>> which might contain more than one CPU is wrong.
>>>>
>>>> Change the implementation so that the channel is bound to CPU0 at the XEN
>>>> level and leave the affinity mask alone. At startup of the interrupt
>>>> affinity will be assigned out of the affinity mask and the XEN binding will
>>>> be updated.
>>>
>>>
>>> If that's the case then I wonder whether we need this call at all and instead bind at startup time.
>>
>> This binding to cpu0 was introduced with commit 97253eeeb792d61ed2
>> and I have no reason to believe the underlying problem has been
>> eliminated.
>
> "The kernel-side VCPU binding was not being correctly set for newly
> allocated or bound interdomain events. In ARM guests where 2-level
> events were used, this would result in no interdomain events being
> handled because the kernel-side VCPU masks would all be clear.
>
> x86 guests would work because the irq affinity was set during irq
> setup and this would set the correct kernel-side VCPU binding."
>
> I'm not convinced that this is really correctly analyzed because affinity
> setting is done at irq startup.
>
> switch (__irq_startup_managed(desc, aff, force)) {
> case IRQ_STARTUP_NORMAL:
> ret = __irq_startup(desc);
> irq_setup_affinity(desc);
> break;
>
> which is completely architecture agnostic. So why should this magically
> work on x86 and not on ARM if both are using the same XEN irqchip with
> the same irqchip callbacks.

I think this might be related to _initial_ cpu binding of events and
changing the binding later. This might be handled differently in the
hypervisor.

Juergen

Attachments:

OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature Download all attachments

2020-12-11 14:05:50

by Jürgen Groß

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On 11.12.20 00:20, [email protected] wrote:
>
> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>> All event channel setups bind the interrupt on CPU0 or the target CPU for
>> percpu interrupts and overwrite the affinity mask with the corresponding
>> cpumask. That does not make sense.
>>
>> The XEN implementation of irqchip::irq_set_affinity() already picks a
>> single target CPU out of the affinity mask and the actual target is stored
>> in the effective CPU mask, so destroying the user chosen affinity mask
>> which might contain more than one CPU is wrong.
>>
>> Change the implementation so that the channel is bound to CPU0 at the XEN
>> level and leave the affinity mask alone. At startup of the interrupt
>> affinity will be assigned out of the affinity mask and the XEN binding will
>> be updated.
>
>
> If that's the case then I wonder whether we need this call at all and instead bind at startup time.

After some discussion with Thomas on IRC and xen-devel archaeology the
result is: this will be needed especially for systems running on a
single vcpu (e.g. small guests), as the .irq_set_affinity() callback
won't be called in this case when starting the irq.

Juergen

Attachments:

OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature Download all attachments

2020-12-11 14:11:55

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On Fri, Dec 11 2020 at 13:10, Jürgen Groß wrote:
> On 11.12.20 00:20, [email protected] wrote:
>>
>> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>>> All event channel setups bind the interrupt on CPU0 or the target CPU for
>>> percpu interrupts and overwrite the affinity mask with the corresponding
>>> cpumask. That does not make sense.
>>>
>>> The XEN implementation of irqchip::irq_set_affinity() already picks a
>>> single target CPU out of the affinity mask and the actual target is stored
>>> in the effective CPU mask, so destroying the user chosen affinity mask
>>> which might contain more than one CPU is wrong.
>>>
>>> Change the implementation so that the channel is bound to CPU0 at the XEN
>>> level and leave the affinity mask alone. At startup of the interrupt
>>> affinity will be assigned out of the affinity mask and the XEN binding will
>>> be updated.
>>
>>
>> If that's the case then I wonder whether we need this call at all and instead bind at startup time.
>
> After some discussion with Thomas on IRC and xen-devel archaeology the
> result is: this will be needed especially for systems running on a
> single vcpu (e.g. small guests), as the .irq_set_affinity() callback
> won't be called in this case when starting the irq.

That's right, but not limited to ARM. The same problem exists on x86 UP.
So yes, the call makes sense, but the changelog is not really useful.
Let me add a comment to this.

Thanks,

tglx

2020-12-11 16:37:40

by Jürgen Groß

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On 11.12.20 00:20, [email protected] wrote:
>
> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>> All event channel setups bind the interrupt on CPU0 or the target CPU for
>> percpu interrupts and overwrite the affinity mask with the corresponding
>> cpumask. That does not make sense.
>>
>> The XEN implementation of irqchip::irq_set_affinity() already picks a
>> single target CPU out of the affinity mask and the actual target is stored
>> in the effective CPU mask, so destroying the user chosen affinity mask
>> which might contain more than one CPU is wrong.
>>
>> Change the implementation so that the channel is bound to CPU0 at the XEN
>> level and leave the affinity mask alone. At startup of the interrupt
>> affinity will be assigned out of the affinity mask and the XEN binding will
>> be updated.
>
>
> If that's the case then I wonder whether we need this call at all and instead bind at startup time.

This binding to cpu0 was introduced with commit 97253eeeb792d61ed2
and I have no reason to believe the underlying problem has been
eliminated.

Juergen

Attachments:

OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature Download all attachments

2020-12-11 17:07:11

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On Fri, Dec 11 2020 at 07:17, Jürgen Groß wrote:
> On 11.12.20 00:20, [email protected] wrote:
>>
>> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>>> All event channel setups bind the interrupt on CPU0 or the target CPU for
>>> percpu interrupts and overwrite the affinity mask with the corresponding
>>> cpumask. That does not make sense.
>>>
>>> The XEN implementation of irqchip::irq_set_affinity() already picks a
>>> single target CPU out of the affinity mask and the actual target is stored
>>> in the effective CPU mask, so destroying the user chosen affinity mask
>>> which might contain more than one CPU is wrong.
>>>
>>> Change the implementation so that the channel is bound to CPU0 at the XEN
>>> level and leave the affinity mask alone. At startup of the interrupt
>>> affinity will be assigned out of the affinity mask and the XEN binding will
>>> be updated.
>>
>>
>> If that's the case then I wonder whether we need this call at all and instead bind at startup time.
>
> This binding to cpu0 was introduced with commit 97253eeeb792d61ed2
> and I have no reason to believe the underlying problem has been
> eliminated.

"The kernel-side VCPU binding was not being correctly set for newly
allocated or bound interdomain events. In ARM guests where 2-level
events were used, this would result in no interdomain events being
handled because the kernel-side VCPU masks would all be clear.

x86 guests would work because the irq affinity was set during irq
setup and this would set the correct kernel-side VCPU binding."

I'm not convinced that this is really correctly analyzed because affinity
setting is done at irq startup.

switch (__irq_startup_managed(desc, aff, force)) {
case IRQ_STARTUP_NORMAL:
ret = __irq_startup(desc);
irq_setup_affinity(desc);
break;

which is completely architecture agnostic. So why should this magically
work on x86 and not on ARM if both are using the same XEN irqchip with
the same irqchip callbacks.

Thanks,

tglx

2020-12-11 17:30:00

by Boris Ostrovsky

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On 12/11/20 7:37 AM, Thomas Gleixner wrote:
> On Fri, Dec 11 2020 at 13:10, Jürgen Groß wrote:
>> On 11.12.20 00:20, [email protected] wrote:
>>> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>>>> All event channel setups bind the interrupt on CPU0 or the target CPU for
>>>> percpu interrupts and overwrite the affinity mask with the corresponding
>>>> cpumask. That does not make sense.
>>>>
>>>> The XEN implementation of irqchip::irq_set_affinity() already picks a
>>>> single target CPU out of the affinity mask and the actual target is stored
>>>> in the effective CPU mask, so destroying the user chosen affinity mask
>>>> which might contain more than one CPU is wrong.
>>>>
>>>> Change the implementation so that the channel is bound to CPU0 at the XEN
>>>> level and leave the affinity mask alone. At startup of the interrupt
>>>> affinity will be assigned out of the affinity mask and the XEN binding will
>>>> be updated.
>>>
>>> If that's the case then I wonder whether we need this call at all and instead bind at startup time.
>> After some discussion with Thomas on IRC and xen-devel archaeology the
>> result is: this will be needed especially for systems running on a
>> single vcpu (e.g. small guests), as the .irq_set_affinity() callback
>> won't be called in this case when starting the irq.

On UP are we not then going to end up with an empty affinity mask? Or are we guaranteed to have it set to 1 by interrupt generic code?

This is actually why I brought this up in the first place --- a potential mismatch between the affinity mask and Xen-specific data (e.g. info->cpu and then protocol-specific data in event channel code). Even if they are re-synchronized later, at startup time (for SMP).

I don't see anything that would cause a problem right now but I worry that this inconsistency may come up at some point.

-boris

> That's right, but not limited to ARM. The same problem exists on x86 UP.
> So yes, the call makes sense, but the changelog is not really useful.
> Let me add a comment to this.
>
> Thanks,
>
> tglx
>

2020-12-13 03:33:25

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

On Fri, Dec 11 2020 at 09:29, boris ostrovsky wrote:

> On 12/11/20 7:37 AM, Thomas Gleixner wrote:
>> On Fri, Dec 11 2020 at 13:10, Jürgen Groß wrote:
>>> On 11.12.20 00:20, [email protected] wrote:
>>>> On 12/10/20 2:26 PM, Thomas Gleixner wrote:
>>>>> Change the implementation so that the channel is bound to CPU0 at the XEN
>>>>> level and leave the affinity mask alone. At startup of the interrupt
>>>>> affinity will be assigned out of the affinity mask and the XEN binding will
>>>>> be updated.
>>>>
>>>> If that's the case then I wonder whether we need this call at all and instead bind at startup time.
>>> After some discussion with Thomas on IRC and xen-devel archaeology the
>>> result is: this will be needed especially for systems running on a
>>> single vcpu (e.g. small guests), as the .irq_set_affinity() callback
>>> won't be called in this case when starting the irq.
>
> On UP are we not then going to end up with an empty affinity mask? Or
> are we guaranteed to have it set to 1 by interrupt generic code?

An UP kernel does not ever look on the affinity mask. The
chip::irq_set_affinity() callback is not invoked so the mask is
irrelevant.

A SMP kernel on a UP machine sets CPU0 in the mask so all is good.

> This is actually why I brought this up in the first place --- a
> potential mismatch between the affinity mask and Xen-specific data
> (e.g. info->cpu and then protocol-specific data in event channel
> code). Even if they are re-synchronized later, at startup time (for
> SMP).

Which is not a problem either. The affinity mask is only relevant for
setting the affinity, but it's not relevant for delivery and never can
be.

> I don't see anything that would cause a problem right now but I worry
> that this inconsistency may come up at some point.

As long as the affinity mask becomes not part of the event channel magic
this should never matter.

Look at it from hardware:

interrupt is affine to CPU0

CPU0 runs:

set_affinity(CPU0 -> CPU1)
local_irq_disable()

--> interrupt is raised in hardware and pending on CPU0

irq hardware is reconfigured to be affine to CPU1

local_irq_enable()

--> interrupt is handled on CPU0

the next interrupt will be raised on CPU1

So info->cpu which is registered via the hypercall binds the 'hardware
delivery' and whenever the new affinity is written it is rebound to some
other CPU and the next interrupt is then raised on this other CPU.

It's not any different from the hardware example at least not as far as
I understood the code.

Thanks,

tglx

2020-12-13 05:38:17

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [patch 27/30] xen/events: Only force affinity mask for percpu interrupts

Andrew,

On Fri, Dec 11 2020 at 22:21, Andrew Cooper wrote:
> On 11/12/2020 21:27, Thomas Gleixner wrote:
>> It's not any different from the hardware example at least not as far as
>> I understood the code.
>
> Xen's event channels do have a couple of quirks.

Why am I not surprised?

> Binding an event channel always results in one spurious event being
> delivered. This is to cover notifications which can get lost during the
> bidirectional setup, or re-setups in certain configurations.
>
> Binding an interdomain or pirq event channel always defaults to vCPU0.
> There is no way to atomically set the affinity while binding. I believe
> the API predates SMP guest support in Xen, and noone has fixed it up
> since.

That's fine. I'm not changing that.

What I'm changing is the unwanted and unnecessary overwriting of the
actual affinity mask.

We have a similar issue on real hardware where we can only target _one_
CPU and not all CPUs in the affinity mask. So we still can preserve the
(user) requested mask and just affine it to one CPU which is reflected
in the effective affinity mask. This the right thing to do for two
reasons:

1) It allows proper interrupt distribution

2) It does not break (user) requested affinity when the effective
target CPU goes offline and the affinity mask still contains
online CPUs. If you overwrite it you lost track of the requested
broader mask.

> As a consequence, the guest will observe the event raised on vCPU0 as
> part of setting up the event, even if it attempts to set a different
> affinity immediately afterwards. A little bit of care needs to be taken
> when binding an event channel on vCPUs other than 0, to ensure that the
> callback is safe with respect to any remaining state needing
> initialisation.

That's preserved for all non percpu interrupts. The percpu variant of
VIRQ and IPIs did binding to vCPU != 0 already before this change.

> Beyond this, there is nothing magic I'm aware of.
>
> We have seen soft lockups before in certain scenarios, simply due to the
> quantity of events hitting vCPU0 before irqbalance gets around to
> spreading the load. This is why there is an attempt to round-robin the
> userspace event channel affinities by default, but I still don't see why
> this would need custom affinity logic itself.

Just the previous attempt makes no sense for the reasons I outlined in
the changelog. So now with this new spreading mechanics you get the
distribution for all cases:

1) Post setup using and respecting the default affinity mask which can
be set as a kernel commandline parameter.

2) Runtime (user) requested affinity change with a mask which contains
more than one vCPU. The previous logic always chose the first one
in the mask.

So assume userspace affines 4 irqs to a CPU 0-3 and 4 irqs to CPU
4-7 then 4 irqs end up on CPU0 and 4 on CPU4

The new algorithm which is similar to what we have on x86 (minus
the vector space limitation) picks the CPU which has the least
number of channels affine to it at that moment. If e.g. all 8 CPUs
have the same number of vectors before that change then in the
example above the first 4 are spread to CPU0-3 and the second 4 to
CPU4-7

Thanks,

tglx

2020-12-14 02:52:06

by tip-bot2 for Alexey Makhalov

[permalink] [raw]

Subject: [tip: irq/core] xen/events: Only force affinity mask for percpu interrupts

The following commit has been merged into the irq/core branch of tip:

Commit-ID: f61b8351facda691f1564097074976b30e987a81
Gitweb: https://git.kernel.org/tip/f61b8351facda691f1564097074976b30e987a81
Author: Thomas Gleixner <[email protected]>
AuthorDate: Thu, 10 Dec 2020 20:26:03 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Sat, 12 Dec 2020 12:59:06 +01:00

xen/events: Only force affinity mask for percpu interrupts

All event channel setups bind the interrupt on CPU0 or the target CPU for
percpu interrupts and overwrite the affinity mask with the corresponding
cpumask. That does not make sense.

The XEN implementation of irqchip::irq_set_affinity() already picks a
single target CPU out of the affinity mask and the actual target is stored
in the effective CPU mask, so destroying the user chosen affinity mask
which might contain more than one CPU is wrong.

Change the implementation so that the channel is bound to CPU0 at the XEN
level and leave the affinity mask alone. At startup of the interrupt
affinity will be assigned out of the affinity mask and the XEN binding will
be updated. Only keep the enforcement for real percpu interrupts.

On resume the overwrite is not required either because info->cpu and the
affinity mask are still the same as at the time of suspend. Same for
rebind_evtchn_irq().

This also prepares for proper interrupt spreading.

Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Stefano Stabellini <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
drivers/xen/events/events_base.c | 48 +++++++++++++++++++++----------
1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index eaba42a..679b2cb 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -433,15 +433,20 @@ static bool pirq_needs_eoi_flag(unsigned irq)
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}

-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ bool force_affinity)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info = info_for_irq(irq);

BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
-#endif
+
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+ cpumask_copy(irq_get_effective_affinity_mask(irq),
+ cpumask_of(cpu));
+ }
+
xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);

info->cpu = cpu;
@@ -788,7 +793,7 @@ static unsigned int __startup_pirq(unsigned int irq)
goto err;

info->evtchn = evtchn;
- bind_evtchn_to_cpu(evtchn, 0);
+ bind_evtchn_to_cpu(evtchn, 0, false);

rc = xen_evtchn_port_setup(evtchn);
if (rc)
@@ -1107,8 +1112,14 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
irq = ret;
goto out;
}
- /* New interdomain events are bound to VCPU 0. */
- bind_evtchn_to_cpu(evtchn, 0);
+ /*
+ * New interdomain events are initially bound to vCPU0 This
+ * is required to setup the event channel in the first
+ * place and also important for UP guests because the
+ * affinity setting is not invoked on them so nothing would
+ * bind the channel.
+ */
+ bind_evtchn_to_cpu(evtchn, 0, false);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -1156,7 +1167,11 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
irq = ret;
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask to the target CPU so proc shows
+ * the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, true);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_IPI);
@@ -1269,7 +1284,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
goto out;
}

- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask for percpu interrupts so proc
+ * shows the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, percpu);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_VIRQ);
@@ -1634,8 +1653,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)

mutex_unlock(&irq_mapping_update_lock);

- bind_evtchn_to_cpu(evtchn, info->cpu);
- irq_set_affinity(irq, cpumask_of(info->cpu));
+ bind_evtchn_to_cpu(evtchn, info->cpu, false);

/* Unmask the event channel. */
enable_irq(irq);
@@ -1669,7 +1687,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu);
+ bind_evtchn_to_cpu(evtchn, tcpu, false);

if (!masked)
unmask_evtchn(evtchn);
@@ -1798,7 +1816,8 @@ static void restore_cpu_virqs(unsigned int cpu)

/* Record the new mapping. */
(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}

@@ -1823,7 +1842,8 @@ static void restore_cpu_ipis(unsigned int cpu)

/* Record the new mapping. */
(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}