If an interrupt is disabled, the ITS driver has sent a DISCARD removing
the DeviceID and EventID from the ITT. After this occurs the interrupt
can't be moved to another collection with a MOVI, and a command error
occurs if this is attempted. Before issuing the MOVI command, make sure
that the IRQ isn't disabled, and change the activate code to try to use
the previous affinity.
Signed-off-by: Ali Saidi <[email protected]>
---
drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 124251b0ccba..1235dd9a2fb2 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1540,7 +1540,11 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
/* don't set the affinity when the target cpu is same as current one */
if (cpu != its_dev->event_map.col_map[id]) {
target_col = &its_dev->its->collections[cpu];
- its_send_movi(its_dev, target_col, id);
+
+ /* If the IRQ is disabled a discard was sent so don't move */
+ if (!irqd_irq_disabled(d))
+ its_send_movi(its_dev, target_col, id);
+
its_dev->event_map.col_map[id] = cpu;
irq_data_update_effective_affinity(d, cpumask_of(cpu));
}
@@ -3439,8 +3443,16 @@ static int its_irq_domain_activate(struct irq_domain *domain,
if (its_dev->its->numa_node >= 0)
cpu_mask = cpumask_of_node(its_dev->its->numa_node);
- /* Bind the LPI to the first possible CPU */
- cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+ /* If the cpu set to a different CPU that is still online use it */
+ cpu = its_dev->event_map.col_map[event];
+
+ cpumask_and(cpu_mask, cpu_mask, cpu_online_mask);
+
+ if (!cpumask_test_cpu(cpu, cpu_mask)) {
+ /* Bind the LPI to the first possible CPU */
+ cpu = cpumask_first(cpu_mask);
+ }
+
if (cpu >= nr_cpu_ids) {
if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
return -EINVAL;
--
2.24.1.AMZN
Hi,
On 2020/5/29 9:55, Ali Saidi wrote:
> If an interrupt is disabled the ITS driver has sent a discard removing
> the DeviceID and EventID from the ITT. After this occurs it can't be
> moved to another collection with a MOVI and a command error occurs if
> attempted. Before issuing the MOVI command make sure that the IRQ isn't
> disabled and change the activate code to try and use the previous
> affinity.
>
> Signed-off-by: Ali Saidi <[email protected]>
> ---
> drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++++---
> 1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 124251b0ccba..1235dd9a2fb2 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -1540,7 +1540,11 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
> /* don't set the affinity when the target cpu is same as current one */
> if (cpu != its_dev->event_map.col_map[id]) {
> target_col = &its_dev->its->collections[cpu];
> - its_send_movi(its_dev, target_col, id);
> +
> + /* If the IRQ is disabled a discard was sent so don't move */
> + if (!irqd_irq_disabled(d))
> + its_send_movi(its_dev, target_col, id);
It looks to me that if the IRQ is disabled, we mask the enable bit in
the corresponding LPI configuration table entry, but do not send a DISCARD
to remove the DevID/EventID mapping. And moving a disabled LPI is
actually allowed by the GIC architecture, right?
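For reference, the two paths look roughly like this (a heavily simplified
sketch from memory, so the details may well be off):

	/* Masking/disabling only clears the enable bit in the LPI
	 * configuration (property) table and invalidates it: */
	static void its_mask_irq(struct irq_data *d)
	{
		lpi_update_config(d, LPI_PROP_ENABLED, 0);
	}

	/* Only deactivation drops the DevID/EventID mapping via DISCARD: */
	static void its_irq_domain_deactivate(struct irq_domain *domain,
					      struct irq_data *d)
	{
		struct its_device *its_dev = irq_data_get_irq_chip_data(d);
		u32 event = its_get_event_id(d);

		its_send_discard(its_dev, event);
	}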
> +
> its_dev->event_map.col_map[id] = cpu;
> irq_data_update_effective_affinity(d, cpumask_of(cpu));
> }
> @@ -3439,8 +3443,16 @@ static int its_irq_domain_activate(struct irq_domain *domain,
> if (its_dev->its->numa_node >= 0)
> cpu_mask = cpumask_of_node(its_dev->its->numa_node);
>
> - /* Bind the LPI to the first possible CPU */
> - cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
> + /* If the cpu set to a different CPU that is still online use it */
> + cpu = its_dev->event_map.col_map[event];
> +
> + cpumask_and(cpu_mask, cpu_mask, cpu_online_mask);
> +
> + if (!cpumask_test_cpu(cpu, cpu_mask)) {
> + /* Bind the LPI to the first possible CPU */
> + cpu = cpumask_first(cpu_mask);
> + }
I'd like to know what actual problem you have seen and how to
reproduce it :-)
Thanks,
Zenghui
Hi Ali,
On 2020-05-29 02:55, Ali Saidi wrote:
> If an interrupt is disabled the ITS driver has sent a discard removing
> the DeviceID and EventID from the ITT. After this occurs it can't be
> moved to another collection with a MOVI and a command error occurs if
> attempted. Before issuing the MOVI command make sure that the IRQ isn't
> disabled and change the activate code to try and use the previous
> affinity.
>
> Signed-off-by: Ali Saidi <[email protected]>
> ---
> drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++++---
> 1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic-v3-its.c
> b/drivers/irqchip/irq-gic-v3-its.c
> index 124251b0ccba..1235dd9a2fb2 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -1540,7 +1540,11 @@ static int its_set_affinity(struct irq_data *d,
> const struct cpumask *mask_val,
> /* don't set the affinity when the target cpu is same as current one
> */
> if (cpu != its_dev->event_map.col_map[id]) {
> target_col = &its_dev->its->collections[cpu];
> - its_send_movi(its_dev, target_col, id);
> +
> + /* If the IRQ is disabled a discard was sent so don't move */
> + if (!irqd_irq_disabled(d))
> + its_send_movi(its_dev, target_col, id);
> +
This looks wrong. What you are testing here is whether the interrupt
is masked, not that there isn't a valid translation.
In the commit message, you're saying that we've issued a discard. This
hints at doing a set_affinity on an interrupt that has been deactivated
(mapping removed). Is that actually the case? If so, why was it
deactivated in the first place?
> its_dev->event_map.col_map[id] = cpu;
> irq_data_update_effective_affinity(d, cpumask_of(cpu));
> }
> @@ -3439,8 +3443,16 @@ static int its_irq_domain_activate(struct
> irq_domain *domain,
> if (its_dev->its->numa_node >= 0)
> cpu_mask = cpumask_of_node(its_dev->its->numa_node);
>
> - /* Bind the LPI to the first possible CPU */
> - cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
> + /* If the cpu set to a different CPU that is still online use it */
> + cpu = its_dev->event_map.col_map[event];
> +
> + cpumask_and(cpu_mask, cpu_mask, cpu_online_mask);
> +
> + if (!cpumask_test_cpu(cpu, cpu_mask)) {
> + /* Bind the LPI to the first possible CPU */
> + cpu = cpumask_first(cpu_mask);
> + }
> +
> if (cpu >= nr_cpu_ids) {
> if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
> return -EINVAL;
So you deactivate an interrupt, do a set_affinity that doesn't issue
a MOVI but preserves the affinity, then reactivate it and hope that
the new mapping will target the "right" CPU.
That seems a bit mad, but I presume this isn't the whole story...
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Hi Marc,
> On May 29, 2020, at 3:33 AM, Marc Zyngier <[email protected]> wrote:
>
> Hi Ali,
>
>> On 2020-05-29 02:55, Ali Saidi wrote:
>> If an interrupt is disabled the ITS driver has sent a discard removing
>> the DeviceID and EventID from the ITT. After this occurs it can't be
>> moved to another collection with a MOVI and a command error occurs if
>> attempted. Before issuing the MOVI command make sure that the IRQ isn't
>> disabled and change the activate code to try and use the previous
>> affinity.
>>
>> Signed-off-by: Ali Saidi <[email protected]>
>> ---
>> drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++++---
>> 1 file changed, 15 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c
>> b/drivers/irqchip/irq-gic-v3-its.c
>> index 124251b0ccba..1235dd9a2fb2 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -1540,7 +1540,11 @@ static int its_set_affinity(struct irq_data *d,
>> const struct cpumask *mask_val,
>> /* don't set the affinity when the target cpu is same as current one
>> */
>> if (cpu != its_dev->event_map.col_map[id]) {
>> target_col = &its_dev->its->collections[cpu];
>> - its_send_movi(its_dev, target_col, id);
>> +
>> + /* If the IRQ is disabled a discard was sent so don't move */
>> + if (!irqd_irq_disabled(d))
>> + its_send_movi(its_dev, target_col, id);
>> +
>
> This looks wrong. What you are testing here is whether the interrupt
> is masked, not that there isn't a valid translation.
I’m not exactly sure of the correct condition, but what I’m looking for is interrupts which have been deactivated and for which we have thus sent a discard.
>
> In the commit message, you're saying that we've issued a discard. This
> hints at doing a set_affinity on an interrupt that has been deactivated
> (mapping removed). Is that actually the case? If so, why was it
> deactivated
> the first place?
This is the case. If we down a NIC, that interface’s MSIs will be deactivated but remain allocated until the device is unbound from the driver or the NIC is brought back up.
While stressing down/up of a device I’ve found that irqbalance can move interrupts and you end up with the situation described: the device is downed, the interrupts are deactivated but still present, and then trying to move one results in sending a MOVI after the DISCARD, which is an error per the GIC spec.
>
>> its_dev->event_map.col_map[id] = cpu;
>> irq_data_update_effective_affinity(d, cpumask_of(cpu));
>> }
>> @@ -3439,8 +3443,16 @@ static int its_irq_domain_activate(struct
>> irq_domain *domain,
>> if (its_dev->its->numa_node >= 0)
>> cpu_mask = cpumask_of_node(its_dev->its->numa_node);
>>
>> - /* Bind the LPI to the first possible CPU */
>> - cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
>> + /* If the cpu set to a different CPU that is still online use it */
>> + cpu = its_dev->event_map.col_map[event];
>> +
>> + cpumask_and(cpu_mask, cpu_mask, cpu_online_mask);
>> +
>> + if (!cpumask_test_cpu(cpu, cpu_mask)) {
>> + /* Bind the LPI to the first possible CPU */
>> + cpu = cpumask_first(cpu_mask);
>> + }
>> +
>> if (cpu >= nr_cpu_ids) {
>> if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
>> return -EINVAL;
>
> So you deactivate an interrupt, do a set_affinity that doesn't issue
> a MOVI but preserves the affinity, then reactivate it and hope that
> the new mapping will target the "right" CPU.
>
> That seems a bit mad, but I presume this isn't the whole story...
Doing some experiments, it appears as though other interrupt controllers do preserve affinity across deactivate/activate, so this is my attempt at doing the same.
Thanks,
Ali
Hi Ali,
On Fri, 29 May 2020 12:36:42 +0000
"Saidi, Ali" <[email protected]> wrote:
> Hi Marc,
>
> > On May 29, 2020, at 3:33 AM, Marc Zyngier <[email protected]> wrote:
> >
> > Hi Ali,
> >
> >> On 2020-05-29 02:55, Ali Saidi wrote:
> >> If an interrupt is disabled the ITS driver has sent a discard removing
> >> the DeviceID and EventID from the ITT. After this occurs it can't be
> >> moved to another collection with a MOVI and a command error occurs if
> >> attempted. Before issuing the MOVI command make sure that the IRQ isn't
> >> disabled and change the activate code to try and use the previous
> >> affinity.
> >>
> >> Signed-off-by: Ali Saidi <[email protected]>
> >> ---
> >> drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++++---
> >> 1 file changed, 15 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/irqchip/irq-gic-v3-its.c
> >> b/drivers/irqchip/irq-gic-v3-its.c
> >> index 124251b0ccba..1235dd9a2fb2 100644
> >> --- a/drivers/irqchip/irq-gic-v3-its.c
> >> +++ b/drivers/irqchip/irq-gic-v3-its.c
> >> @@ -1540,7 +1540,11 @@ static int its_set_affinity(struct irq_data *d,
> >> const struct cpumask *mask_val,
> >> /* don't set the affinity when the target cpu is same as current one
> >> */
> >> if (cpu != its_dev->event_map.col_map[id]) {
> >> target_col = &its_dev->its->collections[cpu];
> >> - its_send_movi(its_dev, target_col, id);
> >> +
> >> + /* If the IRQ is disabled a discard was sent so don't move */
> >> + if (!irqd_irq_disabled(d))
> >> + its_send_movi(its_dev, target_col, id);
> >> +
> >
> > This looks wrong. What you are testing here is whether the interrupt
> > is masked, not that there isn't a valid translation.
> I’m not exactly sure the correct condition, but what I’m looking for
> is interrupts which are deactivated and we have thus sent a discard.
That looks like IRQD_IRQ_STARTED not being set in this case.
> >
> > In the commit message, you're saying that we've issued a discard.
> > This hints at doing a set_affinity on an interrupt that has been
> > deactivated (mapping removed). Is that actually the case? If so,
> > why was it deactivated
> > the first place?
> This is the case. If we down a NIC, that interface’s MSIs will be
> deactivated but remain allocated until the device is unbound from the
> driver or the NIC is brought up.
>
> While stressing down/up a device I’ve found that irqbalance can move
> interrupts and you end up with the situation described. The device is
> downed, the interrupts are deactivated but still present and then
> trying to move one results in sending a MOVI after the DISCARD which
> is an error per the GIC spec.
Not great indeed. But this is not, as far as I can tell, a GIC
driver problem.
The semantic of activate/deactivate (which maps to started/shutdown
in the IRQ code) is that the HW resources for a given interrupt are
only committed when the interrupt is activated. Trying to perform
actions involving the HW on an interrupt that isn't active cannot be
guaranteed to take effect.
I'd rather address it in the core code, by preventing set_affinity (and
potentially others) to take place when the interrupt is not in the
STARTED state. Userspace would get an error, which is perfectly
legitimate, and which it already has to deal with for plenty of other
reasons.
>
> >
> >> its_dev->event_map.col_map[id] = cpu;
> >> irq_data_update_effective_affinity(d,
> >> cpumask_of(cpu)); }
> >> @@ -3439,8 +3443,16 @@ static int its_irq_domain_activate(struct
> >> irq_domain *domain,
> >> if (its_dev->its->numa_node >= 0)
> >> cpu_mask = cpumask_of_node(its_dev->its->numa_node);
> >>
> >> - /* Bind the LPI to the first possible CPU */
> >> - cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
> >> + /* If the cpu set to a different CPU that is still online
> >> use it */
> >> + cpu = its_dev->event_map.col_map[event];
> >> +
> >> + cpumask_and(cpu_mask, cpu_mask, cpu_online_mask);
> >> +
> >> + if (!cpumask_test_cpu(cpu, cpu_mask)) {
> >> + /* Bind the LPI to the first possible CPU */
> >> + cpu = cpumask_first(cpu_mask);
> >> + }
> >> +
> >> if (cpu >= nr_cpu_ids) {
> >> if (its_dev->its->flags &
> >> ITS_FLAGS_WORKAROUND_CAVIUM_23144) return -EINVAL;
> >
> > So you deactivate an interrupt, do a set_affinity that doesn't issue
> > a MOVI but preserves the affinity, then reactivate it and hope that
> > the new mapping will target the "right" CPU.
> >
> > That seems a bit mad, but I presume this isn't the whole story...
> Doing some experiments it appears as though other interrupts
> controllers do preserve affinity across deactivate/activate, so this
> is my attempt at doing the same.
I believe this is only an artefact of these other controllers not
requiring any resource to be committed into the HW (SPIs wouldn't care,
for example).
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Hi Ali,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on linus/master]
[also build test WARNING on linux/master v5.7-rc7]
[cannot apply to tip/irq/core arm-jcooper/irqchip/for-next next-20200529]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Ali-Saidi/irqchip-gic-v3-its-Don-t-try-to-move-a-disabled-irq/20200531-043957
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 86852175b016f0c6873dcbc24b93d12b7b246612
config: arm64-allyesconfig (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <[email protected]>
All warnings (new ones prefixed by >>, old ones prefixed by <<):
drivers/irqchip/irq-gic-v3-its.c: In function 'its_irq_domain_activate':
>> drivers/irqchip/irq-gic-v3-its.c:3449:14: warning: passing argument 1 of 'cpumask_and' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
3449 | cpumask_and(cpu_mask, cpu_mask, cpu_online_mask);
| ^~~~~~~~
In file included from include/linux/rcupdate.h:31,
from include/linux/radix-tree.h:15,
from include/linux/idr.h:15,
from include/linux/kernfs.h:13,
from include/linux/sysfs.h:16,
from include/linux/kobject.h:20,
from include/linux/of.h:17,
from include/linux/irqdomain.h:35,
from include/linux/acpi.h:13,
from drivers/irqchip/irq-gic-v3-its.c:7:
include/linux/cpumask.h:424:47: note: expected 'struct cpumask *' but argument is of type 'const struct cpumask *'
424 | static inline int cpumask_and(struct cpumask *dstp,
| ~~~~~~~~~~~~~~~~^~~~
In file included from include/linux/bits.h:23,
from include/linux/ioport.h:15,
from include/linux/acpi.h:12,
from drivers/irqchip/irq-gic-v3-its.c:7:
drivers/irqchip/irq-gic-v3-its.c: In function 'its_init_vpe_domain':
include/linux/bits.h:26:28: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 | __builtin_constant_p((l) > (h)), (l) > (h), 0)))
| ^
include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
| ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 | (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
| ^~~~~~~~~~~~~~~~~~~
drivers/irqchip/irq-gic-v3-its.c:4765:10: note: in expansion of macro 'GENMASK'
4765 | devid = GENMASK(device_ids(its) - 1, 0);
| ^~~~~~~
include/linux/bits.h:26:40: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 | __builtin_constant_p((l) > (h)), (l) > (h), 0)))
| ^
include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
| ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 | (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
| ^~~~~~~~~~~~~~~~~~~
drivers/irqchip/irq-gic-v3-its.c:4765:10: note: in expansion of macro 'GENMASK'
4765 | devid = GENMASK(device_ids(its) - 1, 0);
| ^~~~~~~
vim +3449 drivers/irqchip/irq-gic-v3-its.c
3433
3434 static int its_irq_domain_activate(struct irq_domain *domain,
3435 struct irq_data *d, bool reserve)
3436 {
3437 struct its_device *its_dev = irq_data_get_irq_chip_data(d);
3438 u32 event = its_get_event_id(d);
3439 const struct cpumask *cpu_mask = cpu_online_mask;
3440 int cpu;
3441
3442 /* get the cpu_mask of local node */
3443 if (its_dev->its->numa_node >= 0)
3444 cpu_mask = cpumask_of_node(its_dev->its->numa_node);
3445
3446 /* If the cpu set to a different CPU that is still online use it */
3447 cpu = its_dev->event_map.col_map[event];
3448
> 3449 cpumask_and(cpu_mask, cpu_mask, cpu_online_mask);
3450
3451 if (!cpumask_test_cpu(cpu, cpu_mask)) {
3452 /* Bind the LPI to the first possible CPU */
3453 cpu = cpumask_first(cpu_mask);
3454 }
3455
3456 if (cpu >= nr_cpu_ids) {
3457 if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
3458 return -EINVAL;
3459
3460 cpu = cpumask_first(cpu_online_mask);
3461 }
3462
3463 its_dev->event_map.col_map[event] = cpu;
3464 irq_data_update_effective_affinity(d, cpumask_of(cpu));
3465
3466 /* Map the GIC IRQ and event to the device */
3467 its_send_mapti(its_dev, d->hwirq, event);
3468 return 0;
3469 }
3470
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On 2020-05-30 17:49, Marc Zyngier wrote:
> Hi Ali,
>
> On Fri, 29 May 2020 12:36:42 +0000
> "Saidi, Ali" <[email protected]> wrote:
>
>> Hi Marc,
>>
>> > On May 29, 2020, at 3:33 AM, Marc Zyngier <[email protected]> wrote:
>> >
>> > Hi Ali,
>> >
>> >> On 2020-05-29 02:55, Ali Saidi wrote:
>> >> If an interrupt is disabled the ITS driver has sent a discard removing
>> >> the DeviceID and EventID from the ITT. After this occurs it can't be
>> >> moved to another collection with a MOVI and a command error occurs if
>> >> attempted. Before issuing the MOVI command make sure that the IRQ isn't
>> >> disabled and change the activate code to try and use the previous
>> >> affinity.
>> >>
>> >> Signed-off-by: Ali Saidi <[email protected]>
>> >> ---
>> >> drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++++---
>> >> 1 file changed, 15 insertions(+), 3 deletions(-)
>> >>
>> >> diff --git a/drivers/irqchip/irq-gic-v3-its.c
>> >> b/drivers/irqchip/irq-gic-v3-its.c
>> >> index 124251b0ccba..1235dd9a2fb2 100644
>> >> --- a/drivers/irqchip/irq-gic-v3-its.c
>> >> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> >> @@ -1540,7 +1540,11 @@ static int its_set_affinity(struct irq_data *d,
>> >> const struct cpumask *mask_val,
>> >> /* don't set the affinity when the target cpu is same as current one
>> >> */
>> >> if (cpu != its_dev->event_map.col_map[id]) {
>> >> target_col = &its_dev->its->collections[cpu];
>> >> - its_send_movi(its_dev, target_col, id);
>> >> +
>> >> + /* If the IRQ is disabled a discard was sent so don't move */
>> >> + if (!irqd_irq_disabled(d))
>> >> + its_send_movi(its_dev, target_col, id);
>> >> +
>> >
>> > This looks wrong. What you are testing here is whether the interrupt
>> > is masked, not that there isn't a valid translation.
>> I’m not exactly sure the correct condition, but what I’m looking for
>> is interrupts which are deactivated and we have thus sent a discard.
>
> That looks like IRQD_IRQ_STARTED not being set in this case.
>
>> >
>> > In the commit message, you're saying that we've issued a discard.
>> > This hints at doing a set_affinity on an interrupt that has been
>> > deactivated (mapping removed). Is that actually the case? If so,
>> > why was it deactivated
>> > the first place?
>> This is the case. If we down a NIC, that interface’s MSIs will be
>> deactivated but remain allocated until the device is unbound from the
>> driver or the NIC is brought up.
>>
>> While stressing down/up a device I’ve found that irqbalance can move
>> interrupts and you end up with the situation described. The device is
>> downed, the interrupts are deactivated but still present and then
>> trying to move one results in sending a MOVI after the DISCARD which
>> is an error per the GIC spec.
>
> Not great indeed. But this is not, as far as I can tell, a GIC
> driver problem.
>
> The semantic of activate/deactivate (which maps to started/shutdown
> in the IRQ code) is that the HW resources for a given interrupt are
> only committed when the interrupt is activated. Trying to perform
> actions involving the HW on an interrupt that isn't active cannot be
> guaranteed to take effect.
>
> I'd rather address it in the core code, by preventing set_affinity (and
> potentially others) to take place when the interrupt is not in the
> STARTED state. Userspace would get an error, which is perfectly
> legitimate, and which it already has to deal with it for plenty of
> other
> reasons.
How about this:
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 453a8a0f4804..1a2ac1392c0f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -147,7 +147,8 @@ cpumask_var_t irq_default_affinity;
static bool __irq_can_set_affinity(struct irq_desc *desc)
{
if (!desc || !irqd_can_balance(&desc->irq_data) ||
- !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity)
+ !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity ||
+ !irqd_is_started(&desc->irq_data))
return false;
return true;
}
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Marc,
> On May 31, 2020, at 6:10 AM, Marc Zyngier <[email protected]> wrote:
>> Not great indeed. But this is not, as far as I can tell, a GIC
>> driver problem.
>>
>> The semantic of activate/deactivate (which maps to started/shutdown
>> in the IRQ code) is that the HW resources for a given interrupt are
>> only committed when the interrupt is activated. Trying to perform
>> actions involving the HW on an interrupt that isn't active cannot be
>> guaranteed to take effect.
Yes, then it absolutely makes sense to address it outside the GIC.
>>
>> I'd rather address it in the core code, by preventing set_affinity (and
>> potentially others) to take place when the interrupt is not in the
>> STARTED state. Userspace would get an error, which is perfectly
>> legitimate, and which it already has to deal with it for plenty of
>> other
>> reasons.
>
> How about this:
>
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 453a8a0f4804..1a2ac1392c0f 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -147,7 +147,8 @@ cpumask_var_t irq_default_affinity;
> static bool __irq_can_set_affinity(struct irq_desc *desc)
> {
> if (!desc || !irqd_can_balance(&desc->irq_data) ||
> - !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity)
> + !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity ||
> + !irqd_is_started(&desc->irq_data))
> return false;
> return true;
> }
Confirmed I can’t reproduce the issue with your fix.
Thanks,
Ali
On Sun, 2020-05-31 at 12:09 +0100, Marc Zyngier wrote:
>
>
> > Not great indeed. But this is not, as far as I can tell, a GIC
> > driver problem.
> >
> > The semantic of activate/deactivate (which maps to started/shutdown
> > in the IRQ code) is that the HW resources for a given interrupt are
> > only committed when the interrupt is activated. Trying to perform
> > actions involving the HW on an interrupt that isn't active cannot be
> > guaranteed to take effect.
> >
> > I'd rather address it in the core code, by preventing set_affinity (and
> > potentially others) to take place when the interrupt is not in the
> > STARTED state. Userspace would get an error, which is perfectly
> > legitimate, and which it already has to deal with it for plenty of
> > other
> > reasons.
So I finally found time to dig a bit in there :) Code has changed a bit
since last I looked. But I have memories of the startup code messing
around with the affinity, and here it is. In irq_startup() :
switch (__irq_startup_managed(desc, aff, force)) {
case IRQ_STARTUP_NORMAL:
ret = __irq_startup(desc);
irq_setup_affinity(desc);
break;
case IRQ_STARTUP_MANAGED:
irq_do_set_affinity(d, aff, false);
ret = __irq_startup(desc);
break;
case IRQ_STARTUP_ABORT:
irqd_set_managed_shutdown(d);
return 0;
So we have two cases here. Normal and managed.
In the managed case, we set the affinity before startup. I feel like your
patch might break that or am I missing something ?
Additionally, your patch would break any userspace program that expects to
be able to change the affinity on an interrupt before it's been started.
I don't know if such a thing exists but the fact that we hit that bug
makes me think it might.
Now most controller drivers (at least those I'm familiar with, which don't
include the GIC at this point) can deal with that just fine.
Now there's also another possible issue:
Your patch checks irqd_is_started(). Now I always mix up irqd vs irq_state these
days, so I may be wrong, but irq_state_set_started() is only done in __irq_startup(),
which will *not* be called if the interrupt has NOAUTOEN.
Is that ok ? Do we intend for affinity setting not to work until the first
enable_irq() for such an interrupt ? We could check activated instead of
started I suppose (again, provided I didn't mix up two different things
between the irqd and the irq_state stuff).
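For example (sketch only, the foo_* names are made up), a consumer using
NOAUTOEN typically looks like:

	static int foo_probe_irq(struct foo_dev *foo, unsigned int irq)
	{
		int ret;

		irq_set_status_flags(irq, IRQ_NOAUTOEN);
		ret = request_irq(irq, foo_irq_handler, 0, "foo", foo);
		if (ret)
			return ret;

		/* Not started yet, so with your patch this would now fail: */
		irq_set_affinity(irq, cpumask_of(2));	/* e.g. pin to CPU 2 */

		enable_irq(irq);	/* the first enable_irq() is what starts it */
		return 0;
	}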
For these reasons my gut feeling is we should just fix GIC as Ali wanted to
do initially.
The basic idea is simply to defer the HW configuration until the interrupt
has been started. I don't see why that would be an issue. Have set_affinity just
store the mask (and apply whatever other sanity checking it might want to do)
until the interrupt is started, and when started, apply things to HW.
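Something along these lines in the chip driver (sketch, foo_* made up):

	static int foo_set_affinity(struct irq_data *d,
				    const struct cpumask *mask, bool force)
	{
		int cpu = cpumask_first_and(mask, cpu_online_mask);

		if (cpu >= nr_cpu_ids)
			return -EINVAL;

		/* Always remember the target ... */
		irq_data_update_effective_affinity(d, cpumask_of(cpu));

		/* ... but only poke the HW once the interrupt is started */
		if (irqd_is_started(d))
			foo_route_to_cpu(d, cpu);	/* made-up HW write */

		return IRQ_SET_MASK_OK;
	}

with the startup/activate path then reading back the stored target and
programming the HW accordingly.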
I might be missing a reason why it's more complicated than that :) But I do
feel a bit uncomfortable with your approach.
Cheers,
Ben.
On 5/31/20, 9:40 PM, "Herrenschmidt, Benjamin" <[email protected]> wrote:
On Sun, 2020-05-31 at 12:09 +0100, Marc Zyngier wrote:
>
>
> > Not great indeed. But this is not, as far as I can tell, a GIC
> > driver problem.
> >
> > The semantic of activate/deactivate (which maps to started/shutdown
> > in the IRQ code) is that the HW resources for a given interrupt are
> > only committed when the interrupt is activated. Trying to perform
> > actions involving the HW on an interrupt that isn't active cannot be
> > guaranteed to take effect.
> >
> > I'd rather address it in the core code, by preventing set_affinity (and
> > potentially others) to take place when the interrupt is not in the
> > STARTED state. Userspace would get an error, which is perfectly
> > legitimate, and which it already has to deal with it for plenty of
> > other
> > reasons.
So I finally found time to dig a bit in there :) Code has changed a bit
since last I looked. But I have memories of the startup code messing
around with the affinity, and here it is. In irq_startup() :
switch (__irq_startup_managed(desc, aff, force)) {
case IRQ_STARTUP_NORMAL:
ret = __irq_startup(desc);
irq_setup_affinity(desc);
break;
case IRQ_STARTUP_MANAGED:
irq_do_set_affinity(d, aff, false);
ret = __irq_startup(desc);
break;
case IRQ_STARTUP_ABORT:
irqd_set_managed_shutdown(d);
return 0;
So we have two cases here. Normal and managed.
In the managed case, we set the affinity before startup. I feel like your
patch might break that or am I missing something ?
Additionally, your patch would break any userspace program that expects to
be able to change the affinity on an interrupt before it's been started.
I don't know if such a thing exsits but the fact that we hit that bug
makes me think it might.
Now most controller drivers (at least that I'm familiar with, which doesn't
include GiC at this point) can deal with that just fine.
Now there's also another possible issue:
Your patch checks irqd_is_started(). Now I always mixup irqd vs irq_state these
days so I may be wrong but irq_state_set_started() is only done in __irq_startup
which will *not* be called if the interrupt has NOAUTOEN.
Is that ok ? Do we intend for affinity setting not to work until the first
enable_irq() for such an interrupt ? We could check activated instead of
started I suppose. (again provided I didn't mixup two different things
between the irqd and the irq_state stuff).
For these reasons my gut feeling is we should just fix GIC as Ali wanted to
do initially.
The basic idea is simply to defer the HW configuration until the interrupt
has been started. I don't see why that would be an issue. Have set_affinity just
store the mask (and apply whatever other sanity checking it might want to do)
until the itnerrupt is started and when started, apply things to HW.
I might be missing a reason why it's more complicated than that :) But I do
feel a bit uncomfortable with your approach.
Looks like the x86 apic set_affinity call explicitly checks whether it’s activated in the managed case, which makes sense given the code Ben posted above:
/*
* Core code can call here for inactive interrupts. For inactive
* interrupts which use managed or reservation mode there is no
* point in going through the vector assignment right now as the
* activation will assign a vector which fits the destination
* cpumask. Let the core code store the destination mask and be
* done with it.
*/
if (!irqd_is_activated(irqd) &&
(apicd->is_managed || apicd->can_reserve))
return IRQ_SET_MASK_OK;
My original patch should certainly check activated and not disabled. With that, do you still have reservations, Marc?
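I.e. something like this on top of the original diff (untested):

	if (cpu != its_dev->event_map.col_map[id]) {
		target_col = &its_dev->its->collections[cpu];

		/* Only an activated (still mapped) LPI can be moved */
		if (irqd_is_activated(d))
			its_send_movi(its_dev, target_col, id);

		its_dev->event_map.col_map[id] = cpu;
		irq_data_update_effective_affinity(d, cpumask_of(cpu));
	}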
Thanks,
Ali
"Herrenschmidt, Benjamin" <[email protected]> writes:
> On Sun, 2020-05-31 at 12:09 +0100, Marc Zyngier wrote:
>> > The semantic of activate/deactivate (which maps to started/shutdown
>> > in the IRQ code) is that the HW resources for a given interrupt are
>> > only committed when the interrupt is activated. Trying to perform
>> > actions involving the HW on an interrupt that isn't active cannot be
>> > guaranteed to take effect.
>> >
>> > I'd rather address it in the core code, by preventing set_affinity (and
>> > potentially others) to take place when the interrupt is not in the
>> > STARTED state. Userspace would get an error, which is perfectly
>> > legitimate, and which it already has to deal with it for plenty of
>> > other
>> > reasons.
>
> So I finally found time to dig a bit in there :) Code has changed a bit
> since last I looked. But I have memories of the startup code messing
> around with the affinity, and here it is. In irq_startup() :
>
>
> switch (__irq_startup_managed(desc, aff, force)) {
> case IRQ_STARTUP_NORMAL:
> ret = __irq_startup(desc);
> irq_setup_affinity(desc);
> break;
> case IRQ_STARTUP_MANAGED:
> irq_do_set_affinity(d, aff, false);
> ret = __irq_startup(desc);
> break;
> case IRQ_STARTUP_ABORT:
> irqd_set_managed_shutdown(d);
> return 0;
>
> So we have two cases here. Normal and managed.
>
> In the managed case, we set the affinity before startup. I feel like your
> patch might break that or am I missing something ?
It will break stuff because the affinity is not stored in the case where the
interrupt is not started.
I think we can fix this in the core code but that needs more thought.
__irq_can_set_affinity() is definitely the wrong place.
Thanks,
tglx
On 2020-06-02 21:54, Thomas Gleixner wrote:
> "Herrenschmidt, Benjamin" <[email protected]> writes:
>> On Sun, 2020-05-31 at 12:09 +0100, Marc Zyngier wrote:
>>> > The semantic of activate/deactivate (which maps to started/shutdown
>>> > in the IRQ code) is that the HW resources for a given interrupt are
>>> > only committed when the interrupt is activated. Trying to perform
>>> > actions involving the HW on an interrupt that isn't active cannot be
>>> > guaranteed to take effect.
>>> >
>>> > I'd rather address it in the core code, by preventing set_affinity (and
>>> > potentially others) to take place when the interrupt is not in the
>>> > STARTED state. Userspace would get an error, which is perfectly
>>> > legitimate, and which it already has to deal with it for plenty of
>>> > other
>>> > reasons.
>>
>> So I finally found time to dig a bit in there :) Code has changed a
>> bit
>> since last I looked. But I have memories of the startup code messing
>> around with the affinity, and here it is. In irq_startup() :
>>
>>
>> switch (__irq_startup_managed(desc, aff, force)) {
>> case IRQ_STARTUP_NORMAL:
>> ret = __irq_startup(desc);
>> irq_setup_affinity(desc);
>> break;
>> case IRQ_STARTUP_MANAGED:
>> irq_do_set_affinity(d, aff, false);
>> ret = __irq_startup(desc);
Grump. Nice catch. In hindsight, this is obvious, as managed interrupts
may have been allocated to target CPUs that have been hot-plugged off.
>> break;
>> case IRQ_STARTUP_ABORT:
>> irqd_set_managed_shutdown(d);
>> return 0;
>>
>> So we have two cases here. Normal and managed.
>>
>> In the managed case, we set the affinity before startup. I feel like
>> your
>> patch might break that or am I missing something ?
>
> It will break stuff because the affinity is not stored in case that the
> interrupt is not started.
>
> I think we can fix this in the core code but that needs more thought.
> __irq_can_set_affinity() is definitely the wrong place.
Indeed. I completely missed the above. Back to square one.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On 2020-06-02 19:47, Saidi, Ali wrote:
[...]
> Looks like the x86 apic set_affinity call explicitly checks for if
> it’s activated in the managed case which makes sense given the code
> Ben posted above:
> /*
> * Core code can call here for inactive interrupts. For
> inactive
> * interrupts which use managed or reservation mode there is
> no
> * point in going through the vector assignment right now as
> the
> * activation will assign a vector which fits the destination
> * cpumask. Let the core code store the destination mask and
> be
> * done with it.
> */
> if (!irqd_is_activated(irqd) &&
> (apicd->is_managed || apicd->can_reserve))
>
> My original patch should certain check activated and not disabled.
> With that do you still have reservations Marc?
I'd still prefer it if we could do something in core code, rather
than spreading these checks in the individual drivers. If we can't,
fair enough. But it feels like the core set_affinity function could
just do the same thing in a single place (although the started vs
activated is yet another piece of the puzzle I didn't consider,
and the ITS doesn't need the "can_reserve" thing).
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On Wed, 2020-06-03 at 16:16 +0100, Marc Zyngier wrote:
> > My original patch should certain check activated and not disabled.
> > With that do you still have reservations Marc?
>
> I'd still prefer it if we could do something in core code, rather
> than spreading these checks in the individual drivers. If we can't,
> fair enough. But it feels like the core set_affinity function could
> just do the same thing in a single place (although the started vs
> activated is yet another piece of the puzzle I didn't consider,
> and the ITS doesn't need the "can_reserve" thing).
For the sake of fixing the problem in a timely and backportable way I
would suggest first merging the fix, *then* fixing the core code.
Cheers,
Ben.
"Herrenschmidt, Benjamin" <[email protected]> writes:
> On Wed, 2020-06-03 at 16:16 +0100, Marc Zyngier wrote:
>> > My original patch should certain check activated and not disabled.
>> > With that do you still have reservations Marc?
>>
>> I'd still prefer it if we could do something in core code, rather
>> than spreading these checks in the individual drivers. If we can't,
>> fair enough. But it feels like the core set_affinity function could
>> just do the same thing in a single place (although the started vs
>> activated is yet another piece of the puzzle I didn't consider,
>> and the ITS doesn't need the "can_reserve" thing).
>
> For the sake of fixing the problem in a timely and backportable way I
> would suggest first merging the fix, *then* fixing the core core.
The "fix" is just wrong
> if (cpu != its_dev->event_map.col_map[id]) {
> target_col = &its_dev->its->collections[cpu];
> - its_send_movi(its_dev, target_col, id);
> +
> + /* If the IRQ is disabled a discard was sent so don't move */
> + if (!irqd_irq_disabled(d))
That check needs to be !irqd_is_activated() because enable_irq() does
not touch anything affinity related.
> + its_send_movi(its_dev, target_col, id);
> +
> its_dev->event_map.col_map[id] = cpu;
> irq_data_update_effective_affinity(d, cpumask_of(cpu));
And then these associations are disconnected from reality in any case.
Something like the completely untested patch below should work.
Thanks,
tglx
---
arch/x86/kernel/apic/vector.c | 21 +++------------------
kernel/irq/manage.c | 37 +++++++++++++++++++++++++++++++++++--
2 files changed, 38 insertions(+), 20 deletions(-)
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -446,12 +446,10 @@ static int x86_vector_activate(struct ir
trace_vector_activate(irqd->irq, apicd->is_managed,
apicd->can_reserve, reserve);
- /* Nothing to do for fixed assigned vectors */
- if (!apicd->can_reserve && !apicd->is_managed)
- return 0;
-
raw_spin_lock_irqsave(&vector_lock, flags);
- if (reserve || irqd_is_managed_and_shutdown(irqd))
+ if (!apicd->can_reserve && !apicd->is_managed)
+ assign_irq_vector_any_locked(irqd);
+ else if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@@ -775,21 +773,8 @@ void lapic_offline(void)
static int apic_set_affinity(struct irq_data *irqd,
const struct cpumask *dest, bool force)
{
- struct apic_chip_data *apicd = apic_chip_data(irqd);
int err;
- /*
- * Core code can call here for inactive interrupts. For inactive
- * interrupts which use managed or reservation mode there is no
- * point in going through the vector assignment right now as the
- * activation will assign a vector which fits the destination
- * cpumask. Let the core code store the destination mask and be
- * done with it.
- */
- if (!irqd_is_activated(irqd) &&
- (apicd->is_managed || apicd->can_reserve))
- return IRQ_SET_MASK_OK;
-
raw_spin_lock(&vector_lock);
cpumask_and(vector_searchmask, dest, cpu_online_mask);
if (irqd_affinity_is_managed(irqd))
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -195,9 +195,9 @@ void irq_set_thread_affinity(struct irq_
set_bit(IRQTF_AFFINITY, &action->thread_flags);
}
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
static void irq_validate_effective_affinity(struct irq_data *data)
{
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -205,9 +205,19 @@ static void irq_validate_effective_affin
return;
pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
chip->name, data->irq);
-#endif
}
+static inline void irq_init_effective_affinity(struct irq_data *data,
+ const struct cpumask *mask)
+{
+ cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
+}
+#else
+static inline void irq_validate_effective_affinity(struct irq_data *data) { }
+static inline void irq_init_effective_affinity(struct irq_data *data,
+ const struct cpumask *mask) { }
+#endif
+
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -304,6 +314,26 @@ static int irq_try_set_affinity(struct i
return ret;
}
+static bool irq_set_affinity_deactivated(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ struct irq_desc *desc = irq_data_to_desc(data);
+
+ /*
+ * If the interrupt is not yet activated, just store the affinity
+ * mask and do not call the chip driver at all. On activation the
+ * driver has to make sure anyway that the interrupt is in a
+ * useable state so startup works.
+ */
+ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
+ return false;
+
+ cpumask_copy(desc->irq_common_data.affinity, mask);
+ irq_init_effective_affinity(data, mask);
+ irqd_set(data, IRQD_AFFINITY_SET);
+ return true;
+}
+
int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -314,6 +344,9 @@ int irq_set_affinity_locked(struct irq_d
if (!chip || !chip->irq_set_affinity)
return -EINVAL;
+ if (irq_set_affinity_deactivated(data, mask, force))
+ return 0;
+
if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) {
ret = irq_try_set_affinity(data, mask, force);
} else {
On Mon, 2020-06-08 at 15:48 +0200, Thomas Gleixner wrote:
> "Herrenschmidt, Benjamin" <[email protected]> writes:
> > On Wed, 2020-06-03 at 16:16 +0100, Marc Zyngier wrote:
> > > > My original patch should certain check activated and not disabled.
> > > > With that do you still have reservations Marc?
> > >
> > > I'd still prefer it if we could do something in core code, rather
> > > than spreading these checks in the individual drivers. If we can't,
> > > fair enough. But it feels like the core set_affinity function could
> > > just do the same thing in a single place (although the started vs
> > > activated is yet another piece of the puzzle I didn't consider,
> > > and the ITS doesn't need the "can_reserve" thing).
> >
> > For the sake of fixing the problem in a timely and backportable way I
> > would suggest first merging the fix, *then* fixing the core core.
>
> The "fix" is just wrong
>
> > if (cpu != its_dev->event_map.col_map[id]) {
> > target_col = &its_dev->its->collections[cpu];
> > - its_send_movi(its_dev, target_col, id);
> > +
> > + /* If the IRQ is disabled a discard was sent so don't move */
> > + if (!irqd_irq_disabled(d))
>
> That check needs to be !irqd_is_activated() because enable_irq() does
> not touch anything affinity related.
Right. Note: other drivers (like arch/powerpc/sysdev/xive/common.c)
use irqd_is_started() ... this gets confusing :)
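For my own reference, the three states at play, as I understand them (so
possibly wrong):

	/*
	 * irqd_irq_disabled(d) - IRQD_IRQ_DISABLED: the irq is disabled
	 *                        (possibly only lazily masked)
	 * irqd_is_started(d)   - IRQD_IRQ_STARTED: set by irq_startup(),
	 *                        cleared by irq_shutdown()
	 * irqd_is_activated(d) - IRQD_ACTIVATED: the irqdomain hierarchy has
	 *                        committed HW resources for the irq
	 */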
> > + its_send_movi(its_dev, target_col, id);
> > +
> > its_dev->event_map.col_map[id] = cpu;
> > irq_data_update_effective_affinity(d, cpumask_of(cpu));
>
> And then these associtations are disconnected from reality in any case.
Not sure what you mean here, that said...
> Something like the completely untested patch below should work.
Ok. One possible issue though: before, the driver always had the
opportunity to "vet" the affinity mask against whatever platform
constraints may be there and change it before applying it. This is no
longer the case on a deactivated interrupt with your patch, as far as I
can tell. I don't know if that is a problem, or whether drivers that do that
have what it takes to "fix up" the affinity at startup time; the ones I
wrote don't need that feature, but...
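For concreteness, the kind of vetting I mean (made-up foo_* names):

	static int foo_chip_set_affinity(struct irq_data *d,
					 const struct cpumask *mask, bool force)
	{
		struct foo_chip *fc = irq_data_get_irq_chip_data(d);
		/* Clamp the requested mask to what the HW can route to */
		int cpu = cpumask_first_and(mask, fc->routable_cpus);

		if (cpu >= nr_cpu_ids)
			return -EINVAL;

		foo_route_irq(fc, d->hwirq, cpu);	/* made-up HW write */
		irq_data_update_effective_affinity(d, cpumask_of(cpu));
		return IRQ_SET_MASK_OK;
	}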
Cheers,
Ben.
> Thanks,
>
> tglx
>
> ---
> arch/x86/kernel/apic/vector.c | 21 +++------------------
> kernel/irq/manage.c | 37 +++++++++++++++++++++++++++++++++++--
> 2 files changed, 38 insertions(+), 20 deletions(-)
>
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -446,12 +446,10 @@ static int x86_vector_activate(struct ir
> trace_vector_activate(irqd->irq, apicd->is_managed,
> apicd->can_reserve, reserve);
>
> - /* Nothing to do for fixed assigned vectors */
> - if (!apicd->can_reserve && !apicd->is_managed)
> - return 0;
> -
> raw_spin_lock_irqsave(&vector_lock, flags);
> - if (reserve || irqd_is_managed_and_shutdown(irqd))
> + if (!apicd->can_reserve && !apicd->is_managed)
> + assign_irq_vector_any_locked(irqd);
> + else if (reserve || irqd_is_managed_and_shutdown(irqd))
> vector_assign_managed_shutdown(irqd);
> else if (apicd->is_managed)
> ret = activate_managed(irqd);
> @@ -775,21 +773,8 @@ void lapic_offline(void)
> static int apic_set_affinity(struct irq_data *irqd,
> const struct cpumask *dest, bool force)
> {
> - struct apic_chip_data *apicd = apic_chip_data(irqd);
> int err;
>
> - /*
> - * Core code can call here for inactive interrupts. For inactive
> - * interrupts which use managed or reservation mode there is no
> - * point in going through the vector assignment right now as the
> - * activation will assign a vector which fits the destination
> - * cpumask. Let the core code store the destination mask and be
> - * done with it.
> - */
> - if (!irqd_is_activated(irqd) &&
> - (apicd->is_managed || apicd->can_reserve))
> - return IRQ_SET_MASK_OK;
> -
> raw_spin_lock(&vector_lock);
> cpumask_and(vector_searchmask, dest, cpu_online_mask);
> if (irqd_affinity_is_managed(irqd))
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -195,9 +195,9 @@ void irq_set_thread_affinity(struct irq_
> set_bit(IRQTF_AFFINITY, &action->thread_flags);
> }
>
> +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
> static void irq_validate_effective_affinity(struct irq_data *data)
> {
> -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
> const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
> struct irq_chip *chip = irq_data_get_irq_chip(data);
>
> @@ -205,9 +205,19 @@ static void irq_validate_effective_affin
> return;
> pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
> chip->name, data->irq);
> -#endif
> }
>
> +static inline void irq_init_effective_affinity(struct irq_data *data,
> + const struct cpumask *mask)
> +{
> + cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
> +}
> +#else
> +static inline void irq_validate_effective_affinity(struct irq_data *data) { }
> +static inline boot irq_init_effective_affinity(struct irq_data *data,
> + const struct cpumask *mask) { }
> +#endif
> +
> int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
> bool force)
> {
> @@ -304,6 +314,26 @@ static int irq_try_set_affinity(struct i
> return ret;
> }
>
> +static bool irq_set_affinity_deactivated(struct irq_data *data,
> + const struct cpumask *mask, bool force)
> +{
> + struct irq_desc *desc = irq_data_to_desc(data);
> +
> + /*
> + * If the interrupt is not yet activated, just store the affinity
> + * mask and do not call the chip driver at all. On activation the
> + * driver has to make sure anyway that the interrupt is in a
> + * useable state so startup works.
> + */
> + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
> + return false;
> +
> + cpumask_copy(desc->irq_common_data.affinity, mask);
> + irq_init_effective_affinity(data, mask);
> + irqd_set(data, IRQD_AFFINITY_SET);
> + return true;
> +}
> +
> int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
> bool force)
> {
> @@ -314,6 +344,9 @@ int irq_set_affinity_locked(struct irq_d
> if (!chip || !chip->irq_set_affinity)
> return -EINVAL;
>
> + if (irq_set_affinity_deactivated(data, mask, force))
> + return 0;
> +
> if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) {
> ret = irq_try_set_affinity(data, mask, force);
> } else {
Ben,
Benjamin Herrenschmidt <[email protected]> writes:
> On Mon, 2020-06-08 at 15:48 +0200, Thomas Gleixner wrote:
>> > if (cpu != its_dev->event_map.col_map[id]) {
>> > target_col = &its_dev->its->collections[cpu];
>> > - its_send_movi(its_dev, target_col, id);
>> > +
>> > + /* If the IRQ is disabled a discard was sent so don't move */
>> > + if (!irqd_irq_disabled(d))
>>
>> That check needs to be !irqd_is_activated() because enable_irq() does
>> not touch anything affinity related.
>
> Right. Note: other drivers (like arch/powerpc/sysdev/xive/common.c
> use irqd_is_started() ... this gets confusing :)
Blast from the past ...
arch/powerpc does not use hierarchical irq domains, so the activated
state does not matter there.
>> > + its_send_movi(its_dev, target_col, id);
>> > +
>> > its_dev->event_map.col_map[id] = cpu;
>> > irq_data_update_effective_affinity(d, cpumask_of(cpu));
>>
>> And then these associtations are disconnected from reality in any case.
>
> Not sure what you mean here, that said...
You skip the setup and then you set that state to look like it really
happened. How is that NOT disconnected from reality and a proper source
for undecodable failure later on because something else subtly depends on
that state?
>> Something like the completely untested patch below should work.
>
> Ok. One possible issue though is before, the driver always had the
> opportunity to "vet" the affinity mask for whatever platform
> constraints may be there and change it before applying it. This is no
> longer the case on a deactivated interrupt with your patch as far as I
> can tell. I don't know if that is a problem and if drivers that do that
> have what it takes to "fixup" the affinity at startup time, the ones I
> wrote don't need that feature, but...
The driver still has the opportunity to do so when the interrupt is
activated. And if you look at the conditions of that patch, it carefully
applies this only to architectures which actually use hierarchical irq
domains. Everything else, including good old PPC, won't notice at all.
>> Thanks,
>>
>> tglx
<SNIP 60+ lines of useless information ....>
Can you please trim your replies?
Thanks,
tglx
On 6/8/20, 8:49 AM, "Thomas Gleixner" <[email protected]> wrote:
"Herrenschmidt, Benjamin" <[email protected]> writes:
> On Wed, 2020-06-03 at 16:16 +0100, Marc Zyngier wrote:
>> > My original patch should certain check activated and not disabled.
>> > With that do you still have reservations Marc?
>>
>> I'd still prefer it if we could do something in core code, rather
>> than spreading these checks in the individual drivers. If we can't,
>> fair enough. But it feels like the core set_affinity function could
>> just do the same thing in a single place (although the started vs
>> activated is yet another piece of the puzzle I didn't consider,
>> and the ITS doesn't need the "can_reserve" thing).
>
> For the sake of fixing the problem in a timely and backportable way I
> would suggest first merging the fix, *then* fixing the core core.
The "fix" is just wrong
> if (cpu != its_dev->event_map.col_map[id]) {
> target_col = &its_dev->its->collections[cpu];
> - its_send_movi(its_dev, target_col, id);
> +
> + /* If the IRQ is disabled a discard was sent so don't move */
> + if (!irqd_irq_disabled(d))
That check needs to be !irqd_is_activated() because enable_irq() does
not touch anything affinity related.
> + its_send_movi(its_dev, target_col, id);
> +
> its_dev->event_map.col_map[id] = cpu;
> irq_data_update_effective_affinity(d, cpumask_of(cpu));
And then these associtations are disconnected from reality in any case.
Something like the completely untested patch below should work.
With your patch applied, I've been unable to reproduce the problem on an Arm system.
Thanks,
Ali
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: baedb87d1b53532f81b4bd0387f83b05d4f7eb9a
Gitweb: https://git.kernel.org/tip/baedb87d1b53532f81b4bd0387f83b05d4f7eb9a
Author: Thomas Gleixner <[email protected]>
AuthorDate: Fri, 17 Jul 2020 18:00:02 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 17 Jul 2020 23:30:43 +02:00
genirq/affinity: Handle affinity setting on inactive interrupts correctly
Setting interrupt affinity on inactive interrupts is inconsistent when
hierarchical irq domains are enabled. The core code should just store the
affinity and not call into the irq chip driver for inactive interrupts
because the chip drivers may not be in a state to handle such requests.
X86 has a hacky workaround for that, but all other irq chips do not, which
causes problems e.g. on GIC V3 ITS.
Instead of adding more ugly hacks all over the place, solve the problem in
the core code. If the affinity is set on an inactive interrupt then:
- Store it in the irq descriptors affinity mask
- Update the effective affinity to reflect that so user space has
a consistent view
- Don't call into the irq chip driver
This is the core equivalent of the X86 workaround and works correctly
because the affinity setting is established in the irq chip when the
interrupt is activated later on.
Note that this is only effective when hierarchical irq domains are enabled
by the architecture. Doing it unconditionally would break legacy irq chip
implementations.
For hierarchical irq domains this works correctly as none of the drivers can
have a dependency on affinity setting in inactive state by design.
Remove the X86 workaround as it is no longer required.
Fixes: 02edee152d6e ("x86/apic/vector: Ignore set_affinity call for inactive interrupts")
Reported-by: Ali Saidi <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Ali Saidi <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/apic/vector.c | 22 ++++----------------
kernel/irq/manage.c | 37 ++++++++++++++++++++++++++++++++--
2 files changed, 40 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index cc8b16f..7649da2 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
trace_vector_activate(irqd->irq, apicd->is_managed,
apicd->can_reserve, reserve);
- /* Nothing to do for fixed assigned vectors */
- if (!apicd->can_reserve && !apicd->is_managed)
- return 0;
-
raw_spin_lock_irqsave(&vector_lock, flags);
- if (reserve || irqd_is_managed_and_shutdown(irqd))
+ if (!apicd->can_reserve && !apicd->is_managed)
+ assign_irq_vector_any_locked(irqd);
+ else if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@@ -774,20 +772,10 @@ void lapic_offline(void)
static int apic_set_affinity(struct irq_data *irqd,
const struct cpumask *dest, bool force)
{
- struct apic_chip_data *apicd = apic_chip_data(irqd);
int err;
- /*
- * Core code can call here for inactive interrupts. For inactive
- * interrupts which use managed or reservation mode there is no
- * point in going through the vector assignment right now as the
- * activation will assign a vector which fits the destination
- * cpumask. Let the core code store the destination mask and be
- * done with it.
- */
- if (!irqd_is_activated(irqd) &&
- (apicd->is_managed || apicd->can_reserve))
- return IRQ_SET_MASK_OK;
+ if (WARN_ON_ONCE(!irqd_is_activated(irqd)))
+ return -EIO;
raw_spin_lock(&vector_lock);
cpumask_and(vector_searchmask, dest, cpu_online_mask);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7619111..2a9fec5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -195,9 +195,9 @@ void irq_set_thread_affinity(struct irq_desc *desc)
set_bit(IRQTF_AFFINITY, &action->thread_flags);
}
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
static void irq_validate_effective_affinity(struct irq_data *data)
{
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -205,9 +205,19 @@ static void irq_validate_effective_affinity(struct irq_data *data)
return;
pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
chip->name, data->irq);
-#endif
}
+static inline void irq_init_effective_affinity(struct irq_data *data,
+ const struct cpumask *mask)
+{
+ cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
+}
+#else
+static inline void irq_validate_effective_affinity(struct irq_data *data) { }
+static inline void irq_init_effective_affinity(struct irq_data *data,
+ const struct cpumask *mask) { }
+#endif
+
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -304,6 +314,26 @@ static int irq_try_set_affinity(struct irq_data *data,
return ret;
}
+static bool irq_set_affinity_deactivated(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ struct irq_desc *desc = irq_data_to_desc(data);
+
+ /*
+ * If the interrupt is not yet activated, just store the affinity
+ * mask and do not call the chip driver at all. On activation the
+ * driver has to make sure anyway that the interrupt is in a
+ * useable state so startup works.
+ */
+ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
+ return false;
+
+ cpumask_copy(desc->irq_common_data.affinity, mask);
+ irq_init_effective_affinity(data, mask);
+ irqd_set(data, IRQD_AFFINITY_SET);
+ return true;
+}
+
int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -314,6 +344,9 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
if (!chip || !chip->irq_set_affinity)
return -EINVAL;
+ if (irq_set_affinity_deactivated(data, mask, force))
+ return 0;
+
if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) {
ret = irq_try_set_affinity(data, mask, force);
} else {