2015-08-26 16:02:45

by Marc Zyngier

[permalink] [raw]
Subject: [PATCH v4 0/4] irqchip: GICv2/v3: Add support for irq_vcpu_affinity

The GICv2 and GICv3 architectures allow an active physical interrupt
to be forwarded to a guest, and the guest to indirectly perform the
deactivation of the interrupt by performing an EOI on the virtual
interrupt (see for example the GICv2 spec, 3.2.1).

This allows some substantial performance improvement for level
triggered interrupts that otherwise have to be masked/unmasked in
VFIO, not to mention the required trap back to KVM when the guest
performs an EOI.

To enable this, the GICs need to be switched to a different EOImode,
where a taken interrupt can be left "active" (which prevents the same
interrupt from being taken again), while other interrupts are still
being processed normally.

We also use the new irq_set_vcpu_affinity hook that was introduced for
Intel's "Posted Interrupts" to determine whether or not to perform the
deactivation at EOI-time.

As all of this only makes sense when the kernel can behave as a
hypervisor, we only enable this mode on detecting that the kernel was
actually booted in HYP mode, and that the GIC supports this feature.

This series is a complete rework of a RFC I sent over a year ago:

http://lists.infradead.org/pipermail/linux-arm-kernel/2014-June/266328.html

Since then, a lot has been either merged (the irqchip_state) or reworked
(my active-timer series: http://www.spinics.net/lists/kvm/msg118768.html),
and this implements the last few bits for Eric Auger's series to
finally make it into the kernel:

https://lkml.org/lkml/2015/7/2/268
https://lkml.org/lkml/2015/7/6/291

With all these patches combined, physical interrupt routing from the
kernel into a VM becomes possible.

Note that the implementation makes use of the static_key mechanism,
which is undergoing an extensive rework in 4.3. I intend to convert
this code once both are in mainline.

This has been tested on Juno (GICv2) and FastModel (GICv3). A branch
is available at:

git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git irq/gic-irq-vcpu-affinity-v4

* From v3:
- Use separate irq_chip structures, leading to much nicer code (tglx)
- Dropped Eric's Tested/Reviewed-by as there are significant changes

* From v2:
- Another small fix from Eric
- Some commit message cleanups

* From v1:
- Fixes after review from Eric
- Got rid of the cascaded GICv2 hack (it was broken anyway)
- Folded the LPI deactivation patch (it makes more sense as part of
the main one).
- Some clarifying comments about the "deactivate on mask"
- I haven't retained Eric's Reviewed/Tested-by, as the code has
significantly changed on GICv2

Marc Zyngier (4):
irqchip: GICv3: Convert to EOImode == 1
irqchip: GICv3: Don't deactivate interrupts forwarded to a guest
irqchip: GIC: Convert to EOImode == 1
irqchip: GIC: Don't deactivate interrupts forwarded to a guest

drivers/irqchip/irq-gic-v3.c | 70 +++++++++++++++++++++--
drivers/irqchip/irq-gic.c | 111 ++++++++++++++++++++++++++++++++++++-
include/linux/irqchip/arm-gic-v3.h | 9 +++
include/linux/irqchip/arm-gic.h | 4 ++
4 files changed, 188 insertions(+), 6 deletions(-)

--
2.1.4

*** BLURB HERE ***

Marc Zyngier (4):
irqchip: GICv3: Convert to EOImode == 1
irqchip: GICv3: Don't deactivate interrupts forwarded to a guest
irqchip: GIC: Convert to EOImode == 1
irqchip: GIC: Don't deactivate interrupts forwarded to a guest

drivers/irqchip/irq-gic-v3.c | 91 +++++++++++++++++++++++++--
drivers/irqchip/irq-gic.c | 124 ++++++++++++++++++++++++++++++++++++-
include/linux/irqchip/arm-gic-v3.h | 9 +++
include/linux/irqchip/arm-gic.h | 4 ++
4 files changed, 219 insertions(+), 9 deletions(-)

--
2.1.4


2015-08-26 16:02:07

by Marc Zyngier

[permalink] [raw]
Subject: [PATCH v4 1/4] irqchip: GICv3: Convert to EOImode == 1

So far, GICv3 has been used with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.

While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself.

For this case, the GIC architecture provides EOImode == 1, where:
- A write to ICC_EOIR1_EL1 drops the priority of the interrupt and
leaves it active. Other interrupts at the same priority level can
now be taken, but the active interrupt cannot be taken again
- A write to ICC_DIR_EL1 marks the interrupt as inactive, meaning
it can now be taken again.

This patch converts the driver to be able to use this new mode,
depending on whether or not the kernel can behave as a hypervisor.
No feature change.

Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic-v3.c | 68 ++++++++++++++++++++++++++++++++++----
include/linux/irqchip/arm-gic-v3.h | 9 +++++
2 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index c52f7ba..e4eba7e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -30,6 +30,7 @@
#include <asm/cputype.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include <asm/virt.h>

#include "irq-gic-common.h"
#include "irqchip.h"
@@ -50,6 +51,7 @@ struct gic_chip_data {
};

static struct gic_chip_data gic_data __read_mostly;
+static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;

#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist))
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
@@ -231,6 +233,11 @@ static void gic_mask_irq(struct irq_data *d)
gic_poke_irq(d, GICD_ICENABLER);
}

+static void gic_eoimode1_mask_irq(struct irq_data *d)
+{
+ gic_mask_irq(d);
+}
+
static void gic_unmask_irq(struct irq_data *d)
{
gic_poke_irq(d, GICD_ISENABLER);
@@ -296,6 +303,16 @@ static void gic_eoi_irq(struct irq_data *d)
gic_write_eoir(gic_irq(d));
}

+static void gic_eoimode1_eoi_irq(struct irq_data *d)
+{
+ /*
+ * No need to deactivate an LPI.
+ */
+ if (gic_irq(d) >= 8192)
+ return;
+ gic_write_dir(gic_irq(d));
+}
+
static int gic_set_type(struct irq_data *d, unsigned int type)
{
unsigned int irq = gic_irq(d);
@@ -343,15 +360,26 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs

if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
int err;
+
+ if (static_key_true(&supports_deactivate))
+ gic_write_eoir(irqnr);
+
err = handle_domain_irq(gic_data.domain, irqnr, regs);
if (err) {
WARN_ONCE(true, "Unexpected interrupt received!\n");
- gic_write_eoir(irqnr);
+ if (static_key_true(&supports_deactivate)) {
+ if (irqnr < 8192)
+ gic_write_dir(irqnr);
+ } else {
+ gic_write_eoir(irqnr);
+ }
}
continue;
}
if (irqnr < 16) {
gic_write_eoir(irqnr);
+ if (static_key_true(&supports_deactivate))
+ gic_write_dir(irqnr);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#else
@@ -451,8 +479,13 @@ static void gic_cpu_sys_reg_init(void)
/* Set priority mask register */
gic_write_pmr(DEFAULT_PMR_VALUE);

- /* EOI deactivates interrupt too (mode 0) */
- gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+ if (static_key_true(&supports_deactivate)) {
+ /* EOI drops priority only (mode 1) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop);
+ } else {
+ /* EOI deactivates interrupt too (mode 0) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+ }

/* ... and let's hit the road... */
gic_write_grpen1(1);
@@ -661,11 +694,28 @@ static struct irq_chip gic_chip = {
.flags = IRQCHIP_SET_TYPE_MASKED,
};

+static struct irq_chip gic_eoimode1_chip = {
+ .name = "GICv3",
+ .irq_mask = gic_eoimode1_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoimode1_eoi_irq,
+ .irq_set_type = gic_set_type,
+ .irq_set_affinity = gic_set_affinity,
+ .irq_get_irqchip_state = gic_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .flags = IRQCHIP_SET_TYPE_MASKED,
+};
+
#define GIC_ID_NR (1U << gic_data.rdists.id_bits)

static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
+ struct irq_chip *chip = &gic_chip;
+
+ if (static_key_true(&supports_deactivate))
+ chip = &gic_eoimode1_chip;
+
/* SGIs are private to the core kernel */
if (hw < 16)
return -EPERM;
@@ -679,13 +729,13 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
/* PPIs */
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
}
/* SPIs */
if (hw >= 32 && hw < gic_data.irq_nr) {
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
}
@@ -693,7 +743,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
if (hw >= 8192 && hw < GIC_ID_NR) {
if (!gic_dist_supports_lpis())
return -EPERM;
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID);
}
@@ -820,6 +870,12 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
if (of_property_read_u64(node, "redistributor-stride", &redist_stride))
redist_stride = 0;

+ if (!is_hyp_mode_available())
+ static_key_slow_dec(&supports_deactivate);
+
+ if (static_key_true(&supports_deactivate))
+ pr_info("GIC: Using split EOI/Deactivate mode\n");
+
gic_data.dist_base = dist_base;
gic_data.redist_regions = rdist_regs;
gic_data.nr_redist_regions = nr_redist_regions;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index ffbc034..bc98832 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -104,6 +104,8 @@
#define GICR_SYNCR 0x00C0
#define GICR_MOVLPIR 0x0100
#define GICR_MOVALLR 0x0110
+#define GICR_ISACTIVER GICD_ISACTIVER
+#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IDREGS GICD_IDREGS
#define GICR_PIDR2 GICD_PIDR2

@@ -288,6 +290,7 @@
#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)

#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
@@ -384,6 +387,12 @@ static inline void gic_write_eoir(u64 irq)
isb();
}

+static inline void gic_write_dir(u64 irq)
+{
+ asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq));
+ isb();
+}
+
struct irq_domain;
int its_cpu_init(void);
int its_init(struct device_node *node, struct rdists *rdists,
--
2.1.4

2015-08-26 16:01:45

by Marc Zyngier

[permalink] [raw]
Subject: [PATCH v4 2/4] irqchip: GICv3: Don't deactivate interrupts forwarded to a guest

Commit 0a4377de3056 ("genirq: Introduce irq_set_vcpu_affinity() to
target an interrupt to a VCPU") added just what we needed at the
lowest level to allow an interrupt to be deactivated by a guest.

When such a request reaches the GIC, it knows it doesn't need to
perform the deactivation anymore, and can safely let the guest
do its magic. This of course requires additional support in both
VFIO and KVM.

Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic-v3.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index e4eba7e..51635af 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -70,6 +70,11 @@ static inline int gic_irq_in_rdist(struct irq_data *d)
return gic_irq(d) < 32;
}

+static inline bool forwarded_irq(struct irq_data *d)
+{
+ return d->handler_data != NULL;
+}
+
static inline void __iomem *gic_dist_base(struct irq_data *d)
{
if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */
@@ -236,6 +241,16 @@ static void gic_mask_irq(struct irq_data *d)
static void gic_eoimode1_mask_irq(struct irq_data *d)
{
gic_mask_irq(d);
+ /*
+ * When masking a forwarded interrupt, make sure it is
+ * deactivated as well.
+ *
+ * This ensures that an interrupt that is getting
+ * disabled/masked will not get "stuck", because there is
+ * no one to deactivate it (guest is being terminated).
+ */
+ if (forwarded_irq(d))
+ gic_poke_irq(d, GICD_ICACTIVER);
}

static void gic_unmask_irq(struct irq_data *d)
@@ -306,9 +321,10 @@ static void gic_eoi_irq(struct irq_data *d)
static void gic_eoimode1_eoi_irq(struct irq_data *d)
{
/*
- * No need to deactivate an LPI.
+ * No need to deactivate an LPI, or an interrupt that
+ * is getting forwarded to a vcpu.
*/
- if (gic_irq(d) >= 8192)
+ if (gic_irq(d) >= 8192 || forwarded_irq(d))
return;
gic_write_dir(gic_irq(d));
}
@@ -339,6 +355,12 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
return gic_configure_irq(irq, type, base, rwp_wait);
}

+static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
+{
+ d->handler_data = vcpu;
+ return 0;
+}
+
static u64 gic_mpidr_to_affinity(u64 mpidr)
{
u64 aff;
@@ -703,6 +725,7 @@ static struct irq_chip gic_eoimode1_chip = {
.irq_set_affinity = gic_set_affinity,
.irq_get_irqchip_state = gic_irq_get_irqchip_state,
.irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity,
.flags = IRQCHIP_SET_TYPE_MASKED,
};

--
2.1.4

2015-08-26 16:01:01

by Marc Zyngier

[permalink] [raw]
Subject: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

So far, GICv2 has been used with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.

While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself.

For this case, the GIC architecture provides EOImode == 1, where:
- A write to the EOI register drops the priority of the interrupt
and leaves it active. Other interrupts at the same priority level
can now be taken, but the active interrupt cannot be taken again
- A write to the DIR marks the interrupt as inactive, meaning it can
now be taken again.

We only enable this feature when the kernel is booted in HYP mode and
the device-tree reports a suitable CPU interface. Observable behaviour
should remain unchanged.

Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic.c | 69 +++++++++++++++++++++++++++++++++++++++--
include/linux/irqchip/arm-gic.h | 4 +++
2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4dd8826..9215897 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -46,6 +46,7 @@
#include <asm/irq.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include <asm/virt.h>

#include "irq-gic-common.h"
#include "irqchip.h"
@@ -82,6 +83,8 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock);
#define NR_GIC_CPU_IF 8
static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly;

+static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
+
#ifndef MAX_GIC_NR
#define MAX_GIC_NR 1
#endif
@@ -157,6 +160,11 @@ static void gic_mask_irq(struct irq_data *d)
gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR);
}

+static void gic_eoimode1_mask_irq(struct irq_data *d)
+{
+ gic_mask_irq(d);
+}
+
static void gic_unmask_irq(struct irq_data *d)
{
gic_poke_irq(d, GIC_DIST_ENABLE_SET);
@@ -167,6 +175,11 @@ static void gic_eoi_irq(struct irq_data *d)
writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
}

+static void gic_eoimode1_eoi_irq(struct irq_data *d)
+{
+ writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
+}
+
static int gic_irq_set_irqchip_state(struct irq_data *d,
enum irqchip_irq_state which, bool val)
{
@@ -272,11 +285,15 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
irqnr = irqstat & GICC_IAR_INT_ID_MASK;

if (likely(irqnr > 15 && irqnr < 1021)) {
+ if (static_key_true(&supports_deactivate))
+ writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
handle_domain_irq(gic->domain, irqnr, regs);
continue;
}
if (irqnr < 16) {
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
+ if (static_key_true(&supports_deactivate))
+ writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#endif
@@ -327,6 +344,20 @@ static struct irq_chip gic_chip = {
.flags = IRQCHIP_SET_TYPE_MASKED,
};

+static struct irq_chip gic_eoimode1_chip = {
+ .name = "GICv2",
+ .irq_mask = gic_eoimode1_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoimode1_eoi_irq,
+ .irq_set_type = gic_set_type,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = gic_set_affinity,
+#endif
+ .irq_get_irqchip_state = gic_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .flags = IRQCHIP_SET_TYPE_MASKED,
+};
+
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
{
if (gic_nr >= MAX_GIC_NR)
@@ -359,6 +390,10 @@ static void gic_cpu_if_up(void)
{
void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
u32 bypass = 0;
+ u32 mode = 0;
+
+ if (static_key_true(&supports_deactivate))
+ mode = GIC_CPU_CTRL_EOImodeNS;

/*
* Preserve bypass disable bits to be written back later
@@ -366,7 +401,7 @@ static void gic_cpu_if_up(void)
bypass = readl(cpu_base + GIC_CPU_CTRL);
bypass &= GICC_DIS_BYPASS_MASK;

- writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
+ writel_relaxed(bypass | mode | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
}


@@ -789,13 +824,20 @@ void __init gic_init_physaddr(struct device_node *node)
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
+ struct irq_chip *chip = &gic_chip;
+
+ if (static_key_true(&supports_deactivate)) {
+ if (d->host_data == (void *)&gic_data[0])
+ chip = &gic_eoimode1_chip;
+ }
+
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
} else {
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
}
@@ -986,6 +1028,8 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
register_cpu_notifier(&gic_cpu_notifier);
#endif
set_handle_irq(gic_handle_irq);
+ if (static_key_true(&supports_deactivate))
+ pr_info("GIC: Using split EOI/Deactivate mode\n");
}

gic_dist_init(gic);
@@ -1001,6 +1045,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *cpu_base;
void __iomem *dist_base;
+ struct resource cpu_res;
u32 percpu_offset;
int irq;

@@ -1013,6 +1058,16 @@ gic_of_init(struct device_node *node, struct device_node *parent)
cpu_base = of_iomap(node, 1);
WARN(!cpu_base, "unable to map gic cpu registers\n");

+ of_address_to_resource(node, 1, &cpu_res);
+
+ /*
+ * Disable split EOI/Deactivate if either HYP is not available
+ * or the CPU interface is too small.
+ */
+ if (gic_cnt == 0 && (!is_hyp_mode_available() ||
+ resource_size(&cpu_res) < SZ_8K))
+ static_key_slow_dec(&supports_deactivate);
+
if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
percpu_offset = 0;

@@ -1132,6 +1187,14 @@ gic_v2_acpi_init(struct acpi_table_header *table)
}

/*
+ * Disable split EOI/Deactivate if HYP is not available. ACPI
+ * guarantees that we'll always have a GICv2, so the CPU
+ * interface will always be the right size.
+ */
+ if (!is_hyp_mode_available())
+ static_key_slow_dec(&supports_deactivate);
+
+ /*
* Initialize zero GIC instance (no multi-GIC support). Also, set GIC
* as default IRQ domain to allow for GSI registration and GSI to IRQ
* number translation (see acpi_register_gsi() and acpi_gsi_to_irq()).
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 9de976b..b1533c0 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -20,9 +20,13 @@
#define GIC_CPU_ALIAS_BINPOINT 0x1c
#define GIC_CPU_ACTIVEPRIO 0xd0
#define GIC_CPU_IDENT 0xfc
+#define GIC_CPU_DEACTIVATE 0x1000

#define GICC_ENABLE 0x1
#define GICC_INT_PRI_THRESHOLD 0xf0
+
+#define GIC_CPU_CTRL_EOImodeNS (1 << 9)
+
#define GICC_IAR_INT_ID_MASK 0x3ff
#define GICC_INT_SPURIOUS 1023
#define GICC_DIS_BYPASS_MASK 0x1e0
--
2.1.4

2015-08-26 16:01:15

by Marc Zyngier

[permalink] [raw]
Subject: [PATCH v4 4/4] irqchip: GIC: Don't deactivate interrupts forwarded to a guest

Commit 0a4377de3056 ("genirq: Introduce irq_set_vcpu_affinity() to
target an interrupt to a VCPU") added just what we needed at the
lowest level to allow an interrupt to be deactivated by a guest.

When such a request reaches the GIC, it knows it doesn't need to
perform the deactivation anymore, and can safely let the guest
do its magic. This of course requires additional support in both
VFIO and KVM.

Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 9215897..0f1f00c 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -140,6 +140,36 @@ static inline unsigned int gic_irq(struct irq_data *d)
return d->hwirq;
}

+static inline bool cascading_gic_irq(struct irq_data *d)
+{
+ void *data = irq_data_get_irq_handler_data(d);
+
+ /*
+ * If handler_data is pointing to one of the secondary GICs, then
+ * this is a cascading interrupt, and it cannot possibly be
+ * forwarded.
+ */
+ if (data >= (void *)(gic_data + 1) &&
+ data < (void *)(gic_data + MAX_GIC_NR))
+ return true;
+
+ return false;
+}
+
+static inline bool forwarded_irq(struct irq_data *d)
+{
+ /*
+ * A forwarded interrupt:
+ * - is on the primary GIC
+ * - has its handler_data set to a value
+ * - that isn't a secondary GIC
+ */
+ if (d->handler_data && !cascading_gic_irq(d))
+ return true;
+
+ return false;
+}
+
/*
* Routines to acknowledge, disable and enable interrupts
*/
@@ -163,6 +193,16 @@ static void gic_mask_irq(struct irq_data *d)
static void gic_eoimode1_mask_irq(struct irq_data *d)
{
gic_mask_irq(d);
+ /*
+ * When masking a forwarded interrupt, make sure it is
+ * deactivated as well.
+ *
+ * This ensures that an interrupt that is getting
+ * disabled/masked will not get "stuck", because there is
+ * no one to deactivate it (guest is being terminated).
+ */
+ if (forwarded_irq(d))
+ gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR);
}

static void gic_unmask_irq(struct irq_data *d)
@@ -177,6 +217,10 @@ static void gic_eoi_irq(struct irq_data *d)

static void gic_eoimode1_eoi_irq(struct irq_data *d)
{
+ /* Do not deactivate an IRQ forwarded to a vcpu. */
+ if (forwarded_irq(d))
+ return;
+
writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
}

@@ -246,6 +290,16 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
return gic_configure_irq(gicirq, type, base, NULL);
}

+static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
+{
+ /* Only interrupts on the primary GIC can be forwarded to a vcpu. */
+ if (cascading_gic_irq(d))
+ return -EINVAL;
+
+ d->handler_data = vcpu;
+ return 0;
+}
+
#ifdef CONFIG_SMP
static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
@@ -355,6 +409,7 @@ static struct irq_chip gic_eoimode1_chip = {
#endif
.irq_get_irqchip_state = gic_irq_get_irqchip_state,
.irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity,
.flags = IRQCHIP_SET_TYPE_MASKED,
};

--
2.1.4

2015-08-27 13:04:05

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v4 0/4] irqchip: GICv2/v3: Add support for irq_vcpu_affinity

Hi Marc,

I tested the series on Calxeda Midway with VFIO use case. Also reviewed
it again without finding anything new.

Tested-by: Eric Auger <[email protected]>
Reviewed-by: Eric Auger <[email protected]>

Best Regards

Eric


On 08/26/2015 06:00 PM, Marc Zyngier wrote:
> The GICv2 and GICv3 architectures allow an active physical interrupt
> to be forwarded to a guest, and the guest to indirectly perform the
> deactivation of the interrupt by performing an EOI on the virtual
> interrupt (see for example the GICv2 spec, 3.2.1).
>
> This allows some substantial performance improvement for level
> triggered interrupts that otherwise have to be masked/unmasked in
> VFIO, not to mention the required trap back to KVM when the guest
> performs an EOI.
>
> To enable this, the GICs need to be switched to a different EOImode,
> where a taken interrupt can be left "active" (which prevents the same
> interrupt from being taken again), while other interrupts are still
> being processed normally.
>
> We also use the new irq_set_vcpu_affinity hook that was introduced for
> Intel's "Posted Interrupts" to determine whether or not to perform the
> deactivation at EOI-time.
>
> As all of this only makes sense when the kernel can behave as a
> hypervisor, we only enable this mode on detecting that the kernel was
> actually booted in HYP mode, and that the GIC supports this feature.
>
> This series is a complete rework of a RFC I sent over a year ago:
>
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-June/266328.html
>
> Since then, a lot has been either merged (the irqchip_state) or reworked
> (my active-timer series: http://www.spinics.net/lists/kvm/msg118768.html),
> and this implements the last few bits for Eric Auger's series to
> finally make it into the kernel:
>
> https://lkml.org/lkml/2015/7/2/268
> https://lkml.org/lkml/2015/7/6/291
>
> With all these patches combined, physical interrupt routing from the
> kernel into a VM becomes possible.
>
> Note that the implementation makes use of the static_key mechanism,
> which is undergoing an extensive rework in 4.3. I intend to convert
> this code once both are in mainline.
>
> This has been tested on Juno (GICv2) and FastModel (GICv3). A branch
> is available at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git irq/gic-irq-vcpu-affinity-v4
>
> * From v3:
> - Use separate irq_chip structures, leading to much nicer code (tglx)
> - Dropped Eric's Tested/Reviewed-by as there is significant changes
>
> * From v2:
> - Another small fix from Eric
> - Some commit message cleanups
>
> * From v1:
> - Fixes after review from Eric
> - Got rid of the cascaded GICv2 hack (it was broken anyway)
> - Folded the LPI deactivation patch (it makes more sense as part of
> the main one.
> - Some clarifying comments about the "deactivate on mask"
> - I haven't retained Eric's Reviewed/Tested-by, as the code as
> significantly changed on GICv2
>
> Marc Zyngier (4):
> irqchip: GICv3: Convert to EOImode == 1
> irqchip: GICv3: Don't deactivate interrupts forwarded to a guest
> irqchip: GIC: Convert to EOImode == 1
> irqchip: GIC: Don't deactivate interrupts forwarded to a guest
>
> drivers/irqchip/irq-gic-v3.c | 70 +++++++++++++++++++++--
> drivers/irqchip/irq-gic.c | 111 ++++++++++++++++++++++++++++++++++++-
> include/linux/irqchip/arm-gic-v3.h | 9 +++
> include/linux/irqchip/arm-gic.h | 4 ++
> 4 files changed, 188 insertions(+), 6 deletions(-)
>

2015-08-27 14:18:18

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v4 0/4] irqchip: GICv2/v3: Add support for irq_vcpu_affinity

Hi Eric,

On 27/08/15 14:03, Eric Auger wrote:
> Hi Marc,
>
> I tested the series on Calxeda Midway with VFIO use case. Also reviewed
> it again without finding anything new.
>
> Tested-by: Eric Auger <[email protected]>
> Reviewed-by: Eric Auger <[email protected]>

Thanks a lot Eric, much appreciated!

M.
--
Jazz is not dead. It just smells funny...

Subject: [tip:irq/core] irqchip/GICv3: Convert to EOImode == 1

Commit-ID: 0b6a3da9617a08e13afc09cb7e148470ed0eb280
Gitweb: http://git.kernel.org/tip/0b6a3da9617a08e13afc09cb7e148470ed0eb280
Author: Marc Zyngier <[email protected]>
AuthorDate: Wed, 26 Aug 2015 17:00:42 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 27 Aug 2015 17:13:49 +0200

irqchip/GICv3: Convert to EOImode == 1

So far, GICv3 has been used with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.

While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself.

For this case, the GIC architecture provides EOImode == 1, where:
- A write to ICC_EOIR1_EL1 drops the priority of the interrupt and
leaves it active. Other interrupts at the same priority level can
now be taken, but the active interrupt cannot be taken again
- A write to ICC_DIR_EL1 marks the interrupt as inactive, meaning
it can now be taken again.

This patch converts the driver to be able to use this new mode,
depending on whether or not the kernel can behave as a hypervisor.
No feature change.

Signed-off-by: Marc Zyngier <[email protected]>
Reviewed-and-tested-by: Eric Auger <[email protected]>
Cc: Christoffer Dall <[email protected]>
Cc: Jiang Liu <[email protected]>
Cc: <[email protected]>
Cc: [email protected]
Cc: Jason Cooper <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
drivers/irqchip/irq-gic-v3.c | 68 ++++++++++++++++++++++++++++++++++----
include/linux/irqchip/arm-gic-v3.h | 9 +++++
2 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index e406bc5..5c31cc9 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -31,6 +31,7 @@
#include <asm/cputype.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include <asm/virt.h>

#include "irq-gic-common.h"

@@ -50,6 +51,7 @@ struct gic_chip_data {
};

static struct gic_chip_data gic_data __read_mostly;
+static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;

#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist))
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
@@ -231,6 +233,11 @@ static void gic_mask_irq(struct irq_data *d)
gic_poke_irq(d, GICD_ICENABLER);
}

+static void gic_eoimode1_mask_irq(struct irq_data *d)
+{
+ gic_mask_irq(d);
+}
+
static void gic_unmask_irq(struct irq_data *d)
{
gic_poke_irq(d, GICD_ISENABLER);
@@ -296,6 +303,16 @@ static void gic_eoi_irq(struct irq_data *d)
gic_write_eoir(gic_irq(d));
}

+static void gic_eoimode1_eoi_irq(struct irq_data *d)
+{
+ /*
+ * No need to deactivate an LPI.
+ */
+ if (gic_irq(d) >= 8192)
+ return;
+ gic_write_dir(gic_irq(d));
+}
+
static int gic_set_type(struct irq_data *d, unsigned int type)
{
unsigned int irq = gic_irq(d);
@@ -343,15 +360,26 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs

if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
int err;
+
+ if (static_key_true(&supports_deactivate))
+ gic_write_eoir(irqnr);
+
err = handle_domain_irq(gic_data.domain, irqnr, regs);
if (err) {
WARN_ONCE(true, "Unexpected interrupt received!\n");
- gic_write_eoir(irqnr);
+ if (static_key_true(&supports_deactivate)) {
+ if (irqnr < 8192)
+ gic_write_dir(irqnr);
+ } else {
+ gic_write_eoir(irqnr);
+ }
}
continue;
}
if (irqnr < 16) {
gic_write_eoir(irqnr);
+ if (static_key_true(&supports_deactivate))
+ gic_write_dir(irqnr);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#else
@@ -451,8 +479,13 @@ static void gic_cpu_sys_reg_init(void)
/* Set priority mask register */
gic_write_pmr(DEFAULT_PMR_VALUE);

- /* EOI deactivates interrupt too (mode 0) */
- gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+ if (static_key_true(&supports_deactivate)) {
+ /* EOI drops priority only (mode 1) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop);
+ } else {
+ /* EOI deactivates interrupt too (mode 0) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+ }

/* ... and let's hit the road... */
gic_write_grpen1(1);
@@ -661,11 +694,28 @@ static struct irq_chip gic_chip = {
.flags = IRQCHIP_SET_TYPE_MASKED,
};

+static struct irq_chip gic_eoimode1_chip = {
+ .name = "GICv3",
+ .irq_mask = gic_eoimode1_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoimode1_eoi_irq,
+ .irq_set_type = gic_set_type,
+ .irq_set_affinity = gic_set_affinity,
+ .irq_get_irqchip_state = gic_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .flags = IRQCHIP_SET_TYPE_MASKED,
+};
+
#define GIC_ID_NR (1U << gic_data.rdists.id_bits)

static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
+ struct irq_chip *chip = &gic_chip;
+
+ if (static_key_true(&supports_deactivate))
+ chip = &gic_eoimode1_chip;
+
/* SGIs are private to the core kernel */
if (hw < 16)
return -EPERM;
@@ -679,13 +729,13 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
/* PPIs */
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
}
/* SPIs */
if (hw >= 32 && hw < gic_data.irq_nr) {
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
}
@@ -693,7 +743,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
if (hw >= 8192 && hw < GIC_ID_NR) {
if (!gic_dist_supports_lpis())
return -EPERM;
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID);
}
@@ -820,6 +870,12 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
if (of_property_read_u64(node, "redistributor-stride", &redist_stride))
redist_stride = 0;

+ if (!is_hyp_mode_available())
+ static_key_slow_dec(&supports_deactivate);
+
+ if (static_key_true(&supports_deactivate))
+ pr_info("GIC: Using split EOI/Deactivate mode\n");
+
gic_data.dist_base = dist_base;
gic_data.redist_regions = rdist_regs;
gic_data.nr_redist_regions = nr_redist_regions;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index bf982e0..71e4faf 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -104,6 +104,8 @@
#define GICR_SYNCR 0x00C0
#define GICR_MOVLPIR 0x0100
#define GICR_MOVALLR 0x0110
+#define GICR_ISACTIVER GICD_ISACTIVER
+#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IDREGS GICD_IDREGS
#define GICR_PIDR2 GICD_PIDR2

@@ -288,6 +290,7 @@
#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)

#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
@@ -385,6 +388,12 @@ static inline void gic_write_eoir(u64 irq)
isb();
}

+static inline void gic_write_dir(u64 irq)
+{
+ asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq));
+ isb();
+}
+
struct irq_domain;
int its_cpu_init(void);
int its_init(struct device_node *node, struct rdists *rdists,

Subject: [tip:irq/core] irqchip/GICv3: Don't deactivate interrupts forwarded to a guest

Commit-ID: 530bf353e4eb06bcba5078390c949650cd26a7c7
Gitweb: http://git.kernel.org/tip/530bf353e4eb06bcba5078390c949650cd26a7c7
Author: Marc Zyngier <[email protected]>
AuthorDate: Wed, 26 Aug 2015 17:00:43 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 27 Aug 2015 17:13:49 +0200

irqchip/GICv3: Don't deactivate interrupts forwarded to a guest

Commit 0a4377de3056 ("genirq: Introduce irq_set_vcpu_affinity() to
target an interrupt to a VCPU") added just what we needed at the
lowest level to allow an interrupt to be deactivated by a guest.

When such a request reaches the GIC, it knows it doesn't need to
perform the deactivation anymore, and can safely let the guest
do its magic. This of course requires additional support in both
VFIO and KVM.

Signed-off-by: Marc Zyngier <[email protected]>
Reviewed-and-tested-by: Eric Auger <[email protected]>
Cc: Christoffer Dall <[email protected]>
Cc: Jiang Liu <[email protected]>
Cc: <[email protected]>
Cc: [email protected]
Cc: Jason Cooper <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
drivers/irqchip/irq-gic-v3.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 5c31cc9..7deed6e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -70,6 +70,11 @@ static inline int gic_irq_in_rdist(struct irq_data *d)
return gic_irq(d) < 32;
}

+static inline bool forwarded_irq(struct irq_data *d)
+{
+ return d->handler_data != NULL;
+}
+
static inline void __iomem *gic_dist_base(struct irq_data *d)
{
if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */
@@ -236,6 +241,16 @@ static void gic_mask_irq(struct irq_data *d)
static void gic_eoimode1_mask_irq(struct irq_data *d)
{
gic_mask_irq(d);
+ /*
+ * When masking a forwarded interrupt, make sure it is
+ * deactivated as well.
+ *
+ * This ensures that an interrupt that is getting
+ * disabled/masked will not get "stuck", because there is
+ * noone to deactivate it (guest is being terminated).
+ */
+ if (forwarded_irq(d))
+ gic_poke_irq(d, GICD_ICACTIVER);
}

static void gic_unmask_irq(struct irq_data *d)
@@ -306,9 +321,10 @@ static void gic_eoi_irq(struct irq_data *d)
static void gic_eoimode1_eoi_irq(struct irq_data *d)
{
/*
- * No need to deactivate an LPI.
+ * No need to deactivate an LPI, or an interrupt that
+ * is is getting forwarded to a vcpu.
*/
- if (gic_irq(d) >= 8192)
+ if (gic_irq(d) >= 8192 || forwarded_irq(d))
return;
gic_write_dir(gic_irq(d));
}
@@ -339,6 +355,12 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
return gic_configure_irq(irq, type, base, rwp_wait);
}

+static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
+{
+ d->handler_data = vcpu;
+ return 0;
+}
+
static u64 gic_mpidr_to_affinity(u64 mpidr)
{
u64 aff;
@@ -703,6 +725,7 @@ static struct irq_chip gic_eoimode1_chip = {
.irq_set_affinity = gic_set_affinity,
.irq_get_irqchip_state = gic_irq_get_irqchip_state,
.irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity,
.flags = IRQCHIP_SET_TYPE_MASKED,
};

Subject: [tip:irq/core] irqchip/GIC: Convert to EOImode == 1

Commit-ID: 0b996fd35957a30568cddbce05b917c1897966e0
Gitweb: http://git.kernel.org/tip/0b996fd35957a30568cddbce05b917c1897966e0
Author: Marc Zyngier <[email protected]>
AuthorDate: Wed, 26 Aug 2015 17:00:44 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 27 Aug 2015 17:13:49 +0200

irqchip/GIC: Convert to EOImode == 1

So far, GICv2 has been used with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.

While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself.

For this case, the GIC architecture provides EOImode == 1, where:
- A write to the EOI register drops the priority of the interrupt
and leaves it active. Other interrupts at the same priority level
can now be taken, but the active interrupt cannot be taken again
- A write to the DIR marks the interrupt as inactive, meaning it can
now be taken again.

We only enable this feature when the kernel is booted in HYP mode and
the device tree reports a suitable CPU interface. Observable behaviour
should remain unchanged.

Signed-off-by: Marc Zyngier <[email protected]>
Reviewed-and-tested-by: Eric Auger <[email protected]>
Cc: Christoffer Dall <[email protected]>
Cc: Jiang Liu <[email protected]>
Cc: <[email protected]>
Cc: [email protected]
Cc: Jason Cooper <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
drivers/irqchip/irq-gic.c | 71 +++++++++++++++++++++++++++++++++++++++--
include/linux/irqchip/arm-gic.h | 4 +++
2 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index aa3e7b8..c835f4c 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -47,6 +47,7 @@
#include <asm/irq.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include <asm/virt.h>

#include "irq-gic-common.h"

@@ -82,6 +83,8 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock);
#define NR_GIC_CPU_IF 8
static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly;

+static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
+
#ifndef MAX_GIC_NR
#define MAX_GIC_NR 1
#endif
@@ -157,6 +160,11 @@ static void gic_mask_irq(struct irq_data *d)
gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR);
}

+static void gic_eoimode1_mask_irq(struct irq_data *d)
+{
+ gic_mask_irq(d);
+}
+
static void gic_unmask_irq(struct irq_data *d)
{
gic_poke_irq(d, GIC_DIST_ENABLE_SET);
@@ -167,6 +175,11 @@ static void gic_eoi_irq(struct irq_data *d)
writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
}

+static void gic_eoimode1_eoi_irq(struct irq_data *d)
+{
+ writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
+}
+
static int gic_irq_set_irqchip_state(struct irq_data *d,
enum irqchip_irq_state which, bool val)
{
@@ -272,11 +285,15 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
irqnr = irqstat & GICC_IAR_INT_ID_MASK;

if (likely(irqnr > 15 && irqnr < 1021)) {
+ if (static_key_true(&supports_deactivate))
+ writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
handle_domain_irq(gic->domain, irqnr, regs);
continue;
}
if (irqnr < 16) {
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
+ if (static_key_true(&supports_deactivate))
+ writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#endif
@@ -329,6 +346,22 @@ static struct irq_chip gic_chip = {
IRQCHIP_MASK_ON_SUSPEND,
};

+static struct irq_chip gic_eoimode1_chip = {
+ .name = "GICv2",
+ .irq_mask = gic_eoimode1_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoimode1_eoi_irq,
+ .irq_set_type = gic_set_type,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = gic_set_affinity,
+#endif
+ .irq_get_irqchip_state = gic_irq_get_irqchip_state,
+ .irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .flags = IRQCHIP_SET_TYPE_MASKED |
+ IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_MASK_ON_SUSPEND,
+};
+
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
{
if (gic_nr >= MAX_GIC_NR)
@@ -360,6 +393,10 @@ static void gic_cpu_if_up(struct gic_chip_data *gic)
{
void __iomem *cpu_base = gic_data_cpu_base(gic);
u32 bypass = 0;
+ u32 mode = 0;
+
+ if (static_key_true(&supports_deactivate))
+ mode = GIC_CPU_CTRL_EOImodeNS;

/*
* Preserve bypass disable bits to be written back later
@@ -367,7 +404,7 @@ static void gic_cpu_if_up(struct gic_chip_data *gic)
bypass = readl(cpu_base + GIC_CPU_CTRL);
bypass &= GICC_DIS_BYPASS_MASK;

- writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
+ writel_relaxed(bypass | mode | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
}


@@ -803,13 +840,20 @@ void __init gic_init_physaddr(struct device_node *node)
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
+ struct irq_chip *chip = &gic_chip;
+
+ if (static_key_true(&supports_deactivate)) {
+ if (d->host_data == (void *)&gic_data[0])
+ chip = &gic_eoimode1_chip;
+ }
+
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
} else {
- irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
}
@@ -995,6 +1039,8 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
register_cpu_notifier(&gic_cpu_notifier);
#endif
set_handle_irq(gic_handle_irq);
+ if (static_key_true(&supports_deactivate))
+ pr_info("GIC: Using split EOI/Deactivate mode\n");
}

gic_dist_init(gic);
@@ -1010,6 +1056,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *cpu_base;
void __iomem *dist_base;
+ struct resource cpu_res;
u32 percpu_offset;
int irq;

@@ -1022,6 +1069,16 @@ gic_of_init(struct device_node *node, struct device_node *parent)
cpu_base = of_iomap(node, 1);
WARN(!cpu_base, "unable to map gic cpu registers\n");

+ of_address_to_resource(node, 1, &cpu_res);
+
+ /*
+ * Disable split EOI/Deactivate if either HYP is not available
+ * or the CPU interface is too small.
+ */
+ if (gic_cnt == 0 && (!is_hyp_mode_available() ||
+ resource_size(&cpu_res) < SZ_8K))
+ static_key_slow_dec(&supports_deactivate);
+
if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
percpu_offset = 0;

@@ -1141,6 +1198,14 @@ gic_v2_acpi_init(struct acpi_table_header *table)
}

/*
+ * Disable split EOI/Deactivate if HYP is not available. ACPI
+ * guarantees that we'll always have a GICv2, so the CPU
+ * interface will always be the right size.
+ */
+ if (!is_hyp_mode_available())
+ static_key_slow_dec(&supports_deactivate);
+
+ /*
* Initialize zero GIC instance (no multi-GIC support). Also, set GIC
* as default IRQ domain to allow for GSI registration and GSI to IRQ
* number translation (see acpi_register_gsi() and acpi_gsi_to_irq()).
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 65da435..af3d29f 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -20,9 +20,13 @@
#define GIC_CPU_ALIAS_BINPOINT 0x1c
#define GIC_CPU_ACTIVEPRIO 0xd0
#define GIC_CPU_IDENT 0xfc
+#define GIC_CPU_DEACTIVATE 0x1000

#define GICC_ENABLE 0x1
#define GICC_INT_PRI_THRESHOLD 0xf0
+
+#define GIC_CPU_CTRL_EOImodeNS (1 << 9)
+
#define GICC_IAR_INT_ID_MASK 0x3ff
#define GICC_INT_SPURIOUS 1023
#define GICC_DIS_BYPASS_MASK 0x1e0

Subject: [tip:irq/core] irqchip/GIC: Don't deactivate interrupts forwarded to a guest

Commit-ID: 01f779f4862b53810ba4eb247f57bd1ad31d1c18
Gitweb: http://git.kernel.org/tip/01f779f4862b53810ba4eb247f57bd1ad31d1c18
Author: Marc Zyngier <[email protected]>
AuthorDate: Wed, 26 Aug 2015 17:00:45 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 27 Aug 2015 17:13:49 +0200

irqchip/GIC: Don't deactivate interrupts forwarded to a guest

Commit 0a4377de3056 ("genirq: Introduce irq_set_vcpu_affinity() to
target an interrupt to a VCPU") added just what we needed at the
lowest level to allow an interrupt to be deactivated by a guest.

When such a request reaches the GIC, it knows it doesn't need to
perform the deactivation anymore, and can safely let the guest
do its magic. This of course requires additional support in both
VFIO and KVM.

Signed-off-by: Marc Zyngier <[email protected]>
Reviewed-and-tested-by: Eric Auger <[email protected]>
Cc: Christoffer Dall <[email protected]>
Cc: Jiang Liu <[email protected]>
Cc: <[email protected]>
Cc: [email protected]
Cc: Jason Cooper <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
drivers/irqchip/irq-gic.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index c835f4c..72bf81b 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -140,6 +140,36 @@ static inline unsigned int gic_irq(struct irq_data *d)
return d->hwirq;
}

+static inline bool cascading_gic_irq(struct irq_data *d)
+{
+ void *data = irq_data_get_irq_handler_data(d);
+
+ /*
+ * If handler_data pointing to one of the secondary GICs, then
+ * this is a cascading interrupt, and it cannot possibly be
+ * forwarded.
+ */
+ if (data >= (void *)(gic_data + 1) &&
+ data < (void *)(gic_data + MAX_GIC_NR))
+ return true;
+
+ return false;
+}
+
+static inline bool forwarded_irq(struct irq_data *d)
+{
+ /*
+ * A forwarded interrupt:
+ * - is on the primary GIC
+ * - has its handler_data set to a value
+ * - that isn't a secondary GIC
+ */
+ if (d->handler_data && !cascading_gic_irq(d))
+ return true;
+
+ return false;
+}
+
/*
* Routines to acknowledge, disable and enable interrupts
*/
@@ -163,6 +193,16 @@ static void gic_mask_irq(struct irq_data *d)
static void gic_eoimode1_mask_irq(struct irq_data *d)
{
gic_mask_irq(d);
+ /*
+ * When masking a forwarded interrupt, make sure it is
+ * deactivated as well.
+ *
+ * This ensures that an interrupt that is getting
+ * disabled/masked will not get "stuck", because there is
+ * noone to deactivate it (guest is being terminated).
+ */
+ if (forwarded_irq(d))
+ gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR);
}

static void gic_unmask_irq(struct irq_data *d)
@@ -177,6 +217,10 @@ static void gic_eoi_irq(struct irq_data *d)

static void gic_eoimode1_eoi_irq(struct irq_data *d)
{
+ /* Do not deactivate an IRQ forwarded to a vcpu. */
+ if (forwarded_irq(d))
+ return;
+
writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
}

@@ -246,6 +290,16 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
return gic_configure_irq(gicirq, type, base, NULL);
}

+static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
+{
+ /* Only interrupts on the primary GIC can be forwarded to a vcpu. */
+ if (cascading_gic_irq(d))
+ return -EINVAL;
+
+ d->handler_data = vcpu;
+ return 0;
+}
+
#ifdef CONFIG_SMP
static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
@@ -357,6 +411,7 @@ static struct irq_chip gic_eoimode1_chip = {
#endif
.irq_get_irqchip_state = gic_irq_get_irqchip_state,
.irq_set_irqchip_state = gic_irq_set_irqchip_state,
+ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity,
.flags = IRQCHIP_SET_TYPE_MASKED |
IRQCHIP_SKIP_SET_WAKE |
IRQCHIP_MASK_ON_SUSPEND,