The GIC architecture (ARM's Generic Interrupt Controller) allows an
active physical interrupt to be forwarded to a guest, and the guest to
indirectly perform the deactivation of the interrupt by performing an
EOI on the virtual interrupt (see for example the GICv2 spec, 3.2.1).
So far, Linux doesn't have this notion, which is a bit of a pain.
This patch series introduce two generic features:
- A way to mark an interrupt as "forwarded": this allows an irq_chip
to know that it shouldn't perform the deactivation itself
- A way to save/restore the "state" of a "forwarded" interrupt
The series then adapts both GIC drivers to switch to EOImode == 1
(split priority drop and deactivation), to support this "forwarded"
feature and hacks the KVM/ARM timer backend to use all of this.
This requires yet another bit of surgery in the vgic code in order to
allow a mapping between physical interrupts and virtual
ones. Hopefully, this should plug into VFIO and the whole irqfd thing,
but I don't understand any of that just yet (Eric?)
The patches are against 3.16-rc2, plus a massive amount of GICv3
code. They are also available in my git repo:
git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/irq-forward
Open questions (Thomas, these are mostly targeted at you):
- Are the generic features generic enough?
- Would the "forwarded" thing be better implemented as a handler
rather than an irq_chip specific thing?
- The split priority drop/deactivate also fits the threaded interrupt
model fairly well (no need to mask/unmask). Should we have a go at
this too?
- Does it fit the VFIO+KVM model without playing the ugly mask/unmask
dance?
Thanks,
M.
Marc Zyngier (9):
genirq: Add IRQD_IRQ_FORWARDED flag and accessors
genirq: Allow the state of a forwarded irq to be save/restored
irqchip: GIC: Convert to EOImode == 1
irqchip: GIC: add support for forwarded interrupts
irqchip: GICv3: Convert to EOImode == 1
irqchip: GICv3: add support for forwarded interrupts
KVM: arm: vgic: allow dynamic mapping of physical/virtual interrupts
arm: KVM: timer: move the timer switch into the non-preemptible
section
KVM: arm: timer: make the interrupt state part of the timer state
arch/arm/kvm/arm.c | 7 +--
drivers/irqchip/irq-gic-v3-its.c | 2 +
drivers/irqchip/irq-gic-v3.c | 33 +++++++++++-
drivers/irqchip/irq-gic.c | 107 +++++++++++++++++++++++++++++++++----
include/kvm/arm_arch_timer.h | 3 ++
include/kvm/arm_vgic.h | 13 +++++
include/linux/interrupt.h | 2 +
include/linux/irq.h | 32 +++++++++++
include/linux/irqchip/arm-gic-v3.h | 12 +++++
include/linux/irqchip/arm-gic.h | 5 ++
kernel/irq/manage.c | 80 +++++++++++++++++++++++++++
virt/kvm/arm/arch_timer.c | 31 ++++++++++-
virt/kvm/arm/vgic-v2.c | 14 ++++-
virt/kvm/arm/vgic-v3.c | 22 +++++++-
virt/kvm/arm/vgic.c | 88 ++++++++++++++++++++++++++++++
15 files changed, 431 insertions(+), 20 deletions(-)
--
1.8.3.4
Now that we've switched to EOImode == 1, prevent a forwarded interrupt
from being deactivated after its priority has been dropped.
Also add support for the interrupt state to be saved/restored.
Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic-v3-its.c | 3 ++-
drivers/irqchip/irq-gic-v3.c | 29 ++++++++++++++++++++++++++++-
include/linux/irqchip/arm-gic-v3.h | 2 ++
3 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 922f228..7c354ff 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -616,7 +616,8 @@ static void its_unmask_irq(struct irq_data *d)
static void its_eoi_irq(struct irq_data *d)
{
gic_write_eoir(d->hwirq);
- gic_write_dir(d->hwirq);
+ if (!irqd_irq_forwarded(d))
+ gic_write_dir(d->hwirq);
}
static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 26a08ce..1f080d6 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -235,10 +235,35 @@ static void gic_unmask_irq(struct irq_data *d)
gic_poke_irq(d, GICD_ISENABLER);
}
+static void gic_irq_set_fwd_state(struct irq_data *d, u32 val, u32 mask)
+{
+ if (mask & IRQ_FWD_STATE_PENDING)
+ gic_poke_irq(d, (val & IRQ_FWD_STATE_PENDING) ? GICD_ISPENDR : GICD_ICPENDR);
+ if (mask & IRQ_FWD_STATE_ACTIVE)
+ gic_poke_irq(d, (val & IRQ_FWD_STATE_ACTIVE) ? GICD_ISACTIVER : GICD_ICACTIVER);
+ if (mask & IRQ_FWD_STATE_MASKED)
+ gic_poke_irq(d, (val & IRQ_FWD_STATE_MASKED) ? GICD_ICENABLER : GICD_ISENABLER);
+}
+
+static u32 gic_irq_get_fwd_state(struct irq_data *d, u32 mask)
+{
+ u32 val = 0;
+
+ if ((mask & IRQ_FWD_STATE_PENDING) && gic_peek_irq(d, GICD_ISPENDR))
+ val |= IRQ_FWD_STATE_PENDING;
+ if ((mask & IRQ_FWD_STATE_ACTIVE) && gic_peek_irq(d, GICD_ISACTIVER))
+ val |= IRQ_FWD_STATE_ACTIVE;
+ if ((mask & IRQ_FWD_STATE_MASKED) && !gic_peek_irq(d, GICD_ISENABLER))
+ val |= IRQ_FWD_STATE_MASKED;
+
+ return val;
+}
+
static void gic_eoi_irq(struct irq_data *d)
{
gic_write_eoir(gic_irq(d));
- gic_write_dir(gic_irq(d));
+ if (!irqd_irq_forwarded(d))
+ gic_write_dir(gic_irq(d));
}
static int gic_set_type(struct irq_data *d, unsigned int type)
@@ -598,6 +623,8 @@ static struct irq_chip gic_chip = {
.irq_eoi = gic_eoi_irq,
.irq_set_type = gic_set_type,
.irq_set_affinity = gic_set_affinity,
+ .irq_get_fwd_state = gic_irq_get_fwd_state,
+ .irq_set_fwd_state = gic_irq_set_fwd_state,
};
static struct irq_chip *its_chip;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index a0bff2f..0e74c19 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -90,6 +90,8 @@
#define GICR_SYNCR 0x00C0
#define GICR_MOVLPIR 0x0100
#define GICR_MOVALLR 0x0110
+#define GICR_ISACTIVER GICD_ISACTIVER
+#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IDREGS GICD_IDREGS
#define GICR_PIDR2 GICD_PIDR2
--
1.8.3.4
In order to remove the crude hack where we sneak the masked bit
into the timer's control register, make use of the forwarded
interrupt API to save/restore the active state of the interrupt.
Signed-off-by: Marc Zyngier <[email protected]>
---
include/kvm/arm_arch_timer.h | 3 +++
virt/kvm/arm/arch_timer.c | 31 ++++++++++++++++++++++++++++++-
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6d9aedd..6a69f42 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -53,6 +53,9 @@ struct arch_timer_cpu {
/* Background timer active */
bool armed;
+ /* Is interrupt active at the distributor level */
+ u32 irq_active;
+
/* Timer IRQ */
const struct kvm_irq_level *irq;
#endif
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819..57817c5 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -63,7 +63,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
+ timer->irq_active = IRQ_FWD_STATE_ACTIVE;
kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer->irq->irq,
timer->irq->level);
@@ -117,6 +117,16 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
* populate the CPU timer again.
*/
timer_disarm(timer);
+
+ if (timer->irq_active) {
+ int ret;
+
+ ret = irq_set_fwd_state(host_vtimer_irq,
+ timer->irq_active,
+ IRQ_FWD_STATE_ACTIVE);
+ if (ret)
+ kvm_err("unable to restore timer state");
+ }
}
/**
@@ -130,8 +140,16 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
cycle_t cval, now;
+ int ret;
u64 ns;
+ ret = irq_get_fwd_state(host_vtimer_irq, &timer->irq_active,
+ IRQ_FWD_STATE_ACTIVE);
+ if (ret)
+ kvm_err("unable to retrieve timer state");
+ if (timer->irq_active)
+ irq_set_fwd_state(host_vtimer_irq, 0, IRQ_FWD_STATE_ACTIVE);
+
if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
return;
@@ -166,6 +184,12 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* vcpu timer irq number when the vcpu is reset.
*/
timer->irq = irq;
+
+ /*
+ * Tell the VGIC that the virtual interrupt is tied to a
+ * physical interrupt. We do that once per VCPU.
+ */
+ vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
}
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
@@ -290,6 +314,10 @@ int kvm_timer_hyp_init(void)
}
kvm_info("%s IRQ%d\n", np->name, ppi);
+
+ /* Tell the GIC we're forwarding the interrupt to a guest */
+ irqd_set_irq_forwarded(irq_get_irq_data(host_vtimer_irq));
+
on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
goto out;
@@ -305,6 +333,7 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
timer_disarm(timer);
+ vgic_unmap_phys_irq(vcpu, timer->irq->irq, host_vtimer_irq);
}
int kvm_timer_init(struct kvm *kvm)
--
1.8.3.4
Now that we've switched to EOImode == 1, prevent a forwarded interrupt
from being deactivated after its priority has been dropped.
Also add support for the interrupt state to be saved/restored.
Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic.c | 48 +++++++++++++++++++++++++++++++++++++++++------
1 file changed, 42 insertions(+), 6 deletions(-)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 9295bf2..bde1637 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -157,12 +157,22 @@ static inline unsigned int gic_irq(struct irq_data *d)
/*
* Routines to acknowledge, disable and enable interrupts
*/
-static void gic_mask_irq(struct irq_data *d)
+static void gic_poke_irq(struct irq_data *d, u32 offset)
+{
+ u32 mask = 1 << (gic_irq(d) % 32);
+ writel_relaxed(mask, gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4);
+}
+
+static int gic_peek_irq(struct irq_data *d, u32 offset)
{
u32 mask = 1 << (gic_irq(d) % 32);
+ return !!(readl_relaxed(gic_dist_base(d) + offset + (gic_irq(d) / 32) * 4) & mask);
+}
+static void gic_mask_irq(struct irq_data *d)
+{
raw_spin_lock(&irq_controller_lock);
- writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4);
+ gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR);
if (gic_arch_extn.irq_mask)
gic_arch_extn.irq_mask(d);
raw_spin_unlock(&irq_controller_lock);
@@ -170,12 +180,10 @@ static void gic_mask_irq(struct irq_data *d)
static void gic_unmask_irq(struct irq_data *d)
{
- u32 mask = 1 << (gic_irq(d) % 32);
-
raw_spin_lock(&irq_controller_lock);
if (gic_arch_extn.irq_unmask)
gic_arch_extn.irq_unmask(d);
- writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4);
+ gic_poke_irq(d, GIC_DIST_ENABLE_SET);
raw_spin_unlock(&irq_controller_lock);
}
@@ -193,7 +201,33 @@ static void gic_eoi_irq(struct irq_data *d)
static void gic_eoi_dir_irq(struct irq_data *d)
{
gic_eoi_irq(d);
- writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
+ if (!irqd_irq_forwarded(d))
+ writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
+}
+
+static void gic_irq_set_fwd_state(struct irq_data *d, u32 val, u32 mask)
+{
+ if (mask & IRQ_FWD_STATE_PENDING)
+ gic_poke_irq(d, (val & IRQ_FWD_STATE_PENDING) ? GIC_DIST_ENABLE_SET : GIC_DIST_ENABLE_CLEAR);
+ if (mask & IRQ_FWD_STATE_ACTIVE)
+ gic_poke_irq(d, (val & IRQ_FWD_STATE_ACTIVE) ? GIC_DIST_ACTIVE_SET : GIC_DIST_ACTIVE_CLEAR);
+ if (mask & IRQ_FWD_STATE_MASKED)
+ gic_poke_irq(d, (val & IRQ_FWD_STATE_MASKED) ? GIC_DIST_ENABLE_CLEAR : GIC_DIST_ENABLE_SET);
+
+}
+
+static u32 gic_irq_get_fwd_state(struct irq_data *d, u32 mask)
+{
+ u32 val = 0;
+
+ if (mask & IRQ_FWD_STATE_PENDING && gic_peek_irq(d, GIC_DIST_ENABLE_SET))
+ val |= IRQ_FWD_STATE_PENDING;
+ if (mask & IRQ_FWD_STATE_ACTIVE && gic_peek_irq(d, GIC_DIST_ACTIVE_SET))
+ val |= IRQ_FWD_STATE_ACTIVE;
+ if (mask & IRQ_FWD_STATE_MASKED && !gic_peek_irq(d, GIC_DIST_ENABLE_SET))
+ val |= IRQ_FWD_STATE_MASKED;
+
+ return val;
}
static int gic_set_type(struct irq_data *d, unsigned int type)
@@ -349,6 +383,8 @@ static struct irq_chip gicv2_chip = {
.irq_set_affinity = gic_set_affinity,
#endif
.irq_set_wake = gic_set_wake,
+ .irq_get_fwd_state = gic_irq_get_fwd_state,
+ .irq_set_fwd_state = gic_irq_set_fwd_state,
};
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
--
1.8.3.4
In order to be able to feed physical interrupts to a guest, we need
to be able to establish the virtual-physical mapping between the two
worlds.
As we try to keep the injection interface simple, find out what the
physical interrupt is (if any) when we actually build the LR.
The mapping is kept in a rbtree, indexed by virtual interrupts.
Signed-off-by: Marc Zyngier <[email protected]>
---
include/kvm/arm_vgic.h | 13 ++++++
include/linux/irqchip/arm-gic-v3.h | 3 ++
include/linux/irqchip/arm-gic.h | 1 +
virt/kvm/arm/vgic-v2.c | 14 +++++-
virt/kvm/arm/vgic-v3.c | 22 +++++++++-
virt/kvm/arm/vgic.c | 88 ++++++++++++++++++++++++++++++++++++++
6 files changed, 138 insertions(+), 3 deletions(-)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 82e00a5..5f61dfa 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -134,6 +134,12 @@ struct vgic_vm_ops {
int (*vgic_init)(struct kvm *kvm, const struct vgic_params *params);
};
+struct irq_phys_map {
+ struct rb_node node;
+ u32 virt_irq;
+ u32 phys_irq;
+};
+
struct vgic_dist {
#ifdef CONFIG_KVM_ARM_VGIC
spinlock_t lock;
@@ -190,6 +196,8 @@ struct vgic_dist {
unsigned long irq_pending_on_cpu;
struct vgic_vm_ops vm_ops;
+
+ struct rb_root irq_phys_map;
#endif
};
@@ -237,6 +245,8 @@ struct vgic_cpu {
struct vgic_v2_cpu_if vgic_v2;
struct vgic_v3_cpu_if vgic_v3;
};
+
+ struct rb_root irq_phys_map;
#endif
};
@@ -265,6 +275,9 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_exit_mmio *mmio);
+int vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq);
+int vgic_get_phys_irq(struct kvm_vcpu *vcpu, int virt_irq);
+int vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) ((k)->arch.vgic.ready)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 0e74c19..7753d18 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -210,9 +210,12 @@
#define ICH_LR_EOI (1UL << 41)
#define ICH_LR_GROUP (1UL << 60)
+#define ICH_LR_HW (1UL << 61)
#define ICH_LR_STATE (3UL << 62)
#define ICH_LR_PENDING_BIT (1UL << 62)
#define ICH_LR_ACTIVE_BIT (1UL << 63)
+#define ICH_LR_PHYS_ID_SHIFT 32
+#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT)
#define ICH_MISR_EOI (1 << 0)
#define ICH_MISR_U (1 << 1)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index ffe3911..18c4e29 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -64,6 +64,7 @@
#define GICH_LR_PENDING_BIT (1 << 28)
#define GICH_LR_ACTIVE_BIT (1 << 29)
#define GICH_LR_EOI (1 << 19)
+#define GICH_LR_HW (1 << 31);
#define GICH_VMCR_CTRL_SHIFT 0
#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 4091078..6764d44 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -58,7 +58,9 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
struct vgic_lr lr_desc)
{
- u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+ u32 lr_val;
+
+ lr_val = lr_desc.irq;
if (lr_desc.state & LR_STATE_PENDING)
lr_val |= GICH_LR_PENDING_BIT;
@@ -67,6 +69,16 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
if (lr_desc.state & LR_EOI_INT)
lr_val |= GICH_LR_EOI;
+ if (lr_desc.irq < VGIC_NR_SGIS) {
+ lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
+ } else {
+ int phys_irq = vgic_get_phys_irq(vcpu, lr_desc.irq);
+ if (phys_irq >= 0) {
+ lr_val |= ((u32)phys_irq) << GICH_LR_PHYSID_CPUID_SHIFT;
+ lr_val |= GICH_LR_HW;
+ }
+ }
+
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index d26d12f..41dee6c 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -116,6 +116,15 @@ static void vgic_v3_on_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
lr_val |= sync_lr_val(lr_desc.state);
+ if (lr_desc.irq >= VGIC_NR_SGIS) {
+ int phys_irq;
+ phys_irq = vgic_get_phys_irq(vcpu, lr_desc.irq);
+ if (phys_irq >= 0) {
+ lr_val |= ((u64)phys_irq) << ICH_LR_PHYS_ID_SHIFT;
+ lr_val |= ICH_LR_HW;
+ }
+ }
+
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
}
@@ -126,10 +135,19 @@ static void vgic_v2_on_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
lr_val = lr_desc.irq;
- lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
-
lr_val |= sync_lr_val(lr_desc.state);
+ if (lr_desc.irq < VGIC_NR_SGIS) {
+ lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+ } else {
+ int phys_irq;
+ phys_irq = vgic_get_phys_irq(vcpu, lr_desc.irq);
+ if (phys_irq >= 0) {
+ lr_val |= ((u64)phys_irq) << ICH_LR_PHYS_ID_SHIFT;
+ lr_val |= ICH_LR_HW;
+ }
+ }
+
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e3c7189..c404682c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -24,6 +24,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/rbtree.h>
#include <linux/uaccess.h>
#include <linux/irqchip/arm-gic.h>
@@ -1163,6 +1164,93 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static struct rb_root *vgic_get_irq_phys_map(struct kvm_vcpu *vcpu,
+ int virt_irq)
+{
+ if (virt_irq < VGIC_NR_PRIVATE_IRQS)
+ return &vcpu->arch.vgic_cpu.irq_phys_map;
+ else
+ return &vcpu->kvm->arch.vgic.irq_phys_map;
+}
+
+int vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq)
+{
+ struct rb_root *root = vgic_get_irq_phys_map(vcpu, virt_irq);
+ struct rb_node **new = &root->rb_node, *parent = NULL;
+ struct irq_phys_map *new_map;
+
+ /* Boilerplate rb_tree code */
+ while (*new) {
+ struct irq_phys_map *this;
+
+ this = container_of(*new, struct irq_phys_map, node);
+ parent = *new;
+ if (this->virt_irq < virt_irq)
+ new = &(*new)->rb_left;
+ else if (this->virt_irq > virt_irq)
+ new = &(*new)->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ new_map = kzalloc(sizeof(*new_map), GFP_KERNEL);
+ if (!new_map)
+ return -ENOMEM;
+
+ new_map->virt_irq = virt_irq;
+ new_map->phys_irq = phys_irq;
+
+ rb_link_node(&new_map->node, parent, new);
+ rb_insert_color(&new_map->node, root);
+
+ return 0;
+}
+
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+ int virt_irq)
+{
+ struct rb_root *root = vgic_get_irq_phys_map(vcpu, virt_irq);
+ struct rb_node *node = root->rb_node;
+
+ while(node) {
+ struct irq_phys_map *this;
+
+ this = container_of(node, struct irq_phys_map, node);
+
+ if (this->virt_irq < virt_irq)
+ node = node->rb_left;
+ else if (this->virt_irq > virt_irq)
+ node = node->rb_right;
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+int vgic_get_phys_irq(struct kvm_vcpu *vcpu, int virt_irq)
+{
+ struct irq_phys_map *map = vgic_irq_map_search(vcpu, virt_irq);
+
+ if (map)
+ return map->phys_irq;
+
+ return -ENOENT;
+}
+
+int vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq)
+{
+ struct irq_phys_map *map = vgic_irq_map_search(vcpu, virt_irq);
+
+ if (map && map->phys_irq == phys_irq) {
+ rb_erase(&map->node, vgic_get_irq_phys_map(vcpu, virt_irq));
+ kfree(map);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
static void vgic_vcpu_free_maps(struct vgic_cpu *vgic_cpu)
{
kfree(vgic_cpu->pending_shared);
--
1.8.3.4
In order to convert the timer code to the new interrupt save/restore API,
make sure the timer switch is done in a non-premtible section, so that
we save/restore the state on the right CPU.
Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm/kvm/arm.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a291e63..92875b2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -554,9 +554,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu_pause(vcpu);
kvm_vgic_flush_hwstate(vcpu);
- kvm_timer_flush_hwstate(vcpu);
local_irq_disable();
+ kvm_timer_flush_hwstate(vcpu);
/*
* Re-check atomic conditions
@@ -567,8 +567,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
- local_irq_enable();
kvm_timer_sync_hwstate(vcpu);
+ local_irq_enable();
kvm_vgic_sync_hwstate(vcpu);
continue;
}
@@ -586,6 +586,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->arch.last_pcpu = smp_processor_id();
kvm_guest_exit();
trace_kvm_exit(*vcpu_pc(vcpu));
+ kvm_timer_sync_hwstate(vcpu);
+
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
@@ -602,7 +604,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Back from guest
*************************************************************/
- kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
ret = handle_exit(vcpu, run, ret);
--
1.8.3.4
When a peripheral is shared between virtual machines, its interrupt
state becomes part of the guest's state, and must be switched accordingly.
Introduce a pair of accessors (irq_get_fwd_state/irq_set_fwd_state) to
retrieve the bits that can be of interest to KVM: pending, active, and masked.
- irq_get_fwd_state returns the state of the interrupt according to a mask
containing any of the IRQ_STATE_PENDING, IRQ_STATE_ACTIVE or IRQ_STATE_MASKED
bits.
- irq_set_fwd_state sets the state of the interrupt according to a similar mask.
Only a forwarded interrupt can be manipulated in such a way.
Signed-off-by: Marc Zyngier <[email protected]>
---
include/linux/interrupt.h | 2 ++
include/linux/irq.h | 16 ++++++++++
kernel/irq/manage.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 98 insertions(+)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 698ad05..cfb8006 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -361,6 +361,8 @@ static inline int disable_irq_wake(unsigned int irq)
return irq_set_irq_wake(irq, 0);
}
+extern int irq_get_fwd_state(unsigned int irq, u32 *val, u32 mask);
+extern int irq_set_fwd_state(unsigned int irq, u32 val, u32 mask);
#ifdef CONFIG_IRQ_FORCED_THREADING
extern bool force_irqthreads;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b4b3120..88ecd58 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -323,6 +323,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* any other callback related to this irq
* @irq_release_resources: optional to release resources acquired with
* irq_request_resources
+ * @irq_get_fwd_state: return the state of a forwarded interrupt
+ * @irq_set_fwd_state: set the state of a forwarded interrupt
* @flags: chip specific flags
*/
struct irq_chip {
@@ -359,6 +361,9 @@ struct irq_chip {
int (*irq_request_resources)(struct irq_data *data);
void (*irq_release_resources)(struct irq_data *data);
+ u32 (*irq_get_fwd_state)(struct irq_data *data, u32 mask);
+ void (*irq_set_fwd_state)(struct irq_data *data, u32 val, u32 mask);
+
unsigned long flags;
};
@@ -384,6 +389,17 @@ enum {
IRQCHIP_EOI_THREADED = (1 << 6),
};
+/*
+ * irq_get_fwd_state/irq_set_fwd_state specific flags (used for both
+ * state and mask). Only valid for an interrupt that has the
+ * IRQD_IRQ_FORWARDED flag.
+ */
+enum {
+ IRQ_FWD_STATE_PENDING = (1 << 0),
+ IRQ_FWD_STATE_ACTIVE = (1 << 1),
+ IRQ_FWD_STATE_MASKED = (1 << 2),
+};
+
/* This include will go away once we isolated irq_desc usage to core code */
#include <linux/irqdesc.h>
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3dc6a61..c42dd73 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1770,3 +1770,83 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
return retval;
}
+
+/**
+ * irq_get_fwd_state - return the state of a forwarded interrupt.
+ * @irq: Interrupt line that is forwarded to a VM
+ * @val: Pointer where the state is being stored
+ * @mask: Bitmask of IRQ_FWD_STATE_* the caller wants to know about
+ *
+ * This call snapshots the state of a forwarded interrupt,
+ * depending on the mask which indicates the requested bits.
+ *
+ * This function should be called with preemption disabled if the
+ * interrupt controller has per-cpu registers.
+ */
+int irq_get_fwd_state(unsigned int irq, u32 *val, u32 mask)
+{
+ struct irq_desc *desc;
+ struct irq_data *data;
+ struct irq_chip *chip;
+ unsigned long flags;
+
+ desc = irq_to_desc(irq);
+ if (!desc)
+ return -EINVAL;
+
+ data = irq_desc_get_irq_data(desc);
+ if (!irqd_irq_forwarded(data))
+ return -EINVAL;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip->irq_get_fwd_state)
+ return -EINVAL;
+
+ chip_bus_lock(desc);
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ *val = chip->irq_get_fwd_state(data, mask);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ chip_bus_sync_unlock(desc);
+
+ return 0;
+}
+
+/**
+ * irq_set_fwd_state - set the state of a forwarded interrupt.
+ * @irq: Interrupt line that is forwarded to a VM
+ * @val: State to be restored
+ * @mask: Bitmask of IRQ_FWD_STATE_* defining the valid bits in @val
+ *
+ * This call sets the state of a forwarded interrupt, depending
+ * on the mask which indicates the valid bits.
+ *
+ * This function should be called with preemption disabled if the
+ * interrupt controller has per-cpu registers.
+ */
+int irq_set_fwd_state(unsigned int irq, u32 val, u32 mask)
+{
+ struct irq_desc *desc;
+ struct irq_data *data;
+ struct irq_chip *chip;
+ unsigned long flags;
+
+ desc = irq_to_desc(irq);
+ if (!desc)
+ return -EINVAL;
+
+ data = irq_desc_get_irq_data(desc);
+ if (!irqd_irq_forwarded(data))
+ return -EINVAL;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip->irq_set_fwd_state)
+ return -EINVAL;
+
+ chip_bus_lock(desc);
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ chip->irq_set_fwd_state(data, val, mask);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ chip_bus_sync_unlock(desc);
+
+ return 0;
+}
--
1.8.3.4
Some interrupt controllers support a split EOI behaviour, where
an active interrupt can be forwarded to a VM, and directly EOIed
from the guest.
Allow the irq_data to carry a flag describing this setup.
Signed-off-by: Marc Zyngier <[email protected]>
---
include/linux/irq.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0d998d8..b4b3120 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -186,6 +186,7 @@ enum {
IRQD_IRQ_DISABLED = (1 << 16),
IRQD_IRQ_MASKED = (1 << 17),
IRQD_IRQ_INPROGRESS = (1 << 18),
+ IRQD_IRQ_FORWARDED = (1 << 19),
};
static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
@@ -272,6 +273,21 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d)
d->state_use_accessors &= ~IRQD_IRQ_INPROGRESS;
}
+static inline void irqd_set_irq_forwarded(struct irq_data *d)
+{
+ d->state_use_accessors |= IRQD_IRQ_FORWARDED;
+}
+
+static inline void irqd_clr_irq_forwarded(struct irq_data *d)
+{
+ d->state_use_accessors &= ~IRQD_IRQ_FORWARDED;
+}
+
+static inline bool irqd_irq_forwarded(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_IRQ_FORWARDED;
+}
+
static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
{
return d->hwirq;
--
1.8.3.4
So far, GICv3 has been used in with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.
While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself.
For this case, the GIC architecture provides EOImode == 1, where:
- A write to ICC_EOIR1_EL1 drops the priority of the interrupt and leaves
it active. Other interrupts at the same priority level can now be taken,
but the active interrupt cannot be taken again
- A write to ICC_DIR_EL1 marks the interrupt as inactive, meaning it can
now be taken again.
This patch converts the driver to this new mode, without any other feature.
Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic-v3-its.c | 1 +
drivers/irqchip/irq-gic-v3.c | 6 ++++--
include/linux/irqchip/arm-gic-v3.h | 7 +++++++
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 822f55e..922f228 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -616,6 +616,7 @@ static void its_unmask_irq(struct irq_data *d)
static void its_eoi_irq(struct irq_data *d)
{
gic_write_eoir(d->hwirq);
+ gic_write_dir(d->hwirq);
}
static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 3ae53c1..26a08ce 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -238,6 +238,7 @@ static void gic_unmask_irq(struct irq_data *d)
static void gic_eoi_irq(struct irq_data *d)
{
gic_write_eoir(gic_irq(d));
+ gic_write_dir(gic_irq(d));
}
static int gic_set_type(struct irq_data *d, unsigned int type)
@@ -297,6 +298,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
}
if (irqnr < 16) {
gic_write_eoir(irqnr);
+ gic_write_dir(irqnr);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#else
@@ -400,8 +402,8 @@ static void gic_cpu_sys_reg_init(void)
/* Set priority mask register */
gic_write_pmr(DEFAULT_PMR_VALUE);
- /* EOI deactivates interrupt too (mode 0) */
- gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
+ /* EOI drops priority only (mode 1) */
+ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop);
/* ... and let's hit the road... */
gic_write_grpen1(1);
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 023b49d..a0bff2f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -229,6 +229,7 @@
#define ICC_EOIR1_EL1 S3_0_C12_C12_1
#define ICC_IAR1_EL1 S3_0_C12_C12_0
+#define ICC_DIR_EL1 S3_0_C12_C11_1
#define ICC_SGI1R_EL1 S3_0_C12_C11_5
#define ICC_PMR_EL1 S3_0_C4_C6_0
#define ICC_CTLR_EL1 S3_0_C12_C12_4
@@ -303,6 +304,12 @@ static inline void gic_write_eoir(u64 irq)
isb();
}
+static inline void gic_write_dir(u64 irq)
+{
+ asm volatile("msr " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq));
+ isb();
+}
+
int its_cpu_init(void);
struct irq_chip *its_init(struct device_node *node, struct rdist *rdist,
struct irq_domain *domain);
--
1.8.3.4
So far, GICv2 has been used in with EOImode == 0. The effect of this
mode is to perform the priority drop and the deactivation of the
interrupt at the same time.
While this works perfectly for Linux (we only have a single priority),
it causes issues when an interrupt is forwarded to a guest, and when
we want the guest to perform the EOI itself.
For this case, the GIC architecture provides EOImode == 1, where:
- A write to the EOI register drops the priority of the interrupt and leaves
it active. Other interrupts at the same priority level can now be taken,
but the active interrupt cannot be taken again
- A write to the DIR marks the interrupt as inactive, meaning it can
now be taken again.
We only enable this feature when booted in HYP mode. Also, as most device
trees are broken (they report the CPU interface size to be 4kB, while
the GICv2 CPU interface size is 8kB), output a warning if we're booted
in HYP mode, and disable the feature.
Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
include/linux/irqchip/arm-gic.h | 4 +++
2 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 508b815..9295bf2 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -45,6 +45,7 @@
#include <asm/irq.h>
#include <asm/exception.h>
#include <asm/smp_plat.h>
+#include <asm/virt.h>
#include "irq-gic-common.h"
#include "irqchip.h"
@@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
.irq_set_wake = NULL,
};
+static struct irq_chip *gic_chip;
+
+static bool supports_deactivate = false;
+
#ifndef MAX_GIC_NR
#define MAX_GIC_NR 1
#endif
@@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
}
+static void gic_eoi_dir_irq(struct irq_data *d)
+{
+ gic_eoi_irq(d);
+ writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
+}
+
static int gic_set_type(struct irq_data *d, unsigned int type)
{
void __iomem *base = gic_dist_base(d);
@@ -277,6 +288,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
}
if (irqnr < 16) {
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
+ writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
#ifdef CONFIG_SMP
handle_IPI(irqnr, regs);
#endif
@@ -313,7 +325,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
-static struct irq_chip gic_chip = {
+static struct irq_chip gicv1_chip = {
.name = "GIC",
.irq_mask = gic_mask_irq,
.irq_unmask = gic_unmask_irq,
@@ -326,6 +338,19 @@ static struct irq_chip gic_chip = {
.irq_set_wake = gic_set_wake,
};
+static struct irq_chip gicv2_chip = {
+ .name = "GIC",
+ .irq_mask = gic_mask_irq,
+ .irq_unmask = gic_unmask_irq,
+ .irq_eoi = gic_eoi_dir_irq,
+ .irq_set_type = gic_set_type,
+ .irq_retrigger = gic_retrigger,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = gic_set_affinity,
+#endif
+ .irq_set_wake = gic_set_wake,
+};
+
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
{
if (gic_nr >= MAX_GIC_NR)
@@ -377,6 +402,16 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
writel_relaxed(1, base + GIC_DIST_CTRL);
}
+static void gic_cpu_if_up(void)
+{
+ void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+ u32 val = 0;
+
+ if (supports_deactivate)
+ val = GIC_CPU_CTRL_EOImodeNS;
+ writel_relaxed(val | 1, cpu_base + GIC_CPU_CTRL);
+}
+
static void gic_cpu_init(struct gic_chip_data *gic)
{
void __iomem *dist_base = gic_data_dist_base(gic);
@@ -402,7 +437,7 @@ static void gic_cpu_init(struct gic_chip_data *gic)
gic_cpu_config(dist_base, NULL);
writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
- writel_relaxed(1, base + GIC_CPU_CTRL);
+ gic_cpu_if_up();
}
void gic_cpu_if_down(void)
@@ -543,7 +578,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
- writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
+ gic_cpu_if_up();
}
static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
@@ -770,11 +805,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
{
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_set_chip_and_handler(irq, &gic_chip,
+ irq_set_chip_and_handler(irq, gic_chip,
handle_percpu_devid_irq);
set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
} else {
- irq_set_chip_and_handler(irq, &gic_chip,
+ irq_set_chip_and_handler(irq, gic_chip,
handle_fasteoi_irq);
set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
@@ -951,6 +986,11 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
+ if (!supports_deactivate)
+ gic_chip = &gicv1_chip;
+ else
+ gic_chip = &gicv2_chip;
+
if (of_property_read_u32(node, "arm,routable-irqs",
&nr_routable_irqs)) {
irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
@@ -980,7 +1020,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
set_handle_irq(gic_handle_irq);
}
- gic_chip.flags |= gic_arch_extn.flags;
+ gic_chip->flags |= gic_arch_extn.flags;
gic_dist_init(gic);
gic_cpu_init(gic);
gic_pm_init(gic);
@@ -994,6 +1034,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *cpu_base;
void __iomem *dist_base;
+ struct resource cpu_res;
u32 percpu_offset;
int irq;
@@ -1006,6 +1047,14 @@ gic_of_init(struct device_node *node, struct device_node *parent)
cpu_base = of_iomap(node, 1);
WARN(!cpu_base, "unable to map gic cpu registers\n");
+ of_address_to_resource(node, 1, &cpu_res);
+ if (is_hyp_mode_available()) {
+ if (resource_size(&cpu_res) >= SZ_8K)
+ supports_deactivate = true;
+ else
+ pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
+ }
+
if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
percpu_offset = 0;
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 45e2d8c..ffe3911 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -21,6 +21,10 @@
#define GIC_CPU_ACTIVEPRIO 0xd0
#define GIC_CPU_IDENT 0xfc
+#define GIC_CPU_DEACTIVATE 0x1000
+
+#define GIC_CPU_CTRL_EOImodeNS (1 << 9)
+
#define GICC_IAR_INT_ID_MASK 0x3ff
#define GIC_DIST_CTRL 0x000
--
1.8.3.4
On Wed, Jun 25, 2014 at 4:28 AM, Marc Zyngier <[email protected]> wrote:
> So far, GICv2 has been used in with EOImode == 0. The effect of this
> mode is to perform the priority drop and the deactivation of the
> interrupt at the same time.
>
> While this works perfectly for Linux (we only have a single priority),
> it causes issues when an interrupt is forwarded to a guest, and when
> we want the guest to perform the EOI itself.
>
> For this case, the GIC architecture provides EOImode == 1, where:
> - A write to the EOI register drops the priority of the interrupt and leaves
> it active. Other interrupts at the same priority level can now be taken,
> but the active interrupt cannot be taken again
> - A write to the DIR marks the interrupt as inactive, meaning it can
> now be taken again.
>
> We only enable this feature when booted in HYP mode. Also, as most device
> trees are broken (they report the CPU interface size to be 4kB, while
> the GICv2 CPU interface size is 8kB), output a warning if we're booted
> in HYP mode, and disable the feature.
Why not fix-up the size so the feature can be enabled?
Rob
On Wed, Jun 25 2014 at 01:50:12 PM, Rob Herring <[email protected]> wrote:
> On Wed, Jun 25, 2014 at 4:28 AM, Marc Zyngier <[email protected]> wrote:
>> So far, GICv2 has been used in with EOImode == 0. The effect of this
>> mode is to perform the priority drop and the deactivation of the
>> interrupt at the same time.
>>
>> While this works perfectly for Linux (we only have a single priority),
>> it causes issues when an interrupt is forwarded to a guest, and when
>> we want the guest to perform the EOI itself.
>>
>> For this case, the GIC architecture provides EOImode == 1, where:
>> - A write to the EOI register drops the priority of the interrupt and leaves
>> it active. Other interrupts at the same priority level can now be taken,
>> but the active interrupt cannot be taken again
>> - A write to the DIR marks the interrupt as inactive, meaning it can
>> now be taken again.
>>
>> We only enable this feature when booted in HYP mode. Also, as most device
>> trees are broken (they report the CPU interface size to be 4kB, while
>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
>> in HYP mode, and disable the feature.
>
> Why not fix-up the size so the feature can be enabled?
Is it a bet we're willing to take? We'd end-up with a kernel that
doesn't boot if the DT was actually right. If we stay with EOImode==0,
we can still boot (KVM will probably be broken though).
M.
--
Jazz is not dead. It just smells funny.
On Wed, Jun 25, 2014 at 8:03 AM, Marc Zyngier <[email protected]> wrote:
> On Wed, Jun 25 2014 at 01:50:12 PM, Rob Herring <[email protected]> wrote:
>> On Wed, Jun 25, 2014 at 4:28 AM, Marc Zyngier <[email protected]> wrote:
>>> So far, GICv2 has been used in with EOImode == 0. The effect of this
>>> mode is to perform the priority drop and the deactivation of the
>>> interrupt at the same time.
>>>
>>> While this works perfectly for Linux (we only have a single priority),
>>> it causes issues when an interrupt is forwarded to a guest, and when
>>> we want the guest to perform the EOI itself.
>>>
>>> For this case, the GIC architecture provides EOImode == 1, where:
>>> - A write to the EOI register drops the priority of the interrupt and leaves
>>> it active. Other interrupts at the same priority level can now be taken,
>>> but the active interrupt cannot be taken again
>>> - A write to the DIR marks the interrupt as inactive, meaning it can
>>> now be taken again.
>>>
>>> We only enable this feature when booted in HYP mode. Also, as most device
>>> trees are broken (they report the CPU interface size to be 4kB, while
>>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
>>> in HYP mode, and disable the feature.
>>
>> Why not fix-up the size so the feature can be enabled?
>
> Is it a bet we're willing to take? We'd end-up with a kernel that
> doesn't boot if the DT was actually right. If we stay with EOImode==0,
> we can still boot (KVM will probably be broken though).
I think so. Seems like your last statement answers this. Why is KVM
not working on my system seems like a much more likely and frequent
support issue than a potentially broken system.
The only place I really could see it be broken is an SBSA system doing
the address swizzling trick with the gic-400 to get 64KB spaced
regions but does not use the 60KB aligned cpu interface address. But
DTBs are hardly stable for 64-bit systems and can be updated.
Rob
Hi Marc,
On Wed, Jun 25, 2014 at 2:58 PM, Marc Zyngier <[email protected]> wrote:
> So far, GICv2 has been used in with EOImode == 0. The effect of this
> mode is to perform the priority drop and the deactivation of the
> interrupt at the same time.
>
> While this works perfectly for Linux (we only have a single priority),
> it causes issues when an interrupt is forwarded to a guest, and when
> we want the guest to perform the EOI itself.
>
> For this case, the GIC architecture provides EOImode == 1, where:
> - A write to the EOI register drops the priority of the interrupt and leaves
> it active. Other interrupts at the same priority level can now be taken,
> but the active interrupt cannot be taken again
> - A write to the DIR marks the interrupt as inactive, meaning it can
> now be taken again.
>
> We only enable this feature when booted in HYP mode. Also, as most device
> trees are broken (they report the CPU interface size to be 4kB, while
> the GICv2 CPU interface size is 8kB), output a warning if we're booted
> in HYP mode, and disable the feature.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
> include/linux/irqchip/arm-gic.h | 4 +++
> 2 files changed, 59 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> index 508b815..9295bf2 100644
> --- a/drivers/irqchip/irq-gic.c
> +++ b/drivers/irqchip/irq-gic.c
> @@ -45,6 +45,7 @@
> #include <asm/irq.h>
> #include <asm/exception.h>
> #include <asm/smp_plat.h>
> +#include <asm/virt.h>
>
> #include "irq-gic-common.h"
> #include "irqchip.h"
> @@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
> .irq_set_wake = NULL,
> };
>
> +static struct irq_chip *gic_chip;
> +
> +static bool supports_deactivate = false;
> +
> #ifndef MAX_GIC_NR
> #define MAX_GIC_NR 1
> #endif
> @@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
> writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
> }
>
> +static void gic_eoi_dir_irq(struct irq_data *d)
> +{
> + gic_eoi_irq(d);
> + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
> +}
> +
> static int gic_set_type(struct irq_data *d, unsigned int type)
> {
> void __iomem *base = gic_dist_base(d);
> @@ -277,6 +288,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
> }
> if (irqnr < 16) {
> writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
> + writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
> #ifdef CONFIG_SMP
> handle_IPI(irqnr, regs);
> #endif
> @@ -313,7 +325,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
> chained_irq_exit(chip, desc);
> }
>
> -static struct irq_chip gic_chip = {
> +static struct irq_chip gicv1_chip = {
> .name = "GIC",
> .irq_mask = gic_mask_irq,
> .irq_unmask = gic_unmask_irq,
> @@ -326,6 +338,19 @@ static struct irq_chip gic_chip = {
> .irq_set_wake = gic_set_wake,
> };
>
> +static struct irq_chip gicv2_chip = {
> + .name = "GIC",
> + .irq_mask = gic_mask_irq,
> + .irq_unmask = gic_unmask_irq,
> + .irq_eoi = gic_eoi_dir_irq,
> + .irq_set_type = gic_set_type,
> + .irq_retrigger = gic_retrigger,
> +#ifdef CONFIG_SMP
> + .irq_set_affinity = gic_set_affinity,
> +#endif
> + .irq_set_wake = gic_set_wake,
> +};
> +
> void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
> {
> if (gic_nr >= MAX_GIC_NR)
> @@ -377,6 +402,16 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
> writel_relaxed(1, base + GIC_DIST_CTRL);
> }
>
> +static void gic_cpu_if_up(void)
> +{
> + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
> + u32 val = 0;
> +
> + if (supports_deactivate)
> + val = GIC_CPU_CTRL_EOImodeNS;
> + writel_relaxed(val | 1, cpu_base + GIC_CPU_CTRL);
> +}
> +
> static void gic_cpu_init(struct gic_chip_data *gic)
> {
> void __iomem *dist_base = gic_data_dist_base(gic);
> @@ -402,7 +437,7 @@ static void gic_cpu_init(struct gic_chip_data *gic)
> gic_cpu_config(dist_base, NULL);
>
> writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
> - writel_relaxed(1, base + GIC_CPU_CTRL);
> + gic_cpu_if_up();
> }
>
> void gic_cpu_if_down(void)
> @@ -543,7 +578,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
> writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
>
> writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
> - writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
> + gic_cpu_if_up();
> }
>
> static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
> @@ -770,11 +805,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
> {
> if (hw < 32) {
> irq_set_percpu_devid(irq);
> - irq_set_chip_and_handler(irq, &gic_chip,
> + irq_set_chip_and_handler(irq, gic_chip,
> handle_percpu_devid_irq);
> set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
> } else {
> - irq_set_chip_and_handler(irq, &gic_chip,
> + irq_set_chip_and_handler(irq, gic_chip,
> handle_fasteoi_irq);
> set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
>
> @@ -951,6 +986,11 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
>
> gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
>
> + if (!supports_deactivate)
> + gic_chip = &gicv1_chip;
> + else
> + gic_chip = &gicv2_chip;
> +
> if (of_property_read_u32(node, "arm,routable-irqs",
> &nr_routable_irqs)) {
> irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
> @@ -980,7 +1020,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
> set_handle_irq(gic_handle_irq);
> }
>
> - gic_chip.flags |= gic_arch_extn.flags;
> + gic_chip->flags |= gic_arch_extn.flags;
> gic_dist_init(gic);
> gic_cpu_init(gic);
> gic_pm_init(gic);
> @@ -994,6 +1034,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
> {
> void __iomem *cpu_base;
> void __iomem *dist_base;
> + struct resource cu_res;
> u32 percpu_offset;
> int irq;
>
> @@ -1006,6 +1047,14 @@ gic_of_init(struct device_node *node, struct device_node *parent)
> cpu_base = of_iomap(node, 1);
> WARN(!cpu_base, "unable to map gic cpu registers\n");
>
> + of_address_to_resource(node, 1, &cpu_res);
> + if (is_hyp_mode_available()) {
Interrupt deactivation feature is not dependent on whether
hyp-mode is available or not.
We can use GICC_DIR even if CPU does not have
virtualization extension. Also, we can use GICV_DIR
when running as Guest or VM.
I think "supports_deactivate" should depend on
the GIC compatible string.
> + if (resource_size(&cpu_res) >= SZ_8K)
> + supports_deactivate = true;
> + else
> + pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
This will not work on APM X-Gene because, for
X-Gene first CPU page is at 0x78020000 and
second CPU page is at 0x78030000.
Ian had send-out a patch long time back to extend
GIC dt-bindings for addressing this issue.
(http://www.spinics.net/lists/arm-kernel/msg283767.html)
Regards,
Anup
> + }
> +
> if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
> percpu_offset = 0;
>
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index 45e2d8c..ffe3911 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -21,6 +21,10 @@
> #define GIC_CPU_ACTIVEPRIO 0xd0
> #define GIC_CPU_IDENT 0xfc
>
> +#define GIC_CPU_DEACTIVATE 0x1000
> +
> +#define GIC_CPU_CTRL_EOImodeNS (1 << 9)
> +
> #define GICC_IAR_INT_ID_MASK 0x3ff
>
> #define GIC_DIST_CTRL 0x000
> --
> 1.8.3.4
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wed, 2014-06-25 at 19:26 +0530, Anup Patel wrote:
>
> Ian had send-out a patch long time back to extend
> GIC dt-bindings for addressing this issue.
> (http://www.spinics.net/lists/arm-kernel/msg283767.html)
I've been meaning to revisit this. Since that original patch I've been
wondering if some sort of stride property wouldn't be better.
Ian.
On 25 June 2014 10:28, Marc Zyngier <[email protected]> wrote:
> For this case, the GIC architecture provides EOImode == 1, where:
> - A write to the EOI register drops the priority of the interrupt and leaves
> it active. Other interrupts at the same priority level can now be taken,
> but the active interrupt cannot be taken again
> - A write to the DIR marks the interrupt as inactive, meaning it can
> now be taken again.
>
> We only enable this feature when booted in HYP mode. Also, as most device
> trees are broken (they report the CPU interface size to be 4kB, while
> the GICv2 CPU interface size is 8kB), output a warning if we're booted
> in HYP mode, and disable the feature.
Does that mean you guarantee not to write to the DEACTIVATE
register if not booted in Hyp mode? I ask because QEMU's
GIC emulation doesn't emulate that register, so it would be
useful to know if this patch means newer kernels are going to fall
over under TCG QEMU...
(The correct fix, obviously, is to actually implement the QEMU
support for split prio-drop and deactivate. Christoffer, you're our
GIC emulation expert now, right? :-) )
thanks
-- PMM
On Wed, Jun 25, 2014 at 8:56 AM, Anup Patel <[email protected]> wrote:
> Hi Marc,
>
> On Wed, Jun 25, 2014 at 2:58 PM, Marc Zyngier <[email protected]> wrote:
>> So far, GICv2 has been used in with EOImode == 0. The effect of this
>> mode is to perform the priority drop and the deactivation of the
>> interrupt at the same time.
>>
>> While this works perfectly for Linux (we only have a single priority),
>> it causes issues when an interrupt is forwarded to a guest, and when
>> we want the guest to perform the EOI itself.
>>
>> For this case, the GIC architecture provides EOImode == 1, where:
>> - A write to the EOI register drops the priority of the interrupt and leaves
>> it active. Other interrupts at the same priority level can now be taken,
>> but the active interrupt cannot be taken again
>> - A write to the DIR marks the interrupt as inactive, meaning it can
>> now be taken again.
>>
>> We only enable this feature when booted in HYP mode. Also, as most device
>> trees are broken (they report the CPU interface size to be 4kB, while
>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
>> in HYP mode, and disable the feature.
>>
>> Signed-off-by: Marc Zyngier <[email protected]>
>> ---
[...]
>> + if (resource_size(&cpu_res) >= SZ_8K)
>> + supports_deactivate = true;
>> + else
>> + pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
>
> This will not work on APM X-Gene because, for
> X-Gene first CPU page is at 0x78020000 and
> second CPU page is at 0x78030000.
Does 0x7802f000 for the cpu address not work? It should if X-Gene is
"SBSA compliant" for whatever that is worth.
Rob
Hi Anup,
On 25/06/14 14:56, Anup Patel wrote:
> Hi Marc,
>
> On Wed, Jun 25, 2014 at 2:58 PM, Marc Zyngier <[email protected]> wrote:
>> So far, GICv2 has been used in with EOImode == 0. The effect of this
>> mode is to perform the priority drop and the deactivation of the
>> interrupt at the same time.
>>
>> While this works perfectly for Linux (we only have a single priority),
>> it causes issues when an interrupt is forwarded to a guest, and when
>> we want the guest to perform the EOI itself.
>>
>> For this case, the GIC architecture provides EOImode == 1, where:
>> - A write to the EOI register drops the priority of the interrupt and leaves
>> it active. Other interrupts at the same priority level can now be taken,
>> but the active interrupt cannot be taken again
>> - A write to the DIR marks the interrupt as inactive, meaning it can
>> now be taken again.
>>
>> We only enable this feature when booted in HYP mode. Also, as most device
>> trees are broken (they report the CPU interface size to be 4kB, while
>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
>> in HYP mode, and disable the feature.
>>
>> Signed-off-by: Marc Zyngier <[email protected]>
>> ---
>> drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
>> include/linux/irqchip/arm-gic.h | 4 +++
>> 2 files changed, 59 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
>> index 508b815..9295bf2 100644
>> --- a/drivers/irqchip/irq-gic.c
>> +++ b/drivers/irqchip/irq-gic.c
>> @@ -45,6 +45,7 @@
>> #include <asm/irq.h>
>> #include <asm/exception.h>
>> #include <asm/smp_plat.h>
>> +#include <asm/virt.h>
>>
>> #include "irq-gic-common.h"
>> #include "irqchip.h"
>> @@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
>> .irq_set_wake = NULL,
>> };
>>
>> +static struct irq_chip *gic_chip;
>> +
>> +static bool supports_deactivate = false;
>> +
>> #ifndef MAX_GIC_NR
>> #define MAX_GIC_NR 1
>> #endif
>> @@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
>> writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
>> }
>>
>> +static void gic_eoi_dir_irq(struct irq_data *d)
>> +{
>> + gic_eoi_irq(d);
>> + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
>> +}
>> +
>> static int gic_set_type(struct irq_data *d, unsigned int type)
>> {
>> void __iomem *base = gic_dist_base(d);
>> @@ -277,6 +288,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
>> }
>> if (irqnr < 16) {
>> writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
>> + writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
>> #ifdef CONFIG_SMP
>> handle_IPI(irqnr, regs);
>> #endif
>> @@ -313,7 +325,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
>> chained_irq_exit(chip, desc);
>> }
>>
>> -static struct irq_chip gic_chip = {
>> +static struct irq_chip gicv1_chip = {
>> .name = "GIC",
>> .irq_mask = gic_mask_irq,
>> .irq_unmask = gic_unmask_irq,
>> @@ -326,6 +338,19 @@ static struct irq_chip gic_chip = {
>> .irq_set_wake = gic_set_wake,
>> };
>>
>> +static struct irq_chip gicv2_chip = {
>> + .name = "GIC",
>> + .irq_mask = gic_mask_irq,
>> + .irq_unmask = gic_unmask_irq,
>> + .irq_eoi = gic_eoi_dir_irq,
>> + .irq_set_type = gic_set_type,
>> + .irq_retrigger = gic_retrigger,
>> +#ifdef CONFIG_SMP
>> + .irq_set_affinity = gic_set_affinity,
>> +#endif
>> + .irq_set_wake = gic_set_wake,
>> +};
>> +
>> void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
>> {
>> if (gic_nr >= MAX_GIC_NR)
>> @@ -377,6 +402,16 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
>> writel_relaxed(1, base + GIC_DIST_CTRL);
>> }
>>
>> +static void gic_cpu_if_up(void)
>> +{
>> + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
>> + u32 val = 0;
>> +
>> + if (supports_deactivate)
>> + val = GIC_CPU_CTRL_EOImodeNS;
>> + writel_relaxed(val | 1, cpu_base + GIC_CPU_CTRL);
>> +}
>> +
>> static void gic_cpu_init(struct gic_chip_data *gic)
>> {
>> void __iomem *dist_base = gic_data_dist_base(gic);
>> @@ -402,7 +437,7 @@ static void gic_cpu_init(struct gic_chip_data *gic)
>> gic_cpu_config(dist_base, NULL);
>>
>> writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
>> - writel_relaxed(1, base + GIC_CPU_CTRL);
>> + gic_cpu_if_up();
>> }
>>
>> void gic_cpu_if_down(void)
>> @@ -543,7 +578,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
>> writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
>>
>> writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
>> - writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
>> + gic_cpu_if_up();
>> }
>>
>> static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
>> @@ -770,11 +805,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>> {
>> if (hw < 32) {
>> irq_set_percpu_devid(irq);
>> - irq_set_chip_and_handler(irq, &gic_chip,
>> + irq_set_chip_and_handler(irq, gic_chip,
>> handle_percpu_devid_irq);
>> set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
>> } else {
>> - irq_set_chip_and_handler(irq, &gic_chip,
>> + irq_set_chip_and_handler(irq, gic_chip,
>> handle_fasteoi_irq);
>> set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
>>
>> @@ -951,6 +986,11 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
>>
>> gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
>>
>> + if (!supports_deactivate)
>> + gic_chip = &gicv1_chip;
>> + else
>> + gic_chip = &gicv2_chip;
>> +
>> if (of_property_read_u32(node, "arm,routable-irqs",
>> &nr_routable_irqs)) {
>> irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
>> @@ -980,7 +1020,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
>> set_handle_irq(gic_handle_irq);
>> }
>>
>> - gic_chip.flags |= gic_arch_extn.flags;
>> + gic_chip->flags |= gic_arch_extn.flags;
>> gic_dist_init(gic);
>> gic_cpu_init(gic);
>> gic_pm_init(gic);
>> @@ -994,6 +1034,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
>> {
>> void __iomem *cpu_base;
>> void __iomem *dist_base;
>> + struct resource cu_res;
>> u32 percpu_offset;
>> int irq;
>>
>> @@ -1006,6 +1047,14 @@ gic_of_init(struct device_node *node, struct device_node *parent)
>> cpu_base = of_iomap(node, 1);
>> WARN(!cpu_base, "unable to map gic cpu registers\n");
>>
>> + of_address_to_resource(node, 1, &cpu_res);
>> + if (is_hyp_mode_available()) {
>
> Interrupt deactivation feature is not dependent on whether hyp-mode
> is available or not.
Indeed. But for the Linux use-case, this is a pointless feature, unless
you want to use it for things like threaded interrupts (but that's not
what this patch series is about).
> We can use GICC_DIR even if CPU does not have virtualization
> extension. Also, we can use GICV_DIR when running as Guest or VM.
I know that. But what's the point? The joy of doing two MMIO accesses
instead of one? It is likely to be a net performance loss if you're not
actively using it.
> I think "supports_deactivate" should depend on
> the GIC compatible string.
Probably, but again, what is the point of using it if there is no benefit?
>> + if (resource_size(&cpu_res) >= SZ_8K)
>> + supports_deactivate = true;
>> + else
>> + pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
>
> This will not work on APM X-Gene because, for X-Gene first CPU page
> is at 0x78020000 and second CPU page is at 0x78030000.
Then, as far as I'm concerned, it is not compliant with the architecture
(the GICv2 spec says clearly that GICC_DIR is at offset 0x1000, not
0x10000).
We can work around it, but that's not the purpose of this series.
> Ian had send-out a patch long time back to extend GIC dt-bindings for
> addressing this issue.
> (http://www.spinics.net/lists/arm-kernel/msg283767.html)
Wow. That's really horrible. Blimey! Ian, you owe me a few beers, just
so I can forget this... ;-)
M.
--
Jazz is not dead. It just smells funny...
On Wed, 2014-06-25 at 15:24 +0100, Marc Zyngier wrote:
> > Ian had send-out a patch long time back to extend GIC dt-bindings for
> > addressing this issue.
> > (http://www.spinics.net/lists/arm-kernel/msg283767.html)
>
> Wow. That's really horrible. Blimey! Ian, you owe me a few beers, just
> so I can forget this... ;-)
I was even more naive about DT stuff then than now, if you can believe
that!
Ian.
On 25/06/14 15:03, Ian Campbell wrote:
> On Wed, 2014-06-25 at 19:26 +0530, Anup Patel wrote:
>>
>> Ian had send-out a patch long time back to extend
>> GIC dt-bindings for addressing this issue.
>> (http://www.spinics.net/lists/arm-kernel/msg283767.html)
>
> I've been meaning to revisit this. Since that original patch I've been
> wondering if some sort of stride property wouldn't be better.
Possibly. I used something like that for the GICv3 redistributors, just
in case someone messes it up. Given the GICv2 track record, I probably
did the right thing...
M.
--
Jazz is not dead. It just smells funny...
On 25/06/14 15:06, Peter Maydell wrote:
> On 25 June 2014 10:28, Marc Zyngier <[email protected]> wrote:
>> For this case, the GIC architecture provides EOImode == 1, where:
>> - A write to the EOI register drops the priority of the interrupt and leaves
>> it active. Other interrupts at the same priority level can now be taken,
>> but the active interrupt cannot be taken again
>> - A write to the DIR marks the interrupt as inactive, meaning it can
>> now be taken again.
>>
>> We only enable this feature when booted in HYP mode. Also, as most device
>> trees are broken (they report the CPU interface size to be 4kB, while
>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
>> in HYP mode, and disable the feature.
>
> Does that mean you guarantee not to write to the DEACTIVATE register
> if not booted in Hyp mode? I ask because QEMU's GIC emulation doesn't
> emulate that register, so it would be useful to know if this patch
> means newer kernels are going to fall over under TCG QEMU...
So far, I only plan to support it when booted in HYP. But it may be that
the split prio-drop/deactivate is also beneficial to threaded
interrupts, saving the writes to the distributor to mask/unmask. That
would require to be a bit more subtle in identifying a GICv2
implementation (DT binding, probably).
M.
--
Jazz is not dead. It just smells funny...
On 06/25/2014 11:28 AM, Marc Zyngier wrote:
> The GIC architecture (ARM's Generic Interrupt Controller) allows an
> active physical interrupt to be forwarded to a guest, and the guest to
> indirectly perform the deactivation of the interrupt by performing an
> EOI on the virtual interrupt (see for example the GICv2 spec, 3.2.1).
>
> So far, Linux doesn't have this notion, which is a bit of a pain.
>
> This patch series introduce two generic features:
>
> - A way to mark an interrupt as "forwarded": this allows an irq_chip
> to know that it shouldn't perform the deactivation itself
> - A way to save/restore the "state" of a "forwarded" interrupt
>
> The series then adapts both GIC drivers to switch to EOImode == 1
> (split priority drop and deactivation), to support this "forwarded"
> feature and hacks the KVM/ARM timer backend to use all of this.
>
> This requires yet another bit of surgery in the vgic code in order to
> allow a mapping between physical interrupts and virtual
> ones. Hopefully, this should plug into VFIO and the whole irqfd thing,
> but I don't understand any of that just yet (Eric?)
Hello Marc,
Thanks for the patch, it brings a very interesting capability for
improving the performance of KVM device assignment.
>From the integration pov I understand we need to
1) call irq_set_fwd_state to tell the gic the physical IRQ is forwarded
and not deactivate it
2) call vgic_map_phys_irq to the tell the vgic it must program the LRs
accordingly.
We currently have the vfio driver VFIO_DEVICE_SET_IRQS user API that
makes possible to tell: device IRQ index #i (i=0, 1, 2 for my xgmac)
shall trigger this fd.
At that point it would be possible to tell the GIC the physical IRQ
corresponding to i is forwarded.
On the other hand we have KVM_IRQFD that enables to tell KVM: when this
fd is triggered, you implement its handler in KVM irqfd framework and
the handler injects the provided irchip.pin(gsi)=virtualIRQ - the famous
GSI routing table - into this VM.
Building the vgic map table hence requires to do some glue around vfio
and irqfd info: physical IRQ ->(vfio) fd ->(irqfd) gsi.
As such I would say those 2 user APIs(VFIO and IRQFD) are not fully
adapted to put that in place but this may be feasible. Previous
KVM_ASSIGN_DEV_IRQ was directly associated the pIRQ and vIRQ.
we should be able to remove the physical IRQ mask in the vfio driver
(this masking is done when triggering the fd and the IRQ is unmasked
when the virtual IRQ is completed). It was there because the physical
IRQ was completed and could hit again. Now with 2 stage completion the
same IRQ cannot hit while guest has not not DIR'ed the IRQ so it fixes
the issue I guess.
Since we do not have EOI trap anymore we cannot trigger level-sensitive
resamplefd in irqfd (this would be an ARM specificity)
A last comment/question, wouldn't it be possible to inject the vIRQ
(programming the LR) direcly in the irqchip, instead of relying on VFIO
to trigger an eventfd whose handler does the job? This could be an
optional capavility per forwarded IRQ. Of course this would create a
relationship between gic and vgic. Do you see it as ugly - I dare to ask - ?
BR
Eric
>
> The patches are against 3.16-rc2, plus a massive amount of GICv3
> code. They are also available in my git repo:
> git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/irq-forward
>
> Open questions (Thomas, these are mostly targeted at you):
>
> - Are the generic features generic enough?
> - Would the "forwarded" thing be better implemented as a handler
> rather than an irq_chip specific thing?
> - The split priority drop/deactivate also fits the threaded interrupt
> model fairly well (no need to mask/unmask). Should we have a go at
> this too?
> - Does it fit the VFIO+KVM model without playing the ugly mask/unmask
> dance?
>
> Thanks,
>
> M.
>
> Marc Zyngier (9):
> genirq: Add IRQD_IRQ_FORWARDED flag and accessors
> genirq: Allow the state of a forwarded irq to be save/restored
> irqchip: GIC: Convert to EOImode == 1
> irqchip: GIC: add support for forwarded interrupts
> irqchip: GICv3: Convert to EOImode == 1
> irqchip: GICv3: add support for forwarded interrupts
> KVM: arm: vgic: allow dynamic mapping of physical/virtual interrupts
> arm: KVM: timer: move the timer switch into the non-preemptible
> section
> KVM: arm: timer: make the interrupt state part of the timer state
>
> arch/arm/kvm/arm.c | 7 +--
> drivers/irqchip/irq-gic-v3-its.c | 2 +
> drivers/irqchip/irq-gic-v3.c | 33 +++++++++++-
> drivers/irqchip/irq-gic.c | 107 +++++++++++++++++++++++++++++++++----
> include/kvm/arm_arch_timer.h | 3 ++
> include/kvm/arm_vgic.h | 13 +++++
> include/linux/interrupt.h | 2 +
> include/linux/irq.h | 32 +++++++++++
> include/linux/irqchip/arm-gic-v3.h | 12 +++++
> include/linux/irqchip/arm-gic.h | 5 ++
> kernel/irq/manage.c | 80 +++++++++++++++++++++++++++
> virt/kvm/arm/arch_timer.c | 31 ++++++++++-
> virt/kvm/arm/vgic-v2.c | 14 ++++-
> virt/kvm/arm/vgic-v3.c | 22 +++++++-
> virt/kvm/arm/vgic.c | 88 ++++++++++++++++++++++++++++++
> 15 files changed, 431 insertions(+), 20 deletions(-)
>
>> + if (resource_size(&cpu_res) >= SZ_8K)
>> + supports_deactivate = true;
>> + else
>> + pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
> This will not work on APM X-Gene because, for
> X-Gene first CPU page is at 0x78020000 and
> second CPU page is at 0x78030000.
>
> Ian had send-out a patch long time back to extend
> GIC dt-bindings for addressing this issue.
> (http://www.spinics.net/lists/arm-kernel/msg283767.html)
We have a similar issue with an AMD SOC. You can add 0xf000 (60K) page
offset to it to cleverly work around the issue but it seems quite likely
that the page offset has to be communicated to userspace and handled
there at no small effort.
Anybody want to revive Ian's split patches? They do seem convoluted but
seem like they might work as an approach.
-Joel
Hi Eric,
On 25/06/14 15:52, Eric Auger wrote:
> On 06/25/2014 11:28 AM, Marc Zyngier wrote:
>> The GIC architecture (ARM's Generic Interrupt Controller) allows an
>> active physical interrupt to be forwarded to a guest, and the guest to
>> indirectly perform the deactivation of the interrupt by performing an
>> EOI on the virtual interrupt (see for example the GICv2 spec, 3.2.1).
>>
>> So far, Linux doesn't have this notion, which is a bit of a pain.
>>
>> This patch series introduce two generic features:
>>
>> - A way to mark an interrupt as "forwarded": this allows an irq_chip
>> to know that it shouldn't perform the deactivation itself
>> - A way to save/restore the "state" of a "forwarded" interrupt
>>
>> The series then adapts both GIC drivers to switch to EOImode == 1
>> (split priority drop and deactivation), to support this "forwarded"
>> feature and hacks the KVM/ARM timer backend to use all of this.
>>
>> This requires yet another bit of surgery in the vgic code in order to
>> allow a mapping between physical interrupts and virtual
>> ones. Hopefully, this should plug into VFIO and the whole irqfd thing,
>> but I don't understand any of that just yet (Eric?)
>
> Hello Marc,
>
> Thanks for the patch, it brings a very interesting capability for
> improving the performance of KVM device assignment.
>
> From the integration pov I understand we need to
> 1) call irq_set_fwd_state to tell the gic the physical IRQ is forwarded
> and not deactivate it
That would be irqd_set_irq_forwarded().
irq_{g,s}et_fwd_state() are used when you're actually sharing a device
between guests, and need to context-switch its HW interrupt state
(typically, the timer). I wouldn't expect VFIO to use this, as the
device is exclusively assigned to a guest.
> 2) call vgic_map_phys_irq to the tell the vgic it must program the LRs
> accordingly.
>
> We currently have the vfio driver VFIO_DEVICE_SET_IRQS user API that
> makes possible to tell: device IRQ index #i (i=0, 1, 2 for my xgmac)
> shall trigger this fd.
> At that point it would be possible to tell the GIC the physical IRQ
> corresponding to i is forwarded.
>
> On the other hand we have KVM_IRQFD that enables to tell KVM: when this
> fd is triggered, you implement its handler in KVM irqfd framework and
> the handler injects the provided irchip.pin(gsi)=virtualIRQ - the famous
> GSI routing table - into this VM.
>
> Building the vgic map table hence requires to do some glue around vfio
> and irqfd info: physical IRQ ->(vfio) fd ->(irqfd) gsi.
>
> As such I would say those 2 user APIs(VFIO and IRQFD) are not fully
> adapted to put that in place but this may be feasible. Previous
> KVM_ASSIGN_DEV_IRQ was directly associated the pIRQ and vIRQ.
>
> we should be able to remove the physical IRQ mask in the vfio driver
> (this masking is done when triggering the fd and the IRQ is unmasked
> when the virtual IRQ is completed). It was there because the physical
> IRQ was completed and could hit again. Now with 2 stage completion the
> same IRQ cannot hit while guest has not not DIR'ed the IRQ so it fixes
> the issue I guess.
Yes, that's exactly the idea.
> Since we do not have EOI trap anymore we cannot trigger level-sensitive
> resamplefd in irqfd (this would be an ARM specificity)
For a level interrupt, we still have the EOI maintenance interrupt,
which we could hook into to perform whatever resampling we need.
The thing is, I don't think we need it at all. If the IRQ line is still
up, we'll take another interrupt right away. So it is not so much that
we cannot trigger the resample mechanism, it is just that it seems to
become useless. What do you think?
> A last comment/question, wouldn't it be possible to inject the vIRQ
> (programming the LR) direcly in the irqchip, instead of relying on VFIO
> to trigger an eventfd whose handler does the job? This could be an
> optional capavility per forwarded IRQ. Of course this would create a
> relationship between gic and vgic. Do you see it as ugly - I dare to ask - ?
I would say that it is what the interrupt handler is for. We could
entirely bypass the eventfd, and inject the interrupt from the VFIO
interrupt handler, couldn't we?
I think that gives you the bypass you're asking for, without creating a
dependency between the host GIC driver, and the guest support code.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On 06/26/2014 11:31 AM, Marc Zyngier wrote:
> Hi Eric,
>
> On 25/06/14 15:52, Eric Auger wrote:
>> On 06/25/2014 11:28 AM, Marc Zyngier wrote:
>>> The GIC architecture (ARM's Generic Interrupt Controller) allows an
>>> active physical interrupt to be forwarded to a guest, and the guest to
>>> indirectly perform the deactivation of the interrupt by performing an
>>> EOI on the virtual interrupt (see for example the GICv2 spec, 3.2.1).
>>>
>>> So far, Linux doesn't have this notion, which is a bit of a pain.
>>>
>>> This patch series introduce two generic features:
>>>
>>> - A way to mark an interrupt as "forwarded": this allows an irq_chip
>>> to know that it shouldn't perform the deactivation itself
>>> - A way to save/restore the "state" of a "forwarded" interrupt
>>>
>>> The series then adapts both GIC drivers to switch to EOImode == 1
>>> (split priority drop and deactivation), to support this "forwarded"
>>> feature and hacks the KVM/ARM timer backend to use all of this.
>>>
>>> This requires yet another bit of surgery in the vgic code in order to
>>> allow a mapping between physical interrupts and virtual
>>> ones. Hopefully, this should plug into VFIO and the whole irqfd thing,
>>> but I don't understand any of that just yet (Eric?)
>>
>> Hello Marc,
Hi Marc
>>
>> Thanks for the patch, it brings a very interesting capability for
>> improving the performance of KVM device assignment.
>>
>> From the integration pov I understand we need to
>> 1) call irq_set_fwd_state to tell the gic the physical IRQ is forwarded
>> and not deactivate it
>
> That would be irqd_set_irq_forwarded().
>
> irq_{g,s}et_fwd_state() are used when you're actually sharing a device
> between guests, and need to context-switch its HW interrupt state
> (typically, the timer). I wouldn't expect VFIO to use this, as the
> device is exclusively assigned to a guest.
OK
>
>> 2) call vgic_map_phys_irq to the tell the vgic it must program the LRs
>> accordingly.
>>
>> We currently have the vfio driver VFIO_DEVICE_SET_IRQS user API that
>> makes possible to tell: device IRQ index #i (i=0, 1, 2 for my xgmac)
>> shall trigger this fd.
>> At that point it would be possible to tell the GIC the physical IRQ
>> corresponding to i is forwarded.
>>
>> On the other hand we have KVM_IRQFD that enables to tell KVM: when this
>> fd is triggered, you implement its handler in KVM irqfd framework and
>> the handler injects the provided irchip.pin(gsi)=virtualIRQ - the famous
>> GSI routing table - into this VM.
>>
>> Building the vgic map table hence requires to do some glue around vfio
>> and irqfd info: physical IRQ ->(vfio) fd ->(irqfd) gsi.
>>
>> As such I would say those 2 user APIs(VFIO and IRQFD) are not fully
>> adapted to put that in place but this may be feasible. Previous
>> KVM_ASSIGN_DEV_IRQ was directly associated the pIRQ and vIRQ.
>>
>> we should be able to remove the physical IRQ mask in the vfio driver
>> (this masking is done when triggering the fd and the IRQ is unmasked
>> when the virtual IRQ is completed). It was there because the physical
>> IRQ was completed and could hit again. Now with 2 stage completion the
>> same IRQ cannot hit while guest has not not DIR'ed the IRQ so it fixes
>> the issue I guess.
>
> Yes, that's exactly the idea.
>
>> Since we do not have EOI trap anymore we cannot trigger level-sensitive
>> resamplefd in irqfd (this would be an ARM specificity)
>
> For a level interrupt, we still have the EOI maintenance interrupt,
> which we could hook into to perform whatever resampling we need.
Sorry I am confused by the above sentence. I thought you removed
maintenance IRQ for both edge and level-sensitive IRQS? Thought also the
2 features were exclusive, ie EOI maintenance IRQ (only if HW bit = 0)
and forward with HWbit = 1, since occupying GICH_LR[19:10].Sorry if I
misunderstood your code or the spec.
>
> The thing is, I don't think we need it at all. If the IRQ line is still
> up, we'll take another interrupt right away. So it is not so much that
> we cannot trigger the resample mechanism, it is just that it seems to
> become useless. What do you think?
Yes indeed I think it becomes useless. Besides as far as I understand
resamplefd feature is not mandatory. There is a capability associated to
it, KVM_CAP_IRQFD_RESAMPLE. Also if we want to use it we pass the
KVM_IRQFD_FLAG_RESAMPLE as irqfd flag.
>
>> A last comment/question, wouldn't it be possible to inject the vIRQ
>> (programming the LR) direcly in the irqchip, instead of relying on VFIO
>> to trigger an eventfd whose handler does the job? This could be an
>> optional capavility per forwarded IRQ. Of course this would create a
>> relationship between gic and vgic. Do you see it as ugly - I dare to ask - ?
>
> I would say that it is what the interrupt handler is for. We could
> entirely bypass the eventfd, and inject the interrupt from the VFIO
> interrupt handler, couldn't we?
The problem is the VFIO driver currently is not meant for that. It is
meant to trigger an eventfd and that's it. Anyway we may need to invent
something new around existing API semantics.
Best Regards
Eric
>
> I think that gives you the bypass you're asking for, without creating a
> dependency between the host GIC driver, and the guest support code.
>
> Thanks,
>
> M.
>
On 26/06/14 13:58, Eric Auger wrote:
> On 06/26/2014 11:31 AM, Marc Zyngier wrote:
>> Hi Eric,
>>
>> On 25/06/14 15:52, Eric Auger wrote:
>>> On 06/25/2014 11:28 AM, Marc Zyngier wrote:
>>>> The GIC architecture (ARM's Generic Interrupt Controller) allows an
>>>> active physical interrupt to be forwarded to a guest, and the guest to
>>>> indirectly perform the deactivation of the interrupt by performing an
>>>> EOI on the virtual interrupt (see for example the GICv2 spec, 3.2.1).
>>>>
>>>> So far, Linux doesn't have this notion, which is a bit of a pain.
>>>>
>>>> This patch series introduce two generic features:
>>>>
>>>> - A way to mark an interrupt as "forwarded": this allows an irq_chip
>>>> to know that it shouldn't perform the deactivation itself
>>>> - A way to save/restore the "state" of a "forwarded" interrupt
>>>>
>>>> The series then adapts both GIC drivers to switch to EOImode == 1
>>>> (split priority drop and deactivation), to support this "forwarded"
>>>> feature and hacks the KVM/ARM timer backend to use all of this.
>>>>
>>>> This requires yet another bit of surgery in the vgic code in order to
>>>> allow a mapping between physical interrupts and virtual
>>>> ones. Hopefully, this should plug into VFIO and the whole irqfd thing,
>>>> but I don't understand any of that just yet (Eric?)
>>>
>>> Hello Marc,
> Hi Marc
>>>
>>> Thanks for the patch, it brings a very interesting capability for
>>> improving the performance of KVM device assignment.
>>>
>>> From the integration pov I understand we need to
>>> 1) call irq_set_fwd_state to tell the gic the physical IRQ is forwarded
>>> and not deactivate it
>>
>> That would be irqd_set_irq_forwarded().
>>
>> irq_{g,s}et_fwd_state() are used when you're actually sharing a device
>> between guests, and need to context-switch its HW interrupt state
>> (typically, the timer). I wouldn't expect VFIO to use this, as the
>> device is exclusively assigned to a guest.
> OK
>>
>>> 2) call vgic_map_phys_irq to the tell the vgic it must program the LRs
>>> accordingly.
>>>
>>> We currently have the vfio driver VFIO_DEVICE_SET_IRQS user API that
>>> makes possible to tell: device IRQ index #i (i=0, 1, 2 for my xgmac)
>>> shall trigger this fd.
>>> At that point it would be possible to tell the GIC the physical IRQ
>>> corresponding to i is forwarded.
>>>
>>> On the other hand we have KVM_IRQFD that enables to tell KVM: when this
>>> fd is triggered, you implement its handler in KVM irqfd framework and
>>> the handler injects the provided irchip.pin(gsi)=virtualIRQ - the famous
>>> GSI routing table - into this VM.
>>>
>>> Building the vgic map table hence requires to do some glue around vfio
>>> and irqfd info: physical IRQ ->(vfio) fd ->(irqfd) gsi.
>>>
>>> As such I would say those 2 user APIs(VFIO and IRQFD) are not fully
>>> adapted to put that in place but this may be feasible. Previous
>>> KVM_ASSIGN_DEV_IRQ was directly associated the pIRQ and vIRQ.
>>>
>>> we should be able to remove the physical IRQ mask in the vfio driver
>>> (this masking is done when triggering the fd and the IRQ is unmasked
>>> when the virtual IRQ is completed). It was there because the physical
>>> IRQ was completed and could hit again. Now with 2 stage completion the
>>> same IRQ cannot hit while guest has not not DIR'ed the IRQ so it fixes
>>> the issue I guess.
>>
>> Yes, that's exactly the idea.
>>
>>> Since we do not have EOI trap anymore we cannot trigger level-sensitive
>>> resamplefd in irqfd (this would be an ARM specificity)
>>
>> For a level interrupt, we still have the EOI maintenance interrupt,
>> which we could hook into to perform whatever resampling we need.
>
> Sorry I am confused by the above sentence. I thought you removed
> maintenance IRQ for both edge and level-sensitive IRQS? Thought also the
> 2 features were exclusive, ie EOI maintenance IRQ (only if HW bit = 0)
> and forward with HWbit = 1, since occupying GICH_LR[19:10].Sorry if I
> misunderstood your code or the spec.
Blah. Of course you're right. HW and EOI are mutually exclusive. Typical
morning brain fart.
>>
>> The thing is, I don't think we need it at all. If the IRQ line is still
>> up, we'll take another interrupt right away. So it is not so much that
>> we cannot trigger the resample mechanism, it is just that it seems to
>> become useless. What do you think?
>
> Yes indeed I think it becomes useless. Besides as far as I understand
> resamplefd feature is not mandatory. There is a capability associated to
> it, KVM_CAP_IRQFD_RESAMPLE. Also if we want to use it we pass the
> KVM_IRQFD_FLAG_RESAMPLE as irqfd flag.
OK, that's pretty good then. We can just avoid supporting it.
>>
>>> A last comment/question, wouldn't it be possible to inject the vIRQ
>>> (programming the LR) direcly in the irqchip, instead of relying on VFIO
>>> to trigger an eventfd whose handler does the job? This could be an
>>> optional capavility per forwarded IRQ. Of course this would create a
>>> relationship between gic and vgic. Do you see it as ugly - I dare to ask - ?
>>
>> I would say that it is what the interrupt handler is for. We could
>> entirely bypass the eventfd, and inject the interrupt from the VFIO
>> interrupt handler, couldn't we?
>
> The problem is the VFIO driver currently is not meant for that. It is
> meant to trigger an eventfd and that's it. Anyway we may need to invent
> something new around existing API semantics.
Indeed. We're in uncharted territories, and we may need to wire things
in a slightly different way. Which is fine, as long as we preserve the
userspace API.
But this looks very much like an optimization we can look at later, once
we bet the basics up and running.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Hi Marc,
On Wed, Jun 25, 2014 at 10:28:43AM +0100, Marc Zyngier wrote:
> When a peripheral is shared between virtual machines, its interrupt
> state becomes part of the guest's state, and must be switched accordingly.
>
> Introduce a pair of accessors (irq_get_fwd_state/irq_set_fwd_state) to
> retrieve the bits that can be of interest to KVM: pending, active, and masked.
>
> - irq_get_fwd_state returns the state of the interrupt according to a mask
> containing any of the IRQ_STATE_PENDING, IRQ_STATE_ACTIVE or IRQ_STATE_MASKED
> bits.
> - irq_set_fwd_state sets the state of the interrupt according to a similar mask.
>
> Only a forwarded interrupt can be manipulated in such a way.
[...]
> +/**
> + * irq_set_fwd_state - set the state of a forwarded interrupt.
> + * @irq: Interrupt line that is forwarded to a VM
> + * @val: State to be restored
> + * @mask: Bitmask of IRQ_FWD_STATE_* defining the valid bits in @val
> + *
> + * This call sets the state of a forwarded interrupt, depending
> + * on the mask which indicates the valid bits.
> + *
> + * This function should be called with preemption disabled if the
> + * interrupt controller has per-cpu registers.
> + */
> +int irq_set_fwd_state(unsigned int irq, u32 val, u32 mask)
> +{
> + struct irq_desc *desc;
> + struct irq_data *data;
> + struct irq_chip *chip;
> + unsigned long flags;
> +
> + desc = irq_to_desc(irq);
> + if (!desc)
> + return -EINVAL;
> +
> + data = irq_desc_get_irq_data(desc);
> + if (!irqd_irq_forwarded(data))
> + return -EINVAL;
> +
> + chip = irq_desc_get_chip(desc);
> + if (!chip->irq_set_fwd_state)
> + return -EINVAL;
> +
> + chip_bus_lock(desc);
> + raw_spin_lock_irqsave(&desc->lock, flags);
> + chip->irq_set_fwd_state(data, val, mask);
> + raw_spin_unlock_irqrestore(&desc->lock, flags);
> + chip_bus_sync_unlock(desc);
Having looked at this, I don't think you need to take the desc->lock after
all. I thought you might need it for irq_desc_get_chip, but that's not going
to change dynamically and you'd end up with a lock ordering problem against
chip_bus_lock anyway.
Will
Hi Marc,
On Wed, Jun 25, 2014 at 10:28:45AM +0100, Marc Zyngier wrote:
> Now that we've switched to EOImode == 1, prevent a forwarded interrupt
> from being deactivated after its priority has been dropped.
>
> Also add support for the interrupt state to be saved/restored.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> drivers/irqchip/irq-gic.c | 48 +++++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 42 insertions(+), 6 deletions(-)
[...]
> +static void gic_irq_set_fwd_state(struct irq_data *d, u32 val, u32 mask)
> +{
> + if (mask & IRQ_FWD_STATE_PENDING)
> + gic_poke_irq(d, (val & IRQ_FWD_STATE_PENDING) ? GIC_DIST_ENABLE_SET : GIC_DIST_ENABLE_CLEAR);
> + if (mask & IRQ_FWD_STATE_ACTIVE)
> + gic_poke_irq(d, (val & IRQ_FWD_STATE_ACTIVE) ? GIC_DIST_ACTIVE_SET : GIC_DIST_ACTIVE_CLEAR);
> + if (mask & IRQ_FWD_STATE_MASKED)
> + gic_poke_irq(d, (val & IRQ_FWD_STATE_MASKED) ? GIC_DIST_ENABLE_CLEAR : GIC_DIST_ENABLE_SET);
Given that this isn't atomic and KVM only cares about ACTIVE, why not change
mask to be a single state only? Renumbering the states so that's not bits
would help to enforce this (i.e. make IRQ_FWD_STATE_ACTIVE 0, PENDING 1,
...).
That would also allow you to switch on the state and return early.
> +static u32 gic_irq_get_fwd_state(struct irq_data *d, u32 mask)
> +{
> + u32 val = 0;
> +
> + if (mask & IRQ_FWD_STATE_PENDING && gic_peek_irq(d, GIC_DIST_ENABLE_SET))
> + val |= IRQ_FWD_STATE_PENDING;
> + if (mask & IRQ_FWD_STATE_ACTIVE && gic_peek_irq(d, GIC_DIST_ACTIVE_SET))
> + val |= IRQ_FWD_STATE_ACTIVE;
> + if (mask & IRQ_FWD_STATE_MASKED && !gic_peek_irq(d, GIC_DIST_ENABLE_SET))
> + val |= IRQ_FWD_STATE_MASKED;
*and* you could peek GIC_DIST_ENABLE_SET in one place here.
Will
On Wed, 25 Jun 2014, Anup Patel wrote:
> Hi Marc,
>
> On Wed, Jun 25, 2014 at 2:58 PM, Marc Zyngier <[email protected]> wrote:
> > So far, GICv2 has been used in with EOImode == 0. The effect of this
> > mode is to perform the priority drop and the deactivation of the
> > interrupt at the same time.
> >
> > While this works perfectly for Linux (we only have a single priority),
> > it causes issues when an interrupt is forwarded to a guest, and when
> > we want the guest to perform the EOI itself.
> >
> > For this case, the GIC architecture provides EOImode == 1, where:
> > - A write to the EOI register drops the priority of the interrupt and leaves
> > it active. Other interrupts at the same priority level can now be taken,
> > but the active interrupt cannot be taken again
> > - A write to the DIR marks the interrupt as inactive, meaning it can
> > now be taken again.
> >
> > We only enable this feature when booted in HYP mode. Also, as most device
> > trees are broken (they report the CPU interface size to be 4kB, while
> > the GICv2 CPU interface size is 8kB), output a warning if we're booted
> > in HYP mode, and disable the feature.
> >
> > Signed-off-by: Marc Zyngier <[email protected]>
> > ---
> > drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
> > include/linux/irqchip/arm-gic.h | 4 +++
> > 2 files changed, 59 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> > index 508b815..9295bf2 100644
> > --- a/drivers/irqchip/irq-gic.c
> > +++ b/drivers/irqchip/irq-gic.c
> > @@ -45,6 +45,7 @@
> > #include <asm/irq.h>
> > #include <asm/exception.h>
> > #include <asm/smp_plat.h>
> > +#include <asm/virt.h>
> >
> > #include "irq-gic-common.h"
> > #include "irqchip.h"
> > @@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
> > .irq_set_wake = NULL,
> > };
> >
> > +static struct irq_chip *gic_chip;
> > +
> > +static bool supports_deactivate = false;
> > +
> > #ifndef MAX_GIC_NR
> > #define MAX_GIC_NR 1
> > #endif
> > @@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
> > writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
> > }
> >
> > +static void gic_eoi_dir_irq(struct irq_data *d)
> > +{
> > + gic_eoi_irq(d);
> > + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
> > +}
Would it be better if you moved the gic_eoi_irq call earlier? Maybe
somewhere in gic_handle_irq?
> > static int gic_set_type(struct irq_data *d, unsigned int type)
> > {
> > void __iomem *base = gic_dist_base(d);
> > @@ -277,6 +288,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
> > }
> > if (irqnr < 16) {
> > writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
> > + writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
> > #ifdef CONFIG_SMP
> > handle_IPI(irqnr, regs);
> > #endif
> > @@ -313,7 +325,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
> > chained_irq_exit(chip, desc);
> > }
> >
> > -static struct irq_chip gic_chip = {
> > +static struct irq_chip gicv1_chip = {
> > .name = "GIC",
> > .irq_mask = gic_mask_irq,
> > .irq_unmask = gic_unmask_irq,
> > @@ -326,6 +338,19 @@ static struct irq_chip gic_chip = {
> > .irq_set_wake = gic_set_wake,
> > };
> >
> > +static struct irq_chip gicv2_chip = {
> > + .name = "GIC",
> > + .irq_mask = gic_mask_irq,
> > + .irq_unmask = gic_unmask_irq,
> > + .irq_eoi = gic_eoi_dir_irq,
> > + .irq_set_type = gic_set_type,
> > + .irq_retrigger = gic_retrigger,
> > +#ifdef CONFIG_SMP
> > + .irq_set_affinity = gic_set_affinity,
> > +#endif
> > + .irq_set_wake = gic_set_wake,
> > +};
> > +
> > void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
> > {
> > if (gic_nr >= MAX_GIC_NR)
> > @@ -377,6 +402,16 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
> > writel_relaxed(1, base + GIC_DIST_CTRL);
> > }
> >
> > +static void gic_cpu_if_up(void)
> > +{
> > + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
> > + u32 val = 0;
> > +
> > + if (supports_deactivate)
> > + val = GIC_CPU_CTRL_EOImodeNS;
> > + writel_relaxed(val | 1, cpu_base + GIC_CPU_CTRL);
> > +}
> > +
> > static void gic_cpu_init(struct gic_chip_data *gic)
> > {
> > void __iomem *dist_base = gic_data_dist_base(gic);
> > @@ -402,7 +437,7 @@ static void gic_cpu_init(struct gic_chip_data *gic)
> > gic_cpu_config(dist_base, NULL);
> >
> > writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
> > - writel_relaxed(1, base + GIC_CPU_CTRL);
> > + gic_cpu_if_up();
> > }
> >
> > void gic_cpu_if_down(void)
> > @@ -543,7 +578,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
> > writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
> >
> > writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
> > - writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
> > + gic_cpu_if_up();
> > }
> >
> > static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
> > @@ -770,11 +805,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
> > {
> > if (hw < 32) {
> > irq_set_percpu_devid(irq);
> > - irq_set_chip_and_handler(irq, &gic_chip,
> > + irq_set_chip_and_handler(irq, gic_chip,
> > handle_percpu_devid_irq);
> > set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
> > } else {
> > - irq_set_chip_and_handler(irq, &gic_chip,
> > + irq_set_chip_and_handler(irq, gic_chip,
> > handle_fasteoi_irq);
> > set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
> >
> > @@ -951,6 +986,11 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
> >
> > gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
> >
> > + if (!supports_deactivate)
> > + gic_chip = &gicv1_chip;
> > + else
> > + gic_chip = &gicv2_chip;
> > +
> > if (of_property_read_u32(node, "arm,routable-irqs",
> > &nr_routable_irqs)) {
> > irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
> > @@ -980,7 +1020,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
> > set_handle_irq(gic_handle_irq);
> > }
> >
> > - gic_chip.flags |= gic_arch_extn.flags;
> > + gic_chip->flags |= gic_arch_extn.flags;
> > gic_dist_init(gic);
> > gic_cpu_init(gic);
> > gic_pm_init(gic);
> > @@ -994,6 +1034,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
> > {
> > void __iomem *cpu_base;
> > void __iomem *dist_base;
> > + struct resource cu_res;
> > u32 percpu_offset;
> > int irq;
> >
> > @@ -1006,6 +1047,14 @@ gic_of_init(struct device_node *node, struct device_node *parent)
> > cpu_base = of_iomap(node, 1);
> > WARN(!cpu_base, "unable to map gic cpu registers\n");
> >
> > + of_address_to_resource(node, 1, &cpu_res);
> > + if (is_hyp_mode_available()) {
>
> Interrupt deactivation feature is not dependent on whether
> hyp-mode is available or not.
>
> We can use GICC_DIR even if CPU does not have
> virtualization extension. Also, we can use GICV_DIR
> when running as Guest or VM.
>
> I think "supports_deactivate" should depend on
> the GIC compatible string.
>
> > + if (resource_size(&cpu_res) >= SZ_8K)
> > + supports_deactivate = true;
> > + else
> > + pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
>
> This will not work on APM X-Gene because, for
> X-Gene first CPU page is at 0x78020000 and
> second CPU page is at 0x78030000.
>
> Ian had send-out a patch long time back to extend
> GIC dt-bindings for addressing this issue.
> (http://www.spinics.net/lists/arm-kernel/msg283767.html)
>
> Regards,
> Anup
>
> > + }
> > +
> > if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
> > percpu_offset = 0;
> >
> > diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> > index 45e2d8c..ffe3911 100644
> > --- a/include/linux/irqchip/arm-gic.h
> > +++ b/include/linux/irqchip/arm-gic.h
> > @@ -21,6 +21,10 @@
> > #define GIC_CPU_ACTIVEPRIO 0xd0
> > #define GIC_CPU_IDENT 0xfc
> >
> > +#define GIC_CPU_DEACTIVATE 0x1000
> > +
> > +#define GIC_CPU_CTRL_EOImodeNS (1 << 9)
> > +
> > #define GICC_IAR_INT_ID_MASK 0x3ff
> >
> > #define GIC_DIST_CTRL 0x000
> > --
> > 1.8.3.4
> >
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Hi Stefano,
On 30/06/14 20:09, Stefano Stabellini wrote:
> On Wed, 25 Jun 2014, Anup Patel wrote:
>> Hi Marc,
>>
>> On Wed, Jun 25, 2014 at 2:58 PM, Marc Zyngier <[email protected]> wrote:
>>> So far, GICv2 has been used in with EOImode == 0. The effect of this
>>> mode is to perform the priority drop and the deactivation of the
>>> interrupt at the same time.
>>>
>>> While this works perfectly for Linux (we only have a single priority),
>>> it causes issues when an interrupt is forwarded to a guest, and when
>>> we want the guest to perform the EOI itself.
>>>
>>> For this case, the GIC architecture provides EOImode == 1, where:
>>> - A write to the EOI register drops the priority of the interrupt and leaves
>>> it active. Other interrupts at the same priority level can now be taken,
>>> but the active interrupt cannot be taken again
>>> - A write to the DIR marks the interrupt as inactive, meaning it can
>>> now be taken again.
>>>
>>> We only enable this feature when booted in HYP mode. Also, as most device
>>> trees are broken (they report the CPU interface size to be 4kB, while
>>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
>>> in HYP mode, and disable the feature.
>>>
>>> Signed-off-by: Marc Zyngier <[email protected]>
>>> ---
>>> drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
>>> include/linux/irqchip/arm-gic.h | 4 +++
>>> 2 files changed, 59 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
>>> index 508b815..9295bf2 100644
>>> --- a/drivers/irqchip/irq-gic.c
>>> +++ b/drivers/irqchip/irq-gic.c
>>> @@ -45,6 +45,7 @@
>>> #include <asm/irq.h>
>>> #include <asm/exception.h>
>>> #include <asm/smp_plat.h>
>>> +#include <asm/virt.h>
>>>
>>> #include "irq-gic-common.h"
>>> #include "irqchip.h"
>>> @@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
>>> .irq_set_wake = NULL,
>>> };
>>>
>>> +static struct irq_chip *gic_chip;
>>> +
>>> +static bool supports_deactivate = false;
>>> +
>>> #ifndef MAX_GIC_NR
>>> #define MAX_GIC_NR 1
>>> #endif
>>> @@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
>>> writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
>>> }
>>>
>>> +static void gic_eoi_dir_irq(struct irq_data *d)
>>> +{
>>> + gic_eoi_irq(d);
>>> + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
>>> +}
>
> Would it be better if you moved the gic_eoi_irq call earlier? Maybe
> somewhere in gic_handle_irq?
I'm not sure I see what we'd gain by doing so. Can you elaborate?
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On Tue, 1 Jul 2014, Marc Zyngier wrote:
> Hi Stefano,
>
> On 30/06/14 20:09, Stefano Stabellini wrote:
> > On Wed, 25 Jun 2014, Anup Patel wrote:
> >> Hi Marc,
> >>
> >> On Wed, Jun 25, 2014 at 2:58 PM, Marc Zyngier <[email protected]> wrote:
> >>> So far, GICv2 has been used in with EOImode == 0. The effect of this
> >>> mode is to perform the priority drop and the deactivation of the
> >>> interrupt at the same time.
> >>>
> >>> While this works perfectly for Linux (we only have a single priority),
> >>> it causes issues when an interrupt is forwarded to a guest, and when
> >>> we want the guest to perform the EOI itself.
> >>>
> >>> For this case, the GIC architecture provides EOImode == 1, where:
> >>> - A write to the EOI register drops the priority of the interrupt and leaves
> >>> it active. Other interrupts at the same priority level can now be taken,
> >>> but the active interrupt cannot be taken again
> >>> - A write to the DIR marks the interrupt as inactive, meaning it can
> >>> now be taken again.
> >>>
> >>> We only enable this feature when booted in HYP mode. Also, as most device
> >>> trees are broken (they report the CPU interface size to be 4kB, while
> >>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
> >>> in HYP mode, and disable the feature.
> >>>
> >>> Signed-off-by: Marc Zyngier <[email protected]>
> >>> ---
> >>> drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
> >>> include/linux/irqchip/arm-gic.h | 4 +++
> >>> 2 files changed, 59 insertions(+), 6 deletions(-)
> >>>
> >>> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> >>> index 508b815..9295bf2 100644
> >>> --- a/drivers/irqchip/irq-gic.c
> >>> +++ b/drivers/irqchip/irq-gic.c
> >>> @@ -45,6 +45,7 @@
> >>> #include <asm/irq.h>
> >>> #include <asm/exception.h>
> >>> #include <asm/smp_plat.h>
> >>> +#include <asm/virt.h>
> >>>
> >>> #include "irq-gic-common.h"
> >>> #include "irqchip.h"
> >>> @@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
> >>> .irq_set_wake = NULL,
> >>> };
> >>>
> >>> +static struct irq_chip *gic_chip;
> >>> +
> >>> +static bool supports_deactivate = false;
> >>> +
> >>> #ifndef MAX_GIC_NR
> >>> #define MAX_GIC_NR 1
> >>> #endif
> >>> @@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
> >>> writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
> >>> }
> >>>
> >>> +static void gic_eoi_dir_irq(struct irq_data *d)
> >>> +{
> >>> + gic_eoi_irq(d);
> >>> + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
> >>> +}
> >
> > Would it be better if you moved the gic_eoi_irq call earlier? Maybe
> > somewhere in gic_handle_irq?
>
> I'm not sure I see what we'd gain by doing so. Can you elaborate?
You would be dropping the priority earlier. It would be beneficial if
Linux was running the interrupt handlers with interrupts enabled, but I
realize now that it is not the case.
On 01/07/14 17:34, Stefano Stabellini wrote:
> On Tue, 1 Jul 2014, Marc Zyngier wrote:
>> Hi Stefano,
>>
>> On 30/06/14 20:09, Stefano Stabellini wrote:
>>> On Wed, 25 Jun 2014, Anup Patel wrote:
>>>> Hi Marc,
>>>>
>>>> On Wed, Jun 25, 2014 at 2:58 PM, Marc Zyngier <[email protected]> wrote:
>>>>> So far, GICv2 has been used in with EOImode == 0. The effect of this
>>>>> mode is to perform the priority drop and the deactivation of the
>>>>> interrupt at the same time.
>>>>>
>>>>> While this works perfectly for Linux (we only have a single priority),
>>>>> it causes issues when an interrupt is forwarded to a guest, and when
>>>>> we want the guest to perform the EOI itself.
>>>>>
>>>>> For this case, the GIC architecture provides EOImode == 1, where:
>>>>> - A write to the EOI register drops the priority of the interrupt and leaves
>>>>> it active. Other interrupts at the same priority level can now be taken,
>>>>> but the active interrupt cannot be taken again
>>>>> - A write to the DIR marks the interrupt as inactive, meaning it can
>>>>> now be taken again.
>>>>>
>>>>> We only enable this feature when booted in HYP mode. Also, as most device
>>>>> trees are broken (they report the CPU interface size to be 4kB, while
>>>>> the GICv2 CPU interface size is 8kB), output a warning if we're booted
>>>>> in HYP mode, and disable the feature.
>>>>>
>>>>> Signed-off-by: Marc Zyngier <[email protected]>
>>>>> ---
>>>>> drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
>>>>> include/linux/irqchip/arm-gic.h | 4 +++
>>>>> 2 files changed, 59 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
>>>>> index 508b815..9295bf2 100644
>>>>> --- a/drivers/irqchip/irq-gic.c
>>>>> +++ b/drivers/irqchip/irq-gic.c
>>>>> @@ -45,6 +45,7 @@
>>>>> #include <asm/irq.h>
>>>>> #include <asm/exception.h>
>>>>> #include <asm/smp_plat.h>
>>>>> +#include <asm/virt.h>
>>>>>
>>>>> #include "irq-gic-common.h"
>>>>> #include "irqchip.h"
>>>>> @@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
>>>>> .irq_set_wake = NULL,
>>>>> };
>>>>>
>>>>> +static struct irq_chip *gic_chip;
>>>>> +
>>>>> +static bool supports_deactivate = false;
>>>>> +
>>>>> #ifndef MAX_GIC_NR
>>>>> #define MAX_GIC_NR 1
>>>>> #endif
>>>>> @@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
>>>>> writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
>>>>> }
>>>>>
>>>>> +static void gic_eoi_dir_irq(struct irq_data *d)
>>>>> +{
>>>>> + gic_eoi_irq(d);
>>>>> + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
>>>>> +}
>>>
>>> Would it be better if you moved the gic_eoi_irq call earlier? Maybe
>>> somewhere in gic_handle_irq?
>>
>> I'm not sure I see what we'd gain by doing so. Can you elaborate?
>
> You would be dropping the priority earlier. It would be beneficial if
> Linux was running the interrupt handlers with interrupts enabled, but I
> realize now that it is not the case.
Ah, I see what you mean. No, as you noticed, we run the handlers with
interrupts disabled, so keeping the EOI and DIR close together is
probably what makes the most sense.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Hi Will,
On Fri, Jun 27 2014 at 2:10:18 pm BST, Will Deacon <[email protected]> wrote:
> Hi Marc,
>
> On Wed, Jun 25, 2014 at 10:28:43AM +0100, Marc Zyngier wrote:
>> When a peripheral is shared between virtual machines, its interrupt
>> state becomes part of the guest's state, and must be switched accordingly.
>>
>> Introduce a pair of accessors (irq_get_fwd_state/irq_set_fwd_state) to
>> retrieve the bits that can be of interest to KVM: pending, active, and masked.
>>
>> - irq_get_fwd_state returns the state of the interrupt according to a mask
>> containing any of the IRQ_STATE_PENDING, IRQ_STATE_ACTIVE or IRQ_STATE_MASKED
>> bits.
>> - irq_set_fwd_state sets the state of the interrupt according to a
>> similar mask.
>>
>> Only a forwarded interrupt can be manipulated in such a way.
>
> [...]
>
>> +/**
>> + * irq_set_fwd_state - set the state of a forwarded interrupt.
>> + * @irq: Interrupt line that is forwarded to a VM
>> + * @val: State to be restored
>> + * @mask: Bitmask of IRQ_FWD_STATE_* defining the valid bits in @val
>> + *
>> + * This call sets the state of a forwarded interrupt, depending
>> + * on the mask which indicates the valid bits.
>> + *
>> + * This function should be called with preemption disabled if the
>> + * interrupt controller has per-cpu registers.
>> + */
>> +int irq_set_fwd_state(unsigned int irq, u32 val, u32 mask)
>> +{
>> + struct irq_desc *desc;
>> + struct irq_data *data;
>> + struct irq_chip *chip;
>> + unsigned long flags;
>> +
>> + desc = irq_to_desc(irq);
>> + if (!desc)
>> + return -EINVAL;
>> +
>> + data = irq_desc_get_irq_data(desc);
>> + if (!irqd_irq_forwarded(data))
>> + return -EINVAL;
>> +
>> + chip = irq_desc_get_chip(desc);
>> + if (!chip->irq_set_fwd_state)
>> + return -EINVAL;
>> +
>> + chip_bus_lock(desc);
>> + raw_spin_lock_irqsave(&desc->lock, flags);
>> + chip->irq_set_fwd_state(data, val, mask);
>> + raw_spin_unlock_irqrestore(&desc->lock, flags);
>> + chip_bus_sync_unlock(desc);
>
> Having looked at this, I don't think you need to take the desc->lock
> after all. I thought you might need it for irq_desc_get_chip, but
> that's not going to change dynamically and you'd end up with a lock
> ordering problem against chip_bus_lock anyway.
I think this all depends on the state of the device generating the
interrupt:
- If we can guarantee that the device is completely off and won't
generate an interrupt when the state is save/restored, then the locks
can go.
- If the device can generate an interrupt, then we need a more complex
locking strategy, similar to the one described in handle.c (the VM
should be considered as a form a thread handler itself).
For the particular use case that we have on ARM, the first possibility
applies (we ensure that the timer is completely off when scheduling the
VM out).
But can we make this a requirement?
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
On Fri, Jun 27 2014 at 2:17:48 pm BST, Will Deacon <[email protected]> wrote:
> Hi Marc,
>
> On Wed, Jun 25, 2014 at 10:28:45AM +0100, Marc Zyngier wrote:
>> Now that we've switched to EOImode == 1, prevent a forwarded interrupt
>> from being deactivated after its priority has been dropped.
>>
>> Also add support for the interrupt state to be saved/restored.
>>
>> Signed-off-by: Marc Zyngier <[email protected]>
>> ---
>> drivers/irqchip/irq-gic.c | 48
>> +++++++++++++++++++++++++++++++++++++++++------
>> 1 file changed, 42 insertions(+), 6 deletions(-)
>
> [...]
>
>> +static void gic_irq_set_fwd_state(struct irq_data *d, u32 val, u32 mask)
>> +{
>> + if (mask & IRQ_FWD_STATE_PENDING)
>> + gic_poke_irq(d, (val & IRQ_FWD_STATE_PENDING) ?
>> GIC_DIST_ENABLE_SET : GIC_DIST_ENABLE_CLEAR);
>> + if (mask & IRQ_FWD_STATE_ACTIVE)
>> + gic_poke_irq(d, (val & IRQ_FWD_STATE_ACTIVE) ? GIC_DIST_ACTIVE_SET
>> : GIC_DIST_ACTIVE_CLEAR);
>> + if (mask & IRQ_FWD_STATE_MASKED)
>> + gic_poke_irq(d, (val & IRQ_FWD_STATE_MASKED) ?
>> GIC_DIST_ENABLE_CLEAR : GIC_DIST_ENABLE_SET);
>
> Given that this isn't atomic and KVM only cares about ACTIVE, why not change
> mask to be a single state only? Renumbering the states so that's not bits
> would help to enforce this (i.e. make IRQ_FWD_STATE_ACTIVE 0, PENDING 1,
> ...).
>
> That would also allow you to switch on the state and return early.
Well, this is atomic from the point of view of the interrupt (we hold
the descriptor spinlock). Now, given my answer to your remark about the
locking earlier in this thread and given the KVM use case, we could
indeed relax this and loose the atomicity. But we need to agree on what
is the model we want to expose.
>> +static u32 gic_irq_get_fwd_state(struct irq_data *d, u32 mask)
>> +{
>> + u32 val = 0;
>> +
>> + if (mask & IRQ_FWD_STATE_PENDING && gic_peek_irq(d,
>> GIC_DIST_ENABLE_SET))
>> + val |= IRQ_FWD_STATE_PENDING;
>> + if (mask & IRQ_FWD_STATE_ACTIVE && gic_peek_irq(d, GIC_DIST_ACTIVE_SET))
>> + val |= IRQ_FWD_STATE_ACTIVE;
>> + if (mask & IRQ_FWD_STATE_MASKED && !gic_peek_irq(d,
>> GIC_DIST_ENABLE_SET))
>> + val |= IRQ_FWD_STATE_MASKED;
>
> *and* you could peek GIC_DIST_ENABLE_SET in one place here.
Ah yes, this is a GIC specificity (MASKED = !ENABLED). Good point.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
On 06/25/2014 11:28 AM, Marc Zyngier wrote:
> So far, GICv2 has been used in with EOImode == 0. The effect of this
> mode is to perform the priority drop and the deactivation of the
> interrupt at the same time.
>
> While this works perfectly for Linux (we only have a single priority),
> it causes issues when an interrupt is forwarded to a guest, and when
> we want the guest to perform the EOI itself.
>
> For this case, the GIC architecture provides EOImode == 1, where:
> - A write to the EOI register drops the priority of the interrupt and leaves
> it active. Other interrupts at the same priority level can now be taken,
> but the active interrupt cannot be taken again
> - A write to the DIR marks the interrupt as inactive, meaning it can
> now be taken again.
>
> We only enable this feature when booted in HYP mode. Also, as most device
> trees are broken (they report the CPU interface size to be 4kB, while
> the GICv2 CPU interface size is 8kB), output a warning if we're booted
> in HYP mode, and disable the feature.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> drivers/irqchip/irq-gic.c | 61 +++++++++++++++++++++++++++++++++++++----
> include/linux/irqchip/arm-gic.h | 4 +++
> 2 files changed, 59 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> index 508b815..9295bf2 100644
> --- a/drivers/irqchip/irq-gic.c
> +++ b/drivers/irqchip/irq-gic.c
> @@ -45,6 +45,7 @@
> #include <asm/irq.h>
> #include <asm/exception.h>
> #include <asm/smp_plat.h>
> +#include <asm/virt.h>
>
> #include "irq-gic-common.h"
> #include "irqchip.h"
> @@ -94,6 +95,10 @@ struct irq_chip gic_arch_extn = {
> .irq_set_wake = NULL,
> };
>
> +static struct irq_chip *gic_chip;
> +
> +static bool supports_deactivate = false;
> +
> #ifndef MAX_GIC_NR
> #define MAX_GIC_NR 1
> #endif
> @@ -185,6 +190,12 @@ static void gic_eoi_irq(struct irq_data *d)
> writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
> }
>
> +static void gic_eoi_dir_irq(struct irq_data *d)
> +{
> + gic_eoi_irq(d);
> + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE);
> +}
> +
> static int gic_set_type(struct irq_data *d, unsigned int type)
> {
> void __iomem *base = gic_dist_base(d);
> @@ -277,6 +288,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
> }
> if (irqnr < 16) {
> writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
> + writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
Hi Marc,
a minor comment: archi spec says writing to DIR without EOIMode set is
"unpredictable" (4.4.15). however in practice it seems to work on my
test env.
Best Regards
Eric
> #ifdef CONFIG_SMP
> handle_IPI(irqnr, regs);
> #endif
> @@ -313,7 +325,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
> chained_irq_exit(chip, desc);
> }
>
> -static struct irq_chip gic_chip = {
> +static struct irq_chip gicv1_chip = {
> .name = "GIC",
> .irq_mask = gic_mask_irq,
> .irq_unmask = gic_unmask_irq,
> @@ -326,6 +338,19 @@ static struct irq_chip gic_chip = {
> .irq_set_wake = gic_set_wake,
> };
>
> +static struct irq_chip gicv2_chip = {
> + .name = "GIC",
> + .irq_mask = gic_mask_irq,
> + .irq_unmask = gic_unmask_irq,
> + .irq_eoi = gic_eoi_dir_irq,
> + .irq_set_type = gic_set_type,
> + .irq_retrigger = gic_retrigger,
> +#ifdef CONFIG_SMP
> + .irq_set_affinity = gic_set_affinity,
> +#endif
> + .irq_set_wake = gic_set_wake,
> +};
> +
> void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
> {
> if (gic_nr >= MAX_GIC_NR)
> @@ -377,6 +402,16 @@ static void __init gic_dist_init(struct gic_chip_data *gic)
> writel_relaxed(1, base + GIC_DIST_CTRL);
> }
>
> +static void gic_cpu_if_up(void)
> +{
> + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
> + u32 val = 0;
> +
> + if (supports_deactivate)
> + val = GIC_CPU_CTRL_EOImodeNS;
> + writel_relaxed(val | 1, cpu_base + GIC_CPU_CTRL);
> +}
> +
> static void gic_cpu_init(struct gic_chip_data *gic)
> {
> void __iomem *dist_base = gic_data_dist_base(gic);
> @@ -402,7 +437,7 @@ static void gic_cpu_init(struct gic_chip_data *gic)
> gic_cpu_config(dist_base, NULL);
>
> writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
> - writel_relaxed(1, base + GIC_CPU_CTRL);
> + gic_cpu_if_up();
> }
>
> void gic_cpu_if_down(void)
> @@ -543,7 +578,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
> writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
>
> writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
> - writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
> + gic_cpu_if_up();
> }
>
> static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
> @@ -770,11 +805,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
> {
> if (hw < 32) {
> irq_set_percpu_devid(irq);
> - irq_set_chip_and_handler(irq, &gic_chip,
> + irq_set_chip_and_handler(irq, gic_chip,
> handle_percpu_devid_irq);
> set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
> } else {
> - irq_set_chip_and_handler(irq, &gic_chip,
> + irq_set_chip_and_handler(irq, gic_chip,
> handle_fasteoi_irq);
> set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
>
> @@ -951,6 +986,11 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
>
> gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
>
> + if (!supports_deactivate)
> + gic_chip = &gicv1_chip;
> + else
> + gic_chip = &gicv2_chip;
> +
> if (of_property_read_u32(node, "arm,routable-irqs",
> &nr_routable_irqs)) {
> irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
> @@ -980,7 +1020,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
> set_handle_irq(gic_handle_irq);
> }
>
> - gic_chip.flags |= gic_arch_extn.flags;
> + gic_chip->flags |= gic_arch_extn.flags;
> gic_dist_init(gic);
> gic_cpu_init(gic);
> gic_pm_init(gic);
> @@ -994,6 +1034,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
> {
> void __iomem *cpu_base;
> void __iomem *dist_base;
> + struct resource cpu_res;
> u32 percpu_offset;
> int irq;
>
> @@ -1006,6 +1047,14 @@ gic_of_init(struct device_node *node, struct device_node *parent)
> cpu_base = of_iomap(node, 1);
> WARN(!cpu_base, "unable to map gic cpu registers\n");
>
> + of_address_to_resource(node, 1, &cpu_res);
> + if (is_hyp_mode_available()) {
> + if (resource_size(&cpu_res) >= SZ_8K)
> + supports_deactivate = true;
> + else
> + pr_warn("GIC: CPU interface size is %x, DT is probably wrong\n", (int)resource_size(&cpu_res));
> + }
> +
> if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
> percpu_offset = 0;
>
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index 45e2d8c..ffe3911 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -21,6 +21,10 @@
> #define GIC_CPU_ACTIVEPRIO 0xd0
> #define GIC_CPU_IDENT 0xfc
>
> +#define GIC_CPU_DEACTIVATE 0x1000
> +
> +#define GIC_CPU_CTRL_EOImodeNS (1 << 9)
> +
> #define GICC_IAR_INT_ID_MASK 0x3ff
>
> #define GIC_DIST_CTRL 0x000
>