Add a separate hook for checking if interrupt injection is blocked and
use the hook to handle the case where an interrupt arrives between
check_nested_events() and the injection logic. Drop the retry of
check_nested_events() that hack-a-fixed the same condition.

Blocking injection is also a bit of a hack, e.g. KVM should do exiting
and non-exiting interrupt processing in a single pass, but it's a more
precise hack. The old comment is also misleading, e.g. KVM_REQ_EVENT is
purely an optimization; setting it on every run loop (which KVM doesn't
do) should not affect functionality, only performance.

Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm/svm.c | 1 +
arch/x86/kvm/vmx/vmx.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 22 ++++------------------
 4 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 787636acd648..16fdeddb4a65 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1140,6 +1140,7 @@ struct kvm_x86_ops {
void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
bool (*interrupt_allowed)(struct kvm_vcpu *vcpu);
+ bool (*interrupt_injection_allowed)(struct kvm_vcpu *vcpu);
bool (*nmi_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f21f734861dd..6d3ccbfc9e6a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3993,6 +3993,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.queue_exception = svm_queue_exception,
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
+ .interrupt_injection_allowed = svm_interrupt_allowed,
.nmi_allowed = svm_nmi_allowed,
.get_nmi_mask = svm_get_nmi_mask,
.set_nmi_mask = svm_set_nmi_mask,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2f8cacb3aa9b..68b3748b5383 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4550,6 +4550,18 @@ static bool vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
return !vmx_interrupt_blocked(vcpu);
}

+static bool vmx_interrupt_injection_allowed(struct kvm_vcpu *vcpu)
+{
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
+ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+ return false;
+
+ return vmx_interrupt_allowed(vcpu);
+}
+
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
@@ -7823,6 +7835,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.queue_exception = vmx_queue_exception,
.cancel_injection = vmx_cancel_injection,
.interrupt_allowed = vmx_interrupt_allowed,
+ .interrupt_injection_allowed = vmx_interrupt_injection_allowed,
.nmi_allowed = vmx_nmi_allowed,
.get_nmi_mask = vmx_get_nmi_mask,
.set_nmi_mask = vmx_set_nmi_mask,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7c49a7dc601f..d9d6028a77e0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7755,24 +7755,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
--vcpu->arch.nmi_pending;
vcpu->arch.nmi_injected = true;
kvm_x86_ops.set_nmi(vcpu);
- } else if (kvm_cpu_has_injectable_intr(vcpu)) {
- /*
- * Because interrupts can be injected asynchronously, we are
- * calling check_nested_events again here to avoid a race condition.
- * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
- * proposal and current concerns. Perhaps we should be setting
- * KVM_REQ_EVENT only on certain events and not unconditionally?
- */
- if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
- r = kvm_x86_ops.check_nested_events(vcpu);
- if (r != 0)
- return r;
- }
- if (kvm_x86_ops.interrupt_allowed(vcpu)) {
- kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
- false);
- kvm_x86_ops.set_irq(vcpu);
- }
+ } else if (kvm_cpu_has_injectable_intr(vcpu) &&
+ kvm_x86_ops.interrupt_injection_allowed(vcpu)) {
+ kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
+ kvm_x86_ops.set_irq(vcpu);
}
	return 0;

--
2.26.0
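To make the race concrete, here is a schematic of the window the new hook
closes. The function names match the patch; the interleaving itself is
illustrative, not something quoted from the thread:

/*
 * vCPU thread                                IRQ source (other thread)
 * -----------                                -------------------------
 * inject_pending_event()
 *   check_nested_events()
 *     -> no injectable IRQ yet, so no
 *        nested VM-Exit is synthesized
 *                                            IRQ becomes pending in the
 *                                            local APIC
 *   kvm_cpu_has_injectable_intr() == true
 *   old: retry check_nested_events() to catch the late arrival
 *   new: interrupt_injection_allowed() returns false for L2 when L1
 *        intercepts IRQs; the IRQ is delivered via a nested VM-Exit on
 *        a later pass instead of being wrongly injected into L2
 */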
On 23/04/20 04:25, Sean Christopherson wrote:
Hmm, I'm interested in how this can help with AMD, which would otherwise
introduce another instance of the same late check_nested_events() retry.
I'll play with it.
Paolo
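For the AMD angle Paolo raises, the eventual SVM hook would presumably
mirror the VMX logic. A minimal sketch, assuming an SVM-side
nested_exit_on_intr() helper that reports whether L1 intercepts physical
interrupts; the helper name and shape are assumptions, not part of this
patch:

static bool svm_interrupt_injection_allowed(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * Mirror the VMX hook: an IRQ must not be injected into L2 if L1
	 * wants IRQs to cause a nested #VMEXIT (INTERCEPT_INTR), e.g. if
	 * the IRQ arrived after check_nested_events() ran.
	 */
	if (is_guest_mode(vcpu) && nested_exit_on_intr(svm))
		return false;

	return svm_interrupt_allowed(vcpu);
}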
On Wed, Apr 22, 2020 at 7:26 PM Sean Christopherson
<[email protected]> wrote:
So, that's what this mess was all about! Well, this certainly looks better.
Reviewed-by: Jim Mattson <[email protected]>
On Tue, Apr 28, 2020 at 03:12:51PM -0700, Jim Mattson wrote:
> So, that's what this mess was all about! Well, this certainly looks better.
Right? I can't count the number of times I've looked at this code and
wondered what the hell it was doing.
Side topic, I just realized you're reviewing my original series. Paolo
commandeered it to extend it to SVM. https://patchwork.kernel.org/cover/11508679/
On 29/04/20 00:20, Sean Christopherson wrote:
>> So, that's what this mess was all about! Well, this certainly looks better.
> Right? I can't count the number of times I've looked at this code and
> wondered what the hell it was doing.
>
> Side topic, I just realized you're reviewing my original series. Paolo
> commandeered it to extend it to SVM. https://patchwork.kernel.org/cover/11508679/
If you can just send a patch to squash into 9/13, I can take care of it.
Paolo
On Wed, Apr 29, 2020 at 10:36:17AM +0200, Paolo Bonzini wrote:
> On 29/04/20 00:20, Sean Christopherson wrote:
> >> So, that's what this mess was all about! Well, this certainly looks better.
> > Right? I can't count the number of times I've looked at this code and
> > wondered what the hell it was doing.
> >
> > Side topic, I just realized you're reviewing my original series. Paolo
> > commandeered it to extend it to SVM. https://patchwork.kernel.org/cover/11508679/
>
> If you can just send a patch to squash into 9/13, I can take care of it.
Ugh, correctly prioritizing SMI is a mess. It has migration implications;
a proper fix requires non-trivial changes to inject_pending_event(); there
are pre-existing (minor) bugs related to MTF handling; and technically INIT
should have lower priority than non-trap exceptions (because the exception
happens before the event window is opened).
Can you just drop 9/13, "Prioritize SMI over nested IRQ/NMI" from kvm/queue?
It's probably best to deal with this in a new series rather than trying to
squeeze it in.
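For reference, the architectural ordering under discussion, paraphrased
from the SDM's "Priority Among Simultaneous Exceptions and Interrupts"
table (a from-memory summary, not a verbatim quote):

/*
 * Rough priority among simultaneous events, highest first:
 *   1. Hardware reset and machine checks
 *   2. Trap on task switch (T flag set in TSS)
 *   3. External hardware interventions: FLUSH, STOPCLK, SMI, INIT
 *   4. Traps on the previous instruction (e.g. #DB data breakpoints)
 *   5. NMI
 *   6. Maskable hardware interrupts
 *   7. Code breakpoint fault, then faults on fetching, decoding and
 *      executing the next instruction
 *
 * SMI and INIT outrank NMI and IRQs, which is what 9/13 tries to honor;
 * the INIT-vs-exception wrinkle is that a fault on the current
 * instruction is raised before the instruction boundary at which INIT
 * is recognized.
 */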
On 29/04/20 18:45, Sean Christopherson wrote:
>
> Can you just drop 9/13, "Prioritize SMI over nested IRQ/NMI" from kvm/queue?
> It's probably best to deal with this in a new series rather than trying to
> squeeze it in.
With AMD we just have IRQ/NMI/SMI, and it's important to handle SMI in
check_nested_events because you can turn SMIs into vmexits without stuff
such as dual-monitor treatment. On the other hand, there is no MTF and
we're not handling exceptions yet. So, since SMIs should be pretty rare
anyway, I'd rather just add a comment detailing the correct order and
why we're not following it. The minimal fix would be to move SMI above
the preemption timer, right?
Paolo
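Concretely, the minimal fix Paolo describes would be a reordering in
vmx_check_nested_events(), along these lines. A sketch only, assuming the
shape of the surrounding code in 9/13 (the block_nested_events local and
no_vmexit label are taken from that era's function):

	/*
	 * Check SMI before the preemption timer so that a pending SMI
	 * isn't misordered after a timer-induced VM-Exit.
	 */
	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		/* SMM entry itself is handled outside of nested events. */
		goto no_vmexit;
	}

	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
	    vmx->nested.preemption_timer_expired) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
		return 0;
	}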
On Wed, Apr 29, 2020 at 06:58:45PM +0200, Paolo Bonzini wrote:
> On 29/04/20 18:45, Sean Christopherson wrote:
> >
> > Can you just drop 9/13, "Prioritize SMI over nested IRQ/NMI" from kvm/queue?
> > It's probably best to deal with this in a new series rather than trying to
> > squeeze it in.
>
> With AMD we just have IRQ/NMI/SMI, and it's important to handle SMI in
Ah, forgot about that angle.
> check_nested_events because you can turn SMIs into vmexits without stuff
> such as dual-monitor treatment. On the other hand, there is no MTF and
> we're not handling exceptions yet. So, since SMIs should be pretty rare
> anyway, I'd rather just add a comment detailing the correct order and
> why we're not following it. The minimal fix would be to move SMI above
> the preemption timer, right?
Yep, that works for now.
I'd still like to do a full fix for SMI and INIT. Correctness aside, I
think/hope the changes I have in mind will make it easier to connect the
dots between KVM's event priority and the SDM's event priority. But that
can definitely wait for 5.9.