Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) client-ip=23.128.96.18;
Message-ID: <43ef1a1ea488977db11d40ec9672b524ec816112.camel@redhat.com>
Subject: Re: [PATCH 02/10] KVM: x86: APICv: fix race in
 kvm_request_apicv_update on SVM
From:   Maxim Levitsky <mlevitsk@redhat.com>
To:     Paolo Bonzini <pbonzini@redhat.com>, kvm@vger.kernel.org
Cc:     Thomas Gleixner <tglx@linutronix.de>,
        Sean Christopherson <seanjc@google.com>,
        Wanpeng Li <wanpengli@tencent.com>,
        Vitaly Kuznetsov <vkuznets@redhat.com>,
        Joerg Roedel <joro@8bytes.org>, Borislav Petkov <bp@alien8.de>,
        "H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
        "open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)" 
        <linux-kernel@vger.kernel.org>,
        "maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)" <x86@kernel.org>,
        Jim Mattson <jmattson@google.com>
Date:   Thu, 24 Jun 2021 11:07:02 +0300
In-Reply-To: <6c4a69ce-595e-d5a1-7b4e-e6ce1afe1252@redhat.com>
References: <20210623113002.111448-1-mlevitsk@redhat.com>
         <20210623113002.111448-3-mlevitsk@redhat.com>
         <6c4a69ce-595e-d5a1-7b4e-e6ce1afe1252@redhat.com>
Content-Type: text/plain; charset="UTF-8"
User-Agent: Evolution 3.36.5 (3.36.5-2.fc32) 
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Precedence: bulk

On Wed, 2021-06-23 at 23:50 +0200, Paolo Bonzini wrote:
> On 23/06/21 13:29, Maxim Levitsky wrote:
> > +	kvm_block_guest_entries(kvm);
> > +
> >   	trace_kvm_apicv_update_request(activate, bit);
> >   	if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
> >   		static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate);
> > @@ -9243,6 +9245,8 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
> >   	except = kvm_get_running_vcpu();
> >   	kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
> >   					 except);
> > +
> > +	kvm_allow_guest_entries(kvm);
> 
> Doesn't this cause a busy loop during synchronize_rcu?

Hi,

If you mean busy loop on other vcpus, then the answer is sadly yes.
Other option is to use a mutex, which is what I did in a former
version of this patch, but at last minute I decided that this
way it was done in this patch would be simplier. 
AVIC updates don't happen often.
Also with a request, the KVM_REQ_APICV_UPDATE can be handled in parallel,
while mutex enforces unneeded mutual execution of it.


>   It should be 
> possible to request the vmexit of other CPUs from 
> avic_update_access_page, and do a lock/unlock of kvm->slots_lock to wait 
> for the memslot to be updated.

This would still keep the race. The other vCPUs must not enter the guest mode
from the moment the memslot update was started and until the KVM_REQ_APICV_UPDATE
is raised.
 
If I were to do any kind of synchronization in avic_update_access_page, then I will
have to drop the lock/request there, and from this point and till the common code
raises the KVM_REQ_APICV_UPDATE there is a possibility of a vCPU reentering the
guest mode without updating its AVIC.
 
 
Here is an older version of this patch that does use mutex instead. 
Please let me know if you prefer it.

I copy pasted it here, thus its likely not to apply as my email client
probably destroys whitespace.
  
Thanks for the review,
	Best regards,
		Maxim Levitsky


--

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fdc6b8a4348f..b7dc7fd7b63d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9183,11 +9183,8 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
 	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
-void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 {
-	if (!lapic_in_kernel(vcpu))
-		return;
-
 	vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
 	kvm_apic_update_apicv(vcpu);
 	static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
@@ -9201,6 +9198,16 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.apicv_active)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
+
+void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+{
+	if (!lapic_in_kernel(vcpu))
+		return;
+
+	mutex_lock(&vcpu->kvm->apicv_update_lock);
+	__kvm_vcpu_update_apicv(vcpu);
+	mutex_unlock(&vcpu->kvm->apicv_update_lock);
+}
 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
 
 /*
@@ -9213,30 +9220,26 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
 void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
 {
 	struct kvm_vcpu *except;
-	unsigned long old, new, expected;
+	unsigned long old, new;
 
 	if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
 	    !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
 		return;
 
-	old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
-	do {
-		expected = new = old;
-		if (activate)
-			__clear_bit(bit, &new);
-		else
-			__set_bit(bit, &new);
-		if (new == old)
-			break;
-		old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new);
-	} while (old != expected);
+	mutex_lock(&kvm->apicv_update_lock);
+
+	old = new = kvm->arch.apicv_inhibit_reasons;
+	if (activate)
+		__clear_bit(bit, &new);
+	else
+		__set_bit(bit, &new);
+
+	WRITE_ONCE(kvm->arch.apicv_inhibit_reasons, new);
 
 	if (!!old == !!new)
-		return;
+		goto out;
 
 	trace_kvm_apicv_update_request(activate, bit);
-	if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
-		static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate);
 
 	/*
 	 * Sending request to update APICV for all other vcpus,
@@ -9244,10 +9247,24 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
 	 * waiting for another #VMEXIT to handle the request.
 	 */
 	except = kvm_get_running_vcpu();
+
+	/*
+	 * on SVM, raising the KVM_REQ_APICV_UPDATE request while holding the
+	 *  apicv_update_lock ensures that we kick all vCPUs out of the
+	 *  guest mode and let them wait until the AVIC memslot update
+	 *  has completed.
+	 */
+
 	kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
 					 except);
+
+	if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
+		static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate);
+
 	if (except)
-		kvm_vcpu_update_apicv(except);
+		__kvm_vcpu_update_apicv(except);
+out:
+	mutex_unlock(&kvm->apicv_update_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
 
@@ -9454,8 +9471,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
-		if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
+		if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) {
+			srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_update_apicv(vcpu);
+			vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		}
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
 		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 37cbb56ccd09..0364d35d43dc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -524,6 +524,7 @@ struct kvm {
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
 	struct mutex slots_lock;
+	struct mutex apicv_update_lock;
 
 	/*
 	 * Protects the arch-specific fields of struct kvm_memory_slots in
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ed4d1581d502..ba5d5d9ebc64 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -943,6 +943,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
 	mutex_init(&kvm->slots_arch_lock);
+	mutex_init(&kvm->apicv_update_lock);
 	INIT_LIST_HEAD(&kvm->devices);
 
 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);


> 
> (As an aside, I'd like to get rid of KVM_REQ_MCLOCK_IN_PROGRESS in 5.15...).
> 
> Paolo
>