Date: Tue, 6 Jun 2017 20:45:22 +0800
From: "Longpeng (Mike)"
To: Paolo Bonzini
Cc: Huangweidong, Gonglei, wangxin, Radim Krčmář
Subject: Re: [PATCH 2/4] KVM: VMX: avoid double list add with VT-d posted interrupts
Message-ID: <5936A3E2.1090509@huawei.com>
References: <20170606105707.23207-1-pbonzini@redhat.com> <20170606105707.23207-3-pbonzini@redhat.com> <5936A080.2070306@huawei.com>

On 2017/6/6 20:35, Paolo Bonzini wrote:
>
>
> On 06/06/2017 14:30, Longpeng (Mike) wrote:
>>
>>
>> On 2017/6/6 18:57, Paolo Bonzini wrote:
>>
>>> In some cases, for example involving hot-unplug of assigned
>>> devices, pi_post_block can forget to remove the vCPU from the
>>> blocked_vcpu_list.  When this happens, the next call to
>>> pi_pre_block corrupts the list.
>>>
>>> Fix this in two ways.  First, check vcpu->pre_pcpu in pi_pre_block
>>> and WARN instead of adding the element twice in the list.  Second,
>>> always do the list removal in pi_post_block if vcpu->pre_pcpu is
>>> set (not -1).
>>>
>>> The new code keeps interrupts disabled for the whole duration of
>>> pi_pre_block/pi_post_block.  This is not strictly necessary, but
>>> easier to follow.  For the same reason, PI.ON is checked only
>>> after the cmpxchg, and to handle it we just call the post-block
>>> code.  This removes duplication of the list removal code.
>>>
>>> Cc: Longpeng (Mike)
>>> Cc: Huangweidong
>>> Cc: Gonglei
>>> Cc: wangxin
>>> Cc: Radim Krčmář
>>> Signed-off-by: Paolo Bonzini
>>> ---
>>>  arch/x86/kvm/vmx.c | 62 ++++++++++++++++++++++--------------------------------
>>>  1 file changed, 25 insertions(+), 37 deletions(-)
>>>
>>
>>
>> [...]
>>
>>
>>> @@ -11256,14 +11257,10 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
>>>  	} while (cmpxchg(&pi_desc->control, old.control,
>>>  			new.control) != old.control);
>>>
>>> -	if(vcpu->pre_pcpu != -1) {
>>> -		spin_lock_irqsave(
>>> -			&per_cpu(blocked_vcpu_on_cpu_lock,
>>> -			vcpu->pre_pcpu), flags);
>>> +	if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
>>> +		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
>>>  		list_del(&vcpu->blocked_vcpu_list);
>>> -		spin_unlock_irqrestore(
>>> -			&per_cpu(blocked_vcpu_on_cpu_lock,
>>> -			vcpu->pre_pcpu), flags);
>>> +		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
>>
>>
>> Hi Paolo,
>>
>> spin_lock_irqsave() disables local interrupts, but spin_lock() doesn't.
>> Are there any potential problems here?
>
> Hi,
>
> This function (and the part of pi_pre_block that takes the spin lock)
> now runs with interrupts disabled.
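
For reference, here is a minimal sketch of the locking pattern described above.
The demo_* names are made up for illustration and are not the actual vmx.c code,
and the per-CPU lock and list are assumed to be initialized elsewhere with
spin_lock_init()/INIT_LIST_HEAD(). The point is that once the caller has
disabled local interrupts, the wakeup interrupt handler can no longer run on
this CPU, so the per-CPU lock can be taken with a plain spin_lock() instead of
spin_lock_irqsave().

#include <linux/irqflags.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

/* Hypothetical per-CPU state, mirroring blocked_vcpu_on_cpu{,_lock}. */
static DEFINE_PER_CPU(spinlock_t, demo_lock);
static DEFINE_PER_CPU(struct list_head, demo_list);

struct demo_vcpu {
	int pre_pcpu;			/* -1 while not on any per-CPU list */
	struct list_head blocked_link;
};

/* Must be called with local interrupts disabled. */
static void __demo_post_block(struct demo_vcpu *v)
{
	if (!WARN_ON_ONCE(v->pre_pcpu == -1)) {
		/* Plain spin_lock(): no interrupt can take the lock under us. */
		spin_lock(&per_cpu(demo_lock, v->pre_pcpu));
		list_del(&v->blocked_link);
		spin_unlock(&per_cpu(demo_lock, v->pre_pcpu));
		v->pre_pcpu = -1;
	}
}

static void demo_pre_block(struct demo_vcpu *v, int cpu)
{
	/* Interrupts stay off for the whole add-to-list sequence. */
	WARN_ON(irqs_disabled());
	local_irq_disable();
	if (!WARN_ON_ONCE(v->pre_pcpu != -1)) {
		v->pre_pcpu = cpu;
		spin_lock(&per_cpu(demo_lock, v->pre_pcpu));
		list_add_tail(&v->blocked_link, &per_cpu(demo_list, v->pre_pcpu));
		spin_unlock(&per_cpu(demo_lock, v->pre_pcpu));
	}
	local_irq_enable();
}

static void demo_post_block(struct demo_vcpu *v)
{
	if (v->pre_pcpu == -1)
		return;

	WARN_ON(irqs_disabled());
	local_irq_disable();
	__demo_post_block(v);
	local_irq_enable();
}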
Oh, yes, please forgive my foolishness. We'll continue to investigate why the
list gets corrupted on repeated poweron/shutdown.

Thanks.

> Thanks,
>
> Paolo
>
>> Regards,
>> Longpeng(Mike)
>>
>>>  		vcpu->pre_pcpu = -1;
>>>  	}
>>>  }
>>> @@ -11283,7 +11280,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
>>>   */
>>>  static int pi_pre_block(struct kvm_vcpu *vcpu)
>>>  {
>>> -	unsigned long flags;
>>>  	unsigned int dest;
>>>  	struct pi_desc old, new;
>>>  	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
>>> @@ -11293,34 +11289,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
>>>  		!kvm_vcpu_apicv_active(vcpu))
>>>  		return 0;
>>>
>>> -	vcpu->pre_pcpu = vcpu->cpu;
>>> -	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
>>> -			  vcpu->pre_pcpu), flags);
>>> -	list_add_tail(&vcpu->blocked_vcpu_list,
>>> -		      &per_cpu(blocked_vcpu_on_cpu,
>>> -		      vcpu->pre_pcpu));
>>> -	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
>>> -			       vcpu->pre_pcpu), flags);
>>> +	WARN_ON(irqs_disabled());
>>> +	local_irq_disable();
>>> +	if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
>>> +		vcpu->pre_pcpu = vcpu->cpu;
>>> +		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
>>> +		list_add_tail(&vcpu->blocked_vcpu_list,
>>> +			      &per_cpu(blocked_vcpu_on_cpu,
>>> +				       vcpu->pre_pcpu));
>>> +		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
>>> +	}
>>>
>>>  	do {
>>>  		old.control = new.control = pi_desc->control;
>>>
>>> -		/*
>>> -		 * We should not block the vCPU if
>>> -		 * an interrupt is posted for it.
>>> -		 */
>>> -		if (pi_test_on(pi_desc) == 1) {
>>> -			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
>>> -					  vcpu->pre_pcpu), flags);
>>> -			list_del(&vcpu->blocked_vcpu_list);
>>> -			spin_unlock_irqrestore(
>>> -					&per_cpu(blocked_vcpu_on_cpu_lock,
>>> -					vcpu->pre_pcpu), flags);
>>> -			vcpu->pre_pcpu = -1;
>>> -
>>> -			return 1;
>>> -		}
>>> -
>>>  		WARN((pi_desc->sn == 1),
>>>  		     "Warning: SN field of posted-interrupts "
>>>  		     "is set before blocking\n");
>>> @@ -11345,7 +11327,12 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
>>>  	} while (cmpxchg(&pi_desc->control, old.control,
>>>  			new.control) != old.control);
>>>
>>> -	return 0;
>>> +	/* We should not block the vCPU if an interrupt is posted for it. */
>>> +	if (pi_test_on(pi_desc) == 1)
>>> +		__pi_post_block(vcpu);
>>> +
>>> +	local_irq_enable();
>>> +	return (vcpu->pre_pcpu == -1);
>>>  }
>>>
>>>  static int vmx_pre_block(struct kvm_vcpu *vcpu)
>>> @@ -11361,12 +11348,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
>>>
>>>  static void pi_post_block(struct kvm_vcpu *vcpu)
>>>  {
>>> -	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
>>> -	    !irq_remapping_cap(IRQ_POSTING_CAP) ||
>>> -	    !kvm_vcpu_apicv_active(vcpu))
>>> +	if (vcpu->pre_pcpu == -1)
>>>  		return;
>>>
>>> +	WARN_ON(irqs_disabled());
>>> +	local_irq_disable();
>>>  	__pi_post_block(vcpu);
>>> +	local_irq_enable();
>>>  }
>>>
>>>  static void vmx_post_block(struct kvm_vcpu *vcpu)
>>
>>
>
> .
>

--
Regards,
Longpeng(Mike)