Syzroot reports crash in kvm_irqfd_assign() is caused by use-after-free.
Because kvm_irqfd_assign() and kvm_irqfd_deassign() can't run in parallel
for same eventfd. When assign path hasn't been finished after irqfd
has been added to kvm->irqfds.items list, another thead may deassign the
eventfd and free struct kvm_kernel_irqfd(). This causes assign path still
uses struct kvm_kernel_irqfd freed by deassign path. To avoid such issue,
add "initialized" flag in the struct kvm_kernel_irqfd and check the flag before
deactivating irqfd. If irqfd is still in initialization, deassign path
return fault.
Reported-by: Dmitry Vyukov <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Tianyu Lan <[email protected]>
---
include/linux/kvm_irqfd.h | 1 +
virt/kvm/eventfd.c | 11 +++++++++--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 76c2fbc..be6b254 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -66,6 +66,7 @@ struct kvm_kernel_irqfd {
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
+ u8 initialized:1;
};
#endif /* __LINUX_KVM_IRQFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a334399..80f06e6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -421,6 +421,7 @@ int __attribute__((weak)) kvm_arch_update_irqfd_routing(
}
#endif
+ irqfd->initialized = 1;
return 0;
fail:
@@ -525,6 +526,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
{
struct kvm_kernel_irqfd *irqfd, *tmp;
struct eventfd_ctx *eventfd;
+ int ret = 0;
eventfd = eventfd_ctx_fdget(args->fd);
if (IS_ERR(eventfd))
@@ -543,7 +545,12 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
write_seqcount_begin(&irqfd->irq_entry_sc);
irqfd->irq_entry.type = 0;
write_seqcount_end(&irqfd->irq_entry_sc);
- irqfd_deactivate(irqfd);
+
+ if (irqfd->initialized)
+ irqfd_deactivate(irqfd);
+ else
+ ret = -EFAULT;
+
}
}
@@ -557,7 +564,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
*/
flush_workqueue(irqfd_cleanup_wq);
- return 0;
+ return ret;
}
int
--
1.8.3.1
On 18.12.2017 00:40, Lan Tianyu wrote:
> Syzroot reports crash in kvm_irqfd_assign() is caused by use-after-free.
> Because kvm_irqfd_assign() and kvm_irqfd_deassign() can't run in parallel
> for same eventfd. When assign path hasn't been finished after irqfd
> has been added to kvm->irqfds.items list, another thead may deassign the
> eventfd and free struct kvm_kernel_irqfd(). This causes assign path still
> uses struct kvm_kernel_irqfd freed by deassign path. To avoid such issue,
> add "initialized" flag in the struct kvm_kernel_irqfd and check the flag before
> deactivating irqfd. If irqfd is still in initialization, deassign path
> return fault.>
> Reported-by: Dmitry Vyukov <[email protected]>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Cc: Wanpeng Li <[email protected]>
> Signed-off-by: Tianyu Lan <[email protected]>
> ---
> include/linux/kvm_irqfd.h | 1 +
> virt/kvm/eventfd.c | 11 +++++++++--
> 2 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
> index 76c2fbc..be6b254 100644
> --- a/include/linux/kvm_irqfd.h
> +++ b/include/linux/kvm_irqfd.h
> @@ -66,6 +66,7 @@ struct kvm_kernel_irqfd {
> struct work_struct shutdown;
> struct irq_bypass_consumer consumer;
> struct irq_bypass_producer *producer;
> + u8 initialized:1;
> };
>
> #endif /* __LINUX_KVM_IRQFD_H */
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index a334399..80f06e6 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -421,6 +421,7 @@ int __attribute__((weak)) kvm_arch_update_irqfd_routing(
> }
> #endif
>
> + irqfd->initialized = 1;
The ugly thing in kvm_irqfd_assign() is that we access irqfd without
holding a lock. I think that should rather be fixed than working around
that issue. (e.g. lock() -> lookup again -> verify still in list ->
unlock())
> return 0;
>
> fail:
> @@ -525,6 +526,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> {
Which tool are you using to generate diffs? git format-patch?
Mentioning, because the indicated function here .... (kvm_irqfd_deassign)
> struct kvm_kernel_irqfd *irqfd, *tmp;
> struct eventfd_ctx *eventfd;
> + int ret = 0;
>
> eventfd = eventfd_ctx_fdget(args->fd);
> if (IS_ERR(eventfd))
> @@ -543,7 +545,12 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> write_seqcount_begin(&irqfd->irq_entry_sc);
> irqfd->irq_entry.type = 0;
> write_seqcount_end(&irqfd->irq_entry_sc);
> - irqfd_deactivate(irqfd);
> +
> + if (irqfd->initialized)
> + irqfd_deactivate(irqfd);
> + else
> + ret = -EFAULT;
> +
> }
> }
>
> @@ -557,7 +564,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> */
and here are wrong and misleading (kvm_irqfd_deassig). (and just noticed
also in the second hunk)
> flush_workqueue(irqfd_cleanup_wq);
>
> - return 0;
> + return ret;
> }
>
> int
>
--
Thanks,
David / dhildenb
On 18/12/2017 09:30, David Hildenbrand wrote:
> The ugly thing in kvm_irqfd_assign() is that we access irqfd without
> holding a lock. I think that should rather be fixed than working around
> that issue. (e.g. lock() -> lookup again -> verify still in list ->
> unlock())
I wonder if it's even simpler:
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f2ac53ab8243..17ed298bd66f 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -387,7 +387,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
idx = srcu_read_lock(&kvm->irq_srcu);
irqfd_update(kvm, irqfd);
- srcu_read_unlock(&kvm->irq_srcu, idx);
list_add_tail(&irqfd->list, &kvm->irqfds.items);
@@ -420,10 +419,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
irqfd->consumer.token, ret);
}
#endif
+ srcu_read_unlock(&kvm->irq_srcu, idx);
return 0;
fail:
+ /* irq_srcu is *not* held here. */
if (irqfd->resampler)
irqfd_resampler_shutdown(irqfd);
Thanks,
Paolo
On 18.12.2017 09:50, Paolo Bonzini wrote:
> On 18/12/2017 09:30, David Hildenbrand wrote:
>> The ugly thing in kvm_irqfd_assign() is that we access irqfd without
>> holding a lock. I think that should rather be fixed than working around
>> that issue. (e.g. lock() -> lookup again -> verify still in list ->
>> unlock())
>
> I wonder if it's even simpler:
>
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index f2ac53ab8243..17ed298bd66f 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -387,7 +387,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>
> idx = srcu_read_lock(&kvm->irq_srcu);
> irqfd_update(kvm, irqfd);
> - srcu_read_unlock(&kvm->irq_srcu, idx);
>
> list_add_tail(&irqfd->list, &kvm->irqfds.items);
>
> @@ -420,10 +419,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
> irqfd->consumer.token, ret);
> }
> #endif
> + srcu_read_unlock(&kvm->irq_srcu, idx);
>
Was worried about the poll() call. But if that works, it would be very nice.
> return 0;
>
> fail:
> + /* irq_srcu is *not* held here. */
> if (irqfd->resampler)
> irqfd_resampler_shutdown(irqfd);
>
>
> Thanks,
>
> Paolo
>
--
Thanks,
David / dhildenb
On 18/12/2017 10:08, David Hildenbrand wrote:
> On 18.12.2017 09:50, Paolo Bonzini wrote:
>> On 18/12/2017 09:30, David Hildenbrand wrote:
>>> The ugly thing in kvm_irqfd_assign() is that we access irqfd without
>>> holding a lock. I think that should rather be fixed than working around
>>> that issue. (e.g. lock() -> lookup again -> verify still in list ->
>>> unlock())
>>
>> I wonder if it's even simpler:
>>
>> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
>> index f2ac53ab8243..17ed298bd66f 100644
>> --- a/virt/kvm/eventfd.c
>> +++ b/virt/kvm/eventfd.c
>> @@ -387,7 +387,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>>
>> idx = srcu_read_lock(&kvm->irq_srcu);
>> irqfd_update(kvm, irqfd);
>> - srcu_read_unlock(&kvm->irq_srcu, idx);
>>
>> list_add_tail(&irqfd->list, &kvm->irqfds.items);
>>
>> @@ -420,10 +419,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>> irqfd->consumer.token, ret);
>> }
>> #endif
>> + srcu_read_unlock(&kvm->irq_srcu, idx);
>>
>
> Was worried about the poll() call. But if that works, it would be very nice.
Good point.
The poll() call is effectively a callback to irqfd_ptable_queue_proc.
So, after the above change, rqfd_wakeup takes irq_srcu inside
wqh->lock, while kvm_irqfd_assign would take them in the opposite order.
However, this is a read-side critical section so this doesn't cause a
deadlock directly. The effect is only that synchronize_srcu would now
wait for wqh->lock to be released. The opposite, which *would* cause a
deadlock, would be a call to synchronize_srcu while wqh->lock is held.
However, this cannot happen because wqh->lock is a spinlock and
synchronize_srcu, which sleeps, cannot be called at all while wqh->lock
is held. So I think it's okay.
Thanks,
Paolo
>
>> return 0;
>>
>> fail:
>> + /* irq_srcu is *not* held here. */
>> if (irqfd->resampler)
>> irqfd_resampler_shutdown(irqfd);
>>
>>
>> Thanks,
>>
>> Paolo
>>
>
>
Hi David:
Thanks for your review.
On 2017年12月18日 16:30, David Hildenbrand wrote:
> On 18.12.2017 00:40, Lan Tianyu wrote:
>> Syzroot reports crash in kvm_irqfd_assign() is caused by use-after-free.
>> Because kvm_irqfd_assign() and kvm_irqfd_deassign() can't run in parallel
>> for same eventfd. When assign path hasn't been finished after irqfd
>> has been added to kvm->irqfds.items list, another thead may deassign the
>> eventfd and free struct kvm_kernel_irqfd(). This causes assign path still
>> uses struct kvm_kernel_irqfd freed by deassign path. To avoid such issue,
>> add "initialized" flag in the struct kvm_kernel_irqfd and check the flag before
>> deactivating irqfd. If irqfd is still in initialization, deassign path
>> return fault.>
>> Reported-by: Dmitry Vyukov <[email protected]>
>> Cc: Paolo Bonzini <[email protected]>
>> Cc: Radim Krčmář <[email protected]>
>> Cc: Dmitry Vyukov <[email protected]>
>> Cc: Wanpeng Li <[email protected]>
>> Signed-off-by: Tianyu Lan <[email protected]>
>> ---
>> include/linux/kvm_irqfd.h | 1 +
>> virt/kvm/eventfd.c | 11 +++++++++--
>> 2 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
>> index 76c2fbc..be6b254 100644
>> --- a/include/linux/kvm_irqfd.h
>> +++ b/include/linux/kvm_irqfd.h
>> @@ -66,6 +66,7 @@ struct kvm_kernel_irqfd {
>> struct work_struct shutdown;
>> struct irq_bypass_consumer consumer;
>> struct irq_bypass_producer *producer;
>> + u8 initialized:1;
>> };
>>
>> #endif /* __LINUX_KVM_IRQFD_H */
>> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
>> index a334399..80f06e6 100644
>> --- a/virt/kvm/eventfd.c
>> +++ b/virt/kvm/eventfd.c
>> @@ -421,6 +421,7 @@ int __attribute__((weak)) kvm_arch_update_irqfd_routing(
>> }
>> #endif
>>
>> + irqfd->initialized = 1;
>
> The ugly thing in kvm_irqfd_assign() is that we access irqfd without
> holding a lock. I think that should rather be fixed than working around
> that issue. (e.g. lock() -> lookup again -> verify still in list ->
> unlock())
The new lock should be always held in assign path otherwise we need to
lookup irqfds list frequently, right?
At first, I tried to use a mutex lock between assign and deassign path
but assign path already involves some locks and add new lock maybe
introduce dead lock. So I used flag check to replace with new lock.
>
>> return 0;
>>
>> fail:
>> @@ -525,6 +526,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>> {
>
> Which tool are you using to generate diffs? git format-patch?
Yes, I used git version 1.8.3.1 :)
This also confused me. I will try newer version.
>
> Mentioning, because the indicated function here .... (kvm_irqfd_deassign)
>
>> struct kvm_kernel_irqfd *irqfd, *tmp;
>> struct eventfd_ctx *eventfd;
>> + int ret = 0;
>>
>> eventfd = eventfd_ctx_fdget(args->fd);
>> if (IS_ERR(eventfd))
>> @@ -543,7 +545,12 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>> write_seqcount_begin(&irqfd->irq_entry_sc);
>> irqfd->irq_entry.type = 0;
>> write_seqcount_end(&irqfd->irq_entry_sc);
>> - irqfd_deactivate(irqfd);
>> +
>> + if (irqfd->initialized)
>> + irqfd_deactivate(irqfd);
>> + else
>> + ret = -EFAULT;
>> +
>> }
>> }
>>
>> @@ -557,7 +564,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>> */
>
> and here are wrong and misleading (kvm_irqfd_deassig). (and just noticed
> also in the second hunk)
>
>> flush_workqueue(irqfd_cleanup_wq);
>>
>> - return 0;
>> + return ret;
>> }
>>
>> int
>>
>
>
--
Best regards
Tianyu Lan
On 2017年12月18日 16:50, Paolo Bonzini wrote:
> On 18/12/2017 09:30, David Hildenbrand wrote:
>> The ugly thing in kvm_irqfd_assign() is that we access irqfd without
>> holding a lock. I think that should rather be fixed than working around
>> that issue. (e.g. lock() -> lookup again -> verify still in list ->
>> unlock())
>
> I wonder if it's even simpler:
>
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index f2ac53ab8243..17ed298bd66f 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -387,7 +387,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>
> idx = srcu_read_lock(&kvm->irq_srcu);
> irqfd_update(kvm, irqfd);
> - srcu_read_unlock(&kvm->irq_srcu, idx);
>
> list_add_tail(&irqfd->list, &kvm->irqfds.items);
>
> @@ -420,10 +419,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
> irqfd->consumer.token, ret);
> }
> #endif
> + srcu_read_unlock(&kvm->irq_srcu, idx);
>
> return 0;
>
> fail:
> + /* irq_srcu is *not* held here. */
> if (irqfd->resampler)
> irqfd_resampler_shutdown(irqfd);
>
>
Hi Paolo:
This patch still can't prevent from freeing struct irqfd in
irq_shutdown() during assign irqfd. Once irqfd is added to
kvm->irqfds.items list, deassign path can get irqfd and free it.
--
Best regards
Tianyu Lan
On 19/12/2017 07:35, Lan Tianyu wrote:
> On 2017年12月18日 16:50, Paolo Bonzini wrote:
>> On 18/12/2017 09:30, David Hildenbrand wrote:
>>> The ugly thing in kvm_irqfd_assign() is that we access irqfd without
>>> holding a lock. I think that should rather be fixed than working around
>>> that issue. (e.g. lock() -> lookup again -> verify still in list ->
>>> unlock())
>>
>> I wonder if it's even simpler:
>>
>> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
>> index f2ac53ab8243..17ed298bd66f 100644
>> --- a/virt/kvm/eventfd.c
>> +++ b/virt/kvm/eventfd.c
>> @@ -387,7 +387,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>>
>> idx = srcu_read_lock(&kvm->irq_srcu);
>> irqfd_update(kvm, irqfd);
>> - srcu_read_unlock(&kvm->irq_srcu, idx);
>>
>> list_add_tail(&irqfd->list, &kvm->irqfds.items);
>>
>> @@ -420,10 +419,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>> irqfd->consumer.token, ret);
>> }
>> #endif
>> + srcu_read_unlock(&kvm->irq_srcu, idx);
>>
>> return 0;
>>
>> fail:
>> + /* irq_srcu is *not* held here. */
>> if (irqfd->resampler)
>> irqfd_resampler_shutdown(irqfd);
>>
>>
>
> Hi Paolo:
> This patch still can't prevent from freeing struct irqfd in
> irq_shutdown() during assign irqfd. Once irqfd is added to
> kvm->irqfds.items list, deassign path can get irqfd and free it.
You're right, that also has to be protected by SRCU. So a new bool is
needed (probably "active" more than "initialized") in order to replace
list_del_init with list_del_rcu.
Paolo
On 2017年12月19日 18:41, Paolo Bonzini wrote:
> On 19/12/2017 07:35, Lan Tianyu wrote:
>> On 2017年12月18日 16:50, Paolo Bonzini wrote:
>>> On 18/12/2017 09:30, David Hildenbrand wrote:
>>>> The ugly thing in kvm_irqfd_assign() is that we access irqfd without
>>>> holding a lock. I think that should rather be fixed than working around
>>>> that issue. (e.g. lock() -> lookup again -> verify still in list ->
>>>> unlock())
>>>
>>> I wonder if it's even simpler:
>>>
>>> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
>>> index f2ac53ab8243..17ed298bd66f 100644
>>> --- a/virt/kvm/eventfd.c
>>> +++ b/virt/kvm/eventfd.c
>>> @@ -387,7 +387,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>>>
>>> idx = srcu_read_lock(&kvm->irq_srcu);
>>> irqfd_update(kvm, irqfd);
>>> - srcu_read_unlock(&kvm->irq_srcu, idx);
>>>
>>> list_add_tail(&irqfd->list, &kvm->irqfds.items);
>>>
>>> @@ -420,10 +419,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>>> irqfd->consumer.token, ret);
>>> }
>>> #endif
>>> + srcu_read_unlock(&kvm->irq_srcu, idx);
>>>
>>> return 0;
>>>
>>> fail:
>>> + /* irq_srcu is *not* held here. */
>>> if (irqfd->resampler)
>>> irqfd_resampler_shutdown(irqfd);
>>>
>>>
>>
>> Hi Paolo:
>> This patch still can't prevent from freeing struct irqfd in
>> irq_shutdown() during assign irqfd. Once irqfd is added to
>> kvm->irqfds.items list, deassign path can get irqfd and free it.
>
> You're right, that also has to be protected by SRCU. So a new bool is
> needed (probably "active" more than "initialized") in order to replace
> list_del_init with list_del_rcu.
>
Adding new flag is sufficient to fix the crash issue. All list
operations for kvm->irqfds.items list are already under protection
kvm->irqfds.lock. Do you mean we should use list_add/del_rcu for
kvm->irqfds.items?
--
Best regards
Tianyu Lan
On Mon, Dec 18, 2017 at 09:50:04AM +0100, Paolo Bonzini wrote:
> On 18/12/2017 09:30, David Hildenbrand wrote:
> > The ugly thing in kvm_irqfd_assign() is that we access irqfd without
> > holding a lock. I think that should rather be fixed than working around
> > that issue. (e.g. lock() -> lookup again -> verify still in list ->
> > unlock())
>
> I wonder if it's even simpler:
>
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index f2ac53ab8243..17ed298bd66f 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -387,7 +387,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>
> idx = srcu_read_lock(&kvm->irq_srcu);
> irqfd_update(kvm, irqfd);
> - srcu_read_unlock(&kvm->irq_srcu, idx);
>
> list_add_tail(&irqfd->list, &kvm->irqfds.items);
>
> @@ -420,10 +419,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
> irqfd->consumer.token, ret);
> }
> #endif
> + srcu_read_unlock(&kvm->irq_srcu, idx);
>
> return 0;
>
> fail:
> + /* irq_srcu is *not* held here. */
> if (irqfd->resampler)
> irqfd_resampler_shutdown(irqfd);
Sorry if I miss anything important, but... should we extend the unlock
of kvm->irqfds.lock instead of kvm->irq_srcu here? AFAIU kvm->irq_srcu
is protecting accesses to kvm->irq_routing, while kvm->irqfds.lock is
the one that really protects kvm->irqfds? Thanks,
--
Peter Xu