In current async pagefault logic, when a page is ready, KVM relies on
kvm_arch_can_dequeue_async_page_present() to determine whether to deliver
a READY event to the Guest. This function test token value of struct
kvm_vcpu_pv_apf_data, which must be reset to zero by Guest kernel when a
READY event is finished by Guest. If value is zero meaning that a READY
event is done, so the KVM can deliver another.
But the kvm_arch_setup_async_pf() may produce a valid token with zero
value, which is confused with previous mention and may lead the loss of
this READY event.
This bug may cause task blocked forever in Guest:
INFO: task stress:7532 blocked for more than 1254 seconds.
Not tainted 5.10.0 #16
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:stress state:D stack: 0 pid: 7532 ppid: 1409
flags:0x00000080
Call Trace:
__schedule+0x1e7/0x650
schedule+0x46/0xb0
kvm_async_pf_task_wait_schedule+0xad/0xe0
? exit_to_user_mode_prepare+0x60/0x70
__kvm_handle_async_pf+0x4f/0xb0
? asm_exc_page_fault+0x8/0x30
exc_page_fault+0x6f/0x110
? asm_exc_page_fault+0x8/0x30
asm_exc_page_fault+0x1e/0x30
RIP: 0033:0x402d00
RSP: 002b:00007ffd31912500 EFLAGS: 00010206
RAX: 0000000000071000 RBX: ffffffffffffffff RCX: 00000000021a32b0
RDX: 000000000007d011 RSI: 000000000007d000 RDI: 00000000021262b0
RBP: 00000000021262b0 R08: 0000000000000003 R09: 0000000000000086
R10: 00000000000000eb R11: 00007fefbdf2baa0 R12: 0000000000000000
R13: 0000000000000002 R14: 000000000007d000 R15: 0000000000001000
Signed-off-by: Liang Zhang <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 593093b52395..8e24f73bf60b 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}
+static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
+{
+ /* make sure the token value is not 0 */
+ u32 id = vcpu->arch.apf.id;
+
+ if (id << 12 == 0)
+ vcpu->arch.apf.id = 1;
+
+ return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+}
+
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
{
struct kvm_arch_async_pf arch;
- arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+ arch.token = alloc_apf_token(vcpu);
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
--
2.30.0
On 2/22/22 04:12, Liang Zhang wrote:
> In current async pagefault logic, when a page is ready, KVM relies on
> kvm_arch_can_dequeue_async_page_present() to determine whether to deliver
> a READY event to the Guest. This function test token value of struct
> kvm_vcpu_pv_apf_data, which must be reset to zero by Guest kernel when a
> READY event is finished by Guest. If value is zero meaning that a READY
> event is done, so the KVM can deliver another.
> But the kvm_arch_setup_async_pf() may produce a valid token with zero
> value, which is confused with previous mention and may lead the loss of
> this READY event.
>
> This bug may cause task blocked forever in Guest:
> INFO: task stress:7532 blocked for more than 1254 seconds.
> Not tainted 5.10.0 #16
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> task:stress state:D stack: 0 pid: 7532 ppid: 1409
> flags:0x00000080
> Call Trace:
> __schedule+0x1e7/0x650
> schedule+0x46/0xb0
> kvm_async_pf_task_wait_schedule+0xad/0xe0
> ? exit_to_user_mode_prepare+0x60/0x70
> __kvm_handle_async_pf+0x4f/0xb0
> ? asm_exc_page_fault+0x8/0x30
> exc_page_fault+0x6f/0x110
> ? asm_exc_page_fault+0x8/0x30
> asm_exc_page_fault+0x1e/0x30
> RIP: 0033:0x402d00
> RSP: 002b:00007ffd31912500 EFLAGS: 00010206
> RAX: 0000000000071000 RBX: ffffffffffffffff RCX: 00000000021a32b0
> RDX: 000000000007d011 RSI: 000000000007d000 RDI: 00000000021262b0
> RBP: 00000000021262b0 R08: 0000000000000003 R09: 0000000000000086
> R10: 00000000000000eb R11: 00007fefbdf2baa0 R12: 0000000000000000
> R13: 0000000000000002 R14: 000000000007d000 R15: 0000000000001000
>
> Signed-off-by: Liang Zhang <[email protected]>
> ---
> arch/x86/kvm/mmu/mmu.c | 13 ++++++++++++-
> 1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 593093b52395..8e24f73bf60b 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
> walk_shadow_page_lockless_end(vcpu);
> }
>
> +static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
> +{
> + /* make sure the token value is not 0 */
> + u32 id = vcpu->arch.apf.id;
> +
> + if (id << 12 == 0)
> + vcpu->arch.apf.id = 1;
> +
> + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
> +}
> +
> static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> gfn_t gfn)
> {
> struct kvm_arch_async_pf arch;
>
> - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
> + arch.token = alloc_apf_token(vcpu);
> arch.gfn = gfn;
> arch.direct_map = vcpu->arch.mmu->direct_map;
> arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
Queued, thanks.
Paolo
Thank you and sorry for the noise.
On 2022/3/10 11:42, zhangliang (AG) wrote:
> No. Because '(++vcpu->arch.apf.id << 12)' may also produce zero value.
>
> On 2022/3/10 11:34, Xinlong Lin wrote:
>>
>>
>> On 2022/2/22 11:12, Liang Zhang wrote:
>>> In current async pagefault logic, when a page is ready, KVM relies on
>>> kvm_arch_can_dequeue_async_page_present() to determine whether to deliver
>>> a READY event to the Guest. This function test token value of struct
>>> kvm_vcpu_pv_apf_data, which must be reset to zero by Guest kernel when a
>>> READY event is finished by Guest. If value is zero meaning that a READY
>>> event is done, so the KVM can deliver another.
>>> But the kvm_arch_setup_async_pf() may produce a valid token with zero
>>> value, which is confused with previous mention and may lead the loss of
>>> this READY event.
>>>
>>> This bug may cause task blocked forever in Guest:
>>> INFO: task stress:7532 blocked for more than 1254 seconds.
>>> Not tainted 5.10.0 #16
>>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>>> task:stress state:D stack: 0 pid: 7532 ppid: 1409
>>> flags:0x00000080
>>> Call Trace:
>>> __schedule+0x1e7/0x650
>>> schedule+0x46/0xb0
>>> kvm_async_pf_task_wait_schedule+0xad/0xe0
>>> ? exit_to_user_mode_prepare+0x60/0x70
>>> __kvm_handle_async_pf+0x4f/0xb0
>>> ? asm_exc_page_fault+0x8/0x30
>>> exc_page_fault+0x6f/0x110
>>> ? asm_exc_page_fault+0x8/0x30
>>> asm_exc_page_fault+0x1e/0x30
>>> RIP: 0033:0x402d00
>>> RSP: 002b:00007ffd31912500 EFLAGS: 00010206
>>> RAX: 0000000000071000 RBX: ffffffffffffffff RCX: 00000000021a32b0
>>> RDX: 000000000007d011 RSI: 000000000007d000 RDI: 00000000021262b0
>>> RBP: 00000000021262b0 R08: 0000000000000003 R09: 0000000000000086
>>> R10: 00000000000000eb R11: 00007fefbdf2baa0 R12: 0000000000000000
>>> R13: 0000000000000002 R14: 000000000007d000 R15: 0000000000001000
>>>
>>> Signed-off-by: Liang Zhang <[email protected]>
>>> ---
>>> arch/x86/kvm/mmu/mmu.c | 13 ++++++++++++-
>>> 1 file changed, 12 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>>> index 593093b52395..8e24f73bf60b 100644
>>> --- a/arch/x86/kvm/mmu/mmu.c
>>> +++ b/arch/x86/kvm/mmu/mmu.c
>>> @@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
>>> walk_shadow_page_lockless_end(vcpu);
>>> }
>>> +static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
>>> +{
>>> + /* make sure the token value is not 0 */
>>> + u32 id = vcpu->arch.apf.id;
>>> +
>>> + if (id << 12 == 0)
>>> + vcpu->arch.apf.id = 1;
>>> +
>>> + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
>>> +}
>>> +
>>> static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>>> gfn_t gfn)
>>> {
>>> struct kvm_arch_async_pf arch;
>>> - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
>> This patch is completely OK. But I have a question, can we simplify it to
>> arch.token = (++vcpu->arch.apf.id << 12) | vcpu->vcpu_id;
>>> + arch.token = alloc_apf_token(vcpu);
>>> arch.gfn = gfn;
>>> arch.direct_map = vcpu->arch.mmu->direct_map;
>>> arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
>>
>> .
>
No. Because '(++vcpu->arch.apf.id << 12)' may also produce zero value.
On 2022/3/10 11:34, Xinlong Lin wrote:
>
>
> On 2022/2/22 11:12, Liang Zhang wrote:
>> In current async pagefault logic, when a page is ready, KVM relies on
>> kvm_arch_can_dequeue_async_page_present() to determine whether to deliver
>> a READY event to the Guest. This function test token value of struct
>> kvm_vcpu_pv_apf_data, which must be reset to zero by Guest kernel when a
>> READY event is finished by Guest. If value is zero meaning that a READY
>> event is done, so the KVM can deliver another.
>> But the kvm_arch_setup_async_pf() may produce a valid token with zero
>> value, which is confused with previous mention and may lead the loss of
>> this READY event.
>>
>> This bug may cause task blocked forever in Guest:
>> INFO: task stress:7532 blocked for more than 1254 seconds.
>> Not tainted 5.10.0 #16
>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>> task:stress state:D stack: 0 pid: 7532 ppid: 1409
>> flags:0x00000080
>> Call Trace:
>> __schedule+0x1e7/0x650
>> schedule+0x46/0xb0
>> kvm_async_pf_task_wait_schedule+0xad/0xe0
>> ? exit_to_user_mode_prepare+0x60/0x70
>> __kvm_handle_async_pf+0x4f/0xb0
>> ? asm_exc_page_fault+0x8/0x30
>> exc_page_fault+0x6f/0x110
>> ? asm_exc_page_fault+0x8/0x30
>> asm_exc_page_fault+0x1e/0x30
>> RIP: 0033:0x402d00
>> RSP: 002b:00007ffd31912500 EFLAGS: 00010206
>> RAX: 0000000000071000 RBX: ffffffffffffffff RCX: 00000000021a32b0
>> RDX: 000000000007d011 RSI: 000000000007d000 RDI: 00000000021262b0
>> RBP: 00000000021262b0 R08: 0000000000000003 R09: 0000000000000086
>> R10: 00000000000000eb R11: 00007fefbdf2baa0 R12: 0000000000000000
>> R13: 0000000000000002 R14: 000000000007d000 R15: 0000000000001000
>>
>> Signed-off-by: Liang Zhang <[email protected]>
>> ---
>> arch/x86/kvm/mmu/mmu.c | 13 ++++++++++++-
>> 1 file changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>> index 593093b52395..8e24f73bf60b 100644
>> --- a/arch/x86/kvm/mmu/mmu.c
>> +++ b/arch/x86/kvm/mmu/mmu.c
>> @@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
>> walk_shadow_page_lockless_end(vcpu);
>> }
>> +static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
>> +{
>> + /* make sure the token value is not 0 */
>> + u32 id = vcpu->arch.apf.id;
>> +
>> + if (id << 12 == 0)
>> + vcpu->arch.apf.id = 1;
>> +
>> + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
>> +}
>> +
>> static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>> gfn_t gfn)
>> {
>> struct kvm_arch_async_pf arch;
>> - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
> This patch is completely OK. But I have a question, can we simplify it to
> arch.token = (++vcpu->arch.apf.id << 12) | vcpu->vcpu_id;
>> + arch.token = alloc_apf_token(vcpu);
>> arch.gfn = gfn;
>> arch.direct_map = vcpu->arch.mmu->direct_map;
>> arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
>
> .
--
Best Regards,
Liang Zhang
On 2022/2/22 11:12, Liang Zhang wrote:
> In current async pagefault logic, when a page is ready, KVM relies on
> kvm_arch_can_dequeue_async_page_present() to determine whether to deliver
> a READY event to the Guest. This function test token value of struct
> kvm_vcpu_pv_apf_data, which must be reset to zero by Guest kernel when a
> READY event is finished by Guest. If value is zero meaning that a READY
> event is done, so the KVM can deliver another.
> But the kvm_arch_setup_async_pf() may produce a valid token with zero
> value, which is confused with previous mention and may lead the loss of
> this READY event.
>
> This bug may cause task blocked forever in Guest:
> INFO: task stress:7532 blocked for more than 1254 seconds.
> Not tainted 5.10.0 #16
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> task:stress state:D stack: 0 pid: 7532 ppid: 1409
> flags:0x00000080
> Call Trace:
> __schedule+0x1e7/0x650
> schedule+0x46/0xb0
> kvm_async_pf_task_wait_schedule+0xad/0xe0
> ? exit_to_user_mode_prepare+0x60/0x70
> __kvm_handle_async_pf+0x4f/0xb0
> ? asm_exc_page_fault+0x8/0x30
> exc_page_fault+0x6f/0x110
> ? asm_exc_page_fault+0x8/0x30
> asm_exc_page_fault+0x1e/0x30
> RIP: 0033:0x402d00
> RSP: 002b:00007ffd31912500 EFLAGS: 00010206
> RAX: 0000000000071000 RBX: ffffffffffffffff RCX: 00000000021a32b0
> RDX: 000000000007d011 RSI: 000000000007d000 RDI: 00000000021262b0
> RBP: 00000000021262b0 R08: 0000000000000003 R09: 0000000000000086
> R10: 00000000000000eb R11: 00007fefbdf2baa0 R12: 0000000000000000
> R13: 0000000000000002 R14: 000000000007d000 R15: 0000000000001000
>
> Signed-off-by: Liang Zhang <[email protected]>
> ---
> arch/x86/kvm/mmu/mmu.c | 13 ++++++++++++-
> 1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 593093b52395..8e24f73bf60b 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
> walk_shadow_page_lockless_end(vcpu);
> }
>
> +static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
> +{
> + /* make sure the token value is not 0 */
> + u32 id = vcpu->arch.apf.id;
> +
> + if (id << 12 == 0)
> + vcpu->arch.apf.id = 1;
> +
> + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
> +}
> +
> static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> gfn_t gfn)
> {
> struct kvm_arch_async_pf arch;
>
> - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
This patch is completely OK. But I have a question, can we simplify it to
arch.token = (++vcpu->arch.apf.id << 12) | vcpu->vcpu_id;
> + arch.token = alloc_apf_token(vcpu);
> arch.gfn = gfn;
> arch.direct_map = vcpu->arch.mmu->direct_map;
> arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);