svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
source and target vcpu->locks. Mark the nested subclasses to avoid false
positives from lockdep.
Warning example:
============================================
WARNING: possible recursive locking detected
5.17.0-dbg-DEV #15 Tainted: G O
--------------------------------------------
sev_migrate_tes/18859 is trying to acquire lock:
ffff8d672d484238 (&vcpu->mutex){+.+.}-{3:3}, at: sev_lock_vcpus_for_migration+0x7e/0x150
but task is already holding lock:
ffff8d67703f81f8 (&vcpu->mutex){+.+.}-{3:3}, at: sev_lock_vcpus_for_migration+0x7e/0x150
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&vcpu->mutex);
lock(&vcpu->mutex);
*** DEADLOCK ***
May be due to missing lock nesting notation
3 locks held by sev_migrate_tes/18859:
#0: ffff9302f91323b8 (&kvm->lock){+.+.}-{3:3}, at: sev_vm_move_enc_context_from+0x96/0x740
#1: ffff9302f906a3b8 (&kvm->lock/1){+.+.}-{3:3}, at: sev_vm_move_enc_context_from+0xae/0x740
#2: ffff8d67703f81f8 (&vcpu->mutex){+.+.}-{3:3}, at: sev_lock_vcpus_for_migration+0x7e/0x150
Fixes: b56639318bb2b ("KVM: SEV: Add support for SEV intra host migration")
Reported-by: John Sperbeck <[email protected]>
Suggested-by: David Rientjes <[email protected]>
Suggested-by: Sean Christopherson <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Peter Gonda <[email protected]>
---
V3
* Updated signature to enum to self-document argument.
* Updated comment as Seanjc@ suggested.
Tested by running sev_migrate_tests with lockdep enabled. Before this
patch lockdep warns in sev_lock_vcpus_for_migration(); after it, there
are no warnings.
---
arch/x86/kvm/svm/sev.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 75fa6dd268f0..f66550ec8eaf 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1591,14 +1591,26 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
atomic_set_release(&src_sev->migration_in_progress, 0);
}
+/*
+ * To suppress lockdep false positives, subclass all vCPU mutex locks by
+ * assigning even numbers to the source vCPUs and odd numbers to destination
+ * vCPUs based on the vCPU's index.
+ */
+enum sev_migration_role {
+ SEV_MIGRATION_SOURCE = 0,
+ SEV_MIGRATION_TARGET,
+ SEV_NR_MIGRATION_ROLES,
+};
-static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+static int sev_lock_vcpus_for_migration(struct kvm *kvm,
+ enum sev_migration_role role)
{
struct kvm_vcpu *vcpu;
unsigned long i, j;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (mutex_lock_killable(&vcpu->mutex))
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (mutex_lock_killable_nested(
+ &vcpu->mutex, i * SEV_NR_MIGRATION_ROLES + role))
goto out_unlock;
}
@@ -1745,10 +1757,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
charged = true;
}
- ret = sev_lock_vcpus_for_migration(kvm);
+ ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
if (ret)
goto out_dst_cgroup;
- ret = sev_lock_vcpus_for_migration(source_kvm);
+ ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
if (ret)
goto out_dst_vcpu;
--
2.35.1.1178.g4f1659d476-goog
On Thu, Apr 7, 2022 at 12:59 PM Peter Gonda <[email protected]> wrote:
>
> svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
> source and target vcpu->locks. Mark the nested subclasses to avoid false
> positives from lockdep.
> [...]
Does sev_migrate_tests survive lockdep checking if
NR_MIGRATE_TEST_VCPUS is changed to 16?
On Thu, Apr 7, 2022 at 3:17 PM John Sperbeck <[email protected]> wrote:
>
> On Thu, Apr 7, 2022 at 12:59 PM Peter Gonda <[email protected]> wrote:
> >
> > svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
> > source and target vcpu->locks. Mark the nested subclasses to avoid false
> > positives from lockdep.
Nope. Good catch, I didn't realize there was a limit of 8 subclasses:
[ 509.093776] BUG: looking up invalid subclass: 8
[ 509.098314] turning off the locking correctness validator.
[ 509.103800] CPU: 185 PID: 28570 Comm: sev_migrate_tes Tainted: G O 5.17.0-dbg-DEV #24
[ 509.112925] Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 30.6.12-gce 09/27/2021
[ 509.126386] Call Trace:
[ 509.128835] <TASK>
[ 509.130939] dump_stack_lvl+0x6c/0x9a
[ 509.134609] dump_stack+0x10/0x12
[ 509.137925] look_up_lock_class+0xf1/0x130
[ 509.142027] register_lock_class+0x54/0x730
[ 509.146214] __lock_acquire+0x85/0xf00
[ 509.149964] ? lock_is_held_type+0xff/0x170
[ 509.154154] lock_acquire+0xca/0x210
[ 509.157730] ? sev_lock_vcpus_for_migration+0x82/0x150
[ 509.162872] __mutex_lock_common+0xe4/0xe30
[ 509.167054] ? sev_lock_vcpus_for_migration+0x82/0x150
[ 509.172194] ? sev_lock_vcpus_for_migration+0x82/0x150
[ 509.177335] ? rcu_lock_release+0x17/0x20
[ 509.181348] mutex_lock_killable_nested+0x20/0x30
[ 509.186053] sev_lock_vcpus_for_migration+0x82/0x150
[ 509.191019] sev_vm_move_enc_context_from+0x190/0x750
[ 509.196072] ? lock_release+0x20e/0x290
[ 509.199912] kvm_vm_ioctl_enable_cap+0x29d/0x320
[ 509.204531] kvm_vm_ioctl+0xc58/0x1060
[ 509.208285] ? __this_cpu_preempt_check+0x13/0x20
[ 509.212989] __se_sys_ioctl+0x77/0xc0
[ 509.216656] __x64_sys_ioctl+0x1d/0x20
[ 509.220408] do_syscall_64+0x44/0xa0
[ 509.223987] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 509.229041] RIP: 0033:0x7f91b8531347
[ 509.232618] Code: 5d c3 cc 48 8b 05 f9 2f 07 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 cc cc cc cc cc cc cc cc cc cc b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c9 2f 07 00 f7 d8 64 89 01 48
[ 509.251371] RSP: 002b:00007ffef7feb778 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 509.258940] RAX: ffffffffffffffda RBX: 0000000000af6210 RCX: 00007f91b8531347
[ 509.266073] RDX: 00007ffef7feb790 RSI: 000000004068aea3 RDI: 0000000000000018
[ 509.273207] RBP: 00007ffef7feba10 R08: 000000000020331b R09: 000000000000000f
[ 509.280338] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000af8df0
[ 509.287470] R13: 0000000000afa3e0 R14: 0000000000000000 R15: 0000000000af7800
[ 509.294607] </TASK>
> > [...]
> Does sev_migrate_tests survive lockdep checking if
> NR_MIGRATE_TEST_VCPUS is changed to 16?
On Fri, Apr 8, 2022 at 9:08 AM Peter Gonda <[email protected]> wrote:
>
> On Thu, Apr 7, 2022 at 3:17 PM John Sperbeck <[email protected]> wrote:
> >
> > On Thu, Apr 7, 2022 at 12:59 PM Peter Gonda <[email protected]> wrote:
> > >
> > > svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
> > > source and target vcpu->locks. Mark the nested subclasses to avoid false
> > > positives from lockdep.
>
> Nope. Good catch, I didn't realize there was a limit 8 subclasses:
Does anyone have thoughts on how we can resolve this vCPU locking with
the 8 subclass max?
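For reference, lockdep only supports a small fixed number of subclasses per
lock class, so with SEV_NR_MIGRATION_ROLES == 2 the
i * SEV_NR_MIGRATION_ROLES + role scheme runs out at vCPU index 4 (the
sketch below assumes the stock lockdep header definition):

	/* from the lockdep headers (assumed unchanged) */
	#define MAX_LOCKDEP_SUBCLASSES	8UL

	/*
	 * subclass = i * SEV_NR_MIGRATION_ROLES + role
	 * i = 4, role = SEV_MIGRATION_SOURCE (0)  ->  subclass 8, which is
	 * exactly what trips the "BUG: looking up invalid subclass: 8" splat.
	 */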
>
> [...]
On 4/20/22 22:14, Peter Gonda wrote:
>>>> svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
>>>> source and target vcpu->locks. Mark the nested subclasses to avoid false
>>>> positives from lockdep.
>> Nope. Good catch, I didn't realize there was a limit 8 subclasses:
> Does anyone have thoughts on how we can resolve this vCPU locking with
> the 8 subclass max?
The documentation does not have anything. Maybe you can call
mutex_release manually (and mutex_acquire before unlocking).
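(For reference, mutex_release() and mutex_acquire() are only lockdep
annotations, they do not touch the mutex itself; a sketch of the macros,
assuming the usual lockdep.h definitions:

	#define mutex_acquire(l, s, t, i)	lock_acquire_exclusive(l, s, t, NULL, i)
	#define mutex_release(l, i)		lock_release(l, i)

so "releasing" a vCPU mutex here only drops it from lockdep's held-lock
list while the mutex stays locked.)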
Paolo
On Thu, Apr 21, 2022 at 9:56 AM Paolo Bonzini <[email protected]> wrote:
>
> On 4/20/22 22:14, Peter Gonda wrote:
> >>>> svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
> >>>> source and target vcpu->locks. Mark the nested subclasses to avoid false
> >>>> positives from lockdep.
> >> Nope. Good catch, I didn't realize there was a limit 8 subclasses:
> > Does anyone have thoughts on how we can resolve this vCPU locking with
> > the 8 subclass max?
>
> The documentation does not have anything. Maybe you can call
> mutex_release manually (and mutex_acquire before unlocking).
>
> Paolo
Hmm, this seems to be working, thanks Paolo. To lock I have been using:
...
	if (mutex_lock_killable_nested(
		    &vcpu->mutex, i * SEV_NR_MIGRATION_ROLES + role))
		goto out_unlock;
	mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
...

To unlock:

...
	mutex_acquire(&vcpu->mutex.dep_map, 0, 0, _THIS_IP_);
	mutex_unlock(&vcpu->mutex);
...
If I understand correctly, we are fully disabling lockdep for these locks by
doing this. If that is the case, should I just remove all the '_nested'
usage, switch to mutex_lock_killable(), and remove the per-vCPU subclass?
On 4/26/22 21:06, Peter Gonda wrote:
> On Thu, Apr 21, 2022 at 9:56 AM Paolo Bonzini <[email protected]> wrote:
>>
>> On 4/20/22 22:14, Peter Gonda wrote:
>>>>>> svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
>>>>>> source and target vcpu->locks. Mark the nested subclasses to avoid false
>>>>>> positives from lockdep.
>>>> Nope. Good catch, I didn't realize there was a limit 8 subclasses:
>>> Does anyone have thoughts on how we can resolve this vCPU locking with
>>> the 8 subclass max?
>>
>> The documentation does not have anything. Maybe you can call
>> mutex_release manually (and mutex_acquire before unlocking).
>>
>> Paolo
>
> Hmm this seems to be working thanks Paolo. To lock I have been using:
>
> ...
> if (mutex_lock_killable_nested(
> &vcpu->mutex, i * SEV_NR_MIGRATION_ROLES + role))
> goto out_unlock;
> mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> ...
>
> To unlock:
> ...
> mutex_acquire(&vcpu->mutex.dep_map, 0, 0, _THIS_IP_);
> mutex_unlock(&vcpu->mutex);
> ...
>
> If I understand correctly we are fully disabling lockdep by doing
> this. If this is the case should I just remove all the '_nested' usage
> so switch to mutex_lock_killable() and remove the per vCPU subclass?
Yes, though you could also do:

	bool acquired = false;
	kvm_for_each_vcpu(...) {
		if (acquired)
			mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
		if (mutex_lock_killable_nested(&vcpu->mutex, role))
			goto out_unlock;
		acquired = true;
		...

and to unlock:

	bool acquired = true;
	kvm_for_each_vcpu(...) {
		if (!acquired)
			mutex_acquire(&vcpu->mutex.dep_map, 0, role, _THIS_IP_);
		mutex_unlock(&vcpu->mutex);
		acquired = false;
	}

where role is either 0 or SINGLE_DEPTH_NESTING and is passed to
sev_{,un}lock_vcpus_for_migration.

That coalesces all the mutexes for a vm in a single subclass, essentially.
Paolo
On Wed, Apr 27, 2022 at 10:04 AM Paolo Bonzini <[email protected]> wrote:
>
> On 4/26/22 21:06, Peter Gonda wrote:
> > On Thu, Apr 21, 2022 at 9:56 AM Paolo Bonzini <[email protected]> wrote:
> >>
> >> On 4/20/22 22:14, Peter Gonda wrote:
> >>>>>> svm_vm_migrate_from() uses sev_lock_vcpus_for_migration() to lock all
> >>>>>> source and target vcpu->locks. Mark the nested subclasses to avoid false
> >>>>>> positives from lockdep.
> >>>> Nope. Good catch, I didn't realize there was a limit 8 subclasses:
> >>> Does anyone have thoughts on how we can resolve this vCPU locking with
> >>> the 8 subclass max?
> >>
> >> The documentation does not have anything. Maybe you can call
> >> mutex_release manually (and mutex_acquire before unlocking).
> >>
> >> Paolo
> >
> > Hmm this seems to be working thanks Paolo. To lock I have been using:
> >
> > ...
> > if (mutex_lock_killable_nested(
> > &vcpu->mutex, i * SEV_NR_MIGRATION_ROLES + role))
> > goto out_unlock;
> > mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> > ...
> >
> > To unlock:
> > ...
> > mutex_acquire(&vcpu->mutex.dep_map, 0, 0, _THIS_IP_);
> > mutex_unlock(&vcpu->mutex);
> > ...
> >
> > If I understand correctly we are fully disabling lockdep by doing
> > this. If this is the case should I just remove all the '_nested' usage
> > so switch to mutex_lock_killable() and remove the per vCPU subclass?
>
> Yes, though you could also do:
>
> bool acquired = false;
> kvm_for_each_vcpu(...) {
> if (acquired)
> mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> if (mutex_lock_killable_nested(&vcpu->mutex, role)
> goto out_unlock;
> acquired = true;
> ...
>
> and to unlock:
>
> bool acquired = true;
> kvm_for_each_vcpu(...) {
> if (!acquired)
> mutex_acquire(&vcpu->mutex.dep_map, 0, role, _THIS_IP_);
> mutex_unlock(&vcpu->mutex);
> acquired = false;
> }
>
> where role is either 0 or SINGLE_DEPTH_NESTING and is passed to
> sev_{,un}lock_vcpus_for_migration.
>
> That coalesces all the mutexes for a vm in a single subclass, essentially.
Ah, that's a great idea to allow for lockdep to work still. I'll try
that out, thanks again Paolo.
>
> Paolo
>
On 4/29/22 03:03, Hillf Danton wrote:
> Wonder if local lock classes [1] help.
>
> [1]https://lore.kernel.org/lkml/165055518776.3745911.9346998911322224736.stgit@dwillia2-desk3.amr.corp.intel.com/
No, they wouldn't. Local lock classes are more of a per-subsystem lock,
while here the issue is that we are taking an arbitrary amount of locks
at the same time.
Technically it would be possible to put a struct lock_class_key in
struct kvm_vcpu, but that wouldn't scale and would actually _reduce_ the
likelihood of lockdep reporting bad things.
The effectiveness of lockdep comes exactly from using the same map for
all locks in the class, so that AB/BA scenarios are caught throughout
the whole life of the system. If each lock had a separate key, they would be
caught only if the "B" is exactly the same mutex in both AB and BA cases.
Paolo
On Fri, Apr 29, 2022 at 11:32 AM Paolo Bonzini <[email protected]> wrote:
>
> On 4/29/22 19:27, Peter Gonda wrote:
> > Ah yes I missed that. I would suggest `role = SEV_NR_MIGRATION_ROLES`
> > or something else instead of role++ to avoid leaking this
> > implementation detail outside of the function signature / enum.
>
> Sure.
OK. I'll get that tested and get a V4 out. Thank you for all the help
here Paolo!!
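For reference, combining the role bump with the mutex_release()/mutex_acquire()
trick, the locking side would look roughly like the sketch below (untested
here, names and structure as discussed above; the unlock helper mirrors the
out_unlock path):

	static int sev_lock_vcpus_for_migration(struct kvm *kvm,
						enum sev_migration_role role)
	{
		struct kvm_vcpu *vcpu;
		unsigned long i, j;

		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (mutex_lock_killable_nested(&vcpu->mutex, role))
				goto out_unlock;

			if (!i)
				/*
				 * The remaining vCPUs are locked while the
				 * first vCPU's mutex is still annotated, so
				 * give them their own subclass; it is dropped
				 * again right below, keeping at most two
				 * subclasses live per VM.
				 */
				role = SEV_NR_MIGRATION_ROLES;
			else
				mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
		}

		return 0;

	out_unlock:
		kvm_for_each_vcpu(j, vcpu, kvm) {
			if (j == i)
				break;

			/* Re-add the annotation dropped above, then unlock. */
			if (j)
				mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
			mutex_unlock(&vcpu->mutex);
		}
		return -EINTR;
	}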
>
> Paolo
>
On Fri, Apr 29, 2022 at 9:38 AM Paolo Bonzini <[email protected]> wrote:
>
> On 4/29/22 17:35, Peter Gonda wrote:
> > [...]
>
> No, you don't need any of this. You can rely on there being only one
> depmap, otherwise you wouldn't need the mock releases and acquires at
> all. Also the unlocking order does not matter for deadlocks, only the
> locking order does. You're overdoing it. :)
Hmm, I'm slightly confused here then. If I take your original suggestion of:
	bool acquired = false;
	kvm_for_each_vcpu(...) {
		if (acquired)
			mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); <-- Warning here
		if (mutex_lock_killable_nested(&vcpu->mutex, role))
			goto out_unlock;
		acquired = true;
"""
[ 2810.088982] =====================================
[ 2810.093687] WARNING: bad unlock balance detected!
[ 2810.098388] 5.17.0-dbg-DEV #5 Tainted: G O
[ 2810.103788] -------------------------------------
[ 2810.108490] sev_migrate_tes/107600 is trying to release lock (&vcpu->mutex) at:
[ 2810.115798] [<ffffffffb7cd3592>] sev_lock_vcpus_for_migration+0xe2/0x1e0
[ 2810.122497] but there are no more locks to release!
[ 2810.127376] other info that might help us debug this:
[ 2810.133911] 3 locks held by sev_migrate_tes/107600:
[ 2810.138791] #0: ffffa6cbf31ca3b8 (&kvm->lock){+.+.}-{3:3}, at: sev_vm_move_enc_context_from+0x96/0x690
[ 2810.148178] #1: ffffa6cbf28523b8 (&kvm->lock/1){+.+.}-{3:3}, at: sev_vm_move_enc_context_from+0xae/0x690
[ 2810.157738] #2: ffff9220683b01f8 (&vcpu->mutex){+.+.}-{3:3}, at: sev_lock_vcpus_for_migration+0x89/0x1e0
"""
This makes sense to me given we are actually trying to release a lock we
haven't locked yet. So that's why I thought we'd need to work with a
prev_vcpu pointer. So the behavior I've observed is slightly different
than I'd expect from your statement "(i.e. all vcpu->mutexes share the
same name and key because they have a single mutex_init-ialization site)."
Ack about the unlocking order, that makes things easier.
>
> Paolo
>
On 4/30/22 03:50, Hillf Danton wrote:
> lock for migration
> ===
> kvm_for_each_vcpu(i, vcpu, kvm) {
> if (mutex_lock_killable(&vcpu->mutex))
> goto out_unlock;
> lockdep_copy_map(&vcpu->v_dep_map, &vcpu->mutex.dep_map);
> mutex_release(&vcpu->mutex.dep_map, ip);
> }
>
>
> unlock for migration
> ===
> kvm_for_each_vcpu(i, vcpu, kvm) {
> lockdep_copy_map(&vcpu->mutex.dep_map, &vcpu->v_dep_map);
> /*
> * Or directly acquire without v_dep_map added
> *
> mutex_acquire(&vcpu->mutex.dep_map, 0, 1,_RET_IP_);
> */
> mutex_unlock(&vcpu->mutex);
> }
Yes this is exactly what Peter is doing, except that we're trying to
keep one lock taken. Thanks for pointing to lock_sock_nested().
Paolo
On Wed, Apr 27, 2022 at 2:18 PM Peter Gonda <[email protected]> wrote:
>
> On Wed, Apr 27, 2022 at 10:04 AM Paolo Bonzini <[email protected]> wrote:
> >
> > On 4/26/22 21:06, Peter Gonda wrote:
> > > [...]
> >
> > Yes, though you could also do:
> >
> > bool acquired = false;
> > kvm_for_each_vcpu(...) {
> > if (acquired)
> > mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
> > if (mutex_lock_killable_nested(&vcpu->mutex, role)
> > goto out_unlock;
> > acquired = true;
> > ...
> >
> > and to unlock:
> >
> > bool acquired = true;
> > kvm_for_each_vcpu(...) {
> > if (!acquired)
> > mutex_acquire(&vcpu->mutex.dep_map, 0, role, _THIS_IP_);
> > mutex_unlock(&vcpu->mutex);
> > acquired = false;
> > }
So when actually trying this out I noticed that we are releasing the dep_map
of the current vcpu in the iteration, but really we haven't actually taken
that lock yet. So we'd need to maintain a prev_* pointer and release that one.
That seems a bit more complicated than just doing this:
To lock:

	bool acquired = false;
	kvm_for_each_vcpu(...) {
		if (!acquired) {
			if (mutex_lock_killable_nested(&vcpu->mutex, role))
				goto out_unlock;
			acquired = true;
		} else {
			if (mutex_lock_killable(&vcpu->mutex))
				goto out_unlock;
		}
	}

To unlock:

	kvm_for_each_vcpu(...) {
		mutex_unlock(&vcpu->mutex);
	}
This way instead of mock-releasing the lockdep map we just lock the first
vcpu with mutex_lock_killable_nested(). I think this maintains the property
you suggested of "coalesces all the mutexes for a vm in a single subclass".
Thoughts?
> >
> > where role is either 0 or SINGLE_DEPTH_NESTING and is passed to
> > sev_{,un}lock_vcpus_for_migration.
> >
> > That coalesces all the mutexes for a vm in a single subclass, essentially.
>
> Ah thats a great idea to allow for lockdep to work still. I'll try
> that out, thanks again Paolo.
>
> >
> > Paolo
> >
On Fri, Apr 29, 2022 at 11:21 AM Paolo Bonzini <[email protected]> wrote:
>
> On Fri, Apr 29, 2022 at 7:12 PM Peter Gonda <[email protected]> wrote:
> > Sounds good. Instead of doing this prev_vcpu solution we could just
> > keep the 1st vcpu for source and target. I think this could work since
> > all the vcpu->mutex.dep_maps do not point to the same string.
> >
> > Lock:
> > bool acquired = false;
> > kvm_for_each_vcpu(...) {
> > if (mutex_lock_killable_nested(&vcpu->mutex, role)
> > goto out_unlock;
> > acquired = true;
> > if (acquired)
> > mutex_release(&vcpu->mutex, role)
> > }
>
> Almost:
>
> bool first = true;
> kvm_for_each_vcpu(...) {
> if (mutex_lock_killable_nested(&vcpu->mutex, role)
> goto out_unlock;
> if (first)
> ++role, first = false;
> else
> mutex_release(&vcpu->mutex, role);
> }
>
> and to unlock:
>
> bool first = true;
> kvm_for_each_vcpu(...) {
> if (first)
> first = false;
> else
> mutex_acquire(&vcpu->mutex, role);
> mutex_unlock(&vcpu->mutex);
> acquired = false;
> }
>
> because you cannot use the first vCPU's role again when locking.
Ah yes I missed that. I would suggest `role = SEV_NR_MIGRATION_ROLES`
or something else instead of role++ to avoid leaking this
implementation detail outside of the function signature / enum.
>
> Paolo
>
On Fri, Apr 29, 2022 at 7:12 PM Peter Gonda <[email protected]> wrote:
> Sounds good. Instead of doing this prev_vcpu solution we could just
> keep the 1st vcpu for source and target. I think this could work since
> all the vcpu->mutex.dep_maps do not point to the same string.
>
> Lock:
> bool acquired = false;
> kvm_for_each_vcpu(...) {
> if (mutex_lock_killable_nested(&vcpu->mutex, role)
> goto out_unlock;
> acquired = true;
> if (acquired)
> mutex_release(&vcpu->mutex, role)
> }
Almost:

	bool first = true;
	kvm_for_each_vcpu(...) {
		if (mutex_lock_killable_nested(&vcpu->mutex, role))
			goto out_unlock;
		if (first)
			++role, first = false;
		else
			mutex_release(&vcpu->mutex, role);
	}

and to unlock:

	bool first = true;
	kvm_for_each_vcpu(...) {
		if (first)
			first = false;
		else
			mutex_acquire(&vcpu->mutex, role);
		mutex_unlock(&vcpu->mutex);
	}

because you cannot use the first vCPU's role again when locking.
Paolo
On Fri, Apr 29, 2022 at 9:59 AM Paolo Bonzini <[email protected]> wrote:
>
> On 4/29/22 17:51, Peter Gonda wrote:
> >> No, you don't need any of this. You can rely on there being only one
> >> depmap, otherwise you wouldn't need the mock releases and acquires at
> >> all. Also the unlocking order does not matter for deadlocks, only the
> >> locking order does. You're overdoing it. :)
> >
> > Hmm I'm slightly confused here then. If I take your original suggestion of:
> >
> > bool acquired = false;
> > kvm_for_each_vcpu(...) {
> > if (acquired)
> > mutex_release(&vcpu->mutex.dep_map,
> > _THIS_IP_); <-- Warning here
> > if (mutex_lock_killable_nested(&vcpu->mutex, role)
> > goto out_unlock;
> > acquired = true;
> >
> > """
> > [ 2810.088982] =====================================
> > [ 2810.093687] WARNING: bad unlock balance detected!
> > [ 2810.098388] 5.17.0-dbg-DEV #5 Tainted: G O
> > [ 2810.103788] -------------------------------------
>
> Ah even if the contents of the dep_map are the same for all locks, it
> also uses the *pointer* to the dep_map to track (class, subclass) ->
> pointer and checks for a match.
>
> So yeah, prev_cpu is needed. The unlock ordering OTOH is irrelevant so
> you don't need to visit the xarray backwards.
Sounds good. Instead of doing this prev_vcpu solution we could just
keep the 1st vcpu for source and target. I think this could work since
all the vcpu->mutex.dep_maps do not point to the same string.
Lock:

	bool acquired = false;
	kvm_for_each_vcpu(...) {
		if (mutex_lock_killable_nested(&vcpu->mutex, role))
			goto out_unlock;
		acquired = true;
		if (acquired)
			mutex_release(&vcpu->mutex, role)
	}

Unlock:

	bool acquired = true;
	kvm_for_each_vcpu(...) {
		if (!acquired)
			mutex_acquire(&vcpu->mutex, role)
		mutex_unlock(&vcpu->mutex);
		acquired = false;
	}
So when locking we release all but the first dep_maps. Then when
unlocking we acquire all but the first dep_maps. Thoughts?
>
> Paolo
>
On Thu, Apr 28, 2022 at 5:59 PM Paolo Bonzini <[email protected]> wrote:
>
> On 4/28/22 23:28, Peter Gonda wrote:
> >
> > So when actually trying this out I noticed that we are releasing the
> > current vcpu iterator but really we haven't actually taken that lock
> > yet. So we'd need to maintain a prev_* pointer and release that one.
>
> Not entirely true because all vcpu->mutex.dep_maps will be for the same
> lock. The dep_map is essentially a fancy string, in this case
> "&vcpu->mutex".
>
> See the definition of mutex_init:
>
> #define mutex_init(mutex) \
> do { \
> static struct lock_class_key __key; \
> \
> __mutex_init((mutex), #mutex, &__key); \
> } while (0)
>
> and the dep_map field is initialized with
>
> lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
>
> (i.e. all vcpu->mutexes share the same name and key because they have a
> single mutex_init-ialization site). Lockdep is as crude in theory as it
> is effective in practice!
>
> >
> > bool acquired = false;
> > kvm_for_each_vcpu(...) {
> > if (!acquired) {
> > if (mutex_lock_killable_nested(&vcpu->mutex, role)
> > goto out_unlock;
> > acquired = true;
> > } else {
> > if (mutex_lock_killable(&vcpu->mutex, role)
> > goto out_unlock;
>
> This will cause a lockdep splat because it uses subclass 0. All the
> *_nested functions is allow you to specify a subclass other than zero.
OK got it. I now have this to lock:
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (prev_vcpu != NULL) {
			mutex_release(&prev_vcpu->mutex.dep_map, _THIS_IP_);
			prev_vcpu = NULL;
		}

		if (mutex_lock_killable_nested(&vcpu->mutex, role))
			goto out_unlock;
		prev_vcpu = vcpu;
	}

But I've noticed the unlocking is in the wrong order: since we are using
kvm_for_each_vcpu() I think we need a kvm_for_each_vcpu_rev() or
something, which may be a bit more work:
https://elixir.bootlin.com/linux/latest/source/lib/xarray.c#L1119.

Then I think we could do something like this to unlock:

	bool acquired = true;
	kvm_for_each_vcpu_rev(i, vcpu, kvm) {
		if (!acquired) {
			mutex_acquire(&vcpu->mutex.dep_map, 0, role, _THIS_IP_);
		}
		mutex_unlock(&vcpu->mutex);
		acquired = false;
	}
>
> Paolo
>
> > }
> > }
> >
> > To unlock:
> >
> > kvm_for_each_vcpu(...) {
> > mutex_unlock(&vcpu->mutex);
> > }
> >
> > This way instead of mocking and releasing the lock_dep we just lock
> > the fist vcpu with mutex_lock_killable_nested(). I think this
> > maintains the property you suggested of "coalesces all the mutexes for
> > a vm in a single subclass". Thoughts?
>
On 4/29/22 13:40, Hillf Danton wrote:
> To avoid acquiring more than one mutexes at the same time, add a completion
> in paralle to the mutex in question and ask mutex locker for non-migration
> purpose to take a nap on completion instead of mutex.
Acquiring more than one mutex at a time is perfectly fine. It also
cannot get into philosophers-problem-like deadlocks because 1) it's got
a well-defined iteration order 2) it's protected by sev_lock_two_vms.
It's just that lockdep has to be convinced that it is fine. I don't shy
away from complex locking schemes if they're necessary, but here one
absolutely isn't.
Paolo
On 4/28/22 23:28, Peter Gonda wrote:
>
> So when actually trying this out I noticed that we are releasing the
> current vcpu iterator but really we haven't actually taken that lock
> yet. So we'd need to maintain a prev_* pointer and release that one.
Not entirely true because all vcpu->mutex.dep_maps will be for the same
lock. The dep_map is essentially a fancy string, in this case
"&vcpu->mutex".
See the definition of mutex_init:
#define mutex_init(mutex) \
do { \
static struct lock_class_key __key; \
\
__mutex_init((mutex), #mutex, &__key); \
} while (0)
and the dep_map field is initialized with
lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
(i.e. all vcpu->mutexes share the same name and key because they have a
single mutex_init-ialization site). Lockdep is as crude in theory as it
is effective in practice!
>
> bool acquired = false;
> kvm_for_each_vcpu(...) {
> if (!acquired) {
> if (mutex_lock_killable_nested(&vcpu->mutex, role)
> goto out_unlock;
> acquired = true;
> } else {
> if (mutex_lock_killable(&vcpu->mutex, role)
> goto out_unlock;
This will cause a lockdep splat because it uses subclass 0. All the
*_nested functions do is allow you to specify a subclass other than zero.
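For reference, the *_nested variants just take an explicit lockdep subclass,
and the plain calls are the subclass-0 forms; a sketch of the declarations,
assuming include/linux/mutex.h with CONFIG_DEBUG_LOCK_ALLOC=y:

	extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
	extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
							   unsigned int subclass);

	#define mutex_lock(lock)		mutex_lock_nested(lock, 0)
	#define mutex_lock_killable(lock)	mutex_lock_killable_nested(lock, 0)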
Paolo
> }
> }
>
> To unlock:
>
> kvm_for_each_vcpu(...) {
> mutex_unlock(&vcpu->mutex);
> }
>
> This way instead of mocking and releasing the lock_dep we just lock
> the fist vcpu with mutex_lock_killable_nested(). I think this
> maintains the property you suggested of "coalesces all the mutexes for
> a vm in a single subclass". Thoughts?
On 4/29/22 17:51, Peter Gonda wrote:
>> No, you don't need any of this. You can rely on there being only one
>> depmap, otherwise you wouldn't need the mock releases and acquires at
>> all. Also the unlocking order does not matter for deadlocks, only the
>> locking order does. You're overdoing it. :)
>
> Hmm I'm slightly confused here then. If I take your original suggestion of:
>
> bool acquired = false;
> kvm_for_each_vcpu(...) {
> if (acquired)
> mutex_release(&vcpu->mutex.dep_map,
> _THIS_IP_); <-- Warning here
> if (mutex_lock_killable_nested(&vcpu->mutex, role)
> goto out_unlock;
> acquired = true;
>
> """
> [ 2810.088982] =====================================
> [ 2810.093687] WARNING: bad unlock balance detected!
> [ 2810.098388] 5.17.0-dbg-DEV #5 Tainted: G O
> [ 2810.103788] -------------------------------------
Ah, even if the contents of the dep_map are the same for all locks, lockdep
also uses the *pointer* to the dep_map to track (class, subclass) ->
pointer and checks for a match.

So yeah, prev_vcpu is needed. The unlock ordering OTOH is irrelevant so
you don't need to visit the xarray backwards.
Paolo
On 4/29/22 19:27, Peter Gonda wrote:
> Ah yes I missed that. I would suggest `role = SEV_NR_MIGRATION_ROLES`
> or something else instead of role++ to avoid leaking this
> implementation detail outside of the function signature / enum.
Sure.
Paolo
On 4/29/22 17:35, Peter Gonda wrote:
> On Thu, Apr 28, 2022 at 5:59 PM Paolo Bonzini <[email protected]> wrote:
>> [...]
>
> OK got it. I now have this to lock:
>
> kvm_for_each_vcpu (i, vcpu, kvm) {
> if (prev_vcpu != NULL) {
> mutex_release(&prev_vcpu->mutex.dep_map, _THIS_IP_);
> prev_vcpu = NULL;
> }
>
> if (mutex_lock_killable_nested(&vcpu->mutex, role))
> goto out_unlock;
> prev_vcpu = vcpu;
> }
>
> But I've noticed the unlocking is in the wrong order since we are
> using kvm_for_each_vcpu() I think we need a kvm_for_each_vcpu_rev() or
> something. Which maybe a bit for work:
> https://elixir.bootlin.com/linux/latest/source/lib/xarray.c#L1119.
No, you don't need any of this. You can rely on there being only one
depmap, otherwise you wouldn't need the mock releases and acquires at
all. Also the unlocking order does not matter for deadlocks, only the
locking order does. You're overdoing it. :)
Paolo