From: Isaku Yamahata <[email protected]>
kvm_vm_ioctl_set_mem_attributes() discarded an error code of xa_err()
unconditionally. If an error occurred at the beginning, return error.
Fixes: 3779c214835b ("KVM: Introduce per-page memory attributes")
Signed-off-by: Isaku Yamahata <[email protected]>
---
Changes v2 -> v3:
- Newly added
---
virt/kvm/kvm_main.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 422d49634c56..fdef56f85174 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2423,6 +2423,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
gfn_t start, end;
unsigned long i;
void *entry;
+ int err = 0;
/* flags is currently not used. */
if (attrs->flags)
@@ -2447,14 +2448,17 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
KVM_MMU_UNLOCK(kvm);
for (i = start; i < end; i++) {
- if (xa_err(xa_store(&kvm->mem_attr_array, i, entry,
- GFP_KERNEL_ACCOUNT)))
+ err = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
+ GFP_KERNEL_ACCOUNT));
+ if (err)
break;
}
KVM_MMU_LOCK(kvm);
- if (i > start)
+ if (i > start) {
+ err = 0;
kvm_mem_attrs_changed(kvm, attrs->attributes, start, i);
+ }
kvm_mmu_invalidate_end(kvm);
KVM_MMU_UNLOCK(kvm);
@@ -2463,7 +2467,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
attrs->address = i << PAGE_SHIFT;
attrs->size = (end - i) << PAGE_SHIFT;
- return 0;
+ return err;
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
--
2.25.1
On Wed, Jun 28, 2023, [email protected] wrote:
> From: Isaku Yamahata <[email protected]>
>
> kvm_vm_ioctl_set_mem_attributes() discarded an error code of xa_err()
> unconditionally. If an error occurred at the beginning, return error.
>
> Fixes: 3779c214835b ("KVM: Introduce per-page memory attributes")
> Signed-off-by: Isaku Yamahata <[email protected]>
>
> ---
> Changes v2 -> v3:
> - Newly added
> ---
> virt/kvm/kvm_main.c | 12 ++++++++----
> 1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 422d49634c56..fdef56f85174 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2423,6 +2423,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> gfn_t start, end;
> unsigned long i;
> void *entry;
> + int err = 0;
>
> /* flags is currently not used. */
> if (attrs->flags)
> @@ -2447,14 +2448,17 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> KVM_MMU_UNLOCK(kvm);
>
> for (i = start; i < end; i++) {
> - if (xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> - GFP_KERNEL_ACCOUNT)))
> + err = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> + GFP_KERNEL_ACCOUNT));
> + if (err)
> break;
> }
>
> KVM_MMU_LOCK(kvm);
> - if (i > start)
> + if (i > start) {
> + err = 0;
> kvm_mem_attrs_changed(kvm, attrs->attributes, start, i);
> + }
> kvm_mmu_invalidate_end(kvm);
> KVM_MMU_UNLOCK(kvm);
>
> @@ -2463,7 +2467,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> attrs->address = i << PAGE_SHIFT;
> attrs->size = (end - i) << PAGE_SHIFT;
>
> - return 0;
> + return err;
Aha! Idea (stolen from commit afb2acb2e3a3 ("KVM: Fix vcpu_array[0] races")).
Rather than deal with a potential error partway through the updates, reserve all
xarray entries head of time. That way the ioctl() is all-or-nothing, e.g. KVM
doesn't need to update the address+size to capture progress, and userspace doesn't
have to retry (which is probably pointless anyways since failure to allocate an
xarray entry likely means the system/cgroup is under intense memory pressure).
Assuming it works (compile tested only), I'll squash this:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 46fbb4e019a6..8cb972038dab 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2278,7 +2278,7 @@ struct kvm_s390_zpci_op {
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
#define KVM_GET_SUPPORTED_MEMORY_ATTRIBUTES _IOR(KVMIO, 0xd2, __u64)
-#define KVM_SET_MEMORY_ATTRIBUTES _IOWR(KVMIO, 0xd3, struct kvm_memory_attributes)
+#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd3, struct kvm_memory_attributes)
struct kvm_memory_attributes {
__u64 address;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9584491c0cd3..93e82e3f1e1f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2425,6 +2425,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
gfn_t start, end;
unsigned long i;
void *entry;
+ int r;
/* flags is currently not used. */
if (attrs->flags)
@@ -2439,18 +2440,32 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
start = attrs->address >> PAGE_SHIFT;
end = (attrs->address + attrs->size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
+ if (WARN_ON_ONCE(start == end))
+ return -EINVAL;
+
entry = attrs->attributes ? xa_mk_value(attrs->attributes) : NULL;
mutex_lock(&kvm->slots_lock);
+ /*
+ * Reserve memory ahead of time to avoid having to deal with failures
+ * partway through setting the new attributes.
+ */
+ for (i = start; i < end; i++) {
+ r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT);
+ if (r)
+ goto out_unlock;
+ }
+
KVM_MMU_LOCK(kvm);
kvm_mmu_invalidate_begin(kvm);
kvm_mmu_invalidate_range_add(kvm, start, end);
KVM_MMU_UNLOCK(kvm);
for (i = start; i < end; i++) {
- if (xa_err(xa_store(&kvm->mem_attr_array, i, entry,
- GFP_KERNEL_ACCOUNT)))
+ r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
+ GFP_KERNEL_ACCOUNT));
+ if (KVM_BUG_ON(r, kvm))
break;
}
@@ -2460,12 +2475,10 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
kvm_mmu_invalidate_end(kvm);
KVM_MMU_UNLOCK(kvm);
+out_unlock:
mutex_unlock(&kvm->slots_lock);
- attrs->address = i << PAGE_SHIFT;
- attrs->size = (end - i) << PAGE_SHIFT;
-
- return 0;
+ return r;
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
@@ -5078,9 +5091,6 @@ static long kvm_vm_ioctl(struct file *filp,
goto out;
r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs);
-
- if (!r && copy_to_user(argp, &attrs, sizeof(attrs)))
- r = -EFAULT;
break;
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
On Thu, 13 Jul 2023 15:03:54 -0700
Sean Christopherson <[email protected]> wrote:
> On Wed, Jun 28, 2023, [email protected] wrote:
> > From: Isaku Yamahata <[email protected]>
> >
> > kvm_vm_ioctl_set_mem_attributes() discarded an error code of xa_err()
> > unconditionally. If an error occurred at the beginning, return error.
> >
> > Fixes: 3779c214835b ("KVM: Introduce per-page memory attributes")
> > Signed-off-by: Isaku Yamahata <[email protected]>
> >
> > ---
> > Changes v2 -> v3:
> > - Newly added
> > ---
> > virt/kvm/kvm_main.c | 12 ++++++++----
> > 1 file changed, 8 insertions(+), 4 deletions(-)
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 422d49634c56..fdef56f85174 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -2423,6 +2423,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> > gfn_t start, end;
> > unsigned long i;
> > void *entry;
> > + int err = 0;
> >
> > /* flags is currently not used. */
> > if (attrs->flags)
> > @@ -2447,14 +2448,17 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> > KVM_MMU_UNLOCK(kvm);
> >
> > for (i = start; i < end; i++) {
> > - if (xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> > - GFP_KERNEL_ACCOUNT)))
> > + err = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> > + GFP_KERNEL_ACCOUNT));
> > + if (err)
> > break;
> > }
> >
> > KVM_MMU_LOCK(kvm);
> > - if (i > start)
> > + if (i > start) {
> > + err = 0;
> > kvm_mem_attrs_changed(kvm, attrs->attributes, start, i);
> > + }
> > kvm_mmu_invalidate_end(kvm);
> > KVM_MMU_UNLOCK(kvm);
> >
> > @@ -2463,7 +2467,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> > attrs->address = i << PAGE_SHIFT;
> > attrs->size = (end - i) << PAGE_SHIFT;
> >
> > - return 0;
> > + return err;
>
> Aha! Idea (stolen from commit afb2acb2e3a3 ("KVM: Fix vcpu_array[0] races")).
> Rather than deal with a potential error partway through the updates, reserve all
> xarray entries head of time. That way the ioctl() is all-or-nothing, e.g. KVM
> doesn't need to update the address+size to capture progress, and userspace doesn't
> have to retry (which is probably pointless anyways since failure to allocate an
> xarray entry likely means the system/cgroup is under intense memory pressure).
>
> Assuming it works (compile tested only), I'll squash this:
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 46fbb4e019a6..8cb972038dab 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -2278,7 +2278,7 @@ struct kvm_s390_zpci_op {
>
> /* Available with KVM_CAP_MEMORY_ATTRIBUTES */
> #define KVM_GET_SUPPORTED_MEMORY_ATTRIBUTES _IOR(KVMIO, 0xd2, __u64)
> -#define KVM_SET_MEMORY_ATTRIBUTES _IOWR(KVMIO, 0xd3, struct kvm_memory_attributes)
> +#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd3, struct kvm_memory_attributes)
>
> struct kvm_memory_attributes {
> __u64 address;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9584491c0cd3..93e82e3f1e1f 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2425,6 +2425,7 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> gfn_t start, end;
> unsigned long i;
> void *entry;
> + int r;
>
> /* flags is currently not used. */
> if (attrs->flags)
> @@ -2439,18 +2440,32 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> start = attrs->address >> PAGE_SHIFT;
> end = (attrs->address + attrs->size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
>
> + if (WARN_ON_ONCE(start == end))
> + return -EINVAL;
> +
> entry = attrs->attributes ? xa_mk_value(attrs->attributes) : NULL;
>
> mutex_lock(&kvm->slots_lock);
>
> + /*
> + * Reserve memory ahead of time to avoid having to deal with failures
> + * partway through setting the new attributes.
> + */
> + for (i = start; i < end; i++) {
> + r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT);
> + if (r)
> + goto out_unlock;
> + }
> +
> KVM_MMU_LOCK(kvm);
> kvm_mmu_invalidate_begin(kvm);
> kvm_mmu_invalidate_range_add(kvm, start, end);
> KVM_MMU_UNLOCK(kvm);
>
> for (i = start; i < end; i++) {
> - if (xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> - GFP_KERNEL_ACCOUNT)))
> + r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> + GFP_KERNEL_ACCOUNT));
> + if (KVM_BUG_ON(r, kvm))
> break;
> }
>
IIUC, If an error happenes here, we should bail out and call xa_release()?
Or the code below (which is not shown here) still changes the memory attrs
partially.
> @@ -2460,12 +2475,10 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
> kvm_mmu_invalidate_end(kvm);
> KVM_MMU_UNLOCK(kvm);
>
> +out_unlock:
> mutex_unlock(&kvm->slots_lock);
>
> - attrs->address = i << PAGE_SHIFT;
> - attrs->size = (end - i) << PAGE_SHIFT;
> -
> - return 0;
> + return r;
> }
> #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
>
> @@ -5078,9 +5091,6 @@ static long kvm_vm_ioctl(struct file *filp,
> goto out;
>
> r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs);
> -
> - if (!r && copy_to_user(argp, &attrs, sizeof(attrs)))
> - r = -EFAULT;
> break;
> }
> #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
>
On Fri, Jul 14, 2023, Zhi Wang wrote:
> On Thu, 13 Jul 2023 15:03:54 -0700
> Sean Christopherson <[email protected]> wrote:
> > + /*
> > + * Reserve memory ahead of time to avoid having to deal with failures
> > + * partway through setting the new attributes.
> > + */
> > + for (i = start; i < end; i++) {
> > + r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT);
> > + if (r)
> > + goto out_unlock;
> > + }
> > +
> > KVM_MMU_LOCK(kvm);
> > kvm_mmu_invalidate_begin(kvm);
> > kvm_mmu_invalidate_range_add(kvm, start, end);
> > KVM_MMU_UNLOCK(kvm);
> >
> > for (i = start; i < end; i++) {
> > - if (xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> > - GFP_KERNEL_ACCOUNT)))
> > + r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> > + GFP_KERNEL_ACCOUNT));
> > + if (KVM_BUG_ON(r, kvm))
> > break;
> > }
> >
>
> IIUC, If an error happenes here, we should bail out and call xa_release()?
> Or the code below (which is not shown here) still changes the memory attrs
> partially.
I'm pretty sure we want to continue on. The VM is dead (killed by KVM_BUG_ON()),
so the attributes as seen by userspace and/or the VM don't matter. What does
matter is that KVM's internal state is consistent, e.g. that KVM doesn't have
shared SPTEs while the attributes say a GFN is private. That might not matter
for teardown, but I can't think of any reason not to tidy up.
And there can also be other ioctls() in flight. KVM_REQ_VM_DEAD ensures vCPU
can't enter the guest, and vm->vm_dead ensures no new ioctls() cant start, but
neither of those guarantees there aren't other tasks doing KVM things.
Regardless, we definitely don't need to do xa_release(). The VM is dead and all
its memory will be reclaimed soon enough. And there's no guarantee xa_release()
will actually be able to free anything, e.g. already processed entries won't be
freed, nor will any entries that existed _before_ the ioctl() was invoked. Not
to mention that the xarray probably isn't consuming much memory, relatively
speaking. I.e. in the majority of scenarios, it's likely preferable to get out
and destroy the VM asap.