From: Isaku Yamahata <[email protected]>
To test the error_remove_page() method of KVM gmem, add a new ioctl that
injects a memory failure at a given offset of a guest memfd.
Signed-off-by: Isaku Yamahata <[email protected]>
---
include/uapi/linux/kvm.h | 6 ++++
virt/kvm/guest_mem.c | 68 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 65fc983af840..4160614bcc0f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2323,4 +2323,10 @@ struct kvm_create_guest_memfd {
__u64 reserved[6];
};
+#define KVM_GUEST_MEMORY_FAILURE _IOWR(KVMIO, 0xd5, struct kvm_guest_memory_failure)
+
+struct kvm_guest_memory_failure {
+ __u64 offset;	/* byte offset into the guest_memfd; bits below PAGE_SHIFT are ignored */
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index 01fb4ca861d0..bc9dae50004b 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -291,10 +291,78 @@ static struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot)
return file;
}
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Inject a software-simulated memory failure into the page that currently
+ * backs @mf->offset in the guest_memfd @file.
+ *
+ * Only a page that is already present in the file's mapping is targeted;
+ * nothing is allocated on behalf of the caller.
+ *
+ * Return: -EPERM if the caller lacks CAP_SYS_ADMIN, -ENOENT (or the error
+ * reported by filemap_get_folio()) if no folio is cached at the offset,
+ * -ENXIO if the computed pfn is not valid, otherwise whatever
+ * memory_failure() returns.
+ */
+static int kvm_gmem_inject_failure(struct file *file,
+				   struct kvm_guest_memory_failure *mf)
+{
+	struct address_space *mapping = file_inode(file)->i_mapping;
+	pgoff_t index = mf->offset >> PAGE_SHIFT;
+	struct folio *folio;
+	unsigned long pfn = 0;
+	int err = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	filemap_invalidate_lock_shared(mapping);
+
+	/* Look up only: don't allocate a page. */
+	folio = filemap_get_folio(mapping, index);
+	if (!folio) {
+		err = -ENOENT;
+	} else if (IS_ERR(folio)) {
+		err = PTR_ERR(folio);
+	} else {
+		/* The folio may span several pages; pick the one at @index. */
+		pfn = folio_pfn(folio) + (index - folio_index(folio));
+		folio_put(folio);
+	}
+
+	filemap_invalidate_unlock_shared(mapping);
+	if (err)
+		return err;
+
+	/*
+	 * The invalidate lock has to be dropped before injecting the error:
+	 * memory_failure() and unpoison_memory() take it themselves when the
+	 * recovery logic removes pages from the mapping.  This leaves a
+	 * window in which the pfn looked up above can race with other users
+	 * of the mapping.
+	 */
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+	return memory_failure(pfn, MF_SW_SIMULATED);
+}
+#else
+/* memory_failure() is only built with CONFIG_MEMORY_FAILURE=y. */
+static int kvm_gmem_inject_failure(struct file *file,
+				   struct kvm_guest_memory_failure *mf)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MEMORY_FAILURE */
+
+/*
+ * ioctl entry point for guest_memfd files.
+ *
+ * KVM_GUEST_MEMORY_FAILURE is the only command handled: its argument is
+ * copied in from userspace and passed to kvm_gmem_inject_failure().  Any
+ * other command yields -EINVAL; an unreadable argument yields -EFAULT.
+ */
+static long kvm_gmem_ioctl(struct file *file, unsigned int ioctl,
+			   unsigned long arg)
+{
+	struct kvm_guest_memory_failure failure;
+
+	if (ioctl != KVM_GUEST_MEMORY_FAILURE)
+		return -EINVAL;
+
+	if (copy_from_user(&failure, (void __user *)arg, sizeof(failure)))
+		return -EFAULT;
+
+	return kvm_gmem_inject_failure(file, &failure);
+}
+
+
static const struct file_operations kvm_gmem_fops = {
.open = generic_file_open,
.release = kvm_gmem_release,
.fallocate = kvm_gmem_fallocate,
+ .unlocked_ioctl = kvm_gmem_ioctl,	/* handles KVM_GUEST_MEMORY_FAILURE */
};
static int kvm_gmem_migrate_folio(struct address_space *mapping,
--
2.25.1
On Thu, Sep 21, 2023, [email protected] wrote:
> From: Isaku Yamahata <[email protected]>
>
> To test error_remove_page() method of KVM gmem, add a new ioctl to
> inject memory failure based on offset of guest memfd.
>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
> include/uapi/linux/kvm.h | 6 ++++
> virt/kvm/guest_mem.c | 68 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 74 insertions(+)
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 65fc983af840..4160614bcc0f 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -2323,4 +2323,10 @@ struct kvm_create_guest_memfd {
> __u64 reserved[6];
> };
>
> +#define KVM_GUEST_MEMORY_FAILURE _IOWR(KVMIO, 0xd5, struct kvm_guest_memory_failure)
If we're going to add a KVM ioctl(), my vote is to make it a generic ioctl(), not
something that's specific to guest_memfd(). IIUC, all we need is the PFN, so the
only downside is that it'd require valid memslots. But the test isn't all that
interesting unless there are memslots, so I don't see that as a negative.
And if we add an ioctl(), it should be conditioned on CONFIG_HWPOISON_INJECT.
An alternative I think we should seriously consider is using the FAULT_INJECTION
framework to poison pages. We (Google) have plans to utilize fault injection for
other things in KVM, e.g. to inject "failures" on CMPXCHG in atomic SPTE updates
to force KVM down unlikely slow paths. I don't expect us to get patches posted
until early next year due to priorities, but hell or high water we will get patches
posted at some point.
The fault injection framework might be overkill for injecting memory errors, e.g.
a single ioctl() is definitely simpler to set up, but I suspect it would also be
much more powerful in the long run.
On Thu, Sep 21, 2023, [email protected] wrote:
> + if (!pfn_valid(pfn))
> + return -ENXIO;
> + return memory_failure(pfn, MF_SW_SIMULATED);
memory_failure is defined iff CONFIG_MEMORY_FAILURE=y. All of this code would
need to be conditioned on that (in addition to the injection configs).
address_space_operations.error_remove_page() arguably should be conditioned on
that as well, but that's a much bigger change and not a problem that needs to be
solved anytime soon.