From: Isaku Yamahata <[email protected]>
When resolving a KVM page fault and the backing page is hwpoisoned, exit to
user space with the HWPOISON flag set so that the user space VMM, e.g. qemu,
can handle it.
- Add a new flag, KVM_MEMORY_EXIT_FLAG_HWPOISON, to KVM_EXIT_MEMORY_FAULT
  to indicate that the page is poisoned.
- Make kvm_gmem_get_pfn() report the hwpoison state by returning -EHWPOISON
  when the folio is hw-poisoned.
- When a page is hw-poisoned while faulting in private gmem, return
  KVM_EXIT_MEMORY_FAULT with the HWPOISON flag set, as in the sketch below.
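A user space VMM could consume the new exit roughly as follows (a minimal
sketch; the handle_poisoned_gpa()/convert_memory() helpers and the recovery
policy are illustrative assumptions, not part of this patch):

	/* run is the mmap()ed struct kvm_run of the faulting vcpu */
	if (run->exit_reason == KVM_EXIT_MEMORY_FAULT) {
		__u64 gpa = run->memory.gpa;
		__u64 size = run->memory.size;

		if (run->memory.flags & KVM_MEMORY_EXIT_FLAG_HWPOISON)
			/* e.g. inject a machine check or stop the guest */
			handle_poisoned_gpa(gpa, size);
		else if (run->memory.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE)
			/* implicit shared <-> private conversion request */
			convert_memory(gpa, size);
	}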
Signed-off-by: Isaku Yamahata <[email protected]>
---
arch/x86/kvm/mmu/mmu.c | 21 +++++++++++++++------
include/uapi/linux/kvm.h | 3 ++-
virt/kvm/guest_mem.c | 4 +++-
3 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 05943ccb55a4..5dc9d1fdadca 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4335,19 +4335,24 @@ static inline u8 kvm_max_level_for_order(int order)
return PG_LEVEL_4K;
}
-static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
- struct kvm_page_fault *fault)
+static int __kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
+ struct kvm_page_fault *fault, __u64 flags)
{
vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
if (fault->is_private)
- vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
- else
- vcpu->run->memory.flags = 0;
+ flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
+	vcpu->run->memory.flags = flags;
vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
vcpu->run->memory.size = PAGE_SIZE;
return RET_PF_USER;
}
+static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
+ struct kvm_page_fault *fault)
+{
+ return __kvm_do_memory_fault_exit(vcpu, fault, 0);
+}
+
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
@@ -4358,12 +4363,16 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
&max_order);
- if (r)
+ if (r && r != -EHWPOISON)
return r;
fault->max_level = min(kvm_max_level_for_order(max_order),
fault->max_level);
fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
+
+ if (r == -EHWPOISON)
+ return __kvm_do_memory_fault_exit(vcpu, fault,
+ KVM_MEMORY_EXIT_FLAG_HWPOISON);
return RET_PF_CONTINUE;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index eb900344a054..48329cb44415 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -527,7 +527,8 @@ struct kvm_run {
} notify;
/* KVM_EXIT_MEMORY_FAULT */
struct {
-#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
+#define KVM_MEMORY_EXIT_FLAG_PRIVATE BIT_ULL(3)
+#define KVM_MEMORY_EXIT_FLAG_HWPOISON BIT_ULL(4)
__u64 flags;
__u64 gpa;
__u64 size;
diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index 746e683df589..3678287d7c9d 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -589,6 +589,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
{
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
struct kvm_gmem *gmem;
+ bool hwpoison = false;
struct folio *folio;
struct page *page;
struct file *file;
@@ -610,6 +611,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
return -ENOMEM;
}
+ hwpoison = folio_test_hwpoison(folio);
page = folio_file_page(folio, index);
*pfn = page_to_pfn(page);
@@ -618,7 +620,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
folio_unlock(folio);
fput(file);
- return 0;
+ return hwpoison ? -EHWPOISON : 0;
}
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
--
2.25.1
On Wed, Sep 13, 2023, [email protected] wrote:
> From: Isaku Yamahata <[email protected]>
>
> When resolving a KVM page fault and the backing page is hwpoisoned, exit to
> user space with the HWPOISON flag set so that the user space VMM, e.g. qemu,
> can handle it.
>
> - Add a new flag, KVM_MEMORY_EXIT_FLAG_HWPOISON, to KVM_EXIT_MEMORY_FAULT
>   to indicate that the page is poisoned.
> - Make kvm_gmem_get_pfn() report the hwpoison state by returning -EHWPOISON
>   when the folio is hw-poisoned.
> - When a page is hw-poisoned while faulting in private gmem, return
>   KVM_EXIT_MEMORY_FAULT with the HWPOISON flag set.
>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
...
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index eb900344a054..48329cb44415 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -527,7 +527,8 @@ struct kvm_run {
> } notify;
> /* KVM_EXIT_MEMORY_FAULT */
> struct {
> -#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
> +#define KVM_MEMORY_EXIT_FLAG_PRIVATE BIT_ULL(3)
> +#define KVM_MEMORY_EXIT_FLAG_HWPOISON BIT_ULL(4)
Rather than add a flag, I think we should double down on returning -1 + errno
when exiting with vcpu->run->exit_reason == KVM_EXIT_MEMORY_FAULT, as is being
proposed in Anish's series for accelerating UFFD-like behavior in KVM[*].
Then KVM can simply return -EFAULT or -EHWPOISON to communicate why KVM is
exiting at a higher level, and let the kvm_run structure provide the finer
details about the access itself. E.g. kvm_faultin_pfn_private() can simply
propagate the return value from kvm_gmem_get_pfn() without having to identify
*why* kvm_gmem_get_pfn() failed.
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
int max_order, r;
if (!kvm_slot_can_be_private(fault->slot)) {
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
return -EFAULT;
}
r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
&max_order);
if (r) {
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
return r;
}
...
}
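FWIW, with that approach the VMM keys off the errno from KVM_RUN instead of
a dedicated flag bit. Roughly (a hypothetical sketch assuming the -1 + errno
behavior from Anish's series; handle_poisoned_gpa()/handle_memory_fault()
are made-up helpers):

	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 &&
	    run->exit_reason == KVM_EXIT_MEMORY_FAULT) {
		if (errno == EHWPOISON)
			/* poisoned page backing run->memory.gpa */
			handle_poisoned_gpa(run->memory.gpa, run->memory.size);
		else if (errno == EFAULT)
			/* e.g. attribute mismatch, convert shared <-> private */
			handle_memory_fault(run->memory.gpa, run->memory.size);
	}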
[*] https://lore.kernel.org/all/[email protected]