The current handling of the MVPG instruction when executed in a nested
guest is wrong, and can lead to the nested guest hanging.
This patchset fixes the behaviour to be more architecturally correct,
and fixes the hangs observed.
v4->v5
* split kvm_s390_logical_to_effective so it can be reused for vSIE
* fix existing comments and add some more comments
* use the new split _kvm_s390_logical_to_effective in vsie_handle_mvpg
v3->v4
* added PEI_ prefix to DAT_PROT and NOT_PTE macros
* added small comment to explain what they are about
v2->v3
* improved some comments
* improved some variable and parameter names for increased readability
* fixed missing handling of page faults in the MVPG handler
* small readability improvements
v1->v2
* complete rewrite
Claudio Imbrenda (3):
s390/kvm: split kvm_s390_logical_to_effective
s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
s390/kvm: VSIE: correctly handle MVPG when in VSIE
arch/s390/kvm/gaccess.c | 30 ++++++++++--
arch/s390/kvm/gaccess.h | 35 ++++++++++---
arch/s390/kvm/vsie.c | 106 ++++++++++++++++++++++++++++++++++++----
3 files changed, 151 insertions(+), 20 deletions(-)
--
2.26.2
Correctly handle the MVPG instruction when issued by a VSIE guest.
Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested virtualization")
Cc: [email protected]
Signed-off-by: Claudio Imbrenda <[email protected]>
Acked-by: Janosch Frank <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
---
arch/s390/kvm/vsie.c | 98 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 93 insertions(+), 5 deletions(-)
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 78b604326016..48aab6290a77 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -417,11 +417,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
memcpy((void *)((u64)scb_o + 0xc0),
(void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
break;
- case ICPT_PARTEXEC:
- /* MVPG only */
- memcpy((void *)((u64)scb_o + 0xc0),
- (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
- break;
}
if (scb_s->ihcpu != 0xffffU)
@@ -983,6 +978,95 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return 0;
}
+/*
+ * Get a register for a nested guest.
+ * @vcpu the vcpu of the guest
+ * @vsie_page the vsie_page for the nested guest
+ * @reg the register number, the upper 4 bits are ignored.
+ * returns: the value of the register.
+ */
+static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
+{
+ /* no need to validate the parameter and/or perform error handling */
+ reg &= 0xf;
+ switch (reg) {
+ case 15:
+ return vsie_page->scb_s.gg15;
+ case 14:
+ return vsie_page->scb_s.gg14;
+ default:
+ return vcpu->run->s.regs.gprs[reg];
+ }
+}
+
+static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ unsigned long pei_dest, pei_src, src, dest, mask;
+ u64 *pei_block = &vsie_page->scb_o->mcic;
+ int edat, rc_dest, rc_src;
+ union ctlreg0 cr0;
+
+ cr0.val = vcpu->arch.sie_block->gcr[0];
+ edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+ mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
+
+ dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
+ src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
+
+ rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
+ rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
+ /*
+ * Either everything went well, or something non-critical went wrong
+ * e.g. because of a race. In either case, simply retry.
+ */
+ if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
+ retry_vsie_icpt(vsie_page);
+ return -EAGAIN;
+ }
+ /* Something more serious went wrong, propagate the error */
+ if (rc_dest < 0)
+ return rc_dest;
+ if (rc_src < 0)
+ return rc_src;
+
+ /* The only possible suppressing exception: just deliver it */
+ if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
+ clear_vsie_icpt(vsie_page);
+ rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
+ WARN_ON_ONCE(rc_dest);
+ return 1;
+ }
+
+ /*
+ * Forward the PEI intercept to the guest if it was a page fault, or
+ * also for segment and region table faults if EDAT applies.
+ */
+ if (edat) {
+ rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
+ rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
+ } else {
+ rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
+ rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
+ }
+ if (!rc_dest && !rc_src) {
+ pei_block[0] = pei_dest;
+ pei_block[1] = pei_src;
+ return 1;
+ }
+
+ retry_vsie_icpt(vsie_page);
+
+ /*
+ * The host has edat, and the guest does not, or it was an ASCE type
+ * exception. The host needs to inject the appropriate DAT interrupts
+ * into the guest.
+ */
+ if (rc_dest)
+ return inject_fault(vcpu, rc_dest, dest, 1);
+ return inject_fault(vcpu, rc_src, src, 0);
+}
+
/*
* Run the vsie on a shadow scb and a shadow gmap, without any further
* sanity checks, handling SIE faults.
@@ -1071,6 +1155,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if ((scb_s->ipa & 0xf000) != 0xf000)
scb_s->ipa += 0x1000;
break;
+ case ICPT_PARTEXEC:
+ if (scb_s->ipa == 0xb254)
+ rc = vsie_handle_mvpg(vcpu, vsie_page);
+ break;
}
return rc;
}
--
2.26.2
On 02.03.21 18:44, Claudio Imbrenda wrote:
> Correctly handle the MVPG instruction when issued by a VSIE guest.
>
> Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested virtualization")
> Cc: [email protected]
> Signed-off-by: Claudio Imbrenda <[email protected]>
> Acked-by: Janosch Frank <[email protected]>
> Reviewed-by: David Hildenbrand <[email protected]>
looks sane.
Acked-by: Christian Borntraeger <[email protected]>
> ---
> arch/s390/kvm/vsie.c | 98 +++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 93 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index 78b604326016..48aab6290a77 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -417,11 +417,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> memcpy((void *)((u64)scb_o + 0xc0),
> (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
> break;
> - case ICPT_PARTEXEC:
> - /* MVPG only */
> - memcpy((void *)((u64)scb_o + 0xc0),
> - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
> - break;
> }
>
> if (scb_s->ihcpu != 0xffffU)
> @@ -983,6 +978,95 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> return 0;
> }
>
> +/*
> + * Get a register for a nested guest.
> + * @vcpu the vcpu of the guest
> + * @vsie_page the vsie_page for the nested guest
> + * @reg the register number, the upper 4 bits are ignored.
> + * returns: the value of the register.
> + */
> +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
> +{
> + /* no need to validate the parameter and/or perform error handling */
> + reg &= 0xf;
> + switch (reg) {
> + case 15:
> + return vsie_page->scb_s.gg15;
> + case 14:
> + return vsie_page->scb_s.gg14;
> + default:
> + return vcpu->run->s.regs.gprs[reg];
> + }
> +}
> +
> +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> +{
> + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
> + unsigned long pei_dest, pei_src, src, dest, mask;
> + u64 *pei_block = &vsie_page->scb_o->mcic;
> + int edat, rc_dest, rc_src;
> + union ctlreg0 cr0;
> +
> + cr0.val = vcpu->arch.sie_block->gcr[0];
> + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
> + mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
> +
> + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
> + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
> +
> + rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
> + rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
> + /*
> + * Either everything went well, or something non-critical went wrong
> + * e.g. because of a race. In either case, simply retry.
> + */
> + if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
> + retry_vsie_icpt(vsie_page);
> + return -EAGAIN;
> + }
> + /* Something more serious went wrong, propagate the error */
> + if (rc_dest < 0)
> + return rc_dest;
> + if (rc_src < 0)
> + return rc_src;
> +
> + /* The only possible suppressing exception: just deliver it */
> + if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
> + clear_vsie_icpt(vsie_page);
> + rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
> + WARN_ON_ONCE(rc_dest);
> + return 1;
> + }
> +
> + /*
> + * Forward the PEI intercept to the guest if it was a page fault, or
> + * also for segment and region table faults if EDAT applies.
> + */
> + if (edat) {
> + rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
> + rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
> + } else {
> + rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
> + rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
> + }
> + if (!rc_dest && !rc_src) {
> + pei_block[0] = pei_dest;
> + pei_block[1] = pei_src;
> + return 1;
> + }
> +
> + retry_vsie_icpt(vsie_page);
> +
> + /*
> + * The host has edat, and the guest does not, or it was an ASCE type
> + * exception. The host needs to inject the appropriate DAT interrupts
> + * into the guest.
> + */
> + if (rc_dest)
> + return inject_fault(vcpu, rc_dest, dest, 1);
> + return inject_fault(vcpu, rc_src, src, 0);
> +}
> +
> /*
> * Run the vsie on a shadow scb and a shadow gmap, without any further
> * sanity checks, handling SIE faults.
> @@ -1071,6 +1155,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> if ((scb_s->ipa & 0xf000) != 0xf000)
> scb_s->ipa += 0x1000;
> break;
> + case ICPT_PARTEXEC:
> + if (scb_s->ipa == 0xb254)
> + rc = vsie_handle_mvpg(vcpu, vsie_page);
> + break;
> }
> return rc;
> }
>
On 02.03.21 18:44, Claudio Imbrenda wrote:
> The current handling of the MVPG instruction when executed in a nested
> guest is wrong, and can lead to the nested guest hanging.
>
> This patchset fixes the behaviour to be more architecturally correct,
> and fixes the hangs observed.
>
> v4->v5
> * split kvm_s390_logical_to_effective so it can be reused for vSIE
> * fix existing comments and add some more comments
> * use the new split _kvm_s390_logical_to_effective in vsie_handle_mvpg
>
> v3->v4
> * added PEI_ prefix to DAT_PROT and NOT_PTE macros
> * added small comment to explain what they are about
>
> v2->v3
> * improved some comments
> * improved some variable and parameter names for increased readability
> * fixed missing handling of page faults in the MVPG handler
> * small readability improvements
>
> v1->v2
> * complete rewrite
queued (with small fixups) for kvms390. Still not sure if this will land in master or next.
Opinions?
>
> Claudio Imbrenda (3):
> s390/kvm: split kvm_s390_logical_to_effective
> s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
> s390/kvm: VSIE: correctly handle MVPG when in VSIE
>
> arch/s390/kvm/gaccess.c | 30 ++++++++++--
> arch/s390/kvm/gaccess.h | 35 ++++++++++---
> arch/s390/kvm/vsie.c | 106 ++++++++++++++++++++++++++++++++++++----
> 3 files changed, 151 insertions(+), 20 deletions(-)
>
On 3/8/21 4:19 PM, Christian Borntraeger wrote:
> On 02.03.21 18:44, Claudio Imbrenda wrote:
>> The current handling of the MVPG instruction when executed in a nested
>> guest is wrong, and can lead to the nested guest hanging.
>>
>> This patchset fixes the behaviour to be more architecturally correct,
>> and fixes the hangs observed.
>>
>> v4->v5
>> * split kvm_s390_logical_to_effective so it can be reused for vSIE
>> * fix existing comments and add some more comments
>> * use the new split _kvm_s390_logical_to_effective in vsie_handle_mvpg
>>
>> v3->v4
>> * added PEI_ prefix to DAT_PROT and NOT_PTE macros
>> * added small comment to explain what they are about
>>
>> v2->v3
>> * improved some comments
>> * improved some variable and parameter names for increased readability
>> * fixed missing handling of page faults in the MVPG handler
>> * small readability improvements
>>
>> v1->v2
>> * complete rewrite
>
>
> queued (with small fixups) for kvms390. Still not sure if this will land in master or next.
> Opinions?
I'd go for the next merge window
>>
>> Claudio Imbrenda (3):
>> s390/kvm: split kvm_s390_logical_to_effective
>> s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
>> s390/kvm: VSIE: correctly handle MVPG when in VSIE
>>
>> arch/s390/kvm/gaccess.c | 30 ++++++++++--
>> arch/s390/kvm/gaccess.h | 35 ++++++++++---
>> arch/s390/kvm/vsie.c | 106 ++++++++++++++++++++++++++++++++++++----
>> 3 files changed, 151 insertions(+), 20 deletions(-)
>>
On Mon, 8 Mar 2021 16:26:58 +0100
Janosch Frank <[email protected]> wrote:
> On 3/8/21 4:19 PM, Christian Borntraeger wrote:
> > On 02.03.21 18:44, Claudio Imbrenda wrote:
> >> The current handling of the MVPG instruction when executed in a
> >> nested guest is wrong, and can lead to the nested guest hanging.
> >>
> >> This patchset fixes the behaviour to be more architecturally
> >> correct, and fixes the hangs observed.
> >>
> >> v4->v5
> >> * split kvm_s390_logical_to_effective so it can be reused for vSIE
> >> * fix existing comments and add some more comments
> >> * use the new split _kvm_s390_logical_to_effective in
> >> vsie_handle_mvpg
> >>
> >> v3->v4
> >> * added PEI_ prefix to DAT_PROT and NOT_PTE macros
> >> * added small comment to explain what they are about
> >>
> >> v2->v3
> >> * improved some comments
> >> * improved some variable and parameter names for increased
> >> readability
> >> * fixed missing handling of page faults in the MVPG handler
> >> * small readability improvements
> >>
> >> v1->v2
> >> * complete rewrite
> >
> >
> > queued (with small fixups) for kvms390. Still not sure if this will
> > land in master or next. Opinions?
>
> I'd go for the next merge window
I agree
> >>
> >> Claudio Imbrenda (3):
> >> s390/kvm: split kvm_s390_logical_to_effective
> >> s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
> >> s390/kvm: VSIE: correctly handle MVPG when in VSIE
> >>
> >> arch/s390/kvm/gaccess.c | 30 ++++++++++--
> >> arch/s390/kvm/gaccess.h | 35 ++++++++++---
> >> arch/s390/kvm/vsie.c | 106 ++++++++++++++++++++++++++++++++++++----
> >> 3 files changed, 151 insertions(+), 20 deletions(-)
>