2021-08-16 15:10:16

by Janis Schoetterl-Glausch

[permalink] [raw]
Subject: [PATCH 2/2] KVM: s390: gaccess: Refactor access address range check

Do not round down the first address to the page boundary, just translate
it normally, which gives the value we care about in the first place.
Given this, translating a single address is just the special case of
translating a range spanning a single page.

Make the output optional, so the function can be used to just check a
range.

Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
1 file changed, 39 insertions(+), 52 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index df83de0843de..e5a19d8b30e2 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}

-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
- unsigned long *pages, unsigned long nr_pages,
- const union asce asce, enum gacc_mode mode)
+/* Stores the gpas for each page in a real/virtual range into @gpas
+ * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
+ * way read_guest/write_guest do, the meaning of the return value is likewise
+ * the same.
+ * If @gpas is NULL only the checks are performed.
+ */
+static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ unsigned long *gpas, unsigned long len,
+ const union asce asce, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long gpa;
+ unsigned int seg;
+ unsigned int offset = offset_in_page(ga);
int lap_enabled, rc = 0;
enum prot_type prot;

lap_enabled = low_address_protection_enabled(vcpu, asce);
- while (nr_pages) {
+ while ((seg = min(PAGE_SIZE - offset, len)) != 0) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
PROT_TYPE_LA);
- ga &= PAGE_MASK;
if (psw_bits(*psw).dat) {
- rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
+ rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
- *pages = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ gpa = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, gpa))
rc = PGM_ADDRESSING;
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
- ga += PAGE_SIZE;
- pages++;
- nr_pages--;
+ if (gpas)
+ *gpas++ = gpa;
+ offset = 0;
+ ga += seg;
+ len -= seg;
}
return 0;
}
@@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
unsigned long len, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- unsigned long nr_pages, gpa, idx;
+ unsigned long nr_pages, idx;
unsigned int seg;
- unsigned long pages_array[2];
- unsigned long *pages;
+ unsigned long gpa_array[2];
+ unsigned long *gpas;
int need_ipte_lock;
union asce asce;
int rc;
@@ -860,27 +870,25 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
- pages = pages_array;
- if (nr_pages > ARRAY_SIZE(pages_array))
- pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
- if (!pages)
+ gpas = gpa_array;
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+ if (!gpas)
return -ENOMEM;
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
- gpa = pages[idx] + offset_in_page(ga);
- seg = min(PAGE_SIZE - offset_in_page(gpa), len);
- rc = access_guest_frame(vcpu->kvm, mode, gpa, data, seg);
+ seg = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+ rc = access_guest_frame(vcpu->kvm, mode, gpas[idx], data, seg);
len -= seg;
- ga += seg;
data += seg;
}
if (need_ipte_lock)
ipte_unlock(vcpu);
- if (nr_pages > ARRAY_SIZE(pages_array))
- vfree(pages);
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ vfree(gpas);
return rc;
}

@@ -914,8 +922,6 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long *gpa, enum gacc_mode mode)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- enum prot_type prot;
union asce asce;
int rc;

@@ -923,23 +929,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE)
- return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
- mode, PROT_TYPE_LA);
- }
-
- if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
- if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, prot);
- } else {
- *gpa = kvm_s390_real_to_abs(vcpu, gva);
- if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
- }
-
- return rc;
+ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
}

/**
@@ -948,17 +938,14 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long length, enum gacc_mode mode)
{
- unsigned long gpa;
- unsigned long currlen;
+ union asce asce;
int rc = 0;

+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+ if (rc)
+ return rc;
ipte_lock(vcpu);
- while (length > 0 && !rc) {
- currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
- gva += currlen;
- length -= currlen;
- }
+ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
ipte_unlock(vcpu);

return rc;
--
2.25.1


2021-08-18 10:10:03

by Claudio Imbrenda

[permalink] [raw]
Subject: Re: [PATCH 2/2] KVM: s390: gaccess: Refactor access address range check

On Mon, 16 Aug 2021 17:07:17 +0200
Janis Schoetterl-Glausch <[email protected]> wrote:

> Do not round down the first address to the page boundary, just translate
> it normally, which gives the value we care about in the first place.
> Given this, translating a single address is just the special case of
> translating a range spanning a single page.
>
> Make the output optional, so the function can be used to just check a
> range.

I like the idea, but see a few nits below

>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> ---
> arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
> 1 file changed, 39 insertions(+), 52 deletions(-)
>
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index df83de0843de..e5a19d8b30e2 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
> return 1;
> }
>
> -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> - unsigned long *pages, unsigned long nr_pages,
> - const union asce asce, enum gacc_mode mode)
> +/* Stores the gpas for each page in a real/virtual range into @gpas
> + * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
> + * way read_guest/write_guest do, the meaning of the return value is likewise

this comment is a bit confusing; why telling us to look what a
different function is doing?

either don't mention this at all (since it's more or less the expected
behaviour), or explain in full what's going on

> + * the same.
> + * If @gpas is NULL only the checks are performed.
> + */
> +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> + unsigned long *gpas, unsigned long len,
> + const union asce asce, enum gacc_mode mode)
> {
> psw_t *psw = &vcpu->arch.sie_block->gpsw;
> + unsigned long gpa;
> + unsigned int seg;
> + unsigned int offset = offset_in_page(ga);
> int lap_enabled, rc = 0;
> enum prot_type prot;
>
> lap_enabled = low_address_protection_enabled(vcpu, asce);
> - while (nr_pages) {
> + while ((seg = min(PAGE_SIZE - offset, len)) != 0) {

I'm not terribly fond of assignments-as-values; moreover offset is used
only once.

why not something like:

seg = min(PAGE_SIZE - offset, len);
while (seg) {

...

seg = min(PAGE_SIZE, len);
}

or maybe even:

seg = min(PAGE_SIZE - offset, len);
for (; seg; seg = min(PAGE_SIZE, len)) {

(although the one with the while is perhaps more readable)

> ga = kvm_s390_logical_to_effective(vcpu, ga);
> if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
> return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
> PROT_TYPE_LA);
> - ga &= PAGE_MASK;
> if (psw_bits(*psw).dat) {
> - rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
> + rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
> if (rc < 0)
> return rc;
> } else {
> - *pages = kvm_s390_real_to_abs(vcpu, ga);
> - if (kvm_is_error_gpa(vcpu->kvm, *pages))
> + gpa = kvm_s390_real_to_abs(vcpu, ga);
> + if (kvm_is_error_gpa(vcpu->kvm, gpa))
> rc = PGM_ADDRESSING;
> }
> if (rc)
> return trans_exc(vcpu, rc, ga, ar, mode, prot);
> - ga += PAGE_SIZE;
> - pages++;
> - nr_pages--;
> + if (gpas)
> + *gpas++ = gpa;
> + offset = 0;
> + ga += seg;
> + len -= seg;
> }
> return 0;
> }
> @@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> unsigned long len, enum gacc_mode mode)
> {
> psw_t *psw = &vcpu->arch.sie_block->gpsw;
> - unsigned long nr_pages, gpa, idx;
> + unsigned long nr_pages, idx;
> unsigned int seg;
> - unsigned long pages_array[2];
> - unsigned long *pages;
> + unsigned long gpa_array[2];
> + unsigned long *gpas;

reverse Christmas tree?

also, since you're touching this: have you checked if a different size
for the array would bring any benefit?
2 seems a little too small, but I have no idea if anything bigger would
bring any advantages.

> int need_ipte_lock;
> union asce asce;
> int rc;
> @@ -860,27 +870,25 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> if (rc)
> return rc;
> nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
> - pages = pages_array;
> - if (nr_pages > ARRAY_SIZE(pages_array))
> - pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> - if (!pages)
> + gpas = gpa_array;
> + if (nr_pages > ARRAY_SIZE(gpa_array))
> + gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> + if (!gpas)
> return -ENOMEM;
> need_ipte_lock = psw_bits(*psw).dat && !asce.r;
> if (need_ipte_lock)
> ipte_lock(vcpu);
> - rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
> + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
> for (idx = 0; idx < nr_pages && !rc; idx++) {
> - gpa = pages[idx] + offset_in_page(ga);
> - seg = min(PAGE_SIZE - offset_in_page(gpa), len);
> - rc = access_guest_frame(vcpu->kvm, mode, gpa, data, seg);
> + seg = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
> + rc = access_guest_frame(vcpu->kvm, mode, gpas[idx], data, seg);
> len -= seg;
> - ga += seg;
> data += seg;
> }
> if (need_ipte_lock)
> ipte_unlock(vcpu);
> - if (nr_pages > ARRAY_SIZE(pages_array))
> - vfree(pages);
> + if (nr_pages > ARRAY_SIZE(gpa_array))
> + vfree(gpas);
> return rc;
> }
>
> @@ -914,8 +922,6 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
> int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> unsigned long *gpa, enum gacc_mode mode)
> {
> - psw_t *psw = &vcpu->arch.sie_block->gpsw;
> - enum prot_type prot;
> union asce asce;
> int rc;
>
> @@ -923,23 +929,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
> if (rc)
> return rc;
> - if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
> - if (mode == GACC_STORE)
> - return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
> - mode, PROT_TYPE_LA);
> - }
> -
> - if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
> - rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
> - if (rc > 0)
> - return trans_exc(vcpu, rc, gva, 0, mode, prot);
> - } else {
> - *gpa = kvm_s390_real_to_abs(vcpu, gva);
> - if (kvm_is_error_gpa(vcpu->kvm, *gpa))
> - return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
> - }
> -
> - return rc;
> + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
> }
>
> /**
> @@ -948,17 +938,14 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> unsigned long length, enum gacc_mode mode)
> {
> - unsigned long gpa;
> - unsigned long currlen;
> + union asce asce;
> int rc = 0;
>
> + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
> + if (rc)
> + return rc;
> ipte_lock(vcpu);
> - while (length > 0 && !rc) {
> - currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
> - rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
> - gva += currlen;
> - length -= currlen;
> - }
> + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
> ipte_unlock(vcpu);
>
> return rc;

2021-08-19 12:40:52

by Janis Schoetterl-Glausch

[permalink] [raw]
Subject: Re: [PATCH 2/2] KVM: s390: gaccess: Refactor access address range check

On 8/18/21 12:08 PM, Claudio Imbrenda wrote:
> On Mon, 16 Aug 2021 17:07:17 +0200
> Janis Schoetterl-Glausch <[email protected]> wrote:
>
>> Do not round down the first address to the page boundary, just translate
>> it normally, which gives the value we care about in the first place.
>> Given this, translating a single address is just the special case of
>> translating a range spanning a single page.
>>
>> Make the output optional, so the function can be used to just check a
>> range.
>
> I like the idea, but see a few nits below
>
>>
>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>> ---
>> arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
>> 1 file changed, 39 insertions(+), 52 deletions(-)
>>
>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>> index df83de0843de..e5a19d8b30e2 100644
>> --- a/arch/s390/kvm/gaccess.c
>> +++ b/arch/s390/kvm/gaccess.c
>> @@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>> return 1;
>> }
>>
>> -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
>> - unsigned long *pages, unsigned long nr_pages,
>> - const union asce asce, enum gacc_mode mode)
>> +/* Stores the gpas for each page in a real/virtual range into @gpas
>> + * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
>> + * way read_guest/write_guest do, the meaning of the return value is likewise
>
> this comment is a bit confusing; why telling us to look what a
> different function is doing?
>
> either don't mention this at all (since it's more or less the expected
> behaviour), or explain in full what's going on

Yeah, it's not ideal. I haven't decided yet what I'll do.
I think a comment would be helpful, and it may be expected behavior only if one has
looked at the code for long enough :).
>
>> + * the same.
>> + * If @gpas is NULL only the checks are performed.
>> + */
>> +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
>> + unsigned long *gpas, unsigned long len,
>> + const union asce asce, enum gacc_mode mode)
>> {
>> psw_t *psw = &vcpu->arch.sie_block->gpsw;
>> + unsigned long gpa;
>> + unsigned int seg;
>> + unsigned int offset = offset_in_page(ga);
>> int lap_enabled, rc = 0;
>> enum prot_type prot;
>>
>> lap_enabled = low_address_protection_enabled(vcpu, asce);
>> - while (nr_pages) {
>> + while ((seg = min(PAGE_SIZE - offset, len)) != 0) {
>
> I'm not terribly fond of assignments-as-values; moreover offset is used
> only once.
>
> why not something like:
>
> seg = min(PAGE_SIZE - offset, len);
> while (seg) {
>
> ...
>
> seg = min(PAGE_SIZE, len);
> }
>
> or maybe even:
>
> seg = min(PAGE_SIZE - offset, len);
> for (; seg; seg = min(PAGE_SIZE, len)) {
>
> (although the one with the while is perhaps more readable)

That code pattern is not entirely uncommon, but I'll change it to:

while(min(PAGE_SIZE - offset, len) > 0) {
seg = min(PAGE_SIZE - offset, len);
...
}

which I think reads better than having the assignment at the end.
I assume the compiler gets rid of the redundancy.
>
[...]

>> @@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
>> unsigned long len, enum gacc_mode mode)
>> {
>> psw_t *psw = &vcpu->arch.sie_block->gpsw;
>> - unsigned long nr_pages, gpa, idx;
>> + unsigned long nr_pages, idx;
>> unsigned int seg;
>> - unsigned long pages_array[2];
>> - unsigned long *pages;
>> + unsigned long gpa_array[2];
>> + unsigned long *gpas;
>
> reverse Christmas tree?
>
> also, since you're touching this: have you checked if a different size
> for the array would bring any benefit?
> 2 seems a little too small, but I have no idea if anything bigger would
> bring any advantages.

I have not checked it, no. When emulating instructions, you would only need >2
entries if an operand is >8k or >4k and weirdly aligned, hardly seems like a common occurrence.
On the other hand, bumping it up should not have any negative consequences.
I'll leave it as is.

[...]