Do not round down the first address to the page boundary, just translate
it normally, which gives the value we care about in the first place.
Given this, translating a single address is just the special case of
translating a range spanning a single page.
Make the output optional, so the function can be used to just check a
range.
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
1 file changed, 39 insertions(+), 52 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index df83de0843de..e5a19d8b30e2 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
- unsigned long *pages, unsigned long nr_pages,
- const union asce asce, enum gacc_mode mode)
+/* Stores the gpas for each page in a real/virtual range into @gpas
+ * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
+ * way read_guest/write_guest do, the meaning of the return value is likewise
+ * the same.
+ * If @gpas is NULL only the checks are performed.
+ */
+static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ unsigned long *gpas, unsigned long len,
+ const union asce asce, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long gpa;
+ unsigned int seg;
+ unsigned int offset = offset_in_page(ga);
int lap_enabled, rc = 0;
enum prot_type prot;
lap_enabled = low_address_protection_enabled(vcpu, asce);
- while (nr_pages) {
+ while ((seg = min(PAGE_SIZE - offset, len)) != 0) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
PROT_TYPE_LA);
- ga &= PAGE_MASK;
if (psw_bits(*psw).dat) {
- rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
+ rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
- *pages = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ gpa = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, gpa))
rc = PGM_ADDRESSING;
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
- ga += PAGE_SIZE;
- pages++;
- nr_pages--;
+ if (gpas)
+ *gpas++ = gpa;
+ offset = 0;
+ ga += seg;
+ len -= seg;
}
return 0;
}
@@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
unsigned long len, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- unsigned long nr_pages, gpa, idx;
+ unsigned long nr_pages, idx;
unsigned int seg;
- unsigned long pages_array[2];
- unsigned long *pages;
+ unsigned long gpa_array[2];
+ unsigned long *gpas;
int need_ipte_lock;
union asce asce;
int rc;
@@ -860,27 +870,25 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
- pages = pages_array;
- if (nr_pages > ARRAY_SIZE(pages_array))
- pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
- if (!pages)
+ gpas = gpa_array;
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+ if (!gpas)
return -ENOMEM;
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
- gpa = pages[idx] + offset_in_page(ga);
- seg = min(PAGE_SIZE - offset_in_page(gpa), len);
- rc = access_guest_frame(vcpu->kvm, mode, gpa, data, seg);
+ seg = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+ rc = access_guest_frame(vcpu->kvm, mode, gpas[idx], data, seg);
len -= seg;
- ga += seg;
data += seg;
}
if (need_ipte_lock)
ipte_unlock(vcpu);
- if (nr_pages > ARRAY_SIZE(pages_array))
- vfree(pages);
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ vfree(gpas);
return rc;
}
@@ -914,8 +922,6 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long *gpa, enum gacc_mode mode)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- enum prot_type prot;
union asce asce;
int rc;
@@ -923,23 +929,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE)
- return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
- mode, PROT_TYPE_LA);
- }
-
- if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
- if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, prot);
- } else {
- *gpa = kvm_s390_real_to_abs(vcpu, gva);
- if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
- }
-
- return rc;
+ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
}
/**
@@ -948,17 +938,14 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long length, enum gacc_mode mode)
{
- unsigned long gpa;
- unsigned long currlen;
+ union asce asce;
int rc = 0;
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+ if (rc)
+ return rc;
ipte_lock(vcpu);
- while (length > 0 && !rc) {
- currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
- gva += currlen;
- length -= currlen;
- }
+ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
ipte_unlock(vcpu);
return rc;
--
2.25.1
On Mon, 16 Aug 2021 17:07:17 +0200
Janis Schoetterl-Glausch <[email protected]> wrote:
> Do not round down the first address to the page boundary, just translate
> it normally, which gives the value we care about in the first place.
> Given this, translating a single address is just the special case of
> translating a range spanning a single page.
>
> Make the output optional, so the function can be used to just check a
> range.
I like the idea, but see a few nits below
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> ---
> arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
> 1 file changed, 39 insertions(+), 52 deletions(-)
>
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index df83de0843de..e5a19d8b30e2 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
> return 1;
> }
>
> -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> - unsigned long *pages, unsigned long nr_pages,
> - const union asce asce, enum gacc_mode mode)
> +/* Stores the gpas for each page in a real/virtual range into @gpas
> + * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
> + * way read_guest/write_guest do, the meaning of the return value is likewise
this comment is a bit confusing; why telling us to look what a
different function is doing?
either don't mention this at all (since it's more or less the expected
behaviour), or explain in full what's going on
> + * the same.
> + * If @gpas is NULL only the checks are performed.
> + */
> +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> + unsigned long *gpas, unsigned long len,
> + const union asce asce, enum gacc_mode mode)
> {
> psw_t *psw = &vcpu->arch.sie_block->gpsw;
> + unsigned long gpa;
> + unsigned int seg;
> + unsigned int offset = offset_in_page(ga);
> int lap_enabled, rc = 0;
> enum prot_type prot;
>
> lap_enabled = low_address_protection_enabled(vcpu, asce);
> - while (nr_pages) {
> + while ((seg = min(PAGE_SIZE - offset, len)) != 0) {
I'm not terribly fond of assignments-as-values; moreover offset is used
only once.
why not something like:
seg = min(PAGE_SIZE - offset, len);
while (seg) {
...
seg = min(PAGE_SIZE, len);
}
or maybe even:
seg = min(PAGE_SIZE - offset, len);
for (; seg; seg = min(PAGE_SIZE, len)) {
(although the one with the while is perhaps more readable)
> ga = kvm_s390_logical_to_effective(vcpu, ga);
> if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
> return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
> PROT_TYPE_LA);
> - ga &= PAGE_MASK;
> if (psw_bits(*psw).dat) {
> - rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
> + rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
> if (rc < 0)
> return rc;
> } else {
> - *pages = kvm_s390_real_to_abs(vcpu, ga);
> - if (kvm_is_error_gpa(vcpu->kvm, *pages))
> + gpa = kvm_s390_real_to_abs(vcpu, ga);
> + if (kvm_is_error_gpa(vcpu->kvm, gpa))
> rc = PGM_ADDRESSING;
> }
> if (rc)
> return trans_exc(vcpu, rc, ga, ar, mode, prot);
> - ga += PAGE_SIZE;
> - pages++;
> - nr_pages--;
> + if (gpas)
> + *gpas++ = gpa;
> + offset = 0;
> + ga += seg;
> + len -= seg;
> }
> return 0;
> }
> @@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> unsigned long len, enum gacc_mode mode)
> {
> psw_t *psw = &vcpu->arch.sie_block->gpsw;
> - unsigned long nr_pages, gpa, idx;
> + unsigned long nr_pages, idx;
> unsigned int seg;
> - unsigned long pages_array[2];
> - unsigned long *pages;
> + unsigned long gpa_array[2];
> + unsigned long *gpas;
reverse Christmas tree?
also, since you're touching this: have you checked if a different size
for the array would bring any benefit?
2 seems a little too small, but I have no idea if anything bigger would
bring any advantages.
> int need_ipte_lock;
> union asce asce;
> int rc;
> @@ -860,27 +870,25 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> if (rc)
> return rc;
> nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
> - pages = pages_array;
> - if (nr_pages > ARRAY_SIZE(pages_array))
> - pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> - if (!pages)
> + gpas = gpa_array;
> + if (nr_pages > ARRAY_SIZE(gpa_array))
> + gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> + if (!gpas)
> return -ENOMEM;
> need_ipte_lock = psw_bits(*psw).dat && !asce.r;
> if (need_ipte_lock)
> ipte_lock(vcpu);
> - rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
> + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
> for (idx = 0; idx < nr_pages && !rc; idx++) {
> - gpa = pages[idx] + offset_in_page(ga);
> - seg = min(PAGE_SIZE - offset_in_page(gpa), len);
> - rc = access_guest_frame(vcpu->kvm, mode, gpa, data, seg);
> + seg = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
> + rc = access_guest_frame(vcpu->kvm, mode, gpas[idx], data, seg);
> len -= seg;
> - ga += seg;
> data += seg;
> }
> if (need_ipte_lock)
> ipte_unlock(vcpu);
> - if (nr_pages > ARRAY_SIZE(pages_array))
> - vfree(pages);
> + if (nr_pages > ARRAY_SIZE(gpa_array))
> + vfree(gpas);
> return rc;
> }
>
> @@ -914,8 +922,6 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
> int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> unsigned long *gpa, enum gacc_mode mode)
> {
> - psw_t *psw = &vcpu->arch.sie_block->gpsw;
> - enum prot_type prot;
> union asce asce;
> int rc;
>
> @@ -923,23 +929,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
> if (rc)
> return rc;
> - if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
> - if (mode == GACC_STORE)
> - return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
> - mode, PROT_TYPE_LA);
> - }
> -
> - if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
> - rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
> - if (rc > 0)
> - return trans_exc(vcpu, rc, gva, 0, mode, prot);
> - } else {
> - *gpa = kvm_s390_real_to_abs(vcpu, gva);
> - if (kvm_is_error_gpa(vcpu->kvm, *gpa))
> - return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
> - }
> -
> - return rc;
> + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
> }
>
> /**
> @@ -948,17 +938,14 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> unsigned long length, enum gacc_mode mode)
> {
> - unsigned long gpa;
> - unsigned long currlen;
> + union asce asce;
> int rc = 0;
>
> + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
> + if (rc)
> + return rc;
> ipte_lock(vcpu);
> - while (length > 0 && !rc) {
> - currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
> - rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
> - gva += currlen;
> - length -= currlen;
> - }
> + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
> ipte_unlock(vcpu);
>
> return rc;
On 8/18/21 12:08 PM, Claudio Imbrenda wrote:
> On Mon, 16 Aug 2021 17:07:17 +0200
> Janis Schoetterl-Glausch <[email protected]> wrote:
>
>> Do not round down the first address to the page boundary, just translate
>> it normally, which gives the value we care about in the first place.
>> Given this, translating a single address is just the special case of
>> translating a range spanning a single page.
>>
>> Make the output optional, so the function can be used to just check a
>> range.
>
> I like the idea, but see a few nits below
>
>>
>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>> ---
>> arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
>> 1 file changed, 39 insertions(+), 52 deletions(-)
>>
>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>> index df83de0843de..e5a19d8b30e2 100644
>> --- a/arch/s390/kvm/gaccess.c
>> +++ b/arch/s390/kvm/gaccess.c
>> @@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>> return 1;
>> }
>>
>> -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
>> - unsigned long *pages, unsigned long nr_pages,
>> - const union asce asce, enum gacc_mode mode)
>> +/* Stores the gpas for each page in a real/virtual range into @gpas
>> + * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
>> + * way read_guest/write_guest do, the meaning of the return value is likewise
>
> this comment is a bit confusing; why telling us to look what a
> different function is doing?
>
> either don't mention this at all (since it's more or less the expected
> behaviour), or explain in full what's going on
Yeah, it's not ideal. I haven't decided yet what I'll do.
I think a comment would be helpful, and it may be expected behavior only if one has
looked at the code for long enough :).
>
>> + * the same.
>> + * If @gpas is NULL only the checks are performed.
>> + */
>> +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
>> + unsigned long *gpas, unsigned long len,
>> + const union asce asce, enum gacc_mode mode)
>> {
>> psw_t *psw = &vcpu->arch.sie_block->gpsw;
>> + unsigned long gpa;
>> + unsigned int seg;
>> + unsigned int offset = offset_in_page(ga);
>> int lap_enabled, rc = 0;
>> enum prot_type prot;
>>
>> lap_enabled = low_address_protection_enabled(vcpu, asce);
>> - while (nr_pages) {
>> + while ((seg = min(PAGE_SIZE - offset, len)) != 0) {
>
> I'm not terribly fond of assignments-as-values; moreover offset is used
> only once.
>
> why not something like:
>
> seg = min(PAGE_SIZE - offset, len);
> while (seg) {
>
> ...
>
> seg = min(PAGE_SIZE, len);
> }
>
> or maybe even:
>
> seg = min(PAGE_SIZE - offset, len);
> for (; seg; seg = min(PAGE_SIZE, len)) {
>
> (although the one with the while is perhaps more readable)
That code pattern is not entirely uncommon, but I'll change it to:
while(min(PAGE_SIZE - offset, len) > 0) {
seg = min(PAGE_SIZE - offset, len);
...
}
which I think reads better than having the assignment at the end.
I assume the compiler gets rid of the redundancy.
>
[...]
>> @@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
>> unsigned long len, enum gacc_mode mode)
>> {
>> psw_t *psw = &vcpu->arch.sie_block->gpsw;
>> - unsigned long nr_pages, gpa, idx;
>> + unsigned long nr_pages, idx;
>> unsigned int seg;
>> - unsigned long pages_array[2];
>> - unsigned long *pages;
>> + unsigned long gpa_array[2];
>> + unsigned long *gpas;
>
> reverse Christmas tree?
>
> also, since you're touching this: have you checked if a different size
> for the array would bring any benefit?
> 2 seems a little too small, but I have no idea if anything bigger would
> bring any advantages.
I have not checked it, no. When emulating instructions, you would only need >2
entries if an operand is >8k or >4k and weirdly aligned, hardly seems like a common occurrence.
On the other hand, bumping it up should not have any negative consequences.
I'll leave it as is.
[...]