Check storage keys when emulating instructions and let user space perform
key checked accesses.
User space can do so via an extension of the MEMOP IOCTL:
* allow optional key checking
* allow MEMOP on the vm fd, so that key checked accesses to absolute memory
  become possible (a usage sketch follows the TODO list below)
TODO:
* Documentation changes for MEMOP
* Consider redesign of capability for MEMOP
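For illustration only (not part of the series), a key checked read of guest
absolute memory via the vm fd might look roughly as follows. This assumes the
flag layout introduced later in the series (access key in the
KVM_S390_MEMOP_F_SKEYS_ACC bits, i.e. bits 8-11 of flags) and a hypothetical
handle_error() standing in for real error handling:

	struct kvm_s390_mem_op ksmo = {
		.gaddr = guest_absolute_address,
		.size  = length,
		.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
		.buf   = (__u64)(uintptr_t)buffer,
		.flags = (__u64)access_key << 8, /* key in bits 8-11 of flags */
	};

	if (ioctl(vm_fd, KVM_S390_MEM_OP, &ksmo))
		handle_error(); /* errno is set, or a positive program interruption code is returned */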
Janis Schoetterl-Glausch (10):
s390/uaccess: Add storage key checked access to user memory
KVM: s390: Honor storage keys when accessing guest memory
KVM: s390: handle_tprot: Honor storage keys
KVM: s390: selftests: Test TEST PROTECTION emulation
KVM: s390: Add optional storage key checking to MEMOP IOCTL
KVM: s390: Add vm IOCTL for key checked guest absolute memory access
KVM: s390: Rename existing vcpu memop functions
KVM: s390: selftests: Test memops with storage keys
KVM: s390: Add capability for storage key extension of MEM_OP IOCTL
KVM: s390: selftests: Make use of capability in MEM_OP test
arch/s390/include/asm/ctl_reg.h | 2 +
arch/s390/include/asm/page.h | 2 +
arch/s390/include/asm/uaccess.h | 32 ++
arch/s390/kvm/gaccess.c | 237 ++++++++-
arch/s390/kvm/gaccess.h | 85 +++-
arch/s390/kvm/intercept.c | 12 +-
arch/s390/kvm/kvm-s390.c | 122 ++++-
arch/s390/kvm/priv.c | 66 +--
arch/s390/lib/uaccess.c | 57 ++-
include/uapi/linux/kvm.h | 4 +
tools/testing/selftests/kvm/.gitignore | 1 +
tools/testing/selftests/kvm/Makefile | 1 +
tools/testing/selftests/kvm/s390x/memop.c | 560 +++++++++++++++++++---
tools/testing/selftests/kvm/s390x/tprot.c | 184 +++++++
14 files changed, 1207 insertions(+), 158 deletions(-)
create mode 100644 tools/testing/selftests/kvm/s390x/tprot.c
base-commit: bad13799e0305deb258372b7298a86be4c78aaba
prerequisite-patch-id: 5f8ae41bde2fa5717a775e17c08239ed1ddbcc83
--
2.32.0
KVM needs a mechanism for accessing guest memory that honors
storage key protection.
Since the copy_to/from_user implementation is based on move
instructions that accept an additional access key, we can implement
__copy_from/to_user_with_key by extending the existing
implementation.
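For illustration, a kernel-side caller could use the new primitive along
these lines (hypothetical sketch, not part of this patch; fetch_with_key()
is a made-up helper):

	static int fetch_with_key(void *dst, const void __user *src,
				  unsigned long len, char access_key)
	{
		/*
		 * The hardware checks access_key against the storage key of
		 * each accessed page; a non-zero return value is the number
		 * of bytes that could not be copied, e.g. due to key
		 * protection.
		 */
		if (__copy_from_user_with_key(dst, src, len, access_key))
			return -EFAULT;
		return 0;
	}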
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
arch/s390/include/asm/uaccess.h | 32 ++++++++++++++++++
arch/s390/lib/uaccess.c | 57 +++++++++++++++++++++++----------
2 files changed, 72 insertions(+), 17 deletions(-)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 02b467461163..5138040348cc 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -33,6 +33,38 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
#define access_ok(addr, size) __access_ok(addr, size)
+unsigned long __must_check
+raw_copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
+ char key);
+
+unsigned long __must_check
+raw_copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
+ char key);
+
+static __always_inline __must_check unsigned long
+__copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
+ char key)
+{
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ instrument_copy_from_user(to, from, n);
+ check_object_size(to, n, false);
+ return raw_copy_from_user_with_key(to, from, n, key);
+}
+
+static __always_inline __must_check unsigned long
+__copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
+ char key)
+{
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ instrument_copy_to_user(to, from, n);
+ check_object_size(from, n, true);
+ return raw_copy_to_user_with_key(to, from, n, key);
+}
+
unsigned long __must_check
raw_copy_from_user(void *to, const void __user *from, unsigned long n);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d3a700385875..ce7a150dd93a 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -59,11 +59,13 @@ static inline int copy_with_mvcos(void)
#endif
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
- unsigned long size)
+ unsigned long size, char key)
{
unsigned long tmp1, tmp2;
union oac spec = {
+ .oac2.key = key,
.oac2.as = PSW_BITS_AS_SECONDARY,
+ .oac2.k = 1,
.oac2.a = 1,
};
@@ -94,19 +96,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
}
static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
- unsigned long size)
+ unsigned long size, char key)
{
unsigned long tmp1, tmp2;
tmp1 = -256UL;
asm volatile(
" sacf 0\n"
- "0: mvcp 0(%0,%2),0(%1),%3\n"
+ "0: mvcp 0(%0,%2),0(%1),%[key]\n"
"7: jz 5f\n"
"1: algr %0,%3\n"
" la %1,256(%1)\n"
" la %2,256(%2)\n"
- "2: mvcp 0(%0,%2),0(%1),%3\n"
+ "2: mvcp 0(%0,%2),0(%1),%[key]\n"
"8: jnz 1b\n"
" j 5f\n"
"3: la %4,255(%1)\n" /* %4 = ptr + 255 */
@@ -115,7 +117,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
" slgr %4,%1\n"
" clgr %0,%4\n" /* copy crosses next page boundary? */
" jnh 6f\n"
- "4: mvcp 0(%4,%2),0(%1),%3\n"
+ "4: mvcp 0(%4,%2),0(%1),%[key]\n"
"9: slgr %0,%4\n"
" j 6f\n"
"5: slgr %0,%0\n"
@@ -123,24 +125,36 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
+ : [key] "d" (key << 4)
+ : "cc", "memory");
return size;
}
unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
if (copy_with_mvcos())
- return copy_from_user_mvcos(to, from, n);
- return copy_from_user_mvcp(to, from, n);
+ return copy_from_user_mvcos(to, from, n, 0);
+ return copy_from_user_mvcp(to, from, n, 0);
}
EXPORT_SYMBOL(raw_copy_from_user);
-static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
- unsigned long size)
+unsigned long raw_copy_from_user_with_key(void *to, const void __user *from,
+ unsigned long n, char key)
+{
+ if (copy_with_mvcos())
+ return copy_from_user_mvcos(to, from, n, key);
+ return copy_from_user_mvcp(to, from, n, key);
+}
+EXPORT_SYMBOL(raw_copy_from_user_with_key);
+
+inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+ unsigned long size, char key)
{
unsigned long tmp1, tmp2;
union oac spec = {
+ .oac1.key = key,
.oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.k = 1,
.oac1.a = 1,
};
@@ -171,19 +185,19 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
}
static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
- unsigned long size)
+ unsigned long size, char key)
{
unsigned long tmp1, tmp2;
tmp1 = -256UL;
asm volatile(
" sacf 0\n"
- "0: mvcs 0(%0,%1),0(%2),%3\n"
+ "0: mvcs 0(%0,%1),0(%2),%[key]\n"
"7: jz 5f\n"
"1: algr %0,%3\n"
" la %1,256(%1)\n"
" la %2,256(%2)\n"
- "2: mvcs 0(%0,%1),0(%2),%3\n"
+ "2: mvcs 0(%0,%1),0(%2),%[key]\n"
"8: jnz 1b\n"
" j 5f\n"
"3: la %4,255(%1)\n" /* %4 = ptr + 255 */
@@ -192,7 +206,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
" slgr %4,%1\n"
" clgr %0,%4\n" /* copy crosses next page boundary? */
" jnh 6f\n"
- "4: mvcs 0(%4,%1),0(%2),%3\n"
+ "4: mvcs 0(%4,%1),0(%2),%[key]\n"
"9: slgr %0,%4\n"
" j 6f\n"
"5: slgr %0,%0\n"
@@ -200,17 +214,26 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
+ : [key] "d" (key << 4)
+ : "cc", "memory");
return size;
}
unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
if (copy_with_mvcos())
- return copy_to_user_mvcos(to, from, n);
- return copy_to_user_mvcs(to, from, n);
+ return copy_to_user_mvcos(to, from, n, 0);
+ return copy_to_user_mvcs(to, from, n, 0);
}
EXPORT_SYMBOL(raw_copy_to_user);
+unsigned long raw_copy_to_user_with_key(void __user *to, const void *from,
+ unsigned long n, char key)
+{
+ if (copy_with_mvcos())
+ return copy_to_user_mvcos(to, from, n, key);
+ return copy_to_user_mvcs(to, from, n, key);
+}
+EXPORT_SYMBOL(raw_copy_to_user_with_key);
static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
{
--
2.32.0
Use the access key operand to check for key protection when
translating guest addresses.
Since the translation code checks for access exceptions and error hvas,
we can remove the check here and simplify the control flow.
Keep checking if the memory is read-only even if such memslots are
currently not supported.
handle_tprot was the last user of guest_translate_address,
so remove it.
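For reference, the resulting condition codes follow the TEST PROTECTION
definition in the Principles of Operation:
  cc 0: fetching and storing permitted
  cc 1: fetching permitted, storing not permitted
  cc 2: neither fetching nor storing permitted
  cc 3: translation not available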
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
Reviewed-by: Janosch Frank <[email protected]>
Reviewed-by: Claudio Imbrenda <[email protected]>
---
arch/s390/kvm/gaccess.c | 9 ------
arch/s390/kvm/gaccess.h | 3 --
arch/s390/kvm/priv.c | 66 ++++++++++++++++++++++-------------------
3 files changed, 35 insertions(+), 43 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 92ab96d55504..efe33cda38b6 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1104,15 +1104,6 @@ int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u
access_key);
}
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long *gpa, enum gacc_mode mode)
-{
- char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
-
- return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode,
- access_key);
-}
-
/**
* check_gva_range - test a range of guest virtual addresses for accessibility
* @vcpu: virtual cpu
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 3df432702cd6..0d4416178bb6 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -190,9 +190,6 @@ int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u
unsigned long *gpa, enum gacc_mode mode,
char access_key);
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
- u8 ar, unsigned long *gpa, enum gacc_mode mode);
-
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long length, enum gacc_mode mode,
char access_key);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 417154b314a6..7c68f893545c 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1443,10 +1443,11 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
static int handle_tprot(struct kvm_vcpu *vcpu)
{
- u64 address1, address2;
- unsigned long hva, gpa;
- int ret = 0, cc = 0;
+ u64 address, operand2;
+ unsigned long gpa;
+ char access_key;
bool writable;
+ int ret, cc;
u8 ar;
vcpu->stat.instruction_tprot++;
@@ -1454,43 +1455,46 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
+ kvm_s390_get_base_disp_sse(vcpu, &address, &operand2, &ar, NULL);
+ access_key = (operand2 & 0xf0) >> 4;
- /* we only handle the Linux memory detection case:
- * access key == 0
- * everything else goes to userspace. */
- if (address2 & 0xf0)
- return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
ipte_lock(vcpu);
- ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
- if (ret == PGM_PROTECTION) {
+
+ ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
+ GACC_STORE, access_key);
+ if (ret == 0) {
+ gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+ } else if (ret == PGM_PROTECTION) {
+ writable = false;
/* Write protected? Try again with read-only... */
- cc = 1;
- ret = guest_translate_address(vcpu, address1, ar, &gpa,
- GACC_FETCH);
+ ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
+ GACC_FETCH, access_key);
}
- if (ret) {
- if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
- ret = kvm_s390_inject_program_int(vcpu, ret);
- } else if (ret > 0) {
- /* Translation not available */
- kvm_s390_set_psw_cc(vcpu, 3);
+ if (ret >= 0) {
+ cc = -1;
+
+ /* Fetching permitted; storing permitted */
+ if (ret == 0 && writable)
+ cc = 0;
+ /* Fetching permitted; storing not permitted */
+ else if (ret == 0 && !writable)
+ cc = 1;
+ /* Fetching not permitted; storing not permitted */
+ else if (ret == PGM_PROTECTION)
+ cc = 2;
+ /* Translation not available */
+ else if (ret != PGM_ADDRESSING && ret != PGM_TRANSLATION_SPEC)
+ cc = 3;
+
+ if (cc != -1) {
+ kvm_s390_set_psw_cc(vcpu, cc);
ret = 0;
+ } else {
+ ret = kvm_s390_inject_program_int(vcpu, ret);
}
- goto out_unlock;
}
- hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
- if (kvm_is_error_hva(hva)) {
- ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- } else {
- if (!writable)
- cc = 1; /* Write not permitted ==> read-only */
- kvm_s390_set_psw_cc(vcpu, cc);
- /* Note: CC2 only occurs for storage keys (not supported yet) */
- }
-out_unlock:
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
ipte_unlock(vcpu);
return ret;
--
2.32.0
User space needs a mechanism to perform key checked accesses when
emulating instructions.
The key can be passed as an additional argument via the flags field.
Since reserved flags need to be 0 and key 0 matches all storage keys,
no key checking is performed by default, as before.
Passing the key as an explicit argument is flexible: user space can pass
the guest PSW's key to make the access exactly as the CPU would, or pass
another key if necessary.
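As a rough illustration (not part of the patch), an instruction emulator in
user space might pass the guest PSW key like this, assuming the flag layout
defined below (key in the KVM_S390_MEMOP_F_SKEYS_ACC bits, i.e. bits 8-11 of
flags):

	struct kvm_s390_mem_op ksmo = {
		.gaddr = guest_logical_address,
		.size  = length,
		.op    = KVM_S390_MEMOP_LOGICAL_READ,
		.buf   = (__u64)(uintptr_t)buffer,
		.ar    = access_register,
		.flags = (__u64)psw_key << 8, /* key 0 keeps the old, unchecked behavior */
	};

	ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &ksmo);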
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
Acked-by: Janosch Frank <[email protected]>
Reviewed-by: Claudio Imbrenda <[email protected]>
---
arch/s390/kvm/kvm-s390.c | 21 ++++++++++++++-------
include/uapi/linux/kvm.h | 1 +
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 38b304e81c57..c4acdb025ff1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -32,6 +32,7 @@
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
+#include <linux/bitfield.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -4727,9 +4728,11 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
+ char access_key = 0;
int r = 0;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
- | KVM_S390_MEMOP_F_CHECK_ONLY;
+ | KVM_S390_MEMOP_F_CHECK_ONLY
+ | KVM_S390_MEMOP_F_SKEYS_ACC;
if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
return -EINVAL;
@@ -4746,14 +4749,17 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return -ENOMEM;
}
+ access_key = FIELD_GET(KVM_S390_MEMOP_F_SKEYS_ACC, mop->flags);
+
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar,
- mop->size, GACC_FETCH, 0);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+ GACC_FETCH, access_key);
break;
}
- r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
+ mop->size, access_key);
if (r == 0) {
if (copy_to_user(uaddr, tmpbuf, mop->size))
r = -EFAULT;
@@ -4761,15 +4767,16 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
break;
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar,
- mop->size, GACC_STORE, 0);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+ GACC_STORE, access_key);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
r = -EFAULT;
break;
}
- r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
+ mop->size, access_key);
break;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1daa45268de2..e3f450b2f346 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -575,6 +575,7 @@ struct kvm_s390_mem_op {
/* flags for kvm_s390_mem_op->flags */
#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEYS_ACC 0x0f00ULL
/* for KVM_INTERRUPT */
struct kvm_interrupt {
--
2.32.0
Storage key checking had not been implemented for instructions emulated
by KVM. Implement it by enhancing the functions used for guest access,
in particular those making use of access_guest, which has been renamed
to access_guest_with_key.
Accesses via access_guest_real should not be key checked.
For actual accesses, key checking is done by __copy_from/to_user_with_key
(which internally uses MVCOS/MVCP/MVCS).
In cases where accessibility is checked without an actual access,
the check is performed by reading the storage key and comparing it
against the access key.
In both cases, if applicable, storage and fetch protection override
are honored.
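As a simplified sketch (not part of the patch; the real check,
vcpu_check_access_key() below, additionally honors the two overrides), the
rule for matching an access key against a page's storage key is roughly:

	static int key_check(unsigned char storage_key, char access_key, bool fetch)
	{
		unsigned char acc = (storage_key & 0xf0) >> 4;	/* access-control bits */
		bool fetch_protected = storage_key & 0x08;	/* fetch-protection bit */

		if (access_key == 0 || access_key == acc)
			return 0;
		if (fetch && !fetch_protected)
			return 0;
		return PGM_PROTECTION;
	}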
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
arch/s390/include/asm/ctl_reg.h | 2 +
arch/s390/include/asm/page.h | 2 +
arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++---
arch/s390/kvm/gaccess.h | 78 ++++++++++++--
arch/s390/kvm/intercept.c | 12 +--
arch/s390/kvm/kvm-s390.c | 4 +-
6 files changed, 241 insertions(+), 31 deletions(-)
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 04dc65f8901d..c800199a376b 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -12,6 +12,8 @@
#define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
+#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
+#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
#define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index d98d17a36c7b..cfc4d6fb2385 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -20,6 +20,8 @@
#define PAGE_SIZE _PAGE_SIZE
#define PAGE_MASK _PAGE_MASK
#define PAGE_DEFAULT_ACC 0
+/* storage-protection override */
+#define PAGE_SPO_ACC 9
#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
#define HPAGE_SHIFT 20
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4460808c3b9a..92ab96d55504 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -10,6 +10,7 @@
#include <linux/mm_types.h>
#include <linux/err.h>
#include <linux/pgtable.h>
+#include <linux/bitfield.h>
#include <asm/gmap.h>
#include "kvm-s390.h"
@@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
+static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
+ union asce asce)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long override;
+
+ if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+ /* check if fetch protection override enabled */
+ override = vcpu->arch.sie_block->gcr[0];
+ override &= CR0_FETCH_PROTECTION_OVERRIDE;
+ /* not applicable if subject to DAT && private space */
+ override = override && !(psw_bits(*psw).dat && asce.p);
+ return override;
+ }
+ return false;
+}
+
+static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
+{
+ return ga < 2048 && ga + len <= 2048;
+}
+
+static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
+{
+ /* check if storage protection override enabled */
+ return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
+}
+
+static bool storage_prot_override_applies(char access_control)
+{
+ /* matches special storage protection override key (9) -> allow */
+ return access_control == PAGE_SPO_ACC;
+}
+
+static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
+ enum gacc_mode mode, union asce asce, gpa_t gpa,
+ unsigned long ga, unsigned int len)
+{
+ unsigned char storage_key, access_control;
+ unsigned long hva;
+ int r;
+
+ /* access key 0 matches any storage key -> allow */
+ if (access_key == 0)
+ return 0;
+ /*
+ * caller needs to ensure that gfn is accessible, so we can
+ * assume that this cannot fail
+ */
+ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
+ mmap_read_lock(current->mm);
+ r = get_guest_storage_key(current->mm, hva, &storage_key);
+ mmap_read_unlock(current->mm);
+ if (r)
+ return r;
+ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+ /* access key matches storage key -> allow */
+ if (access_control == access_key)
+ return 0;
+ if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+ /* mismatching keys, no fetch protection -> allowed */
+ if (!(storage_key & _PAGE_FP_BIT))
+ return 0;
+ if (fetch_prot_override_applicable(vcpu, mode, asce))
+ if (fetch_prot_override_applies(ga, len))
+ return 0;
+ }
+ if (storage_prot_override_applicable(vcpu))
+ if (storage_prot_override_applies(access_control))
+ return 0;
+ return PGM_PROTECTION;
+}
+
/**
* guest_range_to_gpas() - Calculate guest physical addresses of page fragments
* covering a logical range
@@ -804,6 +878,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
* @len: length of range in bytes
* @asce: address-space-control element to use for translation
* @mode: access mode
+ * @access_key: access key to match the range's storage keys against
*
* Translate a logical range to a series of guest absolute addresses,
* such that the concatenation of page fragments starting at each gpa make up
@@ -830,7 +905,8 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
*/
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
unsigned long *gpas, unsigned long len,
- const union asce asce, enum gacc_mode mode)
+ const union asce asce, enum gacc_mode mode,
+ char access_key)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
unsigned int offset = offset_in_page(ga);
@@ -857,6 +933,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
+ rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
+ fragment_len);
+ if (rc)
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
if (gpas)
*gpas++ = gpa;
offset = 0;
@@ -880,16 +960,50 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
return rc;
}
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
- unsigned long len, enum gacc_mode mode)
+static int
+access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+ void *data, unsigned int len, char key)
+{
+ struct kvm_memory_slot *slot;
+ bool writable;
+ gfn_t gfn;
+ hva_t hva;
+ int rc;
+
+ gfn = gpa >> PAGE_SHIFT;
+ slot = gfn_to_memslot(kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ if (!writable && mode == GACC_STORE)
+ return -EOPNOTSUPP;
+ hva += offset_in_page(gpa);
+ if (mode == GACC_STORE)
+ rc = __copy_to_user_with_key((void __user *)hva, data, len, key);
+ else
+ rc = __copy_from_user_with_key(data, (void __user *)hva, len, key);
+ if (rc)
+ return PGM_PROTECTION;
+ if (mode == GACC_STORE)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ return 0;
+}
+
+int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, enum gacc_mode mode,
+ char access_key)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
unsigned long nr_pages, idx;
unsigned long gpa_array[2];
unsigned int fragment_len;
unsigned long *gpas;
+ enum prot_type prot;
int need_ipte_lock;
union asce asce;
+ bool try_storage_prot_override;
+ bool try_fetch_prot_override;
int rc;
if (!len)
@@ -904,16 +1018,37 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
if (!gpas)
return -ENOMEM;
+ try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
+ try_storage_prot_override = storage_prot_override_applicable(vcpu);
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
- for (idx = 0; idx < nr_pages && !rc; idx++) {
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
+ if (rc)
+ goto out_unlock;
+ for (idx = 0; idx < nr_pages; idx++) {
fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
- rc = access_guest_page(vcpu->kvm, mode, gpas[idx], data, fragment_len);
+ if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
+ rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len);
+ } else {
+ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len, access_key);
+ }
+ if (rc == PGM_PROTECTION && try_storage_prot_override)
+ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len, PAGE_SPO_ACC);
+ if (rc == PGM_PROTECTION)
+ prot = PROT_TYPE_KEYC;
+ if (rc)
+ break;
len -= fragment_len;
data += fragment_len;
+ ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
}
+ if (rc > 0)
+ rc = trans_exc(vcpu, rc, ga, 0, mode, prot);
+out_unlock:
if (need_ipte_lock)
ipte_unlock(vcpu);
if (nr_pages > ARRAY_SIZE(gpa_array))
@@ -940,12 +1075,13 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
}
/**
- * guest_translate_address - translate guest logical into guest absolute address
+ * guest_translate_address_with_key - translate guest logical into guest absolute address
* @vcpu: virtual cpu
* @gva: Guest virtual address
* @ar: Access register
* @gpa: Guest physical address
* @mode: Translation access mode
+ * @access_key: access key to match the storage key with
*
* Parameter semantics are the same as the ones from guest_translate.
* The memory contents at the guest address are not changed.
@@ -953,8 +1089,9 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* Note: The IPTE lock is not taken during this function, so the caller
* has to take care of this.
*/
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long *gpa, enum gacc_mode mode)
+int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long *gpa, enum gacc_mode mode,
+ char access_key)
{
union asce asce;
int rc;
@@ -963,7 +1100,17 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
+ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
+ access_key);
+}
+
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long *gpa, enum gacc_mode mode)
+{
+ char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+ return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode,
+ access_key);
}
/**
@@ -973,9 +1120,11 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
* @ar: Access register
* @length: Length of test range
* @mode: Translation access mode
+ * @access_key: access key to match the storage keys with
*/
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long length, enum gacc_mode mode)
+ unsigned long length, enum gacc_mode mode,
+ char access_key)
{
union asce asce;
int rc = 0;
@@ -984,7 +1133,8 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
if (rc)
return rc;
ipte_lock(vcpu);
- rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
+ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
+ access_key);
ipte_unlock(vcpu);
return rc;
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 7c72a5e3449f..3df432702cd6 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -186,24 +186,32 @@ enum gacc_mode {
GACC_IFETCH,
};
+int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long *gpa, enum gacc_mode mode,
+ char access_key);
+
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
u8 ar, unsigned long *gpa, enum gacc_mode mode);
+
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long length, enum gacc_mode mode);
+ unsigned long length, enum gacc_mode mode,
+ char access_key);
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
- unsigned long len, enum gacc_mode mode);
+int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, enum gacc_mode mode,
+ char access_key);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode);
/**
- * write_guest - copy data from kernel space to guest space
+ * write_guest_with_key - copy data from kernel space to guest space
* @vcpu: virtual cpu
* @ga: guest address
* @ar: access register
* @data: source address in kernel space
* @len: number of bytes to copy
+ * @access_key: access key the storage key needs to match
*
* Copy @len bytes from @data (kernel space) to @ga (guest address).
* In order to copy data to guest space the PSW of the vcpu is inspected:
@@ -214,8 +222,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* The addressing mode of the PSW is also inspected, so that address wrap
* around is taken into account for 24-, 31- and 64-bit addressing mode,
* if the to be copied data crosses page boundaries in guest address space.
- * In addition also low address and DAT protection are inspected before
- * copying any data (key protection is currently not implemented).
+ * In addition low address, DAT and key protection checks are performed before
+ * copying any data.
*
* This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
* In case of an access exception (e.g. protection exception) pgm will contain
@@ -243,10 +251,53 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* if data has been changed in guest space in case of an exception.
*/
static inline __must_check
+int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, char access_key)
+{
+ return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE,
+ access_key);
+}
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * The behaviour of write_guest is identical to write_guest_with_key, except
+ * that the PSW access key is used instead of an explicit argument.
+ */
+static inline __must_check
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
+ char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+ return write_guest_with_key(vcpu, ga, ar, data, len, access_key);
+}
+
+/**
+ * read_guest_with_key - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ * @access_key: access key the storage key needs to match
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest_with_key is identical to write_guest_with_key,
+ * except that data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, char access_key)
+{
+ return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH,
+ access_key);
}
/**
@@ -259,14 +310,16 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
*
* Copy @len bytes from @ga (guest address) to @data (kernel space).
*
- * The behaviour of read_guest is identical to write_guest, except that
- * data will be copied from guest space to kernel space.
+ * The behaviour of read_guest is identical to read_guest_with_key, except
+ * that the PSW access key is used instead of an explicit argument.
*/
static inline __must_check
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
+ char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+ return read_guest_with_key(vcpu, ga, ar, data, len, access_key);
}
/**
@@ -287,7 +340,10 @@ static inline __must_check
int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH);
+ char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+ return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH,
+ access_key);
}
/**
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index d07ff646d844..8bd42a20d924 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -331,18 +331,18 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
- /* Make sure that the source is paged-in */
- rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
- reg2, &srcaddr, GACC_FETCH);
+ /* Ensure that the source is paged-in, no actual access -> no key checking */
+ rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2],
+ reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
if (rc != 0)
return rc;
- /* Make sure that the destination is paged-in */
- rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
- reg1, &dstaddr, GACC_STORE);
+ /* Ensure that the destination is paged-in, no actual access -> no key checking */
+ rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1],
+ reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 14a18ba5ff2c..38b304e81c57 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4750,7 +4750,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
r = check_gva_range(vcpu, mop->gaddr, mop->ar,
- mop->size, GACC_FETCH);
+ mop->size, GACC_FETCH, 0);
break;
}
r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
@@ -4762,7 +4762,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
r = check_gva_range(vcpu, mop->gaddr, mop->ar,
- mop->size, GACC_STORE);
+ mop->size, GACC_STORE, 0);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
--
2.32.0
Test the emulation of TEST PROTECTION in the presence of storage keys.
Emulation only occurs under certain conditions, one of which is the host
page being protected.
Trigger this by protecting the test pages via mprotect.
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
tools/testing/selftests/kvm/.gitignore | 1 +
tools/testing/selftests/kvm/Makefile | 1 +
tools/testing/selftests/kvm/s390x/tprot.c | 184 ++++++++++++++++++++++
3 files changed, 186 insertions(+)
create mode 100644 tools/testing/selftests/kvm/s390x/tprot.c
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3763105029fb..82c0470b6849 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -7,6 +7,7 @@
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
+/s390x/tprot
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c4e34717826a..df6de8d155e8 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -109,6 +109,7 @@ TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/resets
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += s390x/tprot
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
new file mode 100644
index 000000000000..8b52675307f6
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/tprot.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test TEST PROTECTION emulation.
+ * In order for emulation to occur, the target page has to be DAT protected in the
+ * host mappings. Since the page tables are shared, we can use mprotect
+ * to achieve this.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#include <sys/mman.h>
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
+
+#define VCPU_ID 1
+
+static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
+static uint8_t *const page_store_prot = pages[0];
+static uint8_t *const page_fetch_prot = pages[1];
+
+static int set_storage_key(void *addr, uint8_t key)
+{
+ int not_mapped = 0;
+
+ asm volatile (
+ "lra %[addr], 0(0,%[addr])\n"
+ " jz 0f\n"
+ " llill %[not_mapped],1\n"
+ " j 1f\n"
+ "0: sske %[key], %[addr]\n"
+ "1:"
+ : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
+ : [key] "r" (key)
+ : "cc"
+ );
+ return -not_mapped;
+}
+
+enum permission {
+ READ_WRITE = 0,
+ READ = 1,
+ NONE = 2,
+ UNAVAILABLE = 3,
+};
+
+static enum permission test_protection(void *addr, uint8_t key)
+{
+ uint64_t mask;
+
+ asm volatile (
+ "tprot %[addr], 0(%[key])\n"
+ " ipm %[mask]\n"
+ : [mask] "=r" (mask)
+ : [addr] "Q" (*(char *)addr),
+ [key] "a" (key)
+ : "cc"
+ );
+
+ return (enum permission)mask >> 28;
+}
+
+enum stage {
+ STAGE_END,
+ STAGE_INIT_SIMPLE,
+ TEST_SIMPLE,
+ STAGE_INIT_FETCH_PROT_OVERRIDE,
+ TEST_FETCH_PROT_OVERRIDE,
+ TEST_STORAGE_PROT_OVERRIDE,
+};
+
+struct test {
+ enum stage stage;
+ void *addr;
+ uint8_t key;
+ enum permission expected;
+} tests[] = {
+ /* Those which result in NONE/UNAVAILABLE will be interpreted by SIE,
+ * not KVM, but there is no harm in testing them also.
+ * See Enhanced Suppression-on-Protection Facilities in the
+ * Interpretive-Execution Mode
+ */
+ { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
+ { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
+ { TEST_SIMPLE, page_store_prot, 0x20, READ },
+ { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
+ { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
+ { TEST_SIMPLE, page_fetch_prot, 0x10, NONE },
+ { TEST_SIMPLE, (void *)0x00, 0x10, UNAVAILABLE },
+ /* Fetch-protection override */
+ { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
+ { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, NONE },
+ /* Storage-protection override */
+ { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
+ { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
+ { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
+ /* End marker */
+ { STAGE_END, 0, 0, 0 },
+};
+
+static enum stage perform_next_stage(int *i, bool mapped_0)
+{
+ enum stage stage = tests[*i].stage;
+ enum permission result;
+ bool skip;
+
+ for (; tests[*i].stage == stage; (*i)++) {
+ skip = tests[*i].addr < (void *)4096 &&
+ !mapped_0 &&
+ tests[*i].expected != UNAVAILABLE;
+ if (!skip) {
+ result = test_protection(tests[*i].addr, tests[*i].key);
+ GUEST_ASSERT_2(result == tests[*i].expected, *i, result);
+ }
+ }
+ return stage;
+}
+
+static void guest_code(void)
+{
+ bool mapped_0;
+ int i = 0;
+
+ GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
+ GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
+ GUEST_SYNC(STAGE_INIT_SIMPLE);
+ GUEST_SYNC(perform_next_stage(&i, false));
+
+ /* Fetch-protection override */
+ mapped_0 = !set_storage_key((void *)0, 0x98);
+ GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+
+ /* Storage-protection override */
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+}
+
+#define HOST_SYNC(vmp, stage) \
+({ \
+ struct kvm_vm *__vm = (vmp); \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vm, VCPU_ID); \
+ get_ucall(__vm, VCPU_ID, &uc); \
+ if (uc.cmd == UCALL_ABORT) { \
+ TEST_FAIL("line %lu: %s, hints: %lu, %lu", uc.args[1], \
+ (const char *)uc.args[0], uc.args[2], uc.args[3]); \
+ } \
+ ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ ASSERT_EQ(uc.args[1], __stage); \
+})
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ vm_vaddr_t guest_0_page;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ run = vcpu_state(vm, VCPU_ID);
+
+ HOST_SYNC(vm, STAGE_INIT_SIMPLE);
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+ HOST_SYNC(vm, TEST_SIMPLE);
+
+ guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+ if (guest_0_page != 0)
+ print_skip("Did not allocate page at 0 for fetch protection override tests");
+ HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ if (guest_0_page == 0)
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+ run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vm, TEST_FETCH_PROT_OVERRIDE);
+
+ run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE);
+}
--
2.32.0
Channel I/O honors storage keys and is performed on absolute memory.
For I/O emulation user space therefore needs to be able to do key
checked accesses.
The vm IOCTL supports read/write accesses, as well as checking
if an access would succeed.
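For illustration only (not part of the patch), an I/O emulator could probe a
channel program's data area before starting it, using the check-only mode
added here; ccw_data_address, ccw_count and access_key are placeholders, and
the key again sits in bits 8-11 of flags:

	struct kvm_s390_mem_op ksmo = {
		.gaddr = ccw_data_address, /* guest absolute address */
		.size  = ccw_count,
		.op    = KVM_S390_MEMOP_ABSOLUTE_WRITE,
		.flags = KVM_S390_MEMOP_F_CHECK_ONLY | (__u64)access_key << 8,
	};

	ret = ioctl(vm_fd, KVM_S390_MEM_OP, &ksmo);
	/* ret == 0: the store would succeed; ret > 0: program interruption code */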
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
Acked-by: Janosch Frank <[email protected]>
---
arch/s390/kvm/gaccess.c | 72 +++++++++++++++++++++++++++++++++++
arch/s390/kvm/gaccess.h | 6 +++
arch/s390/kvm/kvm-s390.c | 81 ++++++++++++++++++++++++++++++++++++++++
include/uapi/linux/kvm.h | 2 +
4 files changed, 161 insertions(+)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index efe33cda38b6..db1d9a494f77 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -795,6 +795,35 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
+static int vm_check_access_key(struct kvm *kvm, char access_key,
+ enum gacc_mode mode, gpa_t gpa)
+{
+ unsigned long hva;
+ unsigned char storage_key, access_control;
+ bool fetch_protected;
+ int r;
+
+ if (access_key == 0)
+ return 0;
+
+ hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+
+ mmap_read_lock(current->mm);
+ r = get_guest_storage_key(current->mm, hva, &storage_key);
+ mmap_read_unlock(current->mm);
+ if (r)
+ return r;
+ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+ if (access_control == access_key)
+ return 0;
+ fetch_protected = storage_key & _PAGE_FP_BIT;
+ if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
+ return 0;
+ return PGM_PROTECTION;
+}
+
static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
union asce asce)
{
@@ -990,6 +1019,26 @@ access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
return 0;
}
+int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
+ unsigned long len, enum gacc_mode mode, char key)
+{
+ int offset = offset_in_page(gpa);
+ int fragment_len;
+ int rc;
+
+ while (min(PAGE_SIZE - offset, len) > 0) {
+ fragment_len = min(PAGE_SIZE - offset, len);
+ rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, key);
+ if (rc)
+ return rc;
+ offset = 0;
+ len -= fragment_len;
+ data += fragment_len;
+ gpa += fragment_len;
+ }
+ return 0;
+}
+
int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
void *data, unsigned long len, enum gacc_mode mode,
char access_key)
@@ -1131,6 +1180,29 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
return rc;
}
+/**
+ * check_gpa_range - test a range of guest physical addresses for accessibility
+ * @kvm: virtual machine instance
+ * @gpa: guest physical address
+ * @length: length of test range
+ * @mode: access mode to test, relevant for storage keys
+ * @access_key: access key to match the storage keys with
+ */
+int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
+ enum gacc_mode mode, char access_key)
+{
+ unsigned int fragment_len;
+ int rc = 0;
+
+ while (length && !rc) {
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
+ rc = vm_check_access_key(kvm, access_key, mode, gpa);
+ length -= fragment_len;
+ gpa += fragment_len;
+ }
+ return rc;
+}
+
/**
* kvm_s390_check_low_addr_prot_real - check for low-address protection
* @vcpu: virtual cpu
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 0d4416178bb6..d89178b92d51 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -194,6 +194,12 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long length, enum gacc_mode mode,
char access_key);
+int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
+ enum gacc_mode mode, char access_key);
+
+int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
+ unsigned long len, enum gacc_mode mode, char key);
+
int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
void *data, unsigned long len, enum gacc_mode mode,
char access_key);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c4acdb025ff1..8dab956f84a6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2390,6 +2390,78 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
return r;
}
+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ static const __u8 zeros[sizeof(mop->reserved)] = {0};
+ void __user *uaddr = (void __user *)mop->buf;
+ u64 supported_flags;
+ void *tmpbuf = NULL;
+ char access_key;
+ int r, srcu_idx;
+
+ access_key = FIELD_GET(KVM_S390_MEMOP_F_SKEYS_ACC, mop->flags);
+ supported_flags = KVM_S390_MEMOP_F_SKEYS_ACC
+ | KVM_S390_MEMOP_F_CHECK_ONLY;
+ if (mop->flags & ~supported_flags)
+ return -EINVAL;
+ if (mop->size > MEM_OP_MAX_SIZE)
+ return -E2BIG;
+ if (kvm_s390_pv_is_protected(kvm))
+ return -EINVAL;
+ if (memcmp(mop->reserved, zeros, sizeof(zeros)) != 0)
+ return -EINVAL;
+
+ if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+ tmpbuf = vmalloc(mop->size);
+ if (!tmpbuf)
+ return -ENOMEM;
+ }
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+ r = PGM_ADDRESSING;
+ goto out_unlock;
+ }
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_ABSOLUTE_READ: {
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, access_key);
+ } else {
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_FETCH, access_key);
+ if (r == 0) {
+ if (copy_to_user(uaddr, tmpbuf, mop->size))
+ r = -EFAULT;
+ }
+ }
+ break;
+ }
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, access_key);
+ } else {
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+ r = -EFAULT;
+ break;
+ }
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_STORE, access_key);
+ }
+ break;
+ }
+ default:
+ r = -EINVAL;
+ }
+
+out_unlock:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ vfree(tmpbuf);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2514,6 +2586,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
break;
}
+ case KVM_S390_MEM_OP: {
+ struct kvm_s390_mem_op mem_op;
+
+ if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+ r = kvm_s390_vm_mem_op(kvm, &mem_op);
+ else
+ r = -EFAULT;
+ break;
+ }
default:
r = -ENOTTY;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e3f450b2f346..dd04170287fd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -572,6 +572,8 @@ struct kvm_s390_mem_op {
#define KVM_S390_MEMOP_LOGICAL_WRITE 1
#define KVM_S390_MEMOP_SIDA_READ 2
#define KVM_S390_MEMOP_SIDA_WRITE 3
+#define KVM_S390_MEMOP_ABSOLUTE_READ 4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
/* flags for kvm_s390_mem_op->flags */
#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
--
2.32.0
Availability of the KVM_CAP_S390_MEM_OP_SKEY capability signals that:
* The vcpu MEM_OP IOCTL supports storage key checking.
* The vm MEM_OP IOCTL exists.
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
Maybe this should be redesigned. As is, the capability mixes
support of storage keys for the vcpu ioctl with the availability
of the vm ioctl (which always supports storage keys).
We could have two capabilities, one to indicate the availability
of the vm memop and another used to derive the available functionality.
Janosch suggested that the second capability indicates the availability
of a "query" memop operation.
arch/s390/kvm/kvm-s390.c | 1 +
include/uapi/linux/kvm.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ab07389fb4d9..3c6517ad43a3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -565,6 +565,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
+ case KVM_CAP_S390_MEM_OP_SKEY:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index dd04170287fd..1bb38efd1156 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_S390_MEM_OP_SKEY 209
#ifdef KVM_CAP_IRQ_ROUTING
--
2.32.0
Only test the functionality guarded by KVM_CAP_S390_MEM_OP_SKEY
if the capability indicates support.
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
tools/testing/selftests/kvm/s390x/memop.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 774d5756f41d..7bdd6727d0ff 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -308,6 +308,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_s390_mem_op ksmo;
+ bool has_skey_ext;
vm_vaddr_t guest_mem1;
vm_vaddr_t guest_mem2;
vm_paddr_t guest_mem1_abs;
@@ -322,6 +323,9 @@ int main(int argc, char *argv[])
}
if (maxsize > sizeof(mem1))
maxsize = sizeof(mem1);
+ has_skey_ext = kvm_check_cap(KVM_CAP_S390_MEM_OP_SKEY);
+ if (!has_skey_ext)
+ print_skip("CAP_S390_MEM_OP_SKEY not supported");
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
@@ -342,7 +346,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
"Memory contents do not match!");
- {
+ if (has_skey_ext) {
vm_vaddr_t guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
vm_vaddr_t guest_last_page = vm_vaddr_alloc(vm, PAGE_SIZE, last_page_addr);
vm_paddr_t guest_mem2_abs = addr_gva2gpa(vm, guest_mem2);
@@ -515,6 +519,14 @@ int main(int argc, char *argv[])
TEST_ASSERT(rv != 0, "Fetch should result in exception");
rv = _vm_read_guest_key(vm, mem2, addr_gva2gpa(vm, 0), 2048, 2);
TEST_ASSERT(rv == 4, "Fetch should result in protection exception");
+ } else {
+ struct ucall uc;
+
+ do {
+ vcpu_run(vm, VCPU_ID);
+ get_ucall(vm, VCPU_ID, &uc);
+ ASSERT_EQ(uc.cmd, UCALL_SYNC);
+ } while (uc.args[1] < 100);
}
/* Check error conditions */
--
2.32.0
Test vm and vcpu memops with storage keys, covering both successful
accesses and various exception conditions.
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
tools/testing/selftests/kvm/s390x/memop.c | 548 +++++++++++++++++++---
1 file changed, 485 insertions(+), 63 deletions(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 9f49ead380ab..774d5756f41d 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -13,28 +13,305 @@
#include "test_util.h"
#include "kvm_util.h"
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
+
#define VCPU_ID 1
+const uint64_t last_page_addr = UINT64_MAX - PAGE_SIZE + 1;
+const unsigned int key_shift = ffs(KVM_S390_MEMOP_F_SKEYS_ACC) - 1;
+
static uint8_t mem1[65536];
static uint8_t mem2[65536];
+static void set_storage_key_range(void *addr, size_t len, char key)
+{
+ uintptr_t _addr, abs, i;
+
+ _addr = (uintptr_t)addr;
+ for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
+ abs = i;
+ asm volatile (
+ "lra %[abs], 0(0,%[abs])\n"
+ " sske %[key], %[abs]\n"
+ : [abs] "+&a" (abs)
+ : [key] "r" (key)
+ : "cc"
+ );
+ }
+}
+
static void guest_code(void)
+{
+ /* Set storage key */
+ set_storage_key_range(mem1, sizeof(mem1), 0x90);
+ set_storage_key_range(mem2, sizeof(mem2), 0x90);
+ GUEST_SYNC(0);
+
+ /* Write, read back, without keys */
+ memcpy(mem2, mem1, sizeof(mem2));
+ GUEST_SYNC(10);
+
+ /* Write, read back, key 0 */
+ memcpy(mem2, mem1, sizeof(mem2));
+ GUEST_SYNC(20);
+
+ /* Write, read back, matching key, 1 page */
+ memcpy(mem2, mem1, sizeof(mem2));
+ GUEST_SYNC(30);
+
+ /* Write, read back, matching key, all pages */
+ memcpy(mem2, mem1, sizeof(mem2));
+ GUEST_SYNC(40);
+
+ /* Set fetch protection */
+ set_storage_key_range(0, 1, 0x18);
+ GUEST_SYNC(50);
+
+ /* Enable fetch protection override */
+ GUEST_SYNC(60);
+
+ /* Enable storage protection override, set fetch protection */
+ set_storage_key_range(mem1, sizeof(mem1), 0x98);
+ set_storage_key_range(mem2, sizeof(mem2), 0x98);
+ GUEST_SYNC(70);
+
+ /* Write, read back, mismatching key,
+ * storage protection override, all pages
+ */
+ memcpy(mem2, mem1, sizeof(mem2));
+ GUEST_SYNC(80);
+
+ /* VM memop, write, read back, matching key */
+ memcpy(mem2, mem1, sizeof(mem2));
+ GUEST_SYNC(90);
+
+ /* VM memop, write, read back, key 0 */
+ memcpy(mem2, mem1, sizeof(mem2));
+ /* VM memop, fail to read from 0 absolute/virtual, mismatching key,
+ * fetch protection override does not apply to VM memops
+ */
+ asm volatile ("sske %1,%0\n"
+ : : "r"(0), "r"(0x18) : "cc"
+ );
+ GUEST_SYNC(100);
+
+ /* Enable AR mode */
+ GUEST_SYNC(110);
+
+ /* Disable AR mode */
+ GUEST_SYNC(120);
+}
+
+static void reroll_mem1(void)
{
int i;
- for (;;) {
- for (i = 0; i < sizeof(mem2); i++)
- mem2[i] = mem1[i];
- GUEST_SYNC(0);
- }
+ for (i = 0; i < sizeof(mem1); i++)
+ mem1[i] = rand();
+}
+
+static int _vcpu_read_guest(struct kvm_vm *vm, void *host_addr,
+ uintptr_t guest_addr, size_t len)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = guest_addr,
+ .flags = 0,
+ .size = len,
+ .op = KVM_S390_MEMOP_LOGICAL_READ,
+ .buf = (uintptr_t)host_addr,
+ .ar = 0,
+ };
+
+ return _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+}
+
+static void vcpu_read_guest(struct kvm_vm *vm, void *host_addr,
+ uintptr_t guest_addr, size_t len)
+{
+ int rv;
+
+ rv = _vcpu_read_guest(vm, host_addr, guest_addr, len);
+ TEST_ASSERT(rv == 0, "vcpu memop read failed: reason = %d\n", rv);
+}
+
+static int _vcpu_read_guest_key(struct kvm_vm *vm, void *host_addr,
+ uintptr_t guest_addr, size_t len, char key)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = guest_addr,
+ .flags = key << key_shift,
+ .size = len,
+ .op = KVM_S390_MEMOP_LOGICAL_READ,
+ .buf = (uintptr_t)host_addr,
+ .ar = 0,
+ };
+
+ return _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+}
+
+static void vcpu_read_guest_key(struct kvm_vm *vm, void *host_addr,
+ uintptr_t guest_addr, size_t len, char key)
+{
+ int rv;
+
+ rv = _vcpu_read_guest_key(vm, host_addr, guest_addr, len, key);
+ TEST_ASSERT(rv == 0, "vcpu memop read failed: reason = %d\n", rv);
+}
+
+static int _vcpu_write_guest(struct kvm_vm *vm, uintptr_t guest_addr,
+ void *host_addr, size_t len)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = guest_addr,
+ .flags = 0,
+ .size = len,
+ .op = KVM_S390_MEMOP_LOGICAL_WRITE,
+ .buf = (uintptr_t)host_addr,
+ .ar = 0,
+ };
+ return _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+}
+
+static void vcpu_write_guest(struct kvm_vm *vm, uintptr_t guest_addr,
+ void *host_addr, size_t len)
+{
+ int rv;
+
+ rv = _vcpu_write_guest(vm, guest_addr, host_addr, len);
+ TEST_ASSERT(rv == 0, "vcpu memop write failed: reason = %d\n", rv);
+}
+
+static int _vcpu_write_guest_key(struct kvm_vm *vm, uintptr_t guest_addr,
+ void *host_addr, size_t len, char key)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = guest_addr,
+ .flags = key << key_shift,
+ .size = len,
+ .op = KVM_S390_MEMOP_LOGICAL_WRITE,
+ .buf = (uintptr_t)host_addr,
+ .ar = 0,
+ };
+
+ return _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+}
+
+static void vcpu_write_guest_key(struct kvm_vm *vm, uintptr_t guest_addr,
+ void *host_addr, size_t len, char key)
+{
+ int rv;
+
+ rv = _vcpu_write_guest_key(vm, guest_addr, host_addr, len, key);
+ TEST_ASSERT(rv == 0, "vcpu memop write failed: reason = %d\n", rv);
+}
+
+static int _vm_read_guest_key(struct kvm_vm *vm, void *host_addr,
+ uintptr_t guest_addr, size_t len, char key)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = guest_addr,
+ .flags = key << key_shift,
+ .size = len,
+ .op = KVM_S390_MEMOP_ABSOLUTE_READ,
+ .buf = (uintptr_t)host_addr,
+ .reserved = {0},
+ };
+
+ return _vm_ioctl(vm, KVM_S390_MEM_OP, &ksmo);
+}
+
+static void vm_read_guest_key(struct kvm_vm *vm, void *host_addr,
+ uintptr_t guest_addr, size_t len, char key)
+{
+ int rv;
+
+ rv = _vm_read_guest_key(vm, host_addr, guest_addr, len, key);
+ TEST_ASSERT(rv == 0, "vm memop read failed: reason = %d\n", rv);
+}
+
+static int _vm_write_guest_key(struct kvm_vm *vm, uintptr_t guest_addr,
+ void *host_addr, size_t len, char key)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = guest_addr,
+ .flags = key << key_shift,
+ .size = len,
+ .op = KVM_S390_MEMOP_ABSOLUTE_WRITE,
+ .buf = (uintptr_t)host_addr,
+ .reserved = {0},
+ };
+
+ return _vm_ioctl(vm, KVM_S390_MEM_OP, &ksmo);
+}
+
+static void vm_write_guest_key(struct kvm_vm *vm, uintptr_t guest_addr,
+ void *host_addr, size_t len, char key)
+{
+ int rv;
+
+ rv = _vm_write_guest_key(vm, guest_addr, host_addr, len, key);
+ TEST_ASSERT(rv == 0, "vm memop write failed: reason = %d\n", rv);
+}
+
+enum access_mode {
+ ACCESS_READ,
+ ACCESS_WRITE
+};
+
+static int _vm_check_guest_key(struct kvm_vm *vm, enum access_mode mode,
+ uintptr_t guest_addr, size_t len, char key)
+{
+ int op;
+
+ if (mode == ACCESS_READ)
+ op = KVM_S390_MEMOP_ABSOLUTE_READ;
+ else
+ op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = guest_addr,
+ .flags = key << key_shift | KVM_S390_MEMOP_F_CHECK_ONLY,
+ .size = len,
+ .op = op,
+ .reserved = {0},
+ };
+
+ return _vm_ioctl(vm, KVM_S390_MEM_OP, &ksmo);
}
+static void vm_check_guest_key(struct kvm_vm *vm, enum access_mode mode,
+ uintptr_t guest_addr, size_t len, char key)
+{
+ int rv;
+
+ rv = _vm_check_guest_key(vm, mode, guest_addr, len, key);
+ TEST_ASSERT(rv == 0, "vm memop write failed: reason = %d\n", rv);
+}
+
+#define HOST_SYNC(vmp, stage) \
+({ \
+ struct kvm_vm *__vm = (vmp); \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vm, VCPU_ID); \
+ get_ucall(__vm, VCPU_ID, &uc); \
+ ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ ASSERT_EQ(uc.args[1], __stage); \
+}) \
+
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_s390_mem_op ksmo;
- int rv, i, maxsize;
+ vm_vaddr_t guest_mem1;
+ vm_vaddr_t guest_mem2;
+ vm_paddr_t guest_mem1_abs;
+ int rv, maxsize;
setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
@@ -49,63 +326,210 @@ int main(int argc, char *argv[])
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
run = vcpu_state(vm, VCPU_ID);
+ guest_mem1 = (uintptr_t)mem1;
+ guest_mem2 = (uintptr_t)mem2;
+ guest_mem1_abs = addr_gva2gpa(vm, guest_mem1);
- for (i = 0; i < sizeof(mem1); i++)
- mem1[i] = i * i + i;
+ /* Set storage key */
+ HOST_SYNC(vm, 0);
- /* Set the first array */
- ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1);
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ /* Write, read back, without keys */
+ reroll_mem1();
+ vcpu_write_guest(vm, guest_mem1, mem1, maxsize);
+ HOST_SYNC(vm, 10); // Copy in vm
+ memset(mem2, 0xaa, sizeof(mem2));
+ vcpu_read_guest(vm, mem2, guest_mem2, maxsize);
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
- /* Let the guest code copy the first array to the second */
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ {
+ vm_vaddr_t guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+ vm_vaddr_t guest_last_page = vm_vaddr_alloc(vm, PAGE_SIZE, last_page_addr);
+ vm_paddr_t guest_mem2_abs = addr_gva2gpa(vm, guest_mem2);
- memset(mem2, 0xaa, sizeof(mem2));
+ /* Write, read back, key 0 */
+ reroll_mem1();
+ vcpu_write_guest_key(vm, guest_mem1, mem1, maxsize, 0);
+ HOST_SYNC(vm, 20); // Copy in vm
+ memset(mem2, 0xaa, sizeof(mem2));
+ vcpu_read_guest_key(vm, mem2, guest_mem2, maxsize, 0);
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
- /* Get the second array */
- ksmo.gaddr = (uintptr_t)mem2;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
- ksmo.buf = (uintptr_t)mem2;
- ksmo.ar = 0;
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ /* Write, read back, matching key, 1 page */
+ reroll_mem1();
+ vcpu_write_guest_key(vm, guest_mem1, mem1, PAGE_SIZE, 9);
+ HOST_SYNC(vm, 30); // Copy in vm
+ memset(mem2, 0xaa, sizeof(mem2));
+ vcpu_read_guest_key(vm, mem2, guest_mem2, PAGE_SIZE, 9);
+ TEST_ASSERT(!memcmp(mem1, mem2, PAGE_SIZE),
+ "Memory contents do not match!");
- TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
- "Memory contents do not match!");
+ /* Write, read back, matching key, all pages */
+ reroll_mem1();
+ vcpu_write_guest_key(vm, guest_mem1, mem1, maxsize, 9);
+ HOST_SYNC(vm, 40); // Copy in vm
+ memset(mem2, 0xaa, sizeof(mem2));
+ vcpu_read_guest_key(vm, mem2, guest_mem2, maxsize, 9);
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
- /* Check error conditions - first bad size: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = -1;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ /* Fail to write, read back old value, mismatching key */
+ rv = _vcpu_write_guest_key(vm, guest_mem1, mem1, maxsize, 2);
+ TEST_ASSERT(rv == 4, "Store should result in protection exception");
+ memset(mem2, 0xaa, sizeof(mem2));
+ vcpu_read_guest_key(vm, mem2, guest_mem2, maxsize, 2);
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
+
+ /* Set fetch protection */
+ HOST_SYNC(vm, 50);
+
+	/* Write without key, read back, matching key, fetch protection */
+ reroll_mem1();
+ vcpu_write_guest(vm, guest_0_page, mem1, PAGE_SIZE);
+ memset(mem2, 0xaa, sizeof(mem2));
+	/* Let's not copy in the guest, in case guest_0_page != 0 */
+ vcpu_read_guest_key(vm, mem2, guest_0_page, PAGE_SIZE, 1);
+ TEST_ASSERT(!memcmp(mem1, mem2, PAGE_SIZE),
+ "Memory contents do not match!");
+
+ /* Fail to read, mismatching key, fetch protection */
+ rv = _vcpu_read_guest_key(vm, mem2, guest_0_page, PAGE_SIZE, 2);
+ TEST_ASSERT(rv == 4, "Fetch should result in protection exception");
+
+ /* Enable fetch protection override */
+ run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vm, 60);
+
+ if (guest_0_page != 0)
+ print_skip("Did not allocate page at 0 for fetch protection override test");
+
+	/* Write without key, read back, mismatching key,
+ * fetch protection override, 1 page
+ */
+ if (guest_0_page == 0) {
+ reroll_mem1();
+ vcpu_write_guest(vm, guest_0_page, mem1, PAGE_SIZE);
+ memset(mem2, 0xaa, sizeof(mem2));
+	/* Let's not copy in the guest, in case guest_0_page != 0 */
+ vcpu_read_guest_key(vm, mem2, guest_0_page, 2048, 2);
+ TEST_ASSERT(!memcmp(mem1, mem2, 2048),
+ "Memory contents do not match!");
+ }
+
+ /* Fail to read, mismatching key,
+ * fetch protection override address exceeded, 1 page
+ */
+ if (guest_0_page == 0) {
+ rv = _vcpu_read_guest_key(vm, mem2, 0, 2048 + 1, 2);
+ TEST_ASSERT(rv == 4,
+ "Fetch should result in protection exception");
+ }
+
+ if (guest_last_page != last_page_addr)
+ print_skip("Did not allocate last page for fetch protection override test");
+
+	/* Write without key, read back, mismatching key,
+ * fetch protection override, 2 pages, last page not fetch protected
+ */
+ reroll_mem1();
+ vcpu_write_guest(vm, guest_last_page, mem1, PAGE_SIZE);
+ vcpu_write_guest(vm, guest_0_page, mem1 + PAGE_SIZE, PAGE_SIZE);
+ if (guest_0_page == 0 && guest_last_page == last_page_addr) {
+ memset(mem2, 0xaa, sizeof(mem2));
+	/* Let's not copy in the guest, in case guest_0_page != 0 */
+ vcpu_read_guest_key(vm, mem2, last_page_addr,
+ PAGE_SIZE + 2048, 2);
+ TEST_ASSERT(!memcmp(mem1, mem2, PAGE_SIZE + 2048),
+ "Memory contents do not match!");
+ }
+
+ /* Fail to read, mismatching key, fetch protection override address
+ * exceeded, 2 pages, last page not fetch protected
+ */
+ if (guest_0_page == 0 && guest_last_page == last_page_addr) {
+ rv = _vcpu_read_guest_key(vm, mem2, last_page_addr,
+ PAGE_SIZE + 2048 + 1, 2);
+ TEST_ASSERT(rv == 4,
+ "Fetch should result in protection exception");
+ }
+
+	/* Enable storage protection override, set fetch protection */
+ run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vm, 70);
+
+ /* Write, read back, mismatching key,
+ * storage protection override, all pages
+ */
+ reroll_mem1();
+ vcpu_write_guest_key(vm, guest_mem1, mem1, maxsize, 2);
+ HOST_SYNC(vm, 80); // Copy in vm
+ memset(mem2, 0xaa, sizeof(mem2));
+ vcpu_read_guest_key(vm, mem2, guest_mem2, maxsize, 2);
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
+
+ /* VM memop, write, read back, matching key */
+ reroll_mem1();
+ vm_write_guest_key(vm, guest_mem1_abs, mem1, maxsize, 9);
+ HOST_SYNC(vm, 90); // Copy in vm
+ memset(mem2, 0xaa, sizeof(mem2));
+ vm_read_guest_key(vm, mem2, guest_mem2_abs, maxsize, 9);
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
+ vm_check_guest_key(vm, ACCESS_WRITE, guest_mem1_abs, maxsize, 9);
+ vm_check_guest_key(vm, ACCESS_READ, guest_mem2_abs, maxsize, 9);
+
+ /* VM memop, write, read back, key 0 */
+ reroll_mem1();
+ vm_write_guest_key(vm, guest_mem1_abs, mem1, maxsize, 0);
+ HOST_SYNC(vm, 100); // Copy in vm
+ memset(mem2, 0xaa, sizeof(mem2));
+ vm_read_guest_key(vm, mem2, guest_mem2_abs, maxsize, 0);
+ TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+ "Memory contents do not match!");
+ rv = _vm_check_guest_key(vm, ACCESS_READ, guest_mem1_abs, maxsize, 9);
+ TEST_ASSERT(rv == 0, "Check should succeed");
+ vm_check_guest_key(vm, ACCESS_WRITE, guest_mem1_abs, maxsize, 0);
+ vm_check_guest_key(vm, ACCESS_READ, guest_mem2_abs, maxsize, 0);
+
+ /* VM memop, fail to write, fail to read, mismatching key,
+ * storage protection override does not apply to VM memops
+ */
+ rv = _vm_write_guest_key(vm, guest_mem1_abs, mem1, maxsize, 2);
+ TEST_ASSERT(rv == 4, "Store should result in protection exception");
+ rv = _vm_read_guest_key(vm, mem2, guest_mem2_abs, maxsize, 2);
+ TEST_ASSERT(rv == 4, "Fetch should result in protection exception");
+ rv = _vm_check_guest_key(vm, ACCESS_WRITE, guest_mem1_abs, maxsize, 2);
+ TEST_ASSERT(rv == 4, "Check should indicate protection exception");
+ rv = _vm_check_guest_key(vm, ACCESS_READ, guest_mem2_abs, maxsize, 2);
+ TEST_ASSERT(rv == 4, "Check should indicate protection exception");
+
+ /* VM memop, fail to read from 0 absolute/virtual, mismatching key,
+ * fetch protection override does not apply to VM memops
+ */
+ rv = _vm_read_guest_key(vm, mem2, 0, 2048, 2);
+ TEST_ASSERT(rv != 0, "Fetch should result in exception");
+ rv = _vm_read_guest_key(vm, mem2, addr_gva2gpa(vm, 0), 2048, 2);
+ TEST_ASSERT(rv == 4, "Fetch should result in protection exception");
+ }
+
+ /* Check error conditions */
+
+ /* Bad size: */
+ rv = _vcpu_write_guest(vm, (uintptr_t)mem1, mem1, -1);
TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
/* Zero size: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = 0;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ rv = _vcpu_write_guest(vm, (uintptr_t)mem1, mem1, 0);
TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
"ioctl allows 0 as size");
/* Bad flags: */
- ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.gaddr = guest_mem1;
ksmo.flags = -1;
ksmo.size = maxsize;
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
@@ -115,7 +539,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
/* Bad operation: */
- ksmo.gaddr = (uintptr_t)mem1;
+ ksmo.gaddr = guest_mem1;
ksmo.flags = 0;
ksmo.size = maxsize;
ksmo.op = -1;
@@ -135,21 +559,17 @@ int main(int argc, char *argv[])
TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
/* Bad host address: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = 0;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ rv = _vcpu_write_guest(vm, guest_mem1, 0, maxsize);
TEST_ASSERT(rv == -1 && errno == EFAULT,
"ioctl does not report bad host memory address");
- /* Bad access register: */
+ /* Enable AR mode */
run->psw_mask &= ~(3UL << (63 - 17));
- run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
- vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */
- ksmo.gaddr = (uintptr_t)mem1;
+ run->psw_mask |= 1UL << (63 - 17);
+ HOST_SYNC(vm, 110);
+
+ /* Bad access register: */
+ ksmo.gaddr = guest_mem1;
ksmo.flags = 0;
ksmo.size = maxsize;
ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
@@ -157,8 +577,10 @@ int main(int argc, char *argv[])
ksmo.ar = 17;
rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
- run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
- vcpu_run(vm, VCPU_ID); /* Run to sync new state */
+
+ /* Disable AR mode */
+ run->psw_mask &= ~(3UL << (63 - 17));
+ HOST_SYNC(vm, 120);
kvm_vm_free(vm);
--
2.32.0
Makes the naming consistent, now that we also have a vm ioctl.
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
Reviewed-by: Janosch Frank <[email protected]>
Reviewed-by: Claudio Imbrenda <[email protected]>
---
arch/s390/kvm/kvm-s390.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8dab956f84a6..ab07389fb4d9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4776,8 +4776,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
-static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
- struct kvm_s390_mem_op *mop)
+static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
int r = 0;
@@ -4804,8 +4804,9 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
}
return r;
}
-static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
- struct kvm_s390_mem_op *mop)
+
+static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
@@ -4868,8 +4869,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
-static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
- struct kvm_s390_mem_op *mop)
+static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
{
int r, srcu_idx;
@@ -4878,12 +4879,12 @@ static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
case KVM_S390_MEMOP_LOGICAL_WRITE:
- r = kvm_s390_guest_mem_op(vcpu, mop);
+ r = kvm_s390_vcpu_mem_op(vcpu, mop);
break;
case KVM_S390_MEMOP_SIDA_READ:
case KVM_S390_MEMOP_SIDA_WRITE:
/* we are locked against sida going away by the vcpu->mutex */
- r = kvm_s390_guest_sida_op(vcpu, mop);
+ r = kvm_s390_vcpu_sida_op(vcpu, mop);
break;
default:
r = -EINVAL;
@@ -5046,7 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
- r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
+ r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
--
2.32.0
On 1/18/22 10:52, Janis Schoetterl-Glausch wrote:
> User space needs a mechanism to perform key checked accesses when
> emulating instructions.
>
> The key can be passed as an additional argument via the flags field.
> As reserved flags need to be 0, and key 0 matches all storage keys,
> by default no key checking is performed, as before.
> Having an additional argument is flexible, as user space can
> pass the guest PSW's key, in order to make an access the same way the
> CPU would, or pass another key if necessary.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> Acked-by: Janosch Frank <[email protected]>
> Reviewed-by: Claudio Imbrenda <[email protected]>
> ---
> arch/s390/kvm/kvm-s390.c | 21 ++++++++++++++-------
> include/uapi/linux/kvm.h | 1 +
> 2 files changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 38b304e81c57..c4acdb025ff1 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -32,6 +32,7 @@
> #include <linux/sched/signal.h>
> #include <linux/string.h>
> #include <linux/pgtable.h>
> +#include <linux/bitfield.h>
>
> #include <asm/asm-offsets.h>
> #include <asm/lowcore.h>
> @@ -4727,9 +4728,11 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
> {
> void __user *uaddr = (void __user *)mop->buf;
> void *tmpbuf = NULL;
> + char access_key = 0;
> int r = 0;
> const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
> - | KVM_S390_MEMOP_F_CHECK_ONLY;
> + | KVM_S390_MEMOP_F_CHECK_ONLY
> + | KVM_S390_MEMOP_F_SKEYS_ACC;
I think it would be better to just have a flag here (single bit) to check the key and
then embed the key value in the payload.
union {
__u8 ar; /* the access register number */
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* should be set to 0 */
};
There are cases where we need both AR and key, so maybe an unnamed struct is the easiest.
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1daa45268de2..e8fa1f82d472 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -562,7 +562,10 @@ struct kvm_s390_mem_op {
__u32 op; /* type of operation */
__u64 buf; /* buffer in userspace */
union {
- __u8 ar; /* the access register number */
+ struct {
+ __u8 ar; /* the access register number */
+ __u8 key; /* effective storage key */
+ };
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* should be set to 0 */
};
(or acc instead of key)
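For illustration, user space would then fill the struct roughly like this (the
flag and variable names are made up for this sketch and are not part of the
posted patch):

	struct kvm_s390_mem_op ksmo = {
		.gaddr = guest_addr,
		.size = len,
		.op = KVM_S390_MEMOP_LOGICAL_READ,
		.buf = (__u64)(uintptr_t)host_addr,
		.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION, /* single bit: do key checking */
		.ar = 0,
		.key = access_key, /* effective storage key, 0..15 */
	};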
>
> if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
> return -EINVAL;
> @@ -4746,14 +4749,17 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
> return -ENOMEM;
> }
>
> + access_key = FIELD_GET(KVM_S390_MEMOP_F_SKEYS_ACC, mop->flags);
> +
> switch (mop->op) {
> case KVM_S390_MEMOP_LOGICAL_READ:
> if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> - r = check_gva_range(vcpu, mop->gaddr, mop->ar,
> - mop->size, GACC_FETCH, 0);
> + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
> + GACC_FETCH, access_key);
> break;
> }
> - r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
> + r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
> + mop->size, access_key);
> if (r == 0) {
> if (copy_to_user(uaddr, tmpbuf, mop->size))
> r = -EFAULT;
> @@ -4761,15 +4767,16 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
> break;
> case KVM_S390_MEMOP_LOGICAL_WRITE:
> if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> - r = check_gva_range(vcpu, mop->gaddr, mop->ar,
> - mop->size, GACC_STORE, 0);
> + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
> + GACC_STORE, access_key);
> break;
> }
> if (copy_from_user(tmpbuf, uaddr, mop->size)) {
> r = -EFAULT;
> break;
> }
> - r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
> + r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
> + mop->size, access_key);
> break;
> }
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 1daa45268de2..e3f450b2f346 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -575,6 +575,7 @@ struct kvm_s390_mem_op {
> /* flags for kvm_s390_mem_op->flags */
> #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
> #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
> +#define KVM_S390_MEMOP_F_SKEYS_ACC 0x0f00ULL
>
> /* for KVM_INTERRUPT */
> struct kvm_interrupt {
On 1/18/22 10:52, Janis Schoetterl-Glausch wrote:
> KVM needs a mechanism to do accesses to guest memory that honor
> storage key protection.
> Since the copy_to/from_user implementation makes use of move
> instructions that support having an additional access key supplied,
> we can implement __copy_from/to_user_with_key by enhancing the
> existing implementation.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
For this I'd like to have buy-in from the kernel maintainers.
The patch looks good to me but currently I don't understand all of the
background so:
Acked-by: Janosch Frank <[email protected]>
> ---
> arch/s390/include/asm/uaccess.h | 32 ++++++++++++++++++
> arch/s390/lib/uaccess.c | 57 +++++++++++++++++++++++----------
> 2 files changed, 72 insertions(+), 17 deletions(-)
>
> diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
> index 02b467461163..5138040348cc 100644
> --- a/arch/s390/include/asm/uaccess.h
> +++ b/arch/s390/include/asm/uaccess.h
> @@ -33,6 +33,38 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
>
> #define access_ok(addr, size) __access_ok(addr, size)
>
> +unsigned long __must_check
> +raw_copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
> + char key);
> +
> +unsigned long __must_check
> +raw_copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
> + char key);
> +
> +static __always_inline __must_check unsigned long
> +__copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
> + char key)
> +{
> + might_fault();
> + if (should_fail_usercopy())
> + return n;
> + instrument_copy_from_user(to, from, n);
> + check_object_size(to, n, false);
> + return raw_copy_from_user_with_key(to, from, n, key);
> +}
> +
> +static __always_inline __must_check unsigned long
> +__copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
> + char key)
> +{
> + might_fault();
> + if (should_fail_usercopy())
> + return n;
> + instrument_copy_to_user(to, from, n);
> + check_object_size(from, n, true);
> + return raw_copy_to_user_with_key(to, from, n, key);
> +}
> +
> unsigned long __must_check
> raw_copy_from_user(void *to, const void __user *from, unsigned long n);
>
> diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
> index d3a700385875..ce7a150dd93a 100644
> --- a/arch/s390/lib/uaccess.c
> +++ b/arch/s390/lib/uaccess.c
> @@ -59,11 +59,13 @@ static inline int copy_with_mvcos(void)
> #endif
>
> static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
> - unsigned long size)
> + unsigned long size, char key)
> {
> unsigned long tmp1, tmp2;
> union oac spec = {
> + .oac2.key = key,
> .oac2.as = PSW_BITS_AS_SECONDARY,
> + .oac2.k = 1,
> .oac2.a = 1,
> };
>
> @@ -94,19 +96,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
> }
>
> static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
> - unsigned long size)
> + unsigned long size, char key)
> {
> unsigned long tmp1, tmp2;
>
> tmp1 = -256UL;
> asm volatile(
> " sacf 0\n"
> - "0: mvcp 0(%0,%2),0(%1),%3\n"
> + "0: mvcp 0(%0,%2),0(%1),%[key]\n"
> "7: jz 5f\n"
> "1: algr %0,%3\n"
> " la %1,256(%1)\n"
> " la %2,256(%2)\n"
> - "2: mvcp 0(%0,%2),0(%1),%3\n"
> + "2: mvcp 0(%0,%2),0(%1),%[key]\n"
> "8: jnz 1b\n"
> " j 5f\n"
> "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
> @@ -115,7 +117,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
> " slgr %4,%1\n"
> " clgr %0,%4\n" /* copy crosses next page boundary? */
> " jnh 6f\n"
> - "4: mvcp 0(%4,%2),0(%1),%3\n"
> + "4: mvcp 0(%4,%2),0(%1),%[key]\n"
> "9: slgr %0,%4\n"
> " j 6f\n"
> "5: slgr %0,%0\n"
> @@ -123,24 +125,36 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
> EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
> EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
> : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
> - : : "cc", "memory");
> + : [key] "d" (key << 4)
> + : "cc", "memory");
> return size;
> }
>
> unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
> {
> if (copy_with_mvcos())
> - return copy_from_user_mvcos(to, from, n);
> - return copy_from_user_mvcp(to, from, n);
> + return copy_from_user_mvcos(to, from, n, 0);
> + return copy_from_user_mvcp(to, from, n, 0);
> }
> EXPORT_SYMBOL(raw_copy_from_user);
>
> -static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
> - unsigned long size)
> +unsigned long raw_copy_from_user_with_key(void *to, const void __user *from,
> + unsigned long n, char key)
> +{
> + if (copy_with_mvcos())
> + return copy_from_user_mvcos(to, from, n, key);
> + return copy_from_user_mvcp(to, from, n, key);
> +}
> +EXPORT_SYMBOL(raw_copy_from_user_with_key);
> +
> +inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
> + unsigned long size, char key)
> {
> unsigned long tmp1, tmp2;
> union oac spec = {
> + .oac1.key = key,
> .oac1.as = PSW_BITS_AS_SECONDARY,
> + .oac1.k = 1,
> .oac1.a = 1,
> };
>
> @@ -171,19 +185,19 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
> }
>
> static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
> - unsigned long size)
> + unsigned long size, char key)
> {
> unsigned long tmp1, tmp2;
>
> tmp1 = -256UL;
> asm volatile(
> " sacf 0\n"
> - "0: mvcs 0(%0,%1),0(%2),%3\n"
> + "0: mvcs 0(%0,%1),0(%2),%[key]\n"
> "7: jz 5f\n"
> "1: algr %0,%3\n"
> " la %1,256(%1)\n"
> " la %2,256(%2)\n"
> - "2: mvcs 0(%0,%1),0(%2),%3\n"
> + "2: mvcs 0(%0,%1),0(%2),%[key]\n"
> "8: jnz 1b\n"
> " j 5f\n"
> "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
> @@ -192,7 +206,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
> " slgr %4,%1\n"
> " clgr %0,%4\n" /* copy crosses next page boundary? */
> " jnh 6f\n"
> - "4: mvcs 0(%4,%1),0(%2),%3\n"
> + "4: mvcs 0(%4,%1),0(%2),%[key]\n"
> "9: slgr %0,%4\n"
> " j 6f\n"
> "5: slgr %0,%0\n"
> @@ -200,17 +214,26 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
> EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
> EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
> : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
> - : : "cc", "memory");
> + : [key] "d" (key << 4)
> + : "cc", "memory");
> return size;
> }
>
> unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
> {
> if (copy_with_mvcos())
> - return copy_to_user_mvcos(to, from, n);
> - return copy_to_user_mvcs(to, from, n);
> + return copy_to_user_mvcos(to, from, n, 0);
> + return copy_to_user_mvcs(to, from, n, 0);
> }
> EXPORT_SYMBOL(raw_copy_to_user);
> +unsigned long raw_copy_to_user_with_key(void __user *to, const void *from,
> + unsigned long n, char key)
> +{
> + if (copy_with_mvcos())
> + return copy_to_user_mvcos(to, from, n, key);
> + return copy_to_user_mvcs(to, from, n, key);
> +}
> +EXPORT_SYMBOL(raw_copy_to_user_with_key);
>
> static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
> {
>
On 1/18/22 10:52, Janis Schoetterl-Glausch wrote:
> Storage key checking had not been implemented for instructions emulated
> by KVM. Implement it by enhancing the functions used for guest access,
> in particular those making use of access_guest which has been renamed
> to access_guest_with_key.
> Accesses via access_guest_real should not be key checked.
>
> For actual accesses, key checking is done by __copy_from/to_user_with_key
> (which internally uses MVCOS/MVCP/MVCS).
> In cases where accessibility is checked without an actual access,
> this is performed by getting the storage key and checking
> if the access key matches.
> In both cases, if applicable, storage and fetch protection override
> are honored.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
Once you've fixed my nits:
Reviewed-by: Janosch Frank <[email protected]>
> ---
> arch/s390/include/asm/ctl_reg.h | 2 +
> arch/s390/include/asm/page.h | 2 +
> arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++---
> arch/s390/kvm/gaccess.h | 78 ++++++++++++--
> arch/s390/kvm/intercept.c | 12 +--
> arch/s390/kvm/kvm-s390.c | 4 +-
> 6 files changed, 241 insertions(+), 31 deletions(-)
>
> diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
> index 04dc65f8901d..c800199a376b 100644
> --- a/arch/s390/include/asm/ctl_reg.h
> +++ b/arch/s390/include/asm/ctl_reg.h
> @@ -12,6 +12,8 @@
>
> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
> #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
> +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
> +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
> #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
> #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
> #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
> index d98d17a36c7b..cfc4d6fb2385 100644
> --- a/arch/s390/include/asm/page.h
> +++ b/arch/s390/include/asm/page.h
> @@ -20,6 +20,8 @@
> #define PAGE_SIZE _PAGE_SIZE
> #define PAGE_MASK _PAGE_MASK
> #define PAGE_DEFAULT_ACC 0
> +/* storage-protection override */
> +#define PAGE_SPO_ACC 9
> #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
>
> #define HPAGE_SHIFT 20
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index 4460808c3b9a..92ab96d55504 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -10,6 +10,7 @@
> #include <linux/mm_types.h>
> #include <linux/err.h>
> #include <linux/pgtable.h>
> +#include <linux/bitfield.h>
>
> #include <asm/gmap.h>
> #include "kvm-s390.h"
> @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
> return 1;
> }
>
> +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
> + union asce asce)
> +{
> + psw_t *psw = &vcpu->arch.sie_block->gpsw;
> + unsigned long override;
> +
> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
> + /* check if fetch protection override enabled */
> + override = vcpu->arch.sie_block->gcr[0];
> + override &= CR0_FETCH_PROTECTION_OVERRIDE;
> + /* not applicable if subject to DAT && private space */
> + override = override && !(psw_bits(*psw).dat && asce.p);
> + return override;
> + }
> + return false;
> +}
> +
> +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
> +{
> + return ga < 2048 && ga + len <= 2048;
> +}
> +
> +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
> +{
> + /* check if storage protection override enabled */
> + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
> +}
> +
> +static bool storage_prot_override_applies(char access_control)
> +{
> + /* matches special storage protection override key (9) -> allow */
> + return access_control == PAGE_SPO_ACC;
> +}
> +
> +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
> + enum gacc_mode mode, union asce asce, gpa_t gpa,
> + unsigned long ga, unsigned int len)
> +{
> + unsigned char storage_key, access_control;
> + unsigned long hva;
> + int r;
> +
> + /* access key 0 matches any storage key -> allow */
> + if (access_key == 0)
> + return 0;
> + /*
> + * caller needs to ensure that gfn is accessible, so we can
> + * assume that this cannot fail
> + */
> + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
> + mmap_read_lock(current->mm);
> + r = get_guest_storage_key(current->mm, hva, &storage_key);
> + mmap_read_unlock(current->mm);
> + if (r)
> + return r;
> + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
> + /* access key matches storage key -> allow */
> + if (access_control == access_key)
> + return 0;
> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
> + /* mismatching keys, no fetch protection -> allowed */
we want to fetch and fetch protection is off -> allow access
> + if (!(storage_key & _PAGE_FP_BIT))
> + return 0;
> + if (fetch_prot_override_applicable(vcpu, mode, asce))
> + if (fetch_prot_override_applies(ga, len))
Personally I'd prefer a "&&" at the end of the first if and then a \n
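I.e. roughly, with the helpers from the patch:

	if (fetch_prot_override_applicable(vcpu, mode, asce) &&
	    fetch_prot_override_applies(ga, len))
		return 0;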
> + return 0;
> + }
> + if (storage_prot_override_applicable(vcpu))
> + if (storage_prot_override_applies(access_control))
> + return 0;
> + return PGM_PROTECTION;
> +}
Everything above looks like I'd expect it to
> +
> /**
> * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
> * covering a logical range
> @@ -804,6 +878,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
> * @len: length of range in bytes
> * @asce: address-space-control element to use for translation
> * @mode: access mode
> + * @access_key: access key to match the range's storage keys against
> *
> * Translate a logical range to a series of guest absolute addresses,
> * such that the concatenation of page fragments starting at each gpa make up
> @@ -830,7 +905,8 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
> */
> static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> unsigned long *gpas, unsigned long len,
> - const union asce asce, enum gacc_mode mode)
> + const union asce asce, enum gacc_mode mode,
> + char access_key)
> {
> psw_t *psw = &vcpu->arch.sie_block->gpsw;
> unsigned int offset = offset_in_page(ga);
> @@ -857,6 +933,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> }
> if (rc)
> return trans_exc(vcpu, rc, ga, ar, mode, prot);
> + rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
> + fragment_len);
> + if (rc)
> + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
> if (gpas)
> *gpas++ = gpa;
> offset = 0;
> @@ -880,16 +960,50 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
> return rc;
> }
>
> -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> - unsigned long len, enum gacc_mode mode)
> +static int
> +access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
> + void *data, unsigned int len, char key)
> +{
> + struct kvm_memory_slot *slot;
> + bool writable;
> + gfn_t gfn;
> + hva_t hva;
> + int rc;
> +
> + gfn = gpa >> PAGE_SHIFT;
> + slot = gfn_to_memslot(kvm, gfn);
> + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
> +
> + if (kvm_is_error_hva(hva))
> + return PGM_ADDRESSING;
> + if (!writable && mode == GACC_STORE)
> + return -EOPNOTSUPP;
/* We don't support ro memslots so this should suffice */
> + hva += offset_in_page(gpa);
> + if (mode == GACC_STORE)
> + rc = __copy_to_user_with_key((void __user *)hva, data, len, key);
> + else
> + rc = __copy_from_user_with_key(data, (void __user *)hva, len, key);
> + if (rc)
> + return PGM_PROTECTION;
> + if (mode == GACC_STORE)
> + mark_page_dirty_in_slot(kvm, slot, gfn);
> + return 0;
> +}
> +
> +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> + void *data, unsigned long len, enum gacc_mode mode,
> + char access_key)
> {
> psw_t *psw = &vcpu->arch.sie_block->gpsw;
> unsigned long nr_pages, idx;
> unsigned long gpa_array[2];
> unsigned int fragment_len;
> unsigned long *gpas;
> + enum prot_type prot;
> int need_ipte_lock;
> union asce asce;
> + bool try_storage_prot_override;
> + bool try_fetch_prot_override;
> int rc;
>
> if (!len)
> @@ -904,16 +1018,37 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> if (!gpas)
> return -ENOMEM;
> + try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
> + try_storage_prot_override = storage_prot_override_applicable(vcpu);
> need_ipte_lock = psw_bits(*psw).dat && !asce.r;
> if (need_ipte_lock)
> ipte_lock(vcpu);
> - rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
> - for (idx = 0; idx < nr_pages && !rc; idx++) {
/*
We'll do an actual access via the mv instruction which will return
access errors to us so we don't need to check here.
*/
> + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
> + if (rc)
> + goto out_unlock;
> + for (idx = 0; idx < nr_pages; idx++) {
> fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
> - rc = access_guest_page(vcpu->kvm, mode, gpas[idx], data, fragment_len);
> + if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
> + rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
> + data, fragment_len);
> + } else {
> + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
> + data, fragment_len, access_key);
> + }
> + if (rc == PGM_PROTECTION && try_storage_prot_override)
> + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
> + data, fragment_len, PAGE_SPO_ACC);
> + if (rc == PGM_PROTECTION)
> + prot = PROT_TYPE_KEYC;
> + if (rc)
> + break;
> len -= fragment_len;
> data += fragment_len;
> + ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
> }
> + if (rc > 0)
> + rc = trans_exc(vcpu, rc, ga, 0, mode, prot);
> +out_unlock:
> if (need_ipte_lock)
> ipte_unlock(vcpu);
> if (nr_pages > ARRAY_SIZE(gpa_array))
> @@ -940,12 +1075,13 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
> }
>
> /**
> - * guest_translate_address - translate guest logical into guest absolute address
> + * guest_translate_address_with_key - translate guest logical into guest absolute address
> * @vcpu: virtual cpu
> * @gva: Guest virtual address
> * @ar: Access register
> * @gpa: Guest physical address
> * @mode: Translation access mode
> + * @access_key: access key to match the storage key with
> *
> * Parameter semantics are the same as the ones from guest_translate.
> * The memory contents at the guest address are not changed.
> @@ -953,8 +1089,9 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
> * Note: The IPTE lock is not taken during this function, so the caller
> * has to take care of this.
> */
> -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> - unsigned long *gpa, enum gacc_mode mode)
> +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> + unsigned long *gpa, enum gacc_mode mode,
> + char access_key)
> {
> union asce asce;
> int rc;
> @@ -963,7 +1100,17 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
> if (rc)
> return rc;
> - return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
> + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
> + access_key);
> +}
> +
> +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> + unsigned long *gpa, enum gacc_mode mode)
> +{
> + char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
> +
> + return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode,
> + access_key);
> }
>
> /**
> @@ -973,9 +1120,11 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> * @ar: Access register
> * @length: Length of test range
> * @mode: Translation access mode
> + * @access_key: access key to match the storage keys with
> */
> int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> - unsigned long length, enum gacc_mode mode)
> + unsigned long length, enum gacc_mode mode,
> + char access_key)
> {
> union asce asce;
> int rc = 0;
> @@ -984,7 +1133,8 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> if (rc)
> return rc;
> ipte_lock(vcpu);
> - rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
> + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
> + access_key);
> ipte_unlock(vcpu);
>
> return rc;
> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> index 7c72a5e3449f..3df432702cd6 100644
> --- a/arch/s390/kvm/gaccess.h
> +++ b/arch/s390/kvm/gaccess.h
> @@ -186,24 +186,32 @@ enum gacc_mode {
> GACC_IFETCH,
> };
>
> +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> + unsigned long *gpa, enum gacc_mode mode,
> + char access_key);
> +
> int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
> u8 ar, unsigned long *gpa, enum gacc_mode mode);
> +
> int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
> - unsigned long length, enum gacc_mode mode);
> + unsigned long length, enum gacc_mode mode,
> + char access_key);
>
> -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> - unsigned long len, enum gacc_mode mode);
> +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> + void *data, unsigned long len, enum gacc_mode mode,
> + char access_key);
Normally we group them without \n.
Yes this was already inconsistent before you added your changes :)
>
> int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
> void *data, unsigned long len, enum gacc_mode mode);
>
> /**
> - * write_guest - copy data from kernel space to guest space
> + * write_guest_with_key - copy data from kernel space to guest space
> * @vcpu: virtual cpu
> * @ga: guest address
> * @ar: access register
> * @data: source address in kernel space
> * @len: number of bytes to copy
> + * @access_key: access key the storage key needs to match
> *
> * Copy @len bytes from @data (kernel space) to @ga (guest address).
> * In order to copy data to guest space the PSW of the vcpu is inspected:
> @@ -214,8 +222,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
> * The addressing mode of the PSW is also inspected, so that address wrap
> * around is taken into account for 24-, 31- and 64-bit addressing mode,
> * if the to be copied data crosses page boundaries in guest address space.
> - * In addition also low address and DAT protection are inspected before
> - * copying any data (key protection is currently not implemented).
> + * In addition low address, DAT and key protection checks are performed before
> + * copying any data.
> *
> * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
> * In case of an access exception (e.g. protection exception) pgm will contain
> @@ -243,10 +251,53 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
> * if data has been changed in guest space in case of an exception.
> */
> static inline __must_check
> +int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> + void *data, unsigned long len, char access_key)
> +{
> + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE,
> + access_key);
> +}
> +
> +/**
> + * write_guest - copy data from kernel space to guest space
> + * @vcpu: virtual cpu
> + * @ga: guest address
> + * @ar: access register
> + * @data: source address in kernel space
> + * @len: number of bytes to copy
> + *
> + * The behaviour of write_guest is identical to write_guest_with_key, except
> + * that the PSW access key is used instead of an explicit argument.
> + */
> +static inline __must_check
> int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> unsigned long len)
> {
> - return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
> + char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
> +
> + return write_guest_with_key(vcpu, ga, ar, data, len, access_key);
> +}
> +
> +/**
> + * read_guest_with_key - copy data from guest space to kernel space
> + * @vcpu: virtual cpu
> + * @ga: guest address
> + * @ar: access register
> + * @data: destination address in kernel space
> + * @len: number of bytes to copy
> + * @access_key: access key the storage key needs to match
> + *
> + * Copy @len bytes from @ga (guest address) to @data (kernel space).
> + *
> + * The behaviour of read_guest_with_key is identical to write_guest_with_key,
> + * except that data will be copied from guest space to kernel space.
> + */
> +static inline __must_check
> +int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> + void *data, unsigned long len, char access_key)
> +{
> + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH,
> + access_key);
> }
>
> /**
> @@ -259,14 +310,16 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> *
> * Copy @len bytes from @ga (guest address) to @data (kernel space).
> *
> - * The behaviour of read_guest is identical to write_guest, except that
> - * data will be copied from guest space to kernel space.
> + * The behaviour of read_guest is identical to read_guest_with_key, except
> + * that the PSW access key is used instead of an explicit argument.
> */
> static inline __must_check
> int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> unsigned long len)
> {
> - return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
> + char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
> +
> + return read_guest_with_key(vcpu, ga, ar, data, len, access_key);
> }
>
> /**
> @@ -287,7 +340,10 @@ static inline __must_check
> int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
> unsigned long len)
> {
> - return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH);
> + char access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
> +
> + return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH,
> + access_key);
> }
>
> /**
> diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
> index d07ff646d844..8bd42a20d924 100644
> --- a/arch/s390/kvm/intercept.c
> +++ b/arch/s390/kvm/intercept.c
> @@ -331,18 +331,18 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
>
> kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
>
> - /* Make sure that the source is paged-in */
> - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
> - reg2, &srcaddr, GACC_FETCH);
> + /* Ensure that the source is paged-in, no actual access -> no key checking */
> + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2],
> + reg2, &srcaddr, GACC_FETCH, 0);
> if (rc)
> return kvm_s390_inject_prog_cond(vcpu, rc);
> rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
> if (rc != 0)
> return rc;
>
> - /* Make sure that the destination is paged-in */
> - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
> - reg1, &dstaddr, GACC_STORE);
> + /* Ensure that the destination is paged-in, no actual access -> no key checking */
> + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1],
> + reg1, &dstaddr, GACC_STORE, 0);
> if (rc)
> return kvm_s390_inject_prog_cond(vcpu, rc);
> rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 14a18ba5ff2c..38b304e81c57 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -4750,7 +4750,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
> case KVM_S390_MEMOP_LOGICAL_READ:
> if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> r = check_gva_range(vcpu, mop->gaddr, mop->ar,
> - mop->size, GACC_FETCH);
> + mop->size, GACC_FETCH, 0);
> break;
> }
> r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
> @@ -4762,7 +4762,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
> case KVM_S390_MEMOP_LOGICAL_WRITE:
> if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> r = check_gva_range(vcpu, mop->gaddr, mop->ar,
> - mop->size, GACC_STORE);
> + mop->size, GACC_STORE, 0);
> break;
> }
> if (copy_from_user(tmpbuf, uaddr, mop->size)) {
>
On 1/18/22 10:52, Janis Schoetterl-Glausch wrote:
> Availability of the KVM_CAP_S390_MEM_OP_SKEY capability signals that:
> * The vcpu MEM_OP IOCTL supports storage key checking.
> * The vm MEM_OP IOCTL exists.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> ---
> Maybe this should be redesigned. As is, the capability mixes
> support of storage keys for the vcpu ioctl with the availability
> of the vm ioctl (which always supports storage keys).
>
> We could have two capabilities, one to indicate the availability
> of the vm memop and another used to derive the available functionality.
> Janosch suggested that the second capability indicates the availability
> of a "query" memop operation.
I think one capability covering both changes is totally fine as long as we document
that in api.rst.
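For reference, user space would then gate the whole extension on a single
capability query, roughly like this (kvm_fd is assumed to be the /dev/kvm
file descriptor):

	/* > 0: the vcpu MEM_OP takes a key and the vm MEM_OP ioctl exists */
	int has_skey_memop = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
				   KVM_CAP_S390_MEM_OP_SKEY) > 0;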
>
> arch/s390/kvm/kvm-s390.c | 1 +
> include/uapi/linux/kvm.h | 1 +
> 2 files changed, 2 insertions(+)
>
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index ab07389fb4d9..3c6517ad43a3 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -565,6 +565,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_S390_VCPU_RESETS:
> case KVM_CAP_SET_GUEST_DEBUG:
> case KVM_CAP_S390_DIAG318:
> + case KVM_CAP_S390_MEM_OP_SKEY:
> r = 1;
> break;
> case KVM_CAP_SET_GUEST_DEBUG2:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index dd04170287fd..1bb38efd1156 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
> #define KVM_CAP_ARM_MTE 205
> #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
> +#define KVM_CAP_S390_MEM_OP_SKEY 209
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
Hi Janis,
Janis Schoetterl-Glausch <[email protected]> writes:
> KVM needs a mechanism to do accesses to guest memory that honor
> storage key protection.
> Since the copy_to/from_user implementation makes use of move
> instructions that support having an additional access key supplied,
> we can implement __copy_from/to_user_with_key by enhancing the
> existing implementation.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
This doesn't apply to my master branch.
> diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
> index d3a700385875..ce7a150dd93a 100644
> --- a/arch/s390/lib/uaccess.c
> +++ b/arch/s390/lib/uaccess.c
> @@ -59,11 +59,13 @@ static inline int copy_with_mvcos(void)
> #endif
>
> static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
> - unsigned long size)
> + unsigned long size, char key)
> {
> unsigned long tmp1, tmp2;
> union oac spec = {
> + .oac2.key = key,
> .oac2.as = PSW_BITS_AS_SECONDARY,
> + .oac2.k = 1,
> .oac2.a = 1,
> };
>
> @@ -94,19 +96,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
> }
>
> static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
> - unsigned long size)
> + unsigned long size, char key)
Any special reason for using 'char' as type for key here? Given the left shift
below I would prefer 'unsigned char' to avoid having to think about
whether this can overflow. The end result wouldn't look different,
so more or less a cosmetic issue.
> {
> unsigned long tmp1, tmp2;
>
> tmp1 = -256UL;
> asm volatile(
> " sacf 0\n"
> - "0: mvcp 0(%0,%2),0(%1),%3\n"
> + "0: mvcp 0(%0,%2),0(%1),%[key]\n"
> "7: jz 5f\n"
> "1: algr %0,%3\n"
> " la %1,256(%1)\n"
> " la %2,256(%2)\n"
> - "2: mvcp 0(%0,%2),0(%1),%3\n"
> + "2: mvcp 0(%0,%2),0(%1),%[key]\n"
> "8: jnz 1b\n"
> " j 5f\n"
> "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
> @@ -115,7 +117,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
> " slgr %4,%1\n"
> " clgr %0,%4\n" /* copy crosses next page boundary? */
> " jnh 6f\n"
> - "4: mvcp 0(%4,%2),0(%1),%3\n"
> + "4: mvcp 0(%4,%2),0(%1),%[key]\n"
> "9: slgr %0,%4\n"
> " j 6f\n"
> "5: slgr %0,%0\n"
> @@ -123,24 +125,36 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
> EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
> EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
> : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
> - : : "cc", "memory");
> + : [key] "d" (key << 4)
> + : "cc", "memory");
> return size;
> }
>
With that minor nitpick:
Reviewed-by: Sven Schnelle <[email protected]>
On 1/18/22 16:37, Sven Schnelle wrote:
> Hi Janis,
>
> Janis Schoetterl-Glausch <[email protected]> writes:
>
>> KVM needs a mechanism to do accesses to guest memory that honor
>> storage key protection.
>> Since the copy_to/from_user implementation makes use of move
>> instructions that support having an additional access key supplied,
>> we can implement __copy_from/to_user_with_key by enhancing the
>> existing implementation.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>
> This doesn't apply to my master branch.
Maybe that's because the prerequisite patch is missing?
https://lore.kernel.org/linux-s390/YeGBmPBJ8NMi0Rkp@osiris/T/#t
>
>> diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
>> index d3a700385875..ce7a150dd93a 100644
>> --- a/arch/s390/lib/uaccess.c
>> +++ b/arch/s390/lib/uaccess.c
>> @@ -59,11 +59,13 @@ static inline int copy_with_mvcos(void)
>> #endif
>>
>> static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
>> - unsigned long size)
>> + unsigned long size, char key)
>> {
>> unsigned long tmp1, tmp2;
>> union oac spec = {
>> + .oac2.key = key,
>> .oac2.as = PSW_BITS_AS_SECONDARY,
>> + .oac2.k = 1,
>> .oac2.a = 1,
>> };
>>
>> @@ -94,19 +96,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
>> }
>>
>> static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
>> - unsigned long size)
>> + unsigned long size, char key)
>
> Any special reason for using 'char' as type for key here? Given the left shift
> below i would prefer 'unsigned char' to avoid having to think about
> whether this can overflow. The end result wouldn't look different,
> so more or less a cosmetic issue.
Will do.
[...]
>
> With that minor nitpick:
>
> Reviewed-by: Sven Schnelle <[email protected]>
Thanks!
On Tue, Jan 18, 2022 at 10:52:01AM +0100, Janis Schoetterl-Glausch wrote:
> KVM needs a mechanism to do accesses to guest memory that honor
> storage key protection.
> Since the copy_to/from_user implementation makes use of move
> instructions that support having an additional access key supplied,
> we can implement __copy_from/to_user_with_key by enhancing the
> existing implementation.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> ---
> arch/s390/include/asm/uaccess.h | 32 ++++++++++++++++++
> arch/s390/lib/uaccess.c | 57 +++++++++++++++++++++++----------
> 2 files changed, 72 insertions(+), 17 deletions(-)
>
> diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
> index 02b467461163..5138040348cc 100644
> --- a/arch/s390/include/asm/uaccess.h
> +++ b/arch/s390/include/asm/uaccess.h
> @@ -33,6 +33,38 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
>
> #define access_ok(addr, size) __access_ok(addr, size)
>
> +unsigned long __must_check
> +raw_copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
> + char key);
> +
> +unsigned long __must_check
> +raw_copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
> + char key);
> +
> +static __always_inline __must_check unsigned long
> +__copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
> + char key)
> +{
> + might_fault();
> + if (should_fail_usercopy())
> + return n;
> + instrument_copy_from_user(to, from, n);
> + check_object_size(to, n, false);
> + return raw_copy_from_user_with_key(to, from, n, key);
> +}
> +
> +static __always_inline __must_check unsigned long
> +__copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
> + char key)
> +{
> + might_fault();
> + if (should_fail_usercopy())
> + return n;
> + instrument_copy_to_user(to, from, n);
> + check_object_size(from, n, true);
> + return raw_copy_to_user_with_key(to, from, n, key);
> +}
> +
> unsigned long __must_check
> raw_copy_from_user(void *to, const void __user *from, unsigned long n);
That's a lot of code churn... I would have expected the existing
functions to be renamed and to get an additional key parameter, with the
current API implemented by defines which map copy_to_user() & friends to
the new functions, passing a zero key.
This would avoid a lot of code duplication, even though the kernel
image would get slightly larger.
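For illustration, the kind of mapping described above might look roughly like
this (sketch only; these defines are not part of the patch):

#define copy_to_user(to, from, n)	copy_to_user_with_key(to, from, n, 0)
#define copy_from_user(to, from, n)	copy_from_user_with_key(to, from, n, 0)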
Could you do that, please, and also provide bloat-o-meter results?
And as already mentioned: please don't use "char" for passing a
key. Besides leading to the overflow question pointed out
by Sven, this as usual also raises the question of whether there might be
any bugs wrt sign extension. That is: for anything but characters,
please always explicitly use signed or unsigned char (or u8/s8), so
nobody has to think about this.
Thanks!
On 1/19/22 10:48, Heiko Carstens wrote:
>
> On Tue, Jan 18, 2022 at 10:52:01AM +0100, Janis Schoetterl-Glausch wrote:
>> KVM needs a mechanism to do accesses to guest memory that honor
>> storage key protection.
>> Since the copy_to/from_user implementation makes use of move
>> instructions that support having an additional access key supplied,
>> we can implement __copy_from/to_user_with_key by enhancing the
>> existing implementation.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>> ---
>> arch/s390/include/asm/uaccess.h | 32 ++++++++++++++++++
>> arch/s390/lib/uaccess.c | 57 +++++++++++++++++++++++----------
>> 2 files changed, 72 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
>> index 02b467461163..5138040348cc 100644
>> --- a/arch/s390/include/asm/uaccess.h
>> +++ b/arch/s390/include/asm/uaccess.h
>> @@ -33,6 +33,38 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
>>
>> #define access_ok(addr, size) __access_ok(addr, size)
>>
>> +unsigned long __must_check
>> +raw_copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
>> + char key);
>> +
>> +unsigned long __must_check
>> +raw_copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
>> + char key);
>> +
>> +static __always_inline __must_check unsigned long
>> +__copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
>> + char key)
>> +{
>> + might_fault();
>> + if (should_fail_usercopy())
>> + return n;
>> + instrument_copy_from_user(to, from, n);
>> + check_object_size(to, n, false);
>> + return raw_copy_from_user_with_key(to, from, n, key);
>> +}
>> +
>> +static __always_inline __must_check unsigned long
>> +__copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
>> + char key)
>> +{
>> + might_fault();
>> + if (should_fail_usercopy())
>> + return n;
>> + instrument_copy_to_user(to, from, n);
>> + check_object_size(from, n, true);
>> + return raw_copy_to_user_with_key(to, from, n, key);
>> +}
>> +
>> unsigned long __must_check
>> raw_copy_from_user(void *to, const void __user *from, unsigned long n);
>
> That's a lot of code churn... I would have expected that the existing
> functions will be renamed, get an additional key parameter, and the
> current API is implemented by defines which map copy_to_user() &
> friends to the new functions, and add a zero key.
I don't think I understand you. I can implement raw_copy_from/to_user
in terms of raw_copy_from/to_user_with_key, which does save a few lines,
but that's it, isn't it?
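(As a minimal sketch of what I mean, the existing function would just forward
a zero access key:

unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	/* access key 0 skips the key check, i.e. behaves like today */
	return raw_copy_from_user_with_key(to, from, n, 0);
}

and likewise for raw_copy_to_user.)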
Thanks!
>
> This would avoid a lot of code duplication, even though the kernel
> image would get slightly larger.
>
> Could you do that, please, and also provide bloat-a-meter results?
>
> And as already mentioned: please don't use "char" for passing a
> key. Besides that this leads to the overflow question as pointed out
> by Sven, this might as usual also raise the question if there might be
> any bugs wrt to sign extension. That is: for anything but characters,
> please always explicitely use signed or unsigned char (or u8/s8), so
> nobody has to think about this.
Will do.
>
> Thanks!
On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
> Channel I/O honors storage keys and is performed on absolute memory.
> For I/O emulation user space therefore needs to be able to do key
> checked accesses.
Can't we do the checking in userspace? We already have functions for
handling the storage keys there (see hw/s390x/s390-skeys-kvm.c), so why
can't we do the checking in QEMU?
Thomas
On 19.01.22 at 12:52, Thomas Huth wrote:
> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
>> Channel I/O honors storage keys and is performed on absolute memory.
>> For I/O emulation user space therefore needs to be able to do key
>> checked accesses.
>
> Can't we do the checking in userspace? We already have functions for handling the storage keys there (see hw/s390x/s390-skeys-kvm.c), so why can't we do the checking in QEMU?
That would separate the key check from the memory operation. Potentially for a long time.
When we piggyback on access_guest_abs_with_key we use mvcos in the host and thus do the key check in lockstep with the memory access, which is the preferable solution.
I would also like to avoid reading guest storage keys in the I/O path via the ioctl that was added for migration, just to do a single key check. That raises performance concerns.
On 19/01/2022 13.46, Christian Borntraeger wrote:
>
>
> Am 19.01.22 um 12:52 schrieb Thomas Huth:
>> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
>>> Channel I/O honors storage keys and is performed on absolute memory.
>>> For I/O emulation user space therefore needs to be able to do key
>>> checked accesses.
>>
>> Can't we do the checking in userspace? We already have functions for
>> handling the storage keys there (see hw/s390x/s390-skeys-kvm.c), so why
>> can't we do the checking in QEMU?
>
> That would separate the key check from the memory operation. Potentially for
> a long time.
> Wenn we piggy back on access_guest_abs_with_key we use mvcos in the host and
> thus do the key check in lockstep with the keycheck which is the preferrable
> solution.
Ok, makes sense - Janis, could you please add this rationale to the patch
description?
Thomas
On Wed, Jan 19, 2022 at 12:02:34PM +0100, Janis Schoetterl-Glausch wrote:
> > That's a lot of code churn... I would have expected that the existing
> > functions will be renamed, get an additional key parameter, and the
> > current API is implemented by defines which map copy_to_user() &
> > friends to the new functions, and add a zero key.
>
> I don't think I understand you. I can implement raw_copy_from/to_user
> in terms of raw_copy_from/to_user_with_key, which does save a few lines,
> but that's it, isn't it?
Right you are. I only looked at your patch, and forgot that all
the wrapping is nowadays done in common code.
So what I really don't like about this approach is that we get an
arch-specific copy_to_user() implementation back. This means that all those
extra calls like might_fault(), instrument_copy_to_user(), and friends
now have to be kept in sync by us again, if new instrumentation or
security options are added to common code.
Given that this is manual work / monitoring, I'm sure this will not
work in the mid or long term, as has been proven several times in
the past for other features. We need something better, which works
out of the box wrt common code changes / enhancements.
On 1/19/22 13:53, Thomas Huth wrote:
> On 19/01/2022 13.46, Christian Borntraeger wrote:
>>
>>
>> Am 19.01.22 um 12:52 schrieb Thomas Huth:
>>> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
>>>> Channel I/O honors storage keys and is performed on absolute memory.
>>>> For I/O emulation user space therefore needs to be able to do key
>>>> checked accesses.
>>>
>>> Can't we do the checking in userspace? We already have functions for handling the storage keys there (see hw/s390x/s390-skeys-kvm.c), so why can't we do the checking in QEMU?
>>
>> That would separate the key check from the memory operation. Potentially for a long time.
>> Wenn we piggy back on access_guest_abs_with_key we use mvcos in the host and thus do the key check in lockstep with the keycheck which is the preferrable solution.
>
> Ok, makes sense - Janis, could you please add this rationale to the patch description?
Will do.
>
> Thomas
>
On 18.01.22 at 10:52, Janis Schoetterl-Glausch wrote:
> Storage key checking had not been implemented for instructions emulated
> by KVM. Implement it by enhancing the functions used for guest access,
> in particular those making use of access_guest which has been renamed
> to access_guest_with_key.
> Accesses via access_guest_real should not be key checked.
>
> For actual accesses, key checking is done by __copy_from/to_user_with_key
> (which internally uses MVCOS/MVCP/MVCS).
> In cases where accessibility is checked without an actual access,
> this is performed by getting the storage key and checking
> if the access key matches.
> In both cases, if applicable, storage and fetch protection override
> are honored.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> ---
> arch/s390/include/asm/ctl_reg.h | 2 +
> arch/s390/include/asm/page.h | 2 +
> arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++---
> arch/s390/kvm/gaccess.h | 78 ++++++++++++--
> arch/s390/kvm/intercept.c | 12 +--
> arch/s390/kvm/kvm-s390.c | 4 +-
> 6 files changed, 241 insertions(+), 31 deletions(-)
>
> diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
> index 04dc65f8901d..c800199a376b 100644
> --- a/arch/s390/include/asm/ctl_reg.h
> +++ b/arch/s390/include/asm/ctl_reg.h
> @@ -12,6 +12,8 @@
>
> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
> #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
> +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
> +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
> #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
> #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
> #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
> index d98d17a36c7b..cfc4d6fb2385 100644
> --- a/arch/s390/include/asm/page.h
> +++ b/arch/s390/include/asm/page.h
> @@ -20,6 +20,8 @@
> #define PAGE_SIZE _PAGE_SIZE
> #define PAGE_MASK _PAGE_MASK
> #define PAGE_DEFAULT_ACC 0
> +/* storage-protection override */
> +#define PAGE_SPO_ACC 9
> #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
>
> #define HPAGE_SHIFT 20
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index 4460808c3b9a..92ab96d55504 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -10,6 +10,7 @@
> #include <linux/mm_types.h>
> #include <linux/err.h>
> #include <linux/pgtable.h>
> +#include <linux/bitfield.h>
>
> #include <asm/gmap.h>
> #include "kvm-s390.h"
> @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
> return 1;
> }
>
> +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
> + union asce asce)
> +{
> + psw_t *psw = &vcpu->arch.sie_block->gpsw;
> + unsigned long override;
> +
> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
> + /* check if fetch protection override enabled */
> + override = vcpu->arch.sie_block->gcr[0];
> + override &= CR0_FETCH_PROTECTION_OVERRIDE;
> + /* not applicable if subject to DAT && private space */
> + override = override && !(psw_bits(*psw).dat && asce.p);
> + return override;
> + }
> + return false;
> +}
> +
> +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
> +{
> + return ga < 2048 && ga + len <= 2048;
> +}
> +
> +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
> +{
> + /* check if storage protection override enabled */
> + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
> +}
> +
> +static bool storage_prot_override_applies(char access_control)
> +{
> + /* matches special storage protection override key (9) -> allow */
> + return access_control == PAGE_SPO_ACC;
> +}
> +
> +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
> + enum gacc_mode mode, union asce asce, gpa_t gpa,
> + unsigned long ga, unsigned int len)
> +{
> + unsigned char storage_key, access_control;
> + unsigned long hva;
> + int r;
> +
> + /* access key 0 matches any storage key -> allow */
> + if (access_key == 0)
> + return 0;
> + /*
> + * caller needs to ensure that gfn is accessible, so we can
> + * assume that this cannot fail
> + */
> + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
> + mmap_read_lock(current->mm);
> + r = get_guest_storage_key(current->mm, hva, &storage_key);
> + mmap_read_unlock(current->mm);
> + if (r)
> + return r;
> + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
> + /* access key matches storage key -> allow */
> + if (access_control == access_key)
> + return 0;
> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
> + /* mismatching keys, no fetch protection -> allowed */
> + if (!(storage_key & _PAGE_FP_BIT))
> + return 0;
> + if (fetch_prot_override_applicable(vcpu, mode, asce))
> + if (fetch_prot_override_applies(ga, len))
> + return 0;
> + }
> + if (storage_prot_override_applicable(vcpu))
> + if (storage_prot_override_applies(access_control))
> + return 0;
> + return PGM_PROTECTION;
> +}
This function is just a pre-check (and early exit) and we do an additional final check
in the MVCOS routine later on, correct? It might actually be faster to get rid of this
pre-test and simply rely on MVCOS. MVCOS is usually just a few cycles, while ISKE to read
the key is a really slow path and takes hundreds of cycles. This would even simplify the
patch (assuming that we do proper key checking all the time).
On 1/19/22 20:27, Christian Borntraeger wrote:
> Am 18.01.22 um 10:52 schrieb Janis Schoetterl-Glausch:
>> Storage key checking had not been implemented for instructions emulated
>> by KVM. Implement it by enhancing the functions used for guest access,
>> in particular those making use of access_guest which has been renamed
>> to access_guest_with_key.
>> Accesses via access_guest_real should not be key checked.
>>
>> For actual accesses, key checking is done by __copy_from/to_user_with_key
>> (which internally uses MVCOS/MVCP/MVCS).
>> In cases where accessibility is checked without an actual access,
>> this is performed by getting the storage key and checking
>> if the access key matches.
>> In both cases, if applicable, storage and fetch protection override
>> are honored.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>> ---
>> arch/s390/include/asm/ctl_reg.h | 2 +
>> arch/s390/include/asm/page.h | 2 +
>> arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++---
>> arch/s390/kvm/gaccess.h | 78 ++++++++++++--
>> arch/s390/kvm/intercept.c | 12 +--
>> arch/s390/kvm/kvm-s390.c | 4 +-
>> 6 files changed, 241 insertions(+), 31 deletions(-)
>>
>> diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
>> index 04dc65f8901d..c800199a376b 100644
>> --- a/arch/s390/include/asm/ctl_reg.h
>> +++ b/arch/s390/include/asm/ctl_reg.h
>> @@ -12,6 +12,8 @@
>> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
>> #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
>> +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
>> +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
>> #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
>> #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
>> #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
>> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
>> index d98d17a36c7b..cfc4d6fb2385 100644
>> --- a/arch/s390/include/asm/page.h
>> +++ b/arch/s390/include/asm/page.h
>> @@ -20,6 +20,8 @@
>> #define PAGE_SIZE _PAGE_SIZE
>> #define PAGE_MASK _PAGE_MASK
>> #define PAGE_DEFAULT_ACC 0
>> +/* storage-protection override */
>> +#define PAGE_SPO_ACC 9
>> #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
>> #define HPAGE_SHIFT 20
>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>> index 4460808c3b9a..92ab96d55504 100644
>> --- a/arch/s390/kvm/gaccess.c
>> +++ b/arch/s390/kvm/gaccess.c
>> @@ -10,6 +10,7 @@
>> #include <linux/mm_types.h>
>> #include <linux/err.h>
>> #include <linux/pgtable.h>
>> +#include <linux/bitfield.h>
>> #include <asm/gmap.h>
>> #include "kvm-s390.h"
>> @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>> return 1;
>> }
>> +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
>> + union asce asce)
>> +{
>> + psw_t *psw = &vcpu->arch.sie_block->gpsw;
>> + unsigned long override;
>> +
>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>> + /* check if fetch protection override enabled */
>> + override = vcpu->arch.sie_block->gcr[0];
>> + override &= CR0_FETCH_PROTECTION_OVERRIDE;
>> + /* not applicable if subject to DAT && private space */
>> + override = override && !(psw_bits(*psw).dat && asce.p);
>> + return override;
>> + }
>> + return false;
>> +}
>> +
>> +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
>> +{
>> + return ga < 2048 && ga + len <= 2048;
>> +}
>> +
>> +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
>> +{
>> + /* check if storage protection override enabled */
>> + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
>> +}
>> +
>> +static bool storage_prot_override_applies(char access_control)
>> +{
>> + /* matches special storage protection override key (9) -> allow */
>> + return access_control == PAGE_SPO_ACC;
>> +}
>> +
>> +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
>> + enum gacc_mode mode, union asce asce, gpa_t gpa,
>> + unsigned long ga, unsigned int len)
>> +{
>> + unsigned char storage_key, access_control;
>> + unsigned long hva;
>> + int r;
>> +
>> + /* access key 0 matches any storage key -> allow */
>> + if (access_key == 0)
>> + return 0;
>> + /*
>> + * caller needs to ensure that gfn is accessible, so we can
>> + * assume that this cannot fail
>> + */
>> + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
>> + mmap_read_lock(current->mm);
>> + r = get_guest_storage_key(current->mm, hva, &storage_key);
>> + mmap_read_unlock(current->mm);
>> + if (r)
>> + return r;
>> + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
>> + /* access key matches storage key -> allow */
>> + if (access_control == access_key)
>> + return 0;
>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>> + /* mismatching keys, no fetch protection -> allowed */
>> + if (!(storage_key & _PAGE_FP_BIT))
>> + return 0;
>> + if (fetch_prot_override_applicable(vcpu, mode, asce))
>> + if (fetch_prot_override_applies(ga, len))
>> + return 0;
>> + }
>> + if (storage_prot_override_applicable(vcpu))
>> + if (storage_prot_override_applies(access_control))
>> + return 0;
>> + return PGM_PROTECTION;
>> +}
>
> This function is just a pre-check (and early-exit) and we do an additional final check
> in the MVCOS routing later on, correct? It might actually be faster to get rid of this
No, this exists for those cases that do not do an actual access, that is, MEMOPs with
the check-only flag, as well as the TEST PROTECTION emulation. access_guest_with_key
passes key 0, so we take the early return. It's easy to miss, so Janosch suggested a comment there.
> pre-test and simply rely on MVCOS. MVCOS is usually just some cycles while ISKE to read
> the key is really slow path and take hundreds of cycles. This would even simplify the
> patch (assuming that we do proper key checking all the time).
On 1/19/22 14:20, Heiko Carstens wrote:
> On Wed, Jan 19, 2022 at 12:02:34PM +0100, Janis Schoetterl-Glausch wrote:
>>> That's a lot of code churn... I would have expected that the existing
>>> functions will be renamed, get an additional key parameter, and the
>>> current API is implemented by defines which map copy_to_user() &
>>> friends to the new functions, and add a zero key.
>>
>> I don't think I understand you. I can implement raw_copy_from/to_user
>> in terms of raw_copy_from/to_user_with_key, which does save a few lines,
>> but that's it, isn't it?
>
> Right you are. I only looked at your patch, and forgot about that all
> the wrapping is nowadays done in common code.
>
> So what I really don't like about this approach is that we get an arch
> specific copy_to_user() implementation back. This means that all those
> extra calls like might_fault(), instrument_copy_to_user(), and friends
> now have to be kept in sync by us again, if new instrumentation or
> security options are added to common code.
>
> Given that this is manual work / monitoring I'm sure this will not
> work in the mid or long term, like it has been proven several times in
> the past for other features. We need something better, which works
> out-of-the-box wrt common code changes / enhancements.
What are our options?
1. Tooling
1.1 Automatic monitoring
1.2 ?
2. Implementation changes
2.1 Modify common code
2.2 Don't modify common code, pass key argument via a well-known location
2.3 ?
Neither 2.1 nor 2.2 seems great.
How might 1.1 work? A build error if they are out of sync?
On 20.01.22 at 09:11, Janis Schoetterl-Glausch wrote:
> On 1/19/22 20:27, Christian Borntraeger wrote:
>> Am 18.01.22 um 10:52 schrieb Janis Schoetterl-Glausch:
>>> Storage key checking had not been implemented for instructions emulated
>>> by KVM. Implement it by enhancing the functions used for guest access,
>>> in particular those making use of access_guest which has been renamed
>>> to access_guest_with_key.
>>> Accesses via access_guest_real should not be key checked.
>>>
>>> For actual accesses, key checking is done by __copy_from/to_user_with_key
>>> (which internally uses MVCOS/MVCP/MVCS).
>>> In cases where accessibility is checked without an actual access,
>>> this is performed by getting the storage key and checking
>>> if the access key matches.
>>> In both cases, if applicable, storage and fetch protection override
>>> are honored.
>>>
>>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>>> ---
>>> arch/s390/include/asm/ctl_reg.h | 2 +
>>> arch/s390/include/asm/page.h | 2 +
>>> arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++---
>>> arch/s390/kvm/gaccess.h | 78 ++++++++++++--
>>> arch/s390/kvm/intercept.c | 12 +--
>>> arch/s390/kvm/kvm-s390.c | 4 +-
>>> 6 files changed, 241 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
>>> index 04dc65f8901d..c800199a376b 100644
>>> --- a/arch/s390/include/asm/ctl_reg.h
>>> +++ b/arch/s390/include/asm/ctl_reg.h
>>> @@ -12,6 +12,8 @@
>>> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
>>> #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
>>> +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
>>> +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
>>> #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
>>> #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
>>> #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
>>> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
>>> index d98d17a36c7b..cfc4d6fb2385 100644
>>> --- a/arch/s390/include/asm/page.h
>>> +++ b/arch/s390/include/asm/page.h
>>> @@ -20,6 +20,8 @@
>>> #define PAGE_SIZE _PAGE_SIZE
>>> #define PAGE_MASK _PAGE_MASK
>>> #define PAGE_DEFAULT_ACC 0
>>> +/* storage-protection override */
>>> +#define PAGE_SPO_ACC 9
>>> #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
>>> #define HPAGE_SHIFT 20
>>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>>> index 4460808c3b9a..92ab96d55504 100644
>>> --- a/arch/s390/kvm/gaccess.c
>>> +++ b/arch/s390/kvm/gaccess.c
>>> @@ -10,6 +10,7 @@
>>> #include <linux/mm_types.h>
>>> #include <linux/err.h>
>>> #include <linux/pgtable.h>
>>> +#include <linux/bitfield.h>
>>> #include <asm/gmap.h>
>>> #include "kvm-s390.h"
>>> @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>>> return 1;
>>> }
>>> +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
>>> + union asce asce)
>>> +{
>>> + psw_t *psw = &vcpu->arch.sie_block->gpsw;
>>> + unsigned long override;
>>> +
>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>>> + /* check if fetch protection override enabled */
>>> + override = vcpu->arch.sie_block->gcr[0];
>>> + override &= CR0_FETCH_PROTECTION_OVERRIDE;
>>> + /* not applicable if subject to DAT && private space */
>>> + override = override && !(psw_bits(*psw).dat && asce.p);
>>> + return override;
>>> + }
>>> + return false;
>>> +}
>>> +
>>> +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
>>> +{
>>> + return ga < 2048 && ga + len <= 2048;
>>> +}
>>> +
>>> +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
>>> +{
>>> + /* check if storage protection override enabled */
>>> + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
>>> +}
>>> +
>>> +static bool storage_prot_override_applies(char access_control)
>>> +{
>>> + /* matches special storage protection override key (9) -> allow */
>>> + return access_control == PAGE_SPO_ACC;
>>> +}
>>> +
>>> +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
>>> + enum gacc_mode mode, union asce asce, gpa_t gpa,
>>> + unsigned long ga, unsigned int len)
>>> +{
>>> + unsigned char storage_key, access_control;
>>> + unsigned long hva;
>>> + int r;
>>> +
>>> + /* access key 0 matches any storage key -> allow */
>>> + if (access_key == 0)
>>> + return 0;
>>> + /*
>>> + * caller needs to ensure that gfn is accessible, so we can
>>> + * assume that this cannot fail
>>> + */
>>> + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
>>> + mmap_read_lock(current->mm);
>>> + r = get_guest_storage_key(current->mm, hva, &storage_key);
>>> + mmap_read_unlock(current->mm);
>>> + if (r)
>>> + return r;
>>> + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
>>> + /* access key matches storage key -> allow */
>>> + if (access_control == access_key)
>>> + return 0;
>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>>> + /* mismatching keys, no fetch protection -> allowed */
>>> + if (!(storage_key & _PAGE_FP_BIT))
>>> + return 0;
>>> + if (fetch_prot_override_applicable(vcpu, mode, asce))
>>> + if (fetch_prot_override_applies(ga, len))
>>> + return 0;
>>> + }
>>> + if (storage_prot_override_applicable(vcpu))
>>> + if (storage_prot_override_applies(access_control))
>>> + return 0;
>>> + return PGM_PROTECTION;
>>> +}
>>
>> This function is just a pre-check (and early-exit) and we do an additional final check
>> in the MVCOS routing later on, correct? It might actually be faster to get rid of this
>
> No, this exists for those cases that do not do an actual access, that is MEMOPs with
> the check only flag, as well as the TEST PROTECTION emulation. access_guest_with_key
> passes key 0 so we take the early return. It's easy to miss so Janosch suggested a comment there.
Don't we always call it in guest_range_to_gpas, which is also called for the memory access in
access_guest_with_key?
>
>> pre-test and simply rely on MVCOS. MVCOS is usually just some cycles while ISKE to read
>> the key is really slow path and take hundreds of cycles. This would even simplify the
>> patch (assuming that we do proper key checking all the time).
>
On 20.01.22 at 09:58, Janis Schoetterl-Glausch wrote:
> On 1/20/22 09:50, Christian Borntraeger wrote:
>>
>>
>> Am 20.01.22 um 09:11 schrieb Janis Schoetterl-Glausch:
>>> On 1/19/22 20:27, Christian Borntraeger wrote:
>>>> Am 18.01.22 um 10:52 schrieb Janis Schoetterl-Glausch:
>>>>> Storage key checking had not been implemented for instructions emulated
>>>>> by KVM. Implement it by enhancing the functions used for guest access,
>>>>> in particular those making use of access_guest which has been renamed
>>>>> to access_guest_with_key.
>>>>> Accesses via access_guest_real should not be key checked.
>>>>>
>>>>> For actual accesses, key checking is done by __copy_from/to_user_with_key
>>>>> (which internally uses MVCOS/MVCP/MVCS).
>>>>> In cases where accessibility is checked without an actual access,
>>>>> this is performed by getting the storage key and checking
>>>>> if the access key matches.
>>>>> In both cases, if applicable, storage and fetch protection override
>>>>> are honored.
>>>>>
>>>>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>>>>> ---
>>>>> arch/s390/include/asm/ctl_reg.h | 2 +
>>>>> arch/s390/include/asm/page.h | 2 +
>>>>> arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++---
>>>>> arch/s390/kvm/gaccess.h | 78 ++++++++++++--
>>>>> arch/s390/kvm/intercept.c | 12 +--
>>>>> arch/s390/kvm/kvm-s390.c | 4 +-
>>>>> 6 files changed, 241 insertions(+), 31 deletions(-)
>>>>>
>>>>> diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
>>>>> index 04dc65f8901d..c800199a376b 100644
>>>>> --- a/arch/s390/include/asm/ctl_reg.h
>>>>> +++ b/arch/s390/include/asm/ctl_reg.h
>>>>> @@ -12,6 +12,8 @@
>>>>> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
>>>>> #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
>>>>> +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
>>>>> +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
>>>>> #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
>>>>> #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
>>>>> #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
>>>>> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
>>>>> index d98d17a36c7b..cfc4d6fb2385 100644
>>>>> --- a/arch/s390/include/asm/page.h
>>>>> +++ b/arch/s390/include/asm/page.h
>>>>> @@ -20,6 +20,8 @@
>>>>> #define PAGE_SIZE _PAGE_SIZE
>>>>> #define PAGE_MASK _PAGE_MASK
>>>>> #define PAGE_DEFAULT_ACC 0
>>>>> +/* storage-protection override */
>>>>> +#define PAGE_SPO_ACC 9
>>>>> #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
>>>>> #define HPAGE_SHIFT 20
>>>>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>>>>> index 4460808c3b9a..92ab96d55504 100644
>>>>> --- a/arch/s390/kvm/gaccess.c
>>>>> +++ b/arch/s390/kvm/gaccess.c
>>>>> @@ -10,6 +10,7 @@
>>>>> #include <linux/mm_types.h>
>>>>> #include <linux/err.h>
>>>>> #include <linux/pgtable.h>
>>>>> +#include <linux/bitfield.h>
>>>>> #include <asm/gmap.h>
>>>>> #include "kvm-s390.h"
>>>>> @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>>>>> return 1;
>>>>> }
>>>>> +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
>>>>> + union asce asce)
>>>>> +{
>>>>> + psw_t *psw = &vcpu->arch.sie_block->gpsw;
>>>>> + unsigned long override;
>>>>> +
>>>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>>>>> + /* check if fetch protection override enabled */
>>>>> + override = vcpu->arch.sie_block->gcr[0];
>>>>> + override &= CR0_FETCH_PROTECTION_OVERRIDE;
>>>>> + /* not applicable if subject to DAT && private space */
>>>>> + override = override && !(psw_bits(*psw).dat && asce.p);
>>>>> + return override;
>>>>> + }
>>>>> + return false;
>>>>> +}
>>>>> +
>>>>> +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
>>>>> +{
>>>>> + return ga < 2048 && ga + len <= 2048;
>>>>> +}
>>>>> +
>>>>> +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
>>>>> +{
>>>>> + /* check if storage protection override enabled */
>>>>> + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
>>>>> +}
>>>>> +
>>>>> +static bool storage_prot_override_applies(char access_control)
>>>>> +{
>>>>> + /* matches special storage protection override key (9) -> allow */
>>>>> + return access_control == PAGE_SPO_ACC;
>>>>> +}
>>>>> +
>>>>> +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
>>>>> + enum gacc_mode mode, union asce asce, gpa_t gpa,
>>>>> + unsigned long ga, unsigned int len)
>>>>> +{
>>>>> + unsigned char storage_key, access_control;
>>>>> + unsigned long hva;
>>>>> + int r;
>>>>> +
>>>>> + /* access key 0 matches any storage key -> allow */
>>>>> + if (access_key == 0)
>>>>> + return 0;
>>>>> + /*
>>>>> + * caller needs to ensure that gfn is accessible, so we can
>>>>> + * assume that this cannot fail
>>>>> + */
>>>>> + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
>>>>> + mmap_read_lock(current->mm);
>>>>> + r = get_guest_storage_key(current->mm, hva, &storage_key);
>>>>> + mmap_read_unlock(current->mm);
>>>>> + if (r)
>>>>> + return r;
>>>>> + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
>>>>> + /* access key matches storage key -> allow */
>>>>> + if (access_control == access_key)
>>>>> + return 0;
>>>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>>>>> + /* mismatching keys, no fetch protection -> allowed */
>>>>> + if (!(storage_key & _PAGE_FP_BIT))
>>>>> + return 0;
>>>>> + if (fetch_prot_override_applicable(vcpu, mode, asce))
>>>>> + if (fetch_prot_override_applies(ga, len))
>>>>> + return 0;
>>>>> + }
>>>>> + if (storage_prot_override_applicable(vcpu))
>>>>> + if (storage_prot_override_applies(access_control))
>>>>> + return 0;
>>>>> + return PGM_PROTECTION;
>>>>> +}
>>>>
>>>> This function is just a pre-check (and early-exit) and we do an additional final check
>>>> in the MVCOS routing later on, correct? It might actually be faster to get rid of this
>>>
>>> No, this exists for those cases that do not do an actual access, that is MEMOPs with
>>> the check only flag, as well as the TEST PROTECTION emulation. access_guest_with_key
>>> passes key 0 so we take the early return. It's easy to miss so Janosch suggested a comment there.
>>
>> Dont we always call it in guest_range_to_gpas, which is also called for the memory access in
>> access_guest_with_key?
> @@ -904,16 +1018,37 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
> gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> if (!gpas)
> return -ENOMEM;
> + try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
> + try_storage_prot_override = storage_prot_override_applicable(vcpu);
> need_ipte_lock = psw_bits(*psw).dat && !asce.r;
> if (need_ipte_lock)
> ipte_lock(vcpu);
> - rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
> - for (idx = 0; idx < nr_pages && !rc; idx++) {
> + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
>
> Yes, but the key is 0 in that case, so we don't do any key checking. ^
So yes, we need a comment :-)
On 1/20/22 09:50, Christian Borntraeger wrote:
>
>
> Am 20.01.22 um 09:11 schrieb Janis Schoetterl-Glausch:
>> On 1/19/22 20:27, Christian Borntraeger wrote:
>>> Am 18.01.22 um 10:52 schrieb Janis Schoetterl-Glausch:
>>>> Storage key checking had not been implemented for instructions emulated
>>>> by KVM. Implement it by enhancing the functions used for guest access,
>>>> in particular those making use of access_guest which has been renamed
>>>> to access_guest_with_key.
>>>> Accesses via access_guest_real should not be key checked.
>>>>
>>>> For actual accesses, key checking is done by __copy_from/to_user_with_key
>>>> (which internally uses MVCOS/MVCP/MVCS).
>>>> In cases where accessibility is checked without an actual access,
>>>> this is performed by getting the storage key and checking
>>>> if the access key matches.
>>>> In both cases, if applicable, storage and fetch protection override
>>>> are honored.
>>>>
>>>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>>>> ---
>>>> arch/s390/include/asm/ctl_reg.h | 2 +
>>>> arch/s390/include/asm/page.h | 2 +
>>>> arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++---
>>>> arch/s390/kvm/gaccess.h | 78 ++++++++++++--
>>>> arch/s390/kvm/intercept.c | 12 +--
>>>> arch/s390/kvm/kvm-s390.c | 4 +-
>>>> 6 files changed, 241 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
>>>> index 04dc65f8901d..c800199a376b 100644
>>>> --- a/arch/s390/include/asm/ctl_reg.h
>>>> +++ b/arch/s390/include/asm/ctl_reg.h
>>>> @@ -12,6 +12,8 @@
>>>> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
>>>> #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
>>>> +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
>>>> +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
>>>> #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
>>>> #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
>>>> #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
>>>> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
>>>> index d98d17a36c7b..cfc4d6fb2385 100644
>>>> --- a/arch/s390/include/asm/page.h
>>>> +++ b/arch/s390/include/asm/page.h
>>>> @@ -20,6 +20,8 @@
>>>> #define PAGE_SIZE _PAGE_SIZE
>>>> #define PAGE_MASK _PAGE_MASK
>>>> #define PAGE_DEFAULT_ACC 0
>>>> +/* storage-protection override */
>>>> +#define PAGE_SPO_ACC 9
>>>> #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
>>>> #define HPAGE_SHIFT 20
>>>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>>>> index 4460808c3b9a..92ab96d55504 100644
>>>> --- a/arch/s390/kvm/gaccess.c
>>>> +++ b/arch/s390/kvm/gaccess.c
>>>> @@ -10,6 +10,7 @@
>>>> #include <linux/mm_types.h>
>>>> #include <linux/err.h>
>>>> #include <linux/pgtable.h>
>>>> +#include <linux/bitfield.h>
>>>> #include <asm/gmap.h>
>>>> #include "kvm-s390.h"
>>>> @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>>>> return 1;
>>>> }
>>>> +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
>>>> + union asce asce)
>>>> +{
>>>> + psw_t *psw = &vcpu->arch.sie_block->gpsw;
>>>> + unsigned long override;
>>>> +
>>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>>>> + /* check if fetch protection override enabled */
>>>> + override = vcpu->arch.sie_block->gcr[0];
>>>> + override &= CR0_FETCH_PROTECTION_OVERRIDE;
>>>> + /* not applicable if subject to DAT && private space */
>>>> + override = override && !(psw_bits(*psw).dat && asce.p);
>>>> + return override;
>>>> + }
>>>> + return false;
>>>> +}
>>>> +
>>>> +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
>>>> +{
>>>> + return ga < 2048 && ga + len <= 2048;
>>>> +}
>>>> +
>>>> +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
>>>> +{
>>>> + /* check if storage protection override enabled */
>>>> + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
>>>> +}
>>>> +
>>>> +static bool storage_prot_override_applies(char access_control)
>>>> +{
>>>> + /* matches special storage protection override key (9) -> allow */
>>>> + return access_control == PAGE_SPO_ACC;
>>>> +}
>>>> +
>>>> +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
>>>> + enum gacc_mode mode, union asce asce, gpa_t gpa,
>>>> + unsigned long ga, unsigned int len)
>>>> +{
>>>> + unsigned char storage_key, access_control;
>>>> + unsigned long hva;
>>>> + int r;
>>>> +
>>>> + /* access key 0 matches any storage key -> allow */
>>>> + if (access_key == 0)
>>>> + return 0;
>>>> + /*
>>>> + * caller needs to ensure that gfn is accessible, so we can
>>>> + * assume that this cannot fail
>>>> + */
>>>> + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
>>>> + mmap_read_lock(current->mm);
>>>> + r = get_guest_storage_key(current->mm, hva, &storage_key);
>>>> + mmap_read_unlock(current->mm);
>>>> + if (r)
>>>> + return r;
>>>> + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
>>>> + /* access key matches storage key -> allow */
>>>> + if (access_control == access_key)
>>>> + return 0;
>>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) {
>>>> + /* mismatching keys, no fetch protection -> allowed */
>>>> + if (!(storage_key & _PAGE_FP_BIT))
>>>> + return 0;
>>>> + if (fetch_prot_override_applicable(vcpu, mode, asce))
>>>> + if (fetch_prot_override_applies(ga, len))
>>>> + return 0;
>>>> + }
>>>> + if (storage_prot_override_applicable(vcpu))
>>>> + if (storage_prot_override_applies(access_control))
>>>> + return 0;
>>>> + return PGM_PROTECTION;
>>>> +}
>>>
>>> This function is just a pre-check (and early-exit) and we do an additional final check
>>> in the MVCOS routing later on, correct? It might actually be faster to get rid of this
>>
>> No, this exists for those cases that do not do an actual access, that is MEMOPs with
>> the check only flag, as well as the TEST PROTECTION emulation. access_guest_with_key
>> passes key 0 so we take the early return. It's easy to miss so Janosch suggested a comment there.
>
> Dont we always call it in guest_range_to_gpas, which is also called for the memory access in
> access_guest_with_key?
@@ -904,16 +1018,37 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
if (!gpas)
return -ENOMEM;
+ try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
+ try_storage_prot_override = storage_prot_override_applicable(vcpu);
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
- for (idx = 0; idx < nr_pages && !rc; idx++) {
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
Yes, but the key is 0 in that case, so we don't do any key checking. ^
>
>>
>>> pre-test and simply rely on MVCOS. MVCOS is usually just some cycles while ISKE to read
>>> the key is really slow path and take hundreds of cycles. This would even simplify the
>>> patch (assuming that we do proper key checking all the time).
>>
On 18.01.22 at 15:38, Janosch Frank wrote:
[...]
> /*
> We'll do an actual access via the mv instruction which will return access errors to us so we don't need to check here.
> */
Be slightly more verbose I guess. Something like
We'll do an actual access via the mv instruction which will return access errors to us so we don't need to check here.
By using key 0 all checks are skipped and no performance overhead occurs.
?
>> + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
On 1/20/22 11:27, Christian Borntraeger wrote:
>
>
> Am 18.01.22 um 15:38 schrieb Janosch Frank:
> [...]
>> /*
>> We'll do an actual access via the mv instruction which will return access errors to us so we don't need to check here.
>> */
>
> Be slightly more verbose I guess. Something like
> We'll do an actual access via the mv instruction which will return access errors to us so we don't need to check here.
> By using key 0 all checks are skipped and no performance overhead occurs.
>
> ?
Yes, I'll also mention that we implement storage protection override by retrying.
>
>>> + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
> Channel I/O honors storage keys and is performed on absolute memory.
> For I/O emulation user space therefore needs to be able to do key
> checked accesses.
> The vm IOCTL supports read/write accesses, as well as checking
> if an access would succeed.
...
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index e3f450b2f346..dd04170287fd 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -572,6 +572,8 @@ struct kvm_s390_mem_op {
> #define KVM_S390_MEMOP_LOGICAL_WRITE 1
> #define KVM_S390_MEMOP_SIDA_READ 2
> #define KVM_S390_MEMOP_SIDA_WRITE 3
> +#define KVM_S390_MEMOP_ABSOLUTE_READ 4
> +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
Not quite sure about this - maybe it is, but at least I'd like to see this
discussed: Do we really want to re-use the same ioctl layout for both, the
VM and the VCPU file handles? Where the userspace developer has to know that
the *_ABSOLUTE_* ops only work with VM handles, and the others only work
with the VCPU handles? A CPU can also address absolute memory, so why not
add the *_ABSOLUTE_* ops there, too? And if we'd do that, wouldn't it be
sufficient to have the VCPU ioctls only - or do you want to call these
ioctls from spots in QEMU where you do not have a VCPU handle available?
(I/O instructions are triggered from a CPU, so I'd assume that you should
have a VCPU handle around?)
Thomas
On 20.01.22 at 11:38, Thomas Huth wrote:
> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
>> Channel I/O honors storage keys and is performed on absolute memory.
>> For I/O emulation user space therefore needs to be able to do key
>> checked accesses.
>> The vm IOCTL supports read/write accesses, as well as checking
>> if an access would succeed.
> ...
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index e3f450b2f346..dd04170287fd 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -572,6 +572,8 @@ struct kvm_s390_mem_op {
>> #define KVM_S390_MEMOP_LOGICAL_WRITE 1
>> #define KVM_S390_MEMOP_SIDA_READ 2
>> #define KVM_S390_MEMOP_SIDA_WRITE 3
>> +#define KVM_S390_MEMOP_ABSOLUTE_READ 4
>> +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
>
> Not quite sure about this - maybe it is, but at least I'd like to see this discussed: Do we really want to re-use the same ioctl layout for both, the VM and the VCPU file handles? Where the userspace developer has to know that the *_ABSOLUTE_* ops only work with VM handles, and the others only work with the VCPU handles? A CPU can also address absolute memory, so why not adding the *_ABSOLUTE_* ops there, too? And if we'd do that, wouldn't it be sufficient to have the VCPU ioctls only - or do you want to call these ioctls from spots in QEMU where you do not have a VCPU handle available? (I/O instructions are triggered from a CPU, so I'd assume that you should have a VCPU handle around?)
I partially agree. The ABSOLUTE calls should also work from a VCPU handle. But you also want to be able to call this from a different thread than the vcpu threads, as you do not want to kick a CPU out of the kernel just for that. So it makes sense to have this ioctl also for the VM fd.
>
> Thomas
>
>
On 1/20/22 11:38, Thomas Huth wrote:
> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
>> Channel I/O honors storage keys and is performed on absolute memory.
>> For I/O emulation user space therefore needs to be able to do key
>> checked accesses.
>> The vm IOCTL supports read/write accesses, as well as checking
>> if an access would succeed.
> ...
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index e3f450b2f346..dd04170287fd 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -572,6 +572,8 @@ struct kvm_s390_mem_op {
>> #define KVM_S390_MEMOP_LOGICAL_WRITE 1
>> #define KVM_S390_MEMOP_SIDA_READ 2
>> #define KVM_S390_MEMOP_SIDA_WRITE 3
>> +#define KVM_S390_MEMOP_ABSOLUTE_READ 4
>> +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
>
> Not quite sure about this - maybe it is, but at least I'd like to see this discussed: Do we really want to re-use the same ioctl layout for both, the VM and the VCPU file handles? Where the userspace developer has to know that the *_ABSOLUTE_* ops only work with VM handles, and the others only work with the VCPU handles? A CPU can also address absolute memory, so why not adding the *_ABSOLUTE_* ops there, too? And if we'd do that, wouldn't it be sufficient to have the VCPU ioctls only - or do you want to call these ioctls from spots in QEMU where you do not have a VCPU handle available? (I/O instructions are triggered from a CPU, so I'd assume that you should have a VCPU handle around?)
There are some differences between the vm and the vcpu memops.
No storage or fetch protection overrides apply to IO/vm memops; after all, there is no control register to enable them.
Additionally, quiescing is not required for IO, though in practice we use the same code path for the vcpu and the vm here.
Allowing absolute accesses with a vcpu is doable, but I'm not sure what the use case for it would be; I'm not aware of
a precedent in the architecture. Of course the vcpu memop already supports logical=real accesses.
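For what it's worth, the intended usage from an I/O emulation thread that only
holds the vm fd would look roughly like this (sketch; the .key field and the
KVM_S390_MEMOP_F_SKEY_PROTECTION flag name are assumptions based on the cover
letter rather than literal quotes from this revision):

/* needs <stdint.h>, <sys/ioctl.h> and <linux/kvm.h> with this series applied */
static int memop_absolute_write(int vm_fd, __u64 absolute_addr,
				void *buffer, __u32 len, __u8 access_key)
{
	struct kvm_s390_mem_op ksmo = {
		.gaddr = absolute_addr,			/* guest absolute address */
		.size  = len,
		.op    = KVM_S390_MEMOP_ABSOLUTE_WRITE,
		.buf   = (__u64)(uintptr_t)buffer,
		.key   = access_key,			/* checked against the storage keys */
		.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION,
	};

	/* non-zero return means the access failed, e.g. a key-protection mismatch */
	return ioctl(vm_fd, KVM_S390_MEM_OP, &ksmo);
}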
>
> Thomas
>
>
On Thu, Jan 20, 2022 at 09:34:05AM +0100, Janis Schoetterl-Glausch wrote:
> On 1/19/22 14:20, Heiko Carstens wrote:
> > On Wed, Jan 19, 2022 at 12:02:34PM +0100, Janis Schoetterl-Glausch wrote:
> >>> That's a lot of code churn... I would have expected that the existing
> >>> functions will be renamed, get an additional key parameter, and the
> >>> current API is implemented by defines which map copy_to_user() &
> >>> friends to the new functions, and add a zero key.
> >>
> >> I don't think I understand you. I can implement raw_copy_from/to_user
> >> in terms of raw_copy_from/to_user_with_key, which does save a few lines,
> >> but that's it, isn't it?
> >
> > Right you are. I only looked at your patch, and forgot about that all
> > the wrapping is nowadays done in common code.
> >
> > So what I really don't like about this approach is that we get an arch
> > specific copy_to_user() implementation back. This means that all those
> > extra calls like might_fault(), instrument_copy_to_user(), and friends
> > now have to be kept in sync by us again, if new instrumentation or
> > security options are added to common code.
> >
> > Given that this is manual work / monitoring I'm sure this will not
> > work in the mid or long term, like it has been proven several times in
> > the past for other features. We need something better, which works
> > out-of-the-box wrt common code changes / enhancements.
>
> What are our options?
>
> 1. Tooling
> 1.1 Automatic monitoring
> 1.2 ?
No tooling please.
> 2. Implementation changes
> 2.1 Modify common code
In general such changes are done in a way that common code is or _may_ be
modified to fulfill our needs. The common code header file explicitly states
that architectures should get rid of private instances of
copy_{to,from}_user() and __copy_{to,from}_user{,_inatomic}().
So we should not add anything like that to arch code again, since nobody
would expect that.
> 2.2 Don't modify common code, pass key argument via well known location
This might also break, and it adds complex code and dependencies that should
be avoided.
But maybe you'll find something else that is nice and easily maintainable.
On 1/18/22 10:52, Janis Schoetterl-Glausch wrote:
> Test the emulation of TEST PROTECTION in the presence of storage keys.
> Emulation only occurs under certain conditions, one of which is the host
> page being protected.
> Trigger this by protecting the test pages via mprotect.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> ---
> tools/testing/selftests/kvm/.gitignore | 1 +
> tools/testing/selftests/kvm/Makefile | 1 +
> tools/testing/selftests/kvm/s390x/tprot.c | 184 ++++++++++++++++++++++
> 3 files changed, 186 insertions(+)
> create mode 100644 tools/testing/selftests/kvm/s390x/tprot.c
>
> diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
> index 3763105029fb..82c0470b6849 100644
> --- a/tools/testing/selftests/kvm/.gitignore
> +++ b/tools/testing/selftests/kvm/.gitignore
> @@ -7,6 +7,7 @@
> /s390x/memop
> /s390x/resets
> /s390x/sync_regs_test
> +/s390x/tprot
> /x86_64/cr4_cpuid_sync_test
> /x86_64/debug_regs
> /x86_64/evmcs_test
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index c4e34717826a..df6de8d155e8 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -109,6 +109,7 @@ TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
> TEST_GEN_PROGS_s390x = s390x/memop
> TEST_GEN_PROGS_s390x += s390x/resets
> TEST_GEN_PROGS_s390x += s390x/sync_regs_test
> +TEST_GEN_PROGS_s390x += s390x/tprot
> TEST_GEN_PROGS_s390x += demand_paging_test
> TEST_GEN_PROGS_s390x += dirty_log_test
> TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
> diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
> new file mode 100644
> index 000000000000..8b52675307f6
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/s390x/tprot.c
> @@ -0,0 +1,184 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Test TEST PROTECTION emulation.
> + * In order for emulation to occur the target page has to be DAT protected in the
> + * host mappings. Since the page tables are shared, we can use mprotect
> + * to achieve this.
> + *
> + * Copyright IBM Corp. 2021
> + */
> +
> +#include <sys/mman.h>
> +#include "test_util.h"
> +#include "kvm_util.h"
> +
> +#define PAGE_SHIFT 12
> +#define PAGE_SIZE (1 << PAGE_SHIFT)
> +#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
> +#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
> +
> +#define VCPU_ID 1
> +
> +static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
> +static uint8_t *const page_store_prot = pages[0];
> +static uint8_t *const page_fetch_prot = pages[1];
> +
> +static int set_storage_key(void *addr, uint8_t key)
> +{
> + int not_mapped = 0;
> +
Maybe add a short comment:
Check if address is mapped via lra and set the storage key if it is.
> + asm volatile (
> + "lra %[addr], 0(0,%[addr])\n"
> + " jz 0f\n"
> + " llill %[not_mapped],1\n"
> + " j 1f\n"
> + "0: sske %[key], %[addr]\n"
> + "1:"
> + : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
Shouldn't this be a "=r" instead of a "+r" for not_mapped?
> + : [key] "r" (key)
> + : "cc"
> + );
> + return -not_mapped;
> +}
> +
> +enum permission {
> + READ_WRITE = 0,
> + READ = 1,
> + NONE = 2,
> + UNAVAILABLE = 3,
TRANSLATION_NA ?
I'm not completely happy with these names but I've yet to come up with a
better naming scheme here.
> +};
> +
> +static enum permission test_protection(void *addr, uint8_t key)
> +{
> + uint64_t mask;
> +
> + asm volatile (
> + "tprot %[addr], 0(%[key])\n"
> + " ipm %[mask]\n"
> + : [mask] "=r" (mask)
> + : [addr] "Q" (*(char *)addr),
> + [key] "a" (key)
> + : "cc"
> + );
> +
> + return (enum permission)mask >> 28;
You could replace the shift with the "srl" that we normally do.
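Something like this, I guess (untested sketch, with the shift folded into the asm):

	asm volatile (
		"tprot %[addr], 0(%[key])\n"
		" ipm %[mask]\n"
		" srl %[mask], 28\n"
		: [mask] "=r" (mask)
		: [addr] "Q" (*(char *)addr),
		  [key] "a" (key)
		: "cc"
	);
	/* the condition code is now in the low bits of mask */
	return (enum permission)mask;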
> +}
> +
> +enum stage {
> + STAGE_END,
> + STAGE_INIT_SIMPLE,
> + TEST_SIMPLE,
> + STAGE_INIT_FETCH_PROT_OVERRIDE,
> + TEST_FETCH_PROT_OVERRIDE,
> + TEST_STORAGE_PROT_OVERRIDE,
> +};
> +
> +struct test {
> + enum stage stage;
> + void *addr;
> + uint8_t key;
> + enum permission expected;
> +} tests[] = {
> + /* Those which result in NONE/UNAVAILABLE will be interpreted by SIE,
> + * not KVM, but there is no harm in testing them also.
> + * See Enhanced Suppression-on-Protection Facilities in the
> + * Interpretive-Execution Mode
> + */
Outside of net/ we put the first line on "*" not on "/*"
s/Those which result in/Tests resulting in/ ?
> + { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
> + { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
> + { TEST_SIMPLE, page_store_prot, 0x20, READ },
> + { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
> + { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
> + { TEST_SIMPLE, page_fetch_prot, 0x10, NONE },
> + { TEST_SIMPLE, (void *)0x00, 0x10, UNAVAILABLE },
> + /* Fetch-protection override */
> + { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
> + { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, NONE },
> + /* Storage-protection override */
> + { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
> + { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
> + { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
> + /* End marker */
> + { STAGE_END, 0, 0, 0 },
> +};
> +
> +static enum stage perform_next_stage(int *i, bool mapped_0)
> +{
> + enum stage stage = tests[*i].stage;
> + enum permission result;
> + bool skip;
> +
> + for (; tests[*i].stage == stage; (*i)++) {
> + skip = tests[*i].addr < (void *)4096 &&
> + !mapped_0 &&
> + tests[*i].expected != UNAVAILABLE;
Time for a comment?
> + if (!skip) {
> + result = test_protection(tests[*i].addr, tests[*i].key);
> + GUEST_ASSERT_2(result == tests[*i].expected, *i, result);
> + }
> + }
> + return stage;
> +}
> +
> +static void guest_code(void)
> +{
> + bool mapped_0;
> + int i = 0;
> +
It's __really__ hard to understand this since the state is changed both
by the guest and host. Please add comments to this and maybe also add
some to the test struct explaining why you expect the results for each test.
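Maybe something along these lines, based on the flow that is already visible in
the patch (wording is just a suggestion):

/*
 * Guest and host take turns via GUEST_SYNC()/HOST_SYNC(): the guest sets up
 * storage keys and announces a STAGE_INIT_* stage, the host then adjusts the
 * environment (mprotect() of the test pages, CR0 override bits via the sync
 * regs) and resumes the guest, which runs the TEST_* assertions of that stage
 * against the tests[] table.
 */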
> + GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
> + GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
> + GUEST_SYNC(STAGE_INIT_SIMPLE);
> + GUEST_SYNC(perform_next_stage(&i, false));
> +
> + /* Fetch-protection override */
> + mapped_0 = !set_storage_key((void *)0, 0x98);
> + GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
> + GUEST_SYNC(perform_next_stage(&i, mapped_0));
> +
> + /* Storage-protection override */
> + GUEST_SYNC(perform_next_stage(&i, mapped_0));
> +}
> +
> +#define HOST_SYNC(vmp, stage) \
> +({ \
> + struct kvm_vm *__vm = (vmp); \
> + struct ucall uc; \
> + int __stage = (stage); \
> + \
> + vcpu_run(__vm, VCPU_ID); \
> + get_ucall(__vm, VCPU_ID, &uc); \
> + if (uc.cmd == UCALL_ABORT) { \
> + TEST_FAIL("line %lu: %s, hints: %lu, %lu", uc.args[1], \
> + (const char *)uc.args[0], uc.args[2], uc.args[3]); \
> + } \
> + ASSERT_EQ(uc.cmd, UCALL_SYNC); \
> + ASSERT_EQ(uc.args[1], __stage); \
> +})
> +
> +int main(int argc, char *argv[])
> +{
> + struct kvm_vm *vm;
> + struct kvm_run *run;
> + vm_vaddr_t guest_0_page;
> +
> + vm = vm_create_default(VCPU_ID, 0, guest_code);
> + run = vcpu_state(vm, VCPU_ID);
> +
> + HOST_SYNC(vm, STAGE_INIT_SIMPLE);
> + mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
> + HOST_SYNC(vm, TEST_SIMPLE);
> +
> + guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
> + if (guest_0_page != 0)
> + print_skip("Did not allocate page at 0 for fetch protection override tests");
> + HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE);
> + if (guest_0_page == 0)
> + mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
> + run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
> + run->kvm_dirty_regs = KVM_SYNC_CRS;
> + HOST_SYNC(vm, TEST_FETCH_PROT_OVERRIDE);
> +
> + run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
> + run->kvm_dirty_regs = KVM_SYNC_CRS;
> + HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE);
> +}
>
On Thu, Jan 20, 2022 at 01:56:40PM +0100, Heiko Carstens wrote:
> > 2. Implementation changes
> > 2.1 Modify common code
>
> In general such changes are done in way that common code is or _may_ be
> modified to fulfill our needs. Common code header file explicitely states
> that architectures should get rid of private instances of
> copy_{to,from}_user() and __copy_{to,from}_user{,_inatomic}().
>
> So we should not add anything like that to arch code again, since nobody
> would expect that.
Or to be more specific: I think the most simple solution would be to
try to get the new *key variants into include/linux/uaccess.h, and add
the raw variants in architecture code, similar to the rest of the
uaccess functions.
There is some (sort of) prior art with copy_mc_to_kernel() even,
though that can only partially be compared.
On 20.01.22 19:19, Heiko Carstens wrote:
> On Thu, Jan 20, 2022 at 01:56:40PM +0100, Heiko Carstens wrote:
>>> 2. Implementation changes
>>> 2.1 Modify common code
>>
>> In general such changes are done in way that common code is or _may_ be
>> modified to fulfill our needs. Common code header file explicitely states
>> that architectures should get rid of private instances of
>> copy_{to,from}_user() and __copy_{to,from}_user{,_inatomic}().
>>
>> So we should not add anything like that to arch code again, since nobody
>> would expect that.
>
> Or to be more specific: I think the most simple solution would be to
> try to get the new *key variants into include/linux/uaccess.h, and add
> the raw variants in architecture code, similar to the rest of the
> uaccess functions.
> There is some (sort of) prior art with copy_mc_to_kernel() even,
> though that can only partially be compared.
So in essence adding something like this and then providing raw_copy_from/to_user_key?
(whitespace damaged, just pasted in)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index ac0394087f7d..3b6e78ee211c 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -201,6 +201,59 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
return n;
}
+
+#if defined(__s390x__) && defined(CONFIG_KVM)
+/*
+ * Variants that pass along an access key. Used by KVM on s390x to implement
+ * key checks for guests that use storage keys. Must be kept in sync with the
+ * non-key variants from above. The only difference is the _key suffix when
+ * calling raw_copy_from/to_user_key.
+ */
+static inline __must_check unsigned long
+_copy_from_user_key(void *to, const void __user *from, unsigned long n, u8 key)
+{
+ unsigned long res = n;
+ might_fault();
+ if (!should_fail_usercopy() && likely(access_ok(from, n))) {
+ instrument_copy_from_user(to, from, n);
+ res = raw_copy_from_user_key(to, from, n, key);
+ }
+ if (unlikely(res))
+ memset(to + (n - res), 0, res);
+ return res;
+}
+
+static inline __must_check unsigned long
+_copy_to_user_key(void __user *to, const void *from, unsigned long n, u8 key)
+{
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ if (access_ok(to, n)) {
+ instrument_copy_to_user(to, from, n);
+ n = raw_copy_to_user_key(to, from, n, key);
+ }
+ return n;
+}
+
+static __always_inline unsigned long __must_check
+copy_from_user_key(void *to, const void __user *from, unsigned long n, u8 key)
+{
+ if (likely(check_copy_size(to, n, false)))
+ n = _copy_from_user_key(to, from, n, key);
+ return n;
+}
+
+static __always_inline unsigned long __must_check
+copy_to_user_key(void __user *to, const void *from, unsigned long n, u8 key)
+{
+ if (likely(check_copy_size(from, n, true)))
+ n = _copy_to_user_key(to, from, n, key);
+ return n;
+}
+#endif
+
+
#ifndef copy_mc_to_kernel
/*
* Without arch opt-in this generic copy_mc_to_kernel() will not handle
On 1/20/22 16:40, Janosch Frank wrote:
> On 1/18/22 10:52, Janis Schoetterl-Glausch wrote:
>> Test the emulation of TEST PROTECTION in the presence of storage keys.
>> Emulation only occurs under certain conditions, one of which is the host
>> page being protected.
>> Trigger this by protecting the test pages via mprotect.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>> ---
>> tools/testing/selftests/kvm/.gitignore | 1 +
>> tools/testing/selftests/kvm/Makefile | 1 +
>> tools/testing/selftests/kvm/s390x/tprot.c | 184 ++++++++++++++++++++++
>> 3 files changed, 186 insertions(+)
>> create mode 100644 tools/testing/selftests/kvm/s390x/tprot.c
>>
>> diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
>> index 3763105029fb..82c0470b6849 100644
>> --- a/tools/testing/selftests/kvm/.gitignore
>> +++ b/tools/testing/selftests/kvm/.gitignore
>> @@ -7,6 +7,7 @@
>> /s390x/memop
>> /s390x/resets
>> /s390x/sync_regs_test
>> +/s390x/tprot
>> /x86_64/cr4_cpuid_sync_test
>> /x86_64/debug_regs
>> /x86_64/evmcs_test
>> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
>> index c4e34717826a..df6de8d155e8 100644
>> --- a/tools/testing/selftests/kvm/Makefile
>> +++ b/tools/testing/selftests/kvm/Makefile
>> @@ -109,6 +109,7 @@ TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
>> TEST_GEN_PROGS_s390x = s390x/memop
>> TEST_GEN_PROGS_s390x += s390x/resets
>> TEST_GEN_PROGS_s390x += s390x/sync_regs_test
>> +TEST_GEN_PROGS_s390x += s390x/tprot
>> TEST_GEN_PROGS_s390x += demand_paging_test
>> TEST_GEN_PROGS_s390x += dirty_log_test
>> TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
>> diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
>> new file mode 100644
>> index 000000000000..8b52675307f6
>> --- /dev/null
>> +++ b/tools/testing/selftests/kvm/s390x/tprot.c
[...]
>> +
>> +static int set_storage_key(void *addr, uint8_t key)
>> +{
>> + int not_mapped = 0;
>> +
>
> Maybe add a short comment:
> Check if address is mapped via lra and set the storage key if it is.
>
>> + asm volatile (
>> + "lra %[addr], 0(0,%[addr])\n"
>> + " jz 0f\n"
>> + " llill %[not_mapped],1\n"
>> + " j 1f\n"
>> + "0: sske %[key], %[addr]\n"
>> + "1:"
>> + : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
>
> Shouldn't this be a "=r" instead of a "+r" for not_mapped?
I don't think so. We only write to it on one code path and the compiler mustn't conclude
that it can remove the = 0 assignment because the value gets overwritten anyway.
Initially I tried to implement the function like this:
static int set_storage_key(void *addr, uint8_t key)
{
asm goto ("lra %[addr], 0(0,%[addr])\n\t"
"jnz %l[not_mapped]\n\t"
"sske %[key], %[addr]\n"
: [addr] "+&a" (addr)
: [key] "r" (key)
: "cc", "memory"
: not_mapped
);
return 0;
not_mapped:
return -1;
}
Which I think is nicer, but the compiler just optimized that completely away.
I have no clue why it (thinks it) is allowed to do that.
>
>> + : [key] "r" (key)
>> + : "cc"
>> + );
>> + return -not_mapped;
>> +}
>> +
>> +enum permission {
>> + READ_WRITE = 0,
>> + READ = 1,
>> + NONE = 2,
>> + UNAVAILABLE = 3,
>
> TRANSLATION_NA ?
> I'm not completely happy with these names but I've yet to come up with a better naming scheme here.
Mentioning translation is a good idea. Don't think there is any harm in using
TRANSLATION_NOT_AVAILABLE or TRANSLATION_UNAVAILABLE.
>
>> +};
>> +
>> +static enum permission test_protection(void *addr, uint8_t key)
>> +{
>> + uint64_t mask;
>> +
>> + asm volatile (
>> + "tprot %[addr], 0(%[key])\n"
>> + " ipm %[mask]\n"
>> + : [mask] "=r" (mask)
>> + : [addr] "Q" (*(char *)addr),
>> + [key] "a" (key)
>> + : "cc"
>> + );
>> +
>> + return (enum permission)mask >> 28;
>
> You could replace the shift with the "srl" that we normally do.
I prefer keeping the asm as small as possible, C is just so much easier to understand.
[...]
> It's __really__ hard to understand this since the state is changed both by the guest and host. Please add comments to this and maybe also add some to the test struct explaining why you expect the results for each test.
>
I think I'll concentrate the comments at the tests array so we have one location
that lays out the complete logic; then one only has to check that the guest
and host, respectively, match up with it, instead of having to model their interaction
in one's head.
I'll incorporate your other feedback, too.
Thanks!
On Fri, Jan 21, 2022 at 08:32:25AM +0100, Christian Borntraeger wrote:
> So in essence adding something like this and then providing raw_copy_from/to_user_key?
> (whitespace damaged, just pasted in)
>
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index ac0394087f7d..3b6e78ee211c 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -201,6 +201,59 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
> return n;
> }
> +
> +#if defined(__s390x__) && defined(CONFIG_KVM)
> +/*
> + * Variants that pass along an access key. Uses by KVM on s390x to implement
> + * key checks for guests that use storage keys Must be kept in sync with the
> + * non-key variants from above. The only difference is the _key suffix when
> + * calling raw_copy_from/to_user_key.
> + */
This is too architecture-specific; I wouldn't like to see __s390__ or
KVM dependencies. This should be a bit more generic, so other
architectures _might_ also make use of this interface. That is:
> +static inline __must_check unsigned long
> +_copy_from_user_key(void *to, const void __user *from, unsigned long n, u8 key)
Make key unsigned long, add support for INLINE_COPY_TO_USER, and maybe
add a wrapper, so this works on all architectures, e.g. if
raw_copy_to_user_key() is not defined, then fall back to
raw_copy_to_user() and ignore the key parameter.
Just some ideas. The _only_ concern I have about the previous implementation
is that we could run out of sync with common code wrt
instrumentation and other options. Given how sensitive uaccess ops
are, I think we should try to avoid that at all costs, if possible.
Does that make sense?
On Fri, 21 Jan 2022 12:03:20 +0100
Janis Schoetterl-Glausch <[email protected]> wrote:
[...]
> >> +
> >> +static int set_storage_key(void *addr, uint8_t key)
> >> +{
> >> + int not_mapped = 0;
> >> +
> >
> > Maybe add a short comment:
> > Check if address is mapped via lra and set the storage key if it is.
> >
> >> + asm volatile (
> >> + "lra %[addr], 0(0,%[addr])\n"
> >> + " jz 0f\n"
> >> + " llill %[not_mapped],1\n"
> >> + " j 1f\n"
> >> + "0: sske %[key], %[addr]\n"
> >> + "1:"
> >> + : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
> >
> > Shouldn't this be a "=r" instead of a "+r" for not_mapped?
>
> I don't think so. We only write to it on one code path and the compiler mustn't conclude
> that it can remove the = 0 assignment because the value gets overwritten anyway.
>
> Initially I tried to implement the function like this:
>
> static int set_storage_key(void *addr, uint8_t key)
> {
> asm goto ("lra %[addr], 0(0,%[addr])\n\t"
> "jnz %l[not_mapped]\n\t"
> "sske %[key], %[addr]\n"
> : [addr] "+&a" (addr)
> : [key] "r" (key)
> : "cc", "memory"
> : not_mapped
> );
> return 0;
> not_mapped:
> return -1;
> }
>
> Which I think is nicer, but the compiler just optimized that completely away.
> I have no clue why it (thinks it) is allowed to do that.
>
> >
> >> + : [key] "r" (key)
> >> + : "cc"
> >> + );
> >> + return -not_mapped;
> >> +}
> >> +
> >> +enum permission {
> >> + READ_WRITE = 0,
> >> + READ = 1,
> >> + NONE = 2,
> >> + UNAVAILABLE = 3,
> >
> > TRANSLATION_NA ?
> > I'm not completely happy with these names but I've yet to come up with a better naming scheme here.
>
> Mentioning translation is a good idea. Don't think there is any harm in using
> TRANSLATION_NOT_AVAILABLE or TRANSLATION_UNAVAILABLE.
it's too long, it actually makes the code harder to read when used
maybe consider something like TRANSL_UNAVAIL as well
> >
> >> +};
> >> +
> >> +static enum permission test_protection(void *addr, uint8_t key)
> >> +{
> >> + uint64_t mask;
> >> +
> >> + asm volatile (
> >> + "tprot %[addr], 0(%[key])\n"
> >> + " ipm %[mask]\n"
> >> + : [mask] "=r" (mask)
> >> + : [addr] "Q" (*(char *)addr),
> >> + [key] "a" (key)
> >> + : "cc"
> >> + );
> >> +
> >> + return (enum permission)mask >> 28;
> >
> > You could replace the shift with the "srl" that we normally do.
>
> I prefer keeping the asm as small as possible, C is just so much easier to understand.
we use srl everywhere, but I agree that explicitly using C makes it
less obscure. and in the end the compiler should generate the same
instructions anyway.
my only comment about the above code is that you are casting the
uint64_t to enum permission _and then_ shifting. _technically_ it
should still work (enums are just ints), but I think it would
look cleaner if you write
return (enum permission)(mask >> 28);
>
> [...]
>
> > It's __really__ hard to understand this since the state is changed both by the guest and host. Please add comments to this and maybe also add some to the test struct explaining why you expect the results for each test.
> >
>
> I think I'll concentrate the comments at the tests array so we have one location
> that lays out the complete logic and then one only has to check if the guest
> and host match up with that, respectively, instead of having to model their interaction
> in ones head.
>
> I'll incorporate your other feedback, too.
>
> Thanks!
On 1/21/22 12:04, Heiko Carstens wrote:
> On Fri, Jan 21, 2022 at 08:32:25AM +0100, Christian Borntraeger wrote:
>> So in essence adding something like this and then providing raw_copy_from/to_user_key?
>> (whitespace damaged, just pasted in)
>>
>> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
>> index ac0394087f7d..3b6e78ee211c 100644
>> --- a/include/linux/uaccess.h
>> +++ b/include/linux/uaccess.h
>> @@ -201,6 +201,59 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
>> return n;
>> }
>> +
>> +#if defined(__s390x__) && defined(CONFIG_KVM)
>> +/*
>> + * Variants that pass along an access key. Uses by KVM on s390x to implement
>> + * key checks for guests that use storage keys Must be kept in sync with the
>> + * non-key variants from above. The only difference is the _key suffix when
>> + * calling raw_copy_from/to_user_key.
>> + */
>
> This is too architecture specific, I wouldn't like to see __s390__ or
> KVM dependencies. This should be a bit more generic, so other
> architectures _might_ also make use of this interface that is:
>
>> +static inline __must_check unsigned long
>> +_copy_from_user_key(void *to, const void __user *from, unsigned long n, u8 key)
>
> Make key unsigned long, add support for INLINE_COPY_TO_USER, and maybe
> add a wrapper, so this works on all architectures, e.g. if
> raw_copy_to_user_key() is not defined, then fall back to
> raw_copy_to_user() and ignore the key parameter.
>
Since we only need the double underscore variants, even if we're going to be more
general than we need to be, we can restrict ourselves to those, can't we?
I don't understand your comment about the wrapper. You'd want an error on misuse,
that is, if you try to use a _with_key function when the functionality is not defined, no?
I see the following possibilities:
1. raw_copy_from/to_user is declared by each architecture.
Mirror that for raw_copy_from/to_user_with_key.
Linker error on misuse.
2. Opt in with #define UACCESS_WITH_KEY or similar.
Undeclared function on misuse.
3. Opt in by requiring that raw_copy_from/to_user_with_key are macros.
Undeclared function on misuse.
4. Declare raw_copy_from/to_user_with_key in include/linux/uaccess.h.
Linker error on misuse.
On 1/21/22 13:28, Claudio Imbrenda wrote:
> On Fri, 21 Jan 2022 12:03:20 +0100
> Janis Schoetterl-Glausch <[email protected]> wrote:
>
> [...]
>
>>>> +
>>>> +static int set_storage_key(void *addr, uint8_t key)
>>>> +{
>>>> + int not_mapped = 0;
>>>> +
>>>
>>> Maybe add a short comment:
>>> Check if address is mapped via lra and set the storage key if it is.
>>>
>>>> + asm volatile (
>>>> + "lra %[addr], 0(0,%[addr])\n"
>>>> + " jz 0f\n"
>>>> + " llill %[not_mapped],1\n"
>>>> + " j 1f\n"
>>>> + "0: sske %[key], %[addr]\n"
>>>> + "1:"
>>>> + : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
>>>
>>> Shouldn't this be a "=r" instead of a "+r" for not_mapped?
>>
>> I don't think so. We only write to it on one code path and the compiler mustn't conclude
>> that it can remove the = 0 assignment because the value gets overwritten anyway.
>>
>> Initially I tried to implement the function like this:
>>
>> static int set_storage_key(void *addr, uint8_t key)
>> {
>> asm goto ("lra %[addr], 0(0,%[addr])\n\t"
>> "jnz %l[not_mapped]\n\t"
>> "sske %[key], %[addr]\n"
>> : [addr] "+&a" (addr)
>> : [key] "r" (key)
>> : "cc", "memory"
>> : not_mapped
>> );
>> return 0;
>> not_mapped:
>> return -1;
>> }
>>
>> Which I think is nicer, but the compiler just optimized that completely away.
>> I have no clue why it (thinks it) is allowed to do that.
>>
>>>
>>>> + : [key] "r" (key)
>>>> + : "cc"
>>>> + );
>>>> + return -not_mapped;
>>>> +}
>>>> +
>>>> +enum permission {
>>>> + READ_WRITE = 0,
>>>> + READ = 1,
>>>> + NONE = 2,
>>>> + UNAVAILABLE = 3,
>>>
>>> TRANSLATION_NA ?
>>> I'm not completely happy with these names but I've yet to come up with a better naming scheme here.
>>
>> Mentioning translation is a good idea. Don't think there is any harm in using
>> TRANSLATION_NOT_AVAILABLE or TRANSLATION_UNAVAILABLE.
>
> it's too long, it actually makes the code harder to read when used
>
> maybe consider something like TRANSL_UNAVAIL as well
Fine with me. I'll rename NONE to RW_PROTECTED. NONE is too nondescript.
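So, roughly (names as discussed, still open to bikeshedding):

enum permission {
	READ_WRITE = 0,
	READ = 1,
	RW_PROTECTED = 2,
	TRANSL_UNAVAIL = 3,
};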
>
>>>
>>>> +};
>>>> +
>>>> +static enum permission test_protection(void *addr, uint8_t key)
>>>> +{
>>>> + uint64_t mask;
>>>> +
>>>> + asm volatile (
>>>> + "tprot %[addr], 0(%[key])\n"
>>>> + " ipm %[mask]\n"
>>>> + : [mask] "=r" (mask)
>>>> + : [addr] "Q" (*(char *)addr),
>>>> + [key] "a" (key)
>>>> + : "cc"
>>>> + );
>>>> +
>>>> + return (enum permission)mask >> 28;
>>>
>>> You could replace the shift with the "srl" that we normally do.
>>
>> I prefer keeping the asm as small as possible, C is just so much easier to understand.
>
> we use srl everywhere, but I agree that explicitly using C makes it
> less obscure. and in the end the compiler should generate the same
> instructions anyway.
>
> my only comment about the above code is that you are casting the
> uint64_t to enum permission _and then_ shifting. _technically_ it
> should still work (enums are just ints), but I think it would
> look cleaner if you write
>
> return (enum permission)(mask >> 28);
That is better indeed.
[...]
On Fri, Jan 21, 2022 at 02:46:02PM +0100, Janis Schoetterl-Glausch wrote:
> >> +static inline __must_check unsigned long
> >> +_copy_from_user_key(void *to, const void __user *from, unsigned long n, u8 key)
> >
> > Make key unsigned long, add support for INLINE_COPY_TO_USER, and maybe
> > add a wrapper, so this works on all architectures, e.g. if
> > raw_copy_to_user_key() is not defined, then fall back to
> > raw_copy_to_user() and ignore the key parameter.
> >
>
> Since we only need the double underscore variants, even if we're going to be more
> general than we need to be, we can restrict ourselves to those, can't we?
I don't think that works with INLINE_COPY_TO_USER. The reason for the
single underscore variants is being able to have an out-of-line implementation
in lib/usercopy.c, isn't it?
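For reference, the existing non-key mechanism looks roughly like this (reproduced from include/linux/uaccess.h from memory, so treat it as a sketch); the inline variant is used when the architecture defines INLINE_COPY_FROM_USER, otherwise the out-of-line one from lib/usercopy.c is declared:

#ifdef INLINE_COPY_FROM_USER
static inline __must_check unsigned long
_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	unsigned long res = n;
	might_fault();
	if (!should_fail_usercopy() && likely(access_ok(from, n))) {
		instrument_copy_from_user(to, from, n);
		res = raw_copy_from_user(to, from, n);
	}
	if (unlikely(res))
		memset(to + (n - res), 0, res);
	return res;
}
#else
extern __must_check unsigned long
_copy_from_user(void *, const void __user *, unsigned long);
#endif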
> I don't understand your comment about the wrapper. You'd want an error on misuse,
> that is, if you try to use a _with_key function if the functionality is not defined, no?
No. My intention was that it should simply work/compile by default for
architectures which do not support a key mechanism:
something like
#ifndef raw_copy_to_user_key
static inline unsigned long raw_copy_to_user_key(..., unsigned long key)
{
return raw_copy_to_user(...);
}
#endif
Again, this is just a proposal and might be turned down by common code
guys. Just trying to come up with something which could also work on other
architectures, and which does not cause pain for architectures that don't
really need it.
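With such a fallback in place, a caller could use the key variant unconditionally, e.g. (hypothetical call site, variable names made up):

	/* somewhere in KVM guest access code */
	if (copy_to_user_key(uaddr, data, len, access_key))
		return -EFAULT;

On architectures without a raw_copy_to_user_key() this would then just behave like a plain copy_to_user().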
KVM on s390 needs a mechanism to do accesses to guest memory
that honors storage key protection.
__copy_from/to_user_with_key is implemented by introducing
raw_copy_from/to_user_with_key.
Since the existing uaccess implementation on s390 makes use of move
instructions that support having an additional access key supplied,
we can implement raw_copy_from/to_user_with_key by enhancing the
existing implementation.
Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
---
This works for us and compiles on other architectures (tested x86).
The patch only implements __copy_from/to_user_with_key, since those
are the ones we actually need. On other architectures those functions
don't exist, but they aren't used either, so it's not a problem.
Should we also implement single and no underscore variants? Why?
Completeness?
arch/s390/include/asm/uaccess.h | 20 +++++++++++--
arch/s390/lib/uaccess.c | 50 +++++++++++++++++++--------------
include/linux/uaccess.h | 28 ++++++++++++++++++
3 files changed, 75 insertions(+), 23 deletions(-)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 02b467461163..fc8477b5e98c 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -33,11 +33,27 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
#define access_ok(addr, size) __access_ok(addr, size)
+#define raw_copy_from_user_with_key raw_copy_from_user_with_key
unsigned long __must_check
-raw_copy_from_user(void *to, const void __user *from, unsigned long n);
+raw_copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
+ unsigned long key);
+#define raw_copy_to_user_with_key raw_copy_to_user_with_key
unsigned long __must_check
-raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+raw_copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
+ unsigned long key);
+
+static __always_inline unsigned long __must_check
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return raw_copy_from_user_with_key(to, from, n, 0);
+}
+
+static __always_inline unsigned long __must_check
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return raw_copy_to_user_with_key(to, from, n, 0);
+}
#ifndef CONFIG_KASAN
#define INLINE_COPY_FROM_USER
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d3a700385875..63845dd82691 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -59,11 +59,13 @@ static inline int copy_with_mvcos(void)
#endif
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
- unsigned long size)
+ unsigned long size, u8 key)
{
unsigned long tmp1, tmp2;
union oac spec = {
+ .oac2.key = key,
.oac2.as = PSW_BITS_AS_SECONDARY,
+ .oac2.k = 1,
.oac2.a = 1,
};
@@ -94,19 +96,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
}
static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
- unsigned long size)
+ unsigned long size, u8 key)
{
unsigned long tmp1, tmp2;
tmp1 = -256UL;
asm volatile(
" sacf 0\n"
- "0: mvcp 0(%0,%2),0(%1),%3\n"
+ "0: mvcp 0(%0,%2),0(%1),%[key]\n"
"7: jz 5f\n"
"1: algr %0,%3\n"
" la %1,256(%1)\n"
" la %2,256(%2)\n"
- "2: mvcp 0(%0,%2),0(%1),%3\n"
+ "2: mvcp 0(%0,%2),0(%1),%[key]\n"
"8: jnz 1b\n"
" j 5f\n"
"3: la %4,255(%1)\n" /* %4 = ptr + 255 */
@@ -115,7 +117,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
" slgr %4,%1\n"
" clgr %0,%4\n" /* copy crosses next page boundary? */
" jnh 6f\n"
- "4: mvcp 0(%4,%2),0(%1),%3\n"
+ "4: mvcp 0(%4,%2),0(%1),%[key]\n"
"9: slgr %0,%4\n"
" j 6f\n"
"5: slgr %0,%0\n"
@@ -123,24 +125,28 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
+ : [key] "d" (key << 4)
+ : "cc", "memory");
return size;
}
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+unsigned long raw_copy_from_user_with_key(void *to, const void __user *from,
+ unsigned long n, unsigned long key)
{
if (copy_with_mvcos())
- return copy_from_user_mvcos(to, from, n);
- return copy_from_user_mvcp(to, from, n);
+ return copy_from_user_mvcos(to, from, n, (u8)key);
+ return copy_from_user_mvcp(to, from, n, (u8)key);
}
-EXPORT_SYMBOL(raw_copy_from_user);
+EXPORT_SYMBOL(raw_copy_from_user_with_key);
-static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
- unsigned long size)
+inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+ unsigned long size, u8 key)
{
unsigned long tmp1, tmp2;
union oac spec = {
+ .oac1.key = key,
.oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.k = 1,
.oac1.a = 1,
};
@@ -171,19 +177,19 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
}
static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
- unsigned long size)
+ unsigned long size, u8 key)
{
unsigned long tmp1, tmp2;
tmp1 = -256UL;
asm volatile(
" sacf 0\n"
- "0: mvcs 0(%0,%1),0(%2),%3\n"
+ "0: mvcs 0(%0,%1),0(%2),%[key]\n"
"7: jz 5f\n"
"1: algr %0,%3\n"
" la %1,256(%1)\n"
" la %2,256(%2)\n"
- "2: mvcs 0(%0,%1),0(%2),%3\n"
+ "2: mvcs 0(%0,%1),0(%2),%[key]\n"
"8: jnz 1b\n"
" j 5f\n"
"3: la %4,255(%1)\n" /* %4 = ptr + 255 */
@@ -192,7 +198,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
" slgr %4,%1\n"
" clgr %0,%4\n" /* copy crosses next page boundary? */
" jnh 6f\n"
- "4: mvcs 0(%4,%1),0(%2),%3\n"
+ "4: mvcs 0(%4,%1),0(%2),%[key]\n"
"9: slgr %0,%4\n"
" j 6f\n"
"5: slgr %0,%0\n"
@@ -200,17 +206,19 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
+ : [key] "d" (key << 4)
+ : "cc", "memory");
return size;
}
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+unsigned long raw_copy_to_user_with_key(void __user *to, const void *from,
+ unsigned long n, unsigned long key)
{
if (copy_with_mvcos())
- return copy_to_user_mvcos(to, from, n);
- return copy_to_user_mvcs(to, from, n);
+ return copy_to_user_mvcos(to, from, n, (u8)key);
+ return copy_to_user_mvcs(to, from, n, (u8)key);
}
-EXPORT_SYMBOL(raw_copy_to_user);
+EXPORT_SYMBOL(raw_copy_to_user_with_key);
static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
{
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index ac0394087f7d..adce966edb7a 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -114,6 +114,20 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
return raw_copy_from_user(to, from, n);
}
+#ifdef raw_copy_from_user_with_key
+static __always_inline __must_check unsigned long
+__copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
+ unsigned long key)
+{
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ instrument_copy_from_user(to, from, n);
+ check_object_size(to, n, false);
+ return raw_copy_from_user_with_key(to, from, n, key);
+}
+#endif /* raw_copy_from_user_with_key */
+
/**
* __copy_to_user_inatomic: - Copy a block of data into user space, with less checking.
* @to: Destination address, in user space.
@@ -148,6 +162,20 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
return raw_copy_to_user(to, from, n);
}
+#ifdef raw_copy_to_user_with_key
+static __always_inline __must_check unsigned long
+__copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
+ unsigned long key)
+{
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ instrument_copy_to_user(to, from, n);
+ check_object_size(from, n, true);
+ return raw_copy_to_user_with_key(to, from, n, key);
+}
+#endif /* raw_copy_to_user_with_key */
+
#ifdef INLINE_COPY_FROM_USER
static inline __must_check unsigned long
_copy_from_user(void *to, const void __user *from, unsigned long n)
base-commit: bad13799e0305deb258372b7298a86be4c78aaba
prerequisite-patch-id: 5f8ae41bde2fa5717a775e17c08239ed1ddbcc83
--
2.32.0
On Mon, Jan 24, 2022 at 11:38:12AM +0100, Janis Schoetterl-Glausch wrote:
> KVM on s390 needs a mechanism to do accesses to guest memory
> that honors storage key protection.
> __copy_from/to_user_with_key is implemented by introducing
> raw_copy_from/to_user_with_key.
> Since the existing uaccess implementation on s390 makes use of move
> instructions that support having an additional access key supplied,
> we can implement raw_copy_from/to_user_with_key by enhancing the
> existing implementation.
>
> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
> ---
>
> This works for us and compiles on other architectures (tested x86).
> The patch only implements __copy_from/to_user_with_key, since those
> are the ones we actually need. On other architectures those functions
> don't exists, but they aren't used either, so it's not a problem.
Adding an API where only underscored function names are to be used can be
considered suboptimal.
> Should we also implement single and no underscore variants? Why?
> Completeness?
Please make this _fully_ symmetrical to the existing copy_to/from_user()
implementations, like I tried to say several times. Maybe I wasn't clear
enough about this. Also the default implementation - that is if an
architecture makes use of copy_to_user_key() without providing a
raw_copy_from_user_key() implementation - should fallback to regular
copy_to_user() semantics, like I tried to outline with the ifndef example
of raw_copy_from_user_key() previously.
Furthermore, this should be split into two patches: one which adds the
common code infrastructure, like described above; and a second patch which
adds the actual s390 architecture backend/override.
The patches should contain a _detailed_ description of why the first patch,
aka API, should probably be in common code (staying in sync with code
instrumentation, etc.); and of course it should contain enough information
for people not familiar with s390's storage keys so they can figure out
what this is about.
Hopefully we get some feedback and either this is acceptable for common
code one way or the other, or we have to maintain this on our own, and get
the additional maintenance cost for free.
Please make sure to add Al Viro, Kees Cook, Arnd Bergmann, and Andrew
Morton to cc on your next version, so we hopefully come to a conclusion and
can move on.
On 20/01/2022 13.23, Janis Schoetterl-Glausch wrote:
> On 1/20/22 11:38, Thomas Huth wrote:
>> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
>>> Channel I/O honors storage keys and is performed on absolute memory.
>>> For I/O emulation user space therefore needs to be able to do key
>>> checked accesses.
>>> The vm IOCTL supports read/write accesses, as well as checking
>>> if an access would succeed.
>> ...
>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index e3f450b2f346..dd04170287fd 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -572,6 +572,8 @@ struct kvm_s390_mem_op {
>>> #define KVM_S390_MEMOP_LOGICAL_WRITE 1
>>> #define KVM_S390_MEMOP_SIDA_READ 2
>>> #define KVM_S390_MEMOP_SIDA_WRITE 3
>>> +#define KVM_S390_MEMOP_ABSOLUTE_READ 4
>>> +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
>>
>> Not quite sure about this - maybe it is, but at least I'd like to see this discussed: Do we really want to re-use the same ioctl layout for both, the VM and the VCPU file handles? Where the userspace developer has to know that the *_ABSOLUTE_* ops only work with VM handles, and the others only work with the VCPU handles? A CPU can also address absolute memory, so why not adding the *_ABSOLUTE_* ops there, too? And if we'd do that, wouldn't it be sufficient to have the VCPU ioctls only - or do you want to call these ioctls from spots in QEMU where you do not have a VCPU handle available? (I/O instructions are triggered from a CPU, so I'd assume that you should have a VCPU handle around?)
>
> There are some differences between the vm and the vcpu memops.
> No storage or fetch protection overrides apply to IO/vm memops, after all there is no control register to enable them.
> Additionally, quiescing is not required for IO, tho in practice we use the same code path for the vcpu and the vm here.
> Allowing absolute accesses with a vcpu is doable, but I'm not sure what the use case for it would be, I'm not aware of
> a precedence in the architecture. Of course the vcpu memop already supports logical=real accesses.
Ok. Maybe it would then be better to name the new ioctl and the new op defines
differently, to avoid confusion? E.g. call it "vmmemop" and use:
#define KVM_S390_VMMEMOP_ABSOLUTE_READ 1
#define KVM_S390_VMMEMOP_ABSOLUTE_WRITE 2
?
Thomas
On 1/24/22 18:41, Heiko Carstens wrote:
> On Mon, Jan 24, 2022 at 11:38:12AM +0100, Janis Schoetterl-Glausch wrote:
>> KVM on s390 needs a mechanism to do accesses to guest memory
>> that honors storage key protection.
>> __copy_from/to_user_with_key is implemented by introducing
>> raw_copy_from/to_user_with_key.
>> Since the existing uaccess implementation on s390 makes use of move
>> instructions that support having an additional access key supplied,
>> we can implement raw_copy_from/to_user_with_key by enhancing the
>> existing implementation.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <[email protected]>
>> ---
>>
>> This works for us and compiles on other architectures (tested x86).
>> The patch only implements __copy_from/to_user_with_key, since those
>> are the ones we actually need. On other architectures those functions
>> don't exists, but they aren't used either, so it's not a problem.
>
> Adding an API where only underscored function names are to be used can be
> considered suboptimal.
>
>> Should we also implement single and no underscore variants? Why?
>> Completeness?
>
> Please make this _fully_ symmetrical to the existing copy_to/from_user()
> implementations, like I tried to say several times. Maybe I wasn't clear
> enough about this. Also the default implementation - that is if an
> architecture makes use of copy_to_user_key() without providing a
> raw_copy_from_user_key() implementation - should fallback to regular
> copy_to_user() semantics, like I tried to outline with the ifndef example
> of raw_copy_from_user_key() previously.
That does help. One thing I'm still confused about is the rationale
for the default implementation.
Are you suggesting that copy_from/to_user be implemented in terms of
copy_from/to_user_with_key? I didn't think so, even though you said something along
those lines, because I assumed you were referring to the architecture-specific
implementations of copy_from/to_user, since we weren't talking about
common code changes back then and Christian's suggestion didn't feature it either.
When you say "fully symmetrical", do you mean all functions that wrap architecture-defined
access to user space:
__copy_from_user_inatomic
__copy_from_user
__copy_to_user_inatomic
__copy_to_user
_copy_from_user
_copy_to_user
copy_from_user
copy_to_user
__copy_from_user_inatomic_nocache
copy_struct_from_user
copy_from_user_nofault
copy_to_user_nofault
strncpy_from_user_nofault
strnlen_user_nofault
>
> Furthermore this should be splitted into two patches: one which adds the
> common code infrastructure, like described above; and a second patch which
> adds the actual s390 architecture backend/override.
>
> The patches should contain a _detailed_ description why the first patch,
> aka API, should probably be in common code (staying in sync with code
> instrumentation, etc.); and of course it should contain enough information
> for people not familiar with s390's storage keys so they can figure out
> what this is about.
>
> Hopefully we get some feedback and either this is acceptable for common
> code one way or the other, or we have to maintain this on our own, and get
> the additional maintenance cost for free.
>
> Please make sure to add Al Viro, Kees Cook, Arnd Bergmann, and Andrew
> Morton to cc on your next version, so we hopefully come to a conclusion and
> can move on.
Thanks, will do.
> > Please make this _fully_ symmetrical to the existing copy_to/from_user()
> > implementations, like I tried to say several times. Maybe I wasn't clear
> > enough about this. Also the default implementation - that is if an
> > architecture makes use of copy_to_user_key() without providing a
> > raw_copy_from_user_key() implementation - should fallback to regular
> > copy_to_user() semantics, like I tried to outline with the ifndef example
> > of raw_copy_from_user_key() previously.
>
> That does help. One thing I'm still confused about is the rational
> for the default implementation.
This is to avoid link errors, and potentially having something that
works everywhere without having to add ifdefs everywhere in case this
needs to be used in common code.
Again: this is just a proposal, nothing else.
> Are you suggesting that copy_from/to_user be implemented in terms of
> copy_from/to_user_with_key? I didn't think so, even tho you said something along
> those lines, because I assumed you were referring to the architecture specific
> implementations for copy_from/to_user, since we weren't talking about
> common code changes back then and Christian's suggestion didn't feature it either.
>
> When you say "fully symmetrical" do you mean all functions that wrap architecture
> defined access to user space:
> __copy_from_user_inatomic
> __copy_from_user
> __copy_to_user_inatomic
> __copy_to_user
> _copy_from_user
> _copy_to_user
> copy_from_user
> copy_to_user
> __copy_from_user_inatomic_nocache
> copy_struct_from_user
> copy_from_user_nofault
> copy_to_user_nofault
> strncpy_from_user_nofault
> strnlen_user_nofault
I meant something like below - not even compile tested. It is missing
any comments, commit message and sanity checks. That's up to you.
The patch is on top of your patch, and of course needs to be split.
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 35fda4251f47..422066d7c5e2 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -33,31 +33,33 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
#define access_ok(addr, size) __access_ok(addr, size)
-#define raw_copy_from_user_with_key raw_copy_from_user_with_key
+#define raw_copy_from_user_key raw_copy_from_user_key
unsigned long __must_check
-raw_copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
- unsigned long key);
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long n,
+ unsigned long key);
-#define raw_copy_to_user_with_key raw_copy_to_user_with_key
+#define raw_copy_to_user_key raw_copy_to_user_key
unsigned long __must_check
-raw_copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
- unsigned long key);
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long n,
+ unsigned long key);
static __always_inline unsigned long __must_check
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- return raw_copy_from_user_with_key(to, from, n, 0);
+ return raw_copy_from_user_key(to, from, n, 0);
}
static __always_inline unsigned long __must_check
raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- return raw_copy_to_user_with_key(to, from, n, 0);
+ return raw_copy_to_user_key(to, from, n, 0);
}
#ifndef CONFIG_KASAN
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
+#define INLINE_COPY_FROM_USER_KEY
+#define INLINE_COPY_TO_USER_KEY
#endif
int __put_user_bad(void) __attribute__((noreturn));
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 2e4e55580185..689a5ab3121a 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -59,7 +59,7 @@ static inline int copy_with_mvcos(void)
#endif
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
- unsigned long size, u8 key)
+ unsigned long size, unsigned long key)
{
unsigned long tmp1, tmp2;
union oac spec = {
@@ -96,7 +96,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
}
static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
- unsigned long size, u8 key)
+ unsigned long size, unsigned long key)
{
unsigned long tmp1, tmp2;
@@ -130,17 +130,17 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
return size;
}
-unsigned long raw_copy_from_user_with_key(void *to, const void __user *from,
- unsigned long n, unsigned long key)
+unsigned long raw_copy_from_user_key(void *to, const void __user *from,
+ unsigned long n, unsigned long key)
{
if (copy_with_mvcos())
- return copy_from_user_mvcos(to, from, n, (u8)key);
- return copy_from_user_mvcp(to, from, n, (u8)key);
+ return copy_from_user_mvcos(to, from, n, key);
+ return copy_from_user_mvcp(to, from, n, key);
}
-EXPORT_SYMBOL(raw_copy_from_user_with_key);
+EXPORT_SYMBOL(raw_copy_from_user_key);
-inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
- unsigned long size, u8 key)
+static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+ unsigned long size, unsigned long key)
{
unsigned long tmp1, tmp2;
union oac spec = {
@@ -177,7 +177,7 @@ inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
}
static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
- unsigned long size, u8 key)
+ unsigned long size, unsigned long key)
{
unsigned long tmp1, tmp2;
@@ -211,14 +211,14 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
return size;
}
-unsigned long raw_copy_to_user_with_key(void __user *to, const void *from,
- unsigned long n, unsigned long key)
+unsigned long raw_copy_to_user_key(void __user *to, const void *from,
+ unsigned long n, unsigned long key)
{
if (copy_with_mvcos())
- return copy_to_user_mvcos(to, from, n, (u8)key);
- return copy_to_user_mvcs(to, from, n, (u8)key);
+ return copy_to_user_mvcos(to, from, n, key);
+ return copy_to_user_mvcs(to, from, n, key);
}
-EXPORT_SYMBOL(raw_copy_to_user_with_key);
+EXPORT_SYMBOL(raw_copy_to_user_key);
static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
{
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index adce966edb7a..644955b31958 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -114,20 +114,6 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
return raw_copy_from_user(to, from, n);
}
-#ifdef raw_copy_from_user_with_key
-static __always_inline __must_check unsigned long
-__copy_from_user_with_key(void *to, const void __user *from, unsigned long n,
- unsigned long key)
-{
- might_fault();
- if (should_fail_usercopy())
- return n;
- instrument_copy_from_user(to, from, n);
- check_object_size(to, n, false);
- return raw_copy_from_user_with_key(to, from, n, key);
-}
-#endif /* raw_copy_from_user_with_key */
-
/**
* __copy_to_user_inatomic: - Copy a block of data into user space, with less checking.
* @to: Destination address, in user space.
@@ -162,20 +148,6 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
return raw_copy_to_user(to, from, n);
}
-#ifdef raw_copy_to_user_with_key
-static __always_inline __must_check unsigned long
-__copy_to_user_with_key(void __user *to, const void *from, unsigned long n,
- unsigned long key)
-{
- might_fault();
- if (should_fail_usercopy())
- return n;
- instrument_copy_to_user(to, from, n);
- check_object_size(from, n, true);
- return raw_copy_to_user_with_key(to, from, n, key);
-}
-#endif /* raw_copy_to_user_with_key */
-
#ifdef INLINE_COPY_FROM_USER
static inline __must_check unsigned long
_copy_from_user(void *to, const void __user *from, unsigned long n)
@@ -229,6 +201,81 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
return n;
}
+#ifndef raw_copy_from_user_key
+static __always_inline unsigned long __must_check
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long n,
+ unsigned long key)
+{
+ return raw_copy_from_user(to, from, n);
+}
+#endif
+
+#ifdef INLINE_COPY_FROM_USER_KEY
+static inline __must_check unsigned long
+_copy_from_user_key(void *to, const void __user *from, unsigned long n,
+ unsigned long key)
+{
+ unsigned long res = n;
+ might_fault();
+ if (!should_fail_usercopy() && likely(access_ok(from, n))) {
+ instrument_copy_from_user(to, from, n);
+ res = raw_copy_from_user_key(to, from, n, key);
+ }
+ if (unlikely(res))
+ memset(to + (n - res), 0, res);
+ return res;
+}
+#else
+extern __must_check unsigned long
+_copy_from_user_key(void *, const void __user *, unsigned long, unsigned long);
+#endif
+
+#ifndef raw_copy_to_user_key
+static __always_inline unsigned long __must_check
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long n,
+ unsigned long key)
+{
+ return raw_copy_to_user(to, from, n);
+}
+#endif
+
+#ifdef INLINE_COPY_TO_USER_KEY
+static inline __must_check unsigned long
+_copy_to_user_key(void __user *to, const void *from, unsigned long n,
+ unsigned long key)
+{
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ if (access_ok(to, n)) {
+ instrument_copy_to_user(to, from, n);
+ n = raw_copy_to_user_key(to, from, n, key);
+ }
+ return n;
+}
+#else
+extern __must_check unsigned long
+_copy_to_user_key(void __user *, const void *, unsigned long, unsigned long);
+#endif
+
+static __always_inline unsigned long __must_check
+copy_from_user_key(void *to, const void __user *from, unsigned long n,
+ unsigned long key)
+{
+ if (likely(check_copy_size(to, n, false)))
+ n = _copy_from_user_key(to, from, n, key);
+ return n;
+}
+
+static __always_inline unsigned long __must_check
+copy_to_user_key(void __user *to, const void *from, unsigned long n,
+ unsigned long key)
+{
+ if (likely(check_copy_size(from, n, true)))
+ n = _copy_to_user_key(to, from, n, key);
+ return n;
+}
+
#ifndef copy_mc_to_kernel
/*
* Without arch opt-in this generic copy_mc_to_kernel() will not handle
diff --git a/lib/usercopy.c b/lib/usercopy.c
index 7413dd300516..c13394d0f306 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -37,6 +37,39 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
EXPORT_SYMBOL(_copy_to_user);
#endif
+#ifndef INLINE_COPY_FROM_USER_KEY
+unsigned long _copy_from_user_key(void *to, const void __user *from,
+ unsigned long n, unsigned long key)
+{
+ unsigned long res = n;
+ might_fault();
+ if (!should_fail_usercopy() && likely(access_ok(from, n))) {
+ instrument_copy_from_user(to, from, n);
+ res = raw_copy_from_user_key(to, from, n, key);
+ }
+ if (unlikely(res))
+ memset(to + (n - res), 0, res);
+ return res;
+}
+EXPORT_SYMBOL(_copy_from_user_key);
+#endif
+
+#ifndef INLINE_COPY_TO_USER_KEY
+unsigned long _copy_to_user_key(void __user *to, const void *from,
+ unsigned long n, unsigned long key)
+{
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ if (likely(access_ok(to, n))) {
+ instrument_copy_to_user(to, from, n);
+ n = raw_copy_to_user_key(to, from, n, key);
+ }
+ return n;
+}
+EXPORT_SYMBOL(_copy_to_user_key);
+#endif
+
/**
* check_zeroed_user: check if a userspace buffer only contains zero bytes
* @from: Source address, in userspace.
On 1/25/22 13:00, Thomas Huth wrote:
> On 20/01/2022 13.23, Janis Schoetterl-Glausch wrote:
>> On 1/20/22 11:38, Thomas Huth wrote:
>>> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
>>>> Channel I/O honors storage keys and is performed on absolute memory.
>>>> For I/O emulation user space therefore needs to be able to do key
>>>> checked accesses.
>>>> The vm IOCTL supports read/write accesses, as well as checking
>>>> if an access would succeed.
>>> ...
>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>> index e3f450b2f346..dd04170287fd 100644
>>>> --- a/include/uapi/linux/kvm.h
>>>> +++ b/include/uapi/linux/kvm.h
>>>> @@ -572,6 +572,8 @@ struct kvm_s390_mem_op {
>>>> #define KVM_S390_MEMOP_LOGICAL_WRITE 1
>>>> #define KVM_S390_MEMOP_SIDA_READ 2
>>>> #define KVM_S390_MEMOP_SIDA_WRITE 3
>>>> +#define KVM_S390_MEMOP_ABSOLUTE_READ 4
>>>> +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
>>>
>>> Not quite sure about this - maybe it is, but at least I'd like to see this discussed: Do we really want to re-use the same ioctl layout for both, the VM and the VCPU file handles? Where the userspace developer has to know that the *_ABSOLUTE_* ops only work with VM handles, and the others only work with the VCPU handles? A CPU can also address absolute memory, so why not adding the *_ABSOLUTE_* ops there, too? And if we'd do that, wouldn't it be sufficient to have the VCPU ioctls only - or do you want to call these ioctls from spots in QEMU where you do not have a VCPU handle available? (I/O instructions are triggered from a CPU, so I'd assume that you should have a VCPU handle around?)
>>
>> There are some differences between the vm and the vcpu memops.
>> No storage or fetch protection overrides apply to IO/vm memops, after all there is no control register to enable them.
>> Additionally, quiescing is not required for IO, tho in practice we use the same code path for the vcpu and the vm here.
>> Allowing absolute accesses with a vcpu is doable, but I'm not sure what the use case for it would be, I'm not aware of
>> a precedence in the architecture. Of course the vcpu memop already supports logical=real accesses.
>
> Ok. Maybe it then would be better to call new ioctl and the new op defines differently, to avoid confusion? E.g. call it "vmmemop" and use:
>
> #define KVM_S390_VMMEMOP_ABSOLUTE_READ 1
> #define KVM_S390_VMMEMOP_ABSOLUTE_WRITE 2
>
> ?
>
> Thomas
>
Thanks for the suggestion, I had to think about it for a while :). Here are my thoughts:
The ioctl type (vm/vcpu) and the operations cannot be completely orthogonal (vm + logical cannot work),
but with regards to the absolute operations they could be. We don't have a use case for that
right now and the semantics are a bit unclear, so I think we should choose a design now that
leaves us space for future extension. If we need to, we can add a NON_QUIESCING flag in a backwards compatible way
(though it seems a rather unlikely requirement to me) that would behave the same for vm/vcpu memops.
We could also have a NO_PROT_OVERRIDE flag, which the vm memop would ignore.
Whether override is possible is dependent on the vcpu state, so user space leaves the exact behavior to KVM anyway.
If you wanted to enforce that protection override occurs, you would have to adjust
the vcpu state and therefore there should be no confusion about whether to use a vcpu or vm ioctl.
So I'm inclined to have one ioctl code and keep the operations as they are.
I moved the key to the union. One question that remains is whether to enforce that reserved bytes must be 0.
In general I think that it is a good idea, since it leaves a bigger design space for future extensions.
However, the vcpu memop has not done that. I think it should be enforced for new functionality (operations, flags).
Any objections?
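For illustration, such a check could look roughly like this (sketch only; "supported_flags" and the reserved[] field name are placeholders, not final):

	/* reject unknown flags and non-zero reserved bytes for new ops */
	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (memchr_inv(mop->reserved, 0, sizeof(mop->reserved)))
		return -EINVAL;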
I'll try to be thorough in documenting the currently supported behavior.
On Thu, 27 Jan 2022 17:29:44 +0100
Janis Schoetterl-Glausch <[email protected]> wrote:
> On 1/25/22 13:00, Thomas Huth wrote:
> > On 20/01/2022 13.23, Janis Schoetterl-Glausch wrote:
> >> On 1/20/22 11:38, Thomas Huth wrote:
> >>> On 18/01/2022 10.52, Janis Schoetterl-Glausch wrote:
> >>>> Channel I/O honors storage keys and is performed on absolute memory.
> >>>> For I/O emulation user space therefore needs to be able to do key
> >>>> checked accesses.
> >>>> The vm IOCTL supports read/write accesses, as well as checking
> >>>> if an access would succeed.
> >>> ...
> >>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>>> index e3f450b2f346..dd04170287fd 100644
> >>>> --- a/include/uapi/linux/kvm.h
> >>>> +++ b/include/uapi/linux/kvm.h
> >>>> @@ -572,6 +572,8 @@ struct kvm_s390_mem_op {
> >>>> #define KVM_S390_MEMOP_LOGICAL_WRITE 1
> >>>> #define KVM_S390_MEMOP_SIDA_READ 2
> >>>> #define KVM_S390_MEMOP_SIDA_WRITE 3
> >>>> +#define KVM_S390_MEMOP_ABSOLUTE_READ 4
> >>>> +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
> >>>
> >>> Not quite sure about this - maybe it is, but at least I'd like to see this discussed: Do we really want to re-use the same ioctl layout for both, the VM and the VCPU file handles? Where the userspace developer has to know that the *_ABSOLUTE_* ops only work with VM handles, and the others only work with the VCPU handles? A CPU can also address absolute memory, so why not adding the *_ABSOLUTE_* ops there, too? And if we'd do that, wouldn't it be sufficient to have the VCPU ioctls only - or do you want to call these ioctls from spots in QEMU where you do not have a VCPU handle available? (I/O instructions are triggered from a CPU, so I'd assume that you should have a VCPU handle around?)
> >>
> >> There are some differences between the vm and the vcpu memops.
> >> No storage or fetch protection overrides apply to IO/vm memops, after all there is no control register to enable them.
> >> Additionally, quiescing is not required for IO, tho in practice we use the same code path for the vcpu and the vm here.
> >> Allowing absolute accesses with a vcpu is doable, but I'm not sure what the use case for it would be, I'm not aware of
> >> a precedence in the architecture. Of course the vcpu memop already supports logical=real accesses.
> >
> > Ok. Maybe it then would be better to call new ioctl and the new op defines differently, to avoid confusion? E.g. call it "vmmemop" and use:
> >
> > #define KVM_S390_VMMEMOP_ABSOLUTE_READ 1
> > #define KVM_S390_VMMEMOP_ABSOLUTE_WRITE 2
> >
> > ?
> >
> > Thomas
> >
>
> Thanks for the suggestion, I had to think about it for a while :). Here are my thoughts:
> The ioctl type (vm/vcpu) and the operations cannot be completely orthogonal (vm + logical cannot work),
> but with regards to the absolute operations they could be. We don't have a use case for that
> right now and the semantics are a bit unclear, so I think we should choose a design now that
> leaves us space for future extension. If we need to, we can add a NON_QUIESCING flag backwards compatibly
> (tho it seems a rather unlikely requirement to me), that would behave the same for vm/vcpu memops.
> We could also have a NO_PROT_OVERRIDE flag, which the vm memop would ignore.
> Whether override is possible is dependent on the vcpu state, so user space leaves the exact behavior to KVM anyway.
> If you wanted to enforce that protection override occurs, you would have to adjust
> the vcpu state and therefore there should be no confusion about whether to use a vcpu or vm ioctl.
>
> So I'm inclined to have one ioctl code and keep the operations as they are.
> I moved the key to the union. One question that remains is whether to enforce that reserved bytes must be 0.
> In general I think that it is a good idea, since it leaves a bigger design space for future extensions.
> However the vcpu memop has not done that. I think it should be enforced for new functionality (operations, flags),
I agree with enforcing that unused bits should be 0
> any objections?
>
> I'll try to be thorough in documenting the currently supported behavior.
this is also a good idea :)