2020-11-24 11:07:33

by Ravi Bangoria

[permalink] [raw]
Subject: [PATCH v2 0/4] KVM: PPC: Power10 2nd DAWR enablement

Enable p10 2nd DAWR feature for Book3S kvm guest. DAWR is a hypervisor
resource and thus H_SET_MODE hcall is used to set/unset it. A new case
H_SET_MODE_RESOURCE_SET_DAWR1 is introduced in H_SET_MODE hcall for
setting/unsetting 2nd DAWR. Also, new capability KVM_CAP_PPC_DAWR1 has
been added to query 2nd DAWR support via kvm ioctl.

This feature also needs to be enabled in Qemu to really use it. I'll
post Qemu patches once kvm patches get accepted.

v1: https://lore.kernel.org/r/[email protected]

v1->v2:
- patch #1: New patch
- patch #2: Don't rename KVM_REG_PPC_DAWR, it's an uapi macro
- patch #3: Increment HV_GUEST_STATE_VERSION
- Split kvm and selftests patches into different series
- Patches rebased to paulus/kvm-ppc-next (cf59eb13e151) + few
other watchpoint patches which are yet to be merged in
paulus/kvm-ppc-next.

Ravi Bangoria (4):
KVM: PPC: Allow nested guest creation when L0 hv_guest_state > L1
KVM: PPC: Rename current DAWR macros and variables
KVM: PPC: Add infrastructure to support 2nd DAWR
KVM: PPC: Introduce new capability for 2nd DAWR

Documentation/virt/kvm/api.rst | 2 +
arch/powerpc/include/asm/hvcall.h | 25 ++++++++-
arch/powerpc/include/asm/kvm_host.h | 6 +-
arch/powerpc/include/uapi/asm/kvm.h | 2 +
arch/powerpc/kernel/asm-offsets.c | 6 +-
arch/powerpc/kvm/book3s_hv.c | 65 ++++++++++++++++++----
arch/powerpc/kvm/book3s_hv_nested.c | 68 ++++++++++++++++++-----
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 43 ++++++++++----
arch/powerpc/kvm/powerpc.c | 3 +
include/uapi/linux/kvm.h | 1 +
tools/arch/powerpc/include/uapi/asm/kvm.h | 2 +
11 files changed, 181 insertions(+), 42 deletions(-)

--
2.26.2


2020-11-25 02:02:27

by Ravi Bangoria

[permalink] [raw]
Subject: [PATCH v2 2/4] KVM: PPC: Rename current DAWR macros and variables

Power10 is introducing second DAWR. Use real register names (with
suffix 0) from ISA for current macros and variables used by kvm.
One exception is KVM_REG_PPC_DAWR. Keep it as it is because it's
uapi so changing it will break userspace.

Signed-off-by: Ravi Bangoria <[email protected]>
---
arch/powerpc/include/asm/kvm_host.h | 4 ++--
arch/powerpc/kernel/asm-offsets.c | 4 ++--
arch/powerpc/kvm/book3s_hv.c | 24 ++++++++++++------------
arch/powerpc/kvm/book3s_hv_nested.c | 8 ++++----
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 20 ++++++++++----------
5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d67a470e95a3..62cadf1a596e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -584,8 +584,8 @@ struct kvm_vcpu_arch {
u32 ctrl;
u32 dabrx;
ulong dabr;
- ulong dawr;
- ulong dawrx;
+ ulong dawr0;
+ ulong dawrx0;
ulong ciabr;
ulong cfar;
ulong ppr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8711c2164b45..e4256f5b4602 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -547,8 +547,8 @@ int main(void)
OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
- OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
- OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
+ OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+ OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 490a0f6a7285..d5c6efc8a76e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -782,8 +782,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
return H_UNSUPPORTED_FLAG_START;
if (value2 & DABRX_HYP)
return H_P4;
- vcpu->arch.dawr = value1;
- vcpu->arch.dawrx = value2;
+ vcpu->arch.dawr0 = value1;
+ vcpu->arch.dawrx0 = value2;
return H_SUCCESS;
case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
/* KVM does not support mflags=2 (AIL=2) */
@@ -1747,10 +1747,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
break;
case KVM_REG_PPC_DAWR:
- *val = get_reg_val(id, vcpu->arch.dawr);
+ *val = get_reg_val(id, vcpu->arch.dawr0);
break;
case KVM_REG_PPC_DAWRX:
- *val = get_reg_val(id, vcpu->arch.dawrx);
+ *val = get_reg_val(id, vcpu->arch.dawrx0);
break;
case KVM_REG_PPC_CIABR:
*val = get_reg_val(id, vcpu->arch.ciabr);
@@ -1979,10 +1979,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.vcore->vtb = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DAWR:
- vcpu->arch.dawr = set_reg_val(id, *val);
+ vcpu->arch.dawr0 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DAWRX:
- vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+ vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
break;
case KVM_REG_PPC_CIABR:
vcpu->arch.ciabr = set_reg_val(id, *val);
@@ -3437,8 +3437,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
int trap;
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr = mfspr(SPRN_DAWR0);
- unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
+ unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
+ unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
unsigned long host_psscr = mfspr(SPRN_PSSCR);
unsigned long host_pidr = mfspr(SPRN_PID);

@@ -3477,8 +3477,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_SPURR, vcpu->arch.spurr);

if (dawr_enabled()) {
- mtspr(SPRN_DAWR0, vcpu->arch.dawr);
- mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3530,8 +3530,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR0, host_dawr);
- mtspr(SPRN_DAWRX0, host_dawrx);
+ mtspr(SPRN_DAWR0, host_dawr0);
+ mtspr(SPRN_DAWRX0, host_dawrx0);
mtspr(SPRN_PID, host_pidr);

/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 2b433c3bacea..1d4b335485d0 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -33,8 +33,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
hr->dpdes = vc->dpdes;
hr->hfscr = vcpu->arch.hfscr;
hr->tb_offset = vc->tb_offset;
- hr->dawr0 = vcpu->arch.dawr;
- hr->dawrx0 = vcpu->arch.dawrx;
+ hr->dawr0 = vcpu->arch.dawr0;
+ hr->dawrx0 = vcpu->arch.dawrx0;
hr->ciabr = vcpu->arch.ciabr;
hr->purr = vcpu->arch.purr;
hr->spurr = vcpu->arch.spurr;
@@ -151,8 +151,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
vc->pcr = hr->pcr | PCR_MASK;
vc->dpdes = hr->dpdes;
vcpu->arch.hfscr = hr->hfscr;
- vcpu->arch.dawr = hr->dawr0;
- vcpu->arch.dawrx = hr->dawrx0;
+ vcpu->arch.dawr0 = hr->dawr0;
+ vcpu->arch.dawrx0 = hr->dawrx0;
vcpu->arch.ciabr = hr->ciabr;
vcpu->arch.purr = hr->purr;
vcpu->arch.spurr = hr->spurr;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 799d6d0f4ead..8e3fb393a980 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -52,8 +52,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_PID (SFS-32)
#define STACK_SLOT_IAMR (SFS-40)
#define STACK_SLOT_CIABR (SFS-48)
-#define STACK_SLOT_DAWR (SFS-56)
-#define STACK_SLOT_DAWRX (SFS-64)
+#define STACK_SLOT_DAWR0 (SFS-56)
+#define STACK_SLOT_DAWRX0 (SFS-64)
#define STACK_SLOT_HFSCR (SFS-72)
#define STACK_SLOT_AMR (SFS-80)
#define STACK_SLOT_UAMOR (SFS-88)
@@ -711,8 +711,8 @@ BEGIN_FTR_SECTION
mfspr r7, SPRN_DAWRX0
mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_CIABR(r1)
- std r6, STACK_SLOT_DAWR(r1)
- std r7, STACK_SLOT_DAWRX(r1)
+ std r6, STACK_SLOT_DAWR0(r1)
+ std r7, STACK_SLOT_DAWRX0(r1)
std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

@@ -801,8 +801,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
lbz r5, 0(r5)
cmpdi r5, 0
beq 1f
- ld r5, VCPU_DAWR(r4)
- ld r6, VCPU_DAWRX(r4)
+ ld r5, VCPU_DAWR0(r4)
+ ld r6, VCPU_DAWRX0(r4)
mtspr SPRN_DAWR0, r5
mtspr SPRN_DAWRX0, r6
1:
@@ -1759,8 +1759,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/* Restore host values of some registers */
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_CIABR(r1)
- ld r6, STACK_SLOT_DAWR(r1)
- ld r7, STACK_SLOT_DAWRX(r1)
+ ld r6, STACK_SLOT_DAWR0(r1)
+ ld r7, STACK_SLOT_DAWRX0(r1)
mtspr SPRN_CIABR, r5
/*
* If the DAWR doesn't work, it's ok to write these here as
@@ -2566,8 +2566,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
- std r4, VCPU_DAWR(r3)
- std r5, VCPU_DAWRX(r3)
+ std r4, VCPU_DAWR0(r3)
+ std r5, VCPU_DAWRX0(r3)
/*
* If came in through the real mode hcall handler then it is necessary
* to write the registers since the return path won't. Otherwise it is
--
2.26.2

2020-11-25 02:02:32

by Ravi Bangoria

[permalink] [raw]
Subject: [PATCH v2 1/4] KVM: PPC: Allow nested guest creation when L0 hv_guest_state > L1

On powerpc, L1 hypervisor takes help of L0 using H_ENTER_NESTED
hcall to load L2 guest state in cpu. L1 hypervisor prepares the
L2 state in struct hv_guest_state and passes a pointer to it via
hcall. Using that pointer, L0 reads/writes that state directly
from/to L1 memory. Thus L0 must be aware of hv_guest_state layout
of L1. Currently it uses version field to achieve this. i.e. If
L0 hv_guest_state.version != L1 hv_guest_state.version, L0 won't
allow nested kvm guest.

This restriction can be loosen up a bit. L0 can be taught to
understand older layout of hv_guest_state, if we restrict the
new member to be added only at the end. i.e. we can allow
nested guest even when L0 hv_guest_state.version > L1
hv_guest_state.version. Though, the other way around is not
possible.

Signed-off-by: Ravi Bangoria <[email protected]>
---
arch/powerpc/include/asm/hvcall.h | 17 +++++++--
arch/powerpc/kvm/book3s_hv_nested.c | 53 ++++++++++++++++++++++++-----
2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index fbb377055471..a7073fddb657 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -524,9 +524,12 @@ struct h_cpu_char_result {
u64 behaviour;
};

-/* Register state for entering a nested guest with H_ENTER_NESTED */
+/*
+ * Register state for entering a nested guest with H_ENTER_NESTED.
+ * New member must be added at the end.
+ */
struct hv_guest_state {
- u64 version; /* version of this structure layout */
+ u64 version; /* version of this structure layout, must be first */
u32 lpid;
u32 vcpu_token;
/* These registers are hypervisor privileged (at least for writing) */
@@ -560,6 +563,16 @@ struct hv_guest_state {
/* Latest version of hv_guest_state structure */
#define HV_GUEST_STATE_VERSION 1

+static inline int hv_guest_state_size(unsigned int version)
+{
+ switch (version) {
+ case 1:
+ return offsetofend(struct hv_guest_state, ppr);
+ default:
+ return -1;
+ }
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 33b58549a9aa..2b433c3bacea 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -215,6 +215,45 @@ static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
}
}

+static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
+ struct hv_guest_state *l2_hv,
+ struct pt_regs *l2_regs,
+ u64 hv_ptr, u64 regs_ptr)
+{
+ int size;
+
+ if (kvm_vcpu_read_guest(vcpu, hv_ptr, &(l2_hv->version),
+ sizeof(l2_hv->version)))
+ return -1;
+
+ if (kvmppc_need_byteswap(vcpu))
+ l2_hv->version = swab64(l2_hv->version);
+
+ size = hv_guest_state_size(l2_hv->version);
+ if (size < 0)
+ return -1;
+
+ return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
+ kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
+ sizeof(struct pt_regs));
+}
+
+static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
+ struct hv_guest_state *l2_hv,
+ struct pt_regs *l2_regs,
+ u64 hv_ptr, u64 regs_ptr)
+{
+ int size;
+
+ size = hv_guest_state_size(l2_hv->version);
+ if (size < 0)
+ return -1;
+
+ return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
+ kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
+ sizeof(struct pt_regs));
+}
+
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
long int err, r;
@@ -235,17 +274,15 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
hv_ptr = kvmppc_get_gpr(vcpu, 4);
regs_ptr = kvmppc_get_gpr(vcpu, 5);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state)) ||
- kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
- sizeof(struct pt_regs));
+ err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+ hv_ptr, regs_ptr);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_PARAMETER;

if (kvmppc_need_byteswap(vcpu))
byteswap_hv_regs(&l2_hv);
- if (l2_hv.version != HV_GUEST_STATE_VERSION)
+ if (l2_hv.version > HV_GUEST_STATE_VERSION)
return H_P2;

if (kvmppc_need_byteswap(vcpu))
@@ -325,10 +362,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
byteswap_pt_regs(&l2_regs);
}
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state)) ||
- kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
- sizeof(struct pt_regs));
+ err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+ hv_ptr, regs_ptr);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_AUTHORITY;
--
2.26.2

2020-11-25 02:02:33

by Ravi Bangoria

[permalink] [raw]
Subject: [PATCH v2 3/4] KVM: PPC: Add infrastructure to support 2nd DAWR

kvm code assumes single DAWR everywhere. Add code to support 2nd DAWR.
DAWR is a hypervisor resource and thus H_SET_MODE hcall is used to set/
unset it. Introduce new case H_SET_MODE_RESOURCE_SET_DAWR1 for 2nd DAWR.
Also, kvm will support 2nd DAWR only if CPU_FTR_DAWR1 is set.

Signed-off-by: Ravi Bangoria <[email protected]>
---
Documentation/virt/kvm/api.rst | 2 ++
arch/powerpc/include/asm/hvcall.h | 8 ++++-
arch/powerpc/include/asm/kvm_host.h | 2 ++
arch/powerpc/include/uapi/asm/kvm.h | 2 ++
arch/powerpc/kernel/asm-offsets.c | 2 ++
arch/powerpc/kvm/book3s_hv.c | 41 +++++++++++++++++++++++
arch/powerpc/kvm/book3s_hv_nested.c | 7 ++++
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 +++++++++++++
tools/arch/powerpc/include/uapi/asm/kvm.h | 2 ++
9 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eb3a1316f03e..72c98735aa52 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2249,6 +2249,8 @@ registers, find a list below:
PPC KVM_REG_PPC_PSSCR 64
PPC KVM_REG_PPC_DEC_EXPIRY 64
PPC KVM_REG_PPC_PTCR 64
+ PPC KVM_REG_PPC_DAWR1 64
+ PPC KVM_REG_PPC_DAWRX1 64
PPC KVM_REG_PPC_TM_GPR0 64
...
PPC KVM_REG_PPC_TM_GPR31 64
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a7073fddb657..4bacd27a348b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -558,16 +558,22 @@ struct hv_guest_state {
u64 pidr;
u64 cfar;
u64 ppr;
+ /* Version 1 ends here */
+ u64 dawr1;
+ u64 dawrx1;
+ /* Version 2 ends here */
};

/* Latest version of hv_guest_state structure */
-#define HV_GUEST_STATE_VERSION 1
+#define HV_GUEST_STATE_VERSION 2

static inline int hv_guest_state_size(unsigned int version)
{
switch (version) {
case 1:
return offsetofend(struct hv_guest_state, ppr);
+ case 2:
+ return offsetofend(struct hv_guest_state, dawrx1);
default:
return -1;
}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 62cadf1a596e..9804afdf8578 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -586,6 +586,8 @@ struct kvm_vcpu_arch {
ulong dabr;
ulong dawr0;
ulong dawrx0;
+ ulong dawr1;
+ ulong dawrx1;
ulong ciabr;
ulong cfar;
ulong ppr;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c3af3f324c5a..9f18fa090f1f 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)

/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e4256f5b4602..26d4fa8fe51e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -549,6 +549,8 @@ int main(void)
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
+ OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
+ OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d5c6efc8a76e..2ff645789e9e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -785,6 +785,20 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
vcpu->arch.dawr0 = value1;
vcpu->arch.dawrx0 = value2;
return H_SUCCESS;
+ case H_SET_MODE_RESOURCE_SET_DAWR1:
+ if (!kvmppc_power8_compatible(vcpu))
+ return H_P2;
+ if (!ppc_breakpoint_available())
+ return H_P2;
+ if (!cpu_has_feature(CPU_FTR_DAWR1))
+ return H_P2;
+ if (mflags)
+ return H_UNSUPPORTED_FLAG_START;
+ if (value2 & DABRX_HYP)
+ return H_P4;
+ vcpu->arch.dawr1 = value1;
+ vcpu->arch.dawrx1 = value2;
+ return H_SUCCESS;
case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
/* KVM does not support mflags=2 (AIL=2) */
if (mflags != 0 && mflags != 3)
@@ -1752,6 +1766,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_DAWRX:
*val = get_reg_val(id, vcpu->arch.dawrx0);
break;
+ case KVM_REG_PPC_DAWR1:
+ *val = get_reg_val(id, vcpu->arch.dawr1);
+ break;
+ case KVM_REG_PPC_DAWRX1:
+ *val = get_reg_val(id, vcpu->arch.dawrx1);
+ break;
case KVM_REG_PPC_CIABR:
*val = get_reg_val(id, vcpu->arch.ciabr);
break;
@@ -1984,6 +2004,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_DAWRX:
vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
break;
+ case KVM_REG_PPC_DAWR1:
+ vcpu->arch.dawr1 = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DAWRX1:
+ vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP;
+ break;
case KVM_REG_PPC_CIABR:
vcpu->arch.ciabr = set_reg_val(id, *val);
/* Don't allow setting breakpoints in hypervisor code */
@@ -3441,6 +3467,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
unsigned long host_psscr = mfspr(SPRN_PSSCR);
unsigned long host_pidr = mfspr(SPRN_PID);
+ unsigned long host_dawr1 = 0;
+ unsigned long host_dawrx1 = 0;
+
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }

/*
* P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
@@ -3479,6 +3512,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
if (dawr_enabled()) {
mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+ mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+ }
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3532,6 +3569,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR0, host_dawr0);
mtspr(SPRN_DAWRX0, host_dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, host_dawr1);
+ mtspr(SPRN_DAWRX1, host_dawrx1);
+ }
mtspr(SPRN_PID, host_pidr);

/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 1d4b335485d0..aec86013f7de 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -49,6 +49,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
hr->pidr = vcpu->arch.pid;
hr->cfar = vcpu->arch.cfar;
hr->ppr = vcpu->arch.ppr;
+ hr->dawr1 = vcpu->arch.dawr1;
+ hr->dawrx1 = vcpu->arch.dawrx1;
}

static void byteswap_pt_regs(struct pt_regs *regs)
@@ -91,6 +93,8 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
hr->pidr = swab64(hr->pidr);
hr->cfar = swab64(hr->cfar);
hr->ppr = swab64(hr->ppr);
+ hr->dawr1 = swab64(hr->dawr1);
+ hr->dawrx1 = swab64(hr->dawrx1);
}

static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
@@ -138,6 +142,7 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)

/* Don't let data address watchpoint match in hypervisor state */
hr->dawrx0 &= ~DAWRX_HYP;
+ hr->dawrx1 &= ~DAWRX_HYP;

/* Don't let completed instruction address breakpt match in HV state */
if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
@@ -167,6 +172,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
vcpu->arch.pid = hr->pidr;
vcpu->arch.cfar = hr->cfar;
vcpu->arch.ppr = hr->ppr;
+ vcpu->arch.dawr1 = hr->dawr1;
+ vcpu->arch.dawrx1 = hr->dawrx1;
}

void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8e3fb393a980..fe31c5338fdf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -57,6 +57,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_HFSCR (SFS-72)
#define STACK_SLOT_AMR (SFS-80)
#define STACK_SLOT_UAMOR (SFS-88)
+#define STACK_SLOT_DAWR1 (SFS-96)
+#define STACK_SLOT_DAWRX1 (SFS-104)
/* the following is used by the P9 short path */
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */

@@ -715,6 +717,12 @@ BEGIN_FTR_SECTION
std r7, STACK_SLOT_DAWRX0(r1)
std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+ mfspr r6, SPRN_DAWR1
+ mfspr r7, SPRN_DAWRX1
+ std r6, STACK_SLOT_DAWR1(r1)
+ std r7, STACK_SLOT_DAWRX1(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)

mfspr r5, SPRN_AMR
std r5, STACK_SLOT_AMR(r1)
@@ -805,6 +813,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX0(r4)
mtspr SPRN_DAWR0, r5
mtspr SPRN_DAWRX0, r6
+BEGIN_FTR_SECTION
+ ld r5, VCPU_DAWR1(r4)
+ ld r6, VCPU_DAWRX1(r4)
+ mtspr SPRN_DAWR1, r5
+ mtspr SPRN_DAWRX1, r6
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
@@ -1769,6 +1783,12 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWR0, r6
mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+ ld r6, STACK_SLOT_DAWR1(r1)
+ ld r7, STACK_SLOT_DAWRX1(r1)
+ mtspr SPRN_DAWR1, r6
+ mtspr SPRN_DAWRX1, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
@@ -3335,6 +3355,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX0, r0
+BEGIN_FTR_SECTION
+ mtspr SPRN_DAWRX1, r0
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)

BEGIN_MMU_FTR_SECTION
b 4f
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index c3af3f324c5a..9f18fa090f1f 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)

/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
--
2.26.2

2020-12-09 15:15:39

by Fabiano Rosas

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] KVM: PPC: Allow nested guest creation when L0 hv_guest_state > L1

Ravi Bangoria <[email protected]> writes:

> On powerpc, L1 hypervisor takes help of L0 using H_ENTER_NESTED
> hcall to load L2 guest state in cpu. L1 hypervisor prepares the
> L2 state in struct hv_guest_state and passes a pointer to it via
> hcall. Using that pointer, L0 reads/writes that state directly
> from/to L1 memory. Thus L0 must be aware of hv_guest_state layout
> of L1. Currently it uses version field to achieve this. i.e. If
> L0 hv_guest_state.version != L1 hv_guest_state.version, L0 won't
> allow nested kvm guest.
>
> This restriction can be loosen up a bit. L0 can be taught to
> understand older layout of hv_guest_state, if we restrict the
> new member to be added only at the end. i.e. we can allow
> nested guest even when L0 hv_guest_state.version > L1
> hv_guest_state.version. Though, the other way around is not
> possible.
>
> Signed-off-by: Ravi Bangoria <[email protected]>

Reviewed-by: Fabiano Rosas <[email protected]>

> ---
> arch/powerpc/include/asm/hvcall.h | 17 +++++++--
> arch/powerpc/kvm/book3s_hv_nested.c | 53 ++++++++++++++++++++++++-----
> 2 files changed, 59 insertions(+), 11 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
> index fbb377055471..a7073fddb657 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -524,9 +524,12 @@ struct h_cpu_char_result {
> u64 behaviour;
> };
>
> -/* Register state for entering a nested guest with H_ENTER_NESTED */
> +/*
> + * Register state for entering a nested guest with H_ENTER_NESTED.
> + * New member must be added at the end.
> + */
> struct hv_guest_state {
> - u64 version; /* version of this structure layout */
> + u64 version; /* version of this structure layout, must be first */
> u32 lpid;
> u32 vcpu_token;
> /* These registers are hypervisor privileged (at least for writing) */
> @@ -560,6 +563,16 @@ struct hv_guest_state {
> /* Latest version of hv_guest_state structure */
> #define HV_GUEST_STATE_VERSION 1
>
> +static inline int hv_guest_state_size(unsigned int version)
> +{
> + switch (version) {
> + case 1:
> + return offsetofend(struct hv_guest_state, ppr);
> + default:
> + return -1;
> + }
> +}
> +
> #endif /* __ASSEMBLY__ */
> #endif /* __KERNEL__ */
> #endif /* _ASM_POWERPC_HVCALL_H */
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 33b58549a9aa..2b433c3bacea 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -215,6 +215,45 @@ static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
> }
> }
>
> +static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
> + struct hv_guest_state *l2_hv,
> + struct pt_regs *l2_regs,
> + u64 hv_ptr, u64 regs_ptr)
> +{
> + int size;
> +
> + if (kvm_vcpu_read_guest(vcpu, hv_ptr, &(l2_hv->version),
> + sizeof(l2_hv->version)))
> + return -1;
> +
> + if (kvmppc_need_byteswap(vcpu))
> + l2_hv->version = swab64(l2_hv->version);
> +
> + size = hv_guest_state_size(l2_hv->version);
> + if (size < 0)
> + return -1;
> +
> + return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
> + kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
> + sizeof(struct pt_regs));
> +}
> +
> +static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
> + struct hv_guest_state *l2_hv,
> + struct pt_regs *l2_regs,
> + u64 hv_ptr, u64 regs_ptr)
> +{
> + int size;
> +
> + size = hv_guest_state_size(l2_hv->version);
> + if (size < 0)
> + return -1;
> +
> + return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
> + kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
> + sizeof(struct pt_regs));
> +}
> +
> long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
> {
> long int err, r;
> @@ -235,17 +274,15 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
> hv_ptr = kvmppc_get_gpr(vcpu, 4);
> regs_ptr = kvmppc_get_gpr(vcpu, 5);
> vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> - err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
> - sizeof(struct hv_guest_state)) ||
> - kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
> - sizeof(struct pt_regs));
> + err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
> + hv_ptr, regs_ptr);
> srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> if (err)
> return H_PARAMETER;
>
> if (kvmppc_need_byteswap(vcpu))
> byteswap_hv_regs(&l2_hv);
> - if (l2_hv.version != HV_GUEST_STATE_VERSION)
> + if (l2_hv.version > HV_GUEST_STATE_VERSION)
> return H_P2;
>
> if (kvmppc_need_byteswap(vcpu))
> @@ -325,10 +362,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
> byteswap_pt_regs(&l2_regs);
> }
> vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> - err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
> - sizeof(struct hv_guest_state)) ||
> - kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
> - sizeof(struct pt_regs));
> + err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
> + hv_ptr, regs_ptr);
> srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> if (err)
> return H_AUTHORITY;