2024-03-22 10:49:22

by Gautam Menghani

[permalink] [raw]
Subject: [PATCH v3] arch/powerpc/kvm: Add support for reading VPA counters for pseries guests

PAPR hypervisor has introduced three new counters in the VPA area of
LPAR CPUs for KVM L2 guest (see [1] for terminology) observability: two
counters for the context switch time from host to guest and vice versa,
and one counter for the total time spent inside the KVM guest. Add a
tracepoint that enables reading the counters for use by ftrace/perf. Note
that this tracepoint is only available for the nestedv2 API (i.e., KVM on
PowerVM).

Also maintain an aggregation of the context switch times in vcpu->arch.
This will be useful in getting the aggregate times with a pmu driver
which will be upstreamed in the near future.

[1] Terminology:
a. L1 refers to the VM (LPAR) booted on top of the PAPR hypervisor.
b. L2 refers to the KVM guest booted on top of L1.

Signed-off-by: Vaibhav Jain <[email protected]>
Signed-off-by: Gautam Menghani <[email protected]>
---
v1 -> v2:
1. Fix the build error due to invalid struct member reference.

v2 -> v3:
1. Move the counter disabling and zeroing code to a different function.
2. Move the get_lppaca() inside the tracepoint_enabled() branch.
3. Add the aggregation logic to maintain total context switch time.

arch/powerpc/include/asm/kvm_host.h | 5 +++++
arch/powerpc/include/asm/lppaca.h | 11 +++++++---
arch/powerpc/kvm/book3s_hv.c | 33 +++++++++++++++++++++++++++++
arch/powerpc/kvm/trace_hv.h | 25 ++++++++++++++++++++++
4 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8abac5321..d953b32dd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -847,6 +847,11 @@ struct kvm_vcpu_arch {
gpa_t nested_io_gpr;
/* For nested APIv2 guests*/
struct kvmhv_nestedv2_io nestedv2_io;
+
+ /* Aggregate context switch and guest run time info (in ns) */
+ u64 l1_to_l2_cs_agg;
+ u64 l2_to_l1_cs_agg;
+ u64 l2_runtime_agg;
#endif

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 61ec2447d..bda6b86b9 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -62,7 +62,8 @@ struct lppaca {
u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */
u8 fpregs_in_use;
u8 pmcregs_in_use;
- u8 reserved8[28];
+ u8 l2_accumul_cntrs_enable; /* Enable usage of counters for KVM guest */
+ u8 reserved8[27];
__be64 wait_state_cycles; /* Wait cycles for this proc */
u8 reserved9[28];
__be16 slb_count; /* # of SLBs to maintain */
@@ -92,9 +93,13 @@ struct lppaca {
/* cacheline 4-5 */

__be32 page_ins; /* CMO Hint - # page ins by OS */
- u8 reserved12[148];
+ u8 reserved12[28];
+ volatile __be64 l1_to_l2_cs_tb;
+ volatile __be64 l2_to_l1_cs_tb;
+ volatile __be64 l2_runtime_tb;
+ u8 reserved13[96];
volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
- u8 reserved13[96];
+ u8 reserved14[96];
} ____cacheline_aligned;

#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8e86eb577..5a0bcb57e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4108,6 +4108,30 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
}
}

+static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
+{
+ struct lppaca *lp = get_lppaca();
+ u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
+
+ if (!lp->l2_accumul_cntrs_enable)
+ return;
+
+ l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
+ l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
+ l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
+ trace_kvmppc_vcpu_exit_cs_time(vcpu, l1_to_l2_ns, l2_to_l1_ns,
+ l2_runtime_ns);
+ lp->l1_to_l2_cs_tb = 0;
+ lp->l2_to_l1_cs_tb = 0;
+ lp->l2_runtime_tb = 0;
+ lp->l2_accumul_cntrs_enable = 0;
+
+ // Maintain an aggregate of context switch times
+ vcpu->arch.l1_to_l2_cs_agg += l1_to_l2_ns;
+ vcpu->arch.l2_to_l1_cs_agg += l2_to_l1_ns;
+ vcpu->arch.l2_runtime_agg += l2_runtime_ns;
+}
+
static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr, u64 *tb)
{
@@ -4130,6 +4154,11 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
kvmppc_gse_put_u64(io->vcpu_run_input, KVMPPC_GSID_LPCR, lpcr);

accumulate_time(vcpu, &vcpu->arch.in_guest);
+
+ /* Enable the guest host context switch time tracking */
+ if (unlikely(trace_kvmppc_vcpu_exit_cs_time_enabled()))
+ get_lppaca()->l2_accumul_cntrs_enable = 1;
+
rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id,
&trap, &i);

@@ -4156,6 +4185,10 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,

timer_rearm_host_dec(*tb);

+ /* Record context switch and guest_run_time data */
+ if (unlikely(trace_kvmppc_vcpu_exit_cs_time_enabled()))
+ do_trace_nested_cs_time(vcpu);
+
return trap;
}

diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 8d57c8428..ab19977c9 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -491,6 +491,31 @@ TRACE_EVENT(kvmppc_run_vcpu_enter,
TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
);

+TRACE_EVENT(kvmppc_vcpu_exit_cs_time,
+ TP_PROTO(struct kvm_vcpu *vcpu, u64 l1_to_l2_cs, u64 l2_to_l1_cs,
+ u64 l2_runtime),
+
+ TP_ARGS(vcpu, l1_to_l2_cs, l2_to_l1_cs, l2_runtime),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(__u64, l1_to_l2_cs_ns)
+ __field(__u64, l2_to_l1_cs_ns)
+ __field(__u64, l2_runtime_ns)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->l1_to_l2_cs_ns = l1_to_l2_cs;
+ __entry->l2_to_l1_cs_ns = l2_to_l1_cs;
+ __entry->l2_runtime_ns = l2_runtime;
+ ),
+
+ TP_printk("VCPU %d: l1_to_l2_cs_time=%llu-ns l2_to_l1_cs_time=%llu-ns l2_runtime=%llu-ns",
+ __entry->vcpu_id, __entry->l1_to_l2_cs_ns,
+ __entry->l2_to_l1_cs_ns, __entry->l2_runtime_ns)
+);
+
TRACE_EVENT(kvmppc_run_vcpu_exit,
TP_PROTO(struct kvm_vcpu *vcpu),

--
2.43.2



2024-03-25 12:56:31

by Gautam Menghani

[permalink] [raw]
Subject: Re: [PATCH v3] arch/powerpc/kvm: Add support for reading VPA counters for pseries guests

On Fri, Mar 22, 2024 at 03:41:32PM +0530, Gautam Menghani wrote:
> PAPR hypervisor has introduced three new counters in the VPA area of
> LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
> for context switches from host to guest and vice versa, and 1 counter
> for getting the total time spent inside the KVM guest. Add a tracepoint
> that enables reading the counters for use by ftrace/perf. Note that this
> tracepoint is only available for nestedv2 API (i.e, KVM on PowerVM).
>
> Also maintain an aggregation of the context switch times in vcpu->arch.
> This will be useful in getting the aggregate times with a pmu driver
> which will be upstreamed in the near future.
>
> [1] Terminology:
> a. L1 refers to the VM (LPAR) booted on top of PAPR hypervisor
> b. L2 refers to the KVM guest booted on top of L1.
>
> Signed-off-by: Vaibhav Jain <[email protected]>
> Signed-off-by: Gautam Menghani <[email protected]>
> ---
> v1 -> v2:
> 1. Fix the build error due to invalid struct member reference.
>
> v2 -> v3:
> 1. Move the counter disabling and zeroing code to a different function.
> 2. Move the get_lppaca() inside the tracepoint_enabled() branch.
> 3. Add the aggregation logic to maintain total context switch time.
>
> arch/powerpc/include/asm/kvm_host.h | 5 +++++
> arch/powerpc/include/asm/lppaca.h | 11 +++++++---
> arch/powerpc/kvm/book3s_hv.c | 33 +++++++++++++++++++++++++++++
> arch/powerpc/kvm/trace_hv.h | 25 ++++++++++++++++++++++
> 4 files changed, 71 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 8abac5321..d953b32dd 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -847,6 +847,11 @@ struct kvm_vcpu_arch {
> gpa_t nested_io_gpr;
> /* For nested APIv2 guests*/
> struct kvmhv_nestedv2_io nestedv2_io;
> +
> + /* Aggregate context switch and guest run time info (in ns) */
> + u64 l1_to_l2_cs_agg;
> + u64 l2_to_l1_cs_agg;
> + u64 l2_runtime_agg;
> #endif
>
> #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
> diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
> index 61ec2447d..bda6b86b9 100644
> --- a/arch/powerpc/include/asm/lppaca.h
> +++ b/arch/powerpc/include/asm/lppaca.h
> @@ -62,7 +62,8 @@ struct lppaca {
> u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */
> u8 fpregs_in_use;
> u8 pmcregs_in_use;
> - u8 reserved8[28];
> + u8 l2_accumul_cntrs_enable; /* Enable usage of counters for KVM guest */
> + u8 reserved8[27];
> __be64 wait_state_cycles; /* Wait cycles for this proc */
> u8 reserved9[28];
> __be16 slb_count; /* # of SLBs to maintain */
> @@ -92,9 +93,13 @@ struct lppaca {
> /* cacheline 4-5 */
>
> __be32 page_ins; /* CMO Hint - # page ins by OS */
> - u8 reserved12[148];
> + u8 reserved12[28];
> + volatile __be64 l1_to_l2_cs_tb;
> + volatile __be64 l2_to_l1_cs_tb;
> + volatile __be64 l2_runtime_tb;
> + u8 reserved13[96];
> volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
> - u8 reserved13[96];
> + u8 reserved14[96];
> } ____cacheline_aligned;
>
> #define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 8e86eb577..5a0bcb57e 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4108,6 +4108,30 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
> }
> }
>
> +static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
> +{
> + struct lppaca *lp = get_lppaca();
> + u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
> +
> + if (!lp->l2_accumul_cntrs_enable)
> + return;
> +
> + l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
> + l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
> + l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
> + trace_kvmppc_vcpu_exit_cs_time(vcpu, l1_to_l2_ns, l2_to_l1_ns,
> + l2_runtime_ns);
> + lp->l1_to_l2_cs_tb = 0;
> + lp->l2_to_l1_cs_tb = 0;
> + lp->l2_runtime_tb = 0;
> + lp->l2_accumul_cntrs_enable = 0;
> +
> + // Maintain an aggregate of context switch times
> + vcpu->arch.l1_to_l2_cs_agg += l1_to_l2_ns;
> + vcpu->arch.l2_to_l1_cs_agg += l2_to_l1_ns;
> + vcpu->arch.l2_runtime_agg += l2_runtime_ns;
> +}
> +
> static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
> unsigned long lpcr, u64 *tb)
> {
> @@ -4130,6 +4154,11 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
> kvmppc_gse_put_u64(io->vcpu_run_input, KVMPPC_GSID_LPCR, lpcr);
>
> accumulate_time(vcpu, &vcpu->arch.in_guest);
> +
> + /* Enable the guest host context switch time tracking */
> + if (unlikely(trace_kvmppc_vcpu_exit_cs_time_enabled()))
> + get_lppaca()->l2_accumul_cntrs_enable = 1;
> +
> rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id,
> &trap, &i);
>
> @@ -4156,6 +4185,10 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
>
> timer_rearm_host_dec(*tb);
>
> + /* Record context switch and guest_run_time data */
> + if (unlikely(trace_kvmppc_vcpu_exit_cs_time_enabled()))
> + do_trace_nested_cs_time(vcpu);
> +

There is an issue with this part: if the tracepoint is enabled when the
vcpu is run but is disabled before the vcpu exits, this condition will be
false, so do_trace_nested_cs_time() will not be called and
l2_accumul_cntrs_enable will stay set. As a result, the context switch
times will keep accumulating in the VPA. I'll send a v4 where I check the
VPA flag. I'll also incorporate any other changes, if required.

Thanks,
Gautam