2020-03-05 10:00:15

by Luwei Kang

[permalink] [raw]
Subject: [PATCH v1 05/11] KVM: x86/pmu: Add support to reprogram PEBS event for guest counters

From: Like Xu <[email protected]>

When the event precise level is non-zero, the performance counter
will be reprogrammed for a PEBS event, and the PEBS PMI bit will be set
in global_status when the PEBS event overflows. Since KVM never knows the
precise level set in the guest (it is a SW parameter), we force all PEBS
events to precise level 1, which gives enough accuracy with a dedicated counter.

Originally-by: Andi Kleen <[email protected]>
Signed-off-by: Like Xu <[email protected]>
Co-developed-by: Kan Liang <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/pmu.c | 69 ++++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/vmx/pmu_intel.c | 1 +
3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 98959e8..83abb49 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -478,6 +478,7 @@ struct kvm_pmu {
u64 global_ctrl_mask;
u64 global_ovf_ctrl_mask;
u64 reserved_bits;
+ u64 pebs_enable;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index b4f9e97..b2bdacb 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -77,6 +77,11 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,

if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+
+ /* Indicate PEBS overflow to guest. */
+ if (perf_event->attr.precise_ip)
+ __set_bit(62, (unsigned long *)&pmu->global_status);
+
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

/*
@@ -99,6 +104,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
bool exclude_kernel, bool intr,
bool in_tx, bool in_tx_cp)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
struct perf_event *event;
struct perf_event_attr attr = {
.type = type,
@@ -111,6 +117,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.config = config,
.disabled = 1,
};
+ bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);

attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);

@@ -126,8 +133,50 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
attr.config |= HSW_IN_TX_CHECKPOINTED;
}

+ if (pebs) {
+ /*
+ * Host never knows the precision level set by guest.
+ * Force Host's PEBS event to precision level 1, which will
+ * not impact the accuracy of the results for guest PEBS events.
+ * Because,
+ * - For most cases, there is no difference among precision
+ * level 1 to 3 for PEBS events.
+ * - The functions as below checks the precision level in host.
+ * But the results from these functions in host are replaced
+ * by guest when sampling the guest.
+ * The accuracy for guest PEBS events will not be impacted.
+ * -- event_constraints() impacts the index of counter.
+ * The index for host event is exactly the same as guest.
+ * It's decided by guest.
+ * -- pebs_update_adaptive_cfg() impacts the value of
+ * MSR_PEBS_DATA_CFG. When guest is switched in,
+ * the MSR value will be replaced by the value from guest.
+ * -- setup_sample () impacts the output of a PEBS record.
+ * Guest handles the PEBS records.
+ */
+ attr.precise_ip = 1;
+ /*
+ * When the host's PMI handler completes, it's going to
+ * enter the guest and trigger the guest's PMI handler.
+ *
+ * At this moment, this function may be called by
+ * kvm_pmu_handle_event(). However the next sample_period
+ * hasn't been determined by guest yet and the left period,
+ * which probably be 0, is used for current sample_period.
+ *
+ * In this case, perf will mistakenly treat it as non
+ * sampling events. The PEBS event will error out.
+ *
+ * Fill it with maximum period to prevent the error out.
+ * The guest PMI handler will soon reprogram the counter.
+ */
+ if (!attr.sample_period)
+ attr.sample_period = (-1ULL) & pmc_bitmask(pmc);
+ }
+
event = perf_event_create_kernel_counter(&attr, -1, current,
- intr ? kvm_perf_overflow_intr :
+ (intr || pebs) ?
+ kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
@@ -135,6 +184,20 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
return;
}

+ if (pebs) {
+ event->guest_dedicated_idx = pmc->idx;
+ /*
+ * For guest PEBS events, guest takes the responsibility to
+ * drain PEBS buffers, and load proper values to reset counters.
+ *
+ * Host will unconditionally set auto-reload flag for PEBS
+ * events with fixed period which is not necessary. Host should
+ * do nothing in drain_pebs() but inject the PMI into the guest.
+ *
+ * Unset the auto-reload flag for guest PEBS events.
+ */
+ perf_x86_pmu_unset_auto_reload(event);
+ }
pmc->perf_event = event;
pmc_to_pmu(pmc)->event_count++;
perf_event_enable(event);
@@ -158,6 +221,10 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
if (!pmc->perf_event)
return false;

+ if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable)
+ != (!!pmc->perf_event->attr.precise_ip))
+ return false;
+
/* recalibrate sample period and check if it's accepted by perf core */
if (perf_event_period(pmc->perf_event,
(-pmc->counter) & pmc_bitmask(pmc)))
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index fd21cdb..ebadc33 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -293,6 +293,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->counter_bitmask[KVM_PMC_GP] = 0;
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->version = 0;
+ pmu->pebs_enable = 0;
pmu->reserved_bits = 0xffffffff00200000ull;

entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
--
1.8.3.1


2020-03-06 16:29:50

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v1 05/11] KVM: x86/pmu: Add support to reprogram PEBS event for guest counters

Hi Luwei,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on tip/perf/core tip/auto-latest v5.6-rc4 next-20200306]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url: https://github.com/0day-ci/linux/commits/Luwei-Kang/PEBS-virtualization-enabling-via-DS/20200306-013049
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: x86_64-randconfig-h003-20200305 (attached as .config)
compiler: gcc-7 (Debian 7.5.0-5) 7.5.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>

All errors (new ones prefixed by >>):

ld: arch/x86/kvm/pmu.o: in function `pmc_reprogram_counter':
>> arch/x86/kvm/pmu.c:199: undefined reference to `perf_x86_pmu_unset_auto_reload'

vim +199 arch/x86/kvm/pmu.c

101
102 static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
103 unsigned config, bool exclude_user,
104 bool exclude_kernel, bool intr,
105 bool in_tx, bool in_tx_cp)
106 {
107 struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
108 struct perf_event *event;
109 struct perf_event_attr attr = {
110 .type = type,
111 .size = sizeof(attr),
112 .pinned = true,
113 .exclude_idle = true,
114 .exclude_host = 1,
115 .exclude_user = exclude_user,
116 .exclude_kernel = exclude_kernel,
117 .config = config,
118 .disabled = 1,
119 };
120 bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
121
122 attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
123
124 if (in_tx)
125 attr.config |= HSW_IN_TX;
126 if (in_tx_cp) {
127 /*
128 * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
129 * period. Just clear the sample period so at least
130 * allocating the counter doesn't fail.
131 */
132 attr.sample_period = 0;
133 attr.config |= HSW_IN_TX_CHECKPOINTED;
134 }
135
136 if (pebs) {
137 /*
138 * Host never knows the precision level set by guest.
139 * Force Host's PEBS event to precision level 1, which will
140 * not impact the accuracy of the results for guest PEBS events.
141 * Because,
142 * - For most cases, there is no difference among precision
143 * level 1 to 3 for PEBS events.
144 * - The functions as below checks the precision level in host.
145 * But the results from these functions in host are replaced
146 * by guest when sampling the guest.
147 * The accuracy for guest PEBS events will not be impacted.
148 * -- event_constraints() impacts the index of counter.
149 * The index for host event is exactly the same as guest.
150 * It's decided by guest.
151 * -- pebs_update_adaptive_cfg() impacts the value of
152 * MSR_PEBS_DATA_CFG. When guest is switched in,
153 * the MSR value will be replaced by the value from guest.
154 * -- setup_sample () impacts the output of a PEBS record.
155 * Guest handles the PEBS records.
156 */
157 attr.precise_ip = 1;
158 /*
159 * When the host's PMI handler completes, it's going to
160 * enter the guest and trigger the guest's PMI handler.
161 *
162 * At this moment, this function may be called by
163 * kvm_pmu_handle_event(). However the next sample_period
164 * hasn't been determined by guest yet and the left period,
165 * which probably be 0, is used for current sample_period.
166 *
167 * In this case, perf will mistakenly treat it as non
168 * sampling events. The PEBS event will error out.
169 *
170 * Fill it with maximum period to prevent the error out.
171 * The guest PMI handler will soon reprogram the counter.
172 */
173 if (!attr.sample_period)
174 attr.sample_period = (-1ULL) & pmc_bitmask(pmc);
175 }
176
177 event = perf_event_create_kernel_counter(&attr, -1, current,
178 (intr || pebs) ?
179 kvm_perf_overflow_intr :
180 kvm_perf_overflow, pmc);
181 if (IS_ERR(event)) {
182 pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
183 PTR_ERR(event), pmc->idx);
184 return;
185 }
186
187 if (pebs) {
188 event->guest_dedicated_idx = pmc->idx;
189 /*
190 * For guest PEBS events, guest takes the responsibility to
191 * drain PEBS buffers, and load proper values to reset counters.
192 *
193 * Host will unconditionally set auto-reload flag for PEBS
194 * events with fixed period which is not necessary. Host should
195 * do nothing in drain_pebs() but inject the PMI into the guest.
196 *
197 * Unset the auto-reload flag for guest PEBS events.
198 */
> 199 perf_x86_pmu_unset_auto_reload(event);
200 }
201 pmc->perf_event = event;
202 pmc_to_pmu(pmc)->event_count++;
203 perf_event_enable(event);
204 clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
205 }
206

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
(No filename) (5.52 kB)
.config.gz (30.74 kB)
Download all attachments

2020-03-09 02:12:00

by Xu, Like

[permalink] [raw]
Subject: RE: [PATCH v1 05/11] KVM: x86/pmu: Add support to reprogram PEBS event for guest counters

> -----Original Message-----
> From: kbuild test robot <[email protected]>
> Sent: Saturday, March 7, 2020 12:28 AM
> To: Luwei Kang <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH v1 05/11] KVM: x86/pmu: Add support to reprogram PEBS
> event for guest counters
>
> Hi Luwei,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on kvm/linux-next] [also build test ERROR on
> tip/perf/core tip/auto-latest v5.6-rc4 next-20200306] [if your patch is applied to
> the wrong git tree, please drop us a note to help improve the system. BTW, we
> also suggest to use '--base' option to specify the base tree in git format-patch,
> please see https://stackoverflow.com/a/37406982]
>
> url:
> https://github.com/0day-ci/linux/commits/Luwei-Kang/PEBS-virtualization-ena
> bling-via-DS/20200306-013049
> base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
> config: x86_64-randconfig-h003-20200305 (attached as .config)
> compiler: gcc-7 (Debian 7.5.0-5) 7.5.0
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64
>
> If you fix the issue, kindly add following tag
> Reported-by: kbuild test robot <[email protected]>
>
> All errors (new ones prefixed by >>):
>
> ld: arch/x86/kvm/pmu.o: in function `pmc_reprogram_counter':
> >> arch/x86/kvm/pmu.c:199: undefined reference to
> `perf_x86_pmu_unset_auto_reload'

Since we should not lose PEBS functionality for other x86 vendors on KVM,
and PERF_X86_EVENT_AUTO_RELOAD is already defined in the generic
arch/x86/events/perf_event.h,

one way to fix this issue is to
move the definition of perf_x86_pmu_unset_auto_reload()
to the end of arch/x86/events/core.c,
instead of making it Intel-specific as was done
in the previous patch "perf/x86: Expose a function to disable auto-reload."

Thanks,
Like Xu

>
> vim +199 arch/x86/kvm/pmu.c
>
> 101
> 102 static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
> 103 unsigned config, bool exclude_user,
> 104 bool exclude_kernel, bool intr,
> 105 bool in_tx, bool in_tx_cp)
> 106 {
> 107 struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
> 108 struct perf_event *event;
> 109 struct perf_event_attr attr = {
> 110 .type = type,
> 111 .size = sizeof(attr),
> 112 .pinned = true,
> 113 .exclude_idle = true,
> 114 .exclude_host = 1,
> 115 .exclude_user = exclude_user,
> 116 .exclude_kernel = exclude_kernel,
> 117 .config = config,
> 118 .disabled = 1,
> 119 };
> 120 bool pebs = test_bit(pmc->idx, (unsigned long
> *)&pmu->pebs_enable);
> 121
> 122 attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
> 123
> 124 if (in_tx)
> 125 attr.config |= HSW_IN_TX;
> 126 if (in_tx_cp) {
> 127 /*
> 128 * HSW_IN_TX_CHECKPOINTED is not supported with
> nonzero
> 129 * period. Just clear the sample period so at least
> 130 * allocating the counter doesn't fail.
> 131 */
> 132 attr.sample_period = 0;
> 133 attr.config |= HSW_IN_TX_CHECKPOINTED;
> 134 }
> 135
> 136 if (pebs) {
> 137 /*
> 138 * Host never knows the precision level set by guest.
> 139 * Force Host's PEBS event to precision level 1, which will
> 140 * not impact the accuracy of the results for guest PEBS
> events.
> 141 * Because,
> 142 * - For most cases, there is no difference among precision
> 143 * level 1 to 3 for PEBS events.
> 144 * - The functions as below checks the precision level in
> host.
> 145 * But the results from these functions in host are
> replaced
> 146 * by guest when sampling the guest.
> 147 * The accuracy for guest PEBS events will not be
> impacted.
> 148 * -- event_constraints() impacts the index of counter.
> 149 * The index for host event is exactly the same as guest.
> 150 * It's decided by guest.
> 151 * -- pebs_update_adaptive_cfg() impacts the value of
> 152 * MSR_PEBS_DATA_CFG. When guest is switched in,
> 153 * the MSR value will be replaced by the value from
> guest.
> 154 * -- setup_sample () impacts the output of a PEBS
> record.
> 155 * Guest handles the PEBS records.
> 156 */
> 157 attr.precise_ip = 1;
> 158 /*
> 159 * When the host's PMI handler completes, it's going to
> 160 * enter the guest and trigger the guest's PMI handler.
> 161 *
> 162 * At this moment, this function may be called by
> 163 * kvm_pmu_handle_event(). However the next
> sample_period
> 164 * hasn't been determined by guest yet and the left period,
> 165 * which probably be 0, is used for current sample_period.
> 166 *
> 167 * In this case, perf will mistakenly treat it as non
> 168 * sampling events. The PEBS event will error out.
> 169 *
> 170 * Fill it with maximum period to prevent the error out.
> 171 * The guest PMI handler will soon reprogram the counter.
> 172 */
> 173 if (!attr.sample_period)
> 174 attr.sample_period = (-1ULL) & pmc_bitmask(pmc);
> 175 }
> 176
> 177 event = perf_event_create_kernel_counter(&attr, -1, current,
> 178 (intr || pebs) ?
> 179 kvm_perf_overflow_intr :
> 180 kvm_perf_overflow, pmc);
> 181 if (IS_ERR(event)) {
> 182 pr_debug_ratelimited("kvm_pmu: event creation failed %ld
> for pmc->idx = %d\n",
> 183 PTR_ERR(event), pmc->idx);
> 184 return;
> 185 }
> 186
> 187 if (pebs) {
> 188 event->guest_dedicated_idx = pmc->idx;
> 189 /*
> 190 * For guest PEBS events, guest takes the responsibility to
> 191 * drain PEBS buffers, and load proper values to reset
> counters.
> 192 *
> 193 * Host will unconditionally set auto-reload flag for PEBS
> 194 * events with fixed period which is not necessary. Host
> should
> 195 * do nothing in drain_pebs() but inject the PMI into the
> guest.
> 196 *
> 197 * Unset the auto-reload flag for guest PEBS events.
> 198 */
> > 199 perf_x86_pmu_unset_auto_reload(event);
> 200 }
> 201 pmc->perf_event = event;
> 202 pmc_to_pmu(pmc)->event_count++;
> 203 perf_event_enable(event);
> 204 clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
> 205 }
> 206
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/[email protected]