Avoid the POPCNT by noting we can decrement the weight for each
cleared bit.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
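[Note, not part of the patch: a stand-alone illustration of the idea, for anyone
who wants to play with it outside the kernel. Start from the already-known
weight of the mask and decrement it every time a bit is cleared, instead of
recomputing the population count (hweight64(), i.e. a POPCNT) over the mask
afterwards. The mask and the "taken" table below are made up for the example,
and __builtin_popcountll() (a GCC/Clang builtin) stands in for hweight64():

/* Illustrative only: keep a running weight while clearing bits. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask = 0xf;			/* four "counters" available */
	int taken[64] = { [1] = 1, [3] = 1 };	/* pretend the sibling owns 1 and 3 */
	int w = __builtin_popcountll(mask);	/* the weight is already known up front */
	int i;

	for (i = 0; i < 64; i++) {
		if ((mask & (1ULL << i)) && taken[i]) {
			mask &= ~(1ULL << i);	/* __clear_bit() equivalent */
			w--;			/* bookkeeping replaces the later POPCNT */
		}
	}

	/* same answer as recomputing the weight, without the extra popcount */
	assert(w == __builtin_popcountll(mask));
	printf("mask=%#llx weight=%d\n", (unsigned long long)mask, w);
	return 0;
}

The decrement rides on a branch the loop already takes when it clears a bit,
so the recomputation at the end can simply be dropped.]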
arch/x86/events/intel/core.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2838,7 +2838,7 @@ intel_get_excl_constraints(struct cpu_hw
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
 	struct intel_excl_states *xlo;
 	int tid = cpuc->excl_thread_id;
-	int is_excl, i;
+	int is_excl, i, w;
 
 	/*
 	 * validating a group does not require
@@ -2894,36 +2894,40 @@ intel_get_excl_constraints(struct cpu_hw
 	 * SHARED   : sibling counter measuring non-exclusive event
 	 * UNUSED   : sibling counter unused
 	 */
+	w = c->weight;
 	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
 		/*
 		 * exclusive event in sibling counter
 		 * our corresponding counter cannot be used
 		 * regardless of our event
 		 */
-		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
 			__clear_bit(i, c->idxmsk);
+			w--;
+			continue;
+		}
 		/*
 		 * if measuring an exclusive event, sibling
 		 * measuring non-exclusive, then counter cannot
 		 * be used
 		 */
-		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
 			__clear_bit(i, c->idxmsk);
+			w--;
+			continue;
+		}
 	}
 
 	/*
-	 * recompute actual bit weight for scheduling algorithm
-	 */
-	c->weight = hweight64(c->idxmsk64);
-
-	/*
 	 * if we return an empty mask, then switch
 	 * back to static empty constraint to avoid
 	 * the cost of freeing later on
 	 */
-	if (c->weight == 0)
+	if (!w)
 		c = &emptyconstraint;
 
+	c->weight = w;
+
 	return c;
 }
 
On Thu, Mar 14, 2019 at 6:11 AM Peter Zijlstra <[email protected]> wrote:
>
> Avoid the POPCNT by noting we can decrement the weight for each
> cleared bit.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Looks good to me.
Reviewed-by: Stephane Eranian <[email protected]>