The flag PERF_X86_EVENT_COMMITTED is used to find uncommitted events
for which to call put_event_constraint() when scheduling fails.
These are the newly added events to the list, and must form, per
definition, the tail of cpuc->event_list[]. By computing the list
index of the last successful schedule, iteration can start there
and the flag is redundant.
There are only 3 callers of x86_schedule_events(), namely:
- x86_pmu_add()
- x86_pmu_commit_txn()
- validate_group()
For x86_pmu_add(), cpuc->n_events isn't updated until after
schedule_events() succeeds, therefore cpuc->n_events points to the
desired index.
For x86_pmu_commit_txn(), cpuc->n_events is updated, but we can
trivially compute the desired value with cpuc->n_txn -- the number of
events added in this transaction.
For validate_group(), we can make the rule for x86_pmu_add() work by
simply setting cpuc->n_events to 0 before calling schedule_events().
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/events/core.c | 21 ++++++---------------
arch/x86/events/perf_event.h | 1 -
2 files changed, 6 insertions(+), 16 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -925,19 +925,16 @@ int x86_schedule_events(struct cpu_hw_ev
if (!unsched && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
- e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
} else {
- for (i = 0; i < n; i++) {
+ i = cpuc->n_events;
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+ i -= cpuc->n_txn;
+
+ for (; i < n; i++) {
e = cpuc->event_list[i];
- /*
- * do not put_constraint() on comitted events,
- * because they are good to go
- */
- if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
- continue;
/*
* release events that failed scheduling
@@ -1372,11 +1369,6 @@ static void x86_pmu_del(struct perf_even
int i;
/*
- * event is descheduled
- */
- event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
- /*
* If we're called during a txn, we only need to undo x86_pmu.add.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
@@ -2079,8 +2071,7 @@ static int validate_group(struct perf_ev
if (n < 0)
goto out;
- fake_cpuc->n_events = n;
-
+ fake_cpuc->n_events = 0;
ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
out:
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -61,7 +61,6 @@ struct event_constraint {
#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
On Thu, Mar 14, 2019 at 6:11 AM Peter Zijlstra <[email protected]> wrote:
>
> The flag PERF_X86_EVENT_COMMITTED is used to find uncommitted events
> for which to call put_event_constraint() when scheduling fails.
>
> These are the newly added events to the list, and must form, per
> definition, the tail of cpuc->event_list[]. By computing the list
> index of the last successfull schedule, then iteration can start there
> and the flag is redundant.
>
> There are only 3 callers of x86_schedule_events(), notably:
>
> - x86_pmu_add()
> - x86_pmu_commit_txn()
> - validate_group()
>
> For x86_pmu_add(), cpuc->n_events isn't updated until after
> schedule_events() succeeds, therefore cpuc->n_events points to the
> desired index.
>
Correct.
> For x86_pmu_commit_txn(), cpuc->n_events is updated, but we can
> trivially compute the desired value with cpuc->n_txn -- the number of
> events added in this transaction.
>
I suggest you put this explanation in the code so that it is easier to
understand.
> For validate_group(), we can make the rule for x86_pmu_add() work by
> simply setting cpuc->n_events to 0 before calling schedule_events().
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Stephane Eranian <[email protected]>
> ---
> arch/x86/events/core.c | 21 ++++++---------------
> arch/x86/events/perf_event.h | 1 -
> 2 files changed, 6 insertions(+), 16 deletions(-)
>
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -925,19 +925,16 @@ int x86_schedule_events(struct cpu_hw_ev
> if (!unsched && assign) {
> for (i = 0; i < n; i++) {
> e = cpuc->event_list[i];
> - e->hw.flags |= PERF_X86_EVENT_COMMITTED;
> if (x86_pmu.commit_scheduling)
> x86_pmu.commit_scheduling(cpuc, i, assign[i]);
> }
> } else {
> - for (i = 0; i < n; i++) {
> + i = cpuc->n_events;
> + if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
> + i -= cpuc->n_txn;
> +
> + for (; i < n; i++) {
> e = cpuc->event_list[i];
> - /*
> - * do not put_constraint() on comitted events,
> - * because they are good to go
> - */
> - if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
> - continue;
>
> /*
> * release events that failed scheduling
> @@ -1372,11 +1369,6 @@ static void x86_pmu_del(struct perf_even
> int i;
>
> /*
> - * event is descheduled
> - */
> - event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
> -
> - /*
> * If we're called during a txn, we only need to undo x86_pmu.add.
> * The events never got scheduled and ->cancel_txn will truncate
> * the event_list.
> @@ -2079,8 +2071,7 @@ static int validate_group(struct perf_ev
> if (n < 0)
> goto out;
>
> - fake_cpuc->n_events = n;
> -
> + fake_cpuc->n_events = 0;
> ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
>
> out:
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -61,7 +61,6 @@ struct event_constraint {
> #define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
> #define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
> #define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
> -#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
I would put a placeholder saying that bit 3 is available, or renumber
the other masks below.
> #define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
> #define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
> #define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
>
>
On Tue, Mar 19, 2019 at 01:48:18PM -0700, Stephane Eranian wrote:
> On Thu, Mar 14, 2019 at 6:11 AM Peter Zijlstra <[email protected]> wrote:
>
> > For x86_pmu_commit_txn(), cpuc->n_events is updated, but we can
> > trivially compute the desired value with cpuc->n_txn -- the number of
> > events added in this transaction.
> >
> I suggest you put this explanation in the code so that it is easier to
> understand.
Right, I actually attempted writing that comment a few times but every
time it became a mess. I'll try again, because you're quite right, this
is a bit magical.
> > - for (i = 0; i < n; i++) {
> > + i = cpuc->n_events;
> > + if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
> > + i -= cpuc->n_txn;
> > +
> > + for (; i < n; i++) {
Subject: perf/x86: Remove PERF_X86_EVENT_COMMITTED
From: Peter Zijlstra <[email protected]>
Date: Thu Mar 14 12:58:52 CET 2019
The flag PERF_X86_EVENT_COMMITTED is used to find uncommitted events
for which to call put_event_constraint() when scheduling fails.
These are the newly added events to the list, and must form, per
definition, the tail of cpuc->event_list[]. By computing the list
index of the last successful schedule, iteration can start there
and the flag is redundant.
There are only 3 callers of x86_schedule_events(), namely:
- x86_pmu_add()
- x86_pmu_commit_txn()
- validate_group()
For x86_pmu_add(), cpuc->n_events isn't updated until after
schedule_events() succeeds, therefore cpuc->n_events points to the
desired index.
For x86_pmu_commit_txn(), cpuc->n_events is updated, but we can
trivially compute the desired value with cpuc->n_txn -- the number of
events added in this transaction.
For validate_group(), we can make the rule for x86_pmu_add() work by
simply setting cpuc->n_events to 0 before calling schedule_events().
Reviewed-by: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/events/core.c | 28 +++++++++++++---------------
arch/x86/events/perf_event.h | 19 +++++++++----------
2 files changed, 22 insertions(+), 25 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -925,19 +925,23 @@ int x86_schedule_events(struct cpu_hw_ev
if (!unsched && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
- e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
} else {
- for (i = 0; i < n; i++) {
+ /*
+ * In a transaction cpuc->n_events is already updated, but we
+ * can use cpuc->n_txn to know how many new events there are.
+ *
+ * Outside of a transaction, cpuc->n_events is not yet updated,
+ * and indicates how many events are scheduled.
+ */
+ i = cpuc->n_events;
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+ i -= cpuc->n_txn;
+
+ for (; i < n; i++) {
e = cpuc->event_list[i];
- /*
- * do not put_constraint() on comitted events,
- * because they are good to go
- */
- if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
- continue;
/*
* release events that failed scheduling
@@ -1372,11 +1376,6 @@ static void x86_pmu_del(struct perf_even
int i;
/*
- * event is descheduled
- */
- event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
- /*
* If we're called during a txn, we only need to undo x86_pmu.add.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
@@ -2079,8 +2078,7 @@ static int validate_group(struct perf_ev
if (n < 0)
goto out;
- fake_cpuc->n_events = n;
-
+ fake_cpuc->n_events = 0;
ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
out:
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -55,22 +55,21 @@ struct event_constraint {
int overlap;
int flags;
};
+
/*
* struct hw_perf_event.flags flags
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
-#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */
-
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0080 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
struct amd_nb {
int nb_id; /* NorthBridge id */
On Tue, Mar 19, 2019 at 10:00:18PM +0100, Peter Zijlstra wrote:
> On Tue, Mar 19, 2019 at 01:48:18PM -0700, Stephane Eranian wrote:
> > On Thu, Mar 14, 2019 at 6:11 AM Peter Zijlstra <[email protected]> wrote:
> >
> > > For x86_pmu_commit_txn(), cpuc->n_events is updated, but we can
> > > trivially compute the desired value with cpuc->n_txn -- the number of
> > > events added in this transaction.
> > >
> > I suggest you put this explanation in the code so that it is easier to
> > understand.
>
> Right, I actually attempted writing that comment a few times but every
> time it became a mess. I'll try again, because you're quite right, this
> is a bit magical.
Ha! I found the comment, it's in patch 8 :-)