Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752389AbdHBIPq (ORCPT ); Wed, 2 Aug 2017 04:15:46 -0400 Received: from mga14.intel.com ([192.55.52.115]:37489 "EHLO mga14.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751900AbdHBIPo (ORCPT ); Wed, 2 Aug 2017 04:15:44 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.41,310,1498546800"; d="scan'208";a="999810457" Subject: [PATCH v6 2/3]: perf/core: use context tstamp_data for skipped events on mux interrupt From: Alexey Budankov To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Alexander Shishkin Cc: Andi Kleen , Kan Liang , Dmitri Prokhorov , Valery Cherepennikov , Mark Rutland , Stephane Eranian , David Carrillo-Cisneros , linux-kernel References: <96c7776f-1f17-a39e-23e9-658596216d6b@linux.intel.com> Organization: Intel Corp. Message-ID: Date: Wed, 2 Aug 2017 11:15:39 +0300 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Thunderbird/52.2.1 MIME-Version: 1.0 In-Reply-To: <96c7776f-1f17-a39e-23e9-658596216d6b@linux.intel.com> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8584 Lines: 249 Event groups allocated for CPUs different from the one that handles the multiplexing hrtimer interrupt may be skipped by the interrupt handler; however, the events' tstamp_enabled, tstamp_running and tstamp_stopped fields still need to be updated to have correct timings. To implement that, a tstamp_data object is introduced in the event context, and the skipped events' tstamp pointers are switched between their own and the context's tstamp_data objects. The context object's timings are updated by update_context_time() on every multiplexing hrtimer interrupt, so all events referencing the context object get their timings properly updated all at once. 
Event groups' tstamps are switched to the context object, and back to their own object, if they don't pass through event_filter_match() on thread context switch in and out. Signed-off-by: Alexey Budankov --- include/linux/perf_event.h | 36 ++++++++++++++++++---------- kernel/events/core.c | 58 ++++++++++++++++++++++++++++------------------ 2 files changed, 60 insertions(+), 34 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 282f121..69d60f2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -550,6 +550,22 @@ struct pmu_event_list { struct list_head list; }; +struct perf_event_tstamp { + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the event is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * enabled: the notional time when the event was enabled + * running: the notional time when the event was scheduled on + * stopped: in INACTIVE state, the notional time when the + * event was scheduled off. + */ + u64 enabled; + u64 running; + u64 stopped; +}; + /** * struct perf_event - performance event kernel representation: */ @@ -625,19 +641,11 @@ struct perf_event { u64 total_time_running; /* - * These are timestamps used for computing total_time_enabled - * and total_time_running when the event is in INACTIVE or - * ACTIVE state, measured in nanoseconds from an arbitrary point - * in time. - * tstamp_enabled: the notional time when the event was enabled - * tstamp_running: the notional time when the event was scheduled on - * tstamp_stopped: in INACTIVE state, the notional time when the - * event was scheduled off. 
+ * tstamp points to the tstamp_data object below or to the object + * located at the event context; */ - u64 tstamp_enabled; - u64 tstamp_running; - u64 tstamp_stopped; - + struct perf_event_tstamp *tstamp; + struct perf_event_tstamp tstamp_data; /* * timestamp shadows the actual context timing but it can * be safely used in NMI interrupt context. It reflects the @@ -772,6 +780,10 @@ struct perf_event_context { */ u64 time; u64 timestamp; + /* + * Context cache for filtered out events; + */ + struct perf_event_tstamp tstamp_data; /* * These fields let us detect when two contexts have both diff --git a/kernel/events/core.c b/kernel/events/core.c index 0a4f619..5ccb8a2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -866,10 +866,10 @@ perf_cgroup_mark_enabled(struct perf_event *event, event->cgrp_defer_enabled = 0; - event->tstamp_enabled = tstamp - event->total_time_enabled; + event->tstamp->enabled = tstamp - event->total_time_enabled; list_for_each_entry(sub, &event->sibling_list, group_entry) { if (sub->state >= PERF_EVENT_STATE_INACTIVE) { - sub->tstamp_enabled = tstamp - sub->total_time_enabled; + sub->tstamp->enabled = tstamp - sub->total_time_enabled; sub->cgrp_defer_enabled = 0; } } @@ -1379,6 +1379,9 @@ static void update_context_time(struct perf_event_context *ctx) ctx->time += now - ctx->timestamp; ctx->timestamp = now; + + ctx->tstamp_data.running += ctx->time - ctx->tstamp_data.stopped; + ctx->tstamp_data.stopped = ctx->time; } static u64 perf_event_time(struct perf_event *event) @@ -1420,16 +1423,16 @@ static void update_event_times(struct perf_event *event) else if (ctx->is_active) run_end = ctx->time; else - run_end = event->tstamp_stopped; + run_end = event->tstamp->stopped; - event->total_time_enabled = run_end - event->tstamp_enabled; + event->total_time_enabled = run_end - event->tstamp->enabled; if (event->state == PERF_EVENT_STATE_INACTIVE) - run_end = event->tstamp_stopped; + run_end = event->tstamp->stopped; else run_end = 
perf_event_time(event); - event->total_time_running = run_end - event->tstamp_running; + event->total_time_running = run_end - event->tstamp->running; } @@ -1968,9 +1971,13 @@ event_sched_out(struct perf_event *event, */ if (event->state == PERF_EVENT_STATE_INACTIVE && !event_filter_match(event)) { - delta = tstamp - event->tstamp_stopped; - event->tstamp_running += delta; - event->tstamp_stopped = tstamp; + delta = tstamp - event->tstamp->stopped; + event->tstamp->running += delta; + event->tstamp->stopped = tstamp; + if (event->tstamp != &event->tstamp_data) { + event->tstamp_data = *event->tstamp; + event->tstamp = &event->tstamp_data; + } } if (event->state != PERF_EVENT_STATE_ACTIVE) @@ -1978,7 +1985,7 @@ event_sched_out(struct perf_event *event, perf_pmu_disable(event->pmu); - event->tstamp_stopped = tstamp; + event->tstamp->stopped = tstamp; event->pmu->del(event, 0); event->oncpu = -1; event->state = PERF_EVENT_STATE_INACTIVE; @@ -2269,7 +2276,7 @@ event_sched_in(struct perf_event *event, goto out; } - event->tstamp_running += tstamp - event->tstamp_stopped; + event->tstamp->running += tstamp - event->tstamp->stopped; if (!is_software_event(event)) cpuctx->active_oncpu++; @@ -2341,8 +2348,8 @@ group_sched_in(struct perf_event *group_event, simulate = true; if (simulate) { - event->tstamp_running += now - event->tstamp_stopped; - event->tstamp_stopped = now; + event->tstamp->running += now - event->tstamp->stopped; + event->tstamp->stopped = now; } else { event_sched_out(event, cpuctx, ctx); } @@ -2394,9 +2401,9 @@ static void add_event_to_ctx(struct perf_event *event, list_add_event(event, ctx); perf_group_attach(event); - event->tstamp_enabled = tstamp; - event->tstamp_running = tstamp; - event->tstamp_stopped = tstamp; + event->tstamp->enabled = tstamp; + event->tstamp->running = tstamp; + event->tstamp->stopped = tstamp; } static void ctx_sched_out(struct perf_event_context *ctx, @@ -2641,10 +2648,10 @@ static void __perf_event_mark_enabled(struct 
perf_event *event) u64 tstamp = perf_event_time(event); event->state = PERF_EVENT_STATE_INACTIVE; - event->tstamp_enabled = tstamp - event->total_time_enabled; + event->tstamp->enabled = tstamp - event->total_time_enabled; list_for_each_entry(sub, &event->sibling_list, group_entry) { if (sub->state >= PERF_EVENT_STATE_INACTIVE) - sub->tstamp_enabled = tstamp - sub->total_time_enabled; + sub->tstamp->enabled = tstamp - sub->total_time_enabled; } } @@ -3239,8 +3246,11 @@ ctx_pinned_sched_in(struct perf_event *event, void *data) if (event->state <= PERF_EVENT_STATE_OFF) return 0; - if (!event_filter_match(event)) + if (!event_filter_match(event)) { + if (event->tstamp != &params->ctx->tstamp_data) + event->tstamp = &params->ctx->tstamp_data; return 0; + } /* may need to reset tstamp_enabled */ if (is_cgroup_event(event)) @@ -3273,8 +3283,11 @@ ctx_flexible_sched_in(struct perf_event *event, void *data) * Listen to the 'cpu' scheduling filter constraint * of events: */ - if (!event_filter_match(event)) + if (!event_filter_match(event)) { + if (event->tstamp != &params->ctx->tstamp_data) + event->tstamp = &params->ctx->tstamp_data; return 0; + } /* may need to reset tstamp_enabled */ if (is_cgroup_event(event)) @@ -5042,8 +5055,8 @@ static void calc_timer_values(struct perf_event *event, *now = perf_clock(); ctx_time = event->shadow_ctx_time + *now; - *enabled = ctx_time - event->tstamp_enabled; - *running = ctx_time - event->tstamp_running; + *enabled = ctx_time - event->tstamp->enabled; + *running = ctx_time - event->tstamp->running; } static void perf_event_init_userpage(struct perf_event *event) @@ -9568,6 +9581,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, raw_spin_lock_init(&event->addr_filters.lock); atomic_long_set(&event->refcount, 1); + event->tstamp = &event->tstamp_data; event->cpu = cpu; event->attr = *attr; event->group_leader = group_leader;