Date: Wed, 15 Apr 2015 20:10:31 +0200
From: Peter Zijlstra
To: "Liang, Kan"
Cc: linux-kernel@vger.kernel.org, mingo@kernel.org, acme@infradead.org,
	eranian@google.com, andi@firstfloor.org
Subject: Re: [PATCH V6 3/6] perf, x86: large PEBS interrupt threshold
Message-ID: <20150415181031.GW21418@twins.programming.kicks-ass.net>
In-Reply-To: <37D7C6CF3E00A74B8858931C1DB2F077017C7149@SHSMSX103.ccr.corp.intel.com>

On Wed, Apr 15, 2015 at 05:48:39PM +0000, Liang, Kan wrote:
> > > +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> > > @@ -2306,7 +2306,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
> > >  		return ret;
> > >  
> > >  	if (event->attr.precise_ip) {
> > > -		if (!event->attr.freq)
> > > +		/* only enable auto reload when fixed period and large PEBS */
> > > +		if (!event->attr.freq &&
> > > +		    !(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
> > >  			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
> > >  		if (x86_pmu.pebs_aliases)
> > >  			x86_pmu.pebs_aliases(event);
> > 
> > I suspect you meant the above change, right?
> 
> Yes.
> 
> > But this negates part of the benefit of the auto reload; where
> > previously it saved an MSR write for pretty much all PEBS usage, it
> > now becomes a burden for pretty much everyone.
> > 
> > Why can't we retain the win for all PEBS users?
> 
> The change tries to address your comments:
> https://lkml.org/lkml/2015/3/30/294
> 
> Yes, we can retain the win. If so, I think we need to introduce
> another flag like PERF_X86_EVENT_LARGE_PEBS and check it in
> pebs_is_enabled(). Or just keep the previous V5 patch unchanged.

Right, something like so.
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -65,16 +65,17 @@ struct event_constraint {
 /*
  * struct hw_perf_event.flags flags
  */
-#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL		0x40 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC		0x80 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
-#define PERF_X86_EVENT_AUTO_RELOAD	0x80 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST		0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED	0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW	0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW	0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_AUTO_RELOAD	0x0200 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING	0x0400 /* use freerunning PEBS */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -87,6 +88,17 @@ struct amd_nb {
 #define MAX_PEBS_EVENTS		8
 
 /*
+ * Flags PEBS can handle without a PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ */
+#define PEBS_FREERUNNING_FLAGS \
+	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+	PERF_SAMPLE_TRANSACTION)
+
+/*
  * A debug store configuration.
  *
  * We only support architectures that use 64bit fields.
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2306,8 +2306,12 @@ static int intel_pmu_hw_config(struct pe
 		return ret;
 
 	if (event->attr.precise_ip) {
-		if (!event->attr.freq)
+		if (!event->attr.freq) {
 			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+
+			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
 	}
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 	int node = cpu_to_node(cpu);
-	int max, thresh = 1; /* always use a single PEBS record */
+	int max;
 	void *buffer, *ibuffer;
 
 	if (!x86_pmu.pebs)
@@ -277,11 +277,13 @@ static int alloc_pebs_buffer(int cpu)
 
 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
+	ds->pebs_absolute_maximum =
+		ds->pebs_buffer_base + max * x86_pmu.pebs_record_size;
 
-	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-		thresh * x86_pmu.pebs_record_size;
+	if (x86_pmu.intel_cap.pebs_format < 1) {
+		ds->pebs_interrupt_threshold =
+			ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
 
 	return 0;
 }
@@ -684,14 +686,22 @@ struct event_constraint *intel_pebs_cons
 	return &emptyconstraint;
 }
 
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
+	bool first_pebs;
+	u64 threshold;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
+	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -699,11 +709,26 @@ void intel_pmu_pebs_enable(struct perf_e
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled |= 1ULL << 63;
 
+	/*
+	 * When the event is constrained enough we can use a larger
+	 * threshold and run the event with less frequent PMI.
+	 */
+	if (0 && /* disable this temporarily */
+	    (hwc->flags & PERF_X86_EVENT_FREERUNNING)) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
 	/* Use auto-reload if possible to save an MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+
+	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+		ds->pebs_interrupt_threshold = threshold;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
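To make the PEBS_FREERUNNING_FLAGS gating concrete, here is a stand-alone
user-space sketch (illustrative only, not part of the patch: freerunning_ok()
is a made-up helper mirroring the in-kernel test; the PERF_SAMPLE_* bit
values are the ones from include/uapi/linux/perf_event.h). An event may only
batch records when every requested sample_type field can be reconstructed
from the PEBS buffer alone:

/* Illustration only; mirrors !(attr.sample_type & ~PEBS_FREERUNNING_FLAGS) */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* bit values as in include/uapi/linux/perf_event.h */
#define PERF_SAMPLE_IP			(1U << 0)
#define PERF_SAMPLE_TID			(1U << 1)
#define PERF_SAMPLE_ADDR		(1U << 3)
#define PERF_SAMPLE_CALLCHAIN		(1U << 5)
#define PERF_SAMPLE_ID			(1U << 6)
#define PERF_SAMPLE_CPU			(1U << 7)
#define PERF_SAMPLE_STREAM_ID		(1U << 9)
#define PERF_SAMPLE_DATA_SRC		(1U << 15)
#define PERF_SAMPLE_IDENTIFIER		(1U << 16)
#define PERF_SAMPLE_TRANSACTION		(1U << 17)

#define PEBS_FREERUNNING_FLAGS \
	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
	 PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
	 PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
	 PERF_SAMPLE_TRANSACTION)

/* made-up helper: true iff every requested field comes from the PEBS record */
static bool freerunning_ok(uint64_t sample_type)
{
	return !(sample_type & ~(uint64_t)PEBS_FREERUNNING_FLAGS);
}

int main(void)
{
	/* everything is in the PEBS record itself -> records can batch */
	printf("IP|ADDR:      %d\n",
	       freerunning_ok(PERF_SAMPLE_IP | PERF_SAMPLE_ADDR));
	/* a callchain must be unwound at interrupt time -> PMI per record */
	printf("IP|CALLCHAIN: %d\n",
	       freerunning_ok(PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN));
	return 0;
}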
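And for a sense of what the two thresholds in intel_pmu_pebs_enable() work
out to, a sketch with made-up numbers (the 64K buffer, 192-byte record size
and the zero base address are illustrative assumptions, not values from this
patch; only the arithmetic matches):

/* Illustration only; all concrete numbers are assumptions. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base       = 0;		/* pebs_buffer_base (pretend) */
	uint64_t buf_size   = 64 * 1024;	/* assumed PEBS buffer size */
	uint64_t rec_size   = 192;		/* assumed pebs_record_size */
	uint64_t max_events = 8;		/* MAX_PEBS_EVENTS */

	uint64_t max     = buf_size / rec_size;	  /* records that fit */
	uint64_t abs_max = base + max * rec_size; /* pebs_absolute_maximum */

	/* freerunning: leave one record of headroom per counter at the PMI */
	uint64_t thresh_large  = abs_max - max_events * rec_size;
	/* classic: interrupt after every single record */
	uint64_t thresh_single = base + rec_size;

	printf("%llu records fit; large PEBS interrupts after %llu, classic after 1\n",
	       (unsigned long long)max,
	       (unsigned long long)((thresh_large - base) / rec_size));

	/* all events share one DS threshold, so keep the smallest */
	uint64_t cur = thresh_large;
	if (thresh_single < cur)	/* a non-freerunning event joins in */
		cur = thresh_single;
	printf("effective threshold: %llu bytes into the buffer\n",
	       (unsigned long long)(cur - base));
	return 0;
}

The "keep the smallest" step is why the flag is per-event: one event that
needs a PMI per record pulls the shared ds->pebs_interrupt_threshold back
to the classic value for everyone on that CPU.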
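Finally, on the "saves an MSR write" point from the discussion above: the
win comes from pebs_event_reset, which the hardware reloads into the counter
after each record so the PMI handler need not wrmsr() it back. A user-space
sketch of the reset-value arithmetic (the 48-bit counter width is an
assumption for illustration; the real width comes from cntval_mask):

/* Illustration only: the reset value auto-reload programs into the counter. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cntval_mask   = (1ULL << 48) - 1;	/* assumed 48-bit counter */
	uint64_t sample_period = 100003;

	/* the counter counts up and overflows at 2^48: start it "period" short */
	uint64_t reset = (uint64_t)(-(int64_t)sample_period) & cntval_mask;

	printf("reset value: 0x%llx\n", (unsigned long long)reset);
	/* events counted from reload to the next overflow == the period */
	printf("events until overflow: %llu\n",
	       (unsigned long long)((cntval_mask + 1) - reset));
	return 0;
}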