Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758545AbZCaOAL (ORCPT ); Tue, 31 Mar 2009 10:00:11 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1757213AbZCaN74 (ORCPT ); Tue, 31 Mar 2009 09:59:56 -0400 Received: from viefep16-int.chello.at ([62.179.121.36]:12174 "EHLO viefep16-int.chello.at" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756114AbZCaN7z (ORCPT ); Tue, 31 Mar 2009 09:59:55 -0400 X-SourceIP: 213.93.53.227 Subject: Re: [PATCH 13/15] perf_counter: provide generic callchain bits From: Peter Zijlstra To: Paul Mackerras Cc: Corey Ashford , Ingo Molnar , linux-kernel@vger.kernel.org In-Reply-To: <18897.56973.324304.995540@cargo.ozlabs.ibm.com> References: <20090330170701.856843742@chello.nl> <20090330171024.254266860@chello.nl> <18897.46177.528910.51044@cargo.ozlabs.ibm.com> <1238481552.28248.1384.camel@twins> <49D1C544.7020403@linux.vnet.ibm.com> <18897.56973.324304.995540@cargo.ozlabs.ibm.com> Content-Type: text/plain Date: Tue, 31 Mar 2009 16:00:32 +0200 Message-Id: <1238508032.8530.278.camel@twins> Mime-Version: 1.0 X-Mailer: Evolution 2.26.0 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8273 Lines: 281 On Tue, 2009-03-31 at 20:12 +1100, Paul Mackerras wrote: > Corey Ashford writes: > > > If this is the case, what is the exact meaning of PERF_COUNTER_SIMPLE > > now? and PERF_COUNTER_GROUP? One simplification would be that reading > > the fd of a group leader would always read up all of the counters in the > > group (along with their enabled and running times if those bits are > > set), and that reading a single counter's fd would yield only that > > counter's values and times (if enabled). In effect, these two values, > > PERF_COUNTER_GROUP and PERF_COUNTER_SIMPLE would no longer be necessary > > at all. Other bits would be used to determine what's in the mmap'd samples. > > Now that events are only read through mmap, it's quite simple - > hw_event.read_format controls what read() gives you, and > hw_event.record_type controls what gets put into the pages that you > get with mmap. > > Currently read_format and record_type don't use the same set of bit > definitions. Maybe they should? I don't have a strong feeling about > it, but that might be a nice simplification. Something like the below? That still has the record and read things separate, but as one unified overflow output. I reduced record and read fields to 32bits, as the output type only has 32bits. diff did a wonderful job making the patch unreadable :/ --- Subject: From: Peter Zijlstra Date: Tue Mar 31 15:13:36 CEST 2009 Signed-off-by: Peter Zijlstra --- include/linux/perf_counter.h | 37 +++++++--------- kernel/perf_counter.c | 99 ++++++++++++++++--------------------------- 2 files changed, 57 insertions(+), 79 deletions(-) Index: linux-2.6/include/linux/perf_counter.h =================================================================== --- linux-2.6.orig/include/linux/perf_counter.h +++ linux-2.6/include/linux/perf_counter.h @@ -73,15 +73,6 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - #define __PERF_COUNTER_MASK(name) \ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ PERF_COUNTER_##name##_SHIFT) @@ -103,6 +94,17 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) /* + * Bits that can be set in hw_event.record_type to request information + * in the overflow packets. + */ +enum perf_counter_record_format { + PERF_RECORD_IP = 1U << 0, + PERF_RECORD_TID = 1U << 1, + PERF_RECORD_GROUP = 1U << 2, + PERF_RECORD_CALLCHAIN = 1U << 3, +}; + +/* * Bits that can be set in hw_event.read_format to request that * reads on the counter should return the indicated quantities, * in increasing order of bit value, after the counter value. @@ -125,8 +127,8 @@ struct perf_counter_hw_event { __u64 config; __u64 irq_period; - __u64 record_type; - __u64 read_format; + __u32 record_type; + __u32 read_format; __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ @@ -137,12 +139,10 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ - callchain : 1, /* add callchain data */ - __reserved_1 : 51; + __reserved_1 : 53; __u32 extra_config_len; __u32 __reserved_4; @@ -212,15 +212,14 @@ struct perf_event_header { enum perf_event_type { - PERF_EVENT_GROUP = 1, - - PERF_EVENT_MMAP = 2, - PERF_EVENT_MUNMAP = 3, + PERF_EVENT_MMAP = 1, + PERF_EVENT_MUNMAP = 2, PERF_EVENT_OVERFLOW = 1UL << 31, __PERF_EVENT_IP = 1UL << 30, __PERF_EVENT_TID = 1UL << 29, - __PERF_EVENT_CALLCHAIN = 1UL << 28, + __PERF_EVENT_GROUP = 1UL << 28, + __PERF_EVENT_CALLCHAIN = 1UL << 27, }; #ifdef __KERNEL__ Index: linux-2.6/kernel/perf_counter.c =================================================================== --- linux-2.6.orig/kernel/perf_counter.c +++ linux-2.6/kernel/perf_counter.c @@ -1765,27 +1765,34 @@ static void perf_output_end(struct perf_ rcu_read_unlock(); } -static void perf_output_simple(struct perf_counter *counter, - int nmi, struct pt_regs *regs) +void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) { int ret; + u64 record_type = counter->hw_event.record_type; struct perf_output_handle handle; struct perf_event_header header; u64 ip; struct { u32 pid, tid; } tid_entry; + struct { + u64 event; + u64 counter; + } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; header.type = PERF_EVENT_OVERFLOW; header.size = sizeof(header); - ip = instruction_pointer(regs); - header.type |= __PERF_EVENT_IP; - header.size += sizeof(ip); + if (record_type & PERF_RECORD_IP) { + ip = instruction_pointer(regs); + header.type |= __PERF_EVENT_IP; + header.size += sizeof(ip); + } - if (counter->hw_event.include_tid) { + if (record_type & PERF_RECORD_TID) { /* namespace issues */ tid_entry.pid = current->group_leader->pid; tid_entry.tid = current->pid; @@ -1794,7 +1801,13 @@ static void perf_output_simple(struct pe header.size += sizeof(tid_entry); } - if (counter->hw_event.callchain) { + if (record_type & PERF_RECORD_GROUP) { + header.type |= __PERF_EVENT_GROUP; + header.size += sizeof(u64) + + counter->nr_siblings * sizeof(group_entry); + } + + if (record_type & PERF_RECORD_CALLCHAIN) { callchain = perf_callchain(regs); if (callchain) { @@ -1810,69 +1823,35 @@ static void perf_output_simple(struct pe return; perf_output_put(&handle, header); - perf_output_put(&handle, ip); - if (counter->hw_event.include_tid) - perf_output_put(&handle, tid_entry); + if (record_type & PERF_RECORD_IP) + perf_output_put(&handle, ip); - if (callchain) - perf_output_copy(&handle, callchain, callchain_size); - - perf_output_end(&handle); -} - -static void perf_output_group(struct perf_counter *counter, int nmi) -{ - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_counter *leader, *sub; - unsigned int size; - struct { - u64 event; - u64 counter; - } entry; - int ret; - - size = sizeof(header) + counter->nr_siblings * sizeof(entry); + if (record_type & PERF_RECORD_TID) + perf_output_put(&handle, tid_entry); - ret = perf_output_begin(&handle, counter, size, nmi); - if (ret) - return; + if (record_type & PERF_RECORD_GROUP) { + struct perf_counter *leader, *sub; + u64 nr = counter->nr_siblings; - header.type = PERF_EVENT_GROUP; - header.size = size; + perf_output_put(&handle, nr); - perf_output_put(&handle, header); + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->hw_ops->read(sub); + group_entry.event = sub->hw_event.config; + group_entry.counter = atomic64_read(&sub->count); - entry.event = sub->hw_event.config; - entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, entry); + perf_output_put(&handle, group_entry); + } } - perf_output_end(&handle); -} - -void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) -{ - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - return; - - case PERF_RECORD_IRQ: - perf_output_simple(counter, nmi, regs); - break; + if (callchain) + perf_output_copy(&handle, callchain, callchain_size); - case PERF_RECORD_GROUP: - perf_output_group(counter, nmi); - break; - } + perf_output_end(&handle); } /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/