Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755865AbdIGR56 (ORCPT ); Thu, 7 Sep 2017 13:57:58 -0400 Received: from mga09.intel.com ([134.134.136.24]:39783 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755582AbdIGR4b (ORCPT ); Thu, 7 Sep 2017 13:56:31 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.42,359,1500966000"; d="scan'208";a="149347712" From: kan.liang@intel.com To: acme@kernel.org, peterz@infradead.org, mingo@redhat.com, linux-kernel@vger.kernel.org Cc: jolsa@kernel.org, namhyung@kernel.org, adrian.hunter@intel.com, lukasz.odzioba@intel.com, ak@linux.intel.com, Kan Liang Subject: [PATCH RFC 01/10] perf tools: hashtable for machine threads Date: Thu, 7 Sep 2017 10:55:45 -0700 Message-Id: <1504806954-150842-2-git-send-email-kan.liang@intel.com> X-Mailer: git-send-email 2.5.5 In-Reply-To: <1504806954-150842-1-git-send-email-kan.liang@intel.com> References: <1504806954-150842-1-git-send-email-kan.liang@intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 13093 Lines: 408 From: Kan Liang To process any events, it needs to find the thread in the machine first. The machine maintains a rb tree to store all threads. The rb tree is protected by a rw lock. It is not a problem for current perf which serially processing events. However, it will have scalability performance issue to process events in parallel, especially on a heave load system which have many threads. Introduce a hashtable to divide the big rb tree into many samll rb tree for threads. The index is thread id % hashtable size. It can reduce the lock contention. Signed-off-by: Kan Liang --- tools/perf/builtin-trace.c | 19 +++--- tools/perf/util/machine.c | 139 ++++++++++++++++++++++++++++---------------- tools/perf/util/machine.h | 23 ++++++-- tools/perf/util/rb_resort.h | 5 +- 4 files changed, 120 insertions(+), 66 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 771ddab..af4e4e4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2730,20 +2730,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host); size_t printed = trace__fprintf_threads_header(fp); struct rb_node *nd; + int i; - if (threads == NULL) { - fprintf(fp, "%s", "Error sorting output by nr_events!\n"); - return 0; - } + for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) { + DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); - resort_rb__for_each_entry(nd, threads) - printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + if (threads == NULL) { + fprintf(fp, "%s", "Error sorting output by nr_events!\n"); + return 0; + } - resort_rb__delete(threads); + resort_rb__for_each_entry(nd, threads) + printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + resort_rb__delete(threads); + } return printed; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df70936..5e451f9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -33,6 +33,21 @@ static void dsos__init(struct dsos *dsos) pthread_rwlock_init(&dsos->lock, NULL); } +static void machine__thread_init(struct machine *machine) +{ + struct machine_th *machine_th; + int i; + + for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) { + machine_th = &machine->threads[i]; + machine_th->threads = RB_ROOT; + pthread_rwlock_init(&machine_th->threads_lock, NULL); + machine_th->nr_threads = 0; + INIT_LIST_HEAD(&machine_th->dead_threads); + machine_th->last_match = NULL; + } +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { memset(machine, 0, sizeof(*machine)); @@ -40,11 +55,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); - machine->threads = RB_ROOT; - pthread_rwlock_init(&machine->threads_lock, NULL); - machine->nr_threads = 0; - INIT_LIST_HEAD(&machine->dead_threads); - machine->last_match = NULL; + machine__thread_init(machine); machine->vdso_info = NULL; machine->env = NULL; @@ -140,28 +151,39 @@ static void dsos__exit(struct dsos *dsos) void machine__delete_threads(struct machine *machine) { + struct machine_th *machine_th; struct rb_node *nd; + int i; - pthread_rwlock_wrlock(&machine->threads_lock); - nd = rb_first(&machine->threads); - while (nd) { - struct thread *t = rb_entry(nd, struct thread, rb_node); + for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) { + machine_th = &machine->threads[i]; + pthread_rwlock_wrlock(&machine_th->threads_lock); + nd = rb_first(&machine_th->threads); + while (nd) { + struct thread *t = rb_entry(nd, struct thread, rb_node); - nd = rb_next(nd); - __machine__remove_thread(machine, t, false); + nd = rb_next(nd); + __machine__remove_thread(machine, t, false); + } + pthread_rwlock_unlock(&machine_th->threads_lock); } - pthread_rwlock_unlock(&machine->threads_lock); } void machine__exit(struct machine *machine) { + struct machine_th *machine_th; + int i; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); - pthread_rwlock_destroy(&machine->threads_lock); + for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) { + machine_th = &machine->threads[i]; + pthread_rwlock_destroy(&machine_th->threads_lock); + } } void machine__delete(struct machine *machine) @@ -382,7 +404,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid, bool create) { - struct rb_node **p = &machine->threads.rb_node; + struct machine_th *machine_th = machine_thread(machine, tid); + struct rb_node **p = &machine_th->threads.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -391,14 +414,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * so most of the time we dont have to look up * the full rbtree: */ - th = machine->last_match; + th = machine_th->last_match; if (th != NULL) { if (th->tid == tid) { machine__update_thread_pid(machine, th, pid); return thread__get(th); } - machine->last_match = NULL; + machine_th->last_match = NULL; } while (*p != NULL) { @@ -406,7 +429,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + machine_th->last_match = th; machine__update_thread_pid(machine, th, pid); return thread__get(th); } @@ -423,7 +446,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &machine->threads); + rb_insert_color(&th->rb_node, &machine_th->threads); /* * We have to initialize map_groups separately @@ -434,7 +457,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &machine->threads); + rb_erase_init(&th->rb_node, &machine_th->threads); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -443,8 +466,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * It is now in the rbtree, get a ref */ thread__get(th); - machine->last_match = th; - ++machine->nr_threads; + machine_th->last_match = th; + ++machine_th->nr_threads; } return th; @@ -458,21 +481,24 @@ struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct machine_th *machine_th = machine_thread(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&machine->threads_lock); + pthread_rwlock_wrlock(&machine_th->threads_lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&machine_th->threads_lock); return th; } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct machine_th *machine_th = machine_thread(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&machine->threads_lock); + + pthread_rwlock_rdlock(&machine_th->threads_lock); th = ____machine__findnew_thread(machine, pid, tid, false); - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&machine_th->threads_lock); return th; } @@ -719,21 +745,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret; + struct machine_th *machine_th; struct rb_node *nd; + size_t ret; + int i; - pthread_rwlock_rdlock(&machine->threads_lock); - - ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) { + machine_th = &machine->threads[i]; + pthread_rwlock_rdlock(&machine_th->threads_lock); - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); + ret = fprintf(fp, "Threads: %u\n", machine_th->nr_threads); - ret += thread__fprintf(pos, fp); - } + for (nd = rb_first(&machine_th->threads); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); - pthread_rwlock_unlock(&machine->threads_lock); + ret += thread__fprintf(pos, fp); + } + pthread_rwlock_unlock(&machine_th->threads_lock); + } return ret; } @@ -1479,23 +1509,25 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) { - if (machine->last_match == th) - machine->last_match = NULL; + struct machine_th *machine_th = machine_thread(machine, th->tid); + + if (machine_th->last_match == th) + machine_th->last_match = NULL; BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&machine->threads_lock); - rb_erase_init(&th->rb_node, &machine->threads); + pthread_rwlock_wrlock(&machine_th->threads_lock); + rb_erase_init(&th->rb_node, &machine_th->threads); RB_CLEAR_NODE(&th->rb_node); - --machine->nr_threads; + --machine_th->nr_threads; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor * will be called and we will remove it from the dead_threads list. */ - list_add_tail(&th->node, &machine->dead_threads); + list_add_tail(&th->node, &machine_th->dead_threads); if (lock) - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&machine_th->threads_lock); thread__put(th); } @@ -2140,21 +2172,26 @@ int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv) { + struct machine_th *machine_th; struct rb_node *nd; struct thread *thread; int rc = 0; + int i; - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - thread = rb_entry(nd, struct thread, rb_node); - rc = fn(thread, priv); - if (rc != 0) - return rc; - } + for (i = 0; i < MACHINE_TH_TABLE_SIZE; i++) { + machine_th = &machine->threads[i]; + for (nd = rb_first(&machine_th->threads); nd; nd = rb_next(nd)) { + thread = rb_entry(nd, struct thread, rb_node); + rc = fn(thread, priv); + if (rc != 0) + return rc; + } - list_for_each_entry(thread, &machine->dead_threads, node) { - rc = fn(thread, priv); - if (rc != 0) - return rc; + list_for_each_entry(thread, &machine_th->dead_threads, node) { + rc = fn(thread, priv); + if (rc != 0) + return rc; + } } return rc; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 3cdb134..745a0ca 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[]; struct vdso_info; +#define MACHINE_TH_TABLE_BITS 8 +#define MACHINE_TH_TABLE_SIZE (1 << MACHINE_TH_TABLE_BITS) + +struct machine_th { + struct rb_root threads; + pthread_rwlock_t threads_lock; + unsigned int nr_threads; + struct list_head dead_threads; + struct thread *last_match; +}; + struct machine { struct rb_node rb_node; pid_t pid; @@ -30,11 +41,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; - struct rb_root threads; - pthread_rwlock_t threads_lock; - unsigned int nr_threads; - struct list_head dead_threads; - struct thread *last_match; + struct machine_th threads[MACHINE_TH_TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; struct dsos dsos; @@ -49,6 +56,12 @@ struct machine { }; static inline +struct machine_th *machine_thread(struct machine *machine, pid_t tid) +{ + return &machine->threads[tid % MACHINE_TH_TABLE_SIZE]; +} + +static inline struct map *__machine__kernel_map(struct machine *machine, enum map_type type) { return machine->vmlinux_maps[type]; diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index 808cc45..bbd78fa 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h @@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new __ilist->rblist.nr_entries) /* For 'struct machine->threads' */ -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ - DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, tid) \ + DECLARE_RESORT_RB(__name)(&__machine->threads[tid].threads, \ + __machine->threads[tid].nr_threads) #endif /* _PERF_RESORT_RB_H_ */ -- 2.5.5