Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751898AbdIMN3Z (ORCPT ); Wed, 13 Sep 2017 09:29:25 -0400 Received: from mail.kernel.org ([198.145.29.99]:44426 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751019AbdIMN3V (ORCPT ); Wed, 13 Sep 2017 09:29:21 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 59C1220C48 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=kernel.org Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=acme@kernel.org Date: Wed, 13 Sep 2017 10:29:16 -0300 From: Arnaldo Carvalho de Melo To: kan.liang@intel.com Cc: peterz@infradead.org, mingo@redhat.com, linux-kernel@vger.kernel.org, jolsa@kernel.org, namhyung@kernel.org, adrian.hunter@intel.com, lukasz.odzioba@intel.com, ak@linux.intel.com Subject: Re: [PATCH RFC V2 01/10] perf tools: hashtable for machine threads Message-ID: <20170913132916.GC5866@kernel.org> References: <1505096603-215017-1-git-send-email-kan.liang@intel.com> <1505096603-215017-2-git-send-email-kan.liang@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1505096603-215017-2-git-send-email-kan.liang@intel.com> X-Url: http://acmel.wordpress.com User-Agent: Mutt/1.8.3 (2017-05-23) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7904 Lines: 270 Em Sun, Sep 10, 2017 at 07:23:14PM -0700, kan.liang@intel.com escreveu: > From: Kan Liang > > To process any events, it needs to find the thread in the machine first. > The machine maintains a rb tree to store all threads. The rb tree is > protected by a rw lock. > It is not a problem for current perf which serially processes events. > However, it will have a scalability performance issue to process events in > parallel, especially on a heavy load system which has many threads. 
> > Introduce a hashtable to divide the big rb tree into many small rb trees > for threads. The index is thread id % hashtable size. It can reduce the > lock contention. Ok, I renamed variables of the type 'struct threads' from 'thread' to 'threads', to reduce confusion when looking at a specific variable as to its type, so if you see a variable named 'threads', it's of type 'struct threads', if it is 'thread', ditto. Interdiff from your patch to mine is below, now I'm testing this. No need to resend anything now, I'll cope with whatever fallout due to this comes up, if any. - Arnaldo diff -u b/tools/perf/util/machine.c b/tools/perf/util/machine.c --- b/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -33,18 +33,17 @@ pthread_rwlock_init(&dsos->lock, NULL); } -static void machine__thread_init(struct machine *machine) +static void machine__threads_init(struct machine *machine) { - struct threads *thread; int i; for (i = 0; i < THREADS__TABLE_SIZE; i++) { - thread = &machine->threads[i]; - thread->entries = RB_ROOT; - pthread_rwlock_init(&thread->lock, NULL); - thread->nr = 0; - INIT_LIST_HEAD(&thread->dead); - thread->last_match = NULL; + struct threads *threads = &machine->threads[i]; + threads->entries = RB_ROOT; + pthread_rwlock_init(&threads->lock, NULL); + threads->nr = 0; + INIT_LIST_HEAD(&threads->dead); + threads->last_match = NULL; } } @@ -55,7 +54,7 @@ RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); - machine__thread_init(machine); + machine__threads_init(machine); machine->vdso_info = NULL; machine->env = NULL; @@ -151,27 +150,25 @@ void machine__delete_threads(struct machine *machine) { - struct threads *thread; struct rb_node *nd; int i; for (i = 0; i < THREADS__TABLE_SIZE; i++) { - thread = &machine->threads[i]; - pthread_rwlock_wrlock(&thread->lock); - nd = rb_first(&thread->entries); + struct threads *threads = &machine->threads[i]; + pthread_rwlock_wrlock(&threads->lock); + nd = rb_first(&threads->entries); while (nd) { 
struct thread *t = rb_entry(nd, struct thread, rb_node); nd = rb_next(nd); __machine__remove_thread(machine, t, false); } - pthread_rwlock_unlock(&thread->lock); + pthread_rwlock_unlock(&threads->lock); } } void machine__exit(struct machine *machine) { - struct threads *thread; int i; machine__destroy_kernel_maps(machine); @@ -180,9 +177,10 @@ machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { - thread = &machine->threads[i]; - pthread_rwlock_destroy(&thread->lock); + struct threads *threads = &machine->threads[i]; + pthread_rwlock_destroy(&threads->lock); } } @@ -404,8 +402,8 @@ pid_t pid, pid_t tid, bool create) { - struct threads *thread = machine__thread(machine, tid); - struct rb_node **p = &thread->entries.rb_node; + struct threads *threads = machine__thread(machine, tid); + struct rb_node **p = &threads->entries.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -414,14 +412,14 @@ * so most of the time we dont have to look up * the full rbtree: */ - th = thread->last_match; + th = threads->last_match; if (th != NULL) { if (th->tid == tid) { machine__update_thread_pid(machine, th, pid); return thread__get(th); } - thread->last_match = NULL; + threads->last_match = NULL; } while (*p != NULL) { @@ -429,7 +427,7 @@ th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - thread->last_match = th; + threads->last_match = th; machine__update_thread_pid(machine, th, pid); return thread__get(th); } @@ -446,7 +444,7 @@ th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &thread->entries); + rb_insert_color(&th->rb_node, &threads->entries); /* * We have to initialize map_groups separately @@ -457,7 +455,7 @@ * leader and that would screwed the rb tree. 
*/ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &thread->entries); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -466,8 +464,8 @@ * It is now in the rbtree, get a ref */ thread__get(th); - thread->last_match = th; - ++thread->nr; + threads->last_match = th; + ++threads->nr; } return th; @@ -481,24 +479,24 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { - struct threads *thread = machine__thread(machine, tid); + struct threads *threads = machine__thread(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&thread->lock); + pthread_rwlock_wrlock(&threads->lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&thread->lock); + pthread_rwlock_unlock(&threads->lock); return th; } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { - struct threads *thread = machine__thread(machine, tid); + struct threads *threads = machine__thread(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&thread->lock); + pthread_rwlock_rdlock(&threads->lock); th = ____machine__findnew_thread(machine, pid, tid, false); - pthread_rwlock_unlock(&thread->lock); + pthread_rwlock_unlock(&threads->lock); return th; } @@ -745,24 +743,23 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) { - struct threads *thread; struct rb_node *nd; size_t ret; int i; for (i = 0; i < THREADS__TABLE_SIZE; i++) { - thread = &machine->threads[i]; - pthread_rwlock_rdlock(&thread->lock); + struct threads *threads = &machine->threads[i]; + pthread_rwlock_rdlock(&threads->lock); - ret = fprintf(fp, "Threads: %u\n", thread->nr); + ret = fprintf(fp, "Threads: %u\n", threads->nr); - for (nd = rb_first(&thread->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); } - 
pthread_rwlock_unlock(&thread->lock); + pthread_rwlock_unlock(&threads->lock); } return ret; } @@ -1509,25 +1506,25 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) { - struct threads *thread = machine__thread(machine, th->tid); + struct threads *threads = machine__thread(machine, th->tid); - if (thread->last_match == th) - thread->last_match = NULL; + if (threads->last_match == th) + threads->last_match = NULL; BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&thread->lock); - rb_erase_init(&th->rb_node, &thread->entries); + pthread_rwlock_wrlock(&threads->lock); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); - --thread->nr; + --threads->nr; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor * will be called and we will remove it from the dead_threads list. */ - list_add_tail(&th->node, &thread->dead); + list_add_tail(&th->node, &threads->dead); if (lock) - pthread_rwlock_unlock(&thread->lock); + pthread_rwlock_unlock(&threads->lock); thread__put(th); }