Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754644AbZCUEq5 (ORCPT ); Sat, 21 Mar 2009 00:46:57 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1750882AbZCUEqq (ORCPT ); Sat, 21 Mar 2009 00:46:46 -0400 Received: from byss.tchmachines.com ([208.76.80.75]:54813 "EHLO byss.tchmachines.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750706AbZCUEqp (ORCPT ); Sat, 21 Mar 2009 00:46:45 -0400 Date: Fri, 20 Mar 2009 21:46:37 -0700 From: Ravikiran G Thirumalai To: linux-kernel@vger.kernel.org Cc: Ingo Molnar , shai@scalex86.org Subject: [rfc] [patch 1/2 ] Process private hash tables for private futexes Message-ID: <20090321044637.GA7278@localdomain> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.15+20070412 (2007-04-11) X-AntiAbuse: This header was added to track abuse, please include it with any abuse report X-AntiAbuse: Primary Hostname - byss.tchmachines.com X-AntiAbuse: Original Domain - vger.kernel.org X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] X-AntiAbuse: Sender Address Domain - scalex86.org Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6979 Lines: 204 Patch to add a process-private hash table for 'PRIVATE' futexes. On large core-count systems, running multiple threaded processes causes false sharing on the global futex hash table. 
The global futex hash table is an array of struct futex_hash_bucket, which is defined as: struct futex_hash_bucket { spinlock_t lock; struct plist_head chain; }; static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; Signed-off-by: Shai Fultheim Index: linux-2.6.28.6/include/linux/mm_types.h =================================================================== --- linux-2.6.28.6.orig/include/linux/mm_types.h 2009-03-11 16:52:06.000000000 -0800 +++ linux-2.6.28.6/include/linux/mm_types.h 2009-03-11 16:52:23.000000000 -0800 @@ -256,6 +256,10 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif +#ifdef CONFIG_PROCESS_PRIVATE_FUTEX + /* Process private futex hash table */ + struct futex_hash_bucket *htb; +#endif }; #endif /* _LINUX_MM_TYPES_H */ Index: linux-2.6.28.6/init/Kconfig =================================================================== --- linux-2.6.28.6.orig/init/Kconfig 2009-03-11 16:52:06.000000000 -0800 +++ linux-2.6.28.6/init/Kconfig 2009-03-18 17:06:23.000000000 -0800 @@ -672,6 +672,14 @@ config FUTEX support for "fast userspace mutexes". The resulting kernel may not run glibc-based applications correctly. +config PROCESS_PRIVATE_FUTEX + bool "Process private futexes" if FUTEX + default n + help + This option enables ability to have per-process hashtables for private + futexes. 
This makes sense on large core-count systems (more than + 32 cores) + config ANON_INODES bool Index: linux-2.6.28.6/kernel/fork.c =================================================================== --- linux-2.6.28.6.orig/kernel/fork.c 2009-02-17 09:29:27.000000000 -0800 +++ linux-2.6.28.6/kernel/fork.c 2009-03-12 17:12:40.000000000 -0800 @@ -424,6 +424,7 @@ static struct mm_struct * mm_init(struct return mm; } + free_futex_htb(mm); free_mm(mm); return NULL; } Index: linux-2.6.28.6/kernel/futex.c =================================================================== --- linux-2.6.28.6.orig/kernel/futex.c 2009-03-11 16:52:13.000000000 -0800 +++ linux-2.6.28.6/kernel/futex.c 2009-03-18 17:36:04.000000000 -0800 @@ -140,15 +140,84 @@ static inline void futex_unlock_mm(struc up_read(fshared); } +#ifdef CONFIG_PROCESS_PRIVATE_FUTEX +static void free_htb(struct futex_hash_bucket *htb) +{ + if (htb != futex_queues) + kfree(htb); +} + +void free_futex_htb(struct mm_struct *mm) +{ + free_htb(mm->htb); +} + +static void alloc_htb(struct mm_struct *mm) +{ + struct futex_hash_bucket *htb; + int i; + /* + * Allocate and install a private hash table of the + * same size as the global hash table. 
We fall + * back onto the global hash on allocation failure + */ + htb = kmalloc(sizeof(futex_queues), GFP_KERNEL); + if (!htb) + htb = futex_queues; + else { + for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { + plist_head_init(&htb[i].chain, &htb[i].lock); + spin_lock_init(&htb[i].lock); + } + } + /* Install the hash table */ + spin_lock(&mm->page_table_lock); + if (mm->htb) { + /* Another thread installed the hash table */ + spin_unlock(&mm->page_table_lock); + free_htb(htb); + } else { + mm->htb = htb; + spin_unlock(&mm->page_table_lock); + } + +} + +static struct futex_hash_bucket *get_futex_hashtable(union futex_key *key) +{ + struct mm_struct *mm; + if (key->both.offset & FUT_OFF_INODE) + /* Shared inode based mapping uses global hash */ + return futex_queues; + /* + * Private futexes -- This covers both FUTEX_PRIVATE_FLAG + * and 'mm' only private futexes + */ + + mm = current->mm; + if (unlikely(!mm->htb)) + alloc_htb(mm); + return mm->htb; +} +#else +static inline +struct futex_hash_bucket *get_futex_hashtable(union futex_key *key) +{ + return futex_queues; +} +#endif /* * We hash on the keys returned from get_futex_key (see below). 
*/ static struct futex_hash_bucket *hash_futex(union futex_key *key) { - u32 hash = jhash2((u32*)&key->both.word, + struct futex_hash_bucket *htb; + u32 hash; + htb = get_futex_hashtable(key); + hash = jhash2((u32 *)&key->both.word, (sizeof(key->both.word)+sizeof(key->both.ptr))/4, key->both.offset); - return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)]; + return &htb[hash & ((1 << FUTEX_HASHBITS)-1)]; } /* Index: linux-2.6.28.6/include/linux/futex.h =================================================================== --- linux-2.6.28.6.orig/include/linux/futex.h 2009-02-17 09:29:27.000000000 -0800 +++ linux-2.6.28.6/include/linux/futex.h 2009-03-18 16:59:27.000000000 -0800 @@ -176,6 +176,15 @@ static inline void exit_pi_state_list(st { } #endif + +#ifdef CONFIG_PROCESS_PRIVATE_FUTEX +extern void free_futex_htb(struct mm_struct *mm); +#else +static inline void free_futex_htb(struct mm_struct *mm) +{ + return; +} +#endif #endif /* __KERNEL__ */ #define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/