Subject: Re: [RFC][PATCH] mm/slub: Skip CPU slab activation when debugging
From: Laura Abbott
To: Christoph Lameter, Pekka Enberg, David Rientjes, Joonsoo Kim, Andrew Morton
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Kees Cook
Date: Mon, 28 Mar 2016 18:52:03 -0700
Message-ID: <56F9DFC3.501@redhat.com>
In-Reply-To: <1459205581-4605-1-git-send-email-labbott@fedoraproject.org>
References: <1459205581-4605-1-git-send-email-labbott@fedoraproject.org>

On 03/28/2016 03:53 PM, Laura Abbott wrote:
> The per-cpu slab is designed to be the primary path for allocation in SLUB,
> since it is assumed allocations will go through the fast path if possible.
> When debugging is enabled, the fast path is disabled and per-cpu
> allocations are not used. The current debugging code path still activates
> the cpu slab for allocations and then immediately deactivates it. This
> is useless work. When a slab is enabled for debugging, skip cpu
> activation.
>
> Signed-off-by: Laura Abbott
> ---
> This is a follow-on to the optimization of the debug paths for poisoning.
> With this I get a ~2 second drop on hackbench -g 20 -l 1000 with slub_debug=P
> and no noticeable change with slub_debug=- .

The zero day robot pointed out that this triggers one of the BUG_ONs on bootup.
I'll take a deeper look tomorrow unless the approach is actually worthless.

> ---
>  mm/slub.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 77 insertions(+), 5 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 7277413..4507bd8 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1482,8 +1482,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
>  	}
>
>  	page->freelist = fixup_red_left(s, start);
> -	page->inuse = page->objects;
> -	page->frozen = 1;
> +	page->inuse = kmem_cache_debug(s) ? 1 : page->objects;
> +	page->frozen = kmem_cache_debug(s) ? 0 : 1;
>
>  out:
>  	if (gfpflags_allow_blocking(flags))
> @@ -1658,6 +1658,64 @@ static inline void *acquire_slab(struct kmem_cache *s,
>  	return freelist;
>  }
>
> +
> +static inline void *acquire_slab_debug(struct kmem_cache *s,
> +		struct kmem_cache_node *n, struct page *page,
> +		int mode, int *objects)
> +{
> +	void *freelist;
> +	unsigned long counters;
> +	struct page new;
> +	void *next;
> +
> +	lockdep_assert_held(&n->list_lock);
> +
> +
> +	/*
> +	 * Zap the freelist and set the frozen bit.
> +	 * The old freelist is the list of objects for the
> +	 * per cpu allocation list.
> +	 */
> +	freelist = page->freelist;
> +	counters = page->counters;
> +
> +	BUG_ON(!freelist);
> +
> +	next = get_freepointer_safe(s, freelist);
> +
> +	new.counters = counters;
> +	*objects = new.objects - new.inuse;
> +	if (mode) {
> +		new.inuse++;
> +		new.freelist = next;
> +	} else {
> +		BUG();
> +	}
> +
> +	VM_BUG_ON(new.frozen);
> +
> +	if (!new.freelist) {
> +		remove_partial(n, page);
> +		add_full(s, n, page);
> +	}
> +
> +	if (!__cmpxchg_double_slab(s, page,
> +			freelist, counters,
> +			new.freelist, new.counters,
> +			"acquire_slab")) {
> +		if (!new.freelist) {
> +			remove_full(s, n, page);
> +			add_partial(n, page, DEACTIVATE_TO_HEAD);
> +		}
> +		return NULL;
> +	}
> +
> +	WARN_ON(!freelist);
> +	return freelist;
> +}
> +
> +
> +
>  static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
>  static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
>
> @@ -1688,7 +1746,11 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
>  		if (!pfmemalloc_match(page, flags))
>  			continue;
>
> -		t = acquire_slab(s, n, page, object == NULL, &objects);
> +		if (kmem_cache_debug(s))
> +			t = acquire_slab_debug(s, n, page, object == NULL, &objects);
> +		else
> +			t = acquire_slab(s, n, page, object == NULL, &objects);
> +
>  		if (!t)
>  			break;
>
> @@ -2284,7 +2346,17 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
>  	 * muck around with it freely without cmpxchg
>  	 */
>  	freelist = page->freelist;
> -	page->freelist = NULL;
> +	page->freelist = kmem_cache_debug(s) ?
> +		get_freepointer(s, freelist) : NULL;
> +
> +	if (kmem_cache_debug(s)) {
> +		struct kmem_cache_node *n;
> +
> +		n = get_node(s, page_to_nid(page));
> +		spin_lock(&n->list_lock);
> +		add_partial(n, page, DEACTIVATE_TO_HEAD);
> +		spin_unlock(&n->list_lock);
> +	}
>
>  	stat(s, ALLOC_SLAB);
>  	c->page = page;
> @@ -2446,7 +2518,7 @@ new_slab:
>  	    !alloc_debug_processing(s, page, freelist, addr))
>  		goto new_slab;	/* Slab failed checks. Next slab needed */
>
> -	deactivate_slab(s, page, get_freepointer(s, freelist));
> +	/* No need to deactivate, no cpu slab */
>  	c->page = NULL;
>  	c->freelist = NULL;
>  	return freelist;
>
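To make the intent easier to see outside of slub's internals, here is a rough
userspace sketch of the shape of the change. This is a toy model only:
toy_cache, cpu_slot, node_list and take_from_node are made-up names, the
locking is elided, and none of it is the actual mm/slub.c code.

/*
 * Toy model of the idea above, not the real allocator: a cache has a
 * per-cpu slot (fast path) and a shared node list (slow path).  A debug
 * cache never uses the fast path, so "activating" the per-cpu slot for
 * it only to tear it down again is wasted work; the debug path below
 * simply skips it.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_object {
	struct toy_object *next;
};

struct toy_cache {
	bool debug;                   /* plays the role of kmem_cache_debug(s) */
	struct toy_object *cpu_slot;  /* plays the role of the per-cpu slab    */
	struct toy_object *node_list; /* plays the role of the node lists      */
};

/* Slow path: pull one object from the shared list (locking elided). */
static struct toy_object *take_from_node(struct toy_cache *c)
{
	struct toy_object *obj = c->node_list;

	if (obj)
		c->node_list = obj->next;
	return obj;
}

static struct toy_object *toy_alloc(struct toy_cache *c)
{
	struct toy_object *obj;

	if (!c->debug) {
		/* Fast path: "activate" the per-cpu slot once, then pop from it. */
		if (!c->cpu_slot)
			c->cpu_slot = take_from_node(c);
		obj = c->cpu_slot;
		if (obj)
			c->cpu_slot = obj->next;
		return obj;
	}

	/*
	 * Debug path: the per-cpu slot is never populated, so there is
	 * nothing to deactivate afterwards; every allocation goes through
	 * the shared list where it can be fully checked.
	 */
	return take_from_node(c);
}

int main(void)
{
	struct toy_object pool[4] = {
		{ &pool[1] }, { &pool[2] }, { &pool[3] }, { NULL }
	};
	struct toy_cache debug_cache = { .debug = true, .node_list = &pool[0] };
	struct toy_object *a = toy_alloc(&debug_cache);
	struct toy_object *b = toy_alloc(&debug_cache);

	/* Both objects came straight off node_list; cpu_slot stayed NULL. */
	printf("a=%p b=%p cpu_slot=%p\n", (void *)a, (void *)b,
	       (void *)debug_cache.cpu_slot);
	return 0;
}

The patch above gets the same effect by leaving page->frozen clear for debug
caches in allocate_slab(), parking the new page on the node partial list in
new_slab_objects(), and dropping the deactivate_slab() call in the debug slow
path, since there is no cpu slab to deactivate.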