From: Aaron Lu
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Andrew Morton, Dave Hansen, Michal Hocko, Vlastimil Babka, Mel Gorman,
    Matthew Wilcox, Daniel Jordan, Tariq Toukan, Yosef Lev,
    Jesper Dangaard Brouer
Subject: [RFC PATCH 4/9] mm: convert zone lock from spinlock to rwlock
Date: Tue, 11 Sep 2018 13:36:11 +0800
Message-Id: <20180911053616.6894-5-aaron.lu@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20180911053616.6894-1-aaron.lu@intel.com>
References: <20180911053616.6894-1-aaron.lu@intel.com>

This patch converts the zone lock from a spinlock to a rwlock and always
takes the lock in write mode, so there is no functional change.

This is in preparation for the free path to take the lock in read mode,
so that multiple frees can proceed concurrently.

compact_trylock and compact_unlock_should_abort are taken from Daniel
Jordan's patch.

Signed-off-by: Aaron Lu
---
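As a sketch of where this series is headed (illustrative only -- nothing
below is implemented by this patch, and the two helper names are made up):
once a later patch makes __free_one_page() safe for concurrent callers, the
free path can move to the read side of the new rwlock while allocation
keeps the write side.

/* Illustrative sketch, not part of this patch; helper names are hypothetical. */
static void zone_free_sketch(struct zone *zone, struct page *page,
			     unsigned long pfn, unsigned int order, int mt)
{
	/*
	 * Read side: several CPUs freeing pages may hold the zone lock at
	 * the same time. Only safe once __free_one_page() tolerates
	 * concurrent callers (later patches in the series).
	 */
	read_lock(&zone->lock);
	__free_one_page(page, pfn, zone, order, mt);
	read_unlock(&zone->lock);
}

static struct page *zone_alloc_sketch(struct zone *zone, unsigned int order,
				      int migratetype)
{
	struct page *page;

	/* Write side: allocation still excludes all readers and writers. */
	write_lock(&zone->lock);
	page = __rmqueue(zone, order, migratetype);
	write_unlock(&zone->lock);

	return page;
}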
 include/linux/mmzone.h |  2 +-
 mm/compaction.c        | 90 +++++++++++++++++++++---------------------
 mm/hugetlb.c           |  8 ++--
 mm/page_alloc.c        | 52 ++++++++++++------------
 mm/page_isolation.c    | 12 +++---
 mm/vmstat.c            |  4 +-
 6 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1e22d96734e0..84cfa56e2d19 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -465,7 +465,7 @@ struct zone {
 	unsigned long		flags;
 
 	/* Primarily protects free_area */
-	spinlock_t		lock;
+	rwlock_t		lock;
 
 	/* Write-intensive fields used by compaction and vmstats. */
 	ZONE_PADDING(_pad2_)
diff --git a/mm/compaction.c b/mm/compaction.c
index faca45ebe62d..6ecf74d8e287 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -347,20 +347,20 @@ static inline void update_pageblock_skip(struct compact_control *cc,
  * Returns true if the lock is held
  * Returns false if the lock is not held and compaction should abort
  */
-static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
-						struct compact_control *cc)
-{
-	if (cc->mode == MIGRATE_ASYNC) {
-		if (!spin_trylock_irqsave(lock, *flags)) {
-			cc->contended = true;
-			return false;
-		}
-	} else {
-		spin_lock_irqsave(lock, *flags);
-	}
-
-	return true;
-}
+#define compact_trylock(lock, flags, cc, lockf, trylockf)		\
+({									\
+	bool __ret = true;						\
+	if ((cc)->mode == MIGRATE_ASYNC) {				\
+		if (!trylockf((lock), *(flags))) {			\
+			(cc)->contended = true;				\
+			__ret = false;					\
+		}							\
+	} else {							\
+		lockf((lock), *(flags));				\
+	}								\
+									\
+	__ret;								\
+})
 
 /*
  * Compaction requires the taking of some coarse locks that are potentially
@@ -377,29 +377,29 @@ static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
  * Returns false when compaction can continue (sync compaction might have
  * scheduled)
  */
-static bool compact_unlock_should_abort(spinlock_t *lock,
-		unsigned long flags, bool *locked, struct compact_control *cc)
-{
-	if (*locked) {
-		spin_unlock_irqrestore(lock, flags);
-		*locked = false;
-	}
-
-	if (fatal_signal_pending(current)) {
-		cc->contended = true;
-		return true;
-	}
-
-	if (need_resched()) {
-		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
-			return true;
-		}
-		cond_resched();
-	}
-
-	return false;
-}
+#define compact_unlock_should_abort(lock, flags, locked, cc, unlockf)	\
+({									\
+	bool __ret = false;						\
+									\
+	if (*(locked)) {						\
+		unlockf((lock), (flags));				\
+		*(locked) = false;					\
+	}								\
+									\
+	if (fatal_signal_pending(current)) {				\
+		(cc)->contended = true;					\
+		__ret = true;						\
+	} else if (need_resched()) {					\
+		if ((cc)->mode == MIGRATE_ASYNC) {			\
+			(cc)->contended = true;				\
+			__ret = true;					\
+		} else {						\
+			cond_resched();					\
+		}							\
+	}								\
+									\
+	__ret;								\
+})
 
 /*
  * Aside from avoiding lock contention, compaction also periodically checks
@@ -457,7 +457,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 		 */
 		if (!(blockpfn % SWAP_CLUSTER_MAX)
 		    && compact_unlock_should_abort(&cc->zone->lock, flags,
-								&locked, cc))
+				&locked, cc, write_unlock_irqrestore))
 			break;
 
 		nr_scanned++;
@@ -502,8 +502,9 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 			 * spin on the lock and we acquire the lock as late as
 			 * possible.
 			 */
-			locked = compact_trylock_irqsave(&cc->zone->lock,
-								&flags, cc);
+			locked = compact_trylock(&cc->zone->lock, &flags, cc,
+						 write_lock_irqsave,
+						 write_trylock_irqsave);
 			if (!locked)
 				break;
 
@@ -541,7 +542,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	}
 
 	if (locked)
-		spin_unlock_irqrestore(&cc->zone->lock, flags);
+		write_unlock_irqrestore(&cc->zone->lock, flags);
 
 	/*
 	 * There is a tiny chance that we have read bogus compound_order(),
@@ -758,7 +759,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 */
 		if (!(low_pfn % SWAP_CLUSTER_MAX)
 		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
-								&locked, cc))
+				&locked, cc, spin_unlock_irqrestore))
 			break;
 
 		if (!pfn_valid_within(low_pfn))
@@ -847,8 +848,9 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 		/* If we already hold the lock, we can skip some rechecking */
 		if (!locked) {
-			locked = compact_trylock_irqsave(zone_lru_lock(zone),
-								&flags, cc);
+			locked = compact_trylock(zone_lru_lock(zone), &flags, cc,
+						 spin_lock_irqsave,
+						 spin_trylock_irqsave);
 			if (!locked)
 				break;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c21775f196b..18fde0139f4a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1113,7 +1113,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 
 	zonelist = node_zonelist(nid, gfp_mask);
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nodemask) {
-		spin_lock_irqsave(&zone->lock, flags);
+		write_lock_irqsave(&zone->lock, flags);
 
 		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
 		while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
@@ -1125,16 +1125,16 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 				 * spinning on this lock, it may win the race
 				 * and cause alloc_contig_range() to fail...
 				 */
-				spin_unlock_irqrestore(&zone->lock, flags);
+				write_unlock_irqrestore(&zone->lock, flags);
 				ret = __alloc_gigantic_page(pfn, nr_pages, gfp_mask);
 				if (!ret)
 					return pfn_to_page(pfn);
-				spin_lock_irqsave(&zone->lock, flags);
+				write_lock_irqsave(&zone->lock, flags);
 			}
 			pfn += nr_pages;
 		}
 
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	return NULL;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 05e983f42316..38e39ccdd6d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1133,7 +1133,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		} while (--count && --batch_free && !list_empty(list));
 	}
 
-	spin_lock(&zone->lock);
+	write_lock(&zone->lock);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 
 	/*
@@ -1151,7 +1151,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
 		trace_mm_page_pcpu_drain(page, 0, mt);
 	}
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1159,13 +1159,13 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype)
 {
-	spin_lock(&zone->lock);
+	write_lock(&zone->lock);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype);
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -2251,7 +2251,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
 	if (zone->nr_reserved_highatomic >= max_managed)
 		return;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	/* Recheck the nr_reserved_highatomic limit under the lock */
 	if (zone->nr_reserved_highatomic >= max_managed)
@@ -2267,7 +2267,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
 	}
 
 out_unlock:
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 }
 
 /*
@@ -2300,7 +2300,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 					pageblock_nr_pages)
 			continue;
 
-		spin_lock_irqsave(&zone->lock, flags);
+		write_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
 			struct free_area *area = &(zone->free_area[order]);
 
@@ -2343,11 +2343,11 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 			ret = move_freepages_block(zone, page, ac->migratetype,
 									NULL);
 			if (ret) {
-				spin_unlock_irqrestore(&zone->lock, flags);
+				write_unlock_irqrestore(&zone->lock, flags);
 				return ret;
 			}
 		}
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	return false;
@@ -2465,7 +2465,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 {
 	int i, alloced = 0;
 
-	spin_lock(&zone->lock);
+	write_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		struct page *page = __rmqueue(zone, order, migratetype);
 		if (unlikely(page == NULL))
@@ -2498,7 +2498,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	 * pages added to the pcp list.
 	 */
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 	return alloced;
 }
 
@@ -2687,7 +2687,7 @@ void mark_free_pages(struct zone *zone)
 	if (zone_is_empty(zone))
 		return;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	max_zone_pfn = zone_end_pfn(zone);
 	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
@@ -2721,7 +2721,7 @@ void mark_free_pages(struct zone *zone)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 }
 #endif /* CONFIG_PM */
 
@@ -2990,7 +2990,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
 	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	do {
 		page = NULL;
@@ -3002,7 +3002,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 		if (!page)
 			page = __rmqueue(zone, order, migratetype);
 	} while (page && check_new_pages(page, order));
-	spin_unlock(&zone->lock);
+	write_unlock(&zone->lock);
 	if (!page)
 		goto failed;
 	__mod_zone_freepage_state(zone, -(1 << order),
@@ -5009,7 +5009,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		show_node(zone);
 		printk(KERN_CONT "%s: ", zone->name);
 
-		spin_lock_irqsave(&zone->lock, flags);
+		write_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
 			struct free_area *area = &zone->free_area[order];
 			int type;
@@ -5023,7 +5023,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 					types[order] |= 1 << type;
 			}
 		}
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
 			printk(KERN_CONT "%lu*%lukB ",
 			       nr[order], K(1UL) << order);
@@ -6247,7 +6247,7 @@ static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx,
 	zone_set_nid(zone, nid);
 	zone->name = zone_names[idx];
 	zone->zone_pgdat = NODE_DATA(nid);
-	spin_lock_init(&zone->lock);
+	rwlock_init(&zone->lock);
 	zone_seqlock_init(zone);
 	zone_pcp_init(zone);
 }
@@ -7239,7 +7239,7 @@ static void __setup_per_zone_wmarks(void)
 	for_each_zone(zone) {
 		u64 tmp;
 
-		spin_lock_irqsave(&zone->lock, flags);
+		write_lock_irqsave(&zone->lock, flags);
 		tmp = (u64)pages_min * zone->managed_pages;
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
@@ -7277,7 +7277,7 @@ static void __setup_per_zone_wmarks(void)
 		zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
 		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
 
-		spin_unlock_irqrestore(&zone->lock, flags);
+		write_unlock_irqrestore(&zone->lock, flags);
 	}
 
 	/* update totalreserve_pages */
@@ -8041,7 +8041,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		return;
 	offline_mem_sections(pfn, end_pfn);
 	zone = page_zone(pfn_to_page(pfn));
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	pfn = start_pfn;
 	while (pfn < end_pfn) {
 		if (!pfn_valid(pfn)) {
@@ -8073,7 +8073,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 			SetPageReserved((page+i));
 		pfn += (1 << order);
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 }
 #endif
 
@@ -8084,14 +8084,14 @@ bool is_free_buddy_page(struct page *page)
 	unsigned long flags;
 	unsigned int order;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	for (order = 0; order < MAX_ORDER; order++) {
 		struct page *page_head = page - (pfn & ((1 << order) - 1));
 
 		if (PageBuddy(page_head) && page_order(page_head) >= order)
 			break;
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 
 	return order < MAX_ORDER;
 }
@@ -8110,7 +8110,7 @@ bool set_hwpoison_free_buddy_page(struct page *page)
 	unsigned int order;
 	bool hwpoisoned = false;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	for (order = 0; order < MAX_ORDER; order++) {
 		struct page *page_head = page - (pfn & ((1 << order) - 1));
 
@@ -8120,7 +8120,7 @@ bool set_hwpoison_free_buddy_page(struct page *page)
 			break;
 		}
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 
 	return hwpoisoned;
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 43e085608846..5c99fc2a1616 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -26,7 +26,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype,
 
 	zone = page_zone(page);
 
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 
 	/*
 	 * We assume the caller intended to SET migrate type to isolate.
@@ -82,7 +82,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype,
 		__mod_zone_freepage_state(zone, -nr_pages, mt);
 	}
 
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages(zone);
 	return ret;
@@ -98,7 +98,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 	struct page *buddy;
 
 	zone = page_zone(page);
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	if (!is_migrate_isolate_page(page))
 		goto out;
 
@@ -137,7 +137,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 	set_pageblock_migratetype(page, migratetype);
 	zone->nr_isolate_pageblock--;
 out:
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 	if (isolated_page) {
 		post_alloc_hook(page, order, __GFP_MOVABLE);
 		__free_pages(page, order);
@@ -299,10 +299,10 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 		return -EBUSY;
 	/* Check all pages are free or marked as ISOLATED */
 	zone = page_zone(page);
-	spin_lock_irqsave(&zone->lock, flags);
+	write_lock_irqsave(&zone->lock, flags);
 	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
 						skip_hwpoisoned_pages);
-	spin_unlock_irqrestore(&zone->lock, flags);
+	write_unlock_irqrestore(&zone->lock, flags);
 
 	trace_test_pages_isolated(start_pfn, end_pfn, pfn);
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8ba0870ecddd..06d79271a8ae 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1337,10 +1337,10 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 			continue;
 
 		if (!nolock)
-			spin_lock_irqsave(&zone->lock, flags);
+			write_lock_irqsave(&zone->lock, flags);
 		print(m, pgdat, zone);
 		if (!nolock)
-			spin_unlock_irqrestore(&zone->lock, flags);
+			write_unlock_irqrestore(&zone->lock, flags);
 	}
 }
 #endif
-- 
2.17.1
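
For readers who want to see the property this conversion is building
towards, here is a self-contained userspace analogy (POSIX rwlocks,
made-up names, not kernel code): several "free" threads hold the lock as
readers at the same time, while the "alloc" thread takes it as a writer
and excludes everybody.

/* Build with: gcc -O2 -pthread rwlock_demo.c -o rwlock_demo */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t zone_lock = PTHREAD_RWLOCK_INITIALIZER;
static long nr_free;	/* stand-in for zone free page accounting */

static void *free_path(void *arg)
{
	(void)arg;
	/* Reader side: many of these can hold the lock concurrently. */
	for (int i = 0; i < 100000; i++) {
		pthread_rwlock_rdlock(&zone_lock);
		/* readers must use atomic updates among themselves */
		__atomic_add_fetch(&nr_free, 1, __ATOMIC_RELAXED);
		pthread_rwlock_unlock(&zone_lock);
	}
	return NULL;
}

static void *alloc_path(void *arg)
{
	(void)arg;
	/* Writer side: excludes all readers and other writers. */
	for (int i = 0; i < 100000; i++) {
		pthread_rwlock_wrlock(&zone_lock);
		nr_free--;	/* plain update is safe: access is exclusive */
		pthread_rwlock_unlock(&zone_lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t f1, f2, a;

	pthread_create(&f1, NULL, free_path, NULL);
	pthread_create(&f2, NULL, free_path, NULL);
	pthread_create(&a, NULL, alloc_path, NULL);
	pthread_join(f1, NULL);
	pthread_join(f2, NULL);
	pthread_join(a, NULL);

	/* expect 2 * 100000 - 100000 = 100000 */
	printf("nr_free = %ld\n", nr_free);
	return 0;
}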