From: Vlastimil Babka <vbabka@suse.cz>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org, Andrew Morton, Hugh Dickins,
	Andrea Arcangeli, "Kirill A. Shutemov", Rik van Riel, Mel Gorman,
	Michal Hocko, Ebru Akagunduz, Alex Thorlton, David Rientjes,
	Peter Zijlstra, Ingo Molnar, Vlastimil Babka
Subject: [RFC 3/6] mm, thp: try fault allocations only if we expect them to succeed
Date: Mon, 23 Feb 2015 13:58:39 +0100
Message-Id: <1424696322-21952-4-git-send-email-vbabka@suse.cz>
In-Reply-To: <1424696322-21952-1-git-send-email-vbabka@suse.cz>
References: <1424696322-21952-1-git-send-email-vbabka@suse.cz>

Since we check THP availability for khugepaged THP collapses, we can also
use it for page fault THP allocations. If khugepaged, with its sync
compaction, is not able to allocate a hugepage, then it is unlikely that
the less involved allocation attempt at page fault time would succeed.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/huge_memory.c | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 55846b8..1eec1a6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -761,6 +761,32 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
 	return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT));
 }
 
+/* TODO: inline? check bloat-o-meter */
+static inline struct page *
+fault_alloc_hugepage(struct vm_area_struct *vma, unsigned long haddr)
+{
+	struct page *hpage;
+	gfp_t gfp;
+	int nid;
+
+	nid = numa_node_id();
+	/*
+	 * This check is not exact for interleave policy, but we can leave such
+	 * cases to later scanning.
+	 * TODO: should VM_HUGEPAGE madvised vmas proceed regardless of the check?
+	 */
+	if (!node_isset(nid, thp_avail_nodes))
+		return NULL;
+
+	gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma));
+	hpage = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+
+	if (!hpage)
+		node_clear(nid, thp_avail_nodes);
+
+	return hpage;
+}
+
 /* Caller must hold page table lock. */
 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
@@ -781,7 +807,6 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			       unsigned long address, pmd_t *pmd,
 			       unsigned int flags)
 {
-	gfp_t gfp;
 	struct page *page;
 	unsigned long haddr = address & HPAGE_PMD_MASK;
 
@@ -816,8 +841,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		return 0;
 	}
-	gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma));
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	page = fault_alloc_hugepage(vma, haddr);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1105,12 +1129,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(ptl);
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
-	    !transparent_hugepage_debug_cow()) {
-		gfp_t gfp;
-
-		gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma));
-		new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
-	} else
+	    !transparent_hugepage_debug_cow())
+		new_page = fault_alloc_hugepage(vma, haddr);
+	else
 		new_page = NULL;
 
 	if (unlikely(!new_page)) {
-- 
2.1.4
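
For readers following along outside the kernel tree, the gating pattern that
fault_alloc_hugepage() implements can be sketched in ordinary user-space C.
The sketch below is illustrative only: avail_mask, try_expensive_alloc() and
fault_alloc() are hypothetical stand-ins for the thp_avail_nodes nodemask
(maintained elsewhere in this series), alloc_hugepage_vma() and
fault_alloc_hugepage(), and the random failure stands in for real
allocation/compaction outcomes.

/*
 * Minimal user-space sketch of the patch's gating pattern: the fast path
 * consults a per-node availability bitmap before attempting an expensive
 * allocation, and clears the node's bit on failure so that later callers
 * skip the attempt until a background scanner sets the bit again.
 * All names here are illustrative, not kernel API.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_NODES 4

/* One bit per node; a set bit means "huge allocations may succeed here". */
static unsigned long avail_mask = (1UL << MAX_NODES) - 1;

static bool node_available(int nid)  { return avail_mask & (1UL << nid); }
static void node_mark_full(int nid)  { avail_mask &= ~(1UL << nid); }
static void node_mark_avail(int nid) { avail_mask |= (1UL << nid); }

/* Stand-in for the expensive high-order allocation; fails randomly here. */
static void *try_expensive_alloc(int nid)
{
	(void)nid;
	return (rand() % 4) ? malloc(2 * 1024 * 1024) : NULL;
}

/* Fault-path analogue of fault_alloc_hugepage(): test the bit first. */
static void *fault_alloc(int nid)
{
	void *p;

	if (!node_available(nid))
		return NULL;		/* don't even try; a prior attempt failed */

	p = try_expensive_alloc(nid);
	if (!p)
		node_mark_full(nid);	/* remember the failure for later callers */
	return p;
}

int main(void)
{
	for (int i = 0; i < 10; i++) {
		void *p = fault_alloc(0);
		printf("attempt %d: %s\n", i, p ? "huge page" : "fallback");
		free(p);
	}
	/*
	 * A background scanner (the khugepaged analogue) would re-set the
	 * bit once its own sync-compaction attempt succeeds:
	 */
	node_mark_avail(0);
	return 0;
}

The design point, as in the patch itself, is that the cheap bitmap test makes
repeated doomed attempts on the fault path essentially free, at the cost of
some staleness until the scanner re-validates the node.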