From: "Kirill A. Shutemov"
To: Andrew Morton, Andrea Arcangeli, Hugh Dickins
Cc: Dave Hansen, Mel Gorman, Rik van Riel, Vlastimil Babka,
	Christoph Lameter, Naoya Horiguchi, Steve Capper, "Aneesh Kumar K.V",
	Johannes Weiner, Michal Hocko, Jerome Marchand,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	"Kirill A. Shutemov"
Subject: [PATCHv3 15/24] mm, thp: remove infrastructure for handling splitting PMDs
Date: Thu, 12 Feb 2015 18:18:29 +0200
Message-Id: <1423757918-197669-16-git-send-email-kirill.shutemov@linux.intel.com>
X-Mailer: git-send-email 2.1.4
In-Reply-To: <1423757918-197669-1-git-send-email-kirill.shutemov@linux.intel.com>
References: <1423757918-197669-1-git-send-email-kirill.shutemov@linux.intel.com>

With new refcounting we don't need to mark PMDs splitting. Let's drop
the code to handle this.

Arch-specific code will be removed separately.

Signed-off-by: Kirill A. Shutemov
---
 fs/proc/task_mmu.c            |  8 +++---
 include/asm-generic/pgtable.h |  5 ----
 include/linux/huge_mm.h       | 16 ------------
 mm/gup.c                      |  7 ------
 mm/huge_memory.c              | 57 +++++++++----------------------------------
 mm/memcontrol.c               | 14 ++--------
 mm/memory.c                   | 18 ++------------
 mm/mincore.c                  |  2 +-
 mm/pgtable-generic.c          | 14 ----------
 mm/rmap.c                     |  4 +--
 10 files changed, 21 insertions(+), 124 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8a0a78174cc6..090008d88e0f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -534,7 +534,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		smaps_pmd_entry(pmd, addr, walk);
 		spin_unlock(ptl);
 		return 0;
@@ -799,7 +799,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	spinlock_t *ptl;
 	struct page *page;
 
-	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
 			clear_soft_dirty_pmd(vma, addr, pmd);
 			goto out;
@@ -1112,7 +1112,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte, *orig_pte;
 	int err = 0;
 
-	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		int pmd_flags2;
 
 		if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1418,7 +1418,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *orig_pte;
 	pte_t *pte;
 
-	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		pte_t huge_pte = *(pte_t *)pmd;
 		struct page *page;
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 4d46085c1b90..34e99a82a336 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -178,11 +178,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
-#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern void pmdp_splitting_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
-#endif
-
 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
 extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 				       pgtable_t pgtable);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3c5fe722cc14..7a0c477a2b38 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -49,15 +49,9 @@ enum transparent_hugepage_flag {
 #endif
 };
 
-enum page_check_address_pmd_flag {
-	PAGE_CHECK_ADDRESS_PMD_FLAG,
-	PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG,
-	PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG,
-};
 extern pmd_t *page_check_address_pmd(struct page *page,
 				     struct mm_struct *mm,
 				     unsigned long address,
-				     enum page_check_address_pmd_flag flag,
 				     spinlock_t **ptl);
 extern int pmd_freeable(pmd_t pmd);
 
@@ -110,14 +104,6 @@ extern void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		if (unlikely(pmd_trans_huge(*____pmd)))			\
 			__split_huge_pmd(__vma, __pmd, __address);	\
 	}  while (0)
-#define wait_split_huge_page(__anon_vma, __pmd)				\
-	do {								\
-		pmd_t *____pmd = (__pmd);				\
-		anon_vma_lock_write(__anon_vma);			\
-		anon_vma_unlock_write(__anon_vma);			\
-		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
-		       pmd_trans_huge(*____pmd));			\
-	} while (0)
 #if HPAGE_PMD_ORDER >= MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
@@ -184,8 +170,6 @@ static inline int split_huge_page(struct page *page)
 {
 	return 0;
 }
-#define wait_split_huge_page(__anon_vma, __pmd)	\
-	do { } while (0)
 #define split_huge_pmd(__vma, __pmd, __address)	\
 	do { } while (0)
 static inline int hugepage_madvise(struct vm_area_struct *vma,
diff --git a/mm/gup.c b/mm/gup.c
index 0c8d076d7744..22585ef667d9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -208,13 +208,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 		spin_unlock(ptl);
 		return follow_page_pte(vma, address, pmd, flags);
 	}
-
-	if (unlikely(pmd_trans_splitting(*pmd))) {
-		spin_unlock(ptl);
-		wait_split_huge_page(vma->anon_vma, pmd);
-		return follow_page_pte(vma, address, pmd, flags);
-	}
-
 	if (flags & FOLL_SPLIT) {
 		int ret;
 		page = pmd_page(*pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5d1d80d7d3b8..fa79d3b89825 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -880,15 +880,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_unlock;
 	}
 
-	if (unlikely(pmd_trans_splitting(pmd))) {
-		/* split huge page running from under us */
-		spin_unlock(src_ptl);
-		spin_unlock(dst_ptl);
-		pte_free(dst_mm, pgtable);
-
-		wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
-		goto out;
-	}
 	src_page = pmd_page(pmd);
 	VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
 	get_page(src_page);
@@ -1392,7 +1383,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		struct page *page;
 		pgtable_t pgtable;
 		pmd_t orig_pmd;
@@ -1432,7 +1423,6 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		  pmd_t *old_pmd, pmd_t *new_pmd)
 {
 	spinlock_t *old_ptl, *new_ptl;
-	int ret = 0;
 	pmd_t pmd;
 	struct mm_struct *mm = vma->vm_mm;
 
@@ -1441,7 +1431,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 	    (new_addr & ~HPAGE_PMD_MASK) ||
 	    old_end - old_addr < HPAGE_PMD_SIZE ||
 	    (new_vma->vm_flags & VM_NOHUGEPAGE))
-		goto out;
+		return 0;
 
 	/*
 	 * The destination pmd shouldn't be established, free_pgtables()
@@ -1449,15 +1439,14 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 	 */
 	if (WARN_ON(!pmd_none(*new_pmd))) {
 		VM_BUG_ON(pmd_trans_huge(*new_pmd));
-		goto out;
+		return 0;
 	}
 
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_sem prevents deadlock.
 	 */
-	ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
-	if (ret == 1) {
+	if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
 		new_ptl = pmd_lockptr(mm, new_pmd);
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -1473,9 +1462,9 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		if (new_ptl != old_ptl)
 			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
+		return 1;
 	}
-out:
-	return ret;
+	return 0;
 }
 
 /*
@@ -1491,7 +1480,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		pmd_t entry;
 
 		/*
@@ -1529,17 +1518,8 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 		spinlock_t **ptl)
 {
 	*ptl = pmd_lock(vma->vm_mm, pmd);
-	if (likely(pmd_trans_huge(*pmd))) {
-		if (unlikely(pmd_trans_splitting(*pmd))) {
-			spin_unlock(*ptl);
-			wait_split_huge_page(vma->anon_vma, pmd);
-			return -1;
-		} else {
-			/* Thp mapped by 'pmd' is stable, so we can
-			 * handle it as it is. */
-			return 1;
-		}
-	}
+	if (likely(pmd_trans_huge(*pmd)))
+		return 1;
 	spin_unlock(*ptl);
 	return 0;
 }
@@ -1555,7 +1535,6 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 pmd_t *page_check_address_pmd(struct page *page,
 			      struct mm_struct *mm,
 			      unsigned long address,
-			      enum page_check_address_pmd_flag flag,
 			      spinlock_t **ptl)
 {
 	pgd_t *pgd;
@@ -1578,21 +1557,8 @@ pmd_t *page_check_address_pmd(struct page *page,
 		goto unlock;
 	if (pmd_page(*pmd) != page)
 		goto unlock;
-	/*
-	 * split_vma() may create temporary aliased mappings. There is
-	 * no risk as long as all huge pmd are found and have their
-	 * splitting bit set before __split_huge_page_refcount
-	 * runs. Finding the same huge pmd more than once during the
-	 * same rmap walk is not a problem.
-	 */
-	if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
-	    pmd_trans_splitting(*pmd))
-		goto unlock;
-	if (pmd_trans_huge(*pmd)) {
-		VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
-			  !pmd_trans_splitting(*pmd));
+	if (pmd_trans_huge(*pmd))
 		return pmd;
-	}
 unlock:
 	spin_unlock(*ptl);
 	return NULL;
@@ -1921,8 +1887,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		mmun_end   = haddr + HPAGE_PMD_SIZE;
 		mmu_notifier_invalidate_range_start(vma->vm_mm,
 						    mmun_start, mmun_end);
-		pmd = page_check_address_pmd(page, vma->vm_mm, addr,
-				PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
+		pmd = page_check_address_pmd(page, vma->vm_mm, addr, &ptl);
 		if (pmd) {
 			__split_huge_pmd_locked(vma, pmd, addr);
 			spin_unlock(ptl);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d18d3a6e7337..1c6786c457bf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4897,7 +4897,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
 			mc.precharge += HPAGE_PMD_NR;
 		spin_unlock(ptl);
@@ -5065,17 +5065,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	union mc_target target;
 	struct page *page;
 
-	/*
-	 * We don't take compound_lock() here but no race with splitting thp
-	 * happens because:
-	 *  - if pmd_trans_huge_lock() returns 1, the relevant thp is not
-	 *    under splitting, which means there's no concurrent thp split,
-	 *  - if another thread runs into split_huge_page() just after we
-	 *    entered this if-block, the thread must wait for page table lock
-	 *    to be unlocked in __split_huge_page_splitting(), where the main
-	 *    part of thp split is not executed yet.
-	 */
-	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		if (mc.precharge < HPAGE_PMD_NR) {
 			spin_unlock(ptl);
 			return 0;
diff --git a/mm/memory.c b/mm/memory.c
index 6f030a3a7636..c1878afb6466 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -565,7 +565,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	spinlock_t *ptl;
 	pgtable_t new = pte_alloc_one(mm, address);
-	int wait_split_huge_page;
 	if (!new)
 		return -ENOMEM;
 
@@ -585,18 +584,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
 	ptl = pmd_lock(mm, pmd);
-	wait_split_huge_page = 0;
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
 		atomic_long_inc(&mm->nr_ptes);
 		pmd_populate(mm, pmd, new);
 		new = NULL;
-	} else if (unlikely(pmd_trans_splitting(*pmd)))
-		wait_split_huge_page = 1;
+	}
 	spin_unlock(ptl);
 	if (new)
 		pte_free(mm, new);
-	if (wait_split_huge_page)
-		wait_split_huge_page(vma->anon_vma, pmd);
 	return 0;
 }
 
@@ -612,8 +607,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
 		pmd_populate_kernel(&init_mm, pmd, new);
 		new = NULL;
-	} else
-		VM_BUG_ON(pmd_trans_splitting(*pmd));
+	}
 	spin_unlock(&init_mm.page_table_lock);
 	if (new)
 		pte_free_kernel(&init_mm, new);
@@ -3222,14 +3216,6 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (pmd_trans_huge(orig_pmd)) {
 			unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
-			/*
-			 * If the pmd is splitting, return and retry the
-			 * the fault.  Alternative: wait until the split
-			 * is done, and goto retry.
-			 */
-			if (pmd_trans_splitting(orig_pmd))
-				return 0;
-
 			if (pmd_protnone(orig_pmd))
 				return do_huge_pmd_numa_page(mm, vma, address,
 							     orig_pmd, pmd);
diff --git a/mm/mincore.c b/mm/mincore.c
index be25efde64a4..feb867f5fdf4 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -117,7 +117,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	unsigned char *vec = walk->private;
 	int nr = (end - addr) >> PAGE_SHIFT;
 
-	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
 		memset(vec, 1, nr);
 		spin_unlock(ptl);
 		goto out;
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index c25f94b33811..2fe699cedd4d 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -133,20 +133,6 @@ pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
-#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
-			  pmd_t *pmdp)
-{
-	pmd_t pmd = pmd_mksplitting(*pmdp);
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
-	/* tlb flush only to serialize against gup-fast */
-	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif
-
 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
diff --git a/mm/rmap.c b/mm/rmap.c
index db8b99e48966..eb2f4a0d3961 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -730,8 +730,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		 * rmap might return false positives; we must filter
 		 * these out using page_check_address_pmd().
 		 */
-		pmd = page_check_address_pmd(page, mm, address,
-					     PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
+		pmd = page_check_address_pmd(page, mm, address, &ptl);
 		if (!pmd)
 			return SWAP_AGAIN;
 
@@ -741,7 +740,6 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			return SWAP_FAIL; /* To break the loop */
 		}
 
-		/* go ahead even if the pmd is pmd_trans_splitting() */
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
-- 
2.1.4
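
[Editorial aside, not part of the patch: a minimal, illustrative sketch of the
caller pattern this change enables. It assumes the usual pagewalk callback
shape, as in smaps_pte_range() above, and the three-argument
pmd_trans_huge_lock() helper this series keeps; the callback name below is
hypothetical.]

/* Illustrative only: hypothetical pagewalk callback, not taken from the patch. */
static int example_pmd_range(pmd_t *pmd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;

	if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
		/* Huge PMD is mapped and stable: operate on it under ptl. */
		spin_unlock(ptl);
		return 0;
	}
	/* Not (or no longer) a huge PMD: fall through to the PTE level. */
	return 0;
}

With the splitting state gone, a PMD can no longer be observed half-split, so
the old tri-state return of __pmd_trans_huge_lock() (-1 wait-and-retry, 0 not
huge, 1 huge and locked) collapses to a plain locked-or-not answer, which is
what the "== 1" removals above rely on.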