Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751871AbdHaVRv (ORCPT ); Thu, 31 Aug 2017 17:17:51 -0400 Received: from mx1.redhat.com ([209.132.183.28]:44842 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751774AbdHaVRs (ORCPT ); Thu, 31 Aug 2017 17:17:48 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 30730806AF Authentication-Results: ext-mx02.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx02.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jglisse@redhat.com From: jglisse@redhat.com To: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org, =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= , Dan Williams , Ross Zwisler , Linus Torvalds , Bernhard Held , Adam Borowski , Andrea Arcangeli , =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= , Wanpeng Li , Paolo Bonzini , Takashi Iwai , Nadav Amit , Mike Galbraith , "Kirill A . Shutemov" , axie , Andrew Morton Subject: [PATCH 01/13] dax: update to new mmu_notifier semantic Date: Thu, 31 Aug 2017 17:17:26 -0400 Message-Id: <20170831211738.17922-2-jglisse@redhat.com> In-Reply-To: <20170831211738.17922-1-jglisse@redhat.com> References: <20170831211738.17922-1-jglisse@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.26]); Thu, 31 Aug 2017 21:17:48 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6006 Lines: 182 From: Jérôme Glisse Replacing all mmu_notifier_invalida_page() by mmu_notifier_invalidat_range and making sure it is bracketed by call to mmu_notifier_invalidate_range_start/ end. Note that because we can not presume the pmd value or pte value we have to assume the worse and unconditionaly report an invalidation as happening. Signed-off-by: Jérôme Glisse Cc: Dan Williams Cc: Ross Zwisler Cc: Linus Torvalds Cc: Bernhard Held Cc: Adam Borowski Cc: Andrea Arcangeli Cc: Radim Krčmář Cc: Wanpeng Li Cc: Paolo Bonzini Cc: Takashi Iwai Cc: Nadav Amit Cc: Mike Galbraith Cc: Kirill A. Shutemov Cc: axie Cc: Andrew Morton --- fs/dax.c | 19 +++++++++++-------- include/linux/mm.h | 1 + mm/memory.c | 26 +++++++++++++++++++++----- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 865d42c63e23..ab925dc6647a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -646,11 +646,10 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pte_t pte, *ptep = NULL; pmd_t *pmdp = NULL; spinlock_t *ptl; - bool changed; i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) { - unsigned long address; + unsigned long address, start, end; cond_resched(); @@ -658,8 +657,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, continue; address = pgoff_address(index, vma); - changed = false; - if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl)) + + /* + * Note because we provide start/end to follow_pte_pmd it will + * call mmu_notifier_invalidate_range_start() on our behalf + * before taking any lock. + */ + if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl)) continue; if (pmdp) { @@ -676,7 +680,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); set_pmd_at(vma->vm_mm, address, pmdp, pmd); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pmd: spin_unlock(ptl); #endif @@ -691,13 +695,12 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pte = pte_wrprotect(pte); pte = pte_mkclean(pte); set_pte_at(vma->vm_mm, address, ptep, pte); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pte: pte_unmap_unlock(ptep, ptl); } - if (changed) - mmu_notifier_invalidate_page(vma->vm_mm, address); + mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); } i_mmap_unlock_read(mapping); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 46b9ac5e8569..c1f6c95f3496 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1260,6 +1260,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, + unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); diff --git a/mm/memory.c b/mm/memory.c index fe2fba27ded2..56e48e4593cb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4008,7 +4008,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) #endif /* __PAGETABLE_PMD_FOLDED */ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, - pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) + unsigned long *start, unsigned long *end, + pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; p4d_t *p4d; @@ -4035,17 +4036,29 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, if (!pmdpp) goto out; + if (start && end) { + *start = address & PMD_MASK; + *end = *start + PMD_SIZE; + mmu_notifier_invalidate_range_start(mm, *start, *end); + } *ptlp = pmd_lock(mm, pmd); if (pmd_huge(*pmd)) { *pmdpp = pmd; return 0; } spin_unlock(*ptlp); + if (start && end) + mmu_notifier_invalidate_range_end(mm, *start, *end); } if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) goto out; + if (start && end) { + *start = address & PAGE_MASK; + *end = *start + PAGE_SIZE; + mmu_notifier_invalidate_range_start(mm, *start, *end); + } ptep = pte_offset_map_lock(mm, pmd, address, ptlp); if (!pte_present(*ptep)) goto unlock; @@ -4053,6 +4066,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, return 0; unlock: pte_unmap_unlock(ptep, *ptlp); + if (start && end) + mmu_notifier_invalidate_range_end(mm, *start, *end); out: return -EINVAL; } @@ -4064,20 +4079,21 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address, /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, - !(res = __follow_pte_pmd(mm, address, ptepp, NULL, - ptlp))); + !(res = __follow_pte_pmd(mm, address, NULL, NULL, + ptepp, NULL, ptlp))); return res; } int follow_pte_pmd(struct mm_struct *mm, unsigned long address, + unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { int res; /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, - !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp, - ptlp))); + !(res = __follow_pte_pmd(mm, address, start, end, + ptepp, pmdpp, ptlp))); return res; } EXPORT_SYMBOL(follow_pte_pmd); -- 2.13.5