Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754887Ab3FLEYA (ORCPT ); Wed, 12 Jun 2013 00:24:00 -0400 Received: from mail-pa0-f50.google.com ([209.85.220.50]:63491 "EHLO mail-pa0-f50.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753698Ab3FLEX2 (ORCPT ); Wed, 12 Jun 2013 00:23:28 -0400 From: John Stultz To: LKML Cc: Minchan Kim , Andrew Morton , Android Kernel Team , Robert Love , Mel Gorman , Hugh Dickins , Dave Hansen , Rik van Riel , Dmitry Adamushko , Dave Chinner , Neil Brown , Andrea Righi , Andrea Arcangeli , "Aneesh Kumar K.V" , Mike Hommey , Taras Glek , Dhaval Giani , Jan Kara , KOSAKI Motohiro , Michel Lespinasse , "linux-mm@kvack.org" , John Stultz Subject: [PATCH 8/8] vrange: Send SIGBUS when user try to access purged page Date: Tue, 11 Jun 2013 21:22:51 -0700 Message-Id: <1371010971-15647-9-git-send-email-john.stultz@linaro.org> X-Mailer: git-send-email 1.8.1.2 In-Reply-To: <1371010971-15647-1-git-send-email-john.stultz@linaro.org> References: <1371010971-15647-1-git-send-email-john.stultz@linaro.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6735 Lines: 216 From: Minchan Kim By vrange(2) semantic, user should see SIGBUG if he try to access purged page without vrange(...VRANGE_NOVOLATILE). This patch implements it. XXX: I reused PSE bit for quick prototype without enough considering so need time to see what's empty bit and I am surely missing many places to handle vrange pte bit. I should investigate all of pte handling places, especially pte_none case. Cc: Andrew Morton Cc: Android Kernel Team Cc: Robert Love Cc: Mel Gorman Cc: Hugh Dickins Cc: Dave Hansen Cc: Rik van Riel Cc: Dmitry Adamushko Cc: Dave Chinner Cc: Neil Brown Cc: Andrea Righi Cc: Andrea Arcangeli Cc: Aneesh Kumar K.V Cc: Mike Hommey Cc: Taras Glek Cc: Dhaval Giani Cc: Jan Kara Cc: KOSAKI Motohiro Cc: Michel Lespinasse Cc: Minchan Kim Cc: linux-mm@kvack.org Signed-off-by: Minchan Kim [jstultz: Extended to work with file pages] Signed-off-by: John Stultz --- arch/x86/include/asm/pgtable_types.h | 2 ++ include/asm-generic/pgtable.h | 11 +++++++++++ include/linux/vrange.h | 2 ++ mm/memory.c | 23 +++++++++++++++++++++-- mm/vrange.c | 35 ++++++++++++++++++++++++++++++++++- 5 files changed, 70 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e642300..d7ea6a0 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -64,6 +64,8 @@ #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +#define _PAGE_VRANGE _PAGE_BIT_PSE + /* * _PAGE_NUMA indicates that this page will trigger a numa hinting * minor page fault to gather numa placement statistics (see diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index a59ff51..91e8f6f 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -479,6 +479,17 @@ static inline unsigned long my_zero_pfn(unsigned long addr) #ifdef CONFIG_MMU +static inline pte_t pte_mkvrange(pte_t pte) +{ + pte = pte_set_flags(pte, _PAGE_VRANGE); + return pte_clear_flags(pte, _PAGE_PRESENT); +} + +static inline int pte_vrange(pte_t pte) +{ + return ((pte_flags(pte) | _PAGE_PRESENT) == _PAGE_VRANGE); +} + #ifndef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_trans_huge(pmd_t pmd) { diff --git a/include/linux/vrange.h b/include/linux/vrange.h index cbb609a..75754d1 100644 --- a/include/linux/vrange.h +++ b/include/linux/vrange.h @@ -41,6 +41,8 @@ int discard_vpage(struct page *page); bool vrange_address(struct mm_struct *mm, unsigned long start, unsigned long end); +extern bool is_purged_vrange(struct mm_struct *mm, unsigned long address); + #else static inline void vrange_init(void) {}; diff --git a/mm/memory.c b/mm/memory.c index 61a262b..cc5c70b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -832,7 +833,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, /* pte contains position in swap or file, so copy. */ if (unlikely(!pte_present(pte))) { - if (!pte_file(pte)) { + if (!pte_file(pte) && !pte_vrange(pte)) { swp_entry_t entry = pte_to_swp_entry(pte); if (swap_duplicate(entry) < 0) @@ -1172,7 +1173,7 @@ again: if (pte_file(ptent)) { if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) print_bad_pte(vma, addr, ptent, NULL); - } else { + } else if (!pte_vrange(ptent)) { swp_entry_t entry = pte_to_swp_entry(ptent); if (!non_swap_entry(entry)) @@ -3707,9 +3708,27 @@ int handle_pte_fault(struct mm_struct *mm, return do_linear_fault(mm, vma, address, pte, pmd, flags, entry); } +anon: return do_anonymous_page(mm, vma, address, pte, pmd, flags); } + + if (unlikely(pte_vrange(entry))) { + if (!is_purged_vrange(mm, address)) { + /* zap pte */ + ptl = pte_lockptr(mm, pmd); + spin_lock(ptl); + if (unlikely(!pte_same(*pte, entry))) + goto unlock; + flush_cache_page(vma, address, pte_pfn(*pte)); + ptep_clear_flush(vma, address, pte); + pte_unmap_unlock(pte, ptl); + goto anon; + } + + return VM_FAULT_SIGBUS; + } + if (pte_file(entry)) return do_nonlinear_fault(mm, vma, address, pte, pmd, flags, entry); diff --git a/mm/vrange.c b/mm/vrange.c index 1c8c447..fa965fb 100644 --- a/mm/vrange.c +++ b/mm/vrange.c @@ -504,7 +504,9 @@ int try_to_discard_one(struct vrange_root *vroot, struct page *page, present = pte_present(*pte); flush_cache_page(vma, address, page_to_pfn(page)); - pteval = ptep_clear_flush(vma, address, pte); + + ptep_clear_flush(vma, address, pte); + pteval = pte_mkvrange(*pte); update_hiwater_rss(mm); if (PageAnon(page)) @@ -521,6 +523,7 @@ int try_to_discard_one(struct vrange_root *vroot, struct page *page, BUG_ON(1); } + set_pte_at(mm, address, pte, pteval); pte_unmap_unlock(pte, ptl); mmu_notifier_invalidate_page(mm, address); ret = 1; @@ -696,3 +699,33 @@ int discard_vpage(struct page *page) return 0; } +bool is_purged_vrange(struct mm_struct *mm, unsigned long address) +{ + struct vrange_root *vroot; + struct interval_tree_node *node; + struct vrange *range; + unsigned long vstart_idx; + struct vm_area_struct *vma; + bool ret = false; + + vma = find_vma(mm, address); + if (vma->vm_file && (vma->vm_flags & VM_SHARED)) { + vroot = &vma->vm_file->f_mapping->vroot; + vstart_idx = vma->vm_pgoff + address - vma->vm_start; + } else { + vroot = &mm->vroot; + vstart_idx = address; + } + + vrange_lock(vroot); + node = interval_tree_iter_first(&vroot->v_rb, vstart_idx, + vstart_idx + PAGE_SIZE - 1); + if (node) { + range = container_of(node, struct vrange, node); + if (range->purged) + ret = true; + } + vrange_unlock(vroot); + return ret; +} + -- 1.8.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/