From: "Kirill A. Shutemov"
To: Hugh Dickins, Andrea Arcangeli, Andrew Morton
Cc: Dave Hansen, Vlastimil Babka, Christoph Lameter, Naoya Horiguchi, Jerome Marchand, Yang Shi, Sasha Levin, linux-kernel@vger.kernel.org, linux-mm@kvack.org, "Kirill A. Shutemov"
Subject: [PATCHv3 24/29] filemap: prepare find and delete operations for huge pages
Date: Thu, 3 Mar 2016 19:52:14 +0300
Message-Id: <1457023939-98083-25-git-send-email-kirill.shutemov@linux.intel.com>
X-Mailer: git-send-email 2.7.0
In-Reply-To: <1457023939-98083-1-git-send-email-kirill.shutemov@linux.intel.com>
References: <1457023939-98083-1-git-send-email-kirill.shutemov@linux.intel.com>

For now, we would have HPAGE_PMD_NR entries in the radix tree for every
huge page. That's suboptimal and it will be changed to use Matthew's
multi-order entries later.

The 'add' operation is not changed, because we don't need it to
implement huge tmpfs: shmem uses its own implementation.

Signed-off-by: Kirill A. Shutemov
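To make the layout concrete (an illustration only, not part of the patch):
with 4KB base pages on x86-64, HPAGE_PMD_NR is 512 and a radix tree leaf
node holds RADIX_TREE_MAP_SIZE = 64 slots, so a single huge page occupies
512 consecutive indices spread over 8 leaf nodes. That is why the new
page_cache_tree_delete() below re-does the lookup whenever the index
crosses a node border. The standalone sketch below (plain userspace C,
made-up index value) mimics that arithmetic; a second small sketch after
the patch checks the refcount handling in delete_from_page_cache().

  #include <assert.h>
  #include <stdio.h>

  #define HPAGE_PMD_NR          512   /* 2MB huge page / 4KB base pages */
  #define RADIX_TREE_MAP_SHIFT  6
  #define RADIX_TREE_MAP_SIZE   (1UL << RADIX_TREE_MAP_SHIFT)
  #define RADIX_TREE_MAP_MASK   (RADIX_TREE_MAP_SIZE - 1UL)

  int main(void)
  {
          unsigned long index = 1536;   /* made-up page->index of a huge page */
          unsigned long nr = HPAGE_PMD_NR;
          unsigned long i, lookups = 1; /* the initial __radix_tree_lookup() */

          /* page_cache_tree_delete() relies on an nr-aligned head index */
          assert((index & (nr - 1)) == 0);

          for (i = 0; i < nr; i++) {
                  /* crossing into the next leaf node forces a fresh lookup */
                  if (i && ((index + i) & RADIX_TREE_MAP_MASK) == 0)
                          lookups++;
          }

          printf("radix tree entries used: %lu (indices %lu..%lu)\n",
                 nr, index, index + nr - 1);
          printf("leaf nodes touched: %lu\n", lookups);  /* prints 8 */
          return 0;
  }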
---
 mm/filemap.c | 187 ++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 134 insertions(+), 53 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index de867f83dc0a..eb9dbdf5573d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -110,43 +110,18 @@
  *    ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
-static void page_cache_tree_delete(struct address_space *mapping,
-                                   struct page *page, void *shadow)
+static void __page_cache_tree_delete(struct address_space *mapping,
+                struct radix_tree_node *node, void **slot, unsigned long index,
+                void *shadow)
 {
-        struct radix_tree_node *node;
-        unsigned long index;
-        unsigned int offset;
         unsigned int tag;
-        void **slot;
-
-        VM_BUG_ON(!PageLocked(page));
-
-        __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
-
-        if (shadow) {
-                mapping->nrexceptional++;
-                /*
-                 * Make sure the nrexceptional update is committed before
-                 * the nrpages update so that final truncate racing
-                 * with reclaim does not see both counters 0 at the
-                 * same time and miss a shadow entry.
-                 */
-                smp_wmb();
-        }
-        mapping->nrpages--;
-
-        if (!node) {
-                /* Clear direct pointer tags in root node */
-                mapping->page_tree.gfp_mask &= __GFP_BITS_MASK;
-                radix_tree_replace_slot(slot, shadow);
-                return;
-        }
+        VM_BUG_ON(node == NULL);
+        VM_BUG_ON(*slot == NULL);
 
         /* Clear tree tags for the removed page */
-        index = page->index;
-        offset = index & RADIX_TREE_MAP_MASK;
         for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
-                if (test_bit(offset, node->tags[tag]))
+                if (test_bit(index & RADIX_TREE_MAP_MASK, node->tags[tag]))
                         radix_tree_tag_clear(&mapping->page_tree, index, tag);
         }
 
@@ -173,6 +148,54 @@ static void page_cache_tree_delete(struct address_space *mapping,
         }
 }
 
+static void page_cache_tree_delete(struct address_space *mapping,
+                                   struct page *page, void *shadow)
+{
+        struct radix_tree_node *node;
+        unsigned long index;
+        void **slot;
+        int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
+
+        VM_BUG_ON_PAGE(!PageLocked(page), page);
+        VM_BUG_ON_PAGE(PageTail(page), page);
+
+        __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
+
+        if (shadow) {
+                mapping->nrexceptional += nr;
+                /*
+                 * Make sure the nrexceptional update is committed before
+                 * the nrpages update so that final truncate racing
+                 * with reclaim does not see both counters 0 at the
+                 * same time and miss a shadow entry.
+                 */
+                smp_wmb();
+        }
+        mapping->nrpages -= nr;
+
+        if (!node) {
+                /* Clear direct pointer tags in root node */
+                mapping->page_tree.gfp_mask &= __GFP_BITS_MASK;
+                VM_BUG_ON(nr != 1);
+                radix_tree_replace_slot(slot, shadow);
+                return;
+        }
+
+        index = page->index;
+        VM_BUG_ON_PAGE(index & (nr - 1), page);
+        for (i = 0; i < nr; i++) {
+                /* Cross node border */
+                if (i && ((index + i) & RADIX_TREE_MAP_MASK) == 0) {
+                        __radix_tree_lookup(&mapping->page_tree,
+                                        page->index + i, &node, &slot);
+                }
+
+                __page_cache_tree_delete(mapping, node,
+                                slot + (i & RADIX_TREE_MAP_MASK), index + i,
+                                shadow);
+        }
+}
+
 /*
  * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
@@ -181,6 +204,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
 void __delete_from_page_cache(struct page *page, void *shadow)
 {
         struct address_space *mapping = page->mapping;
+        int nr = hpage_nr_pages(page);
 
         trace_mm_filemap_delete_from_page_cache(page);
         /*
@@ -200,9 +224,10 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 
         /* hugetlb pages do not participate in page cache accounting. */
         if (!PageHuge(page))
-                __dec_zone_page_state(page, NR_FILE_PAGES);
+                __mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
         if (PageSwapBacked(page))
-                __dec_zone_page_state(page, NR_SHMEM);
+                __mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
+        VM_BUG_ON_PAGE(PageTail(page), page);
         VM_BUG_ON_PAGE(page_mapped(page), page);
 
         /*
@@ -227,9 +252,8 @@
  */
 void delete_from_page_cache(struct page *page)
 {
-        struct address_space *mapping = page->mapping;
+        struct address_space *mapping = page_mapping(page);
         unsigned long flags;
-        void (*freepage)(struct page *);
 
         BUG_ON(!PageLocked(page));
 
@@ -242,7 +266,13 @@ void delete_from_page_cache(struct page *page)
 
         if (freepage)
                 freepage(page);
-        page_cache_release(page);
+
+        if (PageTransHuge(page) && !PageHuge(page)) {
+                atomic_sub(HPAGE_PMD_NR, &page->_count);
+                VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
+        } else {
+                page_cache_release(page);
+        }
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
@@ -1035,7 +1065,7 @@ EXPORT_SYMBOL(page_cache_prev_hole);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
         void **pagep;
-        struct page *page;
+        struct page *head, *page;
 
         rcu_read_lock();
 repeat:
@@ -1055,9 +1085,17 @@ repeat:
                          */
                         goto out;
                 }
-                if (!page_cache_get_speculative(page))
+
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto repeat;
 
+                /* The page was split under us? */
+                if (compound_head(page) != head) {
+                        page_cache_release(page);
+                        goto repeat;
+                }
+
                 /*
                  * Has the page moved?
                  * This is part of the lockless pagecache protocol. See
@@ -1100,12 +1138,12 @@ repeat:
         if (page && !radix_tree_exception(page)) {
                 lock_page(page);
                 /* Has the page been truncated? */
-                if (unlikely(page->mapping != mapping)) {
+                if (unlikely(page_mapping(page) != mapping)) {
                         unlock_page(page);
                         page_cache_release(page);
                         goto repeat;
                 }
-                VM_BUG_ON_PAGE(page->index != offset, page);
+                VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
         }
         return page;
 }
@@ -1238,7 +1276,7 @@ unsigned find_get_entries(struct address_space *mapping,
         rcu_read_lock();
 restart:
         radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-                struct page *page;
+                struct page *head, *page;
 repeat:
                 page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
@@ -1253,8 +1291,16 @@ repeat:
                          */
                         goto export;
                 }
-                if (!page_cache_get_speculative(page))
+
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
+                        goto repeat;
+
+                /* The page was split under us? */
+                if (compound_head(page) != head) {
+                        page_cache_release(page);
                         goto repeat;
+                }
 
                 /* Has the page moved? */
                 if (unlikely(page != *slot)) {
@@ -1300,7 +1346,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
         rcu_read_lock();
 restart:
         radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-                struct page *page;
+                struct page *head, *page;
 repeat:
                 page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
@@ -1324,9 +1370,16 @@ repeat:
                         continue;
                 }
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto repeat;
 
+                /* The page was split under us? */
+                if (compound_head(page) != head) {
+                        page_cache_release(page);
+                        goto repeat;
+                }
+
                 /* Has the page moved? */
                 if (unlikely(page != *slot)) {
                         page_cache_release(page);
@@ -1367,7 +1420,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
         rcu_read_lock();
 restart:
         radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
-                struct page *page;
+                struct page *head, *page;
 repeat:
                 page = radix_tree_deref_slot(slot);
                 /* The hole, there no reason to continue */
@@ -1391,8 +1444,14 @@ repeat:
                         break;
                 }
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto repeat;
+                /* The page was split under us? */
+                if (compound_head(page) != head) {
+                        page_cache_release(page);
+                        goto repeat;
+                }
 
                 /* Has the page moved? */
                 if (unlikely(page != *slot)) {
@@ -1405,7 +1464,7 @@ repeat:
                  * otherwise we can get both false positives and false
                  * negatives, which is just confusing to the caller.
                  */
-                if (page->mapping == NULL || page->index != iter.index) {
+                if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
                         page_cache_release(page);
                         break;
                 }
@@ -1444,7 +1503,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 restart:
         radix_tree_for_each_tagged(slot, &mapping->page_tree,
                                    &iter, *index, tag) {
-                struct page *page;
+                struct page *head, *page;
 repeat:
                 page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
@@ -1473,8 +1532,15 @@ repeat:
                         continue;
                 }
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
+                        goto repeat;
+
+                /* The page was split under us? */
+                if (compound_head(page) != head) {
+                        page_cache_release(page);
                         goto repeat;
+                }
 
                 /* Has the page moved? */
                 if (unlikely(page != *slot)) {
@@ -1523,7 +1589,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
 restart:
         radix_tree_for_each_tagged(slot, &mapping->page_tree,
                                    &iter, start, tag) {
-                struct page *page;
+                struct page *head, *page;
 repeat:
                 page = radix_tree_deref_slot(slot);
                 if (unlikely(!page))
@@ -1545,9 +1611,17 @@ repeat:
                          */
                         goto export;
                 }
-                if (!page_cache_get_speculative(page))
+
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto repeat;
 
+                /* The page was split under us? */
+                if (compound_head(page) != head) {
+                        page_cache_release(page);
+                        goto repeat;
+                }
+
                 /* Has the page moved? */
                 if (unlikely(page != *slot)) {
                         page_cache_release(page);
@@ -2140,7 +2214,7 @@ void filemap_map_pages(struct fault_env *fe,
         struct address_space *mapping = file->f_mapping;
         pgoff_t last_pgoff = start_pgoff;
         loff_t size;
-        struct page *page;
+        struct page *head, *page;
 
         rcu_read_lock();
         radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
@@ -2158,8 +2232,15 @@ repeat:
                         goto next;
                 }
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
+                        goto repeat;
+
+                /* The page was split under us? */
+                if (compound_head(page) != head) {
+                        page_cache_release(page);
                         goto repeat;
+                }
 
                 /* Has the page moved? */
                 if (unlikely(page != *slot)) {
-- 
2.7.0
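A quick check of the new reference counting in delete_from_page_cache()
(again an illustration in plain userspace C, not kernel code; it assumes,
as on the shmem side of this series, that adding a huge page to the page
cache takes one reference per radix tree entry, i.e. HPAGE_PMD_NR of them):

  #include <assert.h>
  #include <stdio.h>

  #define HPAGE_PMD_NR 512

  int main(void)
  {
          /* hypothetical refcount of a THP sitting in the page cache:
           * one reference per radix tree entry plus one held by the caller */
          int count = HPAGE_PMD_NR + 1;

          /* delete_from_page_cache(): drop all page cache references at once,
           * mirroring atomic_sub(HPAGE_PMD_NR, &page->_count) in the patch */
          count -= HPAGE_PMD_NR;
          assert(count > 0);      /* the VM_BUG_ON_PAGE() check */

          printf("refcount after removal: %d\n", count);  /* prints 1 */
          return 0;
  }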