Originally, the callers of find_get_entries() and find_lock_entries()
kept track of the start index themselves as
they traversed the search range.
This resulted in hacky code such as in shmem_undo_range():
index = folio->index + folio_nr_pages(folio) - 1;
where the - 1 is only present so that index ends up in the right spot after
being incremented later. This sort of calculation was also being done
on every folio even though index is not used again within that function.
These patches change find_get_entries() and find_lock_entries() to calculate
the new index instead of leaving it to the callers so we can avoid all
these complications.
---
v3:
Fixed a typo in commit messages
Shifted calculations to after the rcu_read_unlock()
v2:
Fixed an issue when handling shadow entries
Dropped patches removing the indices array; it is required for value
entries
Vishal Moola (Oracle) (2):
filemap: find_lock_entries() now updates start offset
filemap: find_get_entries() now updates start offset
mm/filemap.c | 28 +++++++++++++++++++++++-----
mm/internal.h | 4 ++--
mm/shmem.c | 19 ++++++-------------
mm/truncate.c | 30 ++++++++++--------------------
4 files changed, 41 insertions(+), 40 deletions(-)
--
2.36.1
Initially, find_get_entries() was passed the start offset by
value. That left the calculation of the offset to the callers, which led
to complexity in the callers as they tried to keep track of the index.
Now find_get_entries() takes in a pointer to the start offset and
updates the value to be directly after the last entry found. If no entry is
found, the offset is not changed. This gets rid of multiple hacky
calculations that kept track of the start offset.
Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
mm/filemap.c | 13 +++++++++++--
mm/internal.h | 2 +-
mm/shmem.c | 11 ++++-------
mm/truncate.c | 19 +++++++------------
4 files changed, 23 insertions(+), 22 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index f1fec7bf5b15..804d335504f0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2053,10 +2053,10 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
*
* Return: The number of entries which were found.
*/
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
- XA_STATE(xas, &mapping->i_pages, start);
+ XA_STATE(xas, &mapping->i_pages, *start);
struct folio *folio;
rcu_read_lock();
@@ -2067,6 +2067,15 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
}
rcu_read_unlock();
+ if (folio_batch_count(fbatch)) {
+ unsigned long nr = 1;
+ int idx = folio_batch_count(fbatch) - 1;
+
+ folio = fbatch->folios[idx];
+ if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ nr = folio_nr_pages(folio);
+ *start = indices[idx] + nr;
+ }
return folio_batch_count(fbatch);
}
diff --git a/mm/internal.h b/mm/internal.h
index 14625de6714b..e87982cf1d48 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -106,7 +106,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
void filemap_free_folio(struct address_space *mapping, struct folio *folio);
int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
diff --git a/mm/shmem.c b/mm/shmem.c
index 9e17a2b0dc43..8c3c2ac15759 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -983,7 +983,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
while (index < end) {
cond_resched();
- if (!find_get_entries(mapping, index, end - 1, &fbatch,
+ if (!find_get_entries(mapping, &index, end - 1, &fbatch,
indices)) {
/* If all gone or hole-punch or unfalloc, we're done */
if (index == start || end != -1)
@@ -995,13 +995,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
for (i = 0; i < folio_batch_count(&fbatch); i++) {
folio = fbatch.folios[i];
- index = indices[i];
if (xa_is_value(folio)) {
if (unfalloc)
continue;
- if (shmem_free_swap(mapping, index, folio)) {
+ if (shmem_free_swap(mapping, indices[i], folio)) {
/* Swap was replaced by page: retry */
- index--;
+ index = indices[i];
break;
}
nr_swaps_freed++;
@@ -1014,19 +1013,17 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
if (folio_mapping(folio) != mapping) {
/* Page was replaced by swap: retry */
folio_unlock(folio);
- index--;
+ index = indices[i];
break;
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
truncate_inode_folio(mapping, folio);
}
- index = folio->index + folio_nr_pages(folio) - 1;
folio_unlock(folio);
}
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
- index++;
}
spin_lock_irq(&info->lock);
diff --git a/mm/truncate.c b/mm/truncate.c
index 9fbe282e70ba..faeeca45d4ed 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -400,7 +400,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
index = start;
while (index < end) {
cond_resched();
- if (!find_get_entries(mapping, index, end - 1, &fbatch,
+ if (!find_get_entries(mapping, &index, end - 1, &fbatch,
indices)) {
/* If all gone from start onwards, we're done */
if (index == start)
@@ -414,21 +414,18 @@ void truncate_inode_pages_range(struct address_space *mapping,
struct folio *folio = fbatch.folios[i];
/* We rely upon deletion not changing page->index */
- index = indices[i];
if (xa_is_value(folio))
continue;
folio_lock(folio);
- VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
folio_wait_writeback(folio);
truncate_inode_folio(mapping, folio);
folio_unlock(folio);
- index = folio_index(folio) + folio_nr_pages(folio) - 1;
}
truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
folio_batch_release(&fbatch);
- index++;
}
}
EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -636,16 +633,15 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
folio_batch_init(&fbatch);
index = start;
- while (find_get_entries(mapping, index, end, &fbatch, indices)) {
+ while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];
/* We rely upon deletion not changing folio->index */
- index = indices[i];
if (xa_is_value(folio)) {
if (!invalidate_exceptional_entry2(mapping,
- index, folio))
+ indices[i], folio))
ret = -EBUSY;
continue;
}
@@ -655,13 +651,13 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
* If folio is mapped, before taking its lock,
* zap the rest of the file in one hit.
*/
- unmap_mapping_pages(mapping, index,
- (1 + end - index), false);
+ unmap_mapping_pages(mapping, indices[i],
+ (1 + end - indices[i]), false);
did_range_unmap = 1;
}
folio_lock(folio);
- VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
if (folio->mapping != mapping) {
folio_unlock(folio);
continue;
@@ -684,7 +680,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
- index++;
}
/*
* For DAX we invalidate page tables after invalidating page cache. We
--
2.36.1
Initially, find_lock_entries() was passed the start offset by
value. That left the calculation of the offset to the callers, which led
to complexity in the callers as they tried to keep track of the index.
Now find_lock_entries() takes in a pointer to the start offset and
updates the value to be directly after the last entry found. If no entry is
found, the offset is not changed. This gets rid of multiple hacky
calculations that kept track of the start offset.
Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
mm/filemap.c | 15 ++++++++++++---
mm/internal.h | 2 +-
mm/shmem.c | 8 ++------
mm/truncate.c | 11 +++--------
4 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c943d1b90cc2..f1fec7bf5b15 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2090,16 +2090,16 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
*
* Return: The number of entries which were found.
*/
-unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
- XA_STATE(xas, &mapping->i_pages, start);
+ XA_STATE(xas, &mapping->i_pages, *start);
struct folio *folio;
rcu_read_lock();
while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
if (!xa_is_value(folio)) {
- if (folio->index < start)
+ if (folio->index < *start)
goto put;
if (folio->index + folio_nr_pages(folio) - 1 > end)
goto put;
@@ -2122,6 +2122,15 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
}
rcu_read_unlock();
+ if (folio_batch_count(fbatch)) {
+ unsigned long nr = 1;
+ int idx = folio_batch_count(fbatch) - 1;
+
+ folio = fbatch->folios[idx];
+ if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ nr = folio_nr_pages(folio);
+ *start = indices[idx] + nr;
+ }
return folio_batch_count(fbatch);
}
diff --git a/mm/internal.h b/mm/internal.h
index 785409805ed7..14625de6714b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -104,7 +104,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
force_page_cache_ra(&ractl, nr_to_read);
}
-unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
diff --git a/mm/shmem.c b/mm/shmem.c
index 42e5888bf84d..9e17a2b0dc43 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -932,21 +932,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
folio_batch_init(&fbatch);
index = start;
- while (index < end && find_lock_entries(mapping, index, end - 1,
+ while (index < end && find_lock_entries(mapping, &index, end - 1,
&fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
folio = fbatch.folios[i];
- index = indices[i];
-
if (xa_is_value(folio)) {
if (unfalloc)
continue;
nr_swaps_freed += !shmem_free_swap(mapping,
- index, folio);
+ indices[i], folio);
continue;
}
- index += folio_nr_pages(folio) - 1;
if (!unfalloc || !folio_test_uptodate(folio))
truncate_inode_folio(mapping, folio);
@@ -955,7 +952,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
- index++;
}
same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
diff --git a/mm/truncate.c b/mm/truncate.c
index 0b0708bf935f..9fbe282e70ba 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -361,9 +361,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
folio_batch_init(&fbatch);
index = start;
- while (index < end && find_lock_entries(mapping, index, end - 1,
+ while (index < end && find_lock_entries(mapping, &index, end - 1,
&fbatch, indices)) {
- index = indices[folio_batch_count(&fbatch) - 1] + 1;
truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
for (i = 0; i < folio_batch_count(&fbatch); i++)
truncate_cleanup_folio(fbatch.folios[i]);
@@ -510,20 +509,17 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
int i;
folio_batch_init(&fbatch);
- while (find_lock_entries(mapping, index, end, &fbatch, indices)) {
+ while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];
/* We rely upon deletion not changing folio->index */
- index = indices[i];
if (xa_is_value(folio)) {
count += invalidate_exceptional_entry(mapping,
- index,
- folio);
+ indices[i], folio);
continue;
}
- index += folio_nr_pages(folio) - 1;
ret = mapping_evict_folio(mapping, folio);
folio_unlock(folio);
@@ -542,7 +538,6 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
- index++;
}
return count;
}
--
2.36.1
On Mon, Oct 17, 2022 at 09:17:59AM -0700, Vishal Moola (Oracle) wrote:
> +++ b/mm/shmem.c
> @@ -932,21 +932,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
>
> folio_batch_init(&fbatch);
> index = start;
> - while (index < end && find_lock_entries(mapping, index, end - 1,
> + while (index < end && find_lock_entries(mapping, &index, end - 1,
Sorry for not spotting this in earlier revisions, but this is wrong.
Before, find_lock_entries() would go up to (end - 1) and then the
index++ at the end of the loop would increment index to "end", causing
the loop to terminate. Now we don't increment index any more, so the
condition is wrong.
I suggest just removing the "index < end" half of the condition.
> @@ -361,9 +361,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
>
> folio_batch_init(&fbatch);
> index = start;
> - while (index < end && find_lock_entries(mapping, index, end - 1,
> + while (index < end && find_lock_entries(mapping, &index, end - 1,
> &fbatch, indices)) {
Similarly here.
> @@ -510,20 +509,17 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
> int i;
>
> folio_batch_init(&fbatch);
> - while (find_lock_entries(mapping, index, end, &fbatch, indices)) {
> + while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
While this one had the check removed already, so is fine ;-)
On Mon, Oct 17, 2022 at 9:56 AM Matthew Wilcox <[email protected]> wrote:
>
> On Mon, Oct 17, 2022 at 09:17:59AM -0700, Vishal Moola (Oracle) wrote:
> > +++ b/mm/shmem.c
> > @@ -932,21 +932,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
> >
> > folio_batch_init(&fbatch);
> > index = start;
> > - while (index < end && find_lock_entries(mapping, index, end - 1,
> > + while (index < end && find_lock_entries(mapping, &index, end - 1,
>
> Sorry for not spotting this in earlier revisions, but this is wrong.
> Before, find_lock_entries() would go up to (end - 1) and then the
> index++ at the end of the loop would increment index to "end", causing
> the loop to terminate. Now we don't increment index any more, so the
> condition is wrong.
The condition is correct. Index maintains the exact same behavior.
If find_lock_entries() finds a folio, index is set to be directly after
the last page in that folio, or is simply incremented by one for a value entry.
The only time index is not changed at all is when find_lock_entries()
finds no folios, which is the same as the original behavior as well.
> I suggest just removing the 'index < end" half of the condition.
I hadn't thought about it earlier, but this index < end check seems
unnecessary anyway. If index > end, then find_lock_entries()
shouldn't find any folios, which would cause the loop to terminate.
I could send an updated version getting rid of the "index < end"
condition as well if you would like?
On Mon, Oct 17, 2022 at 12:37:48PM -0700, Vishal Moola wrote:
> On Mon, Oct 17, 2022 at 9:56 AM Matthew Wilcox <[email protected]> wrote:
> >
> > On Mon, Oct 17, 2022 at 09:17:59AM -0700, Vishal Moola (Oracle) wrote:
> > > +++ b/mm/shmem.c
> > > @@ -932,21 +932,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
> > >
> > > folio_batch_init(&fbatch);
> > > index = start;
> > > - while (index < end && find_lock_entries(mapping, index, end - 1,
> > > + while (index < end && find_lock_entries(mapping, &index, end - 1,
> >
> > Sorry for not spotting this in earlier revisions, but this is wrong.
> > Before, find_lock_entries() would go up to (end - 1) and then the
> > index++ at the end of the loop would increment index to "end", causing
> > the loop to terminate. Now we don't increment index any more, so the
> > condition is wrong.
>
> The condition is correct. Index maintains the exact same behavior.
> If a find_lock_entries() finds a folio, index is set to be directly after
> the last page in that folio, or simply incrementing for a value entry.
> The only time index is not changed at all is when find_lock_entries()
> finds no folios, which is the same as the original behavior as well.
Uh, right. I had the wrong idea in my head that index wouldn't increase
past end-1, but of course it can.
> > I suggest just removing the 'index < end" half of the condition.
>
> I hadn't thought about it earlier but this index < end check seems
> unnecessary anyways. If index > end then find_lock_entries()
> shouldn't find any folios which would cause the loop to terminate.
>
> I could send an updated version getting rid of the "index < end"
> condition as well if you would like?
Something to consider is that if end is 0 then end-1 is -1, which is
effectively infinity, and we'll do the wrong thing? So maybe just
leave it alone, and go with v3 as-is?
On Mon, Oct 17, 2022 at 12:43 PM Matthew Wilcox <[email protected]> wrote:
>
> On Mon, Oct 17, 2022 at 12:37:48PM -0700, Vishal Moola wrote:
> > On Mon, Oct 17, 2022 at 9:56 AM Matthew Wilcox <[email protected]> wrote:
> > >
> > > On Mon, Oct 17, 2022 at 09:17:59AM -0700, Vishal Moola (Oracle) wrote:
> > > > +++ b/mm/shmem.c
> > > > @@ -932,21 +932,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
> > > >
> > > > folio_batch_init(&fbatch);
> > > > index = start;
> > > > - while (index < end && find_lock_entries(mapping, index, end - 1,
> > > > + while (index < end && find_lock_entries(mapping, &index, end - 1,
> > >
> > > Sorry for not spotting this in earlier revisions, but this is wrong.
> > > Before, find_lock_entries() would go up to (end - 1) and then the
> > > index++ at the end of the loop would increment index to "end", causing
> > > the loop to terminate. Now we don't increment index any more, so the
> > > condition is wrong.
> >
> > The condition is correct. Index maintains the exact same behavior.
> > If a find_lock_entries() finds a folio, index is set to be directly after
> > the last page in that folio, or simply incrementing for a value entry.
> > The only time index is not changed at all is when find_lock_entries()
> > finds no folios, which is the same as the original behavior as well.
>
> Uh, right. I had the wrong idea in my head that index wouldn't increase
> past end-1, but of course it can.
>
> > > I suggest just removing the 'index < end" half of the condition.
> >
> > I hadn't thought about it earlier but this index < end check seems
> > unnecessary anyways. If index > end then find_lock_entries()
> > shouldn't find any folios which would cause the loop to terminate.
> >
> > I could send an updated version getting rid of the "index < end"
> > condition as well if you would like?
>
> Something to consider is that if end is 0 then end-1 is -1, which is
> effectively infinity, and we'll do the wrong thing? So maybe just
> leave it alone, and go with v3 as-is?
Yeah, in that case find_lock_entries() would definitely do the
wrong thing. I was thinking the "end-1" could be replaced with
"end" in addition to removing the "index < end". But that would
change the behavior of the function(s) to treat end as
inclusive rather than exclusive, which may or may not
be problematic. Considering that, I don't see any compelling
reason to eliminate the "index < end" condition.
I say we go with v3 as-is if there are no problems.