LinuxLists.cc - [PATCH v2 0/2] Rework find_get_entries() and find_lock_entries()

2022-10-13 23:40:40

Subject: [PATCH v2 0/2] Rework find_get_entries() and find_lock_entries()

Originally the callers of find_get_entries() and find_lock_entries()
were keeping track of the start index themselves as
they traversed the search range.

This resulted in hacky code such as in shmem_undo_range():

index = folio->index + folio_nr_pages(folio) - 1;

where the - 1 is only present to stay in the right spot after
incrementing index later. This sort of calculation was also being done
on every folio even though index was not used later within that function.

These patches change find_get_entries() and find_lock_entries() to calculate
the new index instead of leaving it to the callers so we can avoid all
these complications.

---
v2:
Fixed an issue when handling shadow entries
Dropped patches removing the indices array; it is required for value
entries

Vishal Moola (Oracle) (2):
filemap: find_lock_entries() now updates start offset
filemap: find_get_entries() now updates start offset

mm/filemap.c | 32 +++++++++++++++++++++++++-------
mm/internal.h | 4 ++--
mm/shmem.c | 19 ++++++-------------
mm/truncate.c | 30 ++++++++++--------------------
4 files changed, 43 insertions(+), 42 deletions(-)

--
2.36.1

2022-10-13 23:51:10

by Vishal Moola

[permalink] [raw]

Subject: [PATCH v2 2/2] filemap: find_get_entries() now updates start offset

Initially, find_get_entries() was being passed in the start offset as a
value. That left the calculation of the offset to the callers. This led
to complexity in the callers trying to keep track of the index.

Now find_get_entries() takes in a pointer to the start offset and
updates the value to be directly after the last entry found. If no entry is
found, the offset is not changed. This gets rid of multiple hacky
calculations that kept track of the start offset.

Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
mm/filemap.c | 15 ++++++++++++---
mm/internal.h | 2 +-
mm/shmem.c | 11 ++++-------
mm/truncate.c | 19 +++++++------------
4 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index b6aaded95132..ed66fecf06d9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2053,10 +2053,10 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
*
* Return: The number of entries which were found.
*/
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
- XA_STATE(xas, &mapping->i_pages, start);
+ XA_STATE(xas, &mapping->i_pages, *start);
struct folio *folio;

rcu_read_lock();
@@ -2065,8 +2065,17 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
if (!folio_batch_add(fbatch, folio))
break;
}
- rcu_read_unlock();

+ if (folio_batch_count(fbatch)) {
+ unsigned long nr = 1;
+ int idx = folio_batch_count(fbatch) - 1;
+
+ folio = fbatch->folios[idx];
+ if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ nr = folio_nr_pages(folio);
+ *start = indices[idx] + nr;
+ }
+ rcu_read_unlock();
return folio_batch_count(fbatch);
}

diff --git a/mm/internal.h b/mm/internal.h
index 14625de6714b..e87982cf1d48 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -106,7 +106,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping,

unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
void filemap_free_folio(struct address_space *mapping, struct folio *folio);
int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
diff --git a/mm/shmem.c b/mm/shmem.c
index 9e17a2b0dc43..8c3c2ac15759 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -983,7 +983,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
while (index < end) {
cond_resched();

- if (!find_get_entries(mapping, index, end - 1, &fbatch,
+ if (!find_get_entries(mapping, &index, end - 1, &fbatch,
indices)) {
/* If all gone or hole-punch or unfalloc, we're done */
if (index == start || end != -1)
@@ -995,13 +995,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
for (i = 0; i < folio_batch_count(&fbatch); i++) {
folio = fbatch.folios[i];

- index = indices[i];
if (xa_is_value(folio)) {
if (unfalloc)
continue;
- if (shmem_free_swap(mapping, index, folio)) {
+ if (shmem_free_swap(mapping, indices[i], folio)) {
/* Swap was replaced by page: retry */
- index--;
+ index = indices[i];
break;
}
nr_swaps_freed++;
@@ -1014,19 +1013,17 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
if (folio_mapping(folio) != mapping) {
/* Page was replaced by swap: retry */
folio_unlock(folio);
- index--;
+ index = indices[i];
break;
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
truncate_inode_folio(mapping, folio);
}
- index = folio->index + folio_nr_pages(folio) - 1;
folio_unlock(folio);
}
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
- index++;
}

spin_lock_irq(&info->lock);
diff --git a/mm/truncate.c b/mm/truncate.c
index 9fbe282e70ba..faeeca45d4ed 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -400,7 +400,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
index = start;
while (index < end) {
cond_resched();
- if (!find_get_entries(mapping, index, end - 1, &fbatch,
+ if (!find_get_entries(mapping, &index, end - 1, &fbatch,
indices)) {
/* If all gone from start onwards, we're done */
if (index == start)
@@ -414,21 +414,18 @@ void truncate_inode_pages_range(struct address_space *mapping,
struct folio *folio = fbatch.folios[i];

/* We rely upon deletion not changing page->index */
- index = indices[i];

if (xa_is_value(folio))
continue;

folio_lock(folio);
- VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
folio_wait_writeback(folio);
truncate_inode_folio(mapping, folio);
folio_unlock(folio);
- index = folio_index(folio) + folio_nr_pages(folio) - 1;
}
truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
folio_batch_release(&fbatch);
- index++;
}
}
EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -636,16 +633,15 @@ int invalidate_inode_pages2_range(struct address_space *mapping,

folio_batch_init(&fbatch);
index = start;
- while (find_get_entries(mapping, index, end, &fbatch, indices)) {
+ while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];

/* We rely upon deletion not changing folio->index */
- index = indices[i];

if (xa_is_value(folio)) {
if (!invalidate_exceptional_entry2(mapping,
- index, folio))
+ indices[i], folio))
ret = -EBUSY;
continue;
}
@@ -655,13 +651,13 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
* If folio is mapped, before taking its lock,
* zap the rest of the file in one hit.
*/
- unmap_mapping_pages(mapping, index,
- (1 + end - index), false);
+ unmap_mapping_pages(mapping, indices[i],
+ (1 + end - indices[i]), false);
did_range_unmap = 1;
}

folio_lock(folio);
- VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
if (folio->mapping != mapping) {
folio_unlock(folio);
continue;
@@ -684,7 +680,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
- index++;
}
/*
* For DAX we invalidate page tables after invalidating page cache. We
--
2.36.1

2022-10-13 23:55:29

by Vishal Moola

[permalink] [raw]

Subject: [PATCH v2 1/2] filemap: find_lock_entries() now updates start offset

Initially, find_lock_entries() was being passed in the start offset as a
value. That left the calculation of the offset to the callers. This led
to complexity in the callers trying to keep track of the index.

Now find_lock_entries() takes in a pointer to the start offset and
updates the value to be directly after the last entry found. If no entry is
found, the offset is not changed. This gets rid of multiple hacky
calculations that kept track of the start offset.

Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
mm/filemap.c | 17 +++++++++++++----
mm/internal.h | 2 +-
mm/shmem.c | 8 ++------
mm/truncate.c | 11 +++--------
4 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index c943d1b90cc2..b6aaded95132 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2090,16 +2090,16 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
*
* Return: The number of entries which were found.
*/
-unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
- XA_STATE(xas, &mapping->i_pages, start);
+ XA_STATE(xas, &mapping->i_pages, *start);
struct folio *folio;

rcu_read_lock();
while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
if (!xa_is_value(folio)) {
- if (folio->index < start)
+ if (folio->index < *start)
goto put;
if (folio->index + folio_nr_pages(folio) - 1 > end)
goto put;
@@ -2120,8 +2120,17 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
put:
folio_put(folio);
}
- rcu_read_unlock();

+ if (folio_batch_count(fbatch)) {
+ unsigned long nr = 1;
+ int idx = folio_batch_count(fbatch) - 1;
+
+ folio = fbatch->folios[idx];
+ if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ nr = folio_nr_pages(folio);
+ *start = indices[idx] + nr;
+ }
+ rcu_read_unlock();
return folio_batch_count(fbatch);
}

diff --git a/mm/internal.h b/mm/internal.h
index 785409805ed7..14625de6714b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -104,7 +104,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
force_page_cache_ra(&ractl, nr_to_read);
}

-unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
diff --git a/mm/shmem.c b/mm/shmem.c
index 42e5888bf84d..9e17a2b0dc43 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -932,21 +932,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,

folio_batch_init(&fbatch);
index = start;
- while (index < end && find_lock_entries(mapping, index, end - 1,
+ while (index < end && find_lock_entries(mapping, &index, end - 1,
&fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
folio = fbatch.folios[i];

- index = indices[i];
-
if (xa_is_value(folio)) {
if (unfalloc)
continue;
nr_swaps_freed += !shmem_free_swap(mapping,
- index, folio);
+ indices[i], folio);
continue;
}
- index += folio_nr_pages(folio) - 1;

if (!unfalloc || !folio_test_uptodate(folio))
truncate_inode_folio(mapping, folio);
@@ -955,7 +952,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
- index++;
}

same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
diff --git a/mm/truncate.c b/mm/truncate.c
index 0b0708bf935f..9fbe282e70ba 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -361,9 +361,8 @@ void truncate_inode_pages_range(struct address_space *mapping,

folio_batch_init(&fbatch);
index = start;
- while (index < end && find_lock_entries(mapping, index, end - 1,
+ while (index < end && find_lock_entries(mapping, &index, end - 1,
&fbatch, indices)) {
- index = indices[folio_batch_count(&fbatch) - 1] + 1;
truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
for (i = 0; i < folio_batch_count(&fbatch); i++)
truncate_cleanup_folio(fbatch.folios[i]);
@@ -510,20 +509,17 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
int i;

folio_batch_init(&fbatch);
- while (find_lock_entries(mapping, index, end, &fbatch, indices)) {
+ while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];

/* We rely upon deletion not changing folio->index */
- index = indices[i];

if (xa_is_value(folio)) {
count += invalidate_exceptional_entry(mapping,
- index,
- folio);
+ indices[i], folio);
continue;
}
- index += folio_nr_pages(folio) - 1;

ret = mapping_evict_folio(mapping, folio);
folio_unlock(folio);
@@ -542,7 +538,6 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
- index++;
}
return count;
}
--
2.36.1

2022-10-14 04:39:43

by Matthew Wilcox

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] filemap: find_lock_entries() now updates start offset

On Thu, Oct 13, 2022 at 03:57:07PM -0700, Vishal Moola (Oracle) wrote:
> Initially, find_lock_entries() was being passed in the start offset as a
> value. That left the calculation of the offset to the callers. This led
> to complexity in the callers trying to keep track of the index.
>
> Now find_lock_entires() takes in a pointer to the start offset and

s/entires/entries/

> updates the value to be directly after the last entry found. If no entry is
> found, the offset is not changed. This gets rid of multiple hacky
> calculations that kept track of the start offset.

> @@ -2120,8 +2120,17 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
> put:
> folio_put(folio);
> }
> - rcu_read_unlock();
>
> + if (folio_batch_count(fbatch)) {
> + unsigned long nr = 1;
> + int idx = folio_batch_count(fbatch) - 1;
> +
> + folio = fbatch->folios[idx];
> + if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
> + nr = folio_nr_pages(folio);
> + *start = indices[idx] + nr;
> + }
> + rcu_read_unlock();
> return folio_batch_count(fbatch);

Do we need to move the rcu_read_unlock()? Pretty sure we can do all
these calculations without it.

This all looks good. It's certainly more ergonomic to use.

Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>