In preparation for the LSF/MM/BPF 2024 discussion [1], the patches below add
support for large folios in shmem for the write and fallocate paths.
[1] https://lore.kernel.org/all/4ktpayu66noklllpdpspa3vm5gbmb5boxskcj2q6qn7md3pwwt@kvlu64pqwjzl/
test
This version includes per-block uptodate tracking required for lseek when
enabling support for large folios. Initially, this feature was introduced to
address lseek fstests (specifically generic/285 and generic/436) for huge pages.
However, it was suggested that, for THP, the test should be adapted to PAGE_SIZE
and PMD_SIZE. Nevertheless, with arbitrary folio orders we require the lowest
granularity possible. This topic will be part of the discussion in tomorrow's
session.
Fstests expunges results can be found in kdevops' tree:
https://github.com/linux-kdevops/kdevops/tree/main/workflows/fstests/expunges/6.9.0-shmem-large-folios-with-block-tracking/tmpfs
https://github.com/linux-kdevops/kdevops/tree/main/workflows/fstests/expunges/6.8.0-shmem-large-folios-with-block-tracking/tmpfs
Daniel
Daniel Gomez (11):
shmem: add per-block uptodate tracking for large folios
shmem: move folio zero operation to write_begin()
shmem: exit shmem_get_folio_gfp() if block is uptodate
shmem: clear_highpage() if block is not uptodate
shmem: set folio uptodate when reclaim
shmem: check if a block is uptodate before splice into pipe
shmem: clear uptodate blocks after PUNCH_HOLE
shmem: enable per-block uptodate
shmem: add order arg to shmem_alloc_folio()
shmem: add file length arg in shmem_get_folio() path
shmem: add large folio support to the write and fallocate paths
Pankaj Raghav (1):
splice: don't check for uptodate if partially uptodate is impl
fs/splice.c | 17 +-
fs/xfs/scrub/xfile.c | 6 +-
fs/xfs/xfs_buf_mem.c | 3 +-
include/linux/shmem_fs.h | 2 +-
mm/khugepaged.c | 3 +-
mm/shmem.c | 441 ++++++++++++++++++++++++++++++++++-----
mm/userfaultfd.c | 2 +-
7 files changed, 417 insertions(+), 57 deletions(-)
--
2.43.0
Simplify the zero-out operation by moving it from write_end() to
write_begin(). If a large folio does not have any block uptodate when we
first get it, zero it out entirely.
Signed-off-by: Daniel Gomez <[email protected]>
---
mm/shmem.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4818f9fbd328..86ad539b6a0f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -149,6 +149,14 @@ static inline bool sfs_is_fully_uptodate(struct folio *folio)
return bitmap_full(sfs->state, i_blocks_per_folio(inode, folio));
}
+static inline bool sfs_is_any_uptodate(struct folio *folio)
+{
+ struct inode *inode = folio->mapping->host;
+ struct shmem_folio_state *sfs = folio->private;
+
+ return !bitmap_empty(sfs->state, i_blocks_per_folio(inode, folio));
+}
+
static inline bool sfs_is_block_uptodate(struct shmem_folio_state *sfs,
unsigned int block)
{
@@ -239,6 +247,15 @@ static void sfs_free(struct folio *folio, bool force)
kfree(folio_detach_private(folio));
}
+static inline bool shmem_is_any_uptodate(struct folio *folio)
+{
+ struct shmem_folio_state *sfs = folio->private;
+
+ if (folio_test_large(folio) && sfs)
+ return sfs_is_any_uptodate(folio);
+ return folio_test_uptodate(folio);
+}
+
static void shmem_set_range_uptodate(struct folio *folio, size_t off,
size_t len)
{
@@ -2872,6 +2889,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
if (ret)
return ret;
+ if (!shmem_is_any_uptodate(folio))
+ folio_zero_range(folio, 0, folio_size(folio));
+
*pagep = folio_file_page(folio, index);
if (PageHWPoison(*pagep)) {
folio_unlock(folio);
@@ -2894,13 +2914,6 @@ shmem_write_end(struct file *file, struct address_space *mapping,
if (pos + copied > inode->i_size)
i_size_write(inode, pos + copied);
- if (!folio_test_uptodate(folio)) {
- if (copied < folio_size(folio)) {
- size_t from = offset_in_folio(folio, pos);
- folio_zero_segments(folio, 0, from,
- from + copied, folio_size(folio));
- }
- }
shmem_set_range_uptodate(folio, 0, folio_size(folio));
folio_mark_dirty(folio);
folio_unlock(folio);
--
2.43.0