2023-01-09 17:28:34

by Fabio M. De Francesco

[permalink] [raw]
Subject: [PATCH v2 0/4] fs/sysv: Replace kmap() with kmap_local_page()

kmap() is deprecated in favor of kmap_local_page().

There are two main problems with kmap(): (1) It comes with an overhead as
the mapping space is restricted and protected by a global lock for
synchronization and (2) it also requires global TLB invalidation when the
kmap’s pool wraps and it might block when the mapping space is fully
utilized until a slot becomes available.

With kmap_local_page() the mappings are per thread, CPU local, can take
page faults, and can be called from any context (including interrupts).
It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
the tasks can be preempted and, when they are scheduled to run again, the
kernel virtual addresses are restored and still valid.

kmap_local_page() in fs/sysv does not violate any of the strict rules of
its use, therefore it should be preferred.

Therefore, replace kmap() with kmap_local_page() in fs/sysv. kunmap_local()
requires the mapping address, so return that address from dir_get_page()
to be used in dir_put_page().

I had submitted a patch with the same purpose but it has been replaced
by this series.[1] This is based on a long series of very appreciated
comments and suggestions kindly provided by Al Viro (again thanks!).[2][3][4]

Changes from v1:[5]
1/4 - No changes.
2/4 - Delete an unnecessary assignment (thanks to Dan Carpenter).
3/4 - No changes.
4/4 - No changes.

[1] https://lore.kernel.org/lkml/[email protected]/
[2] https://lore.kernel.org/lkml/Y4E++JERgUMoqfjG@ZenIV/#t
[3] https://lore.kernel.org/lkml/Y4FG0O7VWTTng5yh@ZenIV/#t
[4] https://lore.kernel.org/lkml/Y4ONIFJatIGsVNpf@ZenIV/#t
[5] https://lore.kernel.org/lkml/[email protected]/

Cc: Al Viro <[email protected]>
Cc: Ira Weiny <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>

Fabio M. De Francesco (4):
fs/sysv: Use the offset_in_page() helper
fs/sysv: Change the signature of dir_get_page()
fs/sysv: Use dir_put_page() in sysv_rename()
fs/sysv: Replace kmap() with kmap_local_page()

fs/sysv/dir.c | 118 +++++++++++++++++++++++++++---------------------
fs/sysv/namei.c | 9 ++--
fs/sysv/sysv.h | 1 +
3 files changed, 71 insertions(+), 57 deletions(-)

--
2.39.0


2023-01-09 17:29:11

by Fabio M. De Francesco

[permalink] [raw]
Subject: [PATCH v2 4/4] fs/sysv: Replace kmap() with kmap_local_page()

kmap() is being deprecated in favor of kmap_local_page().

There are two main problems with kmap(): (1) It comes with an overhead as
the mapping space is restricted and protected by a global lock for
synchronization and (2) it also requires global TLB invalidation when the
kmap’s pool wraps and it might block when the mapping space is fully
utilized until a slot becomes available.

With kmap_local_page() the mappings are per thread, CPU local, can take
page faults, and can be called from any context (including interrupts).
It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
the tasks can be preempted and, when they are scheduled to run again, the
kernel virtual addresses are restored and still valid.

Since kmap_local_page() would not break the strict rules of local mappings
(i.e., the thread locality and the stack based nesting), this function can
be easily and safely replace the deprecated API.

Therefore, replace kmap() with kmap_local_page() in fs/sysv. kunmap_local()
requires the mapping address, so return that address from dir_get_page()
to be used in dir_put_page().

Suggested-by: Al Viro <[email protected]>
Suggested-by: Ira Weiny <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---

No changes from v1.

fs/sysv/dir.c | 64 +++++++++++++++++++++++++++++++++----------------
fs/sysv/namei.c | 4 ++--
fs/sysv/sysv.h | 2 +-
3 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index ee38dc5a3010..db5c483a9b68 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -28,9 +28,9 @@ const struct file_operations sysv_dir_operations = {
.fsync = generic_file_fsync,
};

-inline void dir_put_page(struct page *page)
+inline void dir_put_page(struct page *page, void *page_addr)
{
- kunmap(page);
+ kunmap_local(page_addr);
put_page(page);
}

@@ -52,15 +52,21 @@ static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
return err;
}

+/*
+ * Calls to dir_get_page()/dir_put_page() must be nested according to the
+ * rules documented in mm/highmem.rst.
+ *
+ * NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_page()
+ * and must be treated accordingly for nesting purposes.
+ */
static void *dir_get_page(struct inode *dir, unsigned long n, struct page **p)
{
struct address_space *mapping = dir->i_mapping;
struct page *page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page))
return ERR_CAST(page);
- kmap(page);
*p = page;
- return page_address(page);
+ return kmap_local_page(page);
}

static int sysv_readdir(struct file *file, struct dir_context *ctx)
@@ -98,11 +104,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN),
fs16_to_cpu(SYSV_SB(sb), de->inode),
DT_UNKNOWN)) {
- dir_put_page(page);
+ dir_put_page(page, kaddr);
return 0;
}
}
- dir_put_page(page);
+ dir_put_page(page, kaddr);
}
return 0;
}
@@ -125,6 +131,11 @@ static inline int namecompare(int len, int maxlen,
* returns the cache buffer in which the entry was found, and the entry
* itself (as a parameter - res_dir). It does NOT read the inode of the
* entry - you'll have to do that yourself if you want to.
+ *
+ * On Success dir_put_page() should be called on *res_page.
+ *
+ * sysv_find_entry() acts as a call to dir_get_page() and must be treated
+ * accordingly for nesting purposes.
*/
struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_page)
{
@@ -156,7 +167,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
name, de->name))
goto found;
}
- dir_put_page(page);
+ dir_put_page(page, kaddr);
}

if (++n >= npages)
@@ -199,7 +210,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
goto out_page;
de++;
}
- dir_put_page(page);
+ dir_put_page(page, kaddr);
}
BUG();
return -EINVAL;
@@ -217,7 +228,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
dir->i_mtime = dir->i_ctime = current_time(dir);
mark_inode_dirty(dir);
out_page:
- dir_put_page(page);
+ dir_put_page(page, kaddr);
return err;
out_unlock:
unlock_page(page);
@@ -228,6 +239,12 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
{
struct inode *inode = page->mapping->host;
loff_t pos = page_offset(page) + offset_in_page(de);
+ /*
+ * The "de" dentry points somewhere in the same page whose we need the
+ * address of; therefore, we can simply get the base address "kaddr" by
+ * masking the previous with PAGE_MASK.
+ */
+ char *kaddr = (char *)((unsigned long)de & PAGE_MASK);
int err;

lock_page(page);
@@ -235,7 +252,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
BUG_ON(err);
de->inode = 0;
err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
- dir_put_page(page);
+ dir_put_page(page, kaddr);
inode->i_ctime = inode->i_mtime = current_time(inode);
mark_inode_dirty(inode);
return err;
@@ -255,9 +272,7 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
unlock_page(page);
goto fail;
}
- kmap(page);
-
- base = (char*)page_address(page);
+ base = kmap_local_page(page);
memset(base, 0, PAGE_SIZE);

de = (struct sysv_dir_entry *) base;
@@ -267,7 +282,7 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino);
strcpy(de->name,"..");

- kunmap(page);
+ kunmap_local(base);
err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE);
fail:
put_page(page);
@@ -282,10 +297,10 @@ int sysv_empty_dir(struct inode * inode)
struct super_block *sb = inode->i_sb;
struct page *page = NULL;
unsigned long i, npages = dir_pages(inode);
+ char *kaddr;

for (i = 0; i < npages; i++) {
- char *kaddr;
- struct sysv_dir_entry * de;
+ struct sysv_dir_entry *de;

kaddr = dir_get_page(inode, i, &page);
if (IS_ERR(kaddr))
@@ -309,12 +324,12 @@ int sysv_empty_dir(struct inode * inode)
if (de->name[1] != '.' || de->name[2])
goto not_empty;
}
- dir_put_page(page);
+ dir_put_page(page, kaddr);
}
return 1;

not_empty:
- dir_put_page(page);
+ dir_put_page(page, kaddr);
return 0;
}

@@ -331,11 +346,18 @@ void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
BUG_ON(err);
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
- dir_put_page(page);
+ dir_put_page(page, de);
dir->i_mtime = dir->i_ctime = current_time(dir);
mark_inode_dirty(dir);
}

+/*
+ * Calls to dir_get_page()/dir_put_page() must be nested according to the
+ * rules documented in mm/highmem.rst.
+ *
+ * sysv_dotdot() acts as a call to dir_get_page() and must be treated
+ * accordingly for nesting purposes.
+ */
struct sysv_dir_entry *sysv_dotdot(struct inode *dir, struct page **p)
{
struct page *page = NULL;
@@ -343,7 +365,7 @@ struct sysv_dir_entry *sysv_dotdot(struct inode *dir, struct page **p)

if (!IS_ERR(de)) {
*p = page;
- return (struct sysv_dir_entry *)page_address(page) + 1;
+ return (struct sysv_dir_entry *)de + 1;
}
return NULL;
}
@@ -356,7 +378,7 @@ ino_t sysv_inode_by_name(struct dentry *dentry)

if (de) {
res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode);
- dir_put_page(page);
+ dir_put_page(page, de);
}
return res;
}
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 981c1d76f342..371cf9012052 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -251,9 +251,9 @@ static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,

out_dir:
if (dir_de)
- dir_put_page(dir_page);
+ dir_put_page(dir_page, dir_de);
out_old:
- dir_put_page(old_page);
+ dir_put_page(old_page, old_de);
out:
return err;
}
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index b250ac1dd348..50f19bfd8d10 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -148,7 +148,7 @@ extern void sysv_destroy_icache(void);


/* dir.c */
-extern void dir_put_page(struct page *page);
+extern void dir_put_page(struct page *page, void *vaddr);
extern struct sysv_dir_entry *sysv_find_entry(struct dentry *, struct page **);
extern int sysv_add_link(struct dentry *, struct inode *);
extern int sysv_delete_entry(struct sysv_dir_entry *, struct page *);
--
2.39.0

2023-01-09 17:29:13

by Fabio M. De Francesco

[permalink] [raw]
Subject: [PATCH v2 3/4] fs/sysv: Use dir_put_page() in sysv_rename()

Use the dir_put_page() helper in sysv_rename() instead of open-coding two
kunmap() + put_page().

Cc: Al Viro <[email protected]>
Suggested-by: Ira Weiny <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---

No changes from v1.

fs/sysv/dir.c | 2 +-
fs/sysv/namei.c | 9 +++------
fs/sysv/sysv.h | 1 +
3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index f953e6b9251e..ee38dc5a3010 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -28,7 +28,7 @@ const struct file_operations sysv_dir_operations = {
.fsync = generic_file_fsync,
};

-static inline void dir_put_page(struct page *page)
+inline void dir_put_page(struct page *page)
{
kunmap(page);
put_page(page);
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b2e6abc06a2d..981c1d76f342 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -250,13 +250,10 @@ static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
return 0;

out_dir:
- if (dir_de) {
- kunmap(dir_page);
- put_page(dir_page);
- }
+ if (dir_de)
+ dir_put_page(dir_page);
out_old:
- kunmap(old_page);
- put_page(old_page);
+ dir_put_page(old_page);
out:
return err;
}
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 99ddf033da4f..b250ac1dd348 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -148,6 +148,7 @@ extern void sysv_destroy_icache(void);


/* dir.c */
+extern void dir_put_page(struct page *page);
extern struct sysv_dir_entry *sysv_find_entry(struct dentry *, struct page **);
extern int sysv_add_link(struct dentry *, struct inode *);
extern int sysv_delete_entry(struct sysv_dir_entry *, struct page *);
--
2.39.0

2023-01-09 17:29:55

by Fabio M. De Francesco

[permalink] [raw]
Subject: [PATCH v2 1/4] fs/sysv: Use the offset_in_page() helper

Use the offset_in_page() helper because it is more suitable than doing
explicit subtractions between pointers to directory entries and kernel
virtual addresses of mapped pages.

Cc: Ira Weiny <[email protected]>
Suggested-by: Al Viro <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---

No changes from v1.

fs/sysv/dir.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 88e38cd8f5c9..685379bc9d64 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -206,8 +206,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
return -EINVAL;

got_it:
- pos = page_offset(page) +
- (char*)de - (char*)page_address(page);
+ pos = page_offset(page) + offset_in_page(de);
lock_page(page);
err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
if (err)
@@ -230,8 +229,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
{
struct inode *inode = page->mapping->host;
- char *kaddr = (char*)page_address(page);
- loff_t pos = page_offset(page) + (char *)de - kaddr;
+ loff_t pos = page_offset(page) + offset_in_page(de);
int err;

lock_page(page);
@@ -328,8 +326,7 @@ void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
struct inode *inode)
{
struct inode *dir = page->mapping->host;
- loff_t pos = page_offset(page) +
- (char *)de-(char*)page_address(page);
+ loff_t pos = page_offset(page) + offset_in_page(de);
int err;

lock_page(page);
--
2.39.0