2022-10-16 16:58:32

by Fabio M. De Francesco

[permalink] [raw]
Subject: [RESEND PATCH v4] fs/ufs: Replace kmap() with kmap_local_page()

kmap() is being deprecated in favor of kmap_local_page().

There are two main problems with kmap(): (1) It comes with an overhead as
the mapping space is restricted and protected by a global lock for
synchronization and (2) it also requires global TLB invalidation when the
kmap’s pool wraps and it might block when the mapping space is fully
utilized until a slot becomes available.

With kmap_local_page() the mappings are per thread, CPU local, can take
page faults, and can be called from any context (including interrupts).
It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
the tasks can be preempted and, when they are scheduled to run again, the
kernel virtual addresses are restored and still valid.

Since its use in fs/ufs is safe everywhere, it should be preferred.

Therefore, replace kmap() with kmap_local_page() in fs/ufs. kunmap_local()
requires the mapping address, so return that address from ufs_get_page()
to be used in ufs_put_page(). Where suited, use the standard helper
memzero_page() instead of open coding kmap_local_page() plus memset().

These changes are essentially ported from fs/ext2 and are largely based on
commit 782b76d7abdf ("fs/ext2: Replace kmap() with kmap_local_page()").

Cc: "Venkataramanan, Anirudh" <[email protected]>
Suggested-by: Ira Weiny <[email protected]>
Reviewed-by: Ira Weiny <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---

This code is not tested. I have no means to create an UFS filesystem.
Despite nothing here seems to break the strict rules about the use of
kmap_local_page(), any help with testing will be much appreciated :-)

v3 -> v4: Convert another kmap() which was overlooked. Since the code
changed, remove the "Reviewed-by" tag from Ira.

v2 -> v3: Rename a variable for consistency (Ira Weiny). Add a
"Reviewed-by" tag.

v1 -> v2: Correct some style's issues reported by checkpatch.pl.
Remove an "inline" compiler directive from fs/ufs/ufs.h,
Reported-by: kernel test robot <[email protected]>

fs/ufs/dir.c | 116 +++++++++++++++++++++++++++++++------------------
fs/ufs/namei.c | 38 ++++++++--------
fs/ufs/ufs.h | 12 +++--
3 files changed, 102 insertions(+), 64 deletions(-)

diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 391efaf1d528..db7564852391 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -61,9 +61,9 @@ static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len)
return err;
}

-static inline void ufs_put_page(struct page *page)
+inline void ufs_put_page(struct page *page, void *page_addr)
{
- kunmap(page);
+ kunmap_local(page_addr);
put_page(page);
}

@@ -72,11 +72,12 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
ino_t res = 0;
struct ufs_dir_entry *de;
struct page *page;
-
- de = ufs_find_entry(dir, qstr, &page);
+ void *page_addr;
+
+ de = ufs_find_entry(dir, qstr, &page, &page_addr);
if (de) {
res = fs32_to_cpu(dir->i_sb, de->d_ino);
- ufs_put_page(page);
+ ufs_put_page(page, page_addr);
}
return res;
}
@@ -84,11 +85,11 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)

/* Releases the page */
void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
- struct page *page, struct inode *inode,
- bool update_times)
+ struct page *page, void *page_addr,
+ struct inode *inode, bool update_times)
{
loff_t pos = page_offset(page) +
- (char *) de - (char *) page_address(page);
+ (char *)de - (char *)page_addr;
unsigned len = fs16_to_cpu(dir->i_sb, de->d_reclen);
int err;

@@ -100,18 +101,17 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
ufs_set_de_type(dir->i_sb, de, inode->i_mode);

err = ufs_commit_chunk(page, pos, len);
- ufs_put_page(page);
+ ufs_put_page(page, page_addr);
if (update_times)
dir->i_mtime = dir->i_ctime = current_time(dir);
mark_inode_dirty(dir);
}


-static bool ufs_check_page(struct page *page)
+static bool ufs_check_page(struct page *page, char *kaddr)
{
struct inode *dir = page->mapping->host;
struct super_block *sb = dir->i_sb;
- char *kaddr = page_address(page);
unsigned offs, rec_len;
unsigned limit = PAGE_SIZE;
const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1;
@@ -186,21 +186,28 @@ static bool ufs_check_page(struct page *page)
return false;
}

-static struct page *ufs_get_page(struct inode *dir, unsigned long n)
+/*
+ * Calls to ufs_get_page()/ufs_put_page() must be nested according to the
+ * rules documented in kmap_local_page()/kunmap_local().
+ *
+ * NOTE: ufs_find_entry() and ufs_dotdot() act as calls to ufs_get_page()
+ * and must be treated accordingly for nesting purposes.
+ */
+static struct page *ufs_get_page(struct inode *dir, unsigned long n, void **page_addr)
{
struct address_space *mapping = dir->i_mapping;
struct page *page = read_mapping_page(mapping, n, NULL);
if (!IS_ERR(page)) {
- kmap(page);
+ *page_addr = kmap_local_page(page);
if (unlikely(!PageChecked(page))) {
- if (!ufs_check_page(page))
+ if (!ufs_check_page(page, *page_addr))
goto fail;
}
}
return page;

fail:
- ufs_put_page(page);
+ ufs_put_page(page, *page_addr);
return ERR_PTR(-EIO);
}

@@ -226,15 +233,29 @@ ufs_next_entry(struct super_block *sb, struct ufs_dir_entry *p)
fs16_to_cpu(sb, p->d_reclen));
}

-struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
+/*
+ * Return the '..' directory entry and the page in which the entry was found
+ * (as a parameter - p).
+ *
+ * On Success ufs_put_page() should be called on *p.
+ *
+ * NOTE: Calls to ufs_get_page()/ufs_put_page() must be nested according to
+ * the rules documented in kmap_local_page()/kunmap_local().
+ *
+ * ufs_find_entry() and ufs_dotdot() act as calls to ufs_get_page() and
+ * must be treated accordingly for nesting purposes.
+ */
+struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p, void **pa)
{
- struct page *page = ufs_get_page(dir, 0);
+ void *page_addr;
+ struct page *page = ufs_get_page(dir, 0, &page_addr);
struct ufs_dir_entry *de = NULL;

if (!IS_ERR(page)) {
de = ufs_next_entry(dir->i_sb,
- (struct ufs_dir_entry *)page_address(page));
+ (struct ufs_dir_entry *)page_addr);
*p = page;
+ *pa = page_addr;
}
return de;
}
@@ -246,9 +267,17 @@ struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
* returns the page in which the entry was found, and the entry itself
* (as a parameter - res_dir). Page is returned mapped and unlocked.
* Entry is guaranteed to be valid.
+ *
+ * On Success ufs_put_page() should be called on *res_page.
+ *
+ * NOTE: Calls to ufs_get_page()/ufs_put_page() must be nested according to
+ * the rules documented in kmap_local_page()/kunmap_local().
+ *
+ * ufs_find_entry() and ufs_dotdot() act as calls to ufs_get_page() and
+ * must be treated accordingly for nesting purposes.
*/
struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
- struct page **res_page)
+ struct page **res_page, void **res_page_addr)
{
struct super_block *sb = dir->i_sb;
const unsigned char *name = qstr->name;
@@ -259,6 +288,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
struct page *page = NULL;
struct ufs_inode_info *ui = UFS_I(dir);
struct ufs_dir_entry *de;
+ void *page_addr;

UFSD("ENTER, dir_ino %lu, name %s, namlen %u\n", dir->i_ino, name, namelen);

@@ -267,6 +297,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,

/* OFFSET_CACHE */
*res_page = NULL;
+ *res_page_addr = NULL;

start = ui->i_dir_start_lookup;

@@ -275,9 +306,10 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
n = start;
do {
char *kaddr;
- page = ufs_get_page(dir, n);
+
+ page = ufs_get_page(dir, n, &page_addr);
if (!IS_ERR(page)) {
- kaddr = page_address(page);
+ kaddr = page_addr;
de = (struct ufs_dir_entry *) kaddr;
kaddr += ufs_last_byte(dir, n) - reclen;
while ((char *) de <= kaddr) {
@@ -285,7 +317,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
goto found;
de = ufs_next_entry(sb, de);
}
- ufs_put_page(page);
+ ufs_put_page(page, page_addr);
}
if (++n >= npages)
n = 0;
@@ -295,6 +327,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,

found:
*res_page = page;
+ *res_page_addr = page_addr;
ui->i_dir_start_lookup = n;
return de;
}
@@ -312,6 +345,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
unsigned short rec_len, name_len;
struct page *page = NULL;
+ void *page_addr = NULL;
struct ufs_dir_entry *de;
unsigned long npages = dir_pages(dir);
unsigned long n;
@@ -329,12 +363,12 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
for (n = 0; n <= npages; n++) {
char *dir_end;

- page = ufs_get_page(dir, n);
+ page = ufs_get_page(dir, n, &page_addr);
err = PTR_ERR(page);
if (IS_ERR(page))
goto out;
lock_page(page);
- kaddr = page_address(page);
+ kaddr = page_addr;
dir_end = kaddr + ufs_last_byte(dir, n);
de = (struct ufs_dir_entry *)kaddr;
kaddr += PAGE_SIZE - reclen;
@@ -365,14 +399,14 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
de = (struct ufs_dir_entry *) ((char *) de + rec_len);
}
unlock_page(page);
- ufs_put_page(page);
+ ufs_put_page(page, page_addr);
}
BUG();
return -EINVAL;

got_it:
pos = page_offset(page) +
- (char*)de - (char*)page_address(page);
+ (char *)de - (char *)page_addr;
err = ufs_prepare_chunk(page, pos, rec_len);
if (err)
goto out_unlock;
@@ -396,7 +430,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
mark_inode_dirty(dir);
/* OFFSET_CACHE */
out_put:
- ufs_put_page(page);
+ ufs_put_page(page, page_addr);
out:
return err;
out_unlock:
@@ -441,7 +475,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
char *kaddr, *limit;
struct ufs_dir_entry *de;

- struct page *page = ufs_get_page(inode, n);
+ struct page *page = ufs_get_page(inode, n, (void **)&kaddr);

if (IS_ERR(page)) {
ufs_error(sb, __func__,
@@ -450,7 +484,6 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
ctx->pos += PAGE_SIZE - offset;
return -EIO;
}
- kaddr = page_address(page);
if (unlikely(need_revalidate)) {
if (offset) {
offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask);
@@ -476,13 +509,13 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
ufs_get_de_namlen(sb, de),
fs32_to_cpu(sb, de->d_ino),
d_type)) {
- ufs_put_page(page);
+ ufs_put_page(page, kaddr);
return 0;
}
}
ctx->pos += fs16_to_cpu(sb, de->d_reclen);
}
- ufs_put_page(page);
+ ufs_put_page(page, kaddr);
}
return 0;
}
@@ -493,10 +526,9 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
* previous entry.
*/
int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
- struct page * page)
+ struct page *page, char *kaddr)
{
struct super_block *sb = inode->i_sb;
- char *kaddr = page_address(page);
unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
loff_t pos;
@@ -522,7 +554,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
de = ufs_next_entry(sb, de);
}
if (pde)
- from = (char*)pde - (char*)page_address(page);
+ from = (char *)pde - kaddr;

pos = page_offset(page) + from;
lock_page(page);
@@ -535,7 +567,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
inode->i_ctime = inode->i_mtime = current_time(inode);
mark_inode_dirty(inode);
out:
- ufs_put_page(page);
+ ufs_put_page(page, kaddr);
UFSD("EXIT\n");
return err;
}
@@ -559,8 +591,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
goto fail;
}

- kmap(page);
- base = (char*)page_address(page);
+ base = kmap_local_page(page);
memset(base, 0, PAGE_SIZE);

de = (struct ufs_dir_entry *) base;
@@ -577,7 +608,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1));
ufs_set_de_namlen(sb, de, 2);
strcpy (de->d_name, "..");
- kunmap(page);
+ kunmap_local(base);

err = ufs_commit_chunk(page, 0, chunk_size);
fail:
@@ -592,17 +623,18 @@ int ufs_empty_dir(struct inode * inode)
{
struct super_block *sb = inode->i_sb;
struct page *page = NULL;
+ void *page_addr;
unsigned long i, npages = dir_pages(inode);

for (i = 0; i < npages; i++) {
char *kaddr;
struct ufs_dir_entry *de;
- page = ufs_get_page(inode, i);

+ page = ufs_get_page(inode, i, &page_addr);
if (IS_ERR(page))
continue;

- kaddr = page_address(page);
+ kaddr = page_addr;
de = (struct ufs_dir_entry *)kaddr;
kaddr += ufs_last_byte(inode, i) - UFS_DIR_REC_LEN(1);

@@ -629,12 +661,12 @@ int ufs_empty_dir(struct inode * inode)
}
de = ufs_next_entry(sb, de);
}
- ufs_put_page(page);
+ ufs_put_page(page, page_addr);
}
return 1;

not_empty:
- ufs_put_page(page);
+ ufs_put_page(page, page_addr);
return 0;
}

diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 29d5a0e0c8f0..cdf3bcf9d02e 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -210,13 +210,14 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
struct inode * inode = d_inode(dentry);
struct ufs_dir_entry *de;
struct page *page;
+ void *page_addr;
int err = -ENOENT;

- de = ufs_find_entry(dir, &dentry->d_name, &page);
+ de = ufs_find_entry(dir, &dentry->d_name, &page, &page_addr);
if (!de)
goto out;

- err = ufs_delete_entry(dir, de, page);
+ err = ufs_delete_entry(dir, de, page, page_addr);
if (err)
goto out;

@@ -250,27 +251,31 @@ static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
struct page *dir_page = NULL;
+ void *dir_page_addr;
struct ufs_dir_entry * dir_de = NULL;
struct page *old_page;
+ void *old_page_addr;
struct ufs_dir_entry *old_de;
int err = -ENOENT;

if (flags & ~RENAME_NOREPLACE)
return -EINVAL;

- old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+ old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page,
+ &old_page_addr);
if (!old_de)
goto out;

if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
- dir_de = ufs_dotdot(old_inode, &dir_page);
+ dir_de = ufs_dotdot(old_inode, &dir_page, &dir_page_addr);
if (!dir_de)
goto out_old;
}

if (new_inode) {
struct page *new_page;
+ void *new_page_addr;
struct ufs_dir_entry *new_de;

err = -ENOTEMPTY;
@@ -278,10 +283,11 @@ static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
goto out_dir;

err = -ENOENT;
- new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
+ new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page,
+ &new_page_addr);
if (!new_de)
goto out_dir;
- ufs_set_link(new_dir, new_de, new_page, old_inode, 1);
+ ufs_set_link(new_dir, new_de, new_page, new_page_addr, old_inode, 1);
new_inode->i_ctime = current_time(new_inode);
if (dir_de)
drop_nlink(new_inode);
@@ -300,29 +306,25 @@ static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
*/
old_inode->i_ctime = current_time(old_inode);

- ufs_delete_entry(old_dir, old_de, old_page);
+ ufs_delete_entry(old_dir, old_de, old_page, old_page_addr);
mark_inode_dirty(old_inode);

if (dir_de) {
if (old_dir != new_dir)
- ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0);
- else {
- kunmap(dir_page);
- put_page(dir_page);
- }
+ ufs_set_link(old_inode, dir_de, dir_page,
+ dir_page_addr, new_dir, 0);
+ else
+ ufs_put_page(dir_page, dir_page_addr);
inode_dec_link_count(old_dir);
}
return 0;


out_dir:
- if (dir_de) {
- kunmap(dir_page);
- put_page(dir_page);
- }
+ if (dir_de)
+ ufs_put_page(dir_page, dir_page_addr);
out_old:
- kunmap(old_page);
- put_page(old_page);
+ ufs_put_page(old_page, old_page_addr);
out:
return err;
}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 550f7c5a3636..20d224c163ab 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -102,12 +102,16 @@ extern const struct inode_operations ufs_dir_inode_operations;
extern int ufs_add_link (struct dentry *, struct inode *);
extern ino_t ufs_inode_by_name(struct inode *, const struct qstr *);
extern int ufs_make_empty(struct inode *, struct inode *);
-extern struct ufs_dir_entry *ufs_find_entry(struct inode *, const struct qstr *, struct page **);
-extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *);
+extern struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
+ struct page **res_page, void **res_page_addr);
+extern int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
+ struct page *page, char *kaddr);
extern int ufs_empty_dir (struct inode *);
-extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
+extern struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p, void **pa);
extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
- struct page *page, struct inode *inode, bool update_times);
+ struct page *page, void *page_addr,
+ struct inode *inode, bool update_times);
+extern void ufs_put_page(struct page *page, void *page_addr);

/* file.c */
extern const struct inode_operations ufs_file_inode_operations;
--
2.37.1


2022-11-25 16:24:16

by Fabio M. De Francesco

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] fs/ufs: Replace kmap() with kmap_local_page()

On domenica 16 ottobre 2022 18:38:55 CET Fabio M. De Francesco wrote:
> kmap() is being deprecated in favor of kmap_local_page().
>
> There are two main problems with kmap(): (1) It comes with an overhead as
> the mapping space is restricted and protected by a global lock for
> synchronization and (2) it also requires global TLB invalidation when the
> kmap’s pool wraps and it might block when the mapping space is fully
> utilized until a slot becomes available.
>
> With kmap_local_page() the mappings are per thread, CPU local, can take
> page faults, and can be called from any context (including interrupts).
> It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
> the tasks can be preempted and, when they are scheduled to run again, the
> kernel virtual addresses are restored and still valid.
>
> Since its use in fs/ufs is safe everywhere, it should be preferred.
>
> Therefore, replace kmap() with kmap_local_page() in fs/ufs. kunmap_local()
> requires the mapping address, so return that address from ufs_get_page()
> to be used in ufs_put_page(). Where suited, use the standard helper
> memzero_page() instead of open coding kmap_local_page() plus memset().
>
> These changes are essentially ported from fs/ext2 and are largely based on
> commit 782b76d7abdf ("fs/ext2: Replace kmap() with kmap_local_page()").
>
> Cc: "Venkataramanan, Anirudh" <[email protected]>
> Suggested-by: Ira Weiny <[email protected]>
> Reviewed-by: Ira Weiny <[email protected]>
> Signed-off-by: Fabio M. De Francesco <[email protected]>
> ---
>
> This code is not tested. I have no means to create an UFS filesystem.
> Despite nothing here seems to break the strict rules about the use of
> kmap_local_page(), any help with testing will be much appreciated :-)
>
> v3 -> v4: Convert another kmap() which was overlooked. Since the code
> changed, remove the "Reviewed-by" tag from Ira.
>
> v2 -> v3: Rename a variable for consistency (Ira Weiny). Add a
> "Reviewed-by" tag.
>
> v1 -> v2: Correct some style's issues reported by checkpatch.pl.
> Remove an "inline" compiler directive from fs/ufs/ufs.h,
> Reported-by: kernel test robot <[email protected]>
>
> fs/ufs/dir.c | 116 +++++++++++++++++++++++++++++++------------------
> fs/ufs/namei.c | 38 ++++++++--------
> fs/ufs/ufs.h | 12 +++--
> 3 files changed, 102 insertions(+), 64 deletions(-)

Evgeniy, Christian,

I'm again here to gently ping and remind this patch to both of you. I have no
idea about why it didn't yet deserve any reply. Is there anything I'm still
missing?

I'm looking forward to hear from you.

Thanks,

Fabio


> diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
> index 391efaf1d528..db7564852391 100644
> --- a/fs/ufs/dir.c
> +++ b/fs/ufs/dir.c
> @@ -61,9 +61,9 @@ static int ufs_commit_chunk(struct page *page, loff_t pos,
> unsigned len) return err;
> }
>
> -static inline void ufs_put_page(struct page *page)
> +inline void ufs_put_page(struct page *page, void *page_addr)
> {
> - kunmap(page);
> + kunmap_local(page_addr);
> put_page(page);
> }
>
> @@ -72,11 +72,12 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct
> qstr *qstr) ino_t res = 0;
> struct ufs_dir_entry *de;
> struct page *page;
> -
> - de = ufs_find_entry(dir, qstr, &page);
> + void *page_addr;
> +
> + de = ufs_find_entry(dir, qstr, &page, &page_addr);
> if (de) {
> res = fs32_to_cpu(dir->i_sb, de->d_ino);
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
> }
> return res;
> }
> @@ -84,11 +85,11 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct
> qstr *qstr)
>
> /* Releases the page */
> void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
> - struct page *page, struct inode *inode,
> - bool update_times)
> + struct page *page, void *page_addr,
> + struct inode *inode, bool update_times)
> {
> loff_t pos = page_offset(page) +
> - (char *) de - (char *) page_address(page);
> + (char *)de - (char *)page_addr;
> unsigned len = fs16_to_cpu(dir->i_sb, de->d_reclen);
> int err;
>
> @@ -100,18 +101,17 @@ void ufs_set_link(struct inode *dir, struct
> ufs_dir_entry *de, ufs_set_de_type(dir->i_sb, de, inode->i_mode);
>
> err = ufs_commit_chunk(page, pos, len);
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
> if (update_times)
> dir->i_mtime = dir->i_ctime = current_time(dir);
> mark_inode_dirty(dir);
> }
>
>
> -static bool ufs_check_page(struct page *page)
> +static bool ufs_check_page(struct page *page, char *kaddr)
> {
> struct inode *dir = page->mapping->host;
> struct super_block *sb = dir->i_sb;
> - char *kaddr = page_address(page);
> unsigned offs, rec_len;
> unsigned limit = PAGE_SIZE;
> const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1;
> @@ -186,21 +186,28 @@ static bool ufs_check_page(struct page *page)
> return false;
> }
>
> -static struct page *ufs_get_page(struct inode *dir, unsigned long n)
> +/*
> + * Calls to ufs_get_page()/ufs_put_page() must be nested according to the
> + * rules documented in kmap_local_page()/kunmap_local().
> + *
> + * NOTE: ufs_find_entry() and ufs_dotdot() act as calls to ufs_get_page()
> + * and must be treated accordingly for nesting purposes.
> + */
> +static struct page *ufs_get_page(struct inode *dir, unsigned long n, void
> **page_addr) {
> struct address_space *mapping = dir->i_mapping;
> struct page *page = read_mapping_page(mapping, n, NULL);
> if (!IS_ERR(page)) {
> - kmap(page);
> + *page_addr = kmap_local_page(page);
> if (unlikely(!PageChecked(page))) {
> - if (!ufs_check_page(page))
> + if (!ufs_check_page(page, *page_addr))
> goto fail;
> }
> }
> return page;
>
> fail:
> - ufs_put_page(page);
> + ufs_put_page(page, *page_addr);
> return ERR_PTR(-EIO);
> }
>
> @@ -226,15 +233,29 @@ ufs_next_entry(struct super_block *sb, struct
> ufs_dir_entry *p) fs16_to_cpu(sb, p->d_reclen));
> }
>
> -struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
> +/*
> + * Return the '..' directory entry and the page in which the entry was
found
> + * (as a parameter - p).
> + *
> + * On Success ufs_put_page() should be called on *p.
> + *
> + * NOTE: Calls to ufs_get_page()/ufs_put_page() must be nested according to
> + * the rules documented in kmap_local_page()/kunmap_local().
> + *
> + * ufs_find_entry() and ufs_dotdot() act as calls to ufs_get_page() and
> + * must be treated accordingly for nesting purposes.
> + */
> +struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p, void
> **pa) {
> - struct page *page = ufs_get_page(dir, 0);
> + void *page_addr;
> + struct page *page = ufs_get_page(dir, 0, &page_addr);
> struct ufs_dir_entry *de = NULL;
>
> if (!IS_ERR(page)) {
> de = ufs_next_entry(dir->i_sb,
> - (struct ufs_dir_entry
*)page_address(page));
> + (struct ufs_dir_entry
*)page_addr);
> *p = page;
> + *pa = page_addr;
> }
> return de;
> }
> @@ -246,9 +267,17 @@ struct ufs_dir_entry *ufs_dotdot(struct inode *dir,
> struct page **p) * returns the page in which the entry was found, and the
> entry itself * (as a parameter - res_dir). Page is returned mapped and
> unlocked. * Entry is guaranteed to be valid.
> + *
> + * On Success ufs_put_page() should be called on *res_page.
> + *
> + * NOTE: Calls to ufs_get_page()/ufs_put_page() must be nested according to
> + * the rules documented in kmap_local_page()/kunmap_local().
> + *
> + * ufs_find_entry() and ufs_dotdot() act as calls to ufs_get_page() and
> + * must be treated accordingly for nesting purposes.
> */
> struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr
> *qstr, - struct page **res_page)
> + struct page **res_page, void
**res_page_addr)
> {
> struct super_block *sb = dir->i_sb;
> const unsigned char *name = qstr->name;
> @@ -259,6 +288,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir,
> const struct qstr *qstr, struct page *page = NULL;
> struct ufs_inode_info *ui = UFS_I(dir);
> struct ufs_dir_entry *de;
> + void *page_addr;
>
> UFSD("ENTER, dir_ino %lu, name %s, namlen %u\n", dir->i_ino, name,
namelen);
>
> @@ -267,6 +297,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir,
> const struct qstr *qstr,
>
> /* OFFSET_CACHE */
> *res_page = NULL;
> + *res_page_addr = NULL;
>
> start = ui->i_dir_start_lookup;
>
> @@ -275,9 +306,10 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir,
> const struct qstr *qstr, n = start;
> do {
> char *kaddr;
> - page = ufs_get_page(dir, n);
> +
> + page = ufs_get_page(dir, n, &page_addr);
> if (!IS_ERR(page)) {
> - kaddr = page_address(page);
> + kaddr = page_addr;
> de = (struct ufs_dir_entry *) kaddr;
> kaddr += ufs_last_byte(dir, n) - reclen;
> while ((char *) de <= kaddr) {
> @@ -285,7 +317,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir,
> const struct qstr *qstr, goto found;
> de = ufs_next_entry(sb, de);
> }
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
> }
> if (++n >= npages)
> n = 0;
> @@ -295,6 +327,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir,
> const struct qstr *qstr,
>
> found:
> *res_page = page;
> + *res_page_addr = page_addr;
> ui->i_dir_start_lookup = n;
> return de;
> }
> @@ -312,6 +345,7 @@ int ufs_add_link(struct dentry *dentry, struct inode
> *inode) const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
> unsigned short rec_len, name_len;
> struct page *page = NULL;
> + void *page_addr = NULL;
> struct ufs_dir_entry *de;
> unsigned long npages = dir_pages(dir);
> unsigned long n;
> @@ -329,12 +363,12 @@ int ufs_add_link(struct dentry *dentry, struct inode
> *inode) for (n = 0; n <= npages; n++) {
> char *dir_end;
>
> - page = ufs_get_page(dir, n);
> + page = ufs_get_page(dir, n, &page_addr);
> err = PTR_ERR(page);
> if (IS_ERR(page))
> goto out;
> lock_page(page);
> - kaddr = page_address(page);
> + kaddr = page_addr;
> dir_end = kaddr + ufs_last_byte(dir, n);
> de = (struct ufs_dir_entry *)kaddr;
> kaddr += PAGE_SIZE - reclen;
> @@ -365,14 +399,14 @@ int ufs_add_link(struct dentry *dentry, struct inode
> *inode) de = (struct ufs_dir_entry *) ((char *) de + rec_len);
> }
> unlock_page(page);
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
> }
> BUG();
> return -EINVAL;
>
> got_it:
> pos = page_offset(page) +
> - (char*)de - (char*)page_address(page);
> + (char *)de - (char *)page_addr;
> err = ufs_prepare_chunk(page, pos, rec_len);
> if (err)
> goto out_unlock;
> @@ -396,7 +430,7 @@ int ufs_add_link(struct dentry *dentry, struct inode
> *inode) mark_inode_dirty(dir);
> /* OFFSET_CACHE */
> out_put:
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
> out:
> return err;
> out_unlock:
> @@ -441,7 +475,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
> char *kaddr, *limit;
> struct ufs_dir_entry *de;
>
> - struct page *page = ufs_get_page(inode, n);
> + struct page *page = ufs_get_page(inode, n, (void
**)&kaddr);
>
> if (IS_ERR(page)) {
> ufs_error(sb, __func__,
> @@ -450,7 +484,6 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
> ctx->pos += PAGE_SIZE - offset;
> return -EIO;
> }
> - kaddr = page_address(page);
> if (unlikely(need_revalidate)) {
> if (offset) {
> offset = ufs_validate_entry(sb, kaddr,
offset, chunk_mask);
> @@ -476,13 +509,13 @@ ufs_readdir(struct file *file, struct dir_context
*ctx)
> ufs_get_de_namlen(sb,
de),
> fs32_to_cpu(sb, de-
>d_ino),
> d_type)) {
> - ufs_put_page(page);
> + ufs_put_page(page, kaddr);
> return 0;
> }
> }
> ctx->pos += fs16_to_cpu(sb, de->d_reclen);
> }
> - ufs_put_page(page);
> + ufs_put_page(page, kaddr);
> }
> return 0;
> }
> @@ -493,10 +526,9 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
> * previous entry.
> */
> int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
> - struct page * page)
> + struct page *page, char *kaddr)
> {
> struct super_block *sb = inode->i_sb;
> - char *kaddr = page_address(page);
> unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi-
>s_dirblksize -
> 1); unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
> loff_t pos;
> @@ -522,7 +554,7 @@ int ufs_delete_entry(struct inode *inode, struct
> ufs_dir_entry *dir, de = ufs_next_entry(sb, de);
> }
> if (pde)
> - from = (char*)pde - (char*)page_address(page);
> + from = (char *)pde - kaddr;
>
> pos = page_offset(page) + from;
> lock_page(page);
> @@ -535,7 +567,7 @@ int ufs_delete_entry(struct inode *inode, struct
> ufs_dir_entry *dir, inode->i_ctime = inode->i_mtime = current_time(inode);
> mark_inode_dirty(inode);
> out:
> - ufs_put_page(page);
> + ufs_put_page(page, kaddr);
> UFSD("EXIT\n");
> return err;
> }
> @@ -559,8 +591,7 @@ int ufs_make_empty(struct inode * inode, struct inode
> *dir) goto fail;
> }
>
> - kmap(page);
> - base = (char*)page_address(page);
> + base = kmap_local_page(page);
> memset(base, 0, PAGE_SIZE);
>
> de = (struct ufs_dir_entry *) base;
> @@ -577,7 +608,7 @@ int ufs_make_empty(struct inode * inode, struct inode
> *dir) de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1));
> ufs_set_de_namlen(sb, de, 2);
> strcpy (de->d_name, "..");
> - kunmap(page);
> + kunmap_local(base);
>
> err = ufs_commit_chunk(page, 0, chunk_size);
> fail:
> @@ -592,17 +623,18 @@ int ufs_empty_dir(struct inode * inode)
> {
> struct super_block *sb = inode->i_sb;
> struct page *page = NULL;
> + void *page_addr;
> unsigned long i, npages = dir_pages(inode);
>
> for (i = 0; i < npages; i++) {
> char *kaddr;
> struct ufs_dir_entry *de;
> - page = ufs_get_page(inode, i);
>
> + page = ufs_get_page(inode, i, &page_addr);
> if (IS_ERR(page))
> continue;
>
> - kaddr = page_address(page);
> + kaddr = page_addr;
> de = (struct ufs_dir_entry *)kaddr;
> kaddr += ufs_last_byte(inode, i) - UFS_DIR_REC_LEN(1);
>
> @@ -629,12 +661,12 @@ int ufs_empty_dir(struct inode * inode)
> }
> de = ufs_next_entry(sb, de);
> }
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
> }
> return 1;
>
> not_empty:
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
> return 0;
> }
>
> diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
> index 29d5a0e0c8f0..cdf3bcf9d02e 100644
> --- a/fs/ufs/namei.c
> +++ b/fs/ufs/namei.c
> @@ -210,13 +210,14 @@ static int ufs_unlink(struct inode *dir, struct dentry
> *dentry) struct inode * inode = d_inode(dentry);
> struct ufs_dir_entry *de;
> struct page *page;
> + void *page_addr;
> int err = -ENOENT;
>
> - de = ufs_find_entry(dir, &dentry->d_name, &page);
> + de = ufs_find_entry(dir, &dentry->d_name, &page, &page_addr);
> if (!de)
> goto out;
>
> - err = ufs_delete_entry(dir, de, page);
> + err = ufs_delete_entry(dir, de, page, page_addr);
> if (err)
> goto out;
>
> @@ -250,27 +251,31 @@ static int ufs_rename(struct user_namespace
*mnt_userns,
> struct inode *old_dir, struct inode *old_inode = d_inode(old_dentry);
> struct inode *new_inode = d_inode(new_dentry);
> struct page *dir_page = NULL;
> + void *dir_page_addr;
> struct ufs_dir_entry * dir_de = NULL;
> struct page *old_page;
> + void *old_page_addr;
> struct ufs_dir_entry *old_de;
> int err = -ENOENT;
>
> if (flags & ~RENAME_NOREPLACE)
> return -EINVAL;
>
> - old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
> + old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page,
> + &old_page_addr);
> if (!old_de)
> goto out;
>
> if (S_ISDIR(old_inode->i_mode)) {
> err = -EIO;
> - dir_de = ufs_dotdot(old_inode, &dir_page);
> + dir_de = ufs_dotdot(old_inode, &dir_page, &dir_page_addr);
> if (!dir_de)
> goto out_old;
> }
>
> if (new_inode) {
> struct page *new_page;
> + void *new_page_addr;
> struct ufs_dir_entry *new_de;
>
> err = -ENOTEMPTY;
> @@ -278,10 +283,11 @@ static int ufs_rename(struct user_namespace
*mnt_userns,
> struct inode *old_dir, goto out_dir;
>
> err = -ENOENT;
> - new_de = ufs_find_entry(new_dir, &new_dentry->d_name,
&new_page);
> + new_de = ufs_find_entry(new_dir, &new_dentry->d_name,
&new_page,
> + &new_page_addr);
> if (!new_de)
> goto out_dir;
> - ufs_set_link(new_dir, new_de, new_page, old_inode, 1);
> + ufs_set_link(new_dir, new_de, new_page, new_page_addr,
old_inode, 1);
> new_inode->i_ctime = current_time(new_inode);
> if (dir_de)
> drop_nlink(new_inode);
> @@ -300,29 +306,25 @@ static int ufs_rename(struct user_namespace
*mnt_userns,
> struct inode *old_dir, */
> old_inode->i_ctime = current_time(old_inode);
>
> - ufs_delete_entry(old_dir, old_de, old_page);
> + ufs_delete_entry(old_dir, old_de, old_page, old_page_addr);
> mark_inode_dirty(old_inode);
>
> if (dir_de) {
> if (old_dir != new_dir)
> - ufs_set_link(old_inode, dir_de, dir_page,
new_dir, 0);
> - else {
> - kunmap(dir_page);
> - put_page(dir_page);
> - }
> + ufs_set_link(old_inode, dir_de, dir_page,
> + dir_page_addr, new_dir, 0);
> + else
> + ufs_put_page(dir_page, dir_page_addr);
> inode_dec_link_count(old_dir);
> }
> return 0;
>
>
> out_dir:
> - if (dir_de) {
> - kunmap(dir_page);
> - put_page(dir_page);
> - }
> + if (dir_de)
> + ufs_put_page(dir_page, dir_page_addr);
> out_old:
> - kunmap(old_page);
> - put_page(old_page);
> + ufs_put_page(old_page, old_page_addr);
> out:
> return err;
> }
> diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
> index 550f7c5a3636..20d224c163ab 100644
> --- a/fs/ufs/ufs.h
> +++ b/fs/ufs/ufs.h
> @@ -102,12 +102,16 @@ extern const struct inode_operations
> ufs_dir_inode_operations; extern int ufs_add_link (struct dentry *, struct
> inode *);
> extern ino_t ufs_inode_by_name(struct inode *, const struct qstr *);
> extern int ufs_make_empty(struct inode *, struct inode *);
> -extern struct ufs_dir_entry *ufs_find_entry(struct inode *, const struct
qstr
> *, struct page **); -extern int ufs_delete_entry(struct inode *, struct
> ufs_dir_entry *, struct page *); +extern struct ufs_dir_entry
> *ufs_find_entry(struct inode *dir, const struct qstr *qstr, +
struct
> page **res_page, void **res_page_addr);
> +extern int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
> + struct page *page, char *kaddr);
> extern int ufs_empty_dir (struct inode *);
> -extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
> +extern struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p,
> void **pa); extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry
> *de, - struct page *page, struct inode *inode, bool
update_times);
> + struct page *page, void *page_addr,
> + struct inode *inode, bool update_times);
> +extern void ufs_put_page(struct page *page, void *page_addr);
>
> /* file.c */
> extern const struct inode_operations ufs_file_inode_operations;
> --
> 2.37.1

2022-11-25 22:36:24

by Al Viro

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] fs/ufs: Replace kmap() with kmap_local_page()

On Sun, Oct 16, 2022 at 06:38:55PM +0200, Fabio M. De Francesco wrote:
> kmap() is being deprecated in favor of kmap_local_page().

Umm... I'm not sure it's the right way to handle that. Look:


> -static inline void ufs_put_page(struct page *page)
> +inline void ufs_put_page(struct page *page, void *page_addr)
> {
> - kunmap(page);
> + kunmap_local(page_addr);

Make that
kunmap_local((void *)((unsigned long)page_addr & PAGE_MASK));
and things become much easier.

> put_page(page);
> }
>
> @@ -72,11 +72,12 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
> if (de) {
> res = fs32_to_cpu(dir->i_sb, de->d_ino);
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
ufs_put_page(page, de);
and page_addr is not needed.

> /* Releases the page */
> void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
> - struct page *page, struct inode *inode,
> - bool update_times)
> + struct page *page, void *page_addr,
> + struct inode *inode, bool update_times)
> {
> loff_t pos = page_offset(page) +
> - (char *) de - (char *) page_address(page);
> + (char *)de - (char *)page_addr;

loff_t pos = page_offset(page) + offset_in_page(de);

> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
ufs_put_page(page, de);
and page_addr is unused, i.e. caller of ufs_set_link() don't need
any changes at all.

> +static struct page *ufs_get_page(struct inode *dir, unsigned long n, void **page_addr)

I suspect that
static void *ufs_get_page(struct inode *dir, unsigned long n, struct page **page)
would be better for callers.

> -struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
> +/*
> + * Return the '..' directory entry and the page in which the entry was found
> + * (as a parameter - p).
> + *
> + * On Success ufs_put_page() should be called on *p.
> + *
> + * NOTE: Calls to ufs_get_page()/ufs_put_page() must be nested according to
> + * the rules documented in kmap_local_page()/kunmap_local().
> + *
> + * ufs_find_entry() and ufs_dotdot() act as calls to ufs_get_page() and
> + * must be treated accordingly for nesting purposes.
> + */
> +struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p, void **pa)
> {
> - struct page *page = ufs_get_page(dir, 0);
> + void *page_addr;
> + struct page *page = ufs_get_page(dir, 0, &page_addr);
> struct ufs_dir_entry *de = NULL;
>
> if (!IS_ERR(page)) {
> de = ufs_next_entry(dir->i_sb,
> - (struct ufs_dir_entry *)page_address(page));
> + (struct ufs_dir_entry *)page_addr);
> *p = page;
> + *pa = page_addr;

Callers can reconstruct page_addr by de. The function itself becomes

struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **page)
{
struct ufs_dir_entry *de = ufs_get_page(dir, 0, page);

if (!IS_ERR(de))
return ufs_next_entry(dir->i_sb, de);
else
return NULL;
}

and callers need no changes at all.

> struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
> - struct page **res_page)
> + struct page **res_page, void **res_page_addr)

Same story; *res_page_addr is rounded return value.
> @@ -275,9 +306,10 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
> n = start;
> do {
> char *kaddr;
> - page = ufs_get_page(dir, n);
> +
> + page = ufs_get_page(dir, n, &page_addr);
> if (!IS_ERR(page)) {
> - kaddr = page_address(page);
> + kaddr = page_addr;

char *kaddr = ufs_get_page(dir, n, &page);
if (!IS_ERR(kaddr) {
> de = (struct ufs_dir_entry *) kaddr;
> kaddr += ufs_last_byte(dir, n) - reclen;
> while ((char *) de <= kaddr) {
> @@ -285,7 +317,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
> goto found;
> de = ufs_next_entry(sb, de);
> }
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
ufs_put_page(page, kaddr);
it's in the same page...

> @@ -312,6 +345,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
> const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
> unsigned short rec_len, name_len;
> struct page *page = NULL;
> + void *page_addr = NULL;
> struct ufs_dir_entry *de;
> unsigned long npages = dir_pages(dir);
> unsigned long n;
> @@ -329,12 +363,12 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
> for (n = 0; n <= npages; n++) {
> char *dir_end;
>
> - page = ufs_get_page(dir, n);
> + page = ufs_get_page(dir, n, &page_addr);
> err = PTR_ERR(page);
> if (IS_ERR(page))
> goto out;
> lock_page(page);
> - kaddr = page_address(page);
> + kaddr = page_addr;

kaddr = ufs_get_page(dir, n, &page);
err = PTR_ERR(kaddr);
if (IS_ERR(kaddr))
goto out;
lock_page(page);

> dir_end = kaddr + ufs_last_byte(dir, n);
> de = (struct ufs_dir_entry *)kaddr;
> kaddr += PAGE_SIZE - reclen;
> @@ -365,14 +399,14 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
> de = (struct ufs_dir_entry *) ((char *) de + rec_len);
> }
> unlock_page(page);
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
ufs_put_page(page, kaddr);

> }
> BUG();
> return -EINVAL;
>
> got_it:
> pos = page_offset(page) +
> - (char*)de - (char*)page_address(page);
> + (char *)de - (char *)page_addr;

pos = page_offset(page) + offset_in_page(de);

> err = ufs_prepare_chunk(page, pos, rec_len);
> if (err)
> goto out_unlock;
> @@ -396,7 +430,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
> mark_inode_dirty(dir);
> /* OFFSET_CACHE */
> out_put:
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
ufs_put_page(page, kaddr);

> out:
> return err;
> out_unlock:
> @@ -441,7 +475,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
> char *kaddr, *limit;
> struct ufs_dir_entry *de;
>
> - struct page *page = ufs_get_page(inode, n);
> + struct page *page = ufs_get_page(inode, n, (void **)&kaddr);

Yecch...
kaddr = ufs_get_page(inode, n, &page);
with obvious change of error check below. It definitely looks like
more convenient calling conventions - getting rid of casts like that...

> int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
> - struct page * page)
> + struct page *page, char *kaddr)

Do we need it? If not, ufs_delete_entry() calls need no changes...

> {
> struct super_block *sb = inode->i_sb;
> - char *kaddr = page_address(page);
> unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
> unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
> loff_t pos;

umm...
kaddr = (char *)((unsigned long)dir & PAGE_MASK);
unsigned from = offset_in_page(dir) & ....
unsigned to = offset_in_page(dir) + ....

> @@ -522,7 +554,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
> de = ufs_next_entry(sb, de);
> }
> if (pde)
> - from = (char*)pde - (char*)page_address(page);
> + from = (char *)pde - kaddr;

Note that this is
from = offset_in_page(pde);
> out:
> - ufs_put_page(page);
> + ufs_put_page(page, kaddr);
> UFSD("EXIT\n");
> return err;
> }

> @@ -592,17 +623,18 @@ int ufs_empty_dir(struct inode * inode)
> {
> struct super_block *sb = inode->i_sb;
> struct page *page = NULL;
> + void *page_addr;
> unsigned long i, npages = dir_pages(inode);
>
> for (i = 0; i < npages; i++) {
> char *kaddr;
> struct ufs_dir_entry *de;
> - page = ufs_get_page(inode, i);
>
> + page = ufs_get_page(inode, i, &page_addr);
> if (IS_ERR(page))
> continue;
>
> - kaddr = page_address(page);
> + kaddr = page_addr;

kaddr = ufs_get_page(inode, i, &page);
if (IS_ERR(kaddr))
continue;

> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);

ufs_put_page(page, kaddr)
> }
> return 1;
>
> not_empty:
> - ufs_put_page(page);
> + ufs_put_page(page, page_addr);
same here.

> return 0;
> }
>
> diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
> index 29d5a0e0c8f0..cdf3bcf9d02e 100644
> --- a/fs/ufs/namei.c
> +++ b/fs/ufs/namei.c
> @@ -210,13 +210,14 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
> struct inode * inode = d_inode(dentry);
> struct ufs_dir_entry *de;
> struct page *page;
> + void *page_addr;
> int err = -ENOENT;
>
> - de = ufs_find_entry(dir, &dentry->d_name, &page);
> + de = ufs_find_entry(dir, &dentry->d_name, &page, &page_addr);
> if (!de)
> goto out;
>
> - err = ufs_delete_entry(dir, de, page);
> + err = ufs_delete_entry(dir, de, page, page_addr);
> if (err)
> goto out;

None of that is needed.

> @@ -250,27 +251,31 @@ static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
> struct inode *old_inode = d_inode(old_dentry);
> struct inode *new_inode = d_inode(new_dentry);
> struct page *dir_page = NULL;
> + void *dir_page_addr;
> struct ufs_dir_entry * dir_de = NULL;
> struct page *old_page;
> + void *old_page_addr;
> struct ufs_dir_entry *old_de;
> int err = -ENOENT;
>
> if (flags & ~RENAME_NOREPLACE)
> return -EINVAL;
>
> - old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
> + old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page,
> + &old_page_addr);
> if (!old_de)
> goto out;
>
> if (S_ISDIR(old_inode->i_mode)) {
> err = -EIO;
> - dir_de = ufs_dotdot(old_inode, &dir_page);
> + dir_de = ufs_dotdot(old_inode, &dir_page, &dir_page_addr);
> if (!dir_de)
> goto out_old;
> }
>
> if (new_inode) {
> struct page *new_page;
> + void *new_page_addr;
> struct ufs_dir_entry *new_de;
>
> err = -ENOTEMPTY;
> @@ -278,10 +283,11 @@ static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
> goto out_dir;
>
> err = -ENOENT;
> - new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
> + new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page,
> + &new_page_addr);
> if (!new_de)
> goto out_dir;
> - ufs_set_link(new_dir, new_de, new_page, old_inode, 1);
> + ufs_set_link(new_dir, new_de, new_page, new_page_addr, old_inode, 1);
> new_inode->i_ctime = current_time(new_inode);
> if (dir_de)
> drop_nlink(new_inode);
> @@ -300,29 +306,25 @@ static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
> */
> old_inode->i_ctime = current_time(old_inode);
>
> - ufs_delete_entry(old_dir, old_de, old_page);
> + ufs_delete_entry(old_dir, old_de, old_page, old_page_addr);
> mark_inode_dirty(old_inode);
>
> if (dir_de) {
> if (old_dir != new_dir)
> - ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0);
> - else {
> - kunmap(dir_page);
> - put_page(dir_page);
> - }
> + ufs_set_link(old_inode, dir_de, dir_page,
> + dir_page_addr, new_dir, 0);
> + else
> + ufs_put_page(dir_page, dir_page_addr);
> inode_dec_link_count(old_dir);
> }
> return 0;
>
>
> out_dir:
> - if (dir_de) {
> - kunmap(dir_page);
> - put_page(dir_page);
> - }
> + if (dir_de)
> + ufs_put_page(dir_page, dir_page_addr);
> out_old:
> - kunmap(old_page);
> - put_page(old_page);
> + ufs_put_page(old_page, old_page_addr);

Just pass dir_de and old_de resp. in these two calls of ufs_put_page() - nothing
else is needed...


The bottom line:
* teach your ufs_put_page() to accept any address within the page
* flip the ways you return page and address in ufs_get_page()
* use offset_in_page(addr) instead of these addr - page_address(page)
and you'll get a much smaller patch, with a lot less noise in it.
What's more, offset_in_page() part can be carved out into a separate
commit - it's valid on its own, and it makes both halves easier to
follow.

AFAICS, similar observations apply in your sysvfs patch; the point about
calling conventions for ufs_get_page() definitely applies there, and
stronger than for ufs - those casts are eye-watering...

2022-11-25 23:05:51

by Al Viro

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] fs/ufs: Replace kmap() with kmap_local_page()

On Fri, Nov 25, 2022 at 10:17:28PM +0000, Al Viro wrote:

> The bottom line:
> * teach your ufs_put_page() to accept any address within the page
> * flip the ways you return page and address in ufs_get_page()
> * use offset_in_page(addr) instead of these addr - page_address(page)
> and you'll get a much smaller patch, with a lot less noise in it.
> What's more, offset_in_page() part can be carved out into a separate
> commit - it's valid on its own, and it makes both halves easier to
> follow.
>
> AFAICS, similar observations apply in your sysvfs patch; the point about
> calling conventions for ufs_get_page() definitely applies there, and
> stronger than for ufs - those casts are eye-watering...

As the matter of fact, I'd probably split it into 3 steps:
1) switch the calling conventions of ufs_get_page() - callers follow
it with something like kaddr = page_address(page); and that change makes
sense on its own. Isolated and easy to follow.
2) offset_in_page() changes. Again, separate, stands on its own and
is easy to follow.
3) kmap_local() switch itself - pass address to ufs_put_page(), etc.
Considerably smaller and less cluttered than your current variant.

2022-11-26 14:29:06

by Fabio M. De Francesco

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] fs/ufs: Replace kmap() with kmap_local_page()

On venerd? 25 novembre 2022 23:50:56 CET Al Viro wrote:
> On Fri, Nov 25, 2022 at 10:17:28PM +0000, Al Viro wrote:
> > The bottom line:
> > * teach your ufs_put_page() to accept any address within the page
> > * flip the ways you return page and address in ufs_get_page()
> > * use offset_in_page(addr) instead of these addr -
page_address(page)
> >
> > and you'll get a much smaller patch, with a lot less noise in it.
> > What's more, offset_in_page() part can be carved out into a separate
> > commit - it's valid on its own, and it makes both halves easier to
> > follow.
> >
> > AFAICS, similar observations apply in your sysvfs patch; the point about
> > calling conventions for ufs_get_page() definitely applies there, and
> > stronger than for ufs - those casts are eye-watering...
>
> As the matter of fact, I'd probably split it into 3 steps:
> 1) switch the calling conventions of ufs_get_page() - callers follow
> it with something like kaddr = page_address(page); and that change makes
> sense on its own. Isolated and easy to follow.
> 2) offset_in_page() changes. Again, separate, stands on its own and
> is easy to follow.
> 3) kmap_local() switch itself - pass address to ufs_put_page(), etc.
> Considerably smaller and less cluttered than your current variant.

Al, thanks for taking the time to answer in such detail although I can believe
that, to spot the mistakes in my patch and then write your email, it probably
took you 10 minutes or so :-)

Instead I had to read two or three times to make sense of it all. I will do my
best to first rewrite these kmap_local_page() conversions in fs/ufs by
separating the work into three patches, as you recommended.

I'm pretty sure that the next attempt won't be applicable yet, because I might
not fully understand your suggestions or I might not be able to implement them
correctly. I'm afraid I addressed something that, as it stands, is a little
beyond my knowledge and experience, so I hope you'll want to chime in and
comment on the next version as well.

When the changes to fs/ufs will be done I will also rewrite the patch for fs/
sysv which (as you pointed out) needs to be worked out according to the same
requirements.

I am sincerely grateful to you also because it seemed to me that I would never
get any feedback regarding these two old patches.

Regards,

Fabio



2022-11-27 16:19:25

by Al Viro

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] fs/ufs: Replace kmap() with kmap_local_page()

On Sat, Nov 26, 2022 at 03:12:16PM +0100, Fabio M. De Francesco wrote:

> Al, thanks for taking the time to answer in such detail although I can believe
> that, to spot the mistakes in my patch and then write your email, it probably
> took you 10 minutes or so :-)

FWIW, it's generally useful to ask yourself if patch can be made less noisy -
are there any regular parts that could be carved out of it, how much is due to
calling conventions changes and can those be done differently, etc.

If it can be decomposed into simpler parts, it becomes easier to review (and
that includes debugging - when bisect points to a commit and you need to find
what's wrong with it, splitting that change into several steps is often
a useful approach; if nothing else, you get a chance to bisect that and
narrow the area to look into, but often enough you'll spot the bug while
carving the change up).

Below is what I'd actually done when reviewing that thing (and it took more
than 10 minutes):

What happens in this patch?
1) switch from kmap/kunmap to kmap_local_page/kunmap_local; that's
in ufs_get_page() and ufs_put_page(). OK, so ufs_put_page() needs to be
supplied with the virtual address in addition to page. And callers of
ufs_get_page() need the virtual address to access, not just the page.

2) pages fed to ufs_put_page() ultimately come from earlier
ufs_get_page(), so presumably virtual address could be propagated along
the same path. OK...

3) change in ufs_inode_by_name() seems to fit the above -
ufs_find_entry() used to return page, now it returns page and address;
that address is fed to ufs_put_page().
So what has ufs_find_entry() become? 2 "in" arguments (directory
inode and entry name), 2 "out" arguments (page and virtual address of the
first byte in that page) and return value - pointer to directory entry.
So basically we are passing directory and name and get a triple -
* pointer to directory entry
* page reference that need to be held so we could access that
directory entry
* pointer to the first byte of the page that contains directory
entry...
Looks redundant - the 3rd component is simply the 1st one rounded
down to page size...

And it seems that at least some of the affected functions are
in the same situation - they are already working in terms of <pointer to
directory entry, pointer to page> pairs, and with those adding the
virtual address is redundant. Let's see...

4) functions with changed calling conventions are
ufs_put_page()
ufs_set_link()
ufs_check_page()
ufs_get_page()
ufs_dotdot()
ufs_find_entry()
ufs_delete_entry()
Out of those, ufs_put_page() and ufs_get_page() are consumer and
producer of the virtual address, and ufs_check_page() needs the address
as well, but other four all seem to be in the same boat -
pair <pointer to directory entry, page> becomes a triple
<pointer to directory entry, page, pointer to the first byte of page>
and that's redundant.
ufs_set_link() and ufs_delete_entry() have that triple passed
to them; ufs_find_entry() and ufs_dotdot() return such triple to the
caller.
Looks like we could replace e.g.
de = ufs_dotdot(inode, &page, &addr);
if (de) {
...
ufs_put_page(page, addr);
}
with
de = ufs_dotdot(inode, &page);
if (de) {
addr = <round de down>
...
ufs_put_page(page, addr);
}
That would return the call to what it used to be; whether it's a win
or not, depends upon the things we use addr for. Let's check...

5) ufs_set_link(inode, de, page, addr, ...) always gets de, page and
addr coming from the same ufs_find_entry() or ufs_dotdot() call. I.e.
in all cases addr is rounded-down de; could as well find that value inside
ufs_set_link(), returning the callers to original state.

6) ufs_delete_entry(dir, de, page, addr) - same story; de, page and
addr come from the same call of ufs_find_entry(), so we can reconstruct
addr by de inside ufs_delete_entry(); no need to pass it down, returning
those callers to original state.

7) ufs_find_entry() - 4 callers, address is passed to ufs_put_page(),
ufs_delete_entry(), ufs_set_link(). The last two don't really need that
argument, so we are left with "need to round down for passing to ufs_put_page()".

8) ufs_dotdot() - address is passed to ufs_put_page() or ufs_set_link().
The latter doesn't need that argument, so we are again left with "need to round
down for passing to ufs_put_page()".

9) So ufs_put_page() gets rounded-down pointer to directory entry here...
Might be worth a helper that would take a page + address and do ufs_put_page()
for page and rounded down address... But do we even need that to be separate
from ufs_put_page()? After all, the addresses we are currently passing to
ufs_put_page() are page-aligned, so rounding any of them down to page size
is a no-op. Might as well change ufs_put_page() from "takes page reference
and virtual address of beginning of the page" to "takes page reference and
virtual address anywhere within the page"; if nothing else, it would reduce
the clutter in fs/ufs/namei.c parts and it might be useful in fs/ufs/dir.c
part as well. OK, the intermediate plan:
* ufs_put_page() rounds the address down to page size
* ufs_set_link() and ufs_delete_entry() do not get page_addr passed
to them; reconstruct it as rounded-down directory entry pointer. Calling
conventions are back to original.
* ufs_find_entry() and ufs_dotdot() do not bother returning page_addr.
Calling conventions are back to original.
* callers of ufs_put_page() in fs/ufs/namei.c give it directory entry
instead of page_addr. Same for ufs_inode_by_name(), that got us started in
that direction.

10) ufs_set_link() uses page_addr for two things - finding location
in file (page_offset() + difference between de and page_addr) and feeding it
to ufs_put_page(). The latter can be given de, as we do in other places;
the former would become "difference between de and de rounded down to page
size"... wait a sec - that's just the address modulo page size, aka.
offset_in_page(de). OK, we don't even need to reconstruct page_addr in
there. Win...

11) for ufs_get_page() we want the virtual address returned. Looking
through the callers, though, shows that calling conventions are clumsy -
we have page = ufs_get_page(dir, n, &page_addr) followed by casting
page_addr to various pointer types, assigning it to various pointer
variables or, in one case, even ufs_get_page(dir, n, (void **)&char_pointer_var)
Ouch... We have 2 "in" arguments (directory and page number) and
2 values to return to caller - page reference and pointer to beginning of
the page. The type of the first component is always the same (struct page *),
but the second... depending upon the caller, it may be void *, char *
or struct ufs_dir_entry *. Passing it out via "out" argument is painful -
return value is much more suitable for that; there we can return void *
and have the assignment in callers convert it as needed. Page reference,
OTOH, is well-suited for passing out via "out" argument - same type in
all cases.
So it looks like we want addr = ufs_get_page(dir, n, &page);
as calling conventions. Callers need to check IS_ERR(addr) instead of
IS_ERR(page), but that's about it. *AND* they might be able to cut
down on assignments - just store the return value directly in variable
they would copy it into.

12) ufs_dotdot() needs not bother with returning page_address.
With new calling conventions for ufs_get_page() it doesn't need
'page' or 'page_addr' at all - we want the value of
ufs_get_page(dir, 0, p)
but we only want it converted to struct ufs_dir_entry *. And we already
have a variable of just that type declared right there, so we can do
simply
struct ufs_dir_entry *de = ufs_get_page(dir, 0, p);
The rest becomes
if (!IS_ERR(de)) {
de = ufs_next_entry(dir->i_sb, de);
} else {
de = NULL;
}
return de;
or, better yet,
if (!IS_ERR(de))
return ufs_next_entry(dir->i_sb, de);
else
return NULL;
Makes sense - ".." is the second entry in directory, so we ask for the
first page, treat its beginning as entry pointer and return the next
entry... And yes, it looks like ufs_get_page() callers are happier
with such calling conventions.

13) ufs_find_entry() - getting rid of res_page_addr argument
is obvious, so's adjusting the calling conventions for ufs_get_page().
Surprisingly subtle question is whether we can get rid of 'page_addr'
there; now that we don't store it in *res_page_addr the only thing
we use it for is ufs_put_page(). With ufs_get_page() calling
conventions change we'd get
page_addr = ufs_get_page(dir, n, &page);
if (!IS_ERR(page_addr)) {
kaddr = page_addr;
de = (struct ufs_dir_entry *) kaddr;
kaddr += ufs_last_byte(dir, n) - reclen;
while ((char *) de <= kaddr) {
if (ufs_match(sb, namelen, name, de))
goto found;
de = ufs_next_entry(sb, de);
}
ufs_put_page(page, page_addr);
}
We have a couple of other pointers around - kaddr and de. kaddr is
set to page_addr + something, then de iterates through the entries
starting at page_addr until it gets past kaddr, looking for the match.
So we get to the end of loop if we have *not* found a match within
this page, and de is not guaranteed to point within the same page when
we leave the loop. Not suitable for passing to ufs_put_page(), then.
kaddr, OTOH, would better be pointing into the same page - we use
de <= kaddr as permission to dereference de, and we'd better not run
out of page. A look at ufs_last_byte() shows that its return value
is always no greater than PAGE_SIZE, so kaddr is guaranteed to point
within the same page. Good. So we can turn that into
kaddr = ufs_get_page(dir, n, &page);
if (!IS_ERR(kaddr)) {
de = (struct ufs_dir_entry *) kaddr;
kaddr += ufs_last_byte(dir, n) - reclen;
....
ufs_put_page(page, kaddr);
}
and page_addr bites the dust.

14) ufs_add_link() - similar adjustment to new calling conventions
for ufs_get_page(). Uses of page_addr: fed to ufs_put_page() (same as
in ufs_find_entry() kaddr is guaranteed to point into the same page and
thus can be used instead) and calculation of position in directory, same
as we'd seen in ufs_set_link(). The latter becomes page_offset(page) +
offset_in_page(de), killing page_addr off. BTW, we get
kaddr = ufs_get_page(dir, n, &page);
err = PTR_ERR(kaddr);
if (IS_ERR(kaddr))
goto out;
with out: being just 'return err;', which suggests
kaddr = ufs_get_page(dir, n, &page);
if (IS_ERR(kaddr))
return ERR_PTR(kaddr);
instead (and that was the only goto out; so the label can be removed).
The value stored in err in case !IS_ERR(kaddr) is (thankfully) never
used - would've been a bug otherwise. So this is an equivalent transformation.

15) ufs_readdir() - trivial modifications to deal with ufs_get_page()
calling conventions change; an ugly cast goes away.

16) ufs_delete_entry() - kaddr argument is dropped, we make it a local
variable initialized with rounded down dir. Incidentally, 'dir - kaddr' can
be replaced with offset_in_page(dir) in there and similar for (char *)pde - kaddr.

17) ufs_empty_dir() - we adjust to ufs_get_page() calling conventions
change, page_addr can be dropped same as in ufs_find_entry(). Need to lift
kaddr out of loop to deal with not_empty: part (hadn't noticed when writing
original reply, but compiler would've caught that)

18) fs/ufs/namei.c parts do shrink down to pretty much just 'pass
the right directory entry to ufs_put_page()' (and most of ufs.h changes
disapper). ufs_get_page() calling conventions change is actually the bulk
of what remains of this patch after that massage. Other than that we have
several 'use offset_in_page()' local changes and extra argument of
ufs_put_page(). Hmm... offset_in_page() actually counts as a valid (and
trivially safe) optimization, independent of anything kmap-related.
Worth carving out as the first step in series. Come to think of that,
ufs_get_page() calling conventions change *also* can be done without
the kmap_local_page() switchover; we would do kmap() and return page_address(page).
Callers would be happier with such calling conventions, and carving that
part out would reduce the patch to just the 'switch ufs_get_page() to
kmap_local_page(), switch ufs_put_page() to kunmap_local() and supply it
with a pointer into the page in question'. Which is much smaller and
easier to review.

2022-11-29 00:02:07

by Fabio M. De Francesco

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] fs/ufs: Replace kmap() with kmap_local_page()

On domenica 27 novembre 2022 17:15:28 CET Al Viro wrote:
> On Sat, Nov 26, 2022 at 03:12:16PM +0100, Fabio M. De Francesco wrote:
> > Al, thanks for taking the time to answer in such detail although I can
> > believe that, to spot the mistakes in my patch and then write your email,
> > it probably took you 10 minutes or so :-)
>
> FWIW, it's generally useful to ask yourself if patch can be made less noisy
-
> are there any regular parts that could be carved out of it, how much is due
to
> calling conventions changes and can those be done differently, etc.
>
> If it can be decomposed into simpler parts, it becomes easier to review (and
> that includes debugging - when bisect points to a commit and you need to
find
> what's wrong with it, splitting that change into several steps is often
> a useful approach; if nothing else, you get a chance to bisect that and
> narrow the area to look into, but often enough you'll spot the bug while
> carving the change up).

About a year ago, Greg K-H asked me (and another developer who was working
with me) to split a small batch of 5 patches into more than 20. He explained
pretty much the same reasons why we should split patches into smaller parts,
easier to understand and review. It's also much easier for the developers
themselves to prepare and debug before committing.

I thought I got the message, but I fell into the same trap again and sent one
big patch (two if we're even talking about fs/sysv) which is not so easy to
review. I guess that's why they've been set aside and not commented on for a
long time.

>
> Below is what I'd actually done when reviewing that thing (and it took more
> than 10 minutes):

I was just kidding when I was talking about those "10 minutes". At the same
time I wanted to underline how the things that are difficult for me are much
easier for you to analyze and solve.

>
> What happens in this patch?

What follows is a great lesson about how to (1) make sense of what this change
is doing (2) how it is doing it (3) how we can optimize (especially how to
remove redundancy and unnecessary complexity.

The lessons which follow are great. Furthermore, I'm also having the
opportunity to follow your mental process. That will also help very much with
this task and also in future.

I see no need to reply to each of the suggestions you provided with such
clarity and huge amount of details.

As I already said I'll rework trying to do my best, although it will take some
time because of several personal reasons that are of no interests to the
readers.

I'm sincerely grateful to you for spending so much time (forget those 10
minutes, please) to help me.

Again, thank you very much,

Fabio

> 1) switch from kmap/kunmap to kmap_local_page/kunmap_local; that's
> in ufs_get_page() and ufs_put_page(). OK, so ufs_put_page() needs to be
> supplied with the virtual address in addition to page. And callers of
> ufs_get_page() need the virtual address to access, not just the page.
>
> 2) pages fed to ufs_put_page() ultimately come from earlier
> ufs_get_page(), so presumably virtual address could be propagated along
> the same path. OK...
>
> 3) change in ufs_inode_by_name() seems to fit the above -
> ufs_find_entry() used to return page, now it returns page and address;
> that address is fed to ufs_put_page().
> So what has ufs_find_entry() become? 2 "in" arguments (directory
> inode and entry name), 2 "out" arguments (page and virtual address of the
> first byte in that page) and return value - pointer to directory entry.
> So basically we are passing directory and name and get a triple -
> * pointer to directory entry
> * page reference that need to be held so we could access that
> directory entry
> * pointer to the first byte of the page that contains directory
> entry...
> Looks redundant - the 3rd component is simply the 1st one rounded
> down to page size...
>
> And it seems that at least some of the affected functions are
> in the same situation - they are already working in terms of <pointer to
> directory entry, pointer to page> pairs, and with those adding the
> virtual address is redundant. Let's see...
>
> 4) functions with changed calling conventions are
> ufs_put_page()
> ufs_set_link()
> ufs_check_page()
> ufs_get_page()
> ufs_dotdot()
> ufs_find_entry()
> ufs_delete_entry()
> Out of those, ufs_put_page() and ufs_get_page() are consumer and
> producer of the virtual address, and ufs_check_page() needs the address
> as well, but other four all seem to be in the same boat -
> pair <pointer to directory entry, page> becomes a triple
> <pointer to directory entry, page, pointer to the first byte of page>
> and that's redundant.
> ufs_set_link() and ufs_delete_entry() have that triple passed
> to them; ufs_find_entry() and ufs_dotdot() return such triple to the
> caller.
> Looks like we could replace e.g.
> de = ufs_dotdot(inode, &page, &addr);
> if (de) {
> ...
> ufs_put_page(page, addr);
> }
> with
> de = ufs_dotdot(inode, &page);
> if (de) {
> addr = <round de down>
> ...
> ufs_put_page(page, addr);
> }
> That would return the call to what it used to be; whether it's a win
> or not, depends upon the things we use addr for. Let's check...
>
> 5) ufs_set_link(inode, de, page, addr, ...) always gets de, page and
> addr coming from the same ufs_find_entry() or ufs_dotdot() call. I.e.
> in all cases addr is rounded-down de; could as well find that value inside
> ufs_set_link(), returning the callers to original state.
>
> 6) ufs_delete_entry(dir, de, page, addr) - same story; de, page and
> addr come from the same call of ufs_find_entry(), so we can reconstruct
> addr by de inside ufs_delete_entry(); no need to pass it down, returning
> those callers to original state.
>
> 7) ufs_find_entry() - 4 callers, address is passed to
ufs_put_page(),
> ufs_delete_entry(), ufs_set_link(). The last two don't really need that
> argument, so we are left with "need to round down for passing to
> ufs_put_page()".
>
> 8) ufs_dotdot() - address is passed to ufs_put_page() or
ufs_set_link().
> The latter doesn't need that argument, so we are again left with "need to
> round down for passing to ufs_put_page()".
>
> 9) So ufs_put_page() gets rounded-down pointer to directory entry
here...
> Might be worth a helper that would take a page + address and do
ufs_put_page()
> for page and rounded down address... But do we even need that to be
separate
> from ufs_put_page()? After all, the addresses we are currently passing to
> ufs_put_page() are page-aligned, so rounding any of them down to page size
is
> a no-op. Might as well change ufs_put_page() from "takes page reference and
> virtual address of beginning of the page" to "takes page reference and
> virtual address anywhere within the page"; if nothing else, it would reduce
> the clutter in fs/ufs/namei.c parts and it might be useful in fs/ufs/dir.c
> part as well. OK, the intermediate plan:
> * ufs_put_page() rounds the address down to page size
> * ufs_set_link() and ufs_delete_entry() do not get page_addr passed
> to them; reconstruct it as rounded-down directory entry pointer. Calling
> conventions are back to original.
> * ufs_find_entry() and ufs_dotdot() do not bother returning
page_addr.
> Calling conventions are back to original.
> * callers of ufs_put_page() in fs/ufs/namei.c give it directory
entry
> instead of page_addr. Same for ufs_inode_by_name(), that got us started in
> that direction.
>
> 10) ufs_set_link() uses page_addr for two things - finding location
> in file (page_offset() + difference between de and page_addr) and feeding it
> to ufs_put_page(). The latter can be given de, as we do in other places;
> the former would become "difference between de and de rounded down to page
> size"... wait a sec - that's just the address modulo page size, aka.
> offset_in_page(de). OK, we don't even need to reconstruct page_addr in
> there. Win...
>
> 11) for ufs_get_page() we want the virtual address returned.
Looking
> through the callers, though, shows that calling conventions are clumsy -
> we have page = ufs_get_page(dir, n, &page_addr) followed by casting
> page_addr to various pointer types, assigning it to various pointer
> variables or, in one case, even ufs_get_page(dir, n, (void
> **)&char_pointer_var) Ouch... We have 2 "in" arguments (directory and page
> number) and
> 2 values to return to caller - page reference and pointer to beginning of
> the page. The type of the first component is always the same (struct page
*),
> but the second... depending upon the caller, it may be void *, char * or
> struct ufs_dir_entry *. Passing it out via "out" argument is painful -
> return value is much more suitable for that; there we can return void * and
> have the assignment in callers convert it as needed. Page reference, OTOH,
> is well-suited for passing out via "out" argument - same type in all cases.
> So it looks like we want addr = ufs_get_page(dir, n, &page);
> as calling conventions. Callers need to check IS_ERR(addr) instead of
> IS_ERR(page), but that's about it. *AND* they might be able to cut
> down on assignments - just store the return value directly in variable
> they would copy it into.
>
> 12) ufs_dotdot() needs not bother with returning page_address.
> With new calling conventions for ufs_get_page() it doesn't need
> 'page' or 'page_addr' at all - we want the value of
> ufs_get_page(dir, 0, p)
> but we only want it converted to struct ufs_dir_entry *. And we already
> have a variable of just that type declared right there, so we can do
> simply
> struct ufs_dir_entry *de = ufs_get_page(dir, 0, p);
> The rest becomes
> if (!IS_ERR(de)) {
> de = ufs_next_entry(dir->i_sb, de);
> } else {
> de = NULL;
> }
> return de;
> or, better yet,
> if (!IS_ERR(de))
> return ufs_next_entry(dir->i_sb, de);
> else
> return NULL;
> Makes sense - ".." is the second entry in directory, so we ask for the
> first page, treat its beginning as entry pointer and return the next
> entry... And yes, it looks like ufs_get_page() callers are happier
> with such calling conventions.
>
> 13) ufs_find_entry() - getting rid of res_page_addr argument
> is obvious, so's adjusting the calling conventions for ufs_get_page().
> Surprisingly subtle question is whether we can get rid of 'page_addr'
> there; now that we don't store it in *res_page_addr the only thing
> we use it for is ufs_put_page(). With ufs_get_page() calling
> conventions change we'd get
> page_addr = ufs_get_page(dir, n, &page);
> if (!IS_ERR(page_addr)) {
> kaddr = page_addr;
> de = (struct ufs_dir_entry *) kaddr;
> kaddr += ufs_last_byte(dir, n) - reclen;
> while ((char *) de <= kaddr) {
> if (ufs_match(sb, namelen, name, de))
> goto found;
> de = ufs_next_entry(sb, de);
> }
> ufs_put_page(page, page_addr);
> }
> We have a couple of other pointers around - kaddr and de. kaddr is
> set to page_addr + something, then de iterates through the entries
> starting at page_addr until it gets past kaddr, looking for the match.
> So we get to the end of loop if we have *not* found a match within
> this page, and de is not guaranteed to point within the same page when
> we leave the loop. Not suitable for passing to ufs_put_page(), then.
> kaddr, OTOH, would better be pointing into the same page - we use
> de <= kaddr as permission to dereference de, and we'd better not run
> out of page. A look at ufs_last_byte() shows that its return value
> is always no greater than PAGE_SIZE, so kaddr is guaranteed to point
> within the same page. Good. So we can turn that into
> kaddr = ufs_get_page(dir, n, &page);
> if (!IS_ERR(kaddr)) {
> de = (struct ufs_dir_entry *) kaddr;
> kaddr += ufs_last_byte(dir, n) - reclen;
> ....
> ufs_put_page(page, kaddr);
> }
> and page_addr bites the dust.
>
> 14) ufs_add_link() - similar adjustment to new calling conventions
> for ufs_get_page(). Uses of page_addr: fed to ufs_put_page() (same as
> in ufs_find_entry() kaddr is guaranteed to point into the same page and
> thus can be used instead) and calculation of position in directory, same
> as we'd seen in ufs_set_link(). The latter becomes page_offset(page) +
> offset_in_page(de), killing page_addr off. BTW, we get
> kaddr = ufs_get_page(dir, n, &page);
> err = PTR_ERR(kaddr);
> if (IS_ERR(kaddr))
> goto out;
> with out: being just 'return err;', which suggests
> kaddr = ufs_get_page(dir, n, &page);
> if (IS_ERR(kaddr))
> return ERR_PTR(kaddr);
> instead (and that was the only goto out; so the label can be removed).
> The value stored in err in case !IS_ERR(kaddr) is (thankfully) never
> used - would've been a bug otherwise. So this is an equivalent
> transformation.
>
> 15) ufs_readdir() - trivial modifications to deal with
ufs_get_page()
> calling conventions change; an ugly cast goes away.
>
> 16) ufs_delete_entry() - kaddr argument is dropped, we make it a
local
> variable initialized with rounded down dir. Incidentally, 'dir - kaddr' can
> be replaced with offset_in_page(dir) in there and similar for (char *)pde -
> kaddr.
>
> 17) ufs_empty_dir() - we adjust to ufs_get_page() calling
conventions
> change, page_addr can be dropped same as in ufs_find_entry(). Need to lift
> kaddr out of loop to deal with not_empty: part (hadn't noticed when writing
> original reply, but compiler would've caught that)
>
> 18) fs/ufs/namei.c parts do shrink down to pretty much just 'pass
> the right directory entry to ufs_put_page()' (and most of ufs.h changes
> disapper). ufs_get_page() calling conventions change is actually the bulk
> of what remains of this patch after that massage. Other than that we have
> several 'use offset_in_page()' local changes and extra argument of
> ufs_put_page(). Hmm... offset_in_page() actually counts as a valid (and
> trivially safe) optimization, independent of anything kmap-related.
> Worth carving out as the first step in series. Come to think of that,
> ufs_get_page() calling conventions change *also* can be done without
> the kmap_local_page() switchover; we would do kmap() and return
> page_address(page). Callers would be happier with such calling conventions,
> and carving that part out would reduce the patch to just the 'switch
> ufs_get_page() to kmap_local_page(), switch ufs_put_page() to kunmap_local()
> and supply it with a pointer into the page in question'. Which is much
> smaller and easier to review.