2020-09-14 17:46:43

by Mateusz Nosek

[permalink] [raw]
Subject: [RFC PATCH] fs: micro-optimization remove branches by adjusting flag values

From: Mateusz Nosek <[email protected]>

When flags A and B have equal values than the following code

if(flags1 & A)
flags2 |= B;

is equivalent to

flags2 |= (flags1 & A);

The latter code should generate less instructions and be faster as one
branch is omitted in it.

Introduced patch changes the value of 'LOOKUP_EMPTY' and makes it equal
to the value of 'AT_EMPTY_PATH'. Thanks to that, few branches can be
changed in a way showed above which improves both performance and the
size of the code.

Signed-off-by: Mateusz Nosek <[email protected]>
---
fs/exec.c | 14 ++++++++++----
fs/fhandle.c | 4 ++--
fs/namespace.c | 4 ++--
fs/open.c | 8 ++++----
fs/stat.c | 4 ++--
fs/utimes.c | 6 +++---
include/linux/namei.h | 4 ++--
7 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index a91003e28eaa..39e1ada1ee6c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -904,8 +904,8 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
return ERR_PTR(-EINVAL);
if (flags & AT_SYMLINK_NOFOLLOW)
open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
- if (flags & AT_EMPTY_PATH)
- open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ open_exec_flags.lookup_flags |= (flags & AT_EMPTY_PATH);

file = do_filp_open(fd, name, &open_exec_flags);
if (IS_ERR(file))
@@ -2176,7 +2176,10 @@ SYSCALL_DEFINE5(execveat,
const char __user *const __user *, envp,
int, flags)
{
- int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+ int lookup_flags;
+
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ lookup_flags = (flags & AT_EMPTY_PATH);

return do_execveat(fd,
getname_flags(filename, lookup_flags, NULL),
@@ -2197,7 +2200,10 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
const compat_uptr_t __user *, envp,
int, flags)
{
- int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+ int lookup_flags;
+
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ lookup_flags = (flags & AT_EMPTY_PATH);

return compat_do_execveat(fd,
getname_flags(filename, lookup_flags, NULL),
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 01263ffbc4c0..579bf462bf89 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -102,8 +102,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
return -EINVAL;

lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
- if (flag & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ lookup_flags |= (flag & AT_EMPTY_PATH);
err = user_path_at(dfd, name, lookup_flags, &path);
if (!err) {
err = do_sys_name_to_handle(&path, handle, mnt_id);
diff --git a/fs/namespace.c b/fs/namespace.c
index 098f981dce54..319f42d11236 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2456,8 +2456,8 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, fl
lookup_flags &= ~LOOKUP_AUTOMOUNT;
if (flags & AT_SYMLINK_NOFOLLOW)
lookup_flags &= ~LOOKUP_FOLLOW;
- if (flags & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ lookup_flags |= (flags & AT_EMPTY_PATH);

if (detached && !may_mount())
return -EPERM;
diff --git a/fs/open.c b/fs/open.c
index 9af548fb841b..8b6fe1e89811 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -410,8 +410,8 @@ static long do_faccessat(int dfd, const char __user *filename, int mode, int fla

if (flags & AT_SYMLINK_NOFOLLOW)
lookup_flags &= ~LOOKUP_FOLLOW;
- if (flags & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ lookup_flags |= (flags & AT_EMPTY_PATH);

if (!(flags & AT_EACCESS)) {
old_cred = access_override_creds();
@@ -692,8 +692,8 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
goto out;

lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
- if (flag & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ lookup_flags |= (flag & AT_EMPTY_PATH);
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
diff --git a/fs/stat.c b/fs/stat.c
index 44f8ad346db4..a9feb7a7e9ec 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -168,8 +168,8 @@ static inline unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags,
*lookup_flags &= ~LOOKUP_FOLLOW;
if (flags & AT_NO_AUTOMOUNT)
*lookup_flags &= ~LOOKUP_AUTOMOUNT;
- if (flags & AT_EMPTY_PATH)
- *lookup_flags |= LOOKUP_EMPTY;
+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ *lookup_flags |= (flags & AT_EMPTY_PATH);

return 0;
}
diff --git a/fs/utimes.c b/fs/utimes.c
index fd3cc4226224..95a48dbda7e1 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -79,15 +79,15 @@ static int do_utimes_path(int dfd, const char __user *filename,
struct timespec64 *times, int flags)
{
struct path path;
- int lookup_flags = 0, error;
+ int lookup_flags, error;

if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
return -EINVAL;

+ BUILD_BUG_ON(AT_EMPTY_PATH != LOOKUP_EMPTY);
+ lookup_flags = (flags & AT_EMPTY_PATH);
if (!(flags & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
- if (flags & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;

retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a4bb992623c4..52f8015717c0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -21,7 +21,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
#define LOOKUP_FOLLOW 0x0001 /* follow links at the end */
#define LOOKUP_DIRECTORY 0x0002 /* require a directory */
#define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */
-#define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */
+#define LOOKUP_EMPTY 0x1000 /* accept empty path [user_... only], AT_EMPTY_PATH set */
#define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */
#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */

@@ -36,7 +36,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};

/* internal use only */
#define LOOKUP_PARENT 0x0010
-#define LOOKUP_JUMPED 0x1000
+#define LOOKUP_JUMPED 0x4000
#define LOOKUP_ROOT 0x2000
#define LOOKUP_ROOT_GRABBED 0x0008

--
2.20.1


2020-09-14 18:09:58

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [RFC PATCH] fs: micro-optimization remove branches by adjusting flag values

On Mon, Sep 14, 2020 at 07:43:38PM +0200, [email protected] wrote:
> From: Mateusz Nosek <[email protected]>
>
> When flags A and B have equal values than the following code
>
> if(flags1 & A)
> flags2 |= B;
>
> is equivalent to
>
> flags2 |= (flags1 & A);
>
> The latter code should generate less instructions and be faster as one
> branch is omitted in it.

[citation needed]

$ cat test.c
int a(int x)
{
int y = 0;

if (x & 1)
y |= 1;
if (x & 2)
y |= 2;

return y;
}

$ objdump -d test.o
0000000000000000 <a>:
0: 89 f8 mov %edi,%eax
2: 83 e0 03 and $0x3,%eax
5: c3 retq

Please stop submitting uglifying patches without checking they actually
improve anything. GCC is smarter than you think it is.

2020-09-15 00:33:40

by Al Viro

[permalink] [raw]
Subject: Re: [RFC PATCH] fs: micro-optimization remove branches by adjusting flag values

On Mon, Sep 14, 2020 at 07:43:38PM +0200, [email protected] wrote:
> From: Mateusz Nosek <[email protected]>
>
> When flags A and B have equal values than the following code
>
> if(flags1 & A)
> flags2 |= B;
>
> is equivalent to
>
> flags2 |= (flags1 & A);
>
> The latter code should generate less instructions and be faster as one
> branch is omitted in it.
>
> Introduced patch changes the value of 'LOOKUP_EMPTY' and makes it equal
> to the value of 'AT_EMPTY_PATH'. Thanks to that, few branches can be
> changed in a way showed above which improves both performance and the
> size of the code.

No. AT_EMPTY_PATH is a part of userland ABI; to tie LOOKUP_EMPTY to it
means that we can't ever modify the sucker. Worse, it restricts any
possible reshuffling of the LOOKUP_... bits in the future.

So unless you can show an effect on the real-world profiles, there are
fairly strong reasons to avoid that headache.

2020-09-15 00:41:27

by Al Viro

[permalink] [raw]
Subject: Re: [RFC PATCH] fs: micro-optimization remove branches by adjusting flag values

On Mon, Sep 14, 2020 at 07:06:29PM +0100, Matthew Wilcox wrote:
> $ objdump -d test.o
> 0000000000000000 <a>:
> 0: 89 f8 mov %edi,%eax
> 2: 83 e0 03 and $0x3,%eax
> 5: c3 retq
>
> Please stop submitting uglifying patches without checking they actually
> improve anything. GCC is smarter than you think it is.

His main point isn't that - it's reshuffling LOOKUP_... bits to make that
kind of optimisation possible. However, doing that sets us up for PITA
down the road (e.g. reshuffling LOOKUP_... bits becomes forbidden, etc.)
and I'd rather not go there unless we have a real-world evidence that it
does buy us anything.