2019-09-30 21:04:10

by Aleksa Sarai

[permalink] [raw]
Subject: [PATCH v13 7/9] open: openat2(2) syscall

The most obvious syscall to add support for the new LOOKUP_* scoping
flags would be openat(2). However, there are a few reasons why this is
not the best course of action:

* The new LOOKUP_* flags are intended to be security features, and
openat(2) will silently ignore all unknown flags. This means that
users would need to avoid foot-gunning themselves constantly when
using this interface if it were part of openat(2). This can be fixed
by having userspace libraries handle this for users[1], but should be
avoided if possible.

* Resolution scoping feels like a different operation to the existing
O_* flags. And since openat(2) has limited flag space, it seems to be
quite wasteful to clutter it with 5 flags that are all
resolution-related. Arguably O_NOFOLLOW is also a resolution flag but
its entire purpose is to error out if you encounter a trailing
symlink -- not to scope resolution.

* Other systems would be able to reimplement this syscall allowing for
cross-OS standardisation rather than being hidden amongst O_* flags
which may result in it not being used by all the parties that might
want to use it (file servers, web servers, container runtimes, etc).

* It gives us the opportunity to iterate on the O_PATH interface. In
particular, the new @how->upgrade_mask field for fd re-opening is
only possible because we have a clean slate without needing to re-use
the ACC_MODE flag design nor the existing openat(2) @mode semantics.

To this end, we introduce the openat2(2) syscall. It provides all of the
features of openat(2) through the @how->flags argument, but also
also provides a new @how->resolve argument which exposes RESOLVE_* flags
that map to our new LOOKUP_* flags. It also eliminates the long-standing
ugliness of variadic-open(2) by embedding it in a struct.

In order to allow for userspace to lock down their usage of file
descriptor re-opening, openat2(2) has the ability for users to disallow
certain re-opening modes through @how->upgrade_mask. At the moment,
there is no UPGRADE_NOEXEC.

[1]: https://github.com/openSUSE/libpathrs

Suggested-by: Christian Brauner <[email protected]>
Signed-off-by: Aleksa Sarai <[email protected]>
---
arch/alpha/kernel/syscalls/syscall.tbl | 1 +
arch/arm/tools/syscall.tbl | 1 +
arch/arm64/include/asm/unistd.h | 2 +-
arch/arm64/include/asm/unistd32.h | 2 +
arch/ia64/kernel/syscalls/syscall.tbl | 1 +
arch/m68k/kernel/syscalls/syscall.tbl | 1 +
arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 1 +
arch/mips/kernel/syscalls/syscall_n64.tbl | 1 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 1 +
arch/parisc/kernel/syscalls/syscall.tbl | 1 +
arch/powerpc/kernel/syscalls/syscall.tbl | 1 +
arch/s390/kernel/syscalls/syscall.tbl | 1 +
arch/sh/kernel/syscalls/syscall.tbl | 1 +
arch/sparc/kernel/syscalls/syscall.tbl | 1 +
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
arch/xtensa/kernel/syscalls/syscall.tbl | 1 +
fs/open.c | 94 ++++++++++++++++-----
include/linux/fcntl.h | 19 ++++-
include/linux/fs.h | 4 +-
include/linux/syscalls.h | 14 ++-
include/uapi/asm-generic/unistd.h | 5 +-
include/uapi/linux/fcntl.h | 42 +++++++++
24 files changed, 168 insertions(+), 30 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 728fe028c02c..9f374f7d9514 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -475,3 +475,4 @@
543 common fspick sys_fspick
544 common pidfd_open sys_pidfd_open
# 545 reserved for clone3
+547 common openat2 sys_openat2
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 6da7dc4d79cc..4ba54bc7e19a 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -449,3 +449,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+437 common openat2 sys_openat2
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2629a68b8724..8aa00ccb0b96 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)

-#define __NR_compat_syscalls 436
+#define __NR_compat_syscalls 438
#endif

#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 94ab29cf4f00..57f6f592d460 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -879,6 +879,8 @@ __SYSCALL(__NR_fspick, sys_fspick)
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
+#define __NR_openat2 437
+__SYSCALL(__NR_openat2, sys_openat2)

/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 36d5faf4c86c..8d36f2e2dc89 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -356,3 +356,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 common openat2 sys_openat2
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index a88a285a0e5f..2559925f1924 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -435,3 +435,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 common openat2 sys_openat2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 09b0cd7dab0a..c04385e60833 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -441,3 +441,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+437 common openat2 sys_openat2
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c9c879ec9b6d..ba06cae655c6 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -374,3 +374,4 @@
433 n32 fspick sys_fspick
434 n32 pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 n32 openat2 sys_openat2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index bbce9159caa1..0f3de320ae51 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -350,3 +350,4 @@
433 n64 fspick sys_fspick
434 n64 pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 n64 openat2 sys_openat2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 9653591428ec..f108464d09a3 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -423,3 +423,4 @@
433 o32 fspick sys_fspick
434 o32 pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 o32 openat2 sys_openat2 sys_openat2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 285ff516150c..b550ae9a7fea 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -433,3 +433,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3_wrapper
+437 common openat2 sys_openat2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..a8b5ecb5b602 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -517,3 +517,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 nospu clone3 ppc_clone3
+437 common openat2 sys_openat2
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 3054e9c035a3..16b571c06161 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -438,3 +438,4 @@
433 common fspick sys_fspick sys_fspick
434 common pidfd_open sys_pidfd_open sys_pidfd_open
435 common clone3 sys_clone3 sys_clone3
+437 common openat2 sys_openat2 sys_openat2
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index b5ed26c4c005..a7185cc18626 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -438,3 +438,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 common openat2 sys_openat2
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 8c8cc7537fb2..b11c19552022 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -481,3 +481,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
# 435 reserved for clone3
+437 common openat2 sys_openat2
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3fe02546aed3..e5c022e9a5c4 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -440,3 +440,4 @@
433 i386 fspick sys_fspick __ia32_sys_fspick
434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
435 i386 clone3 sys_clone3 __ia32_sys_clone3
+437 i386 openat2 sys_openat2 __ia32_sys_openat2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index c29976eca4a8..9035647ef236 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -357,6 +357,7 @@
433 common fspick __x64_sys_fspick
434 common pidfd_open __x64_sys_pidfd_open
435 common clone3 __x64_sys_clone3/ptregs
+437 common openat2 __x64_sys_openat2

#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 25f4de729a6d..f0a68013c038 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -406,3 +406,4 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+437 common openat2 sys_openat2
diff --git a/fs/open.c b/fs/open.c
index a45fd4cbda1f..d68e4743d705 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -955,19 +955,27 @@ struct file *open_with_fake_path(const struct path *path, int flags,
}
EXPORT_SYMBOL(open_with_fake_path);

-static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
+static inline int build_open_flags(const struct open_how *how,
+ struct open_flags *op)
{
+ int flags = how->flags;
int lookup_flags = 0;
+ int opath_mask = 0;
int acc_mode = ACC_MODE(flags);

/*
- * Clear out all open flags we don't know about so that we don't report
- * them in fcntl(F_GETFD) or similar interfaces.
+ * Older syscalls still clear these bits before calling
+ * build_open_flags(), but openat2(2) checks all its arguments.
*/
- flags &= VALID_OPEN_FLAGS;
+ if (flags & ~VALID_OPEN_FLAGS)
+ return -EINVAL;
+ if (how->resolve & ~VALID_RESOLVE_FLAGS)
+ return -EINVAL;
+ if (!(how->flags & (O_PATH | O_CREAT | __O_TMPFILE)) && how->mode != 0)
+ return -EINVAL;

if (flags & (O_CREAT | __O_TMPFILE))
- op->mode = (mode & S_IALLUGO) | S_IFREG;
+ op->mode = (how->mode & S_IALLUGO) | S_IFREG;
else
op->mode = 0;

@@ -995,6 +1003,14 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
*/
flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
acc_mode = 0;
+
+ /* Allow userspace to restrict the re-opening of O_PATH fds. */
+ if (how->upgrade_mask & ~VALID_UPGRADE_FLAGS)
+ return -EINVAL;
+ if (!(how->upgrade_mask & UPGRADE_NOREAD))
+ opath_mask |= FMODE_PATH_READ;
+ if (!(how->upgrade_mask & UPGRADE_NOWRITE))
+ opath_mask |= FMODE_PATH_WRITE;
}

op->open_flag = flags;
@@ -1010,8 +1026,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o

op->acc_mode = acc_mode;
op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
- /* For O_PATH backwards-compatibility we default to an all-set mask. */
- op->opath_mask = FMODE_PATH_READ | FMODE_PATH_WRITE;
+ op->opath_mask = opath_mask;

if (flags & O_CREAT) {
op->intent |= LOOKUP_CREATE;
@@ -1025,6 +1040,18 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
lookup_flags |= LOOKUP_FOLLOW;
if (flags & O_EMPTYPATH)
lookup_flags |= LOOKUP_EMPTY;
+
+ if (how->resolve & RESOLVE_NO_XDEV)
+ lookup_flags |= LOOKUP_NO_XDEV;
+ if (how->resolve & RESOLVE_NO_MAGICLINKS)
+ lookup_flags |= LOOKUP_NO_MAGICLINKS;
+ if (how->resolve & RESOLVE_NO_SYMLINKS)
+ lookup_flags |= LOOKUP_NO_SYMLINKS;
+ if (how->resolve & RESOLVE_BENEATH)
+ lookup_flags |= LOOKUP_BENEATH;
+ if (how->resolve & RESOLVE_IN_ROOT)
+ lookup_flags |= LOOKUP_IN_ROOT;
+
op->lookup_flags = lookup_flags;
return 0;
}
@@ -1043,8 +1070,11 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
struct file *file_open_name(struct filename *name, int flags, umode_t mode)
{
struct open_flags op;
- int err = build_open_flags(flags, mode, &op);
- return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op);
+ struct open_how how = OPEN_HOW_FROM(flags, mode);
+ int err = build_open_flags(&how, &op);
+ if (err)
+ return ERR_PTR(err);
+ return do_filp_open(AT_FDCWD, name, &op);
}

/**
@@ -1075,17 +1105,19 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
const char *filename, int flags, umode_t mode)
{
struct open_flags op;
- int err = build_open_flags(flags, mode, &op);
+ struct open_how how = OPEN_HOW_FROM(flags, mode);
+ int err = build_open_flags(&how, &op);
if (err)
return ERR_PTR(err);
return do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);

-long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
+long do_sys_open(int dfd, const char __user *filename,
+ struct open_how *how)
{
struct open_flags op;
- int fd = build_open_flags(flags, mode, &op);
+ int fd = build_open_flags(how, &op);
int empty = 0;
struct filename *tmp;

@@ -1098,7 +1130,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
if (!empty)
op.open_flag &= ~O_EMPTYPATH;

- fd = get_unused_fd_flags(flags);
+ fd = get_unused_fd_flags(how->flags);
if (fd >= 0) {
struct file *f = do_filp_open(dfd, tmp, &op);
if (IS_ERR(f)) {
@@ -1115,19 +1147,37 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)

SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
- if (force_o_largefile())
- flags |= O_LARGEFILE;
-
- return do_sys_open(AT_FDCWD, filename, flags, mode);
+ return ksys_open(filename, flags, mode);
}

SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
umode_t, mode)
{
+ struct open_how how = OPEN_HOW_FROM(flags, mode);
+
+ if (force_o_largefile())
+ how.flags |= O_LARGEFILE;
+
+ return do_sys_open(dfd, filename, &how);
+}
+
+SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
+ const struct open_how __user *, how, size_t, usize)
+{
+ int err;
+ struct open_how tmp;
+
+ if (unlikely(usize < OPEN_HOW_SIZE_VER0))
+ return -EINVAL;
+
+ err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
+ if (err)
+ return err;
+
if (force_o_largefile())
- flags |= O_LARGEFILE;
+ tmp.flags |= O_LARGEFILE;

- return do_sys_open(dfd, filename, flags, mode);
+ return do_sys_open(dfd, filename, &tmp);
}

#ifdef CONFIG_COMPAT
@@ -1137,7 +1187,8 @@ SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
*/
COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
- return do_sys_open(AT_FDCWD, filename, flags, mode);
+ struct open_how how = OPEN_HOW_FROM(flags, mode);
+ return do_sys_open(AT_FDCWD, filename, &how);
}

/*
@@ -1146,7 +1197,8 @@ COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t,
*/
COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
{
- return do_sys_open(dfd, filename, flags, mode);
+ struct open_how how = OPEN_HOW_FROM(flags, mode);
+ return do_sys_open(dfd, filename, &how);
}
#endif

diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 2868ae6c8fc1..66125211caba 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -4,13 +4,30 @@

#include <uapi/linux/fcntl.h>

-/* list of all valid flags for the open/openat flags argument: */
+/* Should open_how.mode be set for older syscalls wrappers? */
+#define OPEN_HOW_MODE(flags, mode) \
+ (((flags) & (O_CREAT | __O_TMPFILE)) ? (mode) : 0)
+/* Convert older syscall (flags, mode) arguments to open_how. */
+#define OPEN_HOW_FROM(flags, mode) \
+ { .flags = (flags) & VALID_OPEN_FLAGS, \
+ .mode = OPEN_HOW_MODE((flags), (mode)) }
+
+/* List of all valid flags for the open/openat flags argument: */
#define VALID_OPEN_FLAGS \
(O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \
O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \
FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | O_EMPTYPATH)

+/* List of all valid flags for the how->upgrade_mask argument: */
+#define VALID_UPGRADE_FLAGS \
+ (UPGRADE_NOWRITE | UPGRADE_NOREAD)
+
+/* List of all valid flags for the how->resolve argument: */
+#define VALID_RESOLVE_FLAGS \
+ (RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \
+ RESOLVE_BENEATH | RESOLVE_IN_ROOT)
+
#ifndef force_o_largefile
#define force_o_largefile() (!IS_ENABLED(CONFIG_ARCH_32BIT_OFF_T))
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70b90162e224..0df1c6c2d1e5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2525,8 +2525,8 @@ extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
-extern long do_sys_open(int dfd, const char __user *filename, int flags,
- umode_t mode);
+extern long do_sys_open(int dfd, const char __user *filename,
+ struct open_how *how);
extern struct file *file_open_name(struct filename *, int, umode_t);
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(struct dentry *, struct vfsmount *,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f7c561c4dcdd..3a5879eb1b82 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -69,6 +69,7 @@ struct rseq;
union bpf_attr;
struct io_uring_params;
struct clone_args;
+struct open_how;

#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -439,6 +440,8 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
umode_t mode);
+asmlinkage long sys_openat2(int dfd, const char __user *filename,
+ const struct open_how *how, size_t size);
asmlinkage long sys_close(unsigned int fd);
asmlinkage long sys_vhangup(void);

@@ -1374,15 +1377,18 @@ static inline int ksys_close(unsigned int fd)
return __close_fd(current->files, fd);
}

-extern long do_sys_open(int dfd, const char __user *filename, int flags,
- umode_t mode);
+extern long do_sys_open(int dfd, const char __user *filename,
+ struct open_how *how);

static inline long ksys_open(const char __user *filename, int flags,
umode_t mode)
{
+ struct open_how how = OPEN_HOW_FROM(flags, mode);
+
if (force_o_largefile())
- flags |= O_LARGEFILE;
- return do_sys_open(AT_FDCWD, filename, flags, mode);
+ how.flags |= O_LARGEFILE;
+
+ return do_sys_open(AT_FDCWD, filename, &how);
}

extern long do_sys_truncate(const char __user *pathname, loff_t length);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1fc8faa6e973..d4122c091472 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -851,8 +851,11 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
__SYSCALL(__NR_clone3, sys_clone3)
#endif

+#define __NR_openat2 437
+__SYSCALL(__NR_openat2, sys_openat2)
+
#undef __NR_syscalls
-#define __NR_syscalls 436
+#define __NR_syscalls 438

/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 1d338357df8a..5dee7540aee1 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -93,5 +93,47 @@

#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */

+/*
+ * Arguments for how openat2(2) should open the target path. If @resolve is
+ * zero, then openat2(2) operates identically to openat(2).
+ *
+ * However, unlike openat(2), unknown bits in @flags result in -EINVAL rather
+ * than being silently ignored. In addition, @mode (or @upgrade_mask) must be
+ * zero unless one of {O_CREAT, O_TMPFILE, O_PATH} are set.
+ *
+ * @flags: O_* flags.
+ * @mode: O_CREAT/O_TMPFILE file mode.
+ * @upgrade_mask: UPGRADE_* flags (to restrict O_PATH re-opening).
+ * @resolve: RESOLVE_* flags.
+ */
+struct open_how {
+ __u32 flags;
+ union {
+ __u16 mode;
+ __u16 upgrade_mask;
+ };
+ __u16 resolve;
+};
+
+#define OPEN_HOW_SIZE_VER0 8 /* sizeof first published struct */
+
+/* how->resolve flags for openat2(2). */
+#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
+ (includes bind-mounts). */
+#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
+ "magic-links". */
+#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
+ (implies OEXT_NO_MAGICLINKS) */
+#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
+ "..", symlinks, and absolute
+ paths which escape the dirfd. */
+#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
+ be scoped inside the dirfd
+ (similar to chroot(2)). */
+
+/* how->upgrade flags for openat2(2). */
+/* First bit is reserved for a future UPGRADE_NOEXEC flag. */
+#define UPGRADE_NOREAD 0x02 /* Block re-opening with MAY_READ. */
+#define UPGRADE_NOWRITE 0x04 /* Block re-opening with MAY_WRITE. */

#endif /* _UAPI_LINUX_FCNTL_H */
--
2.23.0


2019-10-01 00:26:06

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v13 7/9] open: openat2(2) syscall

Hi Aleksa,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[cannot apply to v5.4-rc1 next-20190930]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url: https://github.com/0day-ci/linux/commits/Aleksa-Sarai/namei-openat2-2-path-resolution-restrictions/20191001-025628
config: x86_64-randconfig-s0-201939 (attached as .config)
compiler: gcc-5 (Ubuntu 5.5.0-12ubuntu1) 5.5.0 20171010
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>

All errors (new ones prefixed by >>):

fs/open.c: In function '__do_sys_openat2':
>> fs/open.c:1173:8: error: implicit declaration of function 'copy_struct_from_user' [-Werror=implicit-function-declaration]
err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
^
cc1: some warnings being treated as errors

vim +/copy_struct_from_user +1173 fs/open.c

1163
1164 SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
1165 const struct open_how __user *, how, size_t, usize)
1166 {
1167 int err;
1168 struct open_how tmp;
1169
1170 if (unlikely(usize < OPEN_HOW_SIZE_VER0))
1171 return -EINVAL;
1172
> 1173 err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
1174 if (err)
1175 return err;
1176
1177 if (force_o_largefile())
1178 tmp.flags |= O_LARGEFILE;
1179
1180 return do_sys_open(dfd, filename, &tmp);
1181 }
1182

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (1.90 kB)
.config.gz (30.06 kB)
Download all attachments

2019-10-01 05:07:40

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v13 7/9] open: openat2(2) syscall

Hi Aleksa,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[cannot apply to v5.4-rc1 next-20191001]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url: https://github.com/0day-ci/linux/commits/Aleksa-Sarai/namei-openat2-2-path-resolution-restrictions/20191001-025628
reproduce:
# apt-get install sparse
# sparse version: v0.6.1-rc1-37-gd466a02-dirty
make ARCH=x86_64 allmodconfig
make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>


sparse warnings: (new ones prefixed by >>)

fs/open.c:757:13: sparse: sparse: restricted fmode_t degrades to integer
fs/open.c:983:18: sparse: sparse: restricted fmode_t degrades to integer
>> fs/open.c:1011:36: sparse: sparse: invalid assignment: |=
>> fs/open.c:1011:36: sparse: left side has type int
>> fs/open.c:1011:36: sparse: right side has type restricted fmode_t
fs/open.c:1013:36: sparse: sparse: invalid assignment: |=
fs/open.c:1013:36: sparse: left side has type int
fs/open.c:1013:36: sparse: right side has type restricted fmode_t
>> fs/open.c:1029:24: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted fmode_t [usertype] opath_mask @@ got pe] opath_mask @@
>> fs/open.c:1029:24: sparse: expected restricted fmode_t [usertype] opath_mask
>> fs/open.c:1029:24: sparse: got int opath_mask
>> fs/open.c:1011:36: sparse: sparse: invalid assignment: |=
>> fs/open.c:1011:36: sparse: left side has type int
>> fs/open.c:1011:36: sparse: right side has type restricted fmode_t
fs/open.c:1013:36: sparse: sparse: invalid assignment: |=
fs/open.c:1013:36: sparse: left side has type int
fs/open.c:1013:36: sparse: right side has type restricted fmode_t
>> fs/open.c:1029:24: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted fmode_t [usertype] opath_mask @@ got pe] opath_mask @@
>> fs/open.c:1029:24: sparse: expected restricted fmode_t [usertype] opath_mask
>> fs/open.c:1029:24: sparse: got int opath_mask
>> fs/open.c:1011:36: sparse: sparse: invalid assignment: |=
>> fs/open.c:1011:36: sparse: left side has type int
>> fs/open.c:1011:36: sparse: right side has type restricted fmode_t
fs/open.c:1013:36: sparse: sparse: invalid assignment: |=
fs/open.c:1013:36: sparse: left side has type int
fs/open.c:1013:36: sparse: right side has type restricted fmode_t
>> fs/open.c:1029:24: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted fmode_t [usertype] opath_mask @@ got pe] opath_mask @@
>> fs/open.c:1029:24: sparse: expected restricted fmode_t [usertype] opath_mask
>> fs/open.c:1029:24: sparse: got int opath_mask
fs/open.c:1173:15: sparse: sparse: undefined identifier 'copy_struct_from_user'

vim +1011 fs/open.c

957
958 static inline int build_open_flags(const struct open_how *how,
959 struct open_flags *op)
960 {
961 int flags = how->flags;
962 int lookup_flags = 0;
963 int opath_mask = 0;
964 int acc_mode = ACC_MODE(flags);
965
966 /*
967 * Older syscalls still clear these bits before calling
968 * build_open_flags(), but openat2(2) checks all its arguments.
969 */
970 if (flags & ~VALID_OPEN_FLAGS)
971 return -EINVAL;
972 if (how->resolve & ~VALID_RESOLVE_FLAGS)
973 return -EINVAL;
974 if (!(how->flags & (O_PATH | O_CREAT | __O_TMPFILE)) && how->mode != 0)
975 return -EINVAL;
976
977 if (flags & (O_CREAT | __O_TMPFILE))
978 op->mode = (how->mode & S_IALLUGO) | S_IFREG;
979 else
980 op->mode = 0;
981
982 /* Must never be set by userspace */
> 983 flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
984
985 /*
986 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
987 * check for O_DSYNC if the need any syncing at all we enforce it's
988 * always set instead of having to deal with possibly weird behaviour
989 * for malicious applications setting only __O_SYNC.
990 */
991 if (flags & __O_SYNC)
992 flags |= O_DSYNC;
993
994 if (flags & __O_TMPFILE) {
995 if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
996 return -EINVAL;
997 if (!(acc_mode & MAY_WRITE))
998 return -EINVAL;
999 } else if (flags & O_PATH) {
1000 /*
1001 * If we have O_PATH in the open flag. Then we
1002 * cannot have anything other than the below set of flags
1003 */
1004 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
1005 acc_mode = 0;
1006
1007 /* Allow userspace to restrict the re-opening of O_PATH fds. */
1008 if (how->upgrade_mask & ~VALID_UPGRADE_FLAGS)
1009 return -EINVAL;
1010 if (!(how->upgrade_mask & UPGRADE_NOREAD))
> 1011 opath_mask |= FMODE_PATH_READ;
1012 if (!(how->upgrade_mask & UPGRADE_NOWRITE))
1013 opath_mask |= FMODE_PATH_WRITE;
1014 }
1015
1016 op->open_flag = flags;
1017
1018 /* O_TRUNC implies we need access checks for write permissions */
1019 if (flags & O_TRUNC)
1020 acc_mode |= MAY_WRITE;
1021
1022 /* Allow the LSM permission hook to distinguish append
1023 access from general write access. */
1024 if (flags & O_APPEND)
1025 acc_mode |= MAY_APPEND;
1026
1027 op->acc_mode = acc_mode;
1028 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
> 1029 op->opath_mask = opath_mask;
1030
1031 if (flags & O_CREAT) {
1032 op->intent |= LOOKUP_CREATE;
1033 if (flags & O_EXCL)
1034 op->intent |= LOOKUP_EXCL;
1035 }
1036
1037 if (flags & O_DIRECTORY)
1038 lookup_flags |= LOOKUP_DIRECTORY;
1039 if (!(flags & O_NOFOLLOW))
1040 lookup_flags |= LOOKUP_FOLLOW;
1041 if (flags & O_EMPTYPATH)
1042 lookup_flags |= LOOKUP_EMPTY;
1043
1044 if (how->resolve & RESOLVE_NO_XDEV)
1045 lookup_flags |= LOOKUP_NO_XDEV;
1046 if (how->resolve & RESOLVE_NO_MAGICLINKS)
1047 lookup_flags |= LOOKUP_NO_MAGICLINKS;
1048 if (how->resolve & RESOLVE_NO_SYMLINKS)
1049 lookup_flags |= LOOKUP_NO_SYMLINKS;
1050 if (how->resolve & RESOLVE_BENEATH)
1051 lookup_flags |= LOOKUP_BENEATH;
1052 if (how->resolve & RESOLVE_IN_ROOT)
1053 lookup_flags |= LOOKUP_IN_ROOT;
1054
1055 op->lookup_flags = lookup_flags;
1056 return 0;
1057 }
1058

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation