This patch-set implements the OA2_INHERIT_CRED flag for the openat2() syscall.
It is needed to perform an open operation with the creds that were in
effect when the dir_fd was opened. This allows the process to pre-open
some dirs and switch eUID (and other UIDs/GIDs) to a less-privileged
user, while still retaining the ability to open/create files within
the pre-opened directory set.
Changes in v2:
- capture full struct cred instead of just fsuid/fsgid.
Suggested by Stefan Metzmacher <[email protected]>
Stas Sergeev (2):
fs: reorganize path_openat()
openat2: add OA2_INHERIT_CRED flag
fs/internal.h | 2 +-
fs/namei.c | 52 +++++++++++++++++++++++++++++-------
fs/open.c | 2 +-
include/linux/fcntl.h | 2 ++
include/uapi/linux/openat2.h | 3 +++
5 files changed, 50 insertions(+), 11 deletions(-)
--
2.44.0
This patch moves the call to alloc_empty_file() below the call to
path_init(). That change is needed for the next patch, which adds
a cred override for alloc_empty_file(). The needed cred info is only
available after the call to path_init().
No functional changes are intended by this patch.
Signed-off-by: Stas Sergeev <[email protected]>
CC: Eric Biederman <[email protected]>
CC: Alexander Viro <[email protected]>
CC: Christian Brauner <[email protected]>
CC: Jan Kara <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: [email protected]
CC: [email protected]
---
fs/namei.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index c5b2a25be7d0..2fde2c320ae9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3782,22 +3782,30 @@ static struct file *path_openat(struct nameidata *nd,
struct file *file;
int error;
- file = alloc_empty_file(op->open_flag, current_cred());
- if (IS_ERR(file))
- return file;
-
- if (unlikely(file->f_flags & __O_TMPFILE)) {
+ if (unlikely(op->open_flag & __O_TMPFILE)) {
+ file = alloc_empty_file(op->open_flag, current_cred());
+ if (IS_ERR(file))
+ return file;
error = do_tmpfile(nd, flags, op, file);
- } else if (unlikely(file->f_flags & O_PATH)) {
+ } else if (unlikely(op->open_flag & O_PATH)) {
+ file = alloc_empty_file(op->open_flag, current_cred());
+ if (IS_ERR(file))
+ return file;
error = do_o_path(nd, flags, file);
} else {
const char *s = path_init(nd, flags);
- while (!(error = link_path_walk(s, nd)) &&
- (s = open_last_lookups(nd, file, op)) != NULL)
- ;
+ file = alloc_empty_file(op->open_flag, current_cred());
+ error = PTR_ERR_OR_ZERO(file);
+ if (!error) {
+ while (!(error = link_path_walk(s, nd)) &&
+ (s = open_last_lookups(nd, file, op)) != NULL)
+ ;
+ }
if (!error)
error = do_open(nd, file, op);
terminate_walk(nd);
+ if (IS_ERR(file))
+ return file;
}
if (likely(!error)) {
if (likely(file->f_mode & FMODE_OPENED))
--
2.44.0
This flag performs the open operation with the credentials that
were in effect when dir_fd was opened.
This allows the process to pre-open some directories and then
change eUID (and all other UIDs/GIDs) to a less-privileged user,
retaining the ability to open/create files within these directories.
Design goal:
The idea is to provide very light-weight sandboxing, where the
process, without the use of any heavy-weight techniques like chroot
within namespaces, can restrict its access to the set of pre-opened
directories.
This patch is just a first step to such sandboxing. If things go
well, in the future the same extension can be added to more syscalls.
These should include at least unlinkat(), renameat2() and the
not-yet-upstreamed setxattrat().
Security considerations:
To avoid sandbox escape, this patch makes sure that one of the restricted
lookup modes is used, namely RESOLVE_BENEATH or RESOLVE_IN_ROOT.
To avoid leaking creds across exec, this patch requires the O_CLOEXEC
flag on the directory fd.
Use cases:
Virtual machines that deal with untrusted code can use this
instead of more heavy-weight approaches.
Currently the approach is being tested on a dosemu2 VM.
Signed-off-by: Stas Sergeev <[email protected]>
CC: Stefan Metzmacher <[email protected]>
CC: Eric Biederman <[email protected]>
CC: Alexander Viro <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Christian Brauner <[email protected]>
CC: Jan Kara <[email protected]>
CC: Jeff Layton <[email protected]>
CC: Chuck Lever <[email protected]>
CC: Alexander Aring <[email protected]>
CC: [email protected]
CC: [email protected]
CC: Paolo Bonzini <[email protected]>
CC: Christian Göttsche <[email protected]>
---
fs/internal.h | 2 +-
fs/namei.c | 30 ++++++++++++++++++++++++++++--
fs/open.c | 2 +-
include/linux/fcntl.h | 2 ++
include/uapi/linux/openat2.h | 3 +++
5 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index 7ca738904e34..692b53b19aad 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -169,7 +169,7 @@ static inline void sb_end_ro_state_change(struct super_block *sb)
* open.c
*/
struct open_flags {
- int open_flag;
+ u64 open_flag;
umode_t mode;
int acc_mode;
int intent;
diff --git a/fs/namei.c b/fs/namei.c
index 2fde2c320ae9..0e0f2e32ef02 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -586,6 +586,7 @@ struct nameidata {
int dfd;
vfsuid_t dir_vfsuid;
umode_t dir_mode;
+ const struct cred *dir_open_cred;
} __randomize_layout;
#define ND_ROOT_PRESET 1
@@ -695,6 +696,7 @@ static void terminate_walk(struct nameidata *nd)
nd->depth = 0;
nd->path.mnt = NULL;
nd->path.dentry = NULL;
+ put_cred(nd->dir_open_cred);
}
/* path_put is needed afterwards regardless of success or failure */
@@ -2414,6 +2416,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
get_fs_pwd(current->fs, &nd->path);
nd->inode = nd->path.dentry->d_inode;
}
+ nd->dir_open_cred = get_current_cred();
} else {
/* Caller must check execute permissions on the starting path component */
struct fd f = fdget_raw(nd->dfd);
@@ -2437,6 +2440,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
path_get(&nd->path);
nd->inode = nd->path.dentry->d_inode;
}
+ nd->dir_open_cred = get_cred(f.file->f_cred);
fdput(f);
}
@@ -3794,8 +3798,28 @@ static struct file *path_openat(struct nameidata *nd,
error = do_o_path(nd, flags, file);
} else {
const char *s = path_init(nd, flags);
- file = alloc_empty_file(op->open_flag, current_cred());
- error = PTR_ERR_OR_ZERO(file);
+ const struct cred *old_cred = NULL;
+
+ error = 0;
+ if (op->open_flag & OA2_INHERIT_CRED) {
+ /* Make sure to work only with restricted
+ * look-up modes.
+ */
+ if (!(nd->flags & (LOOKUP_BENEATH | LOOKUP_IN_ROOT)))
+ error = -EPERM;
+ /* Only work with O_CLOEXEC dirs. */
+ if (!get_close_on_exec(nd->dfd))
+ error = -EPERM;
+
+ if (!error)
+ old_cred = override_creds(nd->dir_open_cred);
+ }
+ if (!error) {
+ file = alloc_empty_file(op->open_flag, current_cred());
+ error = PTR_ERR_OR_ZERO(file);
+ } else {
+ file = ERR_PTR(error);
+ }
if (!error) {
while (!(error = link_path_walk(s, nd)) &&
(s = open_last_lookups(nd, file, op)) != NULL)
@@ -3803,6 +3827,8 @@ static struct file *path_openat(struct nameidata *nd,
}
if (!error)
error = do_open(nd, file, op);
+ if (old_cred)
+ revert_creds(old_cred);
terminate_walk(nd);
if (IS_ERR(file))
return file;
diff --git a/fs/open.c b/fs/open.c
index ee8460c83c77..6be013182a35 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1225,7 +1225,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
* values before calling build_open_flags(), but openat2(2) checks all
* of its arguments.
*/
- if (flags & ~VALID_OPEN_FLAGS)
+ if (flags & ~VALID_OPENAT2_FLAGS)
return -EINVAL;
if (how->resolve & ~VALID_RESOLVE_FLAGS)
return -EINVAL;
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a332e79b3207..b71f8b162102 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -12,6 +12,8 @@
FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
+#define VALID_OPENAT2_FLAGS (VALID_OPEN_FLAGS | OA2_INHERIT_CRED)
+
/* List of all valid flags for the how->resolve argument: */
#define VALID_RESOLVE_FLAGS \
(RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \
diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h
index a5feb7604948..cdd676a10b62 100644
--- a/include/uapi/linux/openat2.h
+++ b/include/uapi/linux/openat2.h
@@ -40,4 +40,7 @@ struct open_how {
return -EAGAIN if that's not
possible. */
+/* openat2-specific flags go to upper 4 bytes. */
+#define OA2_INHERIT_CRED (1ULL << 32)
+
#endif /* _UAPI_LINUX_OPENAT2_H */
--
2.44.0
Am 23.04.24 um 12:48 schrieb Stas Sergeev:
> This patch-set implements the OA2_INHERIT_CRED flag for openat2() syscall.
> It is needed to perform an open operation with the creds that were in
> effect when the dir_fd was opened. This allows the process to pre-open
> some dirs and switch eUID (and other UIDs/GIDs) to the less-privileged
> user, while still retaining the possibility to open/create files within
> the pre-opened directory set.
>
> Changes in v2:
> - capture full struct cred instead of just fsuid/fsgid.
> Suggested by Stefan Metzmacher <[email protected]>
>
> CC: Stefan Metzmacher <[email protected]>
> CC: Eric Biederman <[email protected]>
> CC: Alexander Viro <[email protected]>
> CC: Andy Lutomirski <[email protected]>
> CC: Christian Brauner <[email protected]>
> CC: Jan Kara <[email protected]>
> CC: Jeff Layton <[email protected]>
> CC: Chuck Lever <[email protected]>
> CC: Alexander Aring <[email protected]>
> CC: [email protected]
> CC: [email protected]
I guess this is something that should cc [email protected] ...
metze
23.04.2024 13:58, Stefan Metzmacher пишет:
>
> I guess this is something that should cc [email protected] ...
Done, thanks.