access(2) remains commonly used, for example on exec:
access("/etc/ld.so.preload", R_OK)
or when running gcc: strace -c gcc empty.c
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
0.00 0.000000 0 42 26 access
It falls down to do_faccessat without the AT_EACCESS flag, which in turn
results in allocation of new creds in order to modify fsuid/fsgid and
caps. This is a very expensive process single-threaded and most notably
multi-threaded, with numerous structures getting refed and unrefed on
imminent new cred destruction.
Turns out for typical consumers the resulting creds would be identical
and this can be checked upfront, avoiding the hard work.
An access benchmark plugged into will-it-scale running on Cascade Lake
shows:
test proc before after
access1 1 1310582 2908735 (+121%) # distinct files
access1 24 4716491 63822173 (+1353%) # distinct files
access2 24 2378041 5370335 (+125%) # same file
The above benchmarks are not integrated into will-it-scale, but can be
found in a pull request:
https://github.com/antonblanchard/will-it-scale/pull/36/files
Signed-off-by: Mateusz Guzik <[email protected]>
---
fs/open.c | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/fs/open.c b/fs/open.c
index 82c1a28b3308..c5bfc4e3df94 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -367,7 +367,37 @@ COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset
* access() needs to use the real uid/gid, not the effective uid/gid.
* We do this by temporarily clearing all FS-related capabilities and
* switching the fsuid/fsgid around to the real ones.
+ *
+ * Creating new credentials is expensive, so we try to skip doing it,
+ * which we can if the result would match what we already got.
*/
+static bool access_need_override_creds(int flags)
+{
+ const struct cred *cred;
+
+ if (flags & AT_EACCESS)
+ return false;
+
+ cred = current->cred;
+ if (!uid_eq(cred->fsuid, cred->uid) ||
+ !gid_eq(cred->fsgid, cred->gid))
+ return true;
+
+ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+ kuid_t root_uid = make_kuid(cred->user_ns, 0);
+ if (!uid_eq(cred->uid, root_uid)) {
+ if (!cap_isclear(cred->cap_effective))
+ return true;
+ } else {
+ if (!cap_isidentical(cred->cap_effective,
+ cred->cap_permitted))
+ return true;
+ }
+ }
+
+ return false;
+}
+
static const struct cred *access_override_creds(void)
{
const struct cred *old_cred;
@@ -436,7 +466,7 @@ static long do_faccessat(int dfd, const char __user *filename, int mode, int fla
if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
- if (!(flags & AT_EACCESS)) {
+ if (access_need_override_creds(flags)) {
old_cred = access_override_creds();
if (!old_cred)
return -ENOMEM;
--
2.34.1
Hi Mateusz,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on viro-vfs/for-next]
[also build test WARNING on linus/master v6.2-rc3 next-20230113]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Mateusz-Guzik/vfs-avoid-duplicating-creds-in-faccessat-if-possible/20230115-020359
base: https://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git for-next
patch link: https://lore.kernel.org/r/20230114180224.1777699-2-mjguzik%40gmail.com
patch subject: [PATCH 2/2] vfs: avoid duplicating creds in faccessat if possible
config: ia64-randconfig-s051-20230115
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-39-gce1a6720-dirty
# https://github.com/intel-lab-lkp/linux/commit/66a936fbf1bdfa33fa679f2906b306c426b7d0da
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Mateusz-Guzik/vfs-avoid-duplicating-creds-in-faccessat-if-possible/20230115-020359
git checkout 66a936fbf1bdfa33fa679f2906b306c426b7d0da
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=ia64 olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=ia64 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
sparse warnings: (new ones prefixed by >>)
>> fs/open.c:381:14: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct cred const *cred @@ got struct cred const [noderef] __rcu *cred @@
fs/open.c:381:14: sparse: expected struct cred const *cred
fs/open.c:381:14: sparse: got struct cred const [noderef] __rcu *cred
fs/open.c:1151:21: sparse: sparse: restricted fmode_t degrades to integer
vim +381 fs/open.c
365
366 /*
367 * access() needs to use the real uid/gid, not the effective uid/gid.
368 * We do this by temporarily clearing all FS-related capabilities and
369 * switching the fsuid/fsgid around to the real ones.
370 *
371 * Creating new credentials is expensive, so we try to skip doing it,
372 * which we can if the result would match what we already got.
373 */
374 static bool access_need_override_creds(int flags)
375 {
376 const struct cred *cred;
377
378 if (flags & AT_EACCESS)
379 return false;
380
> 381 cred = current->cred;
382 if (!uid_eq(cred->fsuid, cred->uid) ||
383 !gid_eq(cred->fsgid, cred->gid))
384 return true;
385
386 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
387 kuid_t root_uid = make_kuid(cred->user_ns, 0);
388 if (!uid_eq(cred->uid, root_uid)) {
389 if (!cap_isclear(cred->cap_effective))
390 return true;
391 } else {
392 if (!cap_isidentical(cred->cap_effective,
393 cred->cap_permitted))
394 return true;
395 }
396 }
397
398 return false;
399 }
400
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests