Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933278AbdCKVr0 (ORCPT ); Sat, 11 Mar 2017 16:47:26 -0500 Received: from mail-pf0-f194.google.com ([209.85.192.194]:32910 "EHLO mail-pf0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932675AbdCKVrP (ORCPT ); Sat, 11 Mar 2017 16:47:15 -0500 From: Eric Biggers To: linux-fsdevel@vger.kernel.org Cc: Al Viro , David Howells , linux-kernel@vger.kernel.org, Eric Biggers Subject: [PATCH v2] statx: optimize copy of struct statx to userspace Date: Sat, 11 Mar 2017 13:45:55 -0800 Message-Id: <20170311214555.941-1-ebiggers3@gmail.com> X-Mailer: git-send-email 2.12.0 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5125 Lines: 144 From: Eric Biggers I found that statx() was significantly slower than stat(). As a microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs file to the same with statx() passed a NULL path: $ time ./stat_benchmark real 0m1.464s user 0m0.275s sys 0m1.187s $ time ./statx_benchmark real 0m5.530s user 0m0.281s sys 0m5.247s statx is expected to be a little slower than stat because struct statx is larger than struct stat, but not by *that* much. It turns out that most of the overhead was in copying struct statx to userspace, apparently mostly in all the stac/clac instructions that got generated for each __put_user() call. (This was on x86_64, but some other architectures, e.g. arm64, have something similar now too.) stat() instead initializes its struct on the stack and copies it to userspace with a single call to copy_to_user(). This turns out to be much faster, and changing statx to do this makes it almost as fast as stat: $ time ./statx_benchmark real 0m1.573s user 0m0.229s sys 0m1.344s Signed-off-by: Eric Biggers --- fs/stat.c | 72 +++++++++++++++++++++++++++++---------------------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/fs/stat.c b/fs/stat.c index fa0be59340cc..5cc267ec7865 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -509,46 +509,41 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ -static inline int __put_timestamp(struct timespec *kts, - struct statx_timestamp __user *uts) +static inline void init_statx_timestamp(struct statx_timestamp *uts, + const struct timespec *kts) { - return (__put_user(kts->tv_sec, &uts->tv_sec ) || - __put_user(kts->tv_nsec, &uts->tv_nsec ) || - __put_user(0, &uts->__reserved )); + uts->tv_sec = kts->tv_sec; + uts->tv_nsec = kts->tv_nsec; + uts->__reserved = 0; } -/* - * Set the statx results. - */ -static long statx_set_result(struct kstat *stat, struct statx __user *buffer) +static int cp_statx(const struct kstat *stat, struct statx __user *buffer) { - uid_t uid = from_kuid_munged(current_user_ns(), stat->uid); - gid_t gid = from_kgid_munged(current_user_ns(), stat->gid); - - if (__put_user(stat->result_mask, &buffer->stx_mask ) || - __put_user(stat->mode, &buffer->stx_mode ) || - __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) || - __put_user(stat->nlink, &buffer->stx_nlink ) || - __put_user(uid, &buffer->stx_uid ) || - __put_user(gid, &buffer->stx_gid ) || - __put_user(stat->attributes, &buffer->stx_attributes ) || - __put_user(stat->blksize, &buffer->stx_blksize ) || - __put_user(MAJOR(stat->rdev), &buffer->stx_rdev_major ) || - __put_user(MINOR(stat->rdev), &buffer->stx_rdev_minor ) || - __put_user(MAJOR(stat->dev), &buffer->stx_dev_major ) || - __put_user(MINOR(stat->dev), &buffer->stx_dev_minor ) || - __put_timestamp(&stat->atime, &buffer->stx_atime ) || - __put_timestamp(&stat->btime, &buffer->stx_btime ) || - __put_timestamp(&stat->ctime, &buffer->stx_ctime ) || - __put_timestamp(&stat->mtime, &buffer->stx_mtime ) || - __put_user(stat->ino, &buffer->stx_ino ) || - __put_user(stat->size, &buffer->stx_size ) || - __put_user(stat->blocks, &buffer->stx_blocks ) || - __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) || - __clear_user(&buffer->__spare2, sizeof(buffer->__spare2))) - return -EFAULT; - - return 0; + struct statx tmp; + + tmp.stx_mask = stat->result_mask; + tmp.stx_blksize = stat->blksize; + tmp.stx_attributes = stat->attributes; + tmp.stx_nlink = stat->nlink; + tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); + tmp.stx_mode = stat->mode; + memset(tmp.__spare0, 0, sizeof(tmp.__spare0)); + tmp.stx_ino = stat->ino; + tmp.stx_size = stat->size; + tmp.stx_blocks = stat->blocks; + memset(tmp.__spare1, 0, sizeof(tmp.__spare1)); + init_statx_timestamp(&tmp.stx_atime, &stat->atime); + init_statx_timestamp(&tmp.stx_btime, &stat->btime); + init_statx_timestamp(&tmp.stx_ctime, &stat->ctime); + init_statx_timestamp(&tmp.stx_mtime, &stat->mtime); + tmp.stx_rdev_major = MAJOR(stat->rdev); + tmp.stx_rdev_minor = MINOR(stat->rdev); + tmp.stx_dev_major = MAJOR(stat->dev); + tmp.stx_dev_minor = MINOR(stat->dev); + memset(tmp.__spare2, 0, sizeof(tmp.__spare2)); + + return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } /** @@ -572,8 +567,6 @@ SYSCALL_DEFINE5(statx, if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; - if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer))) - return -EFAULT; if (filename) error = vfs_statx(dfd, filename, flags, &stat, mask); @@ -581,7 +574,8 @@ SYSCALL_DEFINE5(statx, error = vfs_statx_fd(dfd, &stat, mask, flags); if (error) return error; - return statx_set_result(&stat, buffer); + + return cp_statx(&stat, buffer); } /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ -- 2.12.0