Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S965766AbbEMW2X (ORCPT ); Wed, 13 May 2015 18:28:23 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:44949 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S965608AbbEMW0o (ORCPT ); Wed, 13 May 2015 18:26:44 -0400 From: Al Viro To: Linus Torvalds Cc: Neil Brown , Christoph Hellwig , linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org Subject: [PATCH 06/21] namei: be careful with mountpoint crossings in follow_dotdot_rcu() Date: Wed, 13 May 2015 23:26:00 +0100 Message-Id: <1431555975-25997-6-git-send-email-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 1.7.7.6 In-Reply-To: <20150513222533.GA24192@ZenIV.linux.org.uk> References: <20150513222533.GA24192@ZenIV.linux.org.uk> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 3792 Lines: 121 From: Al Viro Otherwise we are risking a hard error where nonlazy restart would be the right thing to do; it's a very narrow race with mount --move and most of the time it ends up being completely harmless, but it's possible to construct a case when we'll get a bogus hard error instead of falling back to non-lazy walk... For one thing, when crossing _into_ overmount of parent we need to check for mount_lock bumps when we get NULL from __lookup_mnt() as well. For another, and less exotically, we need to make sure that the data fetched in follow_up_rcu() had been consistent. ->mnt_mountpoint is pinned for as long as it is a mountpoint, but we need to check mount_lock after fetching to verify that. Signed-off-by: Al Viro --- fs/namei.c | 51 +++++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index e736722..e907e4b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1017,21 +1017,6 @@ const char *get_link(struct nameidata *nd) return res; } -static int follow_up_rcu(struct path *path) -{ - struct mount *mnt = real_mount(path->mnt); - struct mount *parent; - struct dentry *mountpoint; - - parent = mnt->mnt_parent; - if (&parent->mnt == path->mnt) - return 0; - mountpoint = mnt->mnt_mountpoint; - path->dentry = mountpoint; - path->mnt = &parent->mnt; - return 1; -} - /* * follow_up - Find the mountpoint of path's vfsmount * @@ -1288,10 +1273,8 @@ static int follow_dotdot_rcu(struct nameidata *nd) set_root_rcu(nd); while (1) { - if (nd->path.dentry == nd->root.dentry && - nd->path.mnt == nd->root.mnt) { + if (path_equal(&nd->path, &nd->root)) break; - } if (nd->path.dentry != nd->path.mnt->mnt_root) { struct dentry *old = nd->path.dentry; struct dentry *parent = old->d_parent; @@ -1299,34 +1282,42 @@ static int follow_dotdot_rcu(struct nameidata *nd) inode = parent->d_inode; seq = read_seqcount_begin(&parent->d_seq); - if (read_seqcount_retry(&old->d_seq, nd->seq)) - goto failed; + if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq))) + return -ECHILD; nd->path.dentry = parent; nd->seq = seq; break; + } else { + struct mount *mnt = real_mount(nd->path.mnt); + struct mount *mparent = mnt->mnt_parent; + struct dentry *mountpoint = mnt->mnt_mountpoint; + struct inode *inode2 = mountpoint->d_inode; + unsigned seq = read_seqcount_begin(&mountpoint->d_seq); + if (unlikely(read_seqretry(&mount_lock, nd->m_seq))) + return -ECHILD; + if (&mparent->mnt == nd->path.mnt) + break; + /* we know that mountpoint was pinned */ + nd->path.dentry = mountpoint; + nd->path.mnt = &mparent->mnt; + inode = inode2; + nd->seq = seq; } - if (!follow_up_rcu(&nd->path)) - break; - inode = nd->path.dentry->d_inode; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - while (d_mountpoint(nd->path.dentry)) { + while (unlikely(d_mountpoint(nd->path.dentry))) { struct mount *mounted; mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (unlikely(read_seqretry(&mount_lock, nd->m_seq))) + return -ECHILD; if (!mounted) break; nd->path.mnt = &mounted->mnt; nd->path.dentry = mounted->mnt.mnt_root; inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - if (read_seqretry(&mount_lock, nd->m_seq)) - goto failed; } nd->inode = inode; return 0; - -failed: - return -ECHILD; } /* -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/