2012-06-05 13:10:08

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 00/21] vfs: atomic open v6 (part 2)

This is part 2 of the atomic open series. It introduces i_op->atomic_open() and
converts filesystems that abuse ->lookup() and ->create() to use this new
interface instead.

This version has one bugfix and several cleanups, reported by David Howells.
Also updated documentation in Documentation/filesytems/{vfs.txt,Locking}.

Al, please apply.

git tree is here:

git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git atomic-open.v6

Thanks,
Miklos
---

Miklos Szeredi (21):
vfs: do_last(): inline lookup_slow()
vfs: do_last(): separate O_CREAT specific code
vfs: do_last(): common slow lookup
vfs: add lookup_open()
vfs: lookup_open(): expand lookup_hash()
vfs: add i_op->atomic_open()
nfs: implement i_op->atomic_open()
nfs: clean up ->create in nfs_rpc_ops
nfs: don't use nd->intent.open.flags
nfs: don't use intents for checking atomic open
fuse: implement i_op->atomic_open()
cifs: implement i_op->atomic_open()
ceph: remove unused arg from ceph_lookup_open()
ceph: implement i_op->atomic_open()
9p: implement i_op->atomic_open()
vfs: remove open intents from nameidata
vfs: do_last(): clean up error handling
vfs: do_last(): clean up labels
vfs: do_last(): clean up bool
vfs: do_last(): clean up retry
vfs: move O_DIRECT check to common code

---
Documentation/filesystems/Locking | 4 +
Documentation/filesystems/vfs.txt | 11 +
fs/9p/vfs_inode.c | 169 ++++++++-----
fs/9p/vfs_inode_dotl.c | 52 +++--
fs/ceph/dir.c | 68 ++++--
fs/ceph/file.c | 22 +-
fs/ceph/super.h | 6 +-
fs/cifs/cifsfs.c | 1 +
fs/cifs/cifsfs.h | 3 +
fs/cifs/dir.c | 437 ++++++++++++++++++---------------
fs/fuse/dir.c | 94 +++++--
fs/internal.h | 8 +-
fs/namei.c | 492 ++++++++++++++++++++++++++-----------
fs/nfs/dir.c | 242 ++++++++-----------
fs/nfs/nfs3proc.c | 2 +-
fs/nfs/nfs4proc.c | 37 +--
fs/nfs/proc.c | 2 +-
fs/open.c | 91 ++-----
include/linux/fs.h | 7 +
include/linux/namei.h | 11 -
include/linux/nfs_xdr.h | 2 +-
21 files changed, 1023 insertions(+), 738 deletions(-)


2012-06-05 13:10:12

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 01/21] vfs: do_last(): inline lookup_slow()

From: Miklos Szeredi <[email protected]>

Copy lookup_slow() into do_last().

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 17 +++++++++++++++--
1 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 7d69419..d6e8c55 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2245,9 +2245,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (error < 0)
goto exit;

- error = lookup_slow(nd, &nd->last, path);
- if (error < 0)
+ BUG_ON(nd->inode != dir->d_inode);
+
+ mutex_lock(&dir->d_inode->i_mutex);
+ dentry = __lookup_hash(&nd->last, dir, nd);
+ mutex_unlock(&dir->d_inode->i_mutex);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
goto exit;
+ path->mnt = nd->path.mnt;
+ path->dentry = dentry;
+ error = follow_managed(path, nd->flags);
+ if (unlikely(error < 0))
+ goto exit_dput;
+
+ if (error)
+ nd->flags |= LOOKUP_JUMPED;

inode = path->dentry->d_inode;
}
--
1.7.7

2012-06-05 13:10:27

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 04/21] vfs: add lookup_open()

From: Miklos Szeredi <[email protected]>

Split out lookup + maybe create from do_last(). This is the part under i_mutex
protection.

The function is called lookup_open() and returns a filp even though the open
part is not used yet.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 99 +++++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 61 insertions(+), 38 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 0a56b61..1153476 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2188,19 +2188,73 @@ static inline int open_to_namei_flags(int flag)
}

/*
+ * Lookup, maybe create and open the last component
+ *
+ * Must be called with i_mutex held on parent.
+ *
+ * Returns open file or NULL on success, error otherwise. NULL means no open
+ * was performed, only lookup.
+ */
+static struct file *lookup_open(struct nameidata *nd, struct path *path,
+ const struct open_flags *op,
+ int *want_write, bool *created)
+{
+ struct dentry *dir = nd->path.dentry;
+ struct dentry *dentry;
+ int error;
+
+ *created = false;
+ dentry = lookup_hash(nd);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+
+ /* Negative dentry, just create the file */
+ if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
+ umode_t mode = op->mode;
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current_umask();
+ /*
+ * This write is needed to ensure that a
+ * rw->ro transition does not occur between
+ * the time when the file is created and when
+ * a permanent write count is taken through
+ * the 'struct file' in nameidata_to_filp().
+ */
+ error = mnt_want_write(nd->path.mnt);
+ if (error)
+ goto out_dput;
+ *want_write = 1;
+ *created = true;
+ error = security_path_mknod(&nd->path, dentry, mode, 0);
+ if (error)
+ goto out_dput;
+ error = vfs_create(dir->d_inode, dentry, mode, nd);
+ if (error)
+ goto out_dput;
+ }
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
+ return NULL;
+
+out_dput:
+ dput(dentry);
+ return ERR_PTR(error);
+}
+
+/*
* Handle the last step of open()
*/
static struct file *do_last(struct nameidata *nd, struct path *path,
const struct open_flags *op, const char *pathname)
{
struct dentry *dir = nd->path.dentry;
- struct dentry *dentry;
int open_flag = op->open_flag;
int will_truncate = open_flag & O_TRUNC;
int want_write = 0;
int acc_mode = op->acc_mode;
struct file *filp;
struct inode *inode;
+ bool created;
int symlink_ok = 0;
struct path save_parent = { .dentry = NULL, .mnt = NULL };
bool retried = false;
@@ -2268,53 +2322,24 @@ static struct file *do_last(struct nameidata *nd, struct path *path,

retry_lookup:
mutex_lock(&dir->d_inode->i_mutex);
+ filp = lookup_open(nd, path, op, &want_write, &created);
+ mutex_unlock(&dir->d_inode->i_mutex);

- dentry = lookup_hash(nd);
- error = PTR_ERR(dentry);
- if (IS_ERR(dentry)) {
- mutex_unlock(&dir->d_inode->i_mutex);
- goto exit;
- }
-
- path->dentry = dentry;
- path->mnt = nd->path.mnt;
+ if (IS_ERR(filp))
+ goto out;

- /* Negative dentry, just create the file */
- if (!dentry->d_inode && (open_flag & O_CREAT)) {
- umode_t mode = op->mode;
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
- /*
- * This write is needed to ensure that a
- * rw->ro transition does not occur between
- * the time when the file is created and when
- * a permanent write count is taken through
- * the 'struct file' in nameidata_to_filp().
- */
- error = mnt_want_write(nd->path.mnt);
- if (error)
- goto exit_mutex_unlock;
- want_write = 1;
+ if (created) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = 0;
acc_mode = MAY_OPEN;
- error = security_path_mknod(&nd->path, dentry, mode, 0);
- if (error)
- goto exit_mutex_unlock;
- error = vfs_create(dir->d_inode, dentry, mode, nd);
- if (error)
- goto exit_mutex_unlock;
- mutex_unlock(&dir->d_inode->i_mutex);
- dput(nd->path.dentry);
- nd->path.dentry = dentry;
+ path_to_nameidata(path, nd);
goto common;
}

/*
* It already exists.
*/
- mutex_unlock(&dir->d_inode->i_mutex);
audit_inode(pathname, path->dentry);

error = -EEXIST;
@@ -2423,8 +2448,6 @@ out:
terminate_walk(nd);
return filp;

-exit_mutex_unlock:
- mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
path_put_conditional(path, nd);
exit:
--
1.7.7

2012-06-05 13:10:18

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 03/21] vfs: do_last(): common slow lookup

From: Miklos Szeredi <[email protected]>

Make the slow lookup part of O_CREAT and non-O_CREAT opens common.

This allows atomic_open to be hooked into the slow lookup part.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 27 +++++----------------------
1 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 73e824c..0a56b61 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2241,30 +2241,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
symlink_ok = 1;
/* we _can_ be in RCU mode here */
error = lookup_fast(nd, &nd->last, path, &inode);
- if (unlikely(error)) {
- if (error < 0)
- goto exit;
-
- BUG_ON(nd->inode != dir->d_inode);
-
- mutex_lock(&dir->d_inode->i_mutex);
- dentry = __lookup_hash(&nd->last, dir, nd);
- mutex_unlock(&dir->d_inode->i_mutex);
- error = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto exit;
- path->mnt = nd->path.mnt;
- path->dentry = dentry;
- error = follow_managed(path, nd->flags);
- if (unlikely(error < 0))
- goto exit_dput;
+ if (likely(!error))
+ goto finish_lookup;

- if (error)
- nd->flags |= LOOKUP_JUMPED;
+ if (error < 0)
+ goto exit;

- inode = path->dentry->d_inode;
- }
- goto finish_lookup;
+ BUG_ON(nd->inode != dir->d_inode);
} else {
/* create side of things */
/*
--
1.7.7

2012-06-05 13:10:36

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 05/21] vfs: lookup_open(): expand lookup_hash()

From: Miklos Szeredi <[email protected]>

Copy __lookup_hash() into lookup_open(). The next patch will insert the atomic
open call just before the real lookup.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 12 +++++++++++-
1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 1153476..cd20ddc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2200,14 +2200,24 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
int *want_write, bool *created)
{
struct dentry *dir = nd->path.dentry;
+ struct inode *dir_inode = dir->d_inode;
struct dentry *dentry;
int error;
+ bool need_lookup;

*created = false;
- dentry = lookup_hash(nd);
+ dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup);
if (IS_ERR(dentry))
return ERR_CAST(dentry);

+ if (need_lookup) {
+ BUG_ON(dentry->d_inode);
+
+ dentry = lookup_real(dir_inode, dentry, nd);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+ }
+
/* Negative dentry, just create the file */
if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
umode_t mode = op->mode;
--
1.7.7

2012-06-05 13:10:44

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 06/21] vfs: add i_op->atomic_open()

From: Miklos Szeredi <[email protected]>

Add a new inode operation which is called on the last component of an open.
Using this the filesystem can look up, possibly create and open the file in one
atomic operation. If it cannot perform this (e.g. the file type turned out to
be wrong) it may signal this by returning NULL instead of an open struct file
pointer.

i_op->atomic_open() is only called if the last component is negative or needs
lookup. Handling cached positive dentries here doesn't add much value: these
can be opened using f_op->open(). If the cached file turns out to be invalid,
the open can be retried, this time using ->atomic_open() with a fresh dentry.

For now leave the old way of using open intents in lookup and revalidate in
place. This will be removed once all the users are converted.

David Howells noticed that if ->atomic_open() opens the file but does not create
it, handle_truncate() will be called on it even if it is not a regular file.
Fix this by checking the file type in this case too.

Signed-off-by: Miklos Szeredi <[email protected]>
---
Documentation/filesystems/Locking | 4 +
Documentation/filesystems/vfs.txt | 11 ++
fs/internal.h | 5 +
fs/namei.c | 203 ++++++++++++++++++++++++++++++++++++-
fs/open.c | 42 ++++++++
include/linux/fs.h | 7 ++
6 files changed, 270 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 8e2da1e..8157488 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -62,6 +62,9 @@ ata *);
int (*removexattr) (struct dentry *, const char *);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
void (*update_time)(struct inode *, struct timespec *, int);
+ struct file * (*atomic_open)(struct inode *, struct dentry *,
+ struct opendata *, unsigned open_flag,
+ umode_t create_mode, bool *created);

locking rules:
all may block
@@ -89,6 +92,7 @@ listxattr: no
removexattr: yes
fiemap: no
update_time: no
+atomic_open: yes

Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
victim.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index efd23f4..beb6e69 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,9 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*update_time)(struct inode *, struct timespec *, int);
+ struct file * (*atomic_open)(struct inode *, struct dentry *,
+ struct opendata *, unsigned open_flag,
+ umode_t create_mode, bool *created);
};

Again, all methods are called without any locks being held, unless
@@ -476,6 +479,14 @@ otherwise noted.
an inode. If this is not defined the VFS will update the inode itself
and call mark_inode_dirty_sync.

+ atomic_open: called on the last component of an open. Using this optional
+ method the filesystem can look up, possibly create and open the file in
+ one atomic operation. If it cannot perform this (e.g. the file type
+ turned out to be wrong) it may signal this by returning NULL instead of
+ an open struct file pointer. This method is only called if the last
+ component is negative or needs lookup. Cached positive dentries are
+ still handled by f_op->open().
+
The Address Space Object
========================

diff --git a/fs/internal.h b/fs/internal.h
index 18bc216..628907c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -87,6 +87,11 @@ extern struct super_block *user_get_super(dev_t);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+struct opendata {
+ struct dentry *dentry;
+ struct vfsmount *mnt;
+ struct file **filp;
+};
struct open_flags {
int open_flag;
umode_t mode;
diff --git a/fs/namei.c b/fs/namei.c
index cd20ddc..959ebb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2187,6 +2187,176 @@ static inline int open_to_namei_flags(int flag)
return flag;
}

+static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
+{
+ int error = security_path_mknod(dir, dentry, mode, 0);
+ if (error)
+ return error;
+
+ error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+ if (error)
+ return error;
+
+ return security_inode_create(dir->dentry->d_inode, dentry, mode);
+}
+
+static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
+ struct path *path, const struct open_flags *op,
+ int *want_write, bool need_lookup,
+ bool *created)
+{
+ struct inode *dir = nd->path.dentry->d_inode;
+ unsigned open_flag = open_to_namei_flags(op->open_flag);
+ umode_t mode;
+ int error;
+ int acc_mode;
+ struct opendata od;
+ struct file *filp;
+ int create_error = 0;
+ struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
+
+ BUG_ON(dentry->d_inode);
+
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(dir))) {
+ filp = ERR_PTR(-ENOENT);
+ goto out;
+ }
+
+ mode = op->mode & S_IALLUGO;
+ if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
+ mode &= ~current_umask();
+
+ if (open_flag & O_EXCL) {
+ open_flag &= ~O_TRUNC;
+ *created = true;
+ }
+
+ /*
+ * Checking write permission is tricky, bacuse we don't know if we are
+ * going to actually need it: O_CREAT opens should work as long as the
+ * file exists. But checking existence breaks atomicity. The trick is
+ * to check access and if not granted clear O_CREAT from the flags.
+ *
+ * Another problem is returing the "right" error value (e.g. for an
+ * O_EXCL open we want to return EEXIST not EROFS).
+ */
+ if ((open_flag & (O_CREAT | O_TRUNC)) ||
+ (open_flag & O_ACCMODE) != O_RDONLY) {
+ error = mnt_want_write(nd->path.mnt);
+ if (!error) {
+ *want_write = 1;
+ } else if (!(open_flag & O_CREAT)) {
+ /*
+ * No O_CREATE -> atomicity not a requirement -> fall
+ * back to lookup + open
+ */
+ goto no_open;
+ } else if (open_flag & (O_EXCL | O_TRUNC)) {
+ /* Fall back and fail with the right error */
+ create_error = error;
+ goto no_open;
+ } else {
+ /* No side effects, safe to clear O_CREAT */
+ create_error = error;
+ open_flag &= ~O_CREAT;
+ }
+ }
+
+ if (open_flag & O_CREAT) {
+ error = may_o_create(&nd->path, dentry, op->mode);
+ if (error) {
+ create_error = error;
+ if (open_flag & O_EXCL)
+ goto no_open;
+ open_flag &= ~O_CREAT;
+ }
+ }
+
+ if (nd->flags & LOOKUP_DIRECTORY)
+ open_flag |= O_DIRECTORY;
+
+ od.dentry = DENTRY_NOT_SET;
+ od.mnt = nd->path.mnt;
+ od.filp = &nd->intent.open.file;
+ filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
+ created);
+ if (IS_ERR(filp)) {
+ if (WARN_ON(od.dentry != DENTRY_NOT_SET))
+ dput(od.dentry);
+
+ if (create_error && PTR_ERR(filp) == -ENOENT)
+ filp = ERR_PTR(create_error);
+ goto out;
+ }
+
+ acc_mode = op->acc_mode;
+ if (*created) {
+ fsnotify_create(dir, dentry);
+ acc_mode = MAY_OPEN;
+ }
+
+ if (!filp) {
+ if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
+ filp = ERR_PTR(-EIO);
+ goto out;
+ }
+ if (od.dentry) {
+ dput(dentry);
+ dentry = od.dentry;
+ }
+ goto looked_up;
+ }
+
+ /*
+ * We didn't have the inode before the open, so check open permission
+ * here.
+ */
+ error = may_open(&filp->f_path, acc_mode, open_flag);
+ if (error)
+ goto out_fput;
+
+ error = open_check_o_direct(filp);
+ if (error)
+ goto out_fput;
+
+out:
+ dput(dentry);
+ return filp;
+
+out_fput:
+ fput(filp);
+ filp = ERR_PTR(error);
+ goto out;
+
+no_open:
+ if (need_lookup) {
+ dentry = lookup_real(dir, dentry, nd);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+
+ if (create_error) {
+ int open_flag = op->open_flag;
+
+ filp = ERR_PTR(create_error);
+ if ((open_flag & O_EXCL)) {
+ if (!dentry->d_inode)
+ goto out;
+ } else if (!dentry->d_inode) {
+ goto out;
+ } else if ((open_flag & O_TRUNC) &&
+ S_ISREG(dentry->d_inode->i_mode)) {
+ goto out;
+ }
+ /* will fail later, go on to get the right error */
+ }
+ }
+looked_up:
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
+ return NULL;
+}
+
/*
* Lookup, maybe create and open the last component
*
@@ -2210,6 +2380,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
if (IS_ERR(dentry))
return ERR_CAST(dentry);

+ /* Cached positive dentry: will open in f_op->open */
+ if (!need_lookup && dentry->d_inode)
+ goto out_no_open;
+
+ if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
+ return atomic_open(nd, dentry, path, op, want_write,
+ need_lookup, created);
+ }
+
if (need_lookup) {
BUG_ON(dentry->d_inode);

@@ -2242,6 +2421,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
if (error)
goto out_dput;
}
+out_no_open:
path->dentry = dentry;
path->mnt = nd->path.mnt;
return NULL;
@@ -2335,8 +2515,16 @@ retry_lookup:
filp = lookup_open(nd, path, op, &want_write, &created);
mutex_unlock(&dir->d_inode->i_mutex);

- if (IS_ERR(filp))
- goto out;
+ if (filp) {
+ if (IS_ERR(filp))
+ goto out;
+
+ if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+ will_truncate = 0;
+
+ audit_inode(pathname, filp->f_path.dentry);
+ goto opened;
+ }

if (created) {
/* Don't check for write permission, don't truncate */
@@ -2352,6 +2540,16 @@ retry_lookup:
*/
audit_inode(pathname, path->dentry);

+ /*
+ * If atomic_open() acquired write access it is dropped now due to
+ * possible mount and symlink following (this might be optimized away if
+ * necessary...)
+ */
+ if (want_write) {
+ mnt_drop_write(nd->path.mnt);
+ want_write = 0;
+ }
+
error = -EEXIST;
if (open_flag & O_EXCL)
goto exit_dput;
@@ -2435,6 +2633,7 @@ common:
retried = true;
goto retry_lookup;
}
+opened:
if (!IS_ERR(filp)) {
error = ima_file_check(filp, op->acc_mode);
if (error) {
diff --git a/fs/open.c b/fs/open.c
index d6c79a0..9d7e0dd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -811,6 +811,48 @@ out_err:
EXPORT_SYMBOL_GPL(lookup_instantiate_filp);

/**
+ * finish_open - finish opening a file
+ * @od: opaque open data
+ * @dentry: pointer to dentry
+ * @open: open callback
+ *
+ * This can be used to finish opening a file passed to i_op->atomic_open().
+ *
+ * If the open callback is set to NULL, then the standard f_op->open()
+ * filesystem callback is substituted.
+ */
+struct file *finish_open(struct opendata *od, struct dentry *dentry,
+ int (*open)(struct inode *, struct file *))
+{
+ struct file *res;
+
+ mntget(od->mnt);
+ dget(dentry);
+
+ res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred());
+ if (!IS_ERR(res))
+ *od->filp = NULL;
+
+ return res;
+}
+EXPORT_SYMBOL(finish_open);
+
+/**
+ * finish_no_open - finish ->atomic_open() without opening the file
+ *
+ * @od: opaque open data
+ * @dentry: dentry or NULL (as returned from ->lookup())
+ *
+ * This can be used to set the result of a successful lookup in ->atomic_open().
+ * The filesystem's atomic_open() method shall return NULL after calling this.
+ */
+void finish_no_open(struct opendata *od, struct dentry *dentry)
+{
+ od->dentry = dentry;
+}
+EXPORT_SYMBOL(finish_no_open);
+
+/**
* nameidata_to_filp - convert a nameidata to an open filp.
* @nd: pointer to nameidata
* @flags: open flags
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51978ed..19c699b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -427,6 +427,7 @@ struct kstatfs;
struct vm_area_struct;
struct vfsmount;
struct cred;
+struct opendata;

extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -1693,6 +1694,9 @@ struct inode_operations {
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
int (*update_time)(struct inode *, struct timespec *, int);
+ struct file * (*atomic_open)(struct inode *, struct dentry *,
+ struct opendata *, unsigned open_flag,
+ umode_t create_mode, bool *created);
} ____cacheline_aligned;

struct seq_file;
@@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
extern char * getname(const char __user *);
+extern struct file *finish_open(struct opendata *od, struct dentry *dentry,
+ int (*open)(struct inode *, struct file *));
+extern void finish_no_open(struct opendata *od, struct dentry *dentry);

/* fs/ioctl.c */

--
1.7.7

2012-06-05 13:10:53

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 07/21] nfs: implement i_op->atomic_open()

From: Miklos Szeredi <[email protected]>

Replace NFS4 specific ->lookup implementation with ->atomic_open impelementation
and use the generic nfs_lookup for other lookups.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 183 +++++++++++++++++++++++++++++++---------------------------
1 files changed, 97 insertions(+), 86 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f430057..0d8c712 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -111,11 +111,15 @@ const struct inode_operations nfs3_dir_inode_operations = {

#ifdef CONFIG_NFS_V4

-static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
+static struct file *nfs_atomic_open(struct inode *, struct dentry *,
+ struct opendata *, unsigned, umode_t,
+ bool *);
+static int nfs4_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, struct nameidata *nd);
const struct inode_operations nfs4_dir_inode_operations = {
- .create = nfs_open_create,
- .lookup = nfs_atomic_lookup,
+ .create = nfs4_create,
+ .lookup = nfs_lookup,
+ .atomic_open = nfs_atomic_open,
.link = nfs_link,
.unlink = nfs_unlink,
.symlink = nfs_symlink,
@@ -1403,120 +1407,132 @@ static int do_open(struct inode *inode, struct file *filp)
return 0;
}

-static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
+static struct file *nfs_finish_open(struct nfs_open_context *ctx,
+ struct dentry *dentry,
+ struct opendata *od, unsigned open_flags)
{
struct file *filp;
- int ret = 0;
+ int err;
+
+ if (ctx->dentry != dentry) {
+ dput(ctx->dentry);
+ ctx->dentry = dget(dentry);
+ }

/* If the open_intent is for execute, we have an extra check to make */
if (ctx->mode & FMODE_EXEC) {
- ret = nfs_may_open(ctx->dentry->d_inode,
- ctx->cred,
- nd->intent.open.flags);
- if (ret < 0)
+ err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags);
+ if (err < 0) {
+ filp = ERR_PTR(err);
goto out;
+ }
}
- filp = lookup_instantiate_filp(nd, ctx->dentry, do_open);
- if (IS_ERR(filp))
- ret = PTR_ERR(filp);
- else
+
+ filp = finish_open(od, dentry, do_open);
+ if (!IS_ERR(filp))
nfs_file_set_open_context(filp, ctx);
+
out:
put_nfs_open_context(ctx);
- return ret;
+ return filp;
}

-static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct opendata *od, unsigned open_flags,
+ umode_t mode, bool *created)
{
struct nfs_open_context *ctx;
- struct iattr attr;
- struct dentry *res = NULL;
+ struct dentry *res;
+ struct iattr attr = { .ia_valid = ATTR_OPEN };
struct inode *inode;
- int open_flags;
+ struct file *filp;
int err;

- dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+ /* Expect a negative dentry */
+ BUG_ON(dentry->d_inode);
+
+ dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);

- /* Check that we are indeed trying to open this file */
- if (!is_atomic_open(nd))
+ /* NFS only supports OPEN on regular files */
+ if ((open_flags & O_DIRECTORY)) {
+ err = -ENOENT;
+ if (!d_unhashed(dentry)) {
+ /*
+ * Hashed negative dentry with O_DIRECTORY: dentry was
+ * revalidated and is fine, no need to perform lookup
+ * again
+ */
+ goto out_err;
+ }
goto no_open;
-
- if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
- res = ERR_PTR(-ENAMETOOLONG);
- goto out;
}

- /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
- * the dentry. */
- if (nd->flags & LOOKUP_EXCL) {
- d_instantiate(dentry, NULL);
- goto out;
- }
-
- open_flags = nd->intent.open.flags;
- attr.ia_valid = ATTR_OPEN;
-
- ctx = create_nfs_open_context(dentry, open_flags);
- res = ERR_CAST(ctx);
- if (IS_ERR(ctx))
- goto out;
+ err = -ENAMETOOLONG;
+ if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ goto out_err;

- if (nd->flags & LOOKUP_CREATE) {
- attr.ia_mode = nd->intent.open.create_mode;
+ if (open_flags & O_CREAT) {
attr.ia_valid |= ATTR_MODE;
- attr.ia_mode &= ~current_umask();
- } else
- open_flags &= ~(O_EXCL | O_CREAT);
-
+ attr.ia_mode = mode & ~current_umask();
+ }
if (open_flags & O_TRUNC) {
attr.ia_valid |= ATTR_SIZE;
attr.ia_size = 0;
}

- /* Open the file on the server */
+ ctx = create_nfs_open_context(dentry, open_flags);
+ err = PTR_ERR(ctx);
+ if (IS_ERR(ctx))
+ goto out_err;
+
nfs_block_sillyrename(dentry->d_parent);
inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
+ d_drop(dentry);
if (IS_ERR(inode)) {
nfs_unblock_sillyrename(dentry->d_parent);
put_nfs_open_context(ctx);
- switch (PTR_ERR(inode)) {
- /* Make a negative dentry */
- case -ENOENT:
- d_add(dentry, NULL);
- res = NULL;
- goto out;
- /* This turned out not to be a regular file */
- case -EISDIR:
- case -ENOTDIR:
+ err = PTR_ERR(inode);
+ switch (err) {
+ case -ENOENT:
+ d_add(dentry, NULL);
+ break;
+ case -EISDIR:
+ case -ENOTDIR:
+ goto no_open;
+ case -ELOOP:
+ if (!(open_flags & O_NOFOLLOW))
goto no_open;
- case -ELOOP:
- if (!(nd->intent.open.flags & O_NOFOLLOW))
- goto no_open;
+ break;
/* case -EINVAL: */
- default:
- res = ERR_CAST(inode);
- goto out;
+ default:
+ break;
}
+ goto out_err;
}
res = d_add_unique(dentry, inode);
- nfs_unblock_sillyrename(dentry->d_parent);
- if (res != NULL) {
- dput(ctx->dentry);
- ctx->dentry = dget(res);
+ if (res != NULL)
dentry = res;
- }
- err = nfs_intent_set_file(nd, ctx);
- if (err < 0) {
- if (res != NULL)
- dput(res);
- return ERR_PTR(err);
- }
-out:
+
+ nfs_unblock_sillyrename(dentry->d_parent);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- return res;
+
+ filp = nfs_finish_open(ctx, dentry, od, open_flags);
+
+ dput(res);
+ return filp;
+
+out_err:
+ return ERR_PTR(err);
+
no_open:
- return nfs_lookup(dir, dentry, nd);
+ res = nfs_lookup(dir, dentry, NULL);
+ err = PTR_ERR(res);
+ if (IS_ERR(res))
+ goto out_err;
+
+ finish_no_open(od, res);
+ return NULL;
}

static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1566,8 +1582,8 @@ no_open:
return nfs_lookup_revalidate(dentry, nd);
}

-static int nfs_open_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, struct nameidata *nd)
+static int nfs4_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, struct nameidata *nd)
{
struct nfs_open_context *ctx = NULL;
struct iattr attr;
@@ -1591,19 +1607,14 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry,
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
if (error != 0)
goto out_put_ctx;
- if (nd) {
- error = nfs_intent_set_file(nd, ctx);
- if (error < 0)
- goto out_err;
- } else {
- put_nfs_open_context(ctx);
- }
+
+ put_nfs_open_context(ctx);
+
return 0;
out_put_ctx:
put_nfs_open_context(ctx);
out_err_drop:
d_drop(dentry);
-out_err:
return error;
}

--
1.7.7

2012-06-05 13:10:58

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 08/21] nfs: clean up ->create in nfs_rpc_ops

From: Miklos Szeredi <[email protected]>

Don't pass nfs_open_context() to ->create(). Only the NFS4 implementation
needed that and only because it wanted to return an open file using open
intents. That task has been replaced by ->atomic_open so it is not necessary
anymore to pass the context to the create rpc operation.

Despite nfs4_proc_create apparently being okay with a NULL context it Oopses
somewhere down the call chain. So allocate a context here.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 42 ++----------------------------------------
fs/nfs/nfs3proc.c | 2 +-
fs/nfs/nfs4proc.c | 37 ++++++++++---------------------------
fs/nfs/proc.c | 2 +-
include/linux/nfs_xdr.h | 2 +-
5 files changed, 15 insertions(+), 70 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0d8c712..45015d3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -114,10 +114,8 @@ const struct inode_operations nfs3_dir_inode_operations = {
static struct file *nfs_atomic_open(struct inode *, struct dentry *,
struct opendata *, unsigned, umode_t,
bool *);
-static int nfs4_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, struct nameidata *nd);
const struct inode_operations nfs4_dir_inode_operations = {
- .create = nfs4_create,
+ .create = nfs_create,
.lookup = nfs_lookup,
.atomic_open = nfs_atomic_open,
.link = nfs_link,
@@ -1582,42 +1580,6 @@ no_open:
return nfs_lookup_revalidate(dentry, nd);
}

-static int nfs4_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, struct nameidata *nd)
-{
- struct nfs_open_context *ctx = NULL;
- struct iattr attr;
- int error;
- int open_flags = O_CREAT|O_EXCL;
-
- dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
- dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
-
- attr.ia_mode = mode;
- attr.ia_valid = ATTR_MODE;
-
- if (nd)
- open_flags = nd->intent.open.flags;
-
- ctx = create_nfs_open_context(dentry, open_flags);
- error = PTR_ERR(ctx);
- if (IS_ERR(ctx))
- goto out_err_drop;
-
- error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
- if (error != 0)
- goto out_put_ctx;
-
- put_nfs_open_context(ctx);
-
- return 0;
-out_put_ctx:
- put_nfs_open_context(ctx);
-out_err_drop:
- d_drop(dentry);
- return error;
-}
-
#endif /* CONFIG_NFSV4 */

/*
@@ -1684,7 +1646,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry,
if (nd)
open_flags = nd->intent.open.flags;

- error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL);
+ error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
if (error != 0)
goto out_err;
return 0;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2292a0f..3187e24 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
*/
static int
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
- int flags, struct nfs_open_context *ctx)
+ int flags)
{
struct nfs3_createdata *data;
umode_t mode = sattr->ia_mode;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d48dbef..267be3c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2779,37 +2779,22 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
}

/*
- * Got race?
- * We will need to arrange for the VFS layer to provide an atomic open.
- * Until then, this create/open method is prone to inefficiency and race
- * conditions due to the lookup, create, and open VFS calls from sys_open()
- * placed on the wire.
- *
- * Given the above sorry state of affairs, I'm simply sending an OPEN.
- * The file will be opened again in the subsequent VFS open call
- * (nfs4_proc_file_open).
- *
- * The open for read will just hang around to be used by any process that
- * opens the file O_RDONLY. This will all be resolved with the VFS changes.
+ * This is just for mknod. open(O_CREAT) will always do ->open_context().
*/
-
static int
nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
- int flags, struct nfs_open_context *ctx)
+ int flags)
{
- struct dentry *de = dentry;
+ struct nfs_open_context *ctx;
struct nfs4_state *state;
- struct rpc_cred *cred = NULL;
- fmode_t fmode = 0;
int status = 0;

- if (ctx != NULL) {
- cred = ctx->cred;
- de = ctx->dentry;
- fmode = ctx->mode;
- }
+ ctx = alloc_nfs_open_context(dentry, FMODE_READ);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
+ state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL);
d_drop(dentry);
if (IS_ERR(state)) {
status = PTR_ERR(state);
@@ -2817,11 +2802,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
}
d_add(dentry, igrab(state->inode));
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- if (ctx != NULL)
- ctx->state = state;
- else
- nfs4_close_sync(state, fmode);
+ ctx->state = state;
out:
+ put_nfs_open_context(ctx);
return status;
}

diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index a706b6b..81a0a42 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -259,7 +259,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)

static int
nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
- int flags, struct nfs_open_context *ctx)
+ int flags)
{
struct nfs_createdata *data;
struct rpc_message msg = {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d1a7bf5..660c0f5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1371,7 +1371,7 @@ struct nfs_rpc_ops {
int (*readlink)(struct inode *, struct page *, unsigned int,
unsigned int);
int (*create) (struct inode *, struct dentry *,
- struct iattr *, int, struct nfs_open_context *);
+ struct iattr *, int);
int (*remove) (struct inode *, struct qstr *);
void (*unlink_setup) (struct rpc_message *, struct inode *dir);
void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *);
--
1.7.7

2012-06-05 13:11:06

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 10/21] nfs: don't use intents for checking atomic open

From: Miklos Szeredi <[email protected]>

is_atomic_open() is now only used by nfs4_lookup_revalidate() to check whether
it's okay to skip normal revalidation.

It does a racy check for mount read-onlyness and falls back to normal
revalidation if the open would fail. This makes little sense now that this
function isn't used for determining whether to actually open the file or not.

The d_mountpoint() check still makes sense since it is an indication that we
might be following a mount and so open may not revalidate the dentry.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 24 ++++--------------------
1 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0432f47..e6d55dc 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1366,24 +1366,6 @@ const struct dentry_operations nfs4_dentry_operations = {
.d_release = nfs_d_release,
};

-/*
- * Use intent information to determine whether we need to substitute
- * the NFSv4-style stateful OPEN for the LOOKUP call
- */
-static int is_atomic_open(struct nameidata *nd)
-{
- if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
- return 0;
- /* NFS does not (yet) have a stateful open for directories */
- if (nd->flags & LOOKUP_DIRECTORY)
- return 0;
- /* Are we trying to write to a read only partition? */
- if (__mnt_is_readonly(nd->path.mnt) &&
- (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE)))
- return 0;
- return 1;
-}
-
static fmode_t flags_to_mode(int flags)
{
fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
@@ -1543,10 +1525,12 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
if (nd->flags & LOOKUP_RCU)
return -ECHILD;

- inode = dentry->d_inode;
- if (!is_atomic_open(nd) || d_mountpoint(dentry))
+ if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY))
+ goto no_open;
+ if (d_mountpoint(dentry))
goto no_open;

+ inode = dentry->d_inode;
parent = dget_parent(dentry);
dir = parent->d_inode;

--
1.7.7

2012-06-05 13:11:12

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 12/21] cifs: implement i_op->atomic_open()

From: Miklos Szeredi <[email protected]>

Add an ->atomic_open implementation which replaces the atomic lookup+open+create
operation implemented via ->lookup and ->create operations.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Steve French <[email protected]>
---
fs/cifs/cifsfs.c | 1 +
fs/cifs/cifsfs.h | 3 +
fs/cifs/dir.c | 437 ++++++++++++++++++++++++++++++------------------------
3 files changed, 245 insertions(+), 196 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8b6e344..dd29c7c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -778,6 +778,7 @@ struct file_system_type cifs_fs_type = {
};
const struct inode_operations cifs_dir_inode_ops = {
.create = cifs_create,
+ .atomic_open = cifs_atomic_open,
.lookup = cifs_lookup,
.getattr = cifs_getattr,
.unlink = cifs_unlink,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6536535..3a572bf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -46,6 +46,9 @@ extern const struct inode_operations cifs_dir_inode_ops;
extern struct inode *cifs_root_iget(struct super_block *);
extern int cifs_create(struct inode *, struct dentry *, umode_t,
struct nameidata *);
+extern struct file *cifs_atomic_open(struct inode *, struct dentry *,
+ struct opendata *, unsigned, umode_t,
+ bool *);
extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
struct nameidata *);
extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ec4e9a2..7a3dcd1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -133,108 +133,141 @@ cifs_bp_rename_retry:
return full_path;
}

+/*
+ * Don't allow the separator character in a path component.
+ * The VFS will not allow "/", but "\" is allowed by posix.
+ */
+static int
+check_name(struct dentry *direntry)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
+ int i;
+
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
+ for (i = 0; i < direntry->d_name.len; i++) {
+ if (direntry->d_name.name[i] == '\\') {
+ cFYI(1, "Invalid file name");
+ return -EINVAL;
+ }
+ }
+ }
+ return 0;
+}
+
+
/* Inode operations in similar order to how they appear in Linux file fs.h */

-int
-cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
- struct nameidata *nd)
+static int cifs_do_create(struct inode *inode, struct dentry *direntry,
+ int xid, struct tcon_link *tlink, unsigned oflags,
+ umode_t mode, __u32 *oplock, __u16 *fileHandle,
+ bool *created)
{
int rc = -ENOENT;
- int xid;
int create_options = CREATE_NOT_DIR;
- __u32 oplock = 0;
- int oflags;
- /*
- * BB below access is probably too much for mknod to request
- * but we have to do query and setpathinfo so requesting
- * less could fail (unless we want to request getatr and setatr
- * permissions (only). At least for POSIX we do not have to
- * request so much.
- */
- int desiredAccess = GENERIC_READ | GENERIC_WRITE;
- __u16 fileHandle;
- struct cifs_sb_info *cifs_sb;
- struct tcon_link *tlink;
- struct cifs_tcon *tcon;
+ int desiredAccess;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_tcon *tcon = tlink_tcon(tlink);
char *full_path = NULL;
FILE_ALL_INFO *buf = NULL;
struct inode *newinode = NULL;
- int disposition = FILE_OVERWRITE_IF;
-
- xid = GetXid();
-
- cifs_sb = CIFS_SB(inode->i_sb);
- tlink = cifs_sb_tlink(cifs_sb);
- if (IS_ERR(tlink)) {
- FreeXid(xid);
- return PTR_ERR(tlink);
- }
- tcon = tlink_tcon(tlink);
+ int disposition;

+ *oplock = 0;
if (tcon->ses->server->oplocks)
- oplock = REQ_OPLOCK;
-
- if (nd)
- oflags = nd->intent.open.file->f_flags;
- else
- oflags = O_RDONLY | O_CREAT;
+ *oplock = REQ_OPLOCK;

full_path = build_path_from_dentry(direntry);
if (full_path == NULL) {
rc = -ENOMEM;
- goto cifs_create_out;
+ goto out;
}

if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
+ !tcon->broken_posix_open &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode,
- inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
- /* EIO could indicate that (posix open) operation is not
- supported, despite what server claimed in capability
- negotiation. EREMOTE indicates DFS junction, which is not
- handled in posix open */
+ inode->i_sb, mode, oflags, oplock, fileHandle, xid);
+ switch (rc) {
+ case 0:
+ if (newinode == NULL) {
+ /* query inode info */
+ goto cifs_create_get_file_info;
+ }

- if (rc == 0) {
- if (newinode == NULL) /* query inode info */
+ if (!S_ISREG(newinode->i_mode)) {
+ /*
+ * The server may allow us to open things like
+ * FIFOs, but the client isn't set up to deal
+ * with that. If it's not a regular file, just
+ * close it and proceed as if it were a normal
+ * lookup.
+ */
+ CIFSSMBClose(xid, tcon, *fileHandle);
goto cifs_create_get_file_info;
- else /* success, no need to query */
- goto cifs_create_set_dentry;
- } else if ((rc != -EIO) && (rc != -EREMOTE) &&
- (rc != -EOPNOTSUPP) && (rc != -EINVAL))
- goto cifs_create_out;
- /* else fallthrough to retry, using older open call, this is
- case where server does not support this SMB level, and
- falsely claims capability (also get here for DFS case
- which should be rare for path not covered on files) */
- }
+ }
+ /* success, no need to query */
+ goto cifs_create_set_dentry;
+
+ case -ENOENT:
+ goto cifs_create_get_file_info;
+
+ case -EIO:
+ case -EINVAL:
+ /*
+ * EIO could indicate that (posix open) operation is not
+ * supported, despite what server claimed in capability
+ * negotiation.
+ *
+ * POSIX open in samba versions 3.3.1 and earlier could
+ * incorrectly fail with invalid parameter.
+ */
+ tcon->broken_posix_open = true;
+ break;

- if (nd) {
- /* if the file is going to stay open, then we
- need to set the desired access properly */
- desiredAccess = 0;
- if (OPEN_FMODE(oflags) & FMODE_READ)
- desiredAccess |= GENERIC_READ; /* is this too little? */
- if (OPEN_FMODE(oflags) & FMODE_WRITE)
- desiredAccess |= GENERIC_WRITE;
+ case -EREMOTE:
+ case -EOPNOTSUPP:
+ /*
+ * EREMOTE indicates DFS junction, which is not handled
+ * in posix open. If either that or op not supported
+ * returned, follow the normal lookup.
+ */
+ break;

- if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
- disposition = FILE_CREATE;
- else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
- disposition = FILE_OVERWRITE_IF;
- else if ((oflags & O_CREAT) == O_CREAT)
- disposition = FILE_OPEN_IF;
- else
- cFYI(1, "Create flag not set in create function");
+ default:
+ goto out;
+ }
+ /*
+ * fallthrough to retry, using older open call, this is case
+ * where server does not support this SMB level, and falsely
+ * claims capability (also get here for DFS case which should be
+ * rare for path not covered on files)
+ */
}

+ desiredAccess = 0;
+ if (OPEN_FMODE(oflags) & FMODE_READ)
+ desiredAccess |= GENERIC_READ; /* is this too little? */
+ if (OPEN_FMODE(oflags) & FMODE_WRITE)
+ desiredAccess |= GENERIC_WRITE;
+
+ disposition = FILE_OVERWRITE_IF;
+ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ disposition = FILE_CREATE;
+ else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+ disposition = FILE_OVERWRITE_IF;
+ else if ((oflags & O_CREAT) == O_CREAT)
+ disposition = FILE_OPEN_IF;
+ else
+ cFYI(1, "Create flag not set in create function");
+
/* BB add processing to set equivalent of mode - e.g. via CreateX with
ACLs */

buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
if (buf == NULL) {
rc = -ENOMEM;
- goto cifs_create_out;
+ goto out;
}

/*
@@ -250,7 +283,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
if (tcon->ses->capabilities & CAP_NT_SMBS)
rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
desiredAccess, create_options,
- &fileHandle, &oplock, buf, cifs_sb->local_nls,
+ fileHandle, oplock, buf, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
else
rc = -EIO; /* no NT SMB support fall into legacy open below */
@@ -259,17 +292,17 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
/* old server, retry the open legacy style */
rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
desiredAccess, create_options,
- &fileHandle, &oplock, buf, cifs_sb->local_nls,
+ fileHandle, oplock, buf, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
}
if (rc) {
cFYI(1, "cifs_create returned 0x%x", rc);
- goto cifs_create_out;
+ goto out;
}

/* If Open reported that we actually created a file
then we now have to set the mode if possible */
- if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
+ if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) {
struct cifs_unix_set_info_args args = {
.mode = mode,
.ctime = NO_CHANGE_64,
@@ -278,6 +311,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
.device = 0,
};

+ *created = true;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
args.uid = (__u64) current_fsuid();
if (inode->i_mode & S_ISGID)
@@ -288,7 +322,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
args.uid = NO_CHANGE_64;
args.gid = NO_CHANGE_64;
}
- CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle,
+ CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle,
current->tgid);
} else {
/* BB implement mode setting via Windows security
@@ -305,11 +339,11 @@ cifs_create_get_file_info:
inode->i_sb, xid);
else {
rc = cifs_get_inode_info(&newinode, full_path, buf,
- inode->i_sb, xid, &fileHandle);
+ inode->i_sb, xid, fileHandle);
if (newinode) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
newinode->i_mode = mode;
- if ((oplock & CIFS_CREATE_ACTION) &&
+ if ((*oplock & CIFS_CREATE_ACTION) &&
(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
newinode->i_uid = current_fsuid();
if (inode->i_mode & S_ISGID)
@@ -321,37 +355,139 @@ cifs_create_get_file_info:
}

cifs_create_set_dentry:
- if (rc == 0)
- d_instantiate(direntry, newinode);
- else
+ if (rc != 0) {
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
+ goto out;
+ }
+ d_drop(direntry);
+ d_add(direntry, newinode);

- if (newinode && nd) {
- struct cifsFileInfo *pfile_info;
- struct file *filp;
+ /* ENOENT for create? How weird... */
+ rc = -ENOENT;
+ if (!newinode) {
+ CIFSSMBClose(xid, tcon, *fileHandle);
+ goto out;
+ }
+ rc = 0;

- filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
- if (IS_ERR(filp)) {
- rc = PTR_ERR(filp);
- CIFSSMBClose(xid, tcon, fileHandle);
- goto cifs_create_out;
- }
+out:
+ kfree(buf);
+ kfree(full_path);
+ return rc;
+}

- pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock);
- if (pfile_info == NULL) {
- fput(filp);
- CIFSSMBClose(xid, tcon, fileHandle);
- rc = -ENOMEM;
- }
- } else {
+struct file *
+cifs_atomic_open(struct inode *inode, struct dentry *direntry,
+ struct opendata *od, unsigned oflags, umode_t mode,
+ bool *created)
+{
+ int rc;
+ int xid;
+ struct tcon_link *tlink;
+ struct cifs_tcon *tcon;
+ __u16 fileHandle;
+ __u32 oplock;
+ struct file *filp;
+ struct cifsFileInfo *pfile_info;
+
+ /* Posix open is only called (at lookup time) for file create now. For
+ * opens (rather than creates), because we do not know if it is a file
+ * or directory yet, and current Samba no longer allows us to do posix
+ * open on dirs, we could end up wasting an open call on what turns out
+ * to be a dir. For file opens, we wait to call posix open till
+ * cifs_open. It could be added to atomic_open in the future but the
+ * performance tradeoff of the extra network request when EISDIR or
+ * EACCES is returned would have to be weighed against the 50% reduction
+ * in network traffic in the other paths.
+ */
+ if (!(oflags & O_CREAT)) {
+ struct dentry *res = cifs_lookup(inode, direntry, NULL);
+ if (IS_ERR(res))
+ return ERR_CAST(res);
+
+ finish_no_open(od, res);
+ return NULL;
+ }
+
+ rc = check_name(direntry);
+ if (rc)
+ return ERR_PTR(rc);
+
+ xid = GetXid();
+
+ cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
+ inode, direntry->d_name.name, direntry);
+
+ tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+ filp = ERR_CAST(tlink);
+ if (IS_ERR(tlink))
+ goto free_xid;
+
+ tcon = tlink_tcon(tlink);
+
+ rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
+ &oplock, &fileHandle, created);
+
+ if (rc) {
+ filp = ERR_PTR(rc);
+ goto out;
+ }
+
+ filp = finish_open(od, direntry, generic_file_open);
+ if (IS_ERR(filp)) {
CIFSSMBClose(xid, tcon, fileHandle);
+ goto out;
}

-cifs_create_out:
- kfree(buf);
- kfree(full_path);
+ pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock);
+ if (pfile_info == NULL) {
+ CIFSSMBClose(xid, tcon, fileHandle);
+ fput(filp);
+ filp = ERR_PTR(-ENOMEM);
+ }
+
+out:
+ cifs_put_tlink(tlink);
+free_xid:
+ FreeXid(xid);
+ return filp;
+}
+
+int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
+ struct nameidata *nd)
+{
+ int rc;
+ int xid = GetXid();
+ /*
+ * BB below access is probably too much for mknod to request
+ * but we have to do query and setpathinfo so requesting
+ * less could fail (unless we want to request getatr and setatr
+ * permissions (only). At least for POSIX we do not have to
+ * request so much.
+ */
+ unsigned oflags = O_EXCL | O_CREAT | O_RDWR;
+ struct tcon_link *tlink;
+ __u16 fileHandle;
+ __u32 oplock;
+ bool created = true;
+
+ cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p",
+ inode, direntry->d_name.name, direntry);
+
+ tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+ rc = PTR_ERR(tlink);
+ if (IS_ERR(tlink))
+ goto free_xid;
+
+ rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
+ &oplock, &fileHandle, &created);
+ if (!rc)
+ CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle);
+
cifs_put_tlink(tlink);
+free_xid:
FreeXid(xid);
+
return rc;
}

@@ -492,16 +628,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
{
int xid;
int rc = 0; /* to get around spurious gcc warning, set to zero here */
- __u32 oplock;
- __u16 fileHandle = 0;
- bool posix_open = false;
struct cifs_sb_info *cifs_sb;
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
- struct cifsFileInfo *cfile;
struct inode *newInode = NULL;
char *full_path = NULL;
- struct file *filp;

xid = GetXid();

@@ -518,31 +649,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
}
pTcon = tlink_tcon(tlink);

- oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0;
-
- /*
- * Don't allow the separator character in a path component.
- * The VFS will not allow "/", but "\" is allowed by posix.
- */
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
- int i;
- for (i = 0; i < direntry->d_name.len; i++)
- if (direntry->d_name.name[i] == '\\') {
- cFYI(1, "Invalid file name");
- rc = -EINVAL;
- goto lookup_out;
- }
- }
-
- /*
- * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
- * the VFS handle the create.
- */
- if (nd && (nd->flags & LOOKUP_EXCL)) {
- d_instantiate(direntry, NULL);
- rc = 0;
+ rc = check_name(direntry);
+ if (rc)
goto lookup_out;
- }

/* can not grab the rename sem here since it would
deadlock in the cases (beginning of sys_rename itself)
@@ -560,80 +669,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
}
cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode);

- /* Posix open is only called (at lookup time) for file create now.
- * For opens (rather than creates), because we do not know if it
- * is a file or directory yet, and current Samba no longer allows
- * us to do posix open on dirs, we could end up wasting an open call
- * on what turns out to be a dir. For file opens, we wait to call posix
- * open till cifs_open. It could be added here (lookup) in the future
- * but the performance tradeoff of the extra network request when EISDIR
- * or EACCES is returned would have to be weighed against the 50%
- * reduction in network traffic in the other paths.
- */
if (pTcon->unix_ext) {
- if (nd && !(nd->flags & LOOKUP_DIRECTORY) &&
- (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
- (nd->intent.open.file->f_flags & O_CREAT)) {
- rc = cifs_posix_open(full_path, &newInode,
- parent_dir_inode->i_sb,
- nd->intent.open.create_mode,
- nd->intent.open.file->f_flags, &oplock,
- &fileHandle, xid);
- /*
- * The check below works around a bug in POSIX
- * open in samba versions 3.3.1 and earlier where
- * open could incorrectly fail with invalid parameter.
- * If either that or op not supported returned, follow
- * the normal lookup.
- */
- switch (rc) {
- case 0:
- /*
- * The server may allow us to open things like
- * FIFOs, but the client isn't set up to deal
- * with that. If it's not a regular file, just
- * close it and proceed as if it were a normal
- * lookup.
- */
- if (newInode && !S_ISREG(newInode->i_mode)) {
- CIFSSMBClose(xid, pTcon, fileHandle);
- break;
- }
- case -ENOENT:
- posix_open = true;
- case -EOPNOTSUPP:
- break;
- default:
- pTcon->broken_posix_open = true;
- }
- }
- if (!posix_open)
- rc = cifs_get_inode_info_unix(&newInode, full_path,
- parent_dir_inode->i_sb, xid);
- } else
+ rc = cifs_get_inode_info_unix(&newInode, full_path,
+ parent_dir_inode->i_sb, xid);
+ } else {
rc = cifs_get_inode_info(&newInode, full_path, NULL,
parent_dir_inode->i_sb, xid, NULL);
+ }

if ((rc == 0) && (newInode != NULL)) {
d_add(direntry, newInode);
- if (posix_open) {
- filp = lookup_instantiate_filp(nd, direntry,
- generic_file_open);
- if (IS_ERR(filp)) {
- rc = PTR_ERR(filp);
- CIFSSMBClose(xid, pTcon, fileHandle);
- goto lookup_out;
- }
-
- cfile = cifs_new_fileinfo(fileHandle, filp, tlink,
- oplock);
- if (cfile == NULL) {
- fput(filp);
- CIFSSMBClose(xid, pTcon, fileHandle);
- rc = -ENOMEM;
- goto lookup_out;
- }
- }
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
--
1.7.7

2012-06-05 13:11:21

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 15/21] 9p: implement i_op->atomic_open()

From: Miklos Szeredi <[email protected]>

Add an ->atomic_open implementation which replaces the atomic open+create
operation implemented via ->create. No functionality is changed.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Eric Van Hensbergen <[email protected]>
---
fs/9p/vfs_inode.c | 169 +++++++++++++++++++++++++++++-------------------
fs/9p/vfs_inode_dotl.c | 52 ++++++++++-----
2 files changed, 137 insertions(+), 84 deletions(-)

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 57ccb75..e8c42ce 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -712,11 +712,14 @@ error:
}

/**
- * v9fs_vfs_create - VFS hook to create files
+ * v9fs_vfs_create - VFS hook to create a regular file
+ *
+ * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open(). This is only called
+ * for mknod(2).
+ *
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
* @mode: create permissions
- * @nd: path information
*
*/

@@ -724,76 +727,19 @@ static int
v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd)
{
- int err;
- u32 perm;
- int flags;
- struct file *filp;
- struct v9fs_inode *v9inode;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *inode_fid;
-
- err = 0;
- fid = NULL;
- v9ses = v9fs_inode2v9ses(dir);
- perm = unixmode2p9mode(v9ses, mode);
- if (nd)
- flags = nd->intent.open.flags;
- else
- flags = O_RDWR;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+ u32 perm = unixmode2p9mode(v9ses, mode);
+ struct p9_fid *fid;

- fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
- v9fs_uflags2omode(flags,
- v9fs_proto_dotu(v9ses)));
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- fid = NULL;
- goto error;
- }
+ /* P9_OEXCL? */
+ fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);

v9fs_invalidate_inode_attr(dir);
- /* if we are opening a file, assign the open fid to the file */
- if (nd) {
- v9inode = V9FS_I(dentry->d_inode);
- mutex_lock(&v9inode->v_mutex);
- if (v9ses->cache && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto error;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
- filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
- if (IS_ERR(filp)) {
- err = PTR_ERR(filp);
- goto error;
- }
-
- filp->private_data = fid;
-#ifdef CONFIG_9P_FSCACHE
- if (v9ses->cache)
- v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
-#endif
- } else
- p9_client_clunk(fid);
+ p9_client_clunk(fid);

return 0;
-
-error:
- if (fid)
- p9_client_clunk(fid);
-
- return err;
}

/**
@@ -910,6 +856,93 @@ error:
return ERR_PTR(result);
}

+static struct file *
+v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct opendata *od, unsigned flags, umode_t mode,
+ bool *created)
+{
+ int err;
+ u32 perm;
+ struct file *filp;
+ struct v9fs_inode *v9inode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid, *inode_fid;
+ struct dentry *res = NULL;
+
+ if (d_unhashed(dentry)) {
+ res = v9fs_vfs_lookup(dir, dentry, NULL);
+ if (IS_ERR(res))
+ return ERR_CAST(res);
+
+ if (res)
+ dentry = res;
+ }
+
+ /* Only creates */
+ if (!(flags & O_CREAT) || dentry->d_inode) {
+ finish_no_open(od, res);
+ return NULL;
+ }
+
+ err = 0;
+ fid = NULL;
+ v9ses = v9fs_inode2v9ses(dir);
+ perm = unixmode2p9mode(v9ses, mode);
+ fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
+ v9fs_uflags2omode(flags,
+ v9fs_proto_dotu(v9ses)));
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ fid = NULL;
+ goto error;
+ }
+
+ v9fs_invalidate_inode_attr(dir);
+ v9inode = V9FS_I(dentry->d_inode);
+ mutex_lock(&v9inode->v_mutex);
+ if (v9ses->cache && !v9inode->writeback_fid &&
+ ((flags & O_ACCMODE) != O_RDONLY)) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ inode_fid = v9fs_writeback_fid(dentry);
+ if (IS_ERR(inode_fid)) {
+ err = PTR_ERR(inode_fid);
+ mutex_unlock(&v9inode->v_mutex);
+ goto error;
+ }
+ v9inode->writeback_fid = (void *) inode_fid;
+ }
+ mutex_unlock(&v9inode->v_mutex);
+ filp = finish_open(od, dentry, generic_file_open);
+ if (IS_ERR(filp)) {
+ err = PTR_ERR(filp);
+ goto error;
+ }
+
+ filp->private_data = fid;
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
+ v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
+#endif
+
+ *created = true;
+out:
+ dput(res);
+ return filp;
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+
+ filp = ERR_PTR(err);
+ goto out;
+}
+
/**
* v9fs_vfs_unlink - VFS unlink hook to delete an inode
* @i: inode that is being unlinked
@@ -1488,6 +1521,7 @@ out:
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
+ .atomic_open = v9fs_vfs_atomic_open,
.symlink = v9fs_vfs_symlink,
.link = v9fs_vfs_link,
.unlink = v9fs_vfs_unlink,
@@ -1502,6 +1536,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
static const struct inode_operations v9fs_dir_inode_operations = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
+ .atomic_open = v9fs_vfs_atomic_open,
.unlink = v9fs_vfs_unlink,
.mkdir = v9fs_vfs_mkdir,
.rmdir = v9fs_vfs_rmdir,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index e3dd2a1..a354fe2 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -230,7 +230,6 @@ int v9fs_open_to_dotl_flags(int flags)
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
* @mode: create permissions
- * @nd: path information
*
*/

@@ -238,9 +237,16 @@ static int
v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
struct nameidata *nd)
{
+ return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+}
+
+static struct file *
+v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+ struct opendata *od, unsigned flags, umode_t omode,
+ bool *created)
+{
int err = 0;
gid_t gid;
- int flags;
umode_t mode;
char *name = NULL;
struct file *filp;
@@ -251,19 +257,25 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
struct p9_fid *dfid, *ofid, *inode_fid;
struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
+ struct dentry *res = NULL;

- v9ses = v9fs_inode2v9ses(dir);
- if (nd)
- flags = nd->intent.open.flags;
- else {
- /*
- * create call without LOOKUP_OPEN is due
- * to mknod of regular files. So use mknod
- * operation.
- */
- return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+ if (d_unhashed(dentry)) {
+ res = v9fs_vfs_lookup(dir, dentry, NULL);
+ if (IS_ERR(res))
+ return ERR_CAST(res);
+
+ if (res)
+ dentry = res;
+ }
+
+ /* Only creates */
+ if (!(flags & O_CREAT) || dentry->d_inode) {
+ finish_no_open(od, res);
+ return NULL;
}

+ v9ses = v9fs_inode2v9ses(dir);
+
name = (char *) dentry->d_name.name;
p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
name, flags, omode);
@@ -272,7 +284,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- return err;
+ goto err_return;
}

/* clone a fid to use for creation */
@@ -280,7 +292,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
if (IS_ERR(ofid)) {
err = PTR_ERR(ofid);
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
- return err;
+ goto err_return;
}

gid = v9fs_get_fsgid_for_create(dir);
@@ -345,7 +357,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
}
mutex_unlock(&v9inode->v_mutex);
/* Since we are opening a file, assign the open fid to the file */
- filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
+ filp = finish_open(od, dentry, generic_file_open);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
goto err_clunk_old_fid;
@@ -355,7 +367,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, filp);
#endif
- return 0;
+ *created = true;
+out:
+ dput(res);
+ return filp;

error:
if (fid)
@@ -364,7 +379,9 @@ err_clunk_old_fid:
if (ofid)
p9_client_clunk(ofid);
v9fs_set_create_acl(NULL, &dacl, &pacl);
- return err;
+err_return:
+ filp = ERR_PTR(err);
+ goto out;
}

/**
@@ -982,6 +999,7 @@ out:

const struct inode_operations v9fs_dir_inode_operations_dotl = {
.create = v9fs_vfs_create_dotl,
+ .atomic_open = v9fs_vfs_atomic_open_dotl,
.lookup = v9fs_vfs_lookup,
.link = v9fs_vfs_link_dotl,
.symlink = v9fs_vfs_symlink_dotl,
--
1.7.7

2012-06-05 13:11:30

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 20/21] vfs: do_last(): clean up retry

From: Miklos Szeredi <[email protected]>

Move the lookup retry logic to the bottom of the function to make the normal
case simpler to read.

Reported-by: David Howells <[email protected]>
Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 36 +++++++++++++++++++++---------------
1 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 7d87c63..d5dd6fb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2605,22 +2605,11 @@ finish_open_created:
goto exit;
od->mnt = nd->path.mnt;
filp = finish_open(od, nd->path.dentry, NULL);
- if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
- BUG_ON(save_parent.dentry != dir);
- path_put(&nd->path);
- nd->path = save_parent;
- nd->inode = dir->d_inode;
- save_parent.mnt = NULL;
- save_parent.dentry = NULL;
- if (want_write) {
- mnt_drop_write(nd->path.mnt);
- want_write = false;
- }
- retried = true;
- goto retry_lookup;
- }
- if (IS_ERR(filp))
+ if (IS_ERR(filp)) {
+ if (filp == ERR_PTR(-EOPENSTALE))
+ goto stale_open;
goto out;
+ }
error = open_check_o_direct(filp);
if (error)
goto exit_fput;
@@ -2650,6 +2639,23 @@ exit_fput:
fput(filp);
goto exit;

+stale_open:
+ /* If no saved parent or already retried then can't retry */
+ if (!save_parent.dentry || retried)
+ goto out;
+
+ BUG_ON(save_parent.dentry != dir);
+ path_put(&nd->path);
+ nd->path = save_parent;
+ nd->inode = dir->d_inode;
+ save_parent.mnt = NULL;
+ save_parent.dentry = NULL;
+ if (want_write) {
+ mnt_drop_write(nd->path.mnt);
+ want_write = false;
+ }
+ retried = true;
+ goto retry_lookup;
}

static struct file *path_openat(int dfd, const char *pathname,
--
1.7.7

2012-06-05 13:11:46

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 21/21] vfs: move O_DIRECT check to common code

From: Miklos Szeredi <[email protected]>

Perform open_check_o_direct() in a common place in do_last after opening the
file.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 17 +++++------------
1 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index d5dd6fb..62514ae 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2296,22 +2296,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
* here.
*/
error = may_open(&filp->f_path, acc_mode, open_flag);
- if (error)
- goto out_fput;
-
- error = open_check_o_direct(filp);
- if (error)
- goto out_fput;
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }

out:
dput(dentry);
return filp;

-out_fput:
- fput(filp);
- filp = ERR_PTR(error);
- goto out;
-
no_open:
if (need_lookup) {
dentry = lookup_real(dir, dentry, nd);
@@ -2610,10 +2603,10 @@ finish_open_created:
goto stale_open;
goto out;
}
+opened:
error = open_check_o_direct(filp);
if (error)
goto exit_fput;
-opened:
error = ima_file_check(filp, op->acc_mode);
if (error)
goto exit_fput;
--
1.7.7

2012-06-05 13:11:28

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 19/21] vfs: do_last(): clean up bool

From: Miklos Szeredi <[email protected]>

Consistently use bool for boolean values in do_last().

Reported-by: David Howells <[email protected]>
Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 28 ++++++++++++++--------------
1 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index e57cec2..7d87c63 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2187,7 +2187,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct opendata *od,
const struct open_flags *op,
- int *want_write, bool need_lookup,
+ bool *want_write, bool need_lookup,
bool *created)
{
struct inode *dir = nd->path.dentry->d_inode;
@@ -2229,7 +2229,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
(open_flag & O_ACCMODE) != O_RDONLY) {
error = mnt_want_write(nd->path.mnt);
if (!error) {
- *want_write = 1;
+ *want_write = true;
} else if (!(open_flag & O_CREAT)) {
/*
* No O_CREATE -> atomicity not a requirement -> fall
@@ -2351,7 +2351,7 @@ looked_up:
static struct file *lookup_open(struct nameidata *nd, struct path *path,
struct opendata *od,
const struct open_flags *op,
- int *want_write, bool *created)
+ bool *want_write, bool *created)
{
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
@@ -2396,7 +2396,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
error = mnt_want_write(nd->path.mnt);
if (error)
goto out_dput;
- *want_write = 1;
+ *want_write = true;
*created = true;
error = security_path_mknod(&nd->path, dentry, mode, 0);
if (error)
@@ -2424,13 +2424,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
{
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
- int will_truncate = open_flag & O_TRUNC;
- int want_write = 0;
+ bool will_truncate = (open_flag & O_TRUNC) != 0;
+ bool want_write = false;
int acc_mode = op->acc_mode;
struct file *filp;
struct inode *inode;
bool created;
- int symlink_ok = 0;
+ bool symlink_ok = false;
struct path save_parent = { .dentry = NULL, .mnt = NULL };
bool retried = false;
int error;
@@ -2467,7 +2467,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
- symlink_ok = 1;
+ symlink_ok = true;
/* we _can_ be in RCU mode here */
error = lookup_fast(nd, &nd->last, path, &inode);
if (likely(!error))
@@ -2505,7 +2505,7 @@ retry_lookup:
goto out;

if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
- will_truncate = 0;
+ will_truncate = false;

audit_inode(pathname, filp->f_path.dentry);
goto opened;
@@ -2514,7 +2514,7 @@ retry_lookup:
if (created) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
- will_truncate = 0;
+ will_truncate = false;
acc_mode = MAY_OPEN;
path_to_nameidata(path, nd);
goto finish_open_created;
@@ -2532,7 +2532,7 @@ retry_lookup:
*/
if (want_write) {
mnt_drop_write(nd->path.mnt);
- want_write = 0;
+ want_write = false;
}

error = -EEXIST;
@@ -2591,13 +2591,13 @@ finish_lookup:
audit_inode(pathname, nd->path.dentry);
finish_open:
if (!S_ISREG(nd->inode->i_mode))
- will_truncate = 0;
+ will_truncate = false;

if (will_truncate) {
error = mnt_want_write(nd->path.mnt);
if (error)
goto exit;
- want_write = 1;
+ want_write = true;
}
finish_open_created:
error = may_open(&nd->path, acc_mode, open_flag);
@@ -2614,7 +2614,7 @@ finish_open_created:
save_parent.dentry = NULL;
if (want_write) {
mnt_drop_write(nd->path.mnt);
- want_write = 0;
+ want_write = false;
}
retried = true;
goto retry_lookup;
--
1.7.7

2012-06-05 13:12:18

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 18/21] vfs: do_last(): clean up labels

From: Miklos Szeredi <[email protected]>

Reported-by: David Howells <[email protected]>
Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 55f5f27..e57cec2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2454,13 +2454,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
error = -EISDIR;
goto exit;
}
- goto ok;
+ goto finish_open;
case LAST_BIND:
error = complete_walk(nd);
if (error)
return ERR_PTR(error);
audit_inode(pathname, dir);
- goto ok;
+ goto finish_open;
}

if (!(open_flag & O_CREAT)) {
@@ -2517,7 +2517,7 @@ retry_lookup:
will_truncate = 0;
acc_mode = MAY_OPEN;
path_to_nameidata(path, nd);
- goto common;
+ goto finish_open_created;
}

/*
@@ -2589,7 +2589,7 @@ finish_lookup:
if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
goto exit;
audit_inode(pathname, nd->path.dentry);
-ok:
+finish_open:
if (!S_ISREG(nd->inode->i_mode))
will_truncate = 0;

@@ -2599,7 +2599,7 @@ ok:
goto exit;
want_write = 1;
}
-common:
+finish_open_created:
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto exit;
--
1.7.7

2012-06-05 13:12:38

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 17/21] vfs: do_last(): clean up error handling

From: Miklos Szeredi <[email protected]>

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 23 ++++++++---------------
1 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 47d81f4..55f5f27 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2625,21 +2625,14 @@ common:
if (error)
goto exit_fput;
opened:
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, op->acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- if (!IS_ERR(filp)) {
- if (will_truncate) {
- error = handle_truncate(filp);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
+ error = ima_file_check(filp, op->acc_mode);
+ if (error)
+ goto exit_fput;
+
+ if (will_truncate) {
+ error = handle_truncate(filp);
+ if (error)
+ goto exit_fput;
}
out:
if (want_write)
--
1.7.7

2012-06-05 13:12:55

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 16/21] vfs: remove open intents from nameidata

From: Miklos Szeredi <[email protected]>

All users of open intents have been converted to use ->atomic_{open,create}.

This patch gets rid of nd->intent.open and related infrastructure.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/internal.h | 5 +--
fs/namei.c | 97 +++++++++++++++++++++++--------------------------
fs/open.c | 87 +-------------------------------------------
include/linux/namei.h | 11 ------
4 files changed, 49 insertions(+), 151 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 628907c..4d044e1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -84,13 +84,10 @@ extern struct super_block *user_get_super(dev_t);
/*
* open.c
*/
-struct nameidata;
-extern struct file *nameidata_to_filp(struct nameidata *);
-extern void release_open_intent(struct nameidata *);
struct opendata {
struct dentry *dentry;
struct vfsmount *mnt;
- struct file **filp;
+ struct file *filp;
};
struct open_flags {
int open_flag;
diff --git a/fs/namei.c b/fs/namei.c
index 959ebb0..47d81f4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -463,22 +463,6 @@ err_root:
return -ECHILD;
}

-/**
- * release_open_intent - free up open intent resources
- * @nd: pointer to nameidata
- */
-void release_open_intent(struct nameidata *nd)
-{
- struct file *file = nd->intent.open.file;
-
- if (file && !IS_ERR(file)) {
- if (file->f_path.dentry == NULL)
- put_filp(file);
- else
- fput(file);
- }
-}
-
static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
return dentry->d_op->d_revalidate(dentry, nd);
@@ -2201,7 +2185,8 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
}

static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
- struct path *path, const struct open_flags *op,
+ struct path *path, struct opendata *od,
+ const struct open_flags *op,
int *want_write, bool need_lookup,
bool *created)
{
@@ -2210,7 +2195,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
umode_t mode;
int error;
int acc_mode;
- struct opendata od;
struct file *filp;
int create_error = 0;
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
@@ -2276,14 +2260,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
if (nd->flags & LOOKUP_DIRECTORY)
open_flag |= O_DIRECTORY;

- od.dentry = DENTRY_NOT_SET;
- od.mnt = nd->path.mnt;
- od.filp = &nd->intent.open.file;
- filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
+ od->dentry = DENTRY_NOT_SET;
+ od->mnt = nd->path.mnt;
+ filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode,
created);
if (IS_ERR(filp)) {
- if (WARN_ON(od.dentry != DENTRY_NOT_SET))
- dput(od.dentry);
+ if (WARN_ON(od->dentry != DENTRY_NOT_SET))
+ dput(od->dentry);

if (create_error && PTR_ERR(filp) == -ENOENT)
filp = ERR_PTR(create_error);
@@ -2297,13 +2280,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
}

if (!filp) {
- if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
+ if (WARN_ON(od->dentry == DENTRY_NOT_SET)) {
filp = ERR_PTR(-EIO);
goto out;
}
- if (od.dentry) {
+ if (od->dentry) {
dput(dentry);
- dentry = od.dentry;
+ dentry = od->dentry;
}
goto looked_up;
}
@@ -2366,6 +2349,7 @@ looked_up:
* was performed, only lookup.
*/
static struct file *lookup_open(struct nameidata *nd, struct path *path,
+ struct opendata *od,
const struct open_flags *op,
int *want_write, bool *created)
{
@@ -2385,7 +2369,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
goto out_no_open;

if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
- return atomic_open(nd, dentry, path, op, want_write,
+ return atomic_open(nd, dentry, path, od, op, want_write,
need_lookup, created);
}

@@ -2407,7 +2391,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
* rw->ro transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
- * the 'struct file' in nameidata_to_filp().
+ * the 'struct file' in finish_open().
*/
error = mnt_want_write(nd->path.mnt);
if (error)
@@ -2435,7 +2419,8 @@ out_dput:
* Handle the last step of open()
*/
static struct file *do_last(struct nameidata *nd, struct path *path,
- const struct open_flags *op, const char *pathname)
+ struct opendata *od, const struct open_flags *op,
+ const char *pathname)
{
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
@@ -2512,7 +2497,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,

retry_lookup:
mutex_lock(&dir->d_inode->i_mutex);
- filp = lookup_open(nd, path, op, &want_write, &created);
+ filp = lookup_open(nd, path, od, op, &want_write, &created);
mutex_unlock(&dir->d_inode->i_mutex);

if (filp) {
@@ -2618,7 +2603,8 @@ common:
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto exit;
- filp = nameidata_to_filp(nd);
+ od->mnt = nd->path.mnt;
+ filp = finish_open(od, nd->path.dentry, NULL);
if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
BUG_ON(save_parent.dentry != dir);
path_put(&nd->path);
@@ -2633,6 +2619,11 @@ common:
retried = true;
goto retry_lookup;
}
+ if (IS_ERR(filp))
+ goto out;
+ error = open_check_o_direct(filp);
+ if (error)
+ goto exit_fput;
opened:
if (!IS_ERR(filp)) {
error = ima_file_check(filp, op->acc_mode);
@@ -2662,24 +2653,26 @@ exit_dput:
exit:
filp = ERR_PTR(error);
goto out;
+exit_fput:
+ fput(filp);
+ goto exit;
+
}

static struct file *path_openat(int dfd, const char *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
{
struct file *base = NULL;
- struct file *filp;
+ struct opendata od;
+ struct file *res;
struct path path;
int error;

- filp = get_empty_filp();
- if (!filp)
+ od.filp = get_empty_filp();
+ if (!od.filp)
return ERR_PTR(-ENFILE);

- filp->f_flags = op->open_flag;
- nd->intent.open.file = filp;
- nd->intent.open.flags = open_to_namei_flags(op->open_flag);
- nd->intent.open.create_mode = op->mode;
+ od.filp->f_flags = op->open_flag;

error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
@@ -2690,23 +2683,23 @@ static struct file *path_openat(int dfd, const char *pathname,
if (unlikely(error))
goto out_filp;

- filp = do_last(nd, &path, op, pathname);
- while (unlikely(!filp)) { /* trailing symlink */
+ res = do_last(nd, &path, &od, op, pathname);
+ while (unlikely(!res)) { /* trailing symlink */
struct path link = path;
void *cookie;
if (!(nd->flags & LOOKUP_FOLLOW)) {
path_put_conditional(&path, nd);
path_put(&nd->path);
- filp = ERR_PTR(-ELOOP);
+ res = ERR_PTR(-ELOOP);
break;
}
nd->flags |= LOOKUP_PARENT;
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
error = follow_link(&link, nd, &cookie);
if (unlikely(error))
- filp = ERR_PTR(error);
+ res = ERR_PTR(error);
else
- filp = do_last(nd, &path, op, pathname);
+ res = do_last(nd, &path, &od, op, pathname);
put_link(nd, &link, cookie);
}
out:
@@ -2714,17 +2707,20 @@ out:
path_put(&nd->root);
if (base)
fput(base);
- release_open_intent(nd);
- if (filp == ERR_PTR(-EOPENSTALE)) {
+ if (od.filp) {
+ BUG_ON(od.filp->f_path.dentry);
+ put_filp(od.filp);
+ }
+ if (res == ERR_PTR(-EOPENSTALE)) {
if (flags & LOOKUP_RCU)
- filp = ERR_PTR(-ECHILD);
+ res = ERR_PTR(-ECHILD);
else
- filp = ERR_PTR(-ESTALE);
+ res = ERR_PTR(-ESTALE);
}
- return filp;
+ return res;

out_filp:
- filp = ERR_PTR(error);
+ res = ERR_PTR(error);
goto out;
}

@@ -2780,7 +2776,6 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
goto out;
nd.flags &= ~LOOKUP_PARENT;
nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
- nd.intent.open.flags = O_EXCL;

/*
* Do the final lookup.
diff --git a/fs/open.c b/fs/open.c
index 9d7e0dd..316982b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -771,46 +771,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
}

/**
- * lookup_instantiate_filp - instantiates the open intent filp
- * @nd: pointer to nameidata
- * @dentry: pointer to dentry
- * @open: open callback
- *
- * Helper for filesystems that want to use lookup open intents and pass back
- * a fully instantiated struct file to the caller.
- * This function is meant to be called from within a filesystem's
- * lookup method.
- * Beware of calling it for non-regular files! Those ->open methods might block
- * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
- * leading to a deadlock, as nobody can open that fifo anymore, because
- * another process to open fifo will block on locked parent when doing lookup).
- * Note that in case of error, nd->intent.open.file is destroyed, but the
- * path information remains valid.
- * If the open callback is set to NULL, then the standard f_op->open()
- * filesystem callback is substituted.
- */
-struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
- int (*open)(struct inode *, struct file *))
-{
- const struct cred *cred = current_cred();
-
- if (IS_ERR(nd->intent.open.file))
- goto out;
- if (IS_ERR(dentry))
- goto out_err;
- nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
- nd->intent.open.file,
- open, cred);
-out:
- return nd->intent.open.file;
-out_err:
- release_open_intent(nd);
- nd->intent.open.file = ERR_CAST(dentry);
- goto out;
-}
-EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
-
-/**
* finish_open - finish opening a file
* @od: opaque open data
* @dentry: pointer to dentry
@@ -829,9 +789,9 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry,
mntget(od->mnt);
dget(dentry);

- res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred());
+ res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred());
if (!IS_ERR(res))
- *od->filp = NULL;
+ od->filp = NULL;

return res;
}
@@ -852,49 +812,6 @@ void finish_no_open(struct opendata *od, struct dentry *dentry)
}
EXPORT_SYMBOL(finish_no_open);

-/**
- * nameidata_to_filp - convert a nameidata to an open filp.
- * @nd: pointer to nameidata
- * @flags: open flags
- *
- * Note that this function destroys the original nameidata
- */
-struct file *nameidata_to_filp(struct nameidata *nd)
-{
- const struct cred *cred = current_cred();
- struct file *filp;
-
- /* Pick up the filp from the open intent */
- filp = nd->intent.open.file;
-
- /* Has the filesystem initialised the file for us? */
- if (filp->f_path.dentry != NULL) {
- nd->intent.open.file = NULL;
- } else {
- struct file *res;
-
- path_get(&nd->path);
- res = do_dentry_open(nd->path.dentry, nd->path.mnt,
- filp, NULL, cred);
- if (!IS_ERR(res)) {
- int error;
-
- nd->intent.open.file = NULL;
- BUG_ON(res != filp);
-
- error = open_check_o_direct(filp);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- } else {
- /* Allow nd->intent.open.file to be recycled */
- filp = res;
- }
- }
- return filp;
-}
-
/*
* dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
* error.
diff --git a/include/linux/namei.h b/include/linux/namei.h
index ffc0213..54dadda 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -7,12 +7,6 @@

struct vfsmount;

-struct open_intent {
- int flags;
- int create_mode;
- struct file *file;
-};
-
enum { MAX_NESTED_LINKS = 8 };

struct nameidata {
@@ -25,11 +19,6 @@ struct nameidata {
int last_type;
unsigned depth;
char *saved_names[MAX_NESTED_LINKS + 1];
-
- /* Intent data */
- union {
- struct open_intent open;
- } intent;
};

/*
--
1.7.7

2012-06-05 13:13:18

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 14/21] ceph: implement i_op->atomic_open()

From: Miklos Szeredi <[email protected]>

Add an ->atomic_open implementation which replaces the atomic lookup+open+create
operation implemented via ->lookup and ->create operations.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Sage Weil <[email protected]>
---
fs/ceph/dir.c | 68 ++++++++++++++++++++++++++++++++++--------------------
fs/ceph/file.c | 21 ++++++++---------
fs/ceph/super.h | 5 ++-
3 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index c4b7832..75df600 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -594,14 +594,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
if (err < 0)
return ERR_PTR(err);

- /* open (but not create!) intent? */
- if (nd &&
- (nd->flags & LOOKUP_OPEN) &&
- !(nd->intent.open.flags & O_CREAT)) {
- int mode = nd->intent.open.create_mode & ~current->fs->umask;
- return ceph_lookup_open(dir, dentry, nd, mode);
- }
-
/* can we conclude ENOENT locally? */
if (dentry->d_inode == NULL) {
struct ceph_inode_info *ci = ceph_inode(dir);
@@ -642,6 +634,47 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
return dentry;
}

+struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct opendata *od, unsigned flags, umode_t mode,
+ bool *created)
+{
+ int err;
+ struct dentry *res = NULL;
+ struct file *filp;
+
+ if (!(flags & O_CREAT)) {
+ if (dentry->d_name.len > NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ err = ceph_init_dentry(dentry);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ return ceph_lookup_open(dir, dentry, od, flags, mode);
+ }
+
+ if (d_unhashed(dentry)) {
+ res = ceph_lookup(dir, dentry, NULL);
+ if (IS_ERR(res))
+ return ERR_CAST(res);
+
+ if (res)
+ dentry = res;
+ }
+
+ /* We don't deal with positive dentries here */
+ if (dentry->d_inode) {
+ finish_no_open(od, res);
+ return NULL;
+ }
+
+ *created = true;
+ filp = ceph_lookup_open(dir, dentry, od, flags, mode);
+ dput(res);
+
+ return filp;
+}
+
/*
* If we do a create but get no trace back from the MDS, follow up with
* a lookup (the VFS expects us to link up the provided dentry).
@@ -702,23 +735,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd)
{
- dout("create in dir %p dentry %p name '%.*s'\n",
- dir, dentry, dentry->d_name.len, dentry->d_name.name);
-
- if (ceph_snap(dir) != CEPH_NOSNAP)
- return -EROFS;
-
- if (nd) {
- BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
- dentry = ceph_lookup_open(dir, dentry, nd, mode);
- /* hrm, what should i do here if we get aliased? */
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- return 0;
- }
-
- /* fall back to mknod */
- return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0);
+ return ceph_mknod(dir, dentry, mode, 0);
}

static int ceph_symlink(struct inode *dir, struct dentry *dentry,
@@ -1357,6 +1374,7 @@ const struct inode_operations ceph_dir_iops = {
.rmdir = ceph_unlink,
.rename = ceph_rename,
.create = ceph_create,
+ .atomic_open = ceph_atomic_open,
};

const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4bf9773..e34dc22 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -213,21 +213,15 @@ out:
* may_open() fails, the struct *file gets cleaned up (i.e.
* ceph_release gets called). So fear not!
*/
-/*
- * flags
- * path_lookup_open -> LOOKUP_OPEN
- * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
- */
-struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd, int mode)
+struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+ struct opendata *od, unsigned flags, umode_t mode)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
- struct file *file;
+ struct file *file = NULL;
struct ceph_mds_request *req;
struct dentry *ret;
int err;
- int flags = nd->intent.open.flags;

dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -253,14 +247,19 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
err = ceph_handle_notrace_create(dir, dentry);
if (err)
goto out;
- file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
+ file = finish_open(od, req->r_dentry, ceph_open);
if (IS_ERR(file))
err = PTR_ERR(file);
out:
ret = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req);
dout("ceph_lookup_open result=%p\n", ret);
- return ret;
+
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ dput(ret);
+ return err ? ERR_PTR(err) : file;
}

int ceph_release(struct inode *inode, struct file *file)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 8471db9..e61e546 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -806,8 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages,
loff_t off, size_t len);
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
extern int ceph_open(struct inode *inode, struct file *file);
-extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd, int mode);
+extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+ struct opendata *od, unsigned flags,
+ umode_t mode);
extern int ceph_release(struct inode *inode, struct file *filp);

/* dir.c */
--
1.7.7

2012-06-05 13:13:45

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 13/21] ceph: remove unused arg from ceph_lookup_open()

From: Miklos Szeredi <[email protected]>

What was the purpose of this?

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Sage Weil <[email protected]>
---
fs/ceph/dir.c | 4 ++--
fs/ceph/file.c | 3 +--
fs/ceph/super.h | 3 +--
3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e8094b..c4b7832 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -599,7 +599,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
(nd->flags & LOOKUP_OPEN) &&
!(nd->intent.open.flags & O_CREAT)) {
int mode = nd->intent.open.create_mode & ~current->fs->umask;
- return ceph_lookup_open(dir, dentry, nd, mode, 1);
+ return ceph_lookup_open(dir, dentry, nd, mode);
}

/* can we conclude ENOENT locally? */
@@ -710,7 +710,7 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,

if (nd) {
BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
- dentry = ceph_lookup_open(dir, dentry, nd, mode, 0);
+ dentry = ceph_lookup_open(dir, dentry, nd, mode);
/* hrm, what should i do here if we get aliased? */
if (IS_ERR(dentry))
return PTR_ERR(dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 988d4f3..4bf9773 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -219,8 +219,7 @@ out:
* path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
*/
struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd, int mode,
- int locked_dir)
+ struct nameidata *nd, int mode)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index fc35036..8471db9 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -807,8 +807,7 @@ extern int ceph_copy_from_page_vector(struct page **pages,
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
extern int ceph_open(struct inode *inode, struct file *file);
extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd, int mode,
- int locked_dir);
+ struct nameidata *nd, int mode);
extern int ceph_release(struct inode *inode, struct file *filp);

/* dir.c */
--
1.7.7

2012-06-05 13:14:06

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 11/21] fuse: implement i_op->atomic_open()

From: Miklos Szeredi <[email protected]>

Add an ->atomic_open implementation which replaces the atomic open+create
operation implemented via ->create. No functionality is changed.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/dir.c | 94 ++++++++++++++++++++++++++++++++++++++++----------------
1 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index df5ac04..b0063d3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -369,8 +369,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
* If the filesystem doesn't support this, then fall back to separate
* 'mknod' + 'open' requests.
*/
-static int fuse_create_open(struct inode *dir, struct dentry *entry,
- umode_t mode, struct nameidata *nd)
+static struct file *fuse_create_open(struct inode *dir, struct dentry *entry,
+ struct opendata *od, unsigned flags,
+ umode_t mode)
{
int err;
struct inode *inode;
@@ -382,14 +383,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_entry_out outentry;
struct fuse_file *ff;
struct file *file;
- int flags = nd->intent.open.flags;
-
- if (fc->no_create)
- return -ENOSYS;

forget = fuse_alloc_forget();
+ err = -ENOMEM;
if (!forget)
- return -ENOMEM;
+ goto out_err;

req = fuse_get_req(fc);
err = PTR_ERR(req);
@@ -428,11 +426,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
req->out.args[1].value = &outopen;
fuse_request_send(fc, req);
err = req->out.h.error;
- if (err) {
- if (err == -ENOSYS)
- fc->no_create = 1;
+ if (err)
goto out_free_ff;
- }

err = -EIO;
if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
@@ -448,28 +443,78 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(ff, flags);
fuse_queue_forget(fc, forget, outentry.nodeid, 1);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out_err;
}
kfree(forget);
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir);
- file = lookup_instantiate_filp(nd, entry, generic_file_open);
+ file = finish_open(od, entry, generic_file_open);
if (IS_ERR(file)) {
fuse_sync_release(ff, flags);
- return PTR_ERR(file);
+ } else {
+ file->private_data = fuse_file_get(ff);
+ fuse_finish_open(inode, file);
}
- file->private_data = fuse_file_get(ff);
- fuse_finish_open(inode, file);
- return 0;
+ return file;

- out_free_ff:
+out_free_ff:
fuse_file_free(ff);
- out_put_request:
+out_put_request:
fuse_put_request(fc, req);
- out_put_forget_req:
+out_put_forget_req:
kfree(forget);
- return err;
+out_err:
+ return ERR_PTR(err);
+}
+
+static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry,
+ struct opendata *od, unsigned flags,
+ umode_t mode, bool *created)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct file *file;
+ struct dentry *res = NULL;
+
+ if (d_unhashed(entry)) {
+ res = fuse_lookup(dir, entry, NULL);
+ if (IS_ERR(res))
+ return ERR_CAST(res);
+
+ if (res)
+ entry = res;
+ }
+
+ if (!(flags & O_CREAT) || entry->d_inode)
+ goto no_open;
+
+ /* Only creates */
+ *created = true;
+
+ if (fc->no_create)
+ goto mknod;
+
+ file = fuse_create_open(dir, entry, od, flags, mode);
+ if (PTR_ERR(file) == -ENOSYS) {
+ fc->no_create = 1;
+ goto mknod;
+ }
+out_dput:
+ dput(res);
+ return file;
+
+mknod:
+ err = fuse_mknod(dir, entry, mode, 0);
+ if (err) {
+ file = ERR_PTR(err);
+ goto out_dput;
+ }
+no_open:
+ finish_no_open(od, res);
+ return NULL;
}

/*
@@ -573,12 +618,6 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
struct nameidata *nd)
{
- if (nd) {
- int err = fuse_create_open(dir, entry, mode, nd);
- if (err != -ENOSYS)
- return err;
- /* Fall back on mknod */
- }
return fuse_mknod(dir, entry, mode, 0);
}

@@ -1637,6 +1676,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
.link = fuse_link,
.setattr = fuse_setattr,
.create = fuse_create,
+ .atomic_open = fuse_atomic_open,
.mknod = fuse_mknod,
.permission = fuse_permission,
.getattr = fuse_getattr,
--
1.7.7

2012-06-05 13:14:46

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 09/21] nfs: don't use nd->intent.open.flags

From: Miklos Szeredi <[email protected]>

Instead check LOOKUP_EXCL in nd->flags, which is basically what the open intent
flags were used for.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 9 ++++-----
1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 45015d3..0432f47 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1538,7 +1538,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
struct dentry *parent = NULL;
struct inode *inode;
struct inode *dir;
- int openflags, ret = 0;
+ int ret = 0;

if (nd->flags & LOOKUP_RCU)
return -ECHILD;
@@ -1562,9 +1562,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
goto no_open_dput;
- openflags = nd->intent.open.flags;
/* We cannot do exclusive creation on a positive dentry */
- if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
+ if (nd && nd->flags & LOOKUP_EXCL)
goto no_open_dput;

/* Let f_op->open() actually open (and revalidate) the file */
@@ -1643,8 +1642,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry,
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;

- if (nd)
- open_flags = nd->intent.open.flags;
+ if (nd && !(nd->flags & LOOKUP_EXCL))
+ open_flags = O_CREAT;

error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
if (error != 0)
--
1.7.7

2012-06-05 13:16:12

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 02/21] vfs: do_last(): separate O_CREAT specific code

From: Miklos Szeredi <[email protected]>

Check O_CREAT on the slow lookup paths where necessary. This allows the rest to
be shared with plain open.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/namei.c | 33 +++++++++++++++++----------------
1 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index d6e8c55..73e824c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2265,22 +2265,23 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
inode = path->dentry->d_inode;
}
goto finish_lookup;
- }
-
- /* create side of things */
- /*
- * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been
- * cleared when we got to the last component we are about to look up
- */
- error = complete_walk(nd);
- if (error)
- return ERR_PTR(error);
+ } else {
+ /* create side of things */
+ /*
+ * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
+ * has been cleared when we got to the last component we are
+ * about to look up
+ */
+ error = complete_walk(nd);
+ if (error)
+ return ERR_PTR(error);

- audit_inode(pathname, dir);
- error = -EISDIR;
- /* trailing slashes? */
- if (nd->last.name[nd->last.len])
- goto exit;
+ audit_inode(pathname, dir);
+ error = -EISDIR;
+ /* trailing slashes? */
+ if (nd->last.name[nd->last.len])
+ goto exit;
+ }

retry_lookup:
mutex_lock(&dir->d_inode->i_mutex);
@@ -2296,7 +2297,7 @@ retry_lookup:
path->mnt = nd->path.mnt;

/* Negative dentry, just create the file */
- if (!dentry->d_inode) {
+ if (!dentry->d_inode && (open_flag & O_CREAT)) {
umode_t mode = op->mode;
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current_umask();
--
1.7.7

2012-06-05 15:40:22

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Tue, Jun 5, 2012 at 6:10 AM, Miklos Szeredi <[email protected]> wrote:
>
> This version has one bugfix and several cleanups, reported by David Howells.
> Also updated documentation in Documentation/filesytems/{vfs.txt,Locking}.

I assume the "one bugfix" is against an earlier version of this
series, not against the currently merged stuff? Because this is too
late for 3.5, so if the bugfix is for current code, please send that
separately..

Linus

2012-06-05 15:50:13

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

Linus Torvalds <[email protected]> writes:

> On Tue, Jun 5, 2012 at 6:10 AM, Miklos Szeredi <[email protected]> wrote:
>>
>> This version has one bugfix and several cleanups, reported by David Howells.
>> Also updated documentation in Documentation/filesytems/{vfs.txt,Locking}.
>
> I assume the "one bugfix" is against an earlier version of this
> series, not against the currently merged stuff?

Right, the fix is against the second part of the series, so code
committed into 3.5 shouldn't be affected.

Thanks,
Miklos

2012-06-10 03:49:25

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Tue, Jun 05, 2012 at 03:10:11PM +0200, Miklos Szeredi wrote:
> This is part 2 of the atomic open series. It introduces i_op->atomic_open() and
> converts filesystems that abuse ->lookup() and ->create() to use this new
> interface instead.
>
> This version has one bugfix and several cleanups, reported by David Howells.
> Also updated documentation in Documentation/filesytems/{vfs.txt,Locking}.
>
> Al, please apply.
>
> git tree is here:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git atomic-open.v6

I'm more or less OK with that; one major exception is your struct opendata.
I think it needs to go. Really. You are using it to store three references -
file, vfsmount and dentry. You allocate struct file at the very beginning
and store it there; no arguments with that, if you are opening a file, by
damn you do need one. But that's where it starts to get really, really
odd. At some point you find vfsmount to be used. Again, you'd better -
we will need it. It ends up in file->f_path.mnt. And that's struct file
opendata ->filp points to. So why the hell do you not store it right there?
The same goes for dentry. You will need something to put into
file->f_path.dentry; that field is not going away, no matter what - we do
need to know which filesystem object read() and write() should deal with,
after all. So why bother with storing it in opendata ->dentry in the
meanwhile, when you have the instance of struct file whose ->f_path.dentry
is to be determined by all that activity?

And if you do that, your struct opendata suddenly shrinks to single struct
file *. Which you assign in the very beginning, pass by reference to all
that code and do not reassign. OK, you do reassign it. In one place:
res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred());
if (!IS_ERR(res))
od->filp = NULL;
return res;
Here do_dentry_open() returns one of two things - the value of its third
argument (i.e. od->filp) or ERR_PTR(-E...). In the latter case struct
file refered to by the third argument has been freed.

So the only remaining reason for having that thing is this: what if we
call ->atomic_open(), but it doesn't call finish_open()? Then we need
to free that unused struct file. If finish_open() failed, we wouldn't.
Same if it succeeded and something *after* it in ->atomic_open() failed
(then we need to fput() that file - your code in ceph leaks it, BTW).
Fair enough. So we need to add one more helper that would discard that
half-set-up struct file as we want it to be discarded. That's all.

IOW, you get three helpers:
finish_open()
finish_no_open() (really confusing name, BTW)
fail_open()

All of them taking struct file *. Any call of ->atomic_open() must
call one of those. Rules:
* if we called finish_no_open(), we need to return NULL.
That's the "won't open, here's your dentry, open it yourself".
BTW, I would've made it return void * - always NULL. So that
callers would be of form return finish_no_open(file, dentry);
* if we called finish_open() and it returned us an error,
we need to return that to our caller. End of story, we'd failed.
* if we called finish_open() and it succeeded, the file
has been opened. Either return it, or fput() it and return ERR_PTR()
if you have something fail past that point.
* otherwise we need to call failed_open(file). And return
appropriate ERR_PTR(). Might make sense to turn that into
return failed_open(file, ERR_PTR(-E...)); to make dumb errors easy
to spot.

And this is it - no more struct opendata, considerably simpler cleanup
in fs/namei.c and much more natural prototypes. Either ->atomic_open()
returns an opened struct file, or it gives us ERR_PTR(...) (in which case
struct file has been freed/vfsmount dropped/etc.) or it gives us NULL.
In the last case struct file is still there and file->f_path contains
the vfsmount/dentry pair we are supposed to deal with (e.g. on hitting
a symlink).

I can massage it to that form, or I can leave conversion at the end;
up to you - it's going to be a single pull request anyway, so prototype
changes midway through the series are not something tragic. OTOH, folding
said changes back into the original patches might make for less confusing
reading later. Credit for dealing with that really, really scary pile
of shit is clearly yours anyway. And I'm absolutely serious - that has
been a work that needed doing and nobody had stomach for it. You have
my sincere gratitude.

2012-06-10 05:54:58

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

BTW, I realize that right now you are relying on -EOPENSTALE
never coming out of ->atomic_open(), but that needs to be documented.
For now I'm adding BUG_ON() there as a crude way of documenting it;
AFAICS, there's no point whatsoever doing retries in that case.

2012-06-10 11:11:01

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Sun, Jun 10, 2012 at 04:49:21AM +0100, Al Viro wrote:

> So the only remaining reason for having that thing is this: what if we
> call ->atomic_open(), but it doesn't call finish_open()? Then we need
> to free that unused struct file. If finish_open() failed, we wouldn't.
> Same if it succeeded and something *after* it in ->atomic_open() failed
> (then we need to fput() that file - your code in ceph leaks it, BTW).
> Fair enough. So we need to add one more helper that would discard that
> half-set-up struct file as we want it to be discarded. That's all.

Actually, I take that back - that code in ceph is unreachable when
finish_open() succeeds.

Anyway, see vfs.git#atomic_open; it's a port of your queue + COMPLETELY
UNTESTED followups massaging it along the following lines:
* ->atomic_open() takes struct file * instead of struct opendata *
* it return int instead of struct file * - 0 for succeess, -E...
for error, 1 for "here's your sodding dentry, do it yourself". Said
dentry is returned via file->f_path.dentry.
* the same had been done to atomic_open()/lookup_open()/do_last()
* finish_open() takes struct file and returns an int
* it *also* takes int * - used to keep track of whether we'd done
successful do_dentry_open(), instead of "has opendata->filp been cleared?"
as in your variant. Said int * is what your bool *created of ->atomic_open()
and friends has been turned into. So the check in path_openat() is
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
put_filp(file);
}
which is as explicit as it gets, IMO.

The forest of failure exits in do_last() got cleaned up a bit, BTW. Probably
can be cleaned up some more...

WARNING: I haven't even tried to boot it. It builds, but this is all I can
promise at the moment. I'm about to fall down (it's 7am here already ;-/),
will give it some beating when I get up. It almost certainly has bugs, so
consider that as call for review and not much more.

2012-06-10 17:56:13

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Sun, Jun 10, 2012 at 12:10:56PM +0100, Al Viro wrote:

> WARNING: I haven't even tried to boot it. It builds, but this is all I can
> promise at the moment. I'm about to fall down (it's 7am here already ;-/),
> will give it some beating when I get up. It almost certainly has bugs, so
> consider that as call for review and not much more.

OK, it boots, mounts NFS and even manages to build a kernel on it. Which
probably would meet Linus' "it's perfect, ship it" criteria, so go ahead
and beat it up, folks.

2012-06-10 22:27:50

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Sun, Jun 10, 2012 at 06:56:05PM +0100, Al Viro wrote:
> On Sun, Jun 10, 2012 at 12:10:56PM +0100, Al Viro wrote:
>
> > WARNING: I haven't even tried to boot it. It builds, but this is all I can
> > promise at the moment. I'm about to fall down (it's 7am here already ;-/),
> > will give it some beating when I get up. It almost certainly has bugs, so
> > consider that as call for review and not much more.
>
> OK, it boots, mounts NFS and even manages to build a kernel on it. Which
> probably would meet Linus' "it's perfect, ship it" criteria, so go ahead
> and beat it up, folks.

And in vfs.git#master there's a followup to that. ->d_revalidate(),
->lookup() and ->create() are nameidata-free now. IOW, open intents
crap is well and truly dead. Good riddance.

Miklos, if I see you at Kernel Summit (or anywhere else, for that matter),
I owe you a bottle of booze of your choice.

2012-06-11 10:58:11

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On 06/10/2012 08:56 PM, Al Viro wrote:

> On Sun, Jun 10, 2012 at 12:10:56PM +0100, Al Viro wrote:
>
>> WARNING: I haven't even tried to boot it. It builds, but this is all I can
>> promise at the moment. I'm about to fall down (it's 7am here already ;-/),
>> will give it some beating when I get up. It almost certainly has bugs, so
>> consider that as call for review and not much more.
>
> OK, it boots, mounts NFS and even manages to build a kernel on it. Which
> probably would meet Linus' "it's perfect, ship it" criteria, so go ahead
> and beat it up, folks.


If the "manages to build a kernel on it" part involves a "git clone" first
then I agree. From past bug I found that even if xfstest cthon and bunch of
other testes pass, I still got data-corruption/bugs under
"git clone linux, umount, mount, git status". It's my best test so far.
(It needs to be a very big git tree with lots of history as well, like linux or gcc)

Cheers
Boaz

2012-06-11 15:18:05

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

Al Viro <[email protected]> writes:

>
> I'm more or less OK with that; one major exception is your struct opendata.
> I think it needs to go. Really. You are using it to store three references -
> file, vfsmount and dentry.

That's not the real purpose. We can store those in struct file, I can
see that.

The real reason I went with a separate and opaque type there is that
that thing is simply not an open file. If it was a struct file and the
filesystem would do an fput() on it by accident it would lead to really
ugly problems, and the complier would have no chance at detecting it.

If it's a separate type (and it can have just that struct file embedded
into it) then such mistakes are instantly detected by the compiler

So I really don't see the downsides of having a new type for that.

Thanks,
Miklos

2012-06-11 16:32:38

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

I don't like this:

make ->atomic_open() return int

Change of calling conventions:
old new
NULL 1
file 0
ERR_PTR(-ve) -ve

Caller *knows* that struct file *; no need to return it.

Yes, ordinary filesystems the caller knows the 'struct file *' but for
overlayfs (and stacking in general) wants to be able to return a file
obtained by opening the file on another filesystem.

Thanks,
Miklos

2012-06-13 11:21:17

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Sun, Jun 10, 2012 at 11:27:45PM +0100, Al Viro wrote:
> And in vfs.git#master there's a followup to that. ->d_revalidate(),
> ->lookup() and ->create() are nameidata-free now. IOW, open intents
> crap is well and truly dead. Good riddance.
>
> Miklos, if I see you at Kernel Summit (or anywhere else, for that matter),
> I owe you a bottle of booze of your choice.

It also shows that were are really close to getting nameidata out of the
filesystem. The remaning issues are kern_path_parent usages in devtmpfs
and audit_watch, as well as direct access to nd->path in
proc_pid_follow_link. A hacky patch to demonstrate this is below (not
intended for submission).


diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 765c3a2..b5e907b 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -25,6 +25,8 @@
#include <linux/slab.h>
#include <linux/kthread.h>

+#include "../internal.h"
+
static struct task_struct *thread;

#if defined CONFIG_DEVTMPFS_MOUNT
diff --git a/fs/internal.h b/fs/internal.h
index 8a9f5fa..3826dcc 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -115,3 +115,23 @@ extern int invalidate_inodes(struct super_block *, bool);
* dcache.c
*/
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
+
+
+enum { MAX_NESTED_LINKS = 8 };
+
+struct nameidata {
+ struct path path;
+ struct qstr last;
+ struct path root;
+ struct inode *inode; /* path.dentry.d_inode */
+ unsigned int flags;
+ unsigned seq;
+ int last_type;
+ unsigned depth;
+ char *saved_names[MAX_NESTED_LINKS + 1];
+};
+
+/*
+ * Type of the last component on LOOKUP_PARENT
+ */
+enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
diff --git a/fs/namei.c b/fs/namei.c
index 1fc02ff..2ea6608 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3629,6 +3629,18 @@ out:
return len;
}

+void nd_set_link(struct nameidata *nd, char *path)
+{
+ nd->saved_names[nd->depth] = path;
+}
+EXPORT_SYMBOL(nd_set_link);
+
+char *nd_get_link(struct nameidata *nd)
+{
+ return nd->saved_names[nd->depth];
+}
+EXPORT_SYMBOL(nd_get_link);
+
/*
* A helper for ->readlink(). This should be used *ONLY* for symlinks that
* have ->follow_link() touching nd only in nd_set_link(). Using (or not
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8eaa5ea..5453fdd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -90,6 +90,7 @@
#endif
#include <trace/events/oom.h>
#include "internal.h"
+#include "../internal.h"

/* NOTE:
* Implementing inode permission operations in /proc is almost
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 54dadda..5d92c0a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -5,26 +5,7 @@
#include <linux/linkage.h>
#include <linux/path.h>

-struct vfsmount;
-
-enum { MAX_NESTED_LINKS = 8 };
-
-struct nameidata {
- struct path path;
- struct qstr last;
- struct path root;
- struct inode *inode; /* path.dentry.d_inode */
- unsigned int flags;
- unsigned seq;
- int last_type;
- unsigned depth;
- char *saved_names[MAX_NESTED_LINKS + 1];
-};
-
-/*
- * Type of the last component on LOOKUP_PARENT
- */
-enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
+struct nameidata;

/*
* The bitmask for a lookup event:
@@ -71,9 +52,6 @@ extern int kern_path_parent(const char *, struct nameidata *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);

-extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
- int (*open)(struct inode *, struct file *));
-
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);

extern int follow_down_one(struct path *);
@@ -83,15 +61,8 @@ extern int follow_up(struct path *);
extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);

-static inline void nd_set_link(struct nameidata *nd, char *path)
-{
- nd->saved_names[nd->depth] = path;
-}
-
-static inline char *nd_get_link(struct nameidata *nd)
-{
- return nd->saved_names[nd->depth];
-}
+extern void nd_set_link(struct nameidata *nd, char *path);
+extern char *nd_get_link(struct nameidata *nd);

static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
{
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index e683869..0943897 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -32,6 +32,8 @@
#include <linux/security.h>
#include "audit.h"

+#include "../fs/internal.h"
+
/*
* Reference counting:
*

2012-06-14 08:08:16

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Wed, Jun 13, 2012 at 07:21:12AM -0400, Christoph Hellwig wrote:

> It also shows that were are really close to getting nameidata out of the
> filesystem. The remaning issues are kern_path_parent usages in devtmpfs
> and audit_watch, as well as direct access to nd->path in
> proc_pid_follow_link. A hacky patch to demonstrate this is below (not
> intended for submission).

Those are easily handled - kern_path_parent() ones are begging for
something like
int path_lookup_locked(char *name, struct path *path)
resulting in dentry/vfsmount pair stored in path, dentry possibly
negative and its parent known to have locked inode (i.e. path->dentry->d_parent
is stable until we unlock path->dentry->d_parent->d_inode->i_mutex).

And proc_pid_follow_link() is easier yet - explicit nd_jump_link(nd, path),
to be called by magical symlinks' ->follow_link().

Can do.. As for Miklos' objection re overlayfs - I'm tempted to make
path_openat() take struct file * as explicit argument, convert the
existing callers into path_openat(get_empty_filp(), ...) and let the
stacking ones use that.

My objection against opendata is that it's both an offense against Occam's
Razor (i.e. opaque object where none is needed) *and* not really opaque at
that - restrictions on the sequence of operations are non-trivial and
that has at least as high potential for bugs as bogus fput() done by broken fs.

2012-06-17 20:38:02

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

vfs.git#master fails xfstests 005 (Test symlinks & ELOOP) for me. I'll
try to get it bisected tomorrow, unless anyone gets to it earlier.

2012-06-18 11:58:10

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Sun, Jun 17, 2012 at 04:37:55PM -0400, Christoph Hellwig wrote:
> vfs.git#master fails xfstests 005 (Test symlinks & ELOOP) for me. I'll
> try to get it bisected tomorrow, unless anyone gets to it earlier.

this failure was caused by

"namei.c: let follow_link() do put_link() on failure"

and the reason was that we didn't do a proper path_put when failing
inside follow_link(). Fix that should be squashed in below. With this
xfstests 049 is still failing, I'll look into that next.


Index: linux-2.6/fs/namei.c
===================================================================
--- linux-2.6.orig/fs/namei.c 2012-06-18 12:17:08.084097978 +0200
+++ linux-2.6/fs/namei.c 2012-06-18 13:39:29.764224510 +0200
@@ -597,18 +591,19 @@ static inline void put_link(struct namei
static __always_inline int
follow_link(struct path *link, struct nameidata *nd, void **p)
{
- int error;
struct dentry *dentry = link->dentry;
+ int error;
+ char *s;

BUG_ON(nd->flags & LOOKUP_RCU);

if (link->mnt == nd->path.mnt)
mntget(link->mnt);

- if (unlikely(current->total_link_count >= 40)) {
- path_put(&nd->path);
- return -ELOOP;
- }
+ error = -ELOOP;
+ if (unlikely(current->total_link_count >= 40))
+ goto out_put_nd_path;
+
cond_resched();
current->total_link_count++;

@@ -616,31 +611,36 @@ follow_link(struct path *link, struct na
nd_set_link(nd, NULL);

error = security_inode_follow_link(link->dentry, nd);
- if (error) {
- path_put(&nd->path);
- return error;
- }
+ if (error)
+ goto out_put_nd_path;

nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
error = PTR_ERR(*p);
- if (!IS_ERR(*p)) {
- char *s = nd_get_link(nd);
- error = 0;
- if (s)
- error = __vfs_follow_link(nd, s);
- else if (nd->last_type == LAST_BIND) {
- nd->flags |= LOOKUP_JUMPED;
- nd->inode = nd->path.dentry->d_inode;
- if (nd->inode->i_op->follow_link) {
- /* stepped on a _really_ weird one */
- path_put(&nd->path);
- error = -ELOOP;
- }
+ if (IS_ERR(*p))
+ goto out_put_link;
+
+ error = 0;
+ s = nd_get_link(nd);
+ if (s) {
+ error = __vfs_follow_link(nd, s);
+ } else if (nd->last_type == LAST_BIND) {
+ nd->flags |= LOOKUP_JUMPED;
+ nd->inode = nd->path.dentry->d_inode;
+ if (nd->inode->i_op->follow_link) {
+ /* stepped on a _really_ weird one */
+ path_put(&nd->path);
+ error = -ELOOP;
}
- if (unlikely(error))
- put_link(nd, link, p);
}
+ if (unlikely(error))
+ put_link(nd, link, p);
+ return error;
+
+out_put_nd_path:
+ path_put(&nd->path);
+out_put_link:
+ path_put(link);
return error;
}

2012-06-18 13:12:57

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

And here is fix number two, to be folded into the same commit:


Index: linux-2.6/fs/namei.c
===================================================================
--- linux-2.6.orig/fs/namei.c 2012-06-18 15:01:47.540350941 +0200
+++ linux-2.6/fs/namei.c 2012-06-18 15:01:48.160350957 +0200
@@ -640,7 +640,8 @@ follow_link(struct path *link, struct na
}
}
if (unlikely(error))
- put_link(nd, link, p);
+ put_link(nd, link, *p);
+
return error;

out_put_nd_path:

2012-06-18 14:27:54

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

Here's fix against e8c30bb7 "->atomic_open() prototype change - pass int
* instead of bool *"

Rename FILE_CREATE to FILE_CREATED because it conflicts with a define in
fs/cifs/cifspdu.h

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/9p/vfs_inode.c | 2 +-
fs/9p/vfs_inode_dotl.c | 2 +-
fs/ceph/dir.c | 2 +-
fs/cifs/dir.c | 4 ++--
fs/fuse/dir.c | 2 +-
fs/namei.c | 14 +++++++-------
include/linux/fs.h | 2 +-
7 files changed, 14 insertions(+), 14 deletions(-)

Index: linux-2.6/fs/9p/vfs_inode.c
===================================================================
--- linux-2.6.orig/fs/9p/vfs_inode.c 2012-06-18 13:50:17.000000000 +0200
+++ linux-2.6/fs/9p/vfs_inode.c 2012-06-18 15:52:59.000000000 +0200
@@ -925,7 +925,7 @@ v9fs_vfs_atomic_open(struct inode *dir,
v9fs_cache_inode_set_cookie(dentry->d_inode, file);
#endif

- *opened |= FILE_CREATE;
+ *opened |= FILE_CREATED;
out:
dput(res);
return err;
Index: linux-2.6/fs/9p/vfs_inode_dotl.c
===================================================================
--- linux-2.6.orig/fs/9p/vfs_inode_dotl.c 2012-06-18 13:50:17.000000000 +0200
+++ linux-2.6/fs/9p/vfs_inode_dotl.c 2012-06-18 15:57:11.000000000 +0200
@@ -362,7 +362,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *
if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
#endif
- *opened |= FILE_CREATE;
+ *opened |= FILE_CREATED;
out:
dput(res);
return err;
Index: linux-2.6/fs/ceph/dir.c
===================================================================
--- linux-2.6.orig/fs/ceph/dir.c 2012-06-18 13:50:17.000000000 +0200
+++ linux-2.6/fs/ceph/dir.c 2012-06-18 16:01:31.000000000 +0200
@@ -665,7 +665,7 @@ int ceph_atomic_open(struct inode *dir,
if (dentry->d_inode)
return finish_no_open(file, res);

- *opened |= FILE_CREATE;
+ *opened |= FILE_CREATED;
err = ceph_lookup_open(dir, dentry, file, flags, mode, opened);
dput(res);

Index: linux-2.6/fs/cifs/dir.c
===================================================================
--- linux-2.6.orig/fs/cifs/dir.c 2012-06-18 13:50:17.000000000 +0200
+++ linux-2.6/fs/cifs/dir.c 2012-06-18 16:03:39.000000000 +0200
@@ -311,7 +311,7 @@ static int cifs_do_create(struct inode *
.device = 0,
};

- *created |= FILE_CREATE;
+ *created |= FILE_CREATED;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
args.uid = (__u64) current_fsuid();
if (inode->i_mode & S_ISGID)
@@ -466,7 +466,7 @@ int cifs_create(struct inode *inode, str
struct tcon_link *tlink;
__u16 fileHandle;
__u32 oplock;
- int created = FILE_CREATE;
+ int created = FILE_CREATED;

cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p",
inode, direntry->d_name.name, direntry);
Index: linux-2.6/fs/fuse/dir.c
===================================================================
--- linux-2.6.orig/fs/fuse/dir.c 2012-06-18 13:50:17.000000000 +0200
+++ linux-2.6/fs/fuse/dir.c 2012-06-18 16:04:46.000000000 +0200
@@ -490,7 +490,7 @@ static int fuse_atomic_open(struct inode
goto no_open;

/* Only creates */
- *opened |= FILE_CREATE;
+ *opened |= FILE_CREATED;

if (fc->no_create)
goto mknod;
Index: linux-2.6/fs/namei.c
===================================================================
--- linux-2.6.orig/fs/namei.c 2012-06-18 15:43:07.000000000 +0200
+++ linux-2.6/fs/namei.c 2012-06-18 16:05:27.000000000 +0200
@@ -2244,7 +2244,7 @@ static int atomic_open(struct nameidata

if (open_flag & O_EXCL) {
open_flag &= ~O_TRUNC;
- *opened |= FILE_CREATE;
+ *opened |= FILE_CREATED;
}

/*
@@ -2302,7 +2302,7 @@ static int atomic_open(struct nameidata
}

acc_mode = op->acc_mode;
- if (*opened & FILE_CREATE) {
+ if (*opened & FILE_CREATED) {
fsnotify_create(dir, dentry);
acc_mode = MAY_OPEN;
}
@@ -2374,7 +2374,7 @@ static int atomic_open(struct nameidata
*
* An error code is returned otherwise.
*
- * FILE_CREATE will be set in @*opened if the dentry was created and will be
+ * FILE_CREATED will be set in @*opened if the dentry was created and will be
* cleared otherwise prior to returning.
*/
static int lookup_open(struct nameidata *nd, struct path *path,
@@ -2388,7 +2388,7 @@ static int lookup_open(struct nameidata
int error;
bool need_lookup;

- *opened &= ~FILE_CREATE;
+ *opened &= ~FILE_CREATED;
dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
@@ -2426,7 +2426,7 @@ static int lookup_open(struct nameidata
if (error)
goto out_dput;
*want_write = true;
- *opened |= FILE_CREATE;
+ *opened |= FILE_CREATED;
error = security_path_mknod(&nd->path, dentry, mode, 0);
if (error)
goto out_dput;
@@ -2532,7 +2532,7 @@ static int do_last(struct nameidata *nd,
if (error)
goto out;

- if ((*opened & FILE_CREATE) ||
+ if ((*opened & FILE_CREATED) ||
!S_ISREG(file->f_path.dentry->d_inode->i_mode))
will_truncate = false;

@@ -2540,7 +2540,7 @@ static int do_last(struct nameidata *nd,
goto opened;
}

- if (*opened & FILE_CREATE) {
+ if (*opened & FILE_CREATED) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = false;
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2012-06-18 13:50:17.000000000 +0200
+++ linux-2.6/include/linux/fs.h 2012-06-18 16:05:37.000000000 +0200
@@ -2065,7 +2065,7 @@ extern struct file * dentry_open(struct
extern int filp_close(struct file *, fl_owner_t id);
extern char * getname(const char __user *);
enum {
- FILE_CREATE = 1,
+ FILE_CREATED = 1,
FILE_OPENED = 2
};
extern int finish_open(struct file *file, struct dentry *dentry,

2012-06-22 08:50:13

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Mon, Jun 18, 2012 at 04:27:41PM +0200, Miklos Szeredi wrote:
> Here's fix against e8c30bb7 "->atomic_open() prototype change - pass int
> * instead of bool *"
>
> Rename FILE_CREATE to FILE_CREATED because it conflicts with a define in
> fs/cifs/cifspdu.h

... and that leads to actual breakage in e.g. cifs_do_create(), since the macro
from cifspdu.h wins and we end up setting the wrong bit there. Folded.

2012-06-22 10:07:52

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 00/21] vfs: atomic open v6 (part 2)

On Mon, Jun 18, 2012 at 09:12:52AM -0400, Christoph Hellwig wrote:
> And here is fix number two, to be folded into the same commit:

Both folded. FWIW, I'm not sure I'm happy about the reorg of failure
exits in follow_link() - gcc can't figure out that follow_link(..., &cookie)
can't return 0 without having passed through the assignment to cookie.
Hell knows; I still don't like the calling conventions in that one...