Usage of the AUTOFS_TYPE_* defines is a little confusing and
appears inconsistent.
Signed-off-by: Ian Kent <[email protected]>
---
fs/autofs4/autofs_i.h | 6 ++----
fs/autofs4/expire.c | 2 +-
fs/autofs4/inode.c | 6 +++---
fs/autofs4/waitq.c | 8 ++++----
include/linux/auto_fs4.h | 5 +++++
5 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 69a2f5c..ea024d8 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -21,6 +21,8 @@
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
#define AUTOFS_IOC_COUNT 32
+#define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
+
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/time.h>
@@ -92,10 +94,6 @@ struct autofs_wait_queue {
#define AUTOFS_SBI_MAGIC 0x6d4a556d
-#define AUTOFS_TYPE_INDIRECT 0x0001
-#define AUTOFS_TYPE_DIRECT 0x0002
-#define AUTOFS_TYPE_OFFSET 0x0004
-
struct autofs_sb_info {
u32 magic;
int pipefd;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cdabb79..e79dd09 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -479,7 +479,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
if (arg && get_user(do_now, arg))
return -EFAULT;
- if (sbi->type & AUTOFS_TYPE_DIRECT)
+ if (sbi->type & AUTOFS_TYPE_TRIGGER)
dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
else
dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 7bb3e5b..9ca2d07 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -288,7 +288,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
*type = AUTOFS_TYPE_DIRECT;
break;
case Opt_offset:
- *type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET;
+ *type = AUTOFS_TYPE_OFFSET;
break;
default:
return 1;
@@ -336,7 +336,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi->sb = s;
sbi->version = 0;
sbi->sub_version = 0;
- sbi->type = 0;
+ sbi->type = AUTOFS_TYPE_INDIRECT;
sbi->min_proto = 0;
sbi->max_proto = 0;
mutex_init(&sbi->wq_mutex);
@@ -378,7 +378,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
}
root_inode->i_fop = &autofs4_root_operations;
- root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ?
+ root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ?
&autofs4_direct_root_inode_operations :
&autofs4_indirect_root_inode_operations;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 35216d1..6d87bb1 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
* is very similar for indirect mounts except only dentrys
* in the root of the autofs file system may be negative.
*/
- if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
+ if (sbi->type & AUTOFS_TYPE_TRIGGER)
return -ENOENT;
else if (!IS_ROOT(dentry->d_parent))
return -ENOENT;
@@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
return -ENOMEM;
/* If this is a direct mount request create a dummy name */
- if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
+ if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER)
qstr.len = sprintf(name, "%p", dentry);
else {
qstr.len = autofs4_getpath(sbi, dentry, &name);
@@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
type = autofs_ptype_expire_multi;
} else {
if (notify == NFY_MOUNT)
- type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
+ type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
autofs_ptype_missing_direct :
autofs_ptype_missing_indirect;
else
- type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
+ type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
autofs_ptype_expire_direct :
autofs_ptype_expire_indirect;
}
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index b785c6f..aa96a04 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -29,6 +29,11 @@
#define AUTOFS_EXP_IMMEDIATE 1
#define AUTOFS_EXP_LEAVES 2
+#define AUTOFS_TYPE_ANY 0x0000
+#define AUTOFS_TYPE_INDIRECT 0x0001
+#define AUTOFS_TYPE_DIRECT 0x0002
+#define AUTOFS_TYPE_OFFSET 0x0004
+
/* Daemon notification packet types */
enum autofs_notify {
NFY_NONE,
Patch to track the uid and gid of the last process to request a mount
for on an autofs dentry.
Signed-off-by: Ian Kent <[email protected]>
---
fs/autofs4/autofs_i.h | 3 +++
fs/autofs4/inode.c | 2 ++
fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 39 insertions(+), 0 deletions(-)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index ea024d8..fa76d18 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -63,6 +63,9 @@ struct autofs_info {
unsigned long last_used;
atomic_t count;
+ uid_t uid;
+ gid_t gid;
+
mode_t mode;
size_t size;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 9ca2d07..9408507 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
atomic_set(&ino->count, 0);
}
+ ino->uid = 0;
+ ino->gid = 0;
ino->mode = mode;
ino->last_used = jiffies;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 6d87bb1..7c60c0b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
status = wq->status;
+ /*
+ * For direct and offset mounts we need to track the requestrer
+ * uid and gid in the dentry info struct. This is so it can be
+ * supplied, on request, by the misc device ioctl interface.
+ * This is needed during daemon resatart when reconnecting
+ * to existing, active, autofs mounts. The uid and gid (and
+ * related string values) may be used for macro substitution
+ * in autofs mount maps.
+ */
+ if (!status) {
+ struct autofs_info *ino;
+ struct dentry *de = NULL;
+
+ /* direct mount or browsable map */
+ ino = autofs4_dentry_ino(dentry);
+ if (!ino) {
+ /* If not lookup actual dentry used */
+ de = d_lookup(dentry->d_parent, &dentry->d_name);
+ if (de)
+ ino = autofs4_dentry_ino(de);
+ }
+
+ /* Set mount requester */
+ if (ino) {
+ spin_lock(&sbi->fs_lock);
+ ino->uid = wq->uid;
+ ino->gid = wq->gid;
+ spin_unlock(&sbi->fs_lock);
+ }
+
+ if (de)
+ dput(de);
+ }
+
/* Are we the last process to need status? */
mutex_lock(&sbi->wq_mutex);
if (!--wq->wait_ctr)
Patch to add documentation for the miscellaneous device module of
autofs4.
Signed-off-by: Ian Kent <[email protected]>
---
.../filesystems/autofs4-mount-control.txt | 393 ++++++++++++++++++++
1 files changed, 393 insertions(+), 0 deletions(-)
create mode 100644 Documentation/filesystems/autofs4-mount-control.txt
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt
new file mode 100644
index 0000000..ab8c78d
--- /dev/null
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -0,0 +1,393 @@
+
+Miscellaneous Device control operations for the autofs4 kernel module
+====================================================================
+
+The problem
+===========
+
+There is a problem with active restarts in autofs (that is to say
+restarting autofs when there are busy mounts).
+
+During normal operation autofs uses a file descriptor opened on the
+directory that is being managed in order to be able to issue control
+operations. Using a file descriptor gives ioctl operations access to
+autofs specific information stored in the super block. The operations
+are things such as setting an autofs mount catatonic, setting the
+expire timeout and requesting expire checks. As is explained below,
+certain types of autofs triggered mounts can end up covering an autofs
+mount itself which prevents us being able to use open(2) to obtain a
+file descriptor for these operations if we don't already have one open.
+
+Currently autofs uses "umount -l" (lazy umount) to clear active mounts
+at restart. While using lazy umount works for most cases, anything that
+needs to walk back up the mount tree to construct a path, such as
+getcwd(2) and the proc file system /proc/<pid>/cwd, no longer works
+because the point from which the path is constructed has been detached
+from the mount tree.
+
+The actual problem with autofs is that it can't reconnect to existing
+mounts. Immediately one thinks of just adding the ability to remount
+autofs file systems would solve it, but alas, that can't work. This is
+because autofs direct mounts and the implementation of "on demand mount
+and expire" of nested mount trees have the file system mounted directly
+on top of the mount trigger directory dentry.
+
+For example, there are two types of automount maps, direct (in the kernel
+module source you will see a third type called an offset, which is just
+a direct mount in disguise) and indirect.
+
+Here is a master map with direct and indirect map entries:
+
+/- /etc/auto.direct
+/test /etc/auto.indirect
+
+and the corresponding map files:
+
+/etc/auto.direct:
+
+/automount/dparse/g6 budgie:/autofs/export1
+/automount/dparse/g1 shark:/autofs/export1
+and so on.
+
+/etc/auto.indirect:
+
+g1 shark:/autofs/export1
+g6 budgie:/autofs/export1
+and so on.
+
+For the above indirect map an autofs file system is mounted on /test and
+mounts are triggered for each sub-directory key by the inode lookup
+operation. So we see a mount of shark:/autofs/export1 on /test/g1, for
+example.
+
+The way that direct mounts are handled is by making an autofs mount on
+each full path, such as /automount/dparse/g1, and using it as a mount
+trigger. So when we walk on the path we mount shark:/autofs/export1 "on
+top of this mount point". Since these are always directories we can
+use the follow_link inode operation to trigger the mount.
+
+But, each entry in direct and indirect maps can have offsets (making
+them multi-mount map entries).
+
+For example, an indirect mount map entry could also be:
+
+g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export1 \
+ /s2/ss2 shark:/autofs/export2
+
+and a similarly a direct mount map entry could also be:
+
+/automount/dparse/g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export2 \
+ /s2/ss2 shark:/autofs/export2
+
+One of the issues with version 4 of autofs was that, when mounting an
+entry with a large number of offsets, possibly with nesting, we needed
+to mount and umount all of the offsets as a single unit. Not really a
+problem, except for people with a large number of offsets in map entries.
+This mechanism is used for the well known "hosts" map and we have seen
+cases (in 2.4) where the available number of mounts are exhausted or
+where the number of privileged ports available is exhausted.
+
+In version 5 we mount only as we go down the tree of offsets and
+similarly for expiring them which resolves the above problem. There is
+somewhat more detail to the implementation but it isn't needed for the
+sake of the problem explanation. The one important detail is that these
+offsets are implemented using the same mechanism as the direct mounts
+above and so the mount points can be covered by a mount.
+
+The current autofs implementation uses an ioctl file descriptor opened
+on the mount point for control operations. The references held by the
+descriptor are accounted for in checks made to determine if a mount is
+in use and is also used to access autofs file system information held
+in the mount super block. So the use of a file handle needs to be
+retained.
+
+
+The Solution
+============
+
+To be able to restart autofs leaving existing direct, indirect and
+offset mounts in place we need to be able to obtain a file handle
+for these potentially covered autofs mount points. Rather than just
+implement an isolated operation it was decided to re-implement the
+existing ioctl interface and add new operations to provide this
+functionality.
+
+In addition, to be able to reconstruct a mount tree that has busy mounts,
+the uid and gid of the last user that triggered the mount needs to be
+available because these can be used as macro substitution variables in
+autofs maps. They are recorded at mount request time and an operation
+has been added to retrieve them.
+
+Since we're re-implementing the control interface, a couple of other
+problems with the existing interface have been addressed. First, when
+a mount or expire operation completes a status is returned to the
+kernel by either a "send ready" or a "send fail" operation. The
+"send fail" operation of the ioctl interface could only ever send
+ENOENT so the re-implementation allows user space to send an actual
+status. Another expensive operation in user space, for those using
+very large maps, is discovering if a mount is present. Usually this
+involves scanning /proc/mounts and since it needs to be done quite
+often it can introduce significant overhead when there are many entries
+in the mount table. An operation to lookup the mount status of a mount
+point dentry (covered or not) has also been added.
+
+Current kernel development policy recommends avoiding the use of the
+ioctl mechanism in favor of systems such as Netlink. An implementation
+using this system was attempted to evaluate its suitability and it was
+found to be inadequate, in this case. The Generic Netlink system was
+used for this as raw Netlink would lead to a significant increase in
+complexity. There's no question that the Generic Netlink system is an
+elegant solution for common case ioctl functions but it's not a complete
+replacement probably because it's primary purpose in life is to be a
+message bus implementation rather than specifically an ioctl replacement.
+While it would be possible to work around this there is one concern
+that lead to the decision to not use it. This is that the autofs
+expire in the daemon has become far to complex because umount
+candidates are enumerated, almost for no other reason than to "count"
+the number of times to call the expire ioctl. This involves scanning
+the mount table which has proved to be a big overhead for users with
+large maps. The best way to improve this is try and get back to the
+way the expire was done long ago. That is, when an expire request is
+issued for a mount (file handle) we should continually call back to
+the daemon until we can't umount any more mounts, then return the
+appropriate status to the daemon. At the moment we just expire one
+mount at a time. A Generic Netlink implementation would exclude this
+possibility for future development due to the requirements of the
+message bus architecture.
+
+
+autofs4 Miscellaneous Device mount control interface
+====================================================
+
+The control interface is opening a device node, typically /dev/autofs.
+
+All the ioctls use a common structure to pass the needed parameter
+information and return operation results:
+
+struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ __u32 arg1; /* Command parameters */
+ __u32 arg2;
+
+ char path[0];
+};
+
+The ioctlfd field is a mount point file descriptor of an autofs mount
+point. It is returned by the open call and is used by all calls except
+the check for whether a given path is a mount point, where it may
+optionally be used to check a specific mount corresponding to a given
+mount point file descriptor, and when requesting the uid and gid of the
+last successful mount on a directory within the autofs file system.
+
+The fields arg1 and arg2 are used to communicate parameters and results of
+calls made as described below.
+
+The path field is used to pass a path where it is needed and the size field
+is used account for the increased structure length when translating the
+structure sent from user space.
+
+This structure can be initialized before setting specific fields by using
+the void function call init_autofs_dev_ioctl(struct autofs_dev_ioctl *).
+
+All of the ioctls perform a copy of this structure from user space to
+kernel space and return -EINVAL if the size parameter is smaller than
+the structure size itself, -ENOMEM if the kernel memory allocation fails
+or -EFAULT if the copy itself fails. Other checks include a version check
+of the compiled in user space version against the module version and a
+mismatch results in a -EINVAL return. If the size field is greater than
+the structure size then a path is assumed to be present and is checked to
+ensure it begins with a "/" and is NULL terminated, otherwise -EINVAL is
+returned. Following these checks, for all ioctl commands except
+AUTOFS_DEV_IOCTL_VERSION_CMD, AUTOFS_DEV_IOCTL_OPENMOUNT_CMD and
+AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD the ioctlfd is validated and if it is
+not a valid descriptor or doesn't correspond to an autofs mount point
+an error of -EBADF, -ENOTTY or -EINVAL (not an autofs descriptor) is
+returned.
+
+
+The ioctls
+==========
+
+An example of an implementation which uses this interface can be seen
+in autofs version 5.0.4 and later in file lib/dev-ioctl-lib.c of the
+distribution tar available for download from kernel.org in directory
+/pub/linux/daemons/autofs/v5.
+
+The device node ioctl operations implemented by this interface are:
+
+
+AUTOFS_DEV_IOCTL_VERSION
+------------------------
+
+Get the major and minor version of the autofs4 device ioctl kernel module
+implementation. It requires an initialized struct autofs_dev_ioctl as an
+input parameter and sets the version information in the passed in structure.
+It returns 0 on success or the error -EINVAL if a version mismatch is
+detected.
+
+
+AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD
+------------------------------------------------------------------
+
+Get the major and minor version of the autofs4 protocol version understood
+by loaded module. This call requires an initialized struct autofs_dev_ioctl
+with the ioctlfd field set to a valid autofs mount point descriptor
+and sets the requested version number in structure field arg1. These
+commands return 0 on success or one of the negative error codes if
+validation fails.
+
+
+AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT
+----------------------------------------------------------
+
+Obtain and release a file descriptor for an autofs managed mount point
+path. The open call requires an initialized struct autofs_dev_ioctl with
+the the path field set and the size field adjusted appropriately as well
+as the arg1 field set to the device number of the autofs mount. The
+device number can be obtained from the mount options shown in
+/proc/mounts. The close call requires an initialized struct
+autofs_dev_ioct with the ioctlfd field set to the descriptor obtained
+from the open call. The release of the file descriptor can also be done
+with close(2) so any open descriptors will also be closed at process exit.
+The close call is included in the implemented operations largely for
+completeness and to provide for a consistent user space implementation.
+
+
+AUTOFS_DEV_IOCTL_READY_CMD and AUTOFS_DEV_IOCTL_FAIL_CMD
+--------------------------------------------------------
+
+Return mount and expire result status from user space to the kernel.
+Both of these calls require an initialized struct autofs_dev_ioctl
+with the ioctlfd field set to the descriptor obtained from the open
+call and the arg1 field set to the wait queue token number, received
+by user space in the foregoing mount or expire request. The arg2 field
+is set to the status to be returned. For the ready call this is always
+0 and for the fail call it is set to the errno of the operation.
+
+
+AUTOFS_DEV_IOCTL_SETPIPEFD_CMD
+------------------------------
+
+Set the pipe file descriptor used for kernel communication to the daemon.
+Normally this is set at mount time using an option but when reconnecting
+to a existing mount we need to use this to tell the autofs mount about
+the new kernel pipe descriptor. In order to protect mounts against
+incorrectly setting the pipe descriptor we also require that the autofs
+mount be catatonic (see next call).
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call and
+the arg1 field set to descriptor of the pipe. On success the call
+also sets the process group id used to identify the controlling process
+(eg. the owning automount(8) daemon) to the process group of the caller.
+
+
+AUTOFS_DEV_IOCTL_CATATONIC_CMD
+------------------------------
+
+Make the autofs mount point catatonic. The autofs mount will no longer
+issue mount requests, the kernel communication pipe descriptor is released
+and any remaining waits in the queue released.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call.
+
+
+AUTOFS_DEV_IOCTL_TIMEOUT_CMD
+----------------------------
+
+Set the expire timeout for mounts withing an autofs mount point.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call.
+
+
+AUTOFS_DEV_IOCTL_REQUESTER_CMD
+------------------------------
+
+Return the uid and gid of the last process to successfully trigger a the
+mount on the given path dentry.
+
+The call requires an initialized struct autofs_dev_ioctl with the path
+field set to the mount point in question and the size field adjusted
+appropriately as well as the arg1 field set to the device number of the
+containing autofs mount. Upon return the struct field arg1 contains the
+uid and arg2 the gid.
+
+When reconstructing an autofs mount tree with active mounts we need to
+re-connect to mounts that may have used the original process uid and
+gid (or string variations of them) for mount lookups within the map entry.
+This call provides the ability to obtain this uid and gid so they may be
+used by user space for the mount map lookups.
+
+
+AUTOFS_DEV_IOCTL_EXPIRE_CMD
+---------------------------
+
+Issue an expire request to the kernel for an autofs mount. Typically
+this ioctl is called until no further expire candidates are found.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call. In
+addition an immediate expire, independent of the mount timeout, can be
+requested by setting the arg1 field to 1. If no expire candidates can
+be found the ioctl returns -1 with errno set to EAGAIN.
+
+This call causes the kernel module to check the mount corresponding
+to the given ioctlfd for mounts that can be expired, issues an expire
+request back to the daemon and waits for completion.
+
+AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD
+------------------------------
+
+Checks if an autofs mount point is in use.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call and
+it returns the result in the arg1 field, 1 for busy and 0 otherwise.
+
+
+AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD
+---------------------------------
+
+Check if the given path is a mountpoint.
+
+The call requires an initialized struct autofs_dev_ioctl. There are two
+possible variations. Both use the path field set to the path of the mount
+point to check and the size field adjusted appropriately. One uses the
+ioctlfd field to identify a specific mount point to check while the other
+variation uses the path and optionaly arg1 set to an autofs mount type.
+The call returns 1 if this is a mount point and sets arg1 to the device
+number of the mount and field arg2 to the relevant super block magic
+number (described below) or 0 if it isn't a mountpoint. In both cases
+the the device number (as returned by new_encode_dev()) is returned
+in field arg1.
+
+If supplied with a file descriptor we're looking for a specific mount,
+not necessarily at the top of the mounted stack. In this case the path
+the descriptor corresponds to is considered a mountpoint if it is itself
+a mountpoint or contains a mount, such as a multi-mount without a root
+mount. In this case we return 1 if the descriptor corresponds to a mount
+point and and also returns the super magic of the covering mount if there
+is one or 0 if it isn't a mountpoint.
+
+If a path is supplied (and the ioctlfd field is set to -1) then the path
+is looked up and is checked to see if it is the root of a mount. If a
+type is also given we are looking for a particular autofs mount and if
+a match isn't found a fail is returned. If the the located path is the
+root of a mount 1 is returned along with the super magic of the mount
+or 0 otherwise.
+
Patch to add a miscellaneous device to the autofs4 module for routing
ioctls. This provides the ability to obtain an ioctl file handle for
an autofs mount point that is possibly covered by another mount.
The actual problem with autofs is that it can't reconnect to existing
mounts. Immediately one things of just adding the ability to remount
autofs file systems would solve it, but alas, that can't work. This is
because autofs direct mounts and the implementation of "on demand mount
and expire" of nested mount trees have the file system mounted on top of
the mount trigger dentry.
To resolve this a miscellaneous device node for routing ioctl commands
to these mount points has been implemented in the autofs4 kernel module
and a library added to autofs. This provides the ability to open a file
descriptor for these over mounted autofs mount points.
Please refer to Documentation/filesystems/autofs4-mount-control.txt for
a discussion of the problem, implementation alternatives considered and
a description of the interface.
Signed-off-by: Ian Kent <[email protected]>
---
fs/autofs4/Makefile | 2
fs/autofs4/autofs_i.h | 35 ++
fs/autofs4/dev-ioctl.c | 862 ++++++++++++++++++++++++++++++++++++++++
fs/autofs4/expire.c | 16 -
fs/autofs4/init.c | 11 -
include/linux/auto_dev-ioctl.h | 157 +++++++
include/linux/auto_fs4.h | 2
7 files changed, 1072 insertions(+), 13 deletions(-)
create mode 100644 fs/autofs4/dev-ioctl.c
create mode 100644 include/linux/auto_dev-ioctl.h
diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile
index f2c3b79..a811c1f 100644
--- a/fs/autofs4/Makefile
+++ b/fs/autofs4/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_AUTOFS4_FS) += autofs4.o
-autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o
+autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index fa76d18..e0f16da 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -14,6 +14,7 @@
/* Internal header file for autofs */
#include <linux/auto_fs4.h>
+#include <linux/auto_dev-ioctl.h>
#include <linux/mutex.h>
#include <linux/list.h>
@@ -21,6 +22,9 @@
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
#define AUTOFS_IOC_COUNT 32
+#define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION)
+#define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11)
+
#define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
#include <linux/kernel.h>
@@ -37,11 +41,27 @@
/* #define DEBUG */
#ifdef DEBUG
-#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
+#define DPRINTK(fmt, args...) \
+do { \
+ printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \
+ current->pid, __func__, ##args); \
+} while (0)
#else
-#define DPRINTK(fmt,args...) do {} while(0)
+#define DPRINTK(fmt, args...) do {} while (0)
#endif
+#define AUTOFS_WARN(fmt, args...) \
+do { \
+ printk(KERN_WARNING "pid %d: %s: " fmt "\n", \
+ current->pid, __func__, ##args); \
+} while (0)
+
+#define AUTOFS_ERROR(fmt, args...) \
+do { \
+ printk(KERN_ERR "pid %d: %s: " fmt "\n", \
+ current->pid, __func__, ##args); \
+} while (0)
+
/* Unified info structure. This is pointed to by both the dentry and
inode structures. Each file in the filesystem has an instance of this
structure. It holds a reference to the dentry, so dentries are never
@@ -170,6 +190,17 @@ int autofs4_expire_run(struct super_block *, struct vfsmount *,
struct autofs_packet_expire __user *);
int autofs4_expire_multi(struct super_block *, struct vfsmount *,
struct autofs_sb_info *, int __user *);
+struct dentry *autofs4_expire_direct(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi, int how);
+struct dentry *autofs4_expire_indirect(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi, int how);
+
+/* Device node initialization */
+
+int autofs_dev_ioctl_init(void);
+void autofs_dev_ioctl_exit(void);
/* Operations structures */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
new file mode 100644
index 0000000..4b201d9
--- /dev/null
+++ b/fs/autofs4/dev-ioctl.c
@@ -0,0 +1,862 @@
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <[email protected]>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/namei.h>
+#include <linux/fcntl.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/smp_lock.h>
+#include <linux/magic.h>
+#include <linux/dcache.h>
+#include <linux/uaccess.h>
+
+#include "autofs_i.h"
+
+/*
+ * This module implements an interface for routing autofs ioctl control
+ * commands via a miscellaneous device file.
+ *
+ * The alternate interface is needed because we need to be able open
+ * an ioctl file descriptor on an autofs mount that may be covered by
+ * another mount. This situation arises when starting automount(8)
+ * or other user space daemon which uses direct mounts or offset
+ * mounts (used for autofs lazy mount/umount of nested mount trees),
+ * which have been left busy at at service shutdown.
+ */
+
+#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
+
+typedef int (*ioctl_fn)(struct file *,
+struct autofs_sb_info *, struct autofs_dev_ioctl *);
+
+static int check_name(const char *name)
+{
+ if (!strchr(name, '/'))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Check a string doesn't overrun the chunk of
+ * memory we copied from user land.
+ */
+static int invalid_str(char *str, void *end)
+{
+ while ((void *) str <= end)
+ if (!*str++)
+ return 0;
+ return -EINVAL;
+}
+
+/*
+ * Check that the user compiled against correct version of autofs
+ * misc device code.
+ *
+ * As well as checking the version compatibility this always copies
+ * the kernel interface version out.
+ */
+static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
+{
+ int err = 0;
+
+ if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) ||
+ (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) {
+ AUTOFS_WARN("ioctl control interface version mismatch: "
+ "kernel(%u.%u), user(%u.%u), cmd(%d)",
+ AUTOFS_DEV_IOCTL_VERSION_MAJOR,
+ AUTOFS_DEV_IOCTL_VERSION_MINOR,
+ param->ver_major, param->ver_minor, cmd);
+ err = -EINVAL;
+ }
+
+ /* Fill in the kernel version. */
+ param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+ param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+
+ return err;
+}
+
+/*
+ * Copy parameter control struct, including a possible path allocated
+ * at the end of the struct.
+ */
+static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
+{
+ struct autofs_dev_ioctl tmp, *ads;
+
+ if (copy_from_user(&tmp, in, sizeof(tmp)))
+ return ERR_PTR(-EFAULT);
+
+ if (tmp.size < sizeof(tmp))
+ return ERR_PTR(-EINVAL);
+
+ ads = kmalloc(tmp.size, GFP_KERNEL);
+ if (!ads)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(ads, in, tmp.size)) {
+ kfree(ads);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return ads;
+}
+
+static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
+{
+ kfree(param);
+ return;
+}
+
+/*
+ * Check sanity of parameter control fields and if a path is present
+ * check that it has a "/" and is terminated.
+ */
+static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
+{
+ int err = -EINVAL;
+
+ if (check_dev_ioctl_version(cmd, param)) {
+ AUTOFS_WARN("invalid device control module version "
+ "supplied for cmd(0x%08x)", cmd);
+ goto out;
+ }
+
+ if (param->size > sizeof(*param)) {
+ err = check_name(param->path);
+ if (err) {
+ AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
+ cmd);
+ goto out;
+ }
+
+ err = invalid_str(param->path,
+ (void *) ((size_t) param + param->size));
+ if (err) {
+ AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
+ cmd);
+ goto out;
+ }
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
+/*
+ * Get the autofs super block info struct from the file opened on
+ * the autofs mount point.
+ */
+static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
+{
+ struct autofs_sb_info *sbi = NULL;
+ struct inode *inode;
+
+ if (f) {
+ inode = f->f_path.dentry->d_inode;
+ sbi = autofs4_sbi(inode->i_sb);
+ }
+ return sbi;
+}
+
+/* Return autofs module protocol version */
+static int autofs_dev_ioctl_protover(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ param->arg1 = sbi->version;
+ return 0;
+}
+
+/* Return autofs module protocol sub version */
+static int autofs_dev_ioctl_protosubver(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ param->arg1 = sbi->sub_version;
+ return 0;
+}
+
+/*
+ * Walk down the mount stack looking for an autofs mount that
+ * has the requested device number (aka. new_encode_dev(sb->s_dev).
+ */
+static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ struct super_block *sb;
+ dev_t s_dev;
+ unsigned int err;
+
+ err = -ENOENT;
+
+ /* Lookup the dentry name at the base of our mount point */
+ dentry = d_lookup(nd->path.dentry, &nd->last);
+ if (!dentry)
+ goto out;
+
+ dput(nd->path.dentry);
+ nd->path.dentry = dentry;
+
+ /* And follow the mount stack looking for our autofs mount */
+ while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
+ inode = nd->path.dentry->d_inode;
+ if (!inode)
+ break;
+
+ sb = inode->i_sb;
+ s_dev = new_encode_dev(sb->s_dev);
+ if (devno == s_dev) {
+ if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
+ err = 0;
+ break;
+ }
+ }
+ }
+out:
+ return err;
+}
+
+/*
+ * Walk down the mount stack looking for an autofs mount that
+ * has the requested mount type (ie. indirect, direct or offset).
+ */
+static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
+{
+ struct dentry *dentry;
+ struct autofs_info *ino;
+ unsigned int err;
+
+ err = -ENOENT;
+
+ /* Lookup the dentry name at the base of our mount point */
+ dentry = d_lookup(nd->path.dentry, &nd->last);
+ if (!dentry)
+ goto out;
+
+ dput(nd->path.dentry);
+ nd->path.dentry = dentry;
+
+ /* And follow the mount stack looking for our autofs mount */
+ while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
+ ino = autofs4_dentry_ino(nd->path.dentry);
+ if (ino && ino->sbi->type & type) {
+ err = 0;
+ break;
+ }
+ }
+out:
+ return err;
+}
+
+static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
+{
+ struct files_struct *files = current->files;
+ struct fdtable *fdt;
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ BUG_ON(fdt->fd[fd] != NULL);
+ rcu_assign_pointer(fdt->fd[fd], file);
+ FD_SET(fd, fdt->close_on_exec);
+ spin_unlock(&files->file_lock);
+}
+
+
+/*
+ * Open a file descriptor on the autofs mount point corresponding
+ * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
+ */
+static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
+{
+ struct file *filp;
+ struct nameidata nd;
+ int err, fd;
+
+ fd = get_unused_fd();
+ if (likely(fd >= 0)) {
+ /* Get nameidata of the parent directory */
+ err = path_lookup(path, LOOKUP_PARENT, &nd);
+ if (err)
+ goto out;
+
+ /*
+ * Search down, within the parent, looking for an
+ * autofs super block that has the device number
+ * corresponding to the autofs fs we want to open.
+ */
+ err = autofs_dev_ioctl_find_super(&nd, devid);
+ if (err) {
+ path_put(&nd.path);
+ goto out;
+ }
+
+ filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
+ if (IS_ERR(filp)) {
+ err = PTR_ERR(filp);
+ goto out;
+ }
+
+ autofs_dev_ioctl_fd_install(fd, filp);
+ }
+
+ return fd;
+
+out:
+ put_unused_fd(fd);
+ return err;
+}
+
+/* Open a file descriptor on an autofs mount point */
+static int autofs_dev_ioctl_openmount(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ const char *path;
+ dev_t devid;
+ int err, fd;
+
+ /* param->path has already been checked */
+ if (!param->arg1)
+ return -EINVAL;
+
+ param->ioctlfd = -1;
+
+ path = param->path;
+ devid = param->arg1;
+
+ err = 0;
+ fd = autofs_dev_ioctl_open_mountpoint(path, devid);
+ if (unlikely(fd < 0)) {
+ err = fd;
+ goto out;
+ }
+
+ param->ioctlfd = fd;
+out:
+ return err;
+}
+
+/* Close file descriptor allocated above (user can also use close(2)). */
+static int autofs_dev_ioctl_closemount(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ return sys_close(param->ioctlfd);
+}
+
+/*
+ * Send "ready" status for an existing wait (either a mount or an expire
+ * request).
+ */
+static int autofs_dev_ioctl_ready(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ autofs_wqt_t token;
+
+ token = (autofs_wqt_t) param->arg1;
+ return autofs4_wait_release(sbi, token, 0);
+}
+
+/*
+ * Send "fail" status for an existing wait (either a mount or an expire
+ * request).
+ */
+static int autofs_dev_ioctl_fail(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ autofs_wqt_t token;
+ int status;
+
+ token = (autofs_wqt_t) param->arg1;
+ status = param->arg2 ? param->arg2 : -ENOENT;
+ return autofs4_wait_release(sbi, token, status);
+}
+
+/*
+ * Set the pipe fd for kernel communication to the daemon.
+ *
+ * Normally this is set at mount using an option but if we
+ * are reconnecting to a busy mount then we need to use this
+ * to tell the autofs mount about the new kernel pipe fd. In
+ * order to protect mounts against incorrectly setting the
+ * pipefd we also require that the autofs mount be catatonic.
+ *
+ * This also sets the process group id used to identify the
+ * controlling process (eg. the owning automount(8) daemon).
+ */
+static int autofs_dev_ioctl_setpipefd(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ int pipefd;
+ int err = 0;
+
+ if (param->arg1 == -1)
+ return -EINVAL;
+
+ pipefd = param->arg1;
+
+ mutex_lock(&sbi->wq_mutex);
+ if (!sbi->catatonic) {
+ mutex_unlock(&sbi->wq_mutex);
+ return -EBUSY;
+ } else {
+ struct file *pipe = fget(pipefd);
+ if (!pipe->f_op || !pipe->f_op->write) {
+ err = -EPIPE;
+ fput(pipe);
+ goto out;
+ }
+ sbi->oz_pgrp = task_pgrp_nr(current);
+ sbi->pipefd = pipefd;
+ sbi->pipe = pipe;
+ sbi->catatonic = 0;
+ }
+out:
+ mutex_unlock(&sbi->wq_mutex);
+ return err;
+}
+
+/*
+ * Make the autofs mount point catatonic, no longer responsive to
+ * mount requests. Also closes the kernel pipe file descriptor.
+ */
+static int autofs_dev_ioctl_catatonic(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ autofs4_catatonic_mode(sbi);
+ return 0;
+}
+
+/* Set the autofs mount timeout */
+static int autofs_dev_ioctl_timeout(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ unsigned long timeout;
+
+ timeout = param->arg1;
+ param->arg1 = sbi->exp_timeout / HZ;
+ sbi->exp_timeout = timeout * HZ;
+ return 0;
+}
+
+/*
+ * Return the uid and gid of the last request for the mount
+ *
+ * When reconstructing an autofs mount tree with active mounts
+ * we need to re-connect to mounts that may have used the original
+ * process uid and gid (or string variations of them) for mount
+ * lookups within the map entry.
+ */
+static int autofs_dev_ioctl_requester(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ struct autofs_info *ino;
+ struct nameidata nd;
+ const char *path;
+ dev_t devid;
+ int err = -ENOENT;
+
+ if (param->size <= sizeof(*param)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ path = param->path;
+ devid = sbi->sb->s_dev;
+
+ param->arg1 = param->arg2 = -1;
+
+ /* Get nameidata of the parent directory */
+ err = path_lookup(path, LOOKUP_PARENT, &nd);
+ if (err)
+ goto out;
+
+ err = autofs_dev_ioctl_find_super(&nd, devid);
+ if (err)
+ goto out_release;
+
+ ino = autofs4_dentry_ino(nd.path.dentry);
+ if (ino) {
+ err = 0;
+ autofs4_expire_wait(nd.path.dentry);
+ spin_lock(&sbi->fs_lock);
+ param->arg1 = ino->uid;
+ param->arg2 = ino->gid;
+ spin_unlock(&sbi->fs_lock);
+ }
+
+out_release:
+ path_put(&nd.path);
+out:
+ return err;
+}
+
+/*
+ * Call repeatedly until it returns -EAGAIN, meaning there's nothing
+ * more that can be done.
+ */
+static int autofs_dev_ioctl_expire(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ struct dentry *dentry;
+ struct vfsmount *mnt;
+ int err = -EAGAIN;
+ int how;
+
+ how = param->arg1;
+ mnt = fp->f_path.mnt;
+
+ if (sbi->type & AUTOFS_TYPE_TRIGGER)
+ dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
+ else
+ dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
+
+ if (dentry) {
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
+ /*
+ * This is synchronous because it makes the daemon a
+ * little easier
+ */
+ err = autofs4_wait(sbi, dentry, NFY_EXPIRE);
+
+ spin_lock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
+ ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
+ sbi->sb->s_root->d_mounted++;
+ }
+ ino->flags &= ~AUTOFS_INF_EXPIRING;
+ complete_all(&ino->expire_complete);
+ spin_unlock(&sbi->fs_lock);
+ dput(dentry);
+ }
+
+ return err;
+}
+
+/* Check if autofs mount point is in use */
+static int autofs_dev_ioctl_askumount(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ param->arg1 = 0;
+ if (may_umount(fp->f_path.mnt))
+ param->arg1 = 1;
+ return 0;
+}
+
+/*
+ * Check if the given path is a mountpoint.
+ *
+ * If we are supplied with the file descriptor of an autofs
+ * mount we're looking for a specific mount. In this case
+ * the path is considered a mountpoint if it is itself a
+ * mountpoint or contains a mount, such as a multi-mount
+ * without a root mount. In this case we return 1 if the
+ * path is a mount point and the super magic of the covering
+ * mount if there is one or 0 if it isn't a mountpoint.
+ *
+ * If we aren't supplied with a file descriptor then we
+ * lookup the nameidata of the path and check if it is the
+ * root of a mount. If a type is given we are looking for
+ * a particular autofs mount and if we don't find a match
+ * we return fail. If the located nameidata path is the
+ * root of a mount we return 1 along with the super magic
+ * of the mount or 0 otherwise.
+ *
+ * In both cases the the device number (as returned by
+ * new_encode_dev()) is also returned.
+ */
+static int autofs_dev_ioctl_ismountpoint(struct file *fp,
+ struct autofs_sb_info *sbi,
+ struct autofs_dev_ioctl *param)
+{
+ struct nameidata nd;
+ const char *path;
+ unsigned int type;
+ int err = -ENOENT;
+
+ if (param->size <= sizeof(*param)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ path = param->path;
+ type = param->arg1;
+
+ param->arg1 = 0;
+ param->arg2 = 0;
+
+ if (!fp || param->ioctlfd == -1) {
+ if (type == AUTOFS_TYPE_ANY) {
+ struct super_block *sb;
+
+ err = path_lookup(path, LOOKUP_FOLLOW, &nd);
+ if (err)
+ goto out;
+
+ sb = nd.path.dentry->d_sb;
+ param->arg1 = new_encode_dev(sb->s_dev);
+ } else {
+ struct autofs_info *ino;
+
+ err = path_lookup(path, LOOKUP_PARENT, &nd);
+ if (err)
+ goto out;
+
+ err = autofs_dev_ioctl_find_sbi_type(&nd, type);
+ if (err)
+ goto out_release;
+
+ ino = autofs4_dentry_ino(nd.path.dentry);
+ param->arg1 = autofs4_get_dev(ino->sbi);
+ }
+
+ err = 0;
+ if (nd.path.dentry->d_inode &&
+ nd.path.mnt->mnt_root == nd.path.dentry) {
+ err = 1;
+ param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic;
+ }
+ } else {
+ dev_t devid = new_encode_dev(sbi->sb->s_dev);
+
+ err = path_lookup(path, LOOKUP_PARENT, &nd);
+ if (err)
+ goto out;
+
+ err = autofs_dev_ioctl_find_super(&nd, devid);
+ if (err)
+ goto out_release;
+
+ param->arg1 = autofs4_get_dev(sbi);
+
+ err = have_submounts(nd.path.dentry);
+
+ if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
+ if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
+ struct inode *inode = nd.path.dentry->d_inode;
+ param->arg2 = inode->i_sb->s_magic;
+ }
+ }
+ }
+
+out_release:
+ path_put(&nd.path);
+out:
+ return err;
+}
+
+/*
+ * Our range of ioctl numbers isn't 0 based so we need to shift
+ * the array index by _IOC_NR(AUTOFS_CTL_IOC_FIRST) for the table
+ * lookup.
+ */
+#define cmd_idx(cmd) (cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST))
+
+static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
+{
+ static struct {
+ int cmd;
+ ioctl_fn fn;
+ } _ioctls[] = {
+ {cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL},
+ {cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD),
+ autofs_dev_ioctl_protover},
+ {cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD),
+ autofs_dev_ioctl_protosubver},
+ {cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD),
+ autofs_dev_ioctl_openmount},
+ {cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD),
+ autofs_dev_ioctl_closemount},
+ {cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD),
+ autofs_dev_ioctl_ready},
+ {cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD),
+ autofs_dev_ioctl_fail},
+ {cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD),
+ autofs_dev_ioctl_setpipefd},
+ {cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD),
+ autofs_dev_ioctl_catatonic},
+ {cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD),
+ autofs_dev_ioctl_timeout},
+ {cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD),
+ autofs_dev_ioctl_requester},
+ {cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD),
+ autofs_dev_ioctl_expire},
+ {cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD),
+ autofs_dev_ioctl_askumount},
+ {cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD),
+ autofs_dev_ioctl_ismountpoint}
+ };
+ unsigned int idx = cmd_idx(cmd);
+
+ return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn;
+}
+
+/* ioctl dispatcher */
+static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user)
+{
+ struct autofs_dev_ioctl *param;
+ struct file *fp;
+ struct autofs_sb_info *sbi;
+ unsigned int cmd_first, cmd;
+ ioctl_fn fn = NULL;
+ int err = 0;
+
+ /* only root can play with this */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST);
+ cmd = _IOC_NR(command);
+
+ if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) ||
+ cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) {
+ return -ENOTTY;
+ }
+
+ /* Copy the parameters into kernel space. */
+ param = copy_dev_ioctl(user);
+ if (IS_ERR(param))
+ return PTR_ERR(param);
+
+ err = validate_dev_ioctl(command, param);
+ if (err)
+ goto out;
+
+ /* The validate routine above always sets the version */
+ if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD)
+ goto done;
+
+ fn = lookup_dev_ioctl(cmd);
+ if (!fn) {
+ AUTOFS_WARN("unknown command 0x%08x", command);
+ return -ENOTTY;
+ }
+
+ fp = NULL;
+ sbi = NULL;
+
+ /*
+ * For obvious reasons the openmount can't have a file
+ * descriptor yet. We don't take a reference to the
+ * file during close to allow for immediate release.
+ */
+ if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD &&
+ cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) {
+ fp = fget(param->ioctlfd);
+ if (!fp) {
+ if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD)
+ goto cont;
+ err = -EBADF;
+ goto out;
+ }
+
+ if (!fp->f_op) {
+ err = -ENOTTY;
+ fput(fp);
+ goto out;
+ }
+
+ sbi = autofs_dev_ioctl_sbi(fp);
+ if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
+ err = -EINVAL;
+ fput(fp);
+ goto out;
+ }
+
+ /*
+ * Admin needs to be able to set the mount catatonic in
+ * order to be able to perform the re-open.
+ */
+ if (!autofs4_oz_mode(sbi) &&
+ cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) {
+ err = -EACCES;
+ fput(fp);
+ goto out;
+ }
+ }
+cont:
+ err = fn(fp, sbi, param);
+
+ if (fp)
+ fput(fp);
+done:
+ if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE))
+ err = -EFAULT;
+out:
+ free_dev_ioctl(param);
+ return err;
+}
+
+static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
+{
+ int err;
+ err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u);
+ return (long) err;
+}
+
+#ifdef CONFIG_COMPAT
+static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u)
+{
+ return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u));
+}
+#else
+#define autofs_dev_ioctl_compat NULL
+#endif
+
+static const struct file_operations _dev_ioctl_fops = {
+ .unlocked_ioctl = autofs_dev_ioctl,
+ .compat_ioctl = autofs_dev_ioctl_compat,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice _autofs_dev_ioctl_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = AUTOFS_DEVICE_NAME,
+ .fops = &_dev_ioctl_fops
+};
+
+/* Register/deregister misc character device */
+int autofs_dev_ioctl_init(void)
+{
+ int r;
+
+ r = misc_register(&_autofs_dev_ioctl_misc);
+ if (r) {
+ AUTOFS_ERROR("misc_register failed for control device");
+ return r;
+ }
+
+ return 0;
+}
+
+void autofs_dev_ioctl_exit(void)
+{
+ misc_deregister(&_autofs_dev_ioctl_misc);
+ return;
+}
+
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index e79dd09..cde2f8e 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -244,10 +244,10 @@ cont:
}
/* Check if we can expire a direct mount (possibly a tree) */
-static struct dentry *autofs4_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+struct dentry *autofs4_expire_direct(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ int how)
{
unsigned long timeout;
struct dentry *root = dget(sb->s_root);
@@ -283,10 +283,10 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
* - it is unused by any user process
* - it has been unused for exp_timeout time
*/
-static struct dentry *autofs4_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+struct dentry *autofs4_expire_indirect(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ int how)
{
unsigned long timeout;
struct dentry *root = sb->s_root;
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 723a1c5..9722e4b 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -29,11 +29,20 @@ static struct file_system_type autofs_fs_type = {
static int __init init_autofs4_fs(void)
{
- return register_filesystem(&autofs_fs_type);
+ int err;
+
+ err = register_filesystem(&autofs_fs_type);
+ if (err)
+ return err;
+
+ autofs_dev_ioctl_init();
+
+ return err;
}
static void __exit exit_autofs4_fs(void)
{
+ autofs_dev_ioctl_exit();
unregister_filesystem(&autofs_fs_type);
}
diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h
new file mode 100644
index 0000000..f4d05cc
--- /dev/null
+++ b/include/linux/auto_dev-ioctl.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <[email protected]>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _LINUX_AUTO_DEV_IOCTL_H
+#define _LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/types.h>
+
+#define AUTOFS_DEVICE_NAME "autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+
+#define AUTOFS_DEVID_LEN 16
+
+#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path size must account for the total length
+ * of the chunk of memory otherwise is is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ __u32 arg1; /* Command parameters */
+ __u32 arg2;
+
+ char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
+{
+ in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+ in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+ in->size = sizeof(struct autofs_dev_ioctl);
+ in->ioctlfd = -1;
+ in->arg1 = 0;
+ in->arg2 = 0;
+ return;
+}
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to autofs-dev-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Get various version info */
+ AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+ /* Open mount ioctl fd */
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+ /* Close mount ioctl fd */
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+ /* Mount/expire status returns */
+ AUTOFS_DEV_IOCTL_READY_CMD,
+ AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+ /* Activate/deactivate autofs mount */
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+ /* Expiry timeout */
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+ /* Get mount last requesting uid and gid */
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+ /* Check for eligible expire candidates */
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+ /* Request busy status */
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+ /* Check if path is a mountpoint */
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#define AUTOFS_IOCTL 0x93
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif /* _LINUX_AUTO_DEV_IOCTL_H */
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index aa96a04..2253716 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -23,7 +23,7 @@
#define AUTOFS_MIN_PROTO_VERSION 3
#define AUTOFS_MAX_PROTO_VERSION 5
-#define AUTOFS_PROTO_SUBVERSION 0
+#define AUTOFS_PROTO_SUBVERSION 1
/* Mask for expire behaviour */
#define AUTOFS_EXP_IMMEDIATE 1
On Thu, 07 Aug 2008 19:40:14 +0800
Ian Kent <[email protected]> wrote:
> Patch to track the uid and gid of the last process to request a mount
> for on an autofs dentry.
pet peeve: changelog should not tell the reader that this is a "patch".
Because when someone is reading the changelog in the git repository,
they hopefully already know that.
> Signed-off-by: Ian Kent <[email protected]>
>
> ---
>
> fs/autofs4/autofs_i.h | 3 +++
> fs/autofs4/inode.c | 2 ++
> fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> 3 files changed, 39 insertions(+), 0 deletions(-)
>
>
> diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> index ea024d8..fa76d18 100644
> --- a/fs/autofs4/autofs_i.h
> +++ b/fs/autofs4/autofs_i.h
> @@ -63,6 +63,9 @@ struct autofs_info {
> unsigned long last_used;
> atomic_t count;
>
> + uid_t uid;
> + gid_t gid;
> +
> mode_t mode;
> size_t size;
>
> diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> index 9ca2d07..9408507 100644
> --- a/fs/autofs4/inode.c
> +++ b/fs/autofs4/inode.c
> @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> atomic_set(&ino->count, 0);
> }
>
> + ino->uid = 0;
> + ino->gid = 0;
> ino->mode = mode;
> ino->last_used = jiffies;
>
> diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> index 6d87bb1..7c60c0b 100644
> --- a/fs/autofs4/waitq.c
> +++ b/fs/autofs4/waitq.c
> @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
>
> status = wq->status;
>
> + /*
> + * For direct and offset mounts we need to track the requestrer
typo which I'll fix.
> + * uid and gid in the dentry info struct. This is so it can be
> + * supplied, on request, by the misc device ioctl interface.
> + * This is needed during daemon resatart when reconnecting
> + * to existing, active, autofs mounts. The uid and gid (and
> + * related string values) may be used for macro substitution
> + * in autofs mount maps.
> + */
> + if (!status) {
> + struct autofs_info *ino;
> + struct dentry *de = NULL;
> +
> + /* direct mount or browsable map */
> + ino = autofs4_dentry_ino(dentry);
> + if (!ino) {
> + /* If not lookup actual dentry used */
> + de = d_lookup(dentry->d_parent, &dentry->d_name);
> + if (de)
> + ino = autofs4_dentry_ino(de);
> + }
> +
> + /* Set mount requester */
> + if (ino) {
> + spin_lock(&sbi->fs_lock);
> + ino->uid = wq->uid;
> + ino->gid = wq->gid;
> + spin_unlock(&sbi->fs_lock);
> + }
> +
> + if (de)
> + dput(de);
> + }
> +
Please remind me again why autofs's use of current->uid and
current->gid is not busted in the presence of PID namespaces, where
these things are no longer system-wide unique?
On Thu, 07 Aug 2008 19:40:31 +0800
Ian Kent <[email protected]> wrote:
>
> Subject: [PATCH 4/4] autofs4 - add miscelaneous device for ioctls
I fixed that spello
> Patch to add a miscellaneous device to the autofs4 module for routing
> ioctls. This provides the ability to obtain an ioctl file handle for
> an autofs mount point that is possibly covered by another mount.
>
> The actual problem with autofs is that it can't reconnect to existing
> mounts. Immediately one things of just adding the ability to remount
> autofs file systems would solve it, but alas, that can't work. This is
> because autofs direct mounts and the implementation of "on demand mount
> and expire" of nested mount trees have the file system mounted on top of
> the mount trigger dentry.
>
> To resolve this a miscellaneous device node for routing ioctl commands
> to these mount points has been implemented in the autofs4 kernel module
> and a library added to autofs. This provides the ability to open a file
> descriptor for these over mounted autofs mount points.
>
> Please refer to Documentation/filesystems/autofs4-mount-control.txt for
> a discussion of the problem, implementation alternatives considered and
> a description of the interface.
>
>
> ...
>
> +
> +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
> +
> +typedef int (*ioctl_fn)(struct file *,
> +struct autofs_sb_info *, struct autofs_dev_ioctl *);
Weird layout, which I fixed.
> +static int check_name(const char *name)
> +{
> + if (!strchr(name, '/'))
> + return -EINVAL;
> + return 0;
> +}
> +
> +/*
> + * Check a string doesn't overrun the chunk of
> + * memory we copied from user land.
> + */
> +static int invalid_str(char *str, void *end)
> +{
> + while ((void *) str <= end)
> + if (!*str++)
> + return 0;
> + return -EINVAL;
> +}
What is this? gWwe copy strings in from userspace in 10000 different
places without needing checks such as this?
> +/*
> + * Check that the user compiled against correct version of autofs
> + * misc device code.
> + *
> + * As well as checking the version compatibility this always copies
> + * the kernel interface version out.
> + */
> +static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
> +{
> + int err = 0;
> +
> + if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) ||
> + (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) {
> + AUTOFS_WARN("ioctl control interface version mismatch: "
> + "kernel(%u.%u), user(%u.%u), cmd(%d)",
> + AUTOFS_DEV_IOCTL_VERSION_MAJOR,
> + AUTOFS_DEV_IOCTL_VERSION_MINOR,
> + param->ver_major, param->ver_minor, cmd);
> + err = -EINVAL;
> + }
> +
> + /* Fill in the kernel version. */
> + param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
> + param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
> +
> + return err;
> +}
> +
> +/*
> + * Copy parameter control struct, including a possible path allocated
> + * at the end of the struct.
> + */
> +static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
> +{
> + struct autofs_dev_ioctl tmp, *ads;
Variables called `tmp' get me upset, but it seems appropriate here.
> + if (copy_from_user(&tmp, in, sizeof(tmp)))
> + return ERR_PTR(-EFAULT);
> +
> + if (tmp.size < sizeof(tmp))
> + return ERR_PTR(-EINVAL);
> +
> + ads = kmalloc(tmp.size, GFP_KERNEL);
> + if (!ads)
> + return ERR_PTR(-ENOMEM);
> +
> + if (copy_from_user(ads, in, tmp.size)) {
> + kfree(ads);
> + return ERR_PTR(-EFAULT);
> + }
> +
> + return ads;
> +}
> +
> +static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
> +{
> + kfree(param);
> + return;
> +}
> +
> +/*
> + * Check sanity of parameter control fields and if a path is present
> + * check that it has a "/" and is terminated.
> + */
> +static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
> +{
> + int err = -EINVAL;
> +
> + if (check_dev_ioctl_version(cmd, param)) {
> + AUTOFS_WARN("invalid device control module version "
> + "supplied for cmd(0x%08x)", cmd);
> + goto out;
check_dev_ioctl_version() carefully returned a -EFOO value, but this
caller dropped it on the floor.
> + }
> +
> + if (param->size > sizeof(*param)) {
> + err = check_name(param->path);
> + if (err) {
> + AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
> + cmd);
> + goto out;
> + }
> +
> + err = invalid_str(param->path,
> + (void *) ((size_t) param + param->size));
> + if (err) {
> + AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
> + cmd);
> + goto out;
> + }
> + }
> +
> + err = 0;
> +out:
> + return err;
> +}
> +
>
> ...
>
> +static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
> +{
> + struct files_struct *files = current->files;
> + struct fdtable *fdt;
> +
> + spin_lock(&files->file_lock);
> + fdt = files_fdtable(files);
> + BUG_ON(fdt->fd[fd] != NULL);
> + rcu_assign_pointer(fdt->fd[fd], file);
> + FD_SET(fd, fdt->close_on_exec);
> + spin_unlock(&files->file_lock);
> +}
urgh, it's bad to have done a copy-n-paste on fd_install() here. It
means that if we go and change the locking in core kernel (quite
possible) then people won't udpate this code.
Do we have alternative here? Can we set close_on_exec outside the lock
and just call fd_install()? Probably not.
Can we export set_close_on_exec() then call that after having called
fd_install()? I think so.
But not this, please.
Quoting Andrew Morton ([email protected]):
> On Thu, 07 Aug 2008 19:40:14 +0800
> Ian Kent <[email protected]> wrote:
>
> > Patch to track the uid and gid of the last process to request a mount
> > for on an autofs dentry.
>
> pet peeve: changelog should not tell the reader that this is a "patch".
> Because when someone is reading the changelog in the git repository,
> they hopefully already know that.
>
> > Signed-off-by: Ian Kent <[email protected]>
> >
> > ---
> >
> > fs/autofs4/autofs_i.h | 3 +++
> > fs/autofs4/inode.c | 2 ++
> > fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> > 3 files changed, 39 insertions(+), 0 deletions(-)
> >
> >
> > diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> > index ea024d8..fa76d18 100644
> > --- a/fs/autofs4/autofs_i.h
> > +++ b/fs/autofs4/autofs_i.h
> > @@ -63,6 +63,9 @@ struct autofs_info {
> > unsigned long last_used;
> > atomic_t count;
> >
> > + uid_t uid;
> > + gid_t gid;
> > +
> > mode_t mode;
> > size_t size;
> >
> > diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> > index 9ca2d07..9408507 100644
> > --- a/fs/autofs4/inode.c
> > +++ b/fs/autofs4/inode.c
> > @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> > atomic_set(&ino->count, 0);
> > }
> >
> > + ino->uid = 0;
> > + ino->gid = 0;
> > ino->mode = mode;
> > ino->last_used = jiffies;
> >
> > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > index 6d87bb1..7c60c0b 100644
> > --- a/fs/autofs4/waitq.c
> > +++ b/fs/autofs4/waitq.c
> > @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
> >
> > status = wq->status;
> >
> > + /*
> > + * For direct and offset mounts we need to track the requestrer
>
> typo which I'll fix.
>
> > + * uid and gid in the dentry info struct. This is so it can be
> > + * supplied, on request, by the misc device ioctl interface.
> > + * This is needed during daemon resatart when reconnecting
> > + * to existing, active, autofs mounts. The uid and gid (and
> > + * related string values) may be used for macro substitution
> > + * in autofs mount maps.
> > + */
> > + if (!status) {
> > + struct autofs_info *ino;
> > + struct dentry *de = NULL;
> > +
> > + /* direct mount or browsable map */
> > + ino = autofs4_dentry_ino(dentry);
> > + if (!ino) {
> > + /* If not lookup actual dentry used */
> > + de = d_lookup(dentry->d_parent, &dentry->d_name);
> > + if (de)
> > + ino = autofs4_dentry_ino(de);
> > + }
> > +
> > + /* Set mount requester */
> > + if (ino) {
> > + spin_lock(&sbi->fs_lock);
> > + ino->uid = wq->uid;
> > + ino->gid = wq->gid;
> > + spin_unlock(&sbi->fs_lock);
> > + }
> > +
> > + if (de)
> > + dput(de);
> > + }
> > +
>
> Please remind me again why autofs's use of current->uid and
> current->gid is not busted in the presence of PID namespaces, where
> these things are no longer system-wide unique?
I actually don't see what the autofs4_waitq->pid is used for. It's
copied from current into wq->pid at autofs4_wait, and into a packet to
send to userspace (I assume) at autofs4_notify_daemon.
So as long as a daemon can serve multiple pid namespaces (which
doubtless it can), the pid could be confusing (or erroneous) for the
daemon.
If I'm remotely right about how the pid is being used, then the thing to
do would be to
1. store the daemon's pid namespace (would that belong in
the autofs_sb_info?)
2. store the task_pid(current) in the waitqueue
3. retrieve the pid_t for the waiting task in the daemon's
pid namespace, and put that into the packet at
autofs4_notify_daemon.
I realize this patch was about the *uids*, but the pids seem more
urgent.
-serge
Quoting Andrew Morton ([email protected]):
> On Thu, 07 Aug 2008 19:40:14 +0800
> Ian Kent <[email protected]> wrote:
>
> > Patch to track the uid and gid of the last process to request a mount
> > for on an autofs dentry.
>
> pet peeve: changelog should not tell the reader that this is a "patch".
> Because when someone is reading the changelog in the git repository,
> they hopefully already know that.
>
> > Signed-off-by: Ian Kent <[email protected]>
> >
> > ---
> >
> > fs/autofs4/autofs_i.h | 3 +++
> > fs/autofs4/inode.c | 2 ++
> > fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> > 3 files changed, 39 insertions(+), 0 deletions(-)
> >
> >
> > diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> > index ea024d8..fa76d18 100644
> > --- a/fs/autofs4/autofs_i.h
> > +++ b/fs/autofs4/autofs_i.h
> > @@ -63,6 +63,9 @@ struct autofs_info {
> > unsigned long last_used;
> > atomic_t count;
> >
> > + uid_t uid;
> > + gid_t gid;
> > +
> > mode_t mode;
> > size_t size;
> >
> > diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> > index 9ca2d07..9408507 100644
> > --- a/fs/autofs4/inode.c
> > +++ b/fs/autofs4/inode.c
> > @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> > atomic_set(&ino->count, 0);
> > }
> >
> > + ino->uid = 0;
> > + ino->gid = 0;
> > ino->mode = mode;
> > ino->last_used = jiffies;
> >
> > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > index 6d87bb1..7c60c0b 100644
> > --- a/fs/autofs4/waitq.c
> > +++ b/fs/autofs4/waitq.c
> > @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
> >
> > status = wq->status;
> >
> > + /*
> > + * For direct and offset mounts we need to track the requestrer
>
> typo which I'll fix.
>
> > + * uid and gid in the dentry info struct. This is so it can be
> > + * supplied, on request, by the misc device ioctl interface.
> > + * This is needed during daemon resatart when reconnecting
> > + * to existing, active, autofs mounts. The uid and gid (and
> > + * related string values) may be used for macro substitution
> > + * in autofs mount maps.
Hi Ian,
could you say just a few more words on these macro substitution?
I think your use of uids can completely ignore user namespaces, but it
depends on who does the macro substitutions and how...
thanks,
-serge
> > + */
> > + if (!status) {
> > + struct autofs_info *ino;
> > + struct dentry *de = NULL;
> > +
> > + /* direct mount or browsable map */
> > + ino = autofs4_dentry_ino(dentry);
> > + if (!ino) {
> > + /* If not lookup actual dentry used */
> > + de = d_lookup(dentry->d_parent, &dentry->d_name);
> > + if (de)
> > + ino = autofs4_dentry_ino(de);
> > + }
> > +
> > + /* Set mount requester */
> > + if (ino) {
> > + spin_lock(&sbi->fs_lock);
> > + ino->uid = wq->uid;
> > + ino->gid = wq->gid;
> > + spin_unlock(&sbi->fs_lock);
> > + }
> > +
> > + if (de)
> > + dput(de);
> > + }
> > +
>
> Please remind me again why autofs's use of current->uid and
> current->gid is not busted in the presence of PID namespaces, where
> these things are no longer system-wide unique?
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2008-08-07 at 17:15 -0500, Serge E. Hallyn wrote:
> Quoting Andrew Morton ([email protected]):
> > On Thu, 07 Aug 2008 19:40:14 +0800
> > Ian Kent <[email protected]> wrote:
> >
> > > Patch to track the uid and gid of the last process to request a mount
> > > for on an autofs dentry.
> >
> > pet peeve: changelog should not tell the reader that this is a "patch".
> > Because when someone is reading the changelog in the git repository,
> > they hopefully already know that.
> >
> > > Signed-off-by: Ian Kent <[email protected]>
> > >
> > > ---
> > >
> > > fs/autofs4/autofs_i.h | 3 +++
> > > fs/autofs4/inode.c | 2 ++
> > > fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> > > 3 files changed, 39 insertions(+), 0 deletions(-)
> > >
> > >
> > > diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> > > index ea024d8..fa76d18 100644
> > > --- a/fs/autofs4/autofs_i.h
> > > +++ b/fs/autofs4/autofs_i.h
> > > @@ -63,6 +63,9 @@ struct autofs_info {
> > > unsigned long last_used;
> > > atomic_t count;
> > >
> > > + uid_t uid;
> > > + gid_t gid;
> > > +
> > > mode_t mode;
> > > size_t size;
> > >
> > > diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> > > index 9ca2d07..9408507 100644
> > > --- a/fs/autofs4/inode.c
> > > +++ b/fs/autofs4/inode.c
> > > @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> > > atomic_set(&ino->count, 0);
> > > }
> > >
> > > + ino->uid = 0;
> > > + ino->gid = 0;
> > > ino->mode = mode;
> > > ino->last_used = jiffies;
> > >
> > > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > > index 6d87bb1..7c60c0b 100644
> > > --- a/fs/autofs4/waitq.c
> > > +++ b/fs/autofs4/waitq.c
> > > @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
> > >
> > > status = wq->status;
> > >
> > > + /*
> > > + * For direct and offset mounts we need to track the requestrer
> >
> > typo which I'll fix.
> >
> > > + * uid and gid in the dentry info struct. This is so it can be
> > > + * supplied, on request, by the misc device ioctl interface.
> > > + * This is needed during daemon resatart when reconnecting
> > > + * to existing, active, autofs mounts. The uid and gid (and
> > > + * related string values) may be used for macro substitution
> > > + * in autofs mount maps.
>
> Hi Ian,
>
> could you say just a few more words on these macro substitution?
>
> I think your use of uids can completely ignore user namespaces, but it
> depends on who does the macro substitutions and how...
Suppose we have an autofs map entry in /etc/auto.master:
/test /etc/auto.test
and in /etc/auto.test
im1 /om1 server:/dir1 \
/om2/$USER server2:/dir2
Then if this is automounted and the daemon is sent a SIGKILL and started
again we need the uid number that was used when this was mounted to
re-construct the offsets (/om2/$USER in this case needs the
substitution). The uid is used to lookup the user name.
Can you point me in the right direction wrt. to namespaces on this
please Serge? We didn't really get to the bottom of it last time I
think.
>
> thanks,
> -serge
>
> > > + */
> > > + if (!status) {
> > > + struct autofs_info *ino;
> > > + struct dentry *de = NULL;
> > > +
> > > + /* direct mount or browsable map */
> > > + ino = autofs4_dentry_ino(dentry);
> > > + if (!ino) {
> > > + /* If not lookup actual dentry used */
> > > + de = d_lookup(dentry->d_parent, &dentry->d_name);
> > > + if (de)
> > > + ino = autofs4_dentry_ino(de);
> > > + }
> > > +
> > > + /* Set mount requester */
> > > + if (ino) {
> > > + spin_lock(&sbi->fs_lock);
> > > + ino->uid = wq->uid;
> > > + ino->gid = wq->gid;
> > > + spin_unlock(&sbi->fs_lock);
> > > + }
> > > +
> > > + if (de)
> > > + dput(de);
> > > + }
> > > +
> >
> > Please remind me again why autofs's use of current->uid and
> > current->gid is not busted in the presence of PID namespaces, where
> > these things are no longer system-wide unique?
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2008-08-07 at 13:46 -0700, Andrew Morton wrote:
> On Thu, 07 Aug 2008 19:40:14 +0800
> Ian Kent <[email protected]> wrote:
>
> > Patch to track the uid and gid of the last process to request a mount
> > for on an autofs dentry.
>
> pet peeve: changelog should not tell the reader that this is a "patch".
> Because when someone is reading the changelog in the git repository,
> they hopefully already know that.
>
> > Signed-off-by: Ian Kent <[email protected]>
> >
> > ---
> >
> > fs/autofs4/autofs_i.h | 3 +++
> > fs/autofs4/inode.c | 2 ++
> > fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> > 3 files changed, 39 insertions(+), 0 deletions(-)
> >
> >
> > diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> > index ea024d8..fa76d18 100644
> > --- a/fs/autofs4/autofs_i.h
> > +++ b/fs/autofs4/autofs_i.h
> > @@ -63,6 +63,9 @@ struct autofs_info {
> > unsigned long last_used;
> > atomic_t count;
> >
> > + uid_t uid;
> > + gid_t gid;
> > +
> > mode_t mode;
> > size_t size;
> >
> > diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> > index 9ca2d07..9408507 100644
> > --- a/fs/autofs4/inode.c
> > +++ b/fs/autofs4/inode.c
> > @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> > atomic_set(&ino->count, 0);
> > }
> >
> > + ino->uid = 0;
> > + ino->gid = 0;
> > ino->mode = mode;
> > ino->last_used = jiffies;
> >
> > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > index 6d87bb1..7c60c0b 100644
> > --- a/fs/autofs4/waitq.c
> > +++ b/fs/autofs4/waitq.c
> > @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
> >
> > status = wq->status;
> >
> > + /*
> > + * For direct and offset mounts we need to track the requestrer
>
> typo which I'll fix.
>
> > + * uid and gid in the dentry info struct. This is so it can be
> > + * supplied, on request, by the misc device ioctl interface.
> > + * This is needed during daemon resatart when reconnecting
> > + * to existing, active, autofs mounts. The uid and gid (and
> > + * related string values) may be used for macro substitution
> > + * in autofs mount maps.
> > + */
> > + if (!status) {
> > + struct autofs_info *ino;
> > + struct dentry *de = NULL;
> > +
> > + /* direct mount or browsable map */
> > + ino = autofs4_dentry_ino(dentry);
> > + if (!ino) {
> > + /* If not lookup actual dentry used */
> > + de = d_lookup(dentry->d_parent, &dentry->d_name);
> > + if (de)
> > + ino = autofs4_dentry_ino(de);
> > + }
> > +
> > + /* Set mount requester */
> > + if (ino) {
> > + spin_lock(&sbi->fs_lock);
> > + ino->uid = wq->uid;
> > + ino->gid = wq->gid;
> > + spin_unlock(&sbi->fs_lock);
> > + }
> > +
> > + if (de)
> > + dput(de);
> > + }
> > +
>
> Please remind me again why autofs's use of current->uid and
> current->gid is not busted in the presence of PID namespaces, where
> these things are no longer system-wide unique?
We didn't really get to the bottom of that last time I posted this but
hopefully Serge will get right on this and we can sort out what, idf
anything I need to do.
Ian
On Thu, 2008-08-07 at 14:10 -0700, Andrew Morton wrote:
> On Thu, 07 Aug 2008 19:40:31 +0800
> Ian Kent <[email protected]> wrote:
> >
> > Subject: [PATCH 4/4] autofs4 - add miscelaneous device for ioctls
>
> I fixed that spello
>
> > Patch to add a miscellaneous device to the autofs4 module for routing
> > ioctls. This provides the ability to obtain an ioctl file handle for
> > an autofs mount point that is possibly covered by another mount.
> >
> > The actual problem with autofs is that it can't reconnect to existing
> > mounts. Immediately one things of just adding the ability to remount
> > autofs file systems would solve it, but alas, that can't work. This is
> > because autofs direct mounts and the implementation of "on demand mount
> > and expire" of nested mount trees have the file system mounted on top of
> > the mount trigger dentry.
> >
> > To resolve this a miscellaneous device node for routing ioctl commands
> > to these mount points has been implemented in the autofs4 kernel module
> > and a library added to autofs. This provides the ability to open a file
> > descriptor for these over mounted autofs mount points.
> >
> > Please refer to Documentation/filesystems/autofs4-mount-control.txt for
> > a discussion of the problem, implementation alternatives considered and
> > a description of the interface.
> >
> >
> > ...
> >
> > +
> > +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
> > +
> > +typedef int (*ioctl_fn)(struct file *,
> > +struct autofs_sb_info *, struct autofs_dev_ioctl *);
>
> Weird layout, which I fixed.
Auuuhh .. didn't want to do this. I personally like declarations like
this to be on a single line but checkpatch.pl complained about it.
>
> > +static int check_name(const char *name)
> > +{
> > + if (!strchr(name, '/'))
> > + return -EINVAL;
> > + return 0;
> > +}
> > +
> > +/*
> > + * Check a string doesn't overrun the chunk of
> > + * memory we copied from user land.
> > + */
> > +static int invalid_str(char *str, void *end)
> > +{
> > + while ((void *) str <= end)
> > + if (!*str++)
> > + return 0;
> > + return -EINVAL;
> > +}
>
> What is this? gWwe copy strings in from userspace in 10000 different
> places without needing checks such as this?
Yeah, that's true.
Since you recommend I get rid of I'll do so.
>
> > +/*
> > + * Check that the user compiled against correct version of autofs
> > + * misc device code.
> > + *
> > + * As well as checking the version compatibility this always copies
> > + * the kernel interface version out.
> > + */
> > +static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
> > +{
> > + int err = 0;
> > +
> > + if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) ||
> > + (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) {
> > + AUTOFS_WARN("ioctl control interface version mismatch: "
> > + "kernel(%u.%u), user(%u.%u), cmd(%d)",
> > + AUTOFS_DEV_IOCTL_VERSION_MAJOR,
> > + AUTOFS_DEV_IOCTL_VERSION_MINOR,
> > + param->ver_major, param->ver_minor, cmd);
> > + err = -EINVAL;
> > + }
> > +
> > + /* Fill in the kernel version. */
> > + param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
> > + param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
> > +
> > + return err;
> > +}
> > +
> > +/*
> > + * Copy parameter control struct, including a possible path allocated
> > + * at the end of the struct.
> > + */
> > +static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
> > +{
> > + struct autofs_dev_ioctl tmp, *ads;
>
> Variables called `tmp' get me upset, but it seems appropriate here.
>
> > + if (copy_from_user(&tmp, in, sizeof(tmp)))
> > + return ERR_PTR(-EFAULT);
> > +
> > + if (tmp.size < sizeof(tmp))
> > + return ERR_PTR(-EINVAL);
> > +
> > + ads = kmalloc(tmp.size, GFP_KERNEL);
> > + if (!ads)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + if (copy_from_user(ads, in, tmp.size)) {
> > + kfree(ads);
> > + return ERR_PTR(-EFAULT);
> > + }
> > +
> > + return ads;
> > +}
> > +
> > +static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
> > +{
> > + kfree(param);
> > + return;
> > +}
> > +
> > +/*
> > + * Check sanity of parameter control fields and if a path is present
> > + * check that it has a "/" and is terminated.
> > + */
> > +static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
> > +{
> > + int err = -EINVAL;
> > +
> > + if (check_dev_ioctl_version(cmd, param)) {
> > + AUTOFS_WARN("invalid device control module version "
> > + "supplied for cmd(0x%08x)", cmd);
> > + goto out;
>
> check_dev_ioctl_version() carefully returned a -EFOO value, but this
> caller dropped it on the floor.
Will fix.
>
> > + }
> > +
> > + if (param->size > sizeof(*param)) {
> > + err = check_name(param->path);
> > + if (err) {
> > + AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
> > + cmd);
> > + goto out;
> > + }
> > +
> > + err = invalid_str(param->path,
> > + (void *) ((size_t) param + param->size));
> > + if (err) {
> > + AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
> > + cmd);
> > + goto out;
> > + }
> > + }
> > +
> > + err = 0;
> > +out:
> > + return err;
> > +}
> > +
> >
> > ...
> >
> > +static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
> > +{
> > + struct files_struct *files = current->files;
> > + struct fdtable *fdt;
> > +
> > + spin_lock(&files->file_lock);
> > + fdt = files_fdtable(files);
> > + BUG_ON(fdt->fd[fd] != NULL);
> > + rcu_assign_pointer(fdt->fd[fd], file);
> > + FD_SET(fd, fdt->close_on_exec);
> > + spin_unlock(&files->file_lock);
> > +}
>
> urgh, it's bad to have done a copy-n-paste on fd_install() here. It
> means that if we go and change the locking in core kernel (quite
> possible) then people won't udpate this code.
>
> Do we have alternative here? Can we set close_on_exec outside the lock
> and just call fd_install()? Probably not.
>
> Can we export set_close_on_exec() then call that after having called
> fd_install()? I think so.
>
> But not this, please.
No problem.
You mentioned this last time as well.
Since there are a couple of possible approaches and I wasn't sure which
way to go I thought I'd post it as is and get feedback then make it a
followup patch.
Could the pthreads user space daemon exec something between fd_install()
and set_close_on_exec()?
Perhaps there are some other alternative approaches to this.
Suggestions?
Ian
On Thu, 2008-08-07 at 17:12 -0500, Serge E. Hallyn wrote:
> Quoting Andrew Morton ([email protected]):
> > On Thu, 07 Aug 2008 19:40:14 +0800
> > Ian Kent <[email protected]> wrote:
> >
> > > Patch to track the uid and gid of the last process to request a mount
> > > for on an autofs dentry.
> >
> > pet peeve: changelog should not tell the reader that this is a "patch".
> > Because when someone is reading the changelog in the git repository,
> > they hopefully already know that.
> >
> > > Signed-off-by: Ian Kent <[email protected]>
> > >
> > > ---
> > >
> > > fs/autofs4/autofs_i.h | 3 +++
> > > fs/autofs4/inode.c | 2 ++
> > > fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> > > 3 files changed, 39 insertions(+), 0 deletions(-)
> > >
> > >
> > > diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> > > index ea024d8..fa76d18 100644
> > > --- a/fs/autofs4/autofs_i.h
> > > +++ b/fs/autofs4/autofs_i.h
> > > @@ -63,6 +63,9 @@ struct autofs_info {
> > > unsigned long last_used;
> > > atomic_t count;
> > >
> > > + uid_t uid;
> > > + gid_t gid;
> > > +
> > > mode_t mode;
> > > size_t size;
> > >
> > > diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> > > index 9ca2d07..9408507 100644
> > > --- a/fs/autofs4/inode.c
> > > +++ b/fs/autofs4/inode.c
> > > @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> > > atomic_set(&ino->count, 0);
> > > }
> > >
> > > + ino->uid = 0;
> > > + ino->gid = 0;
> > > ino->mode = mode;
> > > ino->last_used = jiffies;
> > >
> > > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > > index 6d87bb1..7c60c0b 100644
> > > --- a/fs/autofs4/waitq.c
> > > +++ b/fs/autofs4/waitq.c
> > > @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
> > >
> > > status = wq->status;
> > >
> > > + /*
> > > + * For direct and offset mounts we need to track the requestrer
> >
> > typo which I'll fix.
> >
> > > + * uid and gid in the dentry info struct. This is so it can be
> > > + * supplied, on request, by the misc device ioctl interface.
> > > + * This is needed during daemon resatart when reconnecting
> > > + * to existing, active, autofs mounts. The uid and gid (and
> > > + * related string values) may be used for macro substitution
> > > + * in autofs mount maps.
> > > + */
> > > + if (!status) {
> > > + struct autofs_info *ino;
> > > + struct dentry *de = NULL;
> > > +
> > > + /* direct mount or browsable map */
> > > + ino = autofs4_dentry_ino(dentry);
> > > + if (!ino) {
> > > + /* If not lookup actual dentry used */
> > > + de = d_lookup(dentry->d_parent, &dentry->d_name);
> > > + if (de)
> > > + ino = autofs4_dentry_ino(de);
> > > + }
> > > +
> > > + /* Set mount requester */
> > > + if (ino) {
> > > + spin_lock(&sbi->fs_lock);
> > > + ino->uid = wq->uid;
> > > + ino->gid = wq->gid;
> > > + spin_unlock(&sbi->fs_lock);
> > > + }
> > > +
> > > + if (de)
> > > + dput(de);
> > > + }
> > > +
> >
> > Please remind me again why autofs's use of current->uid and
> > current->gid is not busted in the presence of PID namespaces, where
> > these things are no longer system-wide unique?
>
> I actually don't see what the autofs4_waitq->pid is used for. It's
> copied from current into wq->pid at autofs4_wait, and into a packet to
> send to userspace (I assume) at autofs4_notify_daemon.
>
> So as long as a daemon can serve multiple pid namespaces (which
> doubtless it can), the pid could be confusing (or erroneous) for the
> daemon.
Your point is well taken.
The pid is used purely for logging purposes to aid in debugging in user
space. I'm not sure it is worth worrying about it too much as the daemon
has no business interfering with user space processes it is not the
owner of.
>
> If I'm remotely right about how the pid is being used, then the thing to
> do would be to
> 1. store the daemon's pid namespace (would that belong in
> the autofs_sb_info?)
Yep.
> 2. store the task_pid(current) in the waitqueue
> 3. retrieve the pid_t for the waiting task in the daemon's
> pid namespace, and put that into the packet at
> autofs4_notify_daemon.
>
> I realize this patch was about the *uids*, but the pids seem more
> urgent.
OK, I get it.
I'll have a go at doing this for completeness.
Ian
On Fri, 2008-08-08 at 11:48 +0800, Ian Kent wrote:
> > >
> > > Please remind me again why autofs's use of current->uid and
> > > current->gid is not busted in the presence of PID namespaces, where
> > > these things are no longer system-wide unique?
> >
> > I actually don't see what the autofs4_waitq->pid is used for. It's
> > copied from current into wq->pid at autofs4_wait, and into a packet to
> > send to userspace (I assume) at autofs4_notify_daemon.
> >
> > So as long as a daemon can serve multiple pid namespaces (which
> > doubtless it can), the pid could be confusing (or erroneous) for the
> > daemon.
>
> Your point is well taken.
>
> The pid is used purely for logging purposes to aid in debugging in user
> space. I'm not sure it is worth worrying about it too much as the daemon
> has no business interfering with user space processes it is not the
> owner of.
>
> >
> > If I'm remotely right about how the pid is being used, then the thing to
> > do would be to
> > 1. store the daemon's pid namespace (would that belong in
> > the autofs_sb_info?)
>
> Yep.
>
> > 2. store the task_pid(current) in the waitqueue
> > 3. retrieve the pid_t for the waiting task in the daemon's
> > pid namespace, and put that into the packet at
> > autofs4_notify_daemon.
> >
> > I realize this patch was about the *uids*, but the pids seem more
> > urgent.
>
> OK, I get it.
> I'll have a go at doing this for completeness.
On second thoughts I'm not sure about this.
The pid that is logged needs to relate to a process in the name space of
the one that caused the mount to be done.
For example, suppose a GUI user finds mounts never expiring, then we get
a debug log to try and identify the culprit. So the pid should
correspond to a process that the user sees (So I guess in the namespace
of that user).
This is the only reason I added the pid to the request packet in the
first place.
Please correct me if my understanding of this is not right.
Ian
>
> Ian
>
On Fri, 08 Aug 2008 11:39:19 +0800 Ian Kent <[email protected]> wrote:
>
> On Thu, 2008-08-07 at 14:10 -0700, Andrew Morton wrote:
> > On Thu, 07 Aug 2008 19:40:31 +0800
> > Ian Kent <[email protected]> wrote:
> > >
> > > Subject: [PATCH 4/4] autofs4 - add miscelaneous device for ioctls
> >
> > I fixed that spello
> >
> > > Patch to add a miscellaneous device to the autofs4 module for routing
> > > ioctls. This provides the ability to obtain an ioctl file handle for
> > > an autofs mount point that is possibly covered by another mount.
> > >
> > > The actual problem with autofs is that it can't reconnect to existing
> > > mounts. Immediately one things of just adding the ability to remount
> > > autofs file systems would solve it, but alas, that can't work. This is
> > > because autofs direct mounts and the implementation of "on demand mount
> > > and expire" of nested mount trees have the file system mounted on top of
> > > the mount trigger dentry.
> > >
> > > To resolve this a miscellaneous device node for routing ioctl commands
> > > to these mount points has been implemented in the autofs4 kernel module
> > > and a library added to autofs. This provides the ability to open a file
> > > descriptor for these over mounted autofs mount points.
> > >
> > > Please refer to Documentation/filesystems/autofs4-mount-control.txt for
> > > a discussion of the problem, implementation alternatives considered and
> > > a description of the interface.
> > >
> > >
> > > ...
> > >
> > > +
> > > +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
> > > +
> > > +typedef int (*ioctl_fn)(struct file *,
> > > +struct autofs_sb_info *, struct autofs_dev_ioctl *);
> >
> > Weird layout, which I fixed.
>
> Auuuhh .. didn't want to do this. I personally like declarations like
> this to be on a single line but checkpatch.pl complained about it.
Well, that's why it's a checkpatch warning, not an error. The way I
look at checkpatch is that it is for detecting possible mistakes. Did
you _really_ mean to do that? Usually the answer is "nope, and I
wouldn't have noticed unless you told me". But sometimes the answer is
"yes, I really did mean that".
I routinely ignore checkpatch 80-col warnings, unless they are flagging
something which is just egregiously revolting.
Anyway, that's an aside. Given that you decided to fit that typedef
into 80 cols, the way you did it was weird ;)
> > > +static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
> > > +{
> > > + struct files_struct *files = current->files;
> > > + struct fdtable *fdt;
> > > +
> > > + spin_lock(&files->file_lock);
> > > + fdt = files_fdtable(files);
> > > + rcu_assign_pointer(fdt->fd[fd], file);
> > > + FD_SET(fd, fdt->close_on_exec);
> > > + spin_unlock(&files->file_lock);
> > > +}
> >
> > urgh, it's bad to have done a copy-n-paste on fd_install() here. It
> > means that if we go and change the locking in core kernel (quite
> > possible) then people won't udpate this code.
> >
> > Do we have alternative here? Can we set close_on_exec outside the lock
> > and just call fd_install()? Probably not.
> >
> > Can we export set_close_on_exec() then call that after having called
> > fd_install()? I think so.
> >
> > But not this, please.
>
> No problem.
> You mentioned this last time as well.
>
> Since there are a couple of possible approaches and I wasn't sure which
> way to go I thought I'd post it as is and get feedback then make it a
> followup patch.
>
> Could the pthreads user space daemon exec something between fd_install()
> and set_close_on_exec()?
Gee, I don't know, it would depend on the context.
Is that a private file*? Was it just created, and is there no
possibility that any other thread can be sharing it anyway? If so,
there's no problem.
> Perhaps there are some other alternative approaches to this.
>
> Suggestions?
I don't know enough about autofs nor about what problem you're
attacking here to be able to say, sorry. I don't even know why
close_on_exec is being set?
On Fri, 2008-08-08 at 11:25 +0800, Ian Kent wrote:
> On Thu, 2008-08-07 at 13:46 -0700, Andrew Morton wrote:
> > On Thu, 07 Aug 2008 19:40:14 +0800
> > Ian Kent <[email protected]> wrote:
> >
> > > Patch to track the uid and gid of the last process to request a mount
> > > for on an autofs dentry.
> >
> > pet peeve: changelog should not tell the reader that this is a "patch".
> > Because when someone is reading the changelog in the git repository,
> > they hopefully already know that.
> >
> > > Signed-off-by: Ian Kent <[email protected]>
> > >
> > > ---
> > >
> > > fs/autofs4/autofs_i.h | 3 +++
> > > fs/autofs4/inode.c | 2 ++
> > > fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> > > 3 files changed, 39 insertions(+), 0 deletions(-)
> > >
> > >
> > > diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> > > index ea024d8..fa76d18 100644
> > > --- a/fs/autofs4/autofs_i.h
> > > +++ b/fs/autofs4/autofs_i.h
> > > @@ -63,6 +63,9 @@ struct autofs_info {
> > > unsigned long last_used;
> > > atomic_t count;
> > >
> > > + uid_t uid;
> > > + gid_t gid;
> > > +
> > > mode_t mode;
> > > size_t size;
> > >
> > > diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> > > index 9ca2d07..9408507 100644
> > > --- a/fs/autofs4/inode.c
> > > +++ b/fs/autofs4/inode.c
> > > @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> > > atomic_set(&ino->count, 0);
> > > }
> > >
> > > + ino->uid = 0;
> > > + ino->gid = 0;
> > > ino->mode = mode;
> > > ino->last_used = jiffies;
> > >
> > > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > > index 6d87bb1..7c60c0b 100644
> > > --- a/fs/autofs4/waitq.c
> > > +++ b/fs/autofs4/waitq.c
> > > @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
> > >
> > > status = wq->status;
> > >
> > > + /*
> > > + * For direct and offset mounts we need to track the requestrer
> >
> > typo which I'll fix.
> >
> > > + * uid and gid in the dentry info struct. This is so it can be
> > > + * supplied, on request, by the misc device ioctl interface.
> > > + * This is needed during daemon resatart when reconnecting
> > > + * to existing, active, autofs mounts. The uid and gid (and
> > > + * related string values) may be used for macro substitution
> > > + * in autofs mount maps.
> > > + */
> > > + if (!status) {
> > > + struct autofs_info *ino;
> > > + struct dentry *de = NULL;
> > > +
> > > + /* direct mount or browsable map */
> > > + ino = autofs4_dentry_ino(dentry);
> > > + if (!ino) {
> > > + /* If not lookup actual dentry used */
> > > + de = d_lookup(dentry->d_parent, &dentry->d_name);
> > > + if (de)
> > > + ino = autofs4_dentry_ino(de);
> > > + }
> > > +
> > > + /* Set mount requester */
> > > + if (ino) {
> > > + spin_lock(&sbi->fs_lock);
> > > + ino->uid = wq->uid;
> > > + ino->gid = wq->gid;
> > > + spin_unlock(&sbi->fs_lock);
> > > + }
> > > +
> > > + if (de)
> > > + dput(de);
> > > + }
> > > +
> >
> > Please remind me again why autofs's use of current->uid and
> > current->gid is not busted in the presence of PID namespaces, where
> > these things are no longer system-wide unique?
>
> We didn't really get to the bottom of that last time I posted this but
> hopefully Serge will get right on this and we can sort out what, idf
> anything I need to do.
But, at this stage, I believe that if namespaces are being used then
there would also need to be a daemon within the container which would
have the same namespace view as other processes in the container. I
expect that the situation is not nearly as straight forward as I think
so I'll need to wait for further comments.
Ian
On Thu, 2008-08-07 at 22:31 -0700, Andrew Morton wrote:
> On Fri, 08 Aug 2008 11:39:19 +0800 Ian Kent <[email protected]> wrote:
>
> >
> > On Thu, 2008-08-07 at 14:10 -0700, Andrew Morton wrote:
> > > On Thu, 07 Aug 2008 19:40:31 +0800
> > > Ian Kent <[email protected]> wrote:
> > > >
> > > > Subject: [PATCH 4/4] autofs4 - add miscelaneous device for ioctls
> > >
> > > I fixed that spello
> > >
> > > > Patch to add a miscellaneous device to the autofs4 module for routing
> > > > ioctls. This provides the ability to obtain an ioctl file handle for
> > > > an autofs mount point that is possibly covered by another mount.
> > > >
> > > > The actual problem with autofs is that it can't reconnect to existing
> > > > mounts. Immediately one things of just adding the ability to remount
> > > > autofs file systems would solve it, but alas, that can't work. This is
> > > > because autofs direct mounts and the implementation of "on demand mount
> > > > and expire" of nested mount trees have the file system mounted on top of
> > > > the mount trigger dentry.
> > > >
> > > > To resolve this a miscellaneous device node for routing ioctl commands
> > > > to these mount points has been implemented in the autofs4 kernel module
> > > > and a library added to autofs. This provides the ability to open a file
> > > > descriptor for these over mounted autofs mount points.
> > > >
> > > > Please refer to Documentation/filesystems/autofs4-mount-control.txt for
> > > > a discussion of the problem, implementation alternatives considered and
> > > > a description of the interface.
> > > >
> > > >
> > > > ...
> > > >
> > > > +
> > > > +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
> > > > +
> > > > +typedef int (*ioctl_fn)(struct file *,
> > > > +struct autofs_sb_info *, struct autofs_dev_ioctl *);
> > >
> > > Weird layout, which I fixed.
> >
> > Auuuhh .. didn't want to do this. I personally like declarations like
> > this to be on a single line but checkpatch.pl complained about it.
>
> Well, that's why it's a checkpatch warning, not an error. The way I
> look at checkpatch is that it is for detecting possible mistakes. Did
> you _really_ mean to do that? Usually the answer is "nope, and I
> wouldn't have noticed unless you told me". But sometimes the answer is
> "yes, I really did mean that".
>
> I routinely ignore checkpatch 80-col warnings, unless they are flagging
> something which is just egregiously revolting.
>
> Anyway, that's an aside. Given that you decided to fit that typedef
> into 80 cols, the way you did it was weird ;)
>
> > > > +static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
> > > > +{
> > > > + struct files_struct *files = current->files;
> > > > + struct fdtable *fdt;
> > > > +
> > > > + spin_lock(&files->file_lock);
> > > > + fdt = files_fdtable(files);
> > > > + rcu_assign_pointer(fdt->fd[fd], file);
> > > > + FD_SET(fd, fdt->close_on_exec);
> > > > + spin_unlock(&files->file_lock);
> > > > +}
> > >
> > > urgh, it's bad to have done a copy-n-paste on fd_install() here. It
> > > means that if we go and change the locking in core kernel (quite
> > > possible) then people won't udpate this code.
> > >
> > > Do we have alternative here? Can we set close_on_exec outside the lock
> > > and just call fd_install()? Probably not.
> > >
> > > Can we export set_close_on_exec() then call that after having called
> > > fd_install()? I think so.
> > >
> > > But not this, please.
> >
> > No problem.
> > You mentioned this last time as well.
> >
> > Since there are a couple of possible approaches and I wasn't sure which
> > way to go I thought I'd post it as is and get feedback then make it a
> > followup patch.
> >
> > Could the pthreads user space daemon exec something between fd_install()
> > and set_close_on_exec()?
>
> Gee, I don't know, it would depend on the context.
>
> Is that a private file*? Was it just created, and is there no
> possibility that any other thread can be sharing it anyway? If so,
> there's no problem.
The problem is that autofs threads can exec mount or umount at any time
and we see annoying selinux file descriptor leak security violation
messages. So the point of this is to set the bit at the same time as the
file is inserted into the table.
>
> > Perhaps there are some other alternative approaches to this.
> >
> > Suggestions?
>
> I don't know enough about autofs nor about what problem you're
> attacking here to be able to say, sorry. I don't even know why
> close_on_exec is being set?
OK, sorry.
What I'm really after is:
Should I do this at all, given the above?
If this is sensible, should a parameter be added to fd_insall() to allow
it to be requested at descriptor install or should a new function, say,
fd_install_close_on_exec() be added?
Ian
On Fri, 08 Aug 2008 14:12:21 +0800 Ian Kent <[email protected]> wrote:
> > > No problem.
> > > You mentioned this last time as well.
> > >
> > > Since there are a couple of possible approaches and I wasn't sure which
> > > way to go I thought I'd post it as is and get feedback then make it a
> > > followup patch.
> > >
> > > Could the pthreads user space daemon exec something between fd_install()
> > > and set_close_on_exec()?
> >
> > Gee, I don't know, it would depend on the context.
> >
> > Is that a private file*? Was it just created, and is there no
> > possibility that any other thread can be sharing it anyway? If so,
> > there's no problem.
>
> The problem is that autofs threads can exec mount or umount at any time
> and we see annoying selinux file descriptor leak security violation
> messages. So the point of this is to set the bit at the same time as the
> file is inserted into the table.
>
> >
> > > Perhaps there are some other alternative approaches to this.
> > >
> > > Suggestions?
> >
> > I don't know enough about autofs nor about what problem you're
> > attacking here to be able to say, sorry. I don't even know why
> > close_on_exec is being set?
>
> OK, sorry.
>
> What I'm really after is:
> Should I do this at all, given the above?
I don't reliably know, sorry. <does viro summoning dance>
> If this is sensible, should a parameter be added to fd_insall() to allow
> it to be requested at descriptor install or should a new function, say,
> fd_install_close_on_exec() be added?
If we decide to do it this way, then we can add an extra arg to
fd_install(), I guess.
void fd_install(unsigned int fd, struct file *file,
void (*callback)(struct file *));
Quoting Ian Kent ([email protected]):
>
> On Fri, 2008-08-08 at 11:48 +0800, Ian Kent wrote:
> > > >
> > > > Please remind me again why autofs's use of current->uid and
> > > > current->gid is not busted in the presence of PID namespaces, where
> > > > these things are no longer system-wide unique?
> > >
> > > I actually don't see what the autofs4_waitq->pid is used for. It's
> > > copied from current into wq->pid at autofs4_wait, and into a packet to
> > > send to userspace (I assume) at autofs4_notify_daemon.
> > >
> > > So as long as a daemon can serve multiple pid namespaces (which
> > > doubtless it can), the pid could be confusing (or erroneous) for the
> > > daemon.
> >
> > Your point is well taken.
> >
> > The pid is used purely for logging purposes to aid in debugging in user
> > space. I'm not sure it is worth worrying about it too much as the daemon
> > has no business interfering with user space processes it is not the
> > owner of.
> >
> > >
> > > If I'm remotely right about how the pid is being used, then the thing to
> > > do would be to
> > > 1. store the daemon's pid namespace (would that belong in
> > > the autofs_sb_info?)
> >
> > Yep.
> >
> > > 2. store the task_pid(current) in the waitqueue
> > > 3. retrieve the pid_t for the waiting task in the daemon's
> > > pid namespace, and put that into the packet at
> > > autofs4_notify_daemon.
> > >
> > > I realize this patch was about the *uids*, but the pids seem more
> > > urgent.
> >
> > OK, I get it.
> > I'll have a go at doing this for completeness.
>
> On second thoughts I'm not sure about this.
>
> The pid that is logged needs to relate to a process in the name space of
> the one that caused the mount to be done.
>
> For example, suppose a GUI user finds mounts never expiring, then we get
> a debug log to try and identify the culprit. So the pid should
> correspond to a process that the user sees (So I guess in the namespace
> of that user).
>
> This is the only reason I added the pid to the request packet in the
> first place.
>
> Please correct me if my understanding of this is not right.
It's not wrong, but we just have to think through which value is the
most useful.
Any process executing clone(CLONE_NEWPID) (with CAP_SYS_ADMIN) can start
an application in a new pid namespace. So imagine the user at the
desktop clicking some button which runs an application in a new pid
namespace. Now if the user starts an xterm and runs ps -ef, the pid
values he'll see for the tasks in that new namespace will not be the
same as those which the application sees for itself, and not the same as
those which, right now, autofs would report.
For instance, if I start a shell in a new pid namespace, then within the
new pid namespace ps -ef gives me:
sh-3.2# ps -ef
UID PID PPID C STIME TTY TIME CMD
root 1 0 0 10:54 pts/1 00:00:00 /bin/sh
root 5 1 0 10:54 pts/1 00:00:00 /bin/sleep 100
root 6 1 0 10:54 pts/1 00:00:00 ps -ef
but from another shell as the same user, partial output of ps -ef
gives me:
root 2877 2876 0 10:54 pts/1 00:00:00 /bin/sh
root 2881 2877 0 10:54 pts/1 00:00:00 /bin/sleep 100
And so what we're trying to decide is whether autofs should send
pid 5 or pid 2881 for a message about the "/bin/sleep 100" task.
In fact, if the user clicks that button twice, chances are both
instances of the application will have the same pid values for each
process in the application. So now if autofs sends a message to the
user about the application, the user cannot tell which process is at
fault.
Autofs will be sending the user some message about 'process 5'. The
user won't know whether it means "the real" pid 5, [watchdog/0],
pid 5 in the first instance of the application, or pid 5 in the
second instance.
Now it's true that the user's xterm may still be in a different
(descendent) pidns of the autofs daemon. But we can't expect
the autofs daemon to do pid_t translation for the user, so I
think what we have to aim for is making sure that the values
reported are unique within the pidns of the autofs daemon. And
that means sending back either the pid values in the autofs
daemon's pid namespace, or using the top-level pid_ts, that is,
the pid values in the init namespace, which will be unique
on the whole system.
Sorry this turned out long-winded, I hope it makes sense.
And if I'm just showing a misunderstanding of what you're doing,
please do correct me :)
thanks,
-serge
Quoting Ian Kent ([email protected]):
>
> On Thu, 2008-08-07 at 17:15 -0500, Serge E. Hallyn wrote:
> > Quoting Andrew Morton ([email protected]):
> > > On Thu, 07 Aug 2008 19:40:14 +0800
> > > Ian Kent <[email protected]> wrote:
> > >
> > > > Patch to track the uid and gid of the last process to request a mount
> > > > for on an autofs dentry.
> > >
> > > pet peeve: changelog should not tell the reader that this is a "patch".
> > > Because when someone is reading the changelog in the git repository,
> > > they hopefully already know that.
> > >
> > > > Signed-off-by: Ian Kent <[email protected]>
> > > >
> > > > ---
> > > >
> > > > fs/autofs4/autofs_i.h | 3 +++
> > > > fs/autofs4/inode.c | 2 ++
> > > > fs/autofs4/waitq.c | 34 ++++++++++++++++++++++++++++++++++
> > > > 3 files changed, 39 insertions(+), 0 deletions(-)
> > > >
> > > >
> > > > diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
> > > > index ea024d8..fa76d18 100644
> > > > --- a/fs/autofs4/autofs_i.h
> > > > +++ b/fs/autofs4/autofs_i.h
> > > > @@ -63,6 +63,9 @@ struct autofs_info {
> > > > unsigned long last_used;
> > > > atomic_t count;
> > > >
> > > > + uid_t uid;
> > > > + gid_t gid;
> > > > +
> > > > mode_t mode;
> > > > size_t size;
> > > >
> > > > diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
> > > > index 9ca2d07..9408507 100644
> > > > --- a/fs/autofs4/inode.c
> > > > +++ b/fs/autofs4/inode.c
> > > > @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
> > > > atomic_set(&ino->count, 0);
> > > > }
> > > >
> > > > + ino->uid = 0;
> > > > + ino->gid = 0;
> > > > ino->mode = mode;
> > > > ino->last_used = jiffies;
> > > >
> > > > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
> > > > index 6d87bb1..7c60c0b 100644
> > > > --- a/fs/autofs4/waitq.c
> > > > +++ b/fs/autofs4/waitq.c
> > > > @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
> > > >
> > > > status = wq->status;
> > > >
> > > > + /*
> > > > + * For direct and offset mounts we need to track the requestrer
> > >
> > > typo which I'll fix.
> > >
> > > > + * uid and gid in the dentry info struct. This is so it can be
> > > > + * supplied, on request, by the misc device ioctl interface.
> > > > + * This is needed during daemon resatart when reconnecting
> > > > + * to existing, active, autofs mounts. The uid and gid (and
> > > > + * related string values) may be used for macro substitution
> > > > + * in autofs mount maps.
> >
> > Hi Ian,
> >
> > could you say just a few more words on these macro substitution?
> >
> > I think your use of uids can completely ignore user namespaces, but it
> > depends on who does the macro substitutions and how...
>
> Suppose we have an autofs map entry in /etc/auto.master:
> /test /etc/auto.test
>
> and in /etc/auto.test
>
> im1 /om1 server:/dir1 \
> /om2/$USER server2:/dir2
>
> Then if this is automounted and the daemon is sent a SIGKILL and started
> again we need the uid number that was used when this was mounted to
> re-construct the offsets (/om2/$USER in this case needs the
> substitution). The uid is used to lookup the user name.
Based on that I can't quite tell whether there is any security property
that could be violated by uid 500 'tricking' the autofs daemon into
doing something for uid 0. It sounds like it could be annoying but
not a security violation?
> Can you point me in the right direction wrt. to namespaces on this
> please Serge? We didn't really get to the bottom of it last time I
> think.
Well user namespaces are still being implemented, in fact at some level
still being designed. So it doesn't hurt to consider the right thing
to do right off the bat, but unlike the pid namespaces this really shouldn't
be hurting anyone right now.
User namespaces will be hierarchical - like pid namespaces, except
somewhat differently. If user 500 in userns 0 creates a new user
namespace, then each userid in the new user namespace is also
'owned' by (500,0). Our hope is that we can make the userns
core such that unprivileged users can safely unshare a new pid
namespace. So for that reason, I suspect we'll want to handle
user namespace much like I just suggested handling pid namespaces.
1. For each 'system container', that is, a process set with its
own network stack, its own devices, etc, we'll want separate
autofs daemons. That's simple enough - apart from determining
what qualifies as a 'system container'. Here I think that will
depend on how autofs talks to the userspace daemon, and what
needs to be isolated in order to prevent a surreptitious admin
in one system container from having its autofs daemon talk to
another.
2. Within a system container, I think we'll want the autofs
daemon to store a pointer to its user namespace. When an
autofs4_wait happens, the wq uses the full current->user
to look up the task's uid within the autofs daemon's own user
namespace, and it uses for macro expansions.
That prevents unprivileged user hallyn from creating a new
user namespace and trying to fool autofs into expanding
macros for uid 0.
Note that certainly for now and probably a long time coming,
you do need privilege to clone(CLONE_NEWUSER) so this isn't
urgent.
thanks,
-serge
On Fri, 2008-08-08 at 09:58 -0500, Serge E. Hallyn wrote:
> Quoting Ian Kent ([email protected]):
> >
> > On Fri, 2008-08-08 at 11:48 +0800, Ian Kent wrote:
> > > > >
> > > > > Please remind me again why autofs's use of current->uid and
> > > > > current->gid is not busted in the presence of PID namespaces, where
> > > > > these things are no longer system-wide unique?
> > > >
> > > > I actually don't see what the autofs4_waitq->pid is used for. It's
> > > > copied from current into wq->pid at autofs4_wait, and into a packet to
> > > > send to userspace (I assume) at autofs4_notify_daemon.
> > > >
> > > > So as long as a daemon can serve multiple pid namespaces (which
> > > > doubtless it can), the pid could be confusing (or erroneous) for the
> > > > daemon.
> > >
> > > Your point is well taken.
> > >
> > > The pid is used purely for logging purposes to aid in debugging in user
> > > space. I'm not sure it is worth worrying about it too much as the daemon
> > > has no business interfering with user space processes it is not the
> > > owner of.
> > >
> > > >
> > > > If I'm remotely right about how the pid is being used, then the thing to
> > > > do would be to
> > > > 1. store the daemon's pid namespace (would that belong in
> > > > the autofs_sb_info?)
> > >
> > > Yep.
> > >
> > > > 2. store the task_pid(current) in the waitqueue
> > > > 3. retrieve the pid_t for the waiting task in the daemon's
> > > > pid namespace, and put that into the packet at
> > > > autofs4_notify_daemon.
> > > >
> > > > I realize this patch was about the *uids*, but the pids seem more
> > > > urgent.
> > >
> > > OK, I get it.
> > > I'll have a go at doing this for completeness.
> >
> > On second thoughts I'm not sure about this.
> >
> > The pid that is logged needs to relate to a process in the name space of
> > the one that caused the mount to be done.
> >
> > For example, suppose a GUI user finds mounts never expiring, then we get
> > a debug log to try and identify the culprit. So the pid should
> > correspond to a process that the user sees (So I guess in the namespace
> > of that user).
> >
> > This is the only reason I added the pid to the request packet in the
> > first place.
> >
> > Please correct me if my understanding of this is not right.
>
> It's not wrong, but we just have to think through which value is the
> most useful.
>
> Any process executing clone(CLONE_NEWPID) (with CAP_SYS_ADMIN) can start
> an application in a new pid namespace. So imagine the user at the
> desktop clicking some button which runs an application in a new pid
> namespace. Now if the user starts an xterm and runs ps -ef, the pid
> values he'll see for the tasks in that new namespace will not be the
> same as those which the application sees for itself, and not the same as
> those which, right now, autofs would report.
>
> For instance, if I start a shell in a new pid namespace, then within the
> new pid namespace ps -ef gives me:
>
> sh-3.2# ps -ef
> UID PID PPID C STIME TTY TIME CMD
> root 1 0 0 10:54 pts/1 00:00:00 /bin/sh
> root 5 1 0 10:54 pts/1 00:00:00 /bin/sleep 100
> root 6 1 0 10:54 pts/1 00:00:00 ps -ef
>
> but from another shell as the same user, partial output of ps -ef
> gives me:
>
> root 2877 2876 0 10:54 pts/1 00:00:00 /bin/sh
> root 2881 2877 0 10:54 pts/1 00:00:00 /bin/sleep 100
>
> And so what we're trying to decide is whether autofs should send
> pid 5 or pid 2881 for a message about the "/bin/sleep 100" task.
>
> In fact, if the user clicks that button twice, chances are both
> instances of the application will have the same pid values for each
> process in the application. So now if autofs sends a message to the
> user about the application, the user cannot tell which process is at
> fault.
>
> Autofs will be sending the user some message about 'process 5'. The
> user won't know whether it means "the real" pid 5, [watchdog/0],
> pid 5 in the first instance of the application, or pid 5 in the
> second instance.
>
> Now it's true that the user's xterm may still be in a different
> (descendent) pidns of the autofs daemon. But we can't expect
> the autofs daemon to do pid_t translation for the user, so I
> think what we have to aim for is making sure that the values
> reported are unique within the pidns of the autofs daemon. And
> that means sending back either the pid values in the autofs
> daemon's pid namespace, or using the top-level pid_ts, that is,
> the pid values in the init namespace, which will be unique
> on the whole system.
>
> Sorry this turned out long-winded, I hope it makes sense.
> And if I'm just showing a misunderstanding of what you're doing,
> please do correct me :)
Yes, it's a bit tricky.
In reality this information is only logged when debug logging is enabled
and generally is only used by myself or others that maintain autofs. But
getting sensible logging is important so it's worth sorting this out.
I think it would be best to use the pid of the highest view namespace
which I think is the gist of what you said in the beginning. Then (at
some future time), if there was a user space API, the daemon could
report additional pid information related to subordinate pid namespaces.
I am assuming that, to be useful, an autofs daemon that is able to serve
multiple namespaces would be higher up in the tree. But the forgoing
description sounds like there's not a necessarily a hierarchic structure
to pid namespaces?
The other issue that comes up is, after storing (a reference to) the
daemon namespace in the super block info struct a "kill -9" on the
daemon would render the namespace invalid. At the moment, when this
happens the write on the kernel pipe fails causing the autofs mount to
become catatonic, but for the namespace aware case the namespace is now
invalid so we won't get that far. I could make the mount catatonic when
I detect that the namespace has become invalid but I'm not sure how to
check it. Is there a way I can to do this? Would there be any issues
with namespace (pid) reuse for such a check?
Sorry to seem so thick about this but the implications of namespaces for
autofs are quite new to me and it is important that autofs "plays well
with others" as the development progresses.
Anyway, assuming I'm on the right track I'll poke around further and see
if I can work out how I to do this.
Ian
On Thu, Aug 07, 2008 at 07:40:06PM +0800, Ian Kent wrote:
> Usage of the AUTOFS_TYPE_* defines is a little confusing and
> appears inconsistent.
Not enough of an explanation for this patch.
It does:
- move AUTOFS_TYPE_INDIRECT/AUTOFS_TYPE_DIRECT/AUTOFS_TYPE_OFFSET
to include/linux/auto_fs4.h. Obviously okay but should mentioned
in the changelog.
- add a new, unused AUTOFS_TYPE_ANY constant. Why?
- add a new AUTOFS_TYPE_TRIGGER and use it instead of
AUTOFS_TYPE_DIRECT or in one single case
(AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET). Definitively needs a good
explanation.
- replaces one use of sbi->type = 0 with AUTOFS_TYPE_INDIRECT (0x0001).
Why?
On Thu, Aug 07, 2008 at 07:40:31PM +0800, Ian Kent wrote:
> The actual problem with autofs is that it can't reconnect to existing
> mounts. Immediately one things of just adding the ability to remount
> autofs file systems would solve it, but alas, that can't work. This is
> because autofs direct mounts and the implementation of "on demand mount
> and expire" of nested mount trees have the file system mounted on top of
> the mount trigger dentry.
So what you really need instead of all the ioctl cruft is to get access
to the sb of the hidden autofs4 mount. One way to do that that I can
think of right now is to change from using get_sb_nodev as ->get_sb
to a variant that can find an existing superblock using some mount
options.
On Thu, Aug 07, 2008 at 07:40:23PM +0800, Ian Kent wrote:
> Patch to add documentation for the miscellaneous device module of
> autofs4.
Any documentation should preferably stay withthe patch introducing
the interface. But hopefully most of this won't be needed anyway
with the proper solution.
Quoting Ian Kent ([email protected]):
>
> On Fri, 2008-08-08 at 09:58 -0500, Serge E. Hallyn wrote:
> > Quoting Ian Kent ([email protected]):
> > >
> > > On Fri, 2008-08-08 at 11:48 +0800, Ian Kent wrote:
> > > > > >
> > > > > > Please remind me again why autofs's use of current->uid and
> > > > > > current->gid is not busted in the presence of PID namespaces, where
> > > > > > these things are no longer system-wide unique?
> > > > >
> > > > > I actually don't see what the autofs4_waitq->pid is used for. It's
> > > > > copied from current into wq->pid at autofs4_wait, and into a packet to
> > > > > send to userspace (I assume) at autofs4_notify_daemon.
> > > > >
> > > > > So as long as a daemon can serve multiple pid namespaces (which
> > > > > doubtless it can), the pid could be confusing (or erroneous) for the
> > > > > daemon.
> > > >
> > > > Your point is well taken.
> > > >
> > > > The pid is used purely for logging purposes to aid in debugging in user
> > > > space. I'm not sure it is worth worrying about it too much as the daemon
> > > > has no business interfering with user space processes it is not the
> > > > owner of.
> > > >
> > > > >
> > > > > If I'm remotely right about how the pid is being used, then the thing to
> > > > > do would be to
> > > > > 1. store the daemon's pid namespace (would that belong in
> > > > > the autofs_sb_info?)
> > > >
> > > > Yep.
> > > >
> > > > > 2. store the task_pid(current) in the waitqueue
> > > > > 3. retrieve the pid_t for the waiting task in the daemon's
> > > > > pid namespace, and put that into the packet at
> > > > > autofs4_notify_daemon.
> > > > >
> > > > > I realize this patch was about the *uids*, but the pids seem more
> > > > > urgent.
> > > >
> > > > OK, I get it.
> > > > I'll have a go at doing this for completeness.
> > >
> > > On second thoughts I'm not sure about this.
> > >
> > > The pid that is logged needs to relate to a process in the name space of
> > > the one that caused the mount to be done.
> > >
> > > For example, suppose a GUI user finds mounts never expiring, then we get
> > > a debug log to try and identify the culprit. So the pid should
> > > correspond to a process that the user sees (So I guess in the namespace
> > > of that user).
> > >
> > > This is the only reason I added the pid to the request packet in the
> > > first place.
> > >
> > > Please correct me if my understanding of this is not right.
> >
> > It's not wrong, but we just have to think through which value is the
> > most useful.
> >
> > Any process executing clone(CLONE_NEWPID) (with CAP_SYS_ADMIN) can start
> > an application in a new pid namespace. So imagine the user at the
> > desktop clicking some button which runs an application in a new pid
> > namespace. Now if the user starts an xterm and runs ps -ef, the pid
> > values he'll see for the tasks in that new namespace will not be the
> > same as those which the application sees for itself, and not the same as
> > those which, right now, autofs would report.
> >
> > For instance, if I start a shell in a new pid namespace, then within the
> > new pid namespace ps -ef gives me:
> >
> > sh-3.2# ps -ef
> > UID PID PPID C STIME TTY TIME CMD
> > root 1 0 0 10:54 pts/1 00:00:00 /bin/sh
> > root 5 1 0 10:54 pts/1 00:00:00 /bin/sleep 100
> > root 6 1 0 10:54 pts/1 00:00:00 ps -ef
> >
> > but from another shell as the same user, partial output of ps -ef
> > gives me:
> >
> > root 2877 2876 0 10:54 pts/1 00:00:00 /bin/sh
> > root 2881 2877 0 10:54 pts/1 00:00:00 /bin/sleep 100
> >
> > And so what we're trying to decide is whether autofs should send
> > pid 5 or pid 2881 for a message about the "/bin/sleep 100" task.
> >
> > In fact, if the user clicks that button twice, chances are both
> > instances of the application will have the same pid values for each
> > process in the application. So now if autofs sends a message to the
> > user about the application, the user cannot tell which process is at
> > fault.
> >
> > Autofs will be sending the user some message about 'process 5'. The
> > user won't know whether it means "the real" pid 5, [watchdog/0],
> > pid 5 in the first instance of the application, or pid 5 in the
> > second instance.
> >
> > Now it's true that the user's xterm may still be in a different
> > (descendent) pidns of the autofs daemon. But we can't expect
> > the autofs daemon to do pid_t translation for the user, so I
> > think what we have to aim for is making sure that the values
> > reported are unique within the pidns of the autofs daemon. And
> > that means sending back either the pid values in the autofs
> > daemon's pid namespace, or using the top-level pid_ts, that is,
> > the pid values in the init namespace, which will be unique
> > on the whole system.
> >
> > Sorry this turned out long-winded, I hope it makes sense.
> > And if I'm just showing a misunderstanding of what you're doing,
> > please do correct me :)
>
> Yes, it's a bit tricky.
>
> In reality this information is only logged when debug logging is enabled
> and generally is only used by myself or others that maintain autofs. But
> getting sensible logging is important so it's worth sorting this out.
>
> I think it would be best to use the pid of the highest view namespace
> which I think is the gist of what you said in the beginning. Then (at
> some future time), if there was a user space API, the daemon could
> report additional pid information related to subordinate pid namespaces.
> I am assuming that, to be useful, an autofs daemon that is able to serve
> multiple namespaces would be higher up in the tree. But the forgoing
> description sounds like there's not a necessarily a hierarchic structure
> to pid namespaces?
It is a simple tree, but of course if you have 3 autofs daemons in
separate containers, and one on the main host, then the pid namespaces for
each of the 3 container autofs daemons will be siblings, so they won't
have meaningful translations for each others' pids, while pids in
each container will have meaningful translations in the host system's
pid namespace (the init_pid_ns).
> The other issue that comes up is, after storing (a reference to) the
> daemon namespace in the super block info struct a "kill -9" on the
> daemon would render the namespace invalid. At the moment, when this
> happens the write on the kernel pipe fails causing the autofs mount to
> become catatonic, but for the namespace aware case the namespace is now
> invalid so we won't get that far. I could make the mount catatonic when
I figured you would grab a reference to the pid namespace, so it
wouldn't go away until you released the superblock or registered a new
daemon for it.
> I detect that the namespace has become invalid but I'm not sure how to
> check it. Is there a way I can to do this? Would there be any issues
> with namespace (pid) reuse for such a check?
I'm not sure what you mean - but struct pid is never reused so long
as you have a reference to one. So you could store
get_pid(task_pid(autofs_daemon)) and check whether the autofs daemon's
pid has changed that way, I suppose. But grabbing a struct pid reference
does not pin the pid_ns iirc.
Maybe you're right about just getting the top-level pid_nr.
> Sorry to seem so thick about this but the implications of namespaces for
> autofs are quite new to me and it is important that autofs "plays well
> with others" as the development progresses.
>
> Anyway, assuming I'm on the right track I'll poke around further and see
> if I can work out how I to do this.
Yes I think you're on the right track. And registering the top-level
pid may be the simplest thing to do. You'd get that by grabbing
pid_nr(task_pid(task)). Then you don't have to do any sort of pid
namespace storing at all.
The other approach has its appeal, but maybe it's not worth the
complications at this point.
thanks,
-serge
On Sat, 2008-08-09 at 08:47 -0400, Christoph Hellwig wrote:
> On Thu, Aug 07, 2008 at 07:40:06PM +0800, Ian Kent wrote:
> > Usage of the AUTOFS_TYPE_* defines is a little confusing and
> > appears inconsistent.
>
> Not enough of an explanation for this patch.
OK, I'll re-submit this with an updated description.
>
> It does:
>
> - move AUTOFS_TYPE_INDIRECT/AUTOFS_TYPE_DIRECT/AUTOFS_TYPE_OFFSET
> to include/linux/auto_fs4.h. Obviously okay but should mentioned
> in the changelog.
So that user space can use consistent definitions.
> - add a new, unused AUTOFS_TYPE_ANY constant. Why?
It is used by the later ioctl implementation patch.
> - add a new AUTOFS_TYPE_TRIGGER and use it instead of
> AUTOFS_TYPE_DIRECT or in one single case
> (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET). Definitively needs a good
> explanation.
The direct and offset autofs mount types are, to a large extent, treated
the same way in the kernel module but are used a little differently in
user space. Previously, an offset mount had it's type set to
AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET but, the subsequent ioctl
implementation patch needs to be able to identify all three types of
autofs mount distinctly.
> - replaces one use of sbi->type = 0 with AUTOFS_TYPE_INDIRECT (0x0001).
> Why?
Previously, a type of 0 was implicitly assumed to be type
AUTOFS_TYPE_INDIRECT but that was not clear and doesn't lend itself to
clearly defined logic when trying to locate a specific type of autofs
mount in the later patch in the series.
Ian
On Sat, 2008-08-09 at 08:59 -0400, Christoph Hellwig wrote:
> On Thu, Aug 07, 2008 at 07:40:31PM +0800, Ian Kent wrote:
> > The actual problem with autofs is that it can't reconnect to existing
> > mounts. Immediately one things of just adding the ability to remount
> > autofs file systems would solve it, but alas, that can't work. This is
> > because autofs direct mounts and the implementation of "on demand mount
> > and expire" of nested mount trees have the file system mounted on top of
> > the mount trigger dentry.
>
> So what you really need instead of all the ioctl cruft is to get access
> to the sb of the hidden autofs4 mount. One way to do that that I can
> think of right now is to change from using get_sb_nodev as ->get_sb
> to a variant that can find an existing superblock using some mount
> options.
I wished I'd spoken to you about this a long time ago since you seem to
know how I can do this using a remount option but this isn't enough of
an explanation of how it can be done.
AFAICT, mount(8) will never be able to get a "struct path" or "struct
nameidata" to a mount point that is covered using the path lookup in
fs/namespace.c:do_*_mount() so we'll never be called back via any VFS
methods. So tell me more about how this could work please!
Ian
On Sat, Aug 09, 2008 at 11:29:09PM +0800, Ian Kent wrote:
>
> On Sat, 2008-08-09 at 08:59 -0400, Christoph Hellwig wrote:
> > On Thu, Aug 07, 2008 at 07:40:31PM +0800, Ian Kent wrote:
> > > The actual problem with autofs is that it can't reconnect to existing
> > > mounts. Immediately one things of just adding the ability to remount
> > > autofs file systems would solve it, but alas, that can't work. This is
> > > because autofs direct mounts and the implementation of "on demand mount
> > > and expire" of nested mount trees have the file system mounted on top of
> > > the mount trigger dentry.
> >
> > So what you really need instead of all the ioctl cruft is to get access
> > to the sb of the hidden autofs4 mount. One way to do that that I can
> > think of right now is to change from using get_sb_nodev as ->get_sb
> > to a variant that can find an existing superblock using some mount
> > options.
>
> I wished I'd spoken to you about this a long time ago since you seem to
> know how I can do this using a remount option but this isn't enough of
> an explanation of how it can be done.
>
> AFAICT, mount(8) will never be able to get a "struct path" or "struct
> nameidata" to a mount point that is covered using the path lookup in
> fs/namespace.c:do_*_mount() so we'll never be called back via any VFS
> methods. So tell me more about how this could work please!
Your introduction tells what you want to do is a remount, for which
finding the superblock is enough. If that's not the actual requirement
please explain the requirement. And don't point me to the gazillion
ioctls you added, that seems to mostly not have much to do with that..
On Sat, 2008-08-09 at 13:18 -0400, Christoph Hellwig wrote:
> On Sat, Aug 09, 2008 at 11:29:09PM +0800, Ian Kent wrote:
> >
> > On Sat, 2008-08-09 at 08:59 -0400, Christoph Hellwig wrote:
> > > On Thu, Aug 07, 2008 at 07:40:31PM +0800, Ian Kent wrote:
> > > > The actual problem with autofs is that it can't reconnect to existing
> > > > mounts. Immediately one things of just adding the ability to remount
> > > > autofs file systems would solve it, but alas, that can't work. This is
> > > > because autofs direct mounts and the implementation of "on demand mount
> > > > and expire" of nested mount trees have the file system mounted on top of
> > > > the mount trigger dentry.
> > >
> > > So what you really need instead of all the ioctl cruft is to get access
> > > to the sb of the hidden autofs4 mount. One way to do that that I can
> > > think of right now is to change from using get_sb_nodev as ->get_sb
> > > to a variant that can find an existing superblock using some mount
> > > options.
> >
> > I wished I'd spoken to you about this a long time ago since you seem to
> > know how I can do this using a remount option but this isn't enough of
> > an explanation of how it can be done.
> >
> > AFAICT, mount(8) will never be able to get a "struct path" or "struct
> > nameidata" to a mount point that is covered using the path lookup in
> > fs/namespace.c:do_*_mount() so we'll never be called back via any VFS
> > methods. So tell me more about how this could work please!
>
> Your introduction tells what you want to do is a remount, for which
> finding the superblock is enough. If that's not the actual requirement
> please explain the requirement. And don't point me to the gazillion
> ioctls you added, that seems to mostly not have much to do with that..
The text you've quoted from provides quite a detailed explanation of
this and my response above to your question also notes why this cannot
be done in this case.
Perhaps you missed the fact that the mount in question is covered by
another mount, as noted in the quoted text and in my answer, which is
why a mount with a remount option cannot work. Or perhaps your assuming
the covering mount is also an autofs mount which may be the case but
generally is not.
Quoting Serge E. Hallyn ([email protected]):
> Quoting Ian Kent ([email protected]):
> >
> > On Fri, 2008-08-08 at 09:58 -0500, Serge E. Hallyn wrote:
> > > Quoting Ian Kent ([email protected]):
> > > >
> > > > On Fri, 2008-08-08 at 11:48 +0800, Ian Kent wrote:
> > > > > > >
> > > > > > > Please remind me again why autofs's use of current->uid and
> > > > > > > current->gid is not busted in the presence of PID namespaces, where
> > > > > > > these things are no longer system-wide unique?
> > > > > >
> > > > > > I actually don't see what the autofs4_waitq->pid is used for. It's
> > > > > > copied from current into wq->pid at autofs4_wait, and into a packet to
> > > > > > send to userspace (I assume) at autofs4_notify_daemon.
> > > > > >
> > > > > > So as long as a daemon can serve multiple pid namespaces (which
> > > > > > doubtless it can), the pid could be confusing (or erroneous) for the
> > > > > > daemon.
> > > > >
> > > > > Your point is well taken.
> > > > >
> > > > > The pid is used purely for logging purposes to aid in debugging in user
> > > > > space. I'm not sure it is worth worrying about it too much as the daemon
> > > > > has no business interfering with user space processes it is not the
> > > > > owner of.
> > > > >
> > > > > >
> > > > > > If I'm remotely right about how the pid is being used, then the thing to
> > > > > > do would be to
> > > > > > 1. store the daemon's pid namespace (would that belong in
> > > > > > the autofs_sb_info?)
> > > > >
> > > > > Yep.
> > > > >
> > > > > > 2. store the task_pid(current) in the waitqueue
> > > > > > 3. retrieve the pid_t for the waiting task in the daemon's
> > > > > > pid namespace, and put that into the packet at
> > > > > > autofs4_notify_daemon.
> > > > > >
> > > > > > I realize this patch was about the *uids*, but the pids seem more
> > > > > > urgent.
> > > > >
> > > > > OK, I get it.
> > > > > I'll have a go at doing this for completeness.
> > > >
> > > > On second thoughts I'm not sure about this.
> > > >
> > > > The pid that is logged needs to relate to a process in the name space of
> > > > the one that caused the mount to be done.
> > > >
> > > > For example, suppose a GUI user finds mounts never expiring, then we get
> > > > a debug log to try and identify the culprit. So the pid should
> > > > correspond to a process that the user sees (So I guess in the namespace
> > > > of that user).
> > > >
> > > > This is the only reason I added the pid to the request packet in the
> > > > first place.
> > > >
> > > > Please correct me if my understanding of this is not right.
> > >
> > > It's not wrong, but we just have to think through which value is the
> > > most useful.
> > >
> > > Any process executing clone(CLONE_NEWPID) (with CAP_SYS_ADMIN) can start
> > > an application in a new pid namespace. So imagine the user at the
> > > desktop clicking some button which runs an application in a new pid
> > > namespace. Now if the user starts an xterm and runs ps -ef, the pid
> > > values he'll see for the tasks in that new namespace will not be the
> > > same as those which the application sees for itself, and not the same as
> > > those which, right now, autofs would report.
> > >
> > > For instance, if I start a shell in a new pid namespace, then within the
> > > new pid namespace ps -ef gives me:
> > >
> > > sh-3.2# ps -ef
> > > UID PID PPID C STIME TTY TIME CMD
> > > root 1 0 0 10:54 pts/1 00:00:00 /bin/sh
> > > root 5 1 0 10:54 pts/1 00:00:00 /bin/sleep 100
> > > root 6 1 0 10:54 pts/1 00:00:00 ps -ef
> > >
> > > but from another shell as the same user, partial output of ps -ef
> > > gives me:
> > >
> > > root 2877 2876 0 10:54 pts/1 00:00:00 /bin/sh
> > > root 2881 2877 0 10:54 pts/1 00:00:00 /bin/sleep 100
> > >
> > > And so what we're trying to decide is whether autofs should send
> > > pid 5 or pid 2881 for a message about the "/bin/sleep 100" task.
> > >
> > > In fact, if the user clicks that button twice, chances are both
> > > instances of the application will have the same pid values for each
> > > process in the application. So now if autofs sends a message to the
> > > user about the application, the user cannot tell which process is at
> > > fault.
> > >
> > > Autofs will be sending the user some message about 'process 5'. The
> > > user won't know whether it means "the real" pid 5, [watchdog/0],
> > > pid 5 in the first instance of the application, or pid 5 in the
> > > second instance.
> > >
> > > Now it's true that the user's xterm may still be in a different
> > > (descendent) pidns of the autofs daemon. But we can't expect
> > > the autofs daemon to do pid_t translation for the user, so I
> > > think what we have to aim for is making sure that the values
> > > reported are unique within the pidns of the autofs daemon. And
> > > that means sending back either the pid values in the autofs
> > > daemon's pid namespace, or using the top-level pid_ts, that is,
> > > the pid values in the init namespace, which will be unique
> > > on the whole system.
> > >
> > > Sorry this turned out long-winded, I hope it makes sense.
> > > And if I'm just showing a misunderstanding of what you're doing,
> > > please do correct me :)
> >
> > Yes, it's a bit tricky.
> >
> > In reality this information is only logged when debug logging is enabled
> > and generally is only used by myself or others that maintain autofs. But
> > getting sensible logging is important so it's worth sorting this out.
> >
> > I think it would be best to use the pid of the highest view namespace
> > which I think is the gist of what you said in the beginning. Then (at
> > some future time), if there was a user space API, the daemon could
> > report additional pid information related to subordinate pid namespaces.
> > I am assuming that, to be useful, an autofs daemon that is able to serve
> > multiple namespaces would be higher up in the tree. But the forgoing
> > description sounds like there's not a necessarily a hierarchic structure
> > to pid namespaces?
>
> It is a simple tree, but of course if you have 3 autofs daemons in
> separate containers, and one on the main host, then the pid namespaces for
> each of the 3 container autofs daemons will be siblings, so they won't
> have meaningful translations for each others' pids, while pids in
> each container will have meaningful translations in the host system's
> pid namespace (the init_pid_ns).
>
> > The other issue that comes up is, after storing (a reference to) the
> > daemon namespace in the super block info struct a "kill -9" on the
> > daemon would render the namespace invalid. At the moment, when this
> > happens the write on the kernel pipe fails causing the autofs mount to
> > become catatonic, but for the namespace aware case the namespace is now
> > invalid so we won't get that far. I could make the mount catatonic when
>
> I figured you would grab a reference to the pid namespace, so it
> wouldn't go away until you released the superblock or registered a new
> daemon for it.
>
> > I detect that the namespace has become invalid but I'm not sure how to
> > check it. Is there a way I can to do this? Would there be any issues
> > with namespace (pid) reuse for such a check?
>
> I'm not sure what you mean - but struct pid is never reused so long
> as you have a reference to one. So you could store
> get_pid(task_pid(autofs_daemon)) and check whether the autofs daemon's
> pid has changed that way, I suppose. But grabbing a struct pid reference
> does not pin the pid_ns iirc.
>
> Maybe you're right about just getting the top-level pid_nr.
After doing a quick test with your git tree, I find that I was wrong,
and task->pid is the global pid of the process, not the pid in its own
pid namespace.
So currently autofs sends a process id in the init_pid_ns. This may
be meaningless in the autofs daemon's pid namespace, but since the
purpose is just for logging/debugging, having a global pid, which
uniquely identifies any task on the system, seems correct.
So in terms of pids no change is needed IMO.
thanks,
-serge