Add a file called pidnr under /proc/task/. Reading this file gives the
pid of /proc/task in the reading task's namespace (or 0 if there is no
valid pid).
This fills a need currently not solvable at all. The particular need I
have for it is so that a task inside a container can pass requests to a
task outside the container (using an open fd for /proc/task) to have the
target task moved to a new cgroup. Others have asked for this ability
for other reasons.
Signed-off-by: Serge Hallyn <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: "Daniel P. Berrange" <[email protected]>
Cc: [email protected]
Cc: Colin Ian King <[email protected]>
Cc: Stéphane Graber <[email protected]>
---
fs/proc/base.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c3834da..b7499eb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -899,6 +899,28 @@ static const struct file_operations proc_environ_operations = {
.release = mem_release,
};
+#define TMPBUFLEN 21
+static ssize_t pidnr_read(struct file * file, char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode * inode = file_inode(file);
+ struct task_struct *task = get_proc_task(inode);
+ ssize_t length;
+ char tmpbuf[TMPBUFLEN];
+
+ if (!task)
+ return -ESRCH;
+ length = scnprintf(tmpbuf, TMPBUFLEN, "%u\n",
+ task_pid_vnr(task));
+ put_task_struct(task);
+ return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
+}
+
+static const struct file_operations pidnr_operations = {
+ .read = pidnr_read,
+ .llseek = generic_file_llseek,
+};
+
static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
@@ -1096,7 +1118,6 @@ static const struct file_operations proc_oom_score_adj_operations = {
};
#ifdef CONFIG_AUDITSYSCALL
-#define TMPBUFLEN 21
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
@@ -2642,6 +2663,7 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
+ REG("pidnr", S_IRUGO, pidnr_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUGO, proc_pid_personality),
@@ -2999,6 +3021,7 @@ static const struct pid_entry tid_base_stuff[] = {
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
REG("environ", S_IRUSR, proc_environ_operations),
+ REG("pidnr", S_IRUGO, pidnr_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUGO, proc_pid_personality),
--
1.7.9.5
Serge Hallyn <[email protected]> writes:
> Add a file called pidnr under /proc/task/. Reading this file gives the
> pid of /proc/task in the reading task's namespace (or 0 if there is no
> valid pid).
>
> This fills a need currently not solvable at all. The particular need I
> have for it is so that a task inside a container can pass requests to a
> task outside the container (using an open fd for /proc/task) to have the
> target task moved to a new cgroup. Others have asked for this ability
> for other reasons.
This is solvable today. Just pass the pid using SCM_CREDENTIALS over a
unix domain socket between the two processes. That is actually better
because a task can't claim to be a member of another task. You already
have the unix domain socket if you are using SCM_RIGHTS to pass file
descriptors.
Oh ick. You have a file whose contents change depending on who is
reading an open file descriptor. That can get rather ugly. It is
better for the contents to be constant and based upon when the file was
opened.
I also don't like the name. Nothing about the name says to me this is
the task's pid from the reading task's perspective.
I do sympathize with the problem and I think this patch could be on
the right track.
Eric
> Signed-off-by: Serge Hallyn <[email protected]>
> Cc: Eric Biederman <[email protected]>
> Cc: "Daniel P. Berrange" <[email protected]>
> Cc: [email protected]
> Cc: Colin Ian King <[email protected]>
> Cc: Stéphane Graber <[email protected]>
> ---
> fs/proc/base.c | 25 ++++++++++++++++++++++++-
> 1 file changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index c3834da..b7499eb 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -899,6 +899,28 @@ static const struct file_operations proc_environ_operations = {
> .release = mem_release,
> };
>
> +#define TMPBUFLEN 21
> +static ssize_t pidnr_read(struct file * file, char __user * buf,
> + size_t count, loff_t *ppos)
> +{
> + struct inode * inode = file_inode(file);
> + struct task_struct *task = get_proc_task(inode);
> + ssize_t length;
> + char tmpbuf[TMPBUFLEN];
> +
> + if (!task)
> + return -ESRCH;
> + length = scnprintf(tmpbuf, TMPBUFLEN, "%u\n",
> + task_pid_vnr(task));
> + put_task_struct(task);
> + return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
> +}
> +
> +static const struct file_operations pidnr_operations = {
> + .read = pidnr_read,
> + .llseek = generic_file_llseek,
> +};
> +
> static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
> loff_t *ppos)
> {
> @@ -1096,7 +1118,6 @@ static const struct file_operations proc_oom_score_adj_operations = {
> };
>
> #ifdef CONFIG_AUDITSYSCALL
> -#define TMPBUFLEN 21
> static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
> size_t count, loff_t *ppos)
> {
> @@ -2642,6 +2663,7 @@ static const struct pid_entry tgid_base_stuff[] = {
> DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
> #endif
> REG("environ", S_IRUSR, proc_environ_operations),
> + REG("pidnr", S_IRUGO, pidnr_operations),
> INF("auxv", S_IRUSR, proc_pid_auxv),
> ONE("status", S_IRUGO, proc_pid_status),
> ONE("personality", S_IRUGO, proc_pid_personality),
> @@ -2999,6 +3021,7 @@ static const struct pid_entry tid_base_stuff[] = {
> DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
> DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
> REG("environ", S_IRUSR, proc_environ_operations),
> + REG("pidnr", S_IRUGO, pidnr_operations),
> INF("auxv", S_IRUSR, proc_pid_auxv),
> ONE("status", S_IRUGO, proc_pid_status),
> ONE("personality", S_IRUGO, proc_pid_personality),
Quoting Eric W. Biederman ([email protected]):
> Serge Hallyn <[email protected]> writes:
>
> > Add a file called pidnr under /proc/task/. Reading this file gives the
> > pid of /proc/task in the reading task's namespace (or 0 if there is no
> > valid pid).
> >
> > This fills a need currently not solvable at all. The particular need I
> > have for it is so that a task inside a container can pass requests to a
> > task outside the container (using an open fd for /proc/task) to have the
> > target task moved to a new cgroup. Others have asked for this ability
> > for other reasons.
>
> This is solvable today. Just pass the pid using SCM_CREDENTIALS over a
> unix domain socket between the two processes. That is actually better
> because a task can't claim to be a member of another task. You already
> have the unix domain socket if you are using SCM_RIGHTS to pass file
> descriptors.
Hm, yeah, that should work. (I was thinking I had to do a
getpeercon-like thing where I could only get the ucreds of the
task which opened the socket). I'll try it to see if there are
any gotchas.
Note though that this doesn't help the general admin case, because
it requires a program (and not a one-liner) running in the container.
So I can't just do
cat /proc/`pidof container-init`/root/proc/200/pidnr
to figure out the corresponding pid in my own ns. Whereas that cat
doesn't require me to execute anything inside that container.
I wonder if coreutils should ship a program which clones a task in the
same netns but the target pidns, and sends ucred.pid = atoi(argv[1])
from that pidns to the first task, which prints out what it reads.
That should be a tidy, purely userspace solution to the general admin
problem. Only downside being that it requires the rights to setns to
the pidns, instead of just read access to its /proc.
thanks,
-serge