We need a direct method of getting the pid inside containers.
If some issues occurred inside container guest, host user
could not know which process is in trouble just by guest pid:
the users of container guest only knew the pid inside containers.
This will bring obstacle for trouble shooting.
int getnspid(pid_t pid, int fd1, int fd2, int pidtype);
pid: the pid number need to be translated.
fd: a file descriptor referring to one of
the namespace entries in a /proc/[pid]/ns/pid.
fd1 for destination ns(ns1), where the pid came from.
fd2 for reference ns(ns2), while fd2 = -2 means for current ns.
pidtype: 0 PIDTYPE_PID; 1 PIDTYPE_PGID; 2 PIDTYPE_SID.
return value:
>0: translated pid in ns1(fd1) seen from ns2(fd2).
<0: on failure.
Signed-off-by: Chen Hanxiao <[email protected]>
---
arch/x86/syscalls/syscall_32.tbl | 1 +
arch/x86/syscalls/syscall_64.tbl | 1 +
include/linux/syscalls.h | 1 +
kernel/nsproxy.c | 60 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 63 insertions(+)
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b8679..9de0b32 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,4 @@
351 i386 sched_setattr sys_sched_setattr
352 i386 sched_getattr sys_sched_getattr
353 i386 renameat2 sys_renameat2
+354 i386 getnspid sys_getnspid
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ec255a1..1630a8a 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,7 @@
314 common sched_setattr sys_sched_setattr
315 common sched_getattr sys_sched_getattr
316 common renameat2 sys_renameat2
+317 common getnspid sys_getnspid
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b0881a0..271c7b1 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -866,4 +866,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2);
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_getpidns(pid_t pid, int fd1, int fd2, int pidtype);
#endif
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 8e78110..3eda90a 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -261,6 +261,66 @@ out:
return err;
}
+SYSCALL_DEFINE4(getnspid, pid_t, pid, int, fd1, int, fd2, int, pidtype)
+{
+ struct file *file1 = NULL, *file2 = NULL;
+ struct task_struct *task;
+ struct pid_namespace *ns1, *ns2;
+ struct proc_ns *ei;
+ int ret = -1;
+
+ if (pidtype >= PIDTYPE_MAX)
+ return -EINVAL;
+
+ file1 = proc_ns_fget(fd1);
+ if (IS_ERR(file1))
+ return PTR_ERR(file1);
+ ei = get_proc_ns(file_inode(file1));
+ ns1 = (struct pid_namespace *)ei->ns;
+
+ /* fd == -2 for current pid ns */
+ if (fd2 == -2) {
+ ns2 = task_active_pid_ns(current);
+ } else {
+ file2 = proc_ns_fget(fd2);
+ if (IS_ERR(file2)) {
+ fput(file1);
+ return PTR_ERR(file2);
+ }
+ ei = get_proc_ns(file_inode(file2));
+ ns2 = (struct pid_namespace *)ei->ns;
+ }
+
+ rcu_read_lock();
+ task = find_task_by_pid_ns(pid, ns1);
+ rcu_read_unlock();
+ if (!task) {
+ ret = -ESRCH;
+ goto out;
+ }
+
+ switch (pidtype) {
+ case PIDTYPE_PID:
+ ret = task_pid_nr_ns(task, ns2);
+ break;
+ case PIDTYPE_PGID:
+ ret = task_pgrp_nr_ns(task, ns2);
+ break;
+ case PIDTYPE_SID:
+ ret = task_session_nr_ns(task, ns2);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ ret = (ret == 0) ? -ESRCH : ret;
+
+out:
+ fput(file1);
+ if (file2)
+ fput(file2);
+ return ret;
+}
+
int __init nsproxy_cache_init(void)
{
nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
--
1.9.0
On 06/17/2014 02:21 PM, Chen Hanxiao wrote:
> We need a direct method of getting the pid inside containers.
> If some issues occurred inside container guest, host user
> could not know which process is in trouble just by guest pid:
> the users of container guest only knew the pid inside containers.
> This will bring obstacle for trouble shooting.
>
> int getnspid(pid_t pid, int fd1, int fd2, int pidtype);
>
> pid: the pid number need to be translated.
>
> fd: a file descriptor referring to one of
> the namespace entries in a /proc/[pid]/ns/pid.
> fd1 for destination ns(ns1), where the pid came from.
> fd2 for reference ns(ns2), while fd2 = -2 means for current ns.
>
> pidtype: 0 PIDTYPE_PID; 1 PIDTYPE_PGID; 2 PIDTYPE_SID.
>
> return value:
> >0: translated pid in ns1(fd1) seen from ns2(fd2).
> <0: on failure.
>
> Signed-off-by: Chen Hanxiao <[email protected]>
> ---
> arch/x86/syscalls/syscall_32.tbl | 1 +
> arch/x86/syscalls/syscall_64.tbl | 1 +
> include/linux/syscalls.h | 1 +
> kernel/nsproxy.c | 60 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 63 insertions(+)
>
> diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
> index d6b8679..9de0b32 100644
> --- a/arch/x86/syscalls/syscall_32.tbl
> +++ b/arch/x86/syscalls/syscall_32.tbl
> @@ -360,3 +360,4 @@
> 351 i386 sched_setattr sys_sched_setattr
> 352 i386 sched_getattr sys_sched_getattr
> 353 i386 renameat2 sys_renameat2
> +354 i386 getnspid sys_getnspid
> diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
> index ec255a1..1630a8a 100644
> --- a/arch/x86/syscalls/syscall_64.tbl
> +++ b/arch/x86/syscalls/syscall_64.tbl
> @@ -323,6 +323,7 @@
> 314 common sched_setattr sys_sched_setattr
> 315 common sched_getattr sys_sched_getattr
> 316 common renameat2 sys_renameat2
> +317 common getnspid sys_getnspid
>
> #
> # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index b0881a0..271c7b1 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -866,4 +866,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
> asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
> unsigned long idx1, unsigned long idx2);
> asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
> +asmlinkage long sys_getpidns(pid_t pid, int fd1, int fd2, int pidtype);
> #endif
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index 8e78110..3eda90a 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -261,6 +261,66 @@ out:
> return err;
> }
>
> +SYSCALL_DEFINE4(getnspid, pid_t, pid, int, fd1, int, fd2, int, pidtype)
> +{
> + struct file *file1 = NULL, *file2 = NULL;
> + struct task_struct *task;
> + struct pid_namespace *ns1, *ns2;
> + struct proc_ns *ei;
> + int ret = -1;
> +
> + if (pidtype >= PIDTYPE_MAX)
> + return -EINVAL;
> +
> + file1 = proc_ns_fget(fd1);
> + if (IS_ERR(file1))
> + return PTR_ERR(file1);
> + ei = get_proc_ns(file_inode(file1));
> + ns1 = (struct pid_namespace *)ei->ns;
> +
> + /* fd == -2 for current pid ns */
> + if (fd2 == -2) {
> + ns2 = task_active_pid_ns(current);
> + } else {
> + file2 = proc_ns_fget(fd2);
> + if (IS_ERR(file2)) {
> + fput(file1);
> + return PTR_ERR(file2);
> + }
> + ei = get_proc_ns(file_inode(file2));
> + ns2 = (struct pid_namespace *)ei->ns;
> + }
> +
> + rcu_read_lock();
> + task = find_task_by_pid_ns(pid, ns1);
> + rcu_read_unlock();
> + if (!task) {
> + ret = -ESRCH;
> + goto out;
> + }
> +
> + switch (pidtype) {
There's no need in switch, the __task_pid_nr_ns() accepts
the type argument.
> + case PIDTYPE_PID:
> + ret = task_pid_nr_ns(task, ns2);
But this is not correct. If task doesn't live in ns2, but ns2
just has the ns->level small enough, then the wrong pid value
would be reported.
> + break;
> + case PIDTYPE_PGID:
> + ret = task_pgrp_nr_ns(task, ns2);
> + break;
> + case PIDTYPE_SID:
> + ret = task_session_nr_ns(task, ns2);
> + break;
> + default:
> + ret = -EINVAL;
> + }
> + ret = (ret == 0) ? -ESRCH : ret;
> +
> +out:
> + fput(file1);
> + if (file2)
> + fput(file2);
> + return ret;
> +}
> +
> int __init nsproxy_cache_init(void)
> {
> nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
>
Hello Chen Hanxiao
On Tue, Jun 17, 2014 at 12:21 PM, Chen Hanxiao
<[email protected]> wrote:
> We need a direct method of getting the pid inside containers.
> If some issues occurred inside container guest, host user
> could not know which process is in trouble just by guest pid:
> the users of container guest only knew the pid inside containers.
> This will bring obstacle for trouble shooting.
>
> int getnspid(pid_t pid, int fd1, int fd2, int pidtype);
Please CC [email protected] on all patches that change the API
that the kernel presents to user space. See
https://www.kernel.org/doc/man-pages/linux-api-ml.html
Thanks,
Michael
> pid: the pid number need to be translated.
>
> fd: a file descriptor referring to one of
> the namespace entries in a /proc/[pid]/ns/pid.
> fd1 for destination ns(ns1), where the pid came from.
> fd2 for reference ns(ns2), while fd2 = -2 means for current ns.
>
> pidtype: 0 PIDTYPE_PID; 1 PIDTYPE_PGID; 2 PIDTYPE_SID.
>
> return value:
> >0: translated pid in ns1(fd1) seen from ns2(fd2).
> <0: on failure.
>
> Signed-off-by: Chen Hanxiao <[email protected]>
> ---
> arch/x86/syscalls/syscall_32.tbl | 1 +
> arch/x86/syscalls/syscall_64.tbl | 1 +
> include/linux/syscalls.h | 1 +
> kernel/nsproxy.c | 60 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 63 insertions(+)
>
> diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
> index d6b8679..9de0b32 100644
> --- a/arch/x86/syscalls/syscall_32.tbl
> +++ b/arch/x86/syscalls/syscall_32.tbl
> @@ -360,3 +360,4 @@
> 351 i386 sched_setattr sys_sched_setattr
> 352 i386 sched_getattr sys_sched_getattr
> 353 i386 renameat2 sys_renameat2
> +354 i386 getnspid sys_getnspid
> diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
> index ec255a1..1630a8a 100644
> --- a/arch/x86/syscalls/syscall_64.tbl
> +++ b/arch/x86/syscalls/syscall_64.tbl
> @@ -323,6 +323,7 @@
> 314 common sched_setattr sys_sched_setattr
> 315 common sched_getattr sys_sched_getattr
> 316 common renameat2 sys_renameat2
> +317 common getnspid sys_getnspid
>
> #
> # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index b0881a0..271c7b1 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -866,4 +866,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
> asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
> unsigned long idx1, unsigned long idx2);
> asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
> +asmlinkage long sys_getpidns(pid_t pid, int fd1, int fd2, int pidtype);
> #endif
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index 8e78110..3eda90a 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -261,6 +261,66 @@ out:
> return err;
> }
>
> +SYSCALL_DEFINE4(getnspid, pid_t, pid, int, fd1, int, fd2, int, pidtype)
> +{
> + struct file *file1 = NULL, *file2 = NULL;
> + struct task_struct *task;
> + struct pid_namespace *ns1, *ns2;
> + struct proc_ns *ei;
> + int ret = -1;
> +
> + if (pidtype >= PIDTYPE_MAX)
> + return -EINVAL;
> +
> + file1 = proc_ns_fget(fd1);
> + if (IS_ERR(file1))
> + return PTR_ERR(file1);
> + ei = get_proc_ns(file_inode(file1));
> + ns1 = (struct pid_namespace *)ei->ns;
> +
> + /* fd == -2 for current pid ns */
> + if (fd2 == -2) {
> + ns2 = task_active_pid_ns(current);
> + } else {
> + file2 = proc_ns_fget(fd2);
> + if (IS_ERR(file2)) {
> + fput(file1);
> + return PTR_ERR(file2);
> + }
> + ei = get_proc_ns(file_inode(file2));
> + ns2 = (struct pid_namespace *)ei->ns;
> + }
> +
> + rcu_read_lock();
> + task = find_task_by_pid_ns(pid, ns1);
> + rcu_read_unlock();
> + if (!task) {
> + ret = -ESRCH;
> + goto out;
> + }
> +
> + switch (pidtype) {
> + case PIDTYPE_PID:
> + ret = task_pid_nr_ns(task, ns2);
> + break;
> + case PIDTYPE_PGID:
> + ret = task_pgrp_nr_ns(task, ns2);
> + break;
> + case PIDTYPE_SID:
> + ret = task_session_nr_ns(task, ns2);
> + break;
> + default:
> + ret = -EINVAL;
> + }
> + ret = (ret == 0) ? -ESRCH : ret;
> +
> +out:
> + fput(file1);
> + if (file2)
> + fput(file2);
> + return ret;
> +}
> +
> int __init nsproxy_cache_init(void)
> {
> nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
> --
> 1.9.0
>
> _______________________________________________
> Containers mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/containers
--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/
Author of "The Linux Programming Interface", http://blog.man7.org/
Chen Hanxiao <[email protected]> writes:
> We need a direct method of getting the pid inside containers.
> If some issues occurred inside container guest, host user
> could not know which process is in trouble just by guest pid:
> the users of container guest only knew the pid inside containers.
> This will bring obstacle for trouble shooting.
There is also some ongoing work to export this information via a proc
file which seems more appropriate for solving your problem. Certainly
for debugging something easily human discoverable is needed.
> int getnspid(pid_t pid, int fd1, int fd2, int pidtype);
The pidtype is nonsense. The translation of a pid does not depend upon
type. Using that kind of nonsense will lead you and others into confusion.
> pid: the pid number need to be translated.
>
> fd: a file descriptor referring to one of
> the namespace entries in a /proc/[pid]/ns/pid.
> fd1 for destination ns(ns1), where the pid came from.
> fd2 for reference ns(ns2), while fd2 = -2 means for current ns.
>
> pidtype: 0 PIDTYPE_PID; 1 PIDTYPE_PGID; 2 PIDTYPE_SID.
>
> return value:
> >0: translated pid in ns1(fd1) seen from ns2(fd2).
> <0: on failure.
Elsewhere we use 0 on pid translation failure. Why be different here?
Eric
> Signed-off-by: Chen Hanxiao <[email protected]>
> ---
> arch/x86/syscalls/syscall_32.tbl | 1 +
> arch/x86/syscalls/syscall_64.tbl | 1 +
> include/linux/syscalls.h | 1 +
> kernel/nsproxy.c | 60 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 63 insertions(+)
>
> diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
> index d6b8679..9de0b32 100644
> --- a/arch/x86/syscalls/syscall_32.tbl
> +++ b/arch/x86/syscalls/syscall_32.tbl
> @@ -360,3 +360,4 @@
> 351 i386 sched_setattr sys_sched_setattr
> 352 i386 sched_getattr sys_sched_getattr
> 353 i386 renameat2 sys_renameat2
> +354 i386 getnspid sys_getnspid
> diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
> index ec255a1..1630a8a 100644
> --- a/arch/x86/syscalls/syscall_64.tbl
> +++ b/arch/x86/syscalls/syscall_64.tbl
> @@ -323,6 +323,7 @@
> 314 common sched_setattr sys_sched_setattr
> 315 common sched_getattr sys_sched_getattr
> 316 common renameat2 sys_renameat2
> +317 common getnspid sys_getnspid
>
> #
> # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index b0881a0..271c7b1 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -866,4 +866,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
> asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
> unsigned long idx1, unsigned long idx2);
> asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
> +asmlinkage long sys_getpidns(pid_t pid, int fd1, int fd2, int pidtype);
> #endif
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index 8e78110..3eda90a 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -261,6 +261,66 @@ out:
> return err;
> }
>
> +SYSCALL_DEFINE4(getnspid, pid_t, pid, int, fd1, int, fd2, int, pidtype)
> +{
> + struct file *file1 = NULL, *file2 = NULL;
> + struct task_struct *task;
> + struct pid_namespace *ns1, *ns2;
> + struct proc_ns *ei;
> + int ret = -1;
> +
> + if (pidtype >= PIDTYPE_MAX)
> + return -EINVAL;
> +
> + file1 = proc_ns_fget(fd1);
> + if (IS_ERR(file1))
> + return PTR_ERR(file1);
> + ei = get_proc_ns(file_inode(file1));
> + ns1 = (struct pid_namespace *)ei->ns;
> +
> + /* fd == -2 for current pid ns */
> + if (fd2 == -2) {
> + ns2 = task_active_pid_ns(current);
> + } else {
> + file2 = proc_ns_fget(fd2);
> + if (IS_ERR(file2)) {
> + fput(file1);
> + return PTR_ERR(file2);
> + }
> + ei = get_proc_ns(file_inode(file2));
> + ns2 = (struct pid_namespace *)ei->ns;
> + }
> +
> + rcu_read_lock();
> + task = find_task_by_pid_ns(pid, ns1);
The functions you want to be using here are:
find_pid_ns and pid_nr_ns.
> + rcu_read_unlock();
> + if (!task) {
> + ret = -ESRCH;
> + goto out;
> + }
> +
> + switch (pidtype) {
> + case PIDTYPE_PID:
> + ret = task_pid_nr_ns(task, ns2);
> + break;
> + case PIDTYPE_PGID:
> + ret = task_pgrp_nr_ns(task, ns2);
> + break;
> + case PIDTYPE_SID:
> + ret = task_session_nr_ns(task, ns2);
> + break;
> + default:
> + ret = -EINVAL;
> + }
> + ret = (ret == 0) ? -ESRCH : ret;
> +
> +out:
> + fput(file1);
> + if (file2)
> + fput(file2);
> + return ret;
> +}
> +
> int __init nsproxy_cache_init(void)
> {
> nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
> -----Original Message-----
> From: Pavel Emelyanov [mailto:[email protected]]
> Sent: Tuesday, June 17, 2014 8:13 PM
> To: Chen, Hanxiao/?? ????
> Cc: [email protected]; [email protected];
> Andrew Morton; Eric W. Biederman; Serge Hallyn; Daniel P. Berrange; Oleg Nesterov;
> Al Viro; David Howells; Richard Weinberger; Vasiliy Kulikov; Gotou, Yasunori/
> ???u ????
> Subject: Re: [PATCH] ns: introduce getnspid syscall
>
> On 06/17/2014 02:21 PM, Chen Hanxiao wrote:
> > We need a direct method of getting the pid inside containers.
> > If some issues occurred inside container guest, host user
> > could not know which process is in trouble just by guest pid:
> > the users of container guest only knew the pid inside containers.
> > This will bring obstacle for trouble shooting.
> >
> > int getnspid(pid_t pid, int fd1, int fd2, int pidtype);
> >
> > pid: the pid number need to be translated.
> >
> > fd: a file descriptor referring to one of
> > the namespace entries in a /proc/[pid]/ns/pid.
> > fd1 for destination ns(ns1), where the pid came from.
> > fd2 for reference ns(ns2), while fd2 = -2 means for current ns.
> >
> > pidtype: 0 PIDTYPE_PID; 1 PIDTYPE_PGID; 2 PIDTYPE_SID.
> >
> > return value:
> > >0: translated pid in ns1(fd1) seen from ns2(fd2).
> > <0: on failure.
> >
> > + }
> > +
> > + switch (pidtype) {
>
> There's no need in switch, the __task_pid_nr_ns() accepts
> the type argument.
>
Yes, I think we still have that kind of functions, so I used them...
> > + case PIDTYPE_PID:
> > + ret = task_pid_nr_ns(task, ns2);
>
> But this is not correct. If task doesn't live in ns2, but ns2
> just has the ns->level small enough, then the wrong pid value
> would be reported.
>
Right, we should check whether the task belonged to that namespace firstly.
Thanks,
- Chen
> > + break;
> > + case PIDTYPE_PGID:
> > + ret = task_pgrp_nr_ns(task, ns2);
> > + break;
> > + case PIDTYPE_SID:
> > + ret = task_session_nr_ns(task, ns2);
> > + break;
> > + default:
> > + ret = -EINVAL;
> > + }
> > + ret = (ret == 0) ? -ESRCH : ret;
> > +
> > +out:
> > + fput(file1);
> > + if (file2)
> > + fput(file2);
> > + return ret;
> > +}
> > +
> > int __init nsproxy_cache_init(void)
> > {
> > nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
> >
>
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?
> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of
> Michael Kerrisk
> Sent: Wednesday, June 18, 2014 2:27 AM
> To: Chen, Hanxiao/陈 晗霄
> Cc: containers; Linux Kernel; Richard Weinberger; Serge Hallyn; Oleg Nesterov;
> David Howells; Eric W. Biederman; Andrew Morton; Al Viro; Linux API; Michael
> Kerrisk-manpages
> Subject: Re: [PATCH] ns: introduce getnspid syscall
>
> Hello Chen Hanxiao
>
> On Tue, Jun 17, 2014 at 12:21 PM, Chen Hanxiao
> <[email protected]> wrote:
> > We need a direct method of getting the pid inside containers.
> > If some issues occurred inside container guest, host user
> > could not know which process is in trouble just by guest pid:
> > the users of container guest only knew the pid inside containers.
> > This will bring obstacle for trouble shooting.
> >
> > int getnspid(pid_t pid, int fd1, int fd2, int pidtype);
>
> Please CC [email protected] on all patches that change the API
> that the kernel presents to user space. See
> https://www.kernel.org/doc/man-pages/linux-api-ml.html
>
> Thanks,
>
> Michael
>
>
Thanks for reminding.
- Chen
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?
Hi,
> -----Original Message-----
> From: Eric W. Biederman [mailto:[email protected]]
> Sent: Wednesday, June 18, 2014 9:31 AM
> To: Chen, Hanxiao/?? ????
> Cc: [email protected]; [email protected];
> Andrew Morton; Serge Hallyn; Daniel P. Berrange; Oleg Nesterov; Al Viro; David
> Howells; Richard Weinberger; Pavel Emelyanov; Vasiliy Kulikov; Gotou, Yasunori/
> ???u ????; Linux API; Michael Kerrisk-manpages
> Subject: Re: [PATCH] ns: introduce getnspid syscall
>
> Chen Hanxiao <[email protected]> writes:
>
> > We need a direct method of getting the pid inside containers.
> > If some issues occurred inside container guest, host user
> > could not know which process is in trouble just by guest pid:
> > the users of container guest only knew the pid inside containers.
> > This will bring obstacle for trouble shooting.
>
> There is also some ongoing work to export this information via a proc
> file which seems more appropriate for solving your problem. Certainly
> for debugging something easily human discoverable is needed.
>
Do you mean this patch:
/proc/pid/status: show all sets of pid according to ns
https://lkml.org/lkml/2014/5/26/145
But no new comments on this patch,
Pavel suggested that a syscall should be a good choice.
Do we should continue this kind of work?
> > int getnspid(pid_t pid, int fd1, int fd2, int pidtype);
>
> The pidtype is nonsense. The translation of a pid does not depend upon
> type. Using that kind of nonsense will lead you and others into confusion.
>
I see.
> > pid: the pid number need to be translated.
> >
> > fd: a file descriptor referring to one of
> > the namespace entries in a /proc/[pid]/ns/pid.
> > fd1 for destination ns(ns1), where the pid came from.
> > fd2 for reference ns(ns2), while fd2 = -2 means for current ns.
> >
> > pidtype: 0 PIDTYPE_PID; 1 PIDTYPE_PGID; 2 PIDTYPE_SID.
> >
> > return value:
> > >0: translated pid in ns1(fd1) seen from ns2(fd2).
> > <0: on failure.
>
> Elsewhere we use 0 on pid translation failure. Why be different here?
>
It should be <=0. And <0 means some other failures.
> Eric
>
>
> > Signed-off-by: Chen Hanxiao <[email protected]>
> > +
> > + rcu_read_lock();
> > + task = find_task_by_pid_ns(pid, ns1);
>
> The functions you want to be using here are:
> find_pid_ns and pid_nr_ns.
>
Thanks for your hint.
- Chen
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?
On 06/17, Chen Hanxiao wrote:
>
> +SYSCALL_DEFINE4(getnspid, pid_t, pid, int, fd1, int, fd2, int, pidtype)
> +{
> + struct file *file1 = NULL, *file2 = NULL;
> + struct task_struct *task;
> + struct pid_namespace *ns1, *ns2;
> + struct proc_ns *ei;
> + int ret = -1;
> +
> + if (pidtype >= PIDTYPE_MAX)
> + return -EINVAL;
> +
> + file1 = proc_ns_fget(fd1);
> + if (IS_ERR(file1))
> + return PTR_ERR(file1);
> + ei = get_proc_ns(file_inode(file1));
> + ns1 = (struct pid_namespace *)ei->ns;
and I am not sure this part is correct... shouldn't we also verify that
ns_ops == pidns_operations ?
Perhaps it makes sense to generalize get_net_ns_by_fd() into
"void *get_ns_by_fd(fd, type)"... this probably needs another "check-and-get"
method in proc_ns_operations(). I dunno.
Oleg.
On 06/17, Pavel Emelyanov wrote:
>
> On 06/17/2014 02:21 PM, Chen Hanxiao wrote:
> > + case PIDTYPE_PID:
> > + ret = task_pid_nr_ns(task, ns2);
>
> But this is not correct. If task doesn't live in ns2, but ns2
> just has the ns->level small enough, then the wrong pid value
> would be reported.
Confused... pid_nr_ns() checks upid->ns == ns and returns zero
otherwise?
Oleg.
On 06/18/2014 10:02 PM, Oleg Nesterov wrote:
> On 06/17, Pavel Emelyanov wrote:
>>
>> On 06/17/2014 02:21 PM, Chen Hanxiao wrote:
>>> + case PIDTYPE_PID:
>>> + ret = task_pid_nr_ns(task, ns2);
>>
>> But this is not correct. If task doesn't live in ns2, but ns2
>> just has the ns->level small enough, then the wrong pid value
>> would be reported.
>
> Confused... pid_nr_ns() checks upid->ns == ns and returns zero
> otherwise?
Yes, you're right. I've missed that line of code :(
> Oleg.
>
> .
>
> -----Original Message-----
> From: Oleg Nesterov [mailto:[email protected]]
> Sent: Thursday, June 19, 2014 1:58 AM
> To: Chen, Hanxiao/?? ????
> Cc: [email protected]; [email protected];
> Andrew Morton; Eric W. Biederman; Serge Hallyn; Daniel P. Berrange; Al Viro; David
> Howells; Richard Weinberger; Pavel Emelyanov; Vasiliy Kulikov; Gotou, Yasunori/
> ???u ????
> Subject: Re: [PATCH] ns: introduce getnspid syscall
>
> On 06/17, Chen Hanxiao wrote:
> >
> > +SYSCALL_DEFINE4(getnspid, pid_t, pid, int, fd1, int, fd2, int, pidtype)
> > +{
> > + struct file *file1 = NULL, *file2 = NULL;
> > + struct task_struct *task;
> > + struct pid_namespace *ns1, *ns2;
> > + struct proc_ns *ei;
> > + int ret = -1;
> > +
> > + if (pidtype >= PIDTYPE_MAX)
> > + return -EINVAL;
> > +
> > + file1 = proc_ns_fget(fd1);
> > + if (IS_ERR(file1))
> > + return PTR_ERR(file1);
> > + ei = get_proc_ns(file_inode(file1));
> > + ns1 = (struct pid_namespace *)ei->ns;
>
> and I am not sure this part is correct... shouldn't we also verify that
> ns_ops == pidns_operations ?
>
You're right. We should check this part.
Thanks,
- Chen
> Perhaps it makes sense to generalize get_net_ns_by_fd() into
> "void *get_ns_by_fd(fd, type)"... this probably needs another "check-and-get"
> method in proc_ns_operations(). I dunno.
>
> Oleg.
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?