2018-11-08 13:05:36

by chouryzhou(周威)

Subject: [PATCH V3] binder: ipc namespace support for android binder

We are working on running Android in containers, but we found that binder is
not isolated by the ipc namespace. Since binder is a form of IPC, it should
be tied to the ipc namespace. With this patch, we can run more than one
Android container on one host.
This patch moves "binder_procs" and "binder_context" into ipc_namespace;
the driver will look up the context there when a binder device is opened.
Statistics in debugfs remain global.

Signed-off-by: chouryzhou <[email protected]>
---
drivers/android/binder.c | 128 +++++++++++++++++++++++++++++++-----------
include/linux/ipc_namespace.h | 15 +++++
ipc/namespace.c | 10 +++-
3 files changed, 117 insertions(+), 36 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index cb30a524d16d..22e45bb937e6 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -68,6 +68,7 @@
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/pid_namespace.h>
+#include <linux/ipc_namespace.h>
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/ratelimit.h>
@@ -80,13 +81,18 @@
#include "binder_alloc.h"
#include "binder_trace.h"

+
+#if !defined(CONFIG_SYSVIPC) && !defined(CONFIG_POSIX_MQUEUE)
+struct ipc_namespace init_ipc_ns;
+#define ipcns (&init_ipc_ns)
+#else
+#define ipcns (current->nsproxy->ipc_ns)
+#endif
+
static HLIST_HEAD(binder_deferred_list);
static DEFINE_MUTEX(binder_deferred_lock);

static HLIST_HEAD(binder_devices);
-static HLIST_HEAD(binder_procs);
-static DEFINE_MUTEX(binder_procs_lock);
-
static HLIST_HEAD(binder_dead_nodes);
static DEFINE_SPINLOCK(binder_dead_nodes_lock);

@@ -232,7 +238,7 @@ struct binder_transaction_log_entry {
int return_error_line;
uint32_t return_error;
uint32_t return_error_param;
- const char *context_name;
+ int context_device;
};
struct binder_transaction_log {
atomic_t cur;
@@ -263,19 +269,64 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
}

struct binder_context {
+ struct hlist_node hlist;
struct binder_node *binder_context_mgr_node;
struct mutex context_mgr_node_lock;

kuid_t binder_context_mgr_uid;
- const char *name;
+ int device;
};

struct binder_device {
struct hlist_node hlist;
struct miscdevice miscdev;
- struct binder_context context;
};

+void binder_exit_ns(struct ipc_namespace *ns)
+{
+ struct binder_context *context;
+ struct hlist_node *tmp;
+
+ mutex_destroy(&ns->binder_procs_lock);
+ hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
+ mutex_destroy(&context->context_mgr_node_lock);
+ hlist_del(&context->hlist);
+ kfree(context);
+ }
+}
+
+int binder_init_ns(struct ipc_namespace *ns)
+{
+ int ret;
+ struct binder_device *device;
+
+ mutex_init(&ns->binder_procs_lock);
+ INIT_HLIST_HEAD(&ns->binder_procs);
+ INIT_HLIST_HEAD(&ns->binder_contexts);
+
+ hlist_for_each_entry(device, &binder_devices, hlist) {
+ struct binder_context *context;
+
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ context->device = device->miscdev.minor;
+ context->binder_context_mgr_uid = INVALID_UID;
+ mutex_init(&context->context_mgr_node_lock);
+
+ hlist_add_head(&context->hlist, &ns->binder_contexts);
+ }
+
+ return 0;
+err:
+ binder_exit_ns(ns);
+ return ret;
+}
+
+
/**
* struct binder_work - work enqueued on a worklist
* @entry: node enqueued on list
@@ -2727,7 +2778,7 @@ static void binder_transaction(struct binder_proc *proc,
e->target_handle = tr->target.handle;
e->data_size = tr->data_size;
e->offsets_size = tr->offsets_size;
- e->context_name = proc->context->name;
+ e->context_device = proc->context->device;

if (reply) {
binder_inner_proc_lock(proc);
@@ -4922,6 +4973,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
{
struct binder_proc *proc;
struct binder_device *binder_dev;
+ struct binder_context *context;

binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
current->group_leader->pid, current->pid);
@@ -4937,7 +4989,15 @@ static int binder_open(struct inode *nodp, struct file *filp)
proc->default_priority = task_nice(current);
binder_dev = container_of(filp->private_data, struct binder_device,
miscdev);
- proc->context = &binder_dev->context;
+ hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
+ if (context->device == binder_dev->miscdev.minor) {
+ proc->context = context;
+ break;
+ }
+ }
+ if (!proc->context)
+ return -ENOENT;
+
binder_alloc_init(&proc->alloc);

binder_stats_created(BINDER_STAT_PROC);
@@ -4946,9 +5006,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
INIT_LIST_HEAD(&proc->waiting_threads);
filp->private_data = proc;

- mutex_lock(&binder_procs_lock);
- hlist_add_head(&proc->proc_node, &binder_procs);
- mutex_unlock(&binder_procs_lock);
+ mutex_lock(&ipcns->binder_procs_lock);
+ hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
+ mutex_unlock(&ipcns->binder_procs_lock);

if (binder_debugfs_dir_entry_proc) {
char strbuf[11];
@@ -5082,9 +5142,9 @@ static void binder_deferred_release(struct binder_proc *proc)
struct rb_node *n;
int threads, nodes, incoming_refs, outgoing_refs, active_transactions;

- mutex_lock(&binder_procs_lock);
+ mutex_lock(&ipcns->binder_procs_lock);
hlist_del(&proc->proc_node);
- mutex_unlock(&binder_procs_lock);
+ mutex_unlock(&ipcns->binder_procs_lock);

mutex_lock(&context->context_mgr_node_lock);
if (context->binder_context_mgr_node &&
@@ -5377,7 +5437,7 @@ static void print_binder_proc(struct seq_file *m,
struct binder_node *last_node = NULL;

seq_printf(m, "proc %d\n", proc->pid);
- seq_printf(m, "context %s\n", proc->context->name);
+ seq_printf(m, "context %d\n", proc->context->device);
header_pos = m->count;

binder_inner_proc_lock(proc);
@@ -5538,7 +5598,7 @@ static void print_binder_proc_stats(struct seq_file *m,
binder_alloc_get_free_async_space(&proc->alloc);

seq_printf(m, "proc %d\n", proc->pid);
- seq_printf(m, "context %s\n", proc->context->name);
+ seq_printf(m, "context %d\n", proc->context->device);
count = 0;
ready_threads = 0;
binder_inner_proc_lock(proc);
@@ -5623,10 +5683,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
if (last_node)
binder_put_node(last_node);

- mutex_lock(&binder_procs_lock);
- hlist_for_each_entry(proc, &binder_procs, proc_node)
+ mutex_lock(&ipcns->binder_procs_lock);
+ hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
print_binder_proc(m, proc, 1);
- mutex_unlock(&binder_procs_lock);
+ mutex_unlock(&ipcns->binder_procs_lock);

return 0;
}
@@ -5639,10 +5699,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)

print_binder_stats(m, "", &binder_stats);

- mutex_lock(&binder_procs_lock);
- hlist_for_each_entry(proc, &binder_procs, proc_node)
+ mutex_lock(&ipcns->binder_procs_lock);
+ hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
print_binder_proc_stats(m, proc);
- mutex_unlock(&binder_procs_lock);
+ mutex_unlock(&ipcns->binder_procs_lock);

return 0;
}
@@ -5652,10 +5712,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
struct binder_proc *proc;

seq_puts(m, "binder transactions:\n");
- mutex_lock(&binder_procs_lock);
- hlist_for_each_entry(proc, &binder_procs, proc_node)
+ mutex_lock(&ipcns->binder_procs_lock);
+ hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
print_binder_proc(m, proc, 0);
- mutex_unlock(&binder_procs_lock);
+ mutex_unlock(&ipcns->binder_procs_lock);

return 0;
}
@@ -5665,14 +5725,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
struct binder_proc *itr;
int pid = (unsigned long)m->private;

- mutex_lock(&binder_procs_lock);
- hlist_for_each_entry(itr, &binder_procs, proc_node) {
+ mutex_lock(&ipcns->binder_procs_lock);
+ hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
if (itr->pid == pid) {
seq_puts(m, "binder proc state:\n");
print_binder_proc(m, itr, 1);
}
}
- mutex_unlock(&binder_procs_lock);
+ mutex_unlock(&ipcns->binder_procs_lock);

return 0;
}
@@ -5687,10 +5747,10 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
*/
smp_rmb();
seq_printf(m,
- "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
+ "%d: %s from %d:%d to %d:%d context %d node %d handle %d size %d:%d ret %d/%d l=%d",
e->debug_id, (e->call_type == 2) ? "reply" :
((e->call_type == 1) ? "async" : "call "), e->from_proc,
- e->from_thread, e->to_proc, e->to_thread, e->context_name,
+ e->from_thread, e->to_proc, e->to_thread, e->context_device,
e->to_node, e->target_handle, e->data_size, e->offsets_size,
e->return_error, e->return_error_param,
e->return_error_line);
@@ -5753,10 +5813,6 @@ static int __init init_binder_device(const char *name)
binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
binder_device->miscdev.name = name;

- binder_device->context.binder_context_mgr_uid = INVALID_UID;
- binder_device->context.name = name;
- mutex_init(&binder_device->context.context_mgr_node_lock);
-
ret = misc_register(&binder_device->miscdev);
if (ret < 0) {
kfree(binder_device);
@@ -5832,8 +5888,12 @@ static int __init binder_init(void)
goto err_init_binder_device_failed;
}

- return ret;
+ ret = binder_init_ns(&init_ipc_ns);
+ if (ret)
+ goto err_init_namespace_failed;

+ return ret;
+err_init_namespace_failed:
err_init_binder_device_failed:
hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
misc_deregister(&device->miscdev);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6ab8c1bada3f..d7f850a2ded8 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -63,6 +63,13 @@ struct ipc_namespace {
unsigned int mq_msg_default;
unsigned int mq_msgsize_default;

+#ifdef CONFIG_ANDROID_BINDER_IPC
+ /* next fields are for binder */
+ struct mutex binder_procs_lock;
+ struct hlist_head binder_procs;
+ struct hlist_head binder_contexts;
+#endif
+
/* user_ns which owns the ipc ns */
struct user_namespace *user_ns;
struct ucounts *ucounts;
@@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
#endif

+#ifdef CONFIG_ANDROID_BINDER_IPC
+extern int binder_init_ns(struct ipc_namespace *ns);
+extern void binder_exit_ns(struct ipc_namespace *ns);
+#else
+static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline void binder_exit_ns(struct ipc_namespace *ns) { }
+#endif
+
#if defined(CONFIG_IPC_NS)
extern struct ipc_namespace *copy_ipcs(unsigned long flags,
struct user_namespace *user_ns, struct ipc_namespace *ns);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 21607791d62c..68c6e983b002 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,

err = mq_init_ns(ns);
if (err)
- goto fail_put;
+ goto fail_init_mq;
+ err = binder_init_ns(ns);
+ if (err)
+ goto fail_init_binder;

sem_init_ns(ns);
msg_init_ns(ns);
@@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,

return ns;

-fail_put:
+fail_init_binder:
+ mq_put_mnt(ns);
+fail_init_mq:
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
fail_free:
@@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
sem_exit_ns(ns);
msg_exit_ns(ns);
shm_exit_ns(ns);
+ binder_exit_ns(ns);

dec_ipc_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
--
2.11.0


2018-11-08 14:31:08

by Christian Brauner

Subject: Re: [PATCH V3] binder: ipc namespace support for android binder

On Thu, Nov 08, 2018 at 01:02:32PM +0000, chouryzhou(周威) wrote:
> We are working on running Android in containers, but we found that binder is
> not isolated by the ipc namespace. Since binder is a form of IPC, it should
> be tied to the ipc namespace. With this patch, we can run more than one
> Android container on one host.
> This patch moves "binder_procs" and "binder_context" into ipc_namespace;
> the driver will look up the context there when a binder device is opened.
> Statistics in debugfs remain global.
>
> Signed-off-by: chouryzhou <[email protected]>
> ---
> drivers/android/binder.c | 128 +++++++++++++++++++++++++++++++-----------
> include/linux/ipc_namespace.h | 15 +++++
> ipc/namespace.c | 10 +++-
> 3 files changed, 117 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index cb30a524d16d..22e45bb937e6 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -68,6 +68,7 @@
> #include <linux/seq_file.h>
> #include <linux/uaccess.h>
> #include <linux/pid_namespace.h>
> +#include <linux/ipc_namespace.h>
> #include <linux/security.h>
> #include <linux/spinlock.h>
> #include <linux/ratelimit.h>
> @@ -80,13 +81,18 @@
> #include "binder_alloc.h"
> #include "binder_trace.h"
>
> +
> +#if !defined(CONFIG_SYSVIPC) && !defined(CONFIG_POSIX_MQUEUE)
> +struct ipc_namespace init_ipc_ns;
> +#define ipcns (&init_ipc_ns)
> +#else
> +#define ipcns (current->nsproxy->ipc_ns)
> +#endif
> +
> static HLIST_HEAD(binder_deferred_list);
> static DEFINE_MUTEX(binder_deferred_lock);
>
> static HLIST_HEAD(binder_devices);
> -static HLIST_HEAD(binder_procs);
> -static DEFINE_MUTEX(binder_procs_lock);
> -
> static HLIST_HEAD(binder_dead_nodes);
> static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>
> @@ -232,7 +238,7 @@ struct binder_transaction_log_entry {
> int return_error_line;
> uint32_t return_error;
> uint32_t return_error_param;
> - const char *context_name;
> + int context_device;
> };
> struct binder_transaction_log {
> atomic_t cur;
> @@ -263,19 +269,64 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
> }
>
> struct binder_context {
> + struct hlist_node hlist;
> struct binder_node *binder_context_mgr_node;
> struct mutex context_mgr_node_lock;
>
> kuid_t binder_context_mgr_uid;
> - const char *name;
> + int device;
> };
>
> struct binder_device {
> struct hlist_node hlist;
> struct miscdevice miscdev;
> - struct binder_context context;
> };
>
> +void binder_exit_ns(struct ipc_namespace *ns)
> +{
> + struct binder_context *context;
> + struct hlist_node *tmp;
> +
> + mutex_destroy(&ns->binder_procs_lock);
> + hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
> + mutex_destroy(&context->context_mgr_node_lock);
> + hlist_del(&context->hlist);
> + kfree(context);
> + }
> +}
> +
> +int binder_init_ns(struct ipc_namespace *ns)
> +{
> + int ret;
> + struct binder_device *device;
> +
> + mutex_init(&ns->binder_procs_lock);
> + INIT_HLIST_HEAD(&ns->binder_procs);
> + INIT_HLIST_HEAD(&ns->binder_contexts);
> +
> + hlist_for_each_entry(device, &binder_devices, hlist) {
> + struct binder_context *context;
> +
> + context = kzalloc(sizeof(*context), GFP_KERNEL);
> + if (!context) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
> + context->device = device->miscdev.minor;
> + context->binder_context_mgr_uid = INVALID_UID;
> + mutex_init(&context->context_mgr_node_lock);
> +
> + hlist_add_head(&context->hlist, &ns->binder_contexts);
> + }
> +
> + return 0;
> +err:
> + binder_exit_ns(ns);
> + return ret;
> +}
> +
> +
> /**
> * struct binder_work - work enqueued on a worklist
> * @entry: node enqueued on list
> @@ -2727,7 +2778,7 @@ static void binder_transaction(struct binder_proc *proc,
> e->target_handle = tr->target.handle;
> e->data_size = tr->data_size;
> e->offsets_size = tr->offsets_size;
> - e->context_name = proc->context->name;
> + e->context_device = proc->context->device;
>
> if (reply) {
> binder_inner_proc_lock(proc);
> @@ -4922,6 +4973,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
> {
> struct binder_proc *proc;
> struct binder_device *binder_dev;
> + struct binder_context *context;
>
> binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
> current->group_leader->pid, current->pid);
> @@ -4937,7 +4989,15 @@ static int binder_open(struct inode *nodp, struct file *filp)

So the idea is that on open() of a binder device you attach an ipc
namespace to the opening process?
So you'd basically be able to create, in another ipc namespace, a new device
node with the same major and minor numbers as the one in the host ipc
namespace, and it would give you a different context.
That's basically namespacing binder devices without *properly*
namespacing them.
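
Concretely, something like this inside the container (a sketch of what I
mean; the path and device numbers are illustrative, not from the patch):

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <unistd.h>

/* Inside a container that has its own ipc namespace: recreate a node with
 * the host's major:minor for binder and open it.  With this patch, that
 * open() would resolve to the container's own binder_context. */
static int open_container_binder(unsigned int host_major,
				 unsigned int host_minor)
{
	mknod("/dev/binder0", S_IFCHR | 0666,
	      makedev(host_major, host_minor));
	return open("/dev/binder0", O_RDWR | O_CLOEXEC);
}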

What happens if I do:

/* in initial ipc namespace */
binder_fd = open(/dev/binder0)

Now I send binder_fd via SCM_RIGHTs from initial ipc namespace to
non-initial ipc namespace.

/* in non-initial ipc namespace */
new_binder_fd = open(/proc/self/fd/<binder_fd>);

What ipc namespace do you intend new_binder_fd to refer to? It seems
a re-open on such an fd would switch ipc namespaces. I find that odd
semantics. Are there other things that behave that way? If you send a
pty fd to another namespace it won't switch mount and user namespaces
just because you re-open it via /proc.
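
To make the sequence concrete, here is a minimal userspace sketch
(illustrative only; error handling and the receiver's namespace setup are
omitted, and /dev/binder0 is just the example device from above):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <unistd.h>

/* Pass one fd over an AF_UNIX socket via SCM_RIGHTS. */
static void send_fd(int sock, int fd)
{
	char dummy = 0;
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union { struct cmsghdr align; char buf[CMSG_SPACE(sizeof(int))]; } u;
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
	sendmsg(sock, &msg, 0);
}

static int recv_fd(int sock)
{
	char dummy;
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union { struct cmsghdr align; char buf[CMSG_SPACE(sizeof(int))]; } u;
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;
	int fd = -1;

	if (recvmsg(sock, &msg, 0) < 0)
		return -1;
	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg && cmsg->cmsg_type == SCM_RIGHTS)
		memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
	return fd;
}

int main(void)
{
	int sv[2];

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

	if (fork() == 0) {
		/* receiver: a real test would first move this process into a
		 * non-initial ipc namespace (e.g. unshare(CLONE_NEWIPC)) */
		char path[64];
		int binder_fd = recv_fd(sv[1]);
		int new_fd;

		snprintf(path, sizeof(path), "/proc/self/fd/%d", binder_fd);
		new_fd = open(path, O_RDWR | O_CLOEXEC);
		/* which binder_context does new_fd's binder_proc get now? */
		_exit(new_fd < 0);
	}

	/* sender stays in the initial ipc namespace */
	send_fd(sv[0], open("/dev/binder0", O_RDWR | O_CLOEXEC));
	wait(NULL);
	return 0;
}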

> proc->default_priority = task_nice(current);
> binder_dev = container_of(filp->private_data, struct binder_device,
> miscdev);
> - proc->context = &binder_dev->context;
> + hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
> + if (context->device == binder_dev->miscdev.minor) {
> + proc->context = context;
> + break;
> + }
> + }
> + if (!proc->context)
> + return -ENOENT;
> +
> binder_alloc_init(&proc->alloc);
>
> binder_stats_created(BINDER_STAT_PROC);
> @@ -4946,9 +5006,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
> INIT_LIST_HEAD(&proc->waiting_threads);
> filp->private_data = proc;
>
> - mutex_lock(&binder_procs_lock);
> - hlist_add_head(&proc->proc_node, &binder_procs);
> - mutex_unlock(&binder_procs_lock);
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> if (binder_debugfs_dir_entry_proc) {
> char strbuf[11];
> @@ -5082,9 +5142,9 @@ static void binder_deferred_release(struct binder_proc *proc)
> struct rb_node *n;
> int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
>
> - mutex_lock(&binder_procs_lock);
> + mutex_lock(&ipcns->binder_procs_lock);
> hlist_del(&proc->proc_node);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> mutex_lock(&context->context_mgr_node_lock);
> if (context->binder_context_mgr_node &&
> @@ -5377,7 +5437,7 @@ static void print_binder_proc(struct seq_file *m,
> struct binder_node *last_node = NULL;
>
> seq_printf(m, "proc %d\n", proc->pid);
> - seq_printf(m, "context %s\n", proc->context->name);
> + seq_printf(m, "context %d\n", proc->context->device);
> header_pos = m->count;
>
> binder_inner_proc_lock(proc);
> @@ -5538,7 +5598,7 @@ static void print_binder_proc_stats(struct seq_file *m,
> binder_alloc_get_free_async_space(&proc->alloc);
>
> seq_printf(m, "proc %d\n", proc->pid);
> - seq_printf(m, "context %s\n", proc->context->name);
> + seq_printf(m, "context %d\n", proc->context->device);
> count = 0;
> ready_threads = 0;
> binder_inner_proc_lock(proc);
> @@ -5623,10 +5683,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
> if (last_node)
> binder_put_node(last_node);
>
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(proc, &binder_procs, proc_node)
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
> print_binder_proc(m, proc, 1);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5639,10 +5699,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)
>
> print_binder_stats(m, "", &binder_stats);
>
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(proc, &binder_procs, proc_node)
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
> print_binder_proc_stats(m, proc);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5652,10 +5712,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
> struct binder_proc *proc;
>
> seq_puts(m, "binder transactions:\n");
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(proc, &binder_procs, proc_node)
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
> print_binder_proc(m, proc, 0);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5665,14 +5725,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
> struct binder_proc *itr;
> int pid = (unsigned long)m->private;
>
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(itr, &binder_procs, proc_node) {
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
> if (itr->pid == pid) {
> seq_puts(m, "binder proc state:\n");
> print_binder_proc(m, itr, 1);
> }
> }
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5687,10 +5747,10 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
> */
> smp_rmb();
> seq_printf(m,
> - "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
> + "%d: %s from %d:%d to %d:%d context %d node %d handle %d size %d:%d ret %d/%d l=%d",
> e->debug_id, (e->call_type == 2) ? "reply" :
> ((e->call_type == 1) ? "async" : "call "), e->from_proc,
> - e->from_thread, e->to_proc, e->to_thread, e->context_name,
> + e->from_thread, e->to_proc, e->to_thread, e->context_device,
> e->to_node, e->target_handle, e->data_size, e->offsets_size,
> e->return_error, e->return_error_param,
> e->return_error_line);
> @@ -5753,10 +5813,6 @@ static int __init init_binder_device(const char *name)
> binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
> binder_device->miscdev.name = name;
>
> - binder_device->context.binder_context_mgr_uid = INVALID_UID;
> - binder_device->context.name = name;
> - mutex_init(&binder_device->context.context_mgr_node_lock);
> -
> ret = misc_register(&binder_device->miscdev);
> if (ret < 0) {
> kfree(binder_device);
> @@ -5832,8 +5888,12 @@ static int __init binder_init(void)
> goto err_init_binder_device_failed;
> }
>
> - return ret;
> + ret = binder_init_ns(&init_ipc_ns);
> + if (ret)
> + goto err_init_namespace_failed;
>
> + return ret;
> +err_init_namespace_failed:
> err_init_binder_device_failed:
> hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
> misc_deregister(&device->miscdev);
> diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
> index 6ab8c1bada3f..d7f850a2ded8 100644
> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -63,6 +63,13 @@ struct ipc_namespace {
> unsigned int mq_msg_default;
> unsigned int mq_msgsize_default;
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> + /* next fields are for binder */
> + struct mutex binder_procs_lock;
> + struct hlist_head binder_procs;
> + struct hlist_head binder_contexts;
> +#endif
> +
> /* user_ns which owns the ipc ns */
> struct user_namespace *user_ns;
> struct ucounts *ucounts;
> @@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
> static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
> #endif
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +extern int binder_init_ns(struct ipc_namespace *ns);
> +extern void binder_exit_ns(struct ipc_namespace *ns);
> +#else
> +static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
> +static inline void binder_exit_ns(struct ipc_namespace *ns) { }
> +#endif
> +
> #if defined(CONFIG_IPC_NS)
> extern struct ipc_namespace *copy_ipcs(unsigned long flags,
> struct user_namespace *user_ns, struct ipc_namespace *ns);
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 21607791d62c..68c6e983b002 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
> err = mq_init_ns(ns);
> if (err)
> - goto fail_put;
> + goto fail_init_mq;
> + err = binder_init_ns(ns);
> + if (err)
> + goto fail_init_binder;
>
> sem_init_ns(ns);
> msg_init_ns(ns);
> @@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
> return ns;
>
> -fail_put:
> +fail_init_binder:
> + mq_put_mnt(ns);
> +fail_init_mq:
> put_user_ns(ns->user_ns);
> ns_free_inum(&ns->ns);
> fail_free:
> @@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
> sem_exit_ns(ns);
> msg_exit_ns(ns);
> shm_exit_ns(ns);
> + binder_exit_ns(ns);
>
> dec_ipc_namespaces(ns->ucounts);
> put_user_ns(ns->user_ns);
> --
> 2.11.0

2018-11-09 17:56:56

by Todd Kjos

Subject: Re: [PATCH V3] binder: ipc namespace support for android binder

On Thu, Nov 8, 2018 at 5:02 AM chouryzhou(周威) <[email protected]> wrote:
>
> We are working on running Android in containers, but we found that binder is
> not isolated by the ipc namespace. Since binder is a form of IPC, it should
> be tied to the ipc namespace. With this patch, we can run more than one
> Android container on one host.
> This patch moves "binder_procs" and "binder_context" into ipc_namespace;
> the driver will look up the context there when a binder device is opened.
> Statistics in debugfs remain global.
>
> Signed-off-by: chouryzhou <[email protected]>
> ---
> drivers/android/binder.c | 128 +++++++++++++++++++++++++++++++-----------
> include/linux/ipc_namespace.h | 15 +++++
> ipc/namespace.c | 10 +++-
> 3 files changed, 117 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index cb30a524d16d..22e45bb937e6 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -68,6 +68,7 @@
> #include <linux/seq_file.h>
> #include <linux/uaccess.h>
> #include <linux/pid_namespace.h>
> +#include <linux/ipc_namespace.h>
> #include <linux/security.h>
> #include <linux/spinlock.h>
> #include <linux/ratelimit.h>
> @@ -80,13 +81,18 @@
> #include "binder_alloc.h"
> #include "binder_trace.h"
>
> +
> +#if !defined(CONFIG_SYSVIPC) && !defined(CONFIG_POSIX_MQUEUE)

I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE.
It seems like this mechanism would work even if both are disabled --
as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and
allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to
"#ifndef CONFIG_IPC_NS"

> +struct ipc_namespace init_ipc_ns;
> +#define ipcns (&init_ipc_ns)
> +#else
> +#define ipcns (current->nsproxy->ipc_ns)
> +#endif
> +
> static HLIST_HEAD(binder_deferred_list);
> static DEFINE_MUTEX(binder_deferred_lock);
>
> static HLIST_HEAD(binder_devices);
> -static HLIST_HEAD(binder_procs);
> -static DEFINE_MUTEX(binder_procs_lock);
> -
> static HLIST_HEAD(binder_dead_nodes);
> static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>
> @@ -232,7 +238,7 @@ struct binder_transaction_log_entry {
> int return_error_line;
> uint32_t return_error;
> uint32_t return_error_param;
> - const char *context_name;
> + int context_device;
> };
> struct binder_transaction_log {
> atomic_t cur;
> @@ -263,19 +269,64 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
> }
>
> struct binder_context {
> + struct hlist_node hlist;
> struct binder_node *binder_context_mgr_node;
> struct mutex context_mgr_node_lock;
>
> kuid_t binder_context_mgr_uid;
> - const char *name;
> + int device;
> };
>
> struct binder_device {
> struct hlist_node hlist;
> struct miscdevice miscdev;
> - struct binder_context context;
> };
>
> +void binder_exit_ns(struct ipc_namespace *ns)
> +{
> + struct binder_context *context;
> + struct hlist_node *tmp;
> +
> + mutex_destroy(&ns->binder_procs_lock);
> + hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
> + mutex_destroy(&context->context_mgr_node_lock);
> + hlist_del(&context->hlist);
> + kfree(context);
> + }
> +}
> +
> +int binder_init_ns(struct ipc_namespace *ns)
> +{
> + int ret;
> + struct binder_device *device;
> +
> + mutex_init(&ns->binder_procs_lock);
> + INIT_HLIST_HEAD(&ns->binder_procs);
> + INIT_HLIST_HEAD(&ns->binder_contexts);
> +
> + hlist_for_each_entry(device, &binder_devices, hlist) {
> + struct binder_context *context;
> +
> + context = kzalloc(sizeof(*context), GFP_KERNEL);
> + if (!context) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
> + context->device = device->miscdev.minor;
> + context->binder_context_mgr_uid = INVALID_UID;
> + mutex_init(&context->context_mgr_node_lock);
> +
> + hlist_add_head(&context->hlist, &ns->binder_contexts);
> + }
> +
> + return 0;
> +err:
> + binder_exit_ns(ns);
> + return ret;
> +}
> +
> +
> /**
> * struct binder_work - work enqueued on a worklist
> * @entry: node enqueued on list
> @@ -2727,7 +2778,7 @@ static void binder_transaction(struct binder_proc *proc,
> e->target_handle = tr->target.handle;
> e->data_size = tr->data_size;
> e->offsets_size = tr->offsets_size;
> - e->context_name = proc->context->name;
> + e->context_device = proc->context->device;

why eliminate name? The string name is very useful for differentiating
normal "framework" binder transactions vs "hal" or "vendor"
transactions. If we just have a device number it will be hard to tell
in the logs even which namespace it belongs to. We need to keep both
the "name" (for which there might be multiple in each ns) and some
indication of which namespace this is. Maybe we assign some sort of
namespace ID during binder_init_ns().

>
> if (reply) {
> binder_inner_proc_lock(proc);
> @@ -4922,6 +4973,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
> {
> struct binder_proc *proc;
> struct binder_device *binder_dev;
> + struct binder_context *context;
>
> binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
> current->group_leader->pid, current->pid);
> @@ -4937,7 +4989,15 @@ static int binder_open(struct inode *nodp, struct file *filp)
> proc->default_priority = task_nice(current);
> binder_dev = container_of(filp->private_data, struct binder_device,
> miscdev);
> - proc->context = &binder_dev->context;
> + hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
> + if (context->device == binder_dev->miscdev.minor) {
> + proc->context = context;
> + break;
> + }
> + }
> + if (!proc->context)
> + return -ENOENT;
> +
> binder_alloc_init(&proc->alloc);
>
> binder_stats_created(BINDER_STAT_PROC);
> @@ -4946,9 +5006,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
> INIT_LIST_HEAD(&proc->waiting_threads);
> filp->private_data = proc;
>
> - mutex_lock(&binder_procs_lock);
> - hlist_add_head(&proc->proc_node, &binder_procs);
> - mutex_unlock(&binder_procs_lock);
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> if (binder_debugfs_dir_entry_proc) {
> char strbuf[11];
> @@ -5082,9 +5142,9 @@ static void binder_deferred_release(struct binder_proc *proc)
> struct rb_node *n;
> int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
>
> - mutex_lock(&binder_procs_lock);
> + mutex_lock(&ipcns->binder_procs_lock);
> hlist_del(&proc->proc_node);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> mutex_lock(&context->context_mgr_node_lock);
> if (context->binder_context_mgr_node &&
> @@ -5377,7 +5437,7 @@ static void print_binder_proc(struct seq_file *m,
> struct binder_node *last_node = NULL;
>
> seq_printf(m, "proc %d\n", proc->pid);
> - seq_printf(m, "context %s\n", proc->context->name);
> + seq_printf(m, "context %d\n", proc->context->device);

As mentioned above, we need to retain name and probably also want a ns
id of some sort. So context now has 3 components if IPC_NS, so maybe a
helper function to print context like:

static void binder_seq_print_context(struct seq_file *m,
                                     struct binder_context *context)
{
#ifdef CONFIG_IPC_NS
        seq_printf(m, "%d-%d-%s", context->ns_id, context->device,
                   context->name);
#else
        seq_printf(m, "%s", context->name);
#endif
}

(same comment below everywhere context is printed)

Should these debugfs nodes be ns-aware and only print debugging
info for the context of the thread accessing the node? If so, we would
also want to be namespace-aware when printing pids.

> header_pos = m->count;
>
> binder_inner_proc_lock(proc);
> @@ -5538,7 +5598,7 @@ static void print_binder_proc_stats(struct seq_file *m,
> binder_alloc_get_free_async_space(&proc->alloc);
>
> seq_printf(m, "proc %d\n", proc->pid);
> - seq_printf(m, "context %s\n", proc->context->name);
> + seq_printf(m, "context %d\n", proc->context->device);
> count = 0;
> ready_threads = 0;
> binder_inner_proc_lock(proc);
> @@ -5623,10 +5683,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
> if (last_node)
> binder_put_node(last_node);
>
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(proc, &binder_procs, proc_node)
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
> print_binder_proc(m, proc, 1);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5639,10 +5699,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)
>
> print_binder_stats(m, "", &binder_stats);
>
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(proc, &binder_procs, proc_node)
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
> print_binder_proc_stats(m, proc);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5652,10 +5712,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
> struct binder_proc *proc;
>
> seq_puts(m, "binder transactions:\n");
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(proc, &binder_procs, proc_node)
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
> print_binder_proc(m, proc, 0);
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5665,14 +5725,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
> struct binder_proc *itr;
> int pid = (unsigned long)m->private;
>
> - mutex_lock(&binder_procs_lock);
> - hlist_for_each_entry(itr, &binder_procs, proc_node) {
> + mutex_lock(&ipcns->binder_procs_lock);
> + hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
> if (itr->pid == pid) {
> seq_puts(m, "binder proc state:\n");
> print_binder_proc(m, itr, 1);
> }
> }
> - mutex_unlock(&binder_procs_lock);
> + mutex_unlock(&ipcns->binder_procs_lock);
>
> return 0;
> }
> @@ -5687,10 +5747,10 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
> */
> smp_rmb();
> seq_printf(m,
> - "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
> + "%d: %s from %d:%d to %d:%d context %d node %d handle %d size %d:%d ret %d/%d l=%d",
> e->debug_id, (e->call_type == 2) ? "reply" :
> ((e->call_type == 1) ? "async" : "call "), e->from_proc,
> - e->from_thread, e->to_proc, e->to_thread, e->context_name,
> + e->from_thread, e->to_proc, e->to_thread, e->context_device,
> e->to_node, e->target_handle, e->data_size, e->offsets_size,
> e->return_error, e->return_error_param,
> e->return_error_line);
> @@ -5753,10 +5813,6 @@ static int __init init_binder_device(const char *name)
> binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
> binder_device->miscdev.name = name;
>
> - binder_device->context.binder_context_mgr_uid = INVALID_UID;
> - binder_device->context.name = name;
> - mutex_init(&binder_device->context.context_mgr_node_lock);
> -
> ret = misc_register(&binder_device->miscdev);
> if (ret < 0) {
> kfree(binder_device);
> @@ -5832,8 +5888,12 @@ static int __init binder_init(void)
> goto err_init_binder_device_failed;
> }
>
> - return ret;
> + ret = binder_init_ns(&init_ipc_ns);
> + if (ret)
> + goto err_init_namespace_failed;
>
> + return ret;
> +err_init_namespace_failed:
> err_init_binder_device_failed:
> hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
> misc_deregister(&device->miscdev);
> diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
> index 6ab8c1bada3f..d7f850a2ded8 100644
> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -63,6 +63,13 @@ struct ipc_namespace {
> unsigned int mq_msg_default;
> unsigned int mq_msgsize_default;
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> + /* next fields are for binder */
> + struct mutex binder_procs_lock;
> + struct hlist_head binder_procs;
> + struct hlist_head binder_contexts;
> +#endif
> +
> /* user_ns which owns the ipc ns */
> struct user_namespace *user_ns;
> struct ucounts *ucounts;
> @@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
> static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
> #endif
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +extern int binder_init_ns(struct ipc_namespace *ns);
> +extern void binder_exit_ns(struct ipc_namespace *ns);
> +#else
> +static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
> +static inline void binder_exit_ns(struct ipc_namespace *ns) { }
> +#endif

I'm fine with this if the namespace.[hc] maintainers are, but would it
be better to add some sort of registration function so we don't need
binder-specific stuff here?

A version of this was proposed by Oren Laadan in:

https://lists.linuxfoundation.org/pipermail/containers/2013-December/033834.html

and then updated for 4.9 in a proposal in AOSP:

https://android-review.googlesource.com/c/kernel/common/+/471961

There will be some discussions on containerized binder support at LPC,
so we'll want to wait until after LPC before moving ahead with this.

-Todd


> +
> #if defined(CONFIG_IPC_NS)
> extern struct ipc_namespace *copy_ipcs(unsigned long flags,
> struct user_namespace *user_ns, struct ipc_namespace *ns);
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 21607791d62c..68c6e983b002 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
> err = mq_init_ns(ns);
> if (err)
> - goto fail_put;
> + goto fail_init_mq;
> + err = binder_init_ns(ns);
> + if (err)
> + goto fail_init_binder;
>
> sem_init_ns(ns);
> msg_init_ns(ns);
> @@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
> return ns;
>
> -fail_put:
> +fail_init_binder:
> + mq_put_mnt(ns);
> +fail_init_mq:
> put_user_ns(ns->user_ns);
> ns_free_inum(&ns->ns);
> fail_free:
> @@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
> sem_exit_ns(ns);
> msg_exit_ns(ns);
> shm_exit_ns(ns);
> + binder_exit_ns(ns);
>
> dec_ipc_namespaces(ns->ucounts);
> put_user_ns(ns->user_ns);
> --
> 2.11.0

2018-11-09 18:27:56

by Davidlohr Bueso

Subject: Re: [PATCH V3] binder: ipc namespace support for android binder

On Thu, 08 Nov 2018, chouryzhou(周威) wrote:

>+#ifdef CONFIG_ANDROID_BINDER_IPC
>+ /* next fields are for binder */
>+ struct mutex binder_procs_lock;
>+ struct hlist_head binder_procs;
>+ struct hlist_head binder_contexts;
>+#endif

Now, I took a look at how the binder_procs list is used; and no, what
follows isn't really related to this patch, but a general observation.

I think that a mutex is also an overkill and you might wanna replace it
with a spinlock/rwlock. Can anything block while holding the binder_procs_lock?
I don't see anything... you mainly need it for consulting the hlist calling
print_binder_proc[_stat]() - which will take the proc->inner_lock anyway, so
no blocking there. Also, if this is perhaps because of long hold times, dunno,
the rb_first_cached primitive might reduce some of it, although I don't know
how big the rbtrees in binder can get and if it matters at all.

Anyway, that said and along with addressing Todd's comments, the ipc/ bits look
good. Feel free to add my:

Reviewed-by: Davidlohr Bueso <[email protected]>

Thanks,
Davidlohr

2018-11-09 19:04:59

by Todd Kjos

Subject: Re: [PATCH V3] binder: ipc namespace support for android binder

On Fri, Nov 9, 2018 at 10:27 AM Davidlohr Bueso <[email protected]> wrote:
>
> On Thu, 08 Nov 2018, chouryzhou(周威) wrote:
>
> >+#ifdef CONFIG_ANDROID_BINDER_IPC
> >+ /* next fields are for binder */
> >+ struct mutex binder_procs_lock;
> >+ struct hlist_head binder_procs;
> >+ struct hlist_head binder_contexts;
> >+#endif
>
> Now, I took a look at how the binder_procs list is used; and no, what
> follows isn't really related to this patch, but a general observation.
>
> I think that a mutex is also an overkill and you might wanna replace it
> with a spinlock/rwlock. Can anything block while holding the binder_procs_lock?
> I don't see anything... you mainly need it for consulting the hlist calling
> print_binder_proc[_stat]() - which will take the proc->inner_lock anyway, so
> no blocking there.

print_binder_proc() drops proc->inner_lock and calls
binder_alloc_print_allocated() which acquires proc->alloc->mutex.
Likewise, print_binder_stats() calls print_binder_proc_stats() which
drops its locks to call binder_alloc_print_pages() which also acquires
proc->alloc->mutex. So binder_procs_lock needs to be a mutex since it
can block on proc->alloc->mutex.

> Also, if this is perhaps because of long hold times, dunno,
> the rb_first_cached primitive might reduce some of it, although I don't know
> how big the rbtrees in binder can get and if it matters at all.
>
> Anyway, that said and along with addressing Todd's comments, the ipc/ bits look
> good. Feel free to add my:
>
> Reviewed-by: Davidlohr Bueso <[email protected]>
>
> Thanks,
> Davidlohr

2018-11-09 19:17:30

by Davidlohr Bueso

Subject: Re: [PATCH V3] binder: ipc namespace support for android binder

On Fri, 09 Nov 2018, Todd Kjos wrote:
>
>print_binder_proc() drops proc->inner_lock and calls
>binder_alloc_print_allocated() which acquires proc->alloc->mutex.
>Likewise, print_binder_stats() calls print_binder_proc_stats() which
>drops its locks to call binder_alloc_print_pages() which also acquires
>proc->alloc->mutex. So binder_procs_lock needs to be a mutex since it
>can block on proc->alloc->mutex.

Ah, very good then.

Thanks,
Davidlohr

2018-11-10 03:15:22

by chouryzhou(周威)

Subject: Re: Re: [PATCH V3] binder: ipc namespace support for android binder


> I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE.
> It seems like this mechanism would work even if both are disabled --
> as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and
> allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to
> "#ifndef CONFIG_IPC_NS"

Let me explain it in detail. If SYSVIPC and IPC_NS are both defined,
current->nsproxy->ipc_ns holds the per-namespace ipc variables, and we just
use it. If SYSVIPC (or POSIX_MQUEUE) is defined while IPC_NS is not set,
current->nsproxy->ipc_ns always refers to init_ipc_ns in ipc/msgutil.c,
which is also fine for us. But if neither SYSVIPC nor POSIX_MQUEUE is set
(IPC_NS cannot be set in this situation), there is no current->nsproxy->ipc_ns
at all, so we define a fake init_ipc_ns here and use it.
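
Restating the hunk from the patch with comments, this is how ipcns
resolves in each case:

#if !defined(CONFIG_SYSVIPC) && !defined(CONFIG_POSIX_MQUEUE)
/* no SysV IPC and no POSIX mqueues: ipc/msgutil.c is not built, so the
 * kernel has no init_ipc_ns; the driver supplies a private one */
struct ipc_namespace init_ipc_ns;
#define ipcns (&init_ipc_ns)
#else
/* IPC_NS enabled: the opener's namespace from nsproxy;
 * IPC_NS disabled: nsproxy->ipc_ns always points at the global
 * init_ipc_ns defined in ipc/msgutil.c */
#define ipcns (current->nsproxy->ipc_ns)
#endif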

> why eliminate name? The string name is very useful for differentiating
> normal "framework" binder transactions vs "hal" or "vendor"
> transactions. If we just have a device number it will be hard to tell
> in the logs even which namespace it belongs to. We need to keep both
> the "name" (for which there might be multiple in each ns) and some
> indication of which namespace this is. Maybe we assign some sort of
> namespace ID during binder_init_ns().

I will retain the name of the device. The inum of the ipc_ns can be treated
as a namespace ID.
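
Roughly what I have in mind for the next revision (only a sketch; the
ns_inum field does not exist yet and would be copied from ns->ns.inum in
binder_init_ns()):

static void binder_seq_print_context(struct seq_file *m,
                                     struct binder_context *context)
{
        seq_printf(m, "%u-%d-%s", context->ns_inum, context->device,
                   context->name);
}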

> As mentioned above, we need to retain name and probably also want a ns
> id of some sort. So context now has 3 components if IPC_NS, so maybe a
> helper function to print context like:

> static void binder_seq_print_context(struct seq_file *m, struct
> binder_context *context)
> {
> #ifdef CONFIG_IPC_NS
>           seq_printf(m, "%d-%d-%s", context->ns_id, context->device,
> context->name);
> #else
>           seq_printf(m, "%s", context->name);
> #endif
> }

> (same comment below everywhere context is printed)

> Should these debugfs nodes be ns-aware and only print debugging
> info for the context of the thread accessing the node? If so, we would
> also want to be namespace-aware when printing pids.

Nowadays, debugfs is not namespace-aware, and the pid namespace is not
associated with the ipc namespace. Wouldn't it be more complicated to debug
if we only printed the info for the current thread's namespace, since we
would have to enter that ipc namespace first? But adding the ipc inum should
be no problem.

- choury -


2018-11-10 05:43:22

by Todd Kjos

Subject: Re: Re: [PATCH V3] binder: ipc namespace support for android binder

On Fri, Nov 9, 2018 at 7:09 PM chouryzhou(周威) <[email protected]> wrote:
>
> >
> > I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE.
> > It seems like this mechanism would work even if both are disabled --
> > as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and
> > allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to
> > "#ifndef CONFIG_IPC_NS"
>
> Let me explain it in detail. If SYSVIPC and IPC_NS are both defined,
> current->nsproxy->ipc_ns holds the per-namespace ipc variables, and we just
> use it. If SYSVIPC (or POSIX_MQUEUE) is defined while IPC_NS is not set,
> current->nsproxy->ipc_ns always refers to init_ipc_ns in ipc/msgutil.c,
> which is also fine for us. But if neither SYSVIPC nor POSIX_MQUEUE is set
> (IPC_NS cannot be set in this situation), there is no current->nsproxy->ipc_ns
> at all, so we define a fake init_ipc_ns here and use it.

Yes, I can read the code. I'm wondering specifically about SYSVIPC and
POSIX_MQUEUE. Even with your code changes, binder has no dependency on
these configs. Why are you creating one? The actual dependency with
your changes is on "current->nsproxy->ipc_ns" being initialized for
binder -- which is dependent on CONFIG_IPC_NS being enabled, isn't it?

If SYSVIPC or POSIX_MQUEUE are enabled, but IPC_NS is disabled, does this work?

>
> > why eliminate name? The string name is very useful for differentiating
> > normal "framework" binder transactions vs "hal" or "vendor"
> > transactions. If we just have a device number it will be hard to tell
> > in the logs even which namespace it belongs to. We need to keep both
> > the "name" (for which there might be multiple in each ns) and some
> > indication of which namespace this is. Maybe we assign some sort of
> > namespace ID during binder_init_ns().
>
> I will retain the name of the device. The inum of the ipc_ns can be treated
> as a namespace ID.
>
> > As mentioned above, we need to retain name and probably also want a ns
> > id of some sort. So context now has 3 components if IPC_NS, so maybe a
> > helper function to print context like:
> >
> > static void binder_seq_print_context(struct seq_file *m, struct
> > binder_context *context)
> > {
> > #ifdef CONFIG_IPC_NS
> > seq_printf(m, "%d-%d-%s", context->ns_id, context->device,
> > context->name);
> > #else
> > seq_printf(m, "%s", context->name);
> > #endif
> > }
> >
> > (same comment below everywhere context is printed)
> >
> > Should these debugfs nodes be ns-aware and only print debugging
> > info for the context of the thread accessing the node? If so, we would
> > also want to be namespace-aware when printing pids.
>
> Nowadays, debugfs is not namespace-aware, and the pid namespace is not
> associated with the ipc namespace. Wouldn't it be more complicated to debug
> if we only printed the info for the current thread's namespace, since we
> would have to enter that ipc namespace first? But adding the ipc inum should
> be no problem.

With the name and ns id, debugging would be fine from the host-side. I
don't understand the container use cases enough to know if you need to
be able to debug binder transaction failures from within the container
-- in which case it seems like you'd want the container-version of the
PIDs -- but obviously this depends on how the containers are set up
and what the use-cases really are. I'm ok with leaving that for a
later patch.

>
> - choury -
>
>