Introduce new helpers to access 'struct task_struct'->pid, tgid, uid, gid, comm
fields in tracing and networking.
Share bpf_trace_printk() and bpf_get_smp_processor_id() helpers between
tracing and networking.
Alexei Starovoitov (3):
bpf: introduce current->pid, tgid, uid, gid, comm accessors
bpf: allow networking programs to use bpf_trace_printk() for
debugging
bpf: let kprobe programs use bpf_get_smp_processor_id() helper
include/linux/bpf.h | 4 +++
include/uapi/linux/bpf.h | 19 +++++++++++++
kernel/bpf/core.c | 7 +++++
kernel/bpf/helpers.c | 58 ++++++++++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 28 ++++++++++++------
net/core/filter.c | 8 ++++++
samples/bpf/bpf_helpers.h | 6 ++++
samples/bpf/tracex2_kern.c | 24 ++++++++++++----
samples/bpf/tracex2_user.c | 67 ++++++++++++++++++++++++++++++++++++++------
9 files changed, 199 insertions(+), 22 deletions(-)
--
1.7.9.5
eBPF programs attached to kprobes need to filter based on
current->pid, uid and other fields, so introduce helper functions:
u64 bpf_get_current_pid_tgid(void)
Return: current->tgid << 32 | current->pid
u64 bpf_get_current_uid_gid(void)
Return: current_gid << 32 | current_uid
bpf_get_current_comm(char *buf, int size_of_buf)
stores current->comm into buf
They can be used from the programs attached to TC as well to classify packets
based on current task fields.
Update tracex2 example to print histogram of write syscalls for each process
instead of aggregated for all.
Signed-off-by: Alexei Starovoitov <[email protected]>
---
These helpers will be mainly used by bpf+tracing, but the patch is targeting
net-next tree to minimize merge conflicts and they're useful in TC too.
The feature was requested by Wang Nan <[email protected]> and
Brendan Gregg <[email protected]>
include/linux/bpf.h | 3 ++
include/uapi/linux/bpf.h | 19 +++++++++++++
kernel/bpf/core.c | 3 ++
kernel/bpf/helpers.c | 58 ++++++++++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 6 ++++
net/core/filter.c | 6 ++++
samples/bpf/bpf_helpers.h | 6 ++++
samples/bpf/tracex2_kern.c | 24 ++++++++++++----
samples/bpf/tracex2_user.c | 67 ++++++++++++++++++++++++++++++++++++++------
9 files changed, 178 insertions(+), 14 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2235aee8096a..1b9a3f5b27f6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -188,5 +188,8 @@ extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
extern const struct bpf_func_proto bpf_tail_call_proto;
extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
+extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
+extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
+extern const struct bpf_func_proto bpf_get_current_comm_proto;
#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 602f05b7a275..29ef6f99e43d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -230,6 +230,25 @@ enum bpf_func_id {
* Return: 0 on success
*/
BPF_FUNC_clone_redirect,
+
+ /**
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return: current->tgid << 32 | current->pid
+ */
+ BPF_FUNC_get_current_pid_tgid,
+
+ /**
+ * u64 bpf_get_current_uid_gid(void)
+ * Return: current_gid << 32 | current_uid
+ */
+ BPF_FUNC_get_current_uid_gid,
+
+ /**
+ * bpf_get_current_comm(char *buf, int size_of_buf)
+ * stores current->comm into buf
+ * Return: 0 on success
+ */
+ BPF_FUNC_get_current_comm,
__BPF_FUNC_MAX_ID,
};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1e00aa3316dc..1fc45cc83076 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -730,6 +730,9 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
+const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
+const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
+const struct bpf_func_proto bpf_get_current_comm_proto __weak;
/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 7ad5d8842d5b..d1dce346c56f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -14,6 +14,8 @@
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/ktime.h>
+#include <linux/sched.h>
+#include <linux/uidgid.h>
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -124,3 +126,59 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
};
+
+static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct task_struct *task = current;
+
+ if (!task)
+ return -EINVAL;
+
+ return (u64) task->tgid << 32 | task->pid;
+}
+
+const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
+ .func = bpf_get_current_pid_tgid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
+static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct task_struct *task = current;
+ kuid_t uid;
+ kgid_t gid;
+
+ if (!task)
+ return -EINVAL;
+
+ current_uid_gid(&uid, &gid);
+ return (u64) from_kgid(current_user_ns(), gid) << 32 |
+ from_kuid(current_user_ns(), uid);
+}
+
+const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
+ .func = bpf_get_current_uid_gid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
+static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+{
+ struct task_struct *task = current;
+ char *buf = (char *) (long) r1;
+
+ if (!task)
+ return -EINVAL;
+
+ memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm)));
+ return 0;
+}
+
+const struct bpf_func_proto bpf_get_current_comm_proto = {
+ .func = bpf_get_current_comm,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 50c4015a8ad3..3a17638cdf46 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -162,6 +162,12 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
+ case BPF_FUNC_get_current_pid_tgid:
+ return &bpf_get_current_pid_tgid_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_get_current_comm:
+ return &bpf_get_current_comm_proto;
case BPF_FUNC_trace_printk:
/*
diff --git a/net/core/filter.c b/net/core/filter.c
index d271c06bf01f..20aa51ccbf9d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1459,6 +1459,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_l4_csum_replace_proto;
case BPF_FUNC_clone_redirect:
return &bpf_clone_redirect_proto;
+ case BPF_FUNC_get_current_pid_tgid:
+ return &bpf_get_current_pid_tgid_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_get_current_comm:
+ return &bpf_get_current_comm_proto;
default:
return sk_filter_func_proto(func_id);
}
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f531a0b3282d..bdf1c1607b80 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -25,6 +25,12 @@ static void (*bpf_tail_call)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_tail_call;
static unsigned long long (*bpf_get_smp_processor_id)(void) =
(void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index 19ec1cfc45db..dc50f4f2943f 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -62,11 +62,18 @@ static unsigned int log2l(unsigned long v)
return log2(v);
}
+struct hist_key {
+ char comm[16];
+ u64 pid_tgid;
+ u64 uid_gid;
+ u32 index;
+};
+
struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct hist_key),
.value_size = sizeof(long),
- .max_entries = 64,
+ .max_entries = 1024,
};
SEC("kprobe/sys_write")
@@ -75,11 +82,18 @@ int bpf_prog3(struct pt_regs *ctx)
long write_size = ctx->dx; /* arg3 */
long init_val = 1;
long *value;
- u32 index = log2l(write_size);
+ struct hist_key key = {};
+
+ key.index = log2l(write_size);
+ key.pid_tgid = bpf_get_current_pid_tgid();
+ key.uid_gid = bpf_get_current_uid_gid();
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
- value = bpf_map_lookup_elem(&my_hist_map, &index);
+ value = bpf_map_lookup_elem(&my_hist_map, &key);
if (value)
__sync_fetch_and_add(value, 1);
+ else
+ bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
return 0;
}
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 91b8d0896fbb..cd0241c1447a 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -3,6 +3,7 @@
#include <stdlib.h>
#include <signal.h>
#include <linux/bpf.h>
+#include <string.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width)
str[i] = '\0';
}
-static void print_hist(int fd)
+struct task {
+ char comm[16];
+ __u64 pid_tgid;
+ __u64 uid_gid;
+};
+
+struct hist_key {
+ struct task t;
+ __u32 index;
+};
+
+#define SIZE sizeof(struct task)
+
+static void print_hist_for_pid(int fd, void *task)
{
- int key;
+ struct hist_key key = {}, next_key;
+ char starstr[MAX_STARS];
long value;
long data[MAX_INDEX] = {};
- char starstr[MAX_STARS];
- int i;
int max_ind = -1;
long max_value = 0;
+ int i, ind;
- for (key = 0; key < MAX_INDEX; key++) {
- bpf_lookup_elem(fd, &key, &value);
- data[key] = value;
- if (value && key > max_ind)
- max_ind = key;
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ if (memcmp(&next_key, task, SIZE)) {
+ key = next_key;
+ continue;
+ }
+ bpf_lookup_elem(fd, &next_key, &value);
+ ind = next_key.index;
+ data[ind] = value;
+ if (value && ind > max_ind)
+ max_ind = ind;
if (value > max_value)
max_value = value;
+ key = next_key;
}
printf(" syscall write() stats\n");
@@ -48,6 +68,35 @@ static void print_hist(int fd)
MAX_STARS, starstr);
}
}
+
+static void print_hist(int fd)
+{
+ struct hist_key key = {}, next_key;
+ static struct task tasks[1024];
+ int task_cnt = 0;
+ int i;
+
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ int found = 0;
+
+ for (i = 0; i < task_cnt; i++)
+ if (memcmp(&tasks[i], &next_key, SIZE) == 0)
+ found = 1;
+ if (!found)
+ memcpy(&tasks[task_cnt++], &next_key, SIZE);
+ key = next_key;
+ }
+
+ for (i = 0; i < task_cnt; i++) {
+ printf("\npid %d cmd %s uid %d\n",
+ (__u32) tasks[i].pid_tgid,
+ tasks[i].comm,
+ (__u32) tasks[i].uid_gid);
+ print_hist_for_pid(fd, &tasks[i]);
+ }
+
+}
+
static void int_exit(int sig)
{
print_hist(map_fd[1]);
--
1.7.9.5
bpf_trace_printk() is a helper function used to debug eBPF programs.
Let socket and TC programs use it as well.
Note, it's DEBUG ONLY helper. If it's used in the program,
the kernel will print warning banner to make sure users don't use
it in production.
Signed-off-by: Alexei Starovoitov <[email protected]>
---
include/linux/bpf.h | 1 +
kernel/bpf/core.c | 4 ++++
kernel/trace/bpf_trace.c | 20 ++++++++++++--------
net/core/filter.c | 2 ++
4 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1b9a3f5b27f6..4383476a0d48 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -150,6 +150,7 @@ struct bpf_array {
u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
void bpf_prog_array_map_clear(struct bpf_map *map);
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
#ifdef CONFIG_BPF_SYSCALL
void bpf_register_prog_type(struct bpf_prog_type_list *tl);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1fc45cc83076..c5bedc82bc1c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -733,6 +733,10 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
+const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
+{
+ return NULL;
+}
/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3a17638cdf46..4f9b5d41869b 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -147,6 +147,17 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
.arg2_type = ARG_CONST_STACK_SIZE,
};
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+{
+ /*
+ * this program might be calling bpf_trace_printk,
+ * so allocate per-cpu printk buffers
+ */
+ trace_printk_init_buffers();
+
+ return &bpf_trace_printk_proto;
+}
+
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -168,15 +179,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_current_comm:
return &bpf_get_current_comm_proto;
-
case BPF_FUNC_trace_printk:
- /*
- * this program might be calling bpf_trace_printk,
- * so allocate per-cpu printk buffers
- */
- trace_printk_init_buffers();
-
- return &bpf_trace_printk_proto;
+ return bpf_get_trace_printk_proto();
default:
return NULL;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 20aa51ccbf9d..65ff107d3d29 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1442,6 +1442,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_tail_call_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
+ case BPF_FUNC_trace_printk:
+ return bpf_get_trace_printk_proto();
default:
return NULL;
}
--
1.7.9.5
It's useful to do per-cpu histograms.
Suggested-by: Daniel Wagner <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
---
kernel/trace/bpf_trace.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4f9b5d41869b..88a041adee90 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -181,6 +181,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_current_comm_proto;
case BPF_FUNC_trace_printk:
return bpf_get_trace_printk_proto();
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
default:
return NULL;
}
--
1.7.9.5
On Fri, Jun 12, 2015 at 2:40 PM, Alexei Starovoitov <[email protected]> wrote:
> eBPF programs attached to kprobes need to filter based on
> current->pid, uid and other fields, so introduce helper functions:
>
> u64 bpf_get_current_pid_tgid(void)
> Return: current->tgid << 32 | current->pid
>
> u64 bpf_get_current_uid_gid(void)
> Return: current_gid << 32 | current_uid
How does this work wrt namespaces, and why the weird packing?
--Andy
On 6/12/15 3:08 PM, Andy Lutomirski wrote:
> On Fri, Jun 12, 2015 at 2:40 PM, Alexei Starovoitov <[email protected]> wrote:
>> eBPF programs attached to kprobes need to filter based on
>> current->pid, uid and other fields, so introduce helper functions:
>>
>> u64 bpf_get_current_pid_tgid(void)
>> Return: current->tgid << 32 | current->pid
>>
>> u64 bpf_get_current_uid_gid(void)
>> Return: current_gid << 32 | current_uid
>
> How does this work wrt namespaces,
from_kuid(current_user_ns(), uid)
> and why the weird packing?
to minimize number of calls.
We've considered several alternatives.
1. 5 different helpers
Cons: every call adds performance overhead
2a: single helper that populates 'struct bpf_task_info'
and uses 'flags' with bit per field.
+struct bpf_task_info {
+ __u32 pid;
+ __u32 tgid;
+ __u32 uid;
+ __u32 gid;
+ char comm[16];
+};
bpf_get_current_task_info(task_info, size, flags)
bit 0 - fill in pid
bit 1 - fill in tgid
Pros: single helper
Cons: ugly to use and a lot of compares in the helper
itself (two compares for each field)
2b. single helper that populates 'struct bpf_task_info'
and uses 'size' to tell how many fields to fill in.
bpf_get_current_task_info(task_info, size);
+ if (size >= offsetof(struct bpf_task_info, pid) + sizeof(info->pid))
+ info->pid = task->pid;
+ if (size >= offsetof(struct bpf_task_info, tgid) +
sizeof(info->tgid))
+ info->tgid = task->tgid;
Pros: single call (with single compare per field).
Cons: still hard to use when only uid is needed.
These three helpers looked as the best balance between
performance and usability.
On Fri, Jun 12, 2015 at 3:44 PM, Alexei Starovoitov <[email protected]> wrote:
> On 6/12/15 3:08 PM, Andy Lutomirski wrote:
>>
>> On Fri, Jun 12, 2015 at 2:40 PM, Alexei Starovoitov <[email protected]>
>> wrote:
>>>
>>> eBPF programs attached to kprobes need to filter based on
>>> current->pid, uid and other fields, so introduce helper functions:
>>>
>>> u64 bpf_get_current_pid_tgid(void)
>>> Return: current->tgid << 32 | current->pid
>>>
>>> u64 bpf_get_current_uid_gid(void)
>>> Return: current_gid << 32 | current_uid
>>
>>
>> How does this work wrt namespaces,
>
>
> from_kuid(current_user_ns(), uid)
>
Is current_user_ns() well defined in the context of an eBPF program?
--Andy
On 6/12/15 3:54 PM, Andy Lutomirski wrote:
> On Fri, Jun 12, 2015 at 3:44 PM, Alexei Starovoitov <[email protected]> wrote:
>> On 6/12/15 3:08 PM, Andy Lutomirski wrote:
>>>
>>> On Fri, Jun 12, 2015 at 2:40 PM, Alexei Starovoitov <[email protected]>
>>> wrote:
>>>>
>>>> eBPF programs attached to kprobes need to filter based on
>>>> current->pid, uid and other fields, so introduce helper functions:
>>>>
>>>> u64 bpf_get_current_pid_tgid(void)
>>>> Return: current->tgid << 32 | current->pid
>>>>
>>>> u64 bpf_get_current_uid_gid(void)
>>>> Return: current_gid << 32 | current_uid
>>>
>>>
>>> How does this work wrt namespaces,
>>
>>
>> from_kuid(current_user_ns(), uid)
>>
>
> Is current_user_ns() well defined in the context of an eBPF program?
What do you mean 'well defined'?
Semantically same as 'current'. Depending on where particular
kprobe is placed, 'current' is either meaningful or not. Program
author needs to know what he's doing. It's a tool.
On Fri, Jun 12, 2015 at 4:23 PM, Alexei Starovoitov <[email protected]> wrote:
> On 6/12/15 3:54 PM, Andy Lutomirski wrote:
>>
>> On Fri, Jun 12, 2015 at 3:44 PM, Alexei Starovoitov <[email protected]>
>> wrote:
>>>
>>> On 6/12/15 3:08 PM, Andy Lutomirski wrote:
>>>>
>>>>
>>>> On Fri, Jun 12, 2015 at 2:40 PM, Alexei Starovoitov <[email protected]>
>>>> wrote:
>>>>>
>>>>>
>>>>> eBPF programs attached to kprobes need to filter based on
>>>>> current->pid, uid and other fields, so introduce helper functions:
>>>>>
>>>>> u64 bpf_get_current_pid_tgid(void)
>>>>> Return: current->tgid << 32 | current->pid
>>>>>
>>>>> u64 bpf_get_current_uid_gid(void)
>>>>> Return: current_gid << 32 | current_uid
>>>>
>>>>
>>>>
>>>> How does this work wrt namespaces,
>>>
>>>
>>>
>>> from_kuid(current_user_ns(), uid)
>>>
>>
>> Is current_user_ns() well defined in the context of an eBPF program?
>
>
> What do you mean 'well defined'?
> Semantically same as 'current'. Depending on where particular
> kprobe is placed, 'current' is either meaningful or not. Program
> author needs to know what he's doing. It's a tool.
>
It's a dangerous tool. Also, shouldn't the returned uid match the
namespace of the task that installed the probe, not the task that's
being probed?
--Andy
On 6/12/15 4:25 PM, Andy Lutomirski wrote:
> It's a dangerous tool. Also, shouldn't the returned uid match the
> namespace of the task that installed the probe, not the task that's
> being probed?
so leaking info to unprivileged apps is the concern?
The whole thing is for root only as you know.
The non-root is still far away. Today root needs to see the whole
kernel. That was the goal from the beginning.
On Fri, Jun 12, 2015 at 4:38 PM, Alexei Starovoitov <[email protected]> wrote:
> On 6/12/15 4:25 PM, Andy Lutomirski wrote:
>>
>> It's a dangerous tool. Also, shouldn't the returned uid match the
>> namespace of the task that installed the probe, not the task that's
>> being probed?
>
>
> so leaking info to unprivileged apps is the concern?
> The whole thing is for root only as you know.
> The non-root is still far away. Today root needs to see the whole
> kernel. That was the goal from the beginning.
>
This is more of a correctness issue than a security issue. ISTM using
current_user_ns() in a kprobe is asking for trouble. It certainly
allows any unprivilege user to show any uid it wants to the probe,
which is probably not what the installer of the probe expects.
--Andy
On 6/12/15 4:47 PM, Andy Lutomirski wrote:
> On Fri, Jun 12, 2015 at 4:38 PM, Alexei Starovoitov <[email protected]> wrote:
>> On 6/12/15 4:25 PM, Andy Lutomirski wrote:
>>>
>>> It's a dangerous tool. Also, shouldn't the returned uid match the
>>> namespace of the task that installed the probe, not the task that's
>>> being probed?
>>
>>
>> so leaking info to unprivileged apps is the concern?
>> The whole thing is for root only as you know.
>> The non-root is still far away. Today root needs to see the whole
>> kernel. That was the goal from the beginning.
>>
>
> This is more of a correctness issue than a security issue. ISTM using
> current_user_ns() in a kprobe is asking for trouble. It certainly
> allows any unprivilege user to show any uid it wants to the probe,
> which is probably not what the installer of the probe expects.
probe doesn't expect anything. it doesn't make any decisions.
bpf is read only. it's _visibility_ into the kernel.
It's not used for security.
When we start connecting eBPF to seccomp I would agree that uid
handling needs to be done carefully, but we're not there yet.
I don't want to kill _visibility_ because in some distant future
bpf becomes a decision making tool in security area and
get_current_uid() will return numbers that shouldn't be blindly
used to reject/accept a user requesting something. That's far away.
On Fri, Jun 12, 2015 at 4:55 PM, Alexei Starovoitov <[email protected]> wrote:
> On 6/12/15 4:47 PM, Andy Lutomirski wrote:
>>
>> On Fri, Jun 12, 2015 at 4:38 PM, Alexei Starovoitov <[email protected]>
>> wrote:
>>>
>>> On 6/12/15 4:25 PM, Andy Lutomirski wrote:
>>>>
>>>>
>>>> It's a dangerous tool. Also, shouldn't the returned uid match the
>>>> namespace of the task that installed the probe, not the task that's
>>>> being probed?
>>>
>>>
>>>
>>> so leaking info to unprivileged apps is the concern?
>>> The whole thing is for root only as you know.
>>> The non-root is still far away. Today root needs to see the whole
>>> kernel. That was the goal from the beginning.
>>>
>>
>> This is more of a correctness issue than a security issue. ISTM using
>> current_user_ns() in a kprobe is asking for trouble. It certainly
>> allows any unprivilege user to show any uid it wants to the probe,
>> which is probably not what the installer of the probe expects.
>
>
> probe doesn't expect anything. it doesn't make any decisions.
> bpf is read only. it's _visibility_ into the kernel.
> It's not used for security.
> When we start connecting eBPF to seccomp I would agree that uid
> handling needs to be done carefully, but we're not there yet.
> I don't want to kill _visibility_ because in some distant future
> bpf becomes a decision making tool in security area and
> get_current_uid() will return numbers that shouldn't be blindly
> used to reject/accept a user requesting something. That's far away.
>
All that is true, but the code that *installed* the bpf probe might
get might confused when it logs that uid 0 did such-and-such when
really some unprivileged userns root did it.
Also, as you start calling more and more non-trivial functions from
bpf, you might need to start preventing bpf probe installations in
those functions.
--Andy
On 6/12/15 5:03 PM, Andy Lutomirski wrote:
> On Fri, Jun 12, 2015 at 4:55 PM, Alexei Starovoitov <[email protected]> wrote:
>> On 6/12/15 4:47 PM, Andy Lutomirski wrote:
>>>
>>> On Fri, Jun 12, 2015 at 4:38 PM, Alexei Starovoitov <[email protected]>
>>> wrote:
>>>>
>>>> On 6/12/15 4:25 PM, Andy Lutomirski wrote:
>>>>>
>>>>>
>>>>> It's a dangerous tool. Also, shouldn't the returned uid match the
>>>>> namespace of the task that installed the probe, not the task that's
>>>>> being probed?
>>>>
>>>>
>>>>
>>>> so leaking info to unprivileged apps is the concern?
>>>> The whole thing is for root only as you know.
>>>> The non-root is still far away. Today root needs to see the whole
>>>> kernel. That was the goal from the beginning.
>>>>
>>>
>>> This is more of a correctness issue than a security issue. ISTM using
>>> current_user_ns() in a kprobe is asking for trouble. It certainly
>>> allows any unprivilege user to show any uid it wants to the probe,
>>> which is probably not what the installer of the probe expects.
>>
>>
>> probe doesn't expect anything. it doesn't make any decisions.
>> bpf is read only. it's _visibility_ into the kernel.
>> It's not used for security.
>> When we start connecting eBPF to seccomp I would agree that uid
>> handling needs to be done carefully, but we're not there yet.
>> I don't want to kill _visibility_ because in some distant future
>> bpf becomes a decision making tool in security area and
>> get_current_uid() will return numbers that shouldn't be blindly
>> used to reject/accept a user requesting something. That's far away.
>>
>
> All that is true, but the code that *installed* the bpf probe might
> get might confused when it logs that uid 0 did such-and-such when
> really some unprivileged userns root did it.
so what specifically you proposing?
Use from_kuid(&init_user_ns,...) instead?
> Also, as you start calling more and more non-trivial functions from
> bpf, you might need to start preventing bpf probe installations in
> those functions.
yes. may be. I don't want to blacklist stuff yet, unless it
causes crashes. Recursive check is already there. Probably
something else will be needed.
On Fri, Jun 12, 2015 at 5:15 PM, Alexei Starovoitov <[email protected]> wrote:
> On 6/12/15 5:03 PM, Andy Lutomirski wrote:
>>
>> On Fri, Jun 12, 2015 at 4:55 PM, Alexei Starovoitov <[email protected]>
>> wrote:
>>>
>>> On 6/12/15 4:47 PM, Andy Lutomirski wrote:
>>>>
>>>>
>>>> On Fri, Jun 12, 2015 at 4:38 PM, Alexei Starovoitov <[email protected]>
>>>> wrote:
>>>>>
>>>>>
>>>>> On 6/12/15 4:25 PM, Andy Lutomirski wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> It's a dangerous tool. Also, shouldn't the returned uid match the
>>>>>> namespace of the task that installed the probe, not the task that's
>>>>>> being probed?
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> so leaking info to unprivileged apps is the concern?
>>>>> The whole thing is for root only as you know.
>>>>> The non-root is still far away. Today root needs to see the whole
>>>>> kernel. That was the goal from the beginning.
>>>>>
>>>>
>>>> This is more of a correctness issue than a security issue. ISTM using
>>>> current_user_ns() in a kprobe is asking for trouble. It certainly
>>>> allows any unprivilege user to show any uid it wants to the probe,
>>>> which is probably not what the installer of the probe expects.
>>>
>>>
>>>
>>> probe doesn't expect anything. it doesn't make any decisions.
>>> bpf is read only. it's _visibility_ into the kernel.
>>> It's not used for security.
>>> When we start connecting eBPF to seccomp I would agree that uid
>>> handling needs to be done carefully, but we're not there yet.
>>> I don't want to kill _visibility_ because in some distant future
>>> bpf becomes a decision making tool in security area and
>>> get_current_uid() will return numbers that shouldn't be blindly
>>> used to reject/accept a user requesting something. That's far away.
>>>
>>
>> All that is true, but the code that *installed* the bpf probe might
>> get might confused when it logs that uid 0 did such-and-such when
>> really some unprivileged userns root did it.
>
>
> so what specifically you proposing?
> Use from_kuid(&init_user_ns,...) instead?
That seems reasonable to me. After all, you can't install one of
these probes from a non-init userns.
--Andy
On 6/12/15 5:24 PM, Andy Lutomirski wrote:
>> >so what specifically you proposing?
>> >Use from_kuid(&init_user_ns,...) instead?
> That seems reasonable to me. After all, you can't install one of
> these probes from a non-init userns.
ok. will respin with that change.