2021-01-11 06:30:43

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs



On 1/8/21 3:19 PM, Song Liu wrote:
> To access per-task data, a BPF program typically creates a hash table with
> pid as the key. This is not ideal because:
> 1. The user needs to estimate the required size of the hash table, which
> may be inaccurate;
> 2. Big hash tables are slow;
> 3. To clean up the data properly during task termination, the user needs
> to write code.
>
> Task local storage overcomes these issues and becomes a better option for
> these per-task data. Task local storage is only available to BPF_LSM. Now
> enable it for tracing programs.
>
> Reported-by: kernel test robot <[email protected]>
> Signed-off-by: Song Liu <[email protected]>
> ---
> include/linux/bpf.h | 7 +++++++
> include/linux/bpf_lsm.h | 22 ----------------------
> include/linux/bpf_types.h | 2 +-
> include/linux/sched.h | 5 +++++
> kernel/bpf/Makefile | 3 +--
> kernel/bpf/bpf_local_storage.c | 28 +++++++++++++++++-----------
> kernel/bpf/bpf_lsm.c | 4 ----
> kernel/bpf/bpf_task_storage.c | 26 ++++++--------------------
> kernel/fork.c | 5 +++++
> kernel/trace/bpf_trace.c | 4 ++++
> 10 files changed, 46 insertions(+), 60 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 07cb5d15e7439..cf16548f28f7b 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1480,6 +1480,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
> struct bpf_link *bpf_link_by_id(u32 id);
>
> const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
> +void bpf_task_storage_free(struct task_struct *task);
> #else /* !CONFIG_BPF_SYSCALL */
> static inline struct bpf_prog *bpf_prog_get(u32 ufd)
> {
> @@ -1665,6 +1666,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
> {
> return NULL;
> }
> +
> +static inline void bpf_task_storage_free(struct task_struct *task)
> +{
> +}
> #endif /* CONFIG_BPF_SYSCALL */
>
> static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
> @@ -1860,6 +1865,8 @@ extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
> extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
> extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
> extern const struct bpf_func_proto bpf_sock_from_file_proto;
> +extern const struct bpf_func_proto bpf_task_storage_get_proto;
> +extern const struct bpf_func_proto bpf_task_storage_delete_proto;
>
> const struct bpf_func_proto *bpf_tracing_func_proto(
> enum bpf_func_id func_id, const struct bpf_prog *prog);
> diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
> index 0d1c33ace3987..479c101546ad1 100644
> --- a/include/linux/bpf_lsm.h
> +++ b/include/linux/bpf_lsm.h
> @@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
> return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
> }
>
> -static inline struct bpf_storage_blob *bpf_task(
> - const struct task_struct *task)
> -{
> - if (unlikely(!task->security))
> - return NULL;
> -
> - return task->security + bpf_lsm_blob_sizes.lbs_task;
> -}
> -
> extern const struct bpf_func_proto bpf_inode_storage_get_proto;
> extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
> -extern const struct bpf_func_proto bpf_task_storage_get_proto;
> -extern const struct bpf_func_proto bpf_task_storage_delete_proto;
> void bpf_inode_storage_free(struct inode *inode);
> -void bpf_task_storage_free(struct task_struct *task);
>
> #else /* !CONFIG_BPF_LSM */
>
> @@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
> return NULL;
> }
>
> -static inline struct bpf_storage_blob *bpf_task(
> - const struct task_struct *task)
> -{
> - return NULL;
> -}
> -
> static inline void bpf_inode_storage_free(struct inode *inode)
> {
> }
>
> -static inline void bpf_task_storage_free(struct task_struct *task)
> -{
> -}
> -
> #endif /* CONFIG_BPF_LSM */
>
> #endif /* _LINUX_BPF_LSM_H */
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 99f7fd657d87a..b9edee336d804 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
> #endif
> #ifdef CONFIG_BPF_LSM
> BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
> -BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
> #endif
> +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
> BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
> #if defined(CONFIG_XDP_SOCKETS)
> BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 51d535b69bd6f..4a173defa2010 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -42,6 +42,7 @@ struct audit_context;
> struct backing_dev_info;
> struct bio_list;
> struct blk_plug;
> +struct bpf_local_storage;
> struct capture_control;
> struct cfs_rq;
> struct fs_struct;
> @@ -1348,6 +1349,10 @@ struct task_struct {
> /* Used by LSM modules for access restriction: */
> void *security;
> #endif
> +#ifdef CONFIG_BPF_SYSCALL
> + /* Used by BPF task local storage */
> + struct bpf_local_storage *bpf_storage;
> +#endif

I remember there was a discussion where KP initially wanted to put
bpf_local_storage in task_struct, but later on changed to
using lsm, as his use case was mostly for lsm. Does anybody
remember the details of that discussion? I just want to be
sure what concerns people had with putting bpf_local_storage
in task_struct, and whether the use case presented by
Song justifies it.

>
> #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
> unsigned long lowest_stack;
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index d1249340fd6ba..ca995fdfa45e7 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
>
> obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
> obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
> -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
> +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_task_storage.o
> obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
> -obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
> obj-$(CONFIG_BPF_SYSCALL) += disasm.o
> obj-$(CONFIG_BPF_JIT) += trampoline.o
> obj-$(CONFIG_BPF_SYSCALL) += btf.o
[...]


2021-01-11 10:20:42

by KP Singh

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs

On Mon, Jan 11, 2021 at 7:27 AM Yonghong Song <[email protected]> wrote:
>
>
>
> On 1/8/21 3:19 PM, Song Liu wrote:
> > To access per-task data, a BPF program typically creates a hash table with
> > pid as the key. This is not ideal because:
> > 1. The user needs to estimate the required size of the hash table, which
> > may be inaccurate;
> > 2. Big hash tables are slow;
> > 3. To clean up the data properly during task termination, the user needs
> > to write code.
> >
> > Task local storage overcomes these issues and becomes a better option for
> > these per-task data. Task local storage is only available to BPF_LSM. Now
> > enable it for tracing programs.
> >
> > Reported-by: kernel test robot <[email protected]>
> > Signed-off-by: Song Liu <[email protected]>
> > ---

[...]

> > struct cfs_rq;
> > struct fs_struct;
> > @@ -1348,6 +1349,10 @@ struct task_struct {
> > /* Used by LSM modules for access restriction: */
> > void *security;
> > #endif
> > +#ifdef CONFIG_BPF_SYSCALL
> > + /* Used by BPF task local storage */
> > + struct bpf_local_storage *bpf_storage;
> > +#endif
>
> I remember there was a discussion where KP initially wanted to put
> bpf_local_storage in task_struct, but later on changed to
> using lsm, as his use case was mostly for lsm. Does anybody
> remember the details of that discussion? I just want to be
> sure what concerns people had with putting bpf_local_storage
> in task_struct, and whether the use case presented by
> Song justifies it.
>

If I recall correctly, the discussion was about inode local storage and
it was decided to use the security blob since the use-case was only LSM
programs. Since we now plan to use it in tracing,
detangling the dependency from CONFIG_BPF_LSM
sounds logical to me.


> >
> > #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
> > unsigned long lowest_stack;
> > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> > index d1249340fd6ba..ca995fdfa45e7 100644
> > --- a/kernel/bpf/Makefile
> > +++ b/kernel/bpf/Makefile
> > @@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
> >
> > obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
> > obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
> > -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
> > +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_task_storage.o
> > obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
> > -obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
> > obj-$(CONFIG_BPF_SYSCALL) += disasm.o
> > obj-$(CONFIG_BPF_JIT) += trampoline.o
> > obj-$(CONFIG_BPF_SYSCALL) += btf.o
> [...]

2021-01-11 15:59:52

by Yonghong Song

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs



On 1/11/21 2:17 AM, KP Singh wrote:
> On Mon, Jan 11, 2021 at 7:27 AM Yonghong Song <[email protected]> wrote:
>>
>>
>>
>> On 1/8/21 3:19 PM, Song Liu wrote:
>>> To access per-task data, a BPF program typically creates a hash table with
>>> pid as the key. This is not ideal because:
>>> 1. The user needs to estimate the required size of the hash table, which
>>> may be inaccurate;
>>> 2. Big hash tables are slow;
>>> 3. To clean up the data properly during task termination, the user needs
>>> to write code.
>>>
>>> Task local storage overcomes these issues and becomes a better option for
>>> these per-task data. Task local storage is only available to BPF_LSM. Now
>>> enable it for tracing programs.
>>>
>>> Reported-by: kernel test robot <[email protected]>
>>> Signed-off-by: Song Liu <[email protected]>
>>> ---
>
> [...]
>
>>> struct cfs_rq;
>>> struct fs_struct;
>>> @@ -1348,6 +1349,10 @@ struct task_struct {
>>> /* Used by LSM modules for access restriction: */
>>> void *security;
>>> #endif
>>> +#ifdef CONFIG_BPF_SYSCALL
>>> + /* Used by BPF task local storage */
>>> + struct bpf_local_storage *bpf_storage;
>>> +#endif
>>
>> I remember there was a discussion where KP initially wanted to put
>> bpf_local_storage in task_struct, but later on changed to
>> using lsm, as his use case was mostly for lsm. Does anybody
>> remember the details of that discussion? I just want to be
>> sure what concerns people had with putting bpf_local_storage
>> in task_struct, and whether the use case presented by
>> Song justifies it.
>>
>
> If I recall correctly, the discussion was about inode local storage and
> it was decided to use the security blob since the use-case was only LSM
> programs. Since we now plan to use it in tracing,
> detangling the dependency from CONFIG_BPF_LSM
> sounds logical to me.

Sounds good. Thanks for the explanation.

>
>
>>>
>>> #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
>>> unsigned long lowest_stack;
>>> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
>>> index d1249340fd6ba..ca995fdfa45e7 100644
>>> --- a/kernel/bpf/Makefile
>>> +++ b/kernel/bpf/Makefile
>>> @@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
>>>
>>> obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
>>> obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
>>> -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
>>> +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_task_storage.o
>>> obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
>>> -obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
>>> obj-$(CONFIG_BPF_SYSCALL) += disasm.o
>>> obj-$(CONFIG_BPF_JIT) += trampoline.o
>>> obj-$(CONFIG_BPF_SYSCALL) += btf.o
>> [...]