2022-04-07 15:33:16

by Jiri Olsa

Subject: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

Adding a kallsyms_lookup_names function that resolves an array of
symbols with a single pass over kallsyms.

The user provides an array of string pointers with a count and a
pointer to a preallocated array for the resolved addresses.

  int kallsyms_lookup_names(const char **syms, u32 cnt,
                            unsigned long *addrs)

Before we iterate kallsyms we sort the user-provided symbols by name
and then use that sorted array during the kallsyms iteration to find
each kallsyms symbol among the user-provided symbols.

We also check that each symbol passes ftrace_location, because this
API will be used for fprobe symbol resolving. This can be made
optional in the future if there's a need.
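
For example, a caller could resolve a couple of symbols like this
(illustrative snippet only, not part of the patch; the symbol names
are just examples):

	const char *syms[] = { "ksys_read", "ksys_write" };
	unsigned long addrs[ARRAY_SIZE(syms)];
	int err;

	err = kallsyms_lookup_names(syms, ARRAY_SIZE(syms), addrs);
	if (err)
		return err;	/* some symbol was not found or not traceable */

	/*
	 * addrs[] now holds the ftrace locations of the requested
	 * symbols, in the order kallsyms encountered them; note that
	 * syms[] itself gets sorted by the call.
	 */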

Suggested-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
---
 include/linux/kallsyms.h |  6 +++++
 kernel/kallsyms.c        | 48 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index ce1bd2fbf23e..5320a5e77f61 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 #ifdef CONFIG_KALLSYMS
 /* Lookup the address for a symbol. Returns 0 if not found. */
 unsigned long kallsyms_lookup_name(const char *name);
+int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
 
 extern int kallsyms_lookup_size_offset(unsigned long addr,
 				       unsigned long *symbolsize,
@@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
 	return 0;
 }
 
+int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
+{
+	return -ERANGE;
+}
+
 static inline int kallsyms_lookup_size_offset(unsigned long addr,
 					      unsigned long *symbolsize,
 					      unsigned long *offset)
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 79f2eb617a62..a3738ddf9e87 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -29,6 +29,8 @@
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/bsearch.h>
+#include <linux/sort.h>
 
 /*
  * These will be re-linked against their real values
@@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
 	return __sprint_symbol(buffer, address, -1, 1, 1);
 }
 
+static int symbols_cmp(const void *a, const void *b)
+{
+	const char **str_a = (const char **) a;
+	const char **str_b = (const char **) b;
+
+	return strcmp(*str_a, *str_b);
+}
+
+struct kallsyms_data {
+	unsigned long *addrs;
+	const char **syms;
+	u32 cnt;
+	u32 found;
+};
+
+static int kallsyms_callback(void *data, const char *name,
+			     struct module *mod, unsigned long addr)
+{
+	struct kallsyms_data *args = data;
+
+	if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
+		return 0;
+
+	addr = ftrace_location(addr);
+	if (!addr)
+		return 0;
+
+	args->addrs[args->found++] = addr;
+	return args->found == args->cnt ? 1 : 0;
+}
+
+int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
+{
+	struct kallsyms_data args;
+
+	sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
+
+	args.addrs = addrs;
+	args.syms = syms;
+	args.cnt = cnt;
+	args.found = 0;
+	kallsyms_on_each_symbol(kallsyms_callback, &args);
+
+	return args.found == args.cnt ? 0 : -EINVAL;
+}
+
 /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
 struct kallsym_iter {
 	loff_t pos;
--
2.35.1


2022-04-08 01:26:22

by Masami Hiramatsu

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Thu, 7 Apr 2022 14:52:21 +0200
Jiri Olsa <[email protected]> wrote:

> Adding kallsyms_lookup_names function that resolves array of symbols
> with single pass over kallsyms.
>
> The user provides array of string pointers with count and pointer to
> allocated array for resolved values.
>
> int kallsyms_lookup_names(const char **syms, u32 cnt,
> unsigned long *addrs)
>
> Before we iterate kallsyms we sort user provided symbols by name and
> then use that in kalsyms iteration to find each kallsyms symbol in
> user provided symbols.
>
> We also check each symbol to pass ftrace_location, because this API
> will be used for fprobe symbols resolving. This can be optional in
> future if there's a need.

I like this idea very much :-)

>
> Suggested-by: Andrii Nakryiko <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> include/linux/kallsyms.h | 6 +++++
> kernel/kallsyms.c | 48 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 54 insertions(+)
>
> diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
> index ce1bd2fbf23e..5320a5e77f61 100644
> --- a/include/linux/kallsyms.h
> +++ b/include/linux/kallsyms.h
> @@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
> #ifdef CONFIG_KALLSYMS
> /* Lookup the address for a symbol. Returns 0 if not found. */
> unsigned long kallsyms_lookup_name(const char *name);
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
>
> extern int kallsyms_lookup_size_offset(unsigned long addr,
> unsigned long *symbolsize,
> @@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
> return 0;
> }
>
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> +{
> + return -ERANGE;
> +}
> +
> static inline int kallsyms_lookup_size_offset(unsigned long addr,
> unsigned long *symbolsize,
> unsigned long *offset)
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 79f2eb617a62..a3738ddf9e87 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -29,6 +29,8 @@
> #include <linux/compiler.h>
> #include <linux/module.h>
> #include <linux/kernel.h>
> +#include <linux/bsearch.h>
> +#include <linux/sort.h>
>
> /*
> * These will be re-linked against their real values
> @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> return __sprint_symbol(buffer, address, -1, 1, 1);
> }
>
> +static int symbols_cmp(const void *a, const void *b)
> +{
> + const char **str_a = (const char **) a;
> + const char **str_b = (const char **) b;
> +
> + return strcmp(*str_a, *str_b);
> +}
> +
> +struct kallsyms_data {
> + unsigned long *addrs;
> + const char **syms;
> + u32 cnt;
> + u32 found;

BTW, why do you use 'u32' for this arch-independent code?
I think 'size_t' would make its role clearer.

> +};
> +
> +static int kallsyms_callback(void *data, const char *name,
> + struct module *mod, unsigned long addr)
> +{
> + struct kallsyms_data *args = data;
> +
> + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> + return 0;
> +
> + addr = ftrace_location(addr);
> + if (!addr)
> + return 0;
> +
> + args->addrs[args->found++] = addr;
> + return args->found == args->cnt ? 1 : 0;
> +}
> +
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)

Ditto. I think 'size_t cnt' is better.

Thank you,

> +{
> + struct kallsyms_data args;
> +
> + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
> +
> + args.addrs = addrs;
> + args.syms = syms;
> + args.cnt = cnt;
> + args.found = 0;
> + kallsyms_on_each_symbol(kallsyms_callback, &args);
> +
> + return args.found == args.cnt ? 0 : -EINVAL;
> +}
> +
> /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
> struct kallsym_iter {
> loff_t pos;
> --
> 2.35.1
>


--
Masami Hiramatsu <[email protected]>

2022-04-10 01:24:49

by Alexei Starovoitov

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Thu, Apr 07, 2022 at 02:52:21PM +0200, Jiri Olsa wrote:
> Adding kallsyms_lookup_names function that resolves array of symbols
> with single pass over kallsyms.
>
> The user provides array of string pointers with count and pointer to
> allocated array for resolved values.
>
> int kallsyms_lookup_names(const char **syms, u32 cnt,
> unsigned long *addrs)
>
> Before we iterate kallsyms we sort user provided symbols by name and
> then use that in kalsyms iteration to find each kallsyms symbol in
> user provided symbols.
>
> We also check each symbol to pass ftrace_location, because this API
> will be used for fprobe symbols resolving. This can be optional in
> future if there's a need.
>
> Suggested-by: Andrii Nakryiko <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> include/linux/kallsyms.h | 6 +++++
> kernel/kallsyms.c | 48 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 54 insertions(+)
>
> diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
> index ce1bd2fbf23e..5320a5e77f61 100644
> --- a/include/linux/kallsyms.h
> +++ b/include/linux/kallsyms.h
> @@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
> #ifdef CONFIG_KALLSYMS
> /* Lookup the address for a symbol. Returns 0 if not found. */
> unsigned long kallsyms_lookup_name(const char *name);
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
>
> extern int kallsyms_lookup_size_offset(unsigned long addr,
> unsigned long *symbolsize,
> @@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
> return 0;
> }
>
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> +{
> + return -ERANGE;
> +}
> +
> static inline int kallsyms_lookup_size_offset(unsigned long addr,
> unsigned long *symbolsize,
> unsigned long *offset)
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 79f2eb617a62..a3738ddf9e87 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -29,6 +29,8 @@
> #include <linux/compiler.h>
> #include <linux/module.h>
> #include <linux/kernel.h>
> +#include <linux/bsearch.h>
> +#include <linux/sort.h>
>
> /*
> * These will be re-linked against their real values
> @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> return __sprint_symbol(buffer, address, -1, 1, 1);
> }
>
> +static int symbols_cmp(const void *a, const void *b)
> +{
> + const char **str_a = (const char **) a;
> + const char **str_b = (const char **) b;
> +
> + return strcmp(*str_a, *str_b);
> +}
> +
> +struct kallsyms_data {
> + unsigned long *addrs;
> + const char **syms;
> + u32 cnt;
> + u32 found;
> +};
> +
> +static int kallsyms_callback(void *data, const char *name,
> + struct module *mod, unsigned long addr)
> +{
> + struct kallsyms_data *args = data;
> +
> + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> + return 0;
> +
> + addr = ftrace_location(addr);
> + if (!addr)
> + return 0;
> +
> + args->addrs[args->found++] = addr;
> + return args->found == args->cnt ? 1 : 0;
> +}
> +
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> +{
> + struct kallsyms_data args;
> +
> + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);

It's nice to share symbols_cmp for sort and bsearch,
but technically messing with the input argument 'syms' like this
will cause issues sooner or later.
Let's make the caller do the sort.
Unordered input will cause issues with bsearch, of course,
but that's a lesser evil, imo.
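
i.e. the caller side would then do something like this (sketch only,
assuming the comparator is visible to the caller):

	/* caller sorts once, kallsyms_lookup_names() only bsearches */
	sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
	err = kallsyms_lookup_names(syms, cnt, addrs);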

> +
> + args.addrs = addrs;
> + args.syms = syms;
> + args.cnt = cnt;
> + args.found = 0;
> + kallsyms_on_each_symbol(kallsyms_callback, &args);
> +
> + return args.found == args.cnt ? 0 : -EINVAL;
> +}
> +
> /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
> struct kallsym_iter {
> loff_t pos;
> --
> 2.35.1
>

2022-04-12 21:49:19

by Jiri Olsa

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Fri, Apr 08, 2022 at 04:19:25PM -0700, Alexei Starovoitov wrote:
> On Thu, Apr 07, 2022 at 02:52:21PM +0200, Jiri Olsa wrote:
> > Adding kallsyms_lookup_names function that resolves array of symbols
> > with single pass over kallsyms.
> >
> > The user provides array of string pointers with count and pointer to
> > allocated array for resolved values.
> >
> > int kallsyms_lookup_names(const char **syms, u32 cnt,
> > unsigned long *addrs)
> >
> > Before we iterate kallsyms we sort user provided symbols by name and
> > then use that in kalsyms iteration to find each kallsyms symbol in
> > user provided symbols.
> >
> > We also check each symbol to pass ftrace_location, because this API
> > will be used for fprobe symbols resolving. This can be optional in
> > future if there's a need.
> >
> > Suggested-by: Andrii Nakryiko <[email protected]>
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > include/linux/kallsyms.h | 6 +++++
> > kernel/kallsyms.c | 48 ++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 54 insertions(+)
> >
> > diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
> > index ce1bd2fbf23e..5320a5e77f61 100644
> > --- a/include/linux/kallsyms.h
> > +++ b/include/linux/kallsyms.h
> > @@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
> > #ifdef CONFIG_KALLSYMS
> > /* Lookup the address for a symbol. Returns 0 if not found. */
> > unsigned long kallsyms_lookup_name(const char *name);
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
> >
> > extern int kallsyms_lookup_size_offset(unsigned long addr,
> > unsigned long *symbolsize,
> > @@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
> > return 0;
> > }
> >
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > +{
> > + return -ERANGE;
> > +}
> > +
> > static inline int kallsyms_lookup_size_offset(unsigned long addr,
> > unsigned long *symbolsize,
> > unsigned long *offset)
> > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> > index 79f2eb617a62..a3738ddf9e87 100644
> > --- a/kernel/kallsyms.c
> > +++ b/kernel/kallsyms.c
> > @@ -29,6 +29,8 @@
> > #include <linux/compiler.h>
> > #include <linux/module.h>
> > #include <linux/kernel.h>
> > +#include <linux/bsearch.h>
> > +#include <linux/sort.h>
> >
> > /*
> > * These will be re-linked against their real values
> > @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> > return __sprint_symbol(buffer, address, -1, 1, 1);
> > }
> >
> > +static int symbols_cmp(const void *a, const void *b)
> > +{
> > + const char **str_a = (const char **) a;
> > + const char **str_b = (const char **) b;
> > +
> > + return strcmp(*str_a, *str_b);
> > +}
> > +
> > +struct kallsyms_data {
> > + unsigned long *addrs;
> > + const char **syms;
> > + u32 cnt;
> > + u32 found;
> > +};
> > +
> > +static int kallsyms_callback(void *data, const char *name,
> > + struct module *mod, unsigned long addr)
> > +{
> > + struct kallsyms_data *args = data;
> > +
> > + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> > + return 0;
> > +
> > + addr = ftrace_location(addr);
> > + if (!addr)
> > + return 0;
> > +
> > + args->addrs[args->found++] = addr;
> > + return args->found == args->cnt ? 1 : 0;
> > +}
> > +
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > +{
> > + struct kallsyms_data args;
> > +
> > + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
>
> It's nice to share symbols_cmp for sort and bsearch,
> but messing technically input argument 'syms' like this will cause
> issues sooner or later.
> Lets make caller do the sort.
> Unordered input will cause issue with bsearch, of course,
> but it's a lesser evil. imo.

ok, will move the sort out and add a proper comment to the
function mentioning the sort requirement for syms,
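something along these lines (just a sketch of where I'm heading,
not the actual follow-up patch):

/*
 * kallsyms_lookup_names - Lookup addresses for an array of symbols
 *
 * @syms:  array of symbol names, must be sorted by the caller in
 *         strcmp() order, because the kallsyms walk bsearch()es it
 * @cnt:   number of entries in @syms
 * @addrs: preallocated array for the resolved addresses
 */
int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
{
	struct kallsyms_data args;

	/* no sort() here anymore, the caller is responsible for @syms order */
	args.addrs = addrs;
	args.syms = syms;
	args.cnt = cnt;
	args.found = 0;
	kallsyms_on_each_symbol(kallsyms_callback, &args);

	return args.found == args.cnt ? 0 : -EINVAL;
}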

thanks,
jirka

>
> > +
> > + args.addrs = addrs;
> > + args.syms = syms;
> > + args.cnt = cnt;
> > + args.found = 0;
> > + kallsyms_on_each_symbol(kallsyms_callback, &args);
> > +
> > + return args.found == args.cnt ? 0 : -EINVAL;
> > +}
> > +
> > /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
> > struct kallsym_iter {
> > loff_t pos;
> > --
> > 2.35.1
> >

2022-04-12 22:29:05

by Andrii Nakryiko

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Thu, Apr 7, 2022 at 5:52 AM Jiri Olsa <[email protected]> wrote:
>
> Adding kallsyms_lookup_names function that resolves array of symbols
> with single pass over kallsyms.
>
> The user provides array of string pointers with count and pointer to
> allocated array for resolved values.
>
> int kallsyms_lookup_names(const char **syms, u32 cnt,
> unsigned long *addrs)
>
> Before we iterate kallsyms we sort user provided symbols by name and
> then use that in kalsyms iteration to find each kallsyms symbol in
> user provided symbols.
>
> We also check each symbol to pass ftrace_location, because this API
> will be used for fprobe symbols resolving. This can be optional in
> future if there's a need.
>
> Suggested-by: Andrii Nakryiko <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> include/linux/kallsyms.h | 6 +++++
> kernel/kallsyms.c | 48 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 54 insertions(+)
>
> diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
> index ce1bd2fbf23e..5320a5e77f61 100644
> --- a/include/linux/kallsyms.h
> +++ b/include/linux/kallsyms.h
> @@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
> #ifdef CONFIG_KALLSYMS
> /* Lookup the address for a symbol. Returns 0 if not found. */
> unsigned long kallsyms_lookup_name(const char *name);
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
>
> extern int kallsyms_lookup_size_offset(unsigned long addr,
> unsigned long *symbolsize,
> @@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
> return 0;
> }
>
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> +{
> + return -ERANGE;
> +}
> +
> static inline int kallsyms_lookup_size_offset(unsigned long addr,
> unsigned long *symbolsize,
> unsigned long *offset)
> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> index 79f2eb617a62..a3738ddf9e87 100644
> --- a/kernel/kallsyms.c
> +++ b/kernel/kallsyms.c
> @@ -29,6 +29,8 @@
> #include <linux/compiler.h>
> #include <linux/module.h>
> #include <linux/kernel.h>
> +#include <linux/bsearch.h>
> +#include <linux/sort.h>
>
> /*
> * These will be re-linked against their real values
> @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> return __sprint_symbol(buffer, address, -1, 1, 1);
> }
>
> +static int symbols_cmp(const void *a, const void *b)

isn't this literally strcmp? Or will the compiler actually complain
about const void * vs const char *?

> +{
> + const char **str_a = (const char **) a;
> + const char **str_b = (const char **) b;
> +
> + return strcmp(*str_a, *str_b);
> +}
> +
> +struct kallsyms_data {
> + unsigned long *addrs;
> + const char **syms;
> + u32 cnt;
> + u32 found;
> +};
> +
> +static int kallsyms_callback(void *data, const char *name,
> + struct module *mod, unsigned long addr)
> +{
> + struct kallsyms_data *args = data;
> +
> + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> + return 0;
> +
> + addr = ftrace_location(addr);
> + if (!addr)
> + return 0;
> +
> + args->addrs[args->found++] = addr;
> + return args->found == args->cnt ? 1 : 0;
> +}
> +
> +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> +{
> + struct kallsyms_data args;
> +
> + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
> +
> + args.addrs = addrs;
> + args.syms = syms;
> + args.cnt = cnt;
> + args.found = 0;
> + kallsyms_on_each_symbol(kallsyms_callback, &args);
> +
> + return args.found == args.cnt ? 0 : -EINVAL;

ESRCH or ENOENT makes a bit more sense as an error?


> +}
> +
> /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
> struct kallsym_iter {
> loff_t pos;
> --
> 2.35.1
>

2022-04-13 00:06:08

by Jiri Olsa

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Mon, Apr 11, 2022 at 03:15:23PM -0700, Andrii Nakryiko wrote:

SNIP

> > static inline int kallsyms_lookup_size_offset(unsigned long addr,
> > unsigned long *symbolsize,
> > unsigned long *offset)
> > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> > index 79f2eb617a62..a3738ddf9e87 100644
> > --- a/kernel/kallsyms.c
> > +++ b/kernel/kallsyms.c
> > @@ -29,6 +29,8 @@
> > #include <linux/compiler.h>
> > #include <linux/module.h>
> > #include <linux/kernel.h>
> > +#include <linux/bsearch.h>
> > +#include <linux/sort.h>
> >
> > /*
> > * These will be re-linked against their real values
> > @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> > return __sprint_symbol(buffer, address, -1, 1, 1);
> > }
> >
> > +static int symbols_cmp(const void *a, const void *b)
>
> isn't this literally strcmp? Or compiler will actually complain about
> const void * vs const char *?

yes..

kernel/kallsyms.c: In function ‘kallsyms_callback’:
kernel/kallsyms.c:597:73: error: passing argument 5 of ‘bsearch’ from incompatible pointer type [-Werror=incompatible-pointer-types]
  597 |         if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), strcmp))
      |                                                                          ^~~~~~
      |                                                                          |
      |                                                                          int (*)(const char *, const char *)
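
the types just don't line up, e.g. (illustration only):

	/* what bsearch()/sort() expect from the comparator */
	int cmp(const void *a, const void *b);

	/* what strcmp() is */
	int strcmp(const char *s1, const char *s2);

and since the array elements are 'const char *', each comparator
argument is really a 'const char **' that has to be dereferenced
before calling strcmp(), hence the symbols_cmp wrapper.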


>
> > +{
> > + const char **str_a = (const char **) a;
> > + const char **str_b = (const char **) b;
> > +
> > + return strcmp(*str_a, *str_b);
> > +}
> > +
> > +struct kallsyms_data {
> > + unsigned long *addrs;
> > + const char **syms;
> > + u32 cnt;
> > + u32 found;
> > +};
> > +
> > +static int kallsyms_callback(void *data, const char *name,
> > + struct module *mod, unsigned long addr)
> > +{
> > + struct kallsyms_data *args = data;
> > +
> > + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> > + return 0;
> > +
> > + addr = ftrace_location(addr);
> > + if (!addr)
> > + return 0;
> > +
> > + args->addrs[args->found++] = addr;
> > + return args->found == args->cnt ? 1 : 0;
> > +}
> > +
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > +{
> > + struct kallsyms_data args;
> > +
> > + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
> > +
> > + args.addrs = addrs;
> > + args.syms = syms;
> > + args.cnt = cnt;
> > + args.found = 0;
> > + kallsyms_on_each_symbol(kallsyms_callback, &args);
> > +
> > + return args.found == args.cnt ? 0 : -EINVAL;
>
> ESRCH or ENOENT makes a bit more sense as an error?

ok

jirka

>
>
> > +}
> > +
> > /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
> > struct kallsym_iter {
> > loff_t pos;
> > --
> > 2.35.1
> >

2022-04-13 00:07:35

by Jiri Olsa

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Fri, Apr 08, 2022 at 04:19:25PM -0700, Alexei Starovoitov wrote:
> On Thu, Apr 07, 2022 at 02:52:21PM +0200, Jiri Olsa wrote:
> > Adding kallsyms_lookup_names function that resolves array of symbols
> > with single pass over kallsyms.
> >
> > The user provides array of string pointers with count and pointer to
> > allocated array for resolved values.
> >
> > int kallsyms_lookup_names(const char **syms, u32 cnt,
> > unsigned long *addrs)
> >
> > Before we iterate kallsyms we sort user provided symbols by name and
> > then use that in kalsyms iteration to find each kallsyms symbol in
> > user provided symbols.
> >
> > We also check each symbol to pass ftrace_location, because this API
> > will be used for fprobe symbols resolving. This can be optional in
> > future if there's a need.
> >
> > Suggested-by: Andrii Nakryiko <[email protected]>
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> > include/linux/kallsyms.h | 6 +++++
> > kernel/kallsyms.c | 48 ++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 54 insertions(+)
> >
> > diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
> > index ce1bd2fbf23e..5320a5e77f61 100644
> > --- a/include/linux/kallsyms.h
> > +++ b/include/linux/kallsyms.h
> > @@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
> > #ifdef CONFIG_KALLSYMS
> > /* Lookup the address for a symbol. Returns 0 if not found. */
> > unsigned long kallsyms_lookup_name(const char *name);
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
> >
> > extern int kallsyms_lookup_size_offset(unsigned long addr,
> > unsigned long *symbolsize,
> > @@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
> > return 0;
> > }
> >
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > +{
> > + return -ERANGE;
> > +}
> > +
> > static inline int kallsyms_lookup_size_offset(unsigned long addr,
> > unsigned long *symbolsize,
> > unsigned long *offset)
> > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> > index 79f2eb617a62..a3738ddf9e87 100644
> > --- a/kernel/kallsyms.c
> > +++ b/kernel/kallsyms.c
> > @@ -29,6 +29,8 @@
> > #include <linux/compiler.h>
> > #include <linux/module.h>
> > #include <linux/kernel.h>
> > +#include <linux/bsearch.h>
> > +#include <linux/sort.h>
> >
> > /*
> > * These will be re-linked against their real values
> > @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> > return __sprint_symbol(buffer, address, -1, 1, 1);
> > }
> >
> > +static int symbols_cmp(const void *a, const void *b)
> > +{
> > + const char **str_a = (const char **) a;
> > + const char **str_b = (const char **) b;
> > +
> > + return strcmp(*str_a, *str_b);
> > +}
> > +
> > +struct kallsyms_data {
> > + unsigned long *addrs;
> > + const char **syms;
> > + u32 cnt;
> > + u32 found;
> > +};
> > +
> > +static int kallsyms_callback(void *data, const char *name,
> > + struct module *mod, unsigned long addr)
> > +{
> > + struct kallsyms_data *args = data;
> > +
> > + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> > + return 0;
> > +
> > + addr = ftrace_location(addr);
> > + if (!addr)
> > + return 0;
> > +
> > + args->addrs[args->found++] = addr;
> > + return args->found == args->cnt ? 1 : 0;
> > +}
> > +
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > +{
> > + struct kallsyms_data args;
> > +
> > + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
>
> It's nice to share symbols_cmp for sort and bsearch,
> but messing technically input argument 'syms' like this will cause
> issues sooner or later.
> Lets make caller do the sort.
> Unordered input will cause issue with bsearch, of course,
> but it's a lesser evil. imo.
>

Masami,
this logic bubbles up to register_fprobe_syms, because the user
provides the symbols as its argument. Can we still push this
assumption onto the 'syms' array, like with the comment change below?

FYI the bpf side does not use register_fprobe_syms, it uses
register_fprobe_ips, because it always needs the ips as the search
base for the cookie values.

thanks,
jirka


---
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index d466803dc2b2..28379c0e23e5 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -250,7 +250,7 @@ EXPORT_SYMBOL_GPL(register_fprobe_ips);
 /**
  * register_fprobe_syms() - Register fprobe to ftrace by symbols.
  * @fp: A fprobe data structure to be registered.
- * @syms: An array of target symbols.
+ * @syms: An array of target symbols, must be alphabetically sorted.
  * @num: The number of entries of @syms.
  *
  * Register @fp to the symbols given by @syms array. This will be useful if

2022-04-13 08:54:16

by Jiri Olsa

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Fri, Apr 08, 2022 at 09:57:01AM +0900, Masami Hiramatsu wrote:

SNIP

> > /*
> > * These will be re-linked against their real values
> > @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> > return __sprint_symbol(buffer, address, -1, 1, 1);
> > }
> >
> > +static int symbols_cmp(const void *a, const void *b)
> > +{
> > + const char **str_a = (const char **) a;
> > + const char **str_b = (const char **) b;
> > +
> > + return strcmp(*str_a, *str_b);
> > +}
> > +
> > +struct kallsyms_data {
> > + unsigned long *addrs;
> > + const char **syms;
> > + u32 cnt;
> > + u32 found;
>
> BTW, why do you use 'u32' for this arch independent code?
> I think 'size_t' will make its role clearer.

right, will change

>
> > +};
> > +
> > +static int kallsyms_callback(void *data, const char *name,
> > + struct module *mod, unsigned long addr)
> > +{
> > + struct kallsyms_data *args = data;
> > +
> > + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> > + return 0;
> > +
> > + addr = ftrace_location(addr);
> > + if (!addr)
> > + return 0;
> > +
> > + args->addrs[args->found++] = addr;
> > + return args->found == args->cnt ? 1 : 0;
> > +}
> > +
> > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
>
> Ditto. I think 'size_t cnt' is better.

ok, thanks

jirka

>
> Thank you,
>
> > +{
> > + struct kallsyms_data args;
> > +
> > + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
> > +
> > + args.addrs = addrs;
> > + args.syms = syms;
> > + args.cnt = cnt;
> > + args.found = 0;
> > + kallsyms_on_each_symbol(kallsyms_callback, &args);
> > +
> > + return args.found == args.cnt ? 0 : -EINVAL;
> > +}
> > +
> > /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
> > struct kallsym_iter {
> > loff_t pos;
> > --
> > 2.35.1
> >
>
>
> --
> Masami Hiramatsu <[email protected]>

2022-04-16 00:15:53

by Jiri Olsa

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

On Fri, Apr 15, 2022 at 09:47:27AM +0900, Masami Hiramatsu wrote:
> Hi Jiri,
>
> Sorry for replying later.
>
> On Tue, 12 Apr 2022 22:46:15 +0200
> Jiri Olsa <[email protected]> wrote:
>
> > On Fri, Apr 08, 2022 at 04:19:25PM -0700, Alexei Starovoitov wrote:
> > > On Thu, Apr 07, 2022 at 02:52:21PM +0200, Jiri Olsa wrote:
> > > > Adding kallsyms_lookup_names function that resolves array of symbols
> > > > with single pass over kallsyms.
> > > >
> > > > The user provides array of string pointers with count and pointer to
> > > > allocated array for resolved values.
> > > >
> > > > int kallsyms_lookup_names(const char **syms, u32 cnt,
> > > > unsigned long *addrs)
> > > >
> > > > Before we iterate kallsyms we sort user provided symbols by name and
> > > > then use that in kalsyms iteration to find each kallsyms symbol in
> > > > user provided symbols.
> > > >
> > > > We also check each symbol to pass ftrace_location, because this API
> > > > will be used for fprobe symbols resolving. This can be optional in
> > > > future if there's a need.
> > > >
> > > > Suggested-by: Andrii Nakryiko <[email protected]>
> > > > Signed-off-by: Jiri Olsa <[email protected]>
> > > > ---
> > > > include/linux/kallsyms.h | 6 +++++
> > > > kernel/kallsyms.c | 48 ++++++++++++++++++++++++++++++++++++++++
> > > > 2 files changed, 54 insertions(+)
> > > >
> > > > diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
> > > > index ce1bd2fbf23e..5320a5e77f61 100644
> > > > --- a/include/linux/kallsyms.h
> > > > +++ b/include/linux/kallsyms.h
> > > > @@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
> > > > #ifdef CONFIG_KALLSYMS
> > > > /* Lookup the address for a symbol. Returns 0 if not found. */
> > > > unsigned long kallsyms_lookup_name(const char *name);
> > > > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
> > > >
> > > > extern int kallsyms_lookup_size_offset(unsigned long addr,
> > > > unsigned long *symbolsize,
> > > > @@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
> > > > return 0;
> > > > }
> > > >
> > > > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > > > +{
> > > > + return -ERANGE;
> > > > +}
> > > > +
> > > > static inline int kallsyms_lookup_size_offset(unsigned long addr,
> > > > unsigned long *symbolsize,
> > > > unsigned long *offset)
> > > > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> > > > index 79f2eb617a62..a3738ddf9e87 100644
> > > > --- a/kernel/kallsyms.c
> > > > +++ b/kernel/kallsyms.c
> > > > @@ -29,6 +29,8 @@
> > > > #include <linux/compiler.h>
> > > > #include <linux/module.h>
> > > > #include <linux/kernel.h>
> > > > +#include <linux/bsearch.h>
> > > > +#include <linux/sort.h>
> > > >
> > > > /*
> > > > * These will be re-linked against their real values
> > > > @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> > > > return __sprint_symbol(buffer, address, -1, 1, 1);
> > > > }
> > > >
> > > > +static int symbols_cmp(const void *a, const void *b)
> > > > +{
> > > > + const char **str_a = (const char **) a;
> > > > + const char **str_b = (const char **) b;
> > > > +
> > > > + return strcmp(*str_a, *str_b);
> > > > +}
> > > > +
> > > > +struct kallsyms_data {
> > > > + unsigned long *addrs;
> > > > + const char **syms;
> > > > + u32 cnt;
> > > > + u32 found;
> > > > +};
> > > > +
> > > > +static int kallsyms_callback(void *data, const char *name,
> > > > + struct module *mod, unsigned long addr)
> > > > +{
> > > > + struct kallsyms_data *args = data;
> > > > +
> > > > + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> > > > + return 0;
> > > > +
> > > > + addr = ftrace_location(addr);
> > > > + if (!addr)
> > > > + return 0;
> > > > +
> > > > + args->addrs[args->found++] = addr;
> > > > + return args->found == args->cnt ? 1 : 0;
> > > > +}
> > > > +
> > > > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > > > +{
> > > > + struct kallsyms_data args;
> > > > +
> > > > + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
> > >
> > > It's nice to share symbols_cmp for sort and bsearch,
> > > but messing technically input argument 'syms' like this will cause
> > > issues sooner or later.
> > > Lets make caller do the sort.
> > > Unordered input will cause issue with bsearch, of course,
> > > but it's a lesser evil. imo.
> > >
> >
> > Masami,
> > this logic bubbles up to the register_fprobe_syms, because user
> > provides symbols as its argument. Can we still force this assumption
> > to the 'syms' array, like with the comment change below?
> >
> > FYI the bpf side does not use register_fprobe_syms, it uses
> > register_fprobe_ips, because it always needs ips as search
> > base for cookie values
>
> Hmm, in that case fprobe can call sort() in the register function.
> That will be much easier and safer. The bpf case, the input array will
> be generated by the bpftool (not by manual), so it can ensure the
> syms is sorted. But we don't know how fprobe user passes syms array.
> Then register_fprobe_syms() will always requires sort(). I don't like
> such redundant requirements.

ok, I'll add it to register_fprobe_syms
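
roughly like this in fprobe.c (sketch only; the comparator would need
to be shared with or duplicated from kallsyms.c, and error handling
is simplified):

static int fprobe_syms_cmp(const void *a, const void *b)
{
	return strcmp(*(const char **)a, *(const char **)b);
}

int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
{
	unsigned long *addrs;
	int ret;

	if (!fp || !syms || num <= 0)
		return -EINVAL;

	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	/* sort up front so kallsyms_lookup_names() can bsearch @syms */
	sort(syms, num, sizeof(*syms), fprobe_syms_cmp, NULL);

	ret = kallsyms_lookup_names(syms, num, addrs);
	if (!ret)
		ret = register_fprobe_ips(fp, addrs, num);

	kfree(addrs);
	return ret;
}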

thanks,
jirka

2022-04-16 01:35:00

by Masami Hiramatsu

Subject: Re: [RFC bpf-next 1/4] kallsyms: Add kallsyms_lookup_names function

Hi Jiri,

Sorry for the late reply.

On Tue, 12 Apr 2022 22:46:15 +0200
Jiri Olsa <[email protected]> wrote:

> On Fri, Apr 08, 2022 at 04:19:25PM -0700, Alexei Starovoitov wrote:
> > On Thu, Apr 07, 2022 at 02:52:21PM +0200, Jiri Olsa wrote:
> > > Adding kallsyms_lookup_names function that resolves array of symbols
> > > with single pass over kallsyms.
> > >
> > > The user provides array of string pointers with count and pointer to
> > > allocated array for resolved values.
> > >
> > > int kallsyms_lookup_names(const char **syms, u32 cnt,
> > > unsigned long *addrs)
> > >
> > > Before we iterate kallsyms we sort user provided symbols by name and
> > > then use that in kalsyms iteration to find each kallsyms symbol in
> > > user provided symbols.
> > >
> > > We also check each symbol to pass ftrace_location, because this API
> > > will be used for fprobe symbols resolving. This can be optional in
> > > future if there's a need.
> > >
> > > Suggested-by: Andrii Nakryiko <[email protected]>
> > > Signed-off-by: Jiri Olsa <[email protected]>
> > > ---
> > > include/linux/kallsyms.h | 6 +++++
> > > kernel/kallsyms.c | 48 ++++++++++++++++++++++++++++++++++++++++
> > > 2 files changed, 54 insertions(+)
> > >
> > > diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
> > > index ce1bd2fbf23e..5320a5e77f61 100644
> > > --- a/include/linux/kallsyms.h
> > > +++ b/include/linux/kallsyms.h
> > > @@ -72,6 +72,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
> > > #ifdef CONFIG_KALLSYMS
> > > /* Lookup the address for a symbol. Returns 0 if not found. */
> > > unsigned long kallsyms_lookup_name(const char *name);
> > > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs);
> > >
> > > extern int kallsyms_lookup_size_offset(unsigned long addr,
> > > unsigned long *symbolsize,
> > > @@ -103,6 +104,11 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
> > > return 0;
> > > }
> > >
> > > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > > +{
> > > + return -ERANGE;
> > > +}
> > > +
> > > static inline int kallsyms_lookup_size_offset(unsigned long addr,
> > > unsigned long *symbolsize,
> > > unsigned long *offset)
> > > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
> > > index 79f2eb617a62..a3738ddf9e87 100644
> > > --- a/kernel/kallsyms.c
> > > +++ b/kernel/kallsyms.c
> > > @@ -29,6 +29,8 @@
> > > #include <linux/compiler.h>
> > > #include <linux/module.h>
> > > #include <linux/kernel.h>
> > > +#include <linux/bsearch.h>
> > > +#include <linux/sort.h>
> > >
> > > /*
> > > * These will be re-linked against their real values
> > > @@ -572,6 +574,52 @@ int sprint_backtrace_build_id(char *buffer, unsigned long address)
> > > return __sprint_symbol(buffer, address, -1, 1, 1);
> > > }
> > >
> > > +static int symbols_cmp(const void *a, const void *b)
> > > +{
> > > + const char **str_a = (const char **) a;
> > > + const char **str_b = (const char **) b;
> > > +
> > > + return strcmp(*str_a, *str_b);
> > > +}
> > > +
> > > +struct kallsyms_data {
> > > + unsigned long *addrs;
> > > + const char **syms;
> > > + u32 cnt;
> > > + u32 found;
> > > +};
> > > +
> > > +static int kallsyms_callback(void *data, const char *name,
> > > + struct module *mod, unsigned long addr)
> > > +{
> > > + struct kallsyms_data *args = data;
> > > +
> > > + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
> > > + return 0;
> > > +
> > > + addr = ftrace_location(addr);
> > > + if (!addr)
> > > + return 0;
> > > +
> > > + args->addrs[args->found++] = addr;
> > > + return args->found == args->cnt ? 1 : 0;
> > > +}
> > > +
> > > +int kallsyms_lookup_names(const char **syms, u32 cnt, unsigned long *addrs)
> > > +{
> > > + struct kallsyms_data args;
> > > +
> > > + sort(syms, cnt, sizeof(*syms), symbols_cmp, NULL);
> >
> > It's nice to share symbols_cmp for sort and bsearch,
> > but messing technically input argument 'syms' like this will cause
> > issues sooner or later.
> > Lets make caller do the sort.
> > Unordered input will cause issue with bsearch, of course,
> > but it's a lesser evil. imo.
> >
>
> Masami,
> this logic bubbles up to the register_fprobe_syms, because user
> provides symbols as its argument. Can we still force this assumption
> to the 'syms' array, like with the comment change below?
>
> FYI the bpf side does not use register_fprobe_syms, it uses
> register_fprobe_ips, because it always needs ips as search
> base for cookie values

Hmm, in that case fprobe can call sort() in the register function.
That will be much easier and safer. In the bpf case the input array
will be generated by bpftool (not by hand), so it can ensure the
syms array is sorted. But we don't know how a fprobe user passes the
syms array, so register_fprobe_syms() will always require sort().
I don't like such redundant requirements.

Thank you,

--
Masami Hiramatsu <[email protected]>