2023-09-18 22:17:16

by Jack Brennen

[permalink] [raw]
Subject: [PATCH] modpost: Optimize symbol search from linear to binary search

Modify modpost to use binary search for converting addresses back
into symbol references. Previously it used linear search.

This change saves a few seconds of wall time for defconfig builds,
but can save several minutes on allyesconfigs.

Before:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31

After:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
Elapsed (wall clock) time (h:mm:ss or m:ss): 11:43.43

Signed-off-by: Jack Brennen <[email protected]>
Tested-by: Nick Desaulniers <[email protected]>
---
scripts/mod/Makefile | 4 +-
scripts/mod/modpost.c | 60 +----------
scripts/mod/modpost.h | 25 +++++
scripts/mod/symsearch.c | 233 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 265 insertions(+), 57 deletions(-)
create mode 100644 scripts/mod/symsearch.c

diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index c9e38ad937fd..3c54125eb373 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
hostprogs-always-y += modpost mk_elfconfig
always-y += empty.o

-modpost-objs := modpost.o file2alias.o sumversion.o
+modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o

devicetable-offsets-file := devicetable-offsets.h

@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s

# dependencies on generated files need to be listed explicitly

-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
+$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
$(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)

quiet_cmd_elfconfig = MKELF $@
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index de499dce5265..975f235aca2c 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -22,7 +22,6 @@
#include <errno.h>
#include "modpost.h"
#include "../../include/linux/license.h"
-#include "../../include/linux/module_symbol.h"

static bool module_enabled;
/* Are we using CONFIG_MODVERSIONS? */
@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
*p = TO_NATIVE(*p);
}

+ symsearch_init(info);
+
return 1;
}

static void parse_elf_finish(struct elf_info *info)
{
+ symsearch_finish(info);
release_file(info->hdr, info->size);
}

@@ -1039,65 +1041,13 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
return 1;
}

-/*
- * If there's no name there, ignore it; likewise, ignore it if it's
- * one of the magic symbols emitted used by current tools.
- *
- * Otherwise if find_symbols_between() returns those symbols, they'll
- * fail the whitelist tests and cause lots of false alarms ... fixable
- * only by merging __exit and __init sections into __text, bloating
- * the kernel (which is especially evil on embedded platforms).
- */
-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
-{
- const char *name = elf->strtab + sym->st_name;
-
- if (!name || !strlen(name))
- return 0;
- return !is_mapping_symbol(name);
-}
-
/* Look up the nearest symbol based on the section and the address */
static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
unsigned int secndx, bool allow_negative,
Elf_Addr min_distance)
{
- Elf_Sym *sym;
- Elf_Sym *near = NULL;
- Elf_Addr sym_addr, distance;
- bool is_arm = (elf->hdr->e_machine == EM_ARM);
-
- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
- if (get_secindex(elf, sym) != secndx)
- continue;
- if (!is_valid_name(elf, sym))
- continue;
-
- sym_addr = sym->st_value;
-
- /*
- * For ARM Thumb instruction, the bit 0 of st_value is set
- * if the symbol is STT_FUNC type. Mask it to get the address.
- */
- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
- sym_addr &= ~1;
-
- if (addr >= sym_addr)
- distance = addr - sym_addr;
- else if (allow_negative)
- distance = sym_addr - addr;
- else
- continue;
-
- if (distance <= min_distance) {
- min_distance = distance;
- near = sym;
- }
-
- if (min_distance == 0)
- break;
- }
- return near;
+ return symsearch_find_nearest(elf, addr, secndx,
+ allow_negative, min_distance);
}

static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 5f94c2c9f2d9..6413f26fcb6b 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -10,6 +10,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <elf.h>
+#include "../../include/linux/module_symbol.h"

#include "list.h"
#include "elfconfig.h"
@@ -128,6 +129,8 @@ struct elf_info {
* take shndx from symtab_shndx_start[N] instead */
Elf32_Word *symtab_shndx_start;
Elf32_Word *symtab_shndx_stop;
+
+ struct symsearch *symsearch;
};

/* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
return index;
}

+/*
+ * If there's no name there, ignore it; likewise, ignore it if it's
+ * one of the magic symbols emitted used by current tools.
+ *
+ * Internal symbols created by tools should be ignored by modpost.
+ */
+static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
+{
+ const char *name = elf->strtab + sym->st_name;
+
+ if (!name || !strlen(name))
+ return 0;
+ return !is_mapping_symbol(name);
+}
+
+/* symsearch.c */
+void symsearch_init(struct elf_info *elf);
+void symsearch_finish(struct elf_info *elf);
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance);
+
/* file2alias.c */
void handle_moddevtable(struct module *mod, struct elf_info *info,
Elf_Sym *sym, const char *symname);
diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
new file mode 100644
index 000000000000..aab79262512b
--- /dev/null
+++ b/scripts/mod/symsearch.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Helper functions for finding the symbol in an ELF which is "nearest"
+ * to a given address.
+ */
+
+#include "modpost.h"
+
+/* Struct used for binary search. */
+struct syminfo {
+ unsigned int symbol_index;
+ unsigned int section_index;
+ Elf_Addr addr;
+};
+
+/* Container used to hold an entire binary search table.
+ * Entries in table are ascending, sorted first by section_index,
+ * then by addr, and last by symbol_index. The sorting by
+ * symbol_index is used to duplicate the quirks of the prior
+ * find_nearest_sym() function, where exact matches to an address
+ * return the first symtab entry seen, but near misses return the
+ * last symtab entry seen.
+ * The first and last entries of the table are sentinels and their
+ * values only matter in two places: when we sort the table, and
+ * on lookups, the end sentinel should not have an addr field which
+ * matches its immediate predecessor. To meet these requirements,
+ * we initialize them to (0,0,0) and (max,max,max), and then after
+ * sorting, we tweak the end sentinel's addr field accordingly.
+ */
+struct symsearch {
+ size_t table_size;
+ struct syminfo table[];
+};
+
+static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
+{
+ return is_valid_name(elf, sym) != 0;
+}
+
+static int syminfo_compare(const void *s1, const void *s2)
+{
+ const struct syminfo *sym1 = s1;
+ const struct syminfo *sym2 = s2;
+
+ if (sym1->section_index > sym2->section_index)
+ return 1;
+ if (sym1->section_index < sym2->section_index)
+ return -1;
+ if (sym1->addr > sym2->addr)
+ return 1;
+ if (sym1->addr < sym2->addr)
+ return -1;
+ if (sym1->symbol_index > sym2->symbol_index)
+ return 1;
+ if (sym1->symbol_index < sym2->symbol_index)
+ return -1;
+ return 0;
+}
+
+static size_t symbol_count(struct elf_info *elf)
+{
+ size_t result = 0;
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_sym_searchable(elf, sym))
+ result++;
+ }
+ return result;
+}
+
+/* Populate the search array that we just allocated.
+ * Be slightly paranoid here. If the ELF file changes during processing,
+ * or if the behavior of is_sym_searchable() changes during processing,
+ * we want to catch it; neither of those is acceptable.
+ */
+static void symsearch_populate(struct elf_info *elf,
+ struct syminfo *table,
+ size_t table_size)
+{
+ bool is_arm = (elf->hdr->e_machine == EM_ARM);
+
+ /* Start sentinel */
+ if (table_size-- == 0)
+ fatal("%s: size mismatch\n", __func__);
+ table->symbol_index = 0;
+ table->section_index = 0;
+ table->addr = 0;
+ table++;
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_sym_searchable(elf, sym)) {
+ if (table_size-- == 0)
+ fatal("%s: size mismatch\n", __func__);
+ table->symbol_index = sym - elf->symtab_start;
+ table->section_index = get_secindex(elf, sym);
+ table->addr = sym->st_value;
+
+ /*
+ * For ARM Thumb instruction, the bit 0 of st_value is
+ * set if the symbol is STT_FUNC type. Mask it to get
+ * the address.
+ */
+ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
+ table->addr &= ~1;
+
+ table++;
+ }
+ }
+
+ /* End sentinel; all values are unsigned so -1 wraps to max */
+ if (table_size != 1)
+ fatal("%s: size mismatch\n", __func__);
+ table->symbol_index = -1;
+ table->section_index = -1;
+ table->addr = -1;
+}
+
+void symsearch_init(struct elf_info *elf)
+{
+ /* +2 here to allocate space for the start and end sentinels */
+ size_t table_size = symbol_count(elf) + 2;
+
+ elf->symsearch = NOFAIL(malloc(
+ sizeof(struct symsearch) +
+ sizeof(struct syminfo) * table_size));
+ elf->symsearch->table_size = table_size;
+
+ symsearch_populate(elf, elf->symsearch->table, table_size);
+ qsort(elf->symsearch->table, table_size,
+ sizeof(struct syminfo), syminfo_compare);
+
+ /* A bit of paranoia; make sure that the end sentinel's address is
+ * different than its predecessor. Not doing this could cause
+ * possible undefined behavior if anybody ever inserts a symbol
+ * with section_index and addr both at their max values.
+ * Doing this little bit of defensive programming is more efficient
+ * than checking for array overruns later.
+ */
+ elf->symsearch->table[table_size - 1].addr =
+ elf->symsearch->table[table_size - 2].addr + 1;
+}
+
+void symsearch_finish(struct elf_info *elf)
+{
+ free(elf->symsearch);
+ elf->symsearch = NULL;
+}
+
+/* Find the syminfo which is in secndx and "nearest" to addr.
+ * allow_negative: allow returning a symbol whose address is > addr.
+ * min_distance: ignore symbols which are further away than this.
+ *
+ * Returns a nonzero index into the symsearch table for success.
+ * Returns NULL if no legal symbol is found within the requested range.
+ */
+static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance)
+{
+ /* Find the target in the array; it will lie between two elements.
+ * Invariant here: table[lo] < target <= table[hi]
+ * For the purposes of search, exact hits in the search array are
+ * considered greater than the target. This means that if we do
+ * get an exact hit, then once the search terminates, table[hi]
+ * will be the exact match which has the lowest symbol index.
+ */
+ struct syminfo *table = elf->symsearch->table;
+ size_t hi = elf->symsearch->table_size - 1;
+ size_t lo = 0;
+ bool hi_is_usable = false;
+ bool lo_is_usable = false;
+ Elf_Addr hi_distance = -1; // max Elf_Addr
+ Elf_Addr lo_distance = -1; // max Elf_Addr
+ Elf_Addr min_distance_lo = min_distance;
+ Elf_Addr min_distance_hi = allow_negative ? min_distance : 0;
+
+ for (;;) {
+ size_t mid;
+
+ mid = lo + (hi - lo) / 2;
+ if (mid == lo)
+ break;
+ if (secndx > table[mid].section_index) {
+ lo = mid;
+ } else if (secndx < table[mid].section_index) {
+ hi = mid;
+ } else if (addr > table[mid].addr) {
+ lo = mid;
+ lo_distance = addr - table[mid].addr;
+ lo_is_usable = (lo_distance <= min_distance_lo);
+ } else {
+ hi = mid;
+ hi_distance = table[mid].addr - addr;
+ hi_is_usable = (hi_distance <= min_distance_hi);
+ }
+ }
+
+ if (hi_is_usable && lo_is_usable) {
+ lo_is_usable = (lo_distance <= hi_distance);
+ hi_is_usable = (hi_distance <= lo_distance);
+ }
+
+ if (!hi_is_usable)
+ return lo_is_usable ? lo : 0;
+
+ if (hi_distance == 0)
+ return hi;
+
+ /* Match quirks of existing behavior. Advance hi to the last
+ * matching entry in the search table. We don't need to worry
+ * about running off the end of the array due to the sentinel.
+ */
+ while (table[hi+1].addr == table[hi].addr &&
+ table[hi+1].section_index == table[hi].section_index) {
+ hi++;
+ }
+
+ return (lo_is_usable &&
+ table[lo].symbol_index > table[hi].symbol_index) ? lo : hi;
+}
+
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance)
+{
+ size_t result = symsearch_find_impl(elf, addr, secndx,
+ allow_negative, min_distance);
+
+ if (result == 0)
+ return NULL;
+
+ return &elf->symtab_start[elf->symsearch->table[result].symbol_index];
+}
--
2.42.0.459.ge4e396fd5e-goog


2023-09-19 21:45:58

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Mon, Sep 18, 2023 at 2:06 PM Jack Brennen <[email protected]> wrote:
>
> Modify modpost to use binary search for converting addresses back
> into symbol references. Previously it used linear search.
>
> This change saves a few seconds of wall time for defconfig builds,
> but can save several minutes on allyesconfigs.
>
> Before:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
> Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31
>
> After:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
> Elapsed (wall clock) time (h:mm:ss or m:ss): 11:43.43
>
> Signed-off-by: Jack Brennen <[email protected]>
> Tested-by: Nick Desaulniers <[email protected]>

Jack, if this is your first kernel patch, it's a nice one! And
welcome! Thanks for your work on this.

Reviewed-by: Nick Desaulniers <[email protected]>

Some ideas for future improvement.

I noticed we make 2 trips through the symbol table checking
is_sym_searchable twice. We could probably skip one trip and just
malloc the full size of the symbol table, even if this is technically
larger than absolutely needed. Guessing that might save us repeated
strlen calls on every symbol in the table.

Also, if qsort shows up in any profile, it's pretty well known why
std::sort beats qsort; there's potential for more speed there, but I
didn't profile so IDK.

> ---
> scripts/mod/Makefile | 4 +-
> scripts/mod/modpost.c | 60 +----------
> scripts/mod/modpost.h | 25 +++++
> scripts/mod/symsearch.c | 233 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 265 insertions(+), 57 deletions(-)
> create mode 100644 scripts/mod/symsearch.c
>
> diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
> index c9e38ad937fd..3c54125eb373 100644
> --- a/scripts/mod/Makefile
> +++ b/scripts/mod/Makefile
> @@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
> hostprogs-always-y += modpost mk_elfconfig
> always-y += empty.o
>
> -modpost-objs := modpost.o file2alias.o sumversion.o
> +modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o
>
> devicetable-offsets-file := devicetable-offsets.h
>
> @@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
>
> # dependencies on generated files need to be listed explicitly
>
> -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
> +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
> $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
>
> quiet_cmd_elfconfig = MKELF $@
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index de499dce5265..975f235aca2c 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -22,7 +22,6 @@
> #include <errno.h>
> #include "modpost.h"
> #include "../../include/linux/license.h"
> -#include "../../include/linux/module_symbol.h"
>
> static bool module_enabled;
> /* Are we using CONFIG_MODVERSIONS? */
> @@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
> *p = TO_NATIVE(*p);
> }
>
> + symsearch_init(info);
> +
> return 1;
> }
>
> static void parse_elf_finish(struct elf_info *info)
> {
> + symsearch_finish(info);
> release_file(info->hdr, info->size);
> }
>
> @@ -1039,65 +1041,13 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
> return 1;
> }
>
> -/*
> - * If there's no name there, ignore it; likewise, ignore it if it's
> - * one of the magic symbols emitted used by current tools.
> - *
> - * Otherwise if find_symbols_between() returns those symbols, they'll
> - * fail the whitelist tests and cause lots of false alarms ... fixable
> - * only by merging __exit and __init sections into __text, bloating
> - * the kernel (which is especially evil on embedded platforms).
> - */
> -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> -{
> - const char *name = elf->strtab + sym->st_name;
> -
> - if (!name || !strlen(name))
> - return 0;
> - return !is_mapping_symbol(name);
> -}
> -
> /* Look up the nearest symbol based on the section and the address */
> static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
> unsigned int secndx, bool allow_negative,
> Elf_Addr min_distance)
> {
> - Elf_Sym *sym;
> - Elf_Sym *near = NULL;
> - Elf_Addr sym_addr, distance;
> - bool is_arm = (elf->hdr->e_machine == EM_ARM);
> -
> - for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> - if (get_secindex(elf, sym) != secndx)
> - continue;
> - if (!is_valid_name(elf, sym))
> - continue;
> -
> - sym_addr = sym->st_value;
> -
> - /*
> - * For ARM Thumb instruction, the bit 0 of st_value is set
> - * if the symbol is STT_FUNC type. Mask it to get the address.
> - */
> - if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> - sym_addr &= ~1;
> -
> - if (addr >= sym_addr)
> - distance = addr - sym_addr;
> - else if (allow_negative)
> - distance = sym_addr - addr;
> - else
> - continue;
> -
> - if (distance <= min_distance) {
> - min_distance = distance;
> - near = sym;
> - }
> -
> - if (min_distance == 0)
> - break;
> - }
> - return near;
> + return symsearch_find_nearest(elf, addr, secndx,
> + allow_negative, min_distance);
> }
>
> static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index 5f94c2c9f2d9..6413f26fcb6b 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -10,6 +10,7 @@
> #include <fcntl.h>
> #include <unistd.h>
> #include <elf.h>
> +#include "../../include/linux/module_symbol.h"
>
> #include "list.h"
> #include "elfconfig.h"
> @@ -128,6 +129,8 @@ struct elf_info {
> * take shndx from symtab_shndx_start[N] instead */
> Elf32_Word *symtab_shndx_start;
> Elf32_Word *symtab_shndx_stop;
> +
> + struct symsearch *symsearch;
> };
>
> /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
> @@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
> return index;
> }
>
> +/*
> + * If there's no name there, ignore it; likewise, ignore it if it's
> + * one of the magic symbols emitted used by current tools.
> + *
> + * Internal symbols created by tools should be ignored by modpost.
> + */
> +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> +{
> + const char *name = elf->strtab + sym->st_name;
> +
> + if (!name || !strlen(name))
> + return 0;
> + return !is_mapping_symbol(name);
> +}
> +
> +/* symsearch.c */
> +void symsearch_init(struct elf_info *elf);
> +void symsearch_finish(struct elf_info *elf);
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance);
> +
> /* file2alias.c */
> void handle_moddevtable(struct module *mod, struct elf_info *info,
> Elf_Sym *sym, const char *symname);
> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
> new file mode 100644
> index 000000000000..aab79262512b
> --- /dev/null
> +++ b/scripts/mod/symsearch.c
> @@ -0,0 +1,233 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/* Helper functions for finding the symbol in an ELF which is "nearest"
> + * to a given address.
> + */
> +
> +#include "modpost.h"
> +
> +/* Struct used for binary search. */
> +struct syminfo {
> + unsigned int symbol_index;
> + unsigned int section_index;
> + Elf_Addr addr;
> +};
> +
> +/* Container used to hold an entire binary search table.
> + * Entries in table are ascending, sorted first by section_index,
> + * then by addr, and last by symbol_index. The sorting by
> + * symbol_index is used to duplicate the quirks of the prior
> + * find_nearest_sym() function, where exact matches to an address
> + * return the first symtab entry seen, but near misses return the
> + * last symtab entry seen.
> + * The first and last entries of the table are sentinels and their
> + * values only matter in two places: when we sort the table, and
> + * on lookups, the end sentinel should not have an addr field which
> + * matches its immediate predecessor. To meet these requirements,
> + * we initialize them to (0,0,0) and (max,max,max), and then after
> + * sorting, we tweak the end sentinel's addr field accordingly.
> + */
> +struct symsearch {
> + size_t table_size;
> + struct syminfo table[];
> +};
> +
> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
> +{
> + return is_valid_name(elf, sym) != 0;
> +}
> +
> +static int syminfo_compare(const void *s1, const void *s2)
> +{
> + const struct syminfo *sym1 = s1;
> + const struct syminfo *sym2 = s2;
> +
> + if (sym1->section_index > sym2->section_index)
> + return 1;
> + if (sym1->section_index < sym2->section_index)
> + return -1;
> + if (sym1->addr > sym2->addr)
> + return 1;
> + if (sym1->addr < sym2->addr)
> + return -1;
> + if (sym1->symbol_index > sym2->symbol_index)
> + return 1;
> + if (sym1->symbol_index < sym2->symbol_index)
> + return -1;
> + return 0;
> +}
> +
> +static size_t symbol_count(struct elf_info *elf)
> +{
> + size_t result = 0;
> +
> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> + if (is_sym_searchable(elf, sym))
> + result++;
> + }
> + return result;
> +}
> +
> +/* Populate the search array that we just allocated.
> + * Be slightly paranoid here. If the ELF file changes during processing,
> + * or if the behavior of is_sym_searchable() changes during processing,
> + * we want to catch it; neither of those is acceptable.
> + */
> +static void symsearch_populate(struct elf_info *elf,
> + struct syminfo *table,
> + size_t table_size)
> +{
> + bool is_arm = (elf->hdr->e_machine == EM_ARM);
> +
> + /* Start sentinel */
> + if (table_size-- == 0)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = 0;
> + table->section_index = 0;
> + table->addr = 0;
> + table++;
> +
> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> + if (is_sym_searchable(elf, sym)) {
> + if (table_size-- == 0)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = sym - elf->symtab_start;
> + table->section_index = get_secindex(elf, sym);
> + table->addr = sym->st_value;
> +
> + /*
> + * For ARM Thumb instruction, the bit 0 of st_value is
> + * set if the symbol is STT_FUNC type. Mask it to get
> + * the address.
> + */
> + if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> + table->addr &= ~1;
> +
> + table++;
> + }
> + }
> +
> + /* End sentinel; all values are unsigned so -1 wraps to max */
> + if (table_size != 1)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = -1;
> + table->section_index = -1;
> + table->addr = -1;
> +}
> +
> +void symsearch_init(struct elf_info *elf)
> +{
> + /* +2 here to allocate space for the start and end sentinels */
> + size_t table_size = symbol_count(elf) + 2;
> +
> + elf->symsearch = NOFAIL(malloc(
> + sizeof(struct symsearch) +
> + sizeof(struct syminfo) * table_size));
> + elf->symsearch->table_size = table_size;
> +
> + symsearch_populate(elf, elf->symsearch->table, table_size);
> + qsort(elf->symsearch->table, table_size,
> + sizeof(struct syminfo), syminfo_compare);
> +
> + /* A bit of paranoia; make sure that the end sentinel's address is
> + * different than its predecessor. Not doing this could cause
> + * possible undefined behavior if anybody ever inserts a symbol
> + * with section_index and addr both at their max values.
> + * Doing this little bit of defensive programming is more efficient
> + * than checking for array overruns later.
> + */
> + elf->symsearch->table[table_size - 1].addr =
> + elf->symsearch->table[table_size - 2].addr + 1;
> +}
> +
> +void symsearch_finish(struct elf_info *elf)
> +{
> + free(elf->symsearch);
> + elf->symsearch = NULL;
> +}
> +
> +/* Find the syminfo which is in secndx and "nearest" to addr.
> + * allow_negative: allow returning a symbol whose address is > addr.
> + * min_distance: ignore symbols which are further away than this.
> + *
> + * Returns a nonzero index into the symsearch table for success.
> + * Returns NULL if no legal symbol is found within the requested range.
> + */
> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance)
> +{
> + /* Find the target in the array; it will lie between two elements.
> + * Invariant here: table[lo] < target <= table[hi]
> + * For the purposes of search, exact hits in the search array are
> + * considered greater than the target. This means that if we do
> + * get an exact hit, then once the search terminates, table[hi]
> + * will be the exact match which has the lowest symbol index.
> + */
> + struct syminfo *table = elf->symsearch->table;
> + size_t hi = elf->symsearch->table_size - 1;
> + size_t lo = 0;
> + bool hi_is_usable = false;
> + bool lo_is_usable = false;
> + Elf_Addr hi_distance = -1; // max Elf_Addr
> + Elf_Addr lo_distance = -1; // max Elf_Addr
> + Elf_Addr min_distance_lo = min_distance;
> + Elf_Addr min_distance_hi = allow_negative ? min_distance : 0;
> +
> + for (;;) {
> + size_t mid;
> +
> + mid = lo + (hi - lo) / 2;
> + if (mid == lo)
> + break;
> + if (secndx > table[mid].section_index) {
> + lo = mid;
> + } else if (secndx < table[mid].section_index) {
> + hi = mid;
> + } else if (addr > table[mid].addr) {
> + lo = mid;
> + lo_distance = addr - table[mid].addr;
> + lo_is_usable = (lo_distance <= min_distance_lo);
> + } else {
> + hi = mid;
> + hi_distance = table[mid].addr - addr;
> + hi_is_usable = (hi_distance <= min_distance_hi);
> + }
> + }
> +
> + if (hi_is_usable && lo_is_usable) {
> + lo_is_usable = (lo_distance <= hi_distance);
> + hi_is_usable = (hi_distance <= lo_distance);
> + }
> +
> + if (!hi_is_usable)
> + return lo_is_usable ? lo : 0;
> +
> + if (hi_distance == 0)
> + return hi;
> +
> + /* Match quirks of existing behavior. Advance hi to the last
> + * matching entry in the search table. We don't need to worry
> + * about running off the end of the array due to the sentinel.
> + */
> + while (table[hi+1].addr == table[hi].addr &&
> + table[hi+1].section_index == table[hi].section_index) {
> + hi++;
> + }
> +
> + return (lo_is_usable &&
> + table[lo].symbol_index > table[hi].symbol_index) ? lo : hi;
> +}
> +
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance)
> +{
> + size_t result = symsearch_find_impl(elf, addr, secndx,
> + allow_negative, min_distance);
> +
> + if (result == 0)
> + return NULL;
> +
> + return &elf->symtab_start[elf->symsearch->table[result].symbol_index];
> +}
> --
> 2.42.0.459.ge4e396fd5e-goog
>


--
Thanks,
~Nick Desaulniers

2023-09-22 00:51:04

by Clément Léger

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

Hi Jack,

On RISC-V builds, we noticed a recent slow down after commit
ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") was
introduced. We tracked it down to find_nearest_sym() being called a lot
and more specifically since we have a lot of local symbols that are
generated as part of PCREL accesses (even more when building in debug
mode, measured a count of 12964362 symbols in one vmlinux.o).

Without your changes, a typical riscv defconfig build + debug, modpost
took the following amount of time:

$ time scripts/mod/modpost -M -o Module.symvers -T modules.order vmlinux.o
real 4m21,976s
user 4m21,803s
sys 0m0,100s

With your changes:

$ time scripts/mod/modpost -M -o Module.symvers -T modules.order vmlinux.o
real 0m1,077s
user 0m0,980s
sys 0m0,095s

I guess you could further optimize it by allocating a binary tree for
each section since find_nearest_sym() searches for symbols in a specific
section, that would save a few comparisons. Not sure it will be way
faster nor simpler to implement though.

FWIW: Tested-by: Clément Léger <[email protected]>

Thanks,

Clément

On 18/09/2023 23:06, Jack Brennen wrote:
> Modify modpost to use binary search for converting addresses back
> into symbol references. Previously it used linear search.
>
> This change saves a few seconds of wall time for defconfig builds,
> but can save several minutes on allyesconfigs.
>
> Before:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
> Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31
>
> After:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
> Elapsed (wall clock) time (h:mm:ss or m:ss): 11:43.43
>
> Signed-off-by: Jack Brennen <[email protected]>
> Tested-by: Nick Desaulniers <[email protected]>
> ---
> scripts/mod/Makefile | 4 +-
> scripts/mod/modpost.c | 60 +----------
> scripts/mod/modpost.h | 25 +++++
> scripts/mod/symsearch.c | 233 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 265 insertions(+), 57 deletions(-)
> create mode 100644 scripts/mod/symsearch.c
>
> diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
> index c9e38ad937fd..3c54125eb373 100644
> --- a/scripts/mod/Makefile
> +++ b/scripts/mod/Makefile
> @@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
> hostprogs-always-y += modpost mk_elfconfig
> always-y += empty.o
>
> -modpost-objs := modpost.o file2alias.o sumversion.o
> +modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o
>
> devicetable-offsets-file := devicetable-offsets.h
>
> @@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
>
> # dependencies on generated files need to be listed explicitly
>
> -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
> +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
> $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
>
> quiet_cmd_elfconfig = MKELF $@
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index de499dce5265..975f235aca2c 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -22,7 +22,6 @@
> #include <errno.h>
> #include "modpost.h"
> #include "../../include/linux/license.h"
> -#include "../../include/linux/module_symbol.h"
>
> static bool module_enabled;
> /* Are we using CONFIG_MODVERSIONS? */
> @@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
> *p = TO_NATIVE(*p);
> }
>
> + symsearch_init(info);
> +
> return 1;
> }
>
> static void parse_elf_finish(struct elf_info *info)
> {
> + symsearch_finish(info);
> release_file(info->hdr, info->size);
> }
>
> @@ -1039,65 +1041,13 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
> return 1;
> }
>
> -/*
> - * If there's no name there, ignore it; likewise, ignore it if it's
> - * one of the magic symbols emitted used by current tools.
> - *
> - * Otherwise if find_symbols_between() returns those symbols, they'll
> - * fail the whitelist tests and cause lots of false alarms ... fixable
> - * only by merging __exit and __init sections into __text, bloating
> - * the kernel (which is especially evil on embedded platforms).
> - */
> -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> -{
> - const char *name = elf->strtab + sym->st_name;
> -
> - if (!name || !strlen(name))
> - return 0;
> - return !is_mapping_symbol(name);
> -}
> -
> /* Look up the nearest symbol based on the section and the address */
> static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
> unsigned int secndx, bool allow_negative,
> Elf_Addr min_distance)
> {
> - Elf_Sym *sym;
> - Elf_Sym *near = NULL;
> - Elf_Addr sym_addr, distance;
> - bool is_arm = (elf->hdr->e_machine == EM_ARM);
> -
> - for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> - if (get_secindex(elf, sym) != secndx)
> - continue;
> - if (!is_valid_name(elf, sym))
> - continue;
> -
> - sym_addr = sym->st_value;
> -
> - /*
> - * For ARM Thumb instruction, the bit 0 of st_value is set
> - * if the symbol is STT_FUNC type. Mask it to get the address.
> - */
> - if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> - sym_addr &= ~1;
> -
> - if (addr >= sym_addr)
> - distance = addr - sym_addr;
> - else if (allow_negative)
> - distance = sym_addr - addr;
> - else
> - continue;
> -
> - if (distance <= min_distance) {
> - min_distance = distance;
> - near = sym;
> - }
> -
> - if (min_distance == 0)
> - break;
> - }
> - return near;
> + return symsearch_find_nearest(elf, addr, secndx,
> + allow_negative, min_distance);
> }
>
> static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index 5f94c2c9f2d9..6413f26fcb6b 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -10,6 +10,7 @@
> #include <fcntl.h>
> #include <unistd.h>
> #include <elf.h>
> +#include "../../include/linux/module_symbol.h"
>
> #include "list.h"
> #include "elfconfig.h"
> @@ -128,6 +129,8 @@ struct elf_info {
> * take shndx from symtab_shndx_start[N] instead */
> Elf32_Word *symtab_shndx_start;
> Elf32_Word *symtab_shndx_stop;
> +
> + struct symsearch *symsearch;
> };
>
> /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
> @@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
> return index;
> }
>
> +/*
> + * If there's no name there, ignore it; likewise, ignore it if it's
> + * one of the magic symbols emitted used by current tools.
> + *
> + * Internal symbols created by tools should be ignored by modpost.
> + */
> +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> +{
> + const char *name = elf->strtab + sym->st_name;
> +
> + if (!name || !strlen(name))
> + return 0;
> + return !is_mapping_symbol(name);
> +}
> +
> +/* symsearch.c */
> +void symsearch_init(struct elf_info *elf);
> +void symsearch_finish(struct elf_info *elf);
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance);
> +
> /* file2alias.c */
> void handle_moddevtable(struct module *mod, struct elf_info *info,
> Elf_Sym *sym, const char *symname);
> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
> new file mode 100644
> index 000000000000..aab79262512b
> --- /dev/null
> +++ b/scripts/mod/symsearch.c
> @@ -0,0 +1,233 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/* Helper functions for finding the symbol in an ELF which is "nearest"
> + * to a given address.
> + */
> +
> +#include "modpost.h"
> +
> +/* Struct used for binary search. */
> +struct syminfo {
> + unsigned int symbol_index;
> + unsigned int section_index;
> + Elf_Addr addr;
> +};
> +
> +/* Container used to hold an entire binary search table.
> + * Entries in table are ascending, sorted first by section_index,
> + * then by addr, and last by symbol_index. The sorting by
> + * symbol_index is used to duplicate the quirks of the prior
> + * find_nearest_sym() function, where exact matches to an address
> + * return the first symtab entry seen, but near misses return the
> + * last symtab entry seen.
> + * The first and last entries of the table are sentinels and their
> + * values only matter in two places: when we sort the table, and
> + * on lookups, the end sentinel should not have an addr field which
> + * matches its immediate predecessor. To meet these requirements,
> + * we initialize them to (0,0,0) and (max,max,max), and then after
> + * sorting, we tweak the end sentinel's addr field accordingly.
> + */
> +struct symsearch {
> + size_t table_size;
> + struct syminfo table[];
> +};
> +
> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
> +{
> + return is_valid_name(elf, sym) != 0;
> +}
> +
> +static int syminfo_compare(const void *s1, const void *s2)
> +{
> + const struct syminfo *sym1 = s1;
> + const struct syminfo *sym2 = s2;
> +
> + if (sym1->section_index > sym2->section_index)
> + return 1;
> + if (sym1->section_index < sym2->section_index)
> + return -1;
> + if (sym1->addr > sym2->addr)
> + return 1;
> + if (sym1->addr < sym2->addr)
> + return -1;
> + if (sym1->symbol_index > sym2->symbol_index)
> + return 1;
> + if (sym1->symbol_index < sym2->symbol_index)
> + return -1;
> + return 0;
> +}
> +
> +static size_t symbol_count(struct elf_info *elf)
> +{
> + size_t result = 0;
> +
> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> + if (is_sym_searchable(elf, sym))
> + result++;
> + }
> + return result;
> +}
> +
> +/* Populate the search array that we just allocated.
> + * Be slightly paranoid here. If the ELF file changes during processing,
> + * or if the behavior of is_sym_searchable() changes during processing,
> + * we want to catch it; neither of those is acceptable.
> + */
> +static void symsearch_populate(struct elf_info *elf,
> + struct syminfo *table,
> + size_t table_size)
> +{
> + bool is_arm = (elf->hdr->e_machine == EM_ARM);
> +
> + /* Start sentinel */
> + if (table_size-- == 0)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = 0;
> + table->section_index = 0;
> + table->addr = 0;
> + table++;
> +
> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> + if (is_sym_searchable(elf, sym)) {
> + if (table_size-- == 0)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = sym - elf->symtab_start;
> + table->section_index = get_secindex(elf, sym);
> + table->addr = sym->st_value;
> +
> + /*
> + * For ARM Thumb instruction, the bit 0 of st_value is
> + * set if the symbol is STT_FUNC type. Mask it to get
> + * the address.
> + */
> + if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> + table->addr &= ~1;
> +
> + table++;
> + }
> + }
> +
> + /* End sentinel; all values are unsigned so -1 wraps to max */
> + if (table_size != 1)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = -1;
> + table->section_index = -1;
> + table->addr = -1;
> +}
> +
> +void symsearch_init(struct elf_info *elf)
> +{
> + /* +2 here to allocate space for the start and end sentinels */
> + size_t table_size = symbol_count(elf) + 2;
> +
> + elf->symsearch = NOFAIL(malloc(
> + sizeof(struct symsearch) +
> + sizeof(struct syminfo) * table_size));
> + elf->symsearch->table_size = table_size;
> +
> + symsearch_populate(elf, elf->symsearch->table, table_size);
> + qsort(elf->symsearch->table, table_size,
> + sizeof(struct syminfo), syminfo_compare);
> +
> + /* A bit of paranoia; make sure that the end sentinel's address is
> + * different than its predecessor. Not doing this could cause
> + * possible undefined behavior if anybody ever inserts a symbol
> + * with section_index and addr both at their max values.
> + * Doing this little bit of defensive programming is more efficient
> + * than checking for array overruns later.
> + */
> + elf->symsearch->table[table_size - 1].addr =
> + elf->symsearch->table[table_size - 2].addr + 1;
> +}
> +
> +void symsearch_finish(struct elf_info *elf)
> +{
> + free(elf->symsearch);
> + elf->symsearch = NULL;
> +}
> +
> +/* Find the syminfo which is in secndx and "nearest" to addr.
> + * allow_negative: allow returning a symbol whose address is > addr.
> + * min_distance: ignore symbols which are further away than this.
> + *
> + * Returns a nonzero index into the symsearch table for success.
> + * Returns NULL if no legal symbol is found within the requested range.
> + */
> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance)
> +{
> + /* Find the target in the array; it will lie between two elements.
> + * Invariant here: table[lo] < target <= table[hi]
> + * For the purposes of search, exact hits in the search array are
> + * considered greater than the target. This means that if we do
> + * get an exact hit, then once the search terminates, table[hi]
> + * will be the exact match which has the lowest symbol index.
> + */
> + struct syminfo *table = elf->symsearch->table;
> + size_t hi = elf->symsearch->table_size - 1;
> + size_t lo = 0;
> + bool hi_is_usable = false;
> + bool lo_is_usable = false;
> + Elf_Addr hi_distance = -1; // max Elf_Addr
> + Elf_Addr lo_distance = -1; // max Elf_Addr
> + Elf_Addr min_distance_lo = min_distance;
> + Elf_Addr min_distance_hi = allow_negative ? min_distance : 0;
> +
> + for (;;) {
> + size_t mid;
> +
> + mid = lo + (hi - lo) / 2;
> + if (mid == lo)
> + break;
> + if (secndx > table[mid].section_index) {
> + lo = mid;
> + } else if (secndx < table[mid].section_index) {
> + hi = mid;
> + } else if (addr > table[mid].addr) {
> + lo = mid;
> + lo_distance = addr - table[mid].addr;
> + lo_is_usable = (lo_distance <= min_distance_lo);
> + } else {
> + hi = mid;
> + hi_distance = table[mid].addr - addr;
> + hi_is_usable = (hi_distance <= min_distance_hi);
> + }
> + }
> +
> + if (hi_is_usable && lo_is_usable) {
> + lo_is_usable = (lo_distance <= hi_distance);
> + hi_is_usable = (hi_distance <= lo_distance);
> + }
> +
> + if (!hi_is_usable)
> + return lo_is_usable ? lo : 0;
> +
> + if (hi_distance == 0)
> + return hi;
> +
> + /* Match quirks of existing behavior. Advance hi to the last
> + * matching entry in the search table. We don't need to worry
> + * about running off the end of the array due to the sentinel.
> + */
> + while (table[hi+1].addr == table[hi].addr &&
> + table[hi+1].section_index == table[hi].section_index) {
> + hi++;
> + }
> +
> + return (lo_is_usable &&
> + table[lo].symbol_index > table[hi].symbol_index) ? lo : hi;
> +}
> +
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance)
> +{
> + size_t result = symsearch_find_impl(elf, addr, secndx,
> + allow_negative, min_distance);
> +
> + if (result == 0)
> + return NULL;
> +
> + return &elf->symtab_start[elf->symsearch->table[result].symbol_index];
> +}

2023-09-23 08:50:33

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Tue, Sep 19, 2023 at 6:06 AM Jack Brennen <[email protected]> wrote:
>
> Modify modpost to use binary search for converting addresses back
> into symbol references. Previously it used linear search.
>
> This change saves a few seconds of wall time for defconfig builds,
> but can save several minutes on allyesconfigs.

Thanks.
Binary search is a good idea.


> Before:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
> Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31

Instead of the time for the entire build,
can you put the time for the modpost command?

If you allyesconfig case,

$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o





> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
> new file mode 100644
> index 000000000000..aab79262512b
> --- /dev/null
> +++ b/scripts/mod/symsearch.c
> @@ -0,0 +1,233 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/* Helper functions for finding the symbol in an ELF which is "nearest"
> + * to a given address.
> + */
>

Can you use the following block comment style?

/*
* Helper functions for finding the symbol in an ELF which is "nearest"
* to a given address.
*/



> +#include "modpost.h"
> +
> +/* Struct used for binary search. */

I think this obvious comment is unneeded.



> +struct syminfo {
> + unsigned int symbol_index;
> + unsigned int section_index;
> + Elf_Addr addr;
> +};
> +
> +/* Container used to hold an entire binary search table.
> + * Entries in table are ascending, sorted first by section_index,
> + * then by addr, and last by symbol_index. The sorting by
> + * symbol_index is used to duplicate the quirks of the prior
> + * find_nearest_sym() function, where exact matches to an address
> + * return the first symtab entry seen, but near misses return the
> + * last symtab entry seen.

Preserving this quirk makes the code complicated.

I do not mind changing the behavior of the corner case.





> + * The first and last entries of the table are sentinels and their
> + * values only matter in two places: when we sort the table, and
> + * on lookups, the end sentinel should not have an addr field which
> + * matches its immediate predecessor. To meet these requirements,
> + * we initialize them to (0,0,0) and (max,max,max), and then after
> + * sorting, we tweak the end sentinel's addr field accordingly.
> + */
> +struct symsearch {
> + size_t table_size;
> + struct syminfo table[];
> +};



syminfo::symbol_index is unsigned int.
symsearch::table_size is size_t.


symbol_index of the last element is always larger than
elf->symsearch->table_size.

So, the code works only within 32-bit width anyway.












> +
> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
> +{
> + return is_valid_name(elf, sym) != 0;
> +}

If you call is_valid_name() directly, this function was unneeded?






> +
> +static int syminfo_compare(const void *s1, const void *s2)
> +{
> + const struct syminfo *sym1 = s1;
> + const struct syminfo *sym2 = s2;
> +
> + if (sym1->section_index > sym2->section_index)
> + return 1;
> + if (sym1->section_index < sym2->section_index)
> + return -1;
> + if (sym1->addr > sym2->addr)
> + return 1;
> + if (sym1->addr < sym2->addr)
> + return -1;
> + if (sym1->symbol_index > sym2->symbol_index)
> + return 1;
> + if (sym1->symbol_index < sym2->symbol_index)
> + return -1;
> + return 0;
> +}
> +
> +static size_t symbol_count(struct elf_info *elf)
> +{
> + size_t result = 0;
> +
> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> + if (is_sym_searchable(elf, sym))
> + result++;
> + }
> + return result;
> +}
> +
> +/* Populate the search array that we just allocated.
> + * Be slightly paranoid here. If the ELF file changes during processing,

I could not understand. In which case, the ELF file changes?

modpost loads the entire file to memory first..

In which scenario, the memory content changes?






> + * or if the behavior of is_sym_searchable() changes during processing,
> + * we want to catch it; neither of those is acceptable.
> + */
> +static void symsearch_populate(struct elf_info *elf,
> + struct syminfo *table,
> + size_t table_size)
> +{
> + bool is_arm = (elf->hdr->e_machine == EM_ARM);
> +
> + /* Start sentinel */
> + if (table_size-- == 0)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = 0;
> + table->section_index = 0;
> + table->addr = 0;
> + table++;
> +
> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> + if (is_sym_searchable(elf, sym)) {
> + if (table_size-- == 0)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = sym - elf->symtab_start;
> + table->section_index = get_secindex(elf, sym);
> + table->addr = sym->st_value;
> +
> + /*
> + * For ARM Thumb instruction, the bit 0 of st_value is
> + * set if the symbol is STT_FUNC type. Mask it to get
> + * the address.
> + */
> + if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> + table->addr &= ~1;
> +
> + table++;
> + }
> + }
> +
> + /* End sentinel; all values are unsigned so -1 wraps to max */
> + if (table_size != 1)
> + fatal("%s: size mismatch\n", __func__);
> + table->symbol_index = -1;
> + table->section_index = -1;
> + table->addr = -1;
> +}
> +
> +void symsearch_init(struct elf_info *elf)
> +{
> + /* +2 here to allocate space for the start and end sentinels */
> + size_t table_size = symbol_count(elf) + 2;
> +
> + elf->symsearch = NOFAIL(malloc(
> + sizeof(struct symsearch) +
> + sizeof(struct syminfo) * table_size));
> + elf->symsearch->table_size = table_size;
> +
> + symsearch_populate(elf, elf->symsearch->table, table_size);
> + qsort(elf->symsearch->table, table_size,
> + sizeof(struct syminfo), syminfo_compare);
> +
> + /* A bit of paranoia; make sure that the end sentinel's address is
> + * different than its predecessor. Not doing this could cause
> + * possible undefined behavior if anybody ever inserts a symbol
> + * with section_index and addr both at their max values.

I could not understand this comment.

If section_index and addr both at their max values at [table_size - 2],
->table[table_size - 2].addr + 1 wraps to zero.

The table is not sorted any longer?




> + * Doing this little bit of defensive programming is more efficient
> + * than checking for array overruns later.
> + */
> + elf->symsearch->table[table_size - 1].addr =
> + elf->symsearch->table[table_size - 2].addr + 1;
> +}
> +
> +void symsearch_finish(struct elf_info *elf)
> +{
> + free(elf->symsearch);
> + elf->symsearch = NULL;
> +}
> +
> +/* Find the syminfo which is in secndx and "nearest" to addr.
> + * allow_negative: allow returning a symbol whose address is > addr.
> + * min_distance: ignore symbols which are further away than this.
> + *
> + * Returns a nonzero index into the symsearch table for success.
> + * Returns NULL if no legal symbol is found within the requested range.
> + */
> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance)
> +{
> + /* Find the target in the array; it will lie between two elements.
> + * Invariant here: table[lo] < target <= table[hi]
> + * For the purposes of search, exact hits in the search array are
> + * considered greater than the target. This means that if we do
> + * get an exact hit, then once the search terminates, table[hi]
> + * will be the exact match which has the lowest symbol index.
> + */
> + struct syminfo *table = elf->symsearch->table;
> + size_t hi = elf->symsearch->table_size - 1;
> + size_t lo = 0;




The binary search code was implemented in a too complex way
to preserve the previous quirks.


I want to use the same comparison function for
qsort() and bsearch() to avoid paranoia.




How about this implementation?



static struct syminfo *symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
unsigned int secndx, bool
allow_negative,
Elf_Addr min_distance)
{
struct syminfo target = { .symbol_index = -1, .section_index =
secndx, .addr = addr };
struct syminfo *table = elf->symsearch->table;
unsigned int hi = elf->symsearch->table_size - 1;
unsigned int lo = 0;
struct syminfo *result = NULL;
Elf_Addr distance;

while (lo < hi) {
unsigned int mid = (lo + hi + 1) / 2;

if (syminfo_compare(&table[mid], &target) > 0)
hi = mid - 1;
else
lo = mid;
}

/*
* The target resides between lo and (lo + 1).
* If allow_negative is true, check both of them.
*/

if (allow_negative && lo + 1 < elf->symsearch->table_size &&
table[lo + 1].section_index == secndx) {
distance = table[lo + 1].addr - addr;
if (distance <= min_distance) {
min_distance = distance;
result = &table[lo + 1];
}
}

if (table[lo].section_index == secndx) {
distance = addr - table[lo].addr;
if (distance <= min_distance)
result = &table[lo];
}

return result;
}

Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
unsigned int secndx, bool allow_negative,
Elf_Addr min_distance)
{
struct syminfo *result;

result = symsearch_find_impl(elf, addr, secndx,
allow_negative, min_distance);
if (!result)
return NULL;

return &elf->symtab_start[result->symbol_index];
}



This does not preserve the previous quirks.

If there are multiple entries with the same address,
it always returns the last element.

I did not expect sentinels.

I did not do thorough tests, but it seems to be working for me.




Also, please call symsearch_find_nearest() directly
and remove symfind_nearest_sym().






--
Best Regards

Masahiro Yamada

2023-09-23 23:25:28

by Jack Brennen

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

Mainly just clarifying that we're willing to change the behavior in corner
cases? The existing behavior has one set of quirks about which symtab entry
is returned, and your proposed behavior has another different set of quirks.

It's probably OK to break builds which have an undocumented assumption
about the order of symtab entries; personally, I'd rather not risk that myself,
but if somebody with more experience is willing to back that decision, I'm
OK with it.

Also, there's an alternative approach that uses bsearch from the standard
library and a common comparison function between qsort and bsearch. I
considered this alternative earlier; maybe you would prefer it because it
eliminates having to reimplement a binary search algorithm.
I chose not to do it this way because of trying to duplicate the quirks.
If no duplication of the quirks is needed, this becomes easier.

The idea for that is to build a sorted array of syminfo that look like this:

(section_index, addr_lo, addr_hi, sym_lo, sym_hi)

What this represents is the situation where for any lookup in the
range from (section_index, addr_lo) to (section_index, addr_hi)
inclusive, the nearest symbol will be either sym_lo or sym_hi.
There are four different meanings for (sym_lo, sym_hi):

(sym_lo = 0)
This is a placeholder for a duplicated address, and it cannot
compare equal to anything. After we sort the array, we set all
of the duplicated addresses except for the last one to sym_lo = 0.

(sym_lo = MAX_SYM)
This is used to designate an address being looked up. When this
is seen, it compares equal to any other syminfo that has an
overlapping range.

(sym_lo != 0, sym_hi = 0)
This represents the last range in a section. There's no following
address that could match. Should also have addr_hi = MAX.

(sym_lo != 0, sym_hi != 0)
This represents a range in a section that's not the last range.
sym_hi may be usable to satisfy the lookup, but only if it's
closer than sym_lo and if allow_negative is true. Note that
the address of sym_hi will be addr_hi+1, so we don't need any
additional code to fetch that address.

Here's a sample comparison function:
int syminfo_compare(const void *a, const void *b) {
const struct syminfo *sym1 = a;
const struct syminfo *sym2 = b;

if (sym1->section_index > sym2->section_index)
return 1;
if (sym1->section_index < sym2->section_index)
return -1;
if ((sym1->sym_lo == MAX_SYM && sym2->sym_lo != 0) ||
(sym2->sym_lo == MAX_SYM && sym1->sym_lo != 0)) {
/* Overlap is equality - test for it */
if (sym1->addr_hi >= sym2->addr_lo &&
sym2->addr_hi >= sym1->addr_lo) {
return 0;
}
/* No overlap, fall through */
}
if (sym1->addr_lo > sym2->addr_lo)
return 1;
if (sym1->addr_lo < sym2->addr_lo)
return -1;
/* Note that if we are comparing a lookup (MAX_SYM) with
a placeholder (0), the lookup always compares greater.
This causes us to search to the "right" of the placeholder
for a match, which is what we want. */
if (sym1->sym_lo > sym2->sym_lo)
return 1;
if (sym1->sym_lo < sym2->sym_lo)
return -1;
return 0;
}

So this greatly simplifies the back-end searching. It's a bsearch()
which gives you either a miss, or one or two alternatives for the result.
On the front end, you have an extra step after sorting which massages the
search array into the right configuration. There's actually not much code
needed to do that.

Is that of interest? The leveraging of bsearch() in that way?

A few other responses below...

On Sat, Sep 23, 2023 at 4:50 AM Masahiro Yamada <[email protected]> wrote:
> > +/* Populate the search array that we just allocated.
> > + * Be slightly paranoid here. If the ELF file changes during processing,
>
> I could not understand. In which case, the ELF file changes?
>
> modpost loads the entire file to memory first..
>
> In which scenario, the memory content changes?
>

modpost doesn't load the entire file, it uses mmap to map it into the
address space.The easiest way to imagine this being an issue is that some
buggy parallelization happens and something is modifying vmlinux.o while
modpost is processing it. Of course it's probably acceptable to say,
"Don't do that!"

There are two alternatives here: actually read in the entire file, which
is certainly suboptimal, or just live with the fact that mmap makes no
guarantees about whether changes in the file are reflected in the memory map.

> > + /* A bit of paranoia; make sure that the end sentinel's address is
> > + * different than its predecessor. Not doing this could cause
> > + * possible undefined behavior if anybody ever inserts a symbol
> > + * with section_index and addr both at their max values.
>
> I could not understand this comment.
>
> If section_index and addr both at their max values at [table_size - 2],
> ->table[table_size - 2].addr + 1 wraps to zero.
>
> The table is not sorted any longer?
>

That's correct, the table would not be sorted any longer. But by design,
we never do a relational comparison against the end sentinel. But if
we rework the search, either by your suggestion or by using bsearch(),
this sentinel goes away.

2023-09-25 02:37:45

by Fangrui Song

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On 2023-09-23, Masahiro Yamada wrote:
>On Tue, Sep 19, 2023 at 6:06 AM Jack Brennen <[email protected]> wrote:
>>
>> Modify modpost to use binary search for converting addresses back
>> into symbol references. Previously it used linear search.
>>
>> This change saves a few seconds of wall time for defconfig builds,
>> but can save several minutes on allyesconfigs.
>
>Thanks.
>Binary search is a good idea.
>
>
>> Before:
>> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
>> Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31
>
>Instead of the time for the entire build,
>can you put the time for the modpost command?
>
>If you allyesconfig case,
>
> $ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
>
>
>
>
>
>> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
>> new file mode 100644
>> index 000000000000..aab79262512b
>> --- /dev/null
>> +++ b/scripts/mod/symsearch.c
>> @@ -0,0 +1,233 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +/* Helper functions for finding the symbol in an ELF which is "nearest"
>> + * to a given address.
>> + */
>>
>
>Can you use the following block comment style?
>
>/*
> * Helper functions for finding the symbol in an ELF which is "nearest"
> * to a given address.
> */
>
>
>
>> +#include "modpost.h"
>> +
>> +/* Struct used for binary search. */
>
>I think this obvious comment is unneeded.
>
>
>
>> +struct syminfo {
>> + unsigned int symbol_index;
>> + unsigned int section_index;
>> + Elf_Addr addr;
>> +};
>> +
>> +/* Container used to hold an entire binary search table.
>> + * Entries in table are ascending, sorted first by section_index,
>> + * then by addr, and last by symbol_index. The sorting by
>> + * symbol_index is used to duplicate the quirks of the prior
>> + * find_nearest_sym() function, where exact matches to an address
>> + * return the first symtab entry seen, but near misses return the
>> + * last symtab entry seen.
>
>Preserving this quirk makes the code complicated.
>
>I do not mind changing the behavior of the corner case.
>
>
>
>
>
>> + * The first and last entries of the table are sentinels and their
>> + * values only matter in two places: when we sort the table, and
>> + * on lookups, the end sentinel should not have an addr field which
>> + * matches its immediate predecessor. To meet these requirements,
>> + * we initialize them to (0,0,0) and (max,max,max), and then after
>> + * sorting, we tweak the end sentinel's addr field accordingly.
>> + */
>> +struct symsearch {
>> + size_t table_size;
>> + struct syminfo table[];
>> +};
>
>
>
>syminfo::symbol_index is unsigned int.
>symsearch::table_size is size_t.
>
>
>symbol_index of the last element is always larger than
>elf->symsearch->table_size.
>
>So, the code works only within 32-bit width anyway.
>
>
>
>
>
>
>
>
>
>
>
>
>> +
>> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
>> +{
>> + return is_valid_name(elf, sym) != 0;
>> +}
>
>If you call is_valid_name() directly, this function was unneeded?
>
>
>
>
>
>
>> +
>> +static int syminfo_compare(const void *s1, const void *s2)
>> +{
>> + const struct syminfo *sym1 = s1;
>> + const struct syminfo *sym2 = s2;
>> +
>> + if (sym1->section_index > sym2->section_index)
>> + return 1;
>> + if (sym1->section_index < sym2->section_index)
>> + return -1;
>> + if (sym1->addr > sym2->addr)
>> + return 1;
>> + if (sym1->addr < sym2->addr)
>> + return -1;
>> + if (sym1->symbol_index > sym2->symbol_index)
>> + return 1;
>> + if (sym1->symbol_index < sym2->symbol_index)
>> + return -1;
>> + return 0;
>> +}
>> +
>> +static size_t symbol_count(struct elf_info *elf)
>> +{
>> + size_t result = 0;
>> +
>> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
>> + if (is_sym_searchable(elf, sym))
>> + result++;
>> + }
>> + return result;
>> +}
>> +
>> +/* Populate the search array that we just allocated.
>> + * Be slightly paranoid here. If the ELF file changes during processing,
>
>I could not understand. In which case, the ELF file changes?
>
>modpost loads the entire file to memory first..
>
>In which scenario, the memory content changes?
>
>
>
>
>
>
>> + * or if the behavior of is_sym_searchable() changes during processing,
>> + * we want to catch it; neither of those is acceptable.
>> + */
>> +static void symsearch_populate(struct elf_info *elf,
>> + struct syminfo *table,
>> + size_t table_size)
>> +{
>> + bool is_arm = (elf->hdr->e_machine == EM_ARM);
>> +
>> + /* Start sentinel */
>> + if (table_size-- == 0)
>> + fatal("%s: size mismatch\n", __func__);
>> + table->symbol_index = 0;
>> + table->section_index = 0;
>> + table->addr = 0;
>> + table++;
>> +
>> + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
>> + if (is_sym_searchable(elf, sym)) {
>> + if (table_size-- == 0)
>> + fatal("%s: size mismatch\n", __func__);
>> + table->symbol_index = sym - elf->symtab_start;
>> + table->section_index = get_secindex(elf, sym);
>> + table->addr = sym->st_value;
>> +
>> + /*
>> + * For ARM Thumb instruction, the bit 0 of st_value is
>> + * set if the symbol is STT_FUNC type. Mask it to get
>> + * the address.
>> + */
>> + if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
>> + table->addr &= ~1;
>> +
>> + table++;
>> + }
>> + }
>> +
>> + /* End sentinel; all values are unsigned so -1 wraps to max */
>> + if (table_size != 1)
>> + fatal("%s: size mismatch\n", __func__);
>> + table->symbol_index = -1;
>> + table->section_index = -1;
>> + table->addr = -1;
>> +}
>> +
>> +void symsearch_init(struct elf_info *elf)
>> +{
>> + /* +2 here to allocate space for the start and end sentinels */
>> + size_t table_size = symbol_count(elf) + 2;
>> +
>> + elf->symsearch = NOFAIL(malloc(
>> + sizeof(struct symsearch) +
>> + sizeof(struct syminfo) * table_size));
>> + elf->symsearch->table_size = table_size;
>> +
>> + symsearch_populate(elf, elf->symsearch->table, table_size);
>> + qsort(elf->symsearch->table, table_size,
>> + sizeof(struct syminfo), syminfo_compare);
>> +
>> + /* A bit of paranoia; make sure that the end sentinel's address is
>> + * different than its predecessor. Not doing this could cause
>> + * possible undefined behavior if anybody ever inserts a symbol
>> + * with section_index and addr both at their max values.
>
>I could not understand this comment.
>
>If section_index and addr both at their max values at [table_size - 2],
>->table[table_size - 2].addr + 1 wraps to zero.
>
>The table is not sorted any longer?
>
>
>
>
>> + * Doing this little bit of defensive programming is more efficient
>> + * than checking for array overruns later.
>> + */
>> + elf->symsearch->table[table_size - 1].addr =
>> + elf->symsearch->table[table_size - 2].addr + 1;
>> +}
>> +
>> +void symsearch_finish(struct elf_info *elf)
>> +{
>> + free(elf->symsearch);
>> + elf->symsearch = NULL;
>> +}
>> +
>> +/* Find the syminfo which is in secndx and "nearest" to addr.
>> + * allow_negative: allow returning a symbol whose address is > addr.
>> + * min_distance: ignore symbols which are further away than this.
>> + *
>> + * Returns a nonzero index into the symsearch table for success.
>> + * Returns NULL if no legal symbol is found within the requested range.
>> + */
>> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
>> + unsigned int secndx, bool allow_negative,
>> + Elf_Addr min_distance)
>> +{
>> + /* Find the target in the array; it will lie between two elements.
>> + * Invariant here: table[lo] < target <= table[hi]
>> + * For the purposes of search, exact hits in the search array are
>> + * considered greater than the target. This means that if we do
>> + * get an exact hit, then once the search terminates, table[hi]
>> + * will be the exact match which has the lowest symbol index.
>> + */
>> + struct syminfo *table = elf->symsearch->table;
>> + size_t hi = elf->symsearch->table_size - 1;
>> + size_t lo = 0;
>
>
>
>
>The binary search code was implemented in a too complex way
>to preserve the previous quirks.
>
>
>I want to use the same comparison function for
>qsort() and bsearch() to avoid paranoia.
>
>
>
>
>How about this implementation?
>
>
>
>static struct syminfo *symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
> unsigned int secndx, bool
>allow_negative,
> Elf_Addr min_distance)
>{
> struct syminfo target = { .symbol_index = -1, .section_index =
>secndx, .addr = addr };
> struct syminfo *table = elf->symsearch->table;
> unsigned int hi = elf->symsearch->table_size - 1;
> unsigned int lo = 0;
> struct syminfo *result = NULL;
> Elf_Addr distance;
>
> while (lo < hi) {
> unsigned int mid = (lo + hi + 1) / 2;
>
> if (syminfo_compare(&table[mid], &target) > 0)
> hi = mid - 1;
> else
> lo = mid;
> }
>
> /*
> * The target resides between lo and (lo + 1).
> * If allow_negative is true, check both of them.
> */
>
> if (allow_negative && lo + 1 < elf->symsearch->table_size &&
> table[lo + 1].section_index == secndx) {
> distance = table[lo + 1].addr - addr;
> if (distance <= min_distance) {
> min_distance = distance;
> result = &table[lo + 1];
> }
> }
>
> if (table[lo].section_index == secndx) {
> distance = addr - table[lo].addr;
> if (distance <= min_distance)
> result = &table[lo];
> }
>
> return result;
>}

I think this implementation (shrinking [lo,hi] to [lo,mid-1] or
[mid,hi]) is better than the original one (shrinking [lo,hi] to [lo,mid]
or [mid,hi], a bit wasteful).

The original patch uses `if (mid == lo) break;`, which I consider not so
elegant.

However, the `- 1` part in `unsigned int hi = elf->symsearch->table_size - 1;` can be improved.
I'd prefer an implementation similar to typical C++ https://en.cppreference.com/w/cpp/algorithm/upper_bound implementation.

lo = 0;
hi = n; // or replace hi with count
while (lo < hi) {
mid = (lo + hi) / 2; // we don't care about (lo+hi) overflow
if (less_or_eq(&table[mid], &target))
lo = mid+1;
else
hi = mid;
}

// lo == hi: the index of the first element that is > target
// if elements equal to target are present, they are on the left of lo

>Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> unsigned int secndx, bool allow_negative,
> Elf_Addr min_distance)
>{
> struct syminfo *result;
>
> result = symsearch_find_impl(elf, addr, secndx,
> allow_negative, min_distance);
> if (!result)
> return NULL;
>
> return &elf->symtab_start[result->symbol_index];
>}
>
>
>
>This does not preserve the previous quirks.
>
>If there are multiple entries with the same address,
>it always returns the last element.
>
>I did not expect sentinels.
>
>I did not do thorough tests, but it seems to be working for me.
>
>
>
>
>Also, please call symsearch_find_nearest() directly
>and remove symfind_nearest_sym().
>
>
>
>
>
>
>--
>Best Regards
>
>Masahiro Yamada
>

2023-09-25 14:33:20

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Mon, Sep 25, 2023 at 7:20 AM Fangrui Song <[email protected]> wrote:

> However, the `- 1` part in `unsigned int hi = elf->symsearch->table_size - 1;` can be improved.
> I'd prefer an implementation similar to typical C++ https://en.cppreference.com/w/cpp/algorithm/upper_bound implementation.
>
> lo = 0;
> hi = n; // or replace hi with count
> while (lo < hi) {
> mid = (lo + hi) / 2; // we don't care about (lo+hi) overflow
> if (less_or_eq(&table[mid], &target))
> lo = mid+1;
> else
> hi = mid;
> }
>
> // lo == hi: the index of the first element that is > target
> // if elements equal to target are present, they are on the left of lo


Ah, it is much more elegant!

Thanks.



--
Best Regards
Masahiro Yamada

2023-09-25 14:37:46

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Sun, Sep 24, 2023 at 2:21 AM Jack Brennen <[email protected]> wrote:
>
> Mainly just clarifying that we're willing to change the behavior in corner
> cases?


Yes.

I do not see a sensible reason in the previous quirk
(take the first for the exact match, but the last for others).




> The existing behavior has one set of quirks about which symtab entry
> is returned, and your proposed behavior has another different set of quirks.
>
> It's probably OK to break builds which have an undocumented assumption
> about the order of symtab entries; personally, I'd rather not risk that myself,
> but if somebody with more experience is willing to back that decision, I'm
> OK with it.
>
> Also, there's an alternative approach that uses bsearch from the standard
> library and a common comparison function between qsort and bsearch. I
> considered this alternative earlier; maybe you would prefer it because it
> eliminates having to reimplement a binary search algorithm.
> I chose not to do it this way because of trying to duplicate the quirks.
> If no duplication of the quirks is needed, this becomes easier.
>
> The idea for that is to build a sorted array of syminfo that look like this:
>
> (section_index, addr_lo, addr_hi, sym_lo, sym_hi)
>
> What this represents is the situation where for any lookup in the
> range from (section_index, addr_lo) to (section_index, addr_hi)
> inclusive, the nearest symbol will be either sym_lo or sym_hi.
> There are four different meanings for (sym_lo, sym_hi):
>
> (sym_lo = 0)
> This is a placeholder for a duplicated address, and it cannot
> compare equal to anything. After we sort the array, we set all
> of the duplicated addresses except for the last one to sym_lo = 0.
>
> (sym_lo = MAX_SYM)
> This is used to designate an address being looked up. When this
> is seen, it compares equal to any other syminfo that has an
> overlapping range.
>
> (sym_lo != 0, sym_hi = 0)
> This represents the last range in a section. There's no following
> address that could match. Should also have addr_hi = MAX.
>
> (sym_lo != 0, sym_hi != 0)
> This represents a range in a section that's not the last range.
> sym_hi may be usable to satisfy the lookup, but only if it's
> closer than sym_lo and if allow_negative is true. Note that
> the address of sym_hi will be addr_hi+1, so we don't need any
> additional code to fetch that address.
>
> Here's a sample comparison function:
> int syminfo_compare(const void *a, const void *b) {
> const struct syminfo *sym1 = a;
> const struct syminfo *sym2 = b;
>
> if (sym1->section_index > sym2->section_index)
> return 1;
> if (sym1->section_index < sym2->section_index)
> return -1;
> if ((sym1->sym_lo == MAX_SYM && sym2->sym_lo != 0) ||
> (sym2->sym_lo == MAX_SYM && sym1->sym_lo != 0)) {
> /* Overlap is equality - test for it */
> if (sym1->addr_hi >= sym2->addr_lo &&
> sym2->addr_hi >= sym1->addr_lo) {
> return 0;
> }
> /* No overlap, fall through */
> }
> if (sym1->addr_lo > sym2->addr_lo)
> return 1;
> if (sym1->addr_lo < sym2->addr_lo)
> return -1;
> /* Note that if we are comparing a lookup (MAX_SYM) with
> a placeholder (0), the lookup always compares greater.
> This causes us to search to the "right" of the placeholder
> for a match, which is what we want. */
> if (sym1->sym_lo > sym2->sym_lo)
> return 1;
> if (sym1->sym_lo < sym2->sym_lo)
> return -1;
> return 0;
> }
>
> So this greatly simplifies the back-end searching. It's a bsearch()
> which gives you either a miss, or one or two alternatives for the result.
> On the front end, you have an extra step after sorting which massages the
> search array into the right configuration. There's actually not much code
> needed to do that.
>
> Is that of interest? The leveraging of bsearch() in that way?


I am curious about how to use bsearch() if the whole implementation
is short, but I could not understand that logic fully.




> A few other responses below...
>
> On Sat, Sep 23, 2023 at 4:50 AM Masahiro Yamada <[email protected]> wrote:
> > > +/* Populate the search array that we just allocated.
> > > + * Be slightly paranoid here. If the ELF file changes during processing,
> >
> > I could not understand. In which case, the ELF file changes?
> >
> > modpost loads the entire file to memory first..
> >
> > In which scenario, the memory content changes?
> >
>
> modpost doesn't load the entire file, it uses mmap to map it into the
> address space.The easiest way to imagine this being an issue is that some
> buggy parallelization happens and something is modifying vmlinux.o while
> modpost is processing it. Of course it's probably acceptable to say,
> "Don't do that!"
>
> There are two alternatives here: actually read in the entire file, which
> is certainly suboptimal, or just live with the fact that mmap makes no
> guarantees about whether changes in the file are reflected in the memory map.

You are right. I missed the fact that grab_file() used mmap.

I am OK with the careful check.

Yet another possible alternative is, as Nick suggested, cut the first loop.
Use realloc() to grow the array as needed, but this is also suboptimal
if memory copy occurs.




> > > + /* A bit of paranoia; make sure that the end sentinel's address is
> > > + * different than its predecessor. Not doing this could cause
> > > + * possible undefined behavior if anybody ever inserts a symbol
> > > + * with section_index and addr both at their max values.
> >
> > I could not understand this comment.
> >
> > If section_index and addr both at their max values at [table_size - 2],
> > ->table[table_size - 2].addr + 1 wraps to zero.
> >
> > The table is not sorted any longer?
> >
>
> That's correct, the table would not be sorted any longer. But by design,
> we never do a relational comparison against the end sentinel. But if
> we rework the search, either by your suggestion or by using bsearch(),
> this sentinel goes away.

Understood.
With mid = lo + (hi - lo) / 2,
the last sentinel is never set as mid.



--
Best Regards
Masahiro Yamada

2023-09-25 23:20:21

by Jack Brennen

[permalink] [raw]
Subject: [PATCH] modpost: Optimize symbol search from linear to binary search

Modify modpost to use binary search for converting addresses back
into symbol references. Previously it used linear search.

This change saves a few seconds of wall time for defconfig builds,
but can save several minutes on allyesconfigs.

Before:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
198.38user 1.27system 3:19.71elapsed

After:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
12.02user 0.87system 0:12.91elapsed

Signed-off-by: Jack Brennen <[email protected]>
Tested-by: Nick Desaulniers <[email protected]>
---
scripts/mod/Makefile | 4 +-
scripts/mod/modpost.c | 70 ++------------
scripts/mod/modpost.h | 25 +++++
scripts/mod/symsearch.c | 198 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 231 insertions(+), 66 deletions(-)
create mode 100644 scripts/mod/symsearch.c

diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index c9e38ad937fd..3c54125eb373 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
hostprogs-always-y += modpost mk_elfconfig
always-y += empty.o

-modpost-objs := modpost.o file2alias.o sumversion.o
+modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o

devicetable-offsets-file := devicetable-offsets.h

@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s

# dependencies on generated files need to be listed explicitly

-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
+$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
$(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)

quiet_cmd_elfconfig = MKELF $@
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index de499dce5265..79ee46f0e683 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -22,7 +22,6 @@
#include <errno.h>
#include "modpost.h"
#include "../../include/linux/license.h"
-#include "../../include/linux/module_symbol.h"

static bool module_enabled;
/* Are we using CONFIG_MODVERSIONS? */
@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
*p = TO_NATIVE(*p);
}

+ symsearch_init(info);
+
return 1;
}

static void parse_elf_finish(struct elf_info *info)
{
+ symsearch_finish(info);
release_file(info->hdr, info->size);
}

@@ -1039,71 +1041,10 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
return 1;
}

-/*
- * If there's no name there, ignore it; likewise, ignore it if it's
- * one of the magic symbols emitted used by current tools.
- *
- * Otherwise if find_symbols_between() returns those symbols, they'll
- * fail the whitelist tests and cause lots of false alarms ... fixable
- * only by merging __exit and __init sections into __text, bloating
- * the kernel (which is especially evil on embedded platforms).
- */
-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
-{
- const char *name = elf->strtab + sym->st_name;
-
- if (!name || !strlen(name))
- return 0;
- return !is_mapping_symbol(name);
-}
-
-/* Look up the nearest symbol based on the section and the address */
-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
- unsigned int secndx, bool allow_negative,
- Elf_Addr min_distance)
-{
- Elf_Sym *sym;
- Elf_Sym *near = NULL;
- Elf_Addr sym_addr, distance;
- bool is_arm = (elf->hdr->e_machine == EM_ARM);
-
- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
- if (get_secindex(elf, sym) != secndx)
- continue;
- if (!is_valid_name(elf, sym))
- continue;
-
- sym_addr = sym->st_value;
-
- /*
- * For ARM Thumb instruction, the bit 0 of st_value is set
- * if the symbol is STT_FUNC type. Mask it to get the address.
- */
- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
- sym_addr &= ~1;
-
- if (addr >= sym_addr)
- distance = addr - sym_addr;
- else if (allow_negative)
- distance = sym_addr - addr;
- else
- continue;
-
- if (distance <= min_distance) {
- min_distance = distance;
- near = sym;
- }
-
- if (min_distance == 0)
- break;
- }
- return near;
-}
-
static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
unsigned int secndx)
{
- return find_nearest_sym(elf, addr, secndx, false, ~0);
+ return symsearch_find_nearest(elf, addr, secndx, false, ~0);
}

static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
@@ -1116,7 +1057,8 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
* Strive to find a better symbol name, but the resulting name may not
* match the symbol referenced in the original code.
*/
- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20);
+ return symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
+ true, 20);
}

static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 5f94c2c9f2d9..6413f26fcb6b 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -10,6 +10,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <elf.h>
+#include "../../include/linux/module_symbol.h"

#include "list.h"
#include "elfconfig.h"
@@ -128,6 +129,8 @@ struct elf_info {
* take shndx from symtab_shndx_start[N] instead */
Elf32_Word *symtab_shndx_start;
Elf32_Word *symtab_shndx_stop;
+
+ struct symsearch *symsearch;
};

/* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
return index;
}

+/*
+ * If there's no name there, ignore it; likewise, ignore it if it's
+ * one of the magic symbols emitted used by current tools.
+ *
+ * Internal symbols created by tools should be ignored by modpost.
+ */
+static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
+{
+ const char *name = elf->strtab + sym->st_name;
+
+ if (!name || !strlen(name))
+ return 0;
+ return !is_mapping_symbol(name);
+}
+
+/* symsearch.c */
+void symsearch_init(struct elf_info *elf);
+void symsearch_finish(struct elf_info *elf);
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance);
+
/* file2alias.c */
void handle_moddevtable(struct module *mod, struct elf_info *info,
Elf_Sym *sym, const char *symname);
diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
new file mode 100644
index 000000000000..fbe3b1ff3c34
--- /dev/null
+++ b/scripts/mod/symsearch.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Helper functions for finding the symbol in an ELF which is "nearest"
+ * to a given address.
+ */
+
+#include "modpost.h"
+
+struct syminfo {
+ unsigned int symbol_index;
+ unsigned int section_index;
+ Elf_Addr addr;
+};
+
+/*
+ * Container used to hold an entire binary search table.
+ * Entries in table are ascending, sorted first by section_index,
+ * then by addr, and last by symbol_index. The sorting by
+ * symbol_index is used to ensure predictable behavior when
+ * multiple symbols are present with the same address; all
+ * symbols past the first are effectively ignored, by eliding
+ * them in symsearch_fixup().
+ */
+struct symsearch {
+ size_t table_size;
+ struct syminfo table[];
+};
+
+static int syminfo_compare(const void *s1, const void *s2)
+{
+ const struct syminfo *sym1 = s1;
+ const struct syminfo *sym2 = s2;
+
+ if (sym1->section_index > sym2->section_index)
+ return 1;
+ if (sym1->section_index < sym2->section_index)
+ return -1;
+ if (sym1->addr > sym2->addr)
+ return 1;
+ if (sym1->addr < sym2->addr)
+ return -1;
+ if (sym1->symbol_index > sym2->symbol_index)
+ return 1;
+ if (sym1->symbol_index < sym2->symbol_index)
+ return -1;
+ return 0;
+}
+
+static size_t symbol_count(struct elf_info *elf)
+{
+ size_t result = 0;
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym))
+ result++;
+ }
+ return result;
+}
+
+/*
+ * Populate the search array that we just allocated.
+ * Be slightly paranoid here. The ELF file is mmap'd and could
+ * conceivably change between symbol_count() and symsearch_populate().
+ * If we notice any difference, bail out rather than potentially
+ * propagating errors or crashing.
+ */
+static void symsearch_populate(struct elf_info *elf,
+ struct syminfo *table,
+ size_t table_size)
+{
+ bool is_arm = (elf->hdr->e_machine == EM_ARM);
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym)) {
+ if (table_size-- == 0)
+ fatal("%s: size mismatch\n", __func__);
+ table->symbol_index = sym - elf->symtab_start;
+ table->section_index = get_secindex(elf, sym);
+ table->addr = sym->st_value;
+
+ /*
+ * For ARM Thumb instruction, the bit 0 of st_value is
+ * set if the symbol is STT_FUNC type. Mask it to get
+ * the address.
+ */
+ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
+ table->addr &= ~1;
+
+ table++;
+ }
+ }
+
+ if (table_size != 0)
+ fatal("%s: size mismatch\n", __func__);
+}
+
+/*
+ * Do any fixups on the table after sorting.
+ * For now, this just finds adjacent entries which have
+ * the same section_index and addr, and it propagates
+ * the first symbol_index over the subsequent entries,
+ * so that only one symbol_index is seen for any given
+ * section_index and addr. This ensures that whether
+ * we're looking at an address from "above" or "below"
+ * that we see the same symbol_index.
+ * This does leave some duplicate entries in the table;
+ * in practice, these are a small fraction of the
+ * total number of entries, and they are harmless to
+ * the binary search algorithm other than a few occasional
+ * unnecessary comparisons.
+ */
+static void symsearch_fixup(struct syminfo *table, size_t table_size)
+{
+ /* Don't look at index 0, it will never change. */
+ for (size_t i = 1; i < table_size; i++) {
+ if (table[i].addr == table[i-1].addr &&
+ table[i].section_index == table[i-1].section_index) {
+ table[i].symbol_index = table[i-1].symbol_index;
+ }
+ }
+}
+
+void symsearch_init(struct elf_info *elf)
+{
+ size_t table_size = symbol_count(elf);
+
+ elf->symsearch = NOFAIL(malloc(
+ sizeof(struct symsearch) +
+ sizeof(struct syminfo) * table_size));
+ elf->symsearch->table_size = table_size;
+
+ symsearch_populate(elf, elf->symsearch->table, table_size);
+ qsort(elf->symsearch->table, table_size,
+ sizeof(struct syminfo), syminfo_compare);
+
+ symsearch_fixup(elf->symsearch->table, table_size);
+}
+
+void symsearch_finish(struct elf_info *elf)
+{
+ free(elf->symsearch);
+ elf->symsearch = NULL;
+}
+
+/*
+ * Find the syminfo which is in secndx and "nearest" to addr.
+ * allow_negative: allow returning a symbol whose address is > addr.
+ * min_distance: ignore symbols which are further away than this.
+ *
+ * Returns a pointer into the symbol table for success.
+ * Returns NULL if no legal symbol is found within the requested range.
+ */
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance)
+{
+ size_t hi = elf->symsearch->table_size;
+ size_t lo = 0;
+ struct syminfo *table = elf->symsearch->table;
+ struct syminfo target;
+ target.addr = addr;
+ target.section_index = secndx;
+ target.symbol_index = ~0; /* compares greater than any actual index */
+ while (hi > lo) {
+ size_t mid = lo + (hi-lo)/2; /* Avoids potential overflow */
+ if (syminfo_compare(&table[mid], &target) > 0) {
+ hi = mid;
+ } else {
+ lo = mid+1;
+ }
+ }
+
+ /*
+ * table[hi], if it exists, is the first entry in the array which
+ * lies beyond target. table[hi-1], if it exists, is the last
+ * entry in the array which comes before target, including the
+ * case where it perfectly matches the section and the address.
+ *
+ * Note -- if the address we're looking up falls perfectly
+ * in the middle of two symbols, this is written to always
+ * prefer the symbol with the lower address.
+ */
+ Elf_Sym *result = NULL;
+ if (allow_negative &&
+ hi < elf->symsearch->table_size &&
+ table[hi].section_index == secndx &&
+ table[hi].addr - addr <= min_distance) {
+ min_distance = table[hi].addr - addr;
+ result = &elf->symtab_start[table[hi].symbol_index];
+ }
+ if (hi > 0 &&
+ table[hi-1].section_index == secndx &&
+ addr - table[hi-1].addr <= min_distance) {
+ result = &elf->symtab_start[table[hi-1].symbol_index];
+ }
+ return result;
+}
--
2.42.0.515.g380fc7ccd1-goog

2023-09-26 03:01:33

by Jack Brennen

[permalink] [raw]
Subject: [PATCH] modpost: Optimize symbol search from linear to binary search

Modify modpost to use binary search for converting addresses back
into symbol references. Previously it used linear search.

This change saves a few seconds of wall time for defconfig builds,
but can save several minutes on allyesconfigs.

Before:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
198.38user 1.27system 3:19.71elapsed

After:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
12.02user 0.87system 0:12.91elapsed

Signed-off-by: Jack Brennen <[email protected]>
Tested-by: Nick Desaulniers <[email protected]>
---
scripts/mod/Makefile | 4 +-
scripts/mod/modpost.c | 70 ++------------
scripts/mod/modpost.h | 25 +++++
scripts/mod/symsearch.c | 200 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 233 insertions(+), 66 deletions(-)
create mode 100644 scripts/mod/symsearch.c

diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index c9e38ad937fd..3c54125eb373 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
hostprogs-always-y += modpost mk_elfconfig
always-y += empty.o

-modpost-objs := modpost.o file2alias.o sumversion.o
+modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o

devicetable-offsets-file := devicetable-offsets.h

@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s

# dependencies on generated files need to be listed explicitly

-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
+$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
$(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)

quiet_cmd_elfconfig = MKELF $@
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index de499dce5265..71ddce212538 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -22,7 +22,6 @@
#include <errno.h>
#include "modpost.h"
#include "../../include/linux/license.h"
-#include "../../include/linux/module_symbol.h"

static bool module_enabled;
/* Are we using CONFIG_MODVERSIONS? */
@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
*p = TO_NATIVE(*p);
}

+ symsearch_init(info);
+
return 1;
}

static void parse_elf_finish(struct elf_info *info)
{
+ symsearch_finish(info);
release_file(info->hdr, info->size);
}

@@ -1039,71 +1041,10 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
return 1;
}

-/*
- * If there's no name there, ignore it; likewise, ignore it if it's
- * one of the magic symbols emitted used by current tools.
- *
- * Otherwise if find_symbols_between() returns those symbols, they'll
- * fail the whitelist tests and cause lots of false alarms ... fixable
- * only by merging __exit and __init sections into __text, bloating
- * the kernel (which is especially evil on embedded platforms).
- */
-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
-{
- const char *name = elf->strtab + sym->st_name;
-
- if (!name || !strlen(name))
- return 0;
- return !is_mapping_symbol(name);
-}
-
-/* Look up the nearest symbol based on the section and the address */
-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
- unsigned int secndx, bool allow_negative,
- Elf_Addr min_distance)
-{
- Elf_Sym *sym;
- Elf_Sym *near = NULL;
- Elf_Addr sym_addr, distance;
- bool is_arm = (elf->hdr->e_machine == EM_ARM);
-
- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
- if (get_secindex(elf, sym) != secndx)
- continue;
- if (!is_valid_name(elf, sym))
- continue;
-
- sym_addr = sym->st_value;
-
- /*
- * For ARM Thumb instruction, the bit 0 of st_value is set
- * if the symbol is STT_FUNC type. Mask it to get the address.
- */
- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
- sym_addr &= ~1;
-
- if (addr >= sym_addr)
- distance = addr - sym_addr;
- else if (allow_negative)
- distance = sym_addr - addr;
- else
- continue;
-
- if (distance <= min_distance) {
- min_distance = distance;
- near = sym;
- }
-
- if (min_distance == 0)
- break;
- }
- return near;
-}
-
static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
unsigned int secndx)
{
- return find_nearest_sym(elf, addr, secndx, false, ~0);
+ return symsearch_find_nearest(elf, addr, secndx, false, ~0);
}

static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
@@ -1116,7 +1057,8 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
* Strive to find a better symbol name, but the resulting name may not
* match the symbol referenced in the original code.
*/
- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20);
+ return symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
+ true, 20);
}

static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 5f94c2c9f2d9..6413f26fcb6b 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -10,6 +10,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <elf.h>
+#include "../../include/linux/module_symbol.h"

#include "list.h"
#include "elfconfig.h"
@@ -128,6 +129,8 @@ struct elf_info {
* take shndx from symtab_shndx_start[N] instead */
Elf32_Word *symtab_shndx_start;
Elf32_Word *symtab_shndx_stop;
+
+ struct symsearch *symsearch;
};

/* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
return index;
}

+/*
+ * If there's no name there, ignore it; likewise, ignore it if it's
+ * one of the magic symbols emitted used by current tools.
+ *
+ * Internal symbols created by tools should be ignored by modpost.
+ */
+static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
+{
+ const char *name = elf->strtab + sym->st_name;
+
+ if (!name || !strlen(name))
+ return 0;
+ return !is_mapping_symbol(name);
+}
+
+/* symsearch.c */
+void symsearch_init(struct elf_info *elf);
+void symsearch_finish(struct elf_info *elf);
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance);
+
/* file2alias.c */
void handle_moddevtable(struct module *mod, struct elf_info *info,
Elf_Sym *sym, const char *symname);
diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
new file mode 100644
index 000000000000..174e88686418
--- /dev/null
+++ b/scripts/mod/symsearch.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Helper functions for finding the symbol in an ELF which is "nearest"
+ * to a given address.
+ */
+
+#include "modpost.h"
+
+struct syminfo {
+ unsigned int symbol_index;
+ unsigned int section_index;
+ Elf_Addr addr;
+};
+
+/*
+ * Container used to hold an entire binary search table.
+ * Entries in table are ascending, sorted first by section_index,
+ * then by addr, and last by symbol_index. The sorting by
+ * symbol_index is used to ensure predictable behavior when
+ * multiple symbols are present with the same address; all
+ * symbols past the first are effectively ignored, by eliding
+ * them in symsearch_fixup().
+ */
+struct symsearch {
+ size_t table_size;
+ struct syminfo table[];
+};
+
+static int syminfo_compare(const void *s1, const void *s2)
+{
+ const struct syminfo *sym1 = s1;
+ const struct syminfo *sym2 = s2;
+
+ if (sym1->section_index > sym2->section_index)
+ return 1;
+ if (sym1->section_index < sym2->section_index)
+ return -1;
+ if (sym1->addr > sym2->addr)
+ return 1;
+ if (sym1->addr < sym2->addr)
+ return -1;
+ if (sym1->symbol_index > sym2->symbol_index)
+ return 1;
+ if (sym1->symbol_index < sym2->symbol_index)
+ return -1;
+ return 0;
+}
+
+static size_t symbol_count(struct elf_info *elf)
+{
+ size_t result = 0;
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym))
+ result++;
+ }
+ return result;
+}
+
+/*
+ * Populate the search array that we just allocated.
+ * Be slightly paranoid here. The ELF file is mmap'd and could
+ * conceivably change between symbol_count() and symsearch_populate().
+ * If we notice any difference, bail out rather than potentially
+ * propagating errors or crashing.
+ */
+static void symsearch_populate(struct elf_info *elf,
+ struct syminfo *table,
+ size_t table_size)
+{
+ bool is_arm = (elf->hdr->e_machine == EM_ARM);
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym)) {
+ if (table_size-- == 0)
+ fatal("%s: size mismatch\n", __func__);
+ table->symbol_index = sym - elf->symtab_start;
+ table->section_index = get_secindex(elf, sym);
+ table->addr = sym->st_value;
+
+ /*
+ * For ARM Thumb instruction, the bit 0 of st_value is
+ * set if the symbol is STT_FUNC type. Mask it to get
+ * the address.
+ */
+ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
+ table->addr &= ~1;
+
+ table++;
+ }
+ }
+
+ if (table_size != 0)
+ fatal("%s: size mismatch\n", __func__);
+}
+
+/*
+ * Do any fixups on the table after sorting.
+ * For now, this just finds adjacent entries which have
+ * the same section_index and addr, and it propagates
+ * the first symbol_index over the subsequent entries,
+ * so that only one symbol_index is seen for any given
+ * section_index and addr. This ensures that whether
+ * we're looking at an address from "above" or "below"
+ * that we see the same symbol_index.
+ * This does leave some duplicate entries in the table;
+ * in practice, these are a small fraction of the
+ * total number of entries, and they are harmless to
+ * the binary search algorithm other than a few occasional
+ * unnecessary comparisons.
+ */
+static void symsearch_fixup(struct syminfo *table, size_t table_size)
+{
+ /* Don't look at index 0, it will never change. */
+ for (size_t i = 1; i < table_size; i++) {
+ if (table[i].addr == table[i-1].addr &&
+ table[i].section_index == table[i-1].section_index) {
+ table[i].symbol_index = table[i-1].symbol_index;
+ }
+ }
+}
+
+void symsearch_init(struct elf_info *elf)
+{
+ size_t table_size = symbol_count(elf);
+
+ elf->symsearch = NOFAIL(malloc(
+ sizeof(struct symsearch) +
+ sizeof(struct syminfo) * table_size));
+ elf->symsearch->table_size = table_size;
+
+ symsearch_populate(elf, elf->symsearch->table, table_size);
+ qsort(elf->symsearch->table, table_size,
+ sizeof(struct syminfo), syminfo_compare);
+
+ symsearch_fixup(elf->symsearch->table, table_size);
+}
+
+void symsearch_finish(struct elf_info *elf)
+{
+ free(elf->symsearch);
+ elf->symsearch = NULL;
+}
+
+/*
+ * Find the syminfo which is in secndx and "nearest" to addr.
+ * allow_negative: allow returning a symbol whose address is > addr.
+ * min_distance: ignore symbols which are further away than this.
+ *
+ * Returns a pointer into the symbol table for success.
+ * Returns NULL if no legal symbol is found within the requested range.
+ */
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance)
+{
+ size_t hi = elf->symsearch->table_size;
+ size_t lo = 0;
+ struct syminfo *table = elf->symsearch->table;
+ struct syminfo target;
+
+ target.addr = addr;
+ target.section_index = secndx;
+ target.symbol_index = ~0; /* compares greater than any actual index */
+ while (hi > lo) {
+ size_t mid = lo + (hi-lo)/2; /* Avoids potential overflow */
+
+ if (syminfo_compare(&table[mid], &target) > 0)
+ hi = mid;
+ else
+ lo = mid+1;
+ }
+
+ /*
+ * table[hi], if it exists, is the first entry in the array which
+ * lies beyond target. table[hi-1], if it exists, is the last
+ * entry in the array which comes before target, including the
+ * case where it perfectly matches the section and the address.
+ *
+ * Note -- if the address we're looking up falls perfectly
+ * in the middle of two symbols, this is written to always
+ * prefer the symbol with the lower address.
+ */
+ Elf_Sym *result = NULL;
+
+ if (allow_negative &&
+ hi < elf->symsearch->table_size &&
+ table[hi].section_index == secndx &&
+ table[hi].addr - addr <= min_distance) {
+ min_distance = table[hi].addr - addr;
+ result = &elf->symtab_start[table[hi].symbol_index];
+ }
+ if (hi > 0 &&
+ table[hi-1].section_index == secndx &&
+ addr - table[hi-1].addr <= min_distance) {
+ result = &elf->symtab_start[table[hi-1].symbol_index];
+ }
+ return result;
+}
--
2.42.0.515.g380fc7ccd1-goog

2023-09-26 06:52:21

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Tue, Sep 26, 2023 at 5:59 AM Jack Brennen <[email protected]> wrote:

> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> + unsigned int secndx, bool allow_negative,
> + Elf_Addr min_distance)
> +{
> + size_t hi = elf->symsearch->table_size;
> + size_t lo = 0;
> + struct syminfo *table = elf->symsearch->table;
> + struct syminfo target;
> +
> + target.addr = addr;
> + target.section_index = secndx;
> + target.symbol_index = ~0; /* compares greater than any actual index */
> + while (hi > lo) {
> + size_t mid = lo + (hi-lo)/2; /* Avoids potential overflow */
> +
> + if (syminfo_compare(&table[mid], &target) > 0)
> + hi = mid;
> + else
> + lo = mid+1;


My preference is "low = mid + 1" over "low = mid+1"


Documentation/process/coding-style.rst suggests spaces
around binary operators.

"
Use one space around (on each side of) most binary and ternary operators,
such as any of these::

= + - < > * / % | & ^ <= >= == != ? :
"




I can see the corresponding line in the checkpatch tool:

https://github.com/torvalds/linux/blob/v6.5/scripts/checkpatch.pl#L5330


I wonder why the checkpatch did not detect it.

Maybe, Joe Perches may know the reason.







My previous question about the type inconsistency
was not addressed.

syminfo::symbol_index is unsigned int
symsearch::table_size is size_t


If we encountered a situation where size_t is
really needed for the table_size
(that is, the number of entries does not fit in 32-bit),
syminfo::symbol_index would wrap around.

So, there is no point to use size_t for one,
and (unsigned int) for the other.


In my opinion, (unsigned int) would be enough to count
numbers or index here.

size_t might be 32-bit or 64-bit depending
on the build host architecture.
That is not related to the target architecture
of ELF being processed.




--
Best Regards
Masahiro Yamada

2023-09-26 07:28:13

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Tue, 2023-09-26 at 15:46 +0900, Masahiro Yamada wrote:
> On Tue, Sep 26, 2023 at 5:59 AM Jack Brennen <[email protected]> wrote:
>
> > +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> > + unsigned int secndx, bool allow_negative,
> > + Elf_Addr min_distance)
> > +{
> > + size_t hi = elf->symsearch->table_size;
> > + size_t lo = 0;
> > + struct syminfo *table = elf->symsearch->table;
> > + struct syminfo target;
> > +
> > + target.addr = addr;
> > + target.section_index = secndx;
> > + target.symbol_index = ~0; /* compares greater than any actual index */
> > + while (hi > lo) {
> > + size_t mid = lo + (hi-lo)/2; /* Avoids potential overflow */
> > +
> > + if (syminfo_compare(&table[mid], &target) > 0)
> > + hi = mid;
> > + else
> > + lo = mid+1;
>
> My preference is "low = mid + 1" over "low = mid+1"
>
> Documentation/process/coding-style.rst suggests spaces
> around binary operators.
>
> "
> Use one space around (on each side of) most binary and ternary operators,
> such as any of these::
>
> = + - < > * / % | & ^ <= >= == != ? :
> "
>
> I can see the corresponding line in the checkpatch tool:
>
> https://github.com/torvalds/linux/blob/v6.5/scripts/checkpatch.pl#L5330
>
>
> I wonder why the checkpatch did not detect it.
>
> Maybe, Joe Perches may know the reason.

checkpatch requires --strict to emit that message.

2023-09-26 10:06:41

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Tue, Sep 26, 2023 at 4:24 PM Joe Perches <[email protected]> wrote:
>
> On Tue, 2023-09-26 at 15:46 +0900, Masahiro Yamada wrote:
> > On Tue, Sep 26, 2023 at 5:59 AM Jack Brennen <[email protected]> wrote:
> >
> > > +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> > > + unsigned int secndx, bool allow_negative,
> > > + Elf_Addr min_distance)
> > > +{
> > > + size_t hi = elf->symsearch->table_size;
> > > + size_t lo = 0;
> > > + struct syminfo *table = elf->symsearch->table;
> > > + struct syminfo target;
> > > +
> > > + target.addr = addr;
> > > + target.section_index = secndx;
> > > + target.symbol_index = ~0; /* compares greater than any actual index */
> > > + while (hi > lo) {
> > > + size_t mid = lo + (hi-lo)/2; /* Avoids potential overflow */
> > > +
> > > + if (syminfo_compare(&table[mid], &target) > 0)
> > > + hi = mid;
> > > + else
> > > + lo = mid+1;
> >
> > My preference is "low = mid + 1" over "low = mid+1"
> >
> > Documentation/process/coding-style.rst suggests spaces
> > around binary operators.
> >
> > "
> > Use one space around (on each side of) most binary and ternary operators,
> > such as any of these::
> >
> > = + - < > * / % | & ^ <= >= == != ? :
> > "
> >
> > I can see the corresponding line in the checkpatch tool:
> >
> > https://github.com/torvalds/linux/blob/v6.5/scripts/checkpatch.pl#L5330
> >
> >
> > I wonder why the checkpatch did not detect it.
> >
> > Maybe, Joe Perches may know the reason.
>
> checkpatch requires --strict to emit that message.
>




Ah, now I see it. I learned a new thing today.



CHECK: spaces preferred around that '+' (ctx:VxV)
#466: FILE: scripts/mod/symsearch.c:172:
+ lo = mid+1;
^







--
Best Regards
Masahiro Yamada

2023-09-26 17:20:32

by Jack Brennen

[permalink] [raw]
Subject: [PATCH] modpost: Optimize symbol search from linear to binary search

Modify modpost to use binary search for converting addresses back
into symbol references. Previously it used linear search.

This change saves a few seconds of wall time for defconfig builds,
but can save several minutes on allyesconfigs.

Before:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
198.38user 1.27system 3:19.71elapsed

After:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
$ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
11.91user 0.85system 0:12.78elapsed

Signed-off-by: Jack Brennen <[email protected]>
Tested-by: Nick Desaulniers <[email protected]>
---
scripts/mod/Makefile | 4 +-
scripts/mod/modpost.c | 70 ++------------
scripts/mod/modpost.h | 25 +++++
scripts/mod/symsearch.c | 199 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 232 insertions(+), 66 deletions(-)
create mode 100644 scripts/mod/symsearch.c

diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index c9e38ad937fd..3c54125eb373 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
hostprogs-always-y += modpost mk_elfconfig
always-y += empty.o

-modpost-objs := modpost.o file2alias.o sumversion.o
+modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o

devicetable-offsets-file := devicetable-offsets.h

@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s

# dependencies on generated files need to be listed explicitly

-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
+$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
$(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)

quiet_cmd_elfconfig = MKELF $@
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index de499dce5265..71ddce212538 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -22,7 +22,6 @@
#include <errno.h>
#include "modpost.h"
#include "../../include/linux/license.h"
-#include "../../include/linux/module_symbol.h"

static bool module_enabled;
/* Are we using CONFIG_MODVERSIONS? */
@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
*p = TO_NATIVE(*p);
}

+ symsearch_init(info);
+
return 1;
}

static void parse_elf_finish(struct elf_info *info)
{
+ symsearch_finish(info);
release_file(info->hdr, info->size);
}

@@ -1039,71 +1041,10 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
return 1;
}

-/*
- * If there's no name there, ignore it; likewise, ignore it if it's
- * one of the magic symbols emitted used by current tools.
- *
- * Otherwise if find_symbols_between() returns those symbols, they'll
- * fail the whitelist tests and cause lots of false alarms ... fixable
- * only by merging __exit and __init sections into __text, bloating
- * the kernel (which is especially evil on embedded platforms).
- */
-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
-{
- const char *name = elf->strtab + sym->st_name;
-
- if (!name || !strlen(name))
- return 0;
- return !is_mapping_symbol(name);
-}
-
-/* Look up the nearest symbol based on the section and the address */
-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
- unsigned int secndx, bool allow_negative,
- Elf_Addr min_distance)
-{
- Elf_Sym *sym;
- Elf_Sym *near = NULL;
- Elf_Addr sym_addr, distance;
- bool is_arm = (elf->hdr->e_machine == EM_ARM);
-
- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
- if (get_secindex(elf, sym) != secndx)
- continue;
- if (!is_valid_name(elf, sym))
- continue;
-
- sym_addr = sym->st_value;
-
- /*
- * For ARM Thumb instruction, the bit 0 of st_value is set
- * if the symbol is STT_FUNC type. Mask it to get the address.
- */
- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
- sym_addr &= ~1;
-
- if (addr >= sym_addr)
- distance = addr - sym_addr;
- else if (allow_negative)
- distance = sym_addr - addr;
- else
- continue;
-
- if (distance <= min_distance) {
- min_distance = distance;
- near = sym;
- }
-
- if (min_distance == 0)
- break;
- }
- return near;
-}
-
static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
unsigned int secndx)
{
- return find_nearest_sym(elf, addr, secndx, false, ~0);
+ return symsearch_find_nearest(elf, addr, secndx, false, ~0);
}

static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
@@ -1116,7 +1057,8 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
* Strive to find a better symbol name, but the resulting name may not
* match the symbol referenced in the original code.
*/
- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20);
+ return symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
+ true, 20);
}

static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 5f94c2c9f2d9..6413f26fcb6b 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -10,6 +10,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <elf.h>
+#include "../../include/linux/module_symbol.h"

#include "list.h"
#include "elfconfig.h"
@@ -128,6 +129,8 @@ struct elf_info {
* take shndx from symtab_shndx_start[N] instead */
Elf32_Word *symtab_shndx_start;
Elf32_Word *symtab_shndx_stop;
+
+ struct symsearch *symsearch;
};

/* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
return index;
}

+/*
+ * If there's no name there, ignore it; likewise, ignore it if it's
+ * one of the magic symbols emitted used by current tools.
+ *
+ * Internal symbols created by tools should be ignored by modpost.
+ */
+static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
+{
+ const char *name = elf->strtab + sym->st_name;
+
+ if (!name || !strlen(name))
+ return 0;
+ return !is_mapping_symbol(name);
+}
+
+/* symsearch.c */
+void symsearch_init(struct elf_info *elf);
+void symsearch_finish(struct elf_info *elf);
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance);
+
/* file2alias.c */
void handle_moddevtable(struct module *mod, struct elf_info *info,
Elf_Sym *sym, const char *symname);
diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
new file mode 100644
index 000000000000..aa4ed51f9960
--- /dev/null
+++ b/scripts/mod/symsearch.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Helper functions for finding the symbol in an ELF which is "nearest"
+ * to a given address.
+ */
+
+#include "modpost.h"
+
+struct syminfo {
+ unsigned int symbol_index;
+ unsigned int section_index;
+ Elf_Addr addr;
+};
+
+/*
+ * Container used to hold an entire binary search table.
+ * Entries in table are ascending, sorted first by section_index,
+ * then by addr, and last by symbol_index. The sorting by
+ * symbol_index is used to ensure predictable behavior when
+ * multiple symbols are present with the same address; all
+ * symbols past the first are effectively ignored, by eliding
+ * them in symsearch_fixup().
+ */
+struct symsearch {
+ unsigned int table_size;
+ struct syminfo table[];
+};
+
+static int syminfo_compare(const void *s1, const void *s2)
+{
+ const struct syminfo *sym1 = s1;
+ const struct syminfo *sym2 = s2;
+
+ if (sym1->section_index > sym2->section_index)
+ return 1;
+ if (sym1->section_index < sym2->section_index)
+ return -1;
+ if (sym1->addr > sym2->addr)
+ return 1;
+ if (sym1->addr < sym2->addr)
+ return -1;
+ if (sym1->symbol_index > sym2->symbol_index)
+ return 1;
+ if (sym1->symbol_index < sym2->symbol_index)
+ return -1;
+ return 0;
+}
+
+static unsigned int symbol_count(struct elf_info *elf)
+{
+ unsigned int result = 0;
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym))
+ result++;
+ }
+ return result;
+}
+
+/*
+ * Populate the search array that we just allocated.
+ * Be slightly paranoid here. The ELF file is mmap'd and could
+ * conceivably change between symbol_count() and symsearch_populate().
+ * If we notice any difference, bail out rather than potentially
+ * propagating errors or crashing.
+ */
+static void symsearch_populate(struct elf_info *elf,
+ struct syminfo *table,
+ unsigned int table_size)
+{
+ bool is_arm = (elf->hdr->e_machine == EM_ARM);
+
+ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (is_valid_name(elf, sym)) {
+ if (table_size-- == 0)
+ fatal("%s: size mismatch\n", __func__);
+ table->symbol_index = sym - elf->symtab_start;
+ table->section_index = get_secindex(elf, sym);
+ table->addr = sym->st_value;
+
+ /*
+ * For ARM Thumb instruction, the bit 0 of st_value is
+ * set if the symbol is STT_FUNC type. Mask it to get
+ * the address.
+ */
+ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
+ table->addr &= ~1;
+
+ table++;
+ }
+ }
+
+ if (table_size != 0)
+ fatal("%s: size mismatch\n", __func__);
+}
+
+/*
+ * Do any fixups on the table after sorting.
+ * For now, this just finds adjacent entries which have
+ * the same section_index and addr, and it propagates
+ * the first symbol_index over the subsequent entries,
+ * so that only one symbol_index is seen for any given
+ * section_index and addr. This ensures that whether
+ * we're looking at an address from "above" or "below"
+ * that we see the same symbol_index.
+ * This does leave some duplicate entries in the table;
+ * in practice, these are a small fraction of the
+ * total number of entries, and they are harmless to
+ * the binary search algorithm other than a few occasional
+ * unnecessary comparisons.
+ */
+static void symsearch_fixup(struct syminfo *table, unsigned int table_size)
+{
+ /* Don't look at index 0, it will never change. */
+ for (unsigned int i = 1; i < table_size; i++) {
+ if (table[i].addr == table[i - 1].addr &&
+ table[i].section_index == table[i - 1].section_index) {
+ table[i].symbol_index = table[i - 1].symbol_index;
+ }
+ }
+}
+
+void symsearch_init(struct elf_info *elf)
+{
+ unsigned int table_size = symbol_count(elf);
+
+ elf->symsearch = NOFAIL(malloc(sizeof(struct symsearch) +
+ sizeof(struct syminfo) * table_size));
+ elf->symsearch->table_size = table_size;
+
+ symsearch_populate(elf, elf->symsearch->table, table_size);
+ qsort(elf->symsearch->table, table_size,
+ sizeof(struct syminfo), syminfo_compare);
+
+ symsearch_fixup(elf->symsearch->table, table_size);
+}
+
+void symsearch_finish(struct elf_info *elf)
+{
+ free(elf->symsearch);
+ elf->symsearch = NULL;
+}
+
+/*
+ * Find the syminfo which is in secndx and "nearest" to addr.
+ * allow_negative: allow returning a symbol whose address is > addr.
+ * min_distance: ignore symbols which are further away than this.
+ *
+ * Returns a pointer into the symbol table for success.
+ * Returns NULL if no legal symbol is found within the requested range.
+ */
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx, bool allow_negative,
+ Elf_Addr min_distance)
+{
+ unsigned int hi = elf->symsearch->table_size;
+ unsigned int lo = 0;
+ struct syminfo *table = elf->symsearch->table;
+ struct syminfo target;
+
+ target.addr = addr;
+ target.section_index = secndx;
+ target.symbol_index = ~0; /* compares greater than any actual index */
+ while (hi > lo) {
+ unsigned int mid = lo + (hi - lo) / 2; /* Avoids overflow */
+
+ if (syminfo_compare(&table[mid], &target) > 0)
+ hi = mid;
+ else
+ lo = mid + 1;
+ }
+
+ /*
+ * table[hi], if it exists, is the first entry in the array which
+ * lies beyond target. table[hi - 1], if it exists, is the last
+ * entry in the array which comes before target, including the
+ * case where it perfectly matches the section and the address.
+ *
+ * Note -- if the address we're looking up falls perfectly
+ * in the middle of two symbols, this is written to always
+ * prefer the symbol with the lower address.
+ */
+ Elf_Sym *result = NULL;
+
+ if (allow_negative &&
+ hi < elf->symsearch->table_size &&
+ table[hi].section_index == secndx &&
+ table[hi].addr - addr <= min_distance) {
+ min_distance = table[hi].addr - addr;
+ result = &elf->symtab_start[table[hi].symbol_index];
+ }
+ if (hi > 0 &&
+ table[hi - 1].section_index == secndx &&
+ addr - table[hi - 1].addr <= min_distance) {
+ result = &elf->symtab_start[table[hi - 1].symbol_index];
+ }
+ return result;
+}
--
2.42.0.515.g380fc7ccd1-goog

2023-10-03 11:36:25

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH] modpost: Optimize symbol search from linear to binary search

On Tue, Sep 26, 2023 at 9:40 PM Jack Brennen <[email protected]> wrote:
>
> Modify modpost to use binary search for converting addresses back
> into symbol references. Previously it used linear search.
>
> This change saves a few seconds of wall time for defconfig builds,
> but can save several minutes on allyesconfigs.
>
> Before:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
> $ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
> 198.38user 1.27system 3:19.71elapsed
>
> After:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
> $ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
> 11.91user 0.85system 0:12.78elapsed
>
> Signed-off-by: Jack Brennen <[email protected]>
> Tested-by: Nick Desaulniers <[email protected]>



Applied to linux-kbuild.
Thanks.



--
Best Regards
Masahiro Yamada