2020-03-17 17:13:12

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH v2 17/19] objtool: Optimize !vmlinux.o again

When doing kbuild tests to see if the objtool changes affected those I
found that there was a measurable regression:

        pre             post

real    1m13.594s       1m16.488s
user    34m58.246s      35m23.947s
sys     4m0.393s        4m27.312s

Perf showed that for small files the increased hash-table sizes made a
measurable difference. Since we already have -l "vmlinux" to
distinguish between the modes, make it also use a smaller portion of
the hash-tables.

This flips it into a small win:

real 1m14.143s
user 34m49.292s
sys 3m44.746s

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
tools/objtool/elf.c | 51 ++++++++++++++++++++++++++++++++++-----------------
tools/objtool/elf.h | 4 ++--
2 files changed, 36 insertions(+), 19 deletions(-)

--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -27,6 +27,22 @@ static inline u32 str_hash(const char *s
return jhash(str, strlen(str), 0);
}

+static inline int elf_hash_bits(void)
+{
+ return vmlinux ? 20 : 16;
+}
+
+#define elf_hash_add(hashtable, node, key) \
+ hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+
+static void elf_hash_init(struct hlist_head *table)
+{
+ __hash_init(table, 1U << elf_hash_bits());
+}
+
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+
static void rb_add(struct rb_root *tree, struct rb_node *node,
int (*cmp)(struct rb_node *, const struct rb_node *))
{
@@ -115,7 +131,7 @@ struct section *find_section_by_name(str
{
struct section *sec;

- hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
if (!strcmp(sec->name, name))
return sec;

@@ -127,7 +143,7 @@ static struct section *find_section_by_i
{
struct section *sec;

- hash_for_each_possible(elf->section_hash, sec, hash, idx)
+ elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
if (sec->idx == idx)
return sec;

@@ -138,7 +154,7 @@ static struct symbol *find_symbol_by_ind
{
struct symbol *sym;

- hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+ elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
if (sym->idx == idx)
return sym;

@@ -205,7 +221,7 @@ struct symbol *find_symbol_by_name(struc
{
struct symbol *sym;

- hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
if (!strcmp(sym->name, name))
return sym;

@@ -309,8 +325,8 @@ static int read_sections(struct elf *elf
sec->len = sec->sh.sh_size;

list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
}

if (stats)
@@ -394,8 +410,8 @@ static int read_symbols(struct elf *elf)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
- hash_add(elf->symbol_hash, &sym->hash, sym->idx);
- hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
}

if (stats)
@@ -504,7 +520,7 @@ static int read_relas(struct elf *elf)
}

list_add_tail(&rela->list, &sec->rela_list);
- hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+ elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
nr_rela++;
}
max_rela = max(max_rela, nr_rela);
@@ -531,15 +547,16 @@ struct elf *elf_read(const char *name, i
perror("malloc");
return NULL;
}
- memset(elf, 0, sizeof(*elf));
+ memset(elf, 0, offsetof(struct elf, sections));

- hash_init(elf->symbol_hash);
- hash_init(elf->symbol_name_hash);
- hash_init(elf->section_hash);
- hash_init(elf->section_name_hash);
- hash_init(elf->rela_hash);
INIT_LIST_HEAD(&elf->sections);

+ elf_hash_init(elf->symbol_hash);
+ elf_hash_init(elf->symbol_name_hash);
+ elf_hash_init(elf->section_hash);
+ elf_hash_init(elf->section_name_hash);
+ elf_hash_init(elf->rela_hash);
+
elf->fd = open(name, flags);
if (elf->fd == -1) {
fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -676,8 +693,8 @@ struct section *elf_create_section(struc
shstrtab->changed = true;

list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));

return sec;
}
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -80,8 +80,8 @@ struct elf {
struct list_head sections;
DECLARE_HASHTABLE(symbol_hash, 20);
DECLARE_HASHTABLE(symbol_name_hash, 20);
- DECLARE_HASHTABLE(section_hash, 16);
- DECLARE_HASHTABLE(section_name_hash, 16);
+ DECLARE_HASHTABLE(section_hash, 20);
+ DECLARE_HASHTABLE(section_name_hash, 20);
DECLARE_HASHTABLE(rela_hash, 20);
};




2020-03-18 13:21:14

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 17/19] objtool: Optimize !vmlinux.o again

On Tue, Mar 17, 2020 at 06:02:51PM +0100, Peter Zijlstra wrote:
> When doing kbuild tests to see if the objtool changes affected those I
> found that there was a measurable regression:
>
> pre post
>
> real 1m13.594 1m16.488s
> user 34m58.246s 35m23.947s
> sys 4m0.393s 4m27.312s
>
> Perf showed that for small files the increased hash-table sizes were a
> measurable difference. Since we already have -l "vmlinux" to
> distinguish between the modes, make it also use a smaller portion of
> the hash-tables.
>
> This flips it into a small win:
>
> real 1m14.143s
> user 34m49.292s
> sys 3m44.746s
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>

There was one 'elf_' prefix gone missing. Updated patch below.

---
tools/objtool/elf.c | 53 ++++++++++++++++++++++++++++++++++------------------
tools/objtool/elf.h | 4 +--
2 files changed, 37 insertions(+), 20 deletions(-)

--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -27,6 +27,22 @@ static inline u32 str_hash(const char *s
return jhash(str, strlen(str), 0);
}

+static inline int elf_hash_bits(void)
+{
+ return vmlinux ? 20 : 16;
+}
+
+#define elf_hash_add(hashtable, node, key) \
+ hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+
+static void elf_hash_init(struct hlist_head *table)
+{
+ __hash_init(table, 1U << elf_hash_bits());
+}
+
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+
static void rb_add(struct rb_root *tree, struct rb_node *node,
int (*cmp)(struct rb_node *, const struct rb_node *))
{
@@ -115,7 +131,7 @@ struct section *find_section_by_name(str
{
struct section *sec;

- hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
if (!strcmp(sec->name, name))
return sec;

@@ -127,7 +143,7 @@ static struct section *find_section_by_i
{
struct section *sec;

- hash_for_each_possible(elf->section_hash, sec, hash, idx)
+ elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
if (sec->idx == idx)
return sec;

@@ -138,7 +154,7 @@ static struct symbol *find_symbol_by_ind
{
struct symbol *sym;

- hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+ elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
if (sym->idx == idx)
return sym;

@@ -205,7 +221,7 @@ struct symbol *find_symbol_by_name(struc
{
struct symbol *sym;

- hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
if (!strcmp(sym->name, name))
return sym;

@@ -224,7 +240,7 @@ struct rela *find_rela_by_dest_range(str
sec = sec->rela;

for_offset_range(o, offset, offset + len) {
- hash_for_each_possible(elf->rela_hash, rela, hash,
+ elf_hash_for_each_possible(elf->rela_hash, rela, hash,
sec_offset_hash(sec, o)) {
if (rela->sec != sec)
continue;
@@ -309,8 +325,8 @@ static int read_sections(struct elf *elf
sec->len = sec->sh.sh_size;

list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
}

if (stats)
@@ -394,8 +410,8 @@ static int read_symbols(struct elf *elf)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
- hash_add(elf->symbol_hash, &sym->hash, sym->idx);
- hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
}

if (stats)
@@ -504,7 +520,7 @@ static int read_relas(struct elf *elf)
}

list_add_tail(&rela->list, &sec->rela_list);
- hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+ elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
nr_rela++;
}
max_rela = max(max_rela, nr_rela);
@@ -531,15 +547,16 @@ struct elf *elf_read(const char *name, i
perror("malloc");
return NULL;
}
- memset(elf, 0, sizeof(*elf));
+ memset(elf, 0, offsetof(struct elf, sections));

- hash_init(elf->symbol_hash);
- hash_init(elf->symbol_name_hash);
- hash_init(elf->section_hash);
- hash_init(elf->section_name_hash);
- hash_init(elf->rela_hash);
INIT_LIST_HEAD(&elf->sections);

+ elf_hash_init(elf->symbol_hash);
+ elf_hash_init(elf->symbol_name_hash);
+ elf_hash_init(elf->section_hash);
+ elf_hash_init(elf->section_name_hash);
+ elf_hash_init(elf->rela_hash);
+
elf->fd = open(name, flags);
if (elf->fd == -1) {
fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -676,8 +693,8 @@ struct section *elf_create_section(struc
shstrtab->changed = true;

list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));

return sec;
}
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -78,8 +78,8 @@ struct elf {
struct list_head sections;
DECLARE_HASHTABLE(symbol_hash, 20);
DECLARE_HASHTABLE(symbol_name_hash, 20);
- DECLARE_HASHTABLE(section_hash, 16);
- DECLARE_HASHTABLE(section_name_hash, 16);
+ DECLARE_HASHTABLE(section_hash, 20);
+ DECLARE_HASHTABLE(section_name_hash, 20);
DECLARE_HASHTABLE(rela_hash, 20);
};

2020-03-20 16:21:54

by Miroslav Benes

[permalink] [raw]
Subject: Re: [PATCH v2 17/19] objtool: Optimize !vmlinux.o again

On Wed, 18 Mar 2020, Peter Zijlstra wrote:

> On Tue, Mar 17, 2020 at 06:02:51PM +0100, Peter Zijlstra wrote:
> > When doing kbuild tests to see if the objtool changes affected those I
> > found that there was a measurable regression:
> >
> > pre post
> >
> > real 1m13.594 1m16.488s
> > user 34m58.246s 35m23.947s
> > sys 4m0.393s 4m27.312s
> >
> > Perf showed that for small files the increased hash-table sizes were a
> > measurable difference. Since we already have -l "vmlinux" to
> > distinguish between the modes, make it also use a smaller portion of
> > the hash-tables.
> >
> > This flips it into a small win:
> >
> > real 1m14.143s
> > user 34m49.292s
> > sys 3m44.746s
> >
> > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
>
> There was one 'elf_' prefixing gone missing. Updated patch below.

I think there is one more missing in create_orc_entry().

Miroslav

2020-03-21 15:15:41

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 17/19] objtool: Optimize !vmlinux.o again

On Fri, Mar 20, 2020 at 05:20:47PM +0100, Miroslav Benes wrote:

> I think there is one more missing in create_orc_entry().

I'm thinking you're quite right about that.... lemme see what to do
about that.

2020-03-21 16:13:47

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 17/19] objtool: Optimize !vmlinux.o again

On Sat, Mar 21, 2020 at 04:14:21PM +0100, Peter Zijlstra wrote:
> On Fri, Mar 20, 2020 at 05:20:47PM +0100, Miroslav Benes wrote:
>
> > I think there is one more missing in create_orc_entry().
>
> I'm thinking you're quite right about that.... lemme see what to do
> about that.

---
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -472,6 +472,14 @@ static int read_symbols(struct elf *elf)
return -1;
}

+void elf_add_rela(struct elf *elf, struct rela *rela)
+{
+ struct section *sec = rela->sec;
+
+ list_add_tail(&rela->list, &sec->rela_list);
+ elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+}
+
static int read_relas(struct elf *elf)
{
struct section *sec;
@@ -519,8 +527,7 @@ static int read_relas(struct elf *elf)
return -1;
}

- list_add_tail(&rela->list, &sec->rela_list);
- elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+ elf_add_rela(elf, rela);
nr_rela++;
}
max_rela = max(max_rela, nr_rela);
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -127,6 +127,7 @@ struct section *elf_create_rela_section(
int elf_rebuild_rela_section(struct section *sec);
int elf_write(struct elf *elf);
void elf_close(struct elf *elf);
+void elf_add_rela(struct elf *elf, struct rela *rela);

#define for_each_sec(file, sec) \
list_for_each_entry(sec, &file->elf->sections, list)
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -111,8 +111,7 @@ static int create_orc_entry(struct elf *
rela->offset = idx * sizeof(int);
rela->sec = ip_relasec;

- list_add_tail(&rela->list, &ip_relasec->rela_list);
- hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+ elf_add_rela(elf, rela);

return 0;
}

2020-03-23 07:28:06

by Miroslav Benes

[permalink] [raw]
Subject: Re: [PATCH v2 17/19] objtool: Optimize !vmlinux.o again

On Sat, 21 Mar 2020, Peter Zijlstra wrote:

> On Sat, Mar 21, 2020 at 04:14:21PM +0100, Peter Zijlstra wrote:
> > On Fri, Mar 20, 2020 at 05:20:47PM +0100, Miroslav Benes wrote:
> >
> > > I think there is one more missing in create_orc_entry().
> >
> > I'm thinking you're quite right about that.... lemme see what to do
> > about that.
>
> ---
> --- a/tools/objtool/elf.c
> +++ b/tools/objtool/elf.c
> @@ -472,6 +472,14 @@ static int read_symbols(struct elf *elf)
> return -1;
> }
>
> +void elf_add_rela(struct elf *elf, struct rela *rela)
> +{
> + struct section *sec = rela->sec;
> +
> + list_add_tail(&rela->list, &sec->rela_list);
> + elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
> +}
> +
> static int read_relas(struct elf *elf)
> {
> struct section *sec;
> @@ -519,8 +527,7 @@ static int read_relas(struct elf *elf)
> return -1;
> }
>
> - list_add_tail(&rela->list, &sec->rela_list);
> - elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
> + elf_add_rela(elf, rela);
> nr_rela++;
> }
> max_rela = max(max_rela, nr_rela);
> --- a/tools/objtool/elf.h
> +++ b/tools/objtool/elf.h
> @@ -127,6 +127,7 @@ struct section *elf_create_rela_section(
> int elf_rebuild_rela_section(struct section *sec);
> int elf_write(struct elf *elf);
> void elf_close(struct elf *elf);
> +void elf_add_rela(struct elf *elf, struct rela *rela);
>
> #define for_each_sec(file, sec) \
> list_for_each_entry(sec, &file->elf->sections, list)
> --- a/tools/objtool/orc_gen.c
> +++ b/tools/objtool/orc_gen.c
> @@ -111,8 +111,7 @@ static int create_orc_entry(struct elf *
> rela->offset = idx * sizeof(int);
> rela->sec = ip_relasec;
>
> - list_add_tail(&rela->list, &ip_relasec->rela_list);
> - hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
> + elf_add_rela(elf, rela);
>
> return 0;
> }

Yup, looks good.

Miroslav