2024-05-06 18:02:40

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v7 0/4] dso/dsos memory savings and clean up

Hopefully the final 4 patches from:
https://lore.kernel.org/lkml/[email protected]/
a nearly half-year-old adventure in trying to lower perf's dynamic
memory use. These final changes fix reference count issues, some
introduced by making find return a getted (+1) reference counted
DSO. The last patch lowers the overhead by using container_of rather
than a pointer except when reference count checking - asserts maintain
the validity of this.

v5. Rebase, adding use of accessors to dso as necessary. Previous
versions were all rebases or dropping merged patches.

v6. Rebase, move the dsos__purge assignment of NULL to dso->dsos
above the dso__put to avoid a warning reported by Arnaldo. This was
part of patch 5 in the v5 series.

v7. Rebase dropping 5 merged patches. Break apart the fixes per
file. Tweak to the map dso logic as the else path was missing a
dso__get.

Ian Rogers (4):
perf map: Add missing dso__put in map__new
perf symbol-elf: Ensure dso__put in machine__process_ksymbol_register
perf symbol-elf: dso__load_sym_internal reference count fixes
perf dso: Use container_of to avoid a pointer in dso_data

tools/perf/tests/dso-data.c | 60 +++++++++++++++++-------------------
tools/perf/util/dso.c | 16 +++++++++-
tools/perf/util/dso.h | 2 ++
tools/perf/util/machine.c | 6 ++--
tools/perf/util/map.c | 1 +
tools/perf/util/symbol-elf.c | 51 +++++++++++++++---------------
6 files changed, 75 insertions(+), 61 deletions(-)

--
2.45.0.rc1.225.g2a3ae87e7f-goog



2024-05-06 18:03:23

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v7 4/4] perf dso: Use container_of to avoid a pointer in dso_data

The dso pointer in dso_data is necessary for reference count checking
to account for the dso_data forming a global list of open dso's with
references to the dso. The dso pointer also allows for the indirection
that reference count checking needs. Outside of reference count
checking the indirection isn't needed and container_of is more
efficient and saves space.

The reference count won't be increased by placing items onto the
global list, matching how things were before the reference count
checking change, but we assert the dso is in dsos holding it live (and
that the set of open dsos is a subset of all dsos for the
machine). Update the DSO data tests so that they use a dsos struct to
make the invariant true.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/tests/dso-data.c | 60 ++++++++++++++++++-------------------
tools/perf/util/dso.c | 16 +++++++++-
tools/perf/util/dso.h | 2 ++
3 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index fde4eca84b6f..5286ae8bd2d7 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -10,6 +10,7 @@
#include <sys/resource.h>
#include <api/fs/fs.h>
#include "dso.h"
+#include "dsos.h"
#include "machine.h"
#include "symbol.h"
#include "tests.h"
@@ -123,9 +124,10 @@ static int test__dso_data(struct test_suite *test __maybe_unused, int subtest __
TEST_ASSERT_VAL("No test file", file);

memset(&machine, 0, sizeof(machine));
+ dsos__init(&machine.dsos);

- dso = dso__new((const char *)file);
-
+ dso = dso__new(file);
+ TEST_ASSERT_VAL("Failed to add dso", !dsos__add(&machine.dsos, dso));
TEST_ASSERT_VAL("Failed to access to dso",
dso__data_fd(dso, &machine) >= 0);

@@ -170,6 +172,7 @@ static int test__dso_data(struct test_suite *test __maybe_unused, int subtest __
}

dso__put(dso);
+ dsos__exit(&machine.dsos);
unlink(file);
return 0;
}
@@ -199,41 +202,35 @@ static long open_files_cnt(void)
return nr - 1;
}

-static struct dso **dsos;
-
-static int dsos__create(int cnt, int size)
+static int dsos__create(int cnt, int size, struct dsos *dsos)
{
int i;

- dsos = malloc(sizeof(*dsos) * cnt);
- TEST_ASSERT_VAL("failed to alloc dsos array", dsos);
+ dsos__init(dsos);

for (i = 0; i < cnt; i++) {
- char *file;
+ struct dso *dso;
+ char *file = test_file(size);

- file = test_file(size);
TEST_ASSERT_VAL("failed to get dso file", file);
-
- dsos[i] = dso__new(file);
- TEST_ASSERT_VAL("failed to get dso", dsos[i]);
+ dso = dso__new(file);
+ TEST_ASSERT_VAL("failed to get dso", dso);
+ TEST_ASSERT_VAL("failed to add dso", !dsos__add(dsos, dso));
+ dso__put(dso);
}

return 0;
}

-static void dsos__delete(int cnt)
+static void dsos__delete(struct dsos *dsos)
{
- int i;
-
- for (i = 0; i < cnt; i++) {
- struct dso *dso = dsos[i];
+ for (unsigned int i = 0; i < dsos->cnt; i++) {
+ struct dso *dso = dsos->dsos[i];

dso__data_close(dso);
unlink(dso__name(dso));
- dso__put(dso);
}
-
- free(dsos);
+ dsos__exit(dsos);
}

static int set_fd_limit(int n)
@@ -267,10 +264,10 @@ static int test__dso_data_cache(struct test_suite *test __maybe_unused, int subt
/* and this is now our dso open FDs limit */
dso_cnt = limit / 2;
TEST_ASSERT_VAL("failed to create dsos\n",
- !dsos__create(dso_cnt, TEST_FILE_SIZE));
+ !dsos__create(dso_cnt, TEST_FILE_SIZE, &machine.dsos));

for (i = 0; i < (dso_cnt - 1); i++) {
- struct dso *dso = dsos[i];
+ struct dso *dso = machine.dsos.dsos[i];

/*
* Open dsos via dso__data_fd(), it opens the data
@@ -290,17 +287,17 @@ static int test__dso_data_cache(struct test_suite *test __maybe_unused, int subt
}

/* verify the first one is already open */
- TEST_ASSERT_VAL("dsos[0] is not open", dso__data(dsos[0])->fd != -1);
+ TEST_ASSERT_VAL("dsos[0] is not open", dso__data(machine.dsos.dsos[0])->fd != -1);

/* open +1 dso to reach the allowed limit */
- fd = dso__data_fd(dsos[i], &machine);
+ fd = dso__data_fd(machine.dsos.dsos[i], &machine);
TEST_ASSERT_VAL("failed to get fd", fd > 0);

/* should force the first one to be closed */
- TEST_ASSERT_VAL("failed to close dsos[0]", dso__data(dsos[0])->fd == -1);
+ TEST_ASSERT_VAL("failed to close dsos[0]", dso__data(machine.dsos.dsos[0])->fd == -1);

/* cleanup everything */
- dsos__delete(dso_cnt);
+ dsos__delete(&machine.dsos);

/* Make sure we did not leak any file descriptor. */
nr_end = open_files_cnt();
@@ -325,9 +322,9 @@ static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub
long nr_end, nr = open_files_cnt(), lim = new_limit(3);
int fd, fd_extra;

-#define dso_0 (dsos[0])
-#define dso_1 (dsos[1])
-#define dso_2 (dsos[2])
+#define dso_0 (machine.dsos.dsos[0])
+#define dso_1 (machine.dsos.dsos[1])
+#define dso_2 (machine.dsos.dsos[2])

/* Rest the internal dso open counter limit. */
reset_fd_limit();
@@ -347,7 +344,8 @@ static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub
TEST_ASSERT_VAL("failed to set file limit",
!set_fd_limit((lim)));

- TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(3, TEST_FILE_SIZE));
+ TEST_ASSERT_VAL("failed to create dsos\n",
+ !dsos__create(3, TEST_FILE_SIZE, &machine.dsos));

/* open dso_0 */
fd = dso__data_fd(dso_0, &machine);
@@ -386,7 +384,7 @@ static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub

/* cleanup everything */
close(fd_extra);
- dsos__delete(3);
+ dsos__delete(&machine.dsos);

/* Make sure we did not leak any file descriptor. */
nr_end = open_files_cnt();
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 27db65e96e04..dde706b71da7 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -497,14 +497,20 @@ static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
static void dso__list_add(struct dso *dso)
{
list_add_tail(&dso__data(dso)->open_entry, &dso__data_open);
+#ifdef REFCNT_CHECKING
dso__data(dso)->dso = dso__get(dso);
+#endif
+ /* Assume the dso is part of dsos, hence the optional reference count above. */
+ assert(dso__dsos(dso));
dso__data_open_cnt++;
}

static void dso__list_del(struct dso *dso)
{
list_del_init(&dso__data(dso)->open_entry);
+#ifdef REFCNT_CHECKING
dso__put(dso__data(dso)->dso);
+#endif
WARN_ONCE(dso__data_open_cnt <= 0,
"DSO data fd counter out of bounds.");
dso__data_open_cnt--;
@@ -654,9 +660,15 @@ static void close_dso(struct dso *dso)
static void close_first_dso(void)
{
struct dso_data *dso_data;
+ struct dso *dso;

dso_data = list_first_entry(&dso__data_open, struct dso_data, open_entry);
- close_dso(dso_data->dso);
+#ifdef REFCNT_CHECKING
+ dso = dso_data->dso;
+#else
+ dso = container_of(dso_data, struct dso, data);
+#endif
+ close_dso(dso);
}

static rlim_t get_fd_limit(void)
@@ -1449,7 +1461,9 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
data->fd = -1;
data->status = DSO_DATA_STATUS_UNKNOWN;
INIT_LIST_HEAD(&data->open_entry);
+#ifdef REFCNT_CHECKING
data->dso = NULL; /* Set when on the open_entry list. */
+#endif
}
return res;
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index f9689dd60de3..df2c98402af3 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -147,7 +147,9 @@ struct dso_cache {
struct dso_data {
struct rb_root cache;
struct list_head open_entry;
+#ifdef REFCNT_CHECKING
struct dso *dso;
+#endif
int fd;
int status;
u32 status_seen;
--
2.45.0.rc1.225.g2a3ae87e7f-goog


2024-05-06 18:07:04

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v7 3/4] perf symbol-elf: dso__load_sym_internal reference count fixes

dso__load_sym_internal passed curr_mapp as an out argument to
dso__process_kernel_symbol. The out argument was never used so remove
it to simplify the reference counting logic.

Simplify reference counting issues with curr_dso by ensuring the value
it points to has a +1 reference count, and then putting as
necessary. This avoids some reference counting games when the dso is
created, making the code more obviously correct, at the cost of some
possible overhead from the extra reference counting get/puts. This,
however, silences reference count checking and we can always optimize
from a seemingly correct point.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/symbol-elf.c | 51 ++++++++++++++++++------------------
1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 3be5e8d1e278..e398abfd13a0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1419,7 +1419,7 @@ void __weak arch__sym_update(struct symbol *s __maybe_unused,
static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
GElf_Sym *sym, GElf_Shdr *shdr,
struct maps *kmaps, struct kmap *kmap,
- struct dso **curr_dsop, struct map **curr_mapp,
+ struct dso **curr_dsop,
const char *section_name,
bool adjust_kernel_syms, bool kmodule, bool *remap_kernel,
u64 max_text_sh_offset)
@@ -1470,8 +1470,8 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
map__set_pgoff(map, shdr->sh_offset);
}

- *curr_mapp = map;
- *curr_dsop = dso;
+ dso__put(*curr_dsop);
+ *curr_dsop = dso__get(dso);
return 0;
}

@@ -1484,8 +1484,8 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
*/
if (kmodule && adjust_kernel_syms && is_exe_text(shdr->sh_flags) &&
shdr->sh_offset <= max_text_sh_offset) {
- *curr_mapp = map;
- *curr_dsop = dso;
+ dso__put(*curr_dsop);
+ *curr_dsop = dso__get(dso);
return 0;
}

@@ -1507,10 +1507,10 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
dso__set_binary_type(curr_dso, dso__binary_type(dso));
dso__set_adjust_symbols(curr_dso, dso__adjust_symbols(dso));
curr_map = map__new2(start, curr_dso);
- dso__put(curr_dso);
- if (curr_map == NULL)
+ if (curr_map == NULL) {
+ dso__put(curr_dso);
return -1;
-
+ }
if (dso__kernel(curr_dso))
map__kmap(curr_map)->kmaps = kmaps;

@@ -1524,21 +1524,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
dso__set_symtab_type(curr_dso, dso__symtab_type(dso));
if (maps__insert(kmaps, curr_map))
return -1;
- /*
- * Add it before we drop the reference to curr_map, i.e. while
- * we still are sure to have a reference to this DSO via
- * *curr_map->dso.
- */
dsos__add(&maps__machine(kmaps)->dsos, curr_dso);
- /* kmaps already got it */
- map__put(curr_map);
dso__set_loaded(curr_dso);
- *curr_mapp = curr_map;
+ dso__put(*curr_dsop);
*curr_dsop = curr_dso;
} else {
- *curr_dsop = map__dso(curr_map);
- map__put(curr_map);
+ dso__put(*curr_dsop);
+ *curr_dsop = dso__get(map__dso(curr_map));
}
+ map__put(curr_map);

return 0;
}
@@ -1549,11 +1543,9 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
{
struct kmap *kmap = dso__kernel(dso) ? map__kmap(map) : NULL;
struct maps *kmaps = kmap ? map__kmaps(map) : NULL;
- struct map *curr_map = map;
- struct dso *curr_dso = dso;
+ struct dso *curr_dso = NULL;
Elf_Data *symstrs, *secstrs, *secstrs_run, *secstrs_sym;
uint32_t nr_syms;
- int err = -1;
uint32_t idx;
GElf_Ehdr ehdr;
GElf_Shdr shdr;
@@ -1656,6 +1648,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
if (kmodule && adjust_kernel_syms)
max_text_sh_offset = max_text_section(runtime_ss->elf, &runtime_ss->ehdr);

+ curr_dso = dso__get(dso);
elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
struct symbol *f;
const char *elf_name = elf_sym__name(&sym, symstrs);
@@ -1744,9 +1737,13 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
--sym.st_value;

if (dso__kernel(dso)) {
- if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
- section_name, adjust_kernel_syms, kmodule,
- &remap_kernel, max_text_sh_offset))
+ if (dso__process_kernel_symbol(dso, map, &sym, &shdr,
+ kmaps, kmap, &curr_dso,
+ section_name,
+ adjust_kernel_syms,
+ kmodule,
+ &remap_kernel,
+ max_text_sh_offset))
goto out_elf_end;
} else if ((used_opd && runtime_ss->adjust_symbols) ||
(!used_opd && syms_ss->adjust_symbols)) {
@@ -1795,6 +1792,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
__symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso));
nr++;
}
+ dso__put(curr_dso);

/*
* For misannotated, zeroed, ASM function sizes.
@@ -1810,9 +1808,10 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
maps__fixup_end(kmaps);
}
}
- err = nr;
+ return nr;
out_elf_end:
- return err;
+ dso__put(curr_dso);
+ return -1;
}

int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
--
2.45.0.rc1.225.g2a3ae87e7f-goog


2024-05-06 18:12:12

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v7 2/4] perf symbol-elf: Ensure dso__put in machine__process_ksymbol_register

Calling dso__put after the map creation causes a use after put in
dso__set_loaded. To ensure there is a +1 reference count on both sides
of the if-else, do a dso__get on the found map's dso.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/machine.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 0b8fb14f5ff6..a3ff2ab154bd 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -683,7 +683,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct perf_sample *sample __maybe_unused)
{
struct symbol *sym;
- struct dso *dso;
+ struct dso *dso = NULL;
struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
int err = 0;

@@ -696,7 +696,6 @@ static int machine__process_ksymbol_register(struct machine *machine,
}
dso__set_kernel(dso, DSO_SPACE__KERNEL);
map = map__new2(0, dso);
- dso__put(dso);
if (!map) {
err = -ENOMEM;
goto out;
@@ -722,7 +721,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
dso__set_long_name(dso, "", false);
}
} else {
- dso = map__dso(map);
+ dso = dso__get(map__dso(map));
}

sym = symbol__new(map__map_ip(map, map__start(map)),
@@ -735,6 +734,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
dso__insert_symbol(dso, sym);
out:
map__put(map);
+ dso__put(dso);
return err;
}

--
2.45.0.rc1.225.g2a3ae87e7f-goog


2024-05-06 20:36:54

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v7 0/4] dso/dsos memory savings and clean up

On Mon, May 06, 2024 at 11:01:00AM -0700, Ian Rogers wrote:
> v7. Rebase dropping 5 merged patches. Break apart the fixes per
> file. Tweak to the map dso logic as the else path was missing a
> dso__get.

Thanks, applied to perf-tools-next,

- Arnaldo