Hello,
I was looking at the performance of faddr2line and noticed that it spends
most of its time performing two tasks:
- (1) Making redundant calls to readelf and addr2line for each address
(e.g., it makes 7 readelf calls and 2 addr2line calls when converting 1
address, and it makes 252 readelf calls and 51 addr2line calls when
converting 50 addresses); and
- (2) Calculating a symbol’s size by unnecessarily looping through every
symbol in a particular section.
This patch series consists of the following parts:
- Patches 1-2 reduce the total number of readelf calls to one,
- Patches 3-4 make minor changes in preparation for the following patches,
- Patches 5-6 reduce the total number of addr2line calls to one, and
- Patch 7 modifies the symbol size calculation to only check two symbols.
I evaluated the performance of the patch series by running faddr2line with
a standard kernel image (X86_64 defconfig with CONFIG_DEBUG_INFO enabled).
When converting only 1 address, the patch series gives a negligible
speedup. When converting 50 addresses, however, it gives a 15x speedup.
Cheers,
Brian
Brian Johannesmeyer (7):
scripts/faddr2line: Reduce number of readelf calls to three
scripts/faddr2line: Combine three readelf calls into one
scripts/faddr2line: Check vmlinux only once
scripts/faddr2line: Pass --addresses argument to addr2line
scripts/faddr2line: Invoke addr2line as a single long-running process
scripts/faddr2line: Remove call to addr2line from find_dir_prefix()
scripts/faddr2line: Check only two symbols when calculating symbol
size
scripts/faddr2line | 110 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 84 insertions(+), 26 deletions(-)
--
2.34.1
Rather than calling readelf several times for each invocation of
__faddr2line, call readelf only three times at the beginning, and save its
result for future use.
Signed-off-by: Brian Johannesmeyer <[email protected]>
---
scripts/faddr2line | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 587415a52b6f..bf394bfd526a 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -87,7 +87,7 @@ command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed"
find_dir_prefix() {
local objfile=$1
- local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' |
+ local start_kernel_addr=$(echo "${ELF_SYMS}" | sed 's/\[.*\]//' |
${AWK} '$8 == "start_kernel" {printf "0x%s", $2}')
[[ -z $start_kernel_addr ]] && return
@@ -103,6 +103,14 @@ find_dir_prefix() {
return 0
}
+run_readelf() {
+ local objfile=$1
+
+ ELF_FILEHEADER=$(${READELF} --file-header $objfile)
+ ELF_SECHEADERS=$(${READELF} --section-headers --wide $objfile)
+ ELF_SYMS=$(${READELF} --symbols --wide $objfile)
+}
+
__faddr2line() {
local objfile=$1
local func_addr=$2
@@ -125,7 +133,7 @@ __faddr2line() {
# vmlinux uses absolute addresses in the section table rather than
# section offsets.
- local file_type=$(${READELF} --file-header $objfile |
+ local file_type=$(echo "${ELF_FILEHEADER}" |
${AWK} '$1 == "Type:" { print $2; exit }')
if [[ $file_type = "EXEC" ]] || [[ $file_type == "DYN" ]]; then
is_vmlinux=1
@@ -143,8 +151,7 @@ __faddr2line() {
local sec_name
# Get the section size:
- sec_size=$(${READELF} --section-headers --wide $objfile |
- sed 's/\[ /\[/' |
+ sec_size=$(echo "${ELF_SECHEADERS}" | sed 's/\[ /\[/' |
${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print "0x" $6; exit }')
if [[ -z $sec_size ]]; then
@@ -154,8 +161,7 @@ __faddr2line() {
fi
# Get the section name:
- sec_name=$(${READELF} --section-headers --wide $objfile |
- sed 's/\[ /\[/' |
+ sec_name=$(echo "${ELF_SECHEADERS}" | sed 's/\[ /\[/' |
${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print $2; exit }')
if [[ -z $sec_name ]]; then
@@ -197,7 +203,7 @@ __faddr2line() {
found=2
break
fi
- done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
+ done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
if [[ $found = 0 ]]; then
warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size"
@@ -278,7 +284,7 @@ __faddr2line() {
DONE=1
- done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$8 == fn')
+ done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$8 == fn')
}
[[ $# -lt 2 ]] && usage
@@ -291,7 +297,9 @@ LIST=0
[[ ! -f $objfile ]] && die "can't find objfile $objfile"
shift
-${READELF} --section-headers --wide $objfile | ${GREP} -q '\.debug_info' || die "CONFIG_DEBUG_INFO not enabled"
+run_readelf $objfile
+
+echo "${ELF_SECHEADERS}" | ${GREP} -q '\.debug_info' || die "CONFIG_DEBUG_INFO not enabled"
DIR_PREFIX=supercalifragilisticexpialidocious
find_dir_prefix $objfile
--
2.34.1
Rather than calling readelf three separate times to collect three different
types of info, call it only once, and parse out the different types of info
from its output.
Signed-off-by: Brian Johannesmeyer <[email protected]>
---
scripts/faddr2line | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/scripts/faddr2line b/scripts/faddr2line
index bf394bfd526a..f011bda4ed25 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -105,10 +105,14 @@ find_dir_prefix() {
run_readelf() {
local objfile=$1
-
- ELF_FILEHEADER=$(${READELF} --file-header $objfile)
- ELF_SECHEADERS=$(${READELF} --section-headers --wide $objfile)
- ELF_SYMS=$(${READELF} --symbols --wide $objfile)
+ local out=$(${READELF} --file-header --section-headers --symbols --wide $objfile)
+
+ # This assumes that readelf first prints the file header, then the section headers, then the symbols.
+ # Note: It seems that GNU readelf does not prefix section headers with the "There are X section headers"
+ # line when multiple options are given, so let's also match with the "Section Headers:" line.
+ ELF_FILEHEADER=$(echo "${out}" | sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/q;p')
+ ELF_SECHEADERS=$(echo "${out}" | sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/,$p' | sed -n '/Symbol table .* contains [0-9]* entries:/q;p')
+ ELF_SYMS=$(echo "${out}" | sed -n '/Symbol table .* contains [0-9]* entries:/,$p')
}
__faddr2line() {
--
2.34.1
Rather than checking whether the object file is vmlinux for each invocation
of __faddr2line, check it only once beforehand.
Signed-off-by: Brian Johannesmeyer <[email protected]>
---
scripts/faddr2line | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/scripts/faddr2line b/scripts/faddr2line
index f011bda4ed25..bb3b5f03f4ea 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -115,6 +115,17 @@ run_readelf() {
ELF_SYMS=$(echo "${out}" | sed -n '/Symbol table .* contains [0-9]* entries:/,$p')
}
+check_vmlinux() {
+ # vmlinux uses absolute addresses in the section table rather than
+ # section offsets.
+ IS_VMLINUX=0
+ local file_type=$(echo "${ELF_FILEHEADER}" |
+ ${AWK} '$1 == "Type:" { print $2; exit }')
+ if [[ $file_type = "EXEC" ]] || [[ $file_type == "DYN" ]]; then
+ IS_VMLINUX=1
+ fi
+}
+
__faddr2line() {
local objfile=$1
local func_addr=$2
@@ -125,8 +136,6 @@ __faddr2line() {
local func_offset=${func_addr#*+}
func_offset=${func_offset%/*}
local user_size=
- local file_type
- local is_vmlinux=0
[[ $func_addr =~ "/" ]] && user_size=${func_addr#*/}
if [[ -z $sym_name ]] || [[ -z $func_offset ]] || [[ $sym_name = $func_addr ]]; then
@@ -135,14 +144,6 @@ __faddr2line() {
return
fi
- # vmlinux uses absolute addresses in the section table rather than
- # section offsets.
- local file_type=$(echo "${ELF_FILEHEADER}" |
- ${AWK} '$1 == "Type:" { print $2; exit }')
- if [[ $file_type = "EXEC" ]] || [[ $file_type == "DYN" ]]; then
- is_vmlinux=1
- fi
-
# Go through each of the object's symbols which match the func name.
# In rare cases there might be duplicates, in which case we print all
# matches.
@@ -260,7 +261,7 @@ __faddr2line() {
# Pass section address to addr2line and strip absolute paths
# from the output:
local args="--functions --pretty-print --inlines --exe=$objfile"
- [[ $is_vmlinux = 0 ]] && args="$args --section=$sec_name"
+ [[ $IS_VMLINUX = 0 ]] && args="$args --section=$sec_name"
local output=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;")
[[ -z $output ]] && continue
@@ -305,6 +306,8 @@ run_readelf $objfile
echo "${ELF_SECHEADERS}" | ${GREP} -q '\.debug_info' || die "CONFIG_DEBUG_INFO not enabled"
+check_vmlinux
+
DIR_PREFIX=supercalifragilisticexpialidocious
find_dir_prefix $objfile
--
2.34.1
In preparation for identifying an addr2line sentinel. See previous work
[0], which applies a similar change to perf.
[0] commit 8dc26b6f718a ("perf srcline: Make sentinel reading for binutils
addr2line more robust")
Signed-off-by: Brian Johannesmeyer <[email protected]>
---
scripts/faddr2line | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/scripts/faddr2line b/scripts/faddr2line
index bb3b5f03f4ea..820680c59a39 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -260,9 +260,12 @@ __faddr2line() {
# Pass section address to addr2line and strip absolute paths
# from the output:
- local args="--functions --pretty-print --inlines --exe=$objfile"
+ local args="--functions --pretty-print --inlines --addresses --exe=$objfile"
[[ $IS_VMLINUX = 0 ]] && args="$args --section=$sec_name"
- local output=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;")
+ local output_with_addr=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;")
+ [[ -z $output_with_addr ]] && continue
+
+ local output=$(echo "${output_with_addr}" | sed 's/^0x[0-9a-fA-F]*: //')
[[ -z $output ]] && continue
# Default output (non --list):
--
2.34.1
Rather than invoking a separate addr2line process for each address, invoke
a single addr2line coprocess, and pass each address to its stdin. Previous
work [0] applied a similar change to perf, leading to a ~60x speed-up [1].
If using an object file that is _not_ vmlinux, faddr2line passes a section
name argument to addr2line. Because we do not know until runtime which
section names will be passed to addr2line, we cannot apply this change to
non-vmlinux object files. Hence, it only applies to vmlinux.
[0] commit be8ecc57f180 ("perf srcline: Use long-running addr2line per
DSO")
[1] Link:
https://eighty-twenty.org/2021/09/09/perf-addr2line-speed-improvement
Signed-off-by: Brian Johannesmeyer <[email protected]>
---
scripts/faddr2line | 52 +++++++++++++++++++++++++++++++++++++++-------
1 file changed, 45 insertions(+), 7 deletions(-)
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 820680c59a39..48fc8cfc80df 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -126,6 +126,48 @@ check_vmlinux() {
fi
}
+init_addr2line() {
+ local objfile=$1
+
+ check_vmlinux
+
+ ADDR2LINE_ARGS="--functions --pretty-print --inlines --addresses --exe=$objfile"
+ if [[ $IS_VMLINUX = 1 ]]; then
+ # If the executable file is vmlinux, we don't pass section names to
+ # addr2line, so we can launch it now as a single long-running process.
+ coproc ADDR2LINE_PROC (${ADDR2LINE} ${ADDR2LINE_ARGS})
+ fi
+}
+
+run_addr2line() {
+ local addr=$1
+ local sec_name=$2
+
+ if [[ $IS_VMLINUX = 1 ]]; then
+ # We send to the addr2line process: (1) the address, then (2) a sentinel
+ # value, i.e., something that can't be interpreted as a valid address
+ # (i.e., ","). This causes addr2line to write out: (1) the answer for
+ # our address, then (2) either "?? ??:0" or "0x0...0: ..." (if
+ # using binutils' addr2line), or "," (if using LLVM's addr2line).
+ echo ${addr} >& "${ADDR2LINE_PROC[1]}"
+ echo "," >& "${ADDR2LINE_PROC[1]}"
+ local first_line
+ read -r first_line <& "${ADDR2LINE_PROC[0]}"
+ ADDR2LINE_OUT=$(echo "${first_line}" | sed 's/^0x[0-9a-fA-F]*: //')
+ while read -r line <& "${ADDR2LINE_PROC[0]}"; do
+ if [[ "$line" == "?? ??:0" ]] || [[ "$line" == "," ]] || [[ $(echo "$line" | ${GREP} "^0x00*: ") ]]; then
+ break
+ fi
+ ADDR2LINE_OUT+=$'\n'$(echo "$line" | sed 's/^0x[0-9a-fA-F]*: //')
+ done
+ else
+ # Run addr2line as a single invocation.
+ local sec_arg
+ [[ -z $sec_name ]] && sec_arg="" || sec_arg="--section=${sec_name}"
+ ADDR2LINE_OUT=$(${ADDR2LINE} ${ADDR2LINE_ARGS} ${sec_arg} ${addr} | sed 's/^0x[0-9a-fA-F]*: //')
+ fi
+}
+
__faddr2line() {
local objfile=$1
local func_addr=$2
@@ -260,12 +302,8 @@ __faddr2line() {
# Pass section address to addr2line and strip absolute paths
# from the output:
- local args="--functions --pretty-print --inlines --addresses --exe=$objfile"
- [[ $IS_VMLINUX = 0 ]] && args="$args --section=$sec_name"
- local output_with_addr=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;")
- [[ -z $output_with_addr ]] && continue
-
- local output=$(echo "${output_with_addr}" | sed 's/^0x[0-9a-fA-F]*: //')
+ run_addr2line $addr $sec_name
+ local output=$(echo "${ADDR2LINE_OUT}" | sed "s; $dir_prefix\(\./\)*; ;")
[[ -z $output ]] && continue
# Default output (non --list):
@@ -309,7 +347,7 @@ run_readelf $objfile
echo "${ELF_SECHEADERS}" | ${GREP} -q '\.debug_info' || die "CONFIG_DEBUG_INFO not enabled"
-check_vmlinux
+init_addr2line $objfile
DIR_PREFIX=supercalifragilisticexpialidocious
find_dir_prefix $objfile
--
2.34.1
Use the single long-running faddr2line process from find_dir_prefix().
Signed-off-by: Brian Johannesmeyer <[email protected]>
---
scripts/faddr2line | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 48fc8cfc80df..1fa6beef9f97 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -85,15 +85,17 @@ command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed"
# init/main.c! This only works for vmlinux. Otherwise it falls back to
# printing the absolute path.
find_dir_prefix() {
- local objfile=$1
-
local start_kernel_addr=$(echo "${ELF_SYMS}" | sed 's/\[.*\]//' |
${AWK} '$8 == "start_kernel" {printf "0x%s", $2}')
[[ -z $start_kernel_addr ]] && return
- local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr)
- [[ -z $file_line ]] && return
+ run_addr2line ${start_kernel_addr} ""
+ [[ -z $ADDR2LINE_OUT ]] && return
+ local file_line=${ADDR2LINE_OUT#* at }
+ if [[ -z $file_line ]] || [[ $file_line = $ADDR2LINE_OUT ]]; then
+ return
+ fi
local prefix=${file_line%init/main.c:*}
if [[ -z $prefix ]] || [[ $prefix = $file_line ]]; then
return
@@ -350,7 +352,7 @@ echo "${ELF_SECHEADERS}" | ${GREP} -q '\.debug_info' || die "CONFIG_DEBUG_INFO n
init_addr2line $objfile
DIR_PREFIX=supercalifragilisticexpialidocious
-find_dir_prefix $objfile
+find_dir_prefix
FIRST=1
while [[ $# -gt 0 ]]; do
--
2.34.1
Rather than looping through each symbol in a particular section to
calculate a symbol's size, grep for the symbol and its immediate
successor, and only use those two symbols.
Signed-off-by: Brian Johannesmeyer <[email protected]>
---
scripts/faddr2line | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 1fa6beef9f97..70d5a4602a92 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -252,7 +252,7 @@ __faddr2line() {
found=2
break
fi
- done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
+ done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2 | ${GREP} -A1 " ${sym_name}$")
if [[ $found = 0 ]]; then
warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size"
--
2.34.1
On Mon, Mar 11, 2024 at 09:40:12PM +0100, Brian Johannesmeyer wrote:
> Hello,
>
> I was looking at the performance of faddr2line and noticed that it spends
> most of its time performing two tasks:
> - (1) Making redundant calls to readelf and addr2line for each address
> (e.g., it makes 7 readelf calls and 2 addr2line calls when converting 1
> address, and it makes 252 readelf calls and 51 addr2line calls when
> converting 50 addresses); and
> - (2) Calculating a symbol’s size by unnecessarily looping through every
> symbol in a particular section.
>
> This patch series consists of the following parts:
> - Patches 1-2 reduce the total number of readelf calls to one,
> - Patches 3-4 make minor changes in preparation for the following patches,
> - Patches 5-6 reduce the total number of addr2line calls to one, and
> - Patch 7 modifies the symbol size calculation to only check two symbols.
>
> I evaluated the performance of the patch series by running faddr2line with
> a standard kernel image (X86_64 defconfig with CONFIG_DEBUG_INFO enabled).
> When converting only 1 address, the patch series gives a negligible
> speedup. When converting 50 addresses, however, it gives a 15x speedup.
This looks really nice.
If testing runs ok, I'll go ahead and queue it up.
Thanks!
--
Josh
On Mon, Mar 11, 2024 at 1:40 PM Brian Johannesmeyer
<[email protected]> wrote:
>
> Rather than looping through each symbol in a particular section to
> calculate a symbol's size, grep for the symbol and its immediate
> successor, and only use those two symbols.
>
> Signed-off-by: Brian Johannesmeyer <[email protected]>
> ---
> scripts/faddr2line | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/scripts/faddr2line b/scripts/faddr2line
> index 1fa6beef9f97..70d5a4602a92 100755
> --- a/scripts/faddr2line
> +++ b/scripts/faddr2line
> @@ -252,7 +252,7 @@ __faddr2line() {
> found=2
> break
> fi
> - done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
> + done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2 | ${GREP} -A1 " ${sym_name}$")
>
> if [[ $found = 0 ]]; then
> warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size"
> --
> 2.34.1
>
Hey Josh,
I just noticed that this patch does not correctly handle duplicate
sym_names. To fix, the "--no-group-separator" option should be added
as follows:
> - done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
> + done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2 | ${GREP} -A1 --no-group-separator " ${sym_name}$")
Would you like me to re-submit the patch series with this fix?
Thanks,
Brian
On Thu, Apr 11, 2024 at 12:28:29PM -0700, Brian Johannesmeyer wrote:
> Hey Josh,
>
> I just noticed that this patch does not correctly handle duplicate
> sym_names. To fix, the "--no-group-separator" option should be added
> as follows:
>
> > - done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
> > + done < <(echo "${ELF_SYMS}" | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2 | ${GREP} -A1 --no-group-separator " ${sym_name}$")
>
> Would you like me to re-submit the patch series with this fix?
Yes, that would be helpful. Thanks!
--
Josh