branch_stack struct has bit field definition which
produces different bit ordering for big/little endian.
Because of this, when branch_stack sample is collected
in a BE system and viewed/reported in a LE system, bit
fields of the branch stack are not presented properly.
To address this issue, a evsel__bitfield_swap_branch_stack()
is defined and introduced in evsel__parse_sample.
Signed-off-by: Madhavan Srinivasan <[email protected]>
---
Changelog v1:
- Renamed function and macro
- Added comments in code
tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
tools/perf/util/evsel.h | 13 ++++++++
2 files changed, 85 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbfeceb2546c..746e642d4d32 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
data->weight = *array;
}
+u64 evsel__bitfield_swap_branch_flags(u64 value)
+{
+ u64 new_val = 0;
+
+ /*
+ * branch_flags
+ * union {
+ * u64 values;
+ * struct {
+ * mispred:1 //target mispredicted
+ * predicted:1 //target predicted
+ * in_tx:1 //in transaction
+ * abort:1 //transaction abort
+ * cycles:16 //cycle count to last branch
+ * type:4 //branch type
+ * reserved:40
+ * }
+ * }
+ *
+ * Avoid bswap64() the entire branch_flag.value,
+ * as it has variable bit-field sizes. Instead the
+ * macro takes the bit-field position/size,
+ * swaps it based on the host endianness.
+ */
+ if (bigendian()) {
+ new_val = bitfield_swap(value, 0, 1);
+ new_val |= bitfield_swap(value, 1, 1);
+ new_val |= bitfield_swap(value, 2, 1);
+ new_val |= bitfield_swap(value, 3, 1);
+ new_val |= bitfield_swap(value, 4, 16);
+ new_val |= bitfield_swap(value, 20, 4);
+ new_val |= bitfield_swap(value, 24, 40);
+ } else {
+ new_val = bitfield_swap(value, 63, 1);
+ new_val |= bitfield_swap(value, 62, 1);
+ new_val |= bitfield_swap(value, 61, 1);
+ new_val |= bitfield_swap(value, 60, 1);
+ new_val |= bitfield_swap(value, 44, 16);
+ new_val |= bitfield_swap(value, 40, 4);
+ new_val |= bitfield_swap(value, 0, 40);
+ }
+
+ return new_val;
+}
+
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
@@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (type & PERF_SAMPLE_BRANCH_STACK) {
const u64 max_branch_nr = UINT64_MAX /
sizeof(struct branch_entry);
+ struct branch_entry *e;
+ unsigned int i;
OVERFLOW_CHECK_u64(array);
data->branch_stack = (struct branch_stack *)array++;
@@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
return -EFAULT;
sz = data->branch_stack->nr * sizeof(struct branch_entry);
- if (evsel__has_branch_hw_idx(evsel))
+ if (evsel__has_branch_hw_idx(evsel)) {
sz += sizeof(u64);
- else
+ e = &data->branch_stack->entries[0];
+ } else {
data->no_hw_idx = true;
+ /*
+ * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
+ * only nr and entries[] will be output by kernel.
+ */
+ e = (struct branch_entry *)&data->branch_stack->hw_idx;
+ }
+
+ if (swapped) {
+ /*
+ * struct branch_flag does not have endian
+ * specific bit field definition. And bswap
+ * will not resolve the issue, since these
+ * are bit fields.
+ *
+ * evsel__bitfield_swap_branch_flags() uses a
+ * bitfield_swap macro to swap the bit position
+ * based on the host endians.
+ */
+ for (i = 0; i < data->branch_stack->nr; i++, e++)
+ e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
+ }
+
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 1f7edfa8568a..2e82cdbe2c08 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
bool evsel__is_leader(struct evsel *evsel);
void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
+
+/*
+ * Macro to swap the bit-field postition and size.
+ * Used when,
+ * - dont need to swap the entire u64 &&
+ * - when u64 has variable bit-field sizes &&
+ * - when presented in a host endian which is different
+ * than the source endian of the perf.data file
+ */
+#define bitfield_swap(src, pos, size) \
+ ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
+
+u64 evsel__bitfield_swap_branch_flags(u64 value);
#endif /* __PERF_EVSEL_H */
--
2.31.1
On Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan wrote:
> branch_stack struct has bit field definition which
> produces different bit ordering for big/little endian.
> Because of this, when branch_stack sample is collected
> in a BE system and viewed/reported in a LE system, bit
> fields of the branch stack are not presented properly.
> To address this issue, a evsel__bitfield_swap_branch_stack()
> is defined and introduced in evsel__parse_sample.
>
> Signed-off-by: Madhavan Srinivasan <[email protected]>
> ---
> Changelog v1:
> - Renamed function and macro
> - Added comments in code
>
> tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
> tools/perf/util/evsel.h | 13 ++++++++
> 2 files changed, 85 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..746e642d4d32 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
> data->weight = *array;
> }
>
> +u64 evsel__bitfield_swap_branch_flags(u64 value)
> +{
> + u64 new_val = 0;
> +
> + /*
> + * branch_flags
> + * union {
> + * u64 values;
> + * struct {
> + * mispred:1 //target mispredicted
> + * predicted:1 //target predicted
> + * in_tx:1 //in transaction
> + * abort:1 //transaction abort
> + * cycles:16 //cycle count to last branch
> + * type:4 //branch type
> + * reserved:40
> + * }
> + * }
> + *
> + * Avoid bswap64() the entire branch_flag.value,
> + * as it has variable bit-field sizes. Instead the
> + * macro takes the bit-field position/size,
> + * swaps it based on the host endianness.
> + */
> + if (bigendian()) {
> + new_val = bitfield_swap(value, 0, 1);
> + new_val |= bitfield_swap(value, 1, 1);
> + new_val |= bitfield_swap(value, 2, 1);
> + new_val |= bitfield_swap(value, 3, 1);
> + new_val |= bitfield_swap(value, 4, 16);
> + new_val |= bitfield_swap(value, 20, 4);
> + new_val |= bitfield_swap(value, 24, 40);
> + } else {
> + new_val = bitfield_swap(value, 63, 1);
> + new_val |= bitfield_swap(value, 62, 1);
> + new_val |= bitfield_swap(value, 61, 1);
> + new_val |= bitfield_swap(value, 60, 1);
> + new_val |= bitfield_swap(value, 44, 16);
> + new_val |= bitfield_swap(value, 40, 4);
> + new_val |= bitfield_swap(value, 0, 40);
> + }
> +
> + return new_val;
> +}
> +
> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> struct perf_sample *data)
> {
> @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> if (type & PERF_SAMPLE_BRANCH_STACK) {
> const u64 max_branch_nr = UINT64_MAX /
> sizeof(struct branch_entry);
> + struct branch_entry *e;
> + unsigned int i;
>
> OVERFLOW_CHECK_u64(array);
> data->branch_stack = (struct branch_stack *)array++;
> @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> return -EFAULT;
>
> sz = data->branch_stack->nr * sizeof(struct branch_entry);
> - if (evsel__has_branch_hw_idx(evsel))
> + if (evsel__has_branch_hw_idx(evsel)) {
> sz += sizeof(u64);
> - else
> + e = &data->branch_stack->entries[0];
> + } else {
> data->no_hw_idx = true;
> + /*
> + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
> + * only nr and entries[] will be output by kernel.
> + */
> + e = (struct branch_entry *)&data->branch_stack->hw_idx;
> + }
> +
> + if (swapped) {
thanks for all the comments, make it easy to review
> + /*
> + * struct branch_flag does not have endian
> + * specific bit field definition. And bswap
> + * will not resolve the issue, since these
> + * are bit fields.
> + *
> + * evsel__bitfield_swap_branch_flags() uses a
> + * bitfield_swap macro to swap the bit position
> + * based on the host endians.
> + */
> + for (i = 0; i < data->branch_stack->nr; i++, e++)
> + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
one last thing that confuses me.. the sample is already swapped at this
point, right? with perf_event__all64_swap function
should you swap it back first and then do the proper bitfield swap?
like we do in PERF_SAMPLE_RAW to get proper u32 values
thanks,
jirka
> + }
> +
> OVERFLOW_CHECK(array, sz, max_size);
> array = (void *)array + sz;
> }
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..2e82cdbe2c08 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
> bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
> bool evsel__is_leader(struct evsel *evsel);
> void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
> +
> +/*
> + * Macro to swap the bit-field postition and size.
> + * Used when,
> + * - dont need to swap the entire u64 &&
> + * - when u64 has variable bit-field sizes &&
> + * - when presented in a host endian which is different
> + * than the source endian of the perf.data file
> + */
> +#define bitfield_swap(src, pos, size) \
> + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
> +
> +u64 evsel__bitfield_swap_branch_flags(u64 value);
> #endif /* __PERF_EVSEL_H */
> --
> 2.31.1
>
On Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan wrote:
> branch_stack struct has bit field definition which
> produces different bit ordering for big/little endian.
> Because of this, when branch_stack sample is collected
> in a BE system and viewed/reported in a LE system, bit
> fields of the branch stack are not presented properly.
> To address this issue, a evsel__bitfield_swap_branch_stack()
> is defined and introduced in evsel__parse_sample.
>
> Signed-off-by: Madhavan Srinivasan <[email protected]>
for both patches
Acked-by: Jiri Olsa <[email protected]>
thanks,
jirka
> ---
> Changelog v1:
> - Renamed function and macro
> - Added comments in code
>
> tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
> tools/perf/util/evsel.h | 13 ++++++++
> 2 files changed, 85 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..746e642d4d32 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
> data->weight = *array;
> }
>
> +u64 evsel__bitfield_swap_branch_flags(u64 value)
> +{
> + u64 new_val = 0;
> +
> + /*
> + * branch_flags
> + * union {
> + * u64 values;
> + * struct {
> + * mispred:1 //target mispredicted
> + * predicted:1 //target predicted
> + * in_tx:1 //in transaction
> + * abort:1 //transaction abort
> + * cycles:16 //cycle count to last branch
> + * type:4 //branch type
> + * reserved:40
> + * }
> + * }
> + *
> + * Avoid bswap64() the entire branch_flag.value,
> + * as it has variable bit-field sizes. Instead the
> + * macro takes the bit-field position/size,
> + * swaps it based on the host endianness.
> + */
> + if (bigendian()) {
> + new_val = bitfield_swap(value, 0, 1);
> + new_val |= bitfield_swap(value, 1, 1);
> + new_val |= bitfield_swap(value, 2, 1);
> + new_val |= bitfield_swap(value, 3, 1);
> + new_val |= bitfield_swap(value, 4, 16);
> + new_val |= bitfield_swap(value, 20, 4);
> + new_val |= bitfield_swap(value, 24, 40);
> + } else {
> + new_val = bitfield_swap(value, 63, 1);
> + new_val |= bitfield_swap(value, 62, 1);
> + new_val |= bitfield_swap(value, 61, 1);
> + new_val |= bitfield_swap(value, 60, 1);
> + new_val |= bitfield_swap(value, 44, 16);
> + new_val |= bitfield_swap(value, 40, 4);
> + new_val |= bitfield_swap(value, 0, 40);
> + }
> +
> + return new_val;
> +}
> +
> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> struct perf_sample *data)
> {
> @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> if (type & PERF_SAMPLE_BRANCH_STACK) {
> const u64 max_branch_nr = UINT64_MAX /
> sizeof(struct branch_entry);
> + struct branch_entry *e;
> + unsigned int i;
>
> OVERFLOW_CHECK_u64(array);
> data->branch_stack = (struct branch_stack *)array++;
> @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> return -EFAULT;
>
> sz = data->branch_stack->nr * sizeof(struct branch_entry);
> - if (evsel__has_branch_hw_idx(evsel))
> + if (evsel__has_branch_hw_idx(evsel)) {
> sz += sizeof(u64);
> - else
> + e = &data->branch_stack->entries[0];
> + } else {
> data->no_hw_idx = true;
> + /*
> + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
> + * only nr and entries[] will be output by kernel.
> + */
> + e = (struct branch_entry *)&data->branch_stack->hw_idx;
> + }
> +
> + if (swapped) {
> + /*
> + * struct branch_flag does not have endian
> + * specific bit field definition. And bswap
> + * will not resolve the issue, since these
> + * are bit fields.
> + *
> + * evsel__bitfield_swap_branch_flags() uses a
> + * bitfield_swap macro to swap the bit position
> + * based on the host endians.
> + */
> + for (i = 0; i < data->branch_stack->nr; i++, e++)
> + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
> + }
> +
> OVERFLOW_CHECK(array, sz, max_size);
> array = (void *)array + sz;
> }
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..2e82cdbe2c08 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
> bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
> bool evsel__is_leader(struct evsel *evsel);
> void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
> +
> +/*
> + * Macro to swap the bit-field postition and size.
> + * Used when,
> + * - dont need to swap the entire u64 &&
> + * - when u64 has variable bit-field sizes &&
> + * - when presented in a host endian which is different
> + * than the source endian of the perf.data file
> + */
> +#define bitfield_swap(src, pos, size) \
> + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
> +
> +u64 evsel__bitfield_swap_branch_flags(u64 value);
> #endif /* __PERF_EVSEL_H */
> --
> 2.31.1
>
On 10/21/21 6:55 PM, Jiri Olsa wrote:
> On Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan wrote:
>> branch_stack struct has bit field definition which
>> produces different bit ordering for big/little endian.
>> Because of this, when branch_stack sample is collected
>> in a BE system and viewed/reported in a LE system, bit
>> fields of the branch stack are not presented properly.
>> To address this issue, a evsel__bitfield_swap_branch_stack()
>> is defined and introduced in evsel__parse_sample.
>>
>> Signed-off-by: Madhavan Srinivasan <[email protected]>
>> ---
>> Changelog v1:
>> - Renamed function and macro
>> - Added comments in code
>>
>> tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
>> tools/perf/util/evsel.h | 13 ++++++++
>> 2 files changed, 85 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index dbfeceb2546c..746e642d4d32 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
>> data->weight = *array;
>> }
>>
>> +u64 evsel__bitfield_swap_branch_flags(u64 value)
>> +{
>> + u64 new_val = 0;
>> +
>> + /*
>> + * branch_flags
>> + * union {
>> + * u64 values;
>> + * struct {
>> + * mispred:1 //target mispredicted
>> + * predicted:1 //target predicted
>> + * in_tx:1 //in transaction
>> + * abort:1 //transaction abort
>> + * cycles:16 //cycle count to last branch
>> + * type:4 //branch type
>> + * reserved:40
>> + * }
>> + * }
>> + *
>> + * Avoid bswap64() the entire branch_flag.value,
>> + * as it has variable bit-field sizes. Instead the
>> + * macro takes the bit-field position/size,
>> + * swaps it based on the host endianness.
>> + */
>> + if (bigendian()) {
>> + new_val = bitfield_swap(value, 0, 1);
>> + new_val |= bitfield_swap(value, 1, 1);
>> + new_val |= bitfield_swap(value, 2, 1);
>> + new_val |= bitfield_swap(value, 3, 1);
>> + new_val |= bitfield_swap(value, 4, 16);
>> + new_val |= bitfield_swap(value, 20, 4);
>> + new_val |= bitfield_swap(value, 24, 40);
>> + } else {
>> + new_val = bitfield_swap(value, 63, 1);
>> + new_val |= bitfield_swap(value, 62, 1);
>> + new_val |= bitfield_swap(value, 61, 1);
>> + new_val |= bitfield_swap(value, 60, 1);
>> + new_val |= bitfield_swap(value, 44, 16);
>> + new_val |= bitfield_swap(value, 40, 4);
>> + new_val |= bitfield_swap(value, 0, 40);
>> + }
>> +
>> + return new_val;
>> +}
>> +
>> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>> struct perf_sample *data)
>> {
>> @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>> if (type & PERF_SAMPLE_BRANCH_STACK) {
>> const u64 max_branch_nr = UINT64_MAX /
>> sizeof(struct branch_entry);
>> + struct branch_entry *e;
>> + unsigned int i;
>>
>> OVERFLOW_CHECK_u64(array);
>> data->branch_stack = (struct branch_stack *)array++;
>> @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>> return -EFAULT;
>>
>> sz = data->branch_stack->nr * sizeof(struct branch_entry);
>> - if (evsel__has_branch_hw_idx(evsel))
>> + if (evsel__has_branch_hw_idx(evsel)) {
>> sz += sizeof(u64);
>> - else
>> + e = &data->branch_stack->entries[0];
>> + } else {
>> data->no_hw_idx = true;
>> + /*
>> + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
>> + * only nr and entries[] will be output by kernel.
>> + */
>> + e = (struct branch_entry *)&data->branch_stack->hw_idx;
>> + }
>> +
>> + if (swapped) {
> thanks for all the comments, make it easy to review
>
>> + /*
>> + * struct branch_flag does not have endian
>> + * specific bit field definition. And bswap
>> + * will not resolve the issue, since these
>> + * are bit fields.
>> + *
>> + * evsel__bitfield_swap_branch_flags() uses a
>> + * bitfield_swap macro to swap the bit position
>> + * based on the host endians.
>> + */
>> + for (i = 0; i < data->branch_stack->nr; i++, e++)
>> + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
> one last thing that confuses me.. the sample is already swapped at this
> point, right? with perf_event__all64_swap function
>
> should you swap it back first and then do the proper bitfield swap?
> like we do in PERF_SAMPLE_RAW to get proper u32 values
No, in case of PERF_SAMPLE_RAW type, first word contains the size.
Let me explain with an example (hope will not confuse further :) ),
say we have branch_flag.value as 0x4000000000000000 generated in a
BE system, which has predict set, now if we undo the swap done by
perf_event__all64_swap, value comes out as 0x40 (in a LE system) and
if this value fed to bitswap macro, we end up 0x0000000040000000,
which is wrong. Instead, by not swapping back, we feed the value
in the source-endian to the new macro.
Maddy
> thanks,
> jirka
>
>> + }
>> +
>> OVERFLOW_CHECK(array, sz, max_size);
>> array = (void *)array + sz;
>> }
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index 1f7edfa8568a..2e82cdbe2c08 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
>> bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
>> bool evsel__is_leader(struct evsel *evsel);
>> void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
>> +
>> +/*
>> + * Macro to swap the bit-field postition and size.
>> + * Used when,
>> + * - dont need to swap the entire u64 &&
>> + * - when u64 has variable bit-field sizes &&
>> + * - when presented in a host endian which is different
>> + * than the source endian of the perf.data file
>> + */
>> +#define bitfield_swap(src, pos, size) \
>> + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
>> +
>> +u64 evsel__bitfield_swap_branch_flags(u64 value);
>> #endif /* __PERF_EVSEL_H */
>> --
>> 2.31.1
>>
Em Mon, Oct 25, 2021 at 02:07:23PM +0200, Jiri Olsa escreveu:
> On Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan wrote:
> > branch_stack struct has bit field definition which
> > produces different bit ordering for big/little endian.
> > Because of this, when branch_stack sample is collected
> > in a BE system and viewed/reported in a LE system, bit
> > fields of the branch stack are not presented properly.
> > To address this issue, a evsel__bitfield_swap_branch_stack()
> > is defined and introduced in evsel__parse_sample.
> >
> > Signed-off-by: Madhavan Srinivasan <[email protected]>
>
> for both patches
>
> Acked-by: Jiri Olsa <[email protected]>
Thanks, applied.
- Arnaldo
> thanks,
> jirka
>
> > ---
> > Changelog v1:
> > - Renamed function and macro
> > - Added comments in code
> >
> > tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
> > tools/perf/util/evsel.h | 13 ++++++++
> > 2 files changed, 85 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index dbfeceb2546c..746e642d4d32 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
> > data->weight = *array;
> > }
> >
> > +u64 evsel__bitfield_swap_branch_flags(u64 value)
> > +{
> > + u64 new_val = 0;
> > +
> > + /*
> > + * branch_flags
> > + * union {
> > + * u64 values;
> > + * struct {
> > + * mispred:1 //target mispredicted
> > + * predicted:1 //target predicted
> > + * in_tx:1 //in transaction
> > + * abort:1 //transaction abort
> > + * cycles:16 //cycle count to last branch
> > + * type:4 //branch type
> > + * reserved:40
> > + * }
> > + * }
> > + *
> > + * Avoid bswap64() the entire branch_flag.value,
> > + * as it has variable bit-field sizes. Instead the
> > + * macro takes the bit-field position/size,
> > + * swaps it based on the host endianness.
> > + */
> > + if (bigendian()) {
> > + new_val = bitfield_swap(value, 0, 1);
> > + new_val |= bitfield_swap(value, 1, 1);
> > + new_val |= bitfield_swap(value, 2, 1);
> > + new_val |= bitfield_swap(value, 3, 1);
> > + new_val |= bitfield_swap(value, 4, 16);
> > + new_val |= bitfield_swap(value, 20, 4);
> > + new_val |= bitfield_swap(value, 24, 40);
> > + } else {
> > + new_val = bitfield_swap(value, 63, 1);
> > + new_val |= bitfield_swap(value, 62, 1);
> > + new_val |= bitfield_swap(value, 61, 1);
> > + new_val |= bitfield_swap(value, 60, 1);
> > + new_val |= bitfield_swap(value, 44, 16);
> > + new_val |= bitfield_swap(value, 40, 4);
> > + new_val |= bitfield_swap(value, 0, 40);
> > + }
> > +
> > + return new_val;
> > +}
> > +
> > int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> > struct perf_sample *data)
> > {
> > @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> > if (type & PERF_SAMPLE_BRANCH_STACK) {
> > const u64 max_branch_nr = UINT64_MAX /
> > sizeof(struct branch_entry);
> > + struct branch_entry *e;
> > + unsigned int i;
> >
> > OVERFLOW_CHECK_u64(array);
> > data->branch_stack = (struct branch_stack *)array++;
> > @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> > return -EFAULT;
> >
> > sz = data->branch_stack->nr * sizeof(struct branch_entry);
> > - if (evsel__has_branch_hw_idx(evsel))
> > + if (evsel__has_branch_hw_idx(evsel)) {
> > sz += sizeof(u64);
> > - else
> > + e = &data->branch_stack->entries[0];
> > + } else {
> > data->no_hw_idx = true;
> > + /*
> > + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
> > + * only nr and entries[] will be output by kernel.
> > + */
> > + e = (struct branch_entry *)&data->branch_stack->hw_idx;
> > + }
> > +
> > + if (swapped) {
> > + /*
> > + * struct branch_flag does not have endian
> > + * specific bit field definition. And bswap
> > + * will not resolve the issue, since these
> > + * are bit fields.
> > + *
> > + * evsel__bitfield_swap_branch_flags() uses a
> > + * bitfield_swap macro to swap the bit position
> > + * based on the host endians.
> > + */
> > + for (i = 0; i < data->branch_stack->nr; i++, e++)
> > + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
> > + }
> > +
> > OVERFLOW_CHECK(array, sz, max_size);
> > array = (void *)array + sz;
> > }
> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > index 1f7edfa8568a..2e82cdbe2c08 100644
> > --- a/tools/perf/util/evsel.h
> > +++ b/tools/perf/util/evsel.h
> > @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
> > bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
> > bool evsel__is_leader(struct evsel *evsel);
> > void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
> > +
> > +/*
> > + * Macro to swap the bit-field postition and size.
> > + * Used when,
> > + * - dont need to swap the entire u64 &&
> > + * - when u64 has variable bit-field sizes &&
> > + * - when presented in a host endian which is different
> > + * than the source endian of the perf.data file
> > + */
> > +#define bitfield_swap(src, pos, size) \
> > + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
> > +
> > +u64 evsel__bitfield_swap_branch_flags(u64 value);
> > #endif /* __PERF_EVSEL_H */
> > --
> > 2.31.1
> >
--
- Arnaldo
Em Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan escreveu:
> branch_stack struct has bit field definition which
> produces different bit ordering for big/little endian.
> Because of this, when branch_stack sample is collected
> in a BE system and viewed/reported in a LE system, bit
> fields of the branch stack are not presented properly.
> To address this issue, a evsel__bitfield_swap_branch_stack()
> is defined and introduced in evsel__parse_sample.
>
> Signed-off-by: Madhavan Srinivasan <[email protected]>
> ---
> Changelog v1:
> - Renamed function and macro
> - Added comments in code
[acme@quaco perf]$ perf test python
19: 'import perf' in python : FAILED!
[acme@quaco perf]$ perf test -v python
Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc
19: 'import perf' in python :
--- start ---
test child forked, pid 991284
python usage test: "echo "import sys ; sys.path.append('/tmp/build/perf/python'); import perf" | '/usr/bin/python3' "
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: /tmp/build/perf/python/perf.cpython-39-x86_64-linux-gnu.so: undefined symbol: bigendian
test child finished with -1
---- end ----
'import perf' in python: FAILED!
[acme@quaco perf]$
> tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
> tools/perf/util/evsel.h | 13 ++++++++
> 2 files changed, 85 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index dbfeceb2546c..746e642d4d32 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
> data->weight = *array;
> }
>
> +u64 evsel__bitfield_swap_branch_flags(u64 value)
> +{
> + u64 new_val = 0;
> +
> + /*
> + * branch_flags
> + * union {
> + * u64 values;
> + * struct {
> + * mispred:1 //target mispredicted
> + * predicted:1 //target predicted
> + * in_tx:1 //in transaction
> + * abort:1 //transaction abort
> + * cycles:16 //cycle count to last branch
> + * type:4 //branch type
> + * reserved:40
> + * }
> + * }
> + *
> + * Avoid bswap64() the entire branch_flag.value,
> + * as it has variable bit-field sizes. Instead the
> + * macro takes the bit-field position/size,
> + * swaps it based on the host endianness.
> + */
> + if (bigendian()) {
> + new_val = bitfield_swap(value, 0, 1);
> + new_val |= bitfield_swap(value, 1, 1);
> + new_val |= bitfield_swap(value, 2, 1);
> + new_val |= bitfield_swap(value, 3, 1);
> + new_val |= bitfield_swap(value, 4, 16);
> + new_val |= bitfield_swap(value, 20, 4);
> + new_val |= bitfield_swap(value, 24, 40);
> + } else {
> + new_val = bitfield_swap(value, 63, 1);
> + new_val |= bitfield_swap(value, 62, 1);
> + new_val |= bitfield_swap(value, 61, 1);
> + new_val |= bitfield_swap(value, 60, 1);
> + new_val |= bitfield_swap(value, 44, 16);
> + new_val |= bitfield_swap(value, 40, 4);
> + new_val |= bitfield_swap(value, 0, 40);
> + }
> +
> + return new_val;
> +}
> +
> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> struct perf_sample *data)
> {
> @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> if (type & PERF_SAMPLE_BRANCH_STACK) {
> const u64 max_branch_nr = UINT64_MAX /
> sizeof(struct branch_entry);
> + struct branch_entry *e;
> + unsigned int i;
>
> OVERFLOW_CHECK_u64(array);
> data->branch_stack = (struct branch_stack *)array++;
> @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> return -EFAULT;
>
> sz = data->branch_stack->nr * sizeof(struct branch_entry);
> - if (evsel__has_branch_hw_idx(evsel))
> + if (evsel__has_branch_hw_idx(evsel)) {
> sz += sizeof(u64);
> - else
> + e = &data->branch_stack->entries[0];
> + } else {
> data->no_hw_idx = true;
> + /*
> + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
> + * only nr and entries[] will be output by kernel.
> + */
> + e = (struct branch_entry *)&data->branch_stack->hw_idx;
> + }
> +
> + if (swapped) {
> + /*
> + * struct branch_flag does not have endian
> + * specific bit field definition. And bswap
> + * will not resolve the issue, since these
> + * are bit fields.
> + *
> + * evsel__bitfield_swap_branch_flags() uses a
> + * bitfield_swap macro to swap the bit position
> + * based on the host endians.
> + */
> + for (i = 0; i < data->branch_stack->nr; i++, e++)
> + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
> + }
> +
> OVERFLOW_CHECK(array, sz, max_size);
> array = (void *)array + sz;
> }
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 1f7edfa8568a..2e82cdbe2c08 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
> bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
> bool evsel__is_leader(struct evsel *evsel);
> void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
> +
> +/*
> + * Macro to swap the bit-field postition and size.
> + * Used when,
> + * - dont need to swap the entire u64 &&
> + * - when u64 has variable bit-field sizes &&
> + * - when presented in a host endian which is different
> + * than the source endian of the perf.data file
> + */
> +#define bitfield_swap(src, pos, size) \
> + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
> +
> +u64 evsel__bitfield_swap_branch_flags(u64 value);
> #endif /* __PERF_EVSEL_H */
> --
> 2.31.1
--
- Arnaldo
On October 27, 2021 9:16:51 PM GMT-03:00, Arnaldo Carvalho de Melo <[email protected]> wrote:
>Em Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan escreveu:
>> branch_stack struct has bit field definition which
>> produces different bit ordering for big/little endian.
>> Because of this, when branch_stack sample is collected
>> in a BE system and viewed/reported in a LE system, bit
>> fields of the branch stack are not presented properly.
>> To address this issue, a evsel__bitfield_swap_branch_stack()
>> is defined and introduced in evsel__parse_sample.
>>
>> Signed-off-by: Madhavan Srinivasan <[email protected]>
>> ---
>> Changelog v1:
>> - Renamed function and macro
>> - Added comments in code
>
Please, run 'perf test' before submitting patches, it's not that difficult, please do so.
- Arnaldo
>[acme@quaco perf]$ perf test python
>19: 'import perf' in python : FAILED!
>[acme@quaco perf]$ perf test -v python
>Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc
>19: 'import perf' in python :
>--- start ---
>test child forked, pid 991284
>python usage test: "echo "import sys ; sys.path.append('/tmp/build/perf/python'); import perf" | '/usr/bin/python3' "
>Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
>ImportError: /tmp/build/perf/python/perf.cpython-39-x86_64-linux-gnu.so: undefined symbol: bigendian
>test child finished with -1
>---- end ----
>'import perf' in python: FAILED!
>[acme@quaco perf]$
>
>> tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
>> tools/perf/util/evsel.h | 13 ++++++++
>> 2 files changed, 85 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index dbfeceb2546c..746e642d4d32 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
>> data->weight = *array;
>> }
>>
>> +u64 evsel__bitfield_swap_branch_flags(u64 value)
>> +{
>> + u64 new_val = 0;
>> +
>> + /*
>> + * branch_flags
>> + * union {
>> + * u64 values;
>> + * struct {
>> + * mispred:1 //target mispredicted
>> + * predicted:1 //target predicted
>> + * in_tx:1 //in transaction
>> + * abort:1 //transaction abort
>> + * cycles:16 //cycle count to last branch
>> + * type:4 //branch type
>> + * reserved:40
>> + * }
>> + * }
>> + *
>> + * Avoid bswap64() the entire branch_flag.value,
>> + * as it has variable bit-field sizes. Instead the
>> + * macro takes the bit-field position/size,
>> + * swaps it based on the host endianness.
>> + */
>> + if (bigendian()) {
>> + new_val = bitfield_swap(value, 0, 1);
>> + new_val |= bitfield_swap(value, 1, 1);
>> + new_val |= bitfield_swap(value, 2, 1);
>> + new_val |= bitfield_swap(value, 3, 1);
>> + new_val |= bitfield_swap(value, 4, 16);
>> + new_val |= bitfield_swap(value, 20, 4);
>> + new_val |= bitfield_swap(value, 24, 40);
>> + } else {
>> + new_val = bitfield_swap(value, 63, 1);
>> + new_val |= bitfield_swap(value, 62, 1);
>> + new_val |= bitfield_swap(value, 61, 1);
>> + new_val |= bitfield_swap(value, 60, 1);
>> + new_val |= bitfield_swap(value, 44, 16);
>> + new_val |= bitfield_swap(value, 40, 4);
>> + new_val |= bitfield_swap(value, 0, 40);
>> + }
>> +
>> + return new_val;
>> +}
>> +
>> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>> struct perf_sample *data)
>> {
>> @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>> if (type & PERF_SAMPLE_BRANCH_STACK) {
>> const u64 max_branch_nr = UINT64_MAX /
>> sizeof(struct branch_entry);
>> + struct branch_entry *e;
>> + unsigned int i;
>>
>> OVERFLOW_CHECK_u64(array);
>> data->branch_stack = (struct branch_stack *)array++;
>> @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>> return -EFAULT;
>>
>> sz = data->branch_stack->nr * sizeof(struct branch_entry);
>> - if (evsel__has_branch_hw_idx(evsel))
>> + if (evsel__has_branch_hw_idx(evsel)) {
>> sz += sizeof(u64);
>> - else
>> + e = &data->branch_stack->entries[0];
>> + } else {
>> data->no_hw_idx = true;
>> + /*
>> + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
>> + * only nr and entries[] will be output by kernel.
>> + */
>> + e = (struct branch_entry *)&data->branch_stack->hw_idx;
>> + }
>> +
>> + if (swapped) {
>> + /*
>> + * struct branch_flag does not have endian
>> + * specific bit field definition. And bswap
>> + * will not resolve the issue, since these
>> + * are bit fields.
>> + *
>> + * evsel__bitfield_swap_branch_flags() uses a
>> + * bitfield_swap macro to swap the bit position
>> + * based on the host endians.
>> + */
>> + for (i = 0; i < data->branch_stack->nr; i++, e++)
>> + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
>> + }
>> +
>> OVERFLOW_CHECK(array, sz, max_size);
>> array = (void *)array + sz;
>> }
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index 1f7edfa8568a..2e82cdbe2c08 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
>> bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
>> bool evsel__is_leader(struct evsel *evsel);
>> void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
>> +
>> +/*
>> + * Macro to swap the bit-field postition and size.
>> + * Used when,
>> + * - dont need to swap the entire u64 &&
>> + * - when u64 has variable bit-field sizes &&
>> + * - when presented in a host endian which is different
>> + * than the source endian of the perf.data file
>> + */
>> +#define bitfield_swap(src, pos, size) \
>> + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
>> +
>> +u64 evsel__bitfield_swap_branch_flags(u64 value);
>> #endif /* __PERF_EVSEL_H */
>> --
>> 2.31.1
>
On 10/28/21 8:07 AM, Arnaldo Carvalho de Melo wrote:
>
> On October 27, 2021 9:16:51 PM GMT-03:00, Arnaldo Carvalho de Melo <[email protected]> wrote:
>> Em Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan escreveu:
>>> branch_stack struct has bit field definition which
>>> produces different bit ordering for big/little endian.
>>> Because of this, when branch_stack sample is collected
>>> in a BE system and viewed/reported in a LE system, bit
>>> fields of the branch stack are not presented properly.
>>> To address this issue, a evsel__bitfield_swap_branch_stack()
>>> is defined and introduced in evsel__parse_sample.
>>>
>>> Signed-off-by: Madhavan Srinivasan <[email protected]>
>>> ---
>>> Changelog v1:
>>> - Renamed function and macro
>>> - Added comments in code
> Please, run 'perf test' before submitting patches, it's not that difficult, please do so.
I did. I ran 'perf test' and did not see any fail. I did not do 'perf
test python', will check that out. My bad.
Maddy
>
> - Arnaldo
>
>> [acme@quaco perf]$ perf test python
>> 19: 'import perf' in python : FAILED!
>> [acme@quaco perf]$ perf test -v python
>> Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc
>> 19: 'import perf' in python :
>> --- start ---
>> test child forked, pid 991284
>> python usage test: "echo "import sys ; sys.path.append('/tmp/build/perf/python'); import perf" | '/usr/bin/python3' "
>> Traceback (most recent call last):
>> File "<stdin>", line 1, in <module>
>> ImportError: /tmp/build/perf/python/perf.cpython-39-x86_64-linux-gnu.so: undefined symbol: bigendian
>> test child finished with -1
>> ---- end ----
>> 'import perf' in python: FAILED!
>> [acme@quaco perf]$
>>
>>> tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
>>> tools/perf/util/evsel.h | 13 ++++++++
>>> 2 files changed, 85 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>>> index dbfeceb2546c..746e642d4d32 100644
>>> --- a/tools/perf/util/evsel.c
>>> +++ b/tools/perf/util/evsel.c
>>> @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
>>> data->weight = *array;
>>> }
>>>
>>> +u64 evsel__bitfield_swap_branch_flags(u64 value)
>>> +{
>>> + u64 new_val = 0;
>>> +
>>> + /*
>>> + * branch_flags
>>> + * union {
>>> + * u64 values;
>>> + * struct {
>>> + * mispred:1 //target mispredicted
>>> + * predicted:1 //target predicted
>>> + * in_tx:1 //in transaction
>>> + * abort:1 //transaction abort
>>> + * cycles:16 //cycle count to last branch
>>> + * type:4 //branch type
>>> + * reserved:40
>>> + * }
>>> + * }
>>> + *
>>> + * Avoid bswap64() the entire branch_flag.value,
>>> + * as it has variable bit-field sizes. Instead the
>>> + * macro takes the bit-field position/size,
>>> + * swaps it based on the host endianness.
>>> + */
>>> + if (bigendian()) {
>>> + new_val = bitfield_swap(value, 0, 1);
>>> + new_val |= bitfield_swap(value, 1, 1);
>>> + new_val |= bitfield_swap(value, 2, 1);
>>> + new_val |= bitfield_swap(value, 3, 1);
>>> + new_val |= bitfield_swap(value, 4, 16);
>>> + new_val |= bitfield_swap(value, 20, 4);
>>> + new_val |= bitfield_swap(value, 24, 40);
>>> + } else {
>>> + new_val = bitfield_swap(value, 63, 1);
>>> + new_val |= bitfield_swap(value, 62, 1);
>>> + new_val |= bitfield_swap(value, 61, 1);
>>> + new_val |= bitfield_swap(value, 60, 1);
>>> + new_val |= bitfield_swap(value, 44, 16);
>>> + new_val |= bitfield_swap(value, 40, 4);
>>> + new_val |= bitfield_swap(value, 0, 40);
>>> + }
>>> +
>>> + return new_val;
>>> +}
>>> +
>>> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>> struct perf_sample *data)
>>> {
>>> @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>> if (type & PERF_SAMPLE_BRANCH_STACK) {
>>> const u64 max_branch_nr = UINT64_MAX /
>>> sizeof(struct branch_entry);
>>> + struct branch_entry *e;
>>> + unsigned int i;
>>>
>>> OVERFLOW_CHECK_u64(array);
>>> data->branch_stack = (struct branch_stack *)array++;
>>> @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>> return -EFAULT;
>>>
>>> sz = data->branch_stack->nr * sizeof(struct branch_entry);
>>> - if (evsel__has_branch_hw_idx(evsel))
>>> + if (evsel__has_branch_hw_idx(evsel)) {
>>> sz += sizeof(u64);
>>> - else
>>> + e = &data->branch_stack->entries[0];
>>> + } else {
>>> data->no_hw_idx = true;
>>> + /*
>>> + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
>>> + * only nr and entries[] will be output by kernel.
>>> + */
>>> + e = (struct branch_entry *)&data->branch_stack->hw_idx;
>>> + }
>>> +
>>> + if (swapped) {
>>> + /*
>>> + * struct branch_flag does not have endian
>>> + * specific bit field definition. And bswap
>>> + * will not resolve the issue, since these
>>> + * are bit fields.
>>> + *
>>> + * evsel__bitfield_swap_branch_flags() uses a
>>> + * bitfield_swap macro to swap the bit position
>>> + * based on the host endians.
>>> + */
>>> + for (i = 0; i < data->branch_stack->nr; i++, e++)
>>> + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
>>> + }
>>> +
>>> OVERFLOW_CHECK(array, sz, max_size);
>>> array = (void *)array + sz;
>>> }
>>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>>> index 1f7edfa8568a..2e82cdbe2c08 100644
>>> --- a/tools/perf/util/evsel.h
>>> +++ b/tools/perf/util/evsel.h
>>> @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
>>> bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
>>> bool evsel__is_leader(struct evsel *evsel);
>>> void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
>>> +
>>> +/*
>>> + * Macro to swap the bit-field postition and size.
>>> + * Used when,
>>> + * - dont need to swap the entire u64 &&
>>> + * - when u64 has variable bit-field sizes &&
>>> + * - when presented in a host endian which is different
>>> + * than the source endian of the perf.data file
>>> + */
>>> +#define bitfield_swap(src, pos, size) \
>>> + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
>>> +
>>> +u64 evsel__bitfield_swap_branch_flags(u64 value);
>>> #endif /* __PERF_EVSEL_H */
>>> --
>>> 2.31.1
On 10/28/21 8:07 AM, Arnaldo Carvalho de Melo wrote:
>
> On October 27, 2021 9:16:51 PM GMT-03:00, Arnaldo Carvalho de Melo <[email protected]> wrote:
>> Em Sat, Oct 16, 2021 at 06:20:58PM +0530, Madhavan Srinivasan escreveu:
>>> branch_stack struct has bit field definition which
>>> produces different bit ordering for big/little endian.
>>> Because of this, when branch_stack sample is collected
>>> in a BE system and viewed/reported in a LE system, bit
>>> fields of the branch stack are not presented properly.
>>> To address this issue, a evsel__bitfield_swap_branch_stack()
>>> is defined and introduced in evsel__parse_sample.
>>>
>>> Signed-off-by: Madhavan Srinivasan <[email protected]>
>>> ---
>>> Changelog v1:
>>> - Renamed function and macro
>>> - Added comments in code
> Please, run 'perf test' before submitting patches, it's not that difficult, please do so.
>
> - Arnaldo
>
>> [acme@quaco perf]$ perf test python
>> 19: 'import perf' in python : FAILED!
>> [acme@quaco perf]$ perf test -v python
>> Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc
>> 19: 'import perf' in python :
>> --- start ---
>> test child forked, pid 991284
>> python usage test: "echo "import sys ; sys.path.append('/tmp/build/perf/python'); import perf" | '/usr/bin/python3' "
>> Traceback (most recent call last):
>> File "<stdin>", line 1, in <module>
>> ImportError: /tmp/build/perf/python/perf.cpython-39-x86_64-linux-gnu.so: undefined symbol: bigendian
>> test child finished with -1
using tep_is_bigendian() instead of bigendian() fixes it.
Will send a V3 with that changes.
Maddy
>> ---- end ---- 'import perf' in python: FAILED! [acme@quaco perf]$
>>> tools/perf/util/evsel.c | 74 +++++++++++++++++++++++++++++++++++++++--
>>> tools/perf/util/evsel.h | 13 ++++++++
>>> 2 files changed, 85 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>>> index dbfeceb2546c..746e642d4d32 100644
>>> --- a/tools/perf/util/evsel.c
>>> +++ b/tools/perf/util/evsel.c
>>> @@ -2221,6 +2221,51 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
>>> data->weight = *array;
>>> }
>>>
>>> +u64 evsel__bitfield_swap_branch_flags(u64 value)
>>> +{
>>> + u64 new_val = 0;
>>> +
>>> + /*
>>> + * branch_flags
>>> + * union {
>>> + * u64 values;
>>> + * struct {
>>> + * mispred:1 //target mispredicted
>>> + * predicted:1 //target predicted
>>> + * in_tx:1 //in transaction
>>> + * abort:1 //transaction abort
>>> + * cycles:16 //cycle count to last branch
>>> + * type:4 //branch type
>>> + * reserved:40
>>> + * }
>>> + * }
>>> + *
>>> + * Avoid bswap64() the entire branch_flag.value,
>>> + * as it has variable bit-field sizes. Instead the
>>> + * macro takes the bit-field position/size,
>>> + * swaps it based on the host endianness.
>>> + */
>>> + if (bigendian()) {
>>> + new_val = bitfield_swap(value, 0, 1);
>>> + new_val |= bitfield_swap(value, 1, 1);
>>> + new_val |= bitfield_swap(value, 2, 1);
>>> + new_val |= bitfield_swap(value, 3, 1);
>>> + new_val |= bitfield_swap(value, 4, 16);
>>> + new_val |= bitfield_swap(value, 20, 4);
>>> + new_val |= bitfield_swap(value, 24, 40);
>>> + } else {
>>> + new_val = bitfield_swap(value, 63, 1);
>>> + new_val |= bitfield_swap(value, 62, 1);
>>> + new_val |= bitfield_swap(value, 61, 1);
>>> + new_val |= bitfield_swap(value, 60, 1);
>>> + new_val |= bitfield_swap(value, 44, 16);
>>> + new_val |= bitfield_swap(value, 40, 4);
>>> + new_val |= bitfield_swap(value, 0, 40);
>>> + }
>>> +
>>> + return new_val;
>>> +}
>>> +
>>> int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>> struct perf_sample *data)
>>> {
>>> @@ -2408,6 +2453,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>> if (type & PERF_SAMPLE_BRANCH_STACK) {
>>> const u64 max_branch_nr = UINT64_MAX /
>>> sizeof(struct branch_entry);
>>> + struct branch_entry *e;
>>> + unsigned int i;
>>>
>>> OVERFLOW_CHECK_u64(array);
>>> data->branch_stack = (struct branch_stack *)array++;
>>> @@ -2416,10 +2463,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>>> return -EFAULT;
>>>
>>> sz = data->branch_stack->nr * sizeof(struct branch_entry);
>>> - if (evsel__has_branch_hw_idx(evsel))
>>> + if (evsel__has_branch_hw_idx(evsel)) {
>>> sz += sizeof(u64);
>>> - else
>>> + e = &data->branch_stack->entries[0];
>>> + } else {
>>> data->no_hw_idx = true;
>>> + /*
>>> + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
>>> + * only nr and entries[] will be output by kernel.
>>> + */
>>> + e = (struct branch_entry *)&data->branch_stack->hw_idx;
>>> + }
>>> +
>>> + if (swapped) {
>>> + /*
>>> + * struct branch_flag does not have endian
>>> + * specific bit field definition. And bswap
>>> + * will not resolve the issue, since these
>>> + * are bit fields.
>>> + *
>>> + * evsel__bitfield_swap_branch_flags() uses a
>>> + * bitfield_swap macro to swap the bit position
>>> + * based on the host endians.
>>> + */
>>> + for (i = 0; i < data->branch_stack->nr; i++, e++)
>>> + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
>>> + }
>>> +
>>> OVERFLOW_CHECK(array, sz, max_size);
>>> array = (void *)array + sz;
>>> }
>>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>>> index 1f7edfa8568a..2e82cdbe2c08 100644
>>> --- a/tools/perf/util/evsel.h
>>> +++ b/tools/perf/util/evsel.h
>>> @@ -482,4 +482,17 @@ struct evsel *evsel__leader(struct evsel *evsel);
>>> bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
>>> bool evsel__is_leader(struct evsel *evsel);
>>> void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
>>> +
>>> +/*
>>> + * Macro to swap the bit-field postition and size.
>>> + * Used when,
>>> + * - dont need to swap the entire u64 &&
>>> + * - when u64 has variable bit-field sizes &&
>>> + * - when presented in a host endian which is different
>>> + * than the source endian of the perf.data file
>>> + */
>>> +#define bitfield_swap(src, pos, size) \
>>> + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
>>> +
>>> +u64 evsel__bitfield_swap_branch_flags(u64 value);
>>> #endif /* __PERF_EVSEL_H */
>>> --
>>> 2.31.1