2018-11-07 11:05:18

by Robert Walker

[permalink] [raw]
Subject: [PATCH v4] perf: Support for Arm A32/T32 instruction sets in CoreSight trace

This patch adds support for generating instruction samples from trace of
AArch32 programs using the A32 and T32 instruction sets.

T32 has variable 2 or 4 byte instruction size, so the conversion between
addresses and instruction counts requires extra information from the trace
decoder, requiring version 0.10.0 of OpenCSD. A check for the OpenCSD
library version has been added to the feature check for OpenCSD.

Signed-off-by: Robert Walker <[email protected]>
---
tools/build/feature/test-libopencsd.c | 8 +++
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
tools/perf/util/cs-etm.c | 71 +++++++++++--------------
4 files changed, 78 insertions(+), 40 deletions(-)

diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 5ff1246..d68eb4f 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -1,6 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <opencsd/c_api/opencsd_c_api.h>

+/*
+ * Check OpenCSD library version is sufficient to provide required features
+ */
+#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
+#error "OpenCSD >= 0.10.0 is required"
+#endif
+
int main(void)
{
(void)ocsd_get_version();
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 938def6..5efb616 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
decoder->tail = 0;
decoder->packet_count = 0;
for (i = 0; i < MAX_BUFFER; i++) {
+ decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+ decoder->packet_buffer[i].instr_count = 0;
decoder->packet_buffer[i].last_instr_taken_branch = false;
+ decoder->packet_buffer[i].last_instr_size = 0;
decoder->packet_buffer[i].exc = false;
decoder->packet_buffer[i].exc_ret = false;
decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
decoder->packet_count++;

decoder->packet_buffer[et].sample_type = sample_type;
+ decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
decoder->packet_buffer[et].exc = false;
decoder->packet_buffer[et].exc_ret = false;
decoder->packet_buffer[et].cpu = *((int *)inode->priv);
decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+ decoder->packet_buffer[et].instr_count = 0;
+ decoder->packet_buffer[et].last_instr_taken_branch = false;
+ decoder->packet_buffer[et].last_instr_size = 0;

if (decoder->packet_count == MAX_BUFFER - 1)
return OCSD_RESP_WAIT;
@@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,

packet = &decoder->packet_buffer[decoder->tail];

+ switch (elem->isa) {
+ case ocsd_isa_aarch64:
+ packet->isa = CS_ETM_ISA_A64;
+ break;
+ case ocsd_isa_arm:
+ packet->isa = CS_ETM_ISA_A32;
+ break;
+ case ocsd_isa_thumb2:
+ packet->isa = CS_ETM_ISA_T32;
+ break;
+ case ocsd_isa_tee:
+ case ocsd_isa_jazelle:
+ case ocsd_isa_custom:
+ case ocsd_isa_unknown:
+ default:
+ packet->isa = CS_ETM_ISA_UNKNOWN;
+ }
+
packet->start_addr = elem->st_addr;
packet->end_addr = elem->en_addr;
+ packet->instr_count = elem->num_instr_range;
+
switch (elem->last_i_type) {
case OCSD_INSTR_BR:
case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
break;
}

+ packet->last_instr_size = elem->last_instr_sz;
+
return ret;
}

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 612b575..9351bd1 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -28,11 +28,21 @@ enum cs_etm_sample_type {
CS_ETM_TRACE_ON = 1 << 1,
};

+enum cs_etm_isa {
+ CS_ETM_ISA_UNKNOWN,
+ CS_ETM_ISA_A64,
+ CS_ETM_ISA_A32,
+ CS_ETM_ISA_T32,
+};
+
struct cs_etm_packet {
enum cs_etm_sample_type sample_type;
+ enum cs_etm_isa isa;
u64 start_addr;
u64 end_addr;
+ u32 instr_count;
u8 last_instr_taken_branch;
+ u8 last_instr_size;
u8 exc;
u8 exc_ret;
int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 73430b7..cad133c 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -31,14 +31,6 @@

#define MAX_TIMESTAMP (~0ULL)

-/*
- * A64 instructions are always 4 bytes
- *
- * Only A64 is supported, so can use this constant for converting between
- * addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
-
struct cs_etm_auxtrace {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -510,21 +502,16 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
etmq->last_branch_rb->nr = 0;
}

-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
-{
- /* Returns 0 for the CS_ETM_TRACE_ON packet */
- if (packet->sample_type == CS_ETM_TRACE_ON)
- return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+ u64 addr) {
+ u8 instrBytes[2];

- /*
- * The packet records the execution range with an exclusive end address
- *
- * A64 instructions are constant size, so the last executed
- * instruction is A64_INSTR_SIZE before the end address
- * Will need to do instruction level decode for T32 instructions as
- * they can be variable size (not yet supported).
+ cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
+ /* T32 instruction size is indicated by bits[15:11] of the first
+ * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+ * denote a 32-bit instruction.
*/
- return packet->end_addr - A64_INSTR_SIZE;
+ return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}

static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
@@ -536,27 +523,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
return packet->start_addr;
}

-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
- /*
- * Only A64 instructions are currently supported, so can get
- * instruction count by dividing.
- * Will need to do instruction level decode for T32 instructions as
- * they can be variable size (not yet supported).
- */
- return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+ /* Returns 0 for the CS_ETM_TRACE_ON packet */
+ if (packet->sample_type == CS_ETM_TRACE_ON)
+ return 0;
+
+ return packet->end_addr - packet->last_instr_size;
}

-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+ const struct cs_etm_packet *packet,
u64 offset)
{
- /*
- * Only A64 instructions are currently supported, so can get
- * instruction address by muliplying.
- * Will need to do instruction level decode for T32 instructions as
- * they can be variable size (not yet supported).
- */
- return packet->start_addr + offset * A64_INSTR_SIZE;
+ if (packet->isa == CS_ETM_ISA_T32) {
+ u64 addr = packet->start_addr;
+
+ while (offset > 0) {
+ addr += cs_etm__t32_instr_size(etmq, addr);
+ offset--;
+ }
+ return addr;
+ }
+
+ /* Assume a 4 byte instruction size (A32/A64) */
+ return packet->start_addr + offset * 4;
}

static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -888,9 +880,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet *tmp;
int ret;
- u64 instrs_executed;
+ u64 instrs_executed = etmq->packet->instr_count;

- instrs_executed = cs_etm__instr_count(etmq->packet);
etmq->period_instructions += instrs_executed;

/*
@@ -920,7 +911,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
* executed, but PC has not advanced to next instruction)
*/
u64 offset = (instrs_executed - instrs_over - 1);
- u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+ u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);

ret = cs_etm__synth_instruction_sample(
etmq, addr, etm->instructions_sample_period);
--
2.7.4



2018-11-08 20:51:14

by Mathieu Poirier

[permalink] [raw]
Subject: Re: [PATCH v4] perf: Support for Arm A32/T32 instruction sets in CoreSight trace

Hi Robert,

On Wed, Nov 07, 2018 at 11:04:12AM +0000, Robert Walker wrote:
> This patch adds support for generating instruction samples from trace of
> AArch32 programs using the A32 and T32 instruction sets.
>
> T32 has variable 2 or 4 byte instruction size, so the conversion between
> addresses and instruction counts requires extra information from the trace
> decoder, requiring version 0.10.0 of OpenCSD. A check for the OpenCSD
> library version has been added to the feature check for OpenCSD.
>
> Signed-off-by: Robert Walker <[email protected]>
> ---
> tools/build/feature/test-libopencsd.c | 8 +++
> tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
> tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
> tools/perf/util/cs-etm.c | 71 +++++++++++--------------
> 4 files changed, 78 insertions(+), 40 deletions(-)
>
> diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
> index 5ff1246..d68eb4f 100644
> --- a/tools/build/feature/test-libopencsd.c
> +++ b/tools/build/feature/test-libopencsd.c
> @@ -1,6 +1,14 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <opencsd/c_api/opencsd_c_api.h>
>
> +/*
> + * Check OpenCSD library version is sufficient to provide required features
> + */
> +#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
> +#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
> +#error "OpenCSD >= 0.10.0 is required"
> +#endif
> +
> int main(void)
> {
> (void)ocsd_get_version();
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 938def6..5efb616 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
> decoder->tail = 0;
> decoder->packet_count = 0;
> for (i = 0; i < MAX_BUFFER; i++) {
> + decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
> decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
> decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
> + decoder->packet_buffer[i].instr_count = 0;
> decoder->packet_buffer[i].last_instr_taken_branch = false;
> + decoder->packet_buffer[i].last_instr_size = 0;
> decoder->packet_buffer[i].exc = false;
> decoder->packet_buffer[i].exc_ret = false;
> decoder->packet_buffer[i].cpu = INT_MIN;
> @@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
> decoder->packet_count++;
>
> decoder->packet_buffer[et].sample_type = sample_type;
> + decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
> decoder->packet_buffer[et].exc = false;
> decoder->packet_buffer[et].exc_ret = false;
> decoder->packet_buffer[et].cpu = *((int *)inode->priv);
> decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
> decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
> + decoder->packet_buffer[et].instr_count = 0;
> + decoder->packet_buffer[et].last_instr_taken_branch = false;
> + decoder->packet_buffer[et].last_instr_size = 0;
>
> if (decoder->packet_count == MAX_BUFFER - 1)
> return OCSD_RESP_WAIT;
> @@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
>
> packet = &decoder->packet_buffer[decoder->tail];
>
> + switch (elem->isa) {
> + case ocsd_isa_aarch64:
> + packet->isa = CS_ETM_ISA_A64;
> + break;
> + case ocsd_isa_arm:
> + packet->isa = CS_ETM_ISA_A32;
> + break;
> + case ocsd_isa_thumb2:
> + packet->isa = CS_ETM_ISA_T32;
> + break;
> + case ocsd_isa_tee:
> + case ocsd_isa_jazelle:
> + case ocsd_isa_custom:
> + case ocsd_isa_unknown:
> + default:
> + packet->isa = CS_ETM_ISA_UNKNOWN;
> + }
> +
> packet->start_addr = elem->st_addr;
> packet->end_addr = elem->en_addr;
> + packet->instr_count = elem->num_instr_range;
> +
> switch (elem->last_i_type) {
> case OCSD_INSTR_BR:
> case OCSD_INSTR_BR_INDIRECT:
> @@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
> break;
> }
>
> + packet->last_instr_size = elem->last_instr_sz;
> +
> return ret;
> }
>
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 612b575..9351bd1 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -28,11 +28,21 @@ enum cs_etm_sample_type {
> CS_ETM_TRACE_ON = 1 << 1,
> };
>
> +enum cs_etm_isa {
> + CS_ETM_ISA_UNKNOWN,
> + CS_ETM_ISA_A64,
> + CS_ETM_ISA_A32,
> + CS_ETM_ISA_T32,
> +};
> +
> struct cs_etm_packet {
> enum cs_etm_sample_type sample_type;
> + enum cs_etm_isa isa;
> u64 start_addr;
> u64 end_addr;
> + u32 instr_count;
> u8 last_instr_taken_branch;
> + u8 last_instr_size;
> u8 exc;
> u8 exc_ret;
> int cpu;
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 73430b7..cad133c 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -31,14 +31,6 @@
>
> #define MAX_TIMESTAMP (~0ULL)
>
> -/*
> - * A64 instructions are always 4 bytes
> - *
> - * Only A64 is supported, so can use this constant for converting between
> - * addresses and instruction counts, calculting offsets etc
> - */
> -#define A64_INSTR_SIZE 4
> -
> struct cs_etm_auxtrace {
> struct auxtrace auxtrace;
> struct auxtrace_queues queues;
> @@ -510,21 +502,16 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
> etmq->last_branch_rb->nr = 0;
> }
>
> -static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
> -{
> - /* Returns 0 for the CS_ETM_TRACE_ON packet */
> - if (packet->sample_type == CS_ETM_TRACE_ON)
> - return 0;
> +static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
> + u64 addr) {
> + u8 instrBytes[2];
>
> - /*
> - * The packet records the execution range with an exclusive end address
> - *
> - * A64 instructions are constant size, so the last executed
> - * instruction is A64_INSTR_SIZE before the end address
> - * Will need to do instruction level decode for T32 instructions as
> - * they can be variable size (not yet supported).
> + cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
> + /* T32 instruction size is indicated by bits[15:11] of the first
> + * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
> + * denote a 32-bit instruction.
> */

Even if checkpatch doesn't complain, the opening of a multi-line comment should
sit on a line by itself, like below:


/*
* T32 instruction size is indicated by bits[15:11] of the first
* 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
* denote a 32-bit instruction.
*/

Please address and resend.

Thanks,
Mathieu

> - return packet->end_addr - A64_INSTR_SIZE;
> + return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
> }
>
> static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
> @@ -536,27 +523,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
> return packet->start_addr;
> }
>
> -static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
> +static inline
> +u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
> {
> - /*
> - * Only A64 instructions are currently supported, so can get
> - * instruction count by dividing.
> - * Will need to do instruction level decode for T32 instructions as
> - * they can be variable size (not yet supported).
> - */
> - return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
> + /* Returns 0 for the CS_ETM_TRACE_ON packet */
> + if (packet->sample_type == CS_ETM_TRACE_ON)
> + return 0;
> +
> + return packet->end_addr - packet->last_instr_size;
> }
>
> -static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
> +static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
> + const struct cs_etm_packet *packet,
> u64 offset)
> {
> - /*
> - * Only A64 instructions are currently supported, so can get
> - * instruction address by muliplying.
> - * Will need to do instruction level decode for T32 instructions as
> - * they can be variable size (not yet supported).
> - */
> - return packet->start_addr + offset * A64_INSTR_SIZE;
> + if (packet->isa == CS_ETM_ISA_T32) {
> + u64 addr = packet->start_addr;
> +
> + while (offset > 0) {
> + addr += cs_etm__t32_instr_size(etmq, addr);
> + offset--;
> + }
> + return addr;
> + }
> +
> + /* Assume a 4 byte instruction size (A32/A64) */
> + return packet->start_addr + offset * 4;
> }
>
> static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
> @@ -888,9 +880,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
> struct cs_etm_auxtrace *etm = etmq->etm;
> struct cs_etm_packet *tmp;
> int ret;
> - u64 instrs_executed;
> + u64 instrs_executed = etmq->packet->instr_count;
>
> - instrs_executed = cs_etm__instr_count(etmq->packet);
> etmq->period_instructions += instrs_executed;
>
> /*
> @@ -920,7 +911,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
> * executed, but PC has not advanced to next instruction)
> */
> u64 offset = (instrs_executed - instrs_over - 1);
> - u64 addr = cs_etm__instr_addr(etmq->packet, offset);
> + u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
>
> ret = cs_etm__synth_instruction_sample(
> etmq, addr, etm->instructions_sample_period);
> --
> 2.7.4
>

2018-11-09 02:38:51

by Leo Yan

[permalink] [raw]
Subject: Re: [PATCH v4] perf: Support for Arm A32/T32 instruction sets in CoreSight trace

On Wed, Nov 07, 2018 at 11:04:12AM +0000, Robert Walker wrote:
> This patch adds support for generating instruction samples from trace of
> AArch32 programs using the A32 and T32 instruction sets.
>
> T32 has variable 2 or 4 byte instruction size, so the conversion between
> addresses and instruction counts requires extra information from the trace
> decoder, requiring version 0.10.0 of OpenCSD. A check for the OpenCSD
> library version has been added to the feature check for OpenCSD.

I have applied this patch on the latest mainline kernel and tested it for
A64/A32/T32 instructions.

Tested-by: Leo Yan <[email protected]>