2023-06-15 13:41:14

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU

This enables support for branch stack sampling events in the ARMV8 PMU by
checking has_branch_stack() on the event inside the 'struct arm_pmu'
callbacks. The branch stack helpers armv8pmu_branch_XXXXX() are just dummy
functions for now. While here, this also defines arm_pmu's sched_task()
callback as armv8pmu_sched_task(), which resets the branch record buffer on
a sched_in.

Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: [email protected]
Cc: [email protected]
Tested-by: James Clark <[email protected]>
Signed-off-by: Anshuman Khandual <[email protected]>
---
arch/arm64/include/asm/perf_event.h | 31 +++++++++++
drivers/perf/arm_pmuv3.c | 86 +++++++++++++++++++++--------
2 files changed, 93 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index eb7071c9eb34..ebc392ba3559 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -24,4 +24,35 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
(regs)->pstate = PSR_MODE_EL1h; \
}

+struct pmu_hw_events;
+struct arm_pmu;
+struct perf_event;
+
+#ifdef CONFIG_PERF_EVENTS
+static inline bool has_branch_stack(struct perf_event *event);
+
+static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+ return false;
+}
+
+static inline void armv8pmu_branch_enable(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_disable(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
+static inline void armv8pmu_branch_reset(void) { }
+#endif
#endif
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index c98e4039386d..54c80f393eb6 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -705,38 +705,21 @@ static void armv8pmu_enable_event(struct perf_event *event)
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
-
- /*
- * Disable counter
- */
armv8pmu_disable_event_counter(event);
-
- /*
- * Set event.
- */
armv8pmu_write_event_type(event);
-
- /*
- * Enable interrupt for this counter
- */
armv8pmu_enable_event_irq(event);
-
- /*
- * Enable counter
- */
armv8pmu_enable_event_counter(event);
+
+ if (has_branch_stack(event))
+ armv8pmu_branch_enable(event);
}

static void armv8pmu_disable_event(struct perf_event *event)
{
- /*
- * Disable counter
- */
- armv8pmu_disable_event_counter(event);
+ if (has_branch_stack(event))
+ armv8pmu_branch_disable(event);

- /*
- * Disable interrupt for this counter
- */
+ armv8pmu_disable_event_counter(event);
armv8pmu_disable_event_irq(event);
}

@@ -814,6 +797,11 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;

+ if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
+ armv8pmu_branch_read(cpuc, event);
+ perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack);
+ }
+
/*
* Perf event overflow will queue the processing of the event as
* an irq_work which will be taken care of in the handling of
@@ -912,6 +900,14 @@ static int armv8pmu_user_event_idx(struct perf_event *event)
return event->hw.idx;
}

+static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+{
+ struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
+
+ if (sched_in && armpmu->has_branch_stack)
+ armv8pmu_branch_reset();
+}
+
/*
* Add an event filter to a given event.
*/
@@ -982,6 +978,9 @@ static void armv8pmu_reset(void *info)
pmcr |= ARMV8_PMU_PMCR_LP;

armv8pmu_pmcr_write(pmcr);
+
+ if (cpu_pmu->has_branch_stack)
+ armv8pmu_branch_reset();
}

static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
@@ -1019,6 +1018,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,

hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);

+ if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event))
+ return -EOPNOTSUPP;
+
/*
* CHAIN events only work when paired with an adjacent counter, and it
* never makes sense for a user to open one in isolation, as they'll be
@@ -1135,6 +1137,33 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->reg_pmmir = read_pmmir();
else
cpu_pmu->reg_pmmir = 0;
+ armv8pmu_branch_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+ struct branch_records __percpu *records;
+ int cpu;
+
+ records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL);
+ if (!records)
+ return -ENOMEM;
+
+ /*
+ * FIXME: Memory allocated via records gets completely
+ * consumed here, never required to be freed up later. Hence
+ * losing access to on stack 'records' is acceptable.
+ * Otherwise this alloc handle has to be saved somewhere.
+ */
+ for_each_possible_cpu(cpu) {
+ struct pmu_hw_events *events_cpu;
+ struct branch_records *records_cpu;
+
+ events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+ records_cpu = per_cpu_ptr(records, cpu);
+ events_cpu->branches = records_cpu;
+ }
+ return 0;
}

static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1151,7 +1180,15 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
if (ret)
return ret;

- return probe.present ? 0 : -ENODEV;
+ if (!probe.present)
+ return -ENODEV;
+
+ if (cpu_pmu->has_branch_stack) {
+ ret = branch_records_alloc(cpu_pmu);
+ if (ret)
+ return ret;
+ }
+ return 0;
}

static void armv8pmu_disable_user_access_ipi(void *unused)
@@ -1214,6 +1251,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;

cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
+ cpu_pmu->sched_task = armv8pmu_sched_task;

cpu_pmu->name = name;
cpu_pmu->map_event = map_event;
--
2.25.1



2023-06-16 00:18:30

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU

Hi Anshuman,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/[email protected]/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce (this is a W=1 build):
mkdir -p ~/bin
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm cross compiling tool for clang build
# apt-get install binutils-arm-linux-gnueabi
git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
git fetch arm64 for-next/core
git checkout arm64/for-next/core
b4 shazam https://lore.kernel.org/r/[email protected]
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

| ^~~~~~
drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
140 | PERF_CACHE_MAP_ALL_UNSUPPORTED,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
| ^~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
35 | #define CACHE_OP_UNSUPPORTED 0xFFFF
| ^~~~~~
drivers/perf/arm_pmuv3.c:147:44: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
147 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:133:44: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD'
133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E
| ^~~~~~
drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
140 | PERF_CACHE_MAP_ALL_UNSUPPORTED,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
| ^~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
35 | #define CACHE_OP_UNSUPPORTED 0xFFFF
| ^~~~~~
drivers/perf/arm_pmuv3.c:148:45: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
148 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:134:44: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR'
134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F
| ^~~~~~
drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
140 | PERF_CACHE_MAP_ALL_UNSUPPORTED,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
| ^~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
35 | #define CACHE_OP_UNSUPPORTED 0xFFFF
| ^~~~~~
drivers/perf/arm_pmuv3.c:149:42: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
149 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:131:50: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD'
131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C
| ^~~~~~
drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
140 | PERF_CACHE_MAP_ALL_UNSUPPORTED,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
| ^~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
35 | #define CACHE_OP_UNSUPPORTED 0xFFFF
| ^~~~~~
drivers/perf/arm_pmuv3.c:150:43: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
150 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:132:50: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR'
132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D
| ^~~~~~
drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
140 | PERF_CACHE_MAP_ALL_UNSUPPORTED,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
| ^~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
35 | #define CACHE_OP_UNSUPPORTED 0xFFFF
| ^~~~~~
drivers/perf/arm_pmuv3.c:152:44: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
152 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:148:46: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD'
148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060
| ^~~~~~
drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
140 | PERF_CACHE_MAP_ALL_UNSUPPORTED,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
| ^~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
35 | #define CACHE_OP_UNSUPPORTED 0xFFFF
| ^~~~~~
drivers/perf/arm_pmuv3.c:153:45: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
153 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:149:46: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR'
149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061
| ^~~~~~
drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
140 | PERF_CACHE_MAP_ALL_UNSUPPORTED,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
| ^~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
35 | #define CACHE_OP_UNSUPPORTED 0xFFFF
| ^~~~~~
>> drivers/perf/arm_pmuv3.c:714:3: error: call to undeclared function 'armv8pmu_branch_enable'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
714 | armv8pmu_branch_enable(event);
| ^
>> drivers/perf/arm_pmuv3.c:720:3: error: call to undeclared function 'armv8pmu_branch_disable'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
720 | armv8pmu_branch_disable(event);
| ^
>> drivers/perf/arm_pmuv3.c:801:4: error: call to undeclared function 'armv8pmu_branch_read'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
801 | armv8pmu_branch_read(cpuc, event);
| ^
drivers/perf/arm_pmuv3.c:801:4: note: did you mean 'armv8pmu_pmcr_read'?
drivers/perf/arm_pmuv3.c:430:19: note: 'armv8pmu_pmcr_read' declared here
430 | static inline u32 armv8pmu_pmcr_read(void)
| ^
>> drivers/perf/arm_pmuv3.c:908:3: error: call to undeclared function 'armv8pmu_branch_reset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
908 | armv8pmu_branch_reset();
| ^
drivers/perf/arm_pmuv3.c:983:3: error: call to undeclared function 'armv8pmu_branch_reset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
983 | armv8pmu_branch_reset();
| ^
>> drivers/perf/arm_pmuv3.c:1021:34: error: call to undeclared function 'armv8pmu_branch_attr_valid'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
1021 | if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event))
| ^
>> drivers/perf/arm_pmuv3.c:1140:2: error: call to undeclared function 'armv8pmu_branch_probe'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
1140 | armv8pmu_branch_probe(cpu_pmu);
| ^
55 warnings and 7 errors generated.


vim +/armv8pmu_branch_enable +714 drivers/perf/arm_pmuv3.c

701
702 static void armv8pmu_enable_event(struct perf_event *event)
703 {
704 /*
705 * Enable counter and interrupt, and set the counter to count
706 * the event that we're interested in.
707 */
708 armv8pmu_disable_event_counter(event);
709 armv8pmu_write_event_type(event);
710 armv8pmu_enable_event_irq(event);
711 armv8pmu_enable_event_counter(event);
712
713 if (has_branch_stack(event))
> 714 armv8pmu_branch_enable(event);
715 }
716
717 static void armv8pmu_disable_event(struct perf_event *event)
718 {
719 if (has_branch_stack(event))
> 720 armv8pmu_branch_disable(event);
721
722 armv8pmu_disable_event_counter(event);
723 armv8pmu_disable_event_irq(event);
724 }
725
726 static void armv8pmu_start(struct arm_pmu *cpu_pmu)
727 {
728 struct perf_event_context *ctx;
729 int nr_user = 0;
730
731 ctx = perf_cpu_task_ctx();
732 if (ctx)
733 nr_user = ctx->nr_user;
734
735 if (sysctl_perf_user_access && nr_user)
736 armv8pmu_enable_user_access(cpu_pmu);
737 else
738 armv8pmu_disable_user_access();
739
740 /* Enable all counters */
741 armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
742 }
743
744 static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
745 {
746 /* Disable all counters */
747 armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
748 }
749
750 static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
751 {
752 u32 pmovsr;
753 struct perf_sample_data data;
754 struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
755 struct pt_regs *regs;
756 int idx;
757
758 /*
759 * Get and reset the IRQ flags
760 */
761 pmovsr = armv8pmu_getreset_flags();
762
763 /*
764 * Did an overflow occur?
765 */
766 if (!armv8pmu_has_overflowed(pmovsr))
767 return IRQ_NONE;
768
769 /*
770 * Handle the counter(s) overflow(s)
771 */
772 regs = get_irq_regs();
773
774 /*
775 * Stop the PMU while processing the counter overflows
776 * to prevent skews in group events.
777 */
778 armv8pmu_stop(cpu_pmu);
779 for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
780 struct perf_event *event = cpuc->events[idx];
781 struct hw_perf_event *hwc;
782
783 /* Ignore if we don't have an event. */
784 if (!event)
785 continue;
786
787 /*
788 * We have a single interrupt for all counters. Check that
789 * each counter has overflowed before we process it.
790 */
791 if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
792 continue;
793
794 hwc = &event->hw;
795 armpmu_event_update(event);
796 perf_sample_data_init(&data, 0, hwc->last_period);
797 if (!armpmu_event_set_period(event))
798 continue;
799
800 if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
> 801 armv8pmu_branch_read(cpuc, event);
802 perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack);
803 }
804
805 /*
806 * Perf event overflow will queue the processing of the event as
807 * an irq_work which will be taken care of in the handling of
808 * IPI_IRQ_WORK.
809 */
810 if (perf_event_overflow(event, &data, regs))
811 cpu_pmu->disable(event);
812 }
813 armv8pmu_start(cpu_pmu);
814
815 return IRQ_HANDLED;
816 }
817
818 static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
819 struct arm_pmu *cpu_pmu)
820 {
821 int idx;
822
823 for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
824 if (!test_and_set_bit(idx, cpuc->used_mask))
825 return idx;
826 }
827 return -EAGAIN;
828 }
829
830 static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
831 struct arm_pmu *cpu_pmu)
832 {
833 int idx;
834
835 /*
836 * Chaining requires two consecutive event counters, where
837 * the lower idx must be even.
838 */
839 for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
840 if (!test_and_set_bit(idx, cpuc->used_mask)) {
841 /* Check if the preceding even counter is available */
842 if (!test_and_set_bit(idx - 1, cpuc->used_mask))
843 return idx;
844 /* Release the Odd counter */
845 clear_bit(idx, cpuc->used_mask);
846 }
847 }
848 return -EAGAIN;
849 }
850
851 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
852 struct perf_event *event)
853 {
854 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
855 struct hw_perf_event *hwc = &event->hw;
856 unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
857
858 /* Always prefer to place a cycle counter into the cycle counter. */
859 if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
860 if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
861 return ARMV8_IDX_CYCLE_COUNTER;
862 else if (armv8pmu_event_is_64bit(event) &&
863 armv8pmu_event_want_user_access(event) &&
864 !armv8pmu_has_long_event(cpu_pmu))
865 return -EAGAIN;
866 }
867
868 /*
869 * Otherwise use events counters
870 */
871 if (armv8pmu_event_is_chained(event))
872 return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
873 else
874 return armv8pmu_get_single_idx(cpuc, cpu_pmu);
875 }
876
877 static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
878 struct perf_event *event)
879 {
880 int idx = event->hw.idx;
881
882 clear_bit(idx, cpuc->used_mask);
883 if (armv8pmu_event_is_chained(event))
884 clear_bit(idx - 1, cpuc->used_mask);
885 }
886
887 static int armv8pmu_user_event_idx(struct perf_event *event)
888 {
889 if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event))
890 return 0;
891
892 /*
893 * We remap the cycle counter index to 32 to
894 * match the offset applied to the rest of
895 * the counter indices.
896 */
897 if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER)
898 return ARMV8_IDX_CYCLE_COUNTER_USER;
899
900 return event->hw.idx;
901 }
902
903 static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
904 {
905 struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
906
907 if (sched_in && armpmu->has_branch_stack)
> 908 armv8pmu_branch_reset();
909 }
910

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-06-16 02:16:27

by Anshuman Khandual

[permalink] [raw]
Subject: Re: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU



On 6/16/23 05:12, kernel test robot wrote:
> Hi Anshuman,
>
> kernel test robot noticed the following build errors:
>
> [auto build test ERROR on arm64/for-next/core]
> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/[email protected]/config)
> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
> reproduce (this is a W=1 build):
> mkdir -p ~/bin
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # install arm cross compiling tool for clang build
> # apt-get install binutils-arm-linux-gnueabi
> git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
> git fetch arm64 for-next/core
> git checkout arm64/for-next/core
> b4 shazam https://lore.kernel.org/r/[email protected]
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/

I am unable to reproduce this on mainline 6.4-rc6 with the default cross
compiler on a W=1 build. Looking at all the other problems reported on the
file, it seems something is not right here. The reported build problems around
these callbacks, i.e. armv8pmu_branch_XXXX(), do not make sense, as they are
available via CONFIG_PERF_EVENTS, which is also enabled along with
CONFIG_ARM_PMUV3 in this test config.

2023-06-16 04:42:43

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU

Hi Anshuman,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
config: arm-allmodconfig (https://download.01.org/0day-ci/archive/20230616/[email protected]/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.3.0
reproduce (this is a W=1 build):
mkdir -p ~/bin
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
git fetch arm64 for-next/core
git checkout arm64/for-next/core
b4 shazam https://lore.kernel.org/r/[email protected]
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

drivers/perf/arm_pmuv3.c:143:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD'
143 | [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:122:65: warning: initialized field overwritten [-Woverride-init]
122 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041
| ^~~~~~
drivers/perf/arm_pmuv3.c:144:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR'
144 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:122:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[0][1][0]')
122 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041
| ^~~~~~
drivers/perf/arm_pmuv3.c:144:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR'
144 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:124:65: warning: initialized field overwritten [-Woverride-init]
124 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043
| ^~~~~~
drivers/perf/arm_pmuv3.c:145:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR'
145 | [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:124:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[0][1][1]')
124 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043
| ^~~~~~
drivers/perf/arm_pmuv3.c:145:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR'
145 | [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:133:65: warning: initialized field overwritten [-Woverride-init]
133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E
| ^~~~~~
drivers/perf/arm_pmuv3.c:147:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD'
147 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:133:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][0][0]')
133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E
| ^~~~~~
drivers/perf/arm_pmuv3.c:147:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD'
147 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:134:65: warning: initialized field overwritten [-Woverride-init]
134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F
| ^~~~~~
drivers/perf/arm_pmuv3.c:148:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR'
148 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:134:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][1][0]')
134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F
| ^~~~~~
drivers/perf/arm_pmuv3.c:148:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR'
148 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:131:65: warning: initialized field overwritten [-Woverride-init]
131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C
| ^~~~~~
drivers/perf/arm_pmuv3.c:149:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD'
149 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:131:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][0][1]')
131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C
| ^~~~~~
drivers/perf/arm_pmuv3.c:149:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD'
149 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:132:65: warning: initialized field overwritten [-Woverride-init]
132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D
| ^~~~~~
drivers/perf/arm_pmuv3.c:150:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR'
150 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:132:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][1][1]')
132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D
| ^~~~~~
drivers/perf/arm_pmuv3.c:150:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR'
150 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:148:65: warning: initialized field overwritten [-Woverride-init]
148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060
| ^~~~~~
drivers/perf/arm_pmuv3.c:152:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD'
152 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:148:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[6][0][0]')
148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060
| ^~~~~~
drivers/perf/arm_pmuv3.c:152:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD'
152 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:149:65: warning: initialized field overwritten [-Woverride-init]
149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061
| ^~~~~~
drivers/perf/arm_pmuv3.c:153:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR'
153 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/perf/arm_pmuv3.h:149:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[6][1][0]')
149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061
| ^~~~~~
drivers/perf/arm_pmuv3.c:153:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR'
153 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/perf/arm_pmuv3.c: In function 'armv8pmu_enable_event':
>> drivers/perf/arm_pmuv3.c:714:17: error: implicit declaration of function 'armv8pmu_branch_enable'; did you mean 'static_branch_enable'? [-Werror=implicit-function-declaration]
714 | armv8pmu_branch_enable(event);
| ^~~~~~~~~~~~~~~~~~~~~~
| static_branch_enable
drivers/perf/arm_pmuv3.c: In function 'armv8pmu_disable_event':
>> drivers/perf/arm_pmuv3.c:720:17: error: implicit declaration of function 'armv8pmu_branch_disable'; did you mean 'static_branch_disable'? [-Werror=implicit-function-declaration]
720 | armv8pmu_branch_disable(event);
| ^~~~~~~~~~~~~~~~~~~~~~~
| static_branch_disable
drivers/perf/arm_pmuv3.c: In function 'armv8pmu_handle_irq':
>> drivers/perf/arm_pmuv3.c:801:25: error: implicit declaration of function 'armv8pmu_branch_read'; did you mean 'armv8pmu_pmcr_read'? [-Werror=implicit-function-declaration]
801 | armv8pmu_branch_read(cpuc, event);
| ^~~~~~~~~~~~~~~~~~~~
| armv8pmu_pmcr_read
drivers/perf/arm_pmuv3.c: In function 'armv8pmu_sched_task':
>> drivers/perf/arm_pmuv3.c:908:17: error: implicit declaration of function 'armv8pmu_branch_reset' [-Werror=implicit-function-declaration]
908 | armv8pmu_branch_reset();
| ^~~~~~~~~~~~~~~~~~~~~
drivers/perf/arm_pmuv3.c: In function '__armv8_pmuv3_map_event':
>> drivers/perf/arm_pmuv3.c:1021:41: error: implicit declaration of function 'armv8pmu_branch_attr_valid' [-Werror=implicit-function-declaration]
1021 | if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event))
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/perf/arm_pmuv3.c: In function '__armv8pmu_probe_pmu':
>> drivers/perf/arm_pmuv3.c:1140:9: error: implicit declaration of function 'armv8pmu_branch_probe'; did you mean 'arm_pmu_acpi_probe'? [-Werror=implicit-function-declaration]
1140 | armv8pmu_branch_probe(cpu_pmu);
| ^~~~~~~~~~~~~~~~~~~~~
| arm_pmu_acpi_probe
cc1: some warnings being treated as errors


vim +714 drivers/perf/arm_pmuv3.c

701
702 static void armv8pmu_enable_event(struct perf_event *event)
703 {
704 /*
705 * Enable counter and interrupt, and set the counter to count
706 * the event that we're interested in.
707 */
708 armv8pmu_disable_event_counter(event);
709 armv8pmu_write_event_type(event);
710 armv8pmu_enable_event_irq(event);
711 armv8pmu_enable_event_counter(event);
712
713 if (has_branch_stack(event))
> 714 armv8pmu_branch_enable(event);
715 }
716
717 static void armv8pmu_disable_event(struct perf_event *event)
718 {
719 if (has_branch_stack(event))
> 720 armv8pmu_branch_disable(event);
721
722 armv8pmu_disable_event_counter(event);
723 armv8pmu_disable_event_irq(event);
724 }
725
726 static void armv8pmu_start(struct arm_pmu *cpu_pmu)
727 {
728 struct perf_event_context *ctx;
729 int nr_user = 0;
730
731 ctx = perf_cpu_task_ctx();
732 if (ctx)
733 nr_user = ctx->nr_user;
734
735 if (sysctl_perf_user_access && nr_user)
736 armv8pmu_enable_user_access(cpu_pmu);
737 else
738 armv8pmu_disable_user_access();
739
740 /* Enable all counters */
741 armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
742 }
743
744 static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
745 {
746 /* Disable all counters */
747 armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
748 }
749
750 static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
751 {
752 u32 pmovsr;
753 struct perf_sample_data data;
754 struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
755 struct pt_regs *regs;
756 int idx;
757
758 /*
759 * Get and reset the IRQ flags
760 */
761 pmovsr = armv8pmu_getreset_flags();
762
763 /*
764 * Did an overflow occur?
765 */
766 if (!armv8pmu_has_overflowed(pmovsr))
767 return IRQ_NONE;
768
769 /*
770 * Handle the counter(s) overflow(s)
771 */
772 regs = get_irq_regs();
773
774 /*
775 * Stop the PMU while processing the counter overflows
776 * to prevent skews in group events.
777 */
778 armv8pmu_stop(cpu_pmu);
779 for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
780 struct perf_event *event = cpuc->events[idx];
781 struct hw_perf_event *hwc;
782
783 /* Ignore if we don't have an event. */
784 if (!event)
785 continue;
786
787 /*
788 * We have a single interrupt for all counters. Check that
789 * each counter has overflowed before we process it.
790 */
791 if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
792 continue;
793
794 hwc = &event->hw;
795 armpmu_event_update(event);
796 perf_sample_data_init(&data, 0, hwc->last_period);
797 if (!armpmu_event_set_period(event))
798 continue;
799
800 if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
> 801 armv8pmu_branch_read(cpuc, event);
802 perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack);
803 }
804
805 /*
806 * Perf event overflow will queue the processing of the event as
807 * an irq_work which will be taken care of in the handling of
808 * IPI_IRQ_WORK.
809 */
810 if (perf_event_overflow(event, &data, regs))
811 cpu_pmu->disable(event);
812 }
813 armv8pmu_start(cpu_pmu);
814
815 return IRQ_HANDLED;
816 }
817
818 static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
819 struct arm_pmu *cpu_pmu)
820 {
821 int idx;
822
823 for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
824 if (!test_and_set_bit(idx, cpuc->used_mask))
825 return idx;
826 }
827 return -EAGAIN;
828 }
829
830 static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
831 struct arm_pmu *cpu_pmu)
832 {
833 int idx;
834
835 /*
836 * Chaining requires two consecutive event counters, where
837 * the lower idx must be even.
838 */
839 for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
840 if (!test_and_set_bit(idx, cpuc->used_mask)) {
841 /* Check if the preceding even counter is available */
842 if (!test_and_set_bit(idx - 1, cpuc->used_mask))
843 return idx;
844 /* Release the Odd counter */
845 clear_bit(idx, cpuc->used_mask);
846 }
847 }
848 return -EAGAIN;
849 }
850
851 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
852 struct perf_event *event)
853 {
854 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
855 struct hw_perf_event *hwc = &event->hw;
856 unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
857
858 /* Always prefer to place a cycle counter into the cycle counter. */
859 if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
860 if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
861 return ARMV8_IDX_CYCLE_COUNTER;
862 else if (armv8pmu_event_is_64bit(event) &&
863 armv8pmu_event_want_user_access(event) &&
864 !armv8pmu_has_long_event(cpu_pmu))
865 return -EAGAIN;
866 }
867
868 /*
869 * Otherwise use events counters
870 */
871 if (armv8pmu_event_is_chained(event))
872 return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
873 else
874 return armv8pmu_get_single_idx(cpuc, cpu_pmu);
875 }
876
877 static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
878 struct perf_event *event)
879 {
880 int idx = event->hw.idx;
881
882 clear_bit(idx, cpuc->used_mask);
883 if (armv8pmu_event_is_chained(event))
884 clear_bit(idx - 1, cpuc->used_mask);
885 }
886
887 static int armv8pmu_user_event_idx(struct perf_event *event)
888 {
889 if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event))
890 return 0;
891
892 /*
893 * We remap the cycle counter index to 32 to
894 * match the offset applied to the rest of
895 * the counter indices.
896 */
897 if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER)
898 return ARMV8_IDX_CYCLE_COUNTER_USER;
899
900 return event->hw.idx;
901 }
902
903 static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
904 {
905 struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
906
907 if (sched_in && armpmu->has_branch_stack)
> 908 armv8pmu_branch_reset();
909 }
910

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-06-16 09:27:26

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU

On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
> On 6/16/23 05:12, kernel test robot wrote:
> > kernel test robot noticed the following build errors:
> >
> > [auto build test ERROR on arm64/for-next/core]
> > [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
> > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > And when submitting patch, we suggest to use '--base' as documented in
> > https://git-scm.com/docs/git-format-patch#_base_tree_information]
> >
> > url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
> > base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> > patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
> > patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
> > config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/[email protected]/config)
> > compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
> > reproduce (this is a W=1 build):
> > mkdir -p ~/bin
> > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> > chmod +x ~/bin/make.cross
> > # install arm cross compiling tool for clang build
> > # apt-get install binutils-arm-linux-gnueabi
> > git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
> > git fetch arm64 for-next/core
> > git checkout arm64/for-next/core
> > b4 shazam https://lore.kernel.org/r/[email protected]
> > # save the config file
> > mkdir build_dir && cp config build_dir/.config
> > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
> > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
>
> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
> on a W=1 build. Looking at all other problems reported on the file, it seems
> something is not right here. Reported build problems around these callbacks,
> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
> test config.

Have you tried applying this series on top of the arm64 for-next/core
branch? That's what the robot is testing (in the absence of a --base
option when generating the patches).

--
Catalin

2023-06-19 06:07:19

by Anshuman Khandual

[permalink] [raw]
Subject: Re: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU



On 6/16/23 14:51, Catalin Marinas wrote:
> On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
>> On 6/16/23 05:12, kernel test robot wrote:
>>> kernel test robot noticed the following build errors:
>>>
>>> [auto build test ERROR on arm64/for-next/core]
>>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
>>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>>> And when submitting patch, we suggest to use '--base' as documented in
>>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>>>
>>> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
>>> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
>>> patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
>>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
>>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/[email protected]/config)
>>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
>>> reproduce (this is a W=1 build):
>>> mkdir -p ~/bin
>>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>>> chmod +x ~/bin/make.cross
>>> # install arm cross compiling tool for clang build
>>> # apt-get install binutils-arm-linux-gnueabi
>>> git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
>>> git fetch arm64 for-next/core
>>> git checkout arm64/for-next/core
>>> b4 shazam https://lore.kernel.org/r/[email protected]
>>> # save the config file
>>> mkdir build_dir && cp config build_dir/.config
>>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
>>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
>>
>> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
>> on a W=1 build. Looking at all other problems reported on the file, it seems
>> something is not right here. Reported build problems around these callbacks,
>> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
>> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
>> test config.
>
> Have you tried applying this series on top of the arm64 for-next/core
> branch? That's what the robot it testing (in the absence of a --base
> option when generating the patches).

Right, it turned out to be a build problem on the arm (32 bit) platform instead.
After arm_pmuv3.c moved into the common ./drivers/perf from ./arch/arm64/kernel/,
it can no longer use functions defined in arch/arm64/include/asm/perf_event.h
without breaking arm (32 bit). The following code block needs to be moved out
of arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h
(which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or
maybe arm_pmu.h (which is one step higher in the abstraction).

struct pmu_hw_events;
struct arm_pmu;
struct perf_event;

#ifdef CONFIG_PERF_EVENTS
static inline bool has_branch_stack(struct perf_event *event);

#ifdef CONFIG_ARM64_BRBE
void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event);
bool armv8pmu_branch_attr_valid(struct perf_event *event);
void armv8pmu_branch_enable(struct perf_event *event);
void armv8pmu_branch_disable(struct perf_event *event);
void armv8pmu_branch_probe(struct arm_pmu *arm_pmu);
void armv8pmu_branch_reset(void);
int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu);
void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu);
void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx);
#else
static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
{
WARN_ON_ONCE(!has_branch_stack(event));
}

static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
{
WARN_ON_ONCE(!has_branch_stack(event));
return false;
}

static inline void armv8pmu_branch_enable(struct perf_event *event)
{
WARN_ON_ONCE(!has_branch_stack(event));
}

static inline void armv8pmu_branch_disable(struct perf_event *event)
{
WARN_ON_ONCE(!has_branch_stack(event));
}

static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
static inline void armv8pmu_branch_reset(void) { }
static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) { return 0; }
static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) { }
static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) { }
#endif
#endif

2023-06-19 09:18:14

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU

On Mon, 19 Jun 2023 06:45:07 +0100,
Anshuman Khandual <[email protected]> wrote:
>
>
>
> On 6/16/23 14:51, Catalin Marinas wrote:
> > On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
> >> On 6/16/23 05:12, kernel test robot wrote:
> >>> kernel test robot noticed the following build errors:
> >>>
> >>> [auto build test ERROR on arm64/for-next/core]
> >>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
> >>> [If your patch is applied to the wrong git tree, kindly drop us a note.
> >>> And when submitting patch, we suggest to use '--base' as documented in
> >>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> >>>
> >>> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
> >>> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> >>> patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
> >>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
> >>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/[email protected]/config)
> >>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
> >>> reproduce (this is a W=1 build):
> >>> mkdir -p ~/bin
> >>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> >>> chmod +x ~/bin/make.cross
> >>> # install arm cross compiling tool for clang build
> >>> # apt-get install binutils-arm-linux-gnueabi
> >>> git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
> >>> git fetch arm64 for-next/core
> >>> git checkout arm64/for-next/core
> >>> b4 shazam https://lore.kernel.org/r/[email protected]
> >>> # save the config file
> >>> mkdir build_dir && cp config build_dir/.config
> >>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
> >>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
> >>
> >> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
> >> on a W=1 build. Looking at all other problems reported on the file, it seems
> >> something is not right here. Reported build problems around these callbacks,
> >> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
> >> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
> >> test config.
> >
> > Have you tried applying this series on top of the arm64 for-next/core
> > branch? That's what the robot it testing (in the absence of a --base
> > option when generating the patches).
>
> Right, it turned out to be a build problem on arm (32 bit) platform instead.
> After arm_pmuv3.c moved into common ./drivers/perf from ./arch/arm64/kernel/,
> it can no longer access arch/arm64/include/asm/perf_event.h defined functions
> without breaking arm (32) bit. The following code block needs to be moved out
> from arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h
> (which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or
> may be arm_pmu.h (which is one step higher in the abstraction).

No, that's the wrong approach. The 32bit backend must have its own
stubs for the stuff it implements or not.

Just add something like the patch below, and please *test* that a
32bit VM using PMUv3 doesn't have any regression.

Thanks,

M.

From 017362ca518e6d6ac3262514d1f7f27e73232799 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <[email protected]>
Date: Mon, 19 Jun 2023 10:05:52 +0100
Subject: [PATCH] 32bit hack

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm/include/asm/arm_pmuv3.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index f4db3e75d75f..c4bcb7a18267 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -244,4 +244,22 @@ static inline bool is_pmuv3p5(int pmuver)
return pmuver >= ARMV8_PMU_DFR_VER_V3P5;
}

+/* BRBE stubs */
+static inline void armv8pmu_branch_enable(struct perf_event *event) { }
+static inline void armv8pmu_branch_disable(struct perf_event *event) { }
+static inline void armv8pmu_branch_read(struct pmu_hw_events * cpuc,
+ struct perf_event *event) { }
+static inline void armv8pmu_branch_save(struct arm_pmu *armpmu, void *ctx) {}
+static inline void armv8pmu_branch_reset(void) {}
+static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
+{
+ return false;
+}
+static inline void armv8pmu_branch_probe(struct arm_pmu *armpmu) {}
+static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *armpmu)
+{
+ return 0;
+}
+static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *armpmu) {}
+
#endif
--
2.39.2


--
Without deviation from the norm, progress is not possible.

2023-06-22 03:08:58

by Anshuman Khandual

[permalink] [raw]
Subject: Re: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU



On 6/19/23 14:38, Marc Zyngier wrote:
> On Mon, 19 Jun 2023 06:45:07 +0100,
> Anshuman Khandual <[email protected]> wrote:
>>
>>
>>
>> On 6/16/23 14:51, Catalin Marinas wrote:
>>> On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
>>>> On 6/16/23 05:12, kernel test robot wrote:
>>>>> kernel test robot noticed the following build errors:
>>>>>
>>>>> [auto build test ERROR on arm64/for-next/core]
>>>>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
>>>>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>>>>> And when submitting patch, we suggest to use '--base' as documented in
>>>>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>>>>>
>>>>> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
>>>>> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
>>>>> patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
>>>>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
>>>>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/[email protected]/config)
>>>>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
>>>>> reproduce (this is a W=1 build):
>>>>> mkdir -p ~/bin
>>>>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>>>>> chmod +x ~/bin/make.cross
>>>>> # install arm cross compiling tool for clang build
>>>>> # apt-get install binutils-arm-linux-gnueabi
>>>>> git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
>>>>> git fetch arm64 for-next/core
>>>>> git checkout arm64/for-next/core
>>>>> b4 shazam https://lore.kernel.org/r/[email protected]
>>>>> # save the config file
>>>>> mkdir build_dir && cp config build_dir/.config
>>>>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
>>>>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
>>>>
>>>> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
>>>> on a W=1 build. Looking at all other problems reported on the file, it seems
>>>> something is not right here. Reported build problems around these callbacks,
>>>> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
>>>> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
>>>> test config.
>>>
>>> Have you tried applying this series on top of the arm64 for-next/core
>>> branch? That's what the robot it testing (in the absence of a --base
>>> option when generating the patches).
>>
>> Right, it turned out to be a build problem on arm (32 bit) platform instead.
>> After arm_pmuv3.c moved into common ./drivers/perf from ./arch/arm64/kernel/,
>> it can no longer access arch/arm64/include/asm/perf_event.h defined functions
>> without breaking arm (32) bit. The following code block needs to be moved out
>> from arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h
>> (which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or
>> may be arm_pmu.h (which is one step higher in the abstraction).
>
> No, that's the wrong approach. The 32bit backend must have its own
> stubs for the stuff it implements or not.

Okay.


>
> Just add something like the patch below, and please *test* that a
> 32bit VM using PMUv3 doesn't have any regression.

Sure.

>
> Thanks,
>
> M.
>
>>From 017362ca518e6d6ac3262514d1f7f27e73232799 Mon Sep 17 00:00:00 2001
> From: Marc Zyngier <[email protected]>
> Date: Mon, 19 Jun 2023 10:05:52 +0100
> Subject: [PATCH] 32bit hack
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm/include/asm/arm_pmuv3.h | 18 ++++++++++++++++++
> 1 file changed, 18 insertions(+)
>
> diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
> index f4db3e75d75f..c4bcb7a18267 100644
> --- a/arch/arm/include/asm/arm_pmuv3.h
> +++ b/arch/arm/include/asm/arm_pmuv3.h
> @@ -244,4 +244,22 @@ static inline bool is_pmuv3p5(int pmuver)
> return pmuver >= ARMV8_PMU_DFR_VER_V3P5;
> }
>
> +/* BRBE stubs */

These stubs also need to be wrapped in an #ifdef CONFIG_PERF_EVENTS block.

> +static inline void armv8pmu_branch_enable(struct perf_event *event) { }
> +static inline void armv8pmu_branch_disable(struct perf_event *event) { }
> +static inline void armv8pmu_branch_read(struct pmu_hw_events * cpuc,
> + struct perf_event *event) { }
> +static inline void armv8pmu_branch_save(struct arm_pmu *armpmu, void *ctx) {}
> +static inline void armv8pmu_branch_reset(void) {}
> +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
> +{
> + return false;
> +}
> +static inline void armv8pmu_branch_probe(struct arm_pmu *armpmu) {}
> +static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *armpmu)
> +{
> + return 0;
> +}
> +static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *armpmu) {}
> +
> #endif

Sure, will make all the necessary changes.