2017-11-08 03:02:12

by Vineet Gupta

[permalink] [raw]
Subject: [PATCH 0/4] ARC perf updates

Hi,

Found these when cleaning up some old branches. The only controversial one
could be the last one.

Thx,
-Vineet

Vineet Gupta (4):
ARCv2: perf: tweak overflow interrupt
ARCv2: perf: optimize given that num counters <= 32
ARC: perf: avoid vmalloc backed mmap
ARCv2: entry: Reduce perf intr return path

arch/arc/Kconfig | 2 +-
arch/arc/include/asm/entry-arcv2.h | 2 ++
arch/arc/kernel/entry-arcv2.S | 23 +++++++++++++++++++++-
arch/arc/kernel/perf_event.c | 40 ++++++++++++++++++++------------------
4 files changed, 46 insertions(+), 21 deletions(-)

--
2.7.4


From 1584731500121443935@xxx Wed Nov 22 02:28:50 +0000 2017
X-GM-THRID: 1584731500121443935
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread


2017-11-08 07:38:28

by Vineet Gupta

[permalink] [raw]
Subject: [PATCH 4/4] ARCv2: entry: Reduce perf intr return path

In the more likely case of returning to kernel from perf interrupt, do a
fast path returning w/o bothering about CONFIG_PREEMPT etc

However, if returning to user space, we need to go thru the usual gyrations,
as checking for pending signals is an absolute must.

Signed-off-by: Vineet Gupta <[email protected]>
---
arch/arc/include/asm/entry-arcv2.h | 2 ++
arch/arc/kernel/entry-arcv2.S | 23 ++++++++++++++++++++++-
2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 257a68f3c2fe..8b49b327b1f9 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -58,6 +58,8 @@
/*------------------------------------------------------------------------*/
.macro INTERRUPT_EPILOGUE called_from

+ ; Assumes STATUS32.Z bit set if return to K
+
.ifnc \called_from, exception
add sp, sp, 12 ; skip BTA/ECR/orig_r0 placeholderss
.endif
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index cc558a25b8fa..9ca1d146426b 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -51,7 +51,7 @@ VECTOR handle_interrupt ; (16) Timer0
VECTOR handle_interrupt ; unused (Timer1)
VECTOR handle_interrupt ; unused (WDT)
VECTOR handle_interrupt ; (19) Inter core Interrupt (IPI)
-VECTOR handle_interrupt ; (20) perf Interrupt
+VECTOR handle_interrupt_pct ; (20) perf Interrupt
VECTOR handle_interrupt ; (21) Software Triggered Intr (Self IPI)
VECTOR handle_interrupt ; unused
VECTOR handle_interrupt ; (23) unused
@@ -97,6 +97,26 @@ ENTRY(handle_interrupt)

END(handle_interrupt)

+ENTRY(handle_interrupt_pct)
+
+ INTERRUPT_PROLOGUE irq
+
+ IRQ_DISABLE
+
+ lr r0, [ICAUSE]
+
+ bl.d arch_do_IRQ
+ mov r1, sp
+
+ ld r0, [sp, PT_status32] ; returning to User/Kernel Mode
+ btst r0, STATUS_U_BIT
+ bnz resume_user_mode_begin
+
+ clri
+ b .Lisr_ret_fast_path_to_k
+
+END(handle_interrupt_pct)
+
;################### Non TLB Exception Handling #############################

ENTRY(EV_SWI)
@@ -224,6 +244,7 @@ debug_marker_l1:
bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U
sr r11, [AUX_IRQ_ACT]

+.Lisr_ret_fast_path_to_k:
INTERRUPT_EPILOGUE irq
rtie

--
2.7.4


From 1584731803178518565@xxx Wed Nov 22 02:33:39 +0000 2017
X-GM-THRID: 1584731803178518565
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-08 07:40:13

by Vineet Gupta

[permalink] [raw]
Subject: [PATCH 1/4] ARCv2: perf: tweak overflow interrupt

Current perf ISR loops thru all 32 counters, checking for each if it
caused the interrupt. Instead only loop thru counters which actually
interrupted (typically 1).

Signed-off-by: Vineet Gupta <[email protected]>
---
arch/arc/kernel/perf_event.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 2ce24e74f879..0eaa132a2c90 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -377,21 +377,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
struct perf_sample_data data;
struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
struct pt_regs *regs;
- int active_ints;
+ unsigned int active_ints;
int idx;

arc_pmu_disable(&arc_pmu->pmu);

active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT);
+ if (!active_ints)
+ goto done;

regs = get_irq_regs();

- for (idx = 0; idx < arc_pmu->n_counters; idx++) {
- struct perf_event *event = pmu_cpu->act_counter[idx];
+ do {
+ struct perf_event *event;
struct hw_perf_event *hwc;

- if (!(active_ints & (1 << idx)))
- continue;
+ idx = __ffs(active_ints);

/* Reset interrupt flag by writing of 1 */
write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
@@ -404,19 +405,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
write_aux_reg(ARC_REG_PCT_INT_CTRL,
read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));

+ event = pmu_cpu->act_counter[idx];
hwc = &event->hw;

WARN_ON_ONCE(hwc->idx != idx);

arc_perf_event_update(event, &event->hw, event->hw.idx);
perf_sample_data_init(&data, 0, hwc->last_period);
- if (!arc_pmu_event_set_period(event))
- continue;
+ if (arc_pmu_event_set_period(event)) {
+ if (perf_event_overflow(event, &data, regs))
+ arc_pmu_stop(event, 0);
+ }

- if (perf_event_overflow(event, &data, regs))
- arc_pmu_stop(event, 0);
- }
+ active_ints &= ~(1U << idx);
+ } while (active_ints);

+done:
arc_pmu_enable(&arc_pmu->pmu);

return IRQ_HANDLED;
--
2.7.4


From 1583483730652314647@xxx Wed Nov 08 07:56:04 +0000 2017
X-GM-THRID: 1583483730652314647
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-08 03:02:42

by Vineet Gupta

[permalink] [raw]
Subject: [PATCH 2/4] ARCv2: perf: optimize given that num counters <= 32

use ffz primitive which maps to ARCv2 instruction, vs. non atomic
__test_and_set_bit

It is unlikely that we will ever have more than 32 counters, but still add
a BUILD_BUG_ON to catch that

Signed-off-by: Vineet Gupta <[email protected]>
---
arch/arc/kernel/perf_event.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 0eaa132a2c90..8aec462d90fb 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -336,15 +336,12 @@ static int arc_pmu_add(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;

- if (__test_and_set_bit(idx, pmu_cpu->used_mask)) {
- idx = find_first_zero_bit(pmu_cpu->used_mask,
- arc_pmu->n_counters);
- if (idx == arc_pmu->n_counters)
- return -EAGAIN;
-
- __set_bit(idx, pmu_cpu->used_mask);
- hwc->idx = idx;
- }
+ idx = ffz(pmu_cpu->used_mask[0]);
+ if (idx == arc_pmu->n_counters)
+ return -EAGAIN;
+
+ __set_bit(idx, pmu_cpu->used_mask);
+ hwc->idx = idx;

write_aux_reg(ARC_REG_PCT_INDEX, idx);

@@ -465,6 +462,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
pr_err("This core does not have performance counters!\n");
return -ENODEV;
}
+ BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);

READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
--
2.7.4


From 1583513513450732729@xxx Wed Nov 08 15:49:27 +0000 2017
X-GM-THRID: 1583325737432497172
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-08 03:02:39

by Vineet Gupta

[permalink] [raw]
Subject: [PATCH 3/4] ARC: perf: avoid vmalloc backed mmap

For non-aliasing Dcache, vmalloc is not needed.

vmalloc triggers additional D-TLB Misses in the perf interrupt code path
making it slightly inefficient as evident from hackbench runs below.

| [ARCLinux]# perf stat -e dTLB-load-misses --repeat 5 hackbench
| Running with 10*40 (== 400) tasks.
| Time: 35.060
| ...
| Performance counter stats for 'hackbench' (5 runs):

Before: 399235 dTLB-load-misses ( +- 2.08% )
After : 397676 dTLB-load-misses ( +- 2.27% )

Signed-off-by: Vineet Gupta <[email protected]>
---
arch/arc/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c84e67fdea09..f3cad98eeb8f 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -39,7 +39,7 @@ config ARC
select OF
select OF_EARLY_FLATTREE
select OF_RESERVED_MEM
- select PERF_USE_VMALLOC
+ select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_GENERIC_DMA_COHERENT
select HAVE_KERNEL_GZIP
--
2.7.4


From 1583066151251351051@xxx Fri Nov 03 17:18:49 +0000 2017
X-GM-THRID: 1583066151251351051
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread