2022-03-04 06:48:11

by Jinzhou Su

[permalink] [raw]
Subject: [PATCH V2 0/4] Add tracer tool for AMD P-State driver

Hello,

intel_pstate_tracer is a useful tool to analyze the performance of the
intel_pstate driver. We recently upstreamed our AMD P-State driver into the
Linux kernel and would like to use a similar tool to tune the performance
of the driver.

I modified intel_pstate_tracer.py so that it can be imported as a module to
analyze AMD P-State trace events. Other trace events can also benefit from
this change if they need this tool.

intel_pstate_tracer can still be used in the same way as before; its
original functionality isn't broken.

Changes from V1->V2
-Add tracer documentation in amd-pstate RST
-fix typo in amd_pstate_trace.py
-add "Co-developed-by" in patch 1/4

Thanks,
Joe

Jinzhou Su (4):
cpufreq: amd-pstate: Add more tracepoint for AMD P-State module
tools/power/x86/intel_pstate_tracer: make tracer as a module
tools/power/x86/amd_pstate_tracer: Add tracer tool for AMD P-state
Documentation: amd-pstate: add tracer tool introduction

Documentation/admin-guide/pm/amd-pstate.rst | 26 ++
MAINTAINERS | 1 +
drivers/cpufreq/amd-pstate-trace.h | 22 +-
drivers/cpufreq/amd-pstate.c | 59 ++-
.../x86/amd_pstate_tracer/amd_pstate_trace.py | 354 ++++++++++++++++++
.../intel_pstate_tracer.py | 260 +++++++------
6 files changed, 588 insertions(+), 134 deletions(-)
create mode 100755 tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py

--
2.27.0


2022-03-04 09:41:54

by Jinzhou Su

[permalink] [raw]
Subject: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for AMD P-State module

Add frequency, mperf, aperf and tsc in the trace. This can be used
to debug and tune the performance of AMD P-state driver.

Use the counter deltas between successive amd_pstate_update() calls to
calculate the CPU frequency. arch_freq_get_on_cpu() may sleep, so do not
use it here.

Signed-off-by: Jinzhou Su <[email protected]>
Co-developed-by: Huang Rui <[email protected]>
Signed-off-by: Huang Rui <[email protected]>
---
drivers/cpufreq/amd-pstate-trace.h | 22 ++++++++++-
drivers/cpufreq/amd-pstate.c | 59 +++++++++++++++++++++++++++++-
2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
index 647505957d4f..35f38ae67fb1 100644
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
TP_PROTO(unsigned long min_perf,
unsigned long target_perf,
unsigned long capacity,
+ u64 freq,
+ u64 mperf,
+ u64 aperf,
+ u64 tsc,
unsigned int cpu_id,
bool changed,
bool fast_switch
@@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
TP_ARGS(min_perf,
target_perf,
capacity,
+ freq,
+ mperf,
+ aperf,
+ tsc,
cpu_id,
changed,
fast_switch
@@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
__field(unsigned long, min_perf)
__field(unsigned long, target_perf)
__field(unsigned long, capacity)
+ __field(unsigned long long, freq)
+ __field(unsigned long long, mperf)
+ __field(unsigned long long, aperf)
+ __field(unsigned long long, tsc)
__field(unsigned int, cpu_id)
__field(bool, changed)
__field(bool, fast_switch)
@@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
__entry->min_perf = min_perf;
__entry->target_perf = target_perf;
__entry->capacity = capacity;
+ __entry->freq = freq;
+ __entry->mperf = mperf;
+ __entry->aperf = aperf;
+ __entry->tsc = tsc;
__entry->cpu_id = cpu_id;
__entry->changed = changed;
__entry->fast_switch = fast_switch;
),

- TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
+ TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
(unsigned long)__entry->min_perf,
(unsigned long)__entry->target_perf,
(unsigned long)__entry->capacity,
+ (unsigned long long)__entry->freq,
+ (unsigned long long)__entry->mperf,
+ (unsigned long long)__entry->aperf,
+ (unsigned long long)__entry->tsc,
(unsigned int)__entry->cpu_id,
(__entry->changed) ? "true" : "false",
(__entry->fast_switch) ? "true" : "false"
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9ce75ed11f8e..7be38bc6a673 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -65,6 +65,18 @@ MODULE_PARM_DESC(shared_mem,

static struct cpufreq_driver amd_pstate_driver;

+/**
+ * struct amd_aperf_mperf
+ * @aperf: actual performance frequency clock count
+ * @mperf: maximum performance frequency clock count
+ * @tsc: time stamp counter
+ */
+struct amd_aperf_mperf {
+ u64 aperf;
+ u64 mperf;
+ u64 tsc;
+};
+
/**
* struct amd_cpudata - private CPU data for AMD P-State
* @cpu: CPU number
@@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
* @min_freq: the frequency that mapped to lowest_perf
* @nominal_freq: the frequency that mapped to nominal_perf
* @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
+ * @prev: Last Aperf/Mperf/tsc count value read from register
+ * @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
@@ -102,6 +117,10 @@ struct amd_cpudata {
u32 nominal_freq;
u32 lowest_nonlinear_freq;

+ struct amd_aperf_mperf cur;
+ struct amd_aperf_mperf prev;
+
+ u64 freq;
bool boost_supported;
};

@@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
max_perf, fast_switch);
}

+static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
+{
+ u64 aperf, mperf, tsc;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rdmsrl(MSR_IA32_APERF, aperf);
+ rdmsrl(MSR_IA32_MPERF, mperf);
+ tsc = rdtsc();
+
+ if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
+ local_irq_restore(flags);
+ return false;
+ }
+
+ local_irq_restore(flags);
+
+ cpudata->cur.aperf = aperf;
+ cpudata->cur.mperf = mperf;
+ cpudata->cur.tsc = tsc;
+ cpudata->cur.aperf -= cpudata->prev.aperf;
+ cpudata->cur.mperf -= cpudata->prev.mperf;
+ cpudata->cur.tsc -= cpudata->prev.tsc;
+
+ cpudata->prev.aperf = aperf;
+ cpudata->prev.mperf = mperf;
+ cpudata->prev.tsc = tsc;
+
+ cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
+
+ return true;
+}
+
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
u32 des_perf, u32 max_perf, bool fast_switch)
{
@@ -226,8 +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
value &= ~AMD_CPPC_MAX_PERF(~0L);
value |= AMD_CPPC_MAX_PERF(max_perf);

- trace_amd_pstate_perf(min_perf, des_perf, max_perf,
- cpudata->cpu, (value != prev), fast_switch);
+ if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
+ trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
+ cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
+ cpudata->cpu, (value != prev), fast_switch);
+ }

if (value == prev)
return;
--
2.27.0

2022-03-04 09:42:57

by Jinzhou Su

[permalink] [raw]
Subject: [PATCH V2 3/4] tools/power/x86/amd_pstate_tracer: Add tracer tool for AMD P-state

Intel P-state tracer is a useful tool to tune and debug the Intel P-state
driver. The AMD P-state tracer imports the Intel P-state tracer as a module.
This tool can be used to analyze the performance of the AMD P-state driver.

Now CPU frequency, load and desired perf can be traced.

Signed-off-by: Jinzhou Su <[email protected]>
---
MAINTAINERS | 1 +
.../x86/amd_pstate_tracer/amd_pstate_trace.py | 354 ++++++++++++++++++
2 files changed, 355 insertions(+)
create mode 100755 tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 4f9acc183cdc..01ac42504dcd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1002,6 +1002,7 @@ L: [email protected]
S: Supported
F: Documentation/admin-guide/pm/amd-pstate.rst
F: drivers/cpufreq/amd-pstate*
+F: tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py

AMD PTDMA DRIVER
M: Sanjay R Mehta <[email protected]>
diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
new file mode 100755
index 000000000000..2dea4032ac56
--- /dev/null
+++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# -*- coding: utf-8 -*-
+#
+""" This utility can be used to debug and tune the performance of the
+AMD P-State driver. It imports intel_pstate_tracer to analyze AMD P-State
+trace event.
+
+Prerequisites:
+ Python version 2.7.x or higher
+ gnuplot 5.0 or higher
+ gnuplot-py 1.8 or higher
+ (Most of the distributions have these required packages. They may be called
+ gnuplot-py, python-gnuplot or python3-gnuplot, gnuplot-nox, ... )
+
+ Kernel config for Linux trace is enabled
+
+ see print_help(): for Usage and Output details
+
+"""
+from __future__ import print_function
+from datetime import datetime
+import subprocess
+import os
+import time
+import re
+import signal
+import sys
+import getopt
+import Gnuplot
+from numpy import *
+from decimal import *
+sys.path.append('../intel_pstate_tracer')
+#import intel_pstate_tracer
+import intel_pstate_tracer as ipt
+
+__license__ = "GPL version 2"
+
+MAX_CPUS = 256
+# Define the csv file columns
+C_COMM = 15
+C_ELAPSED = 14
+C_SAMPLE = 13
+C_DURATION = 12
+C_LOAD = 11
+C_TSC = 10
+C_APERF = 9
+C_MPERF = 8
+C_FREQ = 7
+C_MAX_PERF = 6
+C_DES_PERF = 5
+C_MIN_PERF = 4
+C_USEC = 3
+C_SEC = 2
+C_CPU = 1
+
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, test_name, trace_file
+
+getcontext().prec = 11
+
+sample_num =0
+last_sec_cpu = [0] * MAX_CPUS
+last_usec_cpu = [0] * MAX_CPUS
+
+def plot_per_cpu_freq(cpu_index):
+ """ Plot per cpu frequency """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_frequency.png" % cpu_index
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:7]')
+ g_plot('set ytics 0, 1')
+ g_plot('set ylabel "CPU Frequency (GHz)"')
+ g_plot('set title "{} : frequency : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+ g_plot('set ylabel "CPU frequency"')
+ g_plot('set key off')
+ ipt.set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_FREQ))
+
+def plot_per_cpu_des_perf(cpu_index):
+ """ Plot per cpu desired perf """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_des_perf.png" % cpu_index
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:255]')
+ g_plot('set ylabel "des perf"')
+ g_plot('set title "{} : cpu des perf : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+ g_plot('set key off')
+ ipt.set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DES_PERF))
+
+def plot_per_cpu_load(cpu_index):
+ """ Plot per cpu load """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_load.png" % cpu_index
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:100]')
+ g_plot('set ytics 0, 10')
+ g_plot('set ylabel "CPU load (percent)"')
+ g_plot('set title "{} : cpu load : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+ g_plot('set key off')
+ ipt.set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD))
+
+def plot_all_cpu_frequency():
+ """ Plot all cpu frequencies """
+
+ output_png = 'all_cpu_frequencies.png'
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set ylabel "CPU Frequency (GHz)"')
+ g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_all_cpu_des_perf():
+ """ Plot all cpu desired perf """
+
+ output_png = 'all_cpu_des_perf.png'
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set ylabel "des perf"')
+ g_plot('set title "{} : cpu des perf : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_DES_PERF)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_all_cpu_load():
+ """ Plot all cpu load """
+
+ output_png = 'all_cpu_load.png'
+ g_plot = ipt.common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ g_plot('set yrange [0:100]')
+ g_plot('set ylabel "CPU load (percent)"')
+ g_plot('set title "{} : cpu load : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_LOAD)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask):
+ """ Store master csv file information """
+
+ global graph_data_present
+
+ if cpu_mask[cpu_int] == 0:
+ return
+
+ try:
+ f_handle = open('cpu.csv', 'a')
+ string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %.4f, %u, %u, %u, %.2f, %.3f, %u, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(min_perf), int(des_perf), int(max_perf), freq_ghz, int(mperf), int(aperf), int(tsc), load, duration_ms, sample_num, elapsed_time, common_comm)
+ f_handle.write(string_buffer)
+ f_handle.close()
+ except:
+ print('IO error cpu.csv')
+ return
+
+ graph_data_present = True;
+
+
+def cleanup_data_files():
+ """ clean up existing data files """
+
+ if os.path.exists('cpu.csv'):
+ os.remove('cpu.csv')
+ f_handle = open('cpu.csv', 'a')
+ f_handle.write('common_cpu, common_secs, common_usecs, min_perf, des_perf, max_perf, freq, mperf, aperf, tsc, load, duration_ms, sample_num, elapsed_time, common_comm')
+ f_handle.write('\n')
+ f_handle.close()
+
+def read_trace_data(file_name, cpu_mask):
+ """ Read and parse trace data """
+
+ global current_max_cpu
+ global sample_num, last_sec_cpu, last_usec_cpu, start_time
+
+ try:
+ data = open(file_name, 'r').read()
+ except:
+ print('Error opening ', file_name)
+ sys.exit(2)
+
+ for line in data.splitlines():
+ search_obj = \
+ re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?amd_min_perf=)(\d+)(.*?amd_des_perf=)(\d+)(.*?amd_max_perf=)(\d+)(.*?freq=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)'
+ , line)
+
+ if search_obj:
+ cpu = search_obj.group(3)
+ cpu_int = int(cpu)
+ cpu = str(cpu_int)
+
+ time_pre_dec = search_obj.group(6)
+ time_post_dec = search_obj.group(8)
+ min_perf = search_obj.group(10)
+ des_perf = search_obj.group(12)
+ max_perf = search_obj.group(14)
+ freq = search_obj.group(16)
+ mperf = search_obj.group(18)
+ aperf = search_obj.group(20)
+ tsc = search_obj.group(22)
+
+ common_comm = search_obj.group(2).replace(' ', '')
+
+ if sample_num == 0 :
+ start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000)
+ sample_num += 1
+
+ if last_sec_cpu[cpu_int] == 0 :
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ else :
+ duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int]))
+ duration_ms = Decimal(duration_us) / Decimal(1000)
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time
+ load = Decimal(int(mperf)*100)/ Decimal(tsc)
+ freq_ghz = Decimal(freq)/Decimal(1000000)
+ store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask)
+
+ if cpu_int > current_max_cpu:
+ current_max_cpu = cpu_int
+# Now separate the main overall csv file into per CPU csv files.
+ ipt.split_csv(current_max_cpu, cpu_mask)
+
+
+def signal_handler(signal, frame):
+ print(' SIGINT: Forcing cleanup before exit.')
+ if interval:
+ ipt.disable_trace(trace_file)
+ ipt.clear_trace_file()
+ ipt.free_trace_buffer()
+ sys.exit(0)
+
+trace_file = "/sys/kernel/debug/tracing/events/amd_cpu/enable"
+signal.signal(signal.SIGINT, signal_handler)
+
+interval = ""
+file_name = ""
+cpu_list = ""
+test_name = ""
+memory = "10240"
+graph_data_present = False;
+
+valid1 = False
+valid2 = False
+
+cpu_mask = zeros((MAX_CPUS,), dtype=int)
+
+
+try:
+ opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+except getopt.GetoptError:
+ ipt.print_help('amd_pstate')
+ sys.exit(2)
+for opt, arg in opts:
+ if opt == '-h':
+ print()
+ sys.exit()
+ elif opt in ("-t", "--trace_file"):
+ valid1 = True
+ location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+ file_name = os.path.join(location, arg)
+ elif opt in ("-i", "--interval"):
+ valid1 = True
+ interval = arg
+ elif opt in ("-c", "--cpu"):
+ cpu_list = arg
+ elif opt in ("-n", "--name"):
+ valid2 = True
+ test_name = arg
+ elif opt in ("-m", "--memory"):
+ memory = arg
+
+if not (valid1 and valid2):
+ ipt.print_help('amd_pstate')
+ sys.exit()
+
+if cpu_list:
+ for p in re.split("[,]", cpu_list):
+ if int(p) < MAX_CPUS :
+ cpu_mask[int(p)] = 1
+else:
+ for i in range (0, MAX_CPUS):
+ cpu_mask[i] = 1
+
+if not os.path.exists('results'):
+ os.mkdir('results')
+ ipt.fix_ownership('results')
+
+os.chdir('results')
+if os.path.exists(test_name):
+ print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+ sys.exit()
+os.mkdir(test_name)
+ipt.fix_ownership(test_name)
+os.chdir(test_name)
+
+cur_version = sys.version_info
+print('python version (should be >= 2.7):')
+print(cur_version)
+
+cleanup_data_files()
+
+if interval:
+ file_name = "/sys/kernel/debug/tracing/trace"
+ ipt.clear_trace_file()
+ ipt.set_trace_buffer_size(memory)
+ ipt.enable_trace(trace_file)
+ time.sleep(int(interval))
+ ipt.disable_trace(trace_file)
+
+current_max_cpu = 0
+
+read_trace_data(file_name, cpu_mask)
+
+if interval:
+ ipt.clear_trace_file()
+ ipt.free_trace_buffer()
+
+if graph_data_present == False:
+ print('No valid data to plot')
+ sys.exit(2)
+
+for cpu_no in range(0, current_max_cpu + 1):
+ plot_per_cpu_freq(cpu_no)
+ plot_per_cpu_des_perf(cpu_no)
+ plot_per_cpu_load(cpu_no)
+
+plot_all_cpu_des_perf()
+plot_all_cpu_frequency()
+plot_all_cpu_load()
+
+for root, dirs, files in os.walk('.'):
+ for f in files:
+ ipt.fix_ownership(f)
+
+os.chdir('../../')
--
2.27.0

2022-03-04 10:09:56

by Jinzhou Su

[permalink] [raw]
Subject: [PATCH V2 2/4] tools/power/x86/intel_pstate_tracer: make tracer as a module

Make intel_pstate_tracer usable as a module. Tools for other trace events
can import this module to analyze their trace data.

Signed-off-by: Jinzhou Su <[email protected]>
Acked-by: Doug Smythies <[email protected]>
---
.../intel_pstate_tracer.py | 260 +++++++++---------
1 file changed, 129 insertions(+), 131 deletions(-)

diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index e15e20696d17..b46e9eb8f5aa 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -63,7 +63,7 @@ C_USEC = 3
C_SEC = 2
C_CPU = 1

-global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname, trace_file

# 11 digits covers uptime to 115 days
getcontext().prec = 11
@@ -72,17 +72,17 @@ sample_num =0
last_sec_cpu = [0] * MAX_CPUS
last_usec_cpu = [0] * MAX_CPUS

-def print_help():
- print('intel_pstate_tracer.py:')
+def print_help(driver_name):
+ print('%s_tracer.py:'%driver_name)
print(' Usage:')
print(' If the trace file is available, then to simply parse and plot, use (sudo not required):')
- print(' ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>')
+ print(' ./%s_tracer.py [-c cpus] -t <trace_file> -n <test_name>'%driver_name)
print(' Or')
- print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
+ print(' ./%s_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>'%driver_name)
print(' To generate trace file, parse and plot, use (sudo required):')
- print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
+ print(' sudo ./%s_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>'%driver_name)
print(' Or')
- print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
+ print(' sudo ./%s_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>'%driver_name)
print(' Optional argument:')
print(' cpus: comma separated list of CPUs')
print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
@@ -323,7 +323,7 @@ def set_4_plot_linestyles(g_plot):
g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1')
g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1')

-def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz):
+def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask):
""" Store master csv file information """

global graph_data_present
@@ -342,11 +342,9 @@ def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _t

graph_data_present = True;

-def split_csv():
+def split_csv(current_max_cpu, cpu_mask):
""" seperate the all csv file into per CPU csv files. """

- global current_max_cpu
-
if os.path.exists('cpu.csv'):
for index in range(0, current_max_cpu + 1):
if cpu_mask[int(index)] != 0:
@@ -381,27 +379,25 @@ def clear_trace_file():
print('IO error clearing trace file ')
sys.exit(2)

-def enable_trace():
+def enable_trace(trace_file):
""" Enable trace """

try:
- open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
- , 'w').write("1")
+ open(trace_file,'w').write("1")
except:
print('IO error enabling trace ')
sys.exit(2)

-def disable_trace():
+def disable_trace(trace_file):
""" Disable trace """

try:
- open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
- , 'w').write("0")
+ open(trace_file, 'w').write("0")
except:
print('IO error disabling trace ')
sys.exit(2)

-def set_trace_buffer_size():
+def set_trace_buffer_size(memory):
""" Set trace buffer size """

try:
@@ -421,7 +417,7 @@ def free_trace_buffer():
print('IO error freeing trace buffer ')
sys.exit(2)

-def read_trace_data(filename):
+def read_trace_data(filename, cpu_mask):
""" Read and parse trace data """

global current_max_cpu
@@ -481,135 +477,137 @@ def read_trace_data(filename):
tsc_ghz = Decimal(0)
if duration_ms != Decimal(0) :
tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000)
- store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz)
+ store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask)

if cpu_int > current_max_cpu:
current_max_cpu = cpu_int
# End of for each trace line loop
# Now seperate the main overall csv file into per CPU csv files.
- split_csv()
+ split_csv(current_max_cpu, cpu_mask)

def signal_handler(signal, frame):
print(' SIGINT: Forcing cleanup before exit.')
if interval:
- disable_trace()
+ disable_trace(trace_file)
clear_trace_file()
# Free the memory
free_trace_buffer()
sys.exit(0)

-signal.signal(signal.SIGINT, signal_handler)
+if __name__ == "__main__":
+ trace_file = "/sys/kernel/debug/tracing/events/power/pstate_sample/enable"
+ signal.signal(signal.SIGINT, signal_handler)

-interval = ""
-filename = ""
-cpu_list = ""
-testname = ""
-memory = "10240"
-graph_data_present = False;
+ interval = ""
+ filename = ""
+ cpu_list = ""
+ testname = ""
+ memory = "10240"
+ graph_data_present = False;

-valid1 = False
-valid2 = False
+ valid1 = False
+ valid2 = False

-cpu_mask = zeros((MAX_CPUS,), dtype=int)
+ cpu_mask = zeros((MAX_CPUS,), dtype=int)

-try:
- opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
-except getopt.GetoptError:
- print_help()
- sys.exit(2)
-for opt, arg in opts:
- if opt == '-h':
- print()
+ try:
+ opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+ except getopt.GetoptError:
+ print_help('intel_pstate')
+ sys.exit(2)
+ for opt, arg in opts:
+ if opt == '-h':
+ print_help('intel_pstate')
+ sys.exit()
+ elif opt in ("-t", "--trace_file"):
+ valid1 = True
+ location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+ filename = os.path.join(location, arg)
+ elif opt in ("-i", "--interval"):
+ valid1 = True
+ interval = arg
+ elif opt in ("-c", "--cpu"):
+ cpu_list = arg
+ elif opt in ("-n", "--name"):
+ valid2 = True
+ testname = arg
+ elif opt in ("-m", "--memory"):
+ memory = arg
+
+ if not (valid1 and valid2):
+ print_help('intel_pstate')
sys.exit()
- elif opt in ("-t", "--trace_file"):
- valid1 = True
- location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
- filename = os.path.join(location, arg)
- elif opt in ("-i", "--interval"):
- valid1 = True
- interval = arg
- elif opt in ("-c", "--cpu"):
- cpu_list = arg
- elif opt in ("-n", "--name"):
- valid2 = True
- testname = arg
- elif opt in ("-m", "--memory"):
- memory = arg
-
-if not (valid1 and valid2):
- print_help()
- sys.exit()
-
-if cpu_list:
- for p in re.split("[,]", cpu_list):
- if int(p) < MAX_CPUS :
- cpu_mask[int(p)] = 1
-else:
- for i in range (0, MAX_CPUS):
- cpu_mask[i] = 1
-
-if not os.path.exists('results'):
- os.mkdir('results')
+
+ if cpu_list:
+ for p in re.split("[,]", cpu_list):
+ if int(p) < MAX_CPUS :
+ cpu_mask[int(p)] = 1
+ else:
+ for i in range (0, MAX_CPUS):
+ cpu_mask[i] = 1
+
+ if not os.path.exists('results'):
+ os.mkdir('results')
+ # The regular user needs to own the directory, not root.
+ fix_ownership('results')
+
+ os.chdir('results')
+ if os.path.exists(testname):
+ print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+ sys.exit()
+ os.mkdir(testname)
# The regular user needs to own the directory, not root.
- fix_ownership('results')
-
-os.chdir('results')
-if os.path.exists(testname):
- print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
- sys.exit()
-os.mkdir(testname)
-# The regular user needs to own the directory, not root.
-fix_ownership(testname)
-os.chdir(testname)
-
-# Temporary (or perhaps not)
-cur_version = sys.version_info
-print('python version (should be >= 2.7):')
-print(cur_version)
-
-# Left as "cleanup" for potential future re-run ability.
-cleanup_data_files()
-
-if interval:
- filename = "/sys/kernel/debug/tracing/trace"
- clear_trace_file()
- set_trace_buffer_size()
- enable_trace()
- print('Sleeping for ', interval, 'seconds')
- time.sleep(int(interval))
- disable_trace()
-
-current_max_cpu = 0
-
-read_trace_data(filename)
-
-if interval:
- clear_trace_file()
- # Free the memory
- free_trace_buffer()
-
-if graph_data_present == False:
- print('No valid data to plot')
- sys.exit(2)
-
-for cpu_no in range(0, current_max_cpu + 1):
- plot_perf_busy_with_sample(cpu_no)
- plot_perf_busy(cpu_no)
- plot_durations(cpu_no)
- plot_loads(cpu_no)
-
-plot_pstate_cpu_with_sample()
-plot_pstate_cpu()
-plot_load_cpu()
-plot_frequency_cpu()
-plot_duration_cpu()
-plot_scaled_cpu()
-plot_boost_cpu()
-plot_ghz_cpu()
-
-# It is preferrable, but not necessary, that the regular user owns the files, not root.
-for root, dirs, files in os.walk('.'):
- for f in files:
- fix_ownership(f)
-
-os.chdir('../../')
+ fix_ownership(testname)
+ os.chdir(testname)
+
+ # Temporary (or perhaps not)
+ cur_version = sys.version_info
+ print('python version (should be >= 2.7):')
+ print(cur_version)
+
+ # Left as "cleanup" for potential future re-run ability.
+ cleanup_data_files()
+
+ if interval:
+ filename = "/sys/kernel/debug/tracing/trace"
+ clear_trace_file()
+ set_trace_buffer_size(memory)
+ enable_trace(trace_file)
+ print('Sleeping for ', interval, 'seconds')
+ time.sleep(int(interval))
+ disable_trace(trace_file)
+
+ current_max_cpu = 0
+
+ read_trace_data(filename, cpu_mask)
+
+ if interval:
+ clear_trace_file()
+ # Free the memory
+ free_trace_buffer()
+
+ if graph_data_present == False:
+ print('No valid data to plot')
+ sys.exit(2)
+
+ for cpu_no in range(0, current_max_cpu + 1):
+ plot_perf_busy_with_sample(cpu_no)
+ plot_perf_busy(cpu_no)
+ plot_durations(cpu_no)
+ plot_loads(cpu_no)
+
+ plot_pstate_cpu_with_sample()
+ plot_pstate_cpu()
+ plot_load_cpu()
+ plot_frequency_cpu()
+ plot_duration_cpu()
+ plot_scaled_cpu()
+ plot_boost_cpu()
+ plot_ghz_cpu()
+
+ # It is preferrable, but not necessary, that the regular user owns the files, not root.
+ for root, dirs, files in os.walk('.'):
+ for f in files:
+ fix_ownership(f)
+
+ os.chdir('../../')
--
2.27.0

2022-03-04 10:41:12

by Jinzhou Su

[permalink] [raw]
Subject: [PATCH V2 4/4] Documentation: amd-pstate: add tracer tool introduction

Add amd pstate tracer tool introduction

Signed-off-by: Jinzhou Su <[email protected]>
---
Documentation/admin-guide/pm/amd-pstate.rst | 26 +++++++++++++++++++++
1 file changed, 26 insertions(+)

diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 2f066df4ee9c..17dd7396e8fc 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -369,6 +369,32 @@ governor (for the policies it is attached to), or by the ``CPUFreq`` core (for t
policies with other scaling governors).


+Tracer Tool
+-------------
+
+``amd_pstate_trace.py`` can record and parse the amd-pstate trace log, then
+generate performance plots. This utility can be used to debug and tune the
+performance of the amd-pstate driver. The tracer tool needs to import the
+``intel_pstate_tracer`` module.
+
+The tracer tool is located in linux/tools/power/x86/amd_pstate_tracer. It
+can be used in two ways. If a trace file is available, parse it directly
+with the command ::
+
+ ./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
+
+Or generate the trace file with root privileges, then parse and plot it with the command ::
+
+ sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
+
+The test results can be found in ``results/test_name``. The following is an
+example of part of the output. ::
+
+ common_cpu common_secs common_usecs min_perf des_perf max_perf freq mperf aperf tsc load duration_ms sample_num elapsed_time common_comm
+ CPU_005 712 116384 39 49 166 0.7565 9645075 2214891 38431470 25.1 11.646 469 2.496 kworker/5:0-40
+ CPU_006 712 116408 39 49 166 0.6769 8950227 1839034 37192089 24.06 11.272 470 2.496 kworker/6:0-1264
+
+
Reference
===========

--
2.27.0

2022-03-04 11:23:42

by Huang Rui

[permalink] [raw]
Subject: Re: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for AMD P-State module

On Fri, Mar 04, 2022 at 02:07:21PM +0800, Su, Jinzhou (Joe) wrote:
> Add frequency, mperf, aperf and tsc in the trace. This can be used
> to debug and tune the performance of AMD P-state driver.
>
> Use the time difference between amd_pstate_update to calculate CPU
> frequency. There could be sleep in arch_freq_get_on_cpu, so do not
> use it here.
>
> Signed-off-by: Jinzhou Su <[email protected]>
> Co-developed-by: Huang Rui <[email protected]>
> Signed-off-by: Huang Rui <[email protected]>

Let's remove "Signed-off-by" of me, just leave "Co-developed-by".

> ---
> drivers/cpufreq/amd-pstate-trace.h | 22 ++++++++++-
> drivers/cpufreq/amd-pstate.c | 59 +++++++++++++++++++++++++++++-
> 2 files changed, 78 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
> index 647505957d4f..35f38ae67fb1 100644
> --- a/drivers/cpufreq/amd-pstate-trace.h
> +++ b/drivers/cpufreq/amd-pstate-trace.h
> @@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
> TP_PROTO(unsigned long min_perf,
> unsigned long target_perf,
> unsigned long capacity,
> + u64 freq,
> + u64 mperf,
> + u64 aperf,
> + u64 tsc,
> unsigned int cpu_id,
> bool changed,
> bool fast_switch
> @@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
> TP_ARGS(min_perf,
> target_perf,
> capacity,
> + freq,
> + mperf,
> + aperf,
> + tsc,
> cpu_id,
> changed,
> fast_switch
> @@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
> __field(unsigned long, min_perf)
> __field(unsigned long, target_perf)
> __field(unsigned long, capacity)
> + __field(unsigned long long, freq)
> + __field(unsigned long long, mperf)
> + __field(unsigned long long, aperf)
> + __field(unsigned long long, tsc)
> __field(unsigned int, cpu_id)
> __field(bool, changed)
> __field(bool, fast_switch)
> @@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
> __entry->min_perf = min_perf;
> __entry->target_perf = target_perf;
> __entry->capacity = capacity;
> + __entry->freq = freq;
> + __entry->mperf = mperf;
> + __entry->aperf = aperf;
> + __entry->tsc = tsc;
> __entry->cpu_id = cpu_id;
> __entry->changed = changed;
> __entry->fast_switch = fast_switch;
> ),
>
> - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
> + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
> (unsigned long)__entry->min_perf,
> (unsigned long)__entry->target_perf,
> (unsigned long)__entry->capacity,
> + (unsigned long long)__entry->freq,
> + (unsigned long long)__entry->mperf,
> + (unsigned long long)__entry->aperf,
> + (unsigned long long)__entry->tsc,
> (unsigned int)__entry->cpu_id,
> (__entry->changed) ? "true" : "false",
> (__entry->fast_switch) ? "true" : "false"
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 9ce75ed11f8e..7be38bc6a673 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -65,6 +65,18 @@ MODULE_PARM_DESC(shared_mem,
>
> static struct cpufreq_driver amd_pstate_driver;
>
> +/**
> + * struct amd_aperf_mperf
> + * @aperf: actual performance frequency clock count
> + * @mperf: maximum performance frequency clock count
> + * @tsc: time stamp counter
> + */
> +struct amd_aperf_mperf {
> + u64 aperf;
> + u64 mperf;
> + u64 tsc;
> +};
> +
> /**
> * struct amd_cpudata - private CPU data for AMD P-State
> * @cpu: CPU number
> @@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
> * @min_freq: the frequency that mapped to lowest_perf
> * @nominal_freq: the frequency that mapped to nominal_perf
> * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
> + * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
> + * @prev: Last Aperf/Mperf/tsc count value read from register
> + * @freq: current cpu frequency value
> * @boost_supported: check whether the Processor or SBIOS supports boost mode
> *
> * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
> @@ -102,6 +117,10 @@ struct amd_cpudata {
> u32 nominal_freq;
> u32 lowest_nonlinear_freq;
>
> + struct amd_aperf_mperf cur;
> + struct amd_aperf_mperf prev;
> +
> + u64 freq;
> bool boost_supported;
> };
>
> @@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
> max_perf, fast_switch);
> }
>
> +static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
> +{
> + u64 aperf, mperf, tsc;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + rdmsrl(MSR_IA32_APERF, aperf);
> + rdmsrl(MSR_IA32_MPERF, mperf);
> + tsc = rdtsc();
> +
> + if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
> + local_irq_restore(flags);
> + return false;
> + }
> +
> + local_irq_restore(flags);
> +
> + cpudata->cur.aperf = aperf;
> + cpudata->cur.mperf = mperf;
> + cpudata->cur.tsc = tsc;
> + cpudata->cur.aperf -= cpudata->prev.aperf;
> + cpudata->cur.mperf -= cpudata->prev.mperf;
> + cpudata->cur.tsc -= cpudata->prev.tsc;
> +
> + cpudata->prev.aperf = aperf;
> + cpudata->prev.mperf = mperf;
> + cpudata->prev.tsc = tsc;
> +
> + cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
> +
> + return true;
> +}
> +
> static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> u32 des_perf, u32 max_perf, bool fast_switch)
> {
> @@ -226,8 +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> value &= ~AMD_CPPC_MAX_PERF(~0L);
> value |= AMD_CPPC_MAX_PERF(max_perf);
>
> - trace_amd_pstate_perf(min_perf, des_perf, max_perf,
> - cpudata->cpu, (value != prev), fast_switch);
> + if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
> + trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
> + cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
> + cpudata->cpu, (value != prev), fast_switch);

How about using struct amd_aperf_mperf pointer as one input:

trace_amd_pstate_perf(min_perf, des_perf, max_perf, &cpudata->cur, ...);

You can refer the members of struct amd_aperf_mperf in the
amd-pstate-trace.h:

__entry->mperf = cur->mperf;
__entry->aperf = cur->aperf;
__entry->tsc = cur->tsc;

Thanks,
Ray

2022-03-04 14:38:10

by Huang Rui

[permalink] [raw]
Subject: Re: [PATCH V2 3/4] tools/power/x86/amd_pstate_tracer: Add tracer tool for AMD P-state

On Fri, Mar 04, 2022 at 02:07:23PM +0800, Su, Jinzhou (Joe) wrote:
> Intel P-state tracer is a useful tool to tune and debug Intel P-state
> driver. AMD P-state tracer import intel pstate tracer. This tool can
> be used to analyze the performance of AMD P-state tracer.
>
> Now CPU frequency, load and desired perf can be traced.
>
> Signed-off-by: Jinzhou Su <[email protected]>

Reviewed-by: Huang Rui <[email protected]>

> ---
> MAINTAINERS | 1 +
> .../x86/amd_pstate_tracer/amd_pstate_trace.py | 354 ++++++++++++++++++
> 2 files changed, 355 insertions(+)
> create mode 100755 tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 4f9acc183cdc..01ac42504dcd 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1002,6 +1002,7 @@ L: [email protected]
> S: Supported
> F: Documentation/admin-guide/pm/amd-pstate.rst
> F: drivers/cpufreq/amd-pstate*
> +F: tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
>
> AMD PTDMA DRIVER
> M: Sanjay R Mehta <[email protected]>
> diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
> new file mode 100755
> index 000000000000..2dea4032ac56
> --- /dev/null
> +++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
> @@ -0,0 +1,354 @@
> +#!/usr/bin/env python3
> +# SPDX-License-Identifier: GPL-2.0-only
> +# -*- coding: utf-8 -*-
> +#
> +""" This utility can be used to debug and tune the performance of the
> +AMD P-State driver. It imports intel_pstate_tracer to analyze AMD P-State
> +trace event.
> +
> +Prerequisites:
> + Python version 2.7.x or higher
> + gnuplot 5.0 or higher
> + gnuplot-py 1.8 or higher
> + (Most of the distributions have these required packages. They may be called
> + gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... )
> +
> + Kernel config for Linux trace is enabled
> +
> + see print_help(): for Usage and Output details
> +
> +"""
> +from __future__ import print_function
> +from datetime import datetime
> +import subprocess
> +import os
> +import time
> +import re
> +import signal
> +import sys
> +import getopt
> +import Gnuplot
> +from numpy import *
> +from decimal import *
> +sys.path.append('../intel_pstate_tracer')
> +#import intel_pstate_tracer
> +import intel_pstate_tracer as ipt
> +
> +__license__ = "GPL version 2"
> +
> +MAX_CPUS = 256
> +# Define the csv file columns
> +C_COMM = 15
> +C_ELAPSED = 14
> +C_SAMPLE = 13
> +C_DURATION = 12
> +C_LOAD = 11
> +C_TSC = 10
> +C_APERF = 9
> +C_MPERF = 8
> +C_FREQ = 7
> +C_MAX_PERF = 6
> +C_DES_PERF = 5
> +C_MIN_PERF = 4
> +C_USEC = 3
> +C_SEC = 2
> +C_CPU = 1
> +
> +global sample_num, last_sec_cpu, last_usec_cpu, start_time, test_name, trace_file
> +
> +getcontext().prec = 11
> +
> +sample_num =0
> +last_sec_cpu = [0] * MAX_CPUS
> +last_usec_cpu = [0] * MAX_CPUS
> +
> +def plot_per_cpu_freq(cpu_index):
> + """ Plot per cpu frequency """
> +
> + file_name = 'cpu{:0>3}.csv'.format(cpu_index)
> + if os.path.exists(file_name):
> + output_png = "cpu%03d_frequency.png" % cpu_index
> + g_plot = ipt.common_gnuplot_settings()
> + g_plot('set output "' + output_png + '"')
> + g_plot('set yrange [0:7]')
> + g_plot('set ytics 0, 1')
> + g_plot('set ylabel "CPU Frequency (GHz)"')
> + g_plot('set title "{} : frequency : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
> + g_plot('set ylabel "CPU frequency"')
> + g_plot('set key off')
> + ipt.set_4_plot_linestyles(g_plot)
> + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_FREQ))
> +
> +def plot_per_cpu_des_perf(cpu_index):
> + """ Plot per cpu desired perf """
> +
> + file_name = 'cpu{:0>3}.csv'.format(cpu_index)
> + if os.path.exists(file_name):
> + output_png = "cpu%03d_des_perf.png" % cpu_index
> + g_plot = ipt.common_gnuplot_settings()
> + g_plot('set output "' + output_png + '"')
> + g_plot('set yrange [0:255]')
> + g_plot('set ylabel "des perf"')
> + g_plot('set title "{} : cpu des perf : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
> + g_plot('set key off')
> + ipt.set_4_plot_linestyles(g_plot)
> + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DES_PERF))
> +
> +def plot_per_cpu_load(cpu_index):
> + """ Plot per cpu load """
> +
> + file_name = 'cpu{:0>3}.csv'.format(cpu_index)
> + if os.path.exists(file_name):
> + output_png = "cpu%03d_load.png" % cpu_index
> + g_plot = ipt.common_gnuplot_settings()
> + g_plot('set output "' + output_png + '"')
> + g_plot('set yrange [0:100]')
> + g_plot('set ytics 0, 10')
> + g_plot('set ylabel "CPU load (percent)"')
> + g_plot('set title "{} : cpu load : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
> + g_plot('set key off')
> + ipt.set_4_plot_linestyles(g_plot)
> + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD))
> +
> +def plot_all_cpu_frequency():
> + """ Plot all cpu frequencies """
> +
> + output_png = 'all_cpu_frequencies.png'
> + g_plot = ipt.common_gnuplot_settings()
> + g_plot('set output "' + output_png + '"')
> + g_plot('set ylabel "CPU Frequency (GHz)"')
> + g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(test_name, datetime.now()))
> +
> + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
> + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ)
> + g_plot('title_list = "{}"'.format(title_list))
> + g_plot(plot_str)
> +
> +def plot_all_cpu_des_perf():
> + """ Plot all cpu desired perf """
> +
> + output_png = 'all_cpu_des_perf.png'
> + g_plot = ipt.common_gnuplot_settings()
> + g_plot('set output "' + output_png + '"')
> + g_plot('set ylabel "des perf"')
> + g_plot('set title "{} : cpu des perf : {:%F %H:%M}"'.format(test_name, datetime.now()))
> +
> + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
> + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_DES_PERF)
> + g_plot('title_list = "{}"'.format(title_list))
> + g_plot(plot_str)
> +
> +def plot_all_cpu_load():
> + """ Plot all cpu load """
> +
> + output_png = 'all_cpu_load.png'
> + g_plot = ipt.common_gnuplot_settings()
> + g_plot('set output "' + output_png + '"')
> + g_plot('set yrange [0:100]')
> + g_plot('set ylabel "CPU load (percent)"')
> + g_plot('set title "{} : cpu load : {:%F %H:%M}"'.format(test_name, datetime.now()))
> +
> + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
> + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_LOAD)
> + g_plot('title_list = "{}"'.format(title_list))
> + g_plot(plot_str)
> +
> +def store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask):
> + """ Store master csv file information """
> +
> + global graph_data_present
> +
> + if cpu_mask[cpu_int] == 0:
> + return
> +
> + try:
> + f_handle = open('cpu.csv', 'a')
> + string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %.4f, %u, %u, %u, %.2f, %.3f, %u, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(min_perf), int(des_perf), int(max_perf), freq_ghz, int(mperf), int(aperf), int(tsc), load, duration_ms, sample_num, elapsed_time, common_comm)
> + f_handle.write(string_buffer)
> + f_handle.close()
> + except:
> + print('IO error cpu.csv')
> + return
> +
> + graph_data_present = True;
> +
> +
> +def cleanup_data_files():
> + """ clean up existing data files """
> +
> + if os.path.exists('cpu.csv'):
> + os.remove('cpu.csv')
> + f_handle = open('cpu.csv', 'a')
> + f_handle.write('common_cpu, common_secs, common_usecs, min_perf, des_perf, max_perf, freq, mperf, aperf, tsc, load, duration_ms, sample_num, elapsed_time, common_comm')
> + f_handle.write('\n')
> + f_handle.close()
> +
> +def read_trace_data(file_name, cpu_mask):
> + """ Read and parse trace data """
> +
> + global current_max_cpu
> + global sample_num, last_sec_cpu, last_usec_cpu, start_time
> +
> + try:
> + data = open(file_name, 'r').read()
> + except:
> + print('Error opening ', file_name)
> + sys.exit(2)
> +
> + for line in data.splitlines():
> + search_obj = \
> + re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?amd_min_perf=)(\d+)(.*?amd_des_perf=)(\d+)(.*?amd_max_perf=)(\d+)(.*?freq=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)'
> + , line)
> +
> + if search_obj:
> + cpu = search_obj.group(3)
> + cpu_int = int(cpu)
> + cpu = str(cpu_int)
> +
> + time_pre_dec = search_obj.group(6)
> + time_post_dec = search_obj.group(8)
> + min_perf = search_obj.group(10)
> + des_perf = search_obj.group(12)
> + max_perf = search_obj.group(14)
> + freq = search_obj.group(16)
> + mperf = search_obj.group(18)
> + aperf = search_obj.group(20)
> + tsc = search_obj.group(22)
> +
> + common_comm = search_obj.group(2).replace(' ', '')
> +
> + if sample_num == 0 :
> + start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000)
> + sample_num += 1
> +
> + if last_sec_cpu[cpu_int] == 0 :
> + last_sec_cpu[cpu_int] = time_pre_dec
> + last_usec_cpu[cpu_int] = time_post_dec
> + else :
> + duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int]))
> + duration_ms = Decimal(duration_us) / Decimal(1000)
> + last_sec_cpu[cpu_int] = time_pre_dec
> + last_usec_cpu[cpu_int] = time_post_dec
> + elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time
> + load = Decimal(int(mperf)*100)/ Decimal(tsc)
> + freq_ghz = Decimal(freq)/Decimal(1000000)
> + store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask)
> +
> + if cpu_int > current_max_cpu:
> + current_max_cpu = cpu_int
> +# Now separate the main overall csv file into per CPU csv files.
> + ipt.split_csv(current_max_cpu, cpu_mask)
> +
> +
> +def signal_handler(signal, frame):
> + print(' SIGINT: Forcing cleanup before exit.')
> + if interval:
> + ipt.disable_trace(trace_file)
> + ipt.clear_trace_file()
> + ipt.free_trace_buffer()
> + sys.exit(0)
> +
> +trace_file = "/sys/kernel/debug/tracing/events/amd_cpu/enable"
> +signal.signal(signal.SIGINT, signal_handler)
> +
> +interval = ""
> +file_name = ""
> +cpu_list = ""
> +test_name = ""
> +memory = "10240"
> +graph_data_present = False;
> +
> +valid1 = False
> +valid2 = False
> +
> +cpu_mask = zeros((MAX_CPUS,), dtype=int)
> +
> +
> +try:
> + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
> +except getopt.GetoptError:
> + ipt.print_help('amd_pstate')
> + sys.exit(2)
> +for opt, arg in opts:
> + if opt == '-h':
> + print()
> + sys.exit()
> + elif opt in ("-t", "--trace_file"):
> + valid1 = True
> + location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
> + file_name = os.path.join(location, arg)
> + elif opt in ("-i", "--interval"):
> + valid1 = True
> + interval = arg
> + elif opt in ("-c", "--cpu"):
> + cpu_list = arg
> + elif opt in ("-n", "--name"):
> + valid2 = True
> + test_name = arg
> + elif opt in ("-m", "--memory"):
> + memory = arg
> +
> +if not (valid1 and valid2):
> + ipt.print_help('amd_pstate')
> + sys.exit()
> +
> +if cpu_list:
> + for p in re.split("[,]", cpu_list):
> + if int(p) < MAX_CPUS :
> + cpu_mask[int(p)] = 1
> +else:
> + for i in range (0, MAX_CPUS):
> + cpu_mask[i] = 1
> +
> +if not os.path.exists('results'):
> + os.mkdir('results')
> + ipt.fix_ownership('results')
> +
> +os.chdir('results')
> +if os.path.exists(test_name):
> + print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
> + sys.exit()
> +os.mkdir(test_name)
> +ipt.fix_ownership(test_name)
> +os.chdir(test_name)
> +
> +cur_version = sys.version_info
> +print('python version (should be >= 2.7):')
> +print(cur_version)
> +
> +cleanup_data_files()
> +
> +if interval:
> + file_name = "/sys/kernel/debug/tracing/trace"
> + ipt.clear_trace_file()
> + ipt.set_trace_buffer_size(memory)
> + ipt.enable_trace(trace_file)
> + time.sleep(int(interval))
> + ipt.disable_trace(trace_file)
> +
> +current_max_cpu = 0
> +
> +read_trace_data(file_name, cpu_mask)
> +
> +if interval:
> + ipt.clear_trace_file()
> + ipt.free_trace_buffer()
> +
> +if graph_data_present == False:
> + print('No valid data to plot')
> + sys.exit(2)
> +
> +for cpu_no in range(0, current_max_cpu + 1):
> + plot_per_cpu_freq(cpu_no)
> + plot_per_cpu_des_perf(cpu_no)
> + plot_per_cpu_load(cpu_no)
> +
> +plot_all_cpu_des_perf()
> +plot_all_cpu_frequency()
> +plot_all_cpu_load()
> +
> +for root, dirs, files in os.walk('.'):
> + for f in files:
> + ipt.fix_ownership(f)
> +
> +os.chdir('../../')
> --
> 2.27.0
>

2022-03-04 19:47:03

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for AMD P-State module

On Fri, Mar 4, 2022 at 7:42 AM Huang Rui <[email protected]> wrote:
>
> On Fri, Mar 04, 2022 at 02:07:21PM +0800, Su, Jinzhou (Joe) wrote:
> > Add frequency, mperf, aperf and tsc in the trace. This can be used
> > to debug and tune the performance of AMD P-state driver.
> >
> > Use the time difference between amd_pstate_update to calculate CPU
> > frequency. There could be sleep in arch_freq_get_on_cpu, so do not
> > use it here.
> >
> > Signed-off-by: Jinzhou Su <[email protected]>
> > Co-developed-by: Huang Rui <[email protected]>
> > Signed-off-by: Huang Rui <[email protected]>
>
> Let's remove "Signed-off-by" of me, just leave "Co-developed-by".

Actually, they both need to be present (the C-d-b clarifies the S-o-b
meaning), so the above is correct.

>
> > ---
> > drivers/cpufreq/amd-pstate-trace.h | 22 ++++++++++-
> > drivers/cpufreq/amd-pstate.c | 59 +++++++++++++++++++++++++++++-
> > 2 files changed, 78 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
> > index 647505957d4f..35f38ae67fb1 100644
> > --- a/drivers/cpufreq/amd-pstate-trace.h
> > +++ b/drivers/cpufreq/amd-pstate-trace.h
> > @@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
> > TP_PROTO(unsigned long min_perf,
> > unsigned long target_perf,
> > unsigned long capacity,
> > + u64 freq,
> > + u64 mperf,
> > + u64 aperf,
> > + u64 tsc,
> > unsigned int cpu_id,
> > bool changed,
> > bool fast_switch
> > @@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
> > TP_ARGS(min_perf,
> > target_perf,
> > capacity,
> > + freq,
> > + mperf,
> > + aperf,
> > + tsc,
> > cpu_id,
> > changed,
> > fast_switch
> > @@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
> > __field(unsigned long, min_perf)
> > __field(unsigned long, target_perf)
> > __field(unsigned long, capacity)
> > + __field(unsigned long long, freq)
> > + __field(unsigned long long, mperf)
> > + __field(unsigned long long, aperf)
> > + __field(unsigned long long, tsc)
> > __field(unsigned int, cpu_id)
> > __field(bool, changed)
> > __field(bool, fast_switch)
> > @@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
> > __entry->min_perf = min_perf;
> > __entry->target_perf = target_perf;
> > __entry->capacity = capacity;
> > + __entry->freq = freq;
> > + __entry->mperf = mperf;
> > + __entry->aperf = aperf;
> > + __entry->tsc = tsc;
> > __entry->cpu_id = cpu_id;
> > __entry->changed = changed;
> > __entry->fast_switch = fast_switch;
> > ),
> >
> > - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
> > + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
> > (unsigned long)__entry->min_perf,
> > (unsigned long)__entry->target_perf,
> > (unsigned long)__entry->capacity,
> > + (unsigned long long)__entry->freq,
> > + (unsigned long long)__entry->mperf,
> > + (unsigned long long)__entry->aperf,
> > + (unsigned long long)__entry->tsc,
> > (unsigned int)__entry->cpu_id,
> > (__entry->changed) ? "true" : "false",
> > (__entry->fast_switch) ? "true" : "false"
> > diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> > index 9ce75ed11f8e..7be38bc6a673 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -65,6 +65,18 @@ MODULE_PARM_DESC(shared_mem,
> >
> > static struct cpufreq_driver amd_pstate_driver;
> >
> > +/**
> > + * struct amd_aperf_mperf
> > + * @aperf: actual performance frequency clock count
> > + * @mperf: maximum performance frequency clock count
> > + * @tsc: time stamp counter
> > + */
> > +struct amd_aperf_mperf {
> > + u64 aperf;
> > + u64 mperf;
> > + u64 tsc;
> > +};
> > +
> > /**
> > * struct amd_cpudata - private CPU data for AMD P-State
> > * @cpu: CPU number
> > @@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
> > * @min_freq: the frequency that mapped to lowest_perf
> > * @nominal_freq: the frequency that mapped to nominal_perf
> > * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
> > + * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
> > + * @prev: Last Aperf/Mperf/tsc count value read from register
> > + * @freq: current cpu frequency value
> > * @boost_supported: check whether the Processor or SBIOS supports boost mode
> > *
> > * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
> > @@ -102,6 +117,10 @@ struct amd_cpudata {
> > u32 nominal_freq;
> > u32 lowest_nonlinear_freq;
> >
> > + struct amd_aperf_mperf cur;
> > + struct amd_aperf_mperf prev;
> > +
> > + u64 freq;
> > bool boost_supported;
> > };
> >
> > @@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
> > max_perf, fast_switch);
> > }
> >
> > +static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
> > +{
> > + u64 aperf, mperf, tsc;
> > + unsigned long flags;
> > +
> > + local_irq_save(flags);
> > + rdmsrl(MSR_IA32_APERF, aperf);
> > + rdmsrl(MSR_IA32_MPERF, mperf);
> > + tsc = rdtsc();
> > +
> > + if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
> > + local_irq_restore(flags);
> > + return false;
> > + }
> > +
> > + local_irq_restore(flags);
> > +
> > + cpudata->cur.aperf = aperf;
> > + cpudata->cur.mperf = mperf;
> > + cpudata->cur.tsc = tsc;
> > + cpudata->cur.aperf -= cpudata->prev.aperf;
> > + cpudata->cur.mperf -= cpudata->prev.mperf;
> > + cpudata->cur.tsc -= cpudata->prev.tsc;
> > +
> > + cpudata->prev.aperf = aperf;
> > + cpudata->prev.mperf = mperf;
> > + cpudata->prev.tsc = tsc;
> > +
> > + cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
> > +
> > + return true;
> > +}
> > +
> > static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> > u32 des_perf, u32 max_perf, bool fast_switch)
> > {
> > @@ -226,8 +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> > value &= ~AMD_CPPC_MAX_PERF(~0L);
> > value |= AMD_CPPC_MAX_PERF(max_perf);
> >
> > - trace_amd_pstate_perf(min_perf, des_perf, max_perf,
> > - cpudata->cpu, (value != prev), fast_switch);
> > + if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
> > + trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
> > + cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
> > + cpudata->cpu, (value != prev), fast_switch);
>
> How about using struct amd_aperf_mperf pointer as one input:
>
> trace_amd_pstate_perf(min_perf, des_perf, max_perf, &cpudata->cur, ...);
>
> You can refer the members of struct amd_aperf_mperf in the
> amd-pstate-trace.h:
>
> __entry->mperf = cur->mperf;
> __entry->aperf = cur->aperf;
> __entry->tsc = cur->tsc;
>
> Thanks,
> Ray

2022-03-04 20:30:42

by Huang Rui

[permalink] [raw]
Subject: Re: [PATCH V2 2/4] tools/power/x86/intel_pstate_tracer: make tracer as a module

On Fri, Mar 04, 2022 at 02:07:22PM +0800, Su, Jinzhou (Joe) wrote:
> Make intel_pstate_tracer as a module. Other trace event can import
> this module to analyze their trace data.
>
> Signed-off-by: Jinzhou Su <[email protected]>
> Acked-by: Doug Smythies <[email protected]>

Reviewed-by: Huang Rui <[email protected]>

> ---
> .../intel_pstate_tracer.py | 260 +++++++++---------
> 1 file changed, 129 insertions(+), 131 deletions(-)
>
> diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
> index e15e20696d17..b46e9eb8f5aa 100755
> --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
> +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
> @@ -63,7 +63,7 @@ C_USEC = 3
> C_SEC = 2
> C_CPU = 1
>
> -global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname
> +global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname, trace_file
>
> # 11 digits covers uptime to 115 days
> getcontext().prec = 11
> @@ -72,17 +72,17 @@ sample_num =0
> last_sec_cpu = [0] * MAX_CPUS
> last_usec_cpu = [0] * MAX_CPUS
>
> -def print_help():
> - print('intel_pstate_tracer.py:')
> +def print_help(driver_name):
> + print('%s_tracer.py:'%driver_name)
> print(' Usage:')
> print(' If the trace file is available, then to simply parse and plot, use (sudo not required):')
> - print(' ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>')
> + print(' ./%s_tracer.py [-c cpus] -t <trace_file> -n <test_name>'%driver_name)
> print(' Or')
> - print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
> + print(' ./%s_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>'%driver_name)
> print(' To generate trace file, parse and plot, use (sudo required):')
> - print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
> + print(' sudo ./%s_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>'%driver_name)
> print(' Or')
> - print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
> + print(' sudo ./%s_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>'%driver_name)
> print(' Optional argument:')
> print(' cpus: comma separated list of CPUs')
> print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
> @@ -323,7 +323,7 @@ def set_4_plot_linestyles(g_plot):
> g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1')
> g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1')
>
> -def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz):
> +def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask):
> """ Store master csv file information """
>
> global graph_data_present
> @@ -342,11 +342,9 @@ def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _t
>
> graph_data_present = True;
>
> -def split_csv():
> +def split_csv(current_max_cpu, cpu_mask):
> """ seperate the all csv file into per CPU csv files. """
>
> - global current_max_cpu
> -
> if os.path.exists('cpu.csv'):
> for index in range(0, current_max_cpu + 1):
> if cpu_mask[int(index)] != 0:
> @@ -381,27 +379,25 @@ def clear_trace_file():
> print('IO error clearing trace file ')
> sys.exit(2)
>
> -def enable_trace():
> +def enable_trace(trace_file):
> """ Enable trace """
>
> try:
> - open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
> - , 'w').write("1")
> + open(trace_file,'w').write("1")
> except:
> print('IO error enabling trace ')
> sys.exit(2)
>
> -def disable_trace():
> +def disable_trace(trace_file):
> """ Disable trace """
>
> try:
> - open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
> - , 'w').write("0")
> + open(trace_file, 'w').write("0")
> except:
> print('IO error disabling trace ')
> sys.exit(2)
>
> -def set_trace_buffer_size():
> +def set_trace_buffer_size(memory):
> """ Set trace buffer size """
>
> try:
> @@ -421,7 +417,7 @@ def free_trace_buffer():
> print('IO error freeing trace buffer ')
> sys.exit(2)
>
> -def read_trace_data(filename):
> +def read_trace_data(filename, cpu_mask):
> """ Read and parse trace data """
>
> global current_max_cpu
> @@ -481,135 +477,137 @@ def read_trace_data(filename):
> tsc_ghz = Decimal(0)
> if duration_ms != Decimal(0) :
> tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000)
> - store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz)
> + store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask)
>
> if cpu_int > current_max_cpu:
> current_max_cpu = cpu_int
> # End of for each trace line loop
> # Now seperate the main overall csv file into per CPU csv files.
> - split_csv()
> + split_csv(current_max_cpu, cpu_mask)
>
> def signal_handler(signal, frame):
> print(' SIGINT: Forcing cleanup before exit.')
> if interval:
> - disable_trace()
> + disable_trace(trace_file)
> clear_trace_file()
> # Free the memory
> free_trace_buffer()
> sys.exit(0)
>
> -signal.signal(signal.SIGINT, signal_handler)
> +if __name__ == "__main__":
> + trace_file = "/sys/kernel/debug/tracing/events/power/pstate_sample/enable"
> + signal.signal(signal.SIGINT, signal_handler)
>
> -interval = ""
> -filename = ""
> -cpu_list = ""
> -testname = ""
> -memory = "10240"
> -graph_data_present = False;
> + interval = ""
> + filename = ""
> + cpu_list = ""
> + testname = ""
> + memory = "10240"
> + graph_data_present = False;
>
> -valid1 = False
> -valid2 = False
> + valid1 = False
> + valid2 = False
>
> -cpu_mask = zeros((MAX_CPUS,), dtype=int)
> + cpu_mask = zeros((MAX_CPUS,), dtype=int)
>
> -try:
> - opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
> -except getopt.GetoptError:
> - print_help()
> - sys.exit(2)
> -for opt, arg in opts:
> - if opt == '-h':
> - print()
> + try:
> + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
> + except getopt.GetoptError:
> + print_help('intel_pstate')
> + sys.exit(2)
> + for opt, arg in opts:
> + if opt == '-h':
> + print_help('intel_pstate')
> + sys.exit()
> + elif opt in ("-t", "--trace_file"):
> + valid1 = True
> + location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
> + filename = os.path.join(location, arg)
> + elif opt in ("-i", "--interval"):
> + valid1 = True
> + interval = arg
> + elif opt in ("-c", "--cpu"):
> + cpu_list = arg
> + elif opt in ("-n", "--name"):
> + valid2 = True
> + testname = arg
> + elif opt in ("-m", "--memory"):
> + memory = arg
> +
> + if not (valid1 and valid2):
> + print_help('intel_pstate')
> sys.exit()
> - elif opt in ("-t", "--trace_file"):
> - valid1 = True
> - location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
> - filename = os.path.join(location, arg)
> - elif opt in ("-i", "--interval"):
> - valid1 = True
> - interval = arg
> - elif opt in ("-c", "--cpu"):
> - cpu_list = arg
> - elif opt in ("-n", "--name"):
> - valid2 = True
> - testname = arg
> - elif opt in ("-m", "--memory"):
> - memory = arg
> -
> -if not (valid1 and valid2):
> - print_help()
> - sys.exit()
> -
> -if cpu_list:
> - for p in re.split("[,]", cpu_list):
> - if int(p) < MAX_CPUS :
> - cpu_mask[int(p)] = 1
> -else:
> - for i in range (0, MAX_CPUS):
> - cpu_mask[i] = 1
> -
> -if not os.path.exists('results'):
> - os.mkdir('results')
> +
> + if cpu_list:
> + for p in re.split("[,]", cpu_list):
> + if int(p) < MAX_CPUS :
> + cpu_mask[int(p)] = 1
> + else:
> + for i in range (0, MAX_CPUS):
> + cpu_mask[i] = 1
> +
> + if not os.path.exists('results'):
> + os.mkdir('results')
> + # The regular user needs to own the directory, not root.
> + fix_ownership('results')
> +
> + os.chdir('results')
> + if os.path.exists(testname):
> + print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
> + sys.exit()
> + os.mkdir(testname)
> # The regular user needs to own the directory, not root.
> - fix_ownership('results')
> -
> -os.chdir('results')
> -if os.path.exists(testname):
> - print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
> - sys.exit()
> -os.mkdir(testname)
> -# The regular user needs to own the directory, not root.
> -fix_ownership(testname)
> -os.chdir(testname)
> -
> -# Temporary (or perhaps not)
> -cur_version = sys.version_info
> -print('python version (should be >= 2.7):')
> -print(cur_version)
> -
> -# Left as "cleanup" for potential future re-run ability.
> -cleanup_data_files()
> -
> -if interval:
> - filename = "/sys/kernel/debug/tracing/trace"
> - clear_trace_file()
> - set_trace_buffer_size()
> - enable_trace()
> - print('Sleeping for ', interval, 'seconds')
> - time.sleep(int(interval))
> - disable_trace()
> -
> -current_max_cpu = 0
> -
> -read_trace_data(filename)
> -
> -if interval:
> - clear_trace_file()
> - # Free the memory
> - free_trace_buffer()
> -
> -if graph_data_present == False:
> - print('No valid data to plot')
> - sys.exit(2)
> -
> -for cpu_no in range(0, current_max_cpu + 1):
> - plot_perf_busy_with_sample(cpu_no)
> - plot_perf_busy(cpu_no)
> - plot_durations(cpu_no)
> - plot_loads(cpu_no)
> -
> -plot_pstate_cpu_with_sample()
> -plot_pstate_cpu()
> -plot_load_cpu()
> -plot_frequency_cpu()
> -plot_duration_cpu()
> -plot_scaled_cpu()
> -plot_boost_cpu()
> -plot_ghz_cpu()
> -
> -# It is preferrable, but not necessary, that the regular user owns the files, not root.
> -for root, dirs, files in os.walk('.'):
> - for f in files:
> - fix_ownership(f)
> -
> -os.chdir('../../')
> + fix_ownership(testname)
> + os.chdir(testname)
> +
> + # Temporary (or perhaps not)
> + cur_version = sys.version_info
> + print('python version (should be >= 2.7):')
> + print(cur_version)
> +
> + # Left as "cleanup" for potential future re-run ability.
> + cleanup_data_files()
> +
> + if interval:
> + filename = "/sys/kernel/debug/tracing/trace"
> + clear_trace_file()
> + set_trace_buffer_size(memory)
> + enable_trace(trace_file)
> + print('Sleeping for ', interval, 'seconds')
> + time.sleep(int(interval))
> + disable_trace(trace_file)
> +
> + current_max_cpu = 0
> +
> + read_trace_data(filename, cpu_mask)
> +
> + if interval:
> + clear_trace_file()
> + # Free the memory
> + free_trace_buffer()
> +
> + if graph_data_present == False:
> + print('No valid data to plot')
> + sys.exit(2)
> +
> + for cpu_no in range(0, current_max_cpu + 1):
> + plot_perf_busy_with_sample(cpu_no)
> + plot_perf_busy(cpu_no)
> + plot_durations(cpu_no)
> + plot_loads(cpu_no)
> +
> + plot_pstate_cpu_with_sample()
> + plot_pstate_cpu()
> + plot_load_cpu()
> + plot_frequency_cpu()
> + plot_duration_cpu()
> + plot_scaled_cpu()
> + plot_boost_cpu()
> + plot_ghz_cpu()
> +
> + # It is preferrable, but not necessary, that the regular user owns the files, not root.
> + for root, dirs, files in os.walk('.'):
> + for f in files:
> + fix_ownership(f)
> +
> + os.chdir('../../')
> --
> 2.27.0
>

2022-03-07 09:45:44

by Huang Rui

[permalink] [raw]
Subject: Re: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for AMD P-State module

On Sat, Mar 05, 2022 at 02:49:51AM +0800, Rafael J. Wysocki wrote:
> On Fri, Mar 4, 2022 at 7:42 AM Huang Rui <[email protected]> wrote:
> >
> > On Fri, Mar 04, 2022 at 02:07:21PM +0800, Su, Jinzhou (Joe) wrote:
> > > Add frequency, mperf, aperf and tsc in the trace. This can be used
> > > to debug and tune the performance of AMD P-state driver.
> > >
> > > Use the time difference between amd_pstate_update to calculate CPU
> > > frequency. There could be sleep in arch_freq_get_on_cpu, so do not
> > > use it here.
> > >
> > > Signed-off-by: Jinzhou Su <[email protected]>
> > > Co-developed-by: Huang Rui <[email protected]>
> > > Signed-off-by: Huang Rui <[email protected]>
> >
> > Let's remove "Signed-off-by" of me, just leave "Co-developed-by".
>
> Actually, they both need to be present (the C-d-b clarifies the S-o-b
> meaning), so the above is correct.
>

OK, I see. Thanks for clarifying this.

Best Regards,
Ray

2022-03-09 02:23:22

by Jinzhou Su

[permalink] [raw]
Subject: RE: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for AMD P-State module

[AMD Official Use Only]

> -----Original Message-----
> From: Huang, Ray <[email protected]>
> Sent: Friday, March 4, 2022 2:42 PM
> To: Su, Jinzhou (Joe) <[email protected]>
> Cc: [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]; Sharma, Deepak <[email protected]>;
> Deucher, Alexander <[email protected]>; Du, Xiaojian
> <[email protected]>; Yuan, Perry <[email protected]>; Meng, Li
> (Jassmine) <[email protected]>
> Subject: Re: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for
> AMD P-State module
>
> On Fri, Mar 04, 2022 at 02:07:21PM +0800, Su, Jinzhou (Joe) wrote:
> > Add frequency, mperf, aperf and tsc in the trace. This can be used to
> > debug and tune the performance of AMD P-state driver.
> >
> > Use the time difference between amd_pstate_update to calculate CPU
> > frequency. There could be sleep in arch_freq_get_on_cpu, so do not use
> > it here.
> >
> > Signed-off-by: Jinzhou Su <[email protected]>
> > Co-developed-by: Huang Rui <[email protected]>
> > Signed-off-by: Huang Rui <[email protected]>
>
> Let's remove "Signed-off-by" of me, just leave "Co-developed-by".
>
> > ---
> > drivers/cpufreq/amd-pstate-trace.h | 22 ++++++++++-
> > drivers/cpufreq/amd-pstate.c | 59 +++++++++++++++++++++++++++++-
> > 2 files changed, 78 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate-trace.h
> > b/drivers/cpufreq/amd-pstate-trace.h
> > index 647505957d4f..35f38ae67fb1 100644
> > --- a/drivers/cpufreq/amd-pstate-trace.h
> > +++ b/drivers/cpufreq/amd-pstate-trace.h
> > @@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
> > TP_PROTO(unsigned long min_perf,
> > unsigned long target_perf,
> > unsigned long capacity,
> > + u64 freq,
> > + u64 mperf,
> > + u64 aperf,
> > + u64 tsc,
> > unsigned int cpu_id,
> > bool changed,
> > bool fast_switch
> > @@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
> > TP_ARGS(min_perf,
> > target_perf,
> > capacity,
> > + freq,
> > + mperf,
> > + aperf,
> > + tsc,
> > cpu_id,
> > changed,
> > fast_switch
> > @@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
> > __field(unsigned long, min_perf)
> > __field(unsigned long, target_perf)
> > __field(unsigned long, capacity)
> > + __field(unsigned long long, freq)
> > + __field(unsigned long long, mperf)
> > + __field(unsigned long long, aperf)
> > + __field(unsigned long long, tsc)
> > __field(unsigned int, cpu_id)
> > __field(bool, changed)
> > __field(bool, fast_switch)
> > @@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
> > __entry->min_perf = min_perf;
> > __entry->target_perf = target_perf;
> > __entry->capacity = capacity;
> > + __entry->freq = freq;
> > + __entry->mperf = mperf;
> > + __entry->aperf = aperf;
> > + __entry->tsc = tsc;
> > __entry->cpu_id = cpu_id;
> > __entry->changed = changed;
> > __entry->fast_switch = fast_switch;
> > ),
> >
> > - TP_printk("amd_min_perf=%lu amd_des_perf=%lu
> amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
> > + TP_printk("amd_min_perf=%lu amd_des_perf=%lu
> amd_max_perf=%lu
> > +freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s
> > +fast_switch=%s",
> > (unsigned long)__entry->min_perf,
> > (unsigned long)__entry->target_perf,
> > (unsigned long)__entry->capacity,
> > + (unsigned long long)__entry->freq,
> > + (unsigned long long)__entry->mperf,
> > + (unsigned long long)__entry->aperf,
> > + (unsigned long long)__entry->tsc,
> > (unsigned int)__entry->cpu_id,
> > (__entry->changed) ? "true" : "false",
> > (__entry->fast_switch) ? "true" : "false"
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 9ce75ed11f8e..7be38bc6a673 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -65,6 +65,18 @@ MODULE_PARM_DESC(shared_mem,
> >
> > static struct cpufreq_driver amd_pstate_driver;
> >
> > +/**
> > + * struct amd_aperf_mperf
> > + * @aperf: actual performance frequency clock count
> > + * @mperf: maximum performance frequency clock count
> > + * @tsc: time stamp counter
> > + */
> > +struct amd_aperf_mperf {
> > + u64 aperf;
> > + u64 mperf;
> > + u64 tsc;
> > +};
> > +
> > /**
> > * struct amd_cpudata - private CPU data for AMD P-State
> > * @cpu: CPU number
> > @@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
> > * @min_freq: the frequency that mapped to lowest_perf
> > * @nominal_freq: the frequency that mapped to nominal_perf
> > * @lowest_nonlinear_freq: the frequency that mapped to
> > lowest_nonlinear_perf
> > + * @cur: Difference of Aperf/Mperf/tsc count between last and current
> > + sample
> > + * @prev: Last Aperf/Mperf/tsc count value read from register
> > + * @freq: current cpu frequency value
> > * @boost_supported: check whether the Processor or SBIOS supports boost
> mode
> > *
> > * The amd_cpudata is key private data for each CPU thread in AMD
> > P-State, and @@ -102,6 +117,10 @@ struct amd_cpudata {
> > u32 nominal_freq;
> > u32 lowest_nonlinear_freq;
> >
> > + struct amd_aperf_mperf cur;
> > + struct amd_aperf_mperf prev;
> > +
> > + u64 freq;
> > bool boost_supported;
> > };
> >
> > @@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct
> amd_cpudata *cpudata,
> > max_perf, fast_switch);
> > }
> >
> > +static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) {
> > + u64 aperf, mperf, tsc;
> > + unsigned long flags;
> > +
> > + local_irq_save(flags);
> > + rdmsrl(MSR_IA32_APERF, aperf);
> > + rdmsrl(MSR_IA32_MPERF, mperf);
> > + tsc = rdtsc();
> > +
> > + if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
> > + local_irq_restore(flags);
> > + return false;
> > + }
> > +
> > + local_irq_restore(flags);
> > +
> > + cpudata->cur.aperf = aperf;
> > + cpudata->cur.mperf = mperf;
> > + cpudata->cur.tsc = tsc;
> > + cpudata->cur.aperf -= cpudata->prev.aperf;
> > + cpudata->cur.mperf -= cpudata->prev.mperf;
> > + cpudata->cur.tsc -= cpudata->prev.tsc;
> > +
> > + cpudata->prev.aperf = aperf;
> > + cpudata->prev.mperf = mperf;
> > + cpudata->prev.tsc = tsc;
> > +
> > + cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz),
> > +cpudata->cur.mperf);
> > +
> > + return true;
> > +}
> > +
> > static void amd_pstate_update(struct amd_cpudata *cpudata, u32
> min_perf,
> > u32 des_perf, u32 max_perf, bool fast_switch)
> { @@ -226,8
> > +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata,
> u32 min_perf,
> > value &= ~AMD_CPPC_MAX_PERF(~0L);
> > value |= AMD_CPPC_MAX_PERF(max_perf);
> >
> > - trace_amd_pstate_perf(min_perf, des_perf, max_perf,
> > - cpudata->cpu, (value != prev), fast_switch);
> > + if (trace_amd_pstate_perf_enabled() &&
> amd_pstate_sample(cpudata)) {
> > + trace_amd_pstate_perf(min_perf, des_perf, max_perf,
> cpudata->freq,
> > + cpudata->cur.mperf, cpudata->cur.aperf, cpudata-
> >cur.tsc,
> > + cpudata->cpu, (value != prev), fast_switch);
>
> How about using struct amd_aperf_mperf pointer as one input:
>
> trace_amd_pstate_perf(min_perf, des_perf, max_perf, &cpudata->cur, ...);
>
> You can refer the members of struct amd_aperf_mperf in the
> amd-pstate-trace.h:
>
> __entry->mperf = cur->mperf;
> __entry->aperf = cur->aperf;
> __entry->tsc = cur->tsc;
>

I prefer the former way. With your change, we would need to split the definition of struct "amd_cpudata" out into a header file and include it in the trace file. I will do that in the future if needed.

> Thanks,
> Ray

2022-03-09 16:25:10

by Huang Rui

[permalink] [raw]
Subject: Re: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for AMD P-State module

On Wed, Mar 09, 2022 at 09:23:38AM +0800, Su, Jinzhou (Joe) wrote:
> [AMD Official Use Only]
>
> > -----Original Message-----
> > From: Huang, Ray <[email protected]>
> > Sent: Friday, March 4, 2022 2:42 PM
> > To: Su, Jinzhou (Joe) <[email protected]>
> > Cc: [email protected]; [email protected];
> > [email protected]; [email protected];
> > [email protected]; [email protected]; linux-
> > [email protected]; Sharma, Deepak <[email protected]>;
> > Deucher, Alexander <[email protected]>; Du, Xiaojian
> > <[email protected]>; Yuan, Perry <[email protected]>; Meng, Li
> > (Jassmine) <[email protected]>
> > Subject: Re: [PATCH V2 1/4] cpufreq: amd-pstate: Add more tracepoint for
> > AMD P-State module
> >
> > On Fri, Mar 04, 2022 at 02:07:21PM +0800, Su, Jinzhou (Joe) wrote:
> > > Add frequency, mperf, aperf and tsc in the trace. This can be used to
> > > debug and tune the performance of AMD P-state driver.
> > >
> > > Use the time difference between amd_pstate_update to calculate CPU
> > > frequency. There could be sleep in arch_freq_get_on_cpu, so do not use
> > > it here.
> > >
> > > Signed-off-by: Jinzhou Su <[email protected]>
> > > Co-developed-by: Huang Rui <[email protected]>
> > > Signed-off-by: Huang Rui <[email protected]>
> >
> > Let's remove "Signed-off-by" of me, just leave "Co-developed-by".
> >
> > > ---
> > > drivers/cpufreq/amd-pstate-trace.h | 22 ++++++++++-
> > > drivers/cpufreq/amd-pstate.c | 59 +++++++++++++++++++++++++++++-
> > > 2 files changed, 78 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/cpufreq/amd-pstate-trace.h
> > > b/drivers/cpufreq/amd-pstate-trace.h
> > > index 647505957d4f..35f38ae67fb1 100644
> > > --- a/drivers/cpufreq/amd-pstate-trace.h
> > > +++ b/drivers/cpufreq/amd-pstate-trace.h
> > > @@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
> > > TP_PROTO(unsigned long min_perf,
> > > unsigned long target_perf,
> > > unsigned long capacity,
> > > + u64 freq,
> > > + u64 mperf,
> > > + u64 aperf,
> > > + u64 tsc,
> > > unsigned int cpu_id,
> > > bool changed,
> > > bool fast_switch
> > > @@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
> > > TP_ARGS(min_perf,
> > > target_perf,
> > > capacity,
> > > + freq,
> > > + mperf,
> > > + aperf,
> > > + tsc,
> > > cpu_id,
> > > changed,
> > > fast_switch
> > > @@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
> > > __field(unsigned long, min_perf)
> > > __field(unsigned long, target_perf)
> > > __field(unsigned long, capacity)
> > > + __field(unsigned long long, freq)
> > > + __field(unsigned long long, mperf)
> > > + __field(unsigned long long, aperf)
> > > + __field(unsigned long long, tsc)
> > > __field(unsigned int, cpu_id)
> > > __field(bool, changed)
> > > __field(bool, fast_switch)
> > > @@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
> > > __entry->min_perf = min_perf;
> > > __entry->target_perf = target_perf;
> > > __entry->capacity = capacity;
> > > + __entry->freq = freq;
> > > + __entry->mperf = mperf;
> > > + __entry->aperf = aperf;
> > > + __entry->tsc = tsc;
> > > __entry->cpu_id = cpu_id;
> > > __entry->changed = changed;
> > > __entry->fast_switch = fast_switch;
> > > ),
> > >
> > > - TP_printk("amd_min_perf=%lu amd_des_perf=%lu
> > amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
> > > + TP_printk("amd_min_perf=%lu amd_des_perf=%lu
> > amd_max_perf=%lu
> > > +freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s
> > > +fast_switch=%s",
> > > (unsigned long)__entry->min_perf,
> > > (unsigned long)__entry->target_perf,
> > > (unsigned long)__entry->capacity,
> > > + (unsigned long long)__entry->freq,
> > > + (unsigned long long)__entry->mperf,
> > > + (unsigned long long)__entry->aperf,
> > > + (unsigned long long)__entry->tsc,
> > > (unsigned int)__entry->cpu_id,
> > > (__entry->changed) ? "true" : "false",
> > > (__entry->fast_switch) ? "true" : "false"
> > > diff --git a/drivers/cpufreq/amd-pstate.c
> > > b/drivers/cpufreq/amd-pstate.c index 9ce75ed11f8e..7be38bc6a673 100644
> > > --- a/drivers/cpufreq/amd-pstate.c
> > > +++ b/drivers/cpufreq/amd-pstate.c
> > > @@ -65,6 +65,18 @@ MODULE_PARM_DESC(shared_mem,
> > >
> > > static struct cpufreq_driver amd_pstate_driver;
> > >
> > > +/**
> > > + * struct amd_aperf_mperf
> > > + * @aperf: actual performance frequency clock count
> > > + * @mperf: maximum performance frequency clock count
> > > + * @tsc: time stamp counter
> > > + */
> > > +struct amd_aperf_mperf {
> > > + u64 aperf;
> > > + u64 mperf;
> > > + u64 tsc;
> > > +};
> > > +
> > > /**
> > > * struct amd_cpudata - private CPU data for AMD P-State
> > > * @cpu: CPU number
> > > @@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
> > > * @min_freq: the frequency that mapped to lowest_perf
> > > * @nominal_freq: the frequency that mapped to nominal_perf
> > > * @lowest_nonlinear_freq: the frequency that mapped to
> > > lowest_nonlinear_perf
> > > + * @cur: Difference of Aperf/Mperf/tsc count between last and current
> > > + sample
> > > + * @prev: Last Aperf/Mperf/tsc count value read from register
> > > + * @freq: current cpu frequency value
> > > * @boost_supported: check whether the Processor or SBIOS supports boost
> > mode
> > > *
> > > * The amd_cpudata is key private data for each CPU thread in AMD
> > > P-State, and @@ -102,6 +117,10 @@ struct amd_cpudata {
> > > u32 nominal_freq;
> > > u32 lowest_nonlinear_freq;
> > >
> > > + struct amd_aperf_mperf cur;
> > > + struct amd_aperf_mperf prev;
> > > +
> > > + u64 freq;
> > > bool boost_supported;
> > > };
> > >
> > > @@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct
> > amd_cpudata *cpudata,
> > > max_perf, fast_switch);
> > > }
> > >
> > > +static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) {
> > > + u64 aperf, mperf, tsc;
> > > + unsigned long flags;
> > > +
> > > + local_irq_save(flags);
> > > + rdmsrl(MSR_IA32_APERF, aperf);
> > > + rdmsrl(MSR_IA32_MPERF, mperf);
> > > + tsc = rdtsc();
> > > +
> > > + if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
> > > + local_irq_restore(flags);
> > > + return false;
> > > + }
> > > +
> > > + local_irq_restore(flags);
> > > +
> > > + cpudata->cur.aperf = aperf;
> > > + cpudata->cur.mperf = mperf;
> > > + cpudata->cur.tsc = tsc;
> > > + cpudata->cur.aperf -= cpudata->prev.aperf;
> > > + cpudata->cur.mperf -= cpudata->prev.mperf;
> > > + cpudata->cur.tsc -= cpudata->prev.tsc;
> > > +
> > > + cpudata->prev.aperf = aperf;
> > > + cpudata->prev.mperf = mperf;
> > > + cpudata->prev.tsc = tsc;
> > > +
> > > + cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz),
> > > +cpudata->cur.mperf);
> > > +
> > > + return true;
> > > +}
> > > +
> > > static void amd_pstate_update(struct amd_cpudata *cpudata, u32
> > min_perf,
> > > u32 des_perf, u32 max_perf, bool fast_switch)
> > { @@ -226,8
> > > +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata,
> > u32 min_perf,
> > > value &= ~AMD_CPPC_MAX_PERF(~0L);
> > > value |= AMD_CPPC_MAX_PERF(max_perf);
> > >
> > > - trace_amd_pstate_perf(min_perf, des_perf, max_perf,
> > > - cpudata->cpu, (value != prev), fast_switch);
> > > + if (trace_amd_pstate_perf_enabled() &&
> > amd_pstate_sample(cpudata)) {
> > > + trace_amd_pstate_perf(min_perf, des_perf, max_perf,
> > cpudata->freq,
> > > + cpudata->cur.mperf, cpudata->cur.aperf, cpudata-
> > >cur.tsc,
> > > + cpudata->cpu, (value != prev), fast_switch);
> >
> > How about using struct amd_aperf_mperf pointer as one input:
> >
> > trace_amd_pstate_perf(min_perf, des_perf, max_perf, &cpudata->cur, ...);
> >
> > You can refer the members of struct amd_aperf_mperf in the
> > amd-pstate-trace.h:
> >
> > __entry->mperf = cur->mperf;
> > __entry->aperf = cur->aperf;
> > __entry->tsc = cur->tsc;
> >
>
> I prefer the former way. We'd better to split the definition of struct "amd_cpudata" into head file and include it in the trace file with your change. Will do that in the future if needed.
>

Hmm, it should be OK, because the trace implementation will be moved to
include/trace/events/power.h after shared_mem is enabled.

Thanks,
Ray