2022-07-22 20:41:38

by André Almeida

[permalink] [raw]
Subject: [PATCH 0/4] drm/amd: Add more GFXOFF stats for vangogh

This series adds new GFXOFF logging features, available on vangogh,
and documentation for them.

I've created a small userspace program to interact with this new debugfs
interface and it can be found at:

https://gitlab.freedesktop.org/andrealmeid/gfxoff_tool

André Almeida (4):
drm/amd: Add detailed GFXOFF stats to debugfs
drm/amd/pm: Implement GFXOFF's entry count and residency for vangogh
Documentation/gpu: Document GFXOFF's count and residency
drm/amdgpu: Document gfx_off members of struct amdgpu_gfx

Documentation/gpu/amdgpu/thermal.rst | 14 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168 ++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 14 +-
drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +++-
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++
.../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h | 5 +-
drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 5 +-
.../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 92 ++++++++++
drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 +
13 files changed, 439 insertions(+), 7 deletions(-)

--
2.37.1


2022-07-22 20:41:49

by André Almeida

[permalink] [raw]
Subject: [PATCH 4/4] drm/amdgpu: Document gfx_off members of struct amdgpu_gfx

Add comments to document gfx_off related members of struct amdgpu_gfx.

Signed-off-by: André Almeida <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index f06e979e2565..a552a49c1b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -332,12 +332,12 @@ struct amdgpu_gfx {
uint32_t srbm_soft_reset;

/* gfx off */
- bool gfx_off_state; /* true: enabled, false: disabled */
- struct mutex gfx_off_mutex;
- uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
- struct delayed_work gfx_off_delay_work;
- uint32_t gfx_off_residency;
- uint32_t gfx_off_entrycount;
+ bool gfx_off_state; /* true: enabled, false: disabled */
+ struct mutex gfx_off_mutex; /* mutex to change gfxoff state */
+ uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
+ struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */
+ uint32_t gfx_off_residency; /* last logged residency */
+ uint32_t gfx_off_entrycount; /* count of times GPU has entered GFXOFF state */

/* pipe reservation */
struct mutex pipe_reserve_mutex;
--
2.37.1

2022-07-22 20:42:40

by André Almeida

[permalink] [raw]
Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs

Add debugfs interface to log GFXOFF statistics:

- Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
time of query since system power-up

- Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
Read it to get average GFXOFF residency % multiplied by 100
during the last logging interval.

Both features are designed to keep the values persistent between
suspends.

Signed-off-by: André Almeida <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168 ++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +
drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +++-
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++
drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 +
9 files changed, 321 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index e2eec985adb3..edf90a9ba980 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1042,6 +1042,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
return r;
}

+/**
+ * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * Read the last residency value logged. It doesn't auto update, one needs to
+ * stop logging before getting the current value.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = amdgpu_get_gfx_off_residency(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ amdgpu_set_gfx_off_residency(adev, value ? true : false);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
+
+ r = amdgpu_get_gfx_off_entrycount(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
/**
* amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
*
@@ -1249,6 +1400,19 @@ static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
.llseek = default_llseek
};

+static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_count_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_residency_read,
+ .write = amdgpu_debugfs_gfxoff_residency_write,
+ .llseek = default_llseek
+};
+
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs2_fops,
@@ -1261,6 +1425,8 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_gpr_fops,
&amdgpu_debugfs_gfxoff_fops,
&amdgpu_debugfs_gfxoff_status_fops,
+ &amdgpu_debugfs_gfxoff_count_fops,
+ &amdgpu_debugfs_gfxoff_residency_fops,
};

static const char *debugfs_regs_names[] = {
@@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = {
"amdgpu_gpr",
"amdgpu_gfxoff",
"amdgpu_gfxoff_status",
+ "amdgpu_gfxoff_count",
+ "amdgpu_gfxoff_residency",
};

/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b79ee4ffb879..15a95bc2c211 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);

adev->gfx.gfx_off_req_count = 1;
+ adev->gfx.gfx_off_residency = 0;
+ adev->gfx.gfx_off_entrycount = 0;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;

atomic_set(&adev->throttling_logging_enabled, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 222d3d7ea076..3675c1b899db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
mutex_unlock(&adev->gfx.gfx_off_mutex);
}

+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_set_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 23a696d38390..f06e979e2565 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -336,6 +336,8 @@ struct amdgpu_gfx {
struct mutex gfx_off_mutex;
uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
struct delayed_work gfx_off_delay_work;
+ uint32_t gfx_off_residency;
+ uint32_t gfx_off_entrycount;

/* pipe reservation */
struct mutex pipe_reserve_mutex;
@@ -407,6 +409,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32 *value);
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency);
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value);
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 956b6ce81c84..df87d0768fd7 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev,
return ret;
}

+int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value)
+{
+ struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret = 0;
+
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&adev->pm.mutex);
+ ret = smu_set_residency_gfxoff(smu, value);
+ mutex_unlock(&adev->pm.mutex);
+
+ return ret;
+}
+
+int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value)
+{
+ struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret = 0;
+
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&adev->pm.mutex);
+ ret = smu_get_residency_gfxoff(smu, value);
+ mutex_unlock(&adev->pm.mutex);
+
+ return ret;
+}
+
+int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u32 *value)
+{
+ struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret = 0;
+
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&adev->pm.mutex);
+ ret = smu_get_entrycount_gfxoff(smu, value);
+ mutex_unlock(&adev->pm.mutex);
+
+ return ret;
+}
+
int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value)
{
struct smu_context *smu = adev->powerplay.pp_handle;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 65624d091ed2..83a83e93037c 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,
int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event,
uint64_t event_arg);
+int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value);
+int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value);
+int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u32 *value);
int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value);
uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct amdgpu_device *adev);
void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index fd79b213fab4..cfc3b9d749bf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle,
return smu_set_pp_feature_mask(smu, new_mask);
}

+int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
+{
+ if (!smu->ppt_funcs->set_gfx_off_residency)
+ return -EINVAL;
+
+ return smu_set_gfx_off_residency(smu, value);
+}
+
+int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
+{
+ if (!smu->ppt_funcs->get_gfx_off_residency)
+ return -EINVAL;
+
+ return smu_get_gfx_off_residency(smu, value);
+}
+
+int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
+{
+ if (!smu->ppt_funcs->get_gfx_off_entrycount)
+ return -EINVAL;
+
+ return smu_get_gfx_off_entrycount(smu, value);
+}
+
int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
{
if (!smu->ppt_funcs->get_gfx_off_status)
@@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = adev->powerplay.pp_handle;
- int ret;
+ int ret, count;

if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
@@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)

smu_set_gfx_cgpg(smu, false);

+ /*
+ * pmfw resets entrycount when device is suspended, so we save the
+ * last value to be used when we resume to keep it consistent
+ */
+ ret = smu_get_entrycount_gfxoff(smu, &count);
+ if (!ret)
+ adev->gfx.gfx_off_entrycount = count;
+
return 0;
}

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index b81c657c7386..9827075b768e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1111,6 +1111,22 @@ struct pptable_funcs {
*/
uint32_t (*get_gfx_off_status)(struct smu_context *smu);

+ /**
+ * @get_gfx_off_entrycount: total GFXOFF entry count at the time of
+ * query since system power-up
+ */
+ u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t *entrycount);
+
+ /**
+ * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
+ */
+ u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
+
+ /**
+ * @get_gfx_off_residency: Average GFXOFF residency % during the logging interval
+ */
+ u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t *residency);
+
/**
* @register_irq_handler: Register interupt request handlers.
*/
@@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);

int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);

+int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
+
+int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
+
+int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
+
int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);

int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
index 7469bbfce1fb..ceb13c838067 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
@@ -47,6 +47,9 @@
#define smu_notify_memory_pool_location(smu) smu_ppt_funcs(notify_memory_pool_location, 0, smu)
#define smu_gfx_off_control(smu, enable) smu_ppt_funcs(gfx_off_control, 0, smu, enable)
#define smu_get_gfx_off_status(smu) smu_ppt_funcs(get_gfx_off_status, 0, smu)
+#define smu_get_gfx_off_entrycount(smu, value) smu_ppt_funcs(get_gfx_off_entrycount, 0, smu, value)
+#define smu_get_gfx_off_residency(smu, value) smu_ppt_funcs(get_gfx_off_residency, 0, smu, value)
+#define smu_set_gfx_off_residency(smu, value) smu_ppt_funcs(set_gfx_off_residency, 0, smu, value)
#define smu_set_last_dcef_min_deep_sleep_clk(smu) smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
#define smu_system_features_control(smu, en) smu_ppt_funcs(system_features_control, 0, smu, en)
#define smu_init_max_sustainable_clocks(smu) smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
--
2.37.1

2022-07-22 20:58:08

by André Almeida

[permalink] [raw]
Subject: [PATCH 3/4] Documentation/gpu: Document GFXOFF's count and residency

Add documentation explaining those two new files.

Signed-off-by: André Almeida <[email protected]>
---
Documentation/gpu/amdgpu/thermal.rst | 14 ++++++++++++++
1 file changed, 14 insertions(+)

diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst
index 997231b6adcf..c31f94c6c681 100644
--- a/Documentation/gpu/amdgpu/thermal.rst
+++ b/Documentation/gpu/amdgpu/thermal.rst
@@ -104,3 +104,17 @@ Read it to check current GFXOFF's status of a GPU::
If GFXOFF is enabled, the value will be transitioning around [0, 3], always
getting into 0 when possible. When it's disabled, it's always at 2. Returns
``-EINVAL`` if it's not supported.
+
+``amdgpu_gfxoff_count``
+-----------------------
+
+Read it to get the total GFXOFF entry count at the time of query since system
+power-up. *Only supported in vangogh*
+
+``amdgpu_gfxoff_residency``
+---------------------------
+
+Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop. Read it to
+get average GFXOFF residency % multiplied by 100 during the last logging
+interval. E.g. a value of 7854 means 78.54% of the time in the last logging
+interval the GPU was in GFXOFF mode. *Only supported in vangogh*
--
2.37.1

2022-07-25 10:46:04

by Evan Quan

[permalink] [raw]
Subject: RE: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs

[AMD Official Use Only - General]

Using "uint64_t" instead of "uint32_t" for entry counter may be better.

BR
Evan
> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of
> André Almeida
> Sent: Saturday, July 23, 2022 4:34 AM
> To: Deucher, Alexander <[email protected]>; Koenig, Christian
> <[email protected]>; Pan, Xinhui <[email protected]>; David
> Airlie <[email protected]>; Daniel Vetter <[email protected]>; Zhang, Hawking
> <[email protected]>; Zhou1, Tao <[email protected]>; Kuehling,
> Felix <[email protected]>; Xiao, Jack <[email protected]>; amd-
> [email protected]; [email protected]; linux-
> [email protected]; StDenis, Tom <[email protected]>; Siqueira,
> Rodrigo <[email protected]>
> Cc: André Almeida <[email protected]>; [email protected]
> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
>
> Add debugfs interface to log GFXOFF statistics:
>
> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
> time of query since system power-up
>
> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
> Read it to get average GFXOFF residency % multiplied by 100
> during the last logging interval.
>
> Both features are designed to be keep the values persistent between
> suspends.
>
> Signed-off-by: André Almeida <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168
> ++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +
> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++
> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +++-
> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++
> drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 +
> 9 files changed, 321 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index e2eec985adb3..edf90a9ba980 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1042,6 +1042,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct
> file *f, char __user *buf,
> return r;
> }
>
> +/**
> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
> + *
> + * @f: open file handle
> + * @buf: User buffer to store read data in
> + * @size: Number of bytes to read
> + * @pos: Offset to seek to
> + *
> + * Read the last residency value logged. It doesn't auto update, one needs
> to
> + * stop logging before getting the current value.
> + */
> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char
> __user *buf,
> + size_t size, loff_t *pos)
> +{
> + struct amdgpu_device *adev = file_inode(f)->i_private;
> + ssize_t result = 0;
> + int r;
> +
> + if (size & 0x3 || *pos & 0x3)
> + return -EINVAL;
> +
> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> + if (r < 0) {
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> + return r;
> + }
> +
> + while (size) {
> + uint32_t value;
> +
> + r = amdgpu_get_gfx_off_residency(adev, &value);
> + if (r)
> + goto out;
> +
> + r = put_user(value, (uint32_t *)buf);
> + if (r)
> + goto out;
> +
> + result += 4;
> + buf += 4;
> + *pos += 4;
> + size -= 4;
> + }
> +
> + r = result;
> +out:
> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> + return r;
> +}
> +
> +/**
> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
> + *
> + * @f: open file handle
> + * @buf: User buffer to write data from
> + * @size: Number of bytes to write
> + * @pos: Offset to seek to
> + *
> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
> + */
> +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
> char __user *buf,
> + size_t size, loff_t *pos)
> +{
> + struct amdgpu_device *adev = file_inode(f)->i_private;
> + ssize_t result = 0;
> + int r;
> +
> + if (size & 0x3 || *pos & 0x3)
> + return -EINVAL;
> +
> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> + if (r < 0) {
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> + return r;
> + }
> +
> + while (size) {
> + u32 value;
> +
> + r = get_user(value, (uint32_t *)buf);
> + if (r)
> + goto out;
> +
> + amdgpu_set_gfx_off_residency(adev, value ? true : false);
> +
> + result += 4;
> + buf += 4;
> + *pos += 4;
> + size -= 4;
> + }
> +
> + r = result;
> +out:
> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> + return r;
> +}
> +
> +
> +/**
> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
> + *
> + * @f: open file handle
> + * @buf: User buffer to store read data in
> + * @size: Number of bytes to read
> + * @pos: Offset to seek to
> + */
> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char
> __user *buf,
> + size_t size, loff_t *pos)
> +{
> + struct amdgpu_device *adev = file_inode(f)->i_private;
> + ssize_t result = 0;
> + int r;
> +
> + if (size & 0x3 || *pos & 0x3)
> + return -EINVAL;
> +
> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> + if (r < 0) {
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> + return r;
> + }
> +
> + while (size) {
> + u32 value;
> +
> + r = amdgpu_get_gfx_off_entrycount(adev, &value);
> + if (r)
> + goto out;
> +
> + r = put_user(value, (uint32_t *)buf);
> + if (r)
> + goto out;
> +
> + result += 4;
> + buf += 4;
> + *pos += 4;
> + size -= 4;
> + }
> +
> + r = result;
> +out:
> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> + return r;
> +}
> +
> /**
> * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
> *
> @@ -1249,6 +1400,19 @@ static const struct file_operations
> amdgpu_debugfs_gfxoff_status_fops = {
> .llseek = default_llseek
> };
>
> +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
> + .owner = THIS_MODULE,
> + .read = amdgpu_debugfs_gfxoff_count_read,
> + .llseek = default_llseek
> +};
> +
> +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops
> = {
> + .owner = THIS_MODULE,
> + .read = amdgpu_debugfs_gfxoff_residency_read,
> + .write = amdgpu_debugfs_gfxoff_residency_write,
> + .llseek = default_llseek
> +};
> +
> static const struct file_operations *debugfs_regs[] = {
> &amdgpu_debugfs_regs_fops,
> &amdgpu_debugfs_regs2_fops,
> @@ -1261,6 +1425,8 @@ static const struct file_operations *debugfs_regs[]
> = {
> &amdgpu_debugfs_gpr_fops,
> &amdgpu_debugfs_gfxoff_fops,
> &amdgpu_debugfs_gfxoff_status_fops,
> + &amdgpu_debugfs_gfxoff_count_fops,
> + &amdgpu_debugfs_gfxoff_residency_fops,
> };
>
> static const char *debugfs_regs_names[] = {
> @@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = {
> "amdgpu_gpr",
> "amdgpu_gfxoff",
> "amdgpu_gfxoff_status",
> + "amdgpu_gfxoff_count",
> + "amdgpu_gfxoff_residency",
> };
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b79ee4ffb879..15a95bc2c211 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
> INIT_WORK(&adev->xgmi_reset_work,
> amdgpu_device_xgmi_reset_func);
>
> adev->gfx.gfx_off_req_count = 1;
> + adev->gfx.gfx_off_residency = 0;
> + adev->gfx.gfx_off_entrycount = 0;
> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>
> atomic_set(&adev->throttling_logging_enabled, 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 222d3d7ea076..3675c1b899db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
> *adev, bool enable)
> mutex_unlock(&adev->gfx.gfx_off_mutex);
> }
>
> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> value)
> +{
> + int r = 0;
> +
> + mutex_lock(&adev->gfx.gfx_off_mutex);
> +
> + r = amdgpu_dpm_set_residency_gfxoff(adev, value);
> +
> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> +
> + return r;
> +}
> +
> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> *value)
> +{
> + int r = 0;
> +
> + mutex_lock(&adev->gfx.gfx_off_mutex);
> +
> + r = amdgpu_dpm_get_residency_gfxoff(adev, value);
> +
> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> +
> + return r;
> +}
> +
> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> *value)
> +{
> + int r = 0;
> +
> + mutex_lock(&adev->gfx.gfx_off_mutex);
> +
> + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
> +
> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> +
> + return r;
> +}
> +
> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> *value)
> {
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 23a696d38390..f06e979e2565 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
> struct mutex gfx_off_mutex;
> uint32_t gfx_off_req_count; /* default 1, enable gfx off:
> dec 1, disable gfx off: add 1 */
> struct delayed_work gfx_off_delay_work;
> + uint32_t gfx_off_residency;
> + uint32_t gfx_off_entrycount;
>
> /* pipe reservation */
> struct mutex pipe_reserve_mutex;
> @@ -407,6 +409,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct
> amdgpu_device *adev, int me,
> void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> *value);
> int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct
> ras_common_if *ras_block);
> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> *value);
> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> *residency);
> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> value);
> int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
> void *err_data,
> struct amdgpu_iv_entry *entry);
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 956b6ce81c84..df87d0768fd7 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
> amdgpu_device *adev,
> return ret;
> }
>
> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
> value)
> +{
> + struct smu_context *smu = adev->powerplay.pp_handle;
> + int ret = 0;
> +
> + if (!is_support_sw_smu(adev))
> + return -EOPNOTSUPP;
> +
> + mutex_lock(&adev->pm.mutex);
> + ret = smu_set_residency_gfxoff(smu, value);
> + mutex_unlock(&adev->pm.mutex);
> +
> + return ret;
> +}
> +
> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
> *value)
> +{
> + struct smu_context *smu = adev->powerplay.pp_handle;
> + int ret = 0;
> +
> + if (!is_support_sw_smu(adev))
> + return -EOPNOTSUPP;
> +
> + mutex_lock(&adev->pm.mutex);
> + ret = smu_get_residency_gfxoff(smu, value);
> + mutex_unlock(&adev->pm.mutex);
> +
> + return ret;
> +}
> +
> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
> u32 *value)
> +{
> + struct smu_context *smu = adev->powerplay.pp_handle;
> + int ret = 0;
> +
> + if (!is_support_sw_smu(adev))
> + return -EOPNOTSUPP;
> +
> + mutex_lock(&adev->pm.mutex);
> + ret = smu_get_entrycount_gfxoff(smu, value);
> + mutex_unlock(&adev->pm.mutex);
> +
> + return ret;
> +}
> +
> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
> *value)
> {
> struct smu_context *smu = adev->powerplay.pp_handle;
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index 65624d091ed2..83a83e93037c 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
> amdgpu_device *adev,
> int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
> int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> smu_event_type event,
> uint64_t event_arg);
> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
> *value);
> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
> value);
> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
> u32 *value);
> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
> *value);
> uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
> amdgpu_device *adev);
> void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index fd79b213fab4..cfc3b9d749bf 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle,
> return smu_set_pp_feature_mask(smu, new_mask);
> }
>
> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
> +{
> + if (!smu->ppt_funcs->set_gfx_off_residency)
> + return -EINVAL;
> +
> + return smu_set_gfx_off_residency(smu, value);
> +}
> +
> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
> +{
> + if (!smu->ppt_funcs->get_gfx_off_residency)
> + return -EINVAL;
> +
> + return smu_get_gfx_off_residency(smu, value);
> +}
> +
> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
> +{
> + if (!smu->ppt_funcs->get_gfx_off_entrycount)
> + return -EINVAL;
> +
> + return smu_get_gfx_off_entrycount(smu, value);
> +}
> +
> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
> {
> if (!smu->ppt_funcs->get_gfx_off_status)
> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> struct smu_context *smu = adev->powerplay.pp_handle;
> - int ret;
> + int ret, count;
>
> if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
> return 0;
> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
>
> smu_set_gfx_cgpg(smu, false);
>
> + /*
> + * pwfw resets entrycount when device is suspended, so we save
> the
> + * last value to be used when we resume to keep it consistent
> + */
> + ret = smu_get_entrycount_gfxoff(smu, &count);
> + if (!ret)
> + adev->gfx.gfx_off_entrycount = count;
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index b81c657c7386..9827075b768e 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
> */
> uint32_t (*get_gfx_off_status)(struct smu_context *smu);
>
> + /**
> + * @get_gfx_off_entrycount: total GFXOFF entry count at the time of
> + * query since system power-up
> + */
> + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
> *entrycount);
> +
> + /**
> + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
> + */
> + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
> +
> + /**
> + * @get_gfx_off_residency: Average GFXOFF residency % during the
> logging interval
> + */
> + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
> *residency);
> +
> /**
> * @register_irq_handler: Register interupt request handlers.
> */
> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
>
> int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
>
> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
> +
> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
> +
> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
> +
> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
>
> int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> index 7469bbfce1fb..ceb13c838067 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> @@ -47,6 +47,9 @@
> #define smu_notify_memory_pool_location(smu)
> smu_ppt_funcs(notify_memory_pool_location, 0, smu)
> #define smu_gfx_off_control(smu, enable)
> smu_ppt_funcs(gfx_off_control, 0, smu, enable)
> #define smu_get_gfx_off_status(smu)
> smu_ppt_funcs(get_gfx_off_status, 0, smu)
> +#define smu_get_gfx_off_entrycount(smu, value)
> smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
> value)
> +#define smu_get_gfx_off_residency(smu, value)
> smu_ppt_funcs(get_gfx_off_residency, 0, smu,
> value)
> +#define smu_set_gfx_off_residency(smu, value)
> smu_ppt_funcs(set_gfx_off_residency, 0, smu,
> value)
> #define smu_set_last_dcef_min_deep_sleep_clk(smu)
> smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
> #define smu_system_features_control(smu, en)
> smu_ppt_funcs(system_features_control, 0, smu, en)
> #define smu_init_max_sustainable_clocks(smu)
> smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
> --
> 2.37.1

2022-07-25 13:15:09

by André Almeida

[permalink] [raw]
Subject: Re: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs

Às 07:27 de 25/07/22, Quan, Evan escreveu:
> [AMD Official Use Only - General]
>
> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
>

Indeed, it's a good idea. I'll send a v2 with that change, thanks.

> BR
> Evan
>> -----Original Message-----
>> From: amd-gfx <[email protected]> On Behalf Of
>> André Almeida
>> Sent: Saturday, July 23, 2022 4:34 AM
>> To: Deucher, Alexander <[email protected]>; Koenig, Christian
>> <[email protected]>; Pan, Xinhui <[email protected]>; David
>> Airlie <[email protected]>; Daniel Vetter <[email protected]>; Zhang, Hawking
>> <[email protected]>; Zhou1, Tao <[email protected]>; Kuehling,
>> Felix <[email protected]>; Xiao, Jack <[email protected]>; amd-
>> [email protected]; [email protected]; linux-
>> [email protected]; StDenis, Tom <[email protected]>; Siqueira,
>> Rodrigo <[email protected]>
>> Cc: André Almeida <[email protected]>; [email protected]
>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
>>
>> Add debugfs interface to log GFXOFF statistics:
>>
>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
>> time of query since system power-up
>>
>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
>> Read it to get average GFXOFF residency % multiplied by 100
>> during the last logging interval.
>>
>> Both features are designed to keep the values persistent between
>> suspends.
>>
>> Signed-off-by: André Almeida <[email protected]>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168
>> ++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +
>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++
>> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
>> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +++-
>> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++
>> drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 +
>> 9 files changed, 321 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index e2eec985adb3..edf90a9ba980 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -1042,6 +1042,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct
>> file *f, char __user *buf,
>> return r;
>> }
>>
>> +/**
>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
>> + *
>> + * @f: open file handle
>> + * @buf: User buffer to store read data in
>> + * @size: Number of bytes to read
>> + * @pos: Offset to seek to
>> + *
>> + * Read the last residency value logged. It doesn't auto update, one needs
>> to
>> + * stop logging before getting the current value.
>> + */
>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char
>> __user *buf,
>> + size_t size, loff_t *pos)
>> +{
>> + struct amdgpu_device *adev = file_inode(f)->i_private;
>> + ssize_t result = 0;
>> + int r;
>> +
>> + if (size & 0x3 || *pos & 0x3)
>> + return -EINVAL;
>> +
>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> + if (r < 0) {
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> + return r;
>> + }
>> +
>> + while (size) {
>> + uint32_t value;
>> +
>> + r = amdgpu_get_gfx_off_residency(adev, &value);
>> + if (r)
>> + goto out;
>> +
>> + r = put_user(value, (uint32_t *)buf);
>> + if (r)
>> + goto out;
>> +
>> + result += 4;
>> + buf += 4;
>> + *pos += 4;
>> + size -= 4;
>> + }
>> +
>> + r = result;
>> +out:
>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> + return r;
>> +}
>> +
>> +/**
>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
>> + *
>> + * @f: open file handle
>> + * @buf: User buffer to write data from
>> + * @size: Number of bytes to write
>> + * @pos: Offset to seek to
>> + *
>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
>> + */
>> +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
>> char __user *buf,
>> + size_t size, loff_t *pos)
>> +{
>> + struct amdgpu_device *adev = file_inode(f)->i_private;
>> + ssize_t result = 0;
>> + int r;
>> +
>> + if (size & 0x3 || *pos & 0x3)
>> + return -EINVAL;
>> +
>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> + if (r < 0) {
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> + return r;
>> + }
>> +
>> + while (size) {
>> + u32 value;
>> +
>> + r = get_user(value, (uint32_t *)buf);
>> + if (r)
>> + goto out;
>> +
>> + amdgpu_set_gfx_off_residency(adev, value ? true : false);
>> +
>> + result += 4;
>> + buf += 4;
>> + *pos += 4;
>> + size -= 4;
>> + }
>> +
>> + r = result;
>> +out:
>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> + return r;
>> +}
>> +
>> +
>> +/**
>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
>> + *
>> + * @f: open file handle
>> + * @buf: User buffer to store read data in
>> + * @size: Number of bytes to read
>> + * @pos: Offset to seek to
>> + */
>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char
>> __user *buf,
>> + size_t size, loff_t *pos)
>> +{
>> + struct amdgpu_device *adev = file_inode(f)->i_private;
>> + ssize_t result = 0;
>> + int r;
>> +
>> + if (size & 0x3 || *pos & 0x3)
>> + return -EINVAL;
>> +
>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> + if (r < 0) {
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> + return r;
>> + }
>> +
>> + while (size) {
>> + u32 value;
>> +
>> + r = amdgpu_get_gfx_off_entrycount(adev, &value);
>> + if (r)
>> + goto out;
>> +
>> + r = put_user(value, (uint32_t *)buf);
>> + if (r)
>> + goto out;
>> +
>> + result += 4;
>> + buf += 4;
>> + *pos += 4;
>> + size -= 4;
>> + }
>> +
>> + r = result;
>> +out:
>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> + return r;
>> +}
>> +
>> /**
>> * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
>> *
>> @@ -1249,6 +1400,19 @@ static const struct file_operations
>> amdgpu_debugfs_gfxoff_status_fops = {
>> .llseek = default_llseek
>> };
>>
>> +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
>> + .owner = THIS_MODULE,
>> + .read = amdgpu_debugfs_gfxoff_count_read,
>> + .llseek = default_llseek
>> +};
>> +
>> +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops
>> = {
>> + .owner = THIS_MODULE,
>> + .read = amdgpu_debugfs_gfxoff_residency_read,
>> + .write = amdgpu_debugfs_gfxoff_residency_write,
>> + .llseek = default_llseek
>> +};
>> +
>> static const struct file_operations *debugfs_regs[] = {
>> &amdgpu_debugfs_regs_fops,
>> &amdgpu_debugfs_regs2_fops,
>> @@ -1261,6 +1425,8 @@ static const struct file_operations *debugfs_regs[]
>> = {
>> &amdgpu_debugfs_gpr_fops,
>> &amdgpu_debugfs_gfxoff_fops,
>> &amdgpu_debugfs_gfxoff_status_fops,
>> + &amdgpu_debugfs_gfxoff_count_fops,
>> + &amdgpu_debugfs_gfxoff_residency_fops,
>> };
>>
>> static const char *debugfs_regs_names[] = {
>> @@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = {
>> "amdgpu_gpr",
>> "amdgpu_gfxoff",
>> "amdgpu_gfxoff_status",
>> + "amdgpu_gfxoff_count",
>> + "amdgpu_gfxoff_residency",
>> };
>>
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index b79ee4ffb879..15a95bc2c211 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
>> *adev,
>> INIT_WORK(&adev->xgmi_reset_work,
>> amdgpu_device_xgmi_reset_func);
>>
>> adev->gfx.gfx_off_req_count = 1;
>> + adev->gfx.gfx_off_residency = 0;
>> + adev->gfx.gfx_off_entrycount = 0;
>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>
>> atomic_set(&adev->throttling_logging_enabled, 1);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 222d3d7ea076..3675c1b899db 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
>> *adev, bool enable)
>> mutex_unlock(&adev->gfx.gfx_off_mutex);
>> }
>>
>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>> value)
>> +{
>> + int r = 0;
>> +
>> + mutex_lock(&adev->gfx.gfx_off_mutex);
>> +
>> + r = amdgpu_dpm_set_residency_gfxoff(adev, value);
>> +
>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
>> +
>> + return r;
>> +}
>> +
>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>> *value)
>> +{
>> + int r = 0;
>> +
>> + mutex_lock(&adev->gfx.gfx_off_mutex);
>> +
>> + r = amdgpu_dpm_get_residency_gfxoff(adev, value);
>> +
>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
>> +
>> + return r;
>> +}
>> +
>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>> *value)
>> +{
>> + int r = 0;
>> +
>> + mutex_lock(&adev->gfx.gfx_off_mutex);
>> +
>> + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
>> +
>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
>> +
>> + return r;
>> +}
>> +
>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>> *value)
>> {
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 23a696d38390..f06e979e2565 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
>> struct mutex gfx_off_mutex;
>> uint32_t gfx_off_req_count; /* default 1, enable gfx off:
>> dec 1, disable gfx off: add 1 */
>> struct delayed_work gfx_off_delay_work;
>> + uint32_t gfx_off_residency;
>> + uint32_t gfx_off_entrycount;
>>
>> /* pipe reservation */
>> struct mutex pipe_reserve_mutex;
>> @@ -407,6 +409,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct
>> amdgpu_device *adev, int me,
>> void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>> *value);
>> int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct
>> ras_common_if *ras_block);
>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>> *value);
>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>> *residency);
>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>> value);
>> int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
>> void *err_data,
>> struct amdgpu_iv_entry *entry);
>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> index 956b6ce81c84..df87d0768fd7 100644
>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
>> amdgpu_device *adev,
>> return ret;
>> }
>>
>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>> value)
>> +{
>> + struct smu_context *smu = adev->powerplay.pp_handle;
>> + int ret = 0;
>> +
>> + if (!is_support_sw_smu(adev))
>> + return -EOPNOTSUPP;
>> +
>> + mutex_lock(&adev->pm.mutex);
>> + ret = smu_set_residency_gfxoff(smu, value);
>> + mutex_unlock(&adev->pm.mutex);
>> +
>> + return ret;
>> +}
>> +
>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>> *value)
>> +{
>> + struct smu_context *smu = adev->powerplay.pp_handle;
>> + int ret = 0;
>> +
>> + if (!is_support_sw_smu(adev))
>> + return -EOPNOTSUPP;
>> +
>> + mutex_lock(&adev->pm.mutex);
>> + ret = smu_get_residency_gfxoff(smu, value);
>> + mutex_unlock(&adev->pm.mutex);
>> +
>> + return ret;
>> +}
>> +
>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>> u32 *value)
>> +{
>> + struct smu_context *smu = adev->powerplay.pp_handle;
>> + int ret = 0;
>> +
>> + if (!is_support_sw_smu(adev))
>> + return -EOPNOTSUPP;
>> +
>> + mutex_lock(&adev->pm.mutex);
>> + ret = smu_get_entrycount_gfxoff(smu, value);
>> + mutex_unlock(&adev->pm.mutex);
>> +
>> + return ret;
>> +}
>> +
>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>> *value)
>> {
>> struct smu_context *smu = adev->powerplay.pp_handle;
>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> index 65624d091ed2..83a83e93037c 100644
>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
>> amdgpu_device *adev,
>> int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
>> int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
>> smu_event_type event,
>> uint64_t event_arg);
>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>> *value);
>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>> value);
>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>> u32 *value);
>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>> *value);
>> uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
>> amdgpu_device *adev);
>> void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> index fd79b213fab4..cfc3b9d749bf 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle,
>> return smu_set_pp_feature_mask(smu, new_mask);
>> }
>>
>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
>> +{
>> + if (!smu->ppt_funcs->set_gfx_off_residency)
>> + return -EINVAL;
>> +
>> + return smu_set_gfx_off_residency(smu, value);
>> +}
>> +
>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
>> +{
>> + if (!smu->ppt_funcs->get_gfx_off_residency)
>> + return -EINVAL;
>> +
>> + return smu_get_gfx_off_residency(smu, value);
>> +}
>> +
>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
>> +{
>> + if (!smu->ppt_funcs->get_gfx_off_entrycount)
>> + return -EINVAL;
>> +
>> + return smu_get_gfx_off_entrycount(smu, value);
>> +}
>> +
>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
>> {
>> if (!smu->ppt_funcs->get_gfx_off_status)
>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)
>> {
>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> struct smu_context *smu = adev->powerplay.pp_handle;
>> - int ret;
>> + int ret, count;
>>
>> if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
>> return 0;
>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
>>
>> smu_set_gfx_cgpg(smu, false);
>>
>> + /*
>> + * pwfw resets entrycount when device is suspended, so we save
>> the
>> + * last value to be used when we resume to keep it consistent
>> + */
>> + ret = smu_get_entrycount_gfxoff(smu, &count);
>> + if (!ret)
>> + adev->gfx.gfx_off_entrycount = count;
>> +
>> return 0;
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> index b81c657c7386..9827075b768e 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
>> */
>> uint32_t (*get_gfx_off_status)(struct smu_context *smu);
>>
>> + /**
>> + * @get_gfx_off_entrycount: total GFXOFF entry count at the time of
>> + * query since system power-up
>> + */
>> + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
>> *entrycount);
>> +
>> + /**
>> + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
>> + */
>> + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
>> +
>> + /**
>> + * @get_gfx_off_residency: Average GFXOFF residency % during the
>> logging interval
>> + */
>> + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
>> *residency);
>> +
>> /**
>> * @register_irq_handler: Register interupt request handlers.
>> */
>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
>>
>> int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
>>
>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
>> +
>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
>> +
>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
>> +
>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
>>
>> int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> index 7469bbfce1fb..ceb13c838067 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> @@ -47,6 +47,9 @@
>> #define smu_notify_memory_pool_location(smu)
>> smu_ppt_funcs(notify_memory_pool_location, 0, smu)
>> #define smu_gfx_off_control(smu, enable)
>> smu_ppt_funcs(gfx_off_control, 0, smu, enable)
>> #define smu_get_gfx_off_status(smu)
>> smu_ppt_funcs(get_gfx_off_status, 0, smu)
>> +#define smu_get_gfx_off_entrycount(smu, value)
>> smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
>> value)
>> +#define smu_get_gfx_off_residency(smu, value)
>> smu_ppt_funcs(get_gfx_off_residency, 0, smu,
>> value)
>> +#define smu_set_gfx_off_residency(smu, value)
>> smu_ppt_funcs(set_gfx_off_residency, 0, smu,
>> value)
>> #define smu_set_last_dcef_min_deep_sleep_clk(smu)
>> smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
>> #define smu_system_features_control(smu, en)
>> smu_ppt_funcs(system_features_control, 0, smu, en)
>> #define smu_init_max_sustainable_clocks(smu)
>> smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
>> --
>> 2.37.1

2022-07-25 16:49:36

by André Almeida

[permalink] [raw]
Subject: Re: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs

Às 10:04 de 25/07/22, André Almeida escreveu:
> Às 07:27 de 25/07/22, Quan, Evan escreveu:
>> [AMD Official Use Only - General]
>>
>> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
>>
>
> Indeed, it's a good idea. I'll send a v2 with that change, thanks.
>

However, SMU messaging reads a 32-bit register to get the entrycount from
the pwfw, so we would keep the risk of overflow anyway, right?

>> BR
>> Evan
>>> -----Original Message-----
>>> From: amd-gfx <[email protected]> On Behalf Of
>>> André Almeida
>>> Sent: Saturday, July 23, 2022 4:34 AM
>>> To: Deucher, Alexander <[email protected]>; Koenig, Christian
>>> <[email protected]>; Pan, Xinhui <[email protected]>; David
>>> Airlie <[email protected]>; Daniel Vetter <[email protected]>; Zhang, Hawking
>>> <[email protected]>; Zhou1, Tao <[email protected]>; Kuehling,
>>> Felix <[email protected]>; Xiao, Jack <[email protected]>; amd-
>>> [email protected]; [email protected]; linux-
>>> [email protected]; StDenis, Tom <[email protected]>; Siqueira,
>>> Rodrigo <[email protected]>
>>> Cc: André Almeida <[email protected]>; [email protected]
>>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
>>>
>>> Add debugfs interface to log GFXOFF statistics:
>>>
>>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
>>> time of query since system power-up
>>>
>>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
>>> Read it to get average GFXOFF residency % multiplied by 100
>>> during the last logging interval.
>>>
>>> Both features are designed to keep the values persistent between
>>> suspends.
>>>
>>> Signed-off-by: André Almeida <[email protected]>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168
>>> ++++++++++++++++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +
>>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++
>>> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
>>> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +++-
>>> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++
>>> drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 +
>>> 9 files changed, 321 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> index e2eec985adb3..edf90a9ba980 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> @@ -1042,6 +1042,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct
>>> file *f, char __user *buf,
>>> return r;
>>> }
>>>
>>> +/**
>>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
>>> + *
>>> + * @f: open file handle
>>> + * @buf: User buffer to store read data in
>>> + * @size: Number of bytes to read
>>> + * @pos: Offset to seek to
>>> + *
>>> + * Read the last residency value logged. It doesn't auto update, one needs
>>> to
>>> + * stop logging before getting the current value.
>>> + */
>>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char
>>> __user *buf,
>>> + size_t size, loff_t *pos)
>>> +{
>>> + struct amdgpu_device *adev = file_inode(f)->i_private;
>>> + ssize_t result = 0;
>>> + int r;
>>> +
>>> + if (size & 0x3 || *pos & 0x3)
>>> + return -EINVAL;
>>> +
>>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>> + if (r < 0) {
>>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> + return r;
>>> + }
>>> +
>>> + while (size) {
>>> + uint32_t value;
>>> +
>>> + r = amdgpu_get_gfx_off_residency(adev, &value);
>>> + if (r)
>>> + goto out;
>>> +
>>> + r = put_user(value, (uint32_t *)buf);
>>> + if (r)
>>> + goto out;
>>> +
>>> + result += 4;
>>> + buf += 4;
>>> + *pos += 4;
>>> + size -= 4;
>>> + }
>>> +
>>> + r = result;
>>> +out:
>>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +
>>> + return r;
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
>>> + *
>>> + * @f: open file handle
>>> + * @buf: User buffer to write data from
>>> + * @size: Number of bytes to write
>>> + * @pos: Offset to seek to
>>> + *
>>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
>>> + */
>>> +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
>>> char __user *buf,
>>> + size_t size, loff_t *pos)
>>> +{
>>> + struct amdgpu_device *adev = file_inode(f)->i_private;
>>> + ssize_t result = 0;
>>> + int r;
>>> +
>>> + if (size & 0x3 || *pos & 0x3)
>>> + return -EINVAL;
>>> +
>>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>> + if (r < 0) {
>>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> + return r;
>>> + }
>>> +
>>> + while (size) {
>>> + u32 value;
>>> +
>>> + r = get_user(value, (uint32_t *)buf);
>>> + if (r)
>>> + goto out;
>>> +
>>> + amdgpu_set_gfx_off_residency(adev, value ? true : false);
>>> +
>>> + result += 4;
>>> + buf += 4;
>>> + *pos += 4;
>>> + size -= 4;
>>> + }
>>> +
>>> + r = result;
>>> +out:
>>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +
>>> + return r;
>>> +}
>>> +
>>> +
>>> +/**
>>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
>>> + *
>>> + * @f: open file handle
>>> + * @buf: User buffer to store read data in
>>> + * @size: Number of bytes to read
>>> + * @pos: Offset to seek to
>>> + */
>>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char
>>> __user *buf,
>>> + size_t size, loff_t *pos)
>>> +{
>>> + struct amdgpu_device *adev = file_inode(f)->i_private;
>>> + ssize_t result = 0;
>>> + int r;
>>> +
>>> + if (size & 0x3 || *pos & 0x3)
>>> + return -EINVAL;
>>> +
>>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>> + if (r < 0) {
>>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> + return r;
>>> + }
>>> +
>>> + while (size) {
>>> + u32 value;
>>> +
>>> + r = amdgpu_get_gfx_off_entrycount(adev, &value);
>>> + if (r)
>>> + goto out;
>>> +
>>> + r = put_user(value, (uint32_t *)buf);
>>> + if (r)
>>> + goto out;
>>> +
>>> + result += 4;
>>> + buf += 4;
>>> + *pos += 4;
>>> + size -= 4;
>>> + }
>>> +
>>> + r = result;
>>> +out:
>>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +
>>> + return r;
>>> +}
>>> +
>>> /**
>>> * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
>>> *
>>> @@ -1249,6 +1400,19 @@ static const struct file_operations
>>> amdgpu_debugfs_gfxoff_status_fops = {
>>> .llseek = default_llseek
>>> };
>>>
>>> +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
>>> + .owner = THIS_MODULE,
>>> + .read = amdgpu_debugfs_gfxoff_count_read,
>>> + .llseek = default_llseek
>>> +};
>>> +
>>> +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops
>>> = {
>>> + .owner = THIS_MODULE,
>>> + .read = amdgpu_debugfs_gfxoff_residency_read,
>>> + .write = amdgpu_debugfs_gfxoff_residency_write,
>>> + .llseek = default_llseek
>>> +};
>>> +
>>> static const struct file_operations *debugfs_regs[] = {
>>> &amdgpu_debugfs_regs_fops,
>>> &amdgpu_debugfs_regs2_fops,
>>> @@ -1261,6 +1425,8 @@ static const struct file_operations *debugfs_regs[]
>>> = {
>>> &amdgpu_debugfs_gpr_fops,
>>> &amdgpu_debugfs_gfxoff_fops,
>>> &amdgpu_debugfs_gfxoff_status_fops,
>>> + &amdgpu_debugfs_gfxoff_count_fops,
>>> + &amdgpu_debugfs_gfxoff_residency_fops,
>>> };
>>>
>>> static const char *debugfs_regs_names[] = {
>>> @@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = {
>>> "amdgpu_gpr",
>>> "amdgpu_gfxoff",
>>> "amdgpu_gfxoff_status",
>>> + "amdgpu_gfxoff_count",
>>> + "amdgpu_gfxoff_residency",
>>> };
>>>
>>> /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index b79ee4ffb879..15a95bc2c211 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
>>> *adev,
>>> INIT_WORK(&adev->xgmi_reset_work,
>>> amdgpu_device_xgmi_reset_func);
>>>
>>> adev->gfx.gfx_off_req_count = 1;
>>> + adev->gfx.gfx_off_residency = 0;
>>> + adev->gfx.gfx_off_entrycount = 0;
>>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>>
>>> atomic_set(&adev->throttling_logging_enabled, 1);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> index 222d3d7ea076..3675c1b899db 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
>>> *adev, bool enable)
>>> mutex_unlock(&adev->gfx.gfx_off_mutex);
>>> }
>>>
>>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>>> value)
>>> +{
>>> + int r = 0;
>>> +
>>> + mutex_lock(&adev->gfx.gfx_off_mutex);
>>> +
>>> + r = amdgpu_dpm_set_residency_gfxoff(adev, value);
>>> +
>>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
>>> +
>>> + return r;
>>> +}
>>> +
>>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>>> *value)
>>> +{
>>> + int r = 0;
>>> +
>>> + mutex_lock(&adev->gfx.gfx_off_mutex);
>>> +
>>> + r = amdgpu_dpm_get_residency_gfxoff(adev, value);
>>> +
>>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
>>> +
>>> + return r;
>>> +}
>>> +
>>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>>> *value)
>>> +{
>>> + int r = 0;
>>> +
>>> + mutex_lock(&adev->gfx.gfx_off_mutex);
>>> +
>>> + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
>>> +
>>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
>>> +
>>> + return r;
>>> +}
>>> +
>>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>>> *value)
>>> {
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> index 23a696d38390..f06e979e2565 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
>>> struct mutex gfx_off_mutex;
>>> uint32_t gfx_off_req_count; /* default 1, enable gfx off:
>>> dec 1, disable gfx off: add 1 */
>>> struct delayed_work gfx_off_delay_work;
>>> + uint32_t gfx_off_residency;
>>> + uint32_t gfx_off_entrycount;
>>>
>>> /* pipe reservation */
>>> struct mutex pipe_reserve_mutex;
>>> @@ -407,6 +409,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct
>>> amdgpu_device *adev, int me,
>>> void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
>>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>>> *value);
>>> int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct
>>> ras_common_if *ras_block);
>>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
>>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>>> *value);
>>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>>> *residency);
>>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>>> value);
>>> int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
>>> void *err_data,
>>> struct amdgpu_iv_entry *entry);
>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> index 956b6ce81c84..df87d0768fd7 100644
>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
>>> amdgpu_device *adev,
>>> return ret;
>>> }
>>>
>>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>>> value)
>>> +{
>>> + struct smu_context *smu = adev->powerplay.pp_handle;
>>> + int ret = 0;
>>> +
>>> + if (!is_support_sw_smu(adev))
>>> + return -EOPNOTSUPP;
>>> +
>>> + mutex_lock(&adev->pm.mutex);
>>> + ret = smu_set_residency_gfxoff(smu, value);
>>> + mutex_unlock(&adev->pm.mutex);
>>> +
>>> + return ret;
>>> +}
>>> +
>>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>>> *value)
>>> +{
>>> + struct smu_context *smu = adev->powerplay.pp_handle;
>>> + int ret = 0;
>>> +
>>> + if (!is_support_sw_smu(adev))
>>> + return -EOPNOTSUPP;
>>> +
>>> + mutex_lock(&adev->pm.mutex);
>>> + ret = smu_get_residency_gfxoff(smu, value);
>>> + mutex_unlock(&adev->pm.mutex);
>>> +
>>> + return ret;
>>> +}
>>> +
>>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>>> u32 *value)
>>> +{
>>> + struct smu_context *smu = adev->powerplay.pp_handle;
>>> + int ret = 0;
>>> +
>>> + if (!is_support_sw_smu(adev))
>>> + return -EOPNOTSUPP;
>>> +
>>> + mutex_lock(&adev->pm.mutex);
>>> + ret = smu_get_entrycount_gfxoff(smu, value);
>>> + mutex_unlock(&adev->pm.mutex);
>>> +
>>> + return ret;
>>> +}
>>> +
>>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>>> *value)
>>> {
>>> struct smu_context *smu = adev->powerplay.pp_handle;
>>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> index 65624d091ed2..83a83e93037c 100644
>>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
>>> amdgpu_device *adev,
>>> int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
>>> int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
>>> smu_event_type event,
>>> uint64_t event_arg);
>>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>>> *value);
>>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>>> value);
>>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>>> u32 *value);
>>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>>> *value);
>>> uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
>>> amdgpu_device *adev);
>>> void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> index fd79b213fab4..cfc3b9d749bf 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle,
>>> return smu_set_pp_feature_mask(smu, new_mask);
>>> }
>>>
>>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
>>> +{
>>> + if (!smu->ppt_funcs->set_gfx_off_residency)
>>> + return -EINVAL;
>>> +
>>> + return smu_set_gfx_off_residency(smu, value);
>>> +}
>>> +
>>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
>>> +{
>>> + if (!smu->ppt_funcs->get_gfx_off_residency)
>>> + return -EINVAL;
>>> +
>>> + return smu_get_gfx_off_residency(smu, value);
>>> +}
>>> +
>>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
>>> +{
>>> + if (!smu->ppt_funcs->get_gfx_off_entrycount)
>>> + return -EINVAL;
>>> +
>>> + return smu_get_gfx_off_entrycount(smu, value);
>>> +}
>>> +
>>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
>>> {
>>> if (!smu->ppt_funcs->get_gfx_off_status)
>>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)
>>> {
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> struct smu_context *smu = adev->powerplay.pp_handle;
>>> - int ret;
>>> + int ret, count;
>>>
>>> if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
>>> return 0;
>>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
>>>
>>> smu_set_gfx_cgpg(smu, false);
>>>
>>> + /*
>>> + * pwfw resets entrycount when device is suspended, so we save
>>> the
>>> + * last value to be used when we resume to keep it consistent
>>> + */
>>> + ret = smu_get_entrycount_gfxoff(smu, &count);
>>> + if (!ret)
>>> + adev->gfx.gfx_off_entrycount = count;
>>> +
>>> return 0;
>>> }
>>>
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> index b81c657c7386..9827075b768e 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
>>> */
>>> uint32_t (*get_gfx_off_status)(struct smu_context *smu);
>>>
>>> + /**
>>> + * @gfx_off_entrycount: total GFXOFF entry count at the time of
>>> + * query since system power-up
>>> + */
>>> + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
>>> *entrycount);
>>> +
>>> + /**
>>> + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
>>> + */
>>> + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
>>> +
>>> + /**
>>> + * @get_gfx_off_residency: Average GFXOFF residency % during the
>>> logging interval
>>> + */
>>> + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
>>> *residency);
>>> +
>>> /**
>>> * @register_irq_handler: Register interupt request handlers.
>>> */
>>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
>>>
>>> int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
>>>
>>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
>>> +
>>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
>>> +
>>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
>>> +
>>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
>>>
>>> int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> index 7469bbfce1fb..ceb13c838067 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> @@ -47,6 +47,9 @@
>>> #define smu_notify_memory_pool_location(smu)
>>> smu_ppt_funcs(notify_memory_pool_location, 0, smu)
>>> #define smu_gfx_off_control(smu, enable)
>>> smu_ppt_funcs(gfx_off_control, 0, smu, enable)
>>> #define smu_get_gfx_off_status(smu)
>>> smu_ppt_funcs(get_gfx_off_status, 0, smu)
>>> +#define smu_get_gfx_off_entrycount(smu, value)
>>> smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
>>> value)
>>> +#define smu_get_gfx_off_residency(smu, value)
>>> smu_ppt_funcs(get_gfx_off_residency, 0, smu,
>>> value)
>>> +#define smu_set_gfx_off_residency(smu, value)
>>> smu_ppt_funcs(set_gfx_off_residency, 0, smu,
>>> value)
>>> #define smu_set_last_dcef_min_deep_sleep_clk(smu)
>>> smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
>>> #define smu_system_features_control(smu, en)
>>> smu_ppt_funcs(system_features_control, 0, smu, en)
>>> #define smu_init_max_sustainable_clocks(smu)
>>> smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
>>> --
>>> 2.37.1

2022-07-26 03:06:55

by Evan Quan

[permalink] [raw]
Subject: RE: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs

[AMD Official Use Only - General]



> -----Original Message-----
> From: André Almeida <[email protected]>
> Sent: Tuesday, July 26, 2022 12:15 AM
> To: Quan, Evan <[email protected]>; Deucher, Alexander
> <[email protected]>; Koenig, Christian
> <[email protected]>; Pan, Xinhui <[email protected]>; David
> Airlie <[email protected]>; Daniel Vetter <[email protected]>; Zhang, Hawking
> <[email protected]>; Zhou1, Tao <[email protected]>; Kuehling,
> Felix <[email protected]>; Xiao, Jack <[email protected]>; amd-
> [email protected]; [email protected]; linux-
> [email protected]; StDenis, Tom <[email protected]>; Siqueira,
> Rodrigo <[email protected]>
> Cc: [email protected]
> Subject: Re: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
>
> Às 10:04 de 25/07/22, André Almeida escreveu:
> > Às 07:27 de 25/07/22, Quan, Evan escreveu:
> >> [AMD Official Use Only - General]
> >>
> >> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
> >>
> >
> > Indeed, it's a good idea. I'll send a v2 with that change, thanks.
> >
>
> However, SMU messaging reads a 32-bit register to get the entrycount from
> the pwfw, so we would still be left with the risk of overflow anyway, right?
[Quan, Evan] Yes, that makes sense. Better to document that (the risk of overflow).
Anyway, the series seems fine to me.
Series is acked-by: Evan Quan <[email protected]>
>
> >> BR
> >> Evan
> >>> -----Original Message-----
> >>> From: amd-gfx <[email protected]> On Behalf Of
> >>> André Almeida
> >>> Sent: Saturday, July 23, 2022 4:34 AM
> >>> To: Deucher, Alexander <[email protected]>; Koenig,
> >>> Christian <[email protected]>; Pan, Xinhui
> >>> <[email protected]>; David Airlie <[email protected]>; Daniel Vetter
> >>> <[email protected]>; Zhang, Hawking <[email protected]>; Zhou1,
> >>> Tao <[email protected]>; Kuehling, Felix
> <[email protected]>;
> >>> Xiao, Jack <[email protected]>; amd- [email protected];
> >>> [email protected]; linux- [email protected];
> >>> StDenis, Tom <[email protected]>; Siqueira, Rodrigo
> >>> <[email protected]>
> >>> Cc: André Almeida <[email protected]>; [email protected]
> >>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> >>>
> >>> Add debugfs interface to log GFXOFF statistics:
> >>>
> >>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
> >>> time of query since system power-up
> >>>
> >>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
> >>> Read it to get average GFXOFF residency % multiplied by 100
> >>> during the last logging interval.
> >>>
> >>> Both features are designed to keep the values persistent between
> >>> suspends.
> >>>
> >>> Signed-off-by: André Almeida <[email protected]>
> >>> ---
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168
> >>> ++++++++++++++++++
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +
> >>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++
> >>> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
> >>> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +++-
> >>> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++
> >>> drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 +
> >>> 9 files changed, 321 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> index e2eec985adb3..edf90a9ba980 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> @@ -1042,6 +1042,157 @@ static ssize_t
> >>> amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
> >>> return r;
> >>> }
> >>>
> >>> +/**
> >>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
> >>> + *
> >>> + * @f: open file handle
> >>> + * @buf: User buffer to store read data in
> >>> + * @size: Number of bytes to read
> >>> + * @pos: Offset to seek to
> >>> + *
> >>> + * Read the last residency value logged. It doesn't auto update,
> >>> +one needs
> >>> to
> >>> + * stop logging before getting the current value.
> >>> + */
> >>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f,
> >>> +char
> >>> __user *buf,
> >>> + size_t size, loff_t *pos) {
> >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> >>> + ssize_t result = 0;
> >>> + int r;
> >>> +
> >>> + if (size & 0x3 || *pos & 0x3)
> >>> + return -EINVAL;
> >>> +
> >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>> + if (r < 0) {
> >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> + return r;
> >>> + }
> >>> +
> >>> + while (size) {
> >>> + uint32_t value;
> >>> +
> >>> + r = amdgpu_get_gfx_off_residency(adev, &value);
> >>> + if (r)
> >>> + goto out;
> >>> +
> >>> + r = put_user(value, (uint32_t *)buf);
> >>> + if (r)
> >>> + goto out;
> >>> +
> >>> + result += 4;
> >>> + buf += 4;
> >>> + *pos += 4;
> >>> + size -= 4;
> >>> + }
> >>> +
> >>> + r = result;
> >>> +out:
> >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +
> >>> + return r;
> >>> +}
> >>> +
> >>> +/**
> >>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
> >>> + *
> >>> + * @f: open file handle
> >>> + * @buf: User buffer to write data from
> >>> + * @size: Number of bytes to write
> >>> + * @pos: Offset to seek to
> >>> + *
> >>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to
> >>> +stop */ static ssize_t
> >>> +amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
> >>> char __user *buf,
> >>> + size_t size, loff_t *pos) {
> >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> >>> + ssize_t result = 0;
> >>> + int r;
> >>> +
> >>> + if (size & 0x3 || *pos & 0x3)
> >>> + return -EINVAL;
> >>> +
> >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>> + if (r < 0) {
> >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> + return r;
> >>> + }
> >>> +
> >>> + while (size) {
> >>> + u32 value;
> >>> +
> >>> + r = get_user(value, (uint32_t *)buf);
> >>> + if (r)
> >>> + goto out;
> >>> +
> >>> + amdgpu_set_gfx_off_residency(adev, value ? true : false);
> >>> +
> >>> + result += 4;
> >>> + buf += 4;
> >>> + *pos += 4;
> >>> + size -= 4;
> >>> + }
> >>> +
> >>> + r = result;
> >>> +out:
> >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +
> >>> + return r;
> >>> +}
> >>> +
> >>> +
> >>> +/**
> >>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
> >>> + *
> >>> + * @f: open file handle
> >>> + * @buf: User buffer to store read data in
> >>> + * @size: Number of bytes to read
> >>> + * @pos: Offset to seek to
> >>> + */
> >>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f,
> >>> +char
> >>> __user *buf,
> >>> + size_t size, loff_t *pos)
> >>> +{
> >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> >>> + ssize_t result = 0;
> >>> + int r;
> >>> +
> >>> + if (size & 0x3 || *pos & 0x3)
> >>> + return -EINVAL;
> >>> +
> >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>> + if (r < 0) {
> >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> + return r;
> >>> + }
> >>> +
> >>> + while (size) {
> >>> + u32 value;
> >>> +
> >>> + r = amdgpu_get_gfx_off_entrycount(adev, &value);
> >>> + if (r)
> >>> + goto out;
> >>> +
> >>> + r = put_user(value, (uint32_t *)buf);
> >>> + if (r)
> >>> + goto out;
> >>> +
> >>> + result += 4;
> >>> + buf += 4;
> >>> + *pos += 4;
> >>> + size -= 4;
> >>> + }
> >>> +
> >>> + r = result;
> >>> +out:
> >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +
> >>> + return r;
> >>> +}
> >>> +
> >>> /**
> >>> * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
> >>> *
> >>> @@ -1249,6 +1400,19 @@ static const struct file_operations
> >>> amdgpu_debugfs_gfxoff_status_fops = {
> >>> .llseek = default_llseek
> >>> };
> >>>
> >>> +static const struct file_operations
> amdgpu_debugfs_gfxoff_count_fops = {
> >>> + .owner = THIS_MODULE,
> >>> + .read = amdgpu_debugfs_gfxoff_count_read,
> >>> + .llseek = default_llseek
> >>> +};
> >>> +
> >>> +static const struct file_operations
> >>> +amdgpu_debugfs_gfxoff_residency_fops
> >>> = {
> >>> + .owner = THIS_MODULE,
> >>> + .read = amdgpu_debugfs_gfxoff_residency_read,
> >>> + .write = amdgpu_debugfs_gfxoff_residency_write,
> >>> + .llseek = default_llseek
> >>> +};
> >>> +
> >>> static const struct file_operations *debugfs_regs[] = {
> >>> &amdgpu_debugfs_regs_fops,
> >>> &amdgpu_debugfs_regs2_fops,
> >>> @@ -1261,6 +1425,8 @@ static const struct file_operations
> >>> *debugfs_regs[] = {
> >>> &amdgpu_debugfs_gpr_fops,
> >>> &amdgpu_debugfs_gfxoff_fops,
> >>> &amdgpu_debugfs_gfxoff_status_fops,
> >>> + &amdgpu_debugfs_gfxoff_count_fops,
> >>> + &amdgpu_debugfs_gfxoff_residency_fops,
> >>> };
> >>>
> >>> static const char *debugfs_regs_names[] = { @@ -1275,6 +1441,8 @@
> >>> static const char *debugfs_regs_names[] = {
> >>> "amdgpu_gpr",
> >>> "amdgpu_gfxoff",
> >>> "amdgpu_gfxoff_status",
> >>> + "amdgpu_gfxoff_count",
> >>> + "amdgpu_gfxoff_residency",
> >>> };
> >>>
> >>> /**
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> index b79ee4ffb879..15a95bc2c211 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
> >>> *adev,
> >>> INIT_WORK(&adev->xgmi_reset_work,
> >>> amdgpu_device_xgmi_reset_func);
> >>>
> >>> adev->gfx.gfx_off_req_count = 1;
> >>> + adev->gfx.gfx_off_residency = 0;
> >>> + adev->gfx.gfx_off_entrycount = 0;
> >>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> >>>
> >>> atomic_set(&adev->throttling_logging_enabled, 1); diff --git
> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> index 222d3d7ea076..3675c1b899db 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct
> amdgpu_device
> >>> *adev, bool enable)
> >>> mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>> }
> >>>
> >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> >>> value)
> >>> +{
> >>> + int r = 0;
> >>> +
> >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> + r = amdgpu_dpm_set_residency_gfxoff(adev, value);
> >>> +
> >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> + return r;
> >>> +}
> >>> +
> >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> >>> *value)
> >>> +{
> >>> + int r = 0;
> >>> +
> >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> + r = amdgpu_dpm_get_residency_gfxoff(adev, value);
> >>> +
> >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> + return r;
> >>> +}
> >>> +
> >>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev,
> u32
> >>> *value)
> >>> +{
> >>> + int r = 0;
> >>> +
> >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
> >>> +
> >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> + return r;
> >>> +}
> >>> +
> >>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> >>> *value)
> >>> {
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> index 23a696d38390..f06e979e2565 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
> >>> struct mutex gfx_off_mutex;
> >>> uint32_t gfx_off_req_count; /* default 1, enable gfx off:
> >>> dec 1, disable gfx off: add 1 */
> >>> struct delayed_work gfx_off_delay_work;
> >>> + uint32_t gfx_off_residency;
> >>> + uint32_t gfx_off_entrycount;
> >>>
> >>> /* pipe reservation */
> >>> struct mutex pipe_reserve_mutex;
> >>> @@ -407,6 +409,10 @@ bool
> amdgpu_gfx_is_me_queue_enabled(struct
> >>> amdgpu_device *adev, int me,
> >>> void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
> >>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> >>> *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev,
> >>> struct ras_common_if *ras_block);
> >>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); int
> >>> +amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> >>> *value);
> >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> >>> *residency);
> >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> >>> value);
> >>> int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
> >>> void *err_data,
> >>> struct amdgpu_iv_entry *entry);
> >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> index 956b6ce81c84..df87d0768fd7 100644
> >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
> >>> amdgpu_device *adev,
> >>> return ret;
> >>> }
> >>>
> >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> >>> +bool
> >>> value)
> >>> +{
> >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> >>> + int ret = 0;
> >>> +
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + mutex_lock(&adev->pm.mutex);
> >>> + ret = smu_set_residency_gfxoff(smu, value);
> >>> + mutex_unlock(&adev->pm.mutex);
> >>> +
> >>> + return ret;
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> u32
> >>> *value)
> >>> +{
> >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> >>> + int ret = 0;
> >>> +
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + mutex_lock(&adev->pm.mutex);
> >>> + ret = smu_get_residency_gfxoff(smu, value);
> >>> + mutex_unlock(&adev->pm.mutex);
> >>> +
> >>> + return ret;
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> *adev,
> >>> u32 *value)
> >>> +{
> >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> >>> + int ret = 0;
> >>> +
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + mutex_lock(&adev->pm.mutex);
> >>> + ret = smu_get_entrycount_gfxoff(smu, value);
> >>> + mutex_unlock(&adev->pm.mutex);
> >>> +
> >>> + return ret;
> >>> +}
> >>> +
> >>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> >>> uint32_t
> >>> *value)
> >>> {
> >>> struct smu_context *smu = adev->powerplay.pp_handle; diff --git
> >>> a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> index 65624d091ed2..83a83e93037c 100644
> >>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
> >>> amdgpu_device *adev,
> >>> int amdgpu_dpm_write_watermarks_table(struct amdgpu_device
> *adev);
> >>> int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> >>> smu_event_type event,
> >>> uint64_t event_arg);
> >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> u32
> >>> *value);
> >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> >>> +bool
> >>> value);
> >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> *adev,
> >>> u32 *value);
> >>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> >>> uint32_t *value); uint64_t
> >>> amdgpu_dpm_get_thermal_throttling_counter(struct
> >>> amdgpu_device *adev);
> >>> void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
> diff
> >>> --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> index fd79b213fab4..cfc3b9d749bf 100644
> >>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void
> *handle,
> >>> return smu_set_pp_feature_mask(smu, new_mask); }
> >>>
> >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value) {
> >>> + if (!smu->ppt_funcs->set_gfx_off_residency)
> >>> + return -EINVAL;
> >>> +
> >>> + return smu_set_gfx_off_residency(smu, value); }
> >>> +
> >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) {
> >>> + if (!smu->ppt_funcs->get_gfx_off_residency)
> >>> + return -EINVAL;
> >>> +
> >>> + return smu_get_gfx_off_residency(smu, value); }
> >>> +
> >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
> >>> +{
> >>> + if (!smu->ppt_funcs->get_gfx_off_entrycount)
> >>> + return -EINVAL;
> >>> +
> >>> + return smu_get_gfx_off_entrycount(smu, value); }
> >>> +
> >>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
> >>> {
> >>> if (!smu->ppt_funcs->get_gfx_off_status)
> >>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle) {
> >>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> >>> struct smu_context *smu = adev->powerplay.pp_handle;
> >>> - int ret;
> >>> + int ret, count;
> >>>
> >>> if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
> >>> return 0;
> >>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
> >>>
> >>> smu_set_gfx_cgpg(smu, false);
> >>>
> >>> + /*
> >>> + * pwfw resets entrycount when device is suspended, so we save
> >>> the
> >>> + * last value to be used when we resume to keep it consistent
> >>> + */
> >>> + ret = smu_get_entrycount_gfxoff(smu, &count);
> >>> + if (!ret)
> >>> + adev->gfx.gfx_off_entrycount = count;
> >>> +
> >>> return 0;
> >>> }
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> index b81c657c7386..9827075b768e 100644
> >>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
> >>> */
> >>> uint32_t (*get_gfx_off_status)(struct smu_context *smu);
> >>>
> >>> + /**
> >>> + * @gfx_off_entrycount: total GFXOFF entry count at the time of
> >>> + * query since system power-up
> >>> + */
> >>> + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
> >>> *entrycount);
> >>> +
> >>> + /**
> >>> + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
> >>> + */
> >>> + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
> >>> +
> >>> + /**
> >>> + * @get_gfx_off_residency: Average GFXOFF residency % during the
> >>> logging interval
> >>> + */
> >>> + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
> >>> *residency);
> >>> +
> >>> /**
> >>> * @register_irq_handler: Register interupt request handlers.
> >>> */
> >>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
> >>>
> >>> int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
> >>>
> >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
> >>> +
> >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
> >>> +
> >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
> >>> +
> >>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t
> >>> *value);
> >>>
> >>> int smu_handle_passthrough_sbr(struct smu_context *smu, bool
> >>> enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> index 7469bbfce1fb..ceb13c838067 100644
> >>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> @@ -47,6 +47,9 @@
> >>> #define smu_notify_memory_pool_location(smu)
> >>> smu_ppt_funcs(notify_memory_pool_location, 0, smu) #define
> >>> smu_gfx_off_control(smu, enable)
> >>> smu_ppt_funcs(gfx_off_control, 0, smu, enable) #define
> >>> smu_get_gfx_off_status(smu)
> >>> smu_ppt_funcs(get_gfx_off_status, 0, smu)
> >>> +#define smu_get_gfx_off_entrycount(smu, value)
> >>> smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
> >>> value)
> >>> +#define smu_get_gfx_off_residency(smu, value)
> >>> smu_ppt_funcs(get_gfx_off_residency, 0, smu,
> >>> value)
> >>> +#define smu_set_gfx_off_residency(smu, value)
> >>> smu_ppt_funcs(set_gfx_off_residency, 0, smu,
> >>> value)
> >>> #define smu_set_last_dcef_min_deep_sleep_clk(smu)
> >>> smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
> #define
> >>> smu_system_features_control(smu, en)
> >>> smu_ppt_funcs(system_features_control, 0, smu, en) #define
> >>> smu_init_max_sustainable_clocks(smu)
> >>> smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
> >>> --
> >>> 2.37.1

2022-07-26 13:47:23

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs

On Mon, Jul 25, 2022 at 11:03 PM Quan, Evan <[email protected]> wrote:
>
> [AMD Official Use Only - General]
>
>
>
> > -----Original Message-----
> > From: André Almeida <[email protected]>
> > Sent: Tuesday, July 26, 2022 12:15 AM
> > To: Quan, Evan <[email protected]>; Deucher, Alexander
> > <[email protected]>; Koenig, Christian
> > <[email protected]>; Pan, Xinhui <[email protected]>; David
> > Airlie <[email protected]>; Daniel Vetter <[email protected]>; Zhang, Hawking
> > <[email protected]>; Zhou1, Tao <[email protected]>; Kuehling,
> > Felix <[email protected]>; Xiao, Jack <[email protected]>; amd-
> > [email protected]; [email protected]; linux-
> > [email protected]; StDenis, Tom <[email protected]>; Siqueira,
> > Rodrigo <[email protected]>
> > Cc: [email protected]
> > Subject: Re: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> >
> > Às 10:04 de 25/07/22, André Almeida escreveu:
> > > Às 07:27 de 25/07/22, Quan, Evan escreveu:
> > >> [AMD Official Use Only - General]
> > >>
> > >> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
> > >>
> > >
> > > Indeed, it's a good idea. I'll send a v2 with that change, thanks.
> > >
> >
> > However, SMU messaging reads a 32-bit register to get the entrycount from
> > the pwfw, so we would still be left with the risk of overflow anyway, right?
> [Quan, Evan] Yes, that makes sense. Better to document that (the risk of overflow).

It still might be better to use a 64-bit number for the external
interface to be more future-proof. Then we can either document the
behavior for specific chips or handle the wraparound in the driver.

Alex

> Anyway, the series seems fine to me.
> Series is acked-by: Evan Quan <[email protected]>
> >
> > >> BR
> > >> Evan
> > >>> -----Original Message-----
> > >>> From: amd-gfx <[email protected]> On Behalf Of
> > >>> André Almeida
> > >>> Sent: Saturday, July 23, 2022 4:34 AM
> > >>> To: Deucher, Alexander <[email protected]>; Koenig,
> > >>> Christian <[email protected]>; Pan, Xinhui
> > >>> <[email protected]>; David Airlie <[email protected]>; Daniel Vetter
> > >>> <[email protected]>; Zhang, Hawking <[email protected]>; Zhou1,
> > >>> Tao <[email protected]>; Kuehling, Felix
> > <[email protected]>;
> > >>> Xiao, Jack <[email protected]>; amd- [email protected];
> > >>> [email protected]; linux- [email protected];
> > >>> StDenis, Tom <[email protected]>; Siqueira, Rodrigo
> > >>> <[email protected]>
> > >>> Cc: André Almeida <[email protected]>; [email protected]
> > >>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> > >>>
> > >>> Add debugfs interface to log GFXOFF statistics:
> > >>>
> > >>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
> > >>> time of query since system power-up
> > >>>
> > >>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
> > >>> Read it to get average GFXOFF residency % multiplied by 100
> > >>> during the last logging interval.
> > >>>
> > >>> Both features are designed to keep the values persistent between
> > >>> suspends.
> > >>>
> > >>> Signed-off-by: André Almeida <[email protected]>
> > >>> ---
> > >>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168
> > >>> ++++++++++++++++++
> > >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
> > >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++
> > >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 +
> > >>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++
> > >>> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +
> > >>> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 +++-
> > >>> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++
> > >>> drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 +
> > >>> 9 files changed, 321 insertions(+), 1 deletion(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> index e2eec985adb3..edf90a9ba980 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> @@ -1042,6 +1042,157 @@ static ssize_t
> > >>> amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
> > >>> return r;
> > >>> }
> > >>>
> > >>> +/**
> > >>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
> > >>> + *
> > >>> + * @f: open file handle
> > >>> + * @buf: User buffer to store read data in
> > >>> + * @size: Number of bytes to read
> > >>> + * @pos: Offset to seek to
> > >>> + *
> > >>> + * Read the last residency value logged. It doesn't auto update,
> > >>> +one needs
> > >>> to
> > >>> + * stop logging before getting the current value.
> > >>> + */
> > >>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f,
> > >>> +char
> > >>> __user *buf,
> > >>> + size_t size, loff_t *pos) {
> > >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> > >>> + ssize_t result = 0;
> > >>> + int r;
> > >>> +
> > >>> + if (size & 0x3 || *pos & 0x3)
> > >>> + return -EINVAL;
> > >>> +
> > >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> > >>> + if (r < 0) {
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> + return r;
> > >>> + }
> > >>> +
> > >>> + while (size) {
> > >>> + uint32_t value;
> > >>> +
> > >>> + r = amdgpu_get_gfx_off_residency(adev, &value);
> > >>> + if (r)
> > >>> + goto out;
> > >>> +
> > >>> + r = put_user(value, (uint32_t *)buf);
> > >>> + if (r)
> > >>> + goto out;
> > >>> +
> > >>> + result += 4;
> > >>> + buf += 4;
> > >>> + *pos += 4;
> > >>> + size -= 4;
> > >>> + }
> > >>> +
> > >>> + r = result;
> > >>> +out:
> > >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +/**
> > >>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
> > >>> + *
> > >>> + * @f: open file handle
> > >>> + * @buf: User buffer to write data from
> > >>> + * @size: Number of bytes to write
> > >>> + * @pos: Offset to seek to
> > >>> + *
> > >>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to
> > >>> +stop */ static ssize_t
> > >>> +amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
> > >>> char __user *buf,
> > >>> + size_t size, loff_t *pos) {
> > >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> > >>> + ssize_t result = 0;
> > >>> + int r;
> > >>> +
> > >>> + if (size & 0x3 || *pos & 0x3)
> > >>> + return -EINVAL;
> > >>> +
> > >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> > >>> + if (r < 0) {
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> + return r;
> > >>> + }
> > >>> +
> > >>> + while (size) {
> > >>> + u32 value;
> > >>> +
> > >>> + r = get_user(value, (uint32_t *)buf);
> > >>> + if (r)
> > >>> + goto out;
> > >>> +
> > >>> + amdgpu_set_gfx_off_residency(adev, value ? true : false);
> > >>> +
> > >>> + result += 4;
> > >>> + buf += 4;
> > >>> + *pos += 4;
> > >>> + size -= 4;
> > >>> + }
> > >>> +
> > >>> + r = result;
> > >>> +out:
> > >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +
> > >>> +/**
> > >>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
> > >>> + *
> > >>> + * @f: open file handle
> > >>> + * @buf: User buffer to store read data in
> > >>> + * @size: Number of bytes to read
> > >>> + * @pos: Offset to seek to
> > >>> + */
> > >>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f,
> > >>> +char
> > >>> __user *buf,
> > >>> + size_t size, loff_t *pos)
> > >>> +{
> > >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> > >>> + ssize_t result = 0;
> > >>> + int r;
> > >>> +
> > >>> + if (size & 0x3 || *pos & 0x3)
> > >>> + return -EINVAL;
> > >>> +
> > >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> > >>> + if (r < 0) {
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> + return r;
> > >>> + }
> > >>> +
> > >>> + while (size) {
> > >>> + u32 value;
> > >>> +
> > >>> + r = amdgpu_get_gfx_off_entrycount(adev, &value);
> > >>> + if (r)
> > >>> + goto out;
> > >>> +
> > >>> + r = put_user(value, (uint32_t *)buf);
> > >>> + if (r)
> > >>> + goto out;
> > >>> +
> > >>> + result += 4;
> > >>> + buf += 4;
> > >>> + *pos += 4;
> > >>> + size -= 4;
> > >>> + }
> > >>> +
> > >>> + r = result;
> > >>> +out:
> > >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> /**
> > >>> * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
> > >>> *
> > >>> @@ -1249,6 +1400,19 @@ static const struct file_operations
> > >>> amdgpu_debugfs_gfxoff_status_fops = {
> > >>> .llseek = default_llseek
> > >>> };
> > >>>
> > >>> +static const struct file_operations
> > amdgpu_debugfs_gfxoff_count_fops = {
> > >>> + .owner = THIS_MODULE,
> > >>> + .read = amdgpu_debugfs_gfxoff_count_read,
> > >>> + .llseek = default_llseek
> > >>> +};
> > >>> +
> > >>> +static const struct file_operations
> > >>> +amdgpu_debugfs_gfxoff_residency_fops
> > >>> = {
> > >>> + .owner = THIS_MODULE,
> > >>> + .read = amdgpu_debugfs_gfxoff_residency_read,
> > >>> + .write = amdgpu_debugfs_gfxoff_residency_write,
> > >>> + .llseek = default_llseek
> > >>> +};
> > >>> +
> > >>> static const struct file_operations *debugfs_regs[] = {
> > >>> &amdgpu_debugfs_regs_fops,
> > >>> &amdgpu_debugfs_regs2_fops,
> > >>> @@ -1261,6 +1425,8 @@ static const struct file_operations
> > >>> *debugfs_regs[] = {
> > >>> &amdgpu_debugfs_gpr_fops,
> > >>> &amdgpu_debugfs_gfxoff_fops,
> > >>> &amdgpu_debugfs_gfxoff_status_fops,
> > >>> + &amdgpu_debugfs_gfxoff_count_fops,
> > >>> + &amdgpu_debugfs_gfxoff_residency_fops,
> > >>> };
> > >>>
> > >>> static const char *debugfs_regs_names[] = { @@ -1275,6 +1441,8 @@
> > >>> static const char *debugfs_regs_names[] = {
> > >>> "amdgpu_gpr",
> > >>> "amdgpu_gfxoff",
> > >>> "amdgpu_gfxoff_status",
> > >>> + "amdgpu_gfxoff_count",
> > >>> + "amdgpu_gfxoff_residency",
> > >>> };
> > >>>
> > >>> /**
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> index b79ee4ffb879..15a95bc2c211 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
> > >>> *adev,
> > >>> INIT_WORK(&adev->xgmi_reset_work,
> > >>> amdgpu_device_xgmi_reset_func);
> > >>>
> > >>> adev->gfx.gfx_off_req_count = 1;
> > >>> + adev->gfx.gfx_off_residency = 0;
> > >>> + adev->gfx.gfx_off_entrycount = 0;
> > >>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> > >>>
> > >>> atomic_set(&adev->throttling_logging_enabled, 1); diff --git
> > >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> index 222d3d7ea076..3675c1b899db 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct
> > amdgpu_device
> > >>> *adev, bool enable)
> > >>> mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>> }
> > >>>
> > >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> > >>> value)
> > >>> +{
> > >>> + int r = 0;
> > >>> +
> > >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + r = amdgpu_dpm_set_residency_gfxoff(adev, value);
> > >>> +
> > >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> > >>> *value)
> > >>> +{
> > >>> + int r = 0;
> > >>> +
> > >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + r = amdgpu_dpm_get_residency_gfxoff(adev, value);
> > >>> +
> > >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev,
> > u32
> > >>> *value)
> > >>> +{
> > >>> + int r = 0;
> > >>> +
> > >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
> > >>> +
> > >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> > >>> *value)
> > >>> {
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> index 23a696d38390..f06e979e2565 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
> > >>> struct mutex gfx_off_mutex;
> > >>> uint32_t gfx_off_req_count; /* default 1, enable gfx off:
> > >>> dec 1, disable gfx off: add 1 */
> > >>> struct delayed_work gfx_off_delay_work;
> > >>> + uint32_t gfx_off_residency;
> > >>> + uint32_t gfx_off_entrycount;
> > >>>
> > >>> /* pipe reservation */
> > >>> struct mutex pipe_reserve_mutex;
> > >>> @@ -407,6 +409,10 @@ bool
> > amdgpu_gfx_is_me_queue_enabled(struct
> > >>> amdgpu_device *adev, int me,
> > >>> void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
> > >>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> > >>> *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev,
> > >>> struct ras_common_if *ras_block);
> > >>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); int
> > >>> +amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> > >>> *value);
> > >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> > >>> *residency);
> > >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> > >>> value);
> > >>> int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
> > >>> void *err_data,
> > >>> struct amdgpu_iv_entry *entry);
> > >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> index 956b6ce81c84..df87d0768fd7 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
> > >>> amdgpu_device *adev,
> > >>> return ret;
> > >>> }
> > >>>
> > >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> > >>> +bool
> > >>> value)
> > >>> +{
> > >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> + int ret = 0;
> > >>> +
> > >>> + if (!is_support_sw_smu(adev))
> > >>> + return -EOPNOTSUPP;
> > >>> +
> > >>> + mutex_lock(&adev->pm.mutex);
> > >>> + ret = smu_set_residency_gfxoff(smu, value);
> > >>> + mutex_unlock(&adev->pm.mutex);
> > >>> +
> > >>> + return ret;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> > u32
> > >>> *value)
> > >>> +{
> > >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> + int ret = 0;
> > >>> +
> > >>> + if (!is_support_sw_smu(adev))
> > >>> + return -EOPNOTSUPP;
> > >>> +
> > >>> + mutex_lock(&adev->pm.mutex);
> > >>> + ret = smu_get_residency_gfxoff(smu, value);
> > >>> + mutex_unlock(&adev->pm.mutex);
> > >>> +
> > >>> + return ret;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> > *adev,
> > >>> u32 *value)
> > >>> +{
> > >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> + int ret = 0;
> > >>> +
> > >>> + if (!is_support_sw_smu(adev))
> > >>> + return -EOPNOTSUPP;
> > >>> +
> > >>> + mutex_lock(&adev->pm.mutex);
> > >>> + ret = smu_get_entrycount_gfxoff(smu, value);
> > >>> + mutex_unlock(&adev->pm.mutex);
> > >>> +
> > >>> + return ret;
> > >>> +}
> > >>> +
> > >>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> > >>> uint32_t
> > >>> *value)
> > >>> {
> > >>> struct smu_context *smu = adev->powerplay.pp_handle; diff --git
> > >>> a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> index 65624d091ed2..83a83e93037c 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
> > >>> amdgpu_device *adev,
> > >>> int amdgpu_dpm_write_watermarks_table(struct amdgpu_device
> > *adev);
> > >>> int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> > >>> smu_event_type event,
> > >>> uint64_t event_arg);
> > >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> > u32
> > >>> *value);
> > >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> > >>> +bool
> > >>> value);
> > >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> > *adev,
> > >>> u32 *value);
> > >>> int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> > >>> uint32_t *value); uint64_t
> > >>> amdgpu_dpm_get_thermal_throttling_counter(struct
> > >>> amdgpu_device *adev);
> > >>> void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
> > diff
> > >>> --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> index fd79b213fab4..cfc3b9d749bf 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void
> > *handle,
> > >>> return smu_set_pp_feature_mask(smu, new_mask); }
> > >>>
> > >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value) {
> > >>> + if (!smu->ppt_funcs->set_gfx_off_residency)
> > >>> + return -EINVAL;
> > >>> +
> > >>> + return smu_set_gfx_off_residency(smu, value); }
> > >>> +
> > >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) {
> > >>> + if (!smu->ppt_funcs->get_gfx_off_residency)
> > >>> + return -EINVAL;
> > >>> +
> > >>> + return smu_get_gfx_off_residency(smu, value); }
> > >>> +
> > >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
> > >>> +{
> > >>> + if (!smu->ppt_funcs->get_gfx_off_entrycount)
> > >>> + return -EINVAL;
> > >>> +
> > >>> + return smu_get_gfx_off_entrycount(smu, value); }
> > >>> +
> > >>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
> > >>> {
> > >>> if (!smu->ppt_funcs->get_gfx_off_status)
> > >>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle) {
> > >>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > >>> struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> - int ret;
> > >>> + int ret, count;
> > >>>
> > >>> if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
> > >>> return 0;
> > >>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
> > >>>
> > >>> smu_set_gfx_cgpg(smu, false);
> > >>>
> > >>> + /*
> > >>> + * pwfw resets entrycount when device is suspended, so we save
> > >>> the
> > >>> + * last value to be used when we resume to keep it consistent
> > >>> + */
> > >>> + ret = smu_get_entrycount_gfxoff(smu, &count);
> > >>> + if (!ret)
> > >>> + adev->gfx.gfx_off_entrycount = count;
> > >>> +
> > >>> return 0;
> > >>> }
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> index b81c657c7386..9827075b768e 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
> > >>> */
> > >>> uint32_t (*get_gfx_off_status)(struct smu_context *smu);
> > >>>
> > >>> + /**
> > >>> + * @gfx_off_entrycount: total GFXOFF entry count at the time of
> > >>> + * query since system power-up
> > >>> + */
> > >>> + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
> > >>> *entrycount);
> > >>> +
> > >>> + /**
> > >>> + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
> > >>> + */
> > >>> + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
> > >>> +
> > >>> + /**
> > >>> + * @get_gfx_off_residency: Average GFXOFF residency % during the
> > >>> logging interval
> > >>> + */
> > >>> + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
> > >>> *residency);
> > >>> +
> > >>> /**
> > >>> * @register_irq_handler: Register interupt request handlers.
> > >>> */
> > >>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
> > >>>
> > >>> int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
> > >>>
> > >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
> > >>> +
> > >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
> > >>> +
> > >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
> > >>> +
> > >>> int smu_get_status_gfxoff(struct smu_context *smu, uint32_t
> > >>> *value);
> > >>>
> > >>> int smu_handle_passthrough_sbr(struct smu_context *smu, bool
> > >>> enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> index 7469bbfce1fb..ceb13c838067 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> @@ -47,6 +47,9 @@
> > >>> #define smu_notify_memory_pool_location(smu)
> > >>> smu_ppt_funcs(notify_memory_pool_location, 0, smu) #define
> > >>> smu_gfx_off_control(smu, enable)
> > >>> smu_ppt_funcs(gfx_off_control, 0, smu, enable) #define
> > >>> smu_get_gfx_off_status(smu)
> > >>> smu_ppt_funcs(get_gfx_off_status, 0, smu)
> > >>> +#define smu_get_gfx_off_entrycount(smu, value)
> > >>> smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
> > >>> value)
> > >>> +#define smu_get_gfx_off_residency(smu, value)
> > >>> smu_ppt_funcs(get_gfx_off_residency, 0, smu,
> > >>> value)
> > >>> +#define smu_set_gfx_off_residency(smu, value)
> > >>> smu_ppt_funcs(set_gfx_off_residency, 0, smu,
> > >>> value)
> > >>> #define smu_set_last_dcef_min_deep_sleep_clk(smu)
> > >>> smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
> > #define
> > >>> smu_system_features_control(smu, en)
> > >>> smu_ppt_funcs(system_features_control, 0, smu, en) #define
> > >>> smu_init_max_sustainable_clocks(smu)
> > >>> smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
> > >>> --
> > >>> 2.37.1