2022-07-13 15:33:13

by André Almeida

[permalink] [raw]
Subject: [PATCH] drm/amd/debugfs: Expose GFXOFF state to userspace

GFXOFF has two different "state" values: one to define if the GPU is
allowed/disallowed to enter GFXOFF, usually called state; and another
one to define if currently GFXOFF is being used, usually called status.
Even when GFXOFF is allowed, GPU firmware can decide to not used it
accordingly to the GPU load.

Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
that should be decreased to allow GFXOFF and increased to disallow.

The issue with this interface is that userspace can't be sure if GFXOFF
is currently allowed. Even by checking amdgpu_gfxoff file, one might get
an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
can be either because it's currently disallowed or because it's allowed
but given the current GPU load it's enabled. Then, userspace needs to
rely on the fact that GFXOFF is enabled by default on boot and to track
this information.

To make userspace life easier and GFXOFF more reliable, return the
current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
same semantics of writing: 0 means not allowed, not 0 means allowed.

Expose the current status of GFXOFF through a new file,
amdgpu_gfxoff_status.

Signed-off-by: André Almeida <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f3b3c688e4e7..e2eec985adb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
}

while (size) {
- uint32_t value;
+ u32 value = adev->gfx.gfx_off_state;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;

r = amdgpu_get_gfx_off_status(adev, &value);
if (r)
goto out;

- r = put_user(value, (uint32_t *)buf);
+ r = put_user(value, (u32 *)buf);
if (r)
goto out;

@@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
.llseek = default_llseek
};

+static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_status_read,
+ .llseek = default_llseek
+};
+
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs2_fops,
@@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_wave_fops,
&amdgpu_debugfs_gpr_fops,
&amdgpu_debugfs_gfxoff_fops,
+ &amdgpu_debugfs_gfxoff_status_fops,
};

static const char *debugfs_regs_names[] = {
@@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
"amdgpu_wave",
"amdgpu_gpr",
"amdgpu_gfxoff",
+ "amdgpu_gfxoff_status",
};

/**
--
2.37.0


2022-07-14 16:45:59

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH] drm/amd/debugfs: Expose GFXOFF state to userspace

On Wed, Jul 13, 2022 at 11:15 AM André Almeida <[email protected]> wrote:
>
> GFXOFF has two different "state" values: one to define if the GPU is
> allowed/disallowed to enter GFXOFF, usually called state; and another
> one to define if currently GFXOFF is being used, usually called status.
> Even when GFXOFF is allowed, GPU firmware can decide to not used it
> accordingly to the GPU load.
>
> Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
> kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
> that should be decreased to allow GFXOFF and increased to disallow.
>
> The issue with this interface is that userspace can't be sure if GFXOFF
> is currently allowed. Even by checking amdgpu_gfxoff file, one might get
> an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
> can be either because it's currently disallowed or because it's allowed
> but given the current GPU load it's enabled. Then, userspace needs to
> rely on the fact that GFXOFF is enabled by default on boot and to track
> this information.
>
> To make userspace life easier and GFXOFF more reliable, return the
> current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
> same semantics of writing: 0 means not allowed, not 0 means allowed.
>

This looks good. Can you document this in the amdgpu kerneldoc?

Alex

> Expose the current status of GFXOFF through a new file,
> amdgpu_gfxoff_status.
>
> Signed-off-by: André Almeida <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
> 1 file changed, 47 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index f3b3c688e4e7..e2eec985adb3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
> }
>
> while (size) {
> - uint32_t value;
> + u32 value = adev->gfx.gfx_off_state;
> +
> + r = put_user(value, (u32 *)buf);
> + if (r)
> + goto out;
> +
> + result += 4;
> + buf += 4;
> + *pos += 4;
> + size -= 4;
> + }
> +
> + r = result;
> +out:
> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> + return r;
> +}
> +
> +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
> + size_t size, loff_t *pos)
> +{
> + struct amdgpu_device *adev = file_inode(f)->i_private;
> + ssize_t result = 0;
> + int r;
> +
> + if (size & 0x3 || *pos & 0x3)
> + return -EINVAL;
> +
> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> + if (r < 0) {
> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> + return r;
> + }
> +
> + while (size) {
> + u32 value;
>
> r = amdgpu_get_gfx_off_status(adev, &value);
> if (r)
> goto out;
>
> - r = put_user(value, (uint32_t *)buf);
> + r = put_user(value, (u32 *)buf);
> if (r)
> goto out;
>
> @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
> .llseek = default_llseek
> };
>
> +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
> + .owner = THIS_MODULE,
> + .read = amdgpu_debugfs_gfxoff_status_read,
> + .llseek = default_llseek
> +};
> +
> static const struct file_operations *debugfs_regs[] = {
> &amdgpu_debugfs_regs_fops,
> &amdgpu_debugfs_regs2_fops,
> @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
> &amdgpu_debugfs_wave_fops,
> &amdgpu_debugfs_gpr_fops,
> &amdgpu_debugfs_gfxoff_fops,
> + &amdgpu_debugfs_gfxoff_status_fops,
> };
>
> static const char *debugfs_regs_names[] = {
> @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
> "amdgpu_wave",
> "amdgpu_gpr",
> "amdgpu_gfxoff",
> + "amdgpu_gfxoff_status",
> };
>
> /**
> --
> 2.37.0
>

2022-07-14 16:57:23

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH] drm/amd/debugfs: Expose GFXOFF state to userspace

On Thu, Jul 14, 2022 at 12:45 PM André Almeida <[email protected]> wrote:
>
>
>
> Às 13:42 de 14/07/22, Alex Deucher escreveu:
> > On Wed, Jul 13, 2022 at 11:15 AM André Almeida <[email protected]> wrote:
> >>
> >> GFXOFF has two different "state" values: one to define if the GPU is
> >> allowed/disallowed to enter GFXOFF, usually called state; and another
> >> one to define if currently GFXOFF is being used, usually called status.
> >> Even when GFXOFF is allowed, GPU firmware can decide to not used it
> >> accordingly to the GPU load.
> >>
> >> Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
> >> kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
> >> that should be decreased to allow GFXOFF and increased to disallow.
> >>
> >> The issue with this interface is that userspace can't be sure if GFXOFF
> >> is currently allowed. Even by checking amdgpu_gfxoff file, one might get
> >> an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
> >> can be either because it's currently disallowed or because it's allowed
> >> but given the current GPU load it's enabled. Then, userspace needs to
> >> rely on the fact that GFXOFF is enabled by default on boot and to track
> >> this information.
> >>
> >> To make userspace life easier and GFXOFF more reliable, return the
> >> current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
> >> same semantics of writing: 0 means not allowed, not 0 means allowed.
> >>
> >
> > This looks good. Can you document this in the amdgpu kerneldoc?
> >
>
> Sure, let me send a v2 with that

While you are at it, if we are missing docs for the other gfxoff file
can you add that as well?

Thanks!

Alex

>
> > Alex
> >
> >> Expose the current status of GFXOFF through a new file,
> >> amdgpu_gfxoff_status.
> >>
> >> Signed-off-by: André Almeida <[email protected]>
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
> >> 1 file changed, 47 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >> index f3b3c688e4e7..e2eec985adb3 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >> @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
> >> }
> >>
> >> while (size) {
> >> - uint32_t value;
> >> + u32 value = adev->gfx.gfx_off_state;
> >> +
> >> + r = put_user(value, (u32 *)buf);
> >> + if (r)
> >> + goto out;
> >> +
> >> + result += 4;
> >> + buf += 4;
> >> + *pos += 4;
> >> + size -= 4;
> >> + }
> >> +
> >> + r = result;
> >> +out:
> >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >> +
> >> + return r;
> >> +}
> >> +
> >> +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
> >> + size_t size, loff_t *pos)
> >> +{
> >> + struct amdgpu_device *adev = file_inode(f)->i_private;
> >> + ssize_t result = 0;
> >> + int r;
> >> +
> >> + if (size & 0x3 || *pos & 0x3)
> >> + return -EINVAL;
> >> +
> >> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> + if (r < 0) {
> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >> + return r;
> >> + }
> >> +
> >> + while (size) {
> >> + u32 value;
> >>
> >> r = amdgpu_get_gfx_off_status(adev, &value);
> >> if (r)
> >> goto out;
> >>
> >> - r = put_user(value, (uint32_t *)buf);
> >> + r = put_user(value, (u32 *)buf);
> >> if (r)
> >> goto out;
> >>
> >> @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
> >> .llseek = default_llseek
> >> };
> >>
> >> +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
> >> + .owner = THIS_MODULE,
> >> + .read = amdgpu_debugfs_gfxoff_status_read,
> >> + .llseek = default_llseek
> >> +};
> >> +
> >> static const struct file_operations *debugfs_regs[] = {
> >> &amdgpu_debugfs_regs_fops,
> >> &amdgpu_debugfs_regs2_fops,
> >> @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
> >> &amdgpu_debugfs_wave_fops,
> >> &amdgpu_debugfs_gpr_fops,
> >> &amdgpu_debugfs_gfxoff_fops,
> >> + &amdgpu_debugfs_gfxoff_status_fops,
> >> };
> >>
> >> static const char *debugfs_regs_names[] = {
> >> @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
> >> "amdgpu_wave",
> >> "amdgpu_gpr",
> >> "amdgpu_gfxoff",
> >> + "amdgpu_gfxoff_status",
> >> };
> >>
> >> /**
> >> --
> >> 2.37.0
> >>

2022-07-14 17:19:21

by André Almeida

[permalink] [raw]
Subject: Re: [PATCH] drm/amd/debugfs: Expose GFXOFF state to userspace



Às 13:42 de 14/07/22, Alex Deucher escreveu:
> On Wed, Jul 13, 2022 at 11:15 AM André Almeida <[email protected]> wrote:
>>
>> GFXOFF has two different "state" values: one to define if the GPU is
>> allowed/disallowed to enter GFXOFF, usually called state; and another
>> one to define if currently GFXOFF is being used, usually called status.
>> Even when GFXOFF is allowed, GPU firmware can decide to not used it
>> accordingly to the GPU load.
>>
>> Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
>> kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
>> that should be decreased to allow GFXOFF and increased to disallow.
>>
>> The issue with this interface is that userspace can't be sure if GFXOFF
>> is currently allowed. Even by checking amdgpu_gfxoff file, one might get
>> an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
>> can be either because it's currently disallowed or because it's allowed
>> but given the current GPU load it's enabled. Then, userspace needs to
>> rely on the fact that GFXOFF is enabled by default on boot and to track
>> this information.
>>
>> To make userspace life easier and GFXOFF more reliable, return the
>> current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
>> same semantics of writing: 0 means not allowed, not 0 means allowed.
>>
>
> This looks good. Can you document this in the amdgpu kerneldoc?
>

Sure, let me send a v2 with that

> Alex
>
>> Expose the current status of GFXOFF through a new file,
>> amdgpu_gfxoff_status.
>>
>> Signed-off-by: André Almeida <[email protected]>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
>> 1 file changed, 47 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index f3b3c688e4e7..e2eec985adb3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
>> }
>>
>> while (size) {
>> - uint32_t value;
>> + u32 value = adev->gfx.gfx_off_state;
>> +
>> + r = put_user(value, (u32 *)buf);
>> + if (r)
>> + goto out;
>> +
>> + result += 4;
>> + buf += 4;
>> + *pos += 4;
>> + size -= 4;
>> + }
>> +
>> + r = result;
>> +out:
>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> + return r;
>> +}
>> +
>> +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
>> + size_t size, loff_t *pos)
>> +{
>> + struct amdgpu_device *adev = file_inode(f)->i_private;
>> + ssize_t result = 0;
>> + int r;
>> +
>> + if (size & 0x3 || *pos & 0x3)
>> + return -EINVAL;
>> +
>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> + if (r < 0) {
>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> + return r;
>> + }
>> +
>> + while (size) {
>> + u32 value;
>>
>> r = amdgpu_get_gfx_off_status(adev, &value);
>> if (r)
>> goto out;
>>
>> - r = put_user(value, (uint32_t *)buf);
>> + r = put_user(value, (u32 *)buf);
>> if (r)
>> goto out;
>>
>> @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
>> .llseek = default_llseek
>> };
>>
>> +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
>> + .owner = THIS_MODULE,
>> + .read = amdgpu_debugfs_gfxoff_status_read,
>> + .llseek = default_llseek
>> +};
>> +
>> static const struct file_operations *debugfs_regs[] = {
>> &amdgpu_debugfs_regs_fops,
>> &amdgpu_debugfs_regs2_fops,
>> @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
>> &amdgpu_debugfs_wave_fops,
>> &amdgpu_debugfs_gpr_fops,
>> &amdgpu_debugfs_gfxoff_fops,
>> + &amdgpu_debugfs_gfxoff_status_fops,
>> };
>>
>> static const char *debugfs_regs_names[] = {
>> @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
>> "amdgpu_wave",
>> "amdgpu_gpr",
>> "amdgpu_gfxoff",
>> + "amdgpu_gfxoff_status",
>> };
>>
>> /**
>> --
>> 2.37.0
>>