2022-04-29 18:00:08

by Gong, Richard

[permalink] [raw]
Subject: [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems

The Active State Power Management (ASPM) feature has been enabled since
kernel 5.14. Some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and
RX640, do not work with ASPM-enabled Intel Alder Lake based systems.
When one of these GFX cards is used as the video/display output, an Intel
Alder Lake based system will freeze after suspend/resume.

The issue was originally reported on one system (Dell Precision 3660 with
BIOS version 0.14.81), but was later confirmed to affect at least 4
pre-production Alder Lake based systems.

Add an extra check to disable ASPM on Intel Alder Lake based systems with
the problematic AMD Volcanic Islands GFX cards.

Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
Reported-by: kernel test robot <[email protected]>
Signed-off-by: Richard Gong <[email protected]>
---
v5: added vi to commit header and updated commit message
rolled back guard with the preprocessor as did in v2 to correct build
error on non-x86 systems
v4: s/CONFIG_X86_64/CONFIG_X86
enhanced check logic
v3: s/intel_core_aspm_chk/aspm_support_quirk_check
correct build error with W=1 option
v2: correct commit description
move the check from chip family to problematic platform
---
drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 039b90cdc3bc..45f0188c4273 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -81,6 +81,10 @@
#include "mxgpu_vi.h"
#include "amdgpu_dm.h"

+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
#define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
#define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L
#define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L
@@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
WREG32_PCIE(ixPCIE_LC_CNTL, data);
}

+static bool aspm_support_quirk_check(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+ return true;
+#endif
+}
+
static void vi_program_aspm(struct amdgpu_device *adev)
{
u32 data, data1, orig;
bool bL1SS = false;
bool bClkReqSupport = true;

- if (!amdgpu_device_should_use_aspm(adev))
+ if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
return;

if (adev->flags & AMD_IS_APU ||
--
2.25.1


2022-04-30 14:51:20

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems

Reviewed-by: Alex Deucher <[email protected]>

On Fri, Apr 29, 2022 at 12:08 PM Richard Gong <[email protected]> wrote:
>
> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and
> RX640, that do not work with ASPM-enabled Intel Alder Lake based systems.
> Using these GFX cards as video/display output, Intel Alder Lake based
> systems will freeze after suspend/resume.
>
> The issue was originally reported on one system (Dell Precision 3660 with
> BIOS version 0.14.81), but was later confirmed to affect at least 4
> pre-production Alder Lake based systems.
>
> Add an extra check to disable ASPM on Intel Alder Lake based systems with
> the problematic AMD Volcanic Islands GFX cards.
>
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> Reported-by: kernel test robot <[email protected]>
> Signed-off-by: Richard Gong <[email protected]>
> ---
> v5: added vi to commit header and updated commit message
> rolled back guard with the preprocessor as did in v2 to correct build
> error on non-x86 systems
> v4: s/CONFIG_X86_64/CONFIG_X86
> enhanced check logic
> v3: s/intel_core_aspm_chk/aspm_support_quirk_check
> correct build error with W=1 option
> v2: correct commit description
> move the check from chip family to problematic platform
> ---
> drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
> 1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 039b90cdc3bc..45f0188c4273 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,6 +81,10 @@
> #include "mxgpu_vi.h"
> #include "amdgpu_dm.h"
>
> +#if IS_ENABLED(CONFIG_X86)
> +#include <asm/intel-family.h>
> +#endif
> +
> #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
> #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L
> #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L
> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
> WREG32_PCIE(ixPCIE_LC_CNTL, data);
> }
>
> +static bool aspm_support_quirk_check(void)
> +{
> +#if IS_ENABLED(CONFIG_X86)
> + struct cpuinfo_x86 *c = &cpu_data(0);
> +
> + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +#else
> + return true;
> +#endif
> +}
> +
> static void vi_program_aspm(struct amdgpu_device *adev)
> {
> u32 data, data1, orig;
> bool bL1SS = false;
> bool bClkReqSupport = true;
>
> - if (!amdgpu_device_should_use_aspm(adev))
> + if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
> return;
>
> if (adev->flags & AMD_IS_APU ||
> --
> 2.25.1
>

2022-05-02 07:30:23

by Paul Menzel

[permalink] [raw]
Subject: Re: [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems

Dear Richard,


Am 29.04.22 um 18:06 schrieb Richard Gong:
> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and
> RX640, that do not work with ASPM-enabled Intel Alder Lake based systems.
> Using these GFX cards as video/display output, Intel Alder Lake based
> systems will freeze after suspend/resume.

As replied in v4 just now, “freeze” is misleading if you can still run
`dmesg` after resume.


Kind regards,

Paul


> The issue was originally reported on one system (Dell Precision 3660 with
> BIOS version 0.14.81), but was later confirmed to affect at least 4
> pre-production Alder Lake based systems.
>
> Add an extra check to disable ASPM on Intel Alder Lake based systems with
> the problematic AMD Volcanic Islands GFX cards.
>
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> Reported-by: kernel test robot <[email protected]>
> Signed-off-by: Richard Gong <[email protected]>
> ---
> v5: added vi to commit header and updated commit message
> rolled back guard with the preprocessor as did in v2 to correct build
> error on non-x86 systems
> v4: s/CONFIG_X86_64/CONFIG_X86
> enhanced check logic
> v3: s/intel_core_aspm_chk/aspm_support_quirk_check
> correct build error with W=1 option
> v2: correct commit description
> move the check from chip family to problematic platform
> ---
> drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
> 1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 039b90cdc3bc..45f0188c4273 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,6 +81,10 @@
> #include "mxgpu_vi.h"
> #include "amdgpu_dm.h"
>
> +#if IS_ENABLED(CONFIG_X86)
> +#include <asm/intel-family.h>
> +#endif
> +
> #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
> #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L
> #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L
> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
> WREG32_PCIE(ixPCIE_LC_CNTL, data);
> }
>
> +static bool aspm_support_quirk_check(void)
> +{
> +#if IS_ENABLED(CONFIG_X86)
> + struct cpuinfo_x86 *c = &cpu_data(0);
> +
> + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +#else
> + return true;
> +#endif
> +}
> +
> static void vi_program_aspm(struct amdgpu_device *adev)
> {
> u32 data, data1, orig;
> bool bL1SS = false;
> bool bClkReqSupport = true;
>
> - if (!amdgpu_device_should_use_aspm(adev))
> + if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
> return;
>
> if (adev->flags & AMD_IS_APU ||

2022-05-02 23:29:35

by Gong, Richard

[permalink] [raw]
Subject: Re: [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems

Hi Paul,

On 5/1/2022 2:14 AM, Paul Menzel wrote:
> Dear Richard,
>
>
> Am 29.04.22 um 18:06 schrieb Richard Gong:
>> Active State Power Management (ASPM) feature is enabled since kernel
>> 5.14.
>> There are some AMD Volcanic Islands (VI) GFX cards, such as the
>> WX3200 and
>> RX640, that do not work with ASPM-enabled Intel Alder Lake based
>> systems.
>> Using these GFX cards as video/display output, Intel Alder Lake based
>> systems will freeze after suspend/resume.
>
> As replied in v4 just now, “freeze” is misleading if you can still run
> `dmesg` after resume.
As I commented on v4, we can't run 'dmesg' when the issue occurs. The user
has to power-cycle the system to reset it.
>
>
> Kind regards,
>
> Paul

Regards,

Richard

>
>
>> The issue was originally reported on one system (Dell Precision 3660
>> with
>> BIOS version 0.14.81), but was later confirmed to affect at least 4
>> pre-production Alder Lake based systems.
>>
>> Add an extra check to disable ASPM on Intel Alder Lake based systems
>> with
>> the problematic AMD Volcanic Islands GFX cards.
>>
>> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
>> Link:
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fdrm%2Famd%2F-%2Fissues%2F1885&amp;data=05%7C01%7Crichard.gong%40amd.com%7C78173acb0fe3463fead808da2b423e81%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637869860787352219%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=TK3Ur99Ro4OczgUlCpdod6CrvgGJvNZAyUfpzKEqExw%3D&amp;reserved=0
>> Reported-by: kernel test robot <[email protected]>
>> Signed-off-by: Richard Gong <[email protected]>
>> ---
>> v5: added vi to commit header and updated commit message
>>      rolled back guard with the preprocessor as did in v2 to correct
>> build
>>      error on non-x86 systems
>> v4: s/CONFIG_X86_64/CONFIG_X86
>>      enhanced check logic
>> v3: s/intel_core_aspm_chk/aspm_support_quirk_check
>>      correct build error with W=1 option
>> v2: correct commit description
>>      move the check from chip family to problematic platform
>> ---
>>   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>>   1 file changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
>> b/drivers/gpu/drm/amd/amdgpu/vi.c
>> index 039b90cdc3bc..45f0188c4273 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
>> @@ -81,6 +81,10 @@
>>   #include "mxgpu_vi.h"
>>   #include "amdgpu_dm.h"
>>   +#if IS_ENABLED(CONFIG_X86)
>> +#include <asm/intel-family.h>
>> +#endif
>> +
>>   #define ixPCIE_LC_L1_PM_SUBSTATE    0x100100C6
>>   #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK
>> 0x00000001L
>>   #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK
>> 0x00000002L
>> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct
>> amdgpu_device *adev)
>>           WREG32_PCIE(ixPCIE_LC_CNTL, data);
>>   }
>>   +static bool aspm_support_quirk_check(void)
>> +{
>> +#if IS_ENABLED(CONFIG_X86)
>> +    struct cpuinfo_x86 *c = &cpu_data(0);
>> +
>> +    return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
>> +#else
>> +    return true;
>> +#endif
>> +}
>> +
>>   static void vi_program_aspm(struct amdgpu_device *adev)
>>   {
>>       u32 data, data1, orig;
>>       bool bL1SS = false;
>>       bool bClkReqSupport = true;
>>   -    if (!amdgpu_device_should_use_aspm(adev))
>> +    if (!amdgpu_device_should_use_aspm(adev) ||
>> !aspm_support_quirk_check())
>>           return;
>>         if (adev->flags & AMD_IS_APU ||

2022-05-03 02:16:13

by Mario Limonciello

[permalink] [raw]
Subject: RE: [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems

[Public]



> -----Original Message-----
> From: Alex Deucher <[email protected]>
> Sent: Friday, April 29, 2022 11:14
> To: Gong, Richard <[email protected]>
> Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
> <[email protected]>; Pan, Xinhui <[email protected]>; Dave
> Airlie <[email protected]>; Daniel Vetter <[email protected]>; amd-gfx list <amd-
> [email protected]>; kernel test robot <[email protected]>; LKML <linux-
> [email protected]>; Maling list - DRI developers <dri-
> [email protected]>; Limonciello, Mario
> <[email protected]>
> Subject: Re: [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake
> based systems
>
> Reviewed-by: Alex Deucher <[email protected]>
>
> On Fri, Apr 29, 2022 at 12:08 PM Richard Gong <[email protected]>
> wrote:
> >
> > Active State Power Management (ASPM) feature is enabled since kernel
> 5.14.
> > There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200
> and
> > RX640, that do not work with ASPM-enabled Intel Alder Lake based
> systems.
> > Using these GFX cards as video/display output, Intel Alder Lake based
> > systems will freeze after suspend/resume.
> >
> > The issue was originally reported on one system (Dell Precision 3660 with
> > BIOS version 0.14.81), but was later confirmed to affect at least 4
> > pre-production Alder Lake based systems.
> >
> > Add an extra check to disable ASPM on Intel Alder Lake based systems with
> > the problematic AMD Volcanic Islands GFX cards.
> >
> > Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> > Link:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitla
> b.freedesktop.org%2Fdrm%2Famd%2F-
> %2Fissues%2F1885&amp;data=05%7C01%7Cmario.limonciello%40amd.com%
> 7C1fdb6c767a4a4b3f572c08da29fb3f1a%7C3dd8961fe4884e608e11a82d994e1
> 83d%7C0%7C0%7C637868456326825256%7CUnknown%7CTWFpbGZsb3d8eyJ
> WIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%
> 7C3000%7C%7C%7C&amp;sdata=LtV9WqUzB032KFmH2g%2F2BMtX2R6DyfM
> KqxCF1e2rHHg%3D&amp;reserved=0
> > Reported-by: kernel test robot <[email protected]>

You should drop this "Reported-by:". That tag makes more sense for a patch
that is already queued up and committed. The bot just caught an intermediate
revision that wasn't committed anywhere, so it doesn't make sense here.

I don't think you need to send out a new version for review just to change the
commit message; this can be fixed up when committing the change.

Reviewed-by: Mario Limonciello <[email protected]>

> > Signed-off-by: Richard Gong <[email protected]>
> > ---
> > v5: added vi to commit header and updated commit message
> > rolled back guard with the preprocessor as did in v2 to correct build
> > error on non-x86 systems
> > v4: s/CONFIG_X86_64/CONFIG_X86
> > enhanced check logic
> > v3: s/intel_core_aspm_chk/aspm_support_quirk_check
> > correct build error with W=1 option
> > v2: correct commit description
> > move the check from chip family to problematic platform
> > ---
> > drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
> > 1 file changed, 16 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
> b/drivers/gpu/drm/amd/amdgpu/vi.c
> > index 039b90cdc3bc..45f0188c4273 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> > @@ -81,6 +81,10 @@
> > #include "mxgpu_vi.h"
> > #include "amdgpu_dm.h"
> >
> > +#if IS_ENABLED(CONFIG_X86)
> > +#include <asm/intel-family.h>
> > +#endif
> > +
> > #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
> > #define
> PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK
> 0x00000001L
> > #define
> PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK
> 0x00000002L
> > @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct
> amdgpu_device *adev)
> > WREG32_PCIE(ixPCIE_LC_CNTL, data);
> > }
> >
> > +static bool aspm_support_quirk_check(void)
> > +{
> > +#if IS_ENABLED(CONFIG_X86)
> > + struct cpuinfo_x86 *c = &cpu_data(0);
> > +
> > + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> > +#else
> > + return true;
> > +#endif
> > +}
> > +
> > static void vi_program_aspm(struct amdgpu_device *adev)
> > {
> > u32 data, data1, orig;
> > bool bL1SS = false;
> > bool bClkReqSupport = true;
> >
> > - if (!amdgpu_device_should_use_aspm(adev))
> > + if (!amdgpu_device_should_use_aspm(adev) ||
> !aspm_support_quirk_check())
> > return;
> >
> > if (adev->flags & AMD_IS_APU ||
> > --
> > 2.25.1
> >