2024-04-22 05:27:29

by Qiang Ma

[permalink] [raw]
Subject: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

Some boards (like Oland PRO: 0x1002:0x6613) seem to have
garbage in the upper 16 bits of the vram size register.
The kernel log looks as follows:

[ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
[ 6.007812] [drm] RAM width 64bits GDDR5
[ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready

This is obviously not true. Check for this and clamp the size to
the lower 16 bits of the register. This fixes boards reporting bogus
amounts of vram; with the patch applied, the kernel log looks as follows:

[ 2.789062] [drm] Probable bad vram size: 0x86800800
[ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
[ 2.789062] [drm] RAM width 64bits GDDR5
[ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready

Signed-off-by: Qiang Ma <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 23b478639921..3703695f7789 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -309,8 +309,15 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
}
adev->gmc.vram_width = numchan * chansize;
/* size in MB on si */
- adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
- adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ tmp = RREG32(mmCONFIG_MEMSIZE);
+ /* some boards may have garbage in the upper 16 bits */
+ if (tmp & 0xffff0000) {
+ DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
+ if (tmp & 0xffff)
+ tmp &= 0xffff;
+ }
+ adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 3da7b6a2b00d..1df1fc578ff6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -316,10 +316,10 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
{
int r;
+ u32 tmp;

adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
if (!adev->gmc.vram_width) {
- u32 tmp;
int chansize, numchan;

/* Get VRAM informations */
@@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
adev->gmc.vram_width = numchan * chansize;
}
/* size in MB on si */
- adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
- adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ tmp = RREG32(mmCONFIG_MEMSIZE);
+ /* some boards may have garbage in the upper 16 bits */
+ if (tmp & 0xffff0000) {
+ DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
+ if (tmp & 0xffff)
+ tmp &= 0xffff;
+ }
+ adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
--
2.20.1



2024-04-22 09:41:06

by Christian König

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

Am 22.04.24 um 07:26 schrieb Qiang Ma:
> Some boards(like Oland PRO: 0x1002:0x6613) seem to have
> garbage in the upper 16 bits of the vram size register,
> kern log as follows:
>
> [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
> [ 6.007812] [drm] RAM width 64bits GDDR5
> [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
>
> This is obviously not true, check for this and clamp the size
> properly. Fixes boards reporting bogus amounts of vram,
> kern log as follows:
>
> [ 2.789062] [drm] Probable bad vram size: 0x86800800
> [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
> [ 2.789062] [drm] RAM width 64bits GDDR5
> [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready

Well, we have had patches like this one before, and so far we have
always rejected them.

When the mmCONFIG_MEMSIZE register isn't properly initialized then there
is something wrong with your hardware.

Working around that in the software driver is not going to fly.

Regards,
Christian.

> Signed-off-by: Qiang Ma <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
> 2 files changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index 23b478639921..3703695f7789 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -309,8 +309,15 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
> }
> adev->gmc.vram_width = numchan * chansize;
> /* size in MB on si */
> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> + tmp = RREG32(mmCONFIG_MEMSIZE);
> + /* some boards may have garbage in the upper 16 bits */
> + if (tmp & 0xffff0000) {
> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> + if (tmp & 0xffff)
> + tmp &= 0xffff;
> + }
> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
>
> if (!(adev->flags & AMD_IS_APU)) {
> r = amdgpu_device_resize_fb_bar(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 3da7b6a2b00d..1df1fc578ff6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -316,10 +316,10 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
> static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
> {
> int r;
> + u32 tmp;
>
> adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
> if (!adev->gmc.vram_width) {
> - u32 tmp;
> int chansize, numchan;
>
> /* Get VRAM informations */
> @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
> adev->gmc.vram_width = numchan * chansize;
> }
> /* size in MB on si */
> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> + tmp = RREG32(mmCONFIG_MEMSIZE);
> + /* some boards may have garbage in the upper 16 bits */
> + if (tmp & 0xffff0000) {
> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> + if (tmp & 0xffff)
> + tmp &= 0xffff;
> + }
> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
>
> if (!(adev->flags & AMD_IS_APU)) {
> r = amdgpu_device_resize_fb_bar(adev);


2024-04-22 12:34:17

by Qiang Ma

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

On Mon, 22 Apr 2024 11:40:26 +0200
Christian König <[email protected]> wrote:

> Am 22.04.24 um 07:26 schrieb Qiang Ma:
> > Some boards(like Oland PRO: 0x1002:0x6613) seem to have
> > garbage in the upper 16 bits of the vram size register,
> > kern log as follows:
> >
> > [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
> > [ 6.007812] [drm] RAM width 64bits GDDR5
> > [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
> >
> > This is obviously not true, check for this and clamp the size
> > properly. Fixes boards reporting bogus amounts of vram,
> > kern log as follows:
> >
> > [ 2.789062] [drm] Probable bad vram size: 0x86800800
> > [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
> > [ 2.789062] [drm] RAM width 64bits GDDR5
> > [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready
>
> Well we had patches like this one here before and so far we always
> rejected them.
>
> When the mmCONFIG_MEMSIZE register isn't properly initialized then
> there is something wrong with your hardware.
>
> Working around that in the software driver is not going to fly.
>
> Regards,
> Christian.
>
Hi Christian:
I see that two patches for this issue have been merged, and the
patches are as follows:

11544d77e397 drm/amdgpu: fixup bad vram size on gmc v8
0ca223b029a2 drm/radeon: fixup bad vram size on SI

Qiang Ma

> > Signed-off-by: Qiang Ma <[email protected]>
> > ---
> > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
> > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
> > 2 files changed, 19 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index
> > 23b478639921..3703695f7789 100644 ---
> > a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -309,8 +309,15 @@ static
> > int gmc_v6_0_mc_init(struct amdgpu_device *adev) }
> > adev->gmc.vram_width = numchan * chansize;
> > /* size in MB on si */
> > - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > 1024ULL * 1024ULL;
> > - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > 1024ULL * 1024ULL;
> > + tmp = RREG32(mmCONFIG_MEMSIZE);
> > + /* some boards may have garbage in the upper 16 bits */
> > + if (tmp & 0xffff0000) {
> > + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> > + if (tmp & 0xffff)
> > + tmp &= 0xffff;
> > + }
> > + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> > + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >
> > if (!(adev->flags & AMD_IS_APU)) {
> > r = amdgpu_device_resize_fb_bar(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index
> > 3da7b6a2b00d..1df1fc578ff6 100644 ---
> > a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++
> > b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -316,10 +316,10 @@
> > static void gmc_v7_0_mc_program(struct amdgpu_device *adev) static
> > int gmc_v7_0_mc_init(struct amdgpu_device *adev) {
> > int r;
> > + u32 tmp;
> >
> > adev->gmc.vram_width =
> > amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) {
> > - u32 tmp;
> > int chansize, numchan;
> >
> > /* Get VRAM informations */
> > @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct
> > amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize;
> > }
> > /* size in MB on si */
> > - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > 1024ULL * 1024ULL;
> > - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > 1024ULL * 1024ULL;
> > + tmp = RREG32(mmCONFIG_MEMSIZE);
> > + /* some boards may have garbage in the upper 16 bits */
> > + if (tmp & 0xffff0000) {
> > + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> > + if (tmp & 0xffff)
> > + tmp &= 0xffff;
> > + }
> > + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> > + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >
> > if (!(adev->flags & AMD_IS_APU)) {
> > r = amdgpu_device_resize_fb_bar(adev);
>
>


2024-04-22 13:11:28

by Christian König

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

Am 22.04.24 um 14:33 schrieb Qiang Ma:
> On Mon, 22 Apr 2024 11:40:26 +0200
> Christian König <[email protected]> wrote:
>
>> Am 22.04.24 um 07:26 schrieb Qiang Ma:
>>> Some boards(like Oland PRO: 0x1002:0x6613) seem to have
>>> garbage in the upper 16 bits of the vram size register,
>>> kern log as follows:
>>>
>>> [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
>>> [ 6.007812] [drm] RAM width 64bits GDDR5
>>> [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
>>>
>>> This is obviously not true, check for this and clamp the size
>>> properly. Fixes boards reporting bogus amounts of vram,
>>> kern log as follows:
>>>
>>> [ 2.789062] [drm] Probable bad vram size: 0x86800800
>>> [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
>>> [ 2.789062] [drm] RAM width 64bits GDDR5
>>> [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready
>> Well we had patches like this one here before and so far we always
>> rejected them.
>>
>> When the mmCONFIG_MEMSIZE register isn't properly initialized then
>> there is something wrong with your hardware.
>>
>> Working around that in the software driver is not going to fly.
>>
>> Regards,
>> Christian.
>>
> Hi Christian:
> I see that two patches for this issue have been merged, and the
> patches are as follows:
>
> 11544d77e397 drm/amdgpu: fixup bad vram size on gmc v8
> 0ca223b029a2 drm/radeon: fixup bad vram size on SI

Mhm, I remember that we discussed reverting those but it looks like that
never happened. I need to ask around internally.

Question is do you see any other problems with the board? E.g. incorrect
connector or harvesting configuration?

Regards,
Christian.

>
> Qiang Ma
>
>>> Signed-off-by: Qiang Ma <[email protected]>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
>>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
>>> 2 files changed, 19 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index
>>> 23b478639921..3703695f7789 100644 ---
>>> a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -309,8 +309,15 @@ static
>>> int gmc_v6_0_mc_init(struct amdgpu_device *adev) }
>>> adev->gmc.vram_width = numchan * chansize;
>>> /* size in MB on si */
>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>> 1024ULL * 1024ULL;
>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>> 1024ULL * 1024ULL;
>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
>>> + /* some boards may have garbage in the upper 16 bits */
>>> + if (tmp & 0xffff0000) {
>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
>>> + if (tmp & 0xffff)
>>> + tmp &= 0xffff;
>>> + }
>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
>>>
>>> if (!(adev->flags & AMD_IS_APU)) {
>>> r = amdgpu_device_resize_fb_bar(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index
>>> 3da7b6a2b00d..1df1fc578ff6 100644 ---
>>> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -316,10 +316,10 @@
>>> static void gmc_v7_0_mc_program(struct amdgpu_device *adev) static
>>> int gmc_v7_0_mc_init(struct amdgpu_device *adev) {
>>> int r;
>>> + u32 tmp;
>>>
>>> adev->gmc.vram_width =
>>> amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) {
>>> - u32 tmp;
>>> int chansize, numchan;
>>>
>>> /* Get VRAM informations */
>>> @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct
>>> amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize;
>>> }
>>> /* size in MB on si */
>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>> 1024ULL * 1024ULL;
>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>> 1024ULL * 1024ULL;
>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
>>> + /* some boards may have garbage in the upper 16 bits */
>>> + if (tmp & 0xffff0000) {
>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
>>> + if (tmp & 0xffff)
>>> + tmp &= 0xffff;
>>> + }
>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
>>>
>>> if (!(adev->flags & AMD_IS_APU)) {
>>> r = amdgpu_device_resize_fb_bar(adev);
>>


2024-04-22 13:45:53

by Qiang Ma

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

On Mon, 22 Apr 2024 14:59:36 +0200
Christian König <[email protected]> wrote:

> Am 22.04.24 um 14:33 schrieb Qiang Ma:
> > On Mon, 22 Apr 2024 11:40:26 +0200
> > Christian König <[email protected]> wrote:
> >
> >> Am 22.04.24 um 07:26 schrieb Qiang Ma:
> >>> Some boards(like Oland PRO: 0x1002:0x6613) seem to have
> >>> garbage in the upper 16 bits of the vram size register,
> >>> kern log as follows:
> >>>
> >>> [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
> >>> [ 6.007812] [drm] RAM width 64bits GDDR5
> >>> [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
> >>>
> >>> This is obviously not true, check for this and clamp the size
> >>> properly. Fixes boards reporting bogus amounts of vram,
> >>> kern log as follows:
> >>>
> >>> [ 2.789062] [drm] Probable bad vram size: 0x86800800
> >>> [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
> >>> [ 2.789062] [drm] RAM width 64bits GDDR5
> >>> [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready
> >> Well we had patches like this one here before and so far we always
> >> rejected them.
> >>
> >> When the mmCONFIG_MEMSIZE register isn't properly initialized then
> >> there is something wrong with your hardware.
> >>
> >> Working around that in the software driver is not going to fly.
> >>
> >> Regards,
> >> Christian.
> >>
> > Hi Christian:
> > I see that two patches for this issue have been merged, and the
> > patches are as follows:
> >
> > 11544d77e397 drm/amdgpu: fixup bad vram size on gmc v8
> > 0ca223b029a2 drm/radeon: fixup bad vram size on SI
>
> Mhm, I remember that we discussed reverting those but it looks like
> that never happened. I need to ask around internally.
>
> Question is do you see any other problems with the board? E.g.
> incorrect connector or harvesting configuration?
>
> Regards,
> Christian.
>

At present, no other problems have been found.
With the radeon driver, the display works normally.
The problem only appears when I switch to the amdgpu driver: startup
fails with a black screen.
After applying the patch, startup succeeds and the display is normal.

Qiang Ma

> >
> > Qiang Ma
> >
> >>> Signed-off-by: Qiang Ma <[email protected]>
> >>> ---
> >>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
> >>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
> >>> 2 files changed, 19 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index
> >>> 23b478639921..3703695f7789 100644 ---
> >>> a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -309,8 +309,15 @@
> >>> static int gmc_v6_0_mc_init(struct amdgpu_device *adev) }
> >>> adev->gmc.vram_width = numchan * chansize;
> >>> /* size in MB on si */
> >>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> >>> + /* some boards may have garbage in the upper 16 bits */
> >>> + if (tmp & 0xffff0000) {
> >>> + DRM_INFO("Probable bad vram size: 0x%08x\n",
> >>> tmp);
> >>> + if (tmp & 0xffff)
> >>> + tmp &= 0xffff;
> >>> + }
> >>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> >>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >>>
> >>> if (!(adev->flags & AMD_IS_APU)) {
> >>> r = amdgpu_device_resize_fb_bar(adev);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index
> >>> 3da7b6a2b00d..1df1fc578ff6 100644 ---
> >>> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -316,10 +316,10 @@
> >>> static void gmc_v7_0_mc_program(struct amdgpu_device *adev) static
> >>> int gmc_v7_0_mc_init(struct amdgpu_device *adev) {
> >>> int r;
> >>> + u32 tmp;
> >>>
> >>> adev->gmc.vram_width =
> >>> amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) {
> >>> - u32 tmp;
> >>> int chansize, numchan;
> >>>
> >>> /* Get VRAM informations */
> >>> @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct
> >>> amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize;
> >>> }
> >>> /* size in MB on si */
> >>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> >>> + /* some boards may have garbage in the upper 16 bits */
> >>> + if (tmp & 0xffff0000) {
> >>> + DRM_INFO("Probable bad vram size: 0x%08x\n",
> >>> tmp);
> >>> + if (tmp & 0xffff)
> >>> + tmp &= 0xffff;
> >>> + }
> >>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> >>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >>>
> >>> if (!(adev->flags & AMD_IS_APU)) {
> >>> r = amdgpu_device_resize_fb_bar(adev);
> >>
>
>


2024-04-22 14:43:27

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

On Mon, Apr 22, 2024 at 9:00 AM Christian König
<[email protected]> wrote:
>
> Am 22.04.24 um 14:33 schrieb Qiang Ma:
> > On Mon, 22 Apr 2024 11:40:26 +0200
> > Christian König <[email protected]> wrote:
> >
> >> Am 22.04.24 um 07:26 schrieb Qiang Ma:
> >>> Some boards(like Oland PRO: 0x1002:0x6613) seem to have
> >>> garbage in the upper 16 bits of the vram size register,
> >>> kern log as follows:
> >>>
> >>> [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
> >>> [ 6.007812] [drm] RAM width 64bits GDDR5
> >>> [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
> >>>
> >>> This is obviously not true, check for this and clamp the size
> >>> properly. Fixes boards reporting bogus amounts of vram,
> >>> kern log as follows:
> >>>
> >>> [ 2.789062] [drm] Probable bad vram size: 0x86800800
> >>> [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
> >>> [ 2.789062] [drm] RAM width 64bits GDDR5
> >>> [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready
> >> Well we had patches like this one here before and so far we always
> >> rejected them.
> >>
> >> When the mmCONFIG_MEMSIZE register isn't properly initialized then
> >> there is something wrong with your hardware.
> >>
> >> Working around that in the software driver is not going to fly.
> >>
> >> Regards,
> >> Christian.
> >>
> > Hi Christian:
> > I see that two patches for this issue have been merged, and the
> > patches are as follows:
> >
> > 11544d77e397 drm/amdgpu: fixup bad vram size on gmc v8
> > 0ca223b029a2 drm/radeon: fixup bad vram size on SI
>
> Mhm, I remember that we discussed reverting those but it looks like that
> never happened. I need to ask around internally.
>
> Question is do you see any other problems with the board? E.g. incorrect
> connector or harvesting configuration?

I'll need to dig up the past discussion again, but IIRC, the issue was
only seen on some non-x86 platforms. Maybe something specific to MMIO
on those?

Alex


>
> Regards,
> Christian.
>
> >
> > Qiang Ma
> >
> >>> Signed-off-by: Qiang Ma <[email protected]>
> >>> ---
> >>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
> >>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
> >>> 2 files changed, 19 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index
> >>> 23b478639921..3703695f7789 100644 ---
> >>> a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -309,8 +309,15 @@ static
> >>> int gmc_v6_0_mc_init(struct amdgpu_device *adev) }
> >>> adev->gmc.vram_width = numchan * chansize;
> >>> /* size in MB on si */
> >>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> >>> + /* some boards may have garbage in the upper 16 bits */
> >>> + if (tmp & 0xffff0000) {
> >>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> >>> + if (tmp & 0xffff)
> >>> + tmp &= 0xffff;
> >>> + }
> >>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> >>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >>>
> >>> if (!(adev->flags & AMD_IS_APU)) {
> >>> r = amdgpu_device_resize_fb_bar(adev);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index
> >>> 3da7b6a2b00d..1df1fc578ff6 100644 ---
> >>> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++
> >>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -316,10 +316,10 @@
> >>> static void gmc_v7_0_mc_program(struct amdgpu_device *adev) static
> >>> int gmc_v7_0_mc_init(struct amdgpu_device *adev) {
> >>> int r;
> >>> + u32 tmp;
> >>>
> >>> adev->gmc.vram_width =
> >>> amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) {
> >>> - u32 tmp;
> >>> int chansize, numchan;
> >>>
> >>> /* Get VRAM informations */
> >>> @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct
> >>> amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize;
> >>> }
> >>> /* size in MB on si */
> >>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> >>> 1024ULL * 1024ULL;
> >>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> >>> + /* some boards may have garbage in the upper 16 bits */
> >>> + if (tmp & 0xffff0000) {
> >>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> >>> + if (tmp & 0xffff)
> >>> + tmp &= 0xffff;
> >>> + }
> >>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> >>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >>>
> >>> if (!(adev->flags & AMD_IS_APU)) {
> >>> r = amdgpu_device_resize_fb_bar(adev);
> >>
>

2024-04-22 14:48:10

by Christian König

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

Am 22.04.24 um 16:40 schrieb Alex Deucher:
> On Mon, Apr 22, 2024 at 9:00 AM Christian König
> <[email protected]> wrote:
>> Am 22.04.24 um 14:33 schrieb Qiang Ma:
>>> On Mon, 22 Apr 2024 11:40:26 +0200
>>> Christian König <[email protected]> wrote:
>>>
>>>> Am 22.04.24 um 07:26 schrieb Qiang Ma:
>>>>> Some boards(like Oland PRO: 0x1002:0x6613) seem to have
>>>>> garbage in the upper 16 bits of the vram size register,
>>>>> kern log as follows:
>>>>>
>>>>> [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
>>>>> [ 6.007812] [drm] RAM width 64bits GDDR5
>>>>> [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
>>>>>
>>>>> This is obviously not true, check for this and clamp the size
>>>>> properly. Fixes boards reporting bogus amounts of vram,
>>>>> kern log as follows:
>>>>>
>>>>> [ 2.789062] [drm] Probable bad vram size: 0x86800800
>>>>> [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
>>>>> [ 2.789062] [drm] RAM width 64bits GDDR5
>>>>> [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready
>>>> Well we had patches like this one here before and so far we always
>>>> rejected them.
>>>>
>>>> When the mmCONFIG_MEMSIZE register isn't properly initialized then
>>>> there is something wrong with your hardware.
>>>>
>>>> Working around that in the software driver is not going to fly.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>> Hi Christian:
>>> I see that two patches for this issue have been merged, and the
>>> patches are as follows:
>>>
>>> 11544d77e397 drm/amdgpu: fixup bad vram size on gmc v8
>>> 0ca223b029a2 drm/radeon: fixup bad vram size on SI
>> Mhm, I remember that we discussed reverting those but it looks like that
>> never happened. I need to ask around internally.
>>
>> Question is do you see any other problems with the board? E.g. incorrect
>> connector or harvesting configuration?
> I'll need to dig up the past discussion again, but IIRC, the issue was
> only seen on some non-x86 platforms. Maybe something specific to MMIO
> on those?

I honestly don't remember it either, but in general it's the job of
the VBIOS to init this register.

So if we see the upper bits mangled the VBIOS hasn't done that correctly
and it's quite likely that this is only the tip of the iceberg of problems.

Christian.

>
> Alex
>
>
>> Regards,
>> Christian.
>>
>>> Qiang Ma
>>>
>>>>> Signed-off-by: Qiang Ma <[email protected]>
>>>>> ---
>>>>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
>>>>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
>>>>> 2 files changed, 19 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index
>>>>> 23b478639921..3703695f7789 100644 ---
>>>>> a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++
>>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -309,8 +309,15 @@ static
>>>>> int gmc_v6_0_mc_init(struct amdgpu_device *adev) }
>>>>> adev->gmc.vram_width = numchan * chansize;
>>>>> /* size in MB on si */
>>>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>>>> 1024ULL * 1024ULL;
>>>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>>>> 1024ULL * 1024ULL;
>>>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
>>>>> + /* some boards may have garbage in the upper 16 bits */
>>>>> + if (tmp & 0xffff0000) {
>>>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
>>>>> + if (tmp & 0xffff)
>>>>> + tmp &= 0xffff;
>>>>> + }
>>>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
>>>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
>>>>>
>>>>> if (!(adev->flags & AMD_IS_APU)) {
>>>>> r = amdgpu_device_resize_fb_bar(adev);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index
>>>>> 3da7b6a2b00d..1df1fc578ff6 100644 ---
>>>>> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++
>>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -316,10 +316,10 @@
>>>>> static void gmc_v7_0_mc_program(struct amdgpu_device *adev) static
>>>>> int gmc_v7_0_mc_init(struct amdgpu_device *adev) {
>>>>> int r;
>>>>> + u32 tmp;
>>>>>
>>>>> adev->gmc.vram_width =
>>>>> amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) {
>>>>> - u32 tmp;
>>>>> int chansize, numchan;
>>>>>
>>>>> /* Get VRAM informations */
>>>>> @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct
>>>>> amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize;
>>>>> }
>>>>> /* size in MB on si */
>>>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>>>> 1024ULL * 1024ULL;
>>>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
>>>>> 1024ULL * 1024ULL;
>>>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
>>>>> + /* some boards may have garbage in the upper 16 bits */
>>>>> + if (tmp & 0xffff0000) {
>>>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
>>>>> + if (tmp & 0xffff)
>>>>> + tmp &= 0xffff;
>>>>> + }
>>>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
>>>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
>>>>>
>>>>> if (!(adev->flags & AMD_IS_APU)) {
>>>>> r = amdgpu_device_resize_fb_bar(adev);


2024-04-24 02:35:27

by Qiang Ma

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

On Mon, 22 Apr 2024 16:47:36 +0200
Christian König <[email protected]> wrote:

> Am 22.04.24 um 16:40 schrieb Alex Deucher:
> > On Mon, Apr 22, 2024 at 9:00 AM Christian König
> > <[email protected]> wrote:
> >> Am 22.04.24 um 14:33 schrieb Qiang Ma:
> >>> On Mon, 22 Apr 2024 11:40:26 +0200
> >>> Christian König <[email protected]> wrote:
> >>>
> >>>> Am 22.04.24 um 07:26 schrieb Qiang Ma:
> >>>>> Some boards(like Oland PRO: 0x1002:0x6613) seem to have
> >>>>> garbage in the upper 16 bits of the vram size register,
> >>>>> kern log as follows:
> >>>>>
> >>>>> [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
> >>>>> [ 6.007812] [drm] RAM width 64bits GDDR5
> >>>>> [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
> >>>>>
> >>>>> This is obviously not true, check for this and clamp the size
> >>>>> properly. Fixes boards reporting bogus amounts of vram,
> >>>>> kern log as follows:
> >>>>>
> >>>>> [ 2.789062] [drm] Probable bad vram size: 0x86800800
> >>>>> [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
> >>>>> [ 2.789062] [drm] RAM width 64bits GDDR5
> >>>>> [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready
> >>>> Well we had patches like this one here before and so far we
> >>>> always rejected them.
> >>>>
> >>>> When the mmCONFIG_MEMSIZE register isn't properly initialized
> >>>> then there is something wrong with your hardware.
> >>>>
> >>>> Working around that in the software driver is not going to fly.
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>> Hi Christian:
> >>> I see that two patches for this issue have been merged, and the
> >>> patches are as follows:
> >>>
> >>> 11544d77e397 drm/amdgpu: fixup bad vram size on gmc v8
> >>> 0ca223b029a2 drm/radeon: fixup bad vram size on SI
> >> Mhm, I remember that we discussed reverting those but it looks
> >> like that never happened. I need to ask around internally.
> >>
> >> Question is do you see any other problems with the board? E.g.
> >> incorrect connector or harvesting configuration?
> > I'll need to dig up the past discussion again, but IIRC, the issue
> > was only seen on some non-x86 platforms. Maybe something specific
> > to MMIO on those?
>
> I honestly don't remember it either, but in general it's the job of
> the VBIOS to init this register.
>
> So if we see the upper bits mangled the VBIOS hasn't done that
> correctly and it's quite likely that this is only the tip of the
> iceberg of problems.
>

Yes, this problem was found on a MIPS notebook. The log also reports an
incorrect VBIOS signature; I don't know whether that has any influence.
After applying the patch, the drm logs are as follows:

[ 2.554687] [drm] amdgpu kernel modesetting enabled.
[ 2.558593] [drm] initializing kernel modesetting (OLAND 0x1002:0x6613 0x1028:0x1002 0x00).
[ 2.558593] [drm] register mmio base: 0x45400000
[ 2.558593] [drm] register mmio size: 262144
[ 2.558593] [drm] add ip block number 0 <si_common>
[ 2.558593] [drm] add ip block number 1 <gmc_v6_0>
[ 2.558593] [drm] add ip block number 2 <si_ih>
[ 2.558593] [drm] add ip block number 3 <si_dpm>
[ 2.558593] [drm] add ip block number 4 <dce_v6_0>
[ 2.558593] [drm] add ip block number 5 <gfx_v6_0>
[ 2.558593] [drm] add ip block number 6 <si_dma>
[ 2.578125] [drm] BIOS signature incorrect 0 0
[ 2.746093] [drm] vm size is 64 GB, 2 levels, block size is 10-bit, fragment size is 9-bit
[ 2.746093] [drm] Probable bad vram size: 0x86800800
[ 2.746093] [drm] Detected VRAM RAM=2048M, BAR=256M
[ 2.746093] [drm] RAM width 64bits GDDR5
[ 2.750000] [drm] amdgpu: 2048M of VRAM memory ready
[ 2.750000] [drm] amdgpu: 3072M of GTT memory ready.
[ 2.750000] [drm] GART: num cpu pages 65536, num gpu pages 262144
[ 2.750000] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[ 2.750000] [drm] Driver supports precise vblank timestamp query.
[ 2.750000] [drm] Internal thermal controller with fan control
[ 2.750000] [drm] amdgpu: dpm initialized
[ 2.750000] [drm] amdgpu atom DIG backlight initialized
[ 2.750000] [drm] AMDGPU Display Connectors
[ 2.750000] [drm] Connector 0:
[ 2.750000] [drm] DP-1
[ 2.750000] [drm] HPD2
[ 2.750000] [drm] DDC: 0x1950 0x1950 0x1951 0x1951 0x1952 0x1952 0x1953 0x1953
[ 2.750000] [drm] Encoders:
[ 2.750000] [drm] LCD1: INTERNAL_UNIPHY
[ 2.750000] [drm] Connector 1:
[ 2.750000] [drm] HDMI-A-1
[ 2.750000] [drm] HPD1
[ 2.750000] [drm] DDC: 0x194c 0x194c 0x194d 0x194d 0x194e 0x194e 0x194f 0x194f
[ 2.750000] [drm] Encoders:
[ 2.750000] [drm] DFP1: INTERNAL_UNIPHY
[ 2.753906] [drm] PCIE gen 2 link speeds already enabled
[ 3.507812] [drm] fb mappable at 0x503E4000
[ 3.507812] [drm] vram apper at 0x50000000
[ 3.507812] [drm] size 8306688
[ 3.507812] [drm] fb depth is 24
[ 3.507812] [drm] pitch is 7680
[ 4.632812] amdgpu 0000:05:00.0: fb0: amdgpudrmfb frame buffer device
[ 5.023437] [drm] Initialized amdgpu 3.27.0 20150101 for 0000:05:00.0 on minor 0


Qiang Ma

> Christian.
>
> >
> > Alex
> >
> >
> >> Regards,
> >> Christian.
> >>
> >>> Qiang Ma
> >>>
> >>>>> Signed-off-by: Qiang Ma <[email protected]>
> >>>>> ---
> >>>>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
> >>>>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
> >>>>> 2 files changed, 19 insertions(+), 5 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> >>>>> index 23b478639921..3703695f7789 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> >>>>> @@ -309,8 +309,15 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
> >>>>> }
> >>>>> adev->gmc.vram_width = numchan * chansize;
> >>>>> /* size in MB on si */
> >>>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> >>>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> >>>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> >>>>> + /* some boards may have garbage in the upper 16 bits */
> >>>>> + if (tmp & 0xffff0000) {
> >>>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> >>>>> + if (tmp & 0xffff)
> >>>>> + tmp &= 0xffff;
> >>>>> + }
> >>>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> >>>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >>>>>
> >>>>> if (!(adev->flags & AMD_IS_APU)) {
> >>>>> r = amdgpu_device_resize_fb_bar(adev);
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> >>>>> index 3da7b6a2b00d..1df1fc578ff6 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> >>>>> @@ -316,10 +316,10 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
> >>>>> static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
> >>>>> {
> >>>>> int r;
> >>>>> + u32 tmp;
> >>>>>
> >>>>> adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
> >>>>> if (!adev->gmc.vram_width) {
> >>>>> - u32 tmp;
> >>>>> int chansize, numchan;
> >>>>>
> >>>>> /* Get VRAM informations */
> >>>>> @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
> >>>>> adev->gmc.vram_width = numchan * chansize;
> >>>>> }
> >>>>> /* size in MB on si */
> >>>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> >>>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
> >>>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> >>>>> + /* some boards may have garbage in the upper 16 bits */
> >>>>> + if (tmp & 0xffff0000) {
> >>>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> >>>>> + if (tmp & 0xffff)
> >>>>> + tmp &= 0xffff;
> >>>>> + }
> >>>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> >>>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> >>>>>
> >>>>> if (!(adev->flags & AMD_IS_APU)) {
> >>>>> r = amdgpu_device_resize_fb_bar(adev);
>
>


2024-04-24 02:52:32

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH] drm/amdgpu: Fixup bad vram size on gmc v6 and v7

On Tue, Apr 23, 2024 at 10:30 PM Qiang Ma <[email protected]> wrote:
>
> On Mon, 22 Apr 2024 16:47:36 +0200
> Christian König <[email protected]> wrote:
>
> > Am 22.04.24 um 16:40 schrieb Alex Deucher:
> > > On Mon, Apr 22, 2024 at 9:00 AM Christian König
> > > <[email protected]> wrote:
> > >> Am 22.04.24 um 14:33 schrieb Qiang Ma:
> > >>> On Mon, 22 Apr 2024 11:40:26 +0200
> > >>> Christian König <[email protected]> wrote:
> > >>>
> > >>>> Am 22.04.24 um 07:26 schrieb Qiang Ma:
> > >>>>> Some boards(like Oland PRO: 0x1002:0x6613) seem to have
> > >>>>> garbage in the upper 16 bits of the vram size register,
> > >>>>> kern log as follows:
> > >>>>>
> > >>>>> [ 6.000000] [drm] Detected VRAM RAM=2256537600M, BAR=256M
> > >>>>> [ 6.007812] [drm] RAM width 64bits GDDR5
> > >>>>> [ 6.031250] [drm] amdgpu: 2256537600M of VRAM memory ready
> > >>>>>
> > >>>>> This is obviously not true, check for this and clamp the size
> > >>>>> properly. Fixes boards reporting bogus amounts of vram,
> > >>>>> kern log as follows:
> > >>>>>
> > >>>>> [ 2.789062] [drm] Probable bad vram size: 0x86800800
> > >>>>> [ 2.789062] [drm] Detected VRAM RAM=2048M, BAR=256M
> > >>>>> [ 2.789062] [drm] RAM width 64bits GDDR5
> > >>>>> [ 2.789062] [drm] amdgpu: 2048M of VRAM memory ready
> > >>>> Well we had patches like this one here before and so far we
> > >>>> always rejected them.
> > >>>>
> > >>>> When the mmCONFIG_MEMSIZE register isn't properly initialized
> > >>>> then there is something wrong with your hardware.
> > >>>>
> > >>>> Working around that in the software driver is not going to fly.
> > >>>>
> > >>>> Regards,
> > >>>> Christian.
> > >>>>
> > >>> Hi Christian:
> > >>> I see that two patches for this issue have been merged, and the
> > >>> patches are as follows:
> > >>>
> > >>> 11544d77e397 drm/amdgpu: fixup bad vram size on gmc v8
> > >>> 0ca223b029a2 drm/radeon: fixup bad vram size on SI
> > >> Mhm, I remember that we discussed reverting those but it looks
> > >> like that never happened. I need to ask around internally.
> > >>
> > >> Question is do you see any other problems with the board? E.g.
> > >> incorrect connector or harvesting configuration?
> > > I'll need to dig up the past discussion again, but IIRC, the issue
> > > was only seen on some non-x86 platforms. Maybe something specific
> > > to MMIO on those?
> >
> > > I honestly don't remember it either, but in general it's the job of
> > the VBIOS to init this register.
> >
> > So if we see the upper bits mangled the VBIOS hasn't done that
> > correctly and it's quite likely that this is only the tip of the
> > iceberg of problems.
> >
>
> Yes, this problem was found on the mips notebook. Check log vbios
> signature incorrect, I don't know if it has any influence.
> After add patch, drm logs are as follows:

IIRC, the patches for the other generations were on similar less
mainstream platforms. I've never seen anything like this on x86. I
suspect this is something platform specific. Does this platform have
an x86 real mode emulator to post the GPU? If so, I wonder if the
problem lies somewhere in there? Can you disable that or test with a
secondary board which does not post the GPU? The driver can post it
itself. Another thing to try would be to call the atombios asic_init
sequence and check if the value is correctly written in that case.
The asic_init sequence should write the mem config register as part of
the sequence. Since asic_init is just a programming sequence stored
in the vbios (and interpreted by the driver), you should be able to
audit it to find out what is going wrong. You can use atomdis
(https://cgit.freedesktop.org/~mhopf/AtomDis/) to print out the
programming sequences in the atom command tables.

Alex


>
> [ 2.554687] [drm] amdgpu kernel modesetting enabled.
> [ 2.558593] [drm] initializing kernel modesetting (OLAND 0x1002:0x6613 0x1028:0x1002 0x00).
> [ 2.558593] [drm] register mmio base: 0x45400000
> [ 2.558593] [drm] register mmio size: 262144
> [ 2.558593] [drm] add ip block number 0 <si_common>
> [ 2.558593] [drm] add ip block number 1 <gmc_v6_0>
> [ 2.558593] [drm] add ip block number 2 <si_ih>
> [ 2.558593] [drm] add ip block number 3 <si_dpm>
> [ 2.558593] [drm] add ip block number 4 <dce_v6_0>
> [ 2.558593] [drm] add ip block number 5 <gfx_v6_0>
> [ 2.558593] [drm] add ip block number 6 <si_dma>
> [ 2.578125] [drm] BIOS signature incorrect 0 0
> [ 2.746093] [drm] vm size is 64 GB, 2 levels, block size is 10-bit, fragment size is 9-bit
> [ 2.746093] [drm] Probable bad vram size: 0x86800800
> [ 2.746093] [drm] Detected VRAM RAM=2048M, BAR=256M
> [ 2.746093] [drm] RAM width 64bits GDDR5
> [ 2.750000] [drm] amdgpu: 2048M of VRAM memory ready
> [ 2.750000] [drm] amdgpu: 3072M of GTT memory ready.
> [ 2.750000] [drm] GART: num cpu pages 65536, num gpu pages 262144
> [ 2.750000] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
> [ 2.750000] [drm] Driver supports precise vblank timestamp query.
> [ 2.750000] [drm] Internal thermal controller with fan control
> [ 2.750000] [drm] amdgpu: dpm initialized
> [ 2.750000] [drm] amdgpu atom DIG backlight initialized
> [ 2.750000] [drm] AMDGPU Display Connectors
> [ 2.750000] [drm] Connector 0:
> [ 2.750000] [drm] DP-1
> [ 2.750000] [drm] HPD2
> [ 2.750000] [drm] DDC: 0x1950 0x1950 0x1951 0x1951 0x1952 0x1952 0x1953 0x1953
> [ 2.750000] [drm] Encoders:
> [ 2.750000] [drm] LCD1: INTERNAL_UNIPHY
> [ 2.750000] [drm] Connector 1:
> [ 2.750000] [drm] HDMI-A-1
> [ 2.750000] [drm] HPD1
> [ 2.750000] [drm] DDC: 0x194c 0x194c 0x194d 0x194d 0x194e 0x194e 0x194f 0x194f
> [ 2.750000] [drm] Encoders:
> [ 2.750000] [drm] DFP1: INTERNAL_UNIPHY
> [ 2.753906] [drm] PCIE gen 2 link speeds already enabled
> [ 3.507812] [drm] fb mappable at 0x503E4000
> [ 3.507812] [drm] vram apper at 0x50000000
> [ 3.507812] [drm] size 8306688
> [ 3.507812] [drm] fb depth is 24
> [ 3.507812] [drm] pitch is 7680
> [ 4.632812] amdgpu 0000:05:00.0: fb0: amdgpudrmfb frame buffer device
> [ 5.023437] [drm] Initialized amdgpu 3.27.0 20150101 for 0000:05:00.0 on minor 0
>
>
> Qiang Ma
>
> > Christian.
> >
> > >
> > > Alex
> > >
> > >
> > >> Regards,
> > >> Christian.
> > >>
> > >>> Qiang Ma
> > >>>
> > >>>>> Signed-off-by: Qiang Ma <[email protected]>
> > >>>>> ---
> > >>>>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 +++++++++--
> > >>>>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13 ++++++++++---
> > >>>>> 2 files changed, 19 insertions(+), 5 deletions(-)
> > >>>>>
> > >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> > >>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index
> > >>>>> 23b478639921..3703695f7789 100644 ---
> > >>>>> a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++
> > >>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -309,8 +309,15 @@
> > >>>>> static int gmc_v6_0_mc_init(struct amdgpu_device *adev) }
> > >>>>> adev->gmc.vram_width = numchan * chansize;
> > >>>>> /* size in MB on si */
> > >>>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > >>>>> 1024ULL * 1024ULL;
> > >>>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > >>>>> 1024ULL * 1024ULL;
> > >>>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> > >>>>> + /* some boards may have garbage in the upper 16 bits */
> > >>>>> + if (tmp & 0xffff0000) {
> > >>>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> > >>>>> + if (tmp & 0xffff)
> > >>>>> + tmp &= 0xffff;
> > >>>>> + }
> > >>>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> > >>>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> > >>>>>
> > >>>>> if (!(adev->flags & AMD_IS_APU)) {
> > >>>>> r = amdgpu_device_resize_fb_bar(adev);
> > >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> > >>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index
> > >>>>> 3da7b6a2b00d..1df1fc578ff6 100644 ---
> > >>>>> a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++
> > >>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -316,10 +316,10 @@
> > >>>>> static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
> > >>>>> static int gmc_v7_0_mc_init(struct amdgpu_device *adev) {
> > >>>>> int r;
> > >>>>> + u32 tmp;
> > >>>>>
> > >>>>> adev->gmc.vram_width =
> > >>>>> amdgpu_atombios_get_vram_width(adev); if
> > >>>>> (!adev->gmc.vram_width) {
> > >>>>> - u32 tmp;
> > >>>>> int chansize, numchan;
> > >>>>>
> > >>>>> /* Get VRAM informations */
> > >>>>> @@ -363,8 +363,15 @@ static int gmc_v7_0_mc_init(struct
> > >>>>> amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize;
> > >>>>> }
> > >>>>> /* size in MB on si */
> > >>>>> - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > >>>>> 1024ULL * 1024ULL;
> > >>>>> - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) *
> > >>>>> 1024ULL * 1024ULL;
> > >>>>> + tmp = RREG32(mmCONFIG_MEMSIZE);
> > >>>>> + /* some boards may have garbage in the upper 16 bits */
> > >>>>> + if (tmp & 0xffff0000) {
> > >>>>> + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
> > >>>>> + if (tmp & 0xffff)
> > >>>>> + tmp &= 0xffff;
> > >>>>> + }
> > >>>>> + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
> > >>>>> + adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
> > >>>>>
> > >>>>> if (!(adev->flags & AMD_IS_APU)) {
> > >>>>> r = amdgpu_device_resize_fb_bar(adev);
> >
> >
>