2010-12-20 18:12:27

by Arnd Bergmann

[permalink] [raw]
Subject: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

Commit c64f7ba5f10 "agp/intel: Remove confusion of stolen entries not stolen memory" caused a regression on my Intel G45 based system, using the VESA
Xorg driver or uvesafb. I have not tried if i915 with KMS shows the same
behaviour, but can try if necessary.

Reverting the patch on Friday's linux-next restores the normal behaviour
and lets me use X again.

Signed-off-by: Arnd Bergmann <[email protected]>
Cc: Chris Wilson <[email protected]>

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 356f73e..7812a3a 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -342,12 +342,13 @@ static const struct aper_size_info_fixed intel_fake_agp_sizes[] = {
{512, 131072, 7},
};

-static unsigned int intel_gtt_stolen_size(void)
+static unsigned int intel_gtt_stolen_entries(void)
{
u16 gmch_ctrl;
u8 rdct;
int local = 0;
static const int ddt[4] = { 0, 16, 32, 64 };
+ unsigned int overhead_entries;
unsigned int stolen_size = 0;

if (INTEL_GTT_GEN == 1)
@@ -356,6 +357,14 @@ static unsigned int intel_gtt_stolen_size(void)
pci_read_config_word(intel_private.bridge_dev,
I830_GMCH_CTRL, &gmch_ctrl);

+ if (INTEL_GTT_GEN > 4 || IS_PINEVIEW)
+ overhead_entries = 0;
+ else
+ overhead_entries = intel_private.base.gtt_mappable_entries
+ / 1024;
+
+ overhead_entries += 1; /* BIOS popup */
+
if (intel_private.bridge_dev->device == PCI_DEVICE_ID_INTEL_82830_HB ||
intel_private.bridge_dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) {
switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
@@ -490,7 +499,7 @@ static unsigned int intel_gtt_stolen_size(void)
stolen_size = 0;
}

- return stolen_size;
+ return stolen_size/KB(4) - overhead_entries;
}

static void i965_adjust_pgetbl_size(unsigned int size_flag)
@@ -686,7 +695,7 @@ static int intel_gtt_init(void)

global_cache_flush(); /* FIXME: ? */

- intel_private.base.stolen_size = intel_gtt_stolen_size();
+ intel_private.base.gtt_stolen_entries = intel_gtt_stolen_entries();

ret = intel_gtt_setup_scratch_page();
if (ret != 0) {
@@ -867,7 +876,8 @@ static int intel_fake_agp_configure(void)

agp_bridge->gart_bus_addr = intel_private.gma_bus_addr;

- for (i = 0; i < intel_private.base.gtt_total_entries; i++) {
+ for (i = intel_private.base.gtt_stolen_entries;
+ i < intel_private.base.gtt_total_entries; i++) {
intel_private.driver->write_entry(intel_private.scratch_page_dma,
i, 0);
}
@@ -942,7 +952,17 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
if (mem->page_count == 0)
goto out;

- if (pg_start + mem->page_count > intel_private.base.gtt_total_entries)
+ if (pg_start < intel_private.base.gtt_stolen_entries) {
+ dev_printk(KERN_DEBUG, &intel_private.pcidev->dev,
+ "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n",
+ pg_start, intel_private.base.gtt_stolen_entries);
+
+ dev_info(&intel_private.pcidev->dev,
+ "trying to insert into local/stolen memory\n");
+ goto out_err;
+ }
+
+ if ((pg_start + mem->page_count) > intel_private.base.gtt_total_entries)
goto out_err;

if (type != mem->type)
@@ -991,6 +1011,12 @@ static int intel_fake_agp_remove_entries(struct agp_memory *mem,
if (mem->page_count == 0)
return 0;

+ if (pg_start < intel_private.base.gtt_stolen_entries) {
+ dev_info(&intel_private.pcidev->dev,
+ "trying to disable local/stolen memory\n");
+ return -EINVAL;
+ }
+
if (intel_private.base.needs_dmar) {
intel_gtt_unmap_memory(mem->sg_list, mem->num_sg);
mem->sg_list = NULL;
@@ -1489,7 +1515,7 @@ int intel_gmch_probe(struct pci_dev *pdev,
}
EXPORT_SYMBOL(intel_gmch_probe);

-const struct intel_gtt *intel_gtt_get(void)
+struct intel_gtt *intel_gtt_get(void)
{
return &intel_private.base;
}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3f7b203..1655c39 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1049,7 +1049,7 @@ static unsigned long i915_stolen_to_phys(struct drm_device *dev, u32 offset)
pci_read_config_byte(pdev, 0x9c, &val);
base = val >> 3 << 27;
}
- base -= dev_priv->mm.gtt->stolen_size;
+ base -= dev_priv->mm.gtt->gtt_stolen_entries << PAGE_SHIFT;
#endif

return base + offset;
@@ -1173,7 +1173,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
unsigned long prealloc_size, gtt_size, mappable_size;
int ret = 0;

- prealloc_size = dev_priv->mm.gtt->stolen_size;
+ prealloc_size = dev_priv->mm.gtt->gtt_stolen_entries << PAGE_SHIFT;
gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 30780f2..b41edf6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -532,7 +532,7 @@ typedef struct drm_i915_private {

struct {
/** Bridge to intel-gtt-ko */
- const struct intel_gtt *gtt;
+ struct intel_gtt *gtt;
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
/** Memory allocator for GTT */
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 9e343c0..75c2088 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -2,10 +2,9 @@

#ifndef _DRM_INTEL_GTT_H
#define _DRM_INTEL_GTT_H
-
-const struct intel_gtt {
- /* Size of memory reserved for graphics by the BIOS */
- unsigned int stolen_size;
+struct intel_gtt {
+ /* Number of stolen gtt entries at the beginning. */
+ unsigned int gtt_stolen_entries;
/* Total number of gtt entries. */
unsigned int gtt_total_entries;
/* Part of the gtt that is mappable by the cpu, for those chips where


2010-12-20 18:53:40

by Chris Wilson

[permalink] [raw]
Subject: Re: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

On Mon, 20 Dec 2010 19:12:19 +0100, Arnd Bergmann <[email protected]> wrote:
> Commit c64f7ba5f10 "agp/intel: Remove confusion of stolen entries not stolen memory" caused a regression on my Intel G45 based system, using the VESA
> Xorg driver or uvesafb. I have not tried if i915 with KMS shows the same
> behaviour, but can try if necessary.

I'm very confused as to how this would have impacted the VESA video
driver. Any clues as to what VESA is doing? And what is the nature of
the regression?
-Chris

--
Chris Wilson, Intel Open Source Technology Centre

2010-12-20 19:47:12

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

On Monday 20 December 2010 19:53:35 Chris Wilson wrote:
> On Mon, 20 Dec 2010 19:12:19 +0100, Arnd Bergmann <[email protected]> wrote:
> > Commit c64f7ba5f10 "agp/intel: Remove confusion of stolen entries not stolen memory" caused a regression on my Intel G45 based system, using the VESA
> > Xorg driver or uvesafb. I have not tried if i915 with KMS shows the same
> > behaviour, but can try if necessary.
>
> I'm very confused as to how this would have impacted the VESA video
> driver. Any clues as to what VESA is doing?

Not the slightest idea. I don't understand what your code is doing
either, I only bisected it in order to get a running linux-next
kernel.

> And what is the nature of the regression?

The machine is very much alive, but the screen output is garbled. On
the text console, it shows a couple of characters around the cursor
position all over the screen. In X, it seems to be similar. The kdm
login screen gets shown as a nice pattern of colours, and they
change when I move the mouse. I can't identify anything visible on the
screen though.

Trying with i915 KMS next.

Arnd

2010-12-20 19:52:12

by Chris Wilson

[permalink] [raw]
Subject: Re: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

Also, which modules do you have loaded when using VESA? i.e. is the
i915.ko loaded, but in UMS mode (i915.modeset=0)?
-Chris

--
Chris Wilson, Intel Open Source Technology Centre

2010-12-20 20:52:44

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

On Monday 20 December 2010 20:52:07 Chris Wilson wrote:
>
> Also, which modules do you have loaded when using VESA? i.e. is the
> i915.ko loaded, but in UMS mode (i915.modeset=0)?

This doesn't seem to matter, as far as I can tell, i915 can be loaded
or now.

I've seen the system crash once while loading i915 with
modeset=1 and my revert patch applied and backed it out.

After that, I could no longer even get i915 to do modesetting,
the ioremap in intel_opregion_setup now fails because reserve_memtype
decides that the opregion should be write-back when we ask for
an uncached mapping. That's probably an unrelated problem, but
I'm mentioning it anyway in case it's significant.

Arnd

2010-12-20 21:07:03

by Chris Wilson

[permalink] [raw]
Subject: Re: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

On Mon, 20 Dec 2010 21:52:38 +0100, Arnd Bergmann <[email protected]> wrote:
> On Monday 20 December 2010 20:52:07 Chris Wilson wrote:
> >
> > Also, which modules do you have loaded when using VESA? i.e. is the
> > i915.ko loaded, but in UMS mode (i915.modeset=0)?
>
> This doesn't seem to matter, as far as I can tell, i915 can be loaded
> or now.

Thanks, that rules out the likely explanation that we [i915] loaded the
GTT with some conflicting entries. Instead it is likely the initialisation
of the GTT to point to the scratch page that is triggering the problem.
Can you try disabling it with:

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 356f73e..238848e 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -867,11 +867,13 @@ static int intel_fake_agp_configure(void)

agp_bridge->gart_bus_addr = intel_private.gma_bus_addr;

+#if 0
for (i = 0; i < intel_private.base.gtt_total_entries; i++) {
intel_private.driver->write_entry(intel_private.scratch_page_dma,
i, 0);
}
readl(intel_private.gtt+i-1); /* PCI Posting. */
+#endif

global_cache_flush();

> I've seen the system crash once while loading i915 with
> modeset=1 and my revert patch applied and backed it out.
>
> After that, I could no longer even get i915 to do modesetting,
> the ioremap in intel_opregion_setup now fails because reserve_memtype
> decides that the opregion should be write-back when we ask for
> an uncached mapping. That's probably an unrelated problem, but
> I'm mentioning it anyway in case it's significant.

I hope not. But it sounds like we're in for a turbulent ride if ioremap is
failing in -next.

Thanks,
-Chris

--
Chris Wilson, Intel Open Source Technology Centre

2010-12-20 21:54:38

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

On Monday 20 December 2010 22:06:47 Chris Wilson wrote:
> On Mon, 20 Dec 2010 21:52:38 +0100, Arnd Bergmann <[email protected]> wrote:
> > On Monday 20 December 2010 20:52:07 Chris Wilson wrote:
> > >
> > > Also, which modules do you have loaded when using VESA? i.e. is the
> > > i915.ko loaded, but in UMS mode (i915.modeset=0)?
> >
> > This doesn't seem to matter, as far as I can tell, i915 can be loaded
> > or now.
>
> Thanks, that rules out the likely explanation that we [i915] loaded the
> GTT with some conflicting entries. Instead it is likely the initialisation
> of the GTT to point to the scratch page that is triggering the problem.
> Can you try disabling it with:
>
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 356f73e..238848e 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -867,11 +867,13 @@ static int intel_fake_agp_configure(void)
>
> agp_bridge->gart_bus_addr = intel_private.gma_bus_addr;
>
> +#if 0
> for (i = 0; i < intel_private.base.gtt_total_entries; i++) {
> intel_private.driver->write_entry(intel_private.scratch_page_dma,
> i, 0);
> }
> readl(intel_private.gtt+i-1); /* PCI Posting. */
> +#endif
>
> global_cache_flush();

Yes, this works as well, good catch!

> > I've seen the system crash once while loading i915 with
> > modeset=1 and my revert patch applied and backed it out.
> >
> > After that, I could no longer even get i915 to do modesetting,
> > the ioremap in intel_opregion_setup now fails because reserve_memtype
> > decides that the opregion should be write-back when we ask for
> > an uncached mapping. That's probably an unrelated problem, but
> > I'm mentioning it anyway in case it's significant.
>
> I hope not. But it sounds like we're in for a turbulent ride if ioremap is
> failing in -next.

It only fails for the opregion. I feel I've done enough bisecting for today,
but it's certainly broken in -next and the ioremap works in 2.6.37-rc6.
Should the opregion actually be writeback cached? Maybe something is
wrong in reserve_memtype.

Loading i915 in -rc6 also crashes my system hard when modeset=1, but
that may be a hardware problem -- the same one that used to cause occasional
hangs with i915 KMS, forcing me to run X11 with the vesa driver.

Arnd

2010-12-20 22:08:18

by Dave Airlie

[permalink] [raw]
Subject: Re: [BISECTED] agp/intel: revert "Remove confusion of stolen entries not stolen memory"

On Tue, Dec 21, 2010 at 7:54 AM, Arnd Bergmann <[email protected]> wrote:
> On Monday 20 December 2010 22:06:47 Chris Wilson wrote:
>> On Mon, 20 Dec 2010 21:52:38 +0100, Arnd Bergmann <[email protected]> wrote:
>> > On Monday 20 December 2010 20:52:07 Chris Wilson wrote:
>> > >
>> > > Also, which modules do you have loaded when using VESA? i.e. is the
>> > > i915.ko loaded, but in UMS mode (i915.modeset=0)?
>> >
>> > This doesn't seem to matter, as far as I can tell, i915 can be loaded
>> > or now.
>>
>> Thanks, that rules out the likely explanation that we [i915] loaded the
>> GTT with some conflicting entries. Instead it is likely the initialisation
>> of the GTT to point to the scratch page that is triggering the problem.
>> Can you try disabling it with:
>>
>> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
>> index 356f73e..238848e 100644
>> --- a/drivers/char/agp/intel-gtt.c
>> +++ b/drivers/char/agp/intel-gtt.c
>> @@ -867,11 +867,13 @@ static int intel_fake_agp_configure(void)
>>
>> ? ? ? agp_bridge->gart_bus_addr = intel_private.gma_bus_addr;
>>
>> +#if 0
>> ? ? ? for (i = 0; i < intel_private.base.gtt_total_entries; i++) {
>> ? ? ? ? ? ? ? intel_private.driver->write_entry(intel_private.scratch_page_dma,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? i, 0);
>> ? ? ? }
>> ? ? ? readl(intel_private.gtt+i-1); ? /* PCI Posting. */
>> +#endif
>>
>> ? ? ? global_cache_flush();
>
> Yes, this works as well, good catch!
>
>> > I've seen the system crash once while loading i915 with
>> > modeset=1 and my revert patch applied and backed it out.
>> >
>> > After that, I could no longer even get i915 to do modesetting,
>> > the ioremap in intel_opregion_setup now fails because reserve_memtype
>> > decides that the opregion should be write-back when we ask for
>> > an uncached mapping. That's probably an unrelated problem, but
>> > I'm mentioning it anyway in case it's significant.
>>
>> I hope not. But it sounds like we're in for a turbulent ride if ioremap is
>> failing in -next.
>
> It only fails for the opregion. I feel I've done enough bisecting for today,
> but it's certainly broken in -next and the ioremap works in 2.6.37-rc6.
> Should the opregion actually be writeback cached? Maybe something is
> wrong in reserve_memtype.
>
> Loading i915 in -rc6 also crashes my system hard when modeset=1, but
> that may be a hardware problem -- the same one that used to cause occasional
> hangs with i915 KMS, forcing me to run X11 with the vesa driver.

I wonder if the ACPI table mapping stuff is in -next yet.

As a first guess.

Dave.