2008-01-14 21:34:37

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 1/3] x86_64: check and enable MMCONFIG for AMD Family 10h Opteron v2

[PATCH 1/3] x86_64: check and enable MMCONFIG for AMD Family 10h Opteron v2

So we can use MMCONF when it has not been set up by the BIOS, or when booting the kernel with acpi=off.

Use the TOP_MEM2 MSR to get the top of memory, and scan the Fam10h MMIO routing to
make sure the range does not conflict with any prefetchable MMIO that is above 4G.
(Currently only LinuxBIOS assigns 64-bit MMIO above 4G, for some co-processors.)

Signed-off-by: Yinghai Lu <[email protected]>

Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -30,6 +30,7 @@
#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/pci.h>
+#include <asm/pci-direct.h>
#include <linux/efi.h>
#include <linux/acpi.h>
#include <linux/kallsyms.h>
@@ -40,6 +41,7 @@
#include <linux/dmi.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>
+#include <linux/sort.h>
#include <linux/uaccess.h>
#include <linux/init_ohci1394_dma.h>

@@ -581,6 +583,203 @@ static int nearby_node(int apicid)
}
#endif

+#ifdef CONFIG_PCI_MMCONFIG
+struct pci_hostbridge_probe { /* one candidate config-space location of the host bridge */
+ u32 bus; /* bus number handed to read_pci_config() */
+ u32 slot; /* slot number handed to read_pci_config() */
+ u32 vendor; /* expected vendor ID (low 16 bits of the ID dword) */
+ u32 device; /* expected device ID (high 16 bits of the ID dword) */
+};
+
+static u64 __cpuinitdata fam10h_pci_mmconf_base; /* MMCONFIG base address; meaningful once the status below is 1 */
+static int __cpuinitdata fam10h_pci_mmconf_base_status; /* 0: not probed yet, 1: base is set, -1: probing failed */
+
+static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { /* tried in order; the first matching entry is used */
+ { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
+ { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
+};
+
+struct range { /* one MMIO routing window collected by get_fam10h_pci_mmconf_base() */
+ u64 start; /* window base; the collector keeps address bits 39:32 only */
+ u64 end; /* window limit; the collector keeps address bits 39:32 only */
+};
+
+/*
+ * sort() comparison callback: order two struct range entries by ascending
+ * start address.
+ *
+ * The 64-bit start values are compared directly.  Narrowing them to int and
+ * subtracting is only correct while (start >> 32) fits in a non-negative
+ * int, which this callback cannot guarantee on its own.
+ *
+ * Returns -1, 0 or 1 when x1 sorts before, equal to, or after x2.
+ */
+static int __cpuinit cmp_range(const void *x1, const void *x2)
+{
+	const struct range *r1 = x1;
+	const struct range *r2 = x2;
+
+	if (r1->start < r2->start)
+		return -1;
+	if (r1->start > r2->start)
+		return 1;
+	return 0;
+}
+
+/*[47:0] */
+/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */
+#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
+#define BASE_VALID(b) (((b) != (0xfdULL << 32)) && ((b) != (0xfeULL << 32)))
+
+/*
+ * Pick a base address for the MMCONFIG window and record the outcome in
+ * fam10h_pci_mmconf_base / fam10h_pci_mmconf_base_status (1 on success,
+ * -1 on failure).  The search runs once; later calls return immediately.
+ *
+ * Steps:
+ *  1. locate the host bridge through pci_probes[] using early config reads;
+ *  2. start from FAM10H_PCI_MMCONF_BASE, moved to 4G above TOP_MEM2 when the
+ *     default is not above TOP_MEM2;
+ *  3. collect the MMIO routing ranges that reach above 4G and, if the
+ *     candidate may collide with them, look for a free 4G-aligned slot
+ *     below, above, or between those ranges.
+ */
+static void __cpuinit get_fam10h_pci_mmconf_base(void)
+{
+	int i;
+	unsigned bus;
+	unsigned slot;
+	int found;
+
+	u64 val;
+	u32 address;
+	u64 tom2;
+	u64 base = FAM10H_PCI_MMCONF_BASE;
+
+	int hi_mmio_num;
+	struct range range[8];
+
+	/* only try to get setting from BSP */
+	/* -1 or 1 */
+	if (fam10h_pci_mmconf_base_status)
+		return;
+
+	if (!early_pci_allowed())
+		goto fail;
+
+	/* bus/slot keep the location of the first probe entry that matches */
+	found = 0;
+	for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
+		u32 id;
+		u16 device;
+		u16 vendor;
+
+		bus = pci_probes[i].bus;
+		slot = pci_probes[i].slot;
+		id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID);
+
+		vendor = id & 0xffff;
+		device = (id>>16) & 0xffff;
+		if (pci_probes[i].vendor == vendor &&
+		    pci_probes[i].device == device) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found)
+		goto fail;
+
+	/* SYS_CFG */
+	address = 0xc0010010;
+	rdmsrl(address, val);
+
+	/* TOP_MEM2 is not enabled? */
+	if (!(val & (1<<21))) {
+		tom2 = 0;
+	} else {
+		/* TOP_MEM2 */
+		address = 0xc001001d;
+		rdmsrl(address, val);
+		/* keep bits 47:32 only, i.e. round down to a 4G boundary */
+		tom2 = val & (0xffffULL<<32);
+	}
+
+	if (base <= tom2)
+		base = tom2 + (1ULL<<32);
+
+	/*
+	 * need to check if the range is in the high mmio range that is
+	 * above 4G
+	 */
+	hi_mmio_num = 0;
+	for (i = 0; i < 8; i++) {
+		u32 reg;
+		u64 start;
+		u64 end;
+		reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3));
+		/* low two bits clear: entry is skipped (presumably disabled) */
+		if (!(reg & 3))
+			continue;
+
+		start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/
+		reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
+		end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/
+
+		/* limit below 4G: the whole range is low MMIO, ignore it */
+		if (!end)
+			continue;
+
+		range[hi_mmio_num].start = start;
+		range[hi_mmio_num].end = end;
+		hi_mmio_num++;
+	}
+
+	/*
+	 * NOTE(review): the early exits below accept base without re-running
+	 * BASE_VALID(); after the TOP_MEM2 adjustment base could in principle
+	 * equal an excluded value - confirm whether that needs a check.
+	 */
+	if (!hi_mmio_num)
+		goto out;
+
+	/* sort the range */
+	sort(range, hi_mmio_num, sizeof(struct range), cmp_range, NULL);
+
+	/* candidate lies entirely above or below all high ranges: keep it */
+	if (range[hi_mmio_num - 1].end < base)
+		goto out;
+	if (range[0].start > base)
+		goto out;
+
+	/* need to find one window */
+	/*
+	 * Only look below the lowest range when it starts at or above 4G;
+	 * with a start of 0 the subtraction would wrap to a huge bogus base
+	 * that passes both tests below.
+	 */
+	if (range[0].start >= (1ULL << 32)) {
+		base = range[0].start - (1ULL << 32);
+		if ((base > tom2) && BASE_VALID(base))
+			goto out;
+	}
+	base = range[hi_mmio_num - 1].end + (1ULL << 32);
+	if ((base > tom2) && BASE_VALID(base))
+		goto out;
+	/* need to find window between ranges */
+	for (i = 0; i < hi_mmio_num - 1; i++) {
+		if (range[i + 1].start > (range[i].end + (1ULL << 32))) {
+			base = range[i].end + (1ULL << 32);
+			if ((base > tom2) && BASE_VALID(base))
+				goto out;
+		}
+	}
+
+fail:
+	fam10h_pci_mmconf_base_status = -1;
+	return;
+out:
+	fam10h_pci_mmconf_base = base;
+	fam10h_pci_mmconf_base_status = 1;
+}
+#endif
+
+/*
+ * Make sure MMCONFIG is enabled on the calling CPU and that every CPU ends
+ * up with the same base address.
+ *
+ * MSR 0xc0010058 layout as used here: bit 0 enables MMCONFIG, bits 5:2 hold
+ * the bus range (8 selects 256 buses), bits 47:20 hold the base address.
+ *
+ * If the MSR is already enabled, the first base seen is recorded and a CPU
+ * whose base matches it is left alone.  Otherwise a base is obtained from
+ * get_fam10h_pci_mmconf_base() and programmed with a 256-bus range.
+ *
+ * @c: CPU being initialised (not used by the body).
+ */
+static void __cpuinit fam10h_check_enable_mmcfg(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+	/* base address field, bits 47:20; ULL keeps the shift out of int */
+	const u64 base_mask = 0xfffffffULL << 20;
+	u64 val;
+	u32 address;
+
+	address = 0xc0010058;
+	rdmsrl(address, val);
+
+	/* try to make sure that AP's setting is identical to BSP setting */
+	if (val & 1) {
+		u64 base;
+		/*
+		 * Take the whole base field: with bits 47:32 alone, a base
+		 * below 4G would be recorded as 0.
+		 */
+		base = val & base_mask;
+		if (fam10h_pci_mmconf_base_status <= 0) {
+			fam10h_pci_mmconf_base = base;
+			fam10h_pci_mmconf_base_status = 1;
+			return;
+		} else if (fam10h_pci_mmconf_base == base)
+			return;
+	}
+
+	/*
+	 * if it is not enabled, try to enable it and assume only one segment
+	 * with 256 buses
+	 */
+	get_fam10h_pci_mmconf_base();
+	if (fam10h_pci_mmconf_base_status <= 0)
+		return;
+
+	printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
+	/* clear the old base and bus-range fields, then set the new ones */
+	val &= ~(base_mask | (0xfULL << 2));
+	val |= fam10h_pci_mmconf_base | (8 << 2) | (1 << 0);
+	wrmsrl(address, val);
+#endif
+}
+
/*
* On a AMD dual core setup the lower bits of the APIC id distingush the cores.
* Assumes number of cores is a power of two.
@@ -768,6 +967,9 @@ static void __cpuinit init_amd(struct cp
if (c->x86 == 0x10 && !force_mwait)
clear_cpu_cap(c, X86_FEATURE_MWAIT);

+ if (c->x86 == 0x10)
+ fam10h_check_enable_mmcfg(c);
+
if (amd_apic_timer_broken())
disable_apic_timer = 1;
}



2008-01-14 21:33:58

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 3/3] x86_64: set cfg_size for AMD Family 10h in case MMCONFIG is used.

[PATCH 3/3] x86_64: set cfg_size for AMD Family 10h in case MMCONFIG is used.

Reuse pci_cfg_space_size, but skip the check for the PCI Express and PCI-X capability IDs.

Signed-off-by: Yinghai Lu <[email protected]>

===================================================================
Index: linux-2.6/arch/x86/pci/fixup.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/fixup.c
+++ linux-2.6/arch/x86/pci/fixup.c
@@ -491,3 +491,17 @@ static void __devinit pci_siemens_interr
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
pci_siemens_interrupt_controller);
+
+/*
+ * Regular PCI devices have 256 bytes of config space, but the AMD Family 10h
+ * Opteron northbridge functions have 4096 bytes of extended config space
+ * without advertising a PCI Express or PCI-X capability.  Even if the device
+ * is capable, that doesn't mean we can access it: maybe we don't have a way
+ * to generate extended config space accesses.  So probe for it, skipping
+ * only the capability check.
+ *
+ * The fixup is registered for device IDs 0x1200-0x1204 (the Family 10h
+ * northbridge functions) rather than for every AMD device.
+ */
+static void __devinit fam10h_pci_cfg_space_size(struct pci_dev *dev)
+{
+	dev->cfg_size = pci_cfg_space_size_ext(dev, 0);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
Index: linux-2.6/drivers/pci/probe.c
===================================================================
--- linux-2.6.orig/drivers/pci/probe.c
+++ linux-2.6/drivers/pci/probe.c
@@ -838,11 +838,14 @@ static void set_pcie_port_type(struct pc
* reading the dword at 0x100 which must either be 0 or a valid extended
* capability header.
*/
-int pci_cfg_space_size(struct pci_dev *dev)
+int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
{
int pos;
u32 status;

+ if (!check_exp_pcix)
+ goto skip;
+
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
if (!pos) {
pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
@@ -854,6 +857,7 @@ int pci_cfg_space_size(struct pci_dev *d
goto fail;
}

+ skip:
if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
goto fail;
if (status == 0xffffffff)
@@ -865,6 +869,11 @@ int pci_cfg_space_size(struct pci_dev *d
return PCI_CFG_SPACE_SIZE;
}

+int pci_cfg_space_size(struct pci_dev *dev) /* wrapper that keeps the pre-existing name and signature */
+{
+ return pci_cfg_space_size_ext(dev, 1); /* 1: do not skip the PCI_CAP_ID_EXP / PCI_CAP_ID_PCIX capability lookup */
+}
+
static void pci_release_bus_bridge_dev(struct device *dev)
{
kfree(dev);
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -639,6 +639,7 @@ int pci_scan_bridge(struct pci_bus *bus,

void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
void *userdata);
+int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix);
int pci_cfg_space_size(struct pci_dev *dev);
unsigned char pci_bus_max_busnr(struct pci_bus* bus);

2008-01-14 21:34:20

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 2/3] x86_64: check MSR to get MMCONFIG for AMD Family 10h Opteron

[PATCH 2/3] x86_64: check MSR to get MMCONFIG for AMD Family 10h Opteron

So even if MCFG is not there, we can still use MMCONFIG.

Signed-off-by: Yinghai Lu <[email protected]>

Index: linux-2.6/arch/x86/pci/mmconfig-shared.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/mmconfig-shared.c
+++ linux-2.6/arch/x86/pci/mmconfig-shared.c
@@ -133,33 +133,84 @@ static const char __init *pci_mmcfg_inte
return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
}

+/*
+ * Host bridge probe for AMD Family 10h: build the MMCONFIG table from MSR
+ * 0xc0010058 instead of from an ACPI MCFG table.
+ *
+ * MSR layout as used here: bit 0 enables MMCONFIG, bits 5:2 give the number
+ * of bus-number bits, bits 47:20 give the base address.  More than 8 bus
+ * bits are split into 8 bus bits plus segment bits, one table entry of
+ * 256MB (1 << 28) per segment.
+ *
+ * Sets pci_mmcfg_config / pci_mmcfg_config_num on success.
+ *
+ * Returns the bridge name, or NULL if the MSR cannot be read, MMCONFIG is
+ * not enabled, or the table cannot be allocated.
+ */
+static const char __init *pci_mmcfg_amd_fam10h(void)
+{
+	u32 low, high, address;
+	u64 base;
+	int i;
+	unsigned segnbits = 0, busnbits;
+
+	address = 0xc0010058;
+	if (rdmsr_safe(address, &low, &high))
+		return NULL;
+
+	/* mmconfig is not enabled */
+	if (!(low & 1))
+		return NULL;
+
+	/*
+	 * Take the whole base field (bits 47:20).  Using the high dword
+	 * alone would turn a base below 4G into 0.
+	 */
+	base = (((u64)high << 32) | low) & (0xfffffffULL << 20);
+
+	busnbits = (low >> 2) & 0x0f;
+	if (busnbits > 8) {
+		segnbits = busnbits - 8;
+		busnbits = 8;
+	}
+
+	pci_mmcfg_config_num = (1 << segnbits);
+	pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) *
+				   pci_mmcfg_config_num, GFP_KERNEL);
+	if (!pci_mmcfg_config) {
+		/* do not leave a non-zero count next to a NULL table */
+		pci_mmcfg_config_num = 0;
+		return NULL;
+	}
+
+	for (i = 0; i < (1 << segnbits); i++) {
+		/* 64-bit arithmetic: (1<<28) * i overflows int for i >= 8 */
+		pci_mmcfg_config[i].address = base + ((u64)i << 28);
+		pci_mmcfg_config[i].pci_segment = i;
+		pci_mmcfg_config[i].start_bus_number = 0;
+		pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1;
+	}
+
+	return "AMD Family 10h NB";
+}
+
struct pci_mmcfg_hostbridge_probe {
+ u32 bus;
+ u32 devfn;
u32 vendor;
u32 device;
const char *(*probe)(void);
};

static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 },
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 },
+ { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 },
+ { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 },
+ { 0, PCI_DEVFN(0x18, 0), PCI_VENDOR_ID_AMD,
+ 0x1200, pci_mmcfg_amd_fam10h },
+ { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD,
+ 0x1200, pci_mmcfg_amd_fam10h },
};

static int __init pci_mmcfg_check_hostbridge(void)
{
u32 l;
+ u32 bus, devfn;
u16 vendor, device;
int i;
const char *name;

- pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
- vendor = l & 0xffff;
- device = (l >> 16) & 0xffff;
-
pci_mmcfg_config_num = 0;
pci_mmcfg_config = NULL;
name = NULL;

for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
+ bus = pci_mmcfg_probes[i].bus;
+ devfn = pci_mmcfg_probes[i].devfn;
+ pci_conf1_read(0, bus, devfn, 0, 4, &l);
+ vendor = l & 0xffff;
+ device = (l >> 16) & 0xffff;
+
if (pci_mmcfg_probes[i].vendor == vendor &&
pci_mmcfg_probes[i].device == device)
name = pci_mmcfg_probes[i].probe();

2008-01-15 15:48:20

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86_64: check MSR to get MMCONFIG for AMD Family 10h Opteron

Yinghai Lu wrote:
> +
> + address = 0xc0010058;
> + if (rdmsr_safe(address, &low, &high))
> + return NULL;
> +

NAK. Add the symbolic MSR name to asm-x86/msr-index.h and use the
symbolic name in the code, please.

-hpa

2008-01-15 21:57:19

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 0/4] x86_64:amd fam10h mmconf v3

Please check the updated version, so we will not rely on the MCFG setting.

It is against x86.git.

These three patches can be applied separately.
If you think 2/4 is too complicated, you can apply 3 and 4.

change: add msr in msr_index.h

Thanks

YH