2016-11-30 23:51:02

by Duc Dang

[permalink] [raw]
Subject: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

PCIe controllers in X-Gene SoCs is not ECAM compliant: software
needs to configure additional controller's register to address
device at bus:dev:function.

The quirk will only be applied for X-Gene PCIe MCFG table with
OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).

The quirk declares the X-Gene PCIe controller register space as 64KB
fixed memory resource with name "PCIe CSR". This name will be showed
next to the resource range in the output of "cat /proc/iomem".

Signed-off-by: Duc Dang <[email protected]>
---
v3:
- Rebase on top of pci/ecam-v6 tree.
- Use DEFINE_RES_MEM_NAMED to declare controller register space
with name "PCIe CSR"
v2:
RFC v2: https://patchwork.ozlabs.org/patch/686846/
v1:
RFC v1: https://patchwork.kernel.org/patch/9337115/

drivers/acpi/pci_mcfg.c | 31 ++++++++
drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
include/linux/pci-ecam.h | 7 ++
3 files changed, 200 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index ac21db3..eb6125b 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -57,6 +57,37 @@ struct mcfg_fixup {
{ "QCOM ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
{ "QCOM ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
{ "QCOM ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
+
+#ifdef CONFIG_PCI_XGENE
+#define XGENE_V1_ECAM_MCFG(rev, seg) \
+ {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
+ &xgene_v1_pcie_ecam_ops }
+#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
+ {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
+ &xgene_v2_1_pcie_ecam_ops }
+#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
+ {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
+ &xgene_v2_2_pcie_ecam_ops }
+
+ /* X-Gene SoC with v1 PCIe controller */
+ XGENE_V1_ECAM_MCFG(1, 0),
+ XGENE_V1_ECAM_MCFG(1, 1),
+ XGENE_V1_ECAM_MCFG(1, 2),
+ XGENE_V1_ECAM_MCFG(1, 3),
+ XGENE_V1_ECAM_MCFG(1, 4),
+ XGENE_V1_ECAM_MCFG(2, 0),
+ XGENE_V1_ECAM_MCFG(2, 1),
+ XGENE_V1_ECAM_MCFG(2, 2),
+ XGENE_V1_ECAM_MCFG(2, 3),
+ XGENE_V1_ECAM_MCFG(2, 4),
+ /* X-Gene SoC with v2.1 PCIe controller */
+ XGENE_V2_1_ECAM_MCFG(3, 0),
+ XGENE_V2_1_ECAM_MCFG(3, 1),
+ /* X-Gene SoC with v2.2 PCIe controller */
+ XGENE_V2_2_ECAM_MCFG(4, 0),
+ XGENE_V2_2_ECAM_MCFG(4, 1),
+ XGENE_V2_2_ECAM_MCFG(4, 2),
+#endif
};

static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index 1de23d7..43d7c69 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -27,6 +27,8 @@
#include <linux/of_irq.h>
#include <linux/of_pci.h>
#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

@@ -64,6 +66,7 @@
/* PCIe IP version */
#define XGENE_PCIE_IP_VER_UNKN 0
#define XGENE_PCIE_IP_VER_1 1
+#define XGENE_PCIE_IP_VER_2 2

struct xgene_pcie_port {
struct device_node *node;
@@ -97,7 +100,15 @@ static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
*/
static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
{
- struct xgene_pcie_port *port = bus->sysdata;
+ struct pci_config_window *cfg;
+ struct xgene_pcie_port *port;
+
+ if (acpi_disabled)
+ port = bus->sysdata;
+ else {
+ cfg = bus->sysdata;
+ port = cfg->priv;
+ }

if (bus->number >= (bus->primary + 1))
return port->cfg_base + AXI_EP_CFG_ACCESS;
@@ -111,10 +122,18 @@ static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
*/
static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
{
- struct xgene_pcie_port *port = bus->sysdata;
+ struct pci_config_window *cfg;
+ struct xgene_pcie_port *port;
unsigned int b, d, f;
u32 rtdid_val = 0;

+ if (acpi_disabled)
+ port = bus->sysdata;
+ else {
+ cfg = bus->sysdata;
+ port = cfg->priv;
+ }
+
b = bus->number;
d = PCI_SLOT(devfn);
f = PCI_FUNC(devfn);
@@ -158,7 +177,15 @@ static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 *val)
{
- struct xgene_pcie_port *port = bus->sysdata;
+ struct pci_config_window *cfg;
+ struct xgene_pcie_port *port;
+
+ if (acpi_disabled)
+ port = bus->sysdata;
+ else {
+ cfg = bus->sysdata;
+ port = cfg->priv;
+ }

if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
PCIBIOS_SUCCESSFUL)
@@ -189,6 +216,138 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
.write = pci_generic_config_write32,
};

+#ifdef CONFIG_ACPI
+static struct resource xgene_v1_csr_res[] = {
+ [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
+ [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
+ [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
+ [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
+ [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
+};
+
+static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
+{
+ struct acpi_device *adev = to_acpi_device(cfg->parent);
+ struct acpi_pci_root *root = acpi_driver_data(adev);
+ struct device *dev = cfg->parent;
+ struct xgene_pcie_port *port;
+ struct resource *csr;
+
+ port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ csr = &xgene_v1_csr_res[root->segment];
+ port->csr_base = devm_ioremap_resource(dev, csr);
+ if (IS_ERR(port->csr_base)) {
+ kfree(port);
+ return -ENOMEM;
+ }
+
+ port->cfg_base = cfg->win;
+ port->version = XGENE_PCIE_IP_VER_1;
+
+ cfg->priv = port;
+
+ return 0;
+}
+
+struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
+ .bus_shift = 16,
+ .init = xgene_v1_pcie_ecam_init,
+ .pci_ops = {
+ .map_bus = xgene_pcie_map_bus,
+ .read = xgene_pcie_config_read32,
+ .write = pci_generic_config_write,
+ }
+};
+
+static struct resource xgene_v2_1_csr_res[] = {
+ [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
+ [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
+};
+
+static int xgene_v2_1_pcie_ecam_init(struct pci_config_window *cfg)
+{
+ struct acpi_device *adev = to_acpi_device(cfg->parent);
+ struct acpi_pci_root *root = acpi_driver_data(adev);
+ struct device *dev = cfg->parent;
+ struct xgene_pcie_port *port;
+ struct resource *csr;
+
+ port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ csr = &xgene_v2_1_csr_res[root->segment];
+ port->csr_base = devm_ioremap_resource(dev, csr);
+ if (IS_ERR(port->csr_base)) {
+ kfree(port);
+ return -ENOMEM;
+ }
+
+ port->cfg_base = cfg->win;
+ port->version = XGENE_PCIE_IP_VER_2;
+
+ cfg->priv = port;
+
+ return 0;
+}
+
+struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops = {
+ .bus_shift = 16,
+ .init = xgene_v2_1_pcie_ecam_init,
+ .pci_ops = {
+ .map_bus = xgene_pcie_map_bus,
+ .read = xgene_pcie_config_read32,
+ .write = pci_generic_config_write,
+ }
+};
+
+static struct resource xgene_v2_2_csr_res[] = {
+ [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
+ [1] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
+ [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
+};
+
+static int xgene_v2_2_pcie_ecam_init(struct pci_config_window *cfg)
+{
+ struct acpi_device *adev = to_acpi_device(cfg->parent);
+ struct acpi_pci_root *root = acpi_driver_data(adev);
+ struct device *dev = cfg->parent;
+ struct xgene_pcie_port *port;
+ struct resource *csr;
+
+ port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ csr = &xgene_v2_2_csr_res[root->segment];
+ port->csr_base = devm_ioremap_resource(dev, csr);
+ if (IS_ERR(port->csr_base)) {
+ kfree(port);
+ return -ENOMEM;
+ }
+
+ port->cfg_base = cfg->win;
+ port->version = XGENE_PCIE_IP_VER_2;
+
+ cfg->priv = port;
+
+ return 0;
+}
+
+struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops = {
+ .bus_shift = 16,
+ .init = xgene_v2_2_pcie_ecam_init,
+ .pci_ops = {
+ .map_bus = xgene_pcie_map_bus,
+ .read = xgene_pcie_config_read32,
+ .write = pci_generic_config_write,
+ }
+};
+#endif
+
static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
u32 flags, u64 size)
{
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index f5740b7..47ab947 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -62,6 +62,13 @@ void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
/* ops for buggy ECAM that supports only 32-bit accesses */
extern struct pci_ecam_ops pci_32b_ops;

+/* ECAM ops for known quirks */
+#ifdef CONFIG_PCI_XGENE
+extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops;
+extern struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops;
+extern struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops;
+#endif
+
#ifdef CONFIG_PCI_HOST_GENERIC
/* for DT-based PCI controllers that support ECAM */
int pci_host_common_probe(struct platform_device *pdev,
--
1.9.1


2016-12-01 15:08:25

by Mark Salter

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Wed, 2016-11-30 at 15:42 -0800, Duc Dang wrote:
> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
> needs to configure additional controller's register to address
> device at bus:dev:function.
>
> The quirk will only be applied for X-Gene PCIe MCFG table with
> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>
> The quirk declares the X-Gene PCIe controller register space as 64KB
> fixed memory resource with name "PCIe CSR". This name will be showed
> next to the resource range in the output of "cat /proc/iomem".
>
> Signed-off-by: Duc Dang <[email protected]>
> ---
> v3:
>   - Rebase on top of pci/ecam-v6 tree.
>   - Use DEFINE_RES_MEM_NAMED to declare controller register space
>   with name "PCIe CSR"
> v2:
>   RFC v2: https://patchwork.ozlabs.org/patch/686846/
> v1:
>   RFC v1: https://patchwork.kernel.org/patch/9337115/
>
>  drivers/acpi/pci_mcfg.c      |  31 ++++++++
>  drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/pci-ecam.h     |   7 ++
>  3 files changed, 200 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> index ac21db3..eb6125b 100644
> --- a/drivers/acpi/pci_mcfg.c
> +++ b/drivers/acpi/pci_mcfg.c
> @@ -57,6 +57,37 @@ struct mcfg_fixup {
>   { "QCOM  ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
>   { "QCOM  ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
>   { "QCOM  ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
> +
> +#ifdef CONFIG_PCI_XGENE
> +#define XGENE_V1_ECAM_MCFG(rev, seg) \
> + {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> + &xgene_v1_pcie_ecam_ops }
> +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
> + {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> + &xgene_v2_1_pcie_ecam_ops }
> +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
> + {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> + &xgene_v2_2_pcie_ecam_ops }
> +
> + /* X-Gene SoC with v1 PCIe controller */
> + XGENE_V1_ECAM_MCFG(1, 0),
> + XGENE_V1_ECAM_MCFG(1, 1),
> + XGENE_V1_ECAM_MCFG(1, 2),
> + XGENE_V1_ECAM_MCFG(1, 3),
> + XGENE_V1_ECAM_MCFG(1, 4),
> + XGENE_V1_ECAM_MCFG(2, 0),
> + XGENE_V1_ECAM_MCFG(2, 1),
> + XGENE_V1_ECAM_MCFG(2, 2),
> + XGENE_V1_ECAM_MCFG(2, 3),
> + XGENE_V1_ECAM_MCFG(2, 4),
> + /* X-Gene SoC with v2.1 PCIe controller */
> + XGENE_V2_1_ECAM_MCFG(3, 0),
> + XGENE_V2_1_ECAM_MCFG(3, 1),
> + /* X-Gene SoC with v2.2 PCIe controller */
> + XGENE_V2_2_ECAM_MCFG(4, 0),
> + XGENE_V2_2_ECAM_MCFG(4, 1),
> + XGENE_V2_2_ECAM_MCFG(4, 2),
> +#endif
>  };
>  
>  static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
> index 1de23d7..43d7c69 100644
> --- a/drivers/pci/host/pci-xgene.c
> +++ b/drivers/pci/host/pci-xgene.c
> @@ -27,6 +27,8 @@
>  #include <linux/of_irq.h>
>  #include <linux/of_pci.h>
>  #include <linux/pci.h>
> +#include <linux/pci-acpi.h>
> +#include <linux/pci-ecam.h>
>  #include <linux/platform_device.h>
>  #include <linux/slab.h>
>  
> @@ -64,6 +66,7 @@
>  /* PCIe IP version */
>  #define XGENE_PCIE_IP_VER_UNKN 0
>  #define XGENE_PCIE_IP_VER_1 1
> +#define XGENE_PCIE_IP_VER_2 2
>  
>  struct xgene_pcie_port {
>   struct device_node *node;
> @@ -97,7 +100,15 @@ static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
>   */
>  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>  {
> - struct xgene_pcie_port *port = bus->sysdata;
> + struct pci_config_window *cfg;
> + struct xgene_pcie_port *port;
> +
> + if (acpi_disabled)
> + port = bus->sysdata;
> + else {
> + cfg = bus->sysdata;
> + port = cfg->priv;
> + }
>  
>   if (bus->number >= (bus->primary + 1))
>   return port->cfg_base + AXI_EP_CFG_ACCESS;
> @@ -111,10 +122,18 @@ static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>   */
>  static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
>  {
> - struct xgene_pcie_port *port = bus->sysdata;
> + struct pci_config_window *cfg;
> + struct xgene_pcie_port *port;
>   unsigned int b, d, f;
>   u32 rtdid_val = 0;
>  
> + if (acpi_disabled)
> + port = bus->sysdata;
> + else {
> + cfg = bus->sysdata;
> + port = cfg->priv;
> + }
> +
>   b = bus->number;
>   d = PCI_SLOT(devfn);
>   f = PCI_FUNC(devfn);
> @@ -158,7 +177,15 @@ static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
>  static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
>       int where, int size, u32 *val)
>  {
> - struct xgene_pcie_port *port = bus->sysdata;
> + struct pci_config_window *cfg;
> + struct xgene_pcie_port *port;
> +
> + if (acpi_disabled)
> + port = bus->sysdata;
> + else {
> + cfg = bus->sysdata;
> + port = cfg->priv;
> + }
>  
>   if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
>       PCIBIOS_SUCCESSFUL)
> @@ -189,6 +216,138 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
>   .write = pci_generic_config_write32,
>  };
>  
> +#ifdef CONFIG_ACPI
> +static struct resource xgene_v1_csr_res[] = {
> + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> +};
> +
> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> + struct acpi_device *adev = to_acpi_device(cfg->parent);
> + struct acpi_pci_root *root = acpi_driver_data(adev);
> + struct device *dev = cfg->parent;
> + struct xgene_pcie_port *port;
> + struct resource *csr;
> +
> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> + if (!port)
> + return -ENOMEM;
> +
> + csr = &xgene_v1_csr_res[root->segment];

This hard-coded assumption that segment N uses controller N breaks
for m400 where segment 0 is using controller 3.

> + port->csr_base = devm_ioremap_resource(dev, csr);
> + if (IS_ERR(port->csr_base)) {
> + kfree(port);
> + return -ENOMEM;
> + }
> +
> + port->cfg_base = cfg->win;
> + port->version = XGENE_PCIE_IP_VER_1;
> +
> + cfg->priv = port;
> +
> + return 0;
> +}
> +
> +struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
> + .bus_shift      = 16,
> + .init           = xgene_v1_pcie_ecam_init,
> + .pci_ops        = {
> + .map_bus        = xgene_pcie_map_bus,
> + .read           = xgene_pcie_config_read32,
> + .write          = pci_generic_config_write,
> + }
> +};
> +
> +static struct resource xgene_v2_1_csr_res[] = {
> + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> +};
> +
> +static int xgene_v2_1_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> + struct acpi_device *adev = to_acpi_device(cfg->parent);
> + struct acpi_pci_root *root = acpi_driver_data(adev);
> + struct device *dev = cfg->parent;
> + struct xgene_pcie_port *port;
> + struct resource *csr;
> +
> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> + if (!port)
> + return -ENOMEM;
> +
> + csr = &xgene_v2_1_csr_res[root->segment];
> + port->csr_base = devm_ioremap_resource(dev, csr);
> + if (IS_ERR(port->csr_base)) {
> + kfree(port);
> + return -ENOMEM;
> + }
> +
> + port->cfg_base = cfg->win;
> + port->version = XGENE_PCIE_IP_VER_2;
> +
> + cfg->priv = port;
> +
> + return 0;
> +}
> +
> +struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops = {
> + .bus_shift      = 16,
> + .init           = xgene_v2_1_pcie_ecam_init,
> + .pci_ops        = {
> + .map_bus        = xgene_pcie_map_bus,
> + .read           = xgene_pcie_config_read32,
> + .write          = pci_generic_config_write,
> + }
> +};
> +
> +static struct resource xgene_v2_2_csr_res[] = {
> + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> + [1] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> +};
> +
> +static int xgene_v2_2_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> + struct acpi_device *adev = to_acpi_device(cfg->parent);
> + struct acpi_pci_root *root = acpi_driver_data(adev);
> + struct device *dev = cfg->parent;
> + struct xgene_pcie_port *port;
> + struct resource *csr;
> +
> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> + if (!port)
> + return -ENOMEM;
> +
> + csr = &xgene_v2_2_csr_res[root->segment];
> + port->csr_base = devm_ioremap_resource(dev, csr);
> + if (IS_ERR(port->csr_base)) {
> + kfree(port);
> + return -ENOMEM;
> + }
> +
> + port->cfg_base = cfg->win;
> + port->version = XGENE_PCIE_IP_VER_2;
> +
> + cfg->priv = port;
> +
> + return 0;
> +}
> +
> +struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops = {
> + .bus_shift      = 16,
> + .init           = xgene_v2_2_pcie_ecam_init,
> + .pci_ops        = {
> + .map_bus        = xgene_pcie_map_bus,
> + .read           = xgene_pcie_config_read32,
> + .write          = pci_generic_config_write,
> + }
> +};
> +#endif
> +
>  static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
>     u32 flags, u64 size)
>  {
> diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
> index f5740b7..47ab947 100644
> --- a/include/linux/pci-ecam.h
> +++ b/include/linux/pci-ecam.h
> @@ -62,6 +62,13 @@ void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
>  /* ops for buggy ECAM that supports only 32-bit accesses */
>  extern struct pci_ecam_ops pci_32b_ops;
>  
> +/* ECAM ops for known quirks */
> +#ifdef CONFIG_PCI_XGENE
> +extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops;
> +extern struct pci_ecam_ops xgene_v2_1_pcie_ecam_ops;
> +extern struct pci_ecam_ops xgene_v2_2_pcie_ecam_ops;
> +#endif
> +
>  #ifdef CONFIG_PCI_HOST_GENERIC
>  /* for DT-based PCI controllers that support ECAM */
>  int pci_host_common_probe(struct platform_device *pdev,

2016-12-01 18:33:59

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

Hi Duc,

On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
> needs to configure additional controller's register to address
> device at bus:dev:function.
>
> The quirk will only be applied for X-Gene PCIe MCFG table with
> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>
> The quirk declares the X-Gene PCIe controller register space as 64KB
> fixed memory resource with name "PCIe CSR". This name will be showed
> next to the resource range in the output of "cat /proc/iomem".
>
> Signed-off-by: Duc Dang <[email protected]>
> ---
> v3:
> - Rebase on top of pci/ecam-v6 tree.
> - Use DEFINE_RES_MEM_NAMED to declare controller register space
> with name "PCIe CSR"
> v2:
> RFC v2: https://patchwork.ozlabs.org/patch/686846/
> v1:
> RFC v1: https://patchwork.kernel.org/patch/9337115/
>
> drivers/acpi/pci_mcfg.c | 31 ++++++++
> drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
> include/linux/pci-ecam.h | 7 ++
> 3 files changed, 200 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> index ac21db3..eb6125b 100644
> --- a/drivers/acpi/pci_mcfg.c
> +++ b/drivers/acpi/pci_mcfg.c
> @@ -57,6 +57,37 @@ struct mcfg_fixup {
> { "QCOM ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
> { "QCOM ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
> { "QCOM ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
> +
> +#ifdef CONFIG_PCI_XGENE

As you've no doubt noticed, I'm proposing to add these quirks without
CONFIG_PCI_XGENE, so we don't have to select each device when building
a generic ACPI kernel.

I'm also proposing some Kconfig and Makefile changes so we don't build
the platform driver part in a generic ACPI kernel (unless we *also*
explicitly select the platform driver).

Here's an example:
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/ecam&id=f80edf4d6c05

> +#define XGENE_V1_ECAM_MCFG(rev, seg) \
> + {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
> + &xgene_v1_pcie_ecam_ops }
> +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
> + {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
> + &xgene_v2_1_pcie_ecam_ops }
> +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
> + {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
> + &xgene_v2_2_pcie_ecam_ops }
> +
> + /* X-Gene SoC with v1 PCIe controller */
> + XGENE_V1_ECAM_MCFG(1, 0),
> + XGENE_V1_ECAM_MCFG(1, 1),

> @@ -64,6 +66,7 @@
> /* PCIe IP version */
> #define XGENE_PCIE_IP_VER_UNKN 0
> #define XGENE_PCIE_IP_VER_1 1
> +#define XGENE_PCIE_IP_VER_2 2

This isn't used anywhere, which makes me wonder whether it's worth
keeping it.

> static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
> {
> - struct xgene_pcie_port *port = bus->sysdata;
> + struct pci_config_window *cfg;
> + struct xgene_pcie_port *port;
> +
> + if (acpi_disabled)
> + port = bus->sysdata;
> + else {
> + cfg = bus->sysdata;
> + port = cfg->priv;
> + }

I would really, really like to figure out a way to get rid of these
"if (acpi_disabled)" checks sprinkled through here. Is there any way
we can set up bus->sysdata to be the same, regardless of whether we're
using this as a platform driver or an ACPI quirk?

> +#ifdef CONFIG_ACPI

You've probably noticed that I've been using

#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)

in this situation, mostly to make it clear that this is part of a
workaround. I don't want people to blindly copy this stuff without
realizing that it's a workaround for a hardware issue.

> +static struct resource xgene_v1_csr_res[] = {
> + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),

I assume these ranges are not the actual ECAM space, right?
If they *were* ECAM, I assume you would have included them in the
quirk itself in the mcfg_quirks[] table.

> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> +{
> + struct acpi_device *adev = to_acpi_device(cfg->parent);
> + struct acpi_pci_root *root = acpi_driver_data(adev);
> + struct device *dev = cfg->parent;
> + struct xgene_pcie_port *port;
> + struct resource *csr;
> +
> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> + if (!port)
> + return -ENOMEM;
> +
> + csr = &xgene_v1_csr_res[root->segment];

This makes me nervous because root->segment comes from the ACPI _SEG,
and if firmware gives us junk in _SEG, we will reference something in
the weeds.

> + port->csr_base = devm_ioremap_resource(dev, csr);
> + if (IS_ERR(port->csr_base)) {
> + kfree(port);
> + return -ENOMEM;
> + }
> +
> + port->cfg_base = cfg->win;
> + port->version = XGENE_PCIE_IP_VER_1;
> +
> + cfg->priv = port;

All these init functions are almost identical. Can we factor this out
by having wrappers that do nothing more than pass in the table and
version, and put the kzalloc and ioremap in a shared back-end?

We're so close I can taste it! I can't wait to see all this work come
to fruition.

Bjorn

2016-12-01 19:17:12

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/01/2016 10:08 AM, Mark Salter wrote:
> On Wed, 2016-11-30 at 15:42 -0800, Duc Dang wrote:

>> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> +{
>> + struct acpi_device *adev = to_acpi_device(cfg->parent);
>> + struct acpi_pci_root *root = acpi_driver_data(adev);
>> + struct device *dev = cfg->parent;
>> + struct xgene_pcie_port *port;
>> + struct resource *csr;
>> +
>> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> + if (!port)
>> + return -ENOMEM;
>> +
>> + csr = &xgene_v1_csr_res[root->segment];
>
> This hard-coded assumption that segment N uses controller N breaks
> for m400 where segment 0 is using controller 3.

This seems very fragile. So in addition to Bjorn's comment about not
trusting firmware provided data for the segment offset in the CSR list,
you will want to also determine the controller from the ACPI tree. The
existing code walks the ACPI hierarchy and finds the CSR that way.
Obviously, the goal is to avoid that in the latest incarnation, but you
could still determine which controller matches a given device.

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-01 19:21:03

by Mark Salter

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
> Hi Duc,
>
> On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
> >
> > PCIe controllers in X-Gene SoCs is not ECAM compliant: software
> > needs to configure additional controller's register to address
> > device at bus:dev:function.
> >
> > The quirk will only be applied for X-Gene PCIe MCFG table with
> > OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
> >
> > The quirk declares the X-Gene PCIe controller register space as 64KB
> > fixed memory resource with name "PCIe CSR". This name will be showed
> > next to the resource range in the output of "cat /proc/iomem".
> >
> > Signed-off-by: Duc Dang <[email protected]>
> > ---
> > v3:
> >   - Rebase on top of pci/ecam-v6 tree.
> >   - Use DEFINE_RES_MEM_NAMED to declare controller register space
> >   with name "PCIe CSR"
> > v2:
> >   RFC v2: https://patchwork.ozlabs.org/patch/686846/
> > v1:
> >   RFC v1: https://patchwork.kernel.org/patch/9337115/
> >
> >  drivers/acpi/pci_mcfg.c      |  31 ++++++++
> >  drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
> >  include/linux/pci-ecam.h     |   7 ++
> >  3 files changed, 200 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
> > index ac21db3..eb6125b 100644
> > --- a/drivers/acpi/pci_mcfg.c
> > +++ b/drivers/acpi/pci_mcfg.c
> > @@ -57,6 +57,37 @@ struct mcfg_fixup {
> >   { "QCOM  ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
> >   { "QCOM  ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
> >   { "QCOM  ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
> > +
> > +#ifdef CONFIG_PCI_XGENE
> As you've no doubt noticed, I'm proposing to add these quirks without
> CONFIG_PCI_XGENE, so we don't have to select each device when building
> a generic ACPI kernel.
>
> I'm also proposing some Kconfig and Makefile changes so we don't build
> the platform driver part in a generic ACPI kernel (unless we *also*
> explicitly select the platform driver).
>
> Here's an example:
> https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/ecam&id=f80edf4d6c05
>
> >
> > +#define XGENE_V1_ECAM_MCFG(rev, seg) \
> > + {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> > + &xgene_v1_pcie_ecam_ops }
> > +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
> > + {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> > + &xgene_v2_1_pcie_ecam_ops }
> > +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
> > + {"APM   ", "XGENE   ", rev, seg, MCFG_BUS_ANY, \
> > + &xgene_v2_2_pcie_ecam_ops }
> > +
> > + /* X-Gene SoC with v1 PCIe controller */
> > + XGENE_V1_ECAM_MCFG(1, 0),
> > + XGENE_V1_ECAM_MCFG(1, 1),
> >
> > @@ -64,6 +66,7 @@
> >  /* PCIe IP version */
> >  #define XGENE_PCIE_IP_VER_UNKN 0
> >  #define XGENE_PCIE_IP_VER_1 1
> > +#define XGENE_PCIE_IP_VER_2 2
> This isn't used anywhere, which makes me wonder whether it's worth
> keeping it.
>
> >
> >  static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
> >  {
> > - struct xgene_pcie_port *port = bus->sysdata;
> > + struct pci_config_window *cfg;
> > + struct xgene_pcie_port *port;
> > +
> > + if (acpi_disabled)
> > + port = bus->sysdata;
> > + else {
> > + cfg = bus->sysdata;
> > + port = cfg->priv;
> > + }
> I would really, really like to figure out a way to get rid of these
> "if (acpi_disabled)" checks sprinkled through here.  Is there any way
> we can set up bus->sysdata to be the same, regardless of whether we're
> using this as a platform driver or an ACPI quirk?
>
> >
> > +#ifdef CONFIG_ACPI
> You've probably noticed that I've been using
>
>   #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
>
> in this situation, mostly to make it clear that this is part of a
> workaround.  I don't want people to blindly copy this stuff without
> realizing that it's a workaround for a hardware issue.
>
> >
> > +static struct resource xgene_v1_csr_res[] = {
> > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> I assume these ranges are not the actual ECAM space, right?
> If they *were* ECAM, I assume you would have included them in the
> quirk itself in the mcfg_quirks[] table.

These are base addresses for some RC mmio registers.
>
> >
> > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> > +{
> > + struct acpi_device *adev = to_acpi_device(cfg->parent);
> > + struct acpi_pci_root *root = acpi_driver_data(adev);
> > + struct device *dev = cfg->parent;
> > + struct xgene_pcie_port *port;
> > + struct resource *csr;
> > +
> > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> > + if (!port)
> > + return -ENOMEM;
> > +
> > + csr = &xgene_v1_csr_res[root->segment];
> This makes me nervous because root->segment comes from the ACPI _SEG,
> and if firmware gives us junk in _SEG, we will reference something in
> the weeds.

The SoC provide some number of RC bridges, each with a different base
for some mmio registers. Even if segment is legitimate in MCFG, there
is still a problem if a platform doesn't use the segment ordering
implied by the code. But the PNP0A03 _CRS does have this base address
as the first memory resource, so we could get it from there and not
have hard-coded addresses and implied ording in the quirk code.

I have tested a modified version of these quirks using this to
get the CSR base and it works on the 3 different platforms I have
access to.

static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
{
struct acpi_device *adev = to_acpi_device(dev);
unsigned long flags;
struct list_head list;
struct resource_entry *entry;
int ret;

INIT_LIST_HEAD(&list);
flags = IORESOURCE_MEM;
ret = acpi_dev_get_resources(adev, &list,
     acpi_dev_filter_resource_type_cb,
     (void *)flags);
if (ret < 0) {
dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
return ret;
} else if (ret == 0) {
dev_err(dev, "no memory resources present in _CRS\n");
return -EINVAL;
}

entry = list_first_entry(&list, struct resource_entry, node);
*r = *entry->res;
acpi_dev_free_resource_list(&list);
return 0;
}

>
> >
> > + port->csr_base = devm_ioremap_resource(dev, csr);
> > + if (IS_ERR(port->csr_base)) {
> > + kfree(port);
> > + return -ENOMEM;
> > + }
> > +
> > + port->cfg_base = cfg->win;
> > + port->version = XGENE_PCIE_IP_VER_1;
> > +
> > + cfg->priv = port;
> All these init functions are almost identical.  Can we factor this out
> by having wrappers that do nothing more than pass in the table and
> version, and put the kzalloc and ioremap in a shared back-end?
>
> We're so close I can taste it!  I can't wait to see all this work come
> to fruition.
>
> Bjorn

2016-12-01 19:26:37

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/01/2016 02:20 PM, Mark Salter wrote:
> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:

>>> + csr = &xgene_v1_csr_res[root->segment];
>> This makes me nervous because root->segment comes from the ACPI _SEG,
>> and if firmware gives us junk in _SEG, we will reference something in
>> the weeds.
>
> The SoC provide some number of RC bridges, each with a different base
> for some mmio registers. Even if segment is legitimate in MCFG, there
> is still a problem if a platform doesn't use the segment ordering
> implied by the code. But the PNP0A03 _CRS does have this base address
> as the first memory resource, so we could get it from there and not
> have hard-coded addresses and implied ording in the quirk code.
>
> I have tested a modified version of these quirks using this to
> get the CSR base and it works on the 3 different platforms I have
> access to.
>
> static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
> {
> struct acpi_device *adev = to_acpi_device(dev);
> unsigned long flags;
> struct list_head list;
> struct resource_entry *entry;
> int ret;
>
> INIT_LIST_HEAD(&list);
> flags = IORESOURCE_MEM;
> ret = acpi_dev_get_resources(adev, &list,
> acpi_dev_filter_resource_type_cb,
> (void *)flags);
> if (ret < 0) {
> dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
> return ret;
> } else if (ret == 0) {
> dev_err(dev, "no memory resources present in _CRS\n");
> return -EINVAL;
> }
>
> entry = list_first_entry(&list, struct resource_entry, node);
> *r = *entry->res;
> acpi_dev_free_resource_list(&list);
> return 0;
> }

This seems a lot safer. At some point trusting firmware to provide the
correct _CRS for the RC in use is better than hard coding for every
possible implementation configuration of an X-Gene SoC.

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-01 19:41:31

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:

> > > +static struct resource xgene_v1_csr_res[] = {
> > > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> > > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> > > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> > > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> > > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> > I assume these ranges are not the actual ECAM space, right?
> > If they *were* ECAM, I assume you would have included them in the
> > quirk itself in the mcfg_quirks[] table.
>
> These are base addresses for some RC mmio registers.
> >
> > >
> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> > > +{
> > > + struct acpi_device *adev = to_acpi_device(cfg->parent);
> > > + struct acpi_pci_root *root = acpi_driver_data(adev);
> > > + struct device *dev = cfg->parent;
> > > + struct xgene_pcie_port *port;
> > > + struct resource *csr;
> > > +
> > > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> > > + if (!port)
> > > + return -ENOMEM;
> > > +
> > > + csr = &xgene_v1_csr_res[root->segment];
> > This makes me nervous because root->segment comes from the ACPI _SEG,
> > and if firmware gives us junk in _SEG, we will reference something in
> > the weeds.
>
> The SoC provide some number of RC bridges, each with a different base
> for some mmio registers. Even if segment is legitimate in MCFG, there
> is still a problem if a platform doesn't use the segment ordering
> implied by the code. But the PNP0A03 _CRS does have this base address
> as the first memory resource, so we could get it from there and not
> have hard-coded addresses and implied ording in the quirk code.

I'm confused. Doesn't the current code treat every item in PNP0A03
_CRS as a window? Do you mean the first resource is handled
differently somehow? The Consumer/Producer bit could allow us to do
this by marking the RC MMIO space as "Consumer", but I didn't think
that strategy was quite working yet.

> I have tested a modified version of these quirks using this to
> get the CSR base and it works on the 3 different platforms I have
> access to.
>
> static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
> {
> struct acpi_device *adev = to_acpi_device(dev);
> unsigned long flags;
> struct list_head list;
> struct resource_entry *entry;
> int ret;
>
> INIT_LIST_HEAD(&list);
> flags = IORESOURCE_MEM;
> ret = acpi_dev_get_resources(adev, &list,
> ?????acpi_dev_filter_resource_type_cb,
> ?????(void *)flags);
> if (ret < 0) {
> dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
> return ret;
> } else if (ret == 0) {
> dev_err(dev, "no memory resources present in _CRS\n");
> return -EINVAL;
> }
>
> entry = list_first_entry(&list, struct resource_entry, node);
> *r = *entry->res;
> acpi_dev_free_resource_list(&list);
> return 0;
> }

The code above is identical to acpi_get_rc_addr(), which is used in
the acpi_get_rc_resources() path by the other quirks. Can you use
that path, too, instead of reimplementing it here?

2016-12-01 19:58:45

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 1, 2016 at 11:17 AM, Jon Masters <[email protected]> wrote:
> On 12/01/2016 10:08 AM, Mark Salter wrote:
>> On Wed, 2016-11-30 at 15:42 -0800, Duc Dang wrote:
>
>>> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>>> +{
>>> + struct acpi_device *adev = to_acpi_device(cfg->parent);
>>> + struct acpi_pci_root *root = acpi_driver_data(adev);
>>> + struct device *dev = cfg->parent;
>>> + struct xgene_pcie_port *port;
>>> + struct resource *csr;
>>> +
>>> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>>> + if (!port)
>>> + return -ENOMEM;
>>> +
>>> + csr = &xgene_v1_csr_res[root->segment];
>>
>> This hard-coded assumption that segment N uses controller N breaks
>> for m400 where segment 0 is using controller 3.

I think the latest firmware released from us a few months back use
segment 3 for PCIe controller 3 in MCFG table.
>
> This seems very fragile. So in addition to Bjorn's comment about not
> trusting firmware provided data for the segment offset in the CSR list,
> you will want to also determine the controller from the ACPI tree. The
> existing code walks the ACPI hierarchy and finds the CSR that way.
> Obviously, the goal is to avoid that in the latest incarnation, but you
> could still determine which controller matches a given device.

Yes, I will look into that more.

>
> Jon.
>
> --
> Computer Architect | Sent from my Fedora powered laptop
>
Regards,
Duc Dang.

2016-12-01 22:10:43

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 1, 2016 at 11:41 AM, Bjorn Helgaas <[email protected]> wrote:
> On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
>> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
>> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
>
>> > > +static struct resource xgene_v1_csr_res[] = {
>> > > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
>> > > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
>> > > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
>> > > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
>> > > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
>> > I assume these ranges are not the actual ECAM space, right?
>> > If they *were* ECAM, I assume you would have included them in the
>> > quirk itself in the mcfg_quirks[] table.
>>
>> These are base addresses for some RC mmio registers.
>> >
>> > >
>> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> > > +{
>> > > + struct acpi_device *adev = to_acpi_device(cfg->parent);
>> > > + struct acpi_pci_root *root = acpi_driver_data(adev);
>> > > + struct device *dev = cfg->parent;
>> > > + struct xgene_pcie_port *port;
>> > > + struct resource *csr;
>> > > +
>> > > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> > > + if (!port)
>> > > + return -ENOMEM;
>> > > +
>> > > + csr = &xgene_v1_csr_res[root->segment];
>> > This makes me nervous because root->segment comes from the ACPI _SEG,
>> > and if firmware gives us junk in _SEG, we will reference something in
>> > the weeds.
>>
>> The SoC provide some number of RC bridges, each with a different base
>> for some mmio registers. Even if segment is legitimate in MCFG, there
>> is still a problem if a platform doesn't use the segment ordering
>> implied by the code. But the PNP0A03 _CRS does have this base address
>> as the first memory resource, so we could get it from there and not
>> have hard-coded addresses and implied ording in the quirk code.
>
> I'm confused. Doesn't the current code treat every item in PNP0A03
> _CRS as a window? Do you mean the first resource is handled
> differently somehow? The Consumer/Producer bit could allow us to do
> this by marking the RC MMIO space as "Consumer", but I didn't think
> that strategy was quite working yet.

The first resource is defined like below. It was introduced long time
ago to use with older version of X-Gene ECAM quirks.
Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )

The resource discovered during booting will be like following:
[ 0.728117] ACPI: MCFG table detected, 1 entries
[ 0.735330] ACPI: Power Resource [SCVR] (on)
[ 0.767478] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
[ 0.774013] acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM
ClockPM Segments MSI]
[ 0.782864] acpi PNP0A08:00: _OSC: OS now controls [PCIeHotplug PME
AER PCIeCapability]
[ 0.791331] acpi PNP0A08:00: MCFG quirk: ECAM at [mem
0xe0d0000000-0xe0dfffffff] for [bus 00-ff] with xgene_v1_pcie_ecam_ops
[ 0.803207] acpi PNP0A08:00: ECAM at [mem
0xe0d0000000-0xe0dfffffff] for [bus 00-ff]
[ 0.811399] Remapped I/O 0x000000e010000000 to [io 0x0000-0xffff window]
[ 0.818678] PCI host bridge to bus 0000:00
[ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
[ 0.830257] pci_bus 0000:00: root bus resource [io 0x0000-0xffff
window] (bus address [0x10000000-0x1000ffff])
[ 0.840917] pci_bus 0000:00: root bus resource [mem
0xe040000000-0xe07fffffff window] (bus address
[0x40000000-0x7fffffff])
[ 0.852675] pci_bus 0000:00: root bus resource [mem
0xf000000000-0xffffffffff window]
[ 0.860950] pci_bus 0000:00: root bus resource [bus 00-ff]
[ 0.866761] pci 0000:00:00.0: [10e8:e004] type 01 class 0x060400
[ 0.873175] pci 0000:00:00.0: supports D1 D2
[ 0.877980] pci 0000:01:00.0: [15b3:1003] type 00 class 0x020000
[ 0.884597] pci 0000:01:00.0: reg 0x10: [mem 0xe040000000-0xe0400fffff 64bit]
[ 0.892337] pci 0000:01:00.0: reg 0x18: [mem
0xe042000000-0xe043ffffff 64bit pref]
[ 0.900694] pci 0000:01:00.0: reg 0x30: [mem 0xfff00000-0xffffffff pref]
[ 0.923853] pci_bus 0000:00: on NUMA node 0
[ 0.928269] pci 0000:00:00.0: BAR 15: assigned [mem
0xf000000000-0xf001ffffff 64bit pref]
[ 0.936908] pci 0000:00:00.0: BAR 14: assigned [mem
0xe040000000-0xe0401fffff]
[ 0.944539] pci 0000:01:00.0: BAR 2: assigned [mem
0xf000000000-0xf001ffffff 64bit pref]
[ 0.953210] pci 0000:01:00.0: BAR 0: assigned [mem
0xe040000000-0xe0400fffff 64bit]
[ 0.961430] pci 0000:01:00.0: BAR 6: assigned [mem
0xe040100000-0xe0401fffff pref]
[ 0.969438] pci 0000:00:00.0: PCI bridge to [bus 01]
[ 0.974690] pci 0000:00:00.0: bridge window [mem 0xe040000000-0xe0401fffff]
[ 0.982231] pci 0000:00:00.0: bridge window [mem
0xf000000000-0xf001ffffff 64bit pref]

>
>> I have tested a modified version of these quirks using this to
>> get the CSR base and it works on the 3 different platforms I have
>> access to.
>>
>> static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
>> {
>> struct acpi_device *adev = to_acpi_device(dev);
>> unsigned long flags;
>> struct list_head list;
>> struct resource_entry *entry;
>> int ret;
>>
>> INIT_LIST_HEAD(&list);
>> flags = IORESOURCE_MEM;
>> ret = acpi_dev_get_resources(adev, &list,
>> acpi_dev_filter_resource_type_cb,
>> (void *)flags);
>> if (ret < 0) {
>> dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
>> return ret;
>> } else if (ret == 0) {
>> dev_err(dev, "no memory resources present in _CRS\n");
>> return -EINVAL;
>> }
>>
>> entry = list_first_entry(&list, struct resource_entry, node);
>> *r = *entry->res;
>> acpi_dev_free_resource_list(&list);
>> return 0;
>> }
>
> The code above is identical to acpi_get_rc_addr(), which is used in
> the acpi_get_rc_resources() path by the other quirks. Can you use
> that path, too, instead of reimplementing it here?

I will post a new version using acpi_get_rc_resources and includes
other changes that you suggested.

Regards,
Duc Dang.

2016-12-01 22:47:36

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

Thanks Duc! I will test quickly if you have it today :)

--
Computer Architect | Sent from my 64-bit #ARM Powered phone

> On Dec 1, 2016, at 17:10, Duc Dang <[email protected]> wrote:
>
>> On Thu, Dec 1, 2016 at 11:41 AM, Bjorn Helgaas <[email protected]> wrote:
>>> On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
>>>> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
>>>> On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
>>
>>>>> +static struct resource xgene_v1_csr_res[] = {
>>>>> + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
>>>>> + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
>>>>> + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
>>>>> + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
>>>>> + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
>>>> I assume these ranges are not the actual ECAM space, right?
>>>> If they *were* ECAM, I assume you would have included them in the
>>>> quirk itself in the mcfg_quirks[] table.
>>>
>>> These are base addresses for some RC mmio registers.
>>>>
>>>>>
>>>>> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>>>>> +{
>>>>> + struct acpi_device *adev = to_acpi_device(cfg->parent);
>>>>> + struct acpi_pci_root *root = acpi_driver_data(adev);
>>>>> + struct device *dev = cfg->parent;
>>>>> + struct xgene_pcie_port *port;
>>>>> + struct resource *csr;
>>>>> +
>>>>> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>>>>> + if (!port)
>>>>> + return -ENOMEM;
>>>>> +
>>>>> + csr = &xgene_v1_csr_res[root->segment];
>>>> This makes me nervous because root->segment comes from the ACPI _SEG,
>>>> and if firmware gives us junk in _SEG, we will reference something in
>>>> the weeds.
>>>
>>> The SoC provide some number of RC bridges, each with a different base
>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>> is still a problem if a platform doesn't use the segment ordering
>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>> as the first memory resource, so we could get it from there and not
>>> have hard-coded addresses and implied ording in the quirk code.
>>
>> I'm confused. Doesn't the current code treat every item in PNP0A03
>> _CRS as a window? Do you mean the first resource is handled
>> differently somehow? The Consumer/Producer bit could allow us to do
>> this by marking the RC MMIO space as "Consumer", but I didn't think
>> that strategy was quite working yet.
>
> The first resource is defined like below. It was introduced long time
> ago to use with older version of X-Gene ECAM quirks.
> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
>
> The resource discovered during booting will be like following:
> [ 0.728117] ACPI: MCFG table detected, 1 entries
> [ 0.735330] ACPI: Power Resource [SCVR] (on)
> [ 0.767478] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> [ 0.774013] acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM
> ClockPM Segments MSI]
> [ 0.782864] acpi PNP0A08:00: _OSC: OS now controls [PCIeHotplug PME
> AER PCIeCapability]
> [ 0.791331] acpi PNP0A08:00: MCFG quirk: ECAM at [mem
> 0xe0d0000000-0xe0dfffffff] for [bus 00-ff] with xgene_v1_pcie_ecam_ops
> [ 0.803207] acpi PNP0A08:00: ECAM at [mem
> 0xe0d0000000-0xe0dfffffff] for [bus 00-ff]
> [ 0.811399] Remapped I/O 0x000000e010000000 to [io 0x0000-0xffff window]
> [ 0.818678] PCI host bridge to bus 0000:00
> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
> [ 0.830257] pci_bus 0000:00: root bus resource [io 0x0000-0xffff
> window] (bus address [0x10000000-0x1000ffff])
> [ 0.840917] pci_bus 0000:00: root bus resource [mem
> 0xe040000000-0xe07fffffff window] (bus address
> [0x40000000-0x7fffffff])
> [ 0.852675] pci_bus 0000:00: root bus resource [mem
> 0xf000000000-0xffffffffff window]
> [ 0.860950] pci_bus 0000:00: root bus resource [bus 00-ff]
> [ 0.866761] pci 0000:00:00.0: [10e8:e004] type 01 class 0x060400
> [ 0.873175] pci 0000:00:00.0: supports D1 D2
> [ 0.877980] pci 0000:01:00.0: [15b3:1003] type 00 class 0x020000
> [ 0.884597] pci 0000:01:00.0: reg 0x10: [mem 0xe040000000-0xe0400fffff 64bit]
> [ 0.892337] pci 0000:01:00.0: reg 0x18: [mem
> 0xe042000000-0xe043ffffff 64bit pref]
> [ 0.900694] pci 0000:01:00.0: reg 0x30: [mem 0xfff00000-0xffffffff pref]
> [ 0.923853] pci_bus 0000:00: on NUMA node 0
> [ 0.928269] pci 0000:00:00.0: BAR 15: assigned [mem
> 0xf000000000-0xf001ffffff 64bit pref]
> [ 0.936908] pci 0000:00:00.0: BAR 14: assigned [mem
> 0xe040000000-0xe0401fffff]
> [ 0.944539] pci 0000:01:00.0: BAR 2: assigned [mem
> 0xf000000000-0xf001ffffff 64bit pref]
> [ 0.953210] pci 0000:01:00.0: BAR 0: assigned [mem
> 0xe040000000-0xe0400fffff 64bit]
> [ 0.961430] pci 0000:01:00.0: BAR 6: assigned [mem
> 0xe040100000-0xe0401fffff pref]
> [ 0.969438] pci 0000:00:00.0: PCI bridge to [bus 01]
> [ 0.974690] pci 0000:00:00.0: bridge window [mem 0xe040000000-0xe0401fffff]
> [ 0.982231] pci 0000:00:00.0: bridge window [mem
> 0xf000000000-0xf001ffffff 64bit pref]
>
>>
>>> I have tested a modified version of these quirks using this to
>>> get the CSR base and it works on the 3 different platforms I have
>>> access to.
>>>
>>> static int xgene_pcie_get_csr(struct device *dev, struct resource *r)
>>> {
>>> struct acpi_device *adev = to_acpi_device(dev);
>>> unsigned long flags;
>>> struct list_head list;
>>> struct resource_entry *entry;
>>> int ret;
>>>
>>> INIT_LIST_HEAD(&list);
>>> flags = IORESOURCE_MEM;
>>> ret = acpi_dev_get_resources(adev, &list,
>>> acpi_dev_filter_resource_type_cb,
>>> (void *)flags);
>>> if (ret < 0) {
>>> dev_err(dev, "failed to parse _CRS, error: %d\n", ret);
>>> return ret;
>>> } else if (ret == 0) {
>>> dev_err(dev, "no memory resources present in _CRS\n");
>>> return -EINVAL;
>>> }
>>>
>>> entry = list_first_entry(&list, struct resource_entry, node);
>>> *r = *entry->res;
>>> acpi_dev_free_resource_list(&list);
>>> return 0;
>>> }
>>
>> The code above is identical to acpi_get_rc_addr(), which is used in
>> the acpi_get_rc_resources() path by the other quirks. Can you use
>> that path, too, instead of reimplementing it here?
>
> I will post a new version using acpi_get_rc_resources and includes
> other changes that you suggested.
>
> Regards,
> Duc Dang.

2016-12-01 23:07:41

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
> On Thu, Dec 1, 2016 at 11:41 AM, Bjorn Helgaas <[email protected]> wrote:
> > On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
> >> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
> >> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
> >
> >> > > +static struct resource xgene_v1_csr_res[] = {
> >> > > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
> >> > > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
> >> > > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
> >> > > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
> >> > > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
> >> > I assume these ranges are not the actual ECAM space, right?
> >> > If they *were* ECAM, I assume you would have included them in the
> >> > quirk itself in the mcfg_quirks[] table.
> >>
> >> These are base addresses for some RC mmio registers.
> >> >
> >> > >
> >> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
> >> > > +{
> >> > > + struct acpi_device *adev = to_acpi_device(cfg->parent);
> >> > > + struct acpi_pci_root *root = acpi_driver_data(adev);
> >> > > + struct device *dev = cfg->parent;
> >> > > + struct xgene_pcie_port *port;
> >> > > + struct resource *csr;
> >> > > +
> >> > > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
> >> > > + if (!port)
> >> > > + return -ENOMEM;
> >> > > +
> >> > > + csr = &xgene_v1_csr_res[root->segment];
> >> > This makes me nervous because root->segment comes from the ACPI _SEG,
> >> > and if firmware gives us junk in _SEG, we will reference something in
> >> > the weeds.
> >>
> >> The SoC provide some number of RC bridges, each with a different base
> >> for some mmio registers. Even if segment is legitimate in MCFG, there
> >> is still a problem if a platform doesn't use the segment ordering
> >> implied by the code. But the PNP0A03 _CRS does have this base address
> >> as the first memory resource, so we could get it from there and not
> >> have hard-coded addresses and implied ording in the quirk code.
> >
> > I'm confused. Doesn't the current code treat every item in PNP0A03
> > _CRS as a window? Do you mean the first resource is handled
> > differently somehow? The Consumer/Producer bit could allow us to do
> > this by marking the RC MMIO space as "Consumer", but I didn't think
> > that strategy was quite working yet.
>
> The first resource is defined like below. It was introduced long time
> ago to use with older version of X-Gene ECAM quirks.
> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )

> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]

I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
is available for use by devices on bus 0000:00, but I think you're
saying it is consumed by the bridge itself, not forwarded down to PCI.

What's in your /proc/iomem? I see that your quirks do call
devm_ioremap_resource(), which calls devm_request_mem_region()
internally, so the driver does at least request that region, which
should keep us from assigning it to PCI devices.

But it still isn't quite right to tell the PCI core that the region is
available on the root bus.

Bjorn

2016-12-01 23:22:49

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <[email protected]> wrote:
> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
>> On Thu, Dec 1, 2016 at 11:41 AM, Bjorn Helgaas <[email protected]> wrote:
>> > On Thu, Dec 01, 2016 at 02:20:53PM -0500, Mark Salter wrote:
>> >> On Thu, 2016-12-01 at 12:33 -0600, Bjorn Helgaas wrote:
>> >> > On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
>> >
>> >> > > +static struct resource xgene_v1_csr_res[] = {
>> >> > > + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
>> >> > > + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
>> >> > I assume these ranges are not the actual ECAM space, right?
>> >> > If they *were* ECAM, I assume you would have included them in the
>> >> > quirk itself in the mcfg_quirks[] table.
>> >>
>> >> These are base addresses for some RC mmio registers.
>> >> >
>> >> > >
>> >> > > +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> >> > > +{
>> >> > > + struct acpi_device *adev = to_acpi_device(cfg->parent);
>> >> > > + struct acpi_pci_root *root = acpi_driver_data(adev);
>> >> > > + struct device *dev = cfg->parent;
>> >> > > + struct xgene_pcie_port *port;
>> >> > > + struct resource *csr;
>> >> > > +
>> >> > > + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> >> > > + if (!port)
>> >> > > + return -ENOMEM;
>> >> > > +
>> >> > > + csr = &xgene_v1_csr_res[root->segment];
>> >> > This makes me nervous because root->segment comes from the ACPI _SEG,
>> >> > and if firmware gives us junk in _SEG, we will reference something in
>> >> > the weeds.
>> >>
>> >> The SoC provide some number of RC bridges, each with a different base
>> >> for some mmio registers. Even if segment is legitimate in MCFG, there
>> >> is still a problem if a platform doesn't use the segment ordering
>> >> implied by the code. But the PNP0A03 _CRS does have this base address
>> >> as the first memory resource, so we could get it from there and not
>> >> have hard-coded addresses and implied ording in the quirk code.
>> >
>> > I'm confused. Doesn't the current code treat every item in PNP0A03
>> > _CRS as a window? Do you mean the first resource is handled
>> > differently somehow? The Consumer/Producer bit could allow us to do
>> > this by marking the RC MMIO space as "Consumer", but I didn't think
>> > that strategy was quite working yet.
>>
>> The first resource is defined like below. It was introduced long time
>> ago to use with older version of X-Gene ECAM quirks.
>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
>
>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>
> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
> is available for use by devices on bus 0000:00, but I think you're
> saying it is consumed by the bridge itself, not forwarded down to PCI.
>
> What's in your /proc/iomem? I see that your quirks do call
> devm_ioremap_resource(), which calls devm_request_mem_region()
> internally, so the driver does at least request that region, which
> should keep us from assigning it to PCI devices.
>
> But it still isn't quite right to tell the PCI core that the region is
> available on the root bus.

This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
region is consumed completely.
1f2b0000-1f2bffff : PCI Bus 0000:00
1f2b0000-1f2bffff : PCIe CSR

e040000000-e07fffffff : PCI Bus 0000:00
e040000000-e0401fffff : PCI Bus 0000:01
e040000000-e0400fffff : 0000:01:00.0
e040000000-e0400fffff : mlx4_core
e040100000-e0401fffff : 0000:01:00.0
e0d0000000-e0dfffffff : PCI ECAM
f000000000-ffffffffff : PCI Bus 0000:00
f000000000-f001ffffff : PCI Bus 0000:01
f000000000-f001ffffff : 0000:01:00.0
f000000000-f001ffffff : mlx4_core

Using hard-coded resources for mmio space make the quirk rely on the
segment number passing from the firmware. Using Mark's method or
acpi_get_rc_resource can discover the mmio space and consume all of
the space, but as you mentioned, it leaves the defect that PCI core
considers the mmio space as available resource for secondary devices
although it will never allocate the mmio space to secondary devices as
the RC already reserves and consumes all of the space.

>
> Bjorn
>
Regards,
Duc Dang.

2016-12-02 02:35:29

by Duc Dang

[permalink] [raw]
Subject: [PATCH v4 1/1] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

PCIe controllers in X-Gene SoCs is not ECAM compliant: software
needs to configure additional controller's register to address
device at bus:dev:function.

The quirk will discover controller MMIO register space and configure
controller registers to select and address the target secondary device.

The quirk will only be applied for X-Gene PCIe MCFG table with
OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).

Signed-off-by: Duc Dang <[email protected]>
---
v4:
- Rebase on top of pci/ecam tree
- Introduce xgene_get_csr_resource to discover MMIO register space
(per Mark's test code).
- Refactor .init functions for pci_ecam_ops to reuse common code
- Introduce pcie_bus_to_port to extract X-Gene 'port' information
from 'bus' device.
- Kconfig/Makefile changes to only compile required code for ACPI
v3:
- Rebase on top of pci/ecam-v6 tree.
- Use DEFINE_RES_MEM_NAMED to declare controller register space
with name "PCIe CSR"
v2:
RFC v2: https://patchwork.ozlabs.org/patch/686846/
v1:
RFC v1: https://patchwork.kernel.org/patch/9337115/
---
drivers/acpi/pci_mcfg.c | 25 +++++++++
drivers/pci/host/Kconfig | 4 +-
drivers/pci/host/Makefile | 2 +-
drivers/pci/host/pci-xgene.c | 127 ++++++++++++++++++++++++++++++++++++++++---
include/linux/pci-ecam.h | 2 +
5 files changed, 150 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index d34d196..7319188 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -108,6 +108,31 @@ struct mcfg_fixup {
THUNDER_ECAM_QUIRK(2, 11),
THUNDER_ECAM_QUIRK(2, 12),
THUNDER_ECAM_QUIRK(2, 13),
+
+#define XGENE_V1_ECAM_MCFG(rev, seg) \
+ {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
+ &xgene_v1_pcie_ecam_ops }
+#define XGENE_V2_ECAM_MCFG(rev, seg) \
+ {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
+ &xgene_v2_pcie_ecam_ops }
+ /* X-Gene SoC with v1 PCIe controller */
+ XGENE_V1_ECAM_MCFG(1, 0),
+ XGENE_V1_ECAM_MCFG(1, 1),
+ XGENE_V1_ECAM_MCFG(1, 2),
+ XGENE_V1_ECAM_MCFG(1, 3),
+ XGENE_V1_ECAM_MCFG(1, 4),
+ XGENE_V1_ECAM_MCFG(2, 0),
+ XGENE_V1_ECAM_MCFG(2, 1),
+ XGENE_V1_ECAM_MCFG(2, 2),
+ XGENE_V1_ECAM_MCFG(2, 3),
+ XGENE_V1_ECAM_MCFG(2, 4),
+ /* X-Gene SoC with v2.1 PCIe controller */
+ XGENE_V2_ECAM_MCFG(3, 0),
+ XGENE_V2_ECAM_MCFG(3, 1),
+ /* X-Gene SoC with v2.2 PCIe controller */
+ XGENE_V2_ECAM_MCFG(4, 0),
+ XGENE_V2_ECAM_MCFG(4, 1),
+ XGENE_V2_ECAM_MCFG(4, 2),
};

static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index c983892..1fb5518 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -133,8 +133,8 @@ config PCIE_XILINX

config PCI_XGENE
bool "X-Gene PCIe controller"
- depends on ARCH_XGENE
- depends on OF
+ depends on ARM64
+ depends on OF || (ACPI && PCI_QUIRKS)
select PCIEPORTBUS
help
Say Y here if you want internal PCI support on APM X-Gene SoC.
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 639494a..6cc84b4 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
obj-$(CONFIG_PCIE_XILINX_NWL) += pcie-xilinx-nwl.o
-obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
+obj-$(CONFIG_ARM64) += pci-xgene.o
obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index 1de23d7..6edcac7 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -27,6 +27,8 @@
#include <linux/of_irq.h>
#include <linux/of_pci.h>
#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

@@ -64,7 +66,9 @@
/* PCIe IP version */
#define XGENE_PCIE_IP_VER_UNKN 0
#define XGENE_PCIE_IP_VER_1 1
+#define XGENE_PCIE_IP_VER_2 2

+#if defined(CONFIG_PCI_XGENE) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
struct xgene_pcie_port {
struct device_node *node;
struct device *dev;
@@ -91,13 +95,24 @@ static inline u32 pcie_bar_low_val(u32 addr, u32 flags)
return (addr & PCI_BASE_ADDRESS_MEM_MASK) | flags;
}

+static inline struct xgene_pcie_port *pcie_bus_to_port(struct pci_bus *bus)
+{
+ struct pci_config_window *cfg;
+
+ if (acpi_disabled)
+ return (struct xgene_pcie_port *)(bus->sysdata);
+
+ cfg = bus->sysdata;
+ return (struct xgene_pcie_port *)(cfg->priv);
+}
+
/*
* When the address bit [17:16] is 2'b01, the Configuration access will be
* treated as Type 1 and it will be forwarded to external PCIe device.
*/
static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
{
- struct xgene_pcie_port *port = bus->sysdata;
+ struct xgene_pcie_port *port = pcie_bus_to_port(bus);

if (bus->number >= (bus->primary + 1))
return port->cfg_base + AXI_EP_CFG_ACCESS;
@@ -111,7 +126,7 @@ static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
*/
static void xgene_pcie_set_rtdid_reg(struct pci_bus *bus, uint devfn)
{
- struct xgene_pcie_port *port = bus->sysdata;
+ struct xgene_pcie_port *port = pcie_bus_to_port(bus);
unsigned int b, d, f;
u32 rtdid_val = 0;

@@ -158,7 +173,7 @@ static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 *val)
{
- struct xgene_pcie_port *port = bus->sysdata;
+ struct xgene_pcie_port *port = pcie_bus_to_port(bus);

if (pci_generic_config_read32(bus, devfn, where & ~0x3, 4, val) !=
PCIBIOS_SUCCESSFUL)
@@ -182,13 +197,104 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,

return PCIBIOS_SUCCESSFUL;
}
+#endif

-static struct pci_ops xgene_pcie_ops = {
- .map_bus = xgene_pcie_map_bus,
- .read = xgene_pcie_config_read32,
- .write = pci_generic_config_write32,
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+static int xgene_get_csr_resource(struct acpi_device *adev,
+ struct resource *res)
+{
+ struct device *dev = &adev->dev;
+ struct resource_entry *entry;
+ struct list_head list;
+ unsigned long flags;
+ int ret;
+
+ INIT_LIST_HEAD(&list);
+ flags = IORESOURCE_MEM;
+ ret = acpi_dev_get_resources(adev, &list,
+ acpi_dev_filter_resource_type_cb,
+ (void *) flags);
+ if (ret < 0) {
+ dev_err(dev, "failed to parse _CRS method, error code %d\n",
+ ret);
+ return ret;
+ }
+
+ if (ret == 0) {
+ dev_err(dev, "no IO and memory resources present in _CRS\n");
+ return -EINVAL;
+ }
+
+ entry = list_first_entry(&list, struct resource_entry, node);
+ *res = *entry->res;
+ acpi_dev_free_resource_list(&list);
+ return 0;
+}
+
+static int xgene_pcie_ecam_init(struct pci_config_window *cfg, u32 ipversion)
+{
+ struct acpi_device *adev = to_acpi_device(cfg->parent);
+ struct device *dev = cfg->parent;
+ struct xgene_pcie_port *port;
+ struct resource csr;
+ int ret;
+
+ port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ ret = xgene_get_csr_resource(adev, &csr);
+ if (ret) {
+ dev_err(dev, "can't get CSR resource\n");
+ kfree(port);
+ return ret;
+ }
+ port->csr_base = devm_ioremap_resource(dev, &csr);
+ if (IS_ERR(port->csr_base)) {
+ kfree(port);
+ return -ENOMEM;
+ }
+
+ port->cfg_base = cfg->win;
+ port->version = ipversion;
+
+ cfg->priv = port;
+
+ return 0;
+}
+
+static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
+{
+ return xgene_pcie_ecam_init(cfg, XGENE_PCIE_IP_VER_1);
+}
+
+struct pci_ecam_ops xgene_v1_pcie_ecam_ops = {
+ .bus_shift = 16,
+ .init = xgene_v1_pcie_ecam_init,
+ .pci_ops = {
+ .map_bus = xgene_pcie_map_bus,
+ .read = xgene_pcie_config_read32,
+ .write = pci_generic_config_write,
+ }
+};
+
+static int xgene_v2_pcie_ecam_init(struct pci_config_window *cfg)
+{
+ return xgene_pcie_ecam_init(cfg, XGENE_PCIE_IP_VER_2);
+}
+
+struct pci_ecam_ops xgene_v2_pcie_ecam_ops = {
+ .bus_shift = 16,
+ .init = xgene_v2_pcie_ecam_init,
+ .pci_ops = {
+ .map_bus = xgene_pcie_map_bus,
+ .read = xgene_pcie_config_read32,
+ .write = pci_generic_config_write,
+ }
};
+#endif

+#if defined(CONFIG_PCI_XGENE)
static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr,
u32 flags, u64 size)
{
@@ -521,6 +627,12 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port,
return 0;
}

+static struct pci_ops xgene_pcie_ops = {
+ .map_bus = xgene_pcie_map_bus,
+ .read = xgene_pcie_config_read32,
+ .write = pci_generic_config_write32,
+};
+
static int xgene_pcie_probe_bridge(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -591,3 +703,4 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev)
.probe = xgene_pcie_probe_bridge,
};
builtin_platform_driver(xgene_pcie_driver);
+#endif
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 00eb8eb..f0d2b94 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -64,6 +64,8 @@ void __iomem *pci_ecam_map_bus(struct pci_bus *bus, unsigned int devfn,
extern struct pci_ecam_ops hisi_pcie_ops; /* HiSilicon */
extern struct pci_ecam_ops thunder_pem_ecam_ops; /* Cavium ThunderX 1.x & 2.x */
extern struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */
+extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */
+extern struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */
#endif

#ifdef CONFIG_PCI_HOST_GENERIC
--
1.9.1

2016-12-02 02:52:56

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 1, 2016 at 10:33 AM, Bjorn Helgaas <[email protected]> wrote:
> Hi Duc,
>
> On Wed, Nov 30, 2016 at 03:42:53PM -0800, Duc Dang wrote:
>> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
>> needs to configure additional controller's register to address
>> device at bus:dev:function.
>>
>> The quirk will only be applied for X-Gene PCIe MCFG table with
>> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>>
>> The quirk declares the X-Gene PCIe controller register space as 64KB
>> fixed memory resource with name "PCIe CSR". This name will be showed
>> next to the resource range in the output of "cat /proc/iomem".
>>
>> Signed-off-by: Duc Dang <[email protected]>
>> ---
>> v3:
>> - Rebase on top of pci/ecam-v6 tree.
>> - Use DEFINE_RES_MEM_NAMED to declare controller register space
>> with name "PCIe CSR"
>> v2:
>> RFC v2: https://patchwork.ozlabs.org/patch/686846/
>> v1:
>> RFC v1: https://patchwork.kernel.org/patch/9337115/
>>
>> drivers/acpi/pci_mcfg.c | 31 ++++++++
>> drivers/pci/host/pci-xgene.c | 165 ++++++++++++++++++++++++++++++++++++++++++-
>> include/linux/pci-ecam.h | 7 ++
>> 3 files changed, 200 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
>> index ac21db3..eb6125b 100644
>> --- a/drivers/acpi/pci_mcfg.c
>> +++ b/drivers/acpi/pci_mcfg.c
>> @@ -57,6 +57,37 @@ struct mcfg_fixup {
>> { "QCOM ", "QDF2432 ", 1, 5, MCFG_BUS_ANY, &pci_32b_ops },
>> { "QCOM ", "QDF2432 ", 1, 6, MCFG_BUS_ANY, &pci_32b_ops },
>> { "QCOM ", "QDF2432 ", 1, 7, MCFG_BUS_ANY, &pci_32b_ops },
>> +
>> +#ifdef CONFIG_PCI_XGENE
>
> As you've no doubt noticed, I'm proposing to add these quirks without
> CONFIG_PCI_XGENE, so we don't have to select each device when building
> a generic ACPI kernel.
>
> I'm also proposing some Kconfig and Makefile changes so we don't build
> the platform driver part in a generic ACPI kernel (unless we *also*
> explicitly select the platform driver).
>
> Here's an example:
> https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/ecam&id=f80edf4d6c05

I made similar changes in v4 patch. The ECAM quirk will be built when
ACPI and PCI_QUIRKS are enabled.

When building for DT only, the ECAM quirk won't be compiled.
>
>> +#define XGENE_V1_ECAM_MCFG(rev, seg) \
>> + {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
>> + &xgene_v1_pcie_ecam_ops }
>> +#define XGENE_V2_1_ECAM_MCFG(rev, seg) \
>> + {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
>> + &xgene_v2_1_pcie_ecam_ops }
>> +#define XGENE_V2_2_ECAM_MCFG(rev, seg) \
>> + {"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
>> + &xgene_v2_2_pcie_ecam_ops }
>> +
>> + /* X-Gene SoC with v1 PCIe controller */
>> + XGENE_V1_ECAM_MCFG(1, 0),
>> + XGENE_V1_ECAM_MCFG(1, 1),
>
>> @@ -64,6 +66,7 @@
>> /* PCIe IP version */
>> #define XGENE_PCIE_IP_VER_UNKN 0
>> #define XGENE_PCIE_IP_VER_1 1
>> +#define XGENE_PCIE_IP_VER_2 2
>
> This isn't used anywhere, which makes me wonder whether it's worth
> keeping it.

V2 controller will use this XGENE_PCIE_IP_VER_2 (port->version =
XGENE_PCIE_IP_VER_2). This will be used to indicate that the
controller is V2, and to enable configuration request retry status
feature (by not disable it like V1 controller).

>
>> static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>> {
>> - struct xgene_pcie_port *port = bus->sysdata;
>> + struct pci_config_window *cfg;
>> + struct xgene_pcie_port *port;
>> +
>> + if (acpi_disabled)
>> + port = bus->sysdata;
>> + else {
>> + cfg = bus->sysdata;
>> + port = cfg->priv;
>> + }
>
> I would really, really like to figure out a way to get rid of these
> "if (acpi_disabled)" checks sprinkled through here. Is there any way
> we can set up bus->sysdata to be the same, regardless of whether we're
> using this as a platform driver or an ACPI quirk?

Right now, I created a inline function to extract xgene_pcie_port from
pci_bus. In order to get rid of acpi_disabled, I will need to make
sysdata in DT case also point to pci_config_window structure, which
means I will need to convert and test the DT driver to use ecam ops.
It is a separate patch itself. So I think I should do it at later time
(after this ECAM quirk patch). I hope you are ok with this.

>
>> +#ifdef CONFIG_ACPI
>
> You've probably noticed that I've been using
>
> #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
>
> in this situation, mostly to make it clear that this is part of a
> workaround. I don't want people to blindly copy this stuff without
> realizing that it's a workaround for a hardware issue.

Yes, I used defined(CONFIG_PCI_QUIRKS) in v4 patch as well.
>
>> +static struct resource xgene_v1_csr_res[] = {
>> + [0] = DEFINE_RES_MEM_NAMED(0x1f2b0000UL, SZ_64K, "PCIe CSR"),
>> + [1] = DEFINE_RES_MEM_NAMED(0x1f2c0000UL, SZ_64K, "PCIe CSR"),
>> + [2] = DEFINE_RES_MEM_NAMED(0x1f2d0000UL, SZ_64K, "PCIe CSR"),
>> + [3] = DEFINE_RES_MEM_NAMED(0x1f500000UL, SZ_64K, "PCIe CSR"),
>> + [4] = DEFINE_RES_MEM_NAMED(0x1f510000UL, SZ_64K, "PCIe CSR"),
>
> I assume these ranges are not the actual ECAM space, right?
> If they *were* ECAM, I assume you would have included them in the
> quirk itself in the mcfg_quirks[] table.

Yes, as Mark also pointed out. These are MMIO space for controller registers.

>
>> +static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg)
>> +{
>> + struct acpi_device *adev = to_acpi_device(cfg->parent);
>> + struct acpi_pci_root *root = acpi_driver_data(adev);
>> + struct device *dev = cfg->parent;
>> + struct xgene_pcie_port *port;
>> + struct resource *csr;
>> +
>> + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
>> + if (!port)
>> + return -ENOMEM;
>> +
>> + csr = &xgene_v1_csr_res[root->segment];
>
> This makes me nervous because root->segment comes from the ACPI _SEG,
> and if firmware gives us junk in _SEG, we will reference something in
> the weeds.

I use Mark's approach in v4 patch (discover the MMIO space using
acpi_dev_get_resources. Both approach have pros and cons. I can also
fallback to hard-coded resource array if you want to, but as you
mentioned right above, we will need to rely on firmware for correct
_SEG information.

I need to define the function (xgene_get_csr_resource()) inside
pci-xgene.c to duplicate the code of acpi_get_rc_addr. The reason is
X-Gene firmware does not have a dedicate PNP0C02 node to declare the
resource, and if I use acpi_get_rc_resources() with "PNP0A08", I got
error due to acpi_bus_get_device() returns error.

>
>> + port->csr_base = devm_ioremap_resource(dev, csr);
>> + if (IS_ERR(port->csr_base)) {
>> + kfree(port);
>> + return -ENOMEM;
>> + }
>> +
>> + port->cfg_base = cfg->win;
>> + port->version = XGENE_PCIE_IP_VER_1;
>> +
>> + cfg->priv = port;
>
> All these init functions are almost identical. Can we factor this out
> by having wrappers that do nothing more than pass in the table and
> version, and put the kzalloc and ioremap in a shared back-end?

I refactor-ed these .init functions. And as a result, there are only 2
ecam ops left: xgene_v1_pcie_ecam_ops and xgene_v2_pcie_ecam_ops.

>
> We're so close I can taste it! I can't wait to see all this work come
> to fruition.
>
> Bjorn
Regards,
Duc Dang.

2016-12-02 04:08:32

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

Hi Bjorn, Duc, Mark,

I switched my brain to the on mode and went and read some specs, and a few
tables, so here's my 2 cents on this...

On 12/01/2016 06:22 PM, Duc Dang wrote:
> On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <[email protected]> wrote:
>> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:

>>>>> The SoC provide some number of RC bridges, each with a different base
>>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>>>> is still a problem if a platform doesn't use the segment ordering
>>>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>>>> as the first memory resource, so we could get it from there and not
>>>>> have hard-coded addresses and implied ording in the quirk code.
>>>>
>>>> I'm confused. Doesn't the current code treat every item in PNP0A03
>>>> _CRS as a window? Do you mean the first resource is handled
>>>> differently somehow? The Consumer/Producer bit could allow us to do
>>>> this by marking the RC MMIO space as "Consumer", but I didn't think
>>>> that strategy was quite working yet.

Let's see if I summarized this correctly...

1. The MMIO registers for the host bridge itself need to be described
somewhere, especially if we need to find those in a quirk and poke
them. Since those registers are very much part of the bridge device,
it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.

2. The address space covering these registers MUST be described as a
ResourceConsumer in order to avoid accidentally exposing them as
available for use by downstream devices on the PCI bus.

3. The ACPI specification allows for resources of the type "Memory32Fixed".
This is a macro that doesn't have the notion of a producer or consumer.
HOWEVER various interpretations seem to be that this could/should
default to being interpreted as a consumed region.

4. At one point, a regression was added to the kernel:

63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
host bridge itself")

Which lead to a series on conversations about what should happen
for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )

5. This resulted in the following commit reverting point 4:

2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
available on PCI bus")

Which also stated that:

"This solution will also ease the way to consolidate ACPI PCI host
bridge common code from x86, ia64 and ARM64"

End of summary.

So it seems that generally there is an aversion to having bridge resources
be described in this manner and you would like to require that they be
described e.g. using QWordMemory with a ResourceConsumer type?

BUT if we were to do that, it would break existing shipping systems since
there are quirks out there that use this form to find the base CSR:

if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
fixed32 = &acpi_res->data.fixed_memory32;
port->csr_base = ioremap(fixed32->address,
fixed32->address_length);
return AE_CTRL_TERMINATE;
}

That's what's shipping in at least RHEL(SA) today, and probably in other
distros. So if we get vendors to take that out, existing stuff will break,
which will have the downside that customers will have to choose between
whether to run a given distro or be able to use upstream kernels. In
that sense, to me, there are shipping platforms out there, which may well
be doing the "wrong" thing, but they are deployed and they are doing it.

Which makes me wonder a couple of things (I think should NOT be done):

1. What would happen if we had both. A FixedMemory32 and the same region
described again using the longer form as a consumed region. I doubt
that's legal, and the current code would still add the region if it
saw the FixedMemory32 first when walking the tree. I don't like it,
but I'm mentioning it in case that leads to some helpful thinking.

2. What would happen if we had a difference policy on arm64 for such
resources. x86 has an "exception" for accessing the config space
using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
we can make the rules for a new platform (i.e. actually prescribe
exactly what the behavior is, rather than have it not be defined).
This is of course terrible in that existing BIOS vendors and so on
won't necessarily know this when working on ARM ACPI later on.

I don't like either of these obviously. I'm hoping there's some way we
can say that this is tolerated in this one quirk (allow the use of
FixedMemory32 in this case) on the grounds that the driver claims
this bridge region and can be annotated to explain such.

Once you let us know what you prefer, we will go and update the ARM
SBBR to spell out that future platforms should not make this mistake
again. We can prescribe whatever you'd like in terms of how bridge
resources consumed by the bridge are exposed. I have spoken about
this kind of situation within MS in the past, but they didn't have
specific guidance since they don't really tolerate such quirks. I
can, however, consult them before we change the SBBR as well.

>>> The first resource is defined like below. It was introduced long time
>>> ago to use with older version of X-Gene ECAM quirks.
>>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )

Indeed. And in the case of m400, it is currently this in shipping systems:

Memory32Fixed (ReadWrite,
0x1F500000, // Address Base
0x00010000, // Address Length
)

The spec isn't clear on whether these are produced or consumed but the
implication is that these are consumed resources in most cases. Not that
this changes any of the above, but one can understand why it happened.

>>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>
>> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>> is available for use by devices on bus 0000:00, but I think you're
>> saying it is consumed by the bridge itself, not forwarded down to PCI.

Indeed.

>> What's in your /proc/iomem? I see that your quirks do call
>> devm_ioremap_resource(), which calls devm_request_mem_region()
>> internally, so the driver does at least request that region, which
>> should keep us from assigning it to PCI devices.

I'm hoping you can grant an exception on the grounds that the quirk will
keep the region from actually being used. And then somehow we document
this in the driver.

>> But it still isn't quite right to tell the PCI core that the region is
>> available on the root bus.
>
> This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
> region is consumed completely.
> 1f2b0000-1f2bffff : PCI Bus 0000:00
> 1f2b0000-1f2bffff : PCIe CSR
>
> e040000000-e07fffffff : PCI Bus 0000:00
> e040000000-e0401fffff : PCI Bus 0000:01
> e040000000-e0400fffff : 0000:01:00.0
> e040000000-e0400fffff : mlx4_core
> e040100000-e0401fffff : 0000:01:00.0
> e0d0000000-e0dfffffff : PCI ECAM
> f000000000-ffffffffff : PCI Bus 0000:00
> f000000000-f001ffffff : PCI Bus 0000:01
> f000000000-f001ffffff : 0000:01:00.0
> f000000000-f001ffffff : mlx4_core
>
> Using hard-coded resources for mmio space make the quirk rely on the
> segment number passing from the firmware. Using Mark's method or
> acpi_get_rc_resource can discover the mmio space and consume all of
> the space, but as you mentioned, it leaves the defect that PCI core
> considers the mmio space as available resource for secondary devices
> although it will never allocate the mmio space to secondary devices as
> the RC already reserves and consumes all of the space.

Indeed. It's not clean, but perhaps we can get away with it on the
grounds that there are existing systems out there and this won't
be allowed to happen again in the future :)

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-02 06:31:58

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

Bjorn,

Although I think the below still applies (that we need to leave that
Memory32Fixed for existing deployments, and this is going to result
in /proc/iomem polution), I've done some more reading of your ecam
tree and the implementation of acpi_get_rc_resources you mentioned,
and in particular how the PNP0C02 devices actually get wired up.

I would like to be able to boot upstream on existing shipping and
deployed machines that are in the field (not to mention our labs), but
there's no reason we can't *also* get APM to add a new vendor specific
PNP0C02 to the ACPI namespace in future firmware updates (for at least
their own Mustang reference boards) matching segment to CSR, as in the
case of the HiSi patches. That might then allow for some later
preference to use that for the CSR rather than getting it from the RC
device. Still, it would be ideal to boot on machines that are shipping
from HPE and others at this moment, so I am still hopeful you'll
at least allow the approach from Duc's v4 for now (4.10).

Another nasty option for later consideration could then be having
the kernel fake up any missing PNP0C02 on existing machines, but
it would need special knowledge of the platform to generate that
so as to handle the problem Mark flagged earlier (segment vs
controller mismatch on some platforms). That could be done with a
DMI quirk that matched on a specific (e.g. HPE) machine. It would
only be needed on "broken" existing machines, and could be added
post-4.10 to clean this up if you really want to do that.

That's all very nasty...

Jon.

On 12/01/2016 11:08 PM, Jon Masters wrote:
> Hi Bjorn, Duc, Mark,
>
> I switched my brain to the on mode and went and read some specs, and a few
> tables, so here's my 2 cents on this...
>
> On 12/01/2016 06:22 PM, Duc Dang wrote:
>> On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <[email protected]> wrote:
>>> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
>
>>>>>> The SoC provide some number of RC bridges, each with a different base
>>>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>>>>> is still a problem if a platform doesn't use the segment ordering
>>>>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>>>>> as the first memory resource, so we could get it from there and not
>>>>>> have hard-coded addresses and implied ording in the quirk code.
>>>>>
>>>>> I'm confused. Doesn't the current code treat every item in PNP0A03
>>>>> _CRS as a window? Do you mean the first resource is handled
>>>>> differently somehow? The Consumer/Producer bit could allow us to do
>>>>> this by marking the RC MMIO space as "Consumer", but I didn't think
>>>>> that strategy was quite working yet.
>
> Let's see if I summarized this correctly...
>
> 1. The MMIO registers for the host bridge itself need to be described
> somewhere, especially if we need to find those in a quirk and poke
> them. Since those registers are very much part of the bridge device,
> it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
>
> 2. The address space covering these registers MUST be described as a
> ResourceConsumer in order to avoid accidentally exposing them as
> available for use by downstream devices on the PCI bus.
>
> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
> This is a macro that doesn't have the notion of a producer or consumer.
> HOWEVER various interpretations seem to be that this could/should
> default to being interpreted as a consumed region.
>
> 4. At one point, a regression was added to the kernel:
>
> 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
> host bridge itself")
>
> Which lead to a series on conversations about what should happen
> for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )
>
> 5. This resulted in the following commit reverting point 4:
>
> 2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
> available on PCI bus")
>
> Which also stated that:
>
> "This solution will also ease the way to consolidate ACPI PCI host
> bridge common code from x86, ia64 and ARM64"
>
> End of summary.
>
> So it seems that generally there is an aversion to having bridge resources
> be described in this manner and you would like to require that they be
> described e.g. using QWordMemory with a ResourceConsumer type?
>
> BUT if we were to do that, it would break existing shipping systems since
> there are quirks out there that use this form to find the base CSR:
>
> if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
> fixed32 = &acpi_res->data.fixed_memory32;
> port->csr_base = ioremap(fixed32->address,
> fixed32->address_length);
> return AE_CTRL_TERMINATE;
> }
>
> That's what's shipping in at least RHEL(SA) today, and probably in other
> distros. So if we get vendors to take that out, existing stuff will break,
> which will have the downside that customers will have to choose between
> whether to run a given distro or be able to use upstream kernels. In
> that sense, to me, there are shipping platforms out there, which may well
> be doing the "wrong" thing, but they are deployed and they are doing it.
>
> Which makes me wonder a couple of things (I think should NOT be done):
>
> 1. What would happen if we had both. A FixedMemory32 and the same region
> described again using the longer form as a consumed region. I doubt
> that's legal, and the current code would still add the region if it
> saw the FixedMemory32 first when walking the tree. I don't like it,
> but I'm mentioning it in case that leads to some helpful thinking.
>
> 2. What would happen if we had a difference policy on arm64 for such
> resources. x86 has an "exception" for accessing the config space
> using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
> we can make the rules for a new platform (i.e. actually prescribe
> exactly what the behavior is, rather than have it not be defined).
> This is of course terrible in that existing BIOS vendors and so on
> won't necessarily know this when working on ARM ACPI later on.
>
> I don't like either of these obviously. I'm hoping there's some way we
> can say that this is tolerated in this one quirk (allow the use of
> FixedMemory32 in this case) on the grounds that the driver claims
> this bridge region and can be annotated to explain such.
>
> Once you let us know what you prefer, we will go and update the ARM
> SBBR to spell out that future platforms should not make this mistake
> again. We can prescribe whatever you'd like in terms of how bridge
> resources consumed by the bridge are exposed. I have spoken about
> this kind of situation within MS in the past, but they didn't have
> specific guidance since they don't really tolerate such quirks. I
> can, however, consult them before we change the SBBR as well.
>
>>>> The first resource is defined like below. It was introduced long time
>>>> ago to use with older version of X-Gene ECAM quirks.
>>>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
>
> Indeed. And in the case of m400, it is currently this in shipping systems:
>
> Memory32Fixed (ReadWrite,
> 0x1F500000, // Address Base
> 0x00010000, // Address Length
> )
>
> The spec isn't clear on whether these are produced or consumed but the
> implication is that these are consumed resources in most cases. Not that
> this changes any of the above, but one can understand why it happened.
>
>>>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>>
>>> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>>> is available for use by devices on bus 0000:00, but I think you're
>>> saying it is consumed by the bridge itself, not forwarded down to PCI.
>
> Indeed.
>
>>> What's in your /proc/iomem? I see that your quirks do call
>>> devm_ioremap_resource(), which calls devm_request_mem_region()
>>> internally, so the driver does at least request that region, which
>>> should keep us from assigning it to PCI devices.
>
> I'm hoping you can grant an exception on the grounds that the quirk will
> keep the region from actually being used. And then somehow we document
> this in the driver.
>
>>> But it still isn't quite right to tell the PCI core that the region is
>>> available on the root bus.
>>
>> This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
>> region is consumed completely.
>> 1f2b0000-1f2bffff : PCI Bus 0000:00
>> 1f2b0000-1f2bffff : PCIe CSR
>>
>> e040000000-e07fffffff : PCI Bus 0000:00
>> e040000000-e0401fffff : PCI Bus 0000:01
>> e040000000-e0400fffff : 0000:01:00.0
>> e040000000-e0400fffff : mlx4_core
>> e040100000-e0401fffff : 0000:01:00.0
>> e0d0000000-e0dfffffff : PCI ECAM
>> f000000000-ffffffffff : PCI Bus 0000:00
>> f000000000-f001ffffff : PCI Bus 0000:01
>> f000000000-f001ffffff : 0000:01:00.0
>> f000000000-f001ffffff : mlx4_core
>>
>> Using hard-coded resources for mmio space make the quirk rely on the
>> segment number passing from the firmware. Using Mark's method or
>> acpi_get_rc_resource can discover the mmio space and consume all of
>> the space, but as you mentioned, it leaves the defect that PCI core
>> considers the mmio space as available resource for secondary devices
>> although it will never allocate the mmio space to secondary devices as
>> the RC already reserves and consumes all of the space.
>
> Indeed. It's not clean, but perhaps we can get away with it on the
> grounds that there are existing systems out there and this won't
> be allowed to happen again in the future :)
>
> Jon.
>


--
Computer Architect | Sent from my Fedora powered laptop

2016-12-02 07:13:15

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v4 1/1] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/01/2016 09:27 PM, Duc Dang wrote:
> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
> needs to configure additional controller's register to address
> device at bus:dev:function.
>
> The quirk will discover controller MMIO register space and configure
> controller registers to select and address the target secondary device.
>
> The quirk will only be applied for X-Gene PCIe MCFG table with
> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>
> Signed-off-by: Duc Dang <[email protected]>

So far I've tested this on an HPE ProLiant m400 (Moonshot) cartridge
and will test it on some other reference platforms soon. Bootlog for
the m400 attached in case Bjorn wants to see the output. Here's
what I see in /proc/iomem btw on that platform:

# cat /proc/iomem
10520000-10523fff : APMC0D18:00
10520000-10523fff : APMC0D18:00
10524000-10527fff : APMC0D17:00
10540000-1054a0ff : APMC0D01:00
10546000-10546fff : APMC0D50:00
1054a000-1054a00f : APMC0D12:03
1054a000-1054a00f : APMC0D12:02
1054a000-1054a00f : APMC0D12:01
1054a000-1054a00f : APMC0D12:00
17000000-17000fff : APMC0D01:00
17001000-17001fff : APMC0D01:00
17001000-170013ff : APMC0D15:00
17001000-170013ff : APMC0D15:00
1701c000-1701cfff : APMC0D14:00
1a800000-1a800fff : APMC0D0D:00
1a800000-1a800fff : APMC0D0D:00
1c000200-1c0002ff : APMC0D06:00
1c021000-1c0210ff : APMC0D08:00
1c021000-1c02101f : serial
1c024000-1c024fff : APMC0D07:00
1f230000-1f230fff : APMC0D0D:00
1f230000-1f230fff : APMC0D0D:00
1f23d000-1f23dfff : APMC0D0D:00
1f23d000-1f23dfff : APMC0D0D:00
1f23e000-1f23efff : APMC0D0D:00
1f23e000-1f23efff : APMC0D0D:00
1f2a0000-1f31ffff : APMC0D06:00
1f500000-1f50ffff : PCI Bus 0000:00
1f500000-1f50ffff : PNP0A08:00
78800000-78800fff : APMC0D13:00
78800000-78800fff : APMC0D12:03
78800000-78800fff : APMC0D12:02
78800000-78800fff : APMC0D12:01
78800000-78800fff : APMC0D12:00
78800000-78800fff : APMC0D11:00
78800000-78800fff : APMC0D10:03
78800000-78800fff : APMC0D10:02
78800000-78800fff : APMC0D10:01
78800000-78800fff : APMC0D10:00
79000000-798fffff : APMC0D0E:00
7c000000-7c1fffff : APMC0D12:00
7c200000-7c3fffff : APMC0D12:01
7c400000-7c5fffff : APMC0D12:02
7c600000-7c7fffff : APMC0D12:03
7e000000-7e000fff : APMC0D13:00
7e200000-7e200fff : APMC0D10:03
7e200000-7e200fff : APMC0D10:02
7e200000-7e200fff : APMC0D10:01
7e200000-7e200fff : APMC0D10:00
7e600000-7e600fff : APMC0D11:00
7e700000-7e700fff : APMC0D10:03
7e700000-7e700fff : APMC0D10:02
7e700000-7e700fff : APMC0D10:01
7e700000-7e700fff : APMC0D10:00
7e720000-7e720fff : APMC0D10:03
7e720000-7e720fff : APMC0D10:02
7e720000-7e720fff : APMC0D10:01
7e720000-7e720fff : APMC0D10:00
7e800000-7e800fff : APMC0D10:00
7e840000-7e840fff : APMC0D10:01
7e880000-7e880fff : APMC0D10:02
7e8c0000-7e8c0fff : APMC0D10:03
7e930000-7e930fff : APMC0D13:00
4000000000-4001ffffff : System RAM
4000080000-4000c3ffff : Kernel code
4000db0000-400165ffff : Kernel data
40023a0000-4ff733ffff : System RAM
4ff7340000-4ff77cffff : reserved
4ff77d0000-4ff79cffff : System RAM
4ff79d0000-4ff7e7ffff : reserved
4ff7e80000-4ff7e8ffff : System RAM
4ff7e90000-4ff7efffff : reserved
4ff7f10000-4ff800ffff : reserved
4ff8010000-4fffffffff : System RAM
a020000000-a03fffffff : PCI Bus 0000:00
a020000000-a0201fffff : PCI Bus 0000:01
a020000000-a0200fffff : 0000:01:00.0
a020000000-a0200fffff : mlx4_core
a020100000-a0201fffff : 0000:01:00.0
a060000000-a07fffffff : PCI Bus 0000:00
a0d0000000-a0dfffffff : PCI ECAM
a110000000-a14fffffff : PCI Bus 0000:00
a110000000-a121ffffff : PCI Bus 0000:01
a110000000-a111ffffff : 0000:01:00.0
a110000000-a111ffffff : mlx4_core
a112000000-a121ffffff : 0000:01:00.0

Adding a Tested-by for the record:

Tested-by: Jon Masters <[email protected]>

Jon.

--
Computer Architect | Sent from my Fedora powered laptop


Attachments:
dmesg-4.9.0-rc7.ecam_jcm1+.txt (28.59 kB)

2016-12-02 07:34:48

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 1, 2016 at 10:31 PM, Jon Masters <[email protected]> wrote:
> Bjorn,
>
> Although I think the below still applies (that we need to leave that
> Memory32Fixed for existing deployments, and this is going to result
> in /proc/iomem polution), I've done some more reading of your ecam
> tree and the implementation of acpi_get_rc_resources you mentioned,
> and in particular how the PNP0C02 devices actually get wired up.
>
> I would like to be able to boot upstream on existing shipping and
> deployed machines that are in the field (not to mention our labs), but
> there's no reason we can't *also* get APM to add a new vendor specific
> PNP0C02 to the ACPI namespace in future firmware updates (for at least
> their own Mustang reference boards) matching segment to CSR, as in the
> case of the HiSi patches. That might then allow for some later
> preference to use that for the CSR rather than getting it from the RC
> device. Still, it would be ideal to boot on machines that are shipping
> from HPE and others at this moment, so I am still hopeful you'll
> at least allow the approach from Duc's v4 for now (4.10).

APM X-Gene 1 and X-Gene 2 ACPI tables will absolutely have PNP0C02
nodes (in upcoming firmware release). I hope to have a solution that
works for both old buggy firmware and the future improved firmware. So
I am thinking the CSR discovery will be like this:

(1) Use acpi_get_rc_resources() to discover CSR resource by checking
PNP0C02 nodes
(2) (1) should succeed with the new firmware
(3) If (1) fails, we can fall back to approach on v4 patch: calling
xgene_get_csr_resource() to discover the CSR described by
Memory32Fixed macro.

How do you feel about this? The drawback is the new firmware that does
not have the CSR space described with Memory32Fixed macro will fail on
the distro version that uses the old quirk (that relies on this
Memory32Fixed macro).

>
> Another nasty option for later consideration could then be having
> the kernel fake up any missing PNP0C02 on existing machines, but
> it would need special knowledge of the platform to generate that
> so as to handle the problem Mark flagged earlier (segment vs
> controller mismatch on some platforms). That could be done with a
> DMI quirk that matched on a specific (e.g. HPE) machine. It would
> only be needed on "broken" existing machines, and could be added
> post-4.10 to clean this up if you really want to do that.

Bjorn suggested similar approach (have a PNP quirk to fabricate a
PNP0C02 device and decleare all the required resources there) on
another thread. But as you said, this approach does not scale, it can
only applicable for a specific machine (by checking DMI information to
apply the PNP quirk).

>
> That's all very nasty...
>
> Jon.
>
> On 12/01/2016 11:08 PM, Jon Masters wrote:
>> Hi Bjorn, Duc, Mark,
>>
>> I switched my brain to the on mode and went and read some specs, and a few
>> tables, so here's my 2 cents on this...
>>
>> On 12/01/2016 06:22 PM, Duc Dang wrote:
>>> On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <[email protected]> wrote:
>>>> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
>>
>>>>>>> The SoC provide some number of RC bridges, each with a different base
>>>>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>>>>>> is still a problem if a platform doesn't use the segment ordering
>>>>>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>>>>>> as the first memory resource, so we could get it from there and not
>>>>>>> have hard-coded addresses and implied ording in the quirk code.
>>>>>>
>>>>>> I'm confused. Doesn't the current code treat every item in PNP0A03
>>>>>> _CRS as a window? Do you mean the first resource is handled
>>>>>> differently somehow? The Consumer/Producer bit could allow us to do
>>>>>> this by marking the RC MMIO space as "Consumer", but I didn't think
>>>>>> that strategy was quite working yet.
>>
>> Let's see if I summarized this correctly...
>>
>> 1. The MMIO registers for the host bridge itself need to be described
>> somewhere, especially if we need to find those in a quirk and poke
>> them. Since those registers are very much part of the bridge device,
>> it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
>>
>> 2. The address space covering these registers MUST be described as a
>> ResourceConsumer in order to avoid accidentally exposing them as
>> available for use by downstream devices on the PCI bus.
>>
>> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
>> This is a macro that doesn't have the notion of a producer or consumer.
>> HOWEVER various interpretations seem to be that this could/should
>> default to being interpreted as a consumed region.
>>
>> 4. At one point, a regression was added to the kernel:
>>
>> 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
>> host bridge itself")
>>
>> Which lead to a series on conversations about what should happen
>> for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )
>>
>> 5. This resulted in the following commit reverting point 4:
>>
>> 2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
>> available on PCI bus")
>>
>> Which also stated that:
>>
>> "This solution will also ease the way to consolidate ACPI PCI host
>> bridge common code from x86, ia64 and ARM64"
>>
>> End of summary.
>>
>> So it seems that generally there is an aversion to having bridge resources
>> be described in this manner and you would like to require that they be
>> described e.g. using QWordMemory with a ResourceConsumer type?
>>
>> BUT if we were to do that, it would break existing shipping systems since
>> there are quirks out there that use this form to find the base CSR:
>>
>> if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
>> fixed32 = &acpi_res->data.fixed_memory32;
>> port->csr_base = ioremap(fixed32->address,
>> fixed32->address_length);
>> return AE_CTRL_TERMINATE;
>> }
>>
>> That's what's shipping in at least RHEL(SA) today, and probably in other
>> distros. So if we get vendors to take that out, existing stuff will break,
>> which will have the downside that customers will have to choose between
>> whether to run a given distro or be able to use upstream kernels. In
>> that sense, to me, there are shipping platforms out there, which may well
>> be doing the "wrong" thing, but they are deployed and they are doing it.
>>
>> Which makes me wonder a couple of things (I think should NOT be done):
>>
>> 1. What would happen if we had both. A FixedMemory32 and the same region
>> described again using the longer form as a consumed region. I doubt
>> that's legal, and the current code would still add the region if it
>> saw the FixedMemory32 first when walking the tree. I don't like it,
>> but I'm mentioning it in case that leads to some helpful thinking.
>>
>> 2. What would happen if we had a difference policy on arm64 for such
>> resources. x86 has an "exception" for accessing the config space
>> using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
>> we can make the rules for a new platform (i.e. actually prescribe
>> exactly what the behavior is, rather than have it not be defined).
>> This is of course terrible in that existing BIOS vendors and so on
>> won't necessarily know this when working on ARM ACPI later on.
>>
>> I don't like either of these obviously. I'm hoping there's some way we
>> can say that this is tolerated in this one quirk (allow the use of
>> FixedMemory32 in this case) on the grounds that the driver claims
>> this bridge region and can be annotated to explain such.
>>
>> Once you let us know what you prefer, we will go and update the ARM
>> SBBR to spell out that future platforms should not make this mistake
>> again. We can prescribe whatever you'd like in terms of how bridge
>> resources consumed by the bridge are exposed. I have spoken about
>> this kind of situation within MS in the past, but they didn't have
>> specific guidance since they don't really tolerate such quirks. I
>> can, however, consult them before we change the SBBR as well.
>>
>>>>> The first resource is defined like below. It was introduced long time
>>>>> ago to use with older version of X-Gene ECAM quirks.
>>>>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
>>
>> Indeed. And in the case of m400, it is currently this in shipping systems:
>>
>> Memory32Fixed (ReadWrite,
>> 0x1F500000, // Address Base
>> 0x00010000, // Address Length
>> )
>>
>> The spec isn't clear on whether these are produced or consumed but the
>> implication is that these are consumed resources in most cases. Not that
>> this changes any of the above, but one can understand why it happened.
>>
>>>>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>>>
>>>> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>>>> is available for use by devices on bus 0000:00, but I think you're
>>>> saying it is consumed by the bridge itself, not forwarded down to PCI.
>>
>> Indeed.
>>
>>>> What's in your /proc/iomem? I see that your quirks do call
>>>> devm_ioremap_resource(), which calls devm_request_mem_region()
>>>> internally, so the driver does at least request that region, which
>>>> should keep us from assigning it to PCI devices.
>>
>> I'm hoping you can grant an exception on the grounds that the quirk will
>> keep the region from actually being used. And then somehow we document
>> this in the driver.
>>
>>>> But it still isn't quite right to tell the PCI core that the region is
>>>> available on the root bus.
>>>
>>> This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
>>> region is consumed completely.
>>> 1f2b0000-1f2bffff : PCI Bus 0000:00
>>> 1f2b0000-1f2bffff : PCIe CSR
>>>
>>> e040000000-e07fffffff : PCI Bus 0000:00
>>> e040000000-e0401fffff : PCI Bus 0000:01
>>> e040000000-e0400fffff : 0000:01:00.0
>>> e040000000-e0400fffff : mlx4_core
>>> e040100000-e0401fffff : 0000:01:00.0
>>> e0d0000000-e0dfffffff : PCI ECAM
>>> f000000000-ffffffffff : PCI Bus 0000:00
>>> f000000000-f001ffffff : PCI Bus 0000:01
>>> f000000000-f001ffffff : 0000:01:00.0
>>> f000000000-f001ffffff : mlx4_core
>>>
>>> Using hard-coded resources for mmio space make the quirk rely on the
>>> segment number passing from the firmware. Using Mark's method or
>>> acpi_get_rc_resource can discover the mmio space and consume all of
>>> the space, but as you mentioned, it leaves the defect that PCI core
>>> considers the mmio space as available resource for secondary devices
>>> although it will never allocate the mmio space to secondary devices as
>>> the RC already reserves and consumes all of the space.
>>
>> Indeed. It's not clean, but perhaps we can get away with it on the
>> grounds that there are existing systems out there and this won't
>> be allowed to happen again in the future :)
>>
>> Jon.
>>
>
>
> --
> Computer Architect | Sent from my Fedora powered laptop

2016-12-02 07:37:15

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v4 1/1] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 1, 2016 at 11:12 PM, Jon Masters <[email protected]> wrote:
> On 12/01/2016 09:27 PM, Duc Dang wrote:
>> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
>> needs to configure additional controller's register to address
>> device at bus:dev:function.
>>
>> The quirk will discover controller MMIO register space and configure
>> controller registers to select and address the target secondary device.
>>
>> The quirk will only be applied for X-Gene PCIe MCFG table with
>> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>>
>> Signed-off-by: Duc Dang <[email protected]>
>
> So far I've tested this on an HPE ProLiant m400 (Moonshot) cartridge
> and will test it on some other reference platforms soon. Bootlog for
> the m400 attached in case Bjorn wants to see the output. Here's
> what I see in /proc/iomem btw on that platform:
>
> # cat /proc/iomem
> 10520000-10523fff : APMC0D18:00
> 10520000-10523fff : APMC0D18:00
> 10524000-10527fff : APMC0D17:00
> 10540000-1054a0ff : APMC0D01:00
> 10546000-10546fff : APMC0D50:00
> 1054a000-1054a00f : APMC0D12:03
> 1054a000-1054a00f : APMC0D12:02
> 1054a000-1054a00f : APMC0D12:01
> 1054a000-1054a00f : APMC0D12:00
> 17000000-17000fff : APMC0D01:00
> 17001000-17001fff : APMC0D01:00
> 17001000-170013ff : APMC0D15:00
> 17001000-170013ff : APMC0D15:00
> 1701c000-1701cfff : APMC0D14:00
> 1a800000-1a800fff : APMC0D0D:00
> 1a800000-1a800fff : APMC0D0D:00
> 1c000200-1c0002ff : APMC0D06:00
> 1c021000-1c0210ff : APMC0D08:00
> 1c021000-1c02101f : serial
> 1c024000-1c024fff : APMC0D07:00
> 1f230000-1f230fff : APMC0D0D:00
> 1f230000-1f230fff : APMC0D0D:00
> 1f23d000-1f23dfff : APMC0D0D:00
> 1f23d000-1f23dfff : APMC0D0D:00
> 1f23e000-1f23efff : APMC0D0D:00
> 1f23e000-1f23efff : APMC0D0D:00
> 1f2a0000-1f31ffff : APMC0D06:00
> 1f500000-1f50ffff : PCI Bus 0000:00
> 1f500000-1f50ffff : PNP0A08:00
> 78800000-78800fff : APMC0D13:00
> 78800000-78800fff : APMC0D12:03
> 78800000-78800fff : APMC0D12:02
> 78800000-78800fff : APMC0D12:01
> 78800000-78800fff : APMC0D12:00
> 78800000-78800fff : APMC0D11:00
> 78800000-78800fff : APMC0D10:03
> 78800000-78800fff : APMC0D10:02
> 78800000-78800fff : APMC0D10:01
> 78800000-78800fff : APMC0D10:00
> 79000000-798fffff : APMC0D0E:00
> 7c000000-7c1fffff : APMC0D12:00
> 7c200000-7c3fffff : APMC0D12:01
> 7c400000-7c5fffff : APMC0D12:02
> 7c600000-7c7fffff : APMC0D12:03
> 7e000000-7e000fff : APMC0D13:00
> 7e200000-7e200fff : APMC0D10:03
> 7e200000-7e200fff : APMC0D10:02
> 7e200000-7e200fff : APMC0D10:01
> 7e200000-7e200fff : APMC0D10:00
> 7e600000-7e600fff : APMC0D11:00
> 7e700000-7e700fff : APMC0D10:03
> 7e700000-7e700fff : APMC0D10:02
> 7e700000-7e700fff : APMC0D10:01
> 7e700000-7e700fff : APMC0D10:00
> 7e720000-7e720fff : APMC0D10:03
> 7e720000-7e720fff : APMC0D10:02
> 7e720000-7e720fff : APMC0D10:01
> 7e720000-7e720fff : APMC0D10:00
> 7e800000-7e800fff : APMC0D10:00
> 7e840000-7e840fff : APMC0D10:01
> 7e880000-7e880fff : APMC0D10:02
> 7e8c0000-7e8c0fff : APMC0D10:03
> 7e930000-7e930fff : APMC0D13:00
> 4000000000-4001ffffff : System RAM
> 4000080000-4000c3ffff : Kernel code
> 4000db0000-400165ffff : Kernel data
> 40023a0000-4ff733ffff : System RAM
> 4ff7340000-4ff77cffff : reserved
> 4ff77d0000-4ff79cffff : System RAM
> 4ff79d0000-4ff7e7ffff : reserved
> 4ff7e80000-4ff7e8ffff : System RAM
> 4ff7e90000-4ff7efffff : reserved
> 4ff7f10000-4ff800ffff : reserved
> 4ff8010000-4fffffffff : System RAM
> a020000000-a03fffffff : PCI Bus 0000:00
> a020000000-a0201fffff : PCI Bus 0000:01
> a020000000-a0200fffff : 0000:01:00.0
> a020000000-a0200fffff : mlx4_core
> a020100000-a0201fffff : 0000:01:00.0
> a060000000-a07fffffff : PCI Bus 0000:00
> a0d0000000-a0dfffffff : PCI ECAM
> a110000000-a14fffffff : PCI Bus 0000:00
> a110000000-a121ffffff : PCI Bus 0000:01
> a110000000-a111ffffff : 0000:01:00.0
> a110000000-a111ffffff : mlx4_core
> a112000000-a121ffffff : 0000:01:00.0
>
> Adding a Tested-by for the record:
>
> Tested-by: Jon Masters <[email protected]>

Thanks a lot for testing this, Jon.
>
> Jon.
>
> --
> Computer Architect | Sent from my Fedora powered laptop
>
Regards,
Duc Dang.

2016-12-02 08:09:15

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

Quick reply - sorry for top posting (it's 3am...) - I would favor keeping the existing Fixed32Memory _CRS but switching over to prefer the PNP entry as a good citizen. The trouble is that it would be unfortunate if existing distros stopped working on newer firmware and it would lead to IMO more pain than it is worth. Hopefully for this reason Bjorn will take your v4 as-is for now and let us all figure out the cleanest long term cleanup later.

--
Computer Architect | Sent from my 64-bit #ARM Powered phone

> On Dec 2, 2016, at 02:34, Duc Dang <[email protected]> wrote:
>
>> On Thu, Dec 1, 2016 at 10:31 PM, Jon Masters <[email protected]> wrote:
>> Bjorn,
>>
>> Although I think the below still applies (that we need to leave that
>> Memory32Fixed for existing deployments, and this is going to result
>> in /proc/iomem polution), I've done some more reading of your ecam
>> tree and the implementation of acpi_get_rc_resources you mentioned,
>> and in particular how the PNP0C02 devices actually get wired up.
>>
>> I would like to be able to boot upstream on existing shipping and
>> deployed machines that are in the field (not to mention our labs), but
>> there's no reason we can't *also* get APM to add a new vendor specific
>> PNP0C02 to the ACPI namespace in future firmware updates (for at least
>> their own Mustang reference boards) matching segment to CSR, as in the
>> case of the HiSi patches. That might then allow for some later
>> preference to use that for the CSR rather than getting it from the RC
>> device. Still, it would be ideal to boot on machines that are shipping
>> from HPE and others at this moment, so I am still hopeful you'll
>> at least allow the approach from Duc's v4 for now (4.10).
>
> APM X-Gene 1 and X-Gene 2 ACPI tables will absolutely have PNP0C02
> nodes (in upcoming firmware release). I hope to have a solution that
> works for both old buggy firmware and the future improved firmware. So
> I am thinking the CSR discovery will be like this:
>
> (1) Use acpi_get_rc_resources() to discover CSR resource by checking
> PNP0C02 nodes
> (2) (1) should succeed with the new firmware
> (3) If (1) fails, we can fall back to approach on v4 patch: calling
> xgene_get_csr_resource() to discover the CSR described by
> Memory32Fixed macro.
>
> How do you feel about this? The drawback is the new firmware that does
> not have the CSR space described with Memory32Fixed macro will fail on
> the distro version that uses the old quirk (that relies on this
> Memory32Fixed macro).
>
>>
>> Another nasty option for later consideration could then be having
>> the kernel fake up any missing PNP0C02 on existing machines, but
>> it would need special knowledge of the platform to generate that
>> so as to handle the problem Mark flagged earlier (segment vs
>> controller mismatch on some platforms). That could be done with a
>> DMI quirk that matched on a specific (e.g. HPE) machine. It would
>> only be needed on "broken" existing machines, and could be added
>> post-4.10 to clean this up if you really want to do that.
>
> Bjorn suggested similar approach (have a PNP quirk to fabricate a
> PNP0C02 device and decleare all the required resources there) on
> another thread. But as you said, this approach does not scale, it can
> only applicable for a specific machine (by checking DMI information to
> apply the PNP quirk).
>
>>
>> That's all very nasty...
>>
>> Jon.
>>
>>> On 12/01/2016 11:08 PM, Jon Masters wrote:
>>> Hi Bjorn, Duc, Mark,
>>>
>>> I switched my brain to the on mode and went and read some specs, and a few
>>> tables, so here's my 2 cents on this...
>>>
>>>> On 12/01/2016 06:22 PM, Duc Dang wrote:
>>>>> On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <[email protected]> wrote:
>>>>> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
>>>
>>>>>>>> The SoC provide some number of RC bridges, each with a different base
>>>>>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
>>>>>>>> is still a problem if a platform doesn't use the segment ordering
>>>>>>>> implied by the code. But the PNP0A03 _CRS does have this base address
>>>>>>>> as the first memory resource, so we could get it from there and not
>>>>>>>> have hard-coded addresses and implied ording in the quirk code.
>>>>>>>
>>>>>>> I'm confused. Doesn't the current code treat every item in PNP0A03
>>>>>>> _CRS as a window? Do you mean the first resource is handled
>>>>>>> differently somehow? The Consumer/Producer bit could allow us to do
>>>>>>> this by marking the RC MMIO space as "Consumer", but I didn't think
>>>>>>> that strategy was quite working yet.
>>>
>>> Let's see if I summarized this correctly...
>>>
>>> 1. The MMIO registers for the host bridge itself need to be described
>>> somewhere, especially if we need to find those in a quirk and poke
>>> them. Since those registers are very much part of the bridge device,
>>> it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
>>>
>>> 2. The address space covering these registers MUST be described as a
>>> ResourceConsumer in order to avoid accidentally exposing them as
>>> available for use by downstream devices on the PCI bus.
>>>
>>> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
>>> This is a macro that doesn't have the notion of a producer or consumer.
>>> HOWEVER various interpretations seem to be that this could/should
>>> default to being interpreted as a consumed region.
>>>
>>> 4. At one point, a regression was added to the kernel:
>>>
>>> 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
>>> host bridge itself")
>>>
>>> Which lead to a series on conversations about what should happen
>>> for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )
>>>
>>> 5. This resulted in the following commit reverting point 4:
>>>
>>> 2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
>>> available on PCI bus")
>>>
>>> Which also stated that:
>>>
>>> "This solution will also ease the way to consolidate ACPI PCI host
>>> bridge common code from x86, ia64 and ARM64"
>>>
>>> End of summary.
>>>
>>> So it seems that generally there is an aversion to having bridge resources
>>> be described in this manner and you would like to require that they be
>>> described e.g. using QWordMemory with a ResourceConsumer type?
>>>
>>> BUT if we were to do that, it would break existing shipping systems since
>>> there are quirks out there that use this form to find the base CSR:
>>>
>>> if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
>>> fixed32 = &acpi_res->data.fixed_memory32;
>>> port->csr_base = ioremap(fixed32->address,
>>> fixed32->address_length);
>>> return AE_CTRL_TERMINATE;
>>> }
>>>
>>> That's what's shipping in at least RHEL(SA) today, and probably in other
>>> distros. So if we get vendors to take that out, existing stuff will break,
>>> which will have the downside that customers will have to choose between
>>> whether to run a given distro or be able to use upstream kernels. In
>>> that sense, to me, there are shipping platforms out there, which may well
>>> be doing the "wrong" thing, but they are deployed and they are doing it.
>>>
>>> Which makes me wonder a couple of things (I think should NOT be done):
>>>
>>> 1. What would happen if we had both. A FixedMemory32 and the same region
>>> described again using the longer form as a consumed region. I doubt
>>> that's legal, and the current code would still add the region if it
>>> saw the FixedMemory32 first when walking the tree. I don't like it,
>>> but I'm mentioning it in case that leads to some helpful thinking.
>>>
>>> 2. What would happen if we had a difference policy on arm64 for such
>>> resources. x86 has an "exception" for accessing the config space
>>> using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
>>> we can make the rules for a new platform (i.e. actually prescribe
>>> exactly what the behavior is, rather than have it not be defined).
>>> This is of course terrible in that existing BIOS vendors and so on
>>> won't necessarily know this when working on ARM ACPI later on.
>>>
>>> I don't like either of these obviously. I'm hoping there's some way we
>>> can say that this is tolerated in this one quirk (allow the use of
>>> FixedMemory32 in this case) on the grounds that the driver claims
>>> this bridge region and can be annotated to explain such.
>>>
>>> Once you let us know what you prefer, we will go and update the ARM
>>> SBBR to spell out that future platforms should not make this mistake
>>> again. We can prescribe whatever you'd like in terms of how bridge
>>> resources consumed by the bridge are exposed. I have spoken about
>>> this kind of situation within MS in the past, but they didn't have
>>> specific guidance since they don't really tolerate such quirks. I
>>> can, however, consult them before we change the SBBR as well.
>>>
>>>>>> The first resource is defined like below. It was introduced long time
>>>>>> ago to use with older version of X-Gene ECAM quirks.
>>>>>> Memory32Fixed(ReadWrite, 0x1F2B0000, 0x10000, )
>>>
>>> Indeed. And in the case of m400, it is currently this in shipping systems:
>>>
>>> Memory32Fixed (ReadWrite,
>>> 0x1F500000, // Address Base
>>> 0x00010000, // Address Length
>>> )
>>>
>>> The spec isn't clear on whether these are produced or consumed but the
>>> implication is that these are consumed resources in most cases. Not that
>>> this changes any of the above, but one can understand why it happened.
>>>
>>>>>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>>>>
>>>>> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>>>>> is available for use by devices on bus 0000:00, but I think you're
>>>>> saying it is consumed by the bridge itself, not forwarded down to PCI.
>>>
>>> Indeed.
>>>
>>>>> What's in your /proc/iomem? I see that your quirks do call
>>>>> devm_ioremap_resource(), which calls devm_request_mem_region()
>>>>> internally, so the driver does at least request that region, which
>>>>> should keep us from assigning it to PCI devices.
>>>
>>> I'm hoping you can grant an exception on the grounds that the quirk will
>>> keep the region from actually being used. And then somehow we document
>>> this in the driver.
>>>
>>>>> But it still isn't quite right to tell the PCI core that the region is
>>>>> available on the root bus.
>>>>
>>>> This is /proc/iomem output on my Mustang board. The 64K "PCIe CSR"
>>>> region is consumed completely.
>>>> 1f2b0000-1f2bffff : PCI Bus 0000:00
>>>> 1f2b0000-1f2bffff : PCIe CSR
>>>>
>>>> e040000000-e07fffffff : PCI Bus 0000:00
>>>> e040000000-e0401fffff : PCI Bus 0000:01
>>>> e040000000-e0400fffff : 0000:01:00.0
>>>> e040000000-e0400fffff : mlx4_core
>>>> e040100000-e0401fffff : 0000:01:00.0
>>>> e0d0000000-e0dfffffff : PCI ECAM
>>>> f000000000-ffffffffff : PCI Bus 0000:00
>>>> f000000000-f001ffffff : PCI Bus 0000:01
>>>> f000000000-f001ffffff : 0000:01:00.0
>>>> f000000000-f001ffffff : mlx4_core
>>>>
>>>> Using hard-coded resources for mmio space make the quirk rely on the
>>>> segment number passing from the firmware. Using Mark's method or
>>>> acpi_get_rc_resource can discover the mmio space and consume all of
>>>> the space, but as you mentioned, it leaves the defect that PCI core
>>>> considers the mmio space as available resource for secondary devices
>>>> although it will never allocate the mmio space to secondary devices as
>>>> the RC already reserves and consumes all of the space.
>>>
>>> Indeed. It's not clean, but perhaps we can get away with it on the
>>> grounds that there are existing systems out there and this won't
>>> be allowed to happen again in the future :)
>>>
>>> Jon.
>>>
>>
>>
>> --
>> Computer Architect | Sent from my Fedora powered laptop

2016-12-02 08:12:33

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v4 1/1] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

You're welcome.

(Unrelated) Note that I added a console= and earlycon in my test (and got the baud rate wrong for the console but nevermind...was ssh'd in after the earlycon output I cared about anyway) because of some other cleanup work for the SPCR parsing that apparently is still not quite fixed for upstream, or rather, there is a need to match on the 32-bit access required for the UART and that isn't happening so it's not getting setup. Folks are tracking that one and fixing it though.

--
Computer Architect | Sent from my 64-bit #ARM Powered phone

> On Dec 2, 2016, at 02:37, Duc Dang <[email protected]> wrote:
>
>> On Thu, Dec 1, 2016 at 11:12 PM, Jon Masters <[email protected]> wrote:
>>> On 12/01/2016 09:27 PM, Duc Dang wrote:
>>> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
>>> needs to configure additional controller's register to address
>>> device at bus:dev:function.
>>>
>>> The quirk will discover controller MMIO register space and configure
>>> controller registers to select and address the target secondary device.
>>>
>>> The quirk will only be applied for X-Gene PCIe MCFG table with
>>> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>>>
>>> Signed-off-by: Duc Dang <[email protected]>
>>
>> So far I've tested this on an HPE ProLiant m400 (Moonshot) cartridge
>> and will test it on some other reference platforms soon. Bootlog for
>> the m400 attached in case Bjorn wants to see the output. Here's
>> what I see in /proc/iomem btw on that platform:
>>
>> # cat /proc/iomem
>> 10520000-10523fff : APMC0D18:00
>> 10520000-10523fff : APMC0D18:00
>> 10524000-10527fff : APMC0D17:00
>> 10540000-1054a0ff : APMC0D01:00
>> 10546000-10546fff : APMC0D50:00
>> 1054a000-1054a00f : APMC0D12:03
>> 1054a000-1054a00f : APMC0D12:02
>> 1054a000-1054a00f : APMC0D12:01
>> 1054a000-1054a00f : APMC0D12:00
>> 17000000-17000fff : APMC0D01:00
>> 17001000-17001fff : APMC0D01:00
>> 17001000-170013ff : APMC0D15:00
>> 17001000-170013ff : APMC0D15:00
>> 1701c000-1701cfff : APMC0D14:00
>> 1a800000-1a800fff : APMC0D0D:00
>> 1a800000-1a800fff : APMC0D0D:00
>> 1c000200-1c0002ff : APMC0D06:00
>> 1c021000-1c0210ff : APMC0D08:00
>> 1c021000-1c02101f : serial
>> 1c024000-1c024fff : APMC0D07:00
>> 1f230000-1f230fff : APMC0D0D:00
>> 1f230000-1f230fff : APMC0D0D:00
>> 1f23d000-1f23dfff : APMC0D0D:00
>> 1f23d000-1f23dfff : APMC0D0D:00
>> 1f23e000-1f23efff : APMC0D0D:00
>> 1f23e000-1f23efff : APMC0D0D:00
>> 1f2a0000-1f31ffff : APMC0D06:00
>> 1f500000-1f50ffff : PCI Bus 0000:00
>> 1f500000-1f50ffff : PNP0A08:00
>> 78800000-78800fff : APMC0D13:00
>> 78800000-78800fff : APMC0D12:03
>> 78800000-78800fff : APMC0D12:02
>> 78800000-78800fff : APMC0D12:01
>> 78800000-78800fff : APMC0D12:00
>> 78800000-78800fff : APMC0D11:00
>> 78800000-78800fff : APMC0D10:03
>> 78800000-78800fff : APMC0D10:02
>> 78800000-78800fff : APMC0D10:01
>> 78800000-78800fff : APMC0D10:00
>> 79000000-798fffff : APMC0D0E:00
>> 7c000000-7c1fffff : APMC0D12:00
>> 7c200000-7c3fffff : APMC0D12:01
>> 7c400000-7c5fffff : APMC0D12:02
>> 7c600000-7c7fffff : APMC0D12:03
>> 7e000000-7e000fff : APMC0D13:00
>> 7e200000-7e200fff : APMC0D10:03
>> 7e200000-7e200fff : APMC0D10:02
>> 7e200000-7e200fff : APMC0D10:01
>> 7e200000-7e200fff : APMC0D10:00
>> 7e600000-7e600fff : APMC0D11:00
>> 7e700000-7e700fff : APMC0D10:03
>> 7e700000-7e700fff : APMC0D10:02
>> 7e700000-7e700fff : APMC0D10:01
>> 7e700000-7e700fff : APMC0D10:00
>> 7e720000-7e720fff : APMC0D10:03
>> 7e720000-7e720fff : APMC0D10:02
>> 7e720000-7e720fff : APMC0D10:01
>> 7e720000-7e720fff : APMC0D10:00
>> 7e800000-7e800fff : APMC0D10:00
>> 7e840000-7e840fff : APMC0D10:01
>> 7e880000-7e880fff : APMC0D10:02
>> 7e8c0000-7e8c0fff : APMC0D10:03
>> 7e930000-7e930fff : APMC0D13:00
>> 4000000000-4001ffffff : System RAM
>> 4000080000-4000c3ffff : Kernel code
>> 4000db0000-400165ffff : Kernel data
>> 40023a0000-4ff733ffff : System RAM
>> 4ff7340000-4ff77cffff : reserved
>> 4ff77d0000-4ff79cffff : System RAM
>> 4ff79d0000-4ff7e7ffff : reserved
>> 4ff7e80000-4ff7e8ffff : System RAM
>> 4ff7e90000-4ff7efffff : reserved
>> 4ff7f10000-4ff800ffff : reserved
>> 4ff8010000-4fffffffff : System RAM
>> a020000000-a03fffffff : PCI Bus 0000:00
>> a020000000-a0201fffff : PCI Bus 0000:01
>> a020000000-a0200fffff : 0000:01:00.0
>> a020000000-a0200fffff : mlx4_core
>> a020100000-a0201fffff : 0000:01:00.0
>> a060000000-a07fffffff : PCI Bus 0000:00
>> a0d0000000-a0dfffffff : PCI ECAM
>> a110000000-a14fffffff : PCI Bus 0000:00
>> a110000000-a121ffffff : PCI Bus 0000:01
>> a110000000-a111ffffff : 0000:01:00.0
>> a110000000-a111ffffff : mlx4_core
>> a112000000-a121ffffff : 0000:01:00.0
>>
>> Adding a Tested-by for the record:
>>
>> Tested-by: Jon Masters <[email protected]>
>
> Thanks a lot for testing this, Jon.
>>
>> Jon.
>>
>> --
>> Computer Architect | Sent from my Fedora powered laptop
>>
> Regards,
> Duc Dang.

2016-12-02 19:39:54

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v4 1/1] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Fri, Dec 2, 2016 at 12:11 AM, Jon Masters <[email protected]> wrote:
> You're welcome.
>
> (Unrelated) Note that I added a console= and earlycon in my test (and got the baud rate wrong for the console but nevermind...was ssh'd in after the earlycon output I cared about anyway) because of some other cleanup work for the SPCR parsing that apparently is still not quite fixed for upstream, or rather, there is a need to match on the 32-bit access required for the UART and that isn't happening so it's not getting setup. Folks are tracking that one and fixing it though.

I don't see this console issue on X-Gene1 (Mustang board). I tried
with X-Gene 2 as well. I used both console=ttyS0,115200 and
earlycon=uart8250,mmio32,0x1c020000. Are you setting baudrate to
115200 or something else?

>
> --
> Computer Architect | Sent from my 64-bit #ARM Powered phone
>
>> On Dec 2, 2016, at 02:37, Duc Dang <[email protected]> wrote:
>>
>>> On Thu, Dec 1, 2016 at 11:12 PM, Jon Masters <[email protected]> wrote:
>>>> On 12/01/2016 09:27 PM, Duc Dang wrote:
>>>> PCIe controllers in X-Gene SoCs is not ECAM compliant: software
>>>> needs to configure additional controller's register to address
>>>> device at bus:dev:function.
>>>>
>>>> The quirk will discover controller MMIO register space and configure
>>>> controller registers to select and address the target secondary device.
>>>>
>>>> The quirk will only be applied for X-Gene PCIe MCFG table with
>>>> OEM revison 1, 2, 3 or 4 (PCIe controller v1 and v2 on X-Gene SoCs).
>>>>
>>>> Signed-off-by: Duc Dang <[email protected]>
>>>
>>> So far I've tested this on an HPE ProLiant m400 (Moonshot) cartridge
>>> and will test it on some other reference platforms soon. Bootlog for
>>> the m400 attached in case Bjorn wants to see the output. Here's
>>> what I see in /proc/iomem btw on that platform:
>>>
>>> # cat /proc/iomem
>>> 10520000-10523fff : APMC0D18:00
>>> 10520000-10523fff : APMC0D18:00
>>> 10524000-10527fff : APMC0D17:00
>>> 10540000-1054a0ff : APMC0D01:00
>>> 10546000-10546fff : APMC0D50:00
>>> 1054a000-1054a00f : APMC0D12:03
>>> 1054a000-1054a00f : APMC0D12:02
>>> 1054a000-1054a00f : APMC0D12:01
>>> 1054a000-1054a00f : APMC0D12:00
>>> 17000000-17000fff : APMC0D01:00
>>> 17001000-17001fff : APMC0D01:00
>>> 17001000-170013ff : APMC0D15:00
>>> 17001000-170013ff : APMC0D15:00
>>> 1701c000-1701cfff : APMC0D14:00
>>> 1a800000-1a800fff : APMC0D0D:00
>>> 1a800000-1a800fff : APMC0D0D:00
>>> 1c000200-1c0002ff : APMC0D06:00
>>> 1c021000-1c0210ff : APMC0D08:00
>>> 1c021000-1c02101f : serial
>>> 1c024000-1c024fff : APMC0D07:00
>>> 1f230000-1f230fff : APMC0D0D:00
>>> 1f230000-1f230fff : APMC0D0D:00
>>> 1f23d000-1f23dfff : APMC0D0D:00
>>> 1f23d000-1f23dfff : APMC0D0D:00
>>> 1f23e000-1f23efff : APMC0D0D:00
>>> 1f23e000-1f23efff : APMC0D0D:00
>>> 1f2a0000-1f31ffff : APMC0D06:00
>>> 1f500000-1f50ffff : PCI Bus 0000:00
>>> 1f500000-1f50ffff : PNP0A08:00
>>> 78800000-78800fff : APMC0D13:00
>>> 78800000-78800fff : APMC0D12:03
>>> 78800000-78800fff : APMC0D12:02
>>> 78800000-78800fff : APMC0D12:01
>>> 78800000-78800fff : APMC0D12:00
>>> 78800000-78800fff : APMC0D11:00
>>> 78800000-78800fff : APMC0D10:03
>>> 78800000-78800fff : APMC0D10:02
>>> 78800000-78800fff : APMC0D10:01
>>> 78800000-78800fff : APMC0D10:00
>>> 79000000-798fffff : APMC0D0E:00
>>> 7c000000-7c1fffff : APMC0D12:00
>>> 7c200000-7c3fffff : APMC0D12:01
>>> 7c400000-7c5fffff : APMC0D12:02
>>> 7c600000-7c7fffff : APMC0D12:03
>>> 7e000000-7e000fff : APMC0D13:00
>>> 7e200000-7e200fff : APMC0D10:03
>>> 7e200000-7e200fff : APMC0D10:02
>>> 7e200000-7e200fff : APMC0D10:01
>>> 7e200000-7e200fff : APMC0D10:00
>>> 7e600000-7e600fff : APMC0D11:00
>>> 7e700000-7e700fff : APMC0D10:03
>>> 7e700000-7e700fff : APMC0D10:02
>>> 7e700000-7e700fff : APMC0D10:01
>>> 7e700000-7e700fff : APMC0D10:00
>>> 7e720000-7e720fff : APMC0D10:03
>>> 7e720000-7e720fff : APMC0D10:02
>>> 7e720000-7e720fff : APMC0D10:01
>>> 7e720000-7e720fff : APMC0D10:00
>>> 7e800000-7e800fff : APMC0D10:00
>>> 7e840000-7e840fff : APMC0D10:01
>>> 7e880000-7e880fff : APMC0D10:02
>>> 7e8c0000-7e8c0fff : APMC0D10:03
>>> 7e930000-7e930fff : APMC0D13:00
>>> 4000000000-4001ffffff : System RAM
>>> 4000080000-4000c3ffff : Kernel code
>>> 4000db0000-400165ffff : Kernel data
>>> 40023a0000-4ff733ffff : System RAM
>>> 4ff7340000-4ff77cffff : reserved
>>> 4ff77d0000-4ff79cffff : System RAM
>>> 4ff79d0000-4ff7e7ffff : reserved
>>> 4ff7e80000-4ff7e8ffff : System RAM
>>> 4ff7e90000-4ff7efffff : reserved
>>> 4ff7f10000-4ff800ffff : reserved
>>> 4ff8010000-4fffffffff : System RAM
>>> a020000000-a03fffffff : PCI Bus 0000:00
>>> a020000000-a0201fffff : PCI Bus 0000:01
>>> a020000000-a0200fffff : 0000:01:00.0
>>> a020000000-a0200fffff : mlx4_core
>>> a020100000-a0201fffff : 0000:01:00.0
>>> a060000000-a07fffffff : PCI Bus 0000:00
>>> a0d0000000-a0dfffffff : PCI ECAM
>>> a110000000-a14fffffff : PCI Bus 0000:00
>>> a110000000-a121ffffff : PCI Bus 0000:01
>>> a110000000-a111ffffff : 0000:01:00.0
>>> a110000000-a111ffffff : mlx4_core
>>> a112000000-a121ffffff : 0000:01:00.0
>>>
>>> Adding a Tested-by for the record:
>>>
>>> Tested-by: Jon Masters <[email protected]>
>>
>> Thanks a lot for testing this, Jon.
>>>
>>> Jon.
>>>
>>> --
>>> Computer Architect | Sent from my Fedora powered laptop
>>>
>> Regards,
>> Duc Dang.

2016-12-02 19:59:44

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v4 1/1] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/02/2016 02:39 PM, Duc Dang wrote:
> On Fri, Dec 2, 2016 at 12:11 AM, Jon Masters <[email protected]> wrote:
>> You're welcome.
>>
>> (Unrelated) Note that I added a console= and earlycon in my test (and got the baud rate wrong for the console but nevermind...was ssh'd in after the earlycon output I cared about anyway) because of some other cleanup work for the SPCR parsing that apparently is still not quite fixed for upstream, or rather, there is a need to match on the 32-bit access required for the UART and that isn't happening so it's not getting setup. Folks are tracking that one and fixing it though.
>
> I don't see this console issue on X-Gene1 (Mustang board). I tried
> with X-Gene 2 as well. I used both console=ttyS0,115200 and
> earlycon=uart8250,mmio32,0x1c020000. Are you setting baudrate to
> 115200 or something else?

It's an m400 issue in that their SPCR needs updating to convey the
required 32-bit access width for the 8250 dw IP or similar. It's one
of those things someone described the other day and it made sense but
I haven't yet dug into the exact situation, other than that I know
the access width in the m400 ACPI table isn't quite right.

My understanding is that it's in hand. I'll catch up on exactly what's
up, check with my own mustangs, and followup separately.

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-02 23:39:50

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 01, 2016 at 11:08:23PM -0500, Jon Masters wrote:
> Hi Bjorn, Duc, Mark,
>
> I switched my brain to the on mode and went and read some specs, and a few
> tables, so here's my 2 cents on this...
>
> On 12/01/2016 06:22 PM, Duc Dang wrote:
> > On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <[email protected]> wrote:
> >> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
>
> >>>>> The SoC provide some number of RC bridges, each with a different base
> >>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
> >>>>> is still a problem if a platform doesn't use the segment ordering
> >>>>> implied by the code. But the PNP0A03 _CRS does have this base address
> >>>>> as the first memory resource, so we could get it from there and not
> >>>>> have hard-coded addresses and implied ording in the quirk code.
> >>>>
> >>>> I'm confused. Doesn't the current code treat every item in PNP0A03
> >>>> _CRS as a window? Do you mean the first resource is handled
> >>>> differently somehow? The Consumer/Producer bit could allow us to do
> >>>> this by marking the RC MMIO space as "Consumer", but I didn't think
> >>>> that strategy was quite working yet.
>
> Let's see if I summarized this correctly...
>
> 1. The MMIO registers for the host bridge itself need to be described
> somewhere, especially if we need to find those in a quirk and poke
> them. Since those registers are very much part of the bridge device,
> it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
>
> 2. The address space covering these registers MUST be described as a
> ResourceConsumer in order to avoid accidentally exposing them as
> available for use by downstream devices on the PCI bus.
>
> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
> This is a macro that doesn't have the notion of a producer or consumer.
> HOWEVER various interpretations seem to be that this could/should
> default to being interpreted as a consumed region.

I agree; I think that per spec, Memory24, Memory32, Memory32Fixed, IO,
and FixedIO should all be for consumed resources, not for bridge
windows, since they don't have the notion of producer.

I'm pretty sure there's x86 firmware in the field that uses these for
windows, so I think we have to accept that usage, at least on x86.

> 4. At one point, a regression was added to the kernel:
>
> 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
> host bridge itself")
>
> Which lead to a series on conversations about what should happen
> for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )
>
> 5. This resulted in the following commit reverting point 4:
>
> 2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
> available on PCI bus")
>
> Which also stated that:
>
> "This solution will also ease the way to consolidate ACPI PCI host
> bridge common code from x86, ia64 and ARM64"
>
> End of summary.
>
> So it seems that generally there is an aversion to having bridge resources
> be described in this manner and you would like to require that they be
> described e.g. using QWordMemory with a ResourceConsumer type?

Per spec, we should ignore the Consumer/Producer bit in Word/DWord/QWord
descriptors. In bridge devices on x86, I think we have to treat them as
producers (windows) because that's how they've been typically used.

> BUT if we were to do that, it would break existing shipping systems since
> there are quirks out there that use this form to find the base CSR:
>
> if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
> fixed32 = &acpi_res->data.fixed_memory32;
> port->csr_base = ioremap(fixed32->address,
> fixed32->address_length);
> return AE_CTRL_TERMINATE;
> }

I think this is a valid usage of FixedMemory32 in terms of the spec.
Linux currently handles this as a window if it appears in a PNP0A03
device because some x86 firmware used it that way.

We might be able to handle it differently on arm64, e.g., by making an
arm64 version of pci_acpi_root_prepare_resources() that checks for
IORESOURCE_WINDOW.

> 2. What would happen if we had a difference policy on arm64 for such
> resources. x86 has an "exception" for accessing the config space
> using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
> we can make the rules for a new platform (i.e. actually prescribe
> exactly what the behavior is, rather than have it not be defined).
> This is of course terrible in that existing BIOS vendors and so on
> won't necessarily know this when working on ARM ACPI later on.

> Indeed. And in the case of m400, it is currently this in shipping systems:
>
> Memory32Fixed (ReadWrite,
> 0x1F500000, // Address Base
> 0x00010000, // Address Length
> )

> >>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
> >>
> >> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
> >> is available for use by devices on bus 0000:00, but I think you're
> >> saying it is consumed by the bridge itself, not forwarded down to PCI.

I think this ASL is perfectly spec-compliant, and what's wrong is the
way Linux is interpreting it.

I'm not clear on what's terrible about idea 2. I think it's basically
what I suggested above, i.e., something like the patch below, which I
think (hope) would keep us from thinking that region is a window.

Even without this patch, I don't think it's a show-stopper to have
Linux mistakenly thinking this region is routed to PCI, because the
driver does reserve it and the PCI core will never try to use it.

diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 8a177a1..a16fc8e 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -114,6 +114,19 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
return 0;
}

+static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
+{
+ struct resource_entry *entry, *tmp;
+ int status;
+
+ status = acpi_pci_probe_root_resources(ci);
+ resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
+ if (!(entry->res->flags & IORESOURCE_WINDOW))
+ resource_list_destroy_entry(entry);
+ }
+ return status;
+}
+
/*
* Lookup the bus range for the domain in MCFG, and set up config space
* mapping.
@@ -190,6 +203,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
}

root_ops->release_info = pci_acpi_generic_release_info;
+ root_ops->prepare_resources = pci_acpi_root_prepare_resources;
root_ops->pci_ops = &ri->cfg->ops->pci_ops;
bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
if (!bus)

2016-12-03 00:33:55

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/02/2016 06:39 PM, Bjorn Helgaas wrote:
> On Thu, Dec 01, 2016 at 11:08:23PM -0500, Jon Masters wrote:

>> Let's see if I summarized this correctly...
>>
>> 1. The MMIO registers for the host bridge itself need to be described
>> somewhere, especially if we need to find those in a quirk and poke
>> them. Since those registers are very much part of the bridge device,
>> it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
>>
>> 2. The address space covering these registers MUST be described as a
>> ResourceConsumer in order to avoid accidentally exposing them as
>> available for use by downstream devices on the PCI bus.
>>
>> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
>> This is a macro that doesn't have the notion of a producer or consumer.
>> HOWEVER various interpretations seem to be that this could/should
>> default to being interpreted as a consumed region.
>
> I agree; I think that per spec, Memory24, Memory32, Memory32Fixed, IO,
> and FixedIO should all be for consumed resources, not for bridge
> windows, since they don't have the notion of producer.

Ok. If we ultimately codify this somewhere as the general Linux kernel
consensus (Rafael?) then we can also go and get the various ARM server
specs updated to reflect this in (for e.g.) reference firmware builds.

> I'm pretty sure there's x86 firmware in the field that uses these for
> windows, so I think we have to accept that usage, at least on x86.

Ok. I was pondering how to even go about finding that out, but even if
I scheduled a job across RH's infra to look, that would be a drop in
the bucket of possible machines that might be out there doing this.

<snip>

> Per spec, we should ignore the Consumer/Producer bit in Word/DWord/QWord
> descriptors. In bridge devices on x86, I think we have to treat them as
> producers (windows) because that's how they've been typically used.

Ok.

>> BUT if we were to do that, it would break existing shipping systems since
>> there are quirks out there that use this form to find the base CSR:
>>
>> if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
>> fixed32 = &acpi_res->data.fixed_memory32;
>> port->csr_base = ioremap(fixed32->address,
>> fixed32->address_length);
>> return AE_CTRL_TERMINATE;
>> }
>
> I think this is a valid usage of FixedMemory32 in terms of the spec.
> Linux currently handles this as a window if it appears in a PNP0A03
> device because some x86 firmware used it that way.
>
> We might be able to handle it differently on arm64, e.g., by making an
> arm64 version of pci_acpi_root_prepare_resources() that checks for
> IORESOURCE_WINDOW.

This is something we should figure out the consensus on and codify.

>> 2. What would happen if we had a difference policy on arm64 for such
>> resources. x86 has an "exception" for accessing the config space
>> using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
>> we can make the rules for a new platform (i.e. actually prescribe
>> exactly what the behavior is, rather than have it not be defined).
>> This is of course terrible in that existing BIOS vendors and so on
>> won't necessarily know this when working on ARM ACPI later on.
>
>> Indeed. And in the case of m400, it is currently this in shipping systems:
>>
>> Memory32Fixed (ReadWrite,
>> 0x1F500000, // Address Base
>> 0x00010000, // Address Length
>> )
>
>>>>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
>>>>
>>>> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
>>>> is available for use by devices on bus 0000:00, but I think you're
>>>> saying it is consumed by the bridge itself, not forwarded down to PCI.
>
> I think this ASL is perfectly spec-compliant, and what's wrong is the
> way Linux is interpreting it.
>
> I'm not clear on what's terrible about idea 2. I think it's basically
> what I suggested above, i.e., something like the patch below, which I
> think (hope) would keep us from thinking that region is a window.

I was guarded because I like harmony between architectures (where it
makes sense). But that said, there is nothing to prevent having a
different interpretation on ARM, as long as everyone agrees on it.

> Even without this patch, I don't think it's a show-stopper to have
> Linux mistakenly thinking this region is routed to PCI, because the
> driver does reserve it and the PCI core will never try to use it.

Ok. So are you happy with pulling in Duc's v4 patch and retaining
status quo on the bridge resources for 4.10? We can continue to
discuss this and ultimately set a direction for the spec, as well
as clean up existing and future designs (certainly the latter) to
ensure all possible resources used by a platform are described
and consumed correctly, and hopefully live with the slightly
odd little bit of address space eaten up for that RC CSR :)

> diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> index 8a177a1..a16fc8e 100644
> --- a/arch/arm64/kernel/pci.c
> +++ b/arch/arm64/kernel/pci.c
> @@ -114,6 +114,19 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
> return 0;
> }
>
> +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
> +{
> + struct resource_entry *entry, *tmp;
> + int status;
> +
> + status = acpi_pci_probe_root_resources(ci);
> + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
> + if (!(entry->res->flags & IORESOURCE_WINDOW))
> + resource_list_destroy_entry(entry);
> + }
> + return status;
> +}
> +
> /*
> * Lookup the bus range for the domain in MCFG, and set up config space
> * mapping.
> @@ -190,6 +203,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
> }
>
> root_ops->release_info = pci_acpi_generic_release_info;
> + root_ops->prepare_resources = pci_acpi_root_prepare_resources;
> root_ops->pci_ops = &ri->cfg->ops->pci_ops;
> bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
> if (!bus)
>

I can give this patch a quick boot test a bit later.

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-03 07:07:25

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Fri, Dec 2, 2016 at 3:39 PM, Bjorn Helgaas <[email protected]> wrote:
>
> On Thu, Dec 01, 2016 at 11:08:23PM -0500, Jon Masters wrote:
> > Hi Bjorn, Duc, Mark,
> >
> > I switched my brain to the on mode and went and read some specs, and a few
> > tables, so here's my 2 cents on this...
> >
> > On 12/01/2016 06:22 PM, Duc Dang wrote:
> > > On Thu, Dec 1, 2016 at 3:07 PM, Bjorn Helgaas <[email protected]> wrote:
> > >> On Thu, Dec 01, 2016 at 02:10:10PM -0800, Duc Dang wrote:
> >
> > >>>>> The SoC provide some number of RC bridges, each with a different base
> > >>>>> for some mmio registers. Even if segment is legitimate in MCFG, there
> > >>>>> is still a problem if a platform doesn't use the segment ordering
> > >>>>> implied by the code. But the PNP0A03 _CRS does have this base address
> > >>>>> as the first memory resource, so we could get it from there and not
> > >>>>> have hard-coded addresses and implied ording in the quirk code.
> > >>>>
> > >>>> I'm confused. Doesn't the current code treat every item in PNP0A03
> > >>>> _CRS as a window? Do you mean the first resource is handled
> > >>>> differently somehow? The Consumer/Producer bit could allow us to do
> > >>>> this by marking the RC MMIO space as "Consumer", but I didn't think
> > >>>> that strategy was quite working yet.
> >
> > Let's see if I summarized this correctly...
> >
> > 1. The MMIO registers for the host bridge itself need to be described
> > somewhere, especially if we need to find those in a quirk and poke
> > them. Since those registers are very much part of the bridge device,
> > it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
> >
> > 2. The address space covering these registers MUST be described as a
> > ResourceConsumer in order to avoid accidentally exposing them as
> > available for use by downstream devices on the PCI bus.
> >
> > 3. The ACPI specification allows for resources of the type "Memory32Fixed".
> > This is a macro that doesn't have the notion of a producer or consumer.
> > HOWEVER various interpretations seem to be that this could/should
> > default to being interpreted as a consumed region.
>
> I agree; I think that per spec, Memory24, Memory32, Memory32Fixed, IO,
> and FixedIO should all be for consumed resources, not for bridge
> windows, since they don't have the notion of producer.
>
> I'm pretty sure there's x86 firmware in the field that uses these for
> windows, so I think we have to accept that usage, at least on x86.
>
> > 4. At one point, a regression was added to the kernel:
> >
> > 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
> > host bridge itself")
> >
> > Which lead to a series on conversations about what should happen
> > for bridge resources (e.g. https://lkml.org/lkml/2015/3/24/962 )
> >
> > 5. This resulted in the following commit reverting point 4:
> >
> > 2c62e8492ed7 ("x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff]
> > available on PCI bus")
> >
> > Which also stated that:
> >
> > "This solution will also ease the way to consolidate ACPI PCI host
> > bridge common code from x86, ia64 and ARM64"
> >
> > End of summary.
> >
> > So it seems that generally there is an aversion to having bridge resources
> > be described in this manner and you would like to require that they be
> > described e.g. using QWordMemory with a ResourceConsumer type?
>
> Per spec, we should ignore the Consumer/Producer bit in Word/DWord/QWord
> descriptors. In bridge devices on x86, I think we have to treat them as
> producers (windows) because that's how they've been typically used.
>
> > BUT if we were to do that, it would break existing shipping systems since
> > there are quirks out there that use this form to find the base CSR:
> >
> > if (acpi_res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
> > fixed32 = &acpi_res->data.fixed_memory32;
> > port->csr_base = ioremap(fixed32->address,
> > fixed32->address_length);
> > return AE_CTRL_TERMINATE;
> > }
>
> I think this is a valid usage of FixedMemory32 in terms of the spec.
> Linux currently handles this as a window if it appears in a PNP0A03
> device because some x86 firmware used it that way.
>
> We might be able to handle it differently on arm64, e.g., by making an
> arm64 version of pci_acpi_root_prepare_resources() that checks for
> IORESOURCE_WINDOW.
>
> > 2. What would happen if we had a difference policy on arm64 for such
> > resources. x86 has an "exception" for accessing the config space
> > using IO port 0xCF8-0xCFF (a fairly reasonable exception!) and
> > we can make the rules for a new platform (i.e. actually prescribe
> > exactly what the behavior is, rather than have it not be defined).
> > This is of course terrible in that existing BIOS vendors and so on
> > won't necessarily know this when working on ARM ACPI later on.
>
> > Indeed. And in the case of m400, it is currently this in shipping systems:
> >
> > Memory32Fixed (ReadWrite,
> > 0x1F500000, // Address Base
> > 0x00010000, // Address Length
> > )
>
> > >>> [ 0.822990] pci_bus 0000:00: root bus resource [mem 0x1f2b0000-0x1f2bffff]
> > >>
> > >> I think this is wrong. The PCI core thinks [mem 0x1f2b0000-0x1f2bffff]
> > >> is available for use by devices on bus 0000:00, but I think you're
> > >> saying it is consumed by the bridge itself, not forwarded down to PCI.
>
> I think this ASL is perfectly spec-compliant, and what's wrong is the
> way Linux is interpreting it.
>
> I'm not clear on what's terrible about idea 2. I think it's basically
> what I suggested above, i.e., something like the patch below, which I
> think (hope) would keep us from thinking that region is a window.
>
> Even without this patch, I don't think it's a show-stopper to have
> Linux mistakenly thinking this region is routed to PCI, because the
> driver does reserve it and the PCI core will never try to use it.
>
> diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> index 8a177a1..a16fc8e 100644
> --- a/arch/arm64/kernel/pci.c
> +++ b/arch/arm64/kernel/pci.c
> @@ -114,6 +114,19 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
> return 0;
> }
>
> +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
> +{
> + struct resource_entry *entry, *tmp;
> + int status;
> +
> + status = acpi_pci_probe_root_resources(ci);
> + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
> + if (!(entry->res->flags & IORESOURCE_WINDOW))
> + resource_list_destroy_entry(entry);
> + }
> + return status;
> +}
> +
> /*
> * Lookup the bus range for the domain in MCFG, and set up config space
> * mapping.
> @@ -190,6 +203,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
> }
>
> root_ops->release_info = pci_acpi_generic_release_info;
> + root_ops->prepare_resources = pci_acpi_root_prepare_resources;
> root_ops->pci_ops = &ri->cfg->ops->pci_ops;
> bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
> if (!bus)

I tried your patch above with my X-Gene ECAM v4 patch on Mustang, here
is the kernel boot log and output of 'cat /proc/iomem'. The PCIe core
does not print the MMIO space as a window (which is expected per your
patch above).

Booting Linux on physical CPU 0x0
Linux version 4.9.0-rc1-17008-gf18738b-dirty
(dhdang@dhdang-workstation-01) (gcc version 4.9.3 20150218
(prerelease) (APM-8.0.10-le) ) #78 SMP PREEMPT Fri Dec 2 22:32:29 PST
2016
Boot CPU: AArch64 Processor [500f0001]
earlycon: uart8250 at MMIO32 0x000000001c020000 (options '')
bootconsole [uart8250] enabled
efi: Getting EFI parameters from FDT:
efi: EFI v2.40 by X-Gene Mustang Board EFI Oct 17 2016 13:54:05
efi: ACPI=0x47fa700000 ACPI 2.0=0x47fa700014 SMBIOS
3.0=0x47fa9db000 ESRT=0x47ff006f18
esrt: Reserving ESRT space from 0x00000047ff006f18 to 0x00000047ff006f78.
cma: Reserved 256 MiB at 0x00000040f0000000
ACPI: Early table checksum verification disabled
ACPI: RSDP 0x00000047FA700014 000024 (v02 APM )
ACPI: XSDT 0x00000047FA6F00E8 000084 (v01 APM XGENE 00000003
01000013)
ACPI: FACP 0x00000047FA6C0000 00010C (v05 APM XGENE 00000003
INTL 20140724)
ACPI: DSDT 0x00000047FA6D0000 005922 (v05 APM APM88xxx 00000001
INTL 20140724)
ACPI: DBG2 0x00000047FA6E0000 0000AA (v00 APMC0D XGENEDBG 00000000
INTL 20140724)
ACPI: GTDT 0x00000047FA6A0000 000060 (v02 APM XGENE 00000001
INTL 20140724)
ACPI: MCFG 0x00000047FA690000 00003C (v01 APM XGENE 00000002
INTL 20140724)
ACPI: SPCR 0x00000047FA680000 000050 (v02 APMC0D XGENESPC 00000000
INTL 20140724)
ACPI: SSDT 0x00000047FA670000 00002D (v02 APM XGENE 00000001
INTL 20140724)
ACPI: BERT 0x00000047FA660000 000030 (v01 APM XGENE 00000002
INTL 20140724)
ACPI: HEST 0x00000047FA650000 0002A8 (v01 APM XGENE 00000002
INTL 20140724)
ACPI: APIC 0x00000047FA640000 0002A4 (v03 APM XGENE 00000003
01000013)
ACPI: SSDT 0x00000047FA630000 000063 (v02 REDHAT MACADDRS 00000001
01000013)
ACPI: SSDT 0x00000047FA620000 000032 (v02 REDHAT UARTCLKS 00000001
01000013)
ACPI: PCCT 0x00000047FA610000 000300 (v01 APM XGENE 00000003
01000013)
ACPI: SPCR: console: uart,mmio,0x1c020000,115200
On node 0 totalpages: 8388608
DMA zone: 16384 pages used for memmap
DMA zone: 0 pages reserved
DMA zone: 1048576 pages, LIFO batch:31
Normal zone: 114688 pages used for memmap
Normal zone: 7340032 pages, LIFO batch:31
psci: is not implemented in ACPI.
percpu: Embedded 21 pages/cpu @ffff8007fff16000 s48000 r8192 d29824 u86016
pcpu-alloc: s48000 r8192 d29824 u86016 alloc=21*4096
pcpu-alloc: [0] 0 [0] 1 [0] 2 [0] 3 [0] 4 [0] 5 [0] 6 [0] 7
Detected PIPT I-cache on CPU0
Built 1 zonelists in Zone order, mobility grouping on. Total pages: 8257536
Kernel command line: BOOT_IMAGE=/apm-opensource/Image
console=ttyS0,115200 earlycon=uart8250,mmio32,0x1c020000 root=/dev/ram
rw netdev=eth0 debug acpi=force
log_buf_len individual max cpu contribution: 4096 bytes
log_buf_len total cpu_extra contributions: 28672 bytes
log_buf_len min size: 16384 bytes
log_buf_len: 65536 bytes
early log buf free: 12844(78%)
PID hash table entries: 4096 (order: 3, 32768 bytes)
Dentry cache hash table entries: 4194304 (order: 13, 33554432 bytes)
Inode-cache hash table entries: 2097152 (order: 12, 16777216 bytes)
software IO TLB [mem 0x40ebfff000-0x40effff000] (64MB) mapped at
[ffff8000ebfff000-ffff8000efffefff]
Memory: 32615844K/33554432K available (8700K kernel code, 870K rwdata,
3792K rodata, 1024K init, 284K bss, 676444K reserved, 262144K
cma-reserved)
Virtual kernel memory layout:
modules : 0xffff000000000000 - 0xffff000008000000 ( 128 MB)
vmalloc : 0xffff000008000000 - 0xffff7dffbfff0000 (129022 GB)
.text : 0xffff000008080000 - 0xffff000008900000 ( 8704 KB)
.rodata : 0xffff000008900000 - 0xffff000008cc0000 ( 3840 KB)
.init : 0xffff000008cc0000 - 0xffff000008dc0000 ( 1024 KB)
.data : 0xffff000008dc0000 - 0xffff000008e99a00 ( 871 KB)
.bss : 0xffff000008e99a00 - 0xffff000008ee0bc0 ( 285 KB)
fixed : 0xffff7dfffe7fd000 - 0xffff7dfffec00000 ( 4108 KB)
PCI I/O : 0xffff7dfffee00000 - 0xffff7dffffe00000 ( 16 MB)
vmemmap : 0xffff7e0000000000 - 0xffff800000000000 ( 2048 GB maximum)
0xffff7e0000000000 - 0xffff7e0020000000 ( 512 MB actual)
memory : 0xffff800000000000 - 0xffff800800000000 ( 32768 MB)
SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=8, Nodes=1
Preemptible hierarchical RCU implementation.
Build-time adjustment of leaf fanout to 64.
RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=8.
RCU: Adjusting geometry for rcu_fanout_leaf=64, nr_cpu_ids=8
NR_IRQS:64 nr_irqs:64 0
GIC: Using split EOI/Deactivate mode
GICv3: No distributor detected at @ffff000008010000, giving up
arm_arch_timer: Architected cp15 timer(s) running at 50.00MHz (phys).
clocksource: arch_sys_counter: mask: 0xffffffffffffff max_cycles:
0xb8812736b, max_idle_ns: 440795202655 ns
sched_clock: 56 bits at 50MHz, resolution 20ns, wraps every 4398046511100ns
Console: colour dummy device 80x25
Calibrating delay loop (skipped), value calculated using timer
frequency.. 100.00 BogoMIPS (lpj=200000)
pid_max: default: 32768 minimum: 301
ACPI: Core revision 20160831
ACPI Error: Method parse/execution failed [\_SB.ET00._STA] (Node
ffff8007fa9fcdc0), AE_CTRL_PARSE_CONTINUE (20160831/psparse-543)
ACPI Error: Invalid zero thread count in method (20160831/dsmethod-796)
ACPI Error: Invalid OwnerId: 0x00 (20160831/utownerid-186)
ACPI Error: Method parse/execution failed [\_SB.ET01._STA] (Node
ffff8007fa9fe078), AE_CTRL_PARSE_CONTINUE (20160831/psparse-543)
ACPI Error: Invalid zero thread count in method (20160831/dsmethod-796)
ACPI Error: Invalid OwnerId: 0x00 (20160831/utownerid-186)
ACPI: 4 ACPI AML tables successfully acquired and loaded

Security Framework initialized
Mount-cache hash table entries: 65536 (order: 7, 524288 bytes)
Mountpoint-cache hash table entries: 65536 (order: 7, 524288 bytes)
ASID allocator initialised with 65536 entries
Remapping and enabling EFI services.
EFI remap 0x0000000010510000 => 0000000020000000
EFI remap 0x0000000010548000 => 0000000020018000
EFI remap 0x0000000017000000 => 0000000020020000
EFI remap 0x000000001c025000 => 0000000020035000
EFI remap 0x00000047fa5a0000 => 0000000020040000
EFI remap 0x00000047fa5b0000 => 0000000020050000
EFI remap 0x00000047fa5c0000 => 0000000020060000
EFI remap 0x00000047fa710000 => 0000000020070000
EFI remap 0x00000047fa730000 => 0000000020090000
EFI remap 0x00000047fa790000 => 00000000200f0000
EFI remap 0x00000047fa7a0000 => 0000000020100000
EFI remap 0x00000047fa9a0000 => 0000000020300000
EFI remap 0x00000047fa9b0000 => 0000000020310000
EFI remap 0x00000047ff9a0000 => 0000000020330000
EFI remap 0x00000047ff9c0000 => 0000000020340000
Detected PIPT I-cache on CPU1
CPU1: Booted secondary processor [500f0001]
Detected PIPT I-cache on CPU2
CPU2: Booted secondary processor [500f0001]
Detected PIPT I-cache on CPU3
CPU3: Booted secondary processor [500f0001]
Detected PIPT I-cache on CPU4
CPU4: Booted secondary processor [500f0001]
Detected PIPT I-cache on CPU5
CPU5: Booted secondary processor [500f0001]
Detected PIPT I-cache on CPU6
CPU6: Booted secondary processor [500f0001]
Detected PIPT I-cache on CPU7
CPU7: Booted secondary processor [500f0001]
Brought up 8 CPUs
SMP: Total of 8 processors activated.
CPU features: detected feature: 32-bit EL0 Support
CPU: All CPU(s) started at EL2
devtmpfs: initialized
SMBIOS 3.0.0 present.
clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff,
max_idle_ns: 7645041785100000 ns
pinctrl core: initialized pinctrl subsystem
NET: Registered protocol family 16
cpuidle: using governor menu
vdso: 2 pages (1 code @ ffff000008907000, 1 data @ ffff000008dc4000)
hw-breakpoint: found 4 breakpoint and 4 watchpoint registers.
DMA: preallocated 256 KiB pool for atomic allocations
ACPI: bus type PCI registered
Serial: AMBA PL011 UART driver
HugeTLB registered 2 MB page size, pre-allocated 0 pages
ACPI: Added _OSI(Module Device)
ACPI: Added _OSI(Processor Device)
ACPI: Added _OSI(3.0 _SCP Extensions)
ACPI: Added _OSI(Processor Aggregator Device)
ACPI: Interpreter enabled
ACPI: Using GIC for interrupt routing
ACPI: MCFG table detected, 1 entries
ACPI: Power Resource [SCVR] (on)
ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI]
acpi PNP0A08:00: _OSC: OS now controls [PCIeHotplug PME AER PCIeCapability]
acpi PNP0A08:00: MCFG quirk: ECAM at [mem 0xe0d0000000-0xe0dfffffff]
for [bus 00-ff] with xgene_v1_pcie_ecam_ops
acpi PNP0A08:00: [Firmware Bug]: ECAM area [mem
0xe0d0000000-0xe0dfffffff] not reserved in ACPI namespace
acpi PNP0A08:00: ECAM at [mem 0xe0d0000000-0xe0dfffffff] for [bus 00-ff]
Remapped I/O 0x000000e010000000 to [io 0x0000-0xffff window]
PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [io 0x0000-0xffff window] (bus
address [0x10000000-0x1000ffff])
pci_bus 0000:00: root bus resource [mem 0xe040000000-0xe07fffffff
window] (bus address [0x40000000-0x7fffffff])
pci_bus 0000:00: root bus resource [mem 0xf000000000-0xffffffffff window]
pci_bus 0000:00: root bus resource [bus 00-ff]
pci 0000:00:00.0: [10e8:e004] type 01 class 0x060400
pci 0000:00:00.0: supports D1 D2
pci 0000:01:00.0: [15b3:1003] type 00 class 0x020000
pci 0000:01:00.0: reg 0x10: [mem 0xe040000000-0xe0400fffff 64bit]
pci 0000:01:00.0: reg 0x18: [mem 0xe042000000-0xe043ffffff 64bit pref]
pci 0000:01:00.0: reg 0x30: [mem 0xfff00000-0xffffffff pref]
pci_bus 0000:00: on NUMA node 0
pci 0000:00:00.0: BAR 15: assigned [mem 0xf000000000-0xf001ffffff 64bit pref]
pci 0000:00:00.0: BAR 14: assigned [mem 0xe040000000-0xe0401fffff]
pci 0000:01:00.0: BAR 2: assigned [mem 0xf000000000-0xf001ffffff 64bit pref]
pci 0000:01:00.0: BAR 0: assigned [mem 0xe040000000-0xe0400fffff 64bit]
pci 0000:01:00.0: BAR 6: assigned [mem 0xe040100000-0xe0401fffff pref]
pci 0000:00:00.0: PCI bridge to [bus 01]
pci 0000:00:00.0: bridge window [mem 0xe040000000-0xe0401fffff]
pci 0000:00:00.0: bridge window [mem 0xf000000000-0xf001ffffff 64bit pref]
vgaarb: loaded
SCSI subsystem initialized
libata version 3.00 loaded.
ACPI: bus type USB registered
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
pps_core: LinuxPPS API ver. 1 registered
pps_core: Software ver. 5.3.6 - Copyright 2005-2007 Rodolfo Giometti
<[email protected]>
PTP clock support registered
Registered efivars operations
Advanced Linux Sound Architecture Driver Initialized.
clocksource: Switched to clocksource arch_sys_counter
VFS: Disk quotas dquot_6.6.0
VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
pnp: PnP ACPI init
pnp: PnP ACPI: found 0 devices
NET: Registered protocol family 2
TCP established hash table entries: 262144 (order: 9, 2097152 bytes)
TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
TCP: Hash tables configured (established 262144 bind 65536)
UDP hash table entries: 16384 (order: 7, 524288 bytes)
UDP-Lite hash table entries: 16384 (order: 7, 524288 bytes)
NET: Registered protocol family 1
RPC: Registered named UNIX socket transport module.
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
RPC: Registered tcp NFSv4.1 backchannel transport module.
PCI: CLS 0 bytes, default 128
Unpacking initramfs...
Freeing initrd memory: 14676K (ffff8007f8767000 - ffff8007f95bc000)
kvm [1]: 8-bit VMID
kvm [1]: IDMAP page: 4000af5000
kvm [1]: HYP VA range: 800000000000:ffffffffffff
kvm [1]: Hyp mode initialized successfully
kvm [1]: vgic-v2@780cf000
kvm [1]: vgic interrupt IRQ1
kvm [1]: virtual timer IRQ4
futex hash table entries: 2048 (order: 6, 262144 bytes)
audit: initializing netlink subsys (disabled)
audit: type=2000 audit(4.120:1): initialized
workingset: timestamp_bits=46 max_order=23 bucket_order=0
squashfs: version 4.0 (2009/01/31) Phillip Lougher
NFS: Registering the id_resolver key type
Key type id_resolver registered
Key type id_legacy registered
nfs4filelayout_init: NFSv4 File Layout Driver Registering...
9p: Installing v9fs 9p2000 file system support
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 247)
io scheduler noop registered
io scheduler cfq registered (default)
libphy: mdio_driver_register: phy-bcm-ns2-pci
xgene-gpio APMC0D14:00: X-Gene GPIO driver registered.
aer 0000:00:00.0:pcie002: service driver aer loaded
pcieport 0000:00:00.0: Signaling PME through PCIe PME interrupt
pci 0000:01:00.0: Signaling PME through PCIe PME interrupt
pcie_pme 0000:00:00.0:pcie001: service driver pcie_pme loaded
input: Power Button as /devices/LNXSYSTM:00/PNP0C0C:00/input/input0
ACPI: Power Button [PWRB]
xenfs: not registering filesystem on non-xen platform
Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
console [ttyS0] disabled
APMC0D08:00: ttyS0 at MMIO 0x1c020000 (irq = 22, base_baud = 3125000)
is a U6_16550A
console [ttyS0] enabled
bootconsole [uart8250] disabled
APMC0D08:01: ttyS1 at MMIO 0x1c021000 (irq = 23, base_baud = 3125000)
is a U6_16550A
SuperH (H)SCI(F) driver initialized
msm_serial: driver initialized
Failed to find cpu0 device node
Unable to detect cache hierarchy from DT for CPU 0
loop: module loaded
hisi_sas: driver version v1.6
xgene-ahci APMC0D0D:00: skip clock and PHY initialization
xgene-ahci APMC0D0D:00: controller can't do NCQ, turning off CAP_NCQ
xgene-ahci APMC0D0D:00: AHCI 0001.0300 32 slots 2 ports 6 Gbps 0x3
impl platform mode
xgene-ahci APMC0D0D:00: flags: 64bit sntf pm only pmp fbs pio slum part ccc
xgene-ahci APMC0D0D:00: port 0 is not capable of FBS
xgene-ahci APMC0D0D:00: port 1 is not capable of FBS
scsi host0: xgene-ahci
scsi host1: xgene-ahci
ata1: SATA max UDMA/133 mmio [mem 0x1a400000-0x1a400fff] port 0x100 irq 27
ata2: SATA max UDMA/133 mmio [mem 0x1a400000-0x1a400fff] port 0x180 irq 27
xgene-ahci APMC0D0D:01: skip clock and PHY initialization
xgene-ahci APMC0D0D:01: controller can't do NCQ, turning off CAP_NCQ
xgene-ahci APMC0D0D:01: AHCI 0001.0300 32 slots 2 ports 6 Gbps 0x3
impl platform mode
xgene-ahci APMC0D0D:01: flags: 64bit sntf pm only pmp fbs pio slum part ccc
xgene-ahci APMC0D0D:01: port 0 is not capable of FBS
xgene-ahci APMC0D0D:01: port 1 is not capable of FBS
scsi host2: xgene-ahci
scsi host3: xgene-ahci
ata3: SATA max UDMA/133 mmio [mem 0x1a800000-0x1a800fff] port 0x100 irq 28
ata4: SATA max UDMA/133 mmio [mem 0x1a800000-0x1a800fff] port 0x180 irq 28
libphy: APM X-Gene MDIO bus: probed
libphy: Fixed MDIO Bus: probed
tun: Universal TUN/TAP device driver, 1.6
tun: (C) 1999-2004 Max Krasnyansky <[email protected]>
xgene-enet APMC0D05:00: clocks have been setup already
xgene-enet APMC0D30:00: clocks have been setup already
ata1: SATA link down (SStatus 0 SControl 4300)
ata2: SATA link down (SStatus 0 SControl 4300)
xgene-enet APMC0D30:01: clocks have been setup already
xgene-enet APMC0D31:00: clocks have been setup already
e1000e: Intel(R) PRO/1000 Network Driver - 3.2.6-k
e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
igb: Intel(R) Gigabit Ethernet Network Driver - version 5.4.0-k
igb: Copyright (c) 2007-2014 Intel Corporation.
igbvf: Intel(R) Gigabit Virtual Function Network Driver - version 2.4.0-k
igbvf: Copyright (c) 2009 - 2012 Intel Corporation.
sky2: driver version 1.30
mlx4_core: Mellanox ConnectX core driver v2.2-1 (Feb, 2014)
ata3: SATA link down (SStatus 0 SControl 4300)
mlx4_core: Initializing 0000:01:00.0
ata4: SATA link up 6.0 Gbps (SStatus 133 SControl 4300)
ata4.00: ATA-8: SDLFOCAM-800G-1HA1, ZZ37RE92, max UDMA/133
ata4.00: 1562824368 sectors, multi 0: LBA48 NCQ (depth 0/32)
ata4.00: configured for UDMA/133
scsi 3:0:0:0: Direct-Access ATA SDLFOCAM-800G-1H RE92 PQ: 0 ANSI: 5
sd 3:0:0:0: [sda] 1562824368 512-byte logical blocks: (800 GB/745 GiB)
sd 3:0:0:0: [sda] 4096-byte physical blocks
sd 3:0:0:0: [sda] Write Protect is off
sd 3:0:0:0: [sda] Mode Sense: 00 3a 00 00
sd 3:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't
support DPO or FUA
sda: sda1 sda2 sda3
sd 3:0:0:0: [sda] Attached SCSI disk
pcieport 0000:00:00.0: AER: Multiple Corrected error received: id=0000
pcieport 0000:00:00.0: PCIe Bus Error: severity=Corrected,
type=Physical Layer, id=0000(Receiver ID)
pcieport 0000:00:00.0: device [10e8:e004] error status/mask=00000041/00002001
pcieport 0000:00:00.0: [ 6] Bad TLP
pcieport 0000:00:00.0: AER: Corrected error received: id=0000
pcieport 0000:00:00.0: can't find device of ID0000
pcieport 0000:00:00.0: AER: Corrected error received: id=0000
pcieport 0000:00:00.0: can't find device of ID0000
pcieport 0000:00:00.0: AER: Corrected error received: id=0000
pcieport 0000:00:00.0: PCIe Bus Error: severity=Corrected,
type=Physical Layer, id=0000(Transmitter ID)
pcieport 0000:00:00.0: device [10e8:e004] error status/mask=00001041/00002001
pcieport 0000:00:00.0: [ 6] Bad TLP
pcieport 0000:00:00.0: [12] Replay Timer Timeout
pcieport 0000:00:00.0: AER: Multiple Corrected error received: id=0000
pcieport 0000:00:00.0: can't find device of ID0000
pcieport 0000:00:00.0: AER: Corrected error received: id=0000
pcieport 0000:00:00.0: can't find device of ID0000
pcieport 0000:00:00.0: AER: Corrected error received: id=0000
pcieport 0000:00:00.0: can't find device of ID0000
mlx4_core 0000:01:00.0: PCIe link speed is 8.0GT/s, device supports 8.0GT/s
mlx4_core 0000:01:00.0: PCIe link width is x8, device supports x8
mlx4_en: Mellanox ConnectX HCA Ethernet driver v2.2-1 (Feb 2014)
mlx4_en 0000:01:00.0: Activating port:1
mlx4_en: 0000:01:00.0: Port 1: Using 64 TX rings
mlx4_en: 0000:01:00.0: Port 1: Using 4 RX rings
mlx4_en: 0000:01:00.0: Port 1: frag:0 - size:1522 prefix:0 stride:1536
mlx4_en: 0000:01:00.0: Port 1: Initializing port
mlx4_en 0000:01:00.0: registered PHC clock
mlx4_en 0000:01:00.0: Activating port:2
mlx4_en: 0000:01:00.0: Port 2: Using 64 TX rings
mlx4_en: 0000:01:00.0: Port 2: Using 4 RX rings
mlx4_en: 0000:01:00.0: Port 2: frag:0 - size:1522 prefix:0 stride:1536
mlx4_en: 0000:01:00.0: Port 2: Initializing port
VFIO - User Level meta-driver version: 0.3
ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
ehci-pci: EHCI PCI platform driver
ehci-platform: EHCI generic platform driver
ehci-exynos: EHCI EXYNOS driver
ehci-msm: Qualcomm On-Chip EHCI Host Controller
ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
ohci-pci: OHCI PCI platform driver
ohci-platform: OHCI generic platform driver
ohci-exynos: OHCI EXYNOS driver
xhci-hcd: probe of xhci-hcd.0.auto failed with error -5
xhci-hcd: probe of xhci-hcd.1.auto failed with error -5
usbcore: registered new interface driver usb-storage
mousedev: PS/2 mouse device common for all mice
rtc-efi rtc-efi: rtc core: registered rtc-efi as rtc0
i2c /dev entries driver
sdhci: Secure Digital Host Controller Interface driver
sdhci: Copyright(c) Pierre Ossman
Synopsys Designware Multimedia Card Interface Driver
sdhci-pltfm: SDHCI platform and OF driver helper
ledtrig-cpu: registered to indicate activity on CPUs
usbcore: registered new interface driver usbhid
usbhid: USB HID core driver
NET: Registered protocol family 17
9pnet: Installing 9P2000 support
Key type dns_resolver registered
registered taskstats version 1
rtc-efi rtc-efi: setting system clock to 2016-12-03 06:49:10 UTC (1480747750)
ALSA device list:
No soundcards found.
Freeing unused kernel memory: 1024K (ffff800000ec0000 - ffff800000fc0000)
udevd[1484]: starting version 182
random: fast init done
[root@(none) ~]# cat /proc/io mem
10000000-103fffff : APMC0D05:00
10520000-10523fff : APMC0D18:00
10524000-10527fff : APMC0D17:00
10540000-10547fff : APMC0D01:00
1054a000-1054a0ff : APMC0D43:00
1054a000-1054a01b : APMC0D41:00
17001000-170013ff : APMC0D15:00
1701c000-1701cfff : APMC0D14:00
17020000-1702d0ff : APMC0D05:00
17020000-1702d0ff : APMC0D65:00
17020000-1702d0ff : APMC0D3E:00
17020000-1702d0ff : APMC0D65:00
17030000-1703ffff : APMC0D05:00
18000000-183fffff : APMC0D31:00
18000000-183fffff : APMC0D6A:00
19000000-19007fff : 808622B7:00
1900c100-190fffff : 808622B7:00
1900c100-190fffff : 808622B7:00
19800000-19807fff : 808622B7:01
1980c100-198fffff : 808622B7:01
1980c100-198fffff : 808622B7:01
1a400000-1a400fff : APMC0D0D:00
1a400000-1a400fff : APMC0D0D:00
1a800000-1a800fff : APMC0D0D:01
1a800000-1a800fff : APMC0D0D:01
1b000000-1b3fffff : APMC0D43:00
1b000000-1b007fff : APMC0D30:01
1b000000-1b001fff : APMC0D30:00
1b00a000-1b00bfff : APMC0D41:00
1c000000-1c0000ff : APMC0D0C:00
1c020000-1c0200ff : APMC0D08:00
1c020000-1c02001f : serial
1c021000-1c0210ff : APMC0D08:01
1c021000-1c02101f : serial
1c024000-1c024fff : APMC0D07:00
1c024000-1c024fff : APMC0D07:00
1f200000-1f20ffff : APMC0D41:00
1f200000-1f20ffff : APMC0D43:00
1f200000-1f20c2ff : APMC0D30:01
1f200000-1f20c2ff : APMC0D30:00
1f210000-1f21d0ff : APMC0D30:00
1f210030-1f21d0ff : APMC0D30:01
1f220000-1f220fff : APMC0D0D:00
1f220000-1f220fff : APMC0D0D:00
1f227000-1f227fff : APMC0D0D:00
1f227000-1f227fff : APMC0D0D:00
1f22d000-1f22dfff : APMC0D0D:00
1f22d000-1f22dfff : APMC0D0D:00
1f22e000-1f22efff : APMC0D0D:00
1f22e000-1f22efff : APMC0D0D:00
1f230000-1f230fff : APMC0D0D:01
1f230000-1f230fff : APMC0D0D:01
1f23d000-1f23dfff : APMC0D0D:01
1f23d000-1f23dfff : APMC0D0D:01
1f23e000-1f23efff : APMC0D0D:01
1f23e000-1f23efff : APMC0D0D:01
1f250000-1f25ffff : APMC0D41:00
1f270000-1f27ffff : APMC0D43:00
1f280000-1f28ffff : 808622B7:00
1f290000-1f29ffff : 808622B7:01
1f2a0000-1f2a0fff : APMC0D0C:00
1f2b0000-1f2bffff : PNP0A08:00
1f600000-1f60ffff : APMC0D31:00
1f600000-1f60ffff : APMC0D6A:00
1f610000-1f61ffff : APMC0D31:00
78810000-78810fff : APMC0D5C:00
79000000-798fffff : APMC0D0E:00
7e200000-7e200fff : APMC0D5C:00
7e610000-7e610fff : APMC0D5D:00
7e700000-7e700fff : APMC0D5C:00
7e710000-7e710fff : APMC0D5F:00
7e720000-7e720fff : APMC0D5C:00
7e730000-7e730fff : APMC0D5F:01
7e810000-7e810fff : APMC0D60:00
7e850000-7e850fff : APMC0D60:01
7e890000-7e890fff : APMC0D60:02
7e8d0000-7e8d0fff : APMC0D60:03
7e940000-7e940fff : APMC0D5E:00
4000000000-40001fffff : reserved
4000200000-47fa59ffff : System RAM
4000280000-4000ebffff : Kernel code
4000fc0000-40010e6fff : Kernel data
47fa5a0000-47fa5cffff : reserved
47fa5d0000-47fa5ddfff : System RAM
47fa5de000-47fa9cffff : reserved
47fa9d0000-47fa9d9fff : System RAM
47fa9da000-47fa9dbfff : reserved
47fa9dc000-47ff99ffff : System RAM
47ff9a0000-47ff9affff : reserved
47ff9b0000-47ff9bffff : System RAM
47ff9c0000-47ff9effff : reserved
47ff9f0000-47ffffffff : System RAM
e040000000-e07fffffff : PCI Bus 0000:00
e040000000-e0401fffff : PCI Bus 0000:01
e040000000-e0400fffff : 0000:01:00.0
e040000000-e0400fffff : mlx4_core
e040100000-e0401fffff : 0000:01:00.0
e0d0000000-e0dfffffff : PCI ECAM
f000000000-ffffffffff : PCI Bus 0000:00
f000000000-f001ffffff : PCI Bus 0000:01
f000000000-f001ffffff : 0000:01:00.0
f000000000-f001ffffff : mlx4_core
[root@(none) ~]#

Regards,
Duc Dang.

2016-12-03 10:06:42

by Jon Masters

[permalink] [raw]
Subject: [SPCR] mmio32 iotype access requirements for X-Gene 8250(_dw) UART

Hi Duc, all,

(and changing the subject and trimming/adjusting the CC)

On 12/02/2016 02:39 PM, Duc Dang wrote:
> On Fri, Dec 2, 2016 at 12:11 AM, Jon Masters <[email protected]> wrote:
>> You're welcome.
>>
>> (Unrelated) Note that I added a console= and earlycon in my test (and
>> got the baud rate wrong for the console but nevermind...was ssh'd in
>> after the earlycon output I cared about anyway) because of some other
>> cleanup work for the SPCR parsing that apparently is still not quite
>> fixed for upstream, or rather, there is a need to match on the 32-bit
>> access required for the UART and that isn't happening so it's not
>> getting setup. Folks are tracking that one and fixing it though.

> I don't see this console issue on X-Gene1 (Mustang board). I tried
> with X-Gene 2 as well. I used both console=ttyS0,115200 and
> earlycon=uart8250,mmio32,0x1c020000. Are you setting baudrate to
> 115200 or something else?

Let me clarify. What I meant above is that when I generated the boot
log, I had specified earlycon and console parameters, but had fat
fingered the baud rate (m400 uses 9600, mustang uses 115200 baud).
That's what I was referring to in the original text above.

HOWEVER...

There is a broader problem with X-Gene SPCR support. The problem is
that the 16550 UART in X-Gene requires the 32-bit access quirk (the
iotype is set to UPIO_MEM32 for the APMC0D08 device). This means
that when univ8250_console_match runs later, it will compare the
iotype (MEM32) with the type previously registered with the
kernel when the earlycon setup the preferred console.

The earlycon preferred console will parse the SPCR and find:

iotype = table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY ?
"mmio" : "io";

Which sets it to "mmio" (not "mmio32"). There is a DBG2 (the table
referenced by the SPCR that provides the actual port types for all
modern revisions of the SPCR with revision2+, as required by Linux)
port subtype for non-compliant SBSA ARM Generic UARTs that require
32-bit accesses, and this is ACPI_DBG2_ARM_SBSA_32BIT (type 0xd).

However that only applies for "pl011" devices, and doesn't provide
for 16550 UARTs that require 32-bit accessors.

So you'll end up with Linux thinking it's registering a non-32-bit
mmio preferred console during earlycon setup and then never match
against that later in the 8250_core/8250_dw match function by
virtue of the fact that these differ.

There are a couple of possibilities:

1). Perhaps (for some reason) the IP actually does support sub-32-bit
access and the iotype simply needs to be changed to reflect this.
That would be the easiest option. But it's been this way for a
long time in various codebases, so I would be pleasantly
surprised if this were the case. Let me/us know :)

2). We find some way during SPCR setup to quirk for X-Gene as a non
standard 16550 and set it up as an mmio32 iotype.

3). We get the DBG2 table updated to add a subtype of the 16650
called something like "(deprecated) 16550 subset supporting
only 32-bit accesses". Then we add this to Linux and get
the firmware updated on systems to switch to this type. We
would probably still want to quirk for existing machines.

Perhaps I'm missing something. I would love for that to be true,
but I don't think it is. I think we need a subtype of the 16550
defined that encapsulates this mmio32 requirement properly. To
that end, I've preemptively asked some friends at MS for a
favor to look into adding a new subtype for this.

Let me know what you think is the best path... :)

Thanks,

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-03 17:15:13

by Mark Salter

[permalink] [raw]
Subject: Re: [SPCR] mmio32 iotype access requirements for X-Gene 8250(_dw) UART

On Sat, 2016-12-03 at 05:06 -0500, Jon Masters wrote:
> Hi Duc, all,
>
> (and changing the subject and trimming/adjusting the CC)
>
> On 12/02/2016 02:39 PM, Duc Dang wrote:
> >
> > On Fri, Dec 2, 2016 at 12:11 AM, Jon Masters <[email protected]> wrote:
> > >
> > > You're welcome.
> > >
> > > (Unrelated) Note that I added a console= and earlycon in my test (and
> > > got the baud rate wrong for the console but nevermind...was ssh'd in
> > > after the earlycon output I cared about anyway) because of some other
> > > cleanup work for the SPCR parsing that apparently is still not quite
> > > fixed for upstream, or rather, there is a need to match on the 32-bit
> > > access required for the UART and that isn't happening so it's not
> > > getting setup. Folks are tracking that one and fixing it though.
> >
> > I don't see this console issue on X-Gene1 (Mustang board). I tried
> > with X-Gene 2 as well. I used both console=ttyS0,115200 and
> > earlycon=uart8250,mmio32,0x1c020000. Are you setting baudrate to
> > 115200 or something else?
> Let me clarify. What I meant above is that when I generated the boot
> log, I had specified earlycon and console parameters, but had fat
> fingered the baud rate (m400 uses 9600, mustang uses 115200 baud).
> That's what I was referring to in the original text above.
>
> HOWEVER...
>
> There is a broader problem with X-Gene SPCR support. The problem is
> that the 16550 UART in X-Gene requires the 32-bit access quirk (the
> iotype is set to UPIO_MEM32 for the APMC0D08 device). This means
> that when univ8250_console_match runs later, it will compare the
> iotype (MEM32) with the type previously registered with the
> kernel when the earlycon setup the preferred console.

Linaro has a kernel patch which looks at the bit_width field of the
port address:

Author: Aleksey Makarov <[email protected]>
Date:   Thu Apr 28 19:52:38 2016 +0300

    serial: SPCR: check bit width for the 16550 UART

The SPCR in 3.06.25 firmware has a bit_width field set to 32 and with the
above patch, I don't need to use console=. But HP firmware on m400 sets
bit width to 8 so that needs a firmware fix to work with above.


>
> The earlycon preferred console will parse the SPCR and find:
>
>         iotype = table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY ?
>                         "mmio" : "io";
>
> Which sets it to "mmio" (not "mmio32"). There is a DBG2 (the table
> referenced by the SPCR that provides the actual port types for all
> modern revisions of the SPCR with revision2+, as required by Linux)
> port subtype for non-compliant SBSA ARM Generic UARTs that require
> 32-bit accesses, and this is ACPI_DBG2_ARM_SBSA_32BIT (type 0xd).
>
> However that only applies for "pl011" devices, and doesn't provide
> for 16550 UARTs that require 32-bit accessors.
>
> So you'll end up with Linux thinking it's registering a non-32-bit
> mmio preferred console during earlycon setup and then never match
> against that later in the 8250_core/8250_dw match function by
> virtue of the fact that these differ.
>
> There are a couple of possibilities:
>
> 1). Perhaps (for some reason) the IP actually does support sub-32-bit
>     access and the iotype simply needs to be changed to reflect this.
>     That would be the easiest option. But it's been this way for a
>     long time in various codebases, so I would be pleasantly
>     surprised if this were the case. Let me/us know :)
>
> 2). We find some way during SPCR setup to quirk for X-Gene as a non
>     standard 16550 and set it up as an mmio32 iotype.
>
> 3). We get the DBG2 table updated to add a subtype of the 16650
>     called something like "(deprecated) 16550 subset supporting
>     only 32-bit accesses". Then we add this to Linux and get
>     the firmware updated on systems to switch to this type. We
>     would probably still want to quirk for existing machines.
>
> Perhaps I'm missing something. I would love for that to be true,
> but I don't think it is. I think we need a subtype of the 16550
> defined that encapsulates this mmio32 requirement properly. To
> that end, I've preemptively asked some friends at MS for a
> favor to look into adding a new subtype for this.
>
> Let me know what you think is the best path... :)
>
> Thanks,
>
> Jon.
>

2016-12-03 20:33:26

by Jon Masters

[permalink] [raw]
Subject: Re: [SPCR] mmio32 iotype access requirements for X-Gene 8250(_dw) UART

Hi Mark,

On 12/03/2016 12:15 PM, Mark Salter wrote:
> On Sat, 2016-12-03 at 05:06 -0500, Jon Masters wrote:

>> There is a broader problem with X-Gene SPCR support. The problem is
>> that the 16550 UART in X-Gene requires the 32-bit access quirk (the
>> iotype is set to UPIO_MEM32 for the APMC0D08 device). This means
>> that when univ8250_console_match runs later, it will compare the
>> iotype (MEM32) with the type previously registered with the
>> kernel when the earlycon setup the preferred console.
>
> Linaro has a kernel patch which looks at the bit_width field of the
> port address:
>
> Author: Aleksey Makarov <[email protected]>
> Date: Thu Apr 28 19:52:38 2016 +0300
>
> serial: SPCR: check bit width for the 16550 UART
>
> The SPCR in 3.06.25 firmware has a bit_width field set to 32 and with the
> above patch, I don't need to use console=. But HP firmware on m400 sets
> bit width to 8 so that needs a firmware fix to work with above.

Indeed, thanks. Graeme also mentioned that last night. I think this
a good solution for the moment, but still that it makes sense to
have a new 16650 UART type defined in the DBG2 spec for those
requiring 32-bit access (to mirror the type D for pl011). I
heard back from Microsoft this morning that they're looking.

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-04 10:42:14

by Duc Dang

[permalink] [raw]
Subject: Re: [SPCR] mmio32 iotype access requirements for X-Gene 8250(_dw) UART

On Sat, Dec 3, 2016 at 12:33 PM, Jon Masters <[email protected]> wrote:
> Hi Mark,
>
> On 12/03/2016 12:15 PM, Mark Salter wrote:
>> On Sat, 2016-12-03 at 05:06 -0500, Jon Masters wrote:
>
>>> There is a broader problem with X-Gene SPCR support. The problem is
>>> that the 16550 UART in X-Gene requires the 32-bit access quirk (the
>>> iotype is set to UPIO_MEM32 for the APMC0D08 device). This means
>>> that when univ8250_console_match runs later, it will compare the
>>> iotype (MEM32) with the type previously registered with the
>>> kernel when the earlycon setup the preferred console.
>>
>> Linaro has a kernel patch which looks at the bit_width field of the
>> port address:
>>
>> Author: Aleksey Makarov <[email protected]>
>> Date: Thu Apr 28 19:52:38 2016 +0300
>>
>> serial: SPCR: check bit width for the 16550 UART
>>
>> The SPCR in 3.06.25 firmware has a bit_width field set to 32 and with the
>> above patch, I don't need to use console=. But HP firmware on m400 sets
>> bit width to 8 so that needs a firmware fix to work with above.

Will this patch be posted for upstream?

>
> Indeed, thanks. Graeme also mentioned that last night. I think this
> a good solution for the moment, but still that it makes sense to
> have a new 16650 UART type defined in the DBG2 spec for those
> requiring 32-bit access (to mirror the type D for pl011). I
> heard back from Microsoft this morning that they're looking.

Yes, thanks, Jon. It will be nice to have a new 16550 UART type for
those requiring 32-bit access.

>
> Jon.
>
> --
> Computer Architect | Sent from my Fedora powered laptop
>
Regards,
Duc Dang.

2016-12-05 21:20:21

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Fri, Dec 02, 2016 at 11:06:30PM -0800, Duc Dang wrote:
> On Fri, Dec 2, 2016 at 3:39 PM, Bjorn Helgaas <[email protected]> wrote:

> > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> > index 8a177a1..a16fc8e 100644
> > --- a/arch/arm64/kernel/pci.c
> > +++ b/arch/arm64/kernel/pci.c
> > @@ -114,6 +114,19 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
> > return 0;
> > }
> >
> > +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
> > +{
> > + struct resource_entry *entry, *tmp;
> > + int status;
> > +
> > + status = acpi_pci_probe_root_resources(ci);
> > + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
> > + if (!(entry->res->flags & IORESOURCE_WINDOW))
> > + resource_list_destroy_entry(entry);
> > + }
> > + return status;
> > +}
> > +
> > /*
> > * Lookup the bus range for the domain in MCFG, and set up config space
> > * mapping.
> > @@ -190,6 +203,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
> > }
> >
> > root_ops->release_info = pci_acpi_generic_release_info;
> > + root_ops->prepare_resources = pci_acpi_root_prepare_resources;
> > root_ops->pci_ops = &ri->cfg->ops->pci_ops;
> > bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
> > if (!bus)
>
> I tried your patch above with my X-Gene ECAM v4 patch on Mustang, here
> is the kernel boot log and output of 'cat /proc/iomem'. The PCIe core
> does not print the MMIO space as a window (which is expected per your
> patch above).

Thanks!

> ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI]
> acpi PNP0A08:00: _OSC: OS now controls [PCIeHotplug PME AER PCIeCapability]
> acpi PNP0A08:00: MCFG quirk: ECAM at [mem 0xe0d0000000-0xe0dfffffff] for [bus 00-ff] with xgene_v1_pcie_ecam_ops
> acpi PNP0A08:00: [Firmware Bug]: ECAM area [mem 0xe0d0000000-0xe0dfffffff] not reserved in ACPI namespace
> acpi PNP0A08:00: ECAM at [mem 0xe0d0000000-0xe0dfffffff] for [bus 00-ff]
> Remapped I/O 0x000000e010000000 to [io 0x0000-0xffff window]
> PCI host bridge to bus 0000:00
> pci_bus 0000:00: root bus resource [io 0x0000-0xffff window] (bus address [0x10000000-0x1000ffff])
> pci_bus 0000:00: root bus resource [mem 0xe040000000-0xe07fffffff window] (bus address [0x40000000-0x7fffffff])
> pci_bus 0000:00: root bus resource [mem 0xf000000000-0xffffffffff window]
> pci_bus 0000:00: root bus resource [bus 00-ff]

Yup, no bridge register space here; that's good. I assume the bridge
registers are at [mem 0x1f2b0000-0x1f2bffff] as shown in /proc/iomem
below.

> [root@(none) ~]# cat /proc/io mem
> ...
> 19000000-19007fff : 808622B7:00
> 1900c100-190fffff : 808622B7:00
> 1900c100-190fffff : 808622B7:00
> 19800000-19807fff : 808622B7:01
> 1980c100-198fffff : 808622B7:01
> 1980c100-198fffff : 808622B7:01
> ...
> 1f280000-1f28ffff : 808622B7:00
> 1f290000-1f29ffff : 808622B7:01

I'm curious what these "808622B7" devices are. Per ACPI 6.0, sec
6.1.5, that looks like a PCI vendor ID, which I guess is a valid ACPI
ID. But these resources don't seem to have any connection with PCI
(they're not in any of the host bridge apertures).

> 1f2b0000-1f2bffff : PNP0A08:00

Looks like the bridge register space; good.

> e040000000-e07fffffff : PCI Bus 0000:00
> e040000000-e0401fffff : PCI Bus 0000:01
> e040000000-e0400fffff : 0000:01:00.0
> e040000000-e0400fffff : mlx4_core
> e040100000-e0401fffff : 0000:01:00.0

> e0d0000000-e0dfffffff : PCI ECAM

This region should be described in either a PNP0C02 device or (if we
decide we can allow "consumer" descriptors) the PNP0A08 device. I
assume you'll fix that in a future firmware release.

But I think this reservation from pci_ecam_create() is good enough for
now.

> f000000000-ffffffffff : PCI Bus 0000:00
> f000000000-f001ffffff : PCI Bus 0000:01
> f000000000-f001ffffff : 0000:01:00.0
> f000000000-f001ffffff : mlx4_core

2016-12-05 21:21:48

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Fri, Dec 02, 2016 at 07:33:46PM -0500, Jon Masters wrote:
> On 12/02/2016 06:39 PM, Bjorn Helgaas wrote:
> > On Thu, Dec 01, 2016 at 11:08:23PM -0500, Jon Masters wrote:
>
> >> Let's see if I summarized this correctly...
> >>
> >> 1. The MMIO registers for the host bridge itself need to be described
> >> somewhere, especially if we need to find those in a quirk and poke
> >> them. Since those registers are very much part of the bridge device,
> >> it makes sense for them to be in the _CRS for PNP0A08/PNP0A03.
> >>
> >> 2. The address space covering these registers MUST be described as a
> >> ResourceConsumer in order to avoid accidentally exposing them as
> >> available for use by downstream devices on the PCI bus.
> >>
> >> 3. The ACPI specification allows for resources of the type "Memory32Fixed".
> >> This is a macro that doesn't have the notion of a producer or consumer.
> >> HOWEVER various interpretations seem to be that this could/should
> >> default to being interpreted as a consumed region.
> >
> > I agree; I think that per spec, Memory24, Memory32, Memory32Fixed, IO,
> > and FixedIO should all be for consumed resources, not for bridge
> > windows, since they don't have the notion of producer.
>
> Ok. If we ultimately codify this somewhere as the general Linux kernel
> consensus (Rafael?) then we can also go and get the various ARM server
> specs updated to reflect this in (for e.g.) reference firmware builds.
>
> > I'm pretty sure there's x86 firmware in the field that uses these for
> > windows, so I think we have to accept that usage, at least on x86.
>
> Ok. I was pondering how to even go about finding that out, but even if
> I scheduled a job across RH's infra to look, that would be a drop in
> the bucket of possible machines that might be out there doing this.

Hmmm, when researching this, I thought I came across a change
specifically for a machine that used Memory32Fixed this way, but I
can't find it now.

The only thing I did find was some old experiments with Windows that
showed it interpreting a Memory32Fixed region as a window and putting
PCI devices in it: https://bugzilla.kernel.org/show_bug.cgi?id=15817
But that was a synthetic example with qemu, not a real machine in the
field.

> > Even without this patch, I don't think it's a show-stopper to have
> > Linux mistakenly thinking this region is routed to PCI, because the
> > driver does reserve it and the PCI core will never try to use it.
>
> Ok. So are you happy with pulling in Duc's v4 patch and retaining
> status quo on the bridge resources for 4.10?

Yes, I think it looks good. I'll finish packaging things up and
repost the current series.

Bjorn

2016-12-05 21:41:08

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Mon, Dec 5, 2016 at 1:20 PM, Bjorn Helgaas <[email protected]> wrote:
> On Fri, Dec 02, 2016 at 11:06:30PM -0800, Duc Dang wrote:
>> On Fri, Dec 2, 2016 at 3:39 PM, Bjorn Helgaas <[email protected]> wrote:
>
>> > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
>> > index 8a177a1..a16fc8e 100644
>> > --- a/arch/arm64/kernel/pci.c
>> > +++ b/arch/arm64/kernel/pci.c
>> > @@ -114,6 +114,19 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
>> > return 0;
>> > }
>> >
>> > +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
>> > +{
>> > + struct resource_entry *entry, *tmp;
>> > + int status;
>> > +
>> > + status = acpi_pci_probe_root_resources(ci);
>> > + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
>> > + if (!(entry->res->flags & IORESOURCE_WINDOW))
>> > + resource_list_destroy_entry(entry);
>> > + }
>> > + return status;
>> > +}
>> > +
>> > /*
>> > * Lookup the bus range for the domain in MCFG, and set up config space
>> > * mapping.
>> > @@ -190,6 +203,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
>> > }
>> >
>> > root_ops->release_info = pci_acpi_generic_release_info;
>> > + root_ops->prepare_resources = pci_acpi_root_prepare_resources;
>> > root_ops->pci_ops = &ri->cfg->ops->pci_ops;
>> > bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
>> > if (!bus)
>>
>> I tried your patch above with my X-Gene ECAM v4 patch on Mustang, here
>> is the kernel boot log and output of 'cat /proc/iomem'. The PCIe core
>> does not print the MMIO space as a window (which is expected per your
>> patch above).
>
> Thanks!
>
>> ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
>> acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI]
>> acpi PNP0A08:00: _OSC: OS now controls [PCIeHotplug PME AER PCIeCapability]
>> acpi PNP0A08:00: MCFG quirk: ECAM at [mem 0xe0d0000000-0xe0dfffffff] for [bus 00-ff] with xgene_v1_pcie_ecam_ops
>> acpi PNP0A08:00: [Firmware Bug]: ECAM area [mem 0xe0d0000000-0xe0dfffffff] not reserved in ACPI namespace
>> acpi PNP0A08:00: ECAM at [mem 0xe0d0000000-0xe0dfffffff] for [bus 00-ff]
>> Remapped I/O 0x000000e010000000 to [io 0x0000-0xffff window]
>> PCI host bridge to bus 0000:00
>> pci_bus 0000:00: root bus resource [io 0x0000-0xffff window] (bus address [0x10000000-0x1000ffff])
>> pci_bus 0000:00: root bus resource [mem 0xe040000000-0xe07fffffff window] (bus address [0x40000000-0x7fffffff])
>> pci_bus 0000:00: root bus resource [mem 0xf000000000-0xffffffffff window]
>> pci_bus 0000:00: root bus resource [bus 00-ff]
>
> Yup, no bridge register space here; that's good. I assume the bridge
> registers are at [mem 0x1f2b0000-0x1f2bffff] as shown in /proc/iomem
> below.

Yes, the bridge registers are at [mem 0x1f2b0000-0x1f2bffff].

>
>> [root@(none) ~]# cat /proc/io mem
>> ...
>> 19000000-19007fff : 808622B7:00
>> 1900c100-190fffff : 808622B7:00
>> 1900c100-190fffff : 808622B7:00
>> 19800000-19807fff : 808622B7:01
>> 1980c100-198fffff : 808622B7:01
>> 1980c100-198fffff : 808622B7:01
>> ...
>> 1f280000-1f28ffff : 808622B7:00
>> 1f290000-1f29ffff : 808622B7:01
>
> I'm curious what these "808622B7" devices are. Per ACPI 6.0, sec
> 6.1.5, that looks like a PCI vendor ID, which I guess is a valid ACPI
> ID. But these resources don't seem to have any connection with PCI
> (they're not in any of the host bridge apertures).

These are DesignWare USB 3.0 controllers (DWC3). The ACPI ID is
defined in drivers/usb/dwc3/core.c.
>
>> 1f2b0000-1f2bffff : PNP0A08:00
>
> Looks like the bridge register space; good.

Yes, it is.

>
>> e040000000-e07fffffff : PCI Bus 0000:00
>> e040000000-e0401fffff : PCI Bus 0000:01
>> e040000000-e0400fffff : 0000:01:00.0
>> e040000000-e0400fffff : mlx4_core
>> e040100000-e0401fffff : 0000:01:00.0
>
>> e0d0000000-e0dfffffff : PCI ECAM
>
> This region should be described in either a PNP0C02 device or (if we
> decide we can allow "consumer" descriptors) the PNP0A08 device. I
> assume you'll fix that in a future firmware release.

Yes, future firmware will have PNP0C02 node that describes this ECAM
space (or a new resource in PNP0A08 if we use 'consumer' descriptor).

>
> But I think this reservation from pci_ecam_create() is good enough for
> now.
>
>> f000000000-ffffffffff : PCI Bus 0000:00
>> f000000000-f001ffffff : PCI Bus 0000:01
>> f000000000-f001ffffff : 0000:01:00.0
>> f000000000-f001ffffff : mlx4_core
Regards,
Duc Dang.

2016-12-05 21:53:31

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Thu, Dec 01, 2016 at 06:52:23PM -0800, Duc Dang wrote:
> On Thu, Dec 1, 2016 at 10:33 AM, Bjorn Helgaas <[email protected]> wrote:

> I made similar changes in v4 patch. The ECAM quirk will be built when
> ACPI and PCI_QUIRKS are enabled.
>
> When building for DT only, the ECAM quirk won't be compiled.

Perfect.

> >> #define XGENE_PCIE_IP_VER_UNKN 0
> >> #define XGENE_PCIE_IP_VER_1 1
> >> +#define XGENE_PCIE_IP_VER_2 2
> >
> > This isn't used anywhere, which makes me wonder whether it's worth
> > keeping it.
>
> V2 controller will use this XGENE_PCIE_IP_VER_2 (port->version =
> XGENE_PCIE_IP_VER_2). This will be used to indicate that the
> controller is V2, and to enable configuration request retry status
> feature (by not disable it like V1 controller).

OK, I see. You don't actually need XGENE_PCIE_IP_VER_2, you just need
port->version to be something other than XGENE_PCIE_IP_VER_1. So this
is fine as it is.

> >> static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
> >> {
> >> - struct xgene_pcie_port *port = bus->sysdata;
> >> + struct pci_config_window *cfg;
> >> + struct xgene_pcie_port *port;
> >> +
> >> + if (acpi_disabled)
> >> + port = bus->sysdata;
> >> + else {
> >> + cfg = bus->sysdata;
> >> + port = cfg->priv;
> >> + }
> >
> > I would really, really like to figure out a way to get rid of these
> > "if (acpi_disabled)" checks sprinkled through here. Is there any way
> > we can set up bus->sysdata to be the same, regardless of whether we're
> > using this as a platform driver or an ACPI quirk?
>
> Right now, I created a inline function to extract xgene_pcie_port from
> pci_bus. In order to get rid of acpi_disabled, I will need to make
> sysdata in DT case also point to pci_config_window structure, which
> means I will need to convert and test the DT driver to use ecam ops.
> It is a separate patch itself. So I think I should do it at later time
> (after this ECAM quirk patch). I hope you are ok with this.

OK. I did the simple-minded version of leaving the DT ops the same
but making sysdata point to a dummy pci_config_window. Your proposal
of using ECAM for DT would be much better.

It's interesting that you actually already use the same accessors
except that DT uses the 32-bit pci_generic_config_write32() and ACPI
uses the regular pci_generic_config_write(). I guess that means the
hardware actually *does* support sub-32 bit writes?

> I need to define the function (xgene_get_csr_resource()) inside
> pci-xgene.c to duplicate the code of acpi_get_rc_addr. The reason is
> X-Gene firmware does not have a dedicate PNP0C02 node to declare the
> resource, and if I use acpi_get_rc_resources() with "PNP0A08", I got
> error due to acpi_bus_get_device() returns error.

Looks good.

> > All these init functions are almost identical. Can we factor this out
> > by having wrappers that do nothing more than pass in the table and
> > version, and put the kzalloc and ioremap in a shared back-end?
>
> I refactor-ed these .init functions. And as a result, there are only 2
> ecam ops left: xgene_v1_pcie_ecam_ops and xgene_v2_pcie_ecam_ops.

Looks good.

Bjorn

2016-12-05 22:10:08

by Duc Dang

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Mon, Dec 5, 2016 at 1:53 PM, Bjorn Helgaas <[email protected]> wrote:
> On Thu, Dec 01, 2016 at 06:52:23PM -0800, Duc Dang wrote:
>> On Thu, Dec 1, 2016 at 10:33 AM, Bjorn Helgaas <[email protected]> wrote:
>
>> I made similar changes in v4 patch. The ECAM quirk will be built when
>> ACPI and PCI_QUIRKS are enabled.
>>
>> When building for DT only, the ECAM quirk won't be compiled.
>
> Perfect.
>
>> >> #define XGENE_PCIE_IP_VER_UNKN 0
>> >> #define XGENE_PCIE_IP_VER_1 1
>> >> +#define XGENE_PCIE_IP_VER_2 2
>> >
>> > This isn't used anywhere, which makes me wonder whether it's worth
>> > keeping it.
>>
>> V2 controller will use this XGENE_PCIE_IP_VER_2 (port->version =
>> XGENE_PCIE_IP_VER_2). This will be used to indicate that the
>> controller is V2, and to enable configuration request retry status
>> feature (by not disable it like V1 controller).
>
> OK, I see. You don't actually need XGENE_PCIE_IP_VER_2, you just need
> port->version to be something other than XGENE_PCIE_IP_VER_1. So this
> is fine as it is.
>
>> >> static void __iomem *xgene_pcie_get_cfg_base(struct pci_bus *bus)
>> >> {
>> >> - struct xgene_pcie_port *port = bus->sysdata;
>> >> + struct pci_config_window *cfg;
>> >> + struct xgene_pcie_port *port;
>> >> +
>> >> + if (acpi_disabled)
>> >> + port = bus->sysdata;
>> >> + else {
>> >> + cfg = bus->sysdata;
>> >> + port = cfg->priv;
>> >> + }
>> >
>> > I would really, really like to figure out a way to get rid of these
>> > "if (acpi_disabled)" checks sprinkled through here. Is there any way
>> > we can set up bus->sysdata to be the same, regardless of whether we're
>> > using this as a platform driver or an ACPI quirk?
>>
>> Right now, I created a inline function to extract xgene_pcie_port from
>> pci_bus. In order to get rid of acpi_disabled, I will need to make
>> sysdata in DT case also point to pci_config_window structure, which
>> means I will need to convert and test the DT driver to use ecam ops.
>> It is a separate patch itself. So I think I should do it at later time
>> (after this ECAM quirk patch). I hope you are ok with this.
>
> OK. I did the simple-minded version of leaving the DT ops the same
> but making sysdata point to a dummy pci_config_window. Your proposal
> of using ECAM for DT would be much better.
>
> It's interesting that you actually already use the same accessors
> except that DT uses the 32-bit pci_generic_config_write32() and ACPI
> uses the regular pci_generic_config_write(). I guess that means the
> hardware actually *does* support sub-32 bit writes?

Yes, the hardware does support sub-32 bit writes (and reads). This is
another item in my TODO list for DT (which does not seem quite urgent
now): switch to use pci_generic_config_write for DT. But, well, I will
need to do that for read as well (for both ACPI and DT).

>
>> I need to define the function (xgene_get_csr_resource()) inside
>> pci-xgene.c to duplicate the code of acpi_get_rc_addr. The reason is
>> X-Gene firmware does not have a dedicate PNP0C02 node to declare the
>> resource, and if I use acpi_get_rc_resources() with "PNP0A08", I got
>> error due to acpi_bus_get_device() returns error.
>
> Looks good.
>
>> > All these init functions are almost identical. Can we factor this out
>> > by having wrappers that do nothing more than pass in the table and
>> > version, and put the kzalloc and ioremap in a shared back-end?
>>
>> I refactor-ed these .init functions. And as a result, there are only 2
>> ecam ops left: xgene_v1_pcie_ecam_ops and xgene_v2_pcie_ecam_ops.
>
> Looks good.
>
> Bjorn
Regards,
Duc Dang.

2016-12-05 23:33:38

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/05/2016 04:20 PM, Bjorn Helgaas wrote:
> On Fri, Dec 02, 2016 at 11:06:30PM -0800, Duc Dang wrote:
>> On Fri, Dec 2, 2016 at 3:39 PM, Bjorn Helgaas <[email protected]> wrote:
>
>>> diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
>>> index 8a177a1..a16fc8e 100644
>>> --- a/arch/arm64/kernel/pci.c
>>> +++ b/arch/arm64/kernel/pci.c
>>> @@ -114,6 +114,19 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
>>> return 0;
>>> }
>>>
>>> +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
>>> +{
>>> + struct resource_entry *entry, *tmp;
>>> + int status;
>>> +
>>> + status = acpi_pci_probe_root_resources(ci);
>>> + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
>>> + if (!(entry->res->flags & IORESOURCE_WINDOW))
>>> + resource_list_destroy_entry(entry);
>>> + }
>>> + return status;
>>> +}
>>> +
>>> /*
>>> * Lookup the bus range for the domain in MCFG, and set up config space
>>> * mapping.
>>> @@ -190,6 +203,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
>>> }
>>>
>>> root_ops->release_info = pci_acpi_generic_release_info;
>>> + root_ops->prepare_resources = pci_acpi_root_prepare_resources;
>>> root_ops->pci_ops = &ri->cfg->ops->pci_ops;
>>> bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
>>> if (!bus)
>>
>> I tried your patch above with my X-Gene ECAM v4 patch on Mustang, here
>> is the kernel boot log and output of 'cat /proc/iomem'. The PCIe core
>> does not print the MMIO space as a window (which is expected per your
>> patch above).
>
> Thanks!

...and just for the record, here it is on HPE ProLiant m400 (Moonshot),
with the same result that the region is no longer claimed as PCI space
(it - 1f500000 - is now showing as being owned by PNP0A08:00):

# cat /proc/iomem
10520000-10523fff : APMC0D18:00
10520000-10523fff : APMC0D18:00
10524000-10527fff : APMC0D17:00
10540000-1054a0ff : APMC0D01:00
10546000-10546fff : APMC0D50:00
1054a000-1054a00f : APMC0D12:03
1054a000-1054a00f : APMC0D12:02
1054a000-1054a00f : APMC0D12:01
1054a000-1054a00f : APMC0D12:00
17000000-17000fff : APMC0D01:00
17001000-17001fff : APMC0D01:00
17001000-170013ff : APMC0D15:00
17001000-170013ff : APMC0D15:00
1701c000-1701cfff : APMC0D14:00
1a800000-1a800fff : APMC0D0D:00
1a800000-1a800fff : APMC0D0D:00
1c000200-1c0002ff : APMC0D06:00
1c021000-1c0210ff : APMC0D08:00
1c021000-1c02101f : serial
1c024000-1c024fff : APMC0D07:00
1f230000-1f230fff : APMC0D0D:00
1f230000-1f230fff : APMC0D0D:00
1f23d000-1f23dfff : APMC0D0D:00
1f23d000-1f23dfff : APMC0D0D:00
1f23e000-1f23efff : APMC0D0D:00
1f23e000-1f23efff : APMC0D0D:00
1f2a0000-1f31ffff : APMC0D06:00
1f500000-1f50ffff : PNP0A08:00
78800000-78800fff : APMC0D13:00
78800000-78800fff : APMC0D12:03
78800000-78800fff : APMC0D12:02
78800000-78800fff : APMC0D12:01
78800000-78800fff : APMC0D12:00
78800000-78800fff : APMC0D11:00
78800000-78800fff : APMC0D10:03
78800000-78800fff : APMC0D10:02
78800000-78800fff : APMC0D10:01
78800000-78800fff : APMC0D10:00
79000000-798fffff : APMC0D0E:00
7c000000-7c1fffff : APMC0D12:00
7c200000-7c3fffff : APMC0D12:01
7c400000-7c5fffff : APMC0D12:02
7c600000-7c7fffff : APMC0D12:03
7e000000-7e000fff : APMC0D13:00
7e200000-7e200fff : APMC0D10:03
7e200000-7e200fff : APMC0D10:02
7e200000-7e200fff : APMC0D10:01
7e200000-7e200fff : APMC0D10:00
7e600000-7e600fff : APMC0D11:00
7e700000-7e700fff : APMC0D10:03
7e700000-7e700fff : APMC0D10:02
7e700000-7e700fff : APMC0D10:01
7e700000-7e700fff : APMC0D10:00
7e720000-7e720fff : APMC0D10:03
7e720000-7e720fff : APMC0D10:02
7e720000-7e720fff : APMC0D10:01
7e720000-7e720fff : APMC0D10:00
7e800000-7e800fff : APMC0D10:00
7e840000-7e840fff : APMC0D10:01
7e880000-7e880fff : APMC0D10:02
7e8c0000-7e8c0fff : APMC0D10:03
7e930000-7e930fff : APMC0D13:00
4000000000-4001ffffff : System RAM
4000080000-4000c3ffff : Kernel code
4000db0000-400165ffff : Kernel data
40023a0000-4ff733ffff : System RAM
4ff7340000-4ff77cffff : reserved
4ff77d0000-4ff79cffff : System RAM
4ff79d0000-4ff7e7ffff : reserved
4ff7e80000-4ff7e8ffff : System RAM
4ff7e90000-4ff7efffff : reserved
4ff7f10000-4ff800ffff : reserved
4ff8010000-4fffffffff : System RAM
a020000000-a03fffffff : PCI Bus 0000:00
a020000000-a0201fffff : PCI Bus 0000:01
a020000000-a0200fffff : 0000:01:00.0
a020000000-a0200fffff : mlx4_core
a020100000-a0201fffff : 0000:01:00.0
a060000000-a07fffffff : PCI Bus 0000:00
a0d0000000-a0dfffffff : PCI ECAM
a110000000-a14fffffff : PCI Bus 0000:00
a110000000-a121ffffff : PCI Bus 0000:01
a110000000-a111ffffff : 0000:01:00.0
a110000000-a111ffffff : mlx4_core
a112000000-a121ffffff : 0000:01:00.0

Tested-by: Jon Masters <[email protected]>

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-06 19:53:57

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/05/2016 04:21 PM, Bjorn Helgaas wrote:
> On Fri, Dec 02, 2016 at 07:33:46PM -0500, Jon Masters wrote:

>>> Even without this patch, I don't think it's a show-stopper to have
>>> Linux mistakenly thinking this region is routed to PCI, because the
>>> driver does reserve it and the PCI core will never try to use it.
>>
>> Ok. So are you happy with pulling in Duc's v4 patch and retaining
>> status quo on the bridge resources for 4.10?
>
> Yes, I think it looks good. I'll finish packaging things up and
> repost the current series.

Ok, great. So you're still pretty confident we'll have "out of the box"
booting on these machines for 4.10? :)

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-06 20:18:40

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On Tue, Dec 06, 2016 at 02:46:00PM -0500, Jon Masters wrote:
> On 12/05/2016 04:21 PM, Bjorn Helgaas wrote:
> > On Fri, Dec 02, 2016 at 07:33:46PM -0500, Jon Masters wrote:
>
> >>> Even without this patch, I don't think it's a show-stopper to have
> >>> Linux mistakenly thinking this region is routed to PCI, because the
> >>> driver does reserve it and the PCI core will never try to use it.
> >>
> >> Ok. So are you happy with pulling in Duc's v4 patch and retaining
> >> status quo on the bridge resources for 4.10?
> >
> > Yes, I think it looks good. I'll finish packaging things up and
> > repost the current series.
>
> Ok, great. So you're still pretty confident we'll have "out of the box"
> booting on these machines for 4.10? :)

I just merged pci/ecam into my "next" branch, so as long as tomorrow's
linux-next boots out of the box, we should be set. I made the following
changes compared to v11:

- dropped the x86 change to use acpi_resource_consumer()
- added parens around macro args used in arithmetic expressions
- renamed "seg" to "node" in THUNDER_PEM_RES and THUNDER_PEM_QUIRK
- incorporated (u64) cast and dropped "UL" postfix for THUNDER_PEM_QUIRK

Let me know if you see any issues.

Bjorn

2016-12-06 20:30:05

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/06/2016 03:18 PM, Bjorn Helgaas wrote:
> On Tue, Dec 06, 2016 at 02:46:00PM -0500, Jon Masters wrote:
>> On 12/05/2016 04:21 PM, Bjorn Helgaas wrote:
>>> On Fri, Dec 02, 2016 at 07:33:46PM -0500, Jon Masters wrote:
>>
>>>>> Even without this patch, I don't think it's a show-stopper to have
>>>>> Linux mistakenly thinking this region is routed to PCI, because the
>>>>> driver does reserve it and the PCI core will never try to use it.
>>>>
>>>> Ok. So are you happy with pulling in Duc's v4 patch and retaining
>>>> status quo on the bridge resources for 4.10?
>>>
>>> Yes, I think it looks good. I'll finish packaging things up and
>>> repost the current series.
>>
>> Ok, great. So you're still pretty confident we'll have "out of the box"
>> booting on these machines for 4.10? :)
>
> I just merged pci/ecam into my "next" branch, so as long as tomorrow's
> linux-next boots out of the box, we should be set. I made the following
> changes compared to v11:
>
> - dropped the x86 change to use acpi_resource_consumer()
> - added parens around macro args used in arithmetic expressions
> - renamed "seg" to "node" in THUNDER_PEM_RES and THUNDER_PEM_QUIRK
> - incorporated (u64) cast and dropped "UL" postfix for THUNDER_PEM_QUIRK
>
> Let me know if you see any issues.

Thanks - I'll test linux-next tomorrow.

Jon.

--
Computer Architect | Sent from my Fedora powered laptop

2016-12-13 21:45:08

by Jon Masters

[permalink] [raw]
Subject: Re: [PATCH v3] PCI/ACPI: xgene: Add ECAM quirk for X-Gene PCIe controller

On 12/06/2016 03:18 PM, Bjorn Helgaas wrote:

> I just merged pci/ecam into my "next" branch, so as long as tomorrow's
> linux-next boots out of the box, we should be set. I made the following
> changes compared to v11:
>
> - dropped the x86 change to use acpi_resource_consumer()
> - added parens around macro args used in arithmetic expressions
> - renamed "seg" to "node" in THUNDER_PEM_RES and THUNDER_PEM_QUIRK
> - incorporated (u64) cast and dropped "UL" postfix for THUNDER_PEM_QUIRK
>
> Let me know if you see any issues.

Just following up. Please find attached a boot log from an HPE ProLiant m400
Moonshot X-Gene based cartridge running next-20161213 with pci/ecam branch.

Here is the /proc/iomem output as well:

# cat /proc/iomem
10520000-10523fff : APMC0D18:00
10520000-10523fff : APMC0D18:00
10524000-10527fff : APMC0D17:00
10540000-1054a0ff : APMC0D01:00
10546000-10546fff : APMC0D50:00
1054a000-1054a00f : APMC0D12:03
1054a000-1054a00f : APMC0D12:02
1054a000-1054a00f : APMC0D12:01
1054a000-1054a00f : APMC0D12:00
17000000-17000fff : APMC0D01:00
17001000-17001fff : APMC0D01:00
17001000-170013ff : APMC0D15:00
17001000-170013ff : APMC0D15:00
1701c000-1701cfff : APMC0D14:00
1a800000-1a800fff : APMC0D0D:00
1a800000-1a800fff : APMC0D0D:00
1c000200-1c0002ff : APMC0D06:00
1c021000-1c0210ff : APMC0D08:00
1c021000-1c02101f : serial
1c024000-1c024fff : APMC0D07:00
1f230000-1f230fff : APMC0D0D:00
1f230000-1f230fff : APMC0D0D:00
1f23d000-1f23dfff : APMC0D0D:00
1f23d000-1f23dfff : APMC0D0D:00
1f23e000-1f23efff : APMC0D0D:00
1f23e000-1f23efff : APMC0D0D:00
1f2a0000-1f31ffff : APMC0D06:00
1f500000-1f50ffff : PNP0A08:00
78800000-78800fff : APMC0D13:00
78800000-78800fff : APMC0D12:03
78800000-78800fff : APMC0D12:02
78800000-78800fff : APMC0D12:01
78800000-78800fff : APMC0D12:00
78800000-78800fff : APMC0D11:00
78800000-78800fff : APMC0D10:03
78800000-78800fff : APMC0D10:02
78800000-78800fff : APMC0D10:01
78800000-78800fff : APMC0D10:00
79000000-798fffff : APMC0D0E:00
7c000000-7c1fffff : APMC0D12:00
7c200000-7c3fffff : APMC0D12:01
7c400000-7c5fffff : APMC0D12:02
7c600000-7c7fffff : APMC0D12:03
7e000000-7e000fff : APMC0D13:00
7e200000-7e200fff : APMC0D10:03
7e200000-7e200fff : APMC0D10:02
7e200000-7e200fff : APMC0D10:01
7e200000-7e200fff : APMC0D10:00
7e600000-7e600fff : APMC0D11:00
7e700000-7e700fff : APMC0D10:03
7e700000-7e700fff : APMC0D10:02
7e700000-7e700fff : APMC0D10:01
7e700000-7e700fff : APMC0D10:00
7e720000-7e720fff : APMC0D10:03
7e720000-7e720fff : APMC0D10:02
7e720000-7e720fff : APMC0D10:01
7e720000-7e720fff : APMC0D10:00
7e800000-7e800fff : APMC0D10:00
7e840000-7e840fff : APMC0D10:01
7e880000-7e880fff : APMC0D10:02
7e8c0000-7e8c0fff : APMC0D10:03
7e930000-7e930fff : APMC0D13:00
4000000000-4001ffffff : System RAM
4000080000-4000c9ffff : Kernel code
4000e20000-400171ffff : Kernel data
40023a0000-4ff733ffff : System RAM
4ff7340000-4ff77cffff : reserved
4ff77d0000-4ff79cffff : System RAM
4ff79d0000-4ff7e7ffff : reserved
4ff7e80000-4ff7e8ffff : System RAM
4ff7e90000-4ff7efffff : reserved
4ff7f10000-4ff800ffff : reserved
4ff8010000-4fffffffff : System RAM
a020000000-a03fffffff : PCI Bus 0000:00
a020000000-a0201fffff : PCI Bus 0000:01
a020000000-a0200fffff : 0000:01:00.0
a020000000-a0200fffff : mlx4_core
a020100000-a0201fffff : 0000:01:00.0
a060000000-a07fffffff : PCI Bus 0000:00
a0d0000000-a0dfffffff : PCI ECAM
a110000000-a14fffffff : PCI Bus 0000:00
a110000000-a121ffffff : PCI Bus 0000:01
a110000000-a111ffffff : 0000:01:00.0
a110000000-a111ffffff : mlx4_core
a112000000-a121ffffff : 0000:01:00.0

Thanks again, Bjorn. Looking forward to seeing this upstream.

Tested-by: Jon Masters <[email protected]>

--
Computer Architect | Sent from my Fedora powered laptop


Attachments:
dmesg-next-20161213.txt (30.48 kB)