2017-03-10 02:47:38

by Brian Norris

[permalink] [raw]
Subject: [PATCH v2 1/5] PCI: rockchip: fix sign issues for current limits

The regulator framework can return negative error codes via
regulator_get_current_limit() for regulators that don't provide current
information. The subsequent check for postive values isn't very useful,
if the variable is unsigned.

Let's just match the signedness of the return value.

Prevents error messages like this, seen on Samsung Chromebook Plus:

[ 1.069372] rockchip-pcie f8000000.pcie: invalid power supply

Fixes: 4816c4c7b82b ("PCI: rockchip: Provide captured slot power limit and scale")
Signed-off-by: Brian Norris <[email protected]>
Acked-by: Shawn Lin <[email protected]>
---
v2: add Shawn's ack
---
drivers/pci/host/pcie-rockchip.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index 26ddd3535272..d785f64ec03b 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -425,7 +425,8 @@ static struct pci_ops rockchip_pcie_ops = {

static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip)
{
- u32 status, curr, scale, power;
+ int curr;
+ u32 status, scale, power;

if (IS_ERR(rockchip->vpcie3v3))
return;
--
2.12.0.246.ga2ecc84866-goog


2017-03-10 02:47:20

by Brian Norris

[permalink] [raw]
Subject: [PATCH v2 3/5] PCI: rockchip: add remove() support

Currently, if we try to unbind the platform device, the remove will
succeed, but the removal won't undo most of the registration, leaving
partially-configured PCI devices in the system.

This allows, for example, a simple 'lspci' to crash the system, as it
will try to touch the freed (via devm_*) driver structures.

So let's implement device remove().

Signed-off-by: Brian Norris <[email protected]>
---
v2:
* unmap IO space with pci_unmap_iospace()
* remove IRQ domain
---
drivers/pci/host/pcie-rockchip.c | 36 ++++++++++++++++++++++++++++++++++--
1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index 5d7b27b1e941..d2e5078ae331 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -223,9 +223,11 @@ struct rockchip_pcie {
int link_gen;
struct device *dev;
struct irq_domain *irq_domain;
- u32 io_size;
int offset;
+ struct pci_bus *root_bus;
+ struct resource *io;
phys_addr_t io_bus_addr;
+ u32 io_size;
void __iomem *msg_region;
u32 mem_size;
phys_addr_t msg_bus_addr;
@@ -1360,6 +1362,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
err, io);
continue;
}
+ rockchip->io = io;
break;
case IORESOURCE_MEM:
mem = win->res;
@@ -1391,6 +1394,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
err = -ENOMEM;
goto err_free_res;
}
+ rockchip->root_bus = bus;

pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
@@ -1421,6 +1425,34 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
return err;
}

+static int rockchip_pcie_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
+
+ pci_stop_root_bus(rockchip->root_bus);
+ pci_remove_root_bus(rockchip->root_bus);
+ pci_unmap_iospace(rockchip->io);
+ irq_domain_remove(rockchip->irq_domain);
+
+ phy_power_off(rockchip->phy);
+ phy_exit(rockchip->phy);
+
+ clk_disable_unprepare(rockchip->clk_pcie_pm);
+ clk_disable_unprepare(rockchip->hclk_pcie);
+ clk_disable_unprepare(rockchip->aclk_perf_pcie);
+ clk_disable_unprepare(rockchip->aclk_pcie);
+
+ if (!IS_ERR(rockchip->vpcie3v3))
+ regulator_disable(rockchip->vpcie3v3);
+ if (!IS_ERR(rockchip->vpcie1v8))
+ regulator_disable(rockchip->vpcie1v8);
+ if (!IS_ERR(rockchip->vpcie0v9))
+ regulator_disable(rockchip->vpcie0v9);
+
+ return 0;
+}
+
static const struct dev_pm_ops rockchip_pcie_pm_ops = {
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
rockchip_pcie_resume_noirq)
@@ -1438,6 +1470,6 @@ static struct platform_driver rockchip_pcie_driver = {
.pm = &rockchip_pcie_pm_ops,
},
.probe = rockchip_pcie_probe,
-
+ .remove = rockchip_pcie_remove,
};
builtin_platform_driver(rockchip_pcie_driver);
--
2.12.0.246.ga2ecc84866-goog

2017-03-10 02:47:39

by Brian Norris

[permalink] [raw]
Subject: [PATCH v2 4/5] PCI: export pci_remap_iospace() and pci_unmap_iospace()

These are useful for PCIe host drivers, and those drivers can be
modules.

Signed-off-by: Brian Norris <[email protected]>
---
new in v2
---
drivers/pci/pci.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7904d02ffdb9..3ec248774911 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3363,7 +3363,7 @@ unsigned long __weak pci_address_to_pio(phys_addr_t address)
* Only architectures that have memory mapped IO functions defined
* (and the PCI_IOBASE value defined) should call this function.
*/
-int __weak pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
+int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
{
#if defined(PCI_IOBASE) && defined(CONFIG_MMU)
unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start;
@@ -3383,6 +3383,7 @@ int __weak pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
return -ENODEV;
#endif
}
+EXPORT_SYMBOL(pci_remap_iospace);

/**
* pci_unmap_iospace - Unmap the memory mapped I/O space
@@ -3400,6 +3401,7 @@ void pci_unmap_iospace(struct resource *res)
unmap_kernel_range(vaddr, resource_size(res));
#endif
}
+EXPORT_SYMBOL(pci_unmap_iospace);

static void __pci_set_master(struct pci_dev *dev, bool enable)
{
--
2.12.0.246.ga2ecc84866-goog

2017-03-10 02:47:36

by Brian Norris

[permalink] [raw]
Subject: [PATCH v2 2/5] PCI: rockchip: make 'return 0' more obvious in probe()

There's no way to get here with 'err != 0'. Just return 0 to be more
obvious and prevent future changes from accidentally erroring out here
without going through the right error paths.

Signed-off-by: Brian Norris <[email protected]>
---
v2: no change
---
drivers/pci/host/pcie-rockchip.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index d785f64ec03b..5d7b27b1e941 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -1398,7 +1398,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
pcie_bus_configure_settings(child);

pci_bus_add_devices(bus);
- return err;
+ return 0;

err_free_res:
pci_free_resource_list(&res);
--
2.12.0.246.ga2ecc84866-goog

2017-03-10 02:47:35

by Brian Norris

[permalink] [raw]
Subject: [PATCH v2 5/5] PCI: rockchip: modularize

Now that we've exported pci_remap_iospace() and added proper remove()
support, there's no reason this can't be a loadable module.

Signed-off-by: Brian Norris <[email protected]>
---
new in v2
---
drivers/pci/host/Kconfig | 2 +-
drivers/pci/host/pcie-rockchip.c | 8 +++++++-
2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index f7c1d4d5c665..d2293ed81cf9 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -164,7 +164,7 @@ config PCI_HOST_THUNDER_ECAM
Say Y here if you want ECAM support for CN88XX-Pass-1.x Cavium Thunder SoCs.

config PCIE_ROCKCHIP
- bool "Rockchip PCIe controller"
+ tristate "Rockchip PCIe controller"
depends on ARCH_ROCKCHIP || COMPILE_TEST
depends on OF
depends on PCI_MSI_IRQ_DOMAIN
diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index d2e5078ae331..bd6df7254de4 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -26,6 +26,7 @@
#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
+#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_pci.h>
@@ -1462,6 +1463,7 @@ static const struct of_device_id rockchip_pcie_of_match[] = {
{ .compatible = "rockchip,rk3399-pcie", },
{}
};
+MODULE_DEVICE_TABLE(of, rockchip_pcie_of_match);

static struct platform_driver rockchip_pcie_driver = {
.driver = {
@@ -1472,4 +1474,8 @@ static struct platform_driver rockchip_pcie_driver = {
.probe = rockchip_pcie_probe,
.remove = rockchip_pcie_remove,
};
-builtin_platform_driver(rockchip_pcie_driver);
+module_platform_driver(rockchip_pcie_driver);
+
+MODULE_AUTHOR("Rockchip Inc");
+MODULE_DESCRIPTION("Rockchip AXI PCIe driver");
+MODULE_LICENSE("GPL v2");
--
2.12.0.246.ga2ecc84866-goog

2017-03-10 03:22:40

by Shawn Lin

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On 2017/3/10 10:46, Brian Norris wrote:
> Currently, if we try to unbind the platform device, the remove will
> succeed, but the removal won't undo most of the registration, leaving
> partially-configured PCI devices in the system.
>
> This allows, for example, a simple 'lspci' to crash the system, as it
> will try to touch the freed (via devm_*) driver structures.
>
> So let's implement device remove().
>

As this patchset seems to be merged together so I think the following
warning will be ok? if my git-am robot only pick your patch 1->compile->
patch 2->compile->patch 3 then

drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of
function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
pci_unmap_iospace(rockchip->io);

but I guess you may need to move your patch 4 ahead of patch 3?


> Signed-off-by: Brian Norris <[email protected]>
> ---
> v2:
> * unmap IO space with pci_unmap_iospace()
> * remove IRQ domain
> ---
> drivers/pci/host/pcie-rockchip.c | 36 ++++++++++++++++++++++++++++++++++--
> 1 file changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
> index 5d7b27b1e941..d2e5078ae331 100644
> --- a/drivers/pci/host/pcie-rockchip.c
> +++ b/drivers/pci/host/pcie-rockchip.c
> @@ -223,9 +223,11 @@ struct rockchip_pcie {
> int link_gen;
> struct device *dev;
> struct irq_domain *irq_domain;
> - u32 io_size;
> int offset;
> + struct pci_bus *root_bus;
> + struct resource *io;
> phys_addr_t io_bus_addr;
> + u32 io_size;
> void __iomem *msg_region;
> u32 mem_size;
> phys_addr_t msg_bus_addr;
> @@ -1360,6 +1362,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
> err, io);
> continue;
> }
> + rockchip->io = io;
> break;
> case IORESOURCE_MEM:
> mem = win->res;
> @@ -1391,6 +1394,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
> err = -ENOMEM;
> goto err_free_res;
> }
> + rockchip->root_bus = bus;
>
> pci_bus_size_bridges(bus);
> pci_bus_assign_resources(bus);
> @@ -1421,6 +1425,34 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
> return err;
> }
>
> +static int rockchip_pcie_remove(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
> +
> + pci_stop_root_bus(rockchip->root_bus);
> + pci_remove_root_bus(rockchip->root_bus);
> + pci_unmap_iospace(rockchip->io);
> + irq_domain_remove(rockchip->irq_domain);
> +
> + phy_power_off(rockchip->phy);
> + phy_exit(rockchip->phy);
> +
> + clk_disable_unprepare(rockchip->clk_pcie_pm);
> + clk_disable_unprepare(rockchip->hclk_pcie);
> + clk_disable_unprepare(rockchip->aclk_perf_pcie);
> + clk_disable_unprepare(rockchip->aclk_pcie);
> +
> + if (!IS_ERR(rockchip->vpcie3v3))
> + regulator_disable(rockchip->vpcie3v3);
> + if (!IS_ERR(rockchip->vpcie1v8))
> + regulator_disable(rockchip->vpcie1v8);
> + if (!IS_ERR(rockchip->vpcie0v9))
> + regulator_disable(rockchip->vpcie0v9);
> +
> + return 0;
> +}
> +
> static const struct dev_pm_ops rockchip_pcie_pm_ops = {
> SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
> rockchip_pcie_resume_noirq)
> @@ -1438,6 +1470,6 @@ static struct platform_driver rockchip_pcie_driver = {
> .pm = &rockchip_pcie_pm_ops,
> },
> .probe = rockchip_pcie_probe,
> -
> + .remove = rockchip_pcie_remove,
> };
> builtin_platform_driver(rockchip_pcie_driver);
>


--
Best Regards
Shawn Lin

2017-03-10 04:21:09

by Shawn Lin

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On 2017/3/10 11:22, Shawn Lin wrote:
> On 2017/3/10 10:46, Brian Norris wrote:
>> Currently, if we try to unbind the platform device, the remove will
>> succeed, but the removal won't undo most of the registration, leaving
>> partially-configured PCI devices in the system.
>>
>> This allows, for example, a simple 'lspci' to crash the system, as it
>> will try to touch the freed (via devm_*) driver structures.
>>
>> So let's implement device remove().
>>
>
> As this patchset seems to be merged together so I think the following
> warning will be ok? if my git-am robot only pick your patch 1->compile->
> patch 2->compile->patch 3 then
>
> drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
> drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of
> function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
> pci_unmap_iospace(rockchip->io);
>
> but I guess you may need to move your patch 4 ahead of patch 3?
>

Well, I am not sure if something is wrong here.

But when booting up the system for the first time, we got
[ 0.527263] PCI host bridge /pcie@f8000000 ranges:
[ 0.527293] MEM 0xfa000000..0xfa5fffff -> 0xfa000000
[ 0.527308] IO 0xfa600000..0xfa6fffff -> 0xfa600000
[ 0.527544] rockchip-pcie f8000000.pcie: PCI host bridge to bus 0000:0

so the hierarchy(lspci -t) looks like:
lspci -t
-[0000:00]---00.0-[01]----00.0

and lspci
0000:00:00.0 Class 0604: Device 1d87:0100
0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)

but if I did unbind and bind, the bus number is different.

lspci
0001:00:00.0 Class 0604: Device 1d87:0100
0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)

lspci -t
-+-[0001:00]---00.0-[01]----00.0
\-[0000:00]-

This hierarchy looks wrong to me.

>
>> Signed-off-by: Brian Norris <[email protected]>
>> ---
>> v2:
>> * unmap IO space with pci_unmap_iospace()
>> * remove IRQ domain
>> ---
>> drivers/pci/host/pcie-rockchip.c | 36
>> ++++++++++++++++++++++++++++++++++--
>> 1 file changed, 34 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/pci/host/pcie-rockchip.c
>> b/drivers/pci/host/pcie-rockchip.c
>> index 5d7b27b1e941..d2e5078ae331 100644
>> --- a/drivers/pci/host/pcie-rockchip.c
>> +++ b/drivers/pci/host/pcie-rockchip.c
>> @@ -223,9 +223,11 @@ struct rockchip_pcie {
>> int link_gen;
>> struct device *dev;
>> struct irq_domain *irq_domain;
>> - u32 io_size;
>> int offset;
>> + struct pci_bus *root_bus;
>> + struct resource *io;
>> phys_addr_t io_bus_addr;
>> + u32 io_size;
>> void __iomem *msg_region;
>> u32 mem_size;
>> phys_addr_t msg_bus_addr;
>> @@ -1360,6 +1362,7 @@ static int rockchip_pcie_probe(struct
>> platform_device *pdev)
>> err, io);
>> continue;
>> }
>> + rockchip->io = io;
>> break;
>> case IORESOURCE_MEM:
>> mem = win->res;
>> @@ -1391,6 +1394,7 @@ static int rockchip_pcie_probe(struct
>> platform_device *pdev)
>> err = -ENOMEM;
>> goto err_free_res;
>> }
>> + rockchip->root_bus = bus;
>>
>> pci_bus_size_bridges(bus);
>> pci_bus_assign_resources(bus);
>> @@ -1421,6 +1425,34 @@ static int rockchip_pcie_probe(struct
>> platform_device *pdev)
>> return err;
>> }
>>
>> +static int rockchip_pcie_remove(struct platform_device *pdev)
>> +{
>> + struct device *dev = &pdev->dev;
>> + struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
>> +
>> + pci_stop_root_bus(rockchip->root_bus);
>> + pci_remove_root_bus(rockchip->root_bus);
>> + pci_unmap_iospace(rockchip->io);
>> + irq_domain_remove(rockchip->irq_domain);
>> +
>> + phy_power_off(rockchip->phy);
>> + phy_exit(rockchip->phy);
>> +
>> + clk_disable_unprepare(rockchip->clk_pcie_pm);
>> + clk_disable_unprepare(rockchip->hclk_pcie);
>> + clk_disable_unprepare(rockchip->aclk_perf_pcie);
>> + clk_disable_unprepare(rockchip->aclk_pcie);
>> +
>> + if (!IS_ERR(rockchip->vpcie3v3))
>> + regulator_disable(rockchip->vpcie3v3);
>> + if (!IS_ERR(rockchip->vpcie1v8))
>> + regulator_disable(rockchip->vpcie1v8);
>> + if (!IS_ERR(rockchip->vpcie0v9))
>> + regulator_disable(rockchip->vpcie0v9);
>> +
>> + return 0;
>> +}
>> +
>> static const struct dev_pm_ops rockchip_pcie_pm_ops = {
>> SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
>> rockchip_pcie_resume_noirq)
>> @@ -1438,6 +1470,6 @@ static struct platform_driver
>> rockchip_pcie_driver = {
>> .pm = &rockchip_pcie_pm_ops,
>> },
>> .probe = rockchip_pcie_probe,
>> -
>> + .remove = rockchip_pcie_remove,
>> };
>> builtin_platform_driver(rockchip_pcie_driver);
>>
>
>


--
Best Regards
Shawn Lin

2017-03-10 19:40:53

by Brian Norris

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On Fri, Mar 10, 2017 at 12:20:54PM +0800, Shawn Lin wrote:
> On 2017/3/10 11:22, Shawn Lin wrote:
> >On 2017/3/10 10:46, Brian Norris wrote:
> >>Currently, if we try to unbind the platform device, the remove will
> >>succeed, but the removal won't undo most of the registration, leaving
> >>partially-configured PCI devices in the system.
> >>
> >>This allows, for example, a simple 'lspci' to crash the system, as it
> >>will try to touch the freed (via devm_*) driver structures.
> >>
> >>So let's implement device remove().
> >>
> >
> >As this patchset seems to be merged together so I think the following
> >warning will be ok? if my git-am robot only pick your patch 1->compile->
> >patch 2->compile->patch 3 then
> >
> >drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
> >drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of
> >function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
> > pci_unmap_iospace(rockchip->io);

I'm not sure what you're doing here, but when I test patches 1-3, this
all compiles fine for me. Maybe you're testing an old kernel that does
not have pci_unmap_iospace()?

> >but I guess you may need to move your patch 4 ahead of patch 3?

No. Patch 4 is only necessary for building modules that can use those
functions; your PCIe driver doesn't build as a module until patch 5.

I'm going to guess that you're testing your hacky vendor tree, and not
pure upstream.

> Well, I am not sure if something is wrong here.
>
> But when booting up the system for the first time, we got
> [ 0.527263] PCI host bridge /pcie@f8000000 ranges:
> [ 0.527293] MEM 0xfa000000..0xfa5fffff -> 0xfa000000
> [ 0.527308] IO 0xfa600000..0xfa6fffff -> 0xfa600000
> [ 0.527544] rockchip-pcie f8000000.pcie: PCI host bridge to bus 0000:0
>
> so the hierarchy(lspci -t) looks like:
> lspci -t
> -[0000:00]---00.0-[01]----00.0
>
> and lspci
> 0000:00:00.0 Class 0604: Device 1d87:0100
> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
>
> but if I did unbind and bind, the bus number is different.
>
> lspci
> 0001:00:00.0 Class 0604: Device 1d87:0100
> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
>
> lspci -t
> -+-[0001:00]---00.0-[01]----00.0
> \-[0000:00]-
>
> This hierarchy looks wrong to me.

That looks like it's somewhat an artifact of lspci's tree view, which
manufactures the 0000:00 root.

I might comment on your "RFT" patch too but... it does touch on the
problem (that the domain numbers don't get reused), but I don't think
it's a good idea. What if we add another domain after the Rockchip
PCIe domain? You'll clobber all the domain allocations the first time
you remove the Rockchip one. Instead, if we want to actually stabilize
this indexing across hotplug, we need the core PCI code to take care of
this for us. e.g., maybe use one of the existing indexing ID mechanisms
in the kernel, like IDR?

Anyway, other than the bad lspci -t output, is there any actual bug
here? I didn't think the domain numbers were actually so special.

Brian

2017-03-13 02:26:32

by Shawn Lin

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On 2017/3/11 3:40, Brian Norris wrote:
> On Fri, Mar 10, 2017 at 12:20:54PM +0800, Shawn Lin wrote:
>> On 2017/3/10 11:22, Shawn Lin wrote:
>>> On 2017/3/10 10:46, Brian Norris wrote:
>>>> Currently, if we try to unbind the platform device, the remove will
>>>> succeed, but the removal won't undo most of the registration, leaving
>>>> partially-configured PCI devices in the system.
>>>>
>>>> This allows, for example, a simple 'lspci' to crash the system, as it
>>>> will try to touch the freed (via devm_*) driver structures.
>>>>
>>>> So let's implement device remove().
>>>>
>>>
>>> As this patchset seems to be merged together so I think the following
>>> warning will be ok? if my git-am robot only pick your patch 1->compile->
>>> patch 2->compile->patch 3 then
>>>
>>> drivers/pci/host/pcie-rockchip.c: In function 'rockchip_pcie_remove':
>>> drivers/pci/host/pcie-rockchip.c:1435:2: error: implicit declaration of
>>> function 'pci_unmap_iospace' [-Werror=implicit-function-declaration]
>>> pci_unmap_iospace(rockchip->io);
>
> I'm not sure what you're doing here, but when I test patches 1-3, this
> all compiles fine for me. Maybe you're testing an old kernel that does
> not have pci_unmap_iospace()?
>
>>> but I guess you may need to move your patch 4 ahead of patch 3?
>
> No. Patch 4 is only necessary for building modules that can use those
> functions; your PCIe driver doesn't build as a module until patch 5.
>
> I'm going to guess that you're testing your hacky vendor tree, and not
> pure upstream.
>
>> Well, I am not sure if something is wrong here.
>>
>> But when booting up the system for the first time, we got
>> [ 0.527263] PCI host bridge /pcie@f8000000 ranges:
>> [ 0.527293] MEM 0xfa000000..0xfa5fffff -> 0xfa000000
>> [ 0.527308] IO 0xfa600000..0xfa6fffff -> 0xfa600000
>> [ 0.527544] rockchip-pcie f8000000.pcie: PCI host bridge to bus 0000:0
>>
>> so the hierarchy(lspci -t) looks like:
>> lspci -t
>> -[0000:00]---00.0-[01]----00.0
>>
>> and lspci
>> 0000:00:00.0 Class 0604: Device 1d87:0100
>> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
>>
>> but if I did unbind and bind, the bus number is different.
>>
>> lspci
>> 0001:00:00.0 Class 0604: Device 1d87:0100
>> 0001:01:00.0 Class 0108: Device 8086:f1a5 (rev 03)
>>
>> lspci -t
>> -+-[0001:00]---00.0-[01]----00.0
>> \-[0000:00]-
>>
>> This hierarchy looks wrong to me.
>
> That looks like it's somewhat an artifact of lspci's tree view, which
> manufactures the 0000:00 root.
>
> I might comment on your "RFT" patch too but... it does touch on the
> problem (that the domain numbers don't get reused), but I don't think
> it's a good idea. What if we add another domain after the Rockchip
> PCIe domain? You'll clobber all the domain allocations the first time
> you remove the Rockchip one. Instead, if we want to actually stabilize
> this indexing across hotplug, we need the core PCI code to take care of
> this for us. e.g., maybe use one of the existing indexing ID mechanisms
> in the kernel, like IDR?
>
> Anyway, other than the bad lspci -t output, is there any actual bug
> here? I didn't think the domain numbers were actually so special.
>

No, it works fine. But I am going to guess
(1) is there any code or user-space binary care about the domain
numbers, so it will complain about this?
(2) if there are two doamins for PCI hierarchy, so when I unbind
and bind one of them continuously, is it possible that finally they
will share the same domain number as the domain number would be
overflow ? But actually they didn't share the same domain. :)

I just thought we should fix the domain number here by adding
"linux,pci-domain = <0>" for rk3399.dtsi, which seems more wise
to me now. Does it make sense to you?


> Brian
>
>
>


--
Best Regards
Shawn Lin

2017-03-20 22:38:13

by Brian Norris

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On Mon, Mar 13, 2017 at 10:26:12AM +0800, Shawn Lin wrote:
> I just thought we should fix the domain number here by adding
> "linux,pci-domain = <0>" for rk3399.dtsi, which seems more wise
> to me now. Does it make sense to you?

I think that's fine (as noted in response to your patch). That doesn't
really prevent this from being a core PCI domain bug though...

BTW, I've been using this patch set to do some repeated remove/probe
testing, and aside from the domain renumbering question and a small
memory leak noticed by kmemleak (an existing bug; sending a patch
shortly), this has been working well for me.

Brian

2017-03-23 22:27:23

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] PCI: rockchip: fix sign issues for current limits

On Thu, Mar 09, 2017 at 06:46:13PM -0800, Brian Norris wrote:
> The regulator framework can return negative error codes via
> regulator_get_current_limit() for regulators that don't provide current
> information. The subsequent check for postive values isn't very useful,
> if the variable is unsigned.
>
> Let's just match the signedness of the return value.
>
> Prevents error messages like this, seen on Samsung Chromebook Plus:
>
> [ 1.069372] rockchip-pcie f8000000.pcie: invalid power supply
>
> Fixes: 4816c4c7b82b ("PCI: rockchip: Provide captured slot power limit and scale")
> Signed-off-by: Brian Norris <[email protected]>
> Acked-by: Shawn Lin <[email protected]>

I applied the first two patches (this already has Shawn's ack and the
second is trivially obvious) to pci/host-rockchip. I'm not sure what the
current state of the others is.

I also applied the appended trivial indentation patch.

> ---
> v2: add Shawn's ack
> ---
> drivers/pci/host/pcie-rockchip.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
> index 26ddd3535272..d785f64ec03b 100644
> --- a/drivers/pci/host/pcie-rockchip.c
> +++ b/drivers/pci/host/pcie-rockchip.c
> @@ -425,7 +425,8 @@ static struct pci_ops rockchip_pcie_ops = {
>
> static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip)
> {
> - u32 status, curr, scale, power;
> + int curr;
> + u32 status, scale, power;
>
> if (IS_ERR(rockchip->vpcie3v3))
> return;
> --
> 2.12.0.246.ga2ecc84866-goog
>

commit 73edd2b180a18024605c599074596a9e22d745d6
Author: Bjorn Helgaas <[email protected]>
Date: Thu Mar 23 17:21:26 2017 -0500

PCI: rockchip: Unindent rockchip_pcie_set_power_limit()

If regulator_get_current_limit() returns 0 or error, return early so the
body of the function doesn't have to be indented as the body of an "if"
statement. No functional change intended.

Signed-off-by: Bjorn Helgaas <[email protected]>

diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index d785f64ec03b..7f76ff6af0ba 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -438,24 +438,25 @@ static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip)
* to the actual power supply.
*/
curr = regulator_get_current_limit(rockchip->vpcie3v3);
- if (curr > 0) {
- scale = 3; /* 0.001x */
- curr = curr / 1000; /* convert to mA */
- power = (curr * 3300) / 1000; /* milliwatt */
- while (power > PCIE_RC_CONFIG_DCR_CSPL_LIMIT) {
- if (!scale) {
- dev_warn(rockchip->dev, "invalid power supply\n");
- return;
- }
- scale--;
- power = power / 10;
- }
+ if (curr <= 0)
+ return;

- status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCR);
- status |= (power << PCIE_RC_CONFIG_DCR_CSPL_SHIFT) |
- (scale << PCIE_RC_CONFIG_DCR_CPLS_SHIFT);
- rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCR);
+ scale = 3; /* 0.001x */
+ curr = curr / 1000; /* convert to mA */
+ power = (curr * 3300) / 1000; /* milliwatt */
+ while (power > PCIE_RC_CONFIG_DCR_CSPL_LIMIT) {
+ if (!scale) {
+ dev_warn(rockchip->dev, "invalid power supply\n");
+ return;
+ }
+ scale--;
+ power = power / 10;
}
+
+ status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCR);
+ status |= (power << PCIE_RC_CONFIG_DCR_CSPL_SHIFT) |
+ (scale << PCIE_RC_CONFIG_DCR_CPLS_SHIFT);
+ rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCR);
}

/**

2017-03-23 22:33:55

by Brian Norris

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] PCI: rockchip: fix sign issues for current limits

On Thu, Mar 23, 2017 at 05:27:17PM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 09, 2017 at 06:46:13PM -0800, Brian Norris wrote:
> > The regulator framework can return negative error codes via
> > regulator_get_current_limit() for regulators that don't provide current
> > information. The subsequent check for postive values isn't very useful,
> > if the variable is unsigned.
> >
> > Let's just match the signedness of the return value.
> >
> > Prevents error messages like this, seen on Samsung Chromebook Plus:
> >
> > [ 1.069372] rockchip-pcie f8000000.pcie: invalid power supply
> >
> > Fixes: 4816c4c7b82b ("PCI: rockchip: Provide captured slot power limit and scale")
> > Signed-off-by: Brian Norris <[email protected]>
> > Acked-by: Shawn Lin <[email protected]>
>
> I applied the first two patches (this already has Shawn's ack and the
> second is trivially obvious) to pci/host-rockchip.

Thanks!

> I'm not sure what the
> current state of the others is.

Patch 4 seems like it should be fine (it was discussed previously, but
never done).

Apart from existing leaks in the PCI framework (which Jeffy and Shawn
are trying to patch [1]), I don't think there are any known issues with
3 and 5. It's certainly better than having 100% broken unbind at least,
IMO.

I suppose it's worth getting an ack/nack from Shawn though.

Brian

[1] https://patchwork.kernel.org/patch/9638353/
https://patchwork.kernel.org/patch/9640545/
https://patchwork.kernel.org/patch/9640549/

2017-03-24 01:25:07

by Shawn Lin

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] PCI: rockchip: fix sign issues for current limits

Hi Bjorn,

On 2017/3/24 6:33, Brian Norris wrote:
> On Thu, Mar 23, 2017 at 05:27:17PM -0500, Bjorn Helgaas wrote:
>> On Thu, Mar 09, 2017 at 06:46:13PM -0800, Brian Norris wrote:
>>> The regulator framework can return negative error codes via
>>> regulator_get_current_limit() for regulators that don't provide current
>>> information. The subsequent check for postive values isn't very useful,
>>> if the variable is unsigned.
>>>
>>> Let's just match the signedness of the return value.
>>>
>>> Prevents error messages like this, seen on Samsung Chromebook Plus:
>>>
>>> [ 1.069372] rockchip-pcie f8000000.pcie: invalid power supply
>>>
>>> Fixes: 4816c4c7b82b ("PCI: rockchip: Provide captured slot power limit and scale")
>>> Signed-off-by: Brian Norris <[email protected]>
>>> Acked-by: Shawn Lin <[email protected]>
>>
>> I applied the first two patches (this already has Shawn's ack and the
>> second is trivially obvious) to pci/host-rockchip.
>
> Thanks!
>
>> I'm not sure what the
>> current state of the others is.
>
> Patch 4 seems like it should be fine (it was discussed previously, but
> never done).

I'm fine with the other pacthes and fully tested it, but I was just
waiting for your decision for patch 4, so at least,

Acked-by: Shawn Lin <[email protected]> for pcie-rockchip.


>
> Apart from existing leaks in the PCI framework (which Jeffy and Shawn
> are trying to patch [1]), I don't think there are any known issues with
> 3 and 5. It's certainly better than having 100% broken unbind at least,
> IMO.
>
> I suppose it's worth getting an ack/nack from Shawn though.
>
> Brian
>
> [1] https://patchwork.kernel.org/patch/9638353/
> https://patchwork.kernel.org/patch/9640545/
> https://patchwork.kernel.org/patch/9640549/
>
>
>

2017-03-24 14:26:19

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On Thu, Mar 09, 2017 at 06:46:15PM -0800, Brian Norris wrote:
> Currently, if we try to unbind the platform device, the remove will
> succeed, but the removal won't undo most of the registration, leaving
> partially-configured PCI devices in the system.
>
> This allows, for example, a simple 'lspci' to crash the system, as it
> will try to touch the freed (via devm_*) driver structures.
>
> So let's implement device remove().

How exactly do you reproduce this problem?

There are several other drivers that are superficially similar, e.g.,
they define a struct platform_driver without a .remove method. Do
they all have this problem? Some of them do set .suppress_bind_attrs
= true; is that relevant to this scenario?

In fact, the only other callers of pci_remove_root_bus() are
iproc_pcie_remove(), hv_pci_remove(), and vmd_remove().

These don't have .remove:

imx6_pcie_driver
ls_pcie_driver
armada8k_pcie_driver
artpec6_pcie_driver
dw_plat_pcie_driver
hisi_pcie_driver
hisi_pcie_almost_ecam_driver
spear13xx_pcie_driver
gen_pci_driver

These don't have .remove but do set .suppress_bind_attrs = true:

dra7xx_pcie_driver
qcom_pcie_driver
advk_pcie_driver
mvebu_pcie_driver
rcar_pci_driver
rcar_pcie_driver
tegra_pcie_driver
altera_pcie_driver
nwl_pcie_driver
xilinx_pcie_driver


> Signed-off-by: Brian Norris <[email protected]>
> ---
> v2:
> * unmap IO space with pci_unmap_iospace()
> * remove IRQ domain
> ---
> drivers/pci/host/pcie-rockchip.c | 36 ++++++++++++++++++++++++++++++++++--
> 1 file changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
> index 5d7b27b1e941..d2e5078ae331 100644
> --- a/drivers/pci/host/pcie-rockchip.c
> +++ b/drivers/pci/host/pcie-rockchip.c
> @@ -223,9 +223,11 @@ struct rockchip_pcie {
> int link_gen;
> struct device *dev;
> struct irq_domain *irq_domain;
> - u32 io_size;
> int offset;
> + struct pci_bus *root_bus;
> + struct resource *io;
> phys_addr_t io_bus_addr;
> + u32 io_size;
> void __iomem *msg_region;
> u32 mem_size;
> phys_addr_t msg_bus_addr;
> @@ -1360,6 +1362,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
> err, io);
> continue;
> }
> + rockchip->io = io;
> break;
> case IORESOURCE_MEM:
> mem = win->res;
> @@ -1391,6 +1394,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
> err = -ENOMEM;
> goto err_free_res;
> }
> + rockchip->root_bus = bus;
>
> pci_bus_size_bridges(bus);
> pci_bus_assign_resources(bus);
> @@ -1421,6 +1425,34 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
> return err;
> }
>
> +static int rockchip_pcie_remove(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct rockchip_pcie *rockchip = dev_get_drvdata(dev);
> +
> + pci_stop_root_bus(rockchip->root_bus);
> + pci_remove_root_bus(rockchip->root_bus);
> + pci_unmap_iospace(rockchip->io);
> + irq_domain_remove(rockchip->irq_domain);
> +
> + phy_power_off(rockchip->phy);
> + phy_exit(rockchip->phy);
> +
> + clk_disable_unprepare(rockchip->clk_pcie_pm);
> + clk_disable_unprepare(rockchip->hclk_pcie);
> + clk_disable_unprepare(rockchip->aclk_perf_pcie);
> + clk_disable_unprepare(rockchip->aclk_pcie);
> +
> + if (!IS_ERR(rockchip->vpcie3v3))
> + regulator_disable(rockchip->vpcie3v3);
> + if (!IS_ERR(rockchip->vpcie1v8))
> + regulator_disable(rockchip->vpcie1v8);
> + if (!IS_ERR(rockchip->vpcie0v9))
> + regulator_disable(rockchip->vpcie0v9);
> +
> + return 0;
> +}
> +
> static const struct dev_pm_ops rockchip_pcie_pm_ops = {
> SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_pcie_suspend_noirq,
> rockchip_pcie_resume_noirq)
> @@ -1438,6 +1470,6 @@ static struct platform_driver rockchip_pcie_driver = {
> .pm = &rockchip_pcie_pm_ops,
> },
> .probe = rockchip_pcie_probe,
> -
> + .remove = rockchip_pcie_remove,
> };
> builtin_platform_driver(rockchip_pcie_driver);
> --
> 2.12.0.246.ga2ecc84866-goog
>

2017-03-24 17:22:33

by Brian Norris

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

Hi Bjorn,

On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 09, 2017 at 06:46:15PM -0800, Brian Norris wrote:
> > Currently, if we try to unbind the platform device, the remove will
> > succeed, but the removal won't undo most of the registration, leaving
> > partially-configured PCI devices in the system.
> >
> > This allows, for example, a simple 'lspci' to crash the system, as it
> > will try to touch the freed (via devm_*) driver structures.
> >
> > So let's implement device remove().
>
> How exactly do you reproduce this problem?

On RK3399:

# echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
# lspci

> There are several other drivers that are superficially similar, e.g.,
> they define a struct platform_driver without a .remove method. Do
> they all have this problem? Some of them do set .suppress_bind_attrs
> = true; is that relevant to this scenario?

Yes, I think .suppress_bind_attrs would be enough to prevent this,
according to my reading of the code and comments:

* @suppress_bind_attrs: Disables bind/unbind via sysfs.

> In fact, the only other callers of pci_remove_root_bus() are
> iproc_pcie_remove(), hv_pci_remove(), and vmd_remove().

Then iProc would suffer from the same memory leak in
of_pci_get_host_bridge_resources() [1]. It *would* suffer from the same
domain allocation issues in of_pci_bus_find_domain_nr() ->
pci_get_new_domain_nr() [2], except that all iProc device trees (in
mainline at least) use the 'linux,pci-domain' property to avoid it.

HyperV and VMD drivers use ACPI, which uses neither
pci_get_new_domain_nr() nor of_pci_get_host_bridge_resources().

> These don't have .remove:
>
> imx6_pcie_driver
> ls_pcie_driver
> armada8k_pcie_driver
> artpec6_pcie_driver
> dw_plat_pcie_driver
> hisi_pcie_driver
> hisi_pcie_almost_ecam_driver
> spear13xx_pcie_driver
> gen_pci_driver

I think these are all technically broken.

> These don't have .remove but do set .suppress_bind_attrs = true:
>
> dra7xx_pcie_driver
> qcom_pcie_driver
> advk_pcie_driver
> mvebu_pcie_driver
> rcar_pci_driver
> rcar_pcie_driver
> tegra_pcie_driver
> altera_pcie_driver
> nwl_pcie_driver
> xilinx_pcie_driver

Those are fine then, I suppose.

Brian

[1] PCI: return resource_entry in pci_add_resource helpers
https://patchwork.kernel.org/patch/9642229/
of/pci: Fix memory leak in of_pci_get_host_bridge_resources
https://patchwork.kernel.org/patch/9642231/

[2] PCI: use IDA to manage domain number if not getting it from DT
https://patchwork.kernel.org/patch/9638353/

2017-03-30 23:29:06

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> Hi Bjorn,
>
> On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > On Thu, Mar 09, 2017 at 06:46:15PM -0800, Brian Norris wrote:
> > > Currently, if we try to unbind the platform device, the remove will
> > > succeed, but the removal won't undo most of the registration, leaving
> > > partially-configured PCI devices in the system.
> > >
> > > This allows, for example, a simple 'lspci' to crash the system, as it
> > > will try to touch the freed (via devm_*) driver structures.
> > >
> > > So let's implement device remove().
> >
> > How exactly do you reproduce this problem?
>
> On RK3399:
>
> # echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
> # lspci
>
> > There are several other drivers that are superficially similar, e.g.,
> > they define a struct platform_driver without a .remove method. Do
> > they all have this problem? Some of them do set .suppress_bind_attrs
> > = true; is that relevant to this scenario?
>
> Yes, I think .suppress_bind_attrs would be enough to prevent this,
> according to my reading of the code and comments:
>
> * @suppress_bind_attrs: Disables bind/unbind via sysfs.
>
> > In fact, the only other callers of pci_remove_root_bus() are
> > iproc_pcie_remove(), hv_pci_remove(), and vmd_remove().
>
> Then iProc would suffer from the same memory leak in
> of_pci_get_host_bridge_resources() [1]. It *would* suffer from the same
> domain allocation issues in of_pci_bus_find_domain_nr() ->
> pci_get_new_domain_nr() [2], except that all iProc device trees (in
> mainline at least) use the 'linux,pci-domain' property to avoid it.
>
> HyperV and VMD drivers use ACPI, which uses neither
> pci_get_new_domain_nr() nor of_pci_get_host_bridge_resources().
>
> > These don't have .remove:
> >
> > imx6_pcie_driver
> > ls_pcie_driver
> > armada8k_pcie_driver
> > artpec6_pcie_driver
> > dw_plat_pcie_driver
> > hisi_pcie_driver
> > hisi_pcie_almost_ecam_driver
> > spear13xx_pcie_driver
> > gen_pci_driver
>
> I think these are all technically broken.

Can we fix them all at the same time as you fix Rockchip? Maybe we
should have a series that adds ".suppress_bind_attrs = true" to all
these drivers, including Rockchip. Then you could have this current
series to make Rockchip modular on top, if there's still value in it.

If we find a common problem, I'd like to fix it everywhere we know
about so it doesn't get forgotten or copied to even more places.

> > These don't have .remove but do set .suppress_bind_attrs = true:
> >
> > dra7xx_pcie_driver
> > qcom_pcie_driver
> > advk_pcie_driver
> > mvebu_pcie_driver
> > rcar_pci_driver
> > rcar_pcie_driver
> > tegra_pcie_driver
> > altera_pcie_driver
> > nwl_pcie_driver
> > xilinx_pcie_driver
>
> Those are fine then, I suppose.
>
> Brian
>
> [1] PCI: return resource_entry in pci_add_resource helpers
> https://patchwork.kernel.org/patch/9642229/
> of/pci: Fix memory leak in of_pci_get_host_bridge_resources
> https://patchwork.kernel.org/patch/9642231/
>
> [2] PCI: use IDA to manage domain number if not getting it from DT
> https://patchwork.kernel.org/patch/9638353/

2017-03-31 00:26:15

by Brian Norris

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

Hi Bjorn,

On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> > On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > > These don't have .remove:
> > >
> > > imx6_pcie_driver
> > > ls_pcie_driver
> > > armada8k_pcie_driver
> > > artpec6_pcie_driver
> > > dw_plat_pcie_driver
> > > hisi_pcie_driver
> > > hisi_pcie_almost_ecam_driver
> > > spear13xx_pcie_driver
> > > gen_pci_driver
> >
> > I think these are all technically broken.
>
> Can we fix them all at the same time as you fix Rockchip? Maybe we
> should have a series that adds ".suppress_bind_attrs = true" to all
> these drivers,

Sure, I can do that.

> including Rockchip.

Huh? Why? So I can revert that in the next patch?

> Then you could have this current
> series to make Rockchip modular on top, if there's still value in it.

I do see value in it. That's the whole reason I wrote this patchset.
It's useful for stressing out certain behaviors that will happen all the
time (i.e., boot-time initialization, from platform probe, to bus init,
to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
much faster than reboot testing.

Personally, I'd rather just patch the other drivers, and you can wait
until I follow through on that promise before applying my existing work
for the Rockchip driver, if that's what you'd prefer.

> If we find a common problem, I'd like to fix it everywhere we know
> about so it doesn't get forgotten or copied to even more places.

Sure. But you only just pointed out how broken several drivers were; I
didn't really notice :)

Brian

2017-03-31 05:17:10

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

On Thu, Mar 30, 2017 at 05:26:09PM -0700, Brian Norris wrote:
> Hi Bjorn,
>
> On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> > On Fri, Mar 24, 2017 at 10:22:19AM -0700, Brian Norris wrote:
> > > On Fri, Mar 24, 2017 at 09:25:41AM -0500, Bjorn Helgaas wrote:
> > > > These don't have .remove:
> > > >
> > > > imx6_pcie_driver
> > > > ls_pcie_driver
> > > > armada8k_pcie_driver
> > > > artpec6_pcie_driver
> > > > dw_plat_pcie_driver
> > > > hisi_pcie_driver
> > > > hisi_pcie_almost_ecam_driver
> > > > spear13xx_pcie_driver
> > > > gen_pci_driver
> > >
> > > I think these are all technically broken.
> >
> > Can we fix them all at the same time as you fix Rockchip? Maybe we
> > should have a series that adds ".suppress_bind_attrs = true" to all
> > these drivers,
>
> Sure, I can do that.
>
> > including Rockchip.
>
> Huh? Why? So I can revert that in the next patch?
>
> > Then you could have this current
> > series to make Rockchip modular on top, if there's still value in it.
>
> I do see value in it. That's the whole reason I wrote this patchset.
> It's useful for stressing out certain behaviors that will happen all the
> time (i.e., boot-time initialization, from platform probe, to bus init,
> to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
> much faster than reboot testing.

I didn't phrase that very well. There's certainly value in stressing
the bind/unbind paths, but I thought the primary reason you wrote this
was to fix the fact that you could crash the system like this:

# echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
# lspci

>From my point of view, that's the issue that *has* to be fixed.
Better test coverage is icing.

It sounds like several drivers have that same issue, and the simplest
possible fix is to set .suppress_bind_attrs, so I suggested doing that
so it's easy to analyze the tree as a whole and say "these drivers
all have the same problem, and all the fixes look the same."

I guess if you'd rather skip that for Rockchip and apply a more
complicated fix there, I could go along with that. But I don't think
it would hurt anything to set .suppress_bind_attrs, then remove it
when you add module support. The concepts of .suppress_bind_attrs and
modularity are related, and doing this in a separate patch would make
it a nice example to follow if somebody wants to make other drivers
modular as well.

> Personally, I'd rather just patch the other drivers, and you can wait
> until I follow through on that promise before applying my existing work
> for the Rockchip driver, if that's what you'd prefer.

It's not so much a question of using the Rockchip change as a stick.
I'm just thinking that it makes a more logical progression to fix the
more important issue globally first.

> > If we find a common problem, I'd like to fix it everywhere we know
> > about so it doesn't get forgotten or copied to even more places.
>
> Sure. But you only just pointed out how broken several drivers were; I
> didn't really notice :)

Yeah, you're right, I had in my head the idea that if we've identified
the same problem in several drivers, we should fix them all, but I
neglected to turn that into words.

Bjorn

2017-03-31 16:40:22

by Brian Norris

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

Hi Bjorn,

On Fri, Mar 31, 2017 at 12:17:02AM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 30, 2017 at 05:26:09PM -0700, Brian Norris wrote:
> > On Thu, Mar 30, 2017 at 06:28:25PM -0500, Bjorn Helgaas wrote:
> > > Can we fix them all at the same time as you fix Rockchip? Maybe we
> > > should have a series that adds ".suppress_bind_attrs = true" to all
> > > these drivers,
> >
> > Sure, I can do that.
> >
> > > including Rockchip.
> >
> > Huh? Why? So I can revert that in the next patch?
> >
> > > Then you could have this current
> > > series to make Rockchip modular on top, if there's still value in it.
> >
> > I do see value in it. That's the whole reason I wrote this patchset.
> > It's useful for stressing out certain behaviors that will happen all the
> > time (i.e., boot-time initialization, from platform probe, to bus init,
> > to client/EP init), via repeated bind/unbind (or modprobe/rmmod). It's
> > much faster than reboot testing.
>
> I didn't phrase that very well. There's certainly value in stressing
> the bind/unbind paths, but I thought the primary reason you wrote this
> was to fix the fact that you could crash the system like this:
>
> # echo f8000000.pcie > /sys/bus/platform/drivers/rockchip-pcie/unbind
> # lspci

Well, they're kinda two sides of the same coin; I was wanting to test
the bind path, and when I tried this, I noticed that I could trivially
crash the system. The crash seemed like a more important thing to
document (because otherwise, it just looks like I'm adding a feature).

> From my point of view, that's the issue that *has* to be fixed.
> Better test coverage is icing.

I didn't really view messing with /sys/.../unbind as a big issue,
outside of development and testing (there's a lot of damage a malicious
actor can do with unconstrained access to /sys/), so I guess I didn't
put that aspect as super-high priority. If you'd like to prioritize
that, then I'm OK with that.

> It sounds like several drivers have that same issue, and the simplest
> possible fix is to set .suppress_bind_attrs, so I suggested doing that
> so it's easy to analyze the tree as a whole and say "these drivers
> all have the same problem, and all the fixes look the same."

Sure, that is the simplest approach.

> I guess if you'd rather skip that for Rockchip and apply a more
> complicated fix there, I could go along with that. But I don't think
> it would hurt anything to set .suppress_bind_attrs, then remove it
> when you add module support. The concepts of .suppress_bind_attrs and
> modularity are related, and doing this in a separate patch would make
> it a nice example to follow if somebody wants to make other drivers
> modular as well.

I'll leave that up to you, and I can resubmit things if desired. As you
have since noticed, I already sent a patch to add .suppress_bind_attrs
to all the other drivers. If you'd like, feel free to add
pcie-rockchip.c into that mix, it's not hard -- or I can redo it myself.
Then I can modify and resend (or you can do the trivial modification
required to) the current patch set.

Just let me know.

> > Personally, I'd rather just patch the other drivers, and you can wait
> > until I follow through on that promise before applying my existing work
> > for the Rockchip driver, if that's what you'd prefer.
>
> It's not so much a question of using the Rockchip change as a stick.
> I'm just thinking that it makes a more logical progression to fix the
> more important issue globally first.

Sure, I can grok that. Just let me know if you want any more action from
me.

Brian

2017-04-11 18:18:45

by Brian Norris

[permalink] [raw]
Subject: Re: [PATCH v2 3/5] PCI: rockchip: add remove() support

Hi Bjorn,

On Fri, Mar 31, 2017 at 09:40:15AM -0700, Brian Norris wrote:
> Sure, I can grok that. Just let me know if you want any more action from
> me.

Any thoughts here? Would you like me to prepare my patches any
differently?

Brian

2017-04-21 19:03:25

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] PCI: rockchip: fix sign issues for current limits

On Thu, Mar 23, 2017 at 05:27:17PM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 09, 2017 at 06:46:13PM -0800, Brian Norris wrote:
> > The regulator framework can return negative error codes via
> > regulator_get_current_limit() for regulators that don't provide current
> > information. The subsequent check for postive values isn't very useful,
> > if the variable is unsigned.
> >
> > Let's just match the signedness of the return value.
> >
> > Prevents error messages like this, seen on Samsung Chromebook Plus:
> >
> > [ 1.069372] rockchip-pcie f8000000.pcie: invalid power supply
> >
> > Fixes: 4816c4c7b82b ("PCI: rockchip: Provide captured slot power limit and scale")
> > Signed-off-by: Brian Norris <[email protected]>
> > Acked-by: Shawn Lin <[email protected]>
>
> I applied the first two patches (this already has Shawn's ack and the
> second is trivially obvious) to pci/host-rockchip. I'm not sure what the
> current state of the others is.

I applied patches 3-5 with Shawn's ack to pci/host-rockchip for v4.12,
thanks!

> > ---
> > v2: add Shawn's ack
> > ---
> > drivers/pci/host/pcie-rockchip.c | 3 ++-
> > 1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
> > index 26ddd3535272..d785f64ec03b 100644
> > --- a/drivers/pci/host/pcie-rockchip.c
> > +++ b/drivers/pci/host/pcie-rockchip.c
> > @@ -425,7 +425,8 @@ static struct pci_ops rockchip_pcie_ops = {
> >
> > static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip)
> > {
> > - u32 status, curr, scale, power;
> > + int curr;
> > + u32 status, scale, power;
> >
> > if (IS_ERR(rockchip->vpcie3v3))
> > return;
> > --
> > 2.12.0.246.ga2ecc84866-goog
> >
>
> commit 73edd2b180a18024605c599074596a9e22d745d6
> Author: Bjorn Helgaas <[email protected]>
> Date: Thu Mar 23 17:21:26 2017 -0500
>
> PCI: rockchip: Unindent rockchip_pcie_set_power_limit()
>
> If regulator_get_current_limit() returns 0 or error, return early so the
> body of the function doesn't have to be indented as the body of an "if"
> statement. No functional change intended.
>
> Signed-off-by: Bjorn Helgaas <[email protected]>
>
> diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
> index d785f64ec03b..7f76ff6af0ba 100644
> --- a/drivers/pci/host/pcie-rockchip.c
> +++ b/drivers/pci/host/pcie-rockchip.c
> @@ -438,24 +438,25 @@ static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip)
> * to the actual power supply.
> */
> curr = regulator_get_current_limit(rockchip->vpcie3v3);
> - if (curr > 0) {
> - scale = 3; /* 0.001x */
> - curr = curr / 1000; /* convert to mA */
> - power = (curr * 3300) / 1000; /* milliwatt */
> - while (power > PCIE_RC_CONFIG_DCR_CSPL_LIMIT) {
> - if (!scale) {
> - dev_warn(rockchip->dev, "invalid power supply\n");
> - return;
> - }
> - scale--;
> - power = power / 10;
> - }
> + if (curr <= 0)
> + return;
>
> - status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCR);
> - status |= (power << PCIE_RC_CONFIG_DCR_CSPL_SHIFT) |
> - (scale << PCIE_RC_CONFIG_DCR_CPLS_SHIFT);
> - rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCR);
> + scale = 3; /* 0.001x */
> + curr = curr / 1000; /* convert to mA */
> + power = (curr * 3300) / 1000; /* milliwatt */
> + while (power > PCIE_RC_CONFIG_DCR_CSPL_LIMIT) {
> + if (!scale) {
> + dev_warn(rockchip->dev, "invalid power supply\n");
> + return;
> + }
> + scale--;
> + power = power / 10;
> }
> +
> + status = rockchip_pcie_read(rockchip, PCIE_RC_CONFIG_DCR);
> + status |= (power << PCIE_RC_CONFIG_DCR_CSPL_SHIFT) |
> + (scale << PCIE_RC_CONFIG_DCR_CPLS_SHIFT);
> + rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCR);
> }
>
> /**