2022-07-27 19:23:01

by Saravana Kannan

[permalink] [raw]
Subject: [PATCH v6] amba: Remove deferred device addition

The uevents generated for an amba device need PID and CID information
that's available only when the amba device is powered on, clocked and
out of reset. So, if those resources aren't available, the information
can't be read to generate the uevents. To workaround this requirement,
if the resources weren't available, the device addition was deferred and
retried periodically.

However, this deferred addition retry isn't based on resources becoming
available. Instead, it's retried every 5 seconds and causes arbitrary
probe delays for amba devices and their consumers.

Also, maintaining a separate deferred-probe like mechanism is
maintenance headache.

With this commit, instead of deferring the device addition, we simply
defer the generation of uevents for the device and probing of the device
(because drivers needs PID and CID to match) until the PID and CID
information can be read. This allows us to delete all the amba specific
deferring code and also avoid the arbitrary probing delays.

Cc: Rob Herring <[email protected]>
Cc: Ulf Hansson <[email protected]>
Cc: Saravana Kannan <[email protected]>
Cc: Linus Walleij <[email protected]>
Cc: Sudeep Holla <[email protected]>
Cc: Nicolas Saenz Julienne <[email protected]>
Cc: Geert Uytterhoeven <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Kefeng Wang <[email protected]>
Cc: Russell King <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: [email protected]
Signed-off-by: Saravana Kannan <[email protected]>
Tested-by: Marek Szyprowski <[email protected]>
Tested-by: Kefeng Wang <[email protected]>
Tested-by: Sudeep Holla <[email protected]>
KernelVersion: rmk/for-next
---
drivers/amba/bus.c | 313 +++++++++++++++++++++------------------------
1 file changed, 145 insertions(+), 168 deletions(-)

v1 -> v2:
- Dropped RFC tag
- Complete rewrite to not use stub devices.

v2 -> v3:
- Flipped the if() condition for hard-coded periphids.
- Added a stub driver to handle the case where all amba drivers are
modules loaded by uevents.
- Cc Marek after I realized I forgot to add him.

v3 -> v4:
- Finally figured out and fixed the issue reported by Kefeng (bus match
can't return an error other than -EPROBE_DEFER).
- I tested the patch on "V2P-CA15" on qemu
- Marek tested v3, but that was so long ago and the rebase wasn't clean,
so I didn't include the tested-by.

v4 -> v5:
- Fixed up amba_match() to always return -EPROBE_DEFER on any failure.
This fixes the issue Sudeep reported on Juno.
- Rebased the patch on top of Russell's for-next branch to avoid
conflict with linux-next.

v5 -> v6:
- Added Sudeep's Tested-by.
- Cc [email protected]
- Added KernelVersion

Russell,

Hopefully I got everything right. Please let me know if I need to do
anything else for this to be picked up.

-Saravana

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 0cb20324da16..32b0e0b930c1 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -130,11 +130,100 @@ static struct attribute *amba_dev_attrs[] = {
};
ATTRIBUTE_GROUPS(amba_dev);

+static int amba_read_periphid(struct amba_device *dev)
+{
+ struct reset_control *rstc;
+ u32 size, pid, cid;
+ void __iomem *tmp;
+ int i, ret;
+
+ ret = dev_pm_domain_attach(&dev->dev, true);
+ if (ret) {
+ dev_dbg(&dev->dev, "can't get PM domain: %d\n", ret);
+ goto err_out;
+ }
+
+ ret = amba_get_enable_pclk(dev);
+ if (ret) {
+ dev_dbg(&dev->dev, "can't get pclk: %d\n", ret);
+ goto err_pm;
+ }
+
+ /*
+ * Find reset control(s) of the amba bus and de-assert them.
+ */
+ rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node);
+ if (IS_ERR(rstc)) {
+ ret = PTR_ERR(rstc);
+ if (ret != -EPROBE_DEFER)
+ dev_err(&dev->dev, "can't get reset: %d\n", ret);
+ goto err_clk;
+ }
+ reset_control_deassert(rstc);
+ reset_control_put(rstc);
+
+ size = resource_size(&dev->res);
+ tmp = ioremap(dev->res.start, size);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto err_clk;
+ }
+
+ /*
+ * Read pid and cid based on size of resource
+ * they are located at end of region
+ */
+ for (pid = 0, i = 0; i < 4; i++)
+ pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << (i * 8);
+ for (cid = 0, i = 0; i < 4; i++)
+ cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << (i * 8);
+
+ if (cid == CORESIGHT_CID) {
+ /* set the base to the start of the last 4k block */
+ void __iomem *csbase = tmp + size - 4096;
+
+ dev->uci.devarch = readl(csbase + UCI_REG_DEVARCH_OFFSET);
+ dev->uci.devtype = readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff;
+ }
+
+ if (cid == AMBA_CID || cid == CORESIGHT_CID) {
+ dev->periphid = pid;
+ dev->cid = cid;
+ }
+
+ if (!dev->periphid)
+ ret = -ENODEV;
+
+ iounmap(tmp);
+
+err_clk:
+ amba_put_disable_pclk(dev);
+err_pm:
+ dev_pm_domain_detach(&dev->dev, true);
+err_out:
+ return ret;
+}
+
static int amba_match(struct device *dev, struct device_driver *drv)
{
struct amba_device *pcdev = to_amba_device(dev);
struct amba_driver *pcdrv = to_amba_driver(drv);

+ if (!pcdev->periphid) {
+ int ret = amba_read_periphid(pcdev);
+
+ /*
+ * Returning any error other than -EPROBE_DEFER from bus match
+ * can cause driver registration failure. So, if there's a
+ * permanent failure in reading pid and cid, simply map it to
+ * -EPROBE_DEFER.
+ */
+ if (ret)
+ return -EPROBE_DEFER;
+ dev_set_uevent_suppress(dev, false);
+ kobject_uevent(&dev->kobj, KOBJ_ADD);
+ }
+
/* When driver_override is set, only bind to the matching driver */
if (pcdev->driver_override)
return !strcmp(pcdev->driver_override, drv->name);
@@ -368,6 +457,42 @@ static int __init amba_init(void)

postcore_initcall(amba_init);

+static int amba_proxy_probe(struct amba_device *adev,
+ const struct amba_id *id)
+{
+ WARN(1, "Stub driver should never match any device.\n");
+ return -ENODEV;
+}
+
+static const struct amba_id amba_stub_drv_ids[] = {
+ { 0, 0 },
+};
+
+static struct amba_driver amba_proxy_drv = {
+ .drv = {
+ .name = "amba-proxy",
+ },
+ .probe = amba_proxy_probe,
+ .id_table = amba_stub_drv_ids,
+};
+
+static int __init amba_stub_drv_init(void)
+{
+ if (!IS_ENABLED(CONFIG_MODULES))
+ return 0;
+
+ /*
+ * The amba_match() function will get called only if there is at least
+ * one amba driver registered. If all amba drivers are modules and are
+ * only loaded based on uevents, then we'll hit a chicken-and-egg
+ * situation where amba_match() is waiting on drivers and drivers are
+ * waiting on amba_match(). So, register a stub driver to make sure
+ * amba_match() is called even if no amba driver has been registered.
+ */
+ return amba_driver_register(&amba_proxy_drv);
+}
+late_initcall_sync(amba_stub_drv_init);
+
/**
* amba_driver_register - register an AMBA device driver
* @drv: amba device driver structure
@@ -410,156 +535,6 @@ static void amba_device_release(struct device *dev)
kfree(d);
}

-static int amba_read_periphid(struct amba_device *dev)
-{
- struct reset_control *rstc;
- u32 size, pid, cid;
- void __iomem *tmp;
- int i, ret;
-
- ret = dev_pm_domain_attach(&dev->dev, true);
- if (ret)
- goto err_out;
-
- ret = amba_get_enable_pclk(dev);
- if (ret)
- goto err_pm;
-
- /*
- * Find reset control(s) of the amba bus and de-assert them.
- */
- rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node);
- if (IS_ERR(rstc)) {
- ret = PTR_ERR(rstc);
- if (ret != -EPROBE_DEFER)
- dev_err(&dev->dev, "can't get reset: %d\n", ret);
- goto err_clk;
- }
- reset_control_deassert(rstc);
- reset_control_put(rstc);
-
- size = resource_size(&dev->res);
- tmp = ioremap(dev->res.start, size);
- if (!tmp) {
- ret = -ENOMEM;
- goto err_clk;
- }
-
- /*
- * Read pid and cid based on size of resource
- * they are located at end of region
- */
- for (pid = 0, i = 0; i < 4; i++)
- pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << (i * 8);
- for (cid = 0, i = 0; i < 4; i++)
- cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << (i * 8);
-
- if (cid == CORESIGHT_CID) {
- /* set the base to the start of the last 4k block */
- void __iomem *csbase = tmp + size - 4096;
-
- dev->uci.devarch = readl(csbase + UCI_REG_DEVARCH_OFFSET);
- dev->uci.devtype = readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff;
- }
-
- if (cid == AMBA_CID || cid == CORESIGHT_CID) {
- dev->periphid = pid;
- dev->cid = cid;
- }
-
- if (!dev->periphid)
- ret = -ENODEV;
-
- iounmap(tmp);
-
-err_clk:
- amba_put_disable_pclk(dev);
-err_pm:
- dev_pm_domain_detach(&dev->dev, true);
-err_out:
- return ret;
-}
-
-static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
-{
- int ret;
-
- ret = request_resource(parent, &dev->res);
- if (ret)
- goto err_out;
-
- /* Hard-coded primecell ID instead of plug-n-play */
- if (dev->periphid != 0)
- goto skip_probe;
-
- ret = amba_read_periphid(dev);
- if (ret)
- goto err_release;
-
-skip_probe:
- ret = device_add(&dev->dev);
-err_release:
- if (ret)
- release_resource(&dev->res);
-err_out:
- return ret;
-}
-
-/*
- * Registration of AMBA device require reading its pid and cid registers.
- * To do this, the device must be turned on (if it is a part of power domain)
- * and have clocks enabled. However in some cases those resources might not be
- * yet available. Returning EPROBE_DEFER is not a solution in such case,
- * because callers don't handle this special error code. Instead such devices
- * are added to the special list and their registration is retried from
- * periodic worker, until all resources are available and registration succeeds.
- */
-struct deferred_device {
- struct amba_device *dev;
- struct resource *parent;
- struct list_head node;
-};
-
-static LIST_HEAD(deferred_devices);
-static DEFINE_MUTEX(deferred_devices_lock);
-
-static void amba_deferred_retry_func(struct work_struct *dummy);
-static DECLARE_DELAYED_WORK(deferred_retry_work, amba_deferred_retry_func);
-
-#define DEFERRED_DEVICE_TIMEOUT (msecs_to_jiffies(5 * 1000))
-
-static int amba_deferred_retry(void)
-{
- struct deferred_device *ddev, *tmp;
-
- mutex_lock(&deferred_devices_lock);
-
- list_for_each_entry_safe(ddev, tmp, &deferred_devices, node) {
- int ret = amba_device_try_add(ddev->dev, ddev->parent);
-
- if (ret == -EPROBE_DEFER)
- continue;
-
- list_del_init(&ddev->node);
- amba_device_put(ddev->dev);
- kfree(ddev);
- }
-
- mutex_unlock(&deferred_devices_lock);
-
- return 0;
-}
-late_initcall(amba_deferred_retry);
-
-static void amba_deferred_retry_func(struct work_struct *dummy)
-{
- amba_deferred_retry();
-
- if (!list_empty(&deferred_devices))
- schedule_delayed_work(&deferred_retry_work,
- DEFERRED_DEVICE_TIMEOUT);
-}
-
/**
* amba_device_add - add a previously allocated AMBA device structure
* @dev: AMBA device allocated by amba_device_alloc
@@ -571,28 +546,30 @@ static void amba_deferred_retry_func(struct work_struct *dummy)
*/
int amba_device_add(struct amba_device *dev, struct resource *parent)
{
- int ret = amba_device_try_add(dev, parent);
-
- if (ret == -EPROBE_DEFER) {
- struct deferred_device *ddev;
-
- ddev = kmalloc(sizeof(*ddev), GFP_KERNEL);
- if (!ddev)
- return -ENOMEM;
+ int ret;

- ddev->dev = dev;
- ddev->parent = parent;
- ret = 0;
+ ret = request_resource(parent, &dev->res);
+ if (ret)
+ return ret;

- mutex_lock(&deferred_devices_lock);
+ /* If primecell ID isn't hard-coded, figure it out */
+ if (!dev->periphid) {
+ /*
+ * AMBA device uevents require reading its pid and cid
+ * registers. To do this, the device must be on, clocked and
+ * out of reset. However in some cases those resources might
+ * not yet be available. If that's the case, we suppress the
+ * generation of uevents until we can read the pid and cid
+ * registers. See also amba_match().
+ */
+ if (amba_read_periphid(dev))
+ dev_set_uevent_suppress(&dev->dev, true);
+ }

- if (list_empty(&deferred_devices))
- schedule_delayed_work(&deferred_retry_work,
- DEFERRED_DEVICE_TIMEOUT);
- list_add_tail(&ddev->node, &deferred_devices);
+ ret = device_add(&dev->dev);
+ if (ret)
+ release_resource(&dev->res);

- mutex_unlock(&deferred_devices_lock);
- }
return ret;
}
EXPORT_SYMBOL_GPL(amba_device_add);
--
2.37.1.359.gd136c6c3e2-goog


2022-07-27 22:42:49

by Russell King (Oracle)

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
> The uevents generated for an amba device need PID and CID information
> that's available only when the amba device is powered on, clocked and
> out of reset. So, if those resources aren't available, the information
> can't be read to generate the uevents. To workaround this requirement,
> if the resources weren't available, the device addition was deferred and
> retried periodically.
>
> However, this deferred addition retry isn't based on resources becoming
> available. Instead, it's retried every 5 seconds and causes arbitrary
> probe delays for amba devices and their consumers.
>
> Also, maintaining a separate deferred-probe like mechanism is
> maintenance headache.
>
> With this commit, instead of deferring the device addition, we simply
> defer the generation of uevents for the device and probing of the device
> (because drivers needs PID and CID to match) until the PID and CID
> information can be read. This allows us to delete all the amba specific
> deferring code and also avoid the arbitrary probing delays.

Oh, this is absolutely horrible. I can apply it cleanly to my "misc"
branch, but it then conflicts when I re-merge my tree for the for-next
thing (which is only supposed to be for sfr - the hint is in the name!)
for-next is basically my "fixes" plus "misc" branch and anything else I
want sfr to pick up for the -next tree.

Applying this has to be on top of that merge commit, otherwise the
conflicts are horrid, but that then means I need to send Linus the
for-next merge commit (which I don't normally do.)

Gah, we have too many changes to drivers/bus/amba.c in this cycle,
some of them which have been submitted for 5.19 as fixes (and thus
are not in 5.18-rc1 which the misc branch is based upon for other
patch dependency reasons) and others in the misc branch for the next
cycle - and now your patch wants both, which I can't do without
rebasing the misc branch.

Sadly, getting these changes into GregKH's tree will just create a
conflict between Greg's tree and my tree.

Can we postpone this please?

--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

2022-07-28 00:49:40

by Saravana Kannan

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Wed, Jul 27, 2022 at 3:16 PM Russell King (Oracle)
<[email protected]> wrote:
>
> On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
> > The uevents generated for an amba device need PID and CID information
> > that's available only when the amba device is powered on, clocked and
> > out of reset. So, if those resources aren't available, the information
> > can't be read to generate the uevents. To workaround this requirement,
> > if the resources weren't available, the device addition was deferred and
> > retried periodically.
> >
> > However, this deferred addition retry isn't based on resources becoming
> > available. Instead, it's retried every 5 seconds and causes arbitrary
> > probe delays for amba devices and their consumers.
> >
> > Also, maintaining a separate deferred-probe like mechanism is
> > maintenance headache.
> >
> > With this commit, instead of deferring the device addition, we simply
> > defer the generation of uevents for the device and probing of the device
> > (because drivers needs PID and CID to match) until the PID and CID
> > information can be read. This allows us to delete all the amba specific
> > deferring code and also avoid the arbitrary probing delays.
>
> Oh, this is absolutely horrible. I can apply it cleanly to my "misc"
> branch, but it then conflicts when I re-merge my tree for the for-next
> thing (which is only supposed to be for sfr - the hint is in the name!)
> for-next is basically my "fixes" plus "misc" branch and anything else I
> want sfr to pick up for the -next tree.

Btw, this is the repo I was using because I couldn't find any amba repo.
git://git.armlinux.org.uk/~rmk/linux-arm.git

Is the misc branch visible somewhere because I don't see it in that
repo? Or is that just a local branch? Also, what does sfr stand for
(s* for next)?

In case this helps, all the conflicts are due to this commit:
8030aa3ce12e ARM: 9207/1: amba: fix refcount underflow if
amba_device_add() fails

It's fixing bugs in code I'm deleting. So if you revert that, I can
give you a patch that'll apply across 5.18 and 5.19.

Let me know if you want me to do that.

> Applying this has to be on top of that merge commit, otherwise the
> conflicts are horrid, but that then means I need to send Linus the
> for-next merge commit (which I don't normally do.)
>
> Gah, we have too many changes to drivers/bus/amba.c in this cycle,
> some of them which have been submitted for 5.19 as fixes (and thus
> are not in 5.18-rc1 which the misc branch is based upon for other
> patch dependency reasons) and others in the misc branch for the next
> cycle - and now your patch wants both, which I can't do without
> rebasing the misc branch.
>
> Sadly, getting these changes into GregKH's tree will just create a
> conflict between Greg's tree and my tree.
>
> Can we postpone this please?

I guess? It's not like I can force either of you :) Or maybe you can
ask Greg to pick it up after he picks up your for-next?

Anyway, whatever option can get this pulled in reasonably soon is fine
by me -- I can send out v7 on whatever tree you want. I've been
nursing this patch for a year and I want to get it in before I hit
more conflicts/issues and give up on it :)

-Saravana

2022-07-28 14:54:52

by Russell King (Oracle)

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Wed, Jul 27, 2022 at 05:10:57PM -0700, Saravana Kannan wrote:
> On Wed, Jul 27, 2022 at 3:16 PM Russell King (Oracle)
> <[email protected]> wrote:
> >
> > On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
> > > The uevents generated for an amba device need PID and CID information
> > > that's available only when the amba device is powered on, clocked and
> > > out of reset. So, if those resources aren't available, the information
> > > can't be read to generate the uevents. To workaround this requirement,
> > > if the resources weren't available, the device addition was deferred and
> > > retried periodically.
> > >
> > > However, this deferred addition retry isn't based on resources becoming
> > > available. Instead, it's retried every 5 seconds and causes arbitrary
> > > probe delays for amba devices and their consumers.
> > >
> > > Also, maintaining a separate deferred-probe like mechanism is
> > > maintenance headache.
> > >
> > > With this commit, instead of deferring the device addition, we simply
> > > defer the generation of uevents for the device and probing of the device
> > > (because drivers needs PID and CID to match) until the PID and CID
> > > information can be read. This allows us to delete all the amba specific
> > > deferring code and also avoid the arbitrary probing delays.
> >
> > Oh, this is absolutely horrible. I can apply it cleanly to my "misc"
> > branch, but it then conflicts when I re-merge my tree for the for-next
> > thing (which is only supposed to be for sfr - the hint is in the name!)
> > for-next is basically my "fixes" plus "misc" branch and anything else I
> > want sfr to pick up for the -next tree.
>
> Btw, this is the repo I was using because I couldn't find any amba repo.
> git://git.armlinux.org.uk/~rmk/linux-arm.git

I don't maintain a separate repo for amba stuff.

> Is the misc branch visible somewhere because I don't see it in that
> repo? Or is that just a local branch? Also, what does sfr stand for
> (s* for next)?
>
> In case this helps, all the conflicts are due to this commit:
> 8030aa3ce12e ARM: 9207/1: amba: fix refcount underflow if
> amba_device_add() fails
>
> It's fixing bugs in code I'm deleting. So if you revert that, I can
> give you a patch that'll apply across 5.18 and 5.19.
>
> Let me know if you want me to do that.

I dug into what had happened - the patch you mentioned patch 9207/1,
and this also applies to 9208/1 as well although that isn't relevant
for your patch. These two were merged as fixes in v5.19-rc7 via my
fixes branch.

They also appeared for some reason in the misc branch as entirely
separate commits. Because 9207/1 appears in both, your patch applies
cleanly on misc, but when fixes is then merged, it touches the same
code and causes a conflict.

Reverting the commit you mention won't actually fix anything - it'll
only make things much worse, with a conflict then appearing between
my tree and Linus'.

The only sensible thing would be to delete those two duplicated
commits from the misc branch, rebase misc on v5.19-rc7 (thus picking
up those two commits that are already in mainline) and then putting
your patch on top of misc.

This is doable, because there's five commits there, most of them are
trivially small changes, so its not a great loss in terms of the
testing that's already been done.

That appears to work fine - I've just pushed that out with your commit
included, so should be in the final linux-next prior to the merge
window opening this Sunday.

--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

2022-07-28 18:59:17

by Saravana Kannan

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Thu, Jul 28, 2022 at 7:16 AM Russell King (Oracle)
<[email protected]> wrote:
>
> On Wed, Jul 27, 2022 at 05:10:57PM -0700, Saravana Kannan wrote:
> > On Wed, Jul 27, 2022 at 3:16 PM Russell King (Oracle)
> > <[email protected]> wrote:
> > >
> > > On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
> > > > The uevents generated for an amba device need PID and CID information
> > > > that's available only when the amba device is powered on, clocked and
> > > > out of reset. So, if those resources aren't available, the information
> > > > can't be read to generate the uevents. To workaround this requirement,
> > > > if the resources weren't available, the device addition was deferred and
> > > > retried periodically.
> > > >
> > > > However, this deferred addition retry isn't based on resources becoming
> > > > available. Instead, it's retried every 5 seconds and causes arbitrary
> > > > probe delays for amba devices and their consumers.
> > > >
> > > > Also, maintaining a separate deferred-probe like mechanism is
> > > > maintenance headache.
> > > >
> > > > With this commit, instead of deferring the device addition, we simply
> > > > defer the generation of uevents for the device and probing of the device
> > > > (because drivers needs PID and CID to match) until the PID and CID
> > > > information can be read. This allows us to delete all the amba specific
> > > > deferring code and also avoid the arbitrary probing delays.
> > >
> > > Oh, this is absolutely horrible. I can apply it cleanly to my "misc"
> > > branch, but it then conflicts when I re-merge my tree for the for-next
> > > thing (which is only supposed to be for sfr - the hint is in the name!)
> > > for-next is basically my "fixes" plus "misc" branch and anything else I
> > > want sfr to pick up for the -next tree.
> >
> > Btw, this is the repo I was using because I couldn't find any amba repo.
> > git://git.armlinux.org.uk/~rmk/linux-arm.git
>
> I don't maintain a separate repo for amba stuff.
>
> > Is the misc branch visible somewhere because I don't see it in that
> > repo? Or is that just a local branch? Also, what does sfr stand for
> > (s* for next)?
> >
> > In case this helps, all the conflicts are due to this commit:
> > 8030aa3ce12e ARM: 9207/1: amba: fix refcount underflow if
> > amba_device_add() fails
> >
> > It's fixing bugs in code I'm deleting. So if you revert that, I can
> > give you a patch that'll apply across 5.18 and 5.19.
> >
> > Let me know if you want me to do that.
>
> I dug into what had happened - the patch you mentioned patch 9207/1,
> and this also applies to 9208/1 as well although that isn't relevant
> for your patch. These two were merged as fixes in v5.19-rc7 via my
> fixes branch.
>
> They also appeared for some reason in the misc branch as entirely
> separate commits. Because 9207/1 appears in both, your patch applies
> cleanly on misc, but when fixes is then merged, it touches the same
> code and causes a conflict.
>
> Reverting the commit you mention won't actually fix anything - it'll
> only make things much worse, with a conflict then appearing between
> my tree and Linus'.
>
> The only sensible thing would be to delete those two duplicated
> commits from the misc branch, rebase misc on v5.19-rc7 (thus picking
> up those two commits that are already in mainline) and then putting
> your patch on top of misc.
>
> This is doable, because there's five commits there, most of them are
> trivially small changes, so its not a great loss in terms of the
> testing that's already been done.
>
> That appears to work fine - I've just pushed that out with your commit
> included, so should be in the final linux-next prior to the merge
> window opening this Sunday.

Thanks a lot for taking care of this Russell!

-Saravana

2022-08-09 10:54:27

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

Hi,

On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
> The uevents generated for an amba device need PID and CID information
> that's available only when the amba device is powered on, clocked and
> out of reset. So, if those resources aren't available, the information
> can't be read to generate the uevents. To workaround this requirement,
> if the resources weren't available, the device addition was deferred and
> retried periodically.
>
...

This patch results in a large number of crashes in various qemu
emulations. Crash and bisect logs below. Reverting this patch
fixes the problem.

Additional information: The decoded stack trace suggests that the
"id" parameter of pl011_probe() may be NULL.

Guenter

---
8<--- cut here ---
Unable to handle kernel NULL pointer dereference at virtual address 00000008
[00000008] *pgd=00000000
Internal error: Oops: 5 [#1] ARM
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 5.19.0+ #1
Hardware name: ARM-Versatile (Device Tree Support)
PC is at pl011_probe+0x40/0x110
LR is at amba_probe+0x10c/0x19c
pc : [<c059847c>] lr : [<c055ac9c>] psr: 60000153
sp : c8811e00 ip : 00000000 fp : c1959ef8
r10: c7ef55f8 r9 : fffffdfb r8 : c0d77af8
r7 : c1959c00 r6 : c1959c00 r5 : 00000000 r4 : 00000003
r3 : c14191a4 r2 : 00000dc0 r1 : 00000198 r0 : c1959c00
Flags: nZCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none
Control: 00093177 Table: 00004000 DAC: 00000053
Register r0 information: slab kmalloc-1k start c1959c00 pointer offset 0 size 1024
Register r1 information: non-paged memory
Register r2 information: non-paged memory
Register r3 information: non-slab/vmalloc memory
Register r4 information: non-paged memory
Register r5 information: NULL pointer
Register r6 information: slab kmalloc-1k start c1959c00 pointer offset 0 size 1024
Register r7 information: slab kmalloc-1k start c1959c00 pointer offset 0 size 1024
Register r8 information: non-slab/vmalloc memory
Register r9 information: non-paged memory
Register r10 information: non-slab/vmalloc memory
Register r11 information: slab kmalloc-1k start c1959c00 pointer offset 760 size 1024
Register r12 information: NULL pointer
Process swapper (pid: 1, stack limit = 0x(ptrval))
Stack: (0xc8811e00 to 0xc8812000)
1e00: 60000153 00000009 c1959c00 00000000 c0d77af8 c055ac9c c055ab90 c1959c00
1e20: 00000000 c0d77af8 00000000 c1898d40 c180e158 c0cd8848 00000000 c05fbfe4
1e40: c1959c00 c0d77af8 c1959c00 00000000 c1898d40 c05fc250 c14195c4 60000153
1e60: c1959c00 c05fc2f8 c180e158 c0cd8848 00000000 00000000 c1959c00 c0d77af8
1e80: c0d72b98 c05fc704 00000000 c0d77af8 c05fc694 c0d72b98 c1898d40 c05fa0b4
1ea0: 00000000 c19458ac c1957eb4 c0cfb86c c0d77af8 c180e100 00000000 c05fb2ac
1ec0: c0bc2310 c0dad240 c1898d40 c0d77af8 c0dad240 c1898d40 00000000 c1898d40
1ee0: c0dbb000 c05fd258 c0cc55c4 c0dad240 c1898d40 c000a8b0 00000000 00000000
1f00: c18dbe47 c0c6cc00 00000137 c00488a8 c0c6b418 00000000 c0dad240 c0953980
1f20: c1898d40 00000003 c0dad240 c18dbe00 c0cd8864 c0c6b418 c0dbb000 c0cd8848
1f40: 00000000 c0cfb86c c0cf16a4 00000004 c18dbe00 c0cd8868 c0c6b418 c0ca5230
1f60: 00000003 00000003 00000000 c0ca4400 00000000 00000137 00000000 00000000
1f80: c0953c48 00000000 00000000 00000000 00000000 00000000 00000000 c0953c58
1fa0: 00000000 c0953c48 00000000 c00084f8 00000000 00000000 00000000 00000000
1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
pl011_probe from amba_probe+0x10c/0x19c
amba_probe from really_probe+0xb4/0x2a0
really_probe from __driver_probe_device+0x80/0xe4
__driver_probe_device from driver_probe_device+0x44/0x108
driver_probe_device from __driver_attach+0x70/0x110
__driver_attach from bus_for_each_dev+0x74/0xc0
bus_for_each_dev from bus_add_driver+0x154/0x1e8
bus_add_driver from driver_register+0x74/0x10c
driver_register from do_one_initcall+0x8c/0x2fc
do_one_initcall from kernel_init_freeable+0x190/0x220
kernel_init_freeable from kernel_init+0x10/0x108
kernel_init from ret_from_fork+0x14/0x3c
Exception stack(0xc8811fb0 to 0xc8811ff8)
1fa0: 00000000 00000000 00000000 00000000
1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
1fe0: 00000000 00000000 00000000 00000000 00000013 00000000
Code: e8bd81f0 e3a02d37 e3a01f66 e1a00007 (e59c8008)
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
Reboot failed -- System halted
qemu-system-arm: terminating on signal 15 from pid 952897 (/bin/bash)

---
# bad: [c8a684e2e110376c58f0bfa30fb3855d1e319670] Merge tag 'leds-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds
# good: [3d7cb6b04c3f3115719235cc6866b10326de34cd] Linux 5.19
git bisect start 'HEAD' 'v5.19'
# good: [12b68040a5e468068fd7f4af1150eab8f6e96235] Merge tag 'media/v5.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
git bisect good 12b68040a5e468068fd7f4af1150eab8f6e96235
# bad: [5f0848190c6dd0f5b8a2aaf0f1d900a96d96bee0] Merge tag 'platform-drivers-x86-v6.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86
git bisect bad 5f0848190c6dd0f5b8a2aaf0f1d900a96d96bee0
# good: [798cd57cd5f871452461746032cf6ee50b0fd69a] drm/amd/display: restore code for plane with no modifiers
git bisect good 798cd57cd5f871452461746032cf6ee50b0fd69a
# good: [723c188d5cd42a07344f997b0b7e1d83b4173c8d] Merge tag 'staging-6.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
git bisect good 723c188d5cd42a07344f997b0b7e1d83b4173c8d
# good: [a4850b5590d01bf3fb19fda3fc5d433f7382a974] Merge tag 'kvm-s390-next-5.20-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
git bisect good a4850b5590d01bf3fb19fda3fc5d433f7382a974
# bad: [8d9420ca9bd9bceddcfab3d0263d6a8e073396fe] Merge tag 'for-linus-2022080201' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid
git bisect bad 8d9420ca9bd9bceddcfab3d0263d6a8e073396fe
# good: [31f6e3832a0f1c366e54033335aed2375f6e447a] KVM: x86/mmu: remove unused variable
git bisect good 31f6e3832a0f1c366e54033335aed2375f6e447a
# good: [7df9075e232e09d99cf23b657b6cb04c9506e618] Merge tag 'csky-for-linus-6.0-rc1' of https://github.com/c-sky/csky-linux
git bisect good 7df9075e232e09d99cf23b657b6cb04c9506e618
# good: [c556717541c0c34bff887db92057964f0ff74582] Merge branch 'for-5.20/amd-sfh' into for-linus
git bisect good c556717541c0c34bff887db92057964f0ff74582
# good: [a60885b6a97b5dc9340dd9310a57b5682c2daf2d] Merge branch 'for-5.20/uclogic' into for-linus
git bisect good a60885b6a97b5dc9340dd9310a57b5682c2daf2d
# bad: [995177a4c75ee9b9069d5a313d90c005cf89c1b2] Merge tag 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm
git bisect bad 995177a4c75ee9b9069d5a313d90c005cf89c1b2
# good: [b97abb4d0e23766650619a6a57a52c91deb89b8a] ARM: 9217/1: add definition of arch_irq_work_raise()
git bisect good b97abb4d0e23766650619a6a57a52c91deb89b8a
# good: [fe520635ddc4377e84f78c6cf1c54393f1dfa33b] ARM: 9219/1: fix undeclared soft_restart
git bisect good fe520635ddc4377e84f78c6cf1c54393f1dfa33b
# bad: [f2d3b9a46e0ed4742abaa00506b18bb2ca9179d8] ARM: 9220/1: amba: Remove deferred device addition
git bisect bad f2d3b9a46e0ed4742abaa00506b18bb2ca9179d8
# first bad commit: [f2d3b9a46e0ed4742abaa00506b18bb2ca9179d8] ARM: 9220/1: amba: Remove deferred device addition


2022-08-10 00:52:07

by Saravana Kannan

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Tue, Aug 9, 2022 at 3:30 AM Guenter Roeck <[email protected]> wrote:
>
> Hi,
>
> On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
> > The uevents generated for an amba device need PID and CID information
> > that's available only when the amba device is powered on, clocked and
> > out of reset. So, if those resources aren't available, the information
> > can't be read to generate the uevents. To workaround this requirement,
> > if the resources weren't available, the device addition was deferred and
> > retried periodically.
> >
> ...
>
> This patch results in a large number of crashes in various qemu
> emulations. Crash and bisect logs below. Reverting this patch
> fixes the problem.

Hey Guenter,

Thanks for the report. I'm kinda surprised because I had a pl011 probe
successfully in my local testing.

I'm wondering if you are having an interaction with some other changes I made.
Can you try pulling this series in and see if it helps?

https://lore.kernel.org/lkml/[email protected]/

> Additional information: The decoded stack trace suggests that the
> "id" parameter of pl011_probe() may be NULL.

That's strange! I'll take a closer look once you confirm that the
series above doesn't help.

-Saravana

> Guenter
>
> ---
> 8<--- cut here ---
> Unable to handle kernel NULL pointer dereference at virtual address 00000008
> [00000008] *pgd=00000000
> Internal error: Oops: 5 [#1] ARM
> Modules linked in:
> CPU: 0 PID: 1 Comm: swapper Not tainted 5.19.0+ #1
> Hardware name: ARM-Versatile (Device Tree Support)
> PC is at pl011_probe+0x40/0x110
> LR is at amba_probe+0x10c/0x19c
> pc : [<c059847c>] lr : [<c055ac9c>] psr: 60000153
> sp : c8811e00 ip : 00000000 fp : c1959ef8
> r10: c7ef55f8 r9 : fffffdfb r8 : c0d77af8
> r7 : c1959c00 r6 : c1959c00 r5 : 00000000 r4 : 00000003
> r3 : c14191a4 r2 : 00000dc0 r1 : 00000198 r0 : c1959c00
> Flags: nZCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none
> Control: 00093177 Table: 00004000 DAC: 00000053
> Register r0 information: slab kmalloc-1k start c1959c00 pointer offset 0 size 1024
> Register r1 information: non-paged memory
> Register r2 information: non-paged memory
> Register r3 information: non-slab/vmalloc memory
> Register r4 information: non-paged memory
> Register r5 information: NULL pointer
> Register r6 information: slab kmalloc-1k start c1959c00 pointer offset 0 size 1024
> Register r7 information: slab kmalloc-1k start c1959c00 pointer offset 0 size 1024
> Register r8 information: non-slab/vmalloc memory
> Register r9 information: non-paged memory
> Register r10 information: non-slab/vmalloc memory
> Register r11 information: slab kmalloc-1k start c1959c00 pointer offset 760 size 1024
> Register r12 information: NULL pointer
> Process swapper (pid: 1, stack limit = 0x(ptrval))
> Stack: (0xc8811e00 to 0xc8812000)
> 1e00: 60000153 00000009 c1959c00 00000000 c0d77af8 c055ac9c c055ab90 c1959c00
> 1e20: 00000000 c0d77af8 00000000 c1898d40 c180e158 c0cd8848 00000000 c05fbfe4
> 1e40: c1959c00 c0d77af8 c1959c00 00000000 c1898d40 c05fc250 c14195c4 60000153
> 1e60: c1959c00 c05fc2f8 c180e158 c0cd8848 00000000 00000000 c1959c00 c0d77af8
> 1e80: c0d72b98 c05fc704 00000000 c0d77af8 c05fc694 c0d72b98 c1898d40 c05fa0b4
> 1ea0: 00000000 c19458ac c1957eb4 c0cfb86c c0d77af8 c180e100 00000000 c05fb2ac
> 1ec0: c0bc2310 c0dad240 c1898d40 c0d77af8 c0dad240 c1898d40 00000000 c1898d40
> 1ee0: c0dbb000 c05fd258 c0cc55c4 c0dad240 c1898d40 c000a8b0 00000000 00000000
> 1f00: c18dbe47 c0c6cc00 00000137 c00488a8 c0c6b418 00000000 c0dad240 c0953980
> 1f20: c1898d40 00000003 c0dad240 c18dbe00 c0cd8864 c0c6b418 c0dbb000 c0cd8848
> 1f40: 00000000 c0cfb86c c0cf16a4 00000004 c18dbe00 c0cd8868 c0c6b418 c0ca5230
> 1f60: 00000003 00000003 00000000 c0ca4400 00000000 00000137 00000000 00000000
> 1f80: c0953c48 00000000 00000000 00000000 00000000 00000000 00000000 c0953c58
> 1fa0: 00000000 c0953c48 00000000 c00084f8 00000000 00000000 00000000 00000000
> 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
> 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
> pl011_probe from amba_probe+0x10c/0x19c
> amba_probe from really_probe+0xb4/0x2a0
> really_probe from __driver_probe_device+0x80/0xe4
> __driver_probe_device from driver_probe_device+0x44/0x108
> driver_probe_device from __driver_attach+0x70/0x110
> __driver_attach from bus_for_each_dev+0x74/0xc0
> bus_for_each_dev from bus_add_driver+0x154/0x1e8
> bus_add_driver from driver_register+0x74/0x10c
> driver_register from do_one_initcall+0x8c/0x2fc
> do_one_initcall from kernel_init_freeable+0x190/0x220
> kernel_init_freeable from kernel_init+0x10/0x108
> kernel_init from ret_from_fork+0x14/0x3c
> Exception stack(0xc8811fb0 to 0xc8811ff8)
> 1fa0: 00000000 00000000 00000000 00000000
> 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
> 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000
> Code: e8bd81f0 e3a02d37 e3a01f66 e1a00007 (e59c8008)
> ---[ end trace 0000000000000000 ]---
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> Reboot failed -- System halted
> qemu-system-arm: terminating on signal 15 from pid 952897 (/bin/bash)
>
> ---
> # bad: [c8a684e2e110376c58f0bfa30fb3855d1e319670] Merge tag 'leds-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds
> # good: [3d7cb6b04c3f3115719235cc6866b10326de34cd] Linux 5.19
> git bisect start 'HEAD' 'v5.19'
> # good: [12b68040a5e468068fd7f4af1150eab8f6e96235] Merge tag 'media/v5.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
> git bisect good 12b68040a5e468068fd7f4af1150eab8f6e96235
> # bad: [5f0848190c6dd0f5b8a2aaf0f1d900a96d96bee0] Merge tag 'platform-drivers-x86-v6.0-1' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86
> git bisect bad 5f0848190c6dd0f5b8a2aaf0f1d900a96d96bee0
> # good: [798cd57cd5f871452461746032cf6ee50b0fd69a] drm/amd/display: restore code for plane with no modifiers
> git bisect good 798cd57cd5f871452461746032cf6ee50b0fd69a
> # good: [723c188d5cd42a07344f997b0b7e1d83b4173c8d] Merge tag 'staging-6.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
> git bisect good 723c188d5cd42a07344f997b0b7e1d83b4173c8d
> # good: [a4850b5590d01bf3fb19fda3fc5d433f7382a974] Merge tag 'kvm-s390-next-5.20-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
> git bisect good a4850b5590d01bf3fb19fda3fc5d433f7382a974
> # bad: [8d9420ca9bd9bceddcfab3d0263d6a8e073396fe] Merge tag 'for-linus-2022080201' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid
> git bisect bad 8d9420ca9bd9bceddcfab3d0263d6a8e073396fe
> # good: [31f6e3832a0f1c366e54033335aed2375f6e447a] KVM: x86/mmu: remove unused variable
> git bisect good 31f6e3832a0f1c366e54033335aed2375f6e447a
> # good: [7df9075e232e09d99cf23b657b6cb04c9506e618] Merge tag 'csky-for-linus-6.0-rc1' of https://github.com/c-sky/csky-linux
> git bisect good 7df9075e232e09d99cf23b657b6cb04c9506e618
> # good: [c556717541c0c34bff887db92057964f0ff74582] Merge branch 'for-5.20/amd-sfh' into for-linus
> git bisect good c556717541c0c34bff887db92057964f0ff74582
> # good: [a60885b6a97b5dc9340dd9310a57b5682c2daf2d] Merge branch 'for-5.20/uclogic' into for-linus
> git bisect good a60885b6a97b5dc9340dd9310a57b5682c2daf2d
> # bad: [995177a4c75ee9b9069d5a313d90c005cf89c1b2] Merge tag 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm
> git bisect bad 995177a4c75ee9b9069d5a313d90c005cf89c1b2
> # good: [b97abb4d0e23766650619a6a57a52c91deb89b8a] ARM: 9217/1: add definition of arch_irq_work_raise()
> git bisect good b97abb4d0e23766650619a6a57a52c91deb89b8a
> # good: [fe520635ddc4377e84f78c6cf1c54393f1dfa33b] ARM: 9219/1: fix undeclared soft_restart
> git bisect good fe520635ddc4377e84f78c6cf1c54393f1dfa33b
> # bad: [f2d3b9a46e0ed4742abaa00506b18bb2ca9179d8] ARM: 9220/1: amba: Remove deferred device addition
> git bisect bad f2d3b9a46e0ed4742abaa00506b18bb2ca9179d8
> # first bad commit: [f2d3b9a46e0ed4742abaa00506b18bb2ca9179d8] ARM: 9220/1: amba: Remove deferred device addition
>
>

2022-08-10 01:49:37

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On 8/9/22 17:42, Saravana Kannan wrote:
> On Tue, Aug 9, 2022 at 3:30 AM Guenter Roeck <[email protected]> wrote:
>>
>> Hi,
>>
>> On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
>>> The uevents generated for an amba device need PID and CID information
>>> that's available only when the amba device is powered on, clocked and
>>> out of reset. So, if those resources aren't available, the information
>>> can't be read to generate the uevents. To workaround this requirement,
>>> if the resources weren't available, the device addition was deferred and
>>> retried periodically.
>>>
>> ...
>>
>> This patch results in a large number of crashes in various qemu
>> emulations. Crash and bisect logs below. Reverting this patch
>> fixes the problem.
>
> Hey Guenter,
>
> Thanks for the report. I'm kinda surprised because I had a pl011 probe
> successfully in my local testing.
>

Maybe it only happens with qemu emulations, or with certain configurations.

> I'm wondering if you are having an interaction with some other changes I made.
> Can you try pulling this series in and see if it helps?
>
> https://lore.kernel.org/lkml/[email protected]/
>
>> Additional information: The decoded stack trace suggests that the
>> "id" parameter of pl011_probe() may be NULL.
>
> That's strange! I'll take a closer look once you confirm that the
> series above doesn't help.
>

That series does not make a difference.

Guenter

2022-08-10 03:48:53

by Saravana Kannan

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Tue, Aug 9, 2022 at 6:34 PM Guenter Roeck <[email protected]> wrote:
>
> On 8/9/22 17:42, Saravana Kannan wrote:
> > On Tue, Aug 9, 2022 at 3:30 AM Guenter Roeck <[email protected]> wrote:
> >>
> >> Hi,
> >>
> >> On Wed, Jul 27, 2022 at 11:19:35AM -0700, Saravana Kannan wrote:
> >>> The uevents generated for an amba device need PID and CID information
> >>> that's available only when the amba device is powered on, clocked and
> >>> out of reset. So, if those resources aren't available, the information
> >>> can't be read to generate the uevents. To workaround this requirement,
> >>> if the resources weren't available, the device addition was deferred and
> >>> retried periodically.
> >>>
> >> ...
> >>
> >> This patch results in a large number of crashes in various qemu
> >> emulations. Crash and bisect logs below. Reverting this patch
> >> fixes the problem.
> >
> > Hey Guenter,
> >
> > Thanks for the report. I'm kinda surprised because I had a pl011 probe
> > successfully in my local testing.
> >
>
> Maybe it only happens with qemu emulations, or with certain configurations.

I tested on a qemu emulation too.

Can you give me more details on the qemu configuration so I could try
to reproduce it?

> > I'm wondering if you are having an interaction with some other changes I made.
> > Can you try pulling this series in and see if it helps?
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> >> Additional information: The decoded stack trace suggests that the
> >> "id" parameter of pl011_probe() may be NULL.
> >
> > That's strange! I'll take a closer look once you confirm that the
> > series above doesn't help.
> >
>
> That series does not make a difference.

Thanks for checking that.

-Saravana

2022-08-10 13:33:02

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On 8/9/22 20:33, Saravana Kannan wrote:
[ ... ]

>
> Can you give me more details on the qemu configuration so I could try
> to reproduce it?

qemu-system-arm -M vexpress-a9 -kernel arch/arm/boot/zImage -no-reboot \
-initrd rootfs-armv5.cpio -m 128 \
--append "rdinit=/sbin/init console=ttyAMA0,115200" \
-dtb arch/arm/boot/dts/vexpress-v2p-ca9.dtb \
-nographic -monitor null -serial stdio

using multi_v7_defconfig will hang nicely with your patch applied,
and boot as expected without. This was with qemu v7.0, but I am
sure older qemu versions will show the same behavior. The initrd
used should not matter, but you'll find it at
https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv5.cpio.gz

Guenter

2022-08-10 22:02:25

by Isaac J. Manjarres

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Wed, Aug 10, 2022 at 05:58:58AM -0700, Guenter Roeck wrote:
> On 8/9/22 20:33, Saravana Kannan wrote:
> [ ... ]
>
> >
> > Can you give me more details on the qemu configuration so I could try
> > to reproduce it?
>
> qemu-system-arm -M vexpress-a9 -kernel arch/arm/boot/zImage -no-reboot \
> -initrd rootfs-armv5.cpio -m 128 \
> --append "rdinit=/sbin/init console=ttyAMA0,115200" \
> -dtb arch/arm/boot/dts/vexpress-v2p-ca9.dtb \
> -nographic -monitor null -serial stdio
>
> using multi_v7_defconfig will hang nicely with your patch applied,
> and boot as expected without. This was with qemu v7.0, but I am
> sure older qemu versions will show the same behavior. The initrd
> used should not matter, but you'll find it at
> https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv5.cpio.gz
>
> Guenter
>
Hi Guenter,

Thanks for the information; I was able to reproduce this on my end as
well. The following changes fixed the problem for me. Can you please try
them out?

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 70f79fc71539..b377f18d8acc 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -881,6 +881,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
dev_dbg(dev, "Device match requests probe deferral\n");
dev->can_match = true;
driver_deferred_probe_add(dev);
+ /*
+ * Device can't match with the bus right now, so don't attempt
+ * to match or bind with other drivers on the bus.
+ */
+ return ret;
} else if (ret < 0) {
dev_dbg(dev, "Bus failed to match device: %d\n", ret);
return ret;
@@ -1120,6 +1125,11 @@ static int __driver_attach(struct device *dev, void *data)
dev_dbg(dev, "Device match requests probe deferral\n");
dev->can_match = true;
driver_deferred_probe_add(dev);
+ /*
+ * Driver could not match with device, but may match with
+ * another device on the bus.
+ */
+ return 0;
} else if (ret < 0) {
dev_dbg(dev, "Bus failed to match device: %d\n", ret);
return ret;


Thanks,
Isaac

2022-08-11 02:24:37

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On 8/10/22 14:47, Isaac Manjarres wrote:
> On Wed, Aug 10, 2022 at 05:58:58AM -0700, Guenter Roeck wrote:
>> On 8/9/22 20:33, Saravana Kannan wrote:
>> [ ... ]
>>
>>>
>>> Can you give me more details on the qemu configuration so I could try
>>> to reproduce it?
>>
>> qemu-system-arm -M vexpress-a9 -kernel arch/arm/boot/zImage -no-reboot \
>> -initrd rootfs-armv5.cpio -m 128 \
>> --append "rdinit=/sbin/init console=ttyAMA0,115200" \
>> -dtb arch/arm/boot/dts/vexpress-v2p-ca9.dtb \
>> -nographic -monitor null -serial stdio
>>
>> using multi_v7_defconfig will hang nicely with your patch applied,
>> and boot as expected without. This was with qemu v7.0, but I am
>> sure older qemu versions will show the same behavior. The initrd
>> used should not matter, but you'll find it at
>> https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv5.cpio.gz
>>
>> Guenter
>>
> Hi Guenter,
>
> Thanks for the information; I was able to reproduce this on my end as
> well. The following changes fixed the problem for me. Can you please try
> them out?
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 70f79fc71539..b377f18d8acc 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -881,6 +881,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
> dev_dbg(dev, "Device match requests probe deferral\n");
> dev->can_match = true;
> driver_deferred_probe_add(dev);
> + /*
> + * Device can't match with the bus right now, so don't attempt
> + * to match or bind with other drivers on the bus.
> + */
> + return ret;
> } else if (ret < 0) {
> dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> return ret;
> @@ -1120,6 +1125,11 @@ static int __driver_attach(struct device *dev, void *data)
> dev_dbg(dev, "Device match requests probe deferral\n");
> dev->can_match = true;
> driver_deferred_probe_add(dev);
> + /*
> + * Driver could not match with device, but may match with
> + * another device on the bus.
> + */
> + return 0;
> } else if (ret < 0) {
> dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> return ret;
>

Most of the tests pass with the above applied, but there is still one crash.

8<--- cut here ---^M
Unhandled fault: page domain fault (0x81b) at 0x00000122^M
[00000122] *pgd=00000000^M
Internal error: : 81b [#1] ARM^M
Modules linked in:^M
CPU: 0 PID: 1 Comm: swapper Tainted: G N 5.19.0+ #1^M
Hardware name: ARM-Versatile (Device Tree Support)^M
PC is at do_alignment_ldrstr+0x7c/0x164^M
LR is at ai_half+0x0/0x4^M
pc : [<c001fa00>] lr : [<c0ca1278>] psr: 60000113^M
sp : c8811d68 ip : 00000003 fp : 00000004^M
r10: c05433e4 r9 : c0ca1278 r8 : 00000801^M
r7 : 00000122 r6 : 00000000 r5 : e5823000 r4 : c8811df8^M
r3 : 00000100 r2 : c8811df8 r1 : 00000000 r0 : 00000122^M
Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none^M
Control: 00093177 Table: 01698000 DAC: 00000051^M
Register r0 information: non-paged memory^M
Register r1 information: NULL pointer^M
Register r2 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
Register r3 information: non-paged memory^M
Register r4 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
Register r5 information: non-paged memory^M
Register r6 information: NULL pointer^M
Register r7 information: non-paged memory^M
Register r8 information: non-paged memory^M
Register r9 information: non-slab/vmalloc memory^M
Register r10 information: non-slab/vmalloc memory^M
Register r11 information: non-paged memory^M
Register r12 information: non-paged memory^M
Process swapper (pid: 1, stack limit = 0x(ptrval))^M
Stack: (0xc8811d68 to 0xc8812000)^M
1d60: c8811df8 e5823000 00000000 c00201dc 00000000 00000000^M
1d80: c0bf2fd4 60000013 00000000 c006c440 00000001 00000000 e5823000 c053cdc0^M
1da0: 00000000 c0be786c c1496d40 00000801 c0bec3f8 c001ffdc c8811df8 00000122^M
1dc0: c1496d40 c0bc4858 00000000 c001d66c c12ae950 00000001 c0c5f3b0 c1496d40^M
1de0: c05433e4 20000013 ffffffff c8811e2c c1496d40 c00095c4 00000001 00000001^M
1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
1e40: 00000053 c05433cc ffffffed c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
1e60: c0bc4858 c053a784 c1552c00 c0c5f294 c0c5f294 c053a808 00000000 c1552c00^M
1e80: c0c5f294 c05d9f94 00000000 c0c5f294 c05d9f74 c0c5f230 c1496d40 c05d7984^M
1ea0: 00000000 c1540eac c7cd5fb4 c0be786c c0c5f294 c24d6e00 00000000 c05d8b7c^M
1ec0: c0acccd4 c0c92600 c1496d40 c0c5f294 c0c92600 c1496d40 00000000 c1496d40^M
1ee0: c0ca1000 c05dab3c c0bb0690 c0c92600 c1496d40 c000a8b0 00000000 00000000^M
1f00: c14d7e4b c0b5f800 000000bf c0047c5c c0b5e9b4 00000000 c0c92600 c088e078^M
1f20: c1496d40 00000007 c0c92600 c14d7e00 c0bc4874 c0b5e9b4 c0ca1000 c0bc4858^M
1f40: 00000000 c0be786c c0bdda90 00000008 c14d7e00 c0bc4878 c0b5e9b4 c0b93230^M
1f60: 00000007 00000007 00000000 c0b92400 00000000 000000bf 00000000 00000000^M
1f80: c088e340 00000000 00000000 00000000 00000000 00000000 00000000 c088e350^M
1fa0: 00000000 c088e340 00000000 c00084f8 00000000 00000000 00000000 00000000^M
1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000^M
do_alignment_ldrstr from do_alignment+0x200/0x984^M
do_alignment from do_DataAbort+0x38/0xb8^M
do_DataAbort from __dabt_svc+0x64/0xa0^M
Exception stack(0xc8811df8 to 0xc8811e40)^M
1de0: 00000001 00000001^M
1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
__dabt_svc from __clk_put+0x34/0x174^M
__clk_put from amba_read_periphid+0xd8/0x120^M
amba_read_periphid from amba_match+0x3c/0x84^M
amba_match from __driver_attach+0x20/0x114^M
__driver_attach from bus_for_each_dev+0x74/0xc0^M
bus_for_each_dev from bus_add_driver+0x154/0x1e8^M
bus_add_driver from driver_register+0x74/0x10c^M
driver_register from do_one_initcall+0x8c/0x2fc^M
do_one_initcall from kernel_init_freeable+0x190/0x220^M
kernel_init_freeable from kernel_init+0x10/0x108^M
kernel_init from ret_from_fork+0x14/0x3c^M
Exception stack(0xc8811fb0 to 0xc8811ff8)^M
1fa0: 00000000 00000000 00000000 00000000^M
1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
1fe0: 00000000 00000000 00000000 00000000 00000013 00000000^M
Code: e3a00002 e782310c e8bd8070 e792310c (e4c03001) ^M
---[ end trace 0000000000000000 ]---^M
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b^M

This is with versatile_defconfig and versatileab. Let me know if you need details.

Guenter

2022-08-11 02:35:01

by Saravana Kannan

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Wed, Aug 10, 2022 at 7:03 PM Guenter Roeck <[email protected]> wrote:
>
> On 8/10/22 14:47, Isaac Manjarres wrote:
> > On Wed, Aug 10, 2022 at 05:58:58AM -0700, Guenter Roeck wrote:
> >> On 8/9/22 20:33, Saravana Kannan wrote:
> >> [ ... ]
> >>
> >>>
> >>> Can you give me more details on the qemu configuration so I could try
> >>> to reproduce it?
> >>
> >> qemu-system-arm -M vexpress-a9 -kernel arch/arm/boot/zImage -no-reboot \
> >> -initrd rootfs-armv5.cpio -m 128 \
> >> --append "rdinit=/sbin/init console=ttyAMA0,115200" \
> >> -dtb arch/arm/boot/dts/vexpress-v2p-ca9.dtb \
> >> -nographic -monitor null -serial stdio
> >>
> >> using multi_v7_defconfig will hang nicely with your patch applied,
> >> and boot as expected without. This was with qemu v7.0, but I am
> >> sure older qemu versions will show the same behavior. The initrd
> >> used should not matter, but you'll find it at
> >> https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv5.cpio.gz
> >>
> >> Guenter
> >>
> > Hi Guenter,
> >
> > Thanks for the information; I was able to reproduce this on my end as
> > well. The following changes fixed the problem for me. Can you please try
> > them out?
> >
> > diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> > index 70f79fc71539..b377f18d8acc 100644
> > --- a/drivers/base/dd.c
> > +++ b/drivers/base/dd.c
> > @@ -881,6 +881,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
> > dev_dbg(dev, "Device match requests probe deferral\n");
> > dev->can_match = true;
> > driver_deferred_probe_add(dev);
> > + /*
> > + * Device can't match with the bus right now, so don't attempt
> > + * to match or bind with other drivers on the bus.
> > + */
> > + return ret;
> > } else if (ret < 0) {
> > dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> > return ret;
> > @@ -1120,6 +1125,11 @@ static int __driver_attach(struct device *dev, void *data)
> > dev_dbg(dev, "Device match requests probe deferral\n");
> > dev->can_match = true;
> > driver_deferred_probe_add(dev);
> > + /*
> > + * Driver could not match with device, but may match with
> > + * another device on the bus.
> > + */
> > + return 0;
> > } else if (ret < 0) {
> > dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> > return ret;
> >

Thanks Isaac for debugging and providing a fix! It's surprising that
no one noticed this lack of "return"s for so long.

>
> Most of the tests pass with the above applied, but there is still one crash.

Good to hear it's mostly good now.

> 8<--- cut here ---^M
> Unhandled fault: page domain fault (0x81b) at 0x00000122^M
> [00000122] *pgd=00000000^M
> Internal error: : 81b [#1] ARM^M
> Modules linked in:^M
> CPU: 0 PID: 1 Comm: swapper Tainted: G N 5.19.0+ #1^M
> Hardware name: ARM-Versatile (Device Tree Support)^M
> PC is at do_alignment_ldrstr+0x7c/0x164^M
> LR is at ai_half+0x0/0x4^M
> pc : [<c001fa00>] lr : [<c0ca1278>] psr: 60000113^M
> sp : c8811d68 ip : 00000003 fp : 00000004^M
> r10: c05433e4 r9 : c0ca1278 r8 : 00000801^M
> r7 : 00000122 r6 : 00000000 r5 : e5823000 r4 : c8811df8^M
> r3 : 00000100 r2 : c8811df8 r1 : 00000000 r0 : 00000122^M
> Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none^M
> Control: 00093177 Table: 01698000 DAC: 00000051^M
> Register r0 information: non-paged memory^M
> Register r1 information: NULL pointer^M
> Register r2 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
> Register r3 information: non-paged memory^M
> Register r4 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
> Register r5 information: non-paged memory^M
> Register r6 information: NULL pointer^M
> Register r7 information: non-paged memory^M
> Register r8 information: non-paged memory^M
> Register r9 information: non-slab/vmalloc memory^M
> Register r10 information: non-slab/vmalloc memory^M
> Register r11 information: non-paged memory^M
> Register r12 information: non-paged memory^M
> Process swapper (pid: 1, stack limit = 0x(ptrval))^M
> Stack: (0xc8811d68 to 0xc8812000)^M
> 1d60: c8811df8 e5823000 00000000 c00201dc 00000000 00000000^M
> 1d80: c0bf2fd4 60000013 00000000 c006c440 00000001 00000000 e5823000 c053cdc0^M
> 1da0: 00000000 c0be786c c1496d40 00000801 c0bec3f8 c001ffdc c8811df8 00000122^M
> 1dc0: c1496d40 c0bc4858 00000000 c001d66c c12ae950 00000001 c0c5f3b0 c1496d40^M
> 1de0: c05433e4 20000013 ffffffff c8811e2c c1496d40 c00095c4 00000001 00000001^M
> 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
> 1e40: 00000053 c05433cc ffffffed c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> 1e60: c0bc4858 c053a784 c1552c00 c0c5f294 c0c5f294 c053a808 00000000 c1552c00^M
> 1e80: c0c5f294 c05d9f94 00000000 c0c5f294 c05d9f74 c0c5f230 c1496d40 c05d7984^M
> 1ea0: 00000000 c1540eac c7cd5fb4 c0be786c c0c5f294 c24d6e00 00000000 c05d8b7c^M
> 1ec0: c0acccd4 c0c92600 c1496d40 c0c5f294 c0c92600 c1496d40 00000000 c1496d40^M
> 1ee0: c0ca1000 c05dab3c c0bb0690 c0c92600 c1496d40 c000a8b0 00000000 00000000^M
> 1f00: c14d7e4b c0b5f800 000000bf c0047c5c c0b5e9b4 00000000 c0c92600 c088e078^M
> 1f20: c1496d40 00000007 c0c92600 c14d7e00 c0bc4874 c0b5e9b4 c0ca1000 c0bc4858^M
> 1f40: 00000000 c0be786c c0bdda90 00000008 c14d7e00 c0bc4878 c0b5e9b4 c0b93230^M
> 1f60: 00000007 00000007 00000000 c0b92400 00000000 000000bf 00000000 00000000^M
> 1f80: c088e340 00000000 00000000 00000000 00000000 00000000 00000000 c088e350^M
> 1fa0: 00000000 c088e340 00000000 c00084f8 00000000 00000000 00000000 00000000^M
> 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
> 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000^M
> do_alignment_ldrstr from do_alignment+0x200/0x984^M
> do_alignment from do_DataAbort+0x38/0xb8^M
> do_DataAbort from __dabt_svc+0x64/0xa0^M
> Exception stack(0xc8811df8 to 0xc8811e40)^M
> 1de0: 00000001 00000001^M
> 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
> __dabt_svc from __clk_put+0x34/0x174^M
> __clk_put from amba_read_periphid+0xd8/0x120^M

What the heck! How is clk_put() failing. I'll leave this to Isaac too.

> amba_read_periphid from amba_match+0x3c/0x84^M
> amba_match from __driver_attach+0x20/0x114^M
> __driver_attach from bus_for_each_dev+0x74/0xc0^M
> bus_for_each_dev from bus_add_driver+0x154/0x1e8^M
> bus_add_driver from driver_register+0x74/0x10c^M
> driver_register from do_one_initcall+0x8c/0x2fc^M
> do_one_initcall from kernel_init_freeable+0x190/0x220^M
> kernel_init_freeable from kernel_init+0x10/0x108^M
> kernel_init from ret_from_fork+0x14/0x3c^M
> Exception stack(0xc8811fb0 to 0xc8811ff8)^M
> 1fa0: 00000000 00000000 00000000 00000000^M
> 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
> 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000^M
> Code: e3a00002 e782310c e8bd8070 e792310c (e4c03001) ^M
> ---[ end trace 0000000000000000 ]---^M
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b^M
>
> This is with versatile_defconfig and versatileab. Let me know if you need details.

Is the dts that's generated called versatilbeab.dts?

-Saravana

2022-08-11 04:26:59

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On 8/10/22 19:28, Saravana Kannan wrote:
> On Wed, Aug 10, 2022 at 7:03 PM Guenter Roeck <[email protected]> wrote:
>>
>> On 8/10/22 14:47, Isaac Manjarres wrote:
>>> On Wed, Aug 10, 2022 at 05:58:58AM -0700, Guenter Roeck wrote:
>>>> On 8/9/22 20:33, Saravana Kannan wrote:
>>>> [ ... ]
>>>>
>>>>>
>>>>> Can you give me more details on the qemu configuration so I could try
>>>>> to reproduce it?
>>>>
>>>> qemu-system-arm -M vexpress-a9 -kernel arch/arm/boot/zImage -no-reboot \
>>>> -initrd rootfs-armv5.cpio -m 128 \
>>>> --append "rdinit=/sbin/init console=ttyAMA0,115200" \
>>>> -dtb arch/arm/boot/dts/vexpress-v2p-ca9.dtb \
>>>> -nographic -monitor null -serial stdio
>>>>
>>>> using multi_v7_defconfig will hang nicely with your patch applied,
>>>> and boot as expected without. This was with qemu v7.0, but I am
>>>> sure older qemu versions will show the same behavior. The initrd
>>>> used should not matter, but you'll find it at
>>>> https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv5.cpio.gz
>>>>
>>>> Guenter
>>>>
>>> Hi Guenter,
>>>
>>> Thanks for the information; I was able to reproduce this on my end as
>>> well. The following changes fixed the problem for me. Can you please try
>>> them out?
>>>
>>> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
>>> index 70f79fc71539..b377f18d8acc 100644
>>> --- a/drivers/base/dd.c
>>> +++ b/drivers/base/dd.c
>>> @@ -881,6 +881,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
>>> dev_dbg(dev, "Device match requests probe deferral\n");
>>> dev->can_match = true;
>>> driver_deferred_probe_add(dev);
>>> + /*
>>> + * Device can't match with the bus right now, so don't attempt
>>> + * to match or bind with other drivers on the bus.
>>> + */
>>> + return ret;
>>> } else if (ret < 0) {
>>> dev_dbg(dev, "Bus failed to match device: %d\n", ret);
>>> return ret;
>>> @@ -1120,6 +1125,11 @@ static int __driver_attach(struct device *dev, void *data)
>>> dev_dbg(dev, "Device match requests probe deferral\n");
>>> dev->can_match = true;
>>> driver_deferred_probe_add(dev);
>>> + /*
>>> + * Driver could not match with device, but may match with
>>> + * another device on the bus.
>>> + */
>>> + return 0;
>>> } else if (ret < 0) {
>>> dev_dbg(dev, "Bus failed to match device: %d\n", ret);
>>> return ret;
>>>
>
> Thanks Isaac for debugging and providing a fix! It's surprising that
> no one noticed this lack of "return"s for so long.
>
>>
>> Most of the tests pass with the above applied, but there is still one crash.
>
> Good to hear it's mostly good now.
>
>> 8<--- cut here ---^M
>> Unhandled fault: page domain fault (0x81b) at 0x00000122^M
>> [00000122] *pgd=00000000^M
>> Internal error: : 81b [#1] ARM^M
>> Modules linked in:^M
>> CPU: 0 PID: 1 Comm: swapper Tainted: G N 5.19.0+ #1^M
>> Hardware name: ARM-Versatile (Device Tree Support)^M
>> PC is at do_alignment_ldrstr+0x7c/0x164^M
>> LR is at ai_half+0x0/0x4^M
>> pc : [<c001fa00>] lr : [<c0ca1278>] psr: 60000113^M
>> sp : c8811d68 ip : 00000003 fp : 00000004^M
>> r10: c05433e4 r9 : c0ca1278 r8 : 00000801^M
>> r7 : 00000122 r6 : 00000000 r5 : e5823000 r4 : c8811df8^M
>> r3 : 00000100 r2 : c8811df8 r1 : 00000000 r0 : 00000122^M
>> Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none^M
>> Control: 00093177 Table: 01698000 DAC: 00000051^M
>> Register r0 information: non-paged memory^M
>> Register r1 information: NULL pointer^M
>> Register r2 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
>> Register r3 information: non-paged memory^M
>> Register r4 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
>> Register r5 information: non-paged memory^M
>> Register r6 information: NULL pointer^M
>> Register r7 information: non-paged memory^M
>> Register r8 information: non-paged memory^M
>> Register r9 information: non-slab/vmalloc memory^M
>> Register r10 information: non-slab/vmalloc memory^M
>> Register r11 information: non-paged memory^M
>> Register r12 information: non-paged memory^M
>> Process swapper (pid: 1, stack limit = 0x(ptrval))^M
>> Stack: (0xc8811d68 to 0xc8812000)^M
>> 1d60: c8811df8 e5823000 00000000 c00201dc 00000000 00000000^M
>> 1d80: c0bf2fd4 60000013 00000000 c006c440 00000001 00000000 e5823000 c053cdc0^M
>> 1da0: 00000000 c0be786c c1496d40 00000801 c0bec3f8 c001ffdc c8811df8 00000122^M
>> 1dc0: c1496d40 c0bc4858 00000000 c001d66c c12ae950 00000001 c0c5f3b0 c1496d40^M
>> 1de0: c05433e4 20000013 ffffffff c8811e2c c1496d40 c00095c4 00000001 00000001^M
>> 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
>> 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
>> 1e40: 00000053 c05433cc ffffffed c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
>> 1e60: c0bc4858 c053a784 c1552c00 c0c5f294 c0c5f294 c053a808 00000000 c1552c00^M
>> 1e80: c0c5f294 c05d9f94 00000000 c0c5f294 c05d9f74 c0c5f230 c1496d40 c05d7984^M
>> 1ea0: 00000000 c1540eac c7cd5fb4 c0be786c c0c5f294 c24d6e00 00000000 c05d8b7c^M
>> 1ec0: c0acccd4 c0c92600 c1496d40 c0c5f294 c0c92600 c1496d40 00000000 c1496d40^M
>> 1ee0: c0ca1000 c05dab3c c0bb0690 c0c92600 c1496d40 c000a8b0 00000000 00000000^M
>> 1f00: c14d7e4b c0b5f800 000000bf c0047c5c c0b5e9b4 00000000 c0c92600 c088e078^M
>> 1f20: c1496d40 00000007 c0c92600 c14d7e00 c0bc4874 c0b5e9b4 c0ca1000 c0bc4858^M
>> 1f40: 00000000 c0be786c c0bdda90 00000008 c14d7e00 c0bc4878 c0b5e9b4 c0b93230^M
>> 1f60: 00000007 00000007 00000000 c0b92400 00000000 000000bf 00000000 00000000^M
>> 1f80: c088e340 00000000 00000000 00000000 00000000 00000000 00000000 c088e350^M
>> 1fa0: 00000000 c088e340 00000000 c00084f8 00000000 00000000 00000000 00000000^M
>> 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
>> 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000^M
>> do_alignment_ldrstr from do_alignment+0x200/0x984^M
>> do_alignment from do_DataAbort+0x38/0xb8^M
>> do_DataAbort from __dabt_svc+0x64/0xa0^M
>> Exception stack(0xc8811df8 to 0xc8811e40)^M
>> 1de0: 00000001 00000001^M
>> 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
>> 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
>> __dabt_svc from __clk_put+0x34/0x174^M
>> __clk_put from amba_read_periphid+0xd8/0x120^M
>
> What the heck! How is clk_put() failing. I'll leave this to Isaac too.
>
>> amba_read_periphid from amba_match+0x3c/0x84^M
>> amba_match from __driver_attach+0x20/0x114^M
>> __driver_attach from bus_for_each_dev+0x74/0xc0^M
>> bus_for_each_dev from bus_add_driver+0x154/0x1e8^M
>> bus_add_driver from driver_register+0x74/0x10c^M
>> driver_register from do_one_initcall+0x8c/0x2fc^M
>> do_one_initcall from kernel_init_freeable+0x190/0x220^M
>> kernel_init_freeable from kernel_init+0x10/0x108^M
>> kernel_init from ret_from_fork+0x14/0x3c^M
>> Exception stack(0xc8811fb0 to 0xc8811ff8)^M
>> 1fa0: 00000000 00000000 00000000 00000000^M
>> 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
>> 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000^M
>> Code: e3a00002 e782310c e8bd8070 e792310c (e4c03001) ^M
>> ---[ end trace 0000000000000000 ]---^M
>> Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b^M
>>
>> This is with versatile_defconfig and versatileab. Let me know if you need details.
>
> Is the dts that's generated called versatilbeab.dts?
>

Yes. Here is the qemu command line:

qemu-system-arm -M versatileab -kernel arch/arm/boot/zImage -no-reboot \
-initrd rootfs-armv5.cpio -m 128 \
--append "rdinit=/sbin/init console=ttyAMA0,115200 earlycon" \
-dtb arch/arm/boot/dts/versatile-ab.dtb -nographic -monitor null \
-serial stdio

Guenter

2022-08-11 17:39:15

by Isaac J. Manjarres

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Wed, Aug 10, 2022 at 09:09:26PM -0700, Guenter Roeck wrote:
> On 8/10/22 19:28, Saravana Kannan wrote:
> > On Wed, Aug 10, 2022 at 7:03 PM Guenter Roeck <[email protected]> wrote:
> > >
> > > On 8/10/22 14:47, Isaac Manjarres wrote:
> > > > On Wed, Aug 10, 2022 at 05:58:58AM -0700, Guenter Roeck wrote:
> > > > > On 8/9/22 20:33, Saravana Kannan wrote:
> > > > > [ ... ]
> > > > >
> > > > > >
> > > > > > Can you give me more details on the qemu configuration so I could try
> > > > > > to reproduce it?
> > > > >
> > > > > qemu-system-arm -M vexpress-a9 -kernel arch/arm/boot/zImage -no-reboot \
> > > > > -initrd rootfs-armv5.cpio -m 128 \
> > > > > --append "rdinit=/sbin/init console=ttyAMA0,115200" \
> > > > > -dtb arch/arm/boot/dts/vexpress-v2p-ca9.dtb \
> > > > > -nographic -monitor null -serial stdio
> > > > >
> > > > > using multi_v7_defconfig will hang nicely with your patch applied,
> > > > > and boot as expected without. This was with qemu v7.0, but I am
> > > > > sure older qemu versions will show the same behavior. The initrd
> > > > > used should not matter, but you'll find it at
> > > > > https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv5.cpio.gz
> > > > >
> > > > > Guenter
> > > > >
> > > > Hi Guenter,
> > > >
> > > > Thanks for the information; I was able to reproduce this on my end as
> > > > well. The following changes fixed the problem for me. Can you please try
> > > > them out?
> > > >
> > > > diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> > > > index 70f79fc71539..b377f18d8acc 100644
> > > > --- a/drivers/base/dd.c
> > > > +++ b/drivers/base/dd.c
> > > > @@ -881,6 +881,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
> > > > dev_dbg(dev, "Device match requests probe deferral\n");
> > > > dev->can_match = true;
> > > > driver_deferred_probe_add(dev);
> > > > + /*
> > > > + * Device can't match with the bus right now, so don't attempt
> > > > + * to match or bind with other drivers on the bus.
> > > > + */
> > > > + return ret;
> > > > } else if (ret < 0) {
> > > > dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> > > > return ret;
> > > > @@ -1120,6 +1125,11 @@ static int __driver_attach(struct device *dev, void *data)
> > > > dev_dbg(dev, "Device match requests probe deferral\n");
> > > > dev->can_match = true;
> > > > driver_deferred_probe_add(dev);
> > > > + /*
> > > > + * Driver could not match with device, but may match with
> > > > + * another device on the bus.
> > > > + */
> > > > + return 0;
> > > > } else if (ret < 0) {
> > > > dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> > > > return ret;
> > > >
> >
> > Thanks Isaac for debugging and providing a fix! It's surprising that
> > no one noticed this lack of "return"s for so long.
> >
> > >
> > > Most of the tests pass with the above applied, but there is still one crash.
> >
> > Good to hear it's mostly good now.
> >
> > > 8<--- cut here ---^M
> > > Unhandled fault: page domain fault (0x81b) at 0x00000122^M
> > > [00000122] *pgd=00000000^M
> > > Internal error: : 81b [#1] ARM^M
> > > Modules linked in:^M
> > > CPU: 0 PID: 1 Comm: swapper Tainted: G N 5.19.0+ #1^M
> > > Hardware name: ARM-Versatile (Device Tree Support)^M
> > > PC is at do_alignment_ldrstr+0x7c/0x164^M
> > > LR is at ai_half+0x0/0x4^M
> > > pc : [<c001fa00>] lr : [<c0ca1278>] psr: 60000113^M
> > > sp : c8811d68 ip : 00000003 fp : 00000004^M
> > > r10: c05433e4 r9 : c0ca1278 r8 : 00000801^M
> > > r7 : 00000122 r6 : 00000000 r5 : e5823000 r4 : c8811df8^M
> > > r3 : 00000100 r2 : c8811df8 r1 : 00000000 r0 : 00000122^M
> > > Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none^M
> > > Control: 00093177 Table: 01698000 DAC: 00000051^M
> > > Register r0 information: non-paged memory^M
> > > Register r1 information: NULL pointer^M
> > > Register r2 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
> > > Register r3 information: non-paged memory^M
> > > Register r4 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
> > > Register r5 information: non-paged memory^M
> > > Register r6 information: NULL pointer^M
> > > Register r7 information: non-paged memory^M
> > > Register r8 information: non-paged memory^M
> > > Register r9 information: non-slab/vmalloc memory^M
> > > Register r10 information: non-slab/vmalloc memory^M
> > > Register r11 information: non-paged memory^M
> > > Register r12 information: non-paged memory^M
> > > Process swapper (pid: 1, stack limit = 0x(ptrval))^M
> > > Stack: (0xc8811d68 to 0xc8812000)^M
> > > 1d60: c8811df8 e5823000 00000000 c00201dc 00000000 00000000^M
> > > 1d80: c0bf2fd4 60000013 00000000 c006c440 00000001 00000000 e5823000 c053cdc0^M
> > > 1da0: 00000000 c0be786c c1496d40 00000801 c0bec3f8 c001ffdc c8811df8 00000122^M
> > > 1dc0: c1496d40 c0bc4858 00000000 c001d66c c12ae950 00000001 c0c5f3b0 c1496d40^M
> > > 1de0: c05433e4 20000013 ffffffff c8811e2c c1496d40 c00095c4 00000001 00000001^M
> > > 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> > > 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
> > > 1e40: 00000053 c05433cc ffffffed c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> > > 1e60: c0bc4858 c053a784 c1552c00 c0c5f294 c0c5f294 c053a808 00000000 c1552c00^M
> > > 1e80: c0c5f294 c05d9f94 00000000 c0c5f294 c05d9f74 c0c5f230 c1496d40 c05d7984^M
> > > 1ea0: 00000000 c1540eac c7cd5fb4 c0be786c c0c5f294 c24d6e00 00000000 c05d8b7c^M
> > > 1ec0: c0acccd4 c0c92600 c1496d40 c0c5f294 c0c92600 c1496d40 00000000 c1496d40^M
> > > 1ee0: c0ca1000 c05dab3c c0bb0690 c0c92600 c1496d40 c000a8b0 00000000 00000000^M
> > > 1f00: c14d7e4b c0b5f800 000000bf c0047c5c c0b5e9b4 00000000 c0c92600 c088e078^M
> > > 1f20: c1496d40 00000007 c0c92600 c14d7e00 c0bc4874 c0b5e9b4 c0ca1000 c0bc4858^M
> > > 1f40: 00000000 c0be786c c0bdda90 00000008 c14d7e00 c0bc4878 c0b5e9b4 c0b93230^M
> > > 1f60: 00000007 00000007 00000000 c0b92400 00000000 000000bf 00000000 00000000^M
> > > 1f80: c088e340 00000000 00000000 00000000 00000000 00000000 00000000 c088e350^M
> > > 1fa0: 00000000 c088e340 00000000 c00084f8 00000000 00000000 00000000 00000000^M
> > > 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
> > > 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000^M
> > > do_alignment_ldrstr from do_alignment+0x200/0x984^M
> > > do_alignment from do_DataAbort+0x38/0xb8^M
> > > do_DataAbort from __dabt_svc+0x64/0xa0^M
> > > Exception stack(0xc8811df8 to 0xc8811e40)^M
> > > 1de0: 00000001 00000001^M
> > > 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> > > 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
> > > __dabt_svc from __clk_put+0x34/0x174^M
> > > __clk_put from amba_read_periphid+0xd8/0x120^M
> >
> > What the heck! How is clk_put() failing. I'll leave this to Isaac too.
> >
> > > amba_read_periphid from amba_match+0x3c/0x84^M
> > > amba_match from __driver_attach+0x20/0x114^M
> > > __driver_attach from bus_for_each_dev+0x74/0xc0^M
> > > bus_for_each_dev from bus_add_driver+0x154/0x1e8^M
> > > bus_add_driver from driver_register+0x74/0x10c^M
> > > driver_register from do_one_initcall+0x8c/0x2fc^M
> > > do_one_initcall from kernel_init_freeable+0x190/0x220^M
> > > kernel_init_freeable from kernel_init+0x10/0x108^M
> > > kernel_init from ret_from_fork+0x14/0x3c^M
> > > Exception stack(0xc8811fb0 to 0xc8811ff8)^M
> > > 1fa0: 00000000 00000000 00000000 00000000^M
> > > 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
> > > 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000^M
> > > Code: e3a00002 e782310c e8bd8070 e792310c (e4c03001) ^M
> > > ---[ end trace 0000000000000000 ]---^M
> > > Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b^M
> > >
> > > This is with versatile_defconfig and versatileab. Let me know if you need details.
> >
> > Is the dts that's generated called versatilbeab.dts?
> >
>
> Yes. Here is the qemu command line:
>
> qemu-system-arm -M versatileab -kernel arch/arm/boot/zImage -no-reboot \
> -initrd rootfs-armv5.cpio -m 128 \
> --append "rdinit=/sbin/init console=ttyAMA0,115200 earlycon" \
> -dtb arch/arm/boot/dts/versatile-ab.dtb -nographic -monitor null \
> -serial stdio
>
> Guenter

Thanks for testing that out, and the additional information, Guenter.
I'll look into the crash in __clk_put() and get back to you.

--Isaac

2022-08-11 19:26:01

by Isaac J. Manjarres

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Thu, Aug 11, 2022 at 09:51:49AM -0700, Isaac Manjarres wrote:
> On Wed, Aug 10, 2022 at 09:09:26PM -0700, Guenter Roeck wrote:
> > On 8/10/22 19:28, Saravana Kannan wrote:
> > > On Wed, Aug 10, 2022 at 7:03 PM Guenter Roeck <[email protected]> wrote:
> > > >
> > > > On 8/10/22 14:47, Isaac Manjarres wrote:
> > > > > On Wed, Aug 10, 2022 at 05:58:58AM -0700, Guenter Roeck wrote:
> > > > > > On 8/9/22 20:33, Saravana Kannan wrote:
> > > > > > [ ... ]
> > > > > >
> > > > > > >
> > > > > > > Can you give me more details on the qemu configuration so I could try
> > > > > > > to reproduce it?
> > > > > >
> > > > > > qemu-system-arm -M vexpress-a9 -kernel arch/arm/boot/zImage -no-reboot \
> > > > > > -initrd rootfs-armv5.cpio -m 128 \
> > > > > > --append "rdinit=/sbin/init console=ttyAMA0,115200" \
> > > > > > -dtb arch/arm/boot/dts/vexpress-v2p-ca9.dtb \
> > > > > > -nographic -monitor null -serial stdio
> > > > > >
> > > > > > using multi_v7_defconfig will hang nicely with your patch applied,
> > > > > > and boot as expected without. This was with qemu v7.0, but I am
> > > > > > sure older qemu versions will show the same behavior. The initrd
> > > > > > used should not matter, but you'll find it at
> > > > > > https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv5.cpio.gz
> > > > > >
> > > > > > Guenter
> > > > > >
> > > > > Hi Guenter,
> > > > >
> > > > > Thanks for the information; I was able to reproduce this on my end as
> > > > > well. The following changes fixed the problem for me. Can you please try
> > > > > them out?
> > > > >
> > > > > diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> > > > > index 70f79fc71539..b377f18d8acc 100644
> > > > > --- a/drivers/base/dd.c
> > > > > +++ b/drivers/base/dd.c
> > > > > @@ -881,6 +881,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
> > > > > dev_dbg(dev, "Device match requests probe deferral\n");
> > > > > dev->can_match = true;
> > > > > driver_deferred_probe_add(dev);
> > > > > + /*
> > > > > + * Device can't match with the bus right now, so don't attempt
> > > > > + * to match or bind with other drivers on the bus.
> > > > > + */
> > > > > + return ret;
> > > > > } else if (ret < 0) {
> > > > > dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> > > > > return ret;
> > > > > @@ -1120,6 +1125,11 @@ static int __driver_attach(struct device *dev, void *data)
> > > > > dev_dbg(dev, "Device match requests probe deferral\n");
> > > > > dev->can_match = true;
> > > > > driver_deferred_probe_add(dev);
> > > > > + /*
> > > > > + * Driver could not match with device, but may match with
> > > > > + * another device on the bus.
> > > > > + */
> > > > > + return 0;
> > > > > } else if (ret < 0) {
> > > > > dev_dbg(dev, "Bus failed to match device: %d\n", ret);
> > > > > return ret;
> > > > >
> > >
> > > Thanks Isaac for debugging and providing a fix! It's surprising that
> > > no one noticed this lack of "return"s for so long.
> > >
> > > >
> > > > Most of the tests pass with the above applied, but there is still one crash.
> > >
> > > Good to hear it's mostly good now.
> > >
> > > > 8<--- cut here ---^M
> > > > Unhandled fault: page domain fault (0x81b) at 0x00000122^M
> > > > [00000122] *pgd=00000000^M
> > > > Internal error: : 81b [#1] ARM^M
> > > > Modules linked in:^M
> > > > CPU: 0 PID: 1 Comm: swapper Tainted: G N 5.19.0+ #1^M
> > > > Hardware name: ARM-Versatile (Device Tree Support)^M
> > > > PC is at do_alignment_ldrstr+0x7c/0x164^M
> > > > LR is at ai_half+0x0/0x4^M
> > > > pc : [<c001fa00>] lr : [<c0ca1278>] psr: 60000113^M
> > > > sp : c8811d68 ip : 00000003 fp : 00000004^M
> > > > r10: c05433e4 r9 : c0ca1278 r8 : 00000801^M
> > > > r7 : 00000122 r6 : 00000000 r5 : e5823000 r4 : c8811df8^M
> > > > r3 : 00000100 r2 : c8811df8 r1 : 00000000 r0 : 00000122^M
> > > > Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none^M
> > > > Control: 00093177 Table: 01698000 DAC: 00000051^M
> > > > Register r0 information: non-paged memory^M
> > > > Register r1 information: NULL pointer^M
> > > > Register r2 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
> > > > Register r3 information: non-paged memory^M
> > > > Register r4 information: 2-page vmalloc region starting at 0xc8810000 allocated at kernel_clone+0x70/0x644^M
> > > > Register r5 information: non-paged memory^M
> > > > Register r6 information: NULL pointer^M
> > > > Register r7 information: non-paged memory^M
> > > > Register r8 information: non-paged memory^M
> > > > Register r9 information: non-slab/vmalloc memory^M
> > > > Register r10 information: non-slab/vmalloc memory^M
> > > > Register r11 information: non-paged memory^M
> > > > Register r12 information: non-paged memory^M
> > > > Process swapper (pid: 1, stack limit = 0x(ptrval))^M
> > > > Stack: (0xc8811d68 to 0xc8812000)^M
> > > > 1d60: c8811df8 e5823000 00000000 c00201dc 00000000 00000000^M
> > > > 1d80: c0bf2fd4 60000013 00000000 c006c440 00000001 00000000 e5823000 c053cdc0^M
> > > > 1da0: 00000000 c0be786c c1496d40 00000801 c0bec3f8 c001ffdc c8811df8 00000122^M
> > > > 1dc0: c1496d40 c0bc4858 00000000 c001d66c c12ae950 00000001 c0c5f3b0 c1496d40^M
> > > > 1de0: c05433e4 20000013 ffffffff c8811e2c c1496d40 c00095c4 00000001 00000001^M
> > > > 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> > > > 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
> > > > 1e40: 00000053 c05433cc ffffffed c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> > > > 1e60: c0bc4858 c053a784 c1552c00 c0c5f294 c0c5f294 c053a808 00000000 c1552c00^M
> > > > 1e80: c0c5f294 c05d9f94 00000000 c0c5f294 c05d9f74 c0c5f230 c1496d40 c05d7984^M
> > > > 1ea0: 00000000 c1540eac c7cd5fb4 c0be786c c0c5f294 c24d6e00 00000000 c05d8b7c^M
> > > > 1ec0: c0acccd4 c0c92600 c1496d40 c0c5f294 c0c92600 c1496d40 00000000 c1496d40^M
> > > > 1ee0: c0ca1000 c05dab3c c0bb0690 c0c92600 c1496d40 c000a8b0 00000000 00000000^M
> > > > 1f00: c14d7e4b c0b5f800 000000bf c0047c5c c0b5e9b4 00000000 c0c92600 c088e078^M
> > > > 1f20: c1496d40 00000007 c0c92600 c14d7e00 c0bc4874 c0b5e9b4 c0ca1000 c0bc4858^M
> > > > 1f40: 00000000 c0be786c c0bdda90 00000008 c14d7e00 c0bc4878 c0b5e9b4 c0b93230^M
> > > > 1f60: 00000007 00000007 00000000 c0b92400 00000000 000000bf 00000000 00000000^M
> > > > 1f80: c088e340 00000000 00000000 00000000 00000000 00000000 00000000 c088e350^M
> > > > 1fa0: 00000000 c088e340 00000000 c00084f8 00000000 00000000 00000000 00000000^M
> > > > 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
> > > > 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000^M
> > > > do_alignment_ldrstr from do_alignment+0x200/0x984^M
> > > > do_alignment from do_DataAbort+0x38/0xb8^M
> > > > do_DataAbort from __dabt_svc+0x64/0xa0^M
> > > > Exception stack(0xc8811df8 to 0xc8811e40)^M
> > > > 1de0: 00000001 00000001^M
> > > > 1e00: 00000122 00000100 c24d8cc0 c1552c00 00000fff c0c5f230 c1496d40 c24d6e58^M
> > > > 1e20: c0bc4858 00000000 c1497330 c8811e48 00000001 c05433e8 20000013 ffffffff^M
> > > > __dabt_svc from __clk_put+0x34/0x174^M
> > > > __clk_put from amba_read_periphid+0xd8/0x120^M
> > >
> > > What the heck! How is clk_put() failing. I'll leave this to Isaac too.
> > >
> > > > amba_read_periphid from amba_match+0x3c/0x84^M
> > > > amba_match from __driver_attach+0x20/0x114^M
> > > > __driver_attach from bus_for_each_dev+0x74/0xc0^M
> > > > bus_for_each_dev from bus_add_driver+0x154/0x1e8^M
> > > > bus_add_driver from driver_register+0x74/0x10c^M
> > > > driver_register from do_one_initcall+0x8c/0x2fc^M
> > > > do_one_initcall from kernel_init_freeable+0x190/0x220^M
> > > > kernel_init_freeable from kernel_init+0x10/0x108^M
> > > > kernel_init from ret_from_fork+0x14/0x3c^M
> > > > Exception stack(0xc8811fb0 to 0xc8811ff8)^M
> > > > 1fa0: 00000000 00000000 00000000 00000000^M
> > > > 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M
> > > > 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000^M
> > > > Code: e3a00002 e782310c e8bd8070 e792310c (e4c03001) ^M
> > > > ---[ end trace 0000000000000000 ]---^M
> > > > Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b^M
> > > >
> > > > This is with versatile_defconfig and versatileab. Let me know if you need details.
> > >
> > > Is the dts that's generated called versatilbeab.dts?
> > >
> >
> > Yes. Here is the qemu command line:
> >
> > qemu-system-arm -M versatileab -kernel arch/arm/boot/zImage -no-reboot \
> > -initrd rootfs-armv5.cpio -m 128 \
> > --append "rdinit=/sbin/init console=ttyAMA0,115200 earlycon" \
> > -dtb arch/arm/boot/dts/versatile-ab.dtb -nographic -monitor null \
> > -serial stdio
> >
> > Guenter
>
> Thanks for testing that out, and the additional information, Guenter.
> I'll look into the crash in __clk_put() and get back to you.
>
> --Isaac
>
Hi Guenter,

I tried to reproduce the crash you reported in __clk_put() above with
the information you mentioned, unfortunately I'm unable to reproduce this.
I'm able to boot up with the command using the same baseline commit you had,
along with my patch.

--Isaac

2022-08-11 20:06:48

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Thu, Aug 11, 2022 at 12:18:40PM -0700, Isaac Manjarres wrote:
> >
> Hi Guenter,
>
> I tried to reproduce the crash you reported in __clk_put() above with
> the information you mentioned, unfortunately I'm unable to reproduce this.
> I'm able to boot up with the command using the same baseline commit you had,
> along with my patch.
>

Ah, it must be triggered by one of the configuration options I have enabled
on top of versatile_defconfig. Sorry, I should have checked. Please try
with the configuration below.

Guenter

---
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_NAMESPACES=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_VERSATILE=y
CONFIG_AEABI=y
CONFIG_OABI_COMPAT=y
CONFIG_CMDLINE="root=1f03 mem=32M"
CONFIG_FPE_NWFPE=y
CONFIG_VFP=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_SLAB=y
CONFIG_CMA=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_PM_QOS_KUNIT_TEST=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
CONFIG_MTD_PHYSMAP_OF=y
CONFIG_OF_UNITTEST=y
CONFIG_BLK_DEV_RAM=y
CONFIG_VIRTIO_BLK=y
CONFIG_EEPROM_LEGACY=m
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_SCSI_VIRTIO=y
CONFIG_NETDEVICES=y
CONFIG_VIRTIO_NET=y
CONFIG_SMC91X=y
CONFIG_USB_USBNET=y
# CONFIG_WLAN is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_SERIO_AMBAKMI=y
CONFIG_LEGACY_PTY_COUNT=16
CONFIG_SERIAL_8250=m
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_RSA=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_I2C_CHARDEV=m
CONFIG_I2C_VERSATILE=y
CONFIG_SPI=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_PL061=y
# CONFIG_HWMON is not set
CONFIG_DRM=y
CONFIG_DRM_PANEL_ARM_VERSATILE=y
CONFIG_DRM_PANEL_SIMPLE=y
CONFIG_DRM_PANEL_EDP=y
CONFIG_DRM_DISPLAY_CONNECTOR=y
CONFIG_DRM_SIMPLE_BRIDGE=y
CONFIG_DRM_PL111=y
CONFIG_FB=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=y
CONFIG_SND=m
CONFIG_SND_ARMAACI=m
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
CONFIG_HID_CHERRY=y
CONFIG_HID_CYPRESS=y
CONFIG_HID_EZKEY=y
CONFIG_HID_ITE=y
CONFIG_HID_KENSINGTON=y
CONFIG_HID_REDRAGON=y
CONFIG_HID_MICROSOFT=y
CONFIG_HID_MONTEREY=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
CONFIG_USB_TEST=y
CONFIG_USB_EHSET_TEST_FIXTURE=y
CONFIG_USB_LINK_LAYER_TEST=y
CONFIG_MMC=y
CONFIG_MMC_ARMMMCI=y
CONFIG_MMC_SDHCI=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
CONFIG_LEDS_SYSCON=y
CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_HEARTBEAT=y
CONFIG_LEDS_TRIGGER_CPU=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MMIO=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_EXT4_KUNIT_TESTS=y
CONFIG_BTRFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_VFAT_FS=m
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_SQUASHFS=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_4K_DEVBLK_SIZE=y
CONFIG_MINIX_FS=y
CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
CONFIG_NLS_CODEPAGE_850=m
CONFIG_NLS_ISO8859_1=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRC32_SELFTEST=y
CONFIG_GLOB_SELFTEST=y
CONFIG_DEBUG_INFO_DWARF5=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_KFENCE=y
CONFIG_PROVE_LOCKING=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_WW_MUTEX_SELFTEST=y
CONFIG_DEBUG_LIST=y
CONFIG_RCU_EQS_DEBUG=y
CONFIG_DEBUG_USER=y
CONFIG_DEBUG_LL=y
CONFIG_KUNIT=y
CONFIG_KUNIT_TEST=y
CONFIG_TEST_SORT=y
CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=y
CONFIG_STRING_SELFTEST=y
CONFIG_TEST_BITMAP=y
CONFIG_TEST_UUID=y
CONFIG_TEST_FIRMWARE=y
CONFIG_TEST_SYSCTL=y
CONFIG_RESOURCE_KUNIT_TEST=y
CONFIG_SYSCTL_KUNIT_TEST=y
CONFIG_LIST_KUNIT_TEST=y
CONFIG_CMDLINE_KUNIT_TEST=y
CONFIG_MEMCPY_KUNIT_TEST=y

2022-08-12 05:23:14

by Isaac J. Manjarres

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Thu, Aug 11, 2022 at 12:55:08PM -0700, Guenter Roeck wrote:
>
> Ah, it must be triggered by one of the configuration options I have enabled
> on top of versatile_defconfig. Sorry, I should have checked. Please try
> with the configuration below.
>
> Guenter

Thanks for sharing your config options; I was able to reproduce the
crash after copying your config options to my repository :) The
following changes fixed the problem for me. Can you please give them a
try on your end to see if they work for you too?

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 90b31fb141a5..0315bc2853ef 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -1117,7 +1117,9 @@ static int __driver_attach(struct device *dev, void *data)
* is an error.
*/

+ device_lock(dev);
ret = driver_match_device(drv, dev);
+ device_unlock(dev);
if (ret == 0) {
/* no match */
return 0;


Thanks,
Isaac

2022-08-12 15:33:38

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On 8/11/22 22:12, Isaac Manjarres wrote:
> On Thu, Aug 11, 2022 at 12:55:08PM -0700, Guenter Roeck wrote:
>>
>> Ah, it must be triggered by one of the configuration options I have enabled
>> on top of versatile_defconfig. Sorry, I should have checked. Please try
>> with the configuration below.
>>
>> Guenter
>
> Thanks for sharing your config options; I was able to reproduce the
> crash after copying your config options to my repository :) The
> following changes fixed the problem for me. Can you please give them a
> try on your end to see if they work for you too?
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 90b31fb141a5..0315bc2853ef 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -1117,7 +1117,9 @@ static int __driver_attach(struct device *dev, void *data)
> * is an error.
> */
>
> + device_lock(dev);
> ret = driver_match_device(drv, dev);
> + device_unlock(dev);
> if (ret == 0) {
> /* no match */
> return 0;
>
>
> Thanks,
> Isaac

The original test passes, but I now see other boot failures with other emulations.
I don't know yet if it is due to your changes or due to something else. I'll do
more testing and let you know.

Guenter

2022-08-12 15:43:45

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On 8/11/22 22:12, Isaac Manjarres wrote:
> On Thu, Aug 11, 2022 at 12:55:08PM -0700, Guenter Roeck wrote:
>>
>> Ah, it must be triggered by one of the configuration options I have enabled
>> on top of versatile_defconfig. Sorry, I should have checked. Please try
>> with the configuration below.
>>
>> Guenter
>
> Thanks for sharing your config options; I was able to reproduce the
> crash after copying your config options to my repository :) The
> following changes fixed the problem for me. Can you please give them a
> try on your end to see if they work for you too?
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 90b31fb141a5..0315bc2853ef 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -1117,7 +1117,9 @@ static int __driver_attach(struct device *dev, void *data)
> * is an error.
> */
>
> + device_lock(dev);
> ret = driver_match_device(drv, dev);
> + device_unlock(dev);
> if (ret == 0) {
> /* no match */
> return 0;
>

After more testing: the changes above result in qemu sx1 boot failures.
There is no crash, boot just hangs.

qemu command line:

qemu-system-arm -M sx1 -kernel arch/arm/boot/zImage -no-reboot \
-initrd rootfs-armv4.cpio \
--append "rdinit=/sbin/init console=ttyS0,115200 earlycon=uart8250,mmio32,0xfffb0000,115200n8" \
-nographic -monitor null -serial stdio

with configuration from
https://github.com/groeck/linux-build-test/blob/master/rootfs/arm/qemu_sx1_defconfig
and root file system from
https://github.com/groeck/linux-build-test/blob/master/rootfs/arm/rootfs-armv4.cpio.gz

This is with your other patch applied.

Guenter

2022-08-15 20:01:22

by Isaac J. Manjarres

[permalink] [raw]
Subject: Re: [PATCH v6] amba: Remove deferred device addition

On Fri, Aug 12, 2022 at 08:30:46AM -0700, Guenter Roeck wrote:
> After more testing: the changes above result in qemu sx1 boot failures.
> There is no crash, boot just hangs.
>
> qemu command line:
>
> qemu-system-arm -M sx1 -kernel arch/arm/boot/zImage -no-reboot \
> -initrd rootfs-armv4.cpio \
> --append "rdinit=/sbin/init console=ttyS0,115200 earlycon=uart8250,mmio32,0xfffb0000,115200n8" \
> -nographic -monitor null -serial stdio
>
> with configuration from
> https://github.com/groeck/linux-build-test/blob/master/rootfs/arm/qemu_sx1_defconfig
> and root file system from
> https://github.com/groeck/linux-build-test/blob/master/rootfs/arm/rootfs-armv4.cpio.gz
>
> This is with your other patch applied.
>
> Guenter

Thanks for testing out the patch and sharing the qemu commandline you
used for the new issue. I was able to reproduce it on my end :) Can you
please try the following patch instead of the second patch I gave you?
This worked for me on sx1 and versatileab:

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 32b0e0b930c1..110a535648d2 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -209,6 +209,7 @@ static int amba_match(struct device *dev, struct device_driver *drv)
struct amba_device *pcdev = to_amba_device(dev);
struct amba_driver *pcdrv = to_amba_driver(drv);

+ mutex_lock(&pcdev->periphid_lock);
if (!pcdev->periphid) {
int ret = amba_read_periphid(pcdev);

@@ -218,11 +219,14 @@ static int amba_match(struct device *dev, struct device_driver *drv)
* permanent failure in reading pid and cid, simply map it to
* -EPROBE_DEFER.
*/
- if (ret)
+ if (ret) {
+ mutex_unlock(&pcdev->periphid_lock);
return -EPROBE_DEFER;
+ }
dev_set_uevent_suppress(dev, false);
kobject_uevent(&dev->kobj, KOBJ_ADD);
}
+ mutex_unlock(&pcdev->periphid_lock);

/* When driver_override is set, only bind to the matching driver */
if (pcdev->driver_override)
@@ -532,6 +536,7 @@ static void amba_device_release(struct device *dev)

if (d->res.parent)
release_resource(&d->res);
+ mutex_destroy(&d->periphid_lock);
kfree(d);
}

@@ -584,6 +589,7 @@ static void amba_device_initialize(struct amba_device *dev, const char *name)
dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
dev->dev.dma_parms = &dev->dma_parms;
dev->res.name = dev_name(&dev->dev);
+ mutex_init(&dev->periphid_lock);
}

/**
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index e94cdf235f1d..5001e14c5c06 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -67,6 +67,7 @@ struct amba_device {
struct clk *pclk;
struct device_dma_parameters dma_parms;
unsigned int periphid;
+ struct mutex periphid_lock;
unsigned int cid;
struct amba_cs_uci_id uci;
unsigned int irq[AMBA_NR_IRQS];


Thanks,
Isaac