2007-02-05 23:21:24

by Robert Crocombe

[permalink] [raw]
Subject: ohci1394 broke 2.6.19 -> 2.6.20-rc1

Prior to testing a patch for bugzilla bug 7569 (hosts lost on bus
reset), I wanted to reproduce the behavior. I can under the noted
2.6.16-blah kernels, but moving to anything more recent than 2.6.19
means ohci1394 is non-functional (no 1394 hosts are detected) and the
module cannot be removed.

I have narrowed it down to 2.6.19 works, 2.6.20-rc1 doesn't. Lots of detail at:

http://bugzilla.kernel.org/show_bug.cgi?id=7942

--
Robert Crocombe


2007-02-06 00:03:30

by Stefan Richter

[permalink] [raw]
Subject: Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1

Robert Crocombe wrote:
> Prior to testing a patch for bugzilla bug 7569 (hosts lost on bus
> reset),

(Well, I think a lot more has to be done for that bug than in the patch
I posted so far.)

> I wanted to reproduce the behavior. I can under the noted
> 2.6.16-blah kernels, but moving to anything more recent than 2.6.19
> means ohci1394 is non-functional (no 1394 hosts are detected) and the
> module cannot be removed.
>
> I have narrowed it down to 2.6.19 works, 2.6.20-rc1 doesn't. Lots of detail at:
>
> http://bugzilla.kernel.org/show_bug.cgi?id=7942

I get a spinlock lockup on 2.6.20-rc6 + some 1394 updates with a similar
trace to what Robert posted at bugzilla --- *if* I use ieee1394's option
disable_nodemgr=1. (Never used it before.) The key appears to be how
hpsb_alloc_host interacts with the driver core. I don't know if hosts.c
is at fault or some change in the driver core.

There is one notable change in hpsb_alloc_host after 2.6.19: I moved the
device registering out of a mutex. But I don't think that's relevant.
The lockup which I got was with only one FireWire controller.

$ git diff v2.6.19..v2.6.20-rc1 drivers/ieee1394/hosts.c
diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c
index d90a3a1..ee82a53 100644
--- a/drivers/ieee1394/hosts.c
+++ b/drivers/ieee1394/hosts.c
@@ -31,9 +31,10 @@
#include "config_roms.h"


-static void delayed_reset_bus(void * __reset_info)
+static void delayed_reset_bus(struct work_struct *work)
{
- struct hpsb_host *host = (struct hpsb_host*)__reset_info;
+ struct hpsb_host *host =
+ container_of(work, struct hpsb_host, delayed_reset.work);
int generation = host->csr.generation + 1;

/* The generation field rolls over to 2 rather than 0 per IEEE
@@ -43,9 +44,10 @@ static void delayed_reset_bus(void * __reset_info)

CSR_SET_BUS_INFO_GENERATION(host->csr.rom, generation);
if (csr1212_generate_csr_image(host->csr.rom) != CSR1212_SUCCESS) {
- /* CSR image creation failed, reset generation field and do not
- * issue a bus reset. */
- CSR_SET_BUS_INFO_GENERATION(host->csr.rom, host->csr.generation);
+ /* CSR image creation failed.
+ * Reset generation field and do not issue a bus reset. */
+ CSR_SET_BUS_INFO_GENERATION(host->csr.rom,
+ host->csr.generation);
return;
}

@@ -53,7 +55,8 @@ static void delayed_reset_bus(void * __reset_info)

host->update_config_rom = 0;
if (host->driver->set_hw_config_rom)
- host->driver->set_hw_config_rom(host, host->csr.rom->bus_info_data);
+ host->driver->set_hw_config_rom(host,
+ host->csr.rom->bus_info_data);

host->csr.gen_timestamp[host->csr.generation] = jiffies;
hpsb_reset_bus(host, SHORT_RESET);
@@ -69,7 +72,8 @@ static int dummy_devctl(struct hpsb_host *h, enum devctl_cmd c, int arg)
return -1;
}

-static int dummy_isoctl(struct hpsb_iso *iso, enum isoctl_cmd command, unsigned long arg)
+static int dummy_isoctl(struct hpsb_iso *iso, enum isoctl_cmd command,
+ unsigned long arg)
{
return -1;
}
@@ -122,15 +126,13 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
int i;
int hostnum = 0;

- h = kzalloc(sizeof(*h) + extra, SLAB_KERNEL);
+ h = kzalloc(sizeof(*h) + extra, GFP_KERNEL);
if (!h)
return NULL;

h->csr.rom = csr1212_create_csr(&csr_bus_ops, CSR_BUS_INFO_SIZE, h);
- if (!h->csr.rom) {
- kfree(h);
- return NULL;
- }
+ if (!h->csr.rom)
+ goto fail;

h->hostdata = h + 1;
h->driver = drv;
@@ -145,21 +147,20 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,

atomic_set(&h->generation, 0);

- INIT_WORK(&h->delayed_reset, delayed_reset_bus, h);
+ INIT_DELAYED_WORK(&h->delayed_reset, delayed_reset_bus);

init_timer(&h->timeout);
h->timeout.data = (unsigned long) h;
h->timeout.function = abort_timedouts;
- h->timeout_interval = HZ / 20; // 50ms by default
+ h->timeout_interval = HZ / 20; /* 50ms, half of minimum SPLIT_TIMEOUT */

h->topology_map = h->csr.topology_map + 3;
h->speed_map = (u8 *)(h->csr.speed_map + 2);

mutex_lock(&host_num_alloc);
-
while (nodemgr_for_each_host(&hostnum, alloc_hostnum_cb))
hostnum++;
-
+ mutex_unlock(&host_num_alloc);
h->id = hostnum;

memcpy(&h->device, &nodemgr_dev_template_host, sizeof(h->device));
@@ -170,13 +171,19 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
h->class_dev.class = &hpsb_host_class;
snprintf(h->class_dev.class_id, BUS_ID_SIZE, "fw-host%d", h->id);

- device_register(&h->device);
- class_device_register(&h->class_dev);
+ if (device_register(&h->device))
+ goto fail;
+ if (class_device_register(&h->class_dev)) {
+ device_unregister(&h->device);
+ goto fail;
+ }
get_device(&h->device);

- mutex_unlock(&host_num_alloc);
-
return h;
+
+fail:
+ kfree(h);
+ return NULL;
}

int hpsb_add_host(struct hpsb_host *host)
@@ -228,13 +235,14 @@ int hpsb_update_config_rom_image(struct hpsb_host *host)
if (time_before(jiffies, host->csr.gen_timestamp[next_gen] + 60 * HZ))
/* Wait 60 seconds from the last time this generation number was
* used. */
- reset_delay = (60 * HZ) + host->csr.gen_timestamp[next_gen] - jiffies;
+ reset_delay =
+ (60 * HZ) + host->csr.gen_timestamp[next_gen] - jiffies;
else
/* Wait 1 second in case some other code wants to change the
* Config ROM in the near future. */
reset_delay = HZ;

- PREPARE_WORK(&host->delayed_reset, delayed_reset_bus, host);
+ PREPARE_DELAYED_WORK(&host->delayed_reset, delayed_reset_bus);
schedule_delayed_work(&host->delayed_reset, reset_delay);

return 0;



--
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/

2007-02-06 00:40:55

by Stefan Richter

[permalink] [raw]
Subject: Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1

On 6 Feb, Stefan Richter wrote:
> Robert Crocombe wrote:
>> moving to anything more recent than 2.6.19
>> means ohci1394 is non-functional (no 1394 hosts are detected) and the
>> module cannot be removed.

Not using the option disable_nodemgr=1 should avoid the bug.

>> I have narrowed it down to 2.6.19 works, 2.6.20-rc1 doesn't. Lots of detail at:
>>
>> http://bugzilla.kernel.org/show_bug.cgi?id=7942
>
> I get a spinlock lockup on 2.6.20-rc6 + some 1394 updates with a similar
> trace to what Robert posted at bugzilla --- *if* I use ieee1394's option
> disable_nodemgr=1. (Never used it before.) The key appears to be how
> hpsb_alloc_host interacts with the driver core. I don't know if hosts.c
> is at fault or some change in the driver core.

It's my oversight, see patch.


From: Stefan Richter <[email protected]>
Subject: ieee1394: fix host device registering when nodemgr disabled

Since my commit 8252bbb1363b7fe963a3eb6f8a36da619a6f5a65 in 2.6.20-rc1,
host devices have a dummy driver attached. Alas the driver was not
registered before use if ieee1394 was loaded with disable_nodemgr=1.

Signed-off-by: Stefan Richter <[email protected]>
---
drivers/ieee1394/ieee1394_core.c | 1 +
drivers/ieee1394/nodemgr.c | 3 +--
drivers/ieee1394/nodemgr.h | 4 +---
3 files changed, 3 insertions(+), 5 deletions(-)

Index: linux-2.6.20-rc6/drivers/ieee1394/ieee1394_core.c
===================================================================
--- linux-2.6.20-rc6.orig/drivers/ieee1394/ieee1394_core.c 2007-02-03 18:04:50.000000000 +0100
+++ linux-2.6.20-rc6/drivers/ieee1394/ieee1394_core.c 2007-02-06 01:28:04.000000000 +0100
@@ -1132,6 +1132,7 @@ static int __init ieee1394_init(void)
}
}

+ ret = driver_register(&nodemgr_mid_layer_driver);
ret = class_register(&hpsb_host_class);
if (ret < 0)
goto release_all_bus;
Index: linux-2.6.20-rc6/drivers/ieee1394/nodemgr.c
===================================================================
--- linux-2.6.20-rc6.orig/drivers/ieee1394/nodemgr.c 2007-01-27 14:07:00.000000000 +0100
+++ linux-2.6.20-rc6/drivers/ieee1394/nodemgr.c 2007-02-06 01:29:21.000000000 +0100
@@ -249,7 +249,7 @@ static struct device nodemgr_dev_templat
* useful drivers for them yet, and there would be a deadlock possible if the
* driver core scans the host device while the host's low-level driver (i.e.
* the host's parent device) is being removed. */
-static struct device_driver nodemgr_mid_layer_driver = {
+struct device_driver nodemgr_mid_layer_driver = {
.bus = &ieee1394_bus_type,
.name = "nodemgr",
.owner = THIS_MODULE,
@@ -1857,7 +1857,6 @@ int init_ieee1394_nodemgr(void)
class_unregister(&nodemgr_ne_class);
return error;
}
- error = driver_register(&nodemgr_mid_layer_driver);
hpsb_register_highlevel(&nodemgr_highlevel);
return 0;
}
Index: linux-2.6.20-rc6/drivers/ieee1394/nodemgr.h
===================================================================
--- linux-2.6.20-rc6.orig/drivers/ieee1394/nodemgr.h 2007-01-27 14:07:00.000000000 +0100
+++ linux-2.6.20-rc6/drivers/ieee1394/nodemgr.h 2007-02-06 01:30:27.000000000 +0100
@@ -181,10 +181,8 @@ int nodemgr_for_each_host(void *__data,
int init_ieee1394_nodemgr(void);
void cleanup_ieee1394_nodemgr(void);

-/* The template for a host device */
+extern struct device_driver nodemgr_mid_layer_driver;
extern struct device nodemgr_dev_template_host;
-
-/* Bus attributes we export */
extern struct bus_attribute *const fw_bus_attrs[];

#endif /* _IEEE1394_NODEMGR_H */



--
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/


2007-02-06 00:48:45

by Stefan Richter

[permalink] [raw]
Subject: Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1

I wrote:
> @@ -1132,6 +1132,7 @@ static int __init ieee1394_init(void)
> }
> }
>
> + ret = driver_register(&nodemgr_mid_layer_driver);
> ret = class_register(&hpsb_host_class);
> if (ret < 0)
> goto release_all_bus;

I think I make that a
WARN_ON(driver_register(&nodemgr_mid_layer_driver));
--
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/


2007-02-06 01:04:11

by Robert Crocombe

[permalink] [raw]
Subject: Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1

On 2/5/07, Stefan Richter <[email protected]> wrote:
> It's my oversight, see patch.

Yes, this fixes things. Thanks!

--
Robert Crocombe

2007-02-06 01:20:27

by Stefan Richter

[permalink] [raw]
Subject: [PATCH update] ieee1394: fix host device registering when nodemgr disabled

Since my commit 8252bbb1363b7fe963a3eb6f8a36da619a6f5a65 in 2.6.20-rc1,
host devices have a dummy driver attached. Alas the driver was not
registered before use if ieee1394 was loaded with disable_nodemgr=1.

Signed-off-by: Stefan Richter <[email protected]>
---

Update: - work without extern variables,
- better error checks,
- add missing driver_unregister.

drivers/ieee1394/nodemgr.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)

Index: linux-2.6.20-rc5/drivers/ieee1394/nodemgr.c
===================================================================
--- linux-2.6.20-rc5.orig/drivers/ieee1394/nodemgr.c
+++ linux-2.6.20-rc5/drivers/ieee1394/nodemgr.c
@@ -258,7 +258,6 @@ static struct device_driver nodemgr_mid_
struct device nodemgr_dev_template_host = {
.bus = &ieee1394_bus_type,
.release = nodemgr_release_host,
- .driver = &nodemgr_mid_layer_driver,
};


@@ -1850,22 +1849,33 @@ int init_ieee1394_nodemgr(void)

error = class_register(&nodemgr_ne_class);
if (error)
- return error;
-
+ goto fail_ne;
error = class_register(&nodemgr_ud_class);
- if (error) {
- class_unregister(&nodemgr_ne_class);
- return error;
- }
+ if (error)
+ goto fail_ud;
+
+ /* don't show this driver if nodemgr is off (disable_nodmgr=1) */
+ nodemgr_dev_template_host.driver = &nodemgr_mid_layer_driver;
+
error = driver_register(&nodemgr_mid_layer_driver);
+ if (error)
+ goto fail_ml;
+
hpsb_register_highlevel(&nodemgr_highlevel);
return 0;
+
+fail_ml:
+ class_unregister(&nodemgr_ud_class);
+fail_ud:
+ class_unregister(&nodemgr_ne_class);
+fail_ne:
+ return error;
}

void cleanup_ieee1394_nodemgr(void)
{
hpsb_unregister_highlevel(&nodemgr_highlevel);
-
+ driver_unregister(&nodemgr_mid_layer_driver);
class_unregister(&nodemgr_ud_class);
class_unregister(&nodemgr_ne_class);
}


--
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/