On Fri, Apr 15, 2011 at 8:33 AM, Ciprian Docan <[email protected]> wrote:
>
> I enabled CONFIG_PRINTK_TIME in the config file and rebuild the kernel; and
> after a fresh reboot I saved the dmesg output before and after
> suspend/resume. Attached is the diff file containing the resume sequence. At
> linew 133 - 134 there is a longer gap, which I think is related to the r8169
> driver.
>
> I have unloaded the module and did a new suspend/resume sequence, but this
> time with the r8169 module removed it went much faster (3-4s to resume
> completely). Can you please help here ?
Ok, this seems to be yet another case of the ridiculously common error
of "let's load the firmware early, before the machine is even up". It
sometimes happens at boot-time (driver writers only test the module
case, not the built-in case), but it happens very often at
suspend/resume time (which driver writers seem to not test at all).
It is not valid to try to load the firmware at resume time. User space
isn't running, so all the firmware loading helpers are frozen.
Firmware that needs to be reloaded after a suspend needs to be saved
to memory!
I added netdev to the recipient list, because this is definitely not
just a r8169 issue - we've had this bug happen over and over again.
Looking at the code, it looks like r8169 _tries_ to save the firmware
(it has had this SAME bug before), but in case no firmware was ever
loaded successfully at all (either because there was no firmware to
begin with, or because the device had never been opened, so it had
never tried to load it before), it _still_ tries to load the firmware.
WTF? At resume time, you should _resume_, not "load firmware even if
it wasn't loaded before".
I also wonder if the generic power management layer code could do
something smarter about this. Print a big warning when somebody wants
to load firmware when the machine isn't fully operational, instead of
the "wait 60 seconds in the totally futile hope that things will
change". Adding greg to the cc for that case, he's marked as firmware
loader maintainer.
So Francois, can we please not load the firmware at resume time when
it wasn't loaded when suspended!
Greg/Rafael/whoever - could we please replace the "sleep for a minute
if you can't load the firmware" with a big immediate WARN_ON() if
somebody tries to load it at early boot time or at resume time? That
way we don't have the machine dead for sixty seconds (most people just
assume it's dead and will just reboot - I've done that myself when
I've been hit by this), and we'd get a nice "here's the offender"
printout.
And netdev/lkml just cc'd for information, just in case some driver
writer is lurking and suddenly realizes that their driver is broken
too.
Linus
Linus Torvalds <[email protected]> :
[...]
> So Francois, can we please not load the firmware at resume time when
> it wasn't loaded when suspended!
One can try the patch below. It is completely untested yet.
Subject: [PATCH] r8169: don't request firmware when there's no userspace.
The firmware is cached during open() and released during close().
The driver uses the cached firmware between open() and close().
Don't bother with rtl8169_pcierr_interrupt. It is special anyway.
Signed-off-by: Francois Romieu <[email protected]>
---
drivers/net/r8169.c | 76 +++++++++++++++++++++++++++++++++++----------------
1 files changed, 52 insertions(+), 24 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 493b0de..ccc25cd 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -170,6 +170,16 @@ static const struct {
};
#undef _R
+static const struct rtl_firmware_info {
+ int mac_version;
+ const char *fw_name;
+} rtl_firmware_infos[] = {
+ { .mac_version = RTL_GIGA_MAC_VER_25, .fw_name = FIRMWARE_8168D_1 },
+ { .mac_version = RTL_GIGA_MAC_VER_26, .fw_name = FIRMWARE_8168D_2 },
+ { .mac_version = RTL_GIGA_MAC_VER_29, .fw_name = FIRMWARE_8105E_1 },
+ { .mac_version = RTL_GIGA_MAC_VER_30, .fw_name = FIRMWARE_8105E_1 }
+};
+
enum cfg_version {
RTL_CFG_0 = 0x00,
RTL_CFG_1,
@@ -1793,21 +1803,21 @@ static void rtl_release_firmware(struct rtl8169_private *tp)
tp->fw = NULL;
}
-static int rtl_apply_firmware(struct rtl8169_private *tp, const char *fw_name)
+static void rtl_apply_firmware(struct rtl8169_private *tp)
{
- const struct firmware **fw = &tp->fw;
- int rc = !*fw;
-
- if (rc) {
- rc = request_firmware(fw, fw_name, &tp->pci_dev->dev);
- if (rc < 0)
- goto out;
- }
+ const struct firmware *fw = tp->fw;
/* TODO: release firmware once rtl_phy_write_fw signals failures. */
- rtl_phy_write_fw(tp, *fw);
-out:
- return rc;
+ if (fw)
+ rtl_phy_write_fw(tp, fw);
+}
+
+static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
+{
+ if (rtl_readphy(tp, reg) != val)
+ netif_warn(tp, hw, tp->dev, "chipset not ready for firmware\n");
+ else
+ rtl_apply_firmware(tp);
}
static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
@@ -2246,10 +2256,8 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_writephy(tp, 0x05, 0x001b);
- if ((rtl_readphy(tp, 0x06) != 0xbf00) ||
- (rtl_apply_firmware(tp, FIRMWARE_8168D_1) < 0)) {
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
- }
+
+ rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xbf00);
rtl_writephy(tp, 0x1f, 0x0000);
}
@@ -2351,10 +2359,8 @@ static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_writephy(tp, 0x05, 0x001b);
- if ((rtl_readphy(tp, 0x06) != 0xb300) ||
- (rtl_apply_firmware(tp, FIRMWARE_8168D_2) < 0)) {
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
- }
+
+ rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xb300);
rtl_writephy(tp, 0x1f, 0x0000);
}
@@ -2474,8 +2480,7 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x18, 0x0310);
msleep(100);
- if (rtl_apply_firmware(tp, FIRMWARE_8105E_1) < 0)
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
+ rtl_apply_firmware(tp);
rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
}
@@ -3288,8 +3293,6 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
cancel_delayed_work_sync(&tp->task);
- rtl_release_firmware(tp);
-
unregister_netdev(dev);
if (pci_dev_run_wake(pdev))
@@ -3303,6 +3306,27 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
}
+static void rtl_request_firmware(struct rtl8169_private *tp)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rtl_firmware_infos); i++) {
+ const struct rtl_firmware_info *info = rtl_firmware_infos + i;
+
+ if (info->mac_version == tp->mac_version) {
+ const char *name = info->fw_name;
+ int rc;
+
+ rc = request_firmware(&tp->fw, name, &tp->pci_dev->dev);
+ if (rc < 0) {
+ netif_warn(tp, ifup, tp->dev, "unable to load "
+ "firmware patch %s (%d)\n", name, rc);
+ }
+ break;
+ }
+ }
+}
+
static int rtl8169_open(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
@@ -3334,6 +3358,8 @@ static int rtl8169_open(struct net_device *dev)
smp_mb();
+ rtl_request_firmware(tp);
+
retval = request_irq(dev->irq, rtl8169_interrupt,
(tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
dev->name, dev);
@@ -4891,6 +4917,8 @@ static int rtl8169_close(struct net_device *dev)
free_irq(dev->irq, dev);
+ rtl_release_firmware(tp);
+
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
--
1.7.4
On Sun, Apr 17, 2011 at 3:17 AM, Francois Romieu <[email protected]> wrote:
>
> One can try the patch below. It is completely untested yet.
Looks _fairly_ sane to me. The "request firmware in open, release
firmware in close" approach would seem to be a fairly obvious one.
HOWEVER.
I think it's broken in two ways:
- it causes too much re-loading for no good reason. I'm looking at
rtl8169_reinit_task() in particular, and if I read that correctly,
then if there are any problems, that crazy function will end up
loading and unloading the firmware EVERY FOUR TIMER TICKS!
That's just totally broken.
But I'd also worry about some init scripts in particular, maybe
that whole "open to test something, close immediately" is common. I
don't know.
- it seems to leak the open function fails after requesting the
firmware - nothing will ever close it, and if you unload the module
the firmware will never be released as far as I can tell.
I might be missing some failure path (maybe the network device open
will call close even if the open failed), but it looks real.
So I think your patch _approaches_ being sane, but no, it's not working as-is.
I really think that this kind of "ad-hoc random firmware loading"
stuff should go away. The device layer (or maybe the network layer)
should have some clear and unambiguous rules. Exactly so that drivers
don't make these kinds of mistakes.
My gut feel for what the rules should be is roughly (but please take
this as a starting point for discussion rather than some final thing):
- try to load the firmware at each time the user tries to activate
the device. IOW, not like this, where the rtl8169_open() function is
called from many different contexts, only one of them being the
"network layer tried to open the device".
I do think we need to do it potentially multiple times: the main
issue being something like this:
ifconfig eth0 up
** oops, that failed because we didn't have the firmware files
installed **
... install firmware files, the 'dmesg' gave good error messages
that the user could use to know what was going on ..
ifconfig eth0 up
** this needs to trigger another firmware load attempt **
In other words, doing firmware load at module load time - or at
device scan time - is fundamentally broken for a network driver.
- unload not on close, but on device unregister (iow not when you do
"ifconfig eth0 down", but when the "eth0" device really goes away)
But as mentioned, the above is (a) just my gut feel - please discuss -
and (b) I really think that leaving this to the network driver has
been shown to continually result in the drivers doing the firmware
load/unload in all the wrong places.
So I do wonder whether we could add a "ndo_firmware_load()" and
"ndo_firmware_unload()" callback to the network device operations, and
have the network layer make the above rules. A network driver
obviously _could_ do its firmware load from other places instead, but
such a network driver would basically be "obviously broken".
Comments?
(That said, I think that Francois' patch could be made acceptable
fairly trivially:
- avoiding the load/unload from rtl8169_reinit_task() and that
horrible "every four timer ticks" issue. That just seems crazy. Maybe
by just having an internal open helper function that does everything
but the firmware load)
- adding a rtl_release_firmware on open failure so that you don't leak stuff
but I do think that this whole "firmware load in random places" has
been such a common bug that I think we should have some real rules
about it)
Hmm?
Linus
Linus Torvalds <[email protected]> :
[lots of silly bugs]
Ok. it should be fixed for wednesday.
[...]
> - unload not on close, but on device unregister (iow not when you do
> "ifconfig eth0 down", but when the "eth0" device really goes away)
Without further action, the firmware(s) will thus be locked in until the
driver is removed.
> But as mentioned, the above is (a) just my gut feel - please discuss -
> and (b) I really think that leaving this to the network driver has
> been shown to continually result in the drivers doing the firmware
> load/unload in all the wrong places.
As long as it can be fixed... If the 60s delay is removed and the firmware
loading emits some messages for programmer barbie, I am more than happy.
If someone can tell me where Realtek's firmware should be sent to as David
W. seems to be busy, it will be perfect.
--
"Lack of vision" Ueimor
On Mon, Apr 18, 2011 at 11:08 AM, Francois Romieu <[email protected]> wrote:
> [...]
>> ?- unload not on close, but on device unregister (iow not when you do
>> "ifconfig eth0 down", but when the "eth0" device really goes away)
>
> Without further action, the firmware(s) will thus be locked in until the
> driver is removed.
I do agree. It's a downside. Maybe doing it in "close()" is the right
thing, as long as we don't have that crazy "every four timer ticks"
situation with rtl8169_reinit_task.
As mentioned, the only real reason for me to be worried about the
close thing is that I don't have a good feel for what happens at boot
time. Are the setup scripts going to look at the interface lots of
times? On my desktop, I couldn't care less, but I try to keep boot
time in mind.
Maybe in practice there's just a single open at boot-time (for dhcp or
whatever), and I'm just worried for no good reason.
Without having looked at that whole rtl8169_reinit_task thing, I
probably wouldn't even worry about anything else doing something
similar ;)
> As long as it can be fixed... If the 60s delay is removed and the firmware
> loading emits some messages for programmer barbie, I am more than happy.
So the firmware loading timeout used to be ten seconds (which I
already think is excessive), but then commit
2f65168de7d68a5795e945e781d85b313bdc97b9 increased it to 60s because
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=174589
The ipw driver sometimes takes a long time to load its firmware.
Whilst the ipw driver should be using the async interface of
the firmware loader to make this a non-issue, this is a minimal fix.
although when I actually look at that bugzilla entry, the _timestamps_
for the failed case do not seem to support this being a timeout.
Very odd.
But the real problem is that we do that timeout even in cases where it
cannot help, ie when people load firmware during early boot or during
suspend. So I think drivers/base/firmware_class.c should be made a bit
smarter.
We have a few cases where call_usermodehelper() fails immediately:
- khelper_wq hasn't been set up yet
- usermodehelper_disabled is set.
and in particular, during suspend/resume, that
"usermodehelper_disabled" flag will be set.
I don't think it is sensible to do a user request for firmware during
that time either, and that 60-second timeout is just silly. It's not
going to help.
Why doesn't the firmware loader class check the error return from the
kobject_uevent()? I'd expect that if that fails, we should just warn
and abort, rather than wait 60 seconds to time out. Greg?
TOTALLY UNTESTED PATCH ATTACHED!
Ciprian - does this get rid of the 60-second wait? Do you get a nice
kernel traceback in your dmesg instead?
> If someone can tell me where Realtek's firmware should be sent to as David
> W. seems to be busy, it will be perfect.
Hmm. Dunno about that.
Linus
On Mon, 2011-04-18 at 11:49 -0700, Linus Torvalds wrote:
> On Mon, Apr 18, 2011 at 11:08 AM, Francois Romieu <[email protected]> wrote:
> > [...]
> >> - unload not on close, but on device unregister (iow not when you do
> >> "ifconfig eth0 down", but when the "eth0" device really goes away)
> >
> > Without further action, the firmware(s) will thus be locked in until the
> > driver is removed.
>
> I do agree. It's a downside. Maybe doing it in "close()" is the right
> thing, as long as we don't have that crazy "every four timer ticks"
> situation with rtl8169_reinit_task.
>
> As mentioned, the only real reason for me to be worried about the
> close thing is that I don't have a good feel for what happens at boot
> time. Are the setup scripts going to look at the interface lots of
> times? On my desktop, I couldn't care less, but I try to keep boot
> time in mind.
>
> Maybe in practice there's just a single open at boot-time (for dhcp or
> whatever), and I'm just worried for no good reason.
[...]
Well, net devices are weird - they don't have file descriptors, they
just have names and indices which you can specify in an ioctl on any
socket (or netlink message on an appropriate netlink socket). Opening
means starting the device, and I can't think of a configuration tool
that implicitly opens a net device. Normally they get opened by ifup or
network-manager or the local equivalent, and then they stay open until
an explicit action by the administrator.
Ben.
--
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
On Mon, Apr 18, 2011 at 12:27 PM, Ciprian Docan <[email protected]> wrote:
>
> Linus - I applied your patch and attached the diff between output of dmesg
> before and after the patch as resume_timing.diff. This patch alone does not
> seem to fix the issue;
Ok, that just means that the error return from call_usermodehelper()
gets lost somewhere along the way to kobject_uevent(). Or that I
misread the whole thing.
> however, applying Francois's patch in addition to
> yours make the resume smooth.
Well, Francois' patch on its own should already have fixed it for you,
and made the whole issue moot.
Francois' patch is fine and improves on things, I only had a few
arguments with it. The problems I think it had were really about some
rather special cases.
Linus
> TOTALLY UNTESTED PATCH ATTACHED!
>
> Ciprian - does this get rid of the 60-second wait? Do you get a nice
> kernel traceback in your dmesg instead?
Linus - I applied your patch and attached the diff between output of dmesg
before and after the patch as resume_timing.diff. This patch alone does
not seem to fix the issue; however, applying Francois's patch in addition
to yours make the resume smooth. I have also attached the diff output of
dmesg before and after, with both patched applied as resume2_timing.diff.
Both cases were tested after a fresh reboot. Please let me know if you
need any other information.
Thank you,
--
Ciprian
Linus Torvalds <[email protected]> :
[...]
> Francois' patch is fine and improves on things, I only had a few
> arguments with it. The problems I think it had were really about some
> rather special cases.
They may be addressed by the patch below.
I'll test it as soon as my 2.6.39-rc stops softlocking and I have a
working suspend / resume. No joke.
Subject: [PATCH] r8169: don't request firmware when there's no userspace.
The firmware is cached during the first successfull call to open() and
released once the network device is unregistered. The driver uses the
cached firmware between open() and unregister_netdev().
So far the firmware is optional : a failure to load the firmware does
not prevent open() to success. It is thus necessary to 1) unregister
all 816x / 810[23] devices and 2) force a driver probe to issue a new
firmware load.
Signed-off-by: Francois Romieu <[email protected]>
---
drivers/net/r8169.c | 92 ++++++++++++++++++++++++++++++++++++---------------
1 files changed, 65 insertions(+), 27 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 493b0de..fceb954 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -170,6 +170,16 @@ static const struct {
};
#undef _R
+static const struct rtl_firmware_info {
+ int mac_version;
+ const char *fw_name;
+} rtl_firmware_infos[] = {
+ { .mac_version = RTL_GIGA_MAC_VER_25, .fw_name = FIRMWARE_8168D_1 },
+ { .mac_version = RTL_GIGA_MAC_VER_26, .fw_name = FIRMWARE_8168D_2 },
+ { .mac_version = RTL_GIGA_MAC_VER_29, .fw_name = FIRMWARE_8105E_1 },
+ { .mac_version = RTL_GIGA_MAC_VER_30, .fw_name = FIRMWARE_8105E_1 }
+};
+
enum cfg_version {
RTL_CFG_0 = 0x00,
RTL_CFG_1,
@@ -565,6 +575,7 @@ struct rtl8169_private {
u32 saved_wolopts;
const struct firmware *fw;
+#define RTL_FIRMWARE_UNKNOWN ERR_PTR(-EAGAIN);
};
MODULE_AUTHOR("Realtek and the Linux r8169 crew <[email protected]>");
@@ -1790,24 +1801,24 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
static void rtl_release_firmware(struct rtl8169_private *tp)
{
release_firmware(tp->fw);
- tp->fw = NULL;
+ tp->fw = RTL_FIRMWARE_UNKNOWN;
}
-static int rtl_apply_firmware(struct rtl8169_private *tp, const char *fw_name)
+static void rtl_apply_firmware(struct rtl8169_private *tp)
{
- const struct firmware **fw = &tp->fw;
- int rc = !*fw;
-
- if (rc) {
- rc = request_firmware(fw, fw_name, &tp->pci_dev->dev);
- if (rc < 0)
- goto out;
- }
+ const struct firmware *fw = tp->fw;
/* TODO: release firmware once rtl_phy_write_fw signals failures. */
- rtl_phy_write_fw(tp, *fw);
-out:
- return rc;
+ if (!IS_ERR_OR_NULL(fw))
+ rtl_phy_write_fw(tp, fw);
+}
+
+static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
+{
+ if (rtl_readphy(tp, reg) != val)
+ netif_warn(tp, hw, tp->dev, "chipset not ready for firmware\n");
+ else
+ rtl_apply_firmware(tp);
}
static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
@@ -2246,10 +2257,8 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_writephy(tp, 0x05, 0x001b);
- if ((rtl_readphy(tp, 0x06) != 0xbf00) ||
- (rtl_apply_firmware(tp, FIRMWARE_8168D_1) < 0)) {
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
- }
+
+ rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xbf00);
rtl_writephy(tp, 0x1f, 0x0000);
}
@@ -2351,10 +2360,8 @@ static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_writephy(tp, 0x05, 0x001b);
- if ((rtl_readphy(tp, 0x06) != 0xb300) ||
- (rtl_apply_firmware(tp, FIRMWARE_8168D_2) < 0)) {
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
- }
+
+ rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xb300);
rtl_writephy(tp, 0x1f, 0x0000);
}
@@ -2474,8 +2481,7 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x18, 0x0310);
msleep(100);
- if (rtl_apply_firmware(tp, FIRMWARE_8105E_1) < 0)
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
+ rtl_apply_firmware(tp);
rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
}
@@ -3237,6 +3243,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->timer.data = (unsigned long) dev;
tp->timer.function = rtl8169_phy_timer;
+ tp->fw = RTL_FIRMWARE_UNKNOWN;
+
rc = register_netdev(dev);
if (rc < 0)
goto err_out_msi_4;
@@ -3288,10 +3296,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
cancel_delayed_work_sync(&tp->task);
- rtl_release_firmware(tp);
-
unregister_netdev(dev);
+ rtl_release_firmware(tp);
+
if (pci_dev_run_wake(pdev))
pm_runtime_get_noresume(&pdev->dev);
@@ -3303,6 +3311,33 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
}
+static void rtl_request_firmware(struct rtl8169_private *tp)
+{
+ int i;
+
+ /* Return early if the firmware is already loaded / cached. */
+ if (!IS_ERR(tp->fw))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(rtl_firmware_infos); i++) {
+ const struct rtl_firmware_info *info = rtl_firmware_infos + i;
+
+ if (info->mac_version == tp->mac_version) {
+ const char *name = info->fw_name;
+ int rc;
+
+ rc = request_firmware(&tp->fw, name, &tp->pci_dev->dev);
+ if (rc < 0) {
+ netif_warn(tp, ifup, tp->dev, "unable to load "
+ "firmware patch %s (%d)\n", name, rc);
+ }
+ return;
+ }
+ }
+
+ tp->fw = NULL;
+}
+
static int rtl8169_open(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
@@ -3334,11 +3369,13 @@ static int rtl8169_open(struct net_device *dev)
smp_mb();
+ rtl_request_firmware(tp);
+
retval = request_irq(dev->irq, rtl8169_interrupt,
(tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
dev->name, dev);
if (retval < 0)
- goto err_release_ring_2;
+ goto err_release_fw_2;
napi_enable(&tp->napi);
@@ -3359,7 +3396,8 @@ static int rtl8169_open(struct net_device *dev)
out:
return retval;
-err_release_ring_2:
+err_release_fw_2:
+ rtl_release_firmware(tp);
rtl8169_rx_clear(tp);
err_free_rx_1:
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
--
1.7.4.2
Hello Francois,
I tried your patch and my machine resumes properly.
> The firmware is cached during the first successfull call to open() and
> released once the network device is unregistered. The driver uses the
> cached firmware between open() and unregister_netdev().
Here is what I did to test it: I compiled the patched module, unloaded the
previous module, loaded the new one, and then suspend & resume. Output of
dmesg attached. Please let me know if you need any other information.
Regards,
--
Ciprian
Hello Francois,
I tried to unload the module, but it crashed. I re-booted the machine and
tried to unload the module again. I got the output from dmesg; please see
attached.
Regards,
--
Ciprian
Ciprian Docan <[email protected]> :
[...]
> Here is what I did to test it: I compiled the patched module,
> unloaded the previous module, loaded the new one, and then suspend &
> resume. Output of dmesg attached. Please let me know if you need any
> other information.
Thanks.
You can remove the firmware and try the same test.
Then you try again with a non-modular r8169 driver, with and without
firmware.
You should see a few extra messages but suffer no extra delay during
resume.
--
Ueimor
Ciprian Docan <[email protected]> :
[...]
> I tried to unload the module, but it crashed. I re-booted the
> machine and tried to unload the module again. I got the output from
> dmesg; please see attached.
Hmmm...
Subject: [PATCH] r8169: don't request firmware when there's no userspace.
The firmware is cached during the first successfull call to open() and
released once the network device is unregistered. The driver uses the
cached firmware between open() and unregister_netdev().
So far the firmware is optional : a failure to load the firmware does
not prevent open() to success. It is thus necessary to 1) unregister
all 816x / 810[23] devices and 2) force a driver probe to issue a new
firmware load.
Signed-off-by: Francois Romieu <[email protected]>
---
drivers/net/r8169.c | 96 ++++++++++++++++++++++++++++++++++++--------------
1 files changed, 69 insertions(+), 27 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 493b0de..4b41b80 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -170,6 +170,16 @@ static const struct {
};
#undef _R
+static const struct rtl_firmware_info {
+ int mac_version;
+ const char *fw_name;
+} rtl_firmware_infos[] = {
+ { .mac_version = RTL_GIGA_MAC_VER_25, .fw_name = FIRMWARE_8168D_1 },
+ { .mac_version = RTL_GIGA_MAC_VER_26, .fw_name = FIRMWARE_8168D_2 },
+ { .mac_version = RTL_GIGA_MAC_VER_29, .fw_name = FIRMWARE_8105E_1 },
+ { .mac_version = RTL_GIGA_MAC_VER_30, .fw_name = FIRMWARE_8105E_1 }
+};
+
enum cfg_version {
RTL_CFG_0 = 0x00,
RTL_CFG_1,
@@ -565,6 +575,7 @@ struct rtl8169_private {
u32 saved_wolopts;
const struct firmware *fw;
+#define RTL_FIRMWARE_UNKNOWN ERR_PTR(-EAGAIN);
};
MODULE_AUTHOR("Realtek and the Linux r8169 crew <[email protected]>");
@@ -1790,24 +1801,24 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
static void rtl_release_firmware(struct rtl8169_private *tp)
{
release_firmware(tp->fw);
- tp->fw = NULL;
+ tp->fw = RTL_FIRMWARE_UNKNOWN;
}
-static int rtl_apply_firmware(struct rtl8169_private *tp, const char *fw_name)
+static void rtl_apply_firmware(struct rtl8169_private *tp)
{
- const struct firmware **fw = &tp->fw;
- int rc = !*fw;
-
- if (rc) {
- rc = request_firmware(fw, fw_name, &tp->pci_dev->dev);
- if (rc < 0)
- goto out;
- }
+ const struct firmware *fw = tp->fw;
/* TODO: release firmware once rtl_phy_write_fw signals failures. */
- rtl_phy_write_fw(tp, *fw);
-out:
- return rc;
+ if (!IS_ERR_OR_NULL(fw))
+ rtl_phy_write_fw(tp, fw);
+}
+
+static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
+{
+ if (rtl_readphy(tp, reg) != val)
+ netif_warn(tp, hw, tp->dev, "chipset not ready for firmware\n");
+ else
+ rtl_apply_firmware(tp);
}
static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
@@ -2246,10 +2257,8 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_writephy(tp, 0x05, 0x001b);
- if ((rtl_readphy(tp, 0x06) != 0xbf00) ||
- (rtl_apply_firmware(tp, FIRMWARE_8168D_1) < 0)) {
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
- }
+
+ rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xbf00);
rtl_writephy(tp, 0x1f, 0x0000);
}
@@ -2351,10 +2360,8 @@ static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x1f, 0x0005);
rtl_writephy(tp, 0x05, 0x001b);
- if ((rtl_readphy(tp, 0x06) != 0xb300) ||
- (rtl_apply_firmware(tp, FIRMWARE_8168D_2) < 0)) {
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
- }
+
+ rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xb300);
rtl_writephy(tp, 0x1f, 0x0000);
}
@@ -2474,8 +2481,7 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
rtl_writephy(tp, 0x18, 0x0310);
msleep(100);
- if (rtl_apply_firmware(tp, FIRMWARE_8105E_1) < 0)
- netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
+ rtl_apply_firmware(tp);
rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
}
@@ -3237,6 +3243,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->timer.data = (unsigned long) dev;
tp->timer.function = rtl8169_phy_timer;
+ tp->fw = RTL_FIRMWARE_UNKNOWN;
+
rc = register_netdev(dev);
if (rc < 0)
goto err_out_msi_4;
@@ -3288,10 +3296,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
cancel_delayed_work_sync(&tp->task);
- rtl_release_firmware(tp);
-
unregister_netdev(dev);
+ rtl_release_firmware(tp);
+
if (pci_dev_run_wake(pdev))
pm_runtime_get_noresume(&pdev->dev);
@@ -3303,6 +3311,37 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
}
+static void rtl_request_firmware(struct rtl8169_private *tp)
+{
+ int i;
+
+ /* Return early if the firmware is already loaded / cached. */
+ if (!IS_ERR(tp->fw))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(rtl_firmware_infos); i++) {
+ const struct rtl_firmware_info *info = rtl_firmware_infos + i;
+
+ if (info->mac_version == tp->mac_version) {
+ const char *name = info->fw_name;
+ int rc;
+
+ rc = request_firmware(&tp->fw, name, &tp->pci_dev->dev);
+ if (rc < 0) {
+ netif_warn(tp, ifup, tp->dev, "unable to load "
+ "firmware patch %s (%d)\n", name, rc);
+ goto out_disable_request_firmware;
+ }
+ goto out;
+ }
+ }
+
+out_disable_request_firmware:
+ tp->fw = NULL;
+out:
+ return;
+}
+
static int rtl8169_open(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
@@ -3334,11 +3373,13 @@ static int rtl8169_open(struct net_device *dev)
smp_mb();
+ rtl_request_firmware(tp);
+
retval = request_irq(dev->irq, rtl8169_interrupt,
(tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
dev->name, dev);
if (retval < 0)
- goto err_release_ring_2;
+ goto err_release_fw_2;
napi_enable(&tp->napi);
@@ -3359,7 +3400,8 @@ static int rtl8169_open(struct net_device *dev)
out:
return retval;
-err_release_ring_2:
+err_release_fw_2:
+ rtl_release_firmware(tp);
rtl8169_rx_clear(tp);
err_free_rx_1:
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
--
1.7.4.2
Hello Francois,
>> I tried to unload the module, but it crashed. I re-booted the
>> machine and tried to unload the module again. I got the output from
>> dmesg; please see attached.
>
> Hmmm...
I adapted your patch and fixed the module unload problem; please let me
know what you think. Thank you.
--
Ciprian
Ciprian Docan <[email protected]> :
[...]
> @@ -1789,25 +1800,26 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
>
> static void rtl_release_firmware(struct rtl8169_private *tp)
> {
> - release_firmware(tp->fw);
> - tp->fw = NULL;
> + if (!IS_ERR_OR_NULL(tp->fw))
> + release_firmware(tp->fw);
Ok.
Nit: the test against NULL is not needed here.
--
Ueimor