LinuxLists.cc - [RFC][PATCH v2] p54usb: rx refill revamp

2009-01-23 21:45:22

Subject: [RFC][PATCH v2] p54usb: rx refill revamp

This patch fixes a long standing issue in p54usb.

Under high memory pressure, dev_alloc_skb couldn't always allocate a
replacement skb. In such situations, we had to free the associated urb.
And over the time all urbs were eventually gone altogether and
obviously the device remained mute from then on.

Thanks go out to Artur Skawina for all the reviews, ideas and code!
---
Changes:
- remove workqueue check (now, the workqueue is always there!)
- added Artur's comments
- added Artur's ideas (use poison & unpoison, emergency refill etc...)
- handle urb->status error codes
So now it depends on the error-code if we resubmit the urb & skb,
or queue it in rx_refill_list and free it later.

I hope Artur, I could meet all of your demands this time.;-)
---
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 9539ddc..0d57614 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -80,6 +80,20 @@ static struct usb_device_id p54u_table[] __devinitdata = {

MODULE_DEVICE_TABLE(usb, p54u_table);

+static void p54u_queue_urb_for_refill(struct p54u_priv *priv, struct urb *urb)
+{
+ unsigned long flags;
+
+ /* reset essential pointers */
+ urb->transfer_buffer = NULL;
+ urb->context = NULL;
+ urb->complete = NULL;
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add_tail(&urb->urb_list, &priv->rx_refill_list);
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+}
+
static void p54u_rx_cb(struct urb *urb)
{
struct sk_buff *skb = (struct sk_buff *) urb->context;
@@ -89,54 +103,102 @@ static void p54u_rx_cb(struct urb *urb)

skb_unlink(skb, &priv->rx_queue);

- if (unlikely(urb->status)) {
- dev_kfree_skb_irq(skb);
- return;
- }
-
- skb_put(skb, urb->actual_length);
+ switch (urb->status) {
+ case 0:
+ /*
+ * The device sent us a packet for processing.
+ */
+ skb_put(skb, urb->actual_length);
+
+ /*
+ * remove firmware/device specific headers in
+ * front of the frame.
+ */
+ if (priv->hw_type == P54U_NET2280)
+ skb_pull(skb, priv->common.tx_hdr_len);
+ if (priv->common.fw_interface == FW_LM87) {
+ skb_pull(skb, 4);
+ skb_put(skb, 4);
+ }

- if (priv->hw_type == P54U_NET2280)
- skb_pull(skb, priv->common.tx_hdr_len);
- if (priv->common.fw_interface == FW_LM87) {
- skb_pull(skb, 4);
- skb_put(skb, 4);
- }
+ if (p54_rx(dev, skb)) {
+ /*
+ * This skb has been accepted by library and now
+ * belongs to the mac80211 layer.
+ * We should put the URB on the refill list, where it
+ * can be linked to a newly allocated skb later.
+ * Except, if the workqueue that does the refilling
+ * can't keep up, we'll try a bit harder and attempt
+ * to obtain a new skb right here.
+ */
+
+ if (skb_queue_len(&priv->rx_queue) < 4) {
+ skb = dev_alloc_skb(priv->common.rx_mtu + 32);
+ if (skb) {
+ info = (struct p54u_rx_info *) skb->cb;
+ info->urb = urb;
+ info->dev = dev;
+ /* Update the urb to use the new skb */
+ urb->context = skb;
+ break;
+ }
+ }

- if (p54_rx(dev, skb)) {
- skb = dev_alloc_skb(priv->common.rx_mtu + 32);
- if (unlikely(!skb)) {
- /* TODO check rx queue length and refill *somewhere* */
- return;
+ p54u_queue_urb_for_refill(priv, urb);
+ queue_work(priv->common.hw->workqueue,
+ &priv->rx_refill_work);
+ return ;
}

- info = (struct p54u_rx_info *) skb->cb;
- info->urb = urb;
- info->dev = dev;
- urb->transfer_buffer = skb_tail_pointer(skb);
- urb->context = skb;
- } else {
+ /* Reverse all modifications, it must look like new. */
if (priv->hw_type == P54U_NET2280)
skb_push(skb, priv->common.tx_hdr_len);
if (priv->common.fw_interface == FW_LM87) {
skb_push(skb, 4);
skb_put(skb, 4);
}
- skb_reset_tail_pointer(skb);
- skb_trim(skb, 0);
- if (urb->transfer_buffer != skb_tail_pointer(skb)) {
- /* this should not happen */
- WARN_ON(1);
- urb->transfer_buffer = skb_tail_pointer(skb);
- }
+
+ break;
+
+ case -ENOENT:
+ case -ECONNRESET:
+ case -ENODEV:
+ case -ESHUTDOWN:
+ /*
+ * The device has been stopped or disconnected.
+ * Free the skb and put the URBs into the refill_list.
+ */
+
+ p54u_queue_urb_for_refill(priv, urb);
+ dev_kfree_skb_irq(skb);
+ return;
+
+ default:
+ /*
+ * USB error
+ * This frame is lost, but we can resubmit URB + skb and
+ * wait for a successful retry.
+ */
+ break;
}
- skb_queue_tail(&priv->rx_queue, skb);
+
+ /*
+ * Reuse the URB and its associated skb.
+ * Reset all data pointers into their original state and resubmit it.
+ */
+ skb_reset_tail_pointer(skb);
+ skb_trim(skb, 0);
+ urb->transfer_buffer = skb_tail_pointer(skb);
+
usb_anchor_urb(urb, &priv->submitted);
if (usb_submit_urb(urb, GFP_ATOMIC)) {
- skb_unlink(skb, &priv->rx_queue);
usb_unanchor_urb(urb);
dev_kfree_skb_irq(skb);
- }
+ p54u_queue_urb_for_refill(priv, urb);
+ } else
+ skb_queue_tail(&priv->rx_queue, skb);
+
+ return ;
}

static void p54u_tx_cb(struct urb *urb)
@@ -150,59 +212,165 @@ static void p54u_tx_cb(struct urb *urb)

static void p54u_tx_dummy_cb(struct urb *urb) { }

-static void p54u_free_urbs(struct ieee80211_hw *dev)
+static void p54u_free_rx_refill_list(struct ieee80211_hw *dev)
{
struct p54u_priv *priv = dev->priv;
- usb_kill_anchored_urbs(&priv->submitted);
+ struct urb *entry, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_for_each_entry_safe(entry, tmp, &priv->rx_refill_list, urb_list) {
+ list_del(&entry->urb_list);
+ usb_free_urb(entry);
+ }
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
}

-static int p54u_init_urbs(struct ieee80211_hw *dev)
+static void p54u_unpoison_rx_refill_list(struct ieee80211_hw *dev)
{
struct p54u_priv *priv = dev->priv;
- struct urb *entry = NULL;
- struct sk_buff *skb;
- struct p54u_rx_info *info;
- int ret = 0;
+ struct urb *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_for_each_entry(entry, &priv->rx_refill_list, urb_list) {
+ usb_unpoison_urb(entry);
+ }
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+}
+
+static void p54u_cancel_urbs(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+
+ usb_poison_anchored_urbs(&priv->submitted);

- while (skb_queue_len(&priv->rx_queue) < 32) {
+ /*
+ * By now every RX URB has either finished or been canceled;
+ * the p54u_rx_cb() completion has placed it on the refill
+ * list; any attempts to resubmit from p54u_rx_refill(),
+ * which could still be scheduled to run, will fail.
+ */
+ cancel_work_sync(&priv->rx_refill_work);
+
+ /*
+ * Unpoison all URBs in the rx_refill_list, so they can be reused.
+ */
+ p54u_unpoison_rx_refill_list(dev);
+
+ /*
+ * Unpoison the anchor itself; the old URBs are already gone,
+ * p54u_rx_cb() has moved them all to the refill list.
+ * Prevents new URBs from being poisoned when anchored.
+ */
+ usb_unpoison_anchored_urbs(&priv->submitted);
+}
+
+static int p54u_rx_refill(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ struct urb *entry, *tmp;
+ unsigned long flags;
+ unsigned int refilled_urbs = 0;
+ int err = -EINVAL;
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_for_each_entry_safe(entry, tmp, &priv->rx_refill_list, urb_list) {
+ struct p54u_rx_info *info;
+ struct sk_buff *skb;
+
+ list_del(&entry->urb_list);
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
- if (!skb) {
- ret = -ENOMEM;
- goto err;
- }
- entry = usb_alloc_urb(0, GFP_KERNEL);
- if (!entry) {
- ret = -ENOMEM;
- goto err;
+
+ if (unlikely(!skb)) {
+ /*
+ * In order to prevent a loop, we put the URB
+ * back at the _front_ of the list, so we can
+ * march on, in out-of-memory situations.
+ */
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add(&entry->urb_list, &priv->rx_refill_list);
+ err = -ENOMEM;
+ continue;
}

usb_fill_bulk_urb(entry, priv->udev,
usb_rcvbulkpipe(priv->udev, P54U_PIPE_DATA),
skb_tail_pointer(skb),
priv->common.rx_mtu + 32, p54u_rx_cb, skb);
+
info = (struct p54u_rx_info *) skb->cb;
info->urb = entry;
info->dev = dev;
- skb_queue_tail(&priv->rx_queue, skb);

usb_anchor_urb(entry, &priv->submitted);
- ret = usb_submit_urb(entry, GFP_KERNEL);
- if (ret) {
- skb_unlink(skb, &priv->rx_queue);
+ err = usb_submit_urb(entry, GFP_KERNEL);
+ if (err) {
+ /*
+ * URB submission failed.
+ * Free the associated skb and put the URB back into
+ * the front of the refill list, so we can try our luck
+ * next time.
+ */
+
usb_unanchor_urb(entry);
- goto err;
+ kfree_skb(skb);
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add(&entry->urb_list, &priv->rx_refill_list);
+ } else {
+ skb_queue_tail(&priv->rx_queue, skb);
+ refilled_urbs++;
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
}
- usb_free_urb(entry);
- entry = NULL;
+ }
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+ return refilled_urbs ? 0 : err;
+}
+
+static int p54u_open(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ int err;
+
+ err = p54u_rx_refill(dev);
+ if (err)
+ return err;
+
+ if (skb_queue_len(&priv->rx_queue) != 32) {
+ dev_err(&priv->udev->dev, "Not enough useable transfer buffers "
+ "available to initialize the device.");
+ p54u_cancel_urbs(dev);
+ return -ENOMEM;
}

return 0;
+}

- err:
- usb_free_urb(entry);
- kfree_skb(skb);
- p54u_free_urbs(dev);
- return ret;
+static int p54u_init_urbs(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ struct urb *entry;
+ unsigned long flags;
+ int err = 0;
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ entry = usb_alloc_urb(0, GFP_KERNEL);
+ if (!entry) {
+ err = -ENOMEM;
+ break;
+ }
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add_tail(&entry->urb_list, &priv->rx_refill_list);
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+ }
+
+ if (err)
+ p54u_free_rx_refill_list(dev);
+ return err;
}

static void p54u_tx_3887(struct ieee80211_hw *dev, struct sk_buff *skb)
@@ -880,28 +1048,12 @@ static int p54u_upload_firmware_net2280(struct ieee80211_hw *dev)
return err;
}

-static int p54u_open(struct ieee80211_hw *dev)
+static void p54u_rx_refill_work(struct work_struct *work)
{
- struct p54u_priv *priv = dev->priv;
- int err;
-
- err = p54u_init_urbs(dev);
- if (err) {
- return err;
- }
+ struct p54u_priv *priv = container_of(work, struct p54u_priv,
+ rx_refill_work);

- priv->common.open = p54u_init_urbs;
-
- return 0;
-}
-
-static void p54u_stop(struct ieee80211_hw *dev)
-{
- /* TODO: figure out how to reliably stop the 3887 and net2280 so
- the hardware is still usable next time we want to start it.
- until then, we just stop listening to the hardware.. */
- p54u_free_urbs(dev);
- return;
+ p54u_rx_refill(priv->common.hw);
}

static int __devinit p54u_probe(struct usb_interface *intf,
@@ -928,6 +1080,9 @@ static int __devinit p54u_probe(struct usb_interface *intf,
priv->intf = intf;
skb_queue_head_init(&priv->rx_queue);
init_usb_anchor(&priv->submitted);
+ INIT_WORK(&priv->rx_refill_work, p54u_rx_refill_work);
+ spin_lock_init(&priv->rx_refill_lock);
+ INIT_LIST_HEAD(&priv->rx_refill_list);

usb_get_dev(udev);

@@ -949,8 +1104,7 @@ static int __devinit p54u_probe(struct usb_interface *intf,
recognized_pipes++;
}
}
- priv->common.open = p54u_open;
- priv->common.stop = p54u_stop;
+
if (recognized_pipes < P54U_PIPE_NUMBER) {
priv->hw_type = P54U_3887;
err = p54u_upload_firmware_3887(dev);
@@ -970,12 +1124,19 @@ static int __devinit p54u_probe(struct usb_interface *intf,
if (err)
goto err_free_dev;

- p54u_open(dev);
+ err = p54u_init_urbs(dev);
+ if (err)
+ goto err_free_dev;
+ err = p54u_open(dev);
+ if (err)
+ goto err_free_dev;
err = p54_read_eeprom(dev);
- p54u_stop(dev);
+ p54u_cancel_urbs(dev);
if (err)
goto err_free_dev;

+ priv->common.open = p54u_open;
+ priv->common.stop = p54u_cancel_urbs;
err = ieee80211_register_hw(dev);
if (err) {
dev_err(&udev->dev, "(p54usb) Cannot register netdevice\n");
@@ -999,6 +1160,8 @@ static void __devexit p54u_disconnect(struct usb_interface *intf)
if (!dev)
return;

+ p54u_free_rx_refill_list(dev);
+
ieee80211_unregister_hw(dev);

priv = dev->priv;
diff --git a/drivers/net/wireless/p54/p54usb.h b/drivers/net/wireless/p54/p54usb.h
index 8bc5898..986505c 100644
--- a/drivers/net/wireless/p54/p54usb.h
+++ b/drivers/net/wireless/p54/p54usb.h
@@ -132,8 +132,11 @@ struct p54u_priv {
P54U_3887
} hw_type;

- spinlock_t lock;
struct sk_buff_head rx_queue;
+ spinlock_t rx_refill_lock;
+ struct list_head rx_refill_list;
+ struct work_struct rx_refill_work;
+
struct usb_anchor submitted;
};

2009-01-24 01:15:47

by Christian Lamparter

[permalink] [raw]

Subject: Re: [RFC][PATCH v2] p54usb: rx refill revamp

On Friday 23 January 2009 23:59:07 Artur Skawina wrote:
> Christian Lamparter wrote:
> > This patch fixes a long standing issue in p54usb.
> >
> > Under high memory pressure, dev_alloc_skb couldn't always allocate a
> > replacement skb. In such situations, we had to free the associated urb.
> > And over the time all urbs were eventually gone altogether and
> > obviously the device remained mute from then on.
> >
> > Thanks go out to Artur Skawina for all the reviews, ideas and code!
> > ---
> > Changes:
> > - remove workqueue check (now, the workqueue is always there!)
> > - added Artur's comments
> > - added Artur's ideas (use poison & unpoison, emergency refill etc...)
> > - handle urb->status error codes
> > So now it depends on the error-code if we resubmit the urb & skb,
> > or queue it in rx_refill_list and free it later.
> >
> > I hope Artur, I could meet all of your demands this time.;-)
>
> There never were any 'demands', I had to spend way too much time hunting
> that data corruption bug, and in the process had to learn more than i ever
> wanted about the driver ;) So responding to your rfc was the obvious thing
> to do; feel free to ignore any comments that you think aren't useful.
why? most/all of them turned out to be useful? ;-)

> I have absolutely no problem with doing the work myself, it's just that you
> were fixing bugs that affected my device faster than i was able to run tests;
> so i never got around to send patches. In fact, i'll be waiting until this
> patch goes in, before even starting to work on some other changes, some of
> which i've already mentioned (and others that, afaict, would require changes
> to the usb stack, don't ask ;))
Oh?! please fix urb_unpoison_anchored_urbs!
Unless I'm totally wrong, there's a logic bug in this function preventing the "unposioning" of the urbs.

(I guess you already saw it, or do you plan a different change?)
1) urb_poison_anchored_urbs gets called
1) poison anchor structure
2) poison & killing every single urb
2) the usb_hcd_giveback_urb is called
1) >>unanchores<< the urb form anchor_list
2) calles urb->complete (urb)
3) p54u_rx_cb -here- but nothing interesting there
3) ... [time goes by]
4) urb_unpoison_anchored_urb is called
1) unpoison the anchor structure
2) tries to unpoison the anchored urbs... But there's not a single one in the anchor_list,
since step 2.1 (usb_hcd_giveback_urb) killed them off.

> > +static void p54u_cancel_urbs(struct ieee80211_hw *dev)
> > +{
> > + struct p54u_priv *priv = dev->priv;
> > +
> > + usb_poison_anchored_urbs(&priv->submitted);
> > + /*
> > + * By now every RX URB has either finished or been canceled;
> > + * the p54u_rx_cb() completion has placed it on the refill
> > + * list; any attempts to resubmit from p54u_rx_refill(),
> > + * which could still be scheduled to run, will fail.
> > + */
> > + cancel_work_sync(&priv->rx_refill_work);
> > +
> > + /*
> > + * Unpoison all URBs in the rx_refill_list, so they can be reused.
> > + */
> > + p54u_unpoison_rx_refill_list(dev);
>
> I'm curious why you keep the urbs around in the stopped state?
> The alloc/free/alloc sequence on init may not be that pretty, but
> is there some other reason?
well, in most cases the "stopped state" is very short and most wlan-adapters are always connected.
So, why throw them away when we need them again in a few seconds?
(usually wpa_supplicant/NM has the bad habit of doing a interface up/down dance... sometimes)

> > + /*
> > + * Unpoison the anchor itself; the old URBs are already gone,
> > + * p54u_rx_cb() has moved them all to the refill list.
> > + * Prevents new URBs from being poisoned when anchored.
> > + */
> > + usb_unpoison_anchored_urbs(&priv->submitted);
> > +}
> > +
> > +static int p54u_rx_refill(struct ieee80211_hw *dev)
> > +{
> > + struct p54u_priv *priv = dev->priv;
> > + struct urb *entry, *tmp;
> > + unsigned long flags;
> > + unsigned int refilled_urbs = 0;
> > + int err = -EINVAL;
> > +
> > + spin_lock_irqsave(&priv->rx_refill_lock, flags);

> > + list_for_each_entry_safe(entry, tmp, &priv->rx_refill_list, urb_list) {
uhh, this should be list_for_each_entry_safe_continue ... (fixed)
> > + struct p54u_rx_info *info;
> > + struct sk_buff *skb;
> > +
> > + list_del(&entry->urb_list);
> > + spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
> > skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
> > +
> > + if (unlikely(!skb)) {
> > + /*
> > + * In order to prevent a loop, we put the URB
> > + * back at the _front_ of the list, so we can
> > + * march on, in out-of-memory situations.
> > + */
> > +
> > + spin_lock_irqsave(&priv->rx_refill_lock, flags);
> > + list_add(&entry->urb_list, &priv->rx_refill_list);
> > + err = -ENOMEM;
> > + continue;
> > }
> >
> > usb_fill_bulk_urb(entry, priv->udev,
> > usb_rcvbulkpipe(priv->udev, P54U_PIPE_DATA),
> > skb_tail_pointer(skb),
> > priv->common.rx_mtu + 32, p54u_rx_cb, skb);
> > +
> > info = (struct p54u_rx_info *) skb->cb;
> > info->urb = entry;
> > info->dev = dev;
> >
> > usb_anchor_urb(entry, &priv->submitted);
> > + err = usb_submit_urb(entry, GFP_KERNEL);
> > + if (err) {
>
> Hmm, won't this path (ie the foreach loop) be executed many times when
> canceling the urbs? (that's why i was returning early on -EPERM in my
> patch, but have not actually checked if it's an issue. yet.)
well, we don't schedule the workqueue if we canceling the urbs now,
( that's what the urb->status switch is supposed to do/( or in this context )stop...)

Another maybe related thing: ( a bit above)
* In order to prevent a loop, we put the URB
* back at the _front_ of the list, so we can
* march on, in out-of-memory situations.

I guess this could be true for -EPERM as well?
As far as I know list_for_each_entry_* iterates until it hits (head)
and since we insert the -EPERM "urb" with list_add (_head),
we will never do more than 32 iterations?! (since list_add put the elements in (head)->next )

But if we cancel on -EPERM, we should bail out on -ENODEV
(or -ECONNRESET, what ever says that the device is unavailable ) as well...

> > + /*
> > + * URB submission failed.
> > + * Free the associated skb and put the URB back into
> > + * the front of the refill list, so we can try our luck
> > + * next time.
> > + */
> > +
> > usb_unanchor_urb(entry);
> > + kfree_skb(skb);
> > + spin_lock_irqsave(&priv->rx_refill_lock, flags);
> > + list_add(&entry->urb_list, &priv->rx_refill_list);
> > + } else {
> > + skb_queue_tail(&priv->rx_queue, skb);
> > + refilled_urbs++;
> > + spin_lock_irqsave(&priv->rx_refill_lock, flags);
> > }
> > + }
> > + spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
> > + return refilled_urbs ? 0 : err;
> > +}
> > +
> > +static int p54u_open(struct ieee80211_hw *dev)
> > +{
> > + struct p54u_priv *priv = dev->priv;
> > + int err;
> > +
> > + err = p54u_rx_refill(dev);
> > + if (err)
> > + return err;
> > +
> > + if (skb_queue_len(&priv->rx_queue) != 32) {
> > + dev_err(&priv->udev->dev, "Not enough useable transfer buffers "
> > + "available to initialize the device.");
> > + p54u_cancel_urbs(dev);
> > + return -ENOMEM;
>
> Why 32 urbs?
Well, that's the firmware/hardware limit for all prism54 chips
(doesn't matter if usb/pci fullmac/softmac etc...)
all have 32 rx and tx slots in the "normal priority" queue/ring-buffer.

> And why should open() fail if, say, only 28 got successfully allocated?
> Shouldn't the device function nonetheless?
Well, what's the point of supporting a system that has problems finding 32 pages with GFP_KERNEL?
you know "one allocation on device init isn't worth avoiding." :-p

Regards,
Chr

2009-01-24 20:56:50

by Artur Skawina

[permalink] [raw]

Subject: Re: [RFC][PATCH v2] p54usb: rx refill revamp

Artur Skawina wrote:
> Christian Lamparter wrote:
>> On Saturday 24 January 2009 05:18:07 Artur Skawina wrote:
>>> [My version schedules the work for every urb, even the poisoned ones]
>> well, there's now a hard limit... no change of a endless loop now.
>
> The whole point of the poisoning was to prevent resubmission when
> canceling the urbs -- if you work around that manually, you could just
> as well kill them, instead of poisoning.
> I don't understand why want to add extra code to the rx irq just to
> avoid scheduling a work when downing the i/f, and keep a nasty failure
> case. The difference in down() performance is not going to be measurable,
> and even if it was, it wouldn't matter.

Oh, and we could always do something like

if (likely(atomic_read(&urb->reject)==0))
queue_work(priv->common.hw->workqueue, &priv->rx_refill_work);

which should catch most cases then urbs are either killed or poisoned.

artur

2009-01-24 04:18:11

by Artur Skawina

[permalink] [raw]

Subject: Re: [RFC][PATCH v2] p54usb: rx refill revamp

Christian Lamparter wrote:
> 1) urb_poison_anchored_urbs gets called
> 1) poison anchor structure
> 2) poison & killing every single urb
> 2) the usb_hcd_giveback_urb is called
> 1) >>unanchores<< the urb form anchor_list
> 2) calles urb->complete (urb)
> 3) p54u_rx_cb -here- but nothing interesting there
> 3) ... [time goes by]
> 4) urb_unpoison_anchored_urb is called
> 1) unpoison the anchor structure
> 2) tries to unpoison the anchored urbs... But there's not a single one in the anchor_list,
> since step 2.1 (usb_hcd_giveback_urb) killed them off.

I assume that's just how it's supposed to be. You could always anchor
the urbs to another anchor in the completion_. Or free any buffers and
drop the last ref before leaving the completion. (in fact, the former
is basically what you're doing, just using a list instead)

>> I'm curious why you keep the urbs around in the stopped state?
> well, in most cases the "stopped state" is very short and most wlan-adapters are always connected.
> So, why throw them away when we need them again in a few seconds?
> (usually wpa_supplicant/NM has the bad habit of doing a interface up/down dance... sometimes)

ok. (i don't know about most wlans being always up, but it seems a
reasonable compromise. still, that's 100k+ wasted ram in the down
state.)

> well, we don't schedule the workqueue if we canceling the urbs now,
> ( that's what the urb->status switch is supposed to do/( or in this context )stop...)

yep, noticed that later, see below.

> Another maybe related thing: ( a bit above)
> * In order to prevent a loop, we put the URB
> * back at the _front_ of the list, so we can
> * march on, in out-of-memory situations.
>
> I guess this could be true for -EPERM as well?
> As far as I know list_for_each_entry_* iterates until it hits (head)
> and since we insert the -EPERM "urb" with list_add (_head),
> we will never do more than 32 iterations?! (since list_add put the elements in (head)->next )
>
> But if we cancel on -EPERM, we should bail out on -ENODEV
> (or -ECONNRESET, what ever says that the device is unavailable ) as well...

I'm not sure I follow.. Ah, the only reason I bailed out on -EPERM
is that usb_submit_urb() will return -EPERM for poisoned urbs and
i didn't want to retry this call for every other urb as they would
all fail. Each try involves a useless skb alloc and free...
[My version schedules the work for every urb, even the poisoned ones]

>>> + if (skb_queue_len(&priv->rx_queue) != 32) {
>>> + dev_err(&priv->udev->dev, "Not enough useable transfer buffers "
>>> + "available to initialize the device.");
>>> + return -ENOMEM;
>> Why 32 urbs?
> Well, that's the firmware/hardware limit for all prism54 chips
> (doesn't matter if usb/pci fullmac/softmac etc...)
> all have 32 rx and tx slots in the "normal priority" queue/ring-buffer.
>
>> And why should open() fail if, say, only 28 got successfully allocated?
>> Shouldn't the device function nonetheless?
> Well, what's the point of supporting a system that has problems finding 32 pages with GFP_KERNEL?
> you know "one allocation on device init isn't worth avoiding." :-p

ok. that's not something this patch changes anyway ;)

I looked at your v2 briefly yesterday and even wrote a reply, but
didn't send it. I really liked your v1 much better, the new version
makes the code much harder to follow, and still can stall the device
after a few consecutive urb completion or submission (this is new)
errors happen. Uhm, i probably should shut up now ;)

Thanks for doing all this work,

artur

2009-01-23 22:59:14

by Artur Skawina

[permalink] [raw]

Subject: Re: [RFC][PATCH v2] p54usb: rx refill revamp

Christian Lamparter wrote:
> This patch fixes a long standing issue in p54usb.
>
> Under high memory pressure, dev_alloc_skb couldn't always allocate a
> replacement skb. In such situations, we had to free the associated urb.
> And over the time all urbs were eventually gone altogether and
> obviously the device remained mute from then on.
>
> Thanks go out to Artur Skawina for all the reviews, ideas and code!
> ---
> Changes:
> - remove workqueue check (now, the workqueue is always there!)
> - added Artur's comments
> - added Artur's ideas (use poison & unpoison, emergency refill etc...)
> - handle urb->status error codes
> So now it depends on the error-code if we resubmit the urb & skb,
> or queue it in rx_refill_list and free it later.
>
> I hope Artur, I could meet all of your demands this time.;-)

There never were any 'demands', I had to spend way too much time hunting
that data corruption bug, and in the process had to learn more than i ever
wanted about the driver ;) So responding to your rfc was the obvious thing
to do; feel free to ignore any comments that you think aren't useful.
I have absolutely no problem with doing the work myself, it's just that you
were fixing bugs that affected my device faster than i was able to run tests;
so i never got around to send patches. In fact, i'll be waiting until this
patch goes in, before even starting to work on some other changes, some of
which i've already mentioned (and others that, afaict, would require changes
to the usb stack, don't ask ;))

This patch reorganizes the code so much, i'll have to read it in context
later, this time only a few comments/questions, i hope you don't mind :)

> +static void p54u_cancel_urbs(struct ieee80211_hw *dev)
> +{
> + struct p54u_priv *priv = dev->priv;
> +
> + usb_poison_anchored_urbs(&priv->submitted);
> + /*
> + * By now every RX URB has either finished or been canceled;
> + * the p54u_rx_cb() completion has placed it on the refill
> + * list; any attempts to resubmit from p54u_rx_refill(),
> + * which could still be scheduled to run, will fail.
> + */
> + cancel_work_sync(&priv->rx_refill_work);
> +
> + /*
> + * Unpoison all URBs in the rx_refill_list, so they can be reused.
> + */
> + p54u_unpoison_rx_refill_list(dev);

I'm curious why you keep the urbs around in the stopped state?
The alloc/free/alloc sequence on init may not be that pretty, but
is there some other reason?

> + /*
> + * Unpoison the anchor itself; the old URBs are already gone,
> + * p54u_rx_cb() has moved them all to the refill list.
> + * Prevents new URBs from being poisoned when anchored.
> + */
> + usb_unpoison_anchored_urbs(&priv->submitted);
> +}
> +
> +static int p54u_rx_refill(struct ieee80211_hw *dev)
> +{
> + struct p54u_priv *priv = dev->priv;
> + struct urb *entry, *tmp;
> + unsigned long flags;
> + unsigned int refilled_urbs = 0;
> + int err = -EINVAL;
> +
> + spin_lock_irqsave(&priv->rx_refill_lock, flags);
> + list_for_each_entry_safe(entry, tmp, &priv->rx_refill_list, urb_list) {
> + struct p54u_rx_info *info;
> + struct sk_buff *skb;
> +
> + list_del(&entry->urb_list);
> + spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
> skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
> +
> + if (unlikely(!skb)) {
> + /*
> + * In order to prevent a loop, we put the URB
> + * back at the _front_ of the list, so we can
> + * march on, in out-of-memory situations.
> + */
> +
> + spin_lock_irqsave(&priv->rx_refill_lock, flags);
> + list_add(&entry->urb_list, &priv->rx_refill_list);
> + err = -ENOMEM;
> + continue;
> }
>
> usb_fill_bulk_urb(entry, priv->udev,
> usb_rcvbulkpipe(priv->udev, P54U_PIPE_DATA),
> skb_tail_pointer(skb),
> priv->common.rx_mtu + 32, p54u_rx_cb, skb);
> +
> info = (struct p54u_rx_info *) skb->cb;
> info->urb = entry;
> info->dev = dev;
>
> usb_anchor_urb(entry, &priv->submitted);
> + err = usb_submit_urb(entry, GFP_KERNEL);
> + if (err) {

Hmm, won't this path (ie the foreach loop) be executed many times when
canceling the urbs? (that's why i was returning early on -EPERM in my
patch, but have not actually checked if it's an issue. yet.)

> + /*
> + * URB submission failed.
> + * Free the associated skb and put the URB back into
> + * the front of the refill list, so we can try our luck
> + * next time.
> + */
> +
> usb_unanchor_urb(entry);
> + kfree_skb(skb);
> + spin_lock_irqsave(&priv->rx_refill_lock, flags);
> + list_add(&entry->urb_list, &priv->rx_refill_list);
> + } else {
> + skb_queue_tail(&priv->rx_queue, skb);
> + refilled_urbs++;
> + spin_lock_irqsave(&priv->rx_refill_lock, flags);
> }
> + }
> + spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
> + return refilled_urbs ? 0 : err;
> +}
> +
> +static int p54u_open(struct ieee80211_hw *dev)
> +{
> + struct p54u_priv *priv = dev->priv;
> + int err;
> +
> + err = p54u_rx_refill(dev);
> + if (err)
> + return err;
> +
> + if (skb_queue_len(&priv->rx_queue) != 32) {
> + dev_err(&priv->udev->dev, "Not enough useable transfer buffers "
> + "available to initialize the device.");
> + p54u_cancel_urbs(dev);
> + return -ENOMEM;

Why 32 urbs?
And why should open() fail if, say, only 28 got successfully allocated?
Shouldn't the device function nonetheless?

artur

2009-01-24 19:54:22

by Artur Skawina

[permalink] [raw]

Subject: Re: [RFC][PATCH v2] p54usb: rx refill revamp

Christian Lamparter wrote:
> On Saturday 24 January 2009 05:18:07 Artur Skawina wrote:
>> Christian Lamparter wrote:
>>> 4) urb_unpoison_anchored_urb is called
>>> 1) unpoison the anchor structure
>>> 2) tries to unpoison the anchored urbs... But there's not a single one in the anchor_list,
>>> since step 2.1 (usb_hcd_giveback_urb) killed them off.
>> I assume that's just how it's supposed to be. You could always anchor
>> the urbs to another anchor in the completion_. Or free any buffers and
>> drop the last ref before leaving the completion. (in fact, the former
>> is basically what you're doing, just using a list instead)
> true! In fact --- behold the clean up patch which uses another anchor instead of the list ---

will try to look at this tomorrow, or the day after.

>> ok. (i don't know about most wlans being always up, but it seems a
>> reasonable compromise. still, that's 100k+ wasted ram in the down
>> state.)
>
> ??? We don't waste 100k+.
> We recycle the skbs, so the only thing left is 32 urb structures.

I need a weekend off ;), you're right, of course.

>>> well, we don't schedule the workqueue if we canceling the urbs now,
>>> ( that's what the urb->status switch is supposed to do/( or in this context )stop...)
>> yep, noticed that later, see below.

>> [My version schedules the work for every urb, even the poisoned ones]
> well, there's now a hard limit... no change of a endless loop now.

The whole point of the poisoning was to prevent resubmission when
canceling the urbs -- if you work around that manually, you could just
as well kill them, instead of poisoning.
I don't understand why want to add extra code to the rx irq just to
avoid scheduling a work when downing the i/f, and keep a nasty failure
case. The difference in down() performance is not going to be measurable,
and even if it was, it wouldn't matter.
Just four consecutive rx failures could make the device completely death,
requiring a down/up sequence to restore it to working condition. Before
you merged the emergency alloc-in-irq, even just one error could have
killed the card. Yes, it's going to be rare, but bugs that trigger once
a year during 24/7 operation are the absolutely worst kind of bugs.
And it may even be remotely exploitable, eg by flooding the card with
a large number of frames when the host experiences memory pressure or
has some cabling or power supply issues.

>> I looked at your v2 briefly yesterday and even wrote a reply, but
>> didn't send it. I really liked your v1 much better, the new version
>> makes the code much harder to follow, and still can stall the device
>> after a few consecutive urb completion or submission (this is new)
>> errors happen. Uhm, i probably should shut up now ;)
>
> be prepared to write the reply again ;-)
>
> Yeah, it's always easier to follow your own code, however its sometimes
> harder to find bugs, because you assumed you did everything right in the first place...

In this case it was mostly your code in _both_ cases :)
You can argue it's a matter of taste, but try to compare the old version
of p54u_rx_cb() with your new one side-by-side; probably the last patch
I sent vs your previous v2 one are the best candidates, as those are mostly
equivalent, with the difference being the work scheduling on status!=0,
which doesn't change the flow significantly.
Even though i know exactly what that completion is supposed to do, looking
at the maze of 'returns' and 'breaks', I can't immediately tell whether it
does just that or not. Gotos aren't evil.
Imagine you're looking at the code for the first time, which one would
you understand faster?...

After staring at the new function for ~ a minute, i think the new one
does approximately the same as the old one. 'approximately' because you
changed the code to _not_ resubmit an urb from the workqueue after it
fails to be submitted in the irq. This doesn't seem right; if it failed
because of eg -ENOMEM it could succeed later. The previous version would
have deferred the work in this case. This change wasn't in the changelog,
was it intentional?

Again, feel free to ignore me, i'd just like to have a robust AP, and the
best way to achieve that is to attract more p54 developers :), simpler,
more obvious code lowers the entry cost significantly...

artur

2009-01-24 11:06:22

by Christian Lamparter

[permalink] [raw]

Subject: Re: [RFC][PATCH v2] p54usb: rx refill revamp

On Saturday 24 January 2009 05:18:07 Artur Skawina wrote:
> Christian Lamparter wrote:
> > 1) urb_poison_anchored_urbs gets called
> > 1) poison anchor structure
> > 2) poison & killing every single urb
> > 2) the usb_hcd_giveback_urb is called
> > 1) >>unanchores<< the urb form anchor_list
> > 2) calles urb->complete (urb)
> > 3) p54u_rx_cb -here- but nothing interesting there
> > 3) ... [time goes by]
> > 4) urb_unpoison_anchored_urb is called
> > 1) unpoison the anchor structure
> > 2) tries to unpoison the anchored urbs... But there's not a single one in the anchor_list,
> > since step 2.1 (usb_hcd_giveback_urb) killed them off.
>
> I assume that's just how it's supposed to be. You could always anchor
> the urbs to another anchor in the completion_. Or free any buffers and
> drop the last ref before leaving the completion. (in fact, the former
> is basically what you're doing, just using a list instead)
true! In fact --- behold the clean up patch which uses another anchor instead of the list ---

> >> I'm curious why you keep the urbs around in the stopped state?
> > well, in most cases the "stopped state" is very short and most wlan-adapters are always connected.
> > So, why throw them away when we need them again in a few seconds?
> > (usually wpa_supplicant/NM has the bad habit of doing a interface up/down dance... sometimes)
>
> ok. (i don't know about most wlans being always up, but it seems a
> reasonable compromise. still, that's 100k+ wasted ram in the down
> state.)

??? We don't waste 100k+.
We recycle the skbs, so the only thing left is 32 urb structures.

> > well, we don't schedule the workqueue if we canceling the urbs now,
> > ( that's what the urb->status switch is supposed to do/( or in this context )stop...)
>
> yep, noticed that later, see below.
>
> > Another maybe related thing: ( a bit above)
> > * In order to prevent a loop, we put the URB
> > * back at the _front_ of the list, so we can
> > * march on, in out-of-memory situations.
> >
> > I guess this could be true for -EPERM as well?
> > As far as I know list_for_each_entry_* iterates until it hits (head)
> > and since we insert the -EPERM "urb" with list_add (_head),
> > we will never do more than 32 iterations?! (since list_add put the elements in (head)->next )
> >
> > But if we cancel on -EPERM, we should bail out on -ENODEV
> > (or -ECONNRESET, what ever says that the device is unavailable ) as well...
>
> I'm not sure I follow.. Ah, the only reason I bailed out on -EPERM
> is that usb_submit_urb() will return -EPERM for poisoned urbs and
> i didn't want to retry this call for every other urb as they would
> all fail. Each try involves a useless skb alloc and free...
> [My version schedules the work for every urb, even the poisoned ones]
well, there's now a hard limit... no change of a endless loop now.

> >>> + if (skb_queue_len(&priv->rx_queue) != 32) {
> >>> + dev_err(&priv->udev->dev, "Not enough useable transfer buffers "
> >>> + "available to initialize the device.");
> >>> + return -ENOMEM;
> >> Why 32 urbs?
> > Well, that's the firmware/hardware limit for all prism54 chips
> > (doesn't matter if usb/pci fullmac/softmac etc...)
> > all have 32 rx and tx slots in the "normal priority" queue/ring-buffer.
> >
> >> And why should open() fail if, say, only 28 got successfully allocated?
> >> Shouldn't the device function nonetheless?
> > Well, what's the point of supporting a system that has problems finding 32 pages with GFP_KERNEL?
> > you know "one allocation on device init isn't worth avoiding." :-p
>
> ok. that's not something this patch changes anyway ;)
>
>
> I looked at your v2 briefly yesterday and even wrote a reply, but
> didn't send it. I really liked your v1 much better, the new version
> makes the code much harder to follow, and still can stall the device
> after a few consecutive urb completion or submission (this is new)
> errors happen. Uhm, i probably should shut up now ;)

be prepared to write the reply again ;-)

Yeah, it's always easier to follow your own code, however its sometimes
harder to find bugs, because you assumed you did everything right in the first place...
---
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 9539ddc..64da6cb 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -89,54 +89,102 @@ static void p54u_rx_cb(struct urb *urb)

skb_unlink(skb, &priv->rx_queue);

- if (unlikely(urb->status)) {
- dev_kfree_skb_irq(skb);
- return;
- }
-
- skb_put(skb, urb->actual_length);
+ switch (urb->status) {
+ case 0:
+ /*
+ * The device sent us a packet for processing.
+ */
+ skb_put(skb, urb->actual_length);
+
+ /*
+ * remove firmware/device specific headers in
+ * front of the frame.
+ */
+ if (priv->hw_type == P54U_NET2280)
+ skb_pull(skb, priv->common.tx_hdr_len);
+ if (priv->common.fw_interface == FW_LM87) {
+ skb_pull(skb, 4);
+ skb_put(skb, 4);
+ }

- if (priv->hw_type == P54U_NET2280)
- skb_pull(skb, priv->common.tx_hdr_len);
- if (priv->common.fw_interface == FW_LM87) {
- skb_pull(skb, 4);
- skb_put(skb, 4);
- }
+ if (p54_rx(dev, skb)) {
+ /*
+ * This skb has been accepted by library and now
+ * belongs to the mac80211 layer.
+ * We should put the URB on the refill list, where it
+ * can be linked to a newly allocated skb later.
+ * Except, if the workqueue that does the refilling
+ * can't keep up, we'll try a bit harder and attempt
+ * to obtain a new skb right here.
+ */
+
+ if (skb_queue_len(&priv->rx_queue) < 4) {
+ skb = dev_alloc_skb(priv->common.rx_mtu + 32);
+ if (skb) {
+ info = (struct p54u_rx_info *) skb->cb;
+ info->urb = urb;
+ info->dev = dev;
+ /* Update the urb to use the new skb */
+ urb->context = skb;
+ break;
+ }
+ }

- if (p54_rx(dev, skb)) {
- skb = dev_alloc_skb(priv->common.rx_mtu + 32);
- if (unlikely(!skb)) {
- /* TODO check rx queue length and refill *somewhere* */
- return;
+ usb_anchor_urb(urb, &priv->urb_pool);
+ queue_work(priv->common.hw->workqueue,
+ &priv->rx_refill_work);
+ return ;
}

- info = (struct p54u_rx_info *) skb->cb;
- info->urb = urb;
- info->dev = dev;
- urb->transfer_buffer = skb_tail_pointer(skb);
- urb->context = skb;
- } else {
+ /* Reverse all modifications, it must look like new. */
if (priv->hw_type == P54U_NET2280)
skb_push(skb, priv->common.tx_hdr_len);
if (priv->common.fw_interface == FW_LM87) {
skb_push(skb, 4);
skb_put(skb, 4);
}
- skb_reset_tail_pointer(skb);
- skb_trim(skb, 0);
- if (urb->transfer_buffer != skb_tail_pointer(skb)) {
- /* this should not happen */
- WARN_ON(1);
- urb->transfer_buffer = skb_tail_pointer(skb);
- }
+
+ break;
+
+ case -ENOENT:
+ case -ECONNRESET:
+ case -ENODEV:
+ case -ESHUTDOWN:
+ /*
+ * The device has been stopped or disconnected.
+ * Free the skb and put the URBs into the refill_list.
+ */
+
+ usb_anchor_urb(urb, &priv->urb_pool);
+ dev_kfree_skb_irq(skb);
+ return;
+
+ default:
+ /*
+ * USB error
+ * This frame is lost, but we can resubmit URB + skb and
+ * wait for a successful retry.
+ */
+ break;
}
- skb_queue_tail(&priv->rx_queue, skb);
+
+ /*
+ * Reuse the URB and its associated skb.
+ * Reset all data pointers into their original state and resubmit it.
+ */
+ skb_reset_tail_pointer(skb);
+ skb_trim(skb, 0);
+ urb->transfer_buffer = skb_tail_pointer(skb);
+
usb_anchor_urb(urb, &priv->submitted);
if (usb_submit_urb(urb, GFP_ATOMIC)) {
- skb_unlink(skb, &priv->rx_queue);
usb_unanchor_urb(urb);
dev_kfree_skb_irq(skb);
- }
+ usb_anchor_urb(urb, &priv->urb_pool);
+ } else
+ skb_queue_tail(&priv->rx_queue, skb);
+
+ return ;
}

static void p54u_tx_cb(struct urb *urb)
@@ -150,59 +198,146 @@ static void p54u_tx_cb(struct urb *urb)

static void p54u_tx_dummy_cb(struct urb *urb) { }

-static void p54u_free_urbs(struct ieee80211_hw *dev)
+static void p54u_cancel_urbs(struct ieee80211_hw *dev)
{
struct p54u_priv *priv = dev->priv;
- usb_kill_anchored_urbs(&priv->submitted);
+
+ usb_poison_anchored_urbs(&priv->submitted);
+
+ /*
+ * By now every RX URB has either finished or been canceled;
+ * the p54u_rx_cb() completion has placed it on the refill
+ * list; any attempts to resubmit from p54u_rx_refill(),
+ * which could still be scheduled to run, will fail.
+ */
+ cancel_work_sync(&priv->rx_refill_work);
+
+ /*
+ * Unpoison the anchor itself; the old URBs are already gone,
+ * p54u_rx_cb() has moved them all to the refill list.
+ * Prevents new URBs from being poisoned when anchored.
+ */
+ usb_unpoison_anchored_urbs(&priv->submitted);
+
+ /*
+ * Unpoison all unused URBs in the pool, in case we want to reuse them.
+ */
+ usb_unpoison_anchored_urbs(&priv->urb_pool);
}

-static int p54u_init_urbs(struct ieee80211_hw *dev)
+static int p54u_rx_refill(struct ieee80211_hw *dev)
{
struct p54u_priv *priv = dev->priv;
- struct urb *entry = NULL;
- struct sk_buff *skb;
- struct p54u_rx_info *info;
- int ret = 0;
+ struct urb *entry;
+ unsigned int refilled_urbs = 0, cnt = 0;
+ int err = -EINVAL;
+
+ while (cnt++ != 32 && (entry = usb_get_from_anchor(&priv->urb_pool))) {
+ struct p54u_rx_info *info;
+ struct sk_buff *skb;

- while (skb_queue_len(&priv->rx_queue) < 32) {
skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
- if (!skb) {
- ret = -ENOMEM;
- goto err;
- }
- entry = usb_alloc_urb(0, GFP_KERNEL);
- if (!entry) {
- ret = -ENOMEM;
- goto err;
+
+ if (unlikely(!skb)) {
+ /*
+ * In order to prevent a loop, we put the URB
+ * back at the _front_ of the list, so we can
+ * march on, in out-of-memory situations.
+ */
+
+ usb_anchor_urb(entry, &priv->urb_pool);
+ usb_free_urb(entry);
+ err = -ENOMEM;
+ continue;
}

usb_fill_bulk_urb(entry, priv->udev,
usb_rcvbulkpipe(priv->udev, P54U_PIPE_DATA),
skb_tail_pointer(skb),
priv->common.rx_mtu + 32, p54u_rx_cb, skb);
+
info = (struct p54u_rx_info *) skb->cb;
info->urb = entry;
info->dev = dev;
- skb_queue_tail(&priv->rx_queue, skb);

usb_anchor_urb(entry, &priv->submitted);
- ret = usb_submit_urb(entry, GFP_KERNEL);
- if (ret) {
- skb_unlink(skb, &priv->rx_queue);
+ err = usb_submit_urb(entry, GFP_KERNEL);
+ if (err) {
+ /*
+ * URB submission failed.
+ * Free the associated skb and put the URB back into
+ * the front of the refill list, so we can try our luck
+ * next time.
+ */
+
+ dev_err(&priv->udev->dev, "failed to resubmit %p\n",
+ entry);
+
usb_unanchor_urb(entry);
- goto err;
+ kfree_skb(skb);
+ usb_anchor_urb(entry, &priv->urb_pool);
+ } else {
+ skb_queue_tail(&priv->rx_queue, skb);
+ refilled_urbs++;
}
usb_free_urb(entry);
- entry = NULL;
+ }
+
+ return refilled_urbs ? 0 : err;
+}
+
+static int p54u_open(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ int err;
+
+ err = p54u_rx_refill(dev);
+ if (err)
+ return err;
+
+ if (skb_queue_len(&priv->rx_queue) != 32) {
+ dev_err(&priv->udev->dev, "Not enough useable transfer buffers "
+ "available to initialize the device.");
+ p54u_cancel_urbs(dev);
+ return -ENOMEM;
}

return 0;
+}

- err:
- usb_free_urb(entry);
- kfree_skb(skb);
- p54u_free_urbs(dev);
- return ret;
+static void p54u_drain_urb_pool(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ struct urb *entry;
+
+ while ((entry = usb_get_from_anchor(&priv->urb_pool))) {
+ usb_free_urb(entry);
+ usb_free_urb(entry);
+ }
+}
+
+
+static int p54u_init_urbs(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ struct urb *entry;
+ int err = 0;
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ entry = usb_alloc_urb(0, GFP_KERNEL);
+ if (!entry) {
+ err = -ENOMEM;
+ break;
+ }
+
+ usb_anchor_urb(entry, &priv->urb_pool);
+ }
+
+ if (err)
+ p54u_drain_urb_pool(dev);
+
+ return err;
}

static void p54u_tx_3887(struct ieee80211_hw *dev, struct sk_buff *skb)
@@ -880,28 +1015,12 @@ static int p54u_upload_firmware_net2280(struct ieee80211_hw *dev)
return err;
}

-static int p54u_open(struct ieee80211_hw *dev)
+static void p54u_rx_refill_work(struct work_struct *work)
{
- struct p54u_priv *priv = dev->priv;
- int err;
-
- err = p54u_init_urbs(dev);
- if (err) {
- return err;
- }
-
- priv->common.open = p54u_init_urbs;
-
- return 0;
-}
+ struct p54u_priv *priv = container_of(work, struct p54u_priv,
+ rx_refill_work);

-static void p54u_stop(struct ieee80211_hw *dev)
-{
- /* TODO: figure out how to reliably stop the 3887 and net2280 so
- the hardware is still usable next time we want to start it.
- until then, we just stop listening to the hardware.. */
- p54u_free_urbs(dev);
- return;
+ p54u_rx_refill(priv->common.hw);
}

static int __devinit p54u_probe(struct usb_interface *intf,
@@ -928,6 +1047,8 @@ static int __devinit p54u_probe(struct usb_interface *intf,
priv->intf = intf;
skb_queue_head_init(&priv->rx_queue);
init_usb_anchor(&priv->submitted);
+ init_usb_anchor(&priv->urb_pool);
+ INIT_WORK(&priv->rx_refill_work, p54u_rx_refill_work);

usb_get_dev(udev);

@@ -949,8 +1070,7 @@ static int __devinit p54u_probe(struct usb_interface *intf,
recognized_pipes++;
}
}
- priv->common.open = p54u_open;
- priv->common.stop = p54u_stop;
+
if (recognized_pipes < P54U_PIPE_NUMBER) {
priv->hw_type = P54U_3887;
err = p54u_upload_firmware_3887(dev);
@@ -970,12 +1090,19 @@ static int __devinit p54u_probe(struct usb_interface *intf,
if (err)
goto err_free_dev;

- p54u_open(dev);
+ err = p54u_init_urbs(dev);
+ if (err)
+ goto err_free_dev;
+ err = p54u_open(dev);
+ if (err)
+ goto err_free_dev;
err = p54_read_eeprom(dev);
- p54u_stop(dev);
+ p54u_cancel_urbs(dev);
if (err)
goto err_free_dev;

+ priv->common.open = p54u_open;
+ priv->common.stop = p54u_cancel_urbs;
err = ieee80211_register_hw(dev);
if (err) {
dev_err(&udev->dev, "(p54usb) Cannot register netdevice\n");
@@ -999,9 +1126,12 @@ static void __devexit p54u_disconnect(struct usb_interface *intf)
if (!dev)
return;

+ priv = dev->priv;
+
+ p54u_drain_urb_pool(dev);
+
ieee80211_unregister_hw(dev);

- priv = dev->priv;
usb_put_dev(interface_to_usbdev(intf));
p54_free_common(dev);
ieee80211_free_hw(dev);
diff --git a/drivers/net/wireless/p54/p54usb.h b/drivers/net/wireless/p54/p54usb.h
index 8bc5898..4a5d8b2 100644
--- a/drivers/net/wireless/p54/p54usb.h
+++ b/drivers/net/wireless/p54/p54usb.h
@@ -132,9 +132,10 @@ struct p54u_priv {
P54U_3887
} hw_type;

- spinlock_t lock;
struct sk_buff_head rx_queue;
+ struct work_struct rx_refill_work;
struct usb_anchor submitted;
+ struct usb_anchor urb_pool;
};

#endif /* P54USB_H */

2009-01-24 21:41:49

by Artur Skawina

[permalink] [raw]

Subject: Re: [RFC][PATCH v2] p54usb: rx refill revamp

Artur Skawina wrote:
> Artur Skawina wrote:
>> Christian Lamparter wrote:
>>> On Saturday 24 January 2009 05:18:07 Artur Skawina wrote:
>>>> [My version schedules the work for every urb, even the poisoned ones]
>>> well, there's now a hard limit... no change of a endless loop now.
>> The whole point of the poisoning was to prevent resubmission when
>> canceling the urbs -- if you work around that manually, you could just
>> as well kill them, instead of poisoning.
>> I don't understand why want to add extra code to the rx irq just to
>> avoid scheduling a work when downing the i/f, and keep a nasty failure
>> case. The difference in down() performance is not going to be measurable,
>> and even if it was, it wouldn't matter.
>
> Oh, and we could always do something like
>
> if (likely(atomic_read(&urb->reject)==0))
> queue_work(priv->common.hw->workqueue, &priv->rx_refill_work);
>
> which should catch most cases when urbs are either killed or poisoned.

So the full patch vs wireless-testing would look like this:

[haven't merged the urb-cache and list->anchor-conversion changes yet]

diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 9539ddc..c24fc53 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -80,22 +80,33 @@ static struct usb_device_id p54u_table[] __devinitdata = {

MODULE_DEVICE_TABLE(usb, p54u_table);

+/*
+ * Every successfully submitted RX urb goes through this completion
+ * function. Each one must be added to the refill list after being
+ * processed, this includes the failed/cancelled ones (status!=0).
+ * p54u_rx_refill() will take care of resubmitting them later.
+ * Alternatively, an urb can be reanchored and resubmitted, it will
+ * then come back here again, after the I/O is done.
+ */
+
static void p54u_rx_cb(struct urb *urb)
{
struct sk_buff *skb = (struct sk_buff *) urb->context;
struct p54u_rx_info *info = (struct p54u_rx_info *)skb->cb;
struct ieee80211_hw *dev = info->dev;
struct p54u_priv *priv = dev->priv;
+ unsigned long flags;

skb_unlink(skb, &priv->rx_queue);

if (unlikely(urb->status)) {
dev_kfree_skb_irq(skb);
- return;
+ goto stash_urb;
}

skb_put(skb, urb->actual_length);

+ /* remove firmware/device specific headers in front of the frame */
if (priv->hw_type == P54U_NET2280)
skb_pull(skb, priv->common.tx_hdr_len);
if (priv->common.fw_interface == FW_LM87) {
@@ -103,19 +114,12 @@ static void p54u_rx_cb(struct urb *urb)
skb_put(skb, 4);
}

- if (p54_rx(dev, skb)) {
- skb = dev_alloc_skb(priv->common.rx_mtu + 32);
- if (unlikely(!skb)) {
- /* TODO check rx queue length and refill *somewhere* */
- return;
- }
+ if (p54_rx(dev, skb) == 0) {
+ /*
+ * This skb can be reused.
+ * Undo all modifications and resubmit it.
+ */

- info = (struct p54u_rx_info *) skb->cb;
- info->urb = urb;
- info->dev = dev;
- urb->transfer_buffer = skb_tail_pointer(skb);
- urb->context = skb;
- } else {
if (priv->hw_type == P54U_NET2280)
skb_push(skb, priv->common.tx_hdr_len);
if (priv->common.fw_interface == FW_LM87) {
@@ -129,14 +133,60 @@ static void p54u_rx_cb(struct urb *urb)
WARN_ON(1);
urb->transfer_buffer = skb_tail_pointer(skb);
}
+resubmit_urb: /* Now both the urb and the skb are as good as new */
+ usb_anchor_urb(urb, &priv->submitted);
+ if (usb_submit_urb(urb, GFP_ATOMIC) == 0) {
+ skb_queue_tail(&priv->rx_queue, skb);
+ return ;
+ } else {
+ usb_unanchor_urb(urb);
+ dev_kfree_skb_irq(skb);
+ }
+ } else {
+ /*
+ * This skb has been given away to the mac80211 layer.
+ * We should put the urb on the refill list, where it
+ * can be linked to a newly allocated skb later.
+ * Except, if the workqueue that does the refilling can't
+ * keep up, we'll try a bit harder and attempt to obtain
+ * a new skb right here.
+ */
+ if (skb_queue_len(&priv->rx_queue)<29) {
+ skb = dev_alloc_skb(priv->common.rx_mtu + 32);
+ if (skb) {
+ info = (struct p54u_rx_info *)skb->cb;
+ info->urb = urb;
+ info->dev = dev;
+ /* Update the urb to use the new skb */
+ urb->transfer_buffer = skb_tail_pointer(skb);
+ urb->context = skb;
+ goto resubmit_urb;
+ }
+ }
}
- skb_queue_tail(&priv->rx_queue, skb);
- usb_anchor_urb(urb, &priv->submitted);
- if (usb_submit_urb(urb, GFP_ATOMIC)) {
- skb_unlink(skb, &priv->rx_queue);
- usb_unanchor_urb(urb);
- dev_kfree_skb_irq(skb);
+
+ /*
+ * This skb CANNOT be reused.
+ * Put the now unused urb into a list and do the refill later on in
+ * the less critical workqueue thread.
+ * This eases the memory pressure and prevents latency spikes.
+ */
+
+stash_urb:
+ urb->transfer_buffer = NULL;
+ urb->context = NULL;
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add_tail(&urb->urb_list, &priv->rx_refill_list);
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+
+ if (atomic_read(&urb->reject)==0 && !priv->common.hw->workqueue) {
+ printk(KERN_WARNING "p54u_rx_cb(): workqueue missing\n");
+ return;
}
+
+ if (likely(atomic_read(&urb->reject)==0))
+ queue_work(priv->common.hw->workqueue, &priv->rx_refill_work);
}

static void p54u_tx_cb(struct urb *urb)
@@ -150,58 +200,129 @@ static void p54u_tx_cb(struct urb *urb)

static void p54u_tx_dummy_cb(struct urb *urb) { }

+static void p54u_free_rx_refill_list(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ struct urb *entry, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_for_each_entry_safe(entry, tmp, &priv->rx_refill_list, urb_list) {
+ list_del(&entry->urb_list);
+ usb_free_urb(entry);
+ }
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+}
+
static void p54u_free_urbs(struct ieee80211_hw *dev)
{
struct p54u_priv *priv = dev->priv;
- usb_kill_anchored_urbs(&priv->submitted);
+
+ usb_poison_anchored_urbs(&priv->submitted);
+ /*
+ * By now every RX urb has either finished or been cancelled;
+ * the p54u_rx_cb() completion has placed it on the refill
+ * list; any attempts to resubmit from p54u_rx_refill(),
+ * which could still be scheduled to run, will fail.
+ */
+ cancel_work_sync(&priv->rx_refill_work);
+ p54u_free_rx_refill_list(dev);
+ /*
+ * Unpoison the anchor itself; the old urbs are already gone,
+ * p54u_rx_cb() has moved them all to the refill list.
+ * Prevents new urbs from being poisoned when anchored.
+ */
+ usb_unpoison_anchored_urbs(&priv->submitted);
}

-static int p54u_init_urbs(struct ieee80211_hw *dev)
+static int p54u_rx_refill(struct ieee80211_hw *dev)
{
struct p54u_priv *priv = dev->priv;
- struct urb *entry = NULL;
- struct sk_buff *skb;
- struct p54u_rx_info *info;
+ struct urb *entry, *tmp;
+ unsigned long flags;
+ unsigned int filled = 0;
int ret = 0;

- while (skb_queue_len(&priv->rx_queue) < 32) {
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_for_each_entry_safe(entry, tmp, &priv->rx_refill_list, urb_list) {
+ struct p54u_rx_info *info;
+ struct sk_buff *skb;
+
+ list_del(&entry->urb_list);
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
- if (!skb) {
- ret = -ENOMEM;
- goto err;
- }
- entry = usb_alloc_urb(0, GFP_KERNEL);
- if (!entry) {
- ret = -ENOMEM;
- goto err;
+
+ if (unlikely(!skb)) {
+ /*
+ * In order to prevent a loop, we put the urb
+ * back at the _front_ of the list, so we can
+ * march on, in out-of-memory situations.
+ */
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add(&entry->urb_list, &priv->rx_refill_list);
+ continue;
}

usb_fill_bulk_urb(entry, priv->udev,
usb_rcvbulkpipe(priv->udev, P54U_PIPE_DATA),
skb_tail_pointer(skb),
priv->common.rx_mtu + 32, p54u_rx_cb, skb);
+
info = (struct p54u_rx_info *) skb->cb;
info->urb = entry;
info->dev = dev;
- skb_queue_tail(&priv->rx_queue, skb);

usb_anchor_urb(entry, &priv->submitted);
- ret = usb_submit_urb(entry, GFP_KERNEL);
+ ret=usb_submit_urb(entry, GFP_KERNEL);
if (ret) {
- skb_unlink(skb, &priv->rx_queue);
+ /*
+ * urb submittion failed.
+ * free the associated skb and put the urb back into
+ * the front of the refill list, so we can try our luck
+ * next time.
+ */
+
usb_unanchor_urb(entry);
- goto err;
+ kfree_skb(skb);
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add(&entry->urb_list, &priv->rx_refill_list);
+ if (ret==-EPERM) /* urb has been poisoned */
+ break; /* no point in trying to submit the rest */
+ } else {
+ skb_queue_tail(&priv->rx_queue, skb);
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ filled++;
}
- usb_free_urb(entry);
- entry = NULL;
}
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+ return filled ? 0 : -ENOMEM;
+}

- return 0;
+static int p54u_init_urbs(struct ieee80211_hw *dev)
+{
+ struct p54u_priv *priv = dev->priv;
+ struct urb *entry;
+ unsigned long flags;
+ int ret = 0;
+ int i;

- err:
- usb_free_urb(entry);
- kfree_skb(skb);
- p54u_free_urbs(dev);
+ for (i = 0; i < 32; i++) {
+ entry = usb_alloc_urb(0, GFP_KERNEL);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ spin_lock_irqsave(&priv->rx_refill_lock, flags);
+ list_add_tail(&entry->urb_list, &priv->rx_refill_list);
+ spin_unlock_irqrestore(&priv->rx_refill_lock, flags);
+ }
+
+ p54u_rx_refill(dev);
+err:
+ if (ret)
+ p54u_free_rx_refill_list(dev);
return ret;
}

@@ -880,6 +1001,14 @@ static int p54u_upload_firmware_net2280(struct ieee80211_hw *dev)
return err;
}

+static void p54u_rx_refill_work(struct work_struct *work)
+{
+ struct p54u_priv *priv = container_of(work, struct p54u_priv,
+ rx_refill_work);
+
+ p54u_rx_refill(priv->common.hw);
+}
+
static int p54u_open(struct ieee80211_hw *dev)
{
struct p54u_priv *priv = dev->priv;
@@ -928,6 +1057,9 @@ static int __devinit p54u_probe(struct usb_interface *intf,
priv->intf = intf;
skb_queue_head_init(&priv->rx_queue);
init_usb_anchor(&priv->submitted);
+ INIT_WORK(&priv->rx_refill_work, p54u_rx_refill_work);
+ spin_lock_init(&priv->rx_refill_lock);
+ INIT_LIST_HEAD(&priv->rx_refill_list);

usb_get_dev(udev);

@@ -999,6 +1131,8 @@ static void __devexit p54u_disconnect(struct usb_interface *intf)
if (!dev)
return;

+ p54u_free_rx_refill_list(dev);
+
ieee80211_unregister_hw(dev);

priv = dev->priv;
diff --git a/drivers/net/wireless/p54/p54usb.h b/drivers/net/wireless/p54/p54usb.h
index 8bc5898..04c7258 100644
--- a/drivers/net/wireless/p54/p54usb.h
+++ b/drivers/net/wireless/p54/p54usb.h
@@ -134,6 +134,10 @@ struct p54u_priv {

spinlock_t lock;
struct sk_buff_head rx_queue;
+ spinlock_t rx_refill_lock;
+ struct list_head rx_refill_list;
+ struct work_struct rx_refill_work;
+
struct usb_anchor submitted;
};