This patch tries to address a long standing issue:
how to survive serve memory starvation situations,
without losing the device due to missing transfer-buffers.
And with a flick of __GFP_NOWARN, we're able to handle ?all? memory
allocation failures on the rx-side during operation without much fuss.
However, there is still an issue within the xmit-part.
This is likely due to p54's demand for a large free headroom for
every outgoing frame:
+ transport header (differs from device to device)
-> 16 bytes transport header (USB 1st gen)
-> 8 bytes for (USB 2nd gen)
-> 0 bytes for spi & pci
+ 12 bytes for p54_hdr
+ 44 bytes for p54_tx_data
+ up to 3 bytes for alignment
(+ 802.11 header as well? )
and this is where ieee80211_skb_resize comes into the play...
which will try to _relocate_ (alloc new, copy, free old) frame data,
as the headroom is most of the time simply not enough.
=>
Call Trace: (from Larry - Bug #13319 )
[<ffffffff80292a7b>] __alloc_pages_internal+0x43d/0x45e
[<ffffffff802b1f1f>] alloc_pages_current+0xbe/0xc6
[<ffffffff802b6362>] new_slab+0xcf/0x28b
[<ffffffff802b4d1f>] ? unfreeze_slab+0x4c/0xbd
[<ffffffff802b672e>] __slab_alloc+0x210/0x44c
[<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
[<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
[<ffffffff802b7e60>] __kmalloc+0x119/0x194
[<ffffffff803e7bee>] pskb_expand_head+0x52/0x166
[<ffffffffa02913d6>] ieee80211_skb_resize+0x91/0xc7 [mac80211]
[<ffffffffa0291c0f>] ieee80211_master_start_xmit+0x298/0x319 [mac80211]
[<ffffffff803ef72a>] dev_hard_start_xmit+0x229/0x2a8
(sl*b debug option will help to bloat even more.)
So?! how to prevent ieee80211_skb_resize from raping
the bits of memory left?
the simplest answer is probably this one:
https://dev.openwrt.org/changeset/15761
--
back to rx failures.
the attached code below was only usb was tested so far!
you have been warned!
regards,
chr
btw: max what do you think about the p54spi changes, are they total ****?
---
diff --git a/drivers/net/wireless/p54/fwio.c b/drivers/net/wireless/p54/fwio.c
index 349375f..c9bc1da 100644
--- a/drivers/net/wireless/p54/fwio.c
+++ b/drivers/net/wireless/p54/fwio.c
@@ -94,7 +94,7 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw)
else
priv->rx_mtu = (size_t)
0x620 - priv->tx_hdr_len;
- maxlen = priv->tx_hdr_len + /* USB devices */
+ maxlen = priv->rx_hdr_len + /* USB devices */
sizeof(struct p54_rx_data) +
4 + /* rx alignment */
IEEE80211_MAX_FRAG_THRESHOLD;
@@ -103,6 +103,18 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw)
"to %d\n", priv->rx_mtu, maxlen);
priv->rx_mtu = maxlen;
}
+
+ /*
+ * Firmware may insert up to 4 padding bytes after
+ * the lmac header, but it doesn't amend the size
+ * of data transfer.
+ * Such packets has correct data size in header, thus
+ * referencing past the end of allocated skb.
+ * Therefore reserve 4 extra bytes for this case.
+ */
+ if (priv->rx_wa_extra_tail_space)
+ priv->rx_mtu += 4;
+
break;
}
case BR_CODE_EXPOSED_IF:
@@ -312,7 +324,7 @@ int p54_setup_mac(struct p54_common *priv)
{
struct sk_buff *skb;
struct p54_setup_mac *setup;
- u16 mode;
+ u16 mode, rx_mtu = priv->rx_mtu;
skb = p54_alloc_skb(priv, P54_HDR_FLAG_CONTROL_OPSET, sizeof(*setup),
P54_CONTROL_TYPE_SETUP, GFP_ATOMIC);
@@ -356,17 +368,25 @@ int p54_setup_mac(struct p54_common *priv)
memcpy(setup->bssid, priv->bssid, ETH_ALEN);
setup->rx_antenna = 2 & priv->rx_diversity_mask; /* automatic */
setup->rx_align = 0;
+
+ /*
+ * reserve additional bytes to compensate for the possible
+ * alignment-caused truncation.
+ */
+ if (priv->rx_wa_extra_tail_space)
+ rx_mtu -= 4;
+
if (priv->fw_var < 0x500) {
setup->v1.basic_rate_mask = cpu_to_le32(priv->basic_rate_mask);
memset(setup->v1.rts_rates, 0, 8);
setup->v1.rx_addr = cpu_to_le32(priv->rx_end);
- setup->v1.max_rx = cpu_to_le16(priv->rx_mtu);
+ setup->v1.max_rx = cpu_to_le16(rx_mtu);
setup->v1.rxhw = cpu_to_le16(priv->rxhw);
setup->v1.wakeup_timer = cpu_to_le16(priv->wakeup_timer);
setup->v1.unalloc0 = cpu_to_le16(0);
} else {
setup->v2.rx_addr = cpu_to_le32(priv->rx_end);
- setup->v2.max_rx = cpu_to_le16(priv->rx_mtu);
+ setup->v2.max_rx = cpu_to_le16(rx_mtu);
setup->v2.rxhw = cpu_to_le16(priv->rxhw);
setup->v2.timer = cpu_to_le16(priv->wakeup_timer);
setup->v2.truncate = cpu_to_le16(48896);
diff --git a/drivers/net/wireless/p54/p54.h b/drivers/net/wireless/p54/p54.h
index 6772ed5..30ebde1 100644
--- a/drivers/net/wireless/p54/p54.h
+++ b/drivers/net/wireless/p54/p54.h
@@ -176,6 +176,8 @@ struct p54_common {
/* firmware/hardware info */
unsigned int tx_hdr_len;
+ unsigned int rx_hdr_len;
+ bool rx_wa_extra_tail_space;
unsigned int fw_var;
unsigned int fw_interface;
u8 version;
@@ -234,7 +236,7 @@ struct p54_common {
};
/* interfaces for the drivers */
-int p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb);
+struct sk_buff *p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb);
void p54_free_skb(struct ieee80211_hw *dev, struct sk_buff *skb);
int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw);
int p54_parse_eeprom(struct ieee80211_hw *dev, void *eeprom, int len);
@@ -245,5 +247,6 @@ int p54_register_common(struct ieee80211_hw *dev, struct device *pdev);
void p54_free_common(struct ieee80211_hw *dev);
void p54_unregister_common(struct ieee80211_hw *dev);
+struct sk_buff *p54_alloc_rxskb(struct p54_common *priv, gfp_t gfp_mask);
#endif /* P54_H */
diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index d348c26..7d055da 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -149,7 +149,7 @@ static void p54p_refill_rx_ring(struct ieee80211_hw *dev,
if (!desc->host_addr) {
struct sk_buff *skb;
dma_addr_t mapping;
- skb = dev_alloc_skb(priv->common.rx_mtu + 32);
+ skb = dev_alloc_skb(priv->common.rx_mtu);
if (!skb)
break;
@@ -186,6 +186,7 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
(*index) = idx = le32_to_cpu(ring_control->device_idx[ring_index]);
idx %= ring_limit;
while (i != idx) {
+ dma_addr_t mapping;
u16 len;
struct sk_buff *skb;
desc = &ring[i];
@@ -197,25 +198,32 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
i %= ring_limit;
continue;
}
+
+ pci_unmap_single(priv->pdev,
+ le32_to_cpu(desc->host_addr),
+ priv->common.rx_mtu + 32,
+ PCI_DMA_FROMDEVICE);
+ rx_buf[i] = NULL;
+ desc->host_addr = 0;
skb_put(skb, len);
- if (p54_rx(dev, skb)) {
- pci_unmap_single(priv->pdev,
- le32_to_cpu(desc->host_addr),
- priv->common.rx_mtu + 32,
+ skb = p54_rx(dev, skb);
+ mapping = pci_map_single(priv->pdev,
+ skb_tail_pointer(skb),
+ skb->len,
PCI_DMA_FROMDEVICE);
- rx_buf[i] = NULL;
- desc->host_addr = 0;
- } else {
- skb_trim(skb, 0);
- desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
- }
+
+ desc->host_addr = cpu_to_le32(mapping);
+ desc->device_addr = 0;
+ desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
+ desc->flags = 0;
+ rx_buf[i] = skb;
i++;
i %= ring_limit;
}
-
- p54p_refill_rx_ring(dev, ring_index, ring, ring_limit, rx_buf);
+ wmb();
+ ring_control->host_idx[ring_index] = cpu_to_le32(idx);
}
/* caller must hold priv->lock */
diff --git a/drivers/net/wireless/p54/p54spi.c b/drivers/net/wireless/p54/p54spi.c
index 9b347ce..4c6ac3f 100644
--- a/drivers/net/wireless/p54/p54spi.c
+++ b/drivers/net/wireless/p54/p54spi.c
@@ -389,14 +389,10 @@ static int p54spi_rx(struct p54s_priv *priv)
return 0;
}
- /* Firmware may insert up to 4 padding bytes after the lmac header,
- * but it does not amend the size of SPI data transfer.
- * Such packets has correct data size in header, thus referencing
- * past the end of allocated skb. Reserve extra 4 bytes for this case */
- skb = dev_alloc_skb(len + 4);
+ skb = skb_dequeue(&priv->rx_pool);
if (!skb) {
p54spi_sleep(priv);
- dev_err(&priv->spi->dev, "could not alloc skb");
+ dev_err(&priv->spi->dev, "could not get skb");
return -ENOMEM;
}
@@ -409,12 +405,9 @@ static int p54spi_rx(struct p54s_priv *priv)
len - READAHEAD_SZ);
}
p54spi_sleep(priv);
- /* Put additional bytes to compensate for the possible
- * alignment-caused truncation */
- skb_put(skb, 4);
- if (p54_rx(priv->hw, skb) == 0)
- dev_kfree_skb(skb);
+ skb = p54_rx(priv->hw, skb);
+ skb_queue_tail(&priv->rx_pool, skb);
return 0;
}
@@ -629,6 +622,7 @@ static void p54spi_op_stop(struct ieee80211_hw *dev)
static int __devinit p54spi_probe(struct spi_device *spi)
{
struct p54s_priv *priv = NULL;
+ struct sk_buff *skb;
struct ieee80211_hw *hw;
int ret = -EINVAL;
@@ -645,6 +639,8 @@ static int __devinit p54spi_probe(struct spi_device *spi)
spi->bits_per_word = 16;
spi->max_speed_hz = 24000000;
+ skb_queue_head_init(&priv->rx_pool);
+ priv->common.rx_wa_extra_tail_space = true;
ret = spi_setup(spi);
if (ret < 0) {
@@ -689,6 +685,11 @@ static int __devinit p54spi_probe(struct spi_device *spi)
priv->common.stop = p54spi_op_stop;
priv->common.tx = p54spi_op_tx;
+ skb = __dev_alloc_skb(priv->common.rx_mtu, GFP_KERNEL);
+ if (!skb)
+ goto err_free_common;
+ skb_queue_tail(&priv->rx_pool, skb);
+
ret = p54spi_request_firmware(hw);
if (ret < 0)
goto err_free_common;
@@ -705,6 +706,7 @@ static int __devinit p54spi_probe(struct spi_device *spi)
err_free_common:
p54_free_common(priv->hw);
+ skb_queue_purge(&priv->rx_pool);
return ret;
}
@@ -715,6 +717,7 @@ static int __devexit p54spi_remove(struct spi_device *spi)
p54_unregister_common(priv->hw);
free_irq(gpio_to_irq(p54spi_gpio_irq), spi);
+ skb_queue_purge(&priv->rx_pool);
gpio_free(p54spi_gpio_power);
gpio_free(p54spi_gpio_irq);
diff --git a/drivers/net/wireless/p54/p54spi.h b/drivers/net/wireless/p54/p54spi.h
index 7fbe8d8..11ef2d5 100644
--- a/drivers/net/wireless/p54/p54spi.h
+++ b/drivers/net/wireless/p54/p54spi.h
@@ -120,6 +120,8 @@ struct p54s_priv {
enum fw_state fw_state;
const struct firmware *firmware;
+
+ struct sk_buff_head rx_pool;
};
#endif /* P54SPI_H */
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 461d88f..c521bbc 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -120,37 +120,14 @@ static void p54u_rx_cb(struct urb *urb)
}
skb_put(skb, urb->actual_length);
+ skb = p54_rx(dev, skb);
- if (priv->hw_type == P54U_NET2280)
- skb_pull(skb, priv->common.tx_hdr_len);
- if (priv->common.fw_interface == FW_LM87) {
- skb_pull(skb, 4);
- skb_put(skb, 4);
- }
-
- if (p54_rx(dev, skb)) {
- skb = dev_alloc_skb(priv->common.rx_mtu + 32);
- if (unlikely(!skb)) {
- /* TODO check rx queue length and refill *somewhere* */
- return;
- }
+ info = (struct p54u_rx_info *) skb->cb;
+ info->urb = urb;
+ info->dev = dev;
+ urb->transfer_buffer = skb_tail_pointer(skb);
+ urb->context = skb;
- info = (struct p54u_rx_info *) skb->cb;
- info->urb = urb;
- info->dev = dev;
- urb->transfer_buffer = skb_tail_pointer(skb);
- urb->context = skb;
- } else {
- if (priv->hw_type == P54U_NET2280)
- skb_push(skb, priv->common.tx_hdr_len);
- if (priv->common.fw_interface == FW_LM87) {
- skb_push(skb, 4);
- skb_put(skb, 4);
- }
- skb_reset_tail_pointer(skb);
- skb_trim(skb, 0);
- urb->transfer_buffer = skb_tail_pointer(skb);
- }
skb_queue_tail(&priv->rx_queue, skb);
usb_anchor_urb(urb, &priv->submitted);
if (usb_submit_urb(urb, GFP_ATOMIC)) {
@@ -186,7 +163,7 @@ static int p54u_init_urbs(struct ieee80211_hw *dev)
int ret = 0;
while (skb_queue_len(&priv->rx_queue) < 32) {
- skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
+ skb = __dev_alloc_skb(priv->common.rx_mtu, GFP_KERNEL);
if (!skb) {
ret = -ENOMEM;
goto err;
@@ -927,14 +904,17 @@ static int __devinit p54u_probe(struct usb_interface *intf,
#endif
priv->hw_type = P54U_3887;
+ priv->common.rx_wa_extra_tail_space = true;
dev->extra_tx_headroom += sizeof(struct lm87_tx_hdr);
priv->common.tx_hdr_len = sizeof(struct lm87_tx_hdr);
+ priv->common.rx_hdr_len = sizeof(struct lm87_rx_hdr);
priv->common.tx = p54u_tx_lm87;
priv->upload_fw = p54u_upload_firmware_3887;
} else {
priv->hw_type = P54U_NET2280;
dev->extra_tx_headroom += sizeof(struct net2280_tx_hdr);
priv->common.tx_hdr_len = sizeof(struct net2280_tx_hdr);
+ priv->common.rx_hdr_len = sizeof(struct net2280_tx_hdr);
priv->common.tx = p54u_tx_net2280;
priv->upload_fw = p54u_upload_firmware_net2280;
}
diff --git a/drivers/net/wireless/p54/p54usb.h b/drivers/net/wireless/p54/p54usb.h
index e935b79..685a2b4 100644
--- a/drivers/net/wireless/p54/p54usb.h
+++ b/drivers/net/wireless/p54/p54usb.h
@@ -77,6 +77,10 @@ struct lm87_tx_hdr {
__le32 chksum;
} __attribute__((packed));
+struct lm87_rx_hdr {
+ __le32 chksum;
+} __packed;
+
/* Some flags for the isl hardware registers controlling DMA inside the
* chip */
#define ISL38XX_DMA_STATUS_DONE 0x00000001
diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
index ea074a6..45e5e88 100644
--- a/drivers/net/wireless/p54/txrx.c
+++ b/drivers/net/wireless/p54/txrx.c
@@ -324,10 +324,11 @@ static void p54_pspoll_workaround(struct p54_common *priv, struct sk_buff *skb)
}
}
-static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb)
+static struct sk_buff *p54_rx_data(struct p54_common *priv, struct sk_buff *skb)
{
struct p54_rx_data *hdr = (struct p54_rx_data *) skb->data;
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
+ struct sk_buff *tmp;
u16 freq = le16_to_cpu(hdr->freq);
size_t header_len = sizeof(*hdr);
u32 tsf32;
@@ -339,10 +340,14 @@ static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb)
* nasty crash.
*/
if (unlikely(priv->mode == NL80211_IFTYPE_UNSPECIFIED))
- return 0;
+ return skb;
if (!(hdr->flags & cpu_to_le16(P54_HDR_FLAG_DATA_IN_FCS_GOOD)))
- return 0;
+ return skb;
+
+ tmp = dev_alloc_skb(priv->rx_mtu);
+ if (!tmp)
+ return skb;
if (hdr->decrypt_status == P54_DECRYPT_OK)
rx_status->flag |= RX_FLAG_DECRYPTED;
@@ -384,7 +389,7 @@ static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb)
queue_delayed_work(priv->hw->workqueue, &priv->work,
msecs_to_jiffies(P54_STATISTICS_UPDATE));
- return -1;
+ return tmp;
}
static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb)
@@ -566,7 +571,7 @@ static void p54_rx_trap(struct p54_common *priv, struct sk_buff *skb)
}
}
-static int p54_rx_control(struct p54_common *priv, struct sk_buff *skb)
+static struct sk_buff *p54_rx_control(struct p54_common *priv, struct sk_buff *skb)
{
struct p54_hdr *hdr = (struct p54_hdr *) skb->data;
@@ -590,19 +595,52 @@ static int p54_rx_control(struct p54_common *priv, struct sk_buff *skb)
wiphy_name(priv->hw->wiphy), le16_to_cpu(hdr->type));
break;
}
- return 0;
+ return skb;
}
-/* returns zero if skb can be reused */
-int p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb)
+/* returns a new skb in case the old one was send to mac80211. */
+struct sk_buff *p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb)
{
struct p54_common *priv = dev->priv;
- u16 type = le16_to_cpu(*((__le16 *)skb->data));
+ struct sk_buff *replacement;
+ struct p54_hdr *hdr;
+
+ if (unlikely(skb->len < sizeof(*hdr) + priv->rx_hdr_len)) {
+ if (net_ratelimit())
+ printk(KERN_ERR "%s: short read! - if this message "
+ "persists => check the device or cable.\n",
+ wiphy_name(dev->wiphy));
+
+ return skb;
+ }
+
+ if (priv->rx_wa_extra_tail_space) {
+ /*
+ * Put additional bytes to compensate for the possible
+ * alignment-caused truncation.
+ */
+
+ skb_put(skb, 4);
+ }
+
+ skb_pull(skb, priv->rx_hdr_len);
+ hdr = (void *) skb->data;
- if (type & P54_HDR_FLAG_CONTROL)
- return p54_rx_control(priv, skb);
+ if (hdr->flags & cpu_to_le16(P54_HDR_FLAG_CONTROL))
+ replacement = p54_rx_control(priv, skb);
else
- return p54_rx_data(priv, skb);
+ replacement = p54_rx_data(priv, skb);
+
+ if (replacement == skb) {
+ /*
+ * This skb was not _consumed_ by mac80211.
+ * Reinitialize dirty data structure before returning it back.
+ */
+
+ skb_put(skb, priv->rx_hdr_len);
+ skb_trim(skb, 0);
+ }
+ return replacement;
}
EXPORT_SYMBOL_GPL(p54_rx);
Christian Lamparter wrote:
> This patch tries to address a long standing issue:
> how to survive serve memory starvation situations,
> without losing the device due to missing transfer-buffers.
>
> And with a flick of __GFP_NOWARN, we're able to handle ?all? memory
> allocation failures on the rx-side during operation without much fuss.
>
> However, there is still an issue within the xmit-part.
> This is likely due to p54's demand for a large free headroom for
> every outgoing frame:
>
-- snip --
> diff --git a/drivers/net/wireless/p54/fwio.c b/drivers/net/wireless/p54/fwio.c
> index 349375f..c9bc1da 100644
> --- a/drivers/net/wireless/p54/fwio.c
> +++ b/drivers/net/wireless/p54/fwio.c
> @@ -94,7 +94,7 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw)
> else
> priv->rx_mtu = (size_t)
> 0x620 - priv->tx_hdr_len;
> - maxlen = priv->tx_hdr_len + /* USB devices */
> + maxlen = priv->rx_hdr_len + /* USB devices */
> sizeof(struct p54_rx_data) +
> 4 + /* rx alignment */
> IEEE80211_MAX_FRAG_THRESHOLD;
> @@ -103,6 +103,18 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw)
> "to %d\n", priv->rx_mtu, maxlen);
> priv->rx_mtu = maxlen;
> }
> +
> + /*
> + * Firmware may insert up to 4 padding bytes after
> + * the lmac header, but it doesn't amend the size
> + * of data transfer.
> + * Such packets has correct data size in header, thus
===
s/has/have/
> + * referencing past the end of allocated skb.
> + * Therefore reserve 4 extra bytes for this case.
> + */
> + if (priv->rx_wa_extra_tail_space)
> + priv->rx_mtu += 4;
> +
> break;
> }
> case BR_CODE_EXPOSED_IF:
> @@ -312,7 +324,7 @@ int p54_setup_mac(struct p54_common *priv)
> {
> struct sk_buff *skb;
> struct p54_setup_mac *setup;
> - u16 mode;
> + u16 mode, rx_mtu = priv->rx_mtu;
>
> skb = p54_alloc_skb(priv, P54_HDR_FLAG_CONTROL_OPSET, sizeof(*setup),
> P54_CONTROL_TYPE_SETUP, GFP_ATOMIC);
> @@ -356,17 +368,25 @@ int p54_setup_mac(struct p54_common *priv)
> memcpy(setup->bssid, priv->bssid, ETH_ALEN);
> setup->rx_antenna = 2 & priv->rx_diversity_mask; /* automatic */
> setup->rx_align = 0;
> +
> + /*
> + * reserve additional bytes to compensate for the possible
> + * alignment-caused truncation.
> + */
> + if (priv->rx_wa_extra_tail_space)
> + rx_mtu -= 4;
> +
> if (priv->fw_var < 0x500) {
> setup->v1.basic_rate_mask = cpu_to_le32(priv->basic_rate_mask);
> memset(setup->v1.rts_rates, 0, 8);
> setup->v1.rx_addr = cpu_to_le32(priv->rx_end);
> - setup->v1.max_rx = cpu_to_le16(priv->rx_mtu);
> + setup->v1.max_rx = cpu_to_le16(rx_mtu);
> setup->v1.rxhw = cpu_to_le16(priv->rxhw);
> setup->v1.wakeup_timer = cpu_to_le16(priv->wakeup_timer);
> setup->v1.unalloc0 = cpu_to_le16(0);
> } else {
> setup->v2.rx_addr = cpu_to_le32(priv->rx_end);
> - setup->v2.max_rx = cpu_to_le16(priv->rx_mtu);
> + setup->v2.max_rx = cpu_to_le16(rx_mtu);
> setup->v2.rxhw = cpu_to_le16(priv->rxhw);
> setup->v2.timer = cpu_to_le16(priv->wakeup_timer);
> setup->v2.truncate = cpu_to_le16(48896);
> diff --git a/drivers/net/wireless/p54/p54.h b/drivers/net/wireless/p54/p54.h
> index 6772ed5..30ebde1 100644
> --- a/drivers/net/wireless/p54/p54.h
> +++ b/drivers/net/wireless/p54/p54.h
> @@ -176,6 +176,8 @@ struct p54_common {
>
> /* firmware/hardware info */
> unsigned int tx_hdr_len;
> + unsigned int rx_hdr_len;
> + bool rx_wa_extra_tail_space;
> unsigned int fw_var;
> unsigned int fw_interface;
> u8 version;
> @@ -234,7 +236,7 @@ struct p54_common {
> };
>
> /* interfaces for the drivers */
> -int p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb);
> +struct sk_buff *p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb);
> void p54_free_skb(struct ieee80211_hw *dev, struct sk_buff *skb);
> int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw);
> int p54_parse_eeprom(struct ieee80211_hw *dev, void *eeprom, int len);
> @@ -245,5 +247,6 @@ int p54_register_common(struct ieee80211_hw *dev, struct device *pdev);
> void p54_free_common(struct ieee80211_hw *dev);
>
> void p54_unregister_common(struct ieee80211_hw *dev);
> +struct sk_buff *p54_alloc_rxskb(struct p54_common *priv, gfp_t gfp_mask);
>
> #endif /* P54_H */
> diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
> index d348c26..7d055da 100644
> --- a/drivers/net/wireless/p54/p54pci.c
> +++ b/drivers/net/wireless/p54/p54pci.c
> @@ -149,7 +149,7 @@ static void p54p_refill_rx_ring(struct ieee80211_hw *dev,
> if (!desc->host_addr) {
> struct sk_buff *skb;
> dma_addr_t mapping;
> - skb = dev_alloc_skb(priv->common.rx_mtu + 32);
> + skb = dev_alloc_skb(priv->common.rx_mtu);
> if (!skb)
> break;
>
> @@ -186,6 +186,7 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
> (*index) = idx = le32_to_cpu(ring_control->device_idx[ring_index]);
> idx %= ring_limit;
> while (i != idx) {
> + dma_addr_t mapping;
> u16 len;
> struct sk_buff *skb;
> desc = &ring[i];
> @@ -197,25 +198,32 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
> i %= ring_limit;
> continue;
> }
> +
> + pci_unmap_single(priv->pdev,
> + le32_to_cpu(desc->host_addr),
> + priv->common.rx_mtu + 32,
> + PCI_DMA_FROMDEVICE);
> + rx_buf[i] = NULL;
> + desc->host_addr = 0;
> skb_put(skb, len);
>
> - if (p54_rx(dev, skb)) {
> - pci_unmap_single(priv->pdev,
> - le32_to_cpu(desc->host_addr),
> - priv->common.rx_mtu + 32,
> + skb = p54_rx(dev, skb);
> + mapping = pci_map_single(priv->pdev,
> + skb_tail_pointer(skb),
> + skb->len,
> PCI_DMA_FROMDEVICE);
> - rx_buf[i] = NULL;
> - desc->host_addr = 0;
> - } else {
> - skb_trim(skb, 0);
> - desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
> - }
> +
> + desc->host_addr = cpu_to_le32(mapping);
> + desc->device_addr = 0;
> + desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
> + desc->flags = 0;
> + rx_buf[i] = skb;
>
> i++;
> i %= ring_limit;
> }
> -
> - p54p_refill_rx_ring(dev, ring_index, ring, ring_limit, rx_buf);
> + wmb();
> + ring_control->host_idx[ring_index] = cpu_to_le32(idx);
> }
>
> /* caller must hold priv->lock */
> diff --git a/drivers/net/wireless/p54/p54spi.c b/drivers/net/wireless/p54/p54spi.c
> index 9b347ce..4c6ac3f 100644
> --- a/drivers/net/wireless/p54/p54spi.c
> +++ b/drivers/net/wireless/p54/p54spi.c
> @@ -389,14 +389,10 @@ static int p54spi_rx(struct p54s_priv *priv)
> return 0;
> }
>
> - /* Firmware may insert up to 4 padding bytes after the lmac header,
> - * but it does not amend the size of SPI data transfer.
> - * Such packets has correct data size in header, thus referencing
===
s/has/have/
> - * past the end of allocated skb. Reserve extra 4 bytes for this case */
> - skb = dev_alloc_skb(len + 4);
> + skb = skb_dequeue(&priv->rx_pool);
> if (!skb) {
> p54spi_sleep(priv);
> - dev_err(&priv->spi->dev, "could not alloc skb");
> + dev_err(&priv->spi->dev, "could not get skb");
> return -ENOMEM;
> }
>
> @@ -409,12 +405,9 @@ static int p54spi_rx(struct p54s_priv *priv)
> len - READAHEAD_SZ);
> }
> p54spi_sleep(priv);
> - /* Put additional bytes to compensate for the possible
> - * alignment-caused truncation */
> - skb_put(skb, 4);
>
> - if (p54_rx(priv->hw, skb) == 0)
> - dev_kfree_skb(skb);
> + skb = p54_rx(priv->hw, skb);
> + skb_queue_tail(&priv->rx_pool, skb);
>
> return 0;
> }
> @@ -629,6 +622,7 @@ static void p54spi_op_stop(struct ieee80211_hw *dev)
> static int __devinit p54spi_probe(struct spi_device *spi)
> {
> struct p54s_priv *priv = NULL;
> + struct sk_buff *skb;
> struct ieee80211_hw *hw;
> int ret = -EINVAL;
>
> @@ -645,6 +639,8 @@ static int __devinit p54spi_probe(struct spi_device *spi)
>
> spi->bits_per_word = 16;
> spi->max_speed_hz = 24000000;
> + skb_queue_head_init(&priv->rx_pool);
> + priv->common.rx_wa_extra_tail_space = true;
>
> ret = spi_setup(spi);
> if (ret < 0) {
> @@ -689,6 +685,11 @@ static int __devinit p54spi_probe(struct spi_device *spi)
> priv->common.stop = p54spi_op_stop;
> priv->common.tx = p54spi_op_tx;
>
> + skb = __dev_alloc_skb(priv->common.rx_mtu, GFP_KERNEL);
> + if (!skb)
> + goto err_free_common;
> + skb_queue_tail(&priv->rx_pool, skb);
> +
> ret = p54spi_request_firmware(hw);
> if (ret < 0)
> goto err_free_common;
> @@ -705,6 +706,7 @@ static int __devinit p54spi_probe(struct spi_device *spi)
>
> err_free_common:
> p54_free_common(priv->hw);
> + skb_queue_purge(&priv->rx_pool);
> return ret;
> }
>
> @@ -715,6 +717,7 @@ static int __devexit p54spi_remove(struct spi_device *spi)
> p54_unregister_common(priv->hw);
>
> free_irq(gpio_to_irq(p54spi_gpio_irq), spi);
> + skb_queue_purge(&priv->rx_pool);
>
> gpio_free(p54spi_gpio_power);
> gpio_free(p54spi_gpio_irq);
> diff --git a/drivers/net/wireless/p54/p54spi.h b/drivers/net/wireless/p54/p54spi.h
> index 7fbe8d8..11ef2d5 100644
> --- a/drivers/net/wireless/p54/p54spi.h
> +++ b/drivers/net/wireless/p54/p54spi.h
> @@ -120,6 +120,8 @@ struct p54s_priv {
>
> enum fw_state fw_state;
> const struct firmware *firmware;
> +
> + struct sk_buff_head rx_pool;
> };
>
> #endif /* P54SPI_H */
> diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
> index 461d88f..c521bbc 100644
> --- a/drivers/net/wireless/p54/p54usb.c
> +++ b/drivers/net/wireless/p54/p54usb.c
> @@ -120,37 +120,14 @@ static void p54u_rx_cb(struct urb *urb)
> }
>
> skb_put(skb, urb->actual_length);
> + skb = p54_rx(dev, skb);
>
> - if (priv->hw_type == P54U_NET2280)
> - skb_pull(skb, priv->common.tx_hdr_len);
> - if (priv->common.fw_interface == FW_LM87) {
> - skb_pull(skb, 4);
> - skb_put(skb, 4);
> - }
> -
> - if (p54_rx(dev, skb)) {
> - skb = dev_alloc_skb(priv->common.rx_mtu + 32);
> - if (unlikely(!skb)) {
> - /* TODO check rx queue length and refill *somewhere* */
> - return;
> - }
> + info = (struct p54u_rx_info *) skb->cb;
> + info->urb = urb;
> + info->dev = dev;
> + urb->transfer_buffer = skb_tail_pointer(skb);
> + urb->context = skb;
>
> - info = (struct p54u_rx_info *) skb->cb;
> - info->urb = urb;
> - info->dev = dev;
> - urb->transfer_buffer = skb_tail_pointer(skb);
> - urb->context = skb;
> - } else {
> - if (priv->hw_type == P54U_NET2280)
> - skb_push(skb, priv->common.tx_hdr_len);
> - if (priv->common.fw_interface == FW_LM87) {
> - skb_push(skb, 4);
> - skb_put(skb, 4);
> - }
> - skb_reset_tail_pointer(skb);
> - skb_trim(skb, 0);
> - urb->transfer_buffer = skb_tail_pointer(skb);
> - }
> skb_queue_tail(&priv->rx_queue, skb);
> usb_anchor_urb(urb, &priv->submitted);
> if (usb_submit_urb(urb, GFP_ATOMIC)) {
> @@ -186,7 +163,7 @@ static int p54u_init_urbs(struct ieee80211_hw *dev)
> int ret = 0;
>
> while (skb_queue_len(&priv->rx_queue) < 32) {
> - skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
> + skb = __dev_alloc_skb(priv->common.rx_mtu, GFP_KERNEL);
> if (!skb) {
> ret = -ENOMEM;
> goto err;
> @@ -927,14 +904,17 @@ static int __devinit p54u_probe(struct usb_interface *intf,
> #endif
>
> priv->hw_type = P54U_3887;
> + priv->common.rx_wa_extra_tail_space = true;
> dev->extra_tx_headroom += sizeof(struct lm87_tx_hdr);
> priv->common.tx_hdr_len = sizeof(struct lm87_tx_hdr);
> + priv->common.rx_hdr_len = sizeof(struct lm87_rx_hdr);
> priv->common.tx = p54u_tx_lm87;
> priv->upload_fw = p54u_upload_firmware_3887;
> } else {
> priv->hw_type = P54U_NET2280;
> dev->extra_tx_headroom += sizeof(struct net2280_tx_hdr);
> priv->common.tx_hdr_len = sizeof(struct net2280_tx_hdr);
> + priv->common.rx_hdr_len = sizeof(struct net2280_tx_hdr);
==
Should this be rx rather than tx?
-- skip --
I have a problem that is not new with this patch. Using p54usb with
LM87 firmware and under the heavy load of building a kernel with the
source tree mounted with NFS, the interface will go off-line and
cannot reconnect. When the driver is unloaded and reloaded, it is
unable to reload the firmware. My log is as follows:
usb 1-5: new high speed USB device using ehci_hcd and address 3
usb 1-5: configuration #1 chosen from 1 choice
cfg80211: Calling CRDA to update world regulatory domain
usb 1-5: reset high speed USB device using ehci_hcd and address 3
usb 1-5: firmware: requesting isl3887usb
phy0: p54 detected a LM87 firmware
p54: rx_mtu reduced from 3240 to 2380
phy0: FW rev 2.13.24.0 - Softmac protocol 5.9
phy0: cryptographic accelerator WEP:YES, TKIP:YES, CCMP:YES
phy0: hwaddr 00:90:4b:d2:1f:cd, MAC:isl3892 RF:Xbow
phy0: Selected rate control algorithm 'minstrel'
Registered led device: p54-phy0::assoc
Registered led device: p54-phy0::tx
Registered led device: p54-phy0::rx
Registered led device: p54-phy0::radio
usb 1-5: is registered as 'phy0'
usbcore: registered new interface driver p54usb
wlan0: authenticate with AP 00:1a:70:46:ba:b1
wlan0: authenticated
wlan0: associate with AP 00:1a:70:46:ba:b1
wlan0: RX AssocResp from 00:1a:70:46:ba:b1 (capab=0x431 status=0 aid=1)
wlan0: associated
wlan0: no probe response from AP 00:1a:70:46:ba:b1 - disassociating
usbcore: deregistering interface driver p54usb
cfg80211: Calling CRDA to update world regulatory domain
usb 1-5: reset high speed USB device using ehci_hcd and address 3
usb 1-5: firmware: requesting isl3887usb
phy0: p54 detected a LM87 firmware
p54: rx_mtu reduced from 3240 to 2380
phy0: FW rev 2.13.24.0 - Softmac protocol 5.9
phy0: cryptographic accelerator WEP:YES, TKIP:YES, CCMP:YES
usb 1-5: (p54usb) firmware upload failed!
p54usb: probe of 1-5:1.0 failed with error -110
Christian Lamparter wrote:
> On Saturday 04 July 2009 18:40:14 Larry Finger wrote:
>> I have logged the usb transfers, but not yet analyzed them.
> great!
>
>> This time I got a new failure - I hit this warning at
>> net/mac80211/tx.c:1299
>> retries++;
>> if (WARN(retries > 10, "tx refused but queue
>> active\n"))
>> goto drop;
>> goto retry;
>>
>
>> If I have analyzed this correctly, I hit this section of
>> p54_tx_qos_accounting_alloc at drivers/net/wireless/p54/txrx.c:204.
>> I'm running the splitup patches.
>>
>> if (unlikely(queue->len > queue->limit &&
>> IS_QOS_QUEUE(p54_queue))) {
>> spin_unlock_irqrestore(&priv->tx_stats_lock, flags);
>> return -ENOSPC;
>> }
>>
>> Any suggestions on debugging this would be appreciated.
> ---
> diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
> index ea074a6..69fc70a 100644
> --- a/drivers/net/wireless/p54/txrx.c
> +++ b/drivers/net/wireless/p54/txrx.c
> @@ -25,6 +25,7 @@
> #include "p54.h"
> #include "lmac.h"
>
> +#define P54_MM_DEBUG
> #ifdef P54_MM_DEBUG
> static void p54_dump_tx_queue(struct p54_common *priv)
> {
> @@ -200,7 +201,18 @@ static int p54_tx_qos_accounting_alloc(struct p54_common *priv,
>
> spin_lock_irqsave(&priv->tx_stats_lock, flags);
> if (unlikely(queue->len > queue->limit && IS_QOS_QUEUE(p54_queue))) {
> + u16 ac_queue = p54_queue - P54_QUEUE_DATA;
> + int i;
> +
> + printk(KERN_DEBUG "TX queue stats\n");
> + for (i = 0; i < 8; i++)
> + printk(KERN_DEBUG "\ttxq[%d]: used %d [of %d] => %s\n",
> + i, priv->tx_stats[i].len,
> + priv->tx_stats[i].limit,
> + ieee80211_queue_stopped(priv->hw, ac_queue) ?
> + "stopped" : "running");
> spin_unlock_irqrestore(&priv->tx_stats_lock, flags);
> + p54_dump_tx_queue(priv);
> return -ENOSPC;
> }
>
> ---
> let's hope the queue .len count does not turn negative!
Sorry. It did. The output of the printk is:
TX queue stats
txq[0]: used 0 [of 1] => running
txq[1]: used 0 [of 1] => running
txq[2]: used 0 [of 3] => running
txq[3]: used 0 [of 3] => running
txq[4]: used 0 [of 16] => running
txq[5]: used 0 [of 16] => running
txq[6]: used -1 [of 16] => running
txq[7]: used 0 [of 16] => running
phy5: / --- tx queue dump (0 entries) ---
phy5: \ --- [free: 14592], largest free block: 14592 ---
I added this statement for debugging:
@@ -224,6 +236,7 @@ static void p54_tx_qos_accounting_free(s
struct p54_tx_data *data = (void *) hdr->data;
priv->tx_stats[data->hw_queue].len--;
+ WARN_ON(priv->tx_stats[data->hw_queue].len < 0);
}
p54_wake_queues(priv);
}
Since I added that, I have gotten about 15 of the "wlan0: no probe
response from AP 00:1a:70:46:ba:b1 - disassociating" situations where
the interface goes offline, but no more of the negative queue len
variety. It looks as if I will need to debug it first.
Larry
> This patch tries to address a long standing issue:
> how to survive serve memory starvation situations,
> without losing the device due to missing transfer-buffers.
>
> And with a flick of __GFP_NOWARN, we're able to handle ?all? memory
> allocation failures on the rx-side during operation without much fuss.
>
> However, there is still an issue within the xmit-part.
> This is likely due to p54's demand for a large free headroom for
> every outgoing frame:
>
> + transport header (differs from device to device)
> -> 16 bytes transport header (USB 1st gen)
> -> 8 bytes for (USB 2nd gen)
> -> 0 bytes for spi & pci
> + 12 bytes for p54_hdr
> + 44 bytes for p54_tx_data
> + up to 3 bytes for alignment
> (+ 802.11 header as well? )
>
> and this is where ieee80211_skb_resize comes into the play...
> which will try to _relocate_ (alloc new, copy, free old) frame data,
> as the headroom is most of the time simply not enough.
> =>
> Call Trace: (from Larry - Bug #13319 )
> [<ffffffff80292a7b>] __alloc_pages_internal+0x43d/0x45e
> [<ffffffff802b1f1f>] alloc_pages_current+0xbe/0xc6
> [<ffffffff802b6362>] new_slab+0xcf/0x28b
> [<ffffffff802b4d1f>] ? unfreeze_slab+0x4c/0xbd
> [<ffffffff802b672e>] __slab_alloc+0x210/0x44c
> [<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
> [<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
> [<ffffffff802b7e60>] __kmalloc+0x119/0x194
> [<ffffffff803e7bee>] pskb_expand_head+0x52/0x166
> [<ffffffffa02913d6>] ieee80211_skb_resize+0x91/0xc7 [mac80211]
> [<ffffffffa0291c0f>] ieee80211_master_start_xmit+0x298/0x319 [mac80211]
> [<ffffffff803ef72a>] dev_hard_start_xmit+0x229/0x2a8
> (sl*b debug option will help to bloat even more.)
>
> So?! how to prevent ieee80211_skb_resize from raping
> the bits of memory left?
>
> the simplest answer is probably this one:
> https://dev.openwrt.org/changeset/15761
> --
>
> back to rx failures.
> the attached code below was only usb was tested so far!
> you have been warned!
>
> regards,
> chr
>
> btw: max what do you think about the p54spi changes, are they total ****?
Christian, I'm trying to test it, but it seems that many things have changed since 2.6.28.
Right now I see this:
[ 416.738586] Freeing init memory: 140K
[ 417.208801] cx3110x spi2.0: firmware: requesting 3826.arm
[ 417.272094] hub 1-0:1.0: hub_suspend
[ 417.272155] usb usb1: bus auto-suspend
[ 417.295501] phy0: p54 detected a LM20 firmware
[ 417.298034] p54: rx_mtu reduced from 3240 to 2376
[ 417.300598] phy0: FW rev 2.13.0.0.a.22.8 - Softmac protocol 5.6
[ 417.303558] phy0: cryptographic accelerator WEP:YES, TKIP:YES, CCMP:YES
[ 417.306732] cx3110x spi2.0: firmware: requesting 3826.eeprom
[ 417.385742] firmware spi2.0: firmware_loading_store: vmap() failed
[ 417.391540] cx3110x spi2.0: loading default eeprom...
[ 417.395568] phy0: hwaddr 00:02:ee:c0:ff:ee, MAC:isl3820 RF:Longbow
[ 417.468841] phy0: Selected rate control algorithm 'minstrel'
[ 417.473693] cx3110x spi2.0: is registered as 'phy0'
[ 419.150909] g_ether gadget: notify connect false
[ 419.182891] g_ether gadget: notify speed 425984000
[ 420.409210] usb0: eth_open
[ 420.409240] usb0: eth_start
[ 420.409423] g_ether gadget: ecm_open
[ 420.409454] g_ether gadget: notify connect true
[ 420.430908] g_ether gadget: notify speed 425984000
[ 421.186340] phy0: device now idle
[ 421.200958] skb_over_panic: text:bf000498 len:2 put:2 head:c793a200 data:c793a220 tail:0xc793a222 end:0xc793a220 dev:<NULL>
[ 421.211669] kernel BUG at net/core/skbuff.c:127!
[ 421.217407] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 421.223571] pgd = c0004000
[ 421.229797] [00000000] *pgd=00000000
[ 421.236236] Internal error: Oops: 817 [#1]
[ 421.242736] Modules linked in: p54spi
[ 421.249420] CPU: 0 Not tainted (2.6.31-rc1-omap1-wl #4)
[ 421.256378] PC is at __bug+0x1c/0x28
[ 421.263458] LR is at __bug+0x18/0x28
[ 421.270538] pc : [<c002f828>] lr : [<c002f824>] psr: 60000113
[ 421.270568] sp : c798ff20 ip : 00000000 fp : 00000000
[ 421.284851] r10: 00000000 r9 : 00000000 r8 : c7976b34
[ 421.291870] r7 : c793a220 r6 : c793a222 r5 : c793a220 r4 : c793a200
[ 421.298980] r3 : 00000000 r2 : c033cb84 r1 : 000045b2 r0 : 0000003a
[ 421.306091] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel
[ 421.313323] Control: 00c5387d Table: 87fe0000 DAC: 00000017
[ 421.320526] Process phy0 (pid: 426, stack limit = 0xc798e268)
[ 421.327697] Stack: (0xc798ff20 to 0xc7990000)
[ 421.334747] ff20: 00000002 c01dc03c c793a200 c793a220 c793a222 c793a220 c02fba60 00000000
[ 421.342346] ff40: c798ff40 c784abc0 c793a220 bf000498 c784abc0 c01dd1a8 00000058 c7976940
[ 421.349975] ff60: c798ff6e bf000498 c798e000 80000058 c7976afc c7976940 50000000 c7976b0c
[ 421.357574] ff80: 00000000 bf0008ec c796fd20 10000000 c0060510 bf000808 c796fd20 c798e000
[ 421.365173] ffa0: c0060510 c0060650 c798ffd4 00000000 c78cc9a0 c00636ac c798ffb8 c798ffb8
[ 421.372558] ffc0: c798ffd4 c7951d98 c796fd20 c0063440 00000000 00000000 c798ffd8 c798ffd8
[ 421.379730] ffe0: 00000000 00000000 00000000 00000000 00000000 c002cca8 53384842 4e86725f
[ 421.386993] Code: e1a01000 e59f000c eb0088c3 e3a03000 (e5833000)
[ 421.394104] ---[ end trace 75ac12f5b28efc30 ]---
Looks like something's wrong with firmware loading.
I hope to fix it tomorrow and see how your changes work.
Thanks.
-- Max
>> Anyway skb that carry data frame will be handed to the mac
>> and replaced with new one. That is, this patch does not eliminate
>> possibility of control frame loss due to insufficient memory.
> control frames loss?
> can you please point me exactly where?
> Since this is a bug that needs to be fixed.
>
> as control frames' skb should be (and hopefully are) always reused.
> (see @ txrx.c line 590 - p54_rx_control -> return skb; )
>
>> Maybe we'd better have separate reusable skb for control frames which
>> is never handed out from the driver?
> well as a matter of fact this is already true for p54pci & p54usb
> (based on the assumption we are really recycling all control frames...
> ?and not somehow forgot a case and got a leak there?!)
Sorry, now I see, it's ok. Missed these lines in p54_rx_data:
+ tmp = dev_alloc_skb(priv->rx_mtu);
+ if (!tmp)
+ return skb;
--
Max
On Saturday 04 July 2009 18:40:14 Larry Finger wrote:
> I have logged the usb transfers, but not yet analyzed them.
great!
> This time I got a new failure - I hit this warning at
> net/mac80211/tx.c:1299
> retries++;
> if (WARN(retries > 10, "tx refused but queue
> active\n"))
> goto drop;
> goto retry;
>
> If I have analyzed this correctly, I hit this section of
> p54_tx_qos_accounting_alloc at drivers/net/wireless/p54/txrx.c:204.
> I'm running the splitup patches.
>
> if (unlikely(queue->len > queue->limit &&
> IS_QOS_QUEUE(p54_queue))) {
> spin_unlock_irqrestore(&priv->tx_stats_lock, flags);
> return -ENOSPC;
> }
>
> Any suggestions on debugging this would be appreciated.
---
diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
index ea074a6..69fc70a 100644
--- a/drivers/net/wireless/p54/txrx.c
+++ b/drivers/net/wireless/p54/txrx.c
@@ -25,6 +25,7 @@
#include "p54.h"
#include "lmac.h"
+#define P54_MM_DEBUG
#ifdef P54_MM_DEBUG
static void p54_dump_tx_queue(struct p54_common *priv)
{
@@ -200,7 +201,18 @@ static int p54_tx_qos_accounting_alloc(struct p54_common *priv,
spin_lock_irqsave(&priv->tx_stats_lock, flags);
if (unlikely(queue->len > queue->limit && IS_QOS_QUEUE(p54_queue))) {
+ u16 ac_queue = p54_queue - P54_QUEUE_DATA;
+ int i;
+
+ printk(KERN_DEBUG "TX queue stats\n");
+ for (i = 0; i < 8; i++)
+ printk(KERN_DEBUG "\ttxq[%d]: used %d [of %d] => %s\n",
+ i, priv->tx_stats[i].len,
+ priv->tx_stats[i].limit,
+ ieee80211_queue_stopped(priv->hw, ac_queue) ?
+ "stopped" : "running");
spin_unlock_irqrestore(&priv->tx_stats_lock, flags);
+ p54_dump_tx_queue(priv);
return -ENOSPC;
}
---
let's hope the queue .len count does not turn negative!
regards,
Chr
Christian Lamparter wrote:
> This patch tries to address a long standing issue:
> how to survive serve memory starvation situations,
> without losing the device due to missing transfer-buffers.
>
> And with a flick of __GFP_NOWARN, we're able to handle ?all? memory
> allocation failures on the rx-side during operation without much fuss.
>
> However, there is still an issue within the xmit-part.
> This is likely due to p54's demand for a large free headroom for
> every outgoing frame:
>
> + transport header (differs from device to device)
> -> 16 bytes transport header (USB 1st gen)
> -> 8 bytes for (USB 2nd gen)
> -> 0 bytes for spi & pci
> + 12 bytes for p54_hdr
> + 44 bytes for p54_tx_data
> + up to 3 bytes for alignment
> (+ 802.11 header as well? )
>
> and this is where ieee80211_skb_resize comes into the play...
> which will try to _relocate_ (alloc new, copy, free old) frame data,
> as the headroom is most of the time simply not enough.
> =>
> Call Trace: (from Larry - Bug #13319 )
> [<ffffffff80292a7b>] __alloc_pages_internal+0x43d/0x45e
> [<ffffffff802b1f1f>] alloc_pages_current+0xbe/0xc6
> [<ffffffff802b6362>] new_slab+0xcf/0x28b
> [<ffffffff802b4d1f>] ? unfreeze_slab+0x4c/0xbd
> [<ffffffff802b672e>] __slab_alloc+0x210/0x44c
> [<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
> [<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
> [<ffffffff802b7e60>] __kmalloc+0x119/0x194
> [<ffffffff803e7bee>] pskb_expand_head+0x52/0x166
> [<ffffffffa02913d6>] ieee80211_skb_resize+0x91/0xc7 [mac80211]
> [<ffffffffa0291c0f>] ieee80211_master_start_xmit+0x298/0x319 [mac80211]
> [<ffffffff803ef72a>] dev_hard_start_xmit+0x229/0x2a8
> (sl*b debug option will help to bloat even more.)
>
> So?! how to prevent ieee80211_skb_resize from raping
> the bits of memory left?
>
> the simplest answer is probably this one:
> https://dev.openwrt.org/changeset/15761
Christian,
I have not had a change to review and/or test this patch. I applaud
attempts to reduce memory, but the problem that I reported will be
fixed in 2.6.31. Whenever the SLUB debugging would force the order of
the request to increase, debugging will be turned off for that request
and a notification will be logged. This solution represents a
compromise among the developers - as usual none of them are really
happy with this approach, and it is expected that there will be
further development for 2.6.32.
After I test this patch, I will try reverting the change that reduced
MTU on the chance that it will no longer be necessary.
Larry
On Monday 06 July 2009 03:36:59 Larry Finger wrote:
> Christian Lamparter wrote:
>
> > hmm, can you please give this a go? (I hope this patch still applies...)
> > I'm curious if you can dump the tx_queue when p54_alloc_skb fail?
>
> The patch applied with the aid of wiggle.
yeah, I don't want to delay these changes... so they are based on top of
the last _official_ patches, rather than the experimental ones.
> Since adding it, I've had 7 of the disassociation failures, but none of the
> queue len problems.
ohh, forgot to add that...
- do you think you can add another p54_dump_tx_queue
when it "disassociates". I'm curious if the queue was empty or is full
(with what?!)
> I have modified every place that p54_alloc_skb() fails with a printk
> that tells why it failed and then calls p54_dump_tx_queue(), but I
> have not gotten to test it yet. I found an oops when 2.6.31-rc2 is
> booted that I'm bisecting right now.
yeah, in the mean time I'll get these fixes to linville.
Regards,
Chr
On Monday 06 July 2009 15:11:11 Max Filippov wrote:
> > This patch tries to address a long standing issue:
> > how to survive serve memory starvation situations,
> > without losing the device due to missing transfer-buffers.
>
> Christian, how does placement of skb allocation point make a
> difference?
not all drivers are equal... usb is the critical one.
just take a look at the these lines in p54u_rx_cb
skb = dev_alloc_skb(priv->common.rx_mtu + 32);
if (unlikely(!skb)) {
/* TODO check rx queue length and refill *somewhere* */
return;
}
so every time we failed to alloc the replacement skb, we have to
abandon one rx urb, until nothing is left and the user has
to replug/reload. pci is a bit more forgiving, but it can bite as well.
(Note: this scenario is _unlikely_ to happen ever since Larry scaled
down the rx_mtu from 3240 to about 2400 or so - which makes
dev_alloc_skb a 0-order allocation on the most common architectures)
> Anyway skb that carry data frame will be handed to the mac
> and replaced with new one. That is, this patch does not eliminate
> possibility of control frame loss due to insufficient memory.
control frames loss?
can you please point me exactly where?
Since this is a bug that needs to be fixed.
as control frames' skb should be (and hopefully are) always reused.
(see @ txrx.c line 590 - p54_rx_control -> return skb; )
> Maybe we'd better have separate reusable skb for control frames which
> is never handed out from the driver?
well as a matter of fact this is already true for p54pci & p54usb
(based on the assumption we are really recycling all control frames...
and not somehow forgot a case and got a leak there?!)
Regards,
Chr
Larry Finger wrote:
> @@ -224,6 +236,7 @@ static void p54_tx_qos_accounting_free(s
> struct p54_tx_data *data = (void *) hdr->data;
>
> priv->tx_stats[data->hw_queue].len--;
> + WARN_ON(priv->tx_stats[data->hw_queue].len < 0);
> }
> p54_wake_queues(priv);
> }
>
The new WARN_ON did _NOT_ trigger when the len went negative.
The only other place where len could be decremented is through
txhdr->backlog. I noticed that the p54common.c had
txhdr->backlog = current_queue->len;
This was replaced in txrx.c by
txhdr->backlog = priv->tx_stats[queue].len - 1;
Was this intentional?
To test if this is the problem, I added the following hunk:
@@ -840,6 +853,7 @@ int p54_tx_80211(struct ieee80211_hw *de
txhdr->crypt_offset = crypt_offset;
txhdr->hw_queue = queue;
txhdr->backlog = priv->tx_stats[queue].len - 1;
+ WARN_ON(!priv->tx_stats[queue].len);
memset(txhdr->durations, 0, sizeof(txhdr->durations));
txhdr->tx_antenna = ((info->antenna_sel_tx == 0) ?
2 : info->antenna_sel_tx - 1) & priv->tx_diversity_mask;
This WARN_ON did trigger just before txq[6].len went negative. I'm now
testing with that changed as follows:
@@ -839,7 +852,8 @@ int p54_tx_80211(struct ieee80211_hw *de
}
txhdr->crypt_offset = crypt_offset;
txhdr->hw_queue = queue;
- txhdr->backlog = priv->tx_stats[queue].len - 1;
+ txhdr->backlog = priv->tx_stats[queue].len;
+ WARN_ON(priv->tx_stats[queue].len < 0);
memset(txhdr->durations, 0, sizeof(txhdr->durations));
txhdr->tx_antenna = ((info->antenna_sel_tx == 0) ?
2 : info->antenna_sel_tx - 1) & priv->tx_diversity_mask;
This WARN_ON did not trigger, but I still had the queue len go negative.
One other question: struct p54_burst is defined in lmac.h, but it
doesn't seem to be used anywhere. Will it be needed later?
Larry
On Sat, 2009-07-04 at 00:53 +0200, Christian Lamparter wrote:
> the simplest answer is probably this one:
> https://dev.openwrt.org/changeset/15761
Humm. Davem and I looked into this problem some time ago, came up with
patches and concluded that if everything is set up correctly this
shouldn't be happening.
Look at net/mac80211/iface.c::ieee80211_if_add:
ndev->needed_headroom = local->tx_headroom +
4*6 /* four MAC addresses */
+ 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
+ 6 /* mesh */
+ 8 /* rfc1042/bridge tunnel */
- ETH_HLEN /* ethernet hard_header_len */
+ IEEE80211_ENCRYPT_HEADROOM;
ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM;
Due to that, skbs allocated for our device ought to have enough
headroom. If they don't, you should try to figure out where they're
coming from and why they don't,
http://johannes.sipsolutions.net/patches/kernel/all/2008-05-04-23:09/028-skb-alloc-stackdump.patch might help with that.
OTOH, I guess that you're using IPv4 and that should do things
correctly, I think.
johannes
On Sunday 05 July 2009 19:49:57 Larry Finger wrote:
> How sure are you of the locking? It seems that the more threads that
> I'm using, the more likely that it is to happen. Similarly, the
> disassociation errors could be overloading the firmware by adding too
> many entries. Of course, it could result from a firmware error when
> the device is driven hard.
>
> I've only given it one trial, but p54usb only survived for 24 minutes
> running my other torture test with repeating tcpperf in one terminal
> and a flood ping in a second. This one got the disassociation error
> and also a "failure to remove key" (error code -95) and two "failed to
> uodate LEDs" (error code -12).
hmm, can you please give this a go? (I hope this patch still applies...)
I'm curious if you can dump the tx_queue when p54_alloc_skb fail?
---
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 461d88f..1dc1a09 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -246,8 +246,10 @@ static void p54u_tx_lm87(struct ieee80211_hw *dev, struct sk_buff *skb)
struct lm87_tx_hdr *hdr = (void *)skb->data - sizeof(*hdr);
data_urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (!data_urb)
+ if (!data_urb) {
+ p54_free_skb(dev, skb);
return;
+ }
hdr->chksum = p54u_lm87_chksum((__le32 *)skb->data, skb->len);
hdr->device_addr = ((struct p54_hdr *)skb->data)->req_id;
@@ -269,27 +271,22 @@ static void p54u_tx_lm87(struct ieee80211_hw *dev, struct sk_buff *skb)
static void p54u_tx_net2280(struct ieee80211_hw *dev, struct sk_buff *skb)
{
struct p54u_priv *priv = dev->priv;
- struct urb *int_urb, *data_urb;
+ struct urb *int_urb = NULL, *data_urb = NULL;
struct net2280_tx_hdr *hdr = (void *)skb->data - sizeof(*hdr);
- struct net2280_reg_write *reg;
- int err = 0;
+ struct net2280_reg_write *reg = NULL;
+ int err = -ENOMEM;
reg = kmalloc(sizeof(*reg), GFP_ATOMIC);
if (!reg)
- return;
+ goto out;
int_urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (!int_urb) {
- kfree(reg);
- return;
- }
+ if (!int_urb)
+ goto out;
data_urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (!data_urb) {
- kfree(reg);
- usb_free_urb(int_urb);
- return;
- }
+ if (!data_urb)
+ goto out;
reg->port = cpu_to_le16(NET2280_DEV_U32);
reg->addr = cpu_to_le32(P54U_DEV_BASE);
@@ -329,14 +326,12 @@ static void p54u_tx_net2280(struct ieee80211_hw *dev, struct sk_buff *skb)
usb_unanchor_urb(data_urb);
goto out;
}
- out:
+out:
usb_free_urb(int_urb);
usb_free_urb(data_urb);
- if (err) {
- skb_pull(skb, sizeof(*hdr));
+ if (err)
p54_free_skb(dev, skb);
- }
}
static int p54u_write(struct p54u_priv *priv,
diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
index ea074a6..01eadb1 100644
--- a/drivers/net/wireless/p54/txrx.c
+++ b/drivers/net/wireless/p54/txrx.c
@@ -199,7 +199,7 @@ static int p54_tx_qos_accounting_alloc(struct p54_common *priv,
queue = &priv->tx_stats[p54_queue];
spin_lock_irqsave(&priv->tx_stats_lock, flags);
- if (unlikely(queue->len > queue->limit && IS_QOS_QUEUE(p54_queue))) {
+ if (unlikely(queue->len >= queue->limit && IS_QOS_QUEUE(p54_queue))) {
spin_unlock_irqrestore(&priv->tx_stats_lock, flags);
return -ENOSPC;
}
@@ -222,8 +222,11 @@ static void p54_tx_qos_accounting_free(struct p54_common *priv,
if (skb && IS_DATA_FRAME(skb)) {
struct p54_hdr *hdr = (void *) skb->data;
struct p54_tx_data *data = (void *) hdr->data;
+ unsigned long flags;
+ spin_lock_irqsave(&priv->tx_stats_lock, flags);
priv->tx_stats[data->hw_queue].len--;
+ spin_unlock_irqrestore(&priv->tx_stats_lock, flags);
}
p54_wake_queues(priv);
}
@@ -504,7 +507,6 @@ static void p54_rx_eeprom_readback(struct p54_common *priv,
priv->eeprom = NULL;
tmp = p54_find_and_unlink_skb(priv, hdr->req_id);
- p54_tx_qos_accounting_free(priv, tmp);
dev_kfree_skb_any(tmp);
complete(&priv->eeprom_comp);
}
@@ -531,7 +533,6 @@ static void p54_rx_stats(struct p54_common *priv, struct sk_buff *skb)
priv->noise = p54_rssi_to_dbm(priv, le32_to_cpu(stats->noise));
tmp = p54_find_and_unlink_skb(priv, hdr->req_id);
- p54_tx_qos_accounting_free(priv, tmp);
dev_kfree_skb_any(tmp);
}
On Saturday 04 July 2009 23:14:02 Larry Finger wrote:
> Larry Finger wrote:
> > @@ -224,6 +236,7 @@ static void p54_tx_qos_accounting_free(s
> > struct p54_tx_data *data = (void *) hdr->data;
> >
> > priv->tx_stats[data->hw_queue].len--;
> > + WARN_ON(priv->tx_stats[data->hw_queue].len < 0);
> > }
> > p54_wake_queues(priv);
> > }
> >
>
> The new WARN_ON did _NOT_ trigger when the len went negative.
Now that's what I call a really cool coincident!
The only _official_ codepath where the queue len actually could be
decremented is not really responsible for this -1!
maybe, after decreasing priv->tx_stats[data->hw_queue].len--;
we should override data->hw_queue (aka mark) and place WARN_ON
everywhere (e.g. p54_free_skb / p54_find_and_unlink_skb & p54_dump_tx_queue
whenever they spot the marker.
(OT: in there's a _wrong_ p54_tx_qos_accounting_free in p54_rx_eeprom_readback
which will be removed...
however its a bit unlikely this causes this havok as the eeprom_readback is a
control frame and therefore p54_tx_qos_accounting_free is a nop for those)
> The only other place where len could be decremented is through
> txhdr->backlog. I noticed that the p54common.c had
>
> txhdr->backlog = current_queue->len;
>
> This was replaced in txrx.c by
>
> txhdr->backlog = priv->tx_stats[queue].len - 1;
>
> Was this intentional?
the spec only says "Number of backlogged frames for given queue."
however there's no word about if this count is for the number of
frames which [are already(now)]/[or will be(old behaviour)]
in the device memory window...
and to add more confusion => there's even a third interpretation:
could be this the number of frames which are still buffered by the driver,
because they don't fit yet?
Now back to the lines:
I somehow fail to see how exactly tx_stats[queue].len gets
decremented here?
as the result of priv->tx_stats[queue].len - 1 will be written into
txhdr->backlog and not priv->tx_stats[queue].len?
it's really a txhdr->backlog = priv->tx_stats[queue].len - 1;
vs.
priv->tx_stats[queue].len = priv->tx_stats[queue].len - 1;
where as the second one is obviously wrong _here_...
> To test if this is the problem, I added the following hunk:
>
> @@ -840,6 +853,7 @@ int p54_tx_80211(struct ieee80211_hw *de
> txhdr->crypt_offset = crypt_offset;
> txhdr->hw_queue = queue;
> txhdr->backlog = priv->tx_stats[queue].len - 1;
> + WARN_ON(!priv->tx_stats[queue].len);
> memset(txhdr->durations, 0, sizeof(txhdr->durations));
> txhdr->tx_antenna = ((info->antenna_sel_tx == 0) ?
> 2 : info->antenna_sel_tx - 1) & priv->tx_diversity_mask;
>
> This WARN_ON did trigger just before txq[6].len went negative. I'm now
> testing with that changed as follows:
>
> @@ -839,7 +852,8 @@ int p54_tx_80211(struct ieee80211_hw *de
> }
> txhdr->crypt_offset = crypt_offset;
> txhdr->hw_queue = queue;
> - txhdr->backlog = priv->tx_stats[queue].len - 1;
> + txhdr->backlog = priv->tx_stats[queue].len;
> + WARN_ON(priv->tx_stats[queue].len < 0);
> memset(txhdr->durations, 0, sizeof(txhdr->durations));
> txhdr->tx_antenna = ((info->antenna_sel_tx == 0) ?
> 2 : info->antenna_sel_tx - 1) & priv->tx_diversity_mask;
>
> This WARN_ON did not trigger, but I still had the queue len go negative.
well, you have to change the WARN_ON to priv->tx_stats[queue].len <= 0 (or ==)
as priv->tx_stats[queue].len will never be 0 here, because it was just
incremented by p54_tx_qos_accounting_alloc.
>
> One other question: struct p54_burst is defined in lmac.h, but it
> doesn't seem to be used anywhere. Will it be needed later?
It's on the todo list...... somewhere.
However, the LMAC API needs to be updated for this.
As there is no description (TBD) about the flags.
Regards,
Chr
On Sunday 05 July 2009 02:56:59 Max Filippov wrote:
> > This patch tries to address a long standing issue:
> > how to survive serve memory starvation situations,
> > without losing the device due to missing transfer-buffers.
> >
> > And with a flick of __GFP_NOWARN, we're able to handle ?all? memory
> > allocation failures on the rx-side during operation without much fuss.
> >
> > However, there is still an issue within the xmit-part.
> > This is likely due to p54's demand for a large free headroom for
> > every outgoing frame:
> >
> > + transport header (differs from device to device)
> > -> 16 bytes transport header (USB 1st gen)
> > -> 8 bytes for (USB 2nd gen)
> > -> 0 bytes for spi & pci
> > + 12 bytes for p54_hdr
> > + 44 bytes for p54_tx_data
> > + up to 3 bytes for alignment
> > (+ 802.11 header as well? )
> >
> > and this is where ieee80211_skb_resize comes into the play...
> > which will try to _relocate_ (alloc new, copy, free old) frame data,
> > as the headroom is most of the time simply not enough.
> > =>
> > Call Trace: (from Larry - Bug #13319 )
> > [<ffffffff80292a7b>] __alloc_pages_internal+0x43d/0x45e
> > [<ffffffff802b1f1f>] alloc_pages_current+0xbe/0xc6
> > [<ffffffff802b6362>] new_slab+0xcf/0x28b
> > [<ffffffff802b4d1f>] ? unfreeze_slab+0x4c/0xbd
> > [<ffffffff802b672e>] __slab_alloc+0x210/0x44c
> > [<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
> > [<ffffffff803e7bee>] ? pskb_expand_head+0x52/0x166
> > [<ffffffff802b7e60>] __kmalloc+0x119/0x194
> > [<ffffffff803e7bee>] pskb_expand_head+0x52/0x166
> > [<ffffffffa02913d6>] ieee80211_skb_resize+0x91/0xc7 [mac80211]
> > [<ffffffffa0291c0f>] ieee80211_master_start_xmit+0x298/0x319 [mac80211]
> > [<ffffffff803ef72a>] dev_hard_start_xmit+0x229/0x2a8
> > (sl*b debug option will help to bloat even more.)
> >
> > So?! how to prevent ieee80211_skb_resize from raping
> > the bits of memory left?
> >
> > the simplest answer is probably this one:
> > https://dev.openwrt.org/changeset/15761
> > --
> >
> > back to rx failures.
> > the attached code below was only usb was tested so far!
> > you have been warned!
> >
> > regards,
> > chr
> >
> > btw: max what do you think about the p54spi changes, are they total ****?
>
> Christian, I'm trying to test it, but it seems that many things have changed since 2.6.28.
> Right now I see this:
>
> [ 416.738586] Freeing init memory: 140K
> [ 417.208801] cx3110x spi2.0: firmware: requesting 3826.arm
> [ 417.272094] hub 1-0:1.0: hub_suspend
> [ 417.272155] usb usb1: bus auto-suspend
> [ 417.295501] phy0: p54 detected a LM20 firmware
> [ 417.298034] p54: rx_mtu reduced from 3240 to 2376
> [ 417.300598] phy0: FW rev 2.13.0.0.a.22.8 - Softmac protocol 5.6
> [ 417.303558] phy0: cryptographic accelerator WEP:YES, TKIP:YES, CCMP:YES
> [ 417.306732] cx3110x spi2.0: firmware: requesting 3826.eeprom
> [ 417.385742] firmware spi2.0: firmware_loading_store: vmap() failed
> [ 417.391540] cx3110x spi2.0: loading default eeprom...
> [ 417.395568] phy0: hwaddr 00:02:ee:c0:ff:ee, MAC:isl3820 RF:Longbow
> [ 417.468841] phy0: Selected rate control algorithm 'minstrel'
> [ 417.473693] cx3110x spi2.0: is registered as 'phy0'
> [ 419.150909] g_ether gadget: notify connect false
> [ 419.182891] g_ether gadget: notify speed 425984000
> [ 420.409210] usb0: eth_open
> [ 420.409240] usb0: eth_start
> [ 420.409423] g_ether gadget: ecm_open
> [ 420.409454] g_ether gadget: notify connect true
> [ 420.430908] g_ether gadget: notify speed 425984000
> [ 421.186340] phy0: device now idle
> [ 421.200958] skb_over_panic: text:bf000498 len:2 put:2 head:c793a200 data:c793a220 tail:0xc793a222 end:0xc793a220 dev:<NULL>
> [ 421.211669] kernel BUG at net/core/skbuff.c:127!
> [ 421.217407] Unable to handle kernel NULL pointer dereference at virtual address 00000000
> [ 421.223571] pgd = c0004000
> [ 421.229797] [00000000] *pgd=00000000
> [ 421.236236] Internal error: Oops: 817 [#1]
> [ 421.242736] Modules linked in: p54spi
> [ 421.249420] CPU: 0 Not tainted (2.6.31-rc1-omap1-wl #4)
> [ 421.256378] PC is at __bug+0x1c/0x28
> [ 421.263458] LR is at __bug+0x18/0x28
> [ 421.270538] pc : [<c002f828>] lr : [<c002f824>] psr: 60000113
> [ 421.270568] sp : c798ff20 ip : 00000000 fp : 00000000
> [ 421.284851] r10: 00000000 r9 : 00000000 r8 : c7976b34
> [ 421.291870] r7 : c793a220 r6 : c793a222 r5 : c793a220 r4 : c793a200
> [ 421.298980] r3 : 00000000 r2 : c033cb84 r1 : 000045b2 r0 : 0000003a
> [ 421.306091] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel
> [ 421.313323] Control: 00c5387d Table: 87fe0000 DAC: 00000017
> [ 421.320526] Process phy0 (pid: 426, stack limit = 0xc798e268)
> [ 421.327697] Stack: (0xc798ff20 to 0xc7990000)
> [ 421.334747] ff20: 00000002 c01dc03c c793a200 c793a220 c793a222 c793a220 c02fba60 00000000
> [ 421.342346] ff40: c798ff40 c784abc0 c793a220 bf000498 c784abc0 c01dd1a8 00000058 c7976940
> [ 421.349975] ff60: c798ff6e bf000498 c798e000 80000058 c7976afc c7976940 50000000 c7976b0c
> [ 421.357574] ff80: 00000000 bf0008ec c796fd20 10000000 c0060510 bf000808 c796fd20 c798e000
> [ 421.365173] ffa0: c0060510 c0060650 c798ffd4 00000000 c78cc9a0 c00636ac c798ffb8 c798ffb8
> [ 421.372558] ffc0: c798ffd4 c7951d98 c796fd20 c0063440 00000000 00000000 c798ffd8 c798ffd8
> [ 421.379730] ffe0: 00000000 00000000 00000000 00000000 00000000 c002cca8 53384842 4e86725f
> [ 421.386993] Code: e1a01000 e59f000c eb0088c3 e3a03000 (e5833000)
> [ 421.394104] ---[ end trace 75ac12f5b28efc30 ]---
>
> Looks like something's wrong with firmware loading.
> I hope to fix it tomorrow and see how your changes work.
hmm, looks like someone tries to skb_push on a NULL skb. hmmmm,
can you please enable ksym, it's a bit hard to see the obvious bug here.
Regards,
Chr
Christian,
I changed the section in p54_tx_qos_accounting_free() to be the following:
@@ -223,7 +235,10 @@ static void p54_tx_qos_accounting_free(s
struct p54_hdr *hdr = (void *) skb->data;
struct p54_tx_data *data = (void *) hdr->data;
- priv->tx_stats[data->hw_queue].len--;
+ if (priv->tx_stats[data->hw_queue].len)
+ priv->tx_stats[data->hw_queue].len--;
+ else
+ dump_stack();
}
p54_wake_queues(priv);
}
In a 1 hour period running a 'make ARCH=i386 -j6' with the kernel
sources on an NFS mounted volume, I got the following sequence of
"failures":
a. dump_stack() with call from p54_find_and_unlock_skb()
b. dump_stack() with call from p54_find_and_unlock_skb()
c. disassociation error
d. dump_stack() with call from p54_find_and_unlock_skb()
e. dump_stack() with call from p54_find_and_unlock_skb()
f. dump_stack() with call from p54_find_and_unlock_skb()
g. disassociation error
h. disassociation error
i. disassociation error
j. disassociation error
k. dump_stack() with call from p54_find_and_unlock_skb()
l. disassociation error
m. disassociation error
When the dump_stack() occurs, the interface keeps going as long as the
queue len never goes negative. With the disassociation error, the
device must be powered cycled by unplugging and replugging it. The
driver does not need to be reloaded.
If I run the kernel make with -j1 rather than -j6, the builds have
always completed. Neither of the errors show up.
A "paper over the problem" workaround for the queue len < 0 would be
- priv->tx_stats[data->hw_queue].len--;
+ if (priv->tx_stats[data->hw_queue].len)
+ priv->tx_stats[data->hw_queue].len--;
but it doesn't properly fix the problem.
How sure are you of the locking? It seems that the more threads that
I'm using, the more likely that it is to happen. Similarly, the
disassociation errors could be overloading the firmware by adding too
many entries. Of course, it could result from a firmware error when
the device is driven hard.
I've only given it one trial, but p54usb only survived for 24 minutes
running my other torture test with repeating tcpperf in one terminal
and a flood ping in a second. This one got the disassociation error
and also a "failure to remove key" (error code -95) and two "failed to
uodate LEDs" (error code -12).
Christian Lamparter wrote:
> hmm, can you please give this a go? (I hope this patch still applies...)
> I'm curious if you can dump the tx_queue when p54_alloc_skb fail?
The patch applied with the aid of wiggle. Since adding it, I've had 7
of the disassociation failures, but none of the queue len problems.
I have modified every place that p54_alloc_skb() fails with a printk
that tells why it failed and then calls p54_dump_tx_queue(), but I
have not gotten to test it yet. I found an oops when 2.6.31-rc2 is
booted that I'm bisecting right now.
Larry
> hmm, looks like someone tries to skb_push on a NULL skb. hmmmm,
> can you please enable ksym, it's a bit hard to see the obvious bug here.
Now it looks so:
[ 1612.708129] p54spi_work
[ 1612.708221] p54spi_rx
[ 1612.708251] p54spi_wakeup
[ 1612.708312] p54spi_wait_bit
[ 1612.708465] skb_over_panic: text:bf000544 len:88 put:88 head:c78f4000 data:c78f4020 tail:0xc78f4078 end:0xc78f4020 dev:<NULL>
[ 1612.721405] kernel BUG at net/core/skbuff.c:127!
[ 1612.728210] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 1612.735687] pgd = c0004000
[ 1612.743225] [00000000] *pgd=00000000
[ 1612.750946] Internal error: Oops: 817 [#1]
[ 1612.758789] Modules linked in: p54spi
[ 1612.766815] CPU: 0 Not tainted (2.6.31-rc1-omap1-wl #5)
[ 1612.775207] PC is at __bug+0x20/0x2c
[ 1612.783721] LR is at release_console_sem+0x1b8/0x1ec
[ 1612.792510] pc : [<c002fb74>] lr : [<c0052dd4>] psr: 60000113
[ 1612.792541] sp : c7aebec8 ip : c7aebdf8 fp : c7aebed4
[ 1612.810119] r10: bf000904 r9 : 00000000 r8 : c7aebf30
[ 1612.818817] r7 : c78f4020 r6 : c78f4078 r5 : c78f4020 r4 : c78f4000
[ 1612.827545] r3 : 00000000 r2 : 60000113 r1 : 0000541c r0 : 0000003a
[ 1612.836334] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel
[ 1612.845245] Control: 00c5387d Table: 87fec000 DAC: 00000017
[ 1612.854125] Process phy0 (pid: 400, stack limit = 0xc7aea268)
[ 1612.862976] Stack: (0xc7aebec8 to 0xc7aec000)
[ 1612.871673] bec0: c7aebf0c c7aebed8 c01f5620 c002fb60 c78f4000 c78f4020
[ 1612.881011] bee0: c78f4078 c78f4020 c031c14c 00000000 c784abc0 c78f4020 bf000544 c7be3b34
[ 1612.890411] bf00: c7aebf2c c7aebf10 c01f6878 c01f55d0 c7be3940 c7be3940 00000058 c784abc0
[ 1612.899810] bf20: c7aebf5c c7aebf30 bf000544 c01f6824 00000000 00583680 c7be3afc c7be3afc
[ 1612.909179] bf40: c7be3940 50000000 c7be3b0c c7843680 c7aebf84 c7aebf60 bf000a08 bf000438
[ 1612.918395] bf60: c7abf458 10000000 c7aebf98 c7be3afc c795f780 c7aea000 c7aebfc4 c7aebf88
[ 1612.927368] bf80: c006318c bf000910 c7cccae0 00000000 c7843680 c0066778 c7aebf98 c7aebf98
[ 1612.936370] bfa0: c7aebfcc c7bf5ca0 c795f780 c0063000 00000000 00000000 c7aebff4 c7aebfc8
[ 1612.945190] bfc0: c00664ac c006300c 00000000 00000000 c7aebfd0 c7aebfd0 00000000 00000000
[ 1612.953918] bfe0: 00000000 00000000 00000000 c7aebff8 c0055270 c0066438 804b2021 804b2421
[ 1612.962829] Backtrace:
[ 1612.971343] [<c002fb54>] (__bug+0x0/0x2c) from [<c01f5620>] (skb_over_panic+0x5c/0x68)
[ 1612.980621] [<c01f55c4>] (skb_over_panic+0x0/0x68) from [<c01f6878>] (skb_put+0x60/0x70)
[ 1612.989990] r7:c7be3b34 r6:bf000544 r5:c78f4020 r4:c784abc0
[ 1612.999359] [<c01f6818>] (skb_put+0x0/0x70) from [<bf000544>] (p54spi_rx+0x118/0x190 [p54spi])
[ 1613.009246] r6:c784abc0 r5:00000058 r4:c7be3940
[ 1613.018920] [<bf00042c>] (p54spi_rx+0x0/0x190 [p54spi]) from [<bf000a08>] (p54spi_work+0x104/0x2ec [p54spi])
[ 1613.039001] r8:c7843680 r7:c7be3b0c r6:50000000 r5:c7be3940 r4:c7be3afc
[ 1613.049591] [<bf000904>] (p54spi_work+0x0/0x2ec [p54spi]) from [<c006318c>] (worker_thread+0x18c/0x20c)
[ 1613.070922] r7:c7aea000 r6:c795f780 r5:c7be3afc r4:c7aebf98
[ 1613.081756] [<c0063000>] (worker_thread+0x0/0x20c) from [<c00664ac>] (kthread+0x80/0x84)
[ 1613.092926] [<c006642c>] (kthread+0x0/0x84) from [<c0055270>] (do_exit+0x0/0x5ac)
[ 1613.104125] r7:00000000 r6:00000000 r5:00000000 r4:00000000
[ 1613.115203] Code: e1a01000 e59f000c eb008e3c e3a03000 (e5833000)
[ 1613.126403] ---[ end trace 1e6c70a2c1a5ece6 ]---
Thanks.
-- Max
On Saturday 04 July 2009 04:16:08 Larry Finger wrote:
> Christian Lamparter wrote:
> > This patch tries to address a long standing issue:
> > how to survive serve memory starvation situations,
> > without losing the device due to missing transfer-buffers.
> >
> > And with a flick of __GFP_NOWARN, we're able to handle ?all? memory
> > allocation failures on the rx-side during operation without much fuss.
> >
> > However, there is still an issue within the xmit-part.
> > This is likely due to p54's demand for a large free headroom for
> > every outgoing frame:
> >
> -- snip --
>
> > diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
> > index 461d88f..c521bbc 100644
> > --- a/drivers/net/wireless/p54/p54usb.c
> > +++ b/drivers/net/wireless/p54/p54usb.c
> > @@ -120,37 +120,14 @@ static void p54u_rx_cb(struct urb *urb)
> > }
> >
> > skb_put(skb, urb->actual_length);
> > + skb = p54_rx(dev, skb);
> >
> > - if (priv->hw_type == P54U_NET2280)
> > - skb_pull(skb, priv->common.tx_hdr_len);
> > - if (priv->common.fw_interface == FW_LM87) {
> > - skb_pull(skb, 4);
> > - skb_put(skb, 4);
> > - }
> > -
> > - if (p54_rx(dev, skb)) {
> > - skb = dev_alloc_skb(priv->common.rx_mtu + 32);
> > - if (unlikely(!skb)) {
> > - /* TODO check rx queue length and refill *somewhere* */
> > - return;
> > - }
> > + info = (struct p54u_rx_info *) skb->cb;
> > + info->urb = urb;
> > + info->dev = dev;
> > + urb->transfer_buffer = skb_tail_pointer(skb);
> > + urb->context = skb;
> >
> > - info = (struct p54u_rx_info *) skb->cb;
> > - info->urb = urb;
> > - info->dev = dev;
> > - urb->transfer_buffer = skb_tail_pointer(skb);
> > - urb->context = skb;
> > - } else {
> > - if (priv->hw_type == P54U_NET2280)
> > - skb_push(skb, priv->common.tx_hdr_len);
> > - if (priv->common.fw_interface == FW_LM87) {
> > - skb_push(skb, 4);
> > - skb_put(skb, 4);
> > - }
> > - skb_reset_tail_pointer(skb);
> > - skb_trim(skb, 0);
> > - urb->transfer_buffer = skb_tail_pointer(skb);
> > - }
> > skb_queue_tail(&priv->rx_queue, skb);
> > usb_anchor_urb(urb, &priv->submitted);
> > if (usb_submit_urb(urb, GFP_ATOMIC)) {
> > @@ -186,7 +163,7 @@ static int p54u_init_urbs(struct ieee80211_hw *dev)
> > int ret = 0;
> >
> > while (skb_queue_len(&priv->rx_queue) < 32) {
> > - skb = __dev_alloc_skb(priv->common.rx_mtu + 32, GFP_KERNEL);
> > + skb = __dev_alloc_skb(priv->common.rx_mtu, GFP_KERNEL);
> > if (!skb) {
> > ret = -ENOMEM;
> > goto err;
> > @@ -927,14 +904,17 @@ static int __devinit p54u_probe(struct usb_interface *intf,
> > #endif
> >
> > priv->hw_type = P54U_3887;
> > + priv->common.rx_wa_extra_tail_space = true;
> > dev->extra_tx_headroom += sizeof(struct lm87_tx_hdr);
> > priv->common.tx_hdr_len = sizeof(struct lm87_tx_hdr);
> > + priv->common.rx_hdr_len = sizeof(struct lm87_rx_hdr);
> > priv->common.tx = p54u_tx_lm87;
> > priv->upload_fw = p54u_upload_firmware_3887;
> > } else {
> > priv->hw_type = P54U_NET2280;
> > dev->extra_tx_headroom += sizeof(struct net2280_tx_hdr);
> > priv->common.tx_hdr_len = sizeof(struct net2280_tx_hdr);
> > + priv->common.rx_hdr_len = sizeof(struct net2280_tx_hdr);
> ==
> Should this be rx rather than tx?
I hope not...
@@ -120,37 +120,14 @@ static void p54u_rx_cb(struct urb *urb)
skb_put(skb, urb->actual_length);
if (priv->hw_type == P54U_NET2280)
skb_pull(skb, priv->common.tx_hdr_len);
net2280_tx_hdr's name is misleading, as the rx urbs have the header as well.
will probably change that to net2280_hdr in the future.
> -- skip --
>
> I have a problem that is not new with this patch. Using p54usb with
> LM87 firmware and under the heavy load of building a kernel with the
> source tree mounted with NFS, the interface will go off-line and
> cannot reconnect. When the driver is unloaded and reloaded, it is
> unable to reload the firmware. My log is as follows:
>
> usb 1-5: new high speed USB device using ehci_hcd and address 3
> usb 1-5: configuration #1 chosen from 1 choice
> cfg80211: Calling CRDA to update world regulatory domain
> usb 1-5: reset high speed USB device using ehci_hcd and address 3
> usb 1-5: firmware: requesting isl3887usb
> phy0: p54 detected a LM87 firmware
> p54: rx_mtu reduced from 3240 to 2380
> phy0: FW rev 2.13.24.0 - Softmac protocol 5.9
> phy0: cryptographic accelerator WEP:YES, TKIP:YES, CCMP:YES
> phy0: hwaddr 00:90:4b:d2:1f:cd, MAC:isl3892 RF:Xbow
> phy0: Selected rate control algorithm 'minstrel'
> Registered led device: p54-phy0::assoc
> Registered led device: p54-phy0::tx
> Registered led device: p54-phy0::rx
> Registered led device: p54-phy0::radio
> usb 1-5: is registered as 'phy0'
> usbcore: registered new interface driver p54usb
> wlan0: authenticate with AP 00:1a:70:46:ba:b1
> wlan0: authenticated
> wlan0: associate with AP 00:1a:70:46:ba:b1
> wlan0: RX AssocResp from 00:1a:70:46:ba:b1 (capab=0x431 status=0 aid=1)
> wlan0: associated
> wlan0: no probe response from AP 00:1a:70:46:ba:b1 - disassociating
> usbcore: deregistering interface driver p54usb
> cfg80211: Calling CRDA to update world regulatory domain
> usb 1-5: reset high speed USB device using ehci_hcd and address 3
> usb 1-5: firmware: requesting isl3887usb
> phy0: p54 detected a LM87 firmware
> p54: rx_mtu reduced from 3240 to 2380
> phy0: FW rev 2.13.24.0 - Softmac protocol 5.9
> phy0: cryptographic accelerator WEP:YES, TKIP:YES, CCMP:YES
> usb 1-5: (p54usb) firmware upload failed!
> p54usb: probe of 1-5:1.0 failed with error -110
>
hmm, is it possible for you to log the usb transfer?
A excellent tool for this is usbmon (userspace)... can be found here:
http://git.sipsolutions.net/usbmon.git
usbmon -s 3000 -i $(USB_ID) (which is usually number between 1 and the amount of connected usb devices)) > dst
and set p54common nohwcrypt=1, if you don't want to show GK/TK around ;-)
Regards,
Chr
Christian Lamparter wrote:
>
> hmm, is it possible for you to log the usb transfer?
> A excellent tool for this is usbmon (userspace)... can be found here:
> http://git.sipsolutions.net/usbmon.git
>
> usbmon -s 3000 -i $(USB_ID) (which is usually number between 1 and the amount of connected usb devices)) > dst
>
> and set p54common nohwcrypt=1, if you don't want to show GK/TK around ;-)
I have logged the usb transfers, but not yet analyzed them. This time
I got a new failure - I hit this warning at
net/mac80211/tx.c:1299
retries++;
if (WARN(retries > 10, "tx refused but queue
active\n"))
goto drop;
goto retry;
If I have analyzed this correctly, I hit this section of
p54_tx_qos_accounting_alloc at drivers/net/wireless/p54/txrx.c:204.
I'm running the splitup patches.
if (unlikely(queue->len > queue->limit &&
IS_QOS_QUEUE(p54_queue))) {
spin_unlock_irqrestore(&priv->tx_stats_lock, flags);
return -ENOSPC;
}
Any suggestions on debugging this would be appreciated.
Larry
> > hmm, looks like someone tries to skb_push on a NULL skb. hmmmm,
> > can you please enable ksym, it's a bit hard to see the obvious bug here.
> [ 1612.708465] skb_over_panic: text:bf000544 len:88 put:88 head:c78f4000 data:c78f4020 tail:0xc78f4078 end:0xc78f4020 dev:<NULL>
I see here valid skb with size 0x20, where we try to put 88 more bytes.
That's because p54spi_probe calls __dev_alloc_skb before p54spi_request_firmware.
It's working with the following change:
diff --git a/drivers/net/wireless/p54/p54spi.c b/drivers/net/wireless/p54/p54spi.c
index ab5b9b8..ff73a64 100644
--- a/drivers/net/wireless/p54/p54spi.c
+++ b/drivers/net/wireless/p54/p54spi.c
@@ -651,11 +651,6 @@ static int __devinit p54spi_probe(struct spi_device *spi)
priv->common.stop = p54spi_op_stop;
priv->common.tx = p54spi_op_tx;
- skb = __dev_alloc_skb(priv->common.rx_mtu, GFP_KERNEL);
- if (!skb)
- goto err_free_common;
- skb_queue_tail(&priv->rx_pool, skb);
-
ret = p54spi_request_firmware(hw);
if (ret < 0)
goto err_free_common;
@@ -664,6 +659,11 @@ static int __devinit p54spi_probe(struct spi_device *spi)
if (ret)
goto err_free_common;
+ skb = __dev_alloc_skb(priv->common.rx_mtu, GFP_KERNEL);
+ if (!skb)
+ goto err_free_common;
+ skb_queue_tail(&priv->rx_pool, skb);
+
ret = p54_register_common(hw, &priv->spi->dev);
if (ret)
goto err_free_common;
Still cannot stress-test it: it hangs in IBSS mode (I suspect rate control)
and it cannot initialize mesh: firmware doesn't respond after beacon submission.
Does mesh work now with USB/PCI?
Thanks.
-- Max
> This patch tries to address a long standing issue:
> how to survive serve memory starvation situations,
> without losing the device due to missing transfer-buffers.
Christian, how does placement of skb allocation point make a
difference? Anyway skb that carry data frame will be handed to the mac
and replaced with new one. That is, this patch does not eliminate
possibility of control frame loss due to insufficient memory.
Maybe we'd better have separate reusable skb for control frames which
is never handed out from the driver?
--
Max