2008-10-06 16:01:04

by Johannes Berg

[permalink] [raw]
Subject: [PATCH] iwlwifi: fix DMA code and bugs

This patch cleans up the DMA code to be understandable and not
completely wrong. In particular:
* there is no need to have a weird iwl_tfd_frame_data struct that is
used 10 times, just use an address struct 20 times
* therefore, all the is_odd junk goes away
* fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
fragments of a descriptor rather than all descriptors (this may be
the cause of the dma unmapping problem I reported)
* some more cleanups

Signed-off-by: Johannes Berg <[email protected]>
---
Tested on 5000 hw, please apply.

drivers/net/wireless/iwlwifi/iwl-4965-hw.h | 98 ++++++++++++++---------------
drivers/net/wireless/iwlwifi/iwl-5000.c | 2
drivers/net/wireless/iwlwifi/iwl-helpers.h | 5 -
drivers/net/wireless/iwlwifi/iwl-tx.c | 84 +++++-------------------
4 files changed, 70 insertions(+), 119 deletions(-)

--- wireless-testing.orig/drivers/net/wireless/iwlwifi/iwl-4965-hw.h 2008-10-06 16:41:59.074744190 +0200
+++ wireless-testing/drivers/net/wireless/iwlwifi/iwl-4965-hw.h 2008-10-06 17:20:39.754106476 +0200
@@ -822,54 +822,11 @@ enum {
#define IWL49_NUM_QUEUES 16
#define IWL49_NUM_AMPDU_QUEUES 8

-/**
- * struct iwl_tfd_frame_data
- *
- * Describes up to 2 buffers containing (contiguous) portions of a Tx frame.
- * Each buffer must be on dword boundary.
- * Up to 10 iwl_tfd_frame_data structures, describing up to 20 buffers,
- * may be filled within a TFD (iwl_tfd_frame).
- *
- * Bit fields in tb1_addr:
- * 31- 0: Tx buffer 1 address bits [31:0]
- *
- * Bit fields in val1:
- * 31-16: Tx buffer 2 address bits [15:0]
- * 15- 4: Tx buffer 1 length (bytes)
- * 3- 0: Tx buffer 1 address bits [32:32]
- *
- * Bit fields in val2:
- * 31-20: Tx buffer 2 length (bytes)
- * 19- 0: Tx buffer 2 address bits [35:16]
- */
-struct iwl_tfd_frame_data {
- __le32 tb1_addr;
-
- __le32 val1;
- /* __le32 ptb1_32_35:4; */
-#define IWL_tb1_addr_hi_POS 0
-#define IWL_tb1_addr_hi_LEN 4
-#define IWL_tb1_addr_hi_SYM val1
- /* __le32 tb_len1:12; */
-#define IWL_tb1_len_POS 4
-#define IWL_tb1_len_LEN 12
-#define IWL_tb1_len_SYM val1
- /* __le32 ptb2_0_15:16; */
-#define IWL_tb2_addr_lo16_POS 16
-#define IWL_tb2_addr_lo16_LEN 16
-#define IWL_tb2_addr_lo16_SYM val1
-
- __le32 val2;
- /* __le32 ptb2_16_35:20; */
-#define IWL_tb2_addr_hi20_POS 0
-#define IWL_tb2_addr_hi20_LEN 20
-#define IWL_tb2_addr_hi20_SYM val2
- /* __le32 tb_len2:12; */
-#define IWL_tb2_len_POS 20
-#define IWL_tb2_len_LEN 12
-#define IWL_tb2_len_SYM val2
-} __attribute__ ((packed));
-
+struct iwl_tfd_addr_desc {
+ __le32 lo;
+ /* 4 bits address, 12 bits length */
+ __le16 hi_len;
+} __attribute__((packed));

/**
* struct iwl_tfd_frame
@@ -908,11 +865,54 @@ struct iwl_tfd_frame {
#define IWL_num_tbs_SYM val0
/* __le32 rsvd2:1; */
/* __le32 padding:2; */
- struct iwl_tfd_frame_data pa[10];
+ struct iwl_tfd_addr_desc addrs[20];
__le32 reserved;
} __attribute__ ((packed));


+static inline dma_addr_t iwl_get_addr(struct iwl_tfd_frame *frame, u8 idx)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+
+ return (u64)le32_to_cpu(dsc->lo) |
+ ((u64)(le16_to_cpu(dsc->hi_len) & 0xF)) << 32;
+}
+
+static inline u16 iwl_get_len(struct iwl_tfd_frame *frame, u8 idx)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+
+ return le16_to_cpu(dsc->hi_len) >> 4;
+}
+
+static inline void iwl_set_addr(struct iwl_tfd_frame *frame, u8 idx, dma_addr_t addr)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+ BUG_ON(addr & ~0xFFFFFFFFFULL);
+ WARN_ON(addr & 0x3);
+
+ dsc->lo = cpu_to_le32(addr);
+ dsc->hi_len &= cpu_to_le16(~0xF);
+ dsc->hi_len |= cpu_to_le16((u64)addr >> 32);
+}
+
+static inline void iwl_set_len(struct iwl_tfd_frame *frame, u8 idx, u16 len)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+
+ dsc->hi_len &= cpu_to_le16(0xF);
+ dsc->hi_len |= cpu_to_le16(len << 4);
+}
+
+
/**
* struct iwl4965_queue_byte_cnt_entry
*
--- wireless-testing.orig/drivers/net/wireless/iwlwifi/iwl-tx.c 2008-10-06 16:41:59.134581439 +0200
+++ wireless-testing/drivers/net/wireless/iwlwifi/iwl-tx.c 2008-10-06 17:41:58.547239140 +0200
@@ -63,63 +63,39 @@ static const u16 default_tid_to_tx_fifo[
* Does NOT advance any TFD circular buffer read/write indexes
* Does NOT free the TFD itself (which is within circular buffer)
*/
-static int iwl_hw_txq_free_tfd(struct iwl_priv *priv, struct iwl_tx_queue *txq)
+static void iwl_hw_txq_free_tfd(struct iwl_priv *priv, struct iwl_tx_queue *txq)
{
struct iwl_tfd_frame *bd_tmp = (struct iwl_tfd_frame *)&txq->bd[0];
struct iwl_tfd_frame *bd = &bd_tmp[txq->q.read_ptr];
struct pci_dev *dev = priv->pci_dev;
int i;
int counter = 0;
- int index, is_odd;

/* Host command buffers stay mapped in memory, nothing to clean */
if (txq->q.id == IWL_CMD_QUEUE_NUM)
- return 0;
+ return;

/* Sanity check on number of chunks */
counter = IWL_GET_BITS(*bd, num_tbs);
- if (counter > MAX_NUM_OF_TBS) {
+ if (counter >= MAX_NUM_OF_TBS) {
IWL_ERROR("Too many chunks: %i\n", counter);
/* @todo issue fatal error, it is quite serious situation */
- return 0;
+ return;
}

- /* Unmap chunks, if any.
- * TFD info for odd chunks is different format than for even chunks. */
+ /* Unmap chunks, if any. */
for (i = 0; i < counter; i++) {
- index = i / 2;
- is_odd = i & 0x1;
-
- if (is_odd)
- pci_unmap_single(
- dev,
- IWL_GET_BITS(bd->pa[index], tb2_addr_lo16) |
- (IWL_GET_BITS(bd->pa[index],
- tb2_addr_hi20) << 16),
- IWL_GET_BITS(bd->pa[index], tb2_len),
- PCI_DMA_TODEVICE);
-
- else if (i > 0)
- pci_unmap_single(dev,
- le32_to_cpu(bd->pa[index].tb1_addr),
- IWL_GET_BITS(bd->pa[index], tb1_len),
- PCI_DMA_TODEVICE);
-
- /* Free SKB, if any, for this chunk */
- if (txq->txb[txq->q.read_ptr].skb[i]) {
- struct sk_buff *skb = txq->txb[txq->q.read_ptr].skb[i];
+ pci_unmap_single(dev, iwl_get_addr(bd, i), iwl_get_len(bd, i),
+ PCI_DMA_TODEVICE);

- dev_kfree_skb(skb);
- txq->txb[txq->q.read_ptr].skb[i] = NULL;
- }
+ dev_kfree_skb(txq->txb[txq->q.read_ptr].skb[i]);
+ txq->txb[txq->q.read_ptr].skb[i] = NULL;
}
- return 0;
}

static int iwl_hw_txq_attach_buf_to_tfd(struct iwl_priv *priv, void *ptr,
dma_addr_t addr, u16 len)
{
- int index, is_odd;
struct iwl_tfd_frame *tfd = ptr;
u32 num_tbs = IWL_GET_BITS(*tfd, num_tbs);

@@ -130,20 +106,8 @@ static int iwl_hw_txq_attach_buf_to_tfd(
return -EINVAL;
}

- index = num_tbs / 2;
- is_odd = num_tbs & 0x1;
-
- if (!is_odd) {
- tfd->pa[index].tb1_addr = cpu_to_le32(addr);
- IWL_SET_BITS(tfd->pa[index], tb1_addr_hi,
- iwl_get_dma_hi_address(addr));
- IWL_SET_BITS(tfd->pa[index], tb1_len, len);
- } else {
- IWL_SET_BITS(tfd->pa[index], tb2_addr_lo16,
- (u32) (addr & 0xffff));
- IWL_SET_BITS(tfd->pa[index], tb2_addr_hi20, addr >> 16);
- IWL_SET_BITS(tfd->pa[index], tb2_len, len);
- }
+ iwl_set_addr(tfd, num_tbs, addr);
+ iwl_set_len(tfd, num_tbs, len);

IWL_SET_BITS(*tfd, num_tbs, num_tbs + 1);

@@ -950,7 +914,7 @@ int iwl_tx_skb(struct iwl_priv *priv, st
scratch_phys = txcmd_phys + sizeof(struct iwl_cmd_header) +
offsetof(struct iwl_tx_cmd, scratch);
tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
- tx_cmd->dram_msb_ptr = iwl_get_dma_hi_address(scratch_phys);
+ tx_cmd->dram_msb_ptr = ((u64)scratch_phys >> 32) & 0xF;

if (!ieee80211_has_morefrags(hdr->frame_control)) {
txq->need_update = 1;
@@ -1106,6 +1070,8 @@ int iwl_tx_queue_reclaim(struct iwl_priv
struct iwl_tx_info *tx_info;
int nfreed = 0;

+ BUILD_BUG_ON(sizeof(struct iwl_tfd_frame) != 128);
+
if ((index >= q->n_bd) || (iwl_queue_used(q, index) == 0)) {
IWL_ERROR("Read index for DMA queue txq id (%d), index %d, "
"is out of range [0-%d] %d %d.\n", txq_id,
@@ -1113,8 +1079,9 @@ int iwl_tx_queue_reclaim(struct iwl_priv
return 0;
}

- for (index = iwl_queue_inc_wrap(index, q->n_bd); q->read_ptr != index;
- q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
+ for (index = iwl_queue_inc_wrap(index, q->n_bd);
+ q->read_ptr != index;
+ q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {

tx_info = &txq->txb[txq->q.read_ptr];
ieee80211_tx_status_irqsafe(priv->hw, tx_info->skb[0]);
@@ -1143,8 +1110,6 @@ static void iwl_hcmd_queue_reclaim(struc
struct iwl_tx_queue *txq = &priv->txq[txq_id];
struct iwl_queue *q = &txq->q;
struct iwl_tfd_frame *bd = &txq->bd[index];
- dma_addr_t dma_addr;
- int is_odd, buf_len;
int nfreed = 0;

if ((index >= q->n_bd) || (iwl_queue_used(q, index) == 0)) {
@@ -1156,25 +1121,16 @@ static void iwl_hcmd_queue_reclaim(struc

for (index = iwl_queue_inc_wrap(index, q->n_bd); q->read_ptr != index;
q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
+ bd = &txq->bd[index];

if (nfreed > 1) {
IWL_ERROR("HCMD skipped: index (%d) %d %d\n", index,
q->write_ptr, q->read_ptr);
queue_work(priv->workqueue, &priv->restart);
}
- is_odd = (index/2) & 0x1;
- if (is_odd) {
- dma_addr = IWL_GET_BITS(bd->pa[index], tb2_addr_lo16) |
- (IWL_GET_BITS(bd->pa[index],
- tb2_addr_hi20) << 16);
- buf_len = IWL_GET_BITS(bd->pa[index], tb2_len);
- } else {
- dma_addr = le32_to_cpu(bd->pa[index].tb1_addr);
- buf_len = IWL_GET_BITS(bd->pa[index], tb1_len);
- }

- pci_unmap_single(priv->pci_dev, dma_addr, buf_len,
- PCI_DMA_TODEVICE);
+ pci_unmap_single(priv->pci_dev, iwl_get_addr(bd, 0),
+ iwl_get_len(bd, 0), PCI_DMA_TODEVICE);
nfreed++;
}
}
--- wireless-testing.orig/drivers/net/wireless/iwlwifi/iwl-helpers.h 2008-10-06 17:15:43.947106313 +0200
+++ wireless-testing/drivers/net/wireless/iwlwifi/iwl-helpers.h 2008-10-06 17:16:09.396356365 +0200
@@ -159,11 +159,6 @@ static inline unsigned long elapsed_jiff
return end + (MAX_JIFFY_OFFSET - start) + 1;
}

-static inline u8 iwl_get_dma_hi_address(dma_addr_t addr)
-{
- return sizeof(addr) > sizeof(u32) ? (addr >> 16) >> 16 : 0;
-}
-
/**
* iwl_queue_inc_wrap - increment queue index, wrap back to beginning
* @index -- current index
--- wireless-testing.orig/drivers/net/wireless/iwlwifi/iwl-5000.c 2008-10-06 17:16:20.974227693 +0200
+++ wireless-testing/drivers/net/wireless/iwlwifi/iwl-5000.c 2008-10-06 17:17:13.661108608 +0200
@@ -535,7 +535,7 @@ static int iwl5000_load_section(struct i

iwl_write_direct32(priv,
FH_TFDIB_CTRL1_REG(FH_SRVC_CHNL),
- (iwl_get_dma_hi_address(phy_addr)
+ ((((u64)phy_addr >> 32) & 0xF)
<< FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt);

iwl_write_direct32(priv,




2008-10-06 18:34:31

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, 2008-10-06 at 20:30 +0200, Tomas Winkler wrote:

> > Of course, I would very much appreciate you review the actual bug fix in
> > iwl_hcmd_queue_reclaim, which consist of the addition of the line
> >
> > + bd = &txq->bd[index];
>
> Okay I found the source of the evil, with your big pointer :)
> The bug caused by this memory optimization juggling we've done and
> very bad cut and paste coding
> We moved for command buffers from pcI_alloc consitent to kmalloc which
> required pci mapping that that was coded wrongly. Now this kmalloc
> stuff is not good as well since we have this 36 bit memory limitation
> on 64 bit platforms.
> This is the issue I'm trying to address recently I'm not sure what yet
> what allocation schema would be best yet.

That can't be all though; it still crashes right away when I enable 64k
pages on my box.

johannes


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2008-10-14 15:21:48

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, 2008-10-06 at 18:10 +0200, Johannes Berg wrote:
> This patch cleans up the DMA code to be understandable and not
> completely wrong. In particular:
> * there is no need to have a weird iwl_tfd_frame_data struct that is
> used 10 times, just use an address struct 20 times
> * therefore, all the is_odd junk goes away
> * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> fragments of a descriptor rather than all descriptors (this may be
> the cause of the dma unmapping problem I reported)
> * some more cleanups
>
> Signed-off-by: Johannes Berg <[email protected]>
> ---
> Tested on 5000 hw, please apply.
>
> v2: fixes small issue with getting rid of iwl_get_dma_hi_address.

John, can you apply this? I keep getting invalid frees, or double-frees
without this patch.

[ 1831.106142] iommu_free: invalid entry
[ 1831.106207] free_entry= 0xb0040
[ 1831.106213] npages = 0x1
[ 1831.106219] entry = 0xb0040
[ 1831.106225] dma_addr = 0xb0040100
[ 1831.106231] Table = 0xc00000000083f348
[ 1831.106238] bus# = 0x0
[ 1831.106244] size = 0x80000
[ 1831.106249] startOff = 0x0
[ 1831.106255] index = 0x0
[ 1831.106266] ------------[ cut here ]------------
[ 1831.106273] Badness at arch/powerpc/kernel/iommu.c:260
[ 1831.106281] NIP: c000000000021a8c LR: c000000000021a88 CTR: c000000000047354
[ 1831.106291] REGS: c00000000080b140 TRAP: 0700 Tainted: G W (2.6.27-wl-03370-gea51ef8-dirty)
[ 1831.106299] MSR: 9000000000029032 <EE,ME,IR,DR> CR: 44004084 XER: 00000000
[ 1831.106337] TASK = c000000000773340[0] 'swapper' THREAD: c000000000808000 CPU: 0
[ 1831.106348] GPR00: c000000000021a88 c00000000080b3c0 c00000000080cd98 0000000000000023
[ 1831.106373] GPR04: 0000000000000001 c00000000003e10c 0000000000000000 0000000000000002
[ 1831.106397] GPR08: 0000000000000000 c000000000808000 0000000000000000 0000000000000001
[ 1831.106422] GPR12: 00000000000186a0 c00000000083c300 c0000000005b6b00 c000000000503a98
[ 1831.106446] GPR16: 0000000000000000 c00000020dc25ca8 c000000000808000 c00000020dc25cc0
[ 1831.106471] GPR20: 0000000000000001 c00000020dc31d80 0000000000000064 0000000000000008
[ 1831.106496] GPR24: c00000020dc31d80 c00000020dc34008 00000000b0040100 0000000000000001
[ 1831.106520] GPR28: c00000000083f348 00000000000b0040 c00000000079dae0 00000000000b0040
[ 1831.106556] NIP [c000000000021a8c] .__iommu_free+0x104/0x170
[ 1831.106566] LR [c000000000021a88] .__iommu_free+0x100/0x170
[ 1831.106573] Call Trace:
[ 1831.106582] [c00000000080b3c0] [c000000000021a88] .__iommu_free+0x100/0x170 (unreliable)
[ 1831.106600] [c00000000080b460] [c000000000021b50] .iommu_free+0x58/0xc0
[ 1831.106615] [c00000000080b500] [c00000000002188c] .dma_iommu_unmap_single+0x14/0x28
[ 1831.106655] [c00000000080b570] [d000000000377c7c] .iwl_tx_cmd_complete+0x35c/0x408 [iwlcore]
[ 1831.106689] [c00000000080b620] [d00000000033c788] .iwl_rx_handle+0x334/0x4a8 [iwlagn]
[ 1831.106716] [c00000000080b720] [d00000000033d278] .iwl4965_irq_tasklet+0x97c/0xccc [iwlagn]
[ 1831.106732] [c00000000080b7e0] [c0000000000551dc] .tasklet_action+0x14c/0x244
[ 1831.106746] [c00000000080b890] [c0000000000560cc] .__do_softirq+0xd8/0x1c4
[ 1831.106761] [c00000000080b940] [c00000000000c298] .do_softirq+0x5c/0xb8
[ 1831.106774] [c00000000080b9c0] [c000000000055b08] .irq_exit+0x74/0xe0
[ 1831.106788] [c00000000080ba40] [c00000000000c3fc] .do_IRQ+0x108/0x14c
[ 1831.106802] [c00000000080bad0] [c000000000004794] hardware_interrupt_entry+0x1c/0x20
[ 1831.106819] --- Exception: 501 at .cpu_idle+0x118/0x200
[ 1831.106821] LR = .cpu_idle+0x118/0x200
[ 1831.106831] [c00000000080bdc0] [c000000000011ea8] .cpu_idle+0xd0/0x200 (unreliable)
[ 1831.106852] [c00000000080be60] [c0000000003eeaec] .rest_init+0x8c/0xa4
[ 1831.106868] [c00000000080bee0] [c000000000587bc0] .start_kernel+0x4a0/0x4c8
[ 1831.106882] [c00000000080bf90] [c000000000007568] .start_here_common+0x3c/0x54
[ 1831.106893] Instruction dump:
[ 1831.106903] e89c0008 e87e8060 483d29ed 60000000 e89c0010 e87e8068 483d29dd 60000000
[ 1831.106971] e87e8070 e89c0020 483d29cd 60000000 <0fe00000> 48000040 e93e8078 7fe4fb78


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2008-10-06 16:55:14

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, 2008-10-06 at 18:51 +0200, Johannes Berg wrote:

> > > * therefore, all the is_odd junk goes away
> > > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> > > fragments of a descriptor rather than all descriptors (this may be
> > > the cause of the dma unmapping problem I reported)
> > > * some more cleanups
> >
> > > Signed-off-by: Johannes Berg <[email protected]>
> > > --
> >
> >
> > > Tested on 5000 hw, please apply.
> >
> > Great job, however do not apply this before I review it I had strong
> > feeling this will not
> > work with aggregation flows.
>
> I cannot imagine why you think that, care to explain?

Of course, I would very much appreciate you review the actual bug fix in
iwl_hcmd_queue_reclaim, which consist of the addition of the line

+ bd = &txq->bd[index];

johannes


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2008-10-06 17:21:31

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, 2008-10-06 at 18:42 +0200, Marcel Holtmann wrote:
> Hi John,
>
> > This patch cleans up the DMA code to be understandable and not
> > completely wrong. In particular:
> > * there is no need to have a weird iwl_tfd_frame_data struct that is
> > used 10 times, just use an address struct 20 times
> > * therefore, all the is_odd junk goes away
> > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> > fragments of a descriptor rather than all descriptors (this may be
> > the cause of the dma unmapping problem I reported)
> > * some more cleanups
> >
> > Signed-off-by: Johannes Berg <[email protected]>
> > ---
> > Tested on 5000 hw, please apply.
> >
> > v2: fixes small issue with getting rid of iwl_get_dma_hi_address.
>
> if this fixes the crashes with my 4965 card in my X61, then this is a
> candidate for 2.6.27-rc8. I will built a new kernel for my X61 as soon
> as possible.

I just tried, it certainly doesn't stop the thing from corrupting my
memory all over when I have 64k pages turned on.

johannes


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2008-10-06 16:48:25

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, 2008-10-06 at 18:42 +0200, Marcel Holtmann wrote:
> Hi John,
>
> > This patch cleans up the DMA code to be understandable and not
> > completely wrong. In particular:
> > * there is no need to have a weird iwl_tfd_frame_data struct that is
> > used 10 times, just use an address struct 20 times
> > * therefore, all the is_odd junk goes away
> > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> > fragments of a descriptor rather than all descriptors (this may be
> > the cause of the dma unmapping problem I reported)
> > * some more cleanups
> >
> > Signed-off-by: Johannes Berg <[email protected]>
> > ---
> > Tested on 5000 hw, please apply.
> >
> > v2: fixes small issue with getting rid of iwl_get_dma_hi_address.
>
> if this fixes the crashes with my 4965 card in my X61, then this is a
> candidate for 2.6.27-rc8. I will built a new kernel for my X61 as soon
> as possible.

I doubt it will, but who knows. Let me know. Right now I'm trying to
massage the code into something readable.

johannes


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2008-10-06 16:42:20

by Marcel Holtmann

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

Hi John,

> This patch cleans up the DMA code to be understandable and not
> completely wrong. In particular:
> * there is no need to have a weird iwl_tfd_frame_data struct that is
> used 10 times, just use an address struct 20 times
> * therefore, all the is_odd junk goes away
> * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> fragments of a descriptor rather than all descriptors (this may be
> the cause of the dma unmapping problem I reported)
> * some more cleanups
>
> Signed-off-by: Johannes Berg <[email protected]>
> ---
> Tested on 5000 hw, please apply.
>
> v2: fixes small issue with getting rid of iwl_get_dma_hi_address.

if this fixes the crashes with my 4965 card in my X61, then this is a
candidate for 2.6.27-rc8. I will built a new kernel for my X61 as soon
as possible.

Regards

Marcel



2008-10-06 19:52:37

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, 2008-10-06 at 15:23 -0400, John W. Linville wrote:
> On Mon, Oct 06, 2008 at 08:30:29PM +0200, Tomas Winkler wrote:
> > On Mon, Oct 6, 2008 at 6:54 PM, Johannes Berg <[email protected]> wrote:
> > > On Mon, 2008-10-06 at 18:51 +0200, Johannes Berg wrote:
> > >
> > >> > > * therefore, all the is_odd junk goes away
> > >> > > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> > >> > > fragments of a descriptor rather than all descriptors (this may be
> > >> > > the cause of the dma unmapping problem I reported)
> > >> > > * some more cleanups
> > >> >
> > >> > > Signed-off-by: Johannes Berg <[email protected]>
> > >> > > --
> > >> >
> > >> >
> > >> > > Tested on 5000 hw, please apply.
> > >> >
> > >> > Great job, however do not apply this before I review it I had strong
> > >> > feeling this will not
> > >> > work with aggregation flows.
> > >>
> > >> I cannot imagine why you think that, care to explain?
> >
> > Yep, no connection I thought at the first glance you've thatched more code.
> > >
> > > Of course, I would very much appreciate you review the actual bug fix in
> > > iwl_hcmd_queue_reclaim, which consist of the addition of the line
> > >
> > > + bd = &txq->bd[index];
> >
> > Okay I found the source of the evil, with your big pointer :)
> > The bug caused by this memory optimization juggling we've done and
> > very bad cut and paste coding
> > We moved for command buffers from pcI_alloc consitent to kmalloc which
> > required pci mapping that that was coded wrongly. Now this kmalloc
> > stuff is not good as well since we have this 36 bit memory limitation
> > on 64 bit platforms.
> > This is the issue I'm trying to address recently I'm not sure what yet
> > what allocation schema would be best yet.
>
> What if we just revert that patch?

Doesn't help, still crashes right away with 64k pages. Not sure that
even is a regression though.

johannes


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2008-10-06 16:10:39

by Johannes Berg

[permalink] [raw]
Subject: [PATCH v2] iwlwifi: fix DMA code and bugs

This patch cleans up the DMA code to be understandable and not
completely wrong. In particular:
* there is no need to have a weird iwl_tfd_frame_data struct that is
used 10 times, just use an address struct 20 times
* therefore, all the is_odd junk goes away
* fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
fragments of a descriptor rather than all descriptors (this may be
the cause of the dma unmapping problem I reported)
* some more cleanups

Signed-off-by: Johannes Berg <[email protected]>
---
Tested on 5000 hw, please apply.

v2: fixes small issue with getting rid of iwl_get_dma_hi_address.

Oh and who came up with struct iwl_tfd_frame_data? Were drugs involved?

drivers/net/wireless/iwlwifi/iwl-4965-hw.h | 98 ++++++++++++++---------------
drivers/net/wireless/iwlwifi/iwl-5000.c | 3
drivers/net/wireless/iwlwifi/iwl-helpers.h | 5 -
drivers/net/wireless/iwlwifi/iwl-tx.c | 84 +++++-------------------
4 files changed, 70 insertions(+), 120 deletions(-)

--- everything.orig/drivers/net/wireless/iwlwifi/iwl-4965-hw.h 2008-10-06 17:55:40.000000000 +0200
+++ everything/drivers/net/wireless/iwlwifi/iwl-4965-hw.h 2008-10-06 17:55:45.000000000 +0200
@@ -822,54 +822,11 @@ enum {
#define IWL49_NUM_QUEUES 16
#define IWL49_NUM_AMPDU_QUEUES 8

-/**
- * struct iwl_tfd_frame_data
- *
- * Describes up to 2 buffers containing (contiguous) portions of a Tx frame.
- * Each buffer must be on dword boundary.
- * Up to 10 iwl_tfd_frame_data structures, describing up to 20 buffers,
- * may be filled within a TFD (iwl_tfd_frame).
- *
- * Bit fields in tb1_addr:
- * 31- 0: Tx buffer 1 address bits [31:0]
- *
- * Bit fields in val1:
- * 31-16: Tx buffer 2 address bits [15:0]
- * 15- 4: Tx buffer 1 length (bytes)
- * 3- 0: Tx buffer 1 address bits [32:32]
- *
- * Bit fields in val2:
- * 31-20: Tx buffer 2 length (bytes)
- * 19- 0: Tx buffer 2 address bits [35:16]
- */
-struct iwl_tfd_frame_data {
- __le32 tb1_addr;
-
- __le32 val1;
- /* __le32 ptb1_32_35:4; */
-#define IWL_tb1_addr_hi_POS 0
-#define IWL_tb1_addr_hi_LEN 4
-#define IWL_tb1_addr_hi_SYM val1
- /* __le32 tb_len1:12; */
-#define IWL_tb1_len_POS 4
-#define IWL_tb1_len_LEN 12
-#define IWL_tb1_len_SYM val1
- /* __le32 ptb2_0_15:16; */
-#define IWL_tb2_addr_lo16_POS 16
-#define IWL_tb2_addr_lo16_LEN 16
-#define IWL_tb2_addr_lo16_SYM val1
-
- __le32 val2;
- /* __le32 ptb2_16_35:20; */
-#define IWL_tb2_addr_hi20_POS 0
-#define IWL_tb2_addr_hi20_LEN 20
-#define IWL_tb2_addr_hi20_SYM val2
- /* __le32 tb_len2:12; */
-#define IWL_tb2_len_POS 20
-#define IWL_tb2_len_LEN 12
-#define IWL_tb2_len_SYM val2
-} __attribute__ ((packed));
-
+struct iwl_tfd_addr_desc {
+ __le32 lo;
+ /* 4 bits address, 12 bits length */
+ __le16 hi_len;
+} __attribute__((packed));

/**
* struct iwl_tfd_frame
@@ -908,11 +865,54 @@ struct iwl_tfd_frame {
#define IWL_num_tbs_SYM val0
/* __le32 rsvd2:1; */
/* __le32 padding:2; */
- struct iwl_tfd_frame_data pa[10];
+ struct iwl_tfd_addr_desc addrs[20];
__le32 reserved;
} __attribute__ ((packed));


+static inline dma_addr_t iwl_get_addr(struct iwl_tfd_frame *frame, u8 idx)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+
+ return (u64)le32_to_cpu(dsc->lo) |
+ ((u64)(le16_to_cpu(dsc->hi_len) & 0xF)) << 32;
+}
+
+static inline u16 iwl_get_len(struct iwl_tfd_frame *frame, u8 idx)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+
+ return le16_to_cpu(dsc->hi_len) >> 4;
+}
+
+static inline void iwl_set_addr(struct iwl_tfd_frame *frame, u8 idx, dma_addr_t addr)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+ BUG_ON(addr & ~0xFFFFFFFFFULL);
+ WARN_ON(addr & 0x3);
+
+ dsc->lo = cpu_to_le32(addr);
+ dsc->hi_len &= cpu_to_le16(~0xF);
+ dsc->hi_len |= cpu_to_le16((u64)addr >> 32);
+}
+
+static inline void iwl_set_len(struct iwl_tfd_frame *frame, u8 idx, u16 len)
+{
+ struct iwl_tfd_addr_desc *dsc = &frame->addrs[idx];
+
+ BUG_ON(idx >= 20);
+
+ dsc->hi_len &= cpu_to_le16(0xF);
+ dsc->hi_len |= cpu_to_le16(len << 4);
+}
+
+
/**
* struct iwl4965_queue_byte_cnt_entry
*
--- everything.orig/drivers/net/wireless/iwlwifi/iwl-tx.c 2008-10-06 17:55:40.000000000 +0200
+++ everything/drivers/net/wireless/iwlwifi/iwl-tx.c 2008-10-06 18:06:38.000000000 +0200
@@ -63,63 +63,39 @@ static const u16 default_tid_to_tx_fifo[
* Does NOT advance any TFD circular buffer read/write indexes
* Does NOT free the TFD itself (which is within circular buffer)
*/
-static int iwl_hw_txq_free_tfd(struct iwl_priv *priv, struct iwl_tx_queue *txq)
+static void iwl_hw_txq_free_tfd(struct iwl_priv *priv, struct iwl_tx_queue *txq)
{
struct iwl_tfd_frame *bd_tmp = (struct iwl_tfd_frame *)&txq->bd[0];
struct iwl_tfd_frame *bd = &bd_tmp[txq->q.read_ptr];
struct pci_dev *dev = priv->pci_dev;
int i;
int counter = 0;
- int index, is_odd;

/* Host command buffers stay mapped in memory, nothing to clean */
if (txq->q.id == IWL_CMD_QUEUE_NUM)
- return 0;
+ return;

/* Sanity check on number of chunks */
counter = IWL_GET_BITS(*bd, num_tbs);
- if (counter > MAX_NUM_OF_TBS) {
+ if (counter >= MAX_NUM_OF_TBS) {
IWL_ERROR("Too many chunks: %i\n", counter);
/* @todo issue fatal error, it is quite serious situation */
- return 0;
+ return;
}

- /* Unmap chunks, if any.
- * TFD info for odd chunks is different format than for even chunks. */
+ /* Unmap chunks, if any. */
for (i = 0; i < counter; i++) {
- index = i / 2;
- is_odd = i & 0x1;
-
- if (is_odd)
- pci_unmap_single(
- dev,
- IWL_GET_BITS(bd->pa[index], tb2_addr_lo16) |
- (IWL_GET_BITS(bd->pa[index],
- tb2_addr_hi20) << 16),
- IWL_GET_BITS(bd->pa[index], tb2_len),
- PCI_DMA_TODEVICE);
-
- else if (i > 0)
- pci_unmap_single(dev,
- le32_to_cpu(bd->pa[index].tb1_addr),
- IWL_GET_BITS(bd->pa[index], tb1_len),
- PCI_DMA_TODEVICE);
-
- /* Free SKB, if any, for this chunk */
- if (txq->txb[txq->q.read_ptr].skb[i]) {
- struct sk_buff *skb = txq->txb[txq->q.read_ptr].skb[i];
+ pci_unmap_single(dev, iwl_get_addr(bd, i), iwl_get_len(bd, i),
+ PCI_DMA_TODEVICE);

- dev_kfree_skb(skb);
- txq->txb[txq->q.read_ptr].skb[i] = NULL;
- }
+ dev_kfree_skb(txq->txb[txq->q.read_ptr].skb[i]);
+ txq->txb[txq->q.read_ptr].skb[i] = NULL;
}
- return 0;
}

static int iwl_hw_txq_attach_buf_to_tfd(struct iwl_priv *priv, void *ptr,
dma_addr_t addr, u16 len)
{
- int index, is_odd;
struct iwl_tfd_frame *tfd = ptr;
u32 num_tbs = IWL_GET_BITS(*tfd, num_tbs);

@@ -130,20 +106,8 @@ static int iwl_hw_txq_attach_buf_to_tfd(
return -EINVAL;
}

- index = num_tbs / 2;
- is_odd = num_tbs & 0x1;
-
- if (!is_odd) {
- tfd->pa[index].tb1_addr = cpu_to_le32(addr);
- IWL_SET_BITS(tfd->pa[index], tb1_addr_hi,
- iwl_get_dma_hi_address(addr));
- IWL_SET_BITS(tfd->pa[index], tb1_len, len);
- } else {
- IWL_SET_BITS(tfd->pa[index], tb2_addr_lo16,
- (u32) (addr & 0xffff));
- IWL_SET_BITS(tfd->pa[index], tb2_addr_hi20, addr >> 16);
- IWL_SET_BITS(tfd->pa[index], tb2_len, len);
- }
+ iwl_set_addr(tfd, num_tbs, addr);
+ iwl_set_len(tfd, num_tbs, len);

IWL_SET_BITS(*tfd, num_tbs, num_tbs + 1);

@@ -950,7 +914,7 @@ int iwl_tx_skb(struct iwl_priv *priv, st
scratch_phys = txcmd_phys + sizeof(struct iwl_cmd_header) +
offsetof(struct iwl_tx_cmd, scratch);
tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
- tx_cmd->dram_msb_ptr = iwl_get_dma_hi_address(scratch_phys);
+ tx_cmd->dram_msb_ptr = ((u64)scratch_phys >> 32) & 0xFF;

if (!ieee80211_has_morefrags(hdr->frame_control)) {
txq->need_update = 1;
@@ -1106,6 +1070,8 @@ int iwl_tx_queue_reclaim(struct iwl_priv
struct iwl_tx_info *tx_info;
int nfreed = 0;

+ BUILD_BUG_ON(sizeof(struct iwl_tfd_frame) != 128);
+
if ((index >= q->n_bd) || (iwl_queue_used(q, index) == 0)) {
IWL_ERROR("Read index for DMA queue txq id (%d), index %d, "
"is out of range [0-%d] %d %d.\n", txq_id,
@@ -1113,8 +1079,9 @@ int iwl_tx_queue_reclaim(struct iwl_priv
return 0;
}

- for (index = iwl_queue_inc_wrap(index, q->n_bd); q->read_ptr != index;
- q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
+ for (index = iwl_queue_inc_wrap(index, q->n_bd);
+ q->read_ptr != index;
+ q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {

tx_info = &txq->txb[txq->q.read_ptr];
ieee80211_tx_status_irqsafe(priv->hw, tx_info->skb[0]);
@@ -1143,8 +1110,6 @@ static void iwl_hcmd_queue_reclaim(struc
struct iwl_tx_queue *txq = &priv->txq[txq_id];
struct iwl_queue *q = &txq->q;
struct iwl_tfd_frame *bd = &txq->bd[index];
- dma_addr_t dma_addr;
- int is_odd, buf_len;
int nfreed = 0;

if ((index >= q->n_bd) || (iwl_queue_used(q, index) == 0)) {
@@ -1156,25 +1121,16 @@ static void iwl_hcmd_queue_reclaim(struc

for (index = iwl_queue_inc_wrap(index, q->n_bd); q->read_ptr != index;
q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
+ bd = &txq->bd[index];

if (nfreed > 1) {
IWL_ERROR("HCMD skipped: index (%d) %d %d\n", index,
q->write_ptr, q->read_ptr);
queue_work(priv->workqueue, &priv->restart);
}
- is_odd = (index/2) & 0x1;
- if (is_odd) {
- dma_addr = IWL_GET_BITS(bd->pa[index], tb2_addr_lo16) |
- (IWL_GET_BITS(bd->pa[index],
- tb2_addr_hi20) << 16);
- buf_len = IWL_GET_BITS(bd->pa[index], tb2_len);
- } else {
- dma_addr = le32_to_cpu(bd->pa[index].tb1_addr);
- buf_len = IWL_GET_BITS(bd->pa[index], tb1_len);
- }

- pci_unmap_single(priv->pci_dev, dma_addr, buf_len,
- PCI_DMA_TODEVICE);
+ pci_unmap_single(priv->pci_dev, iwl_get_addr(bd, 0),
+ iwl_get_len(bd, 0), PCI_DMA_TODEVICE);
nfreed++;
}
}
--- everything.orig/drivers/net/wireless/iwlwifi/iwl-helpers.h 2008-09-11 05:22:48.000000000 +0200
+++ everything/drivers/net/wireless/iwlwifi/iwl-helpers.h 2008-10-06 17:55:45.000000000 +0200
@@ -159,11 +159,6 @@ static inline unsigned long elapsed_jiff
return end + (MAX_JIFFY_OFFSET - start) + 1;
}

-static inline u8 iwl_get_dma_hi_address(dma_addr_t addr)
-{
- return sizeof(addr) > sizeof(u32) ? (addr >> 16) >> 16 : 0;
-}
-
/**
* iwl_queue_inc_wrap - increment queue index, wrap back to beginning
* @index -- current index
--- everything.orig/drivers/net/wireless/iwlwifi/iwl-5000.c 2008-09-11 23:03:03.000000000 +0200
+++ everything/drivers/net/wireless/iwlwifi/iwl-5000.c 2008-10-06 18:04:24.000000000 +0200
@@ -535,8 +535,7 @@ static int iwl5000_load_section(struct i

iwl_write_direct32(priv,
FH_TFDIB_CTRL1_REG(FH_SRVC_CHNL),
- (iwl_get_dma_hi_address(phy_addr)
- << FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt);
+ (((u64)phy_addr >> 32) << FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt);

iwl_write_direct32(priv,
FH_TCSR_CHNL_TX_BUF_STS_REG(FH_SRVC_CHNL),



2008-10-06 16:51:47

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, 2008-10-06 at 18:49 +0200, Tomas Winkler wrote:
> On Mon, Oct 6, 2008 at 6:10 PM, Johannes Berg <[email protected]> wrote:
> > This patch cleans up the DMA code to be understandable and not
> > completely wrong. In particular:
> > * there is no need to have a weird iwl_tfd_frame_data struct that is
> > used 10 times, just use an address struct 20 times
>
> In the hardware this is defined as couples

So? That doesn't actually matter at all.

> > * therefore, all the is_odd junk goes away
> > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> > fragments of a descriptor rather than all descriptors (this may be
> > the cause of the dma unmapping problem I reported)
> > * some more cleanups
>
> > Signed-off-by: Johannes Berg <[email protected]>
> > --
>
>
> > Tested on 5000 hw, please apply.
>
> Great job, however do not apply this before I review it I had strong
> feeling this will not
> work with aggregation flows.

I cannot imagine why you think that, care to explain?

johannes


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2008-10-14 15:44:49

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Tue, 2008-10-14 at 17:21 +0200, Johannes Berg wrote:
> On Mon, 2008-10-06 at 18:10 +0200, Johannes Berg wrote:
> > This patch cleans up the DMA code to be understandable and not
> > completely wrong. In particular:
> > * there is no need to have a weird iwl_tfd_frame_data struct that is
> > used 10 times, just use an address struct 20 times
> > * therefore, all the is_odd junk goes away
> > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> > fragments of a descriptor rather than all descriptors (this may be
> > the cause of the dma unmapping problem I reported)
> > * some more cleanups
> >
> > Signed-off-by: Johannes Berg <[email protected]>
> > ---
> > Tested on 5000 hw, please apply.
> >
> > v2: fixes small issue with getting rid of iwl_get_dma_hi_address.
>
> John, can you apply this? I keep getting invalid frees, or double-frees
> without this patch.

Tomas reworked this patch. I will send the new patch upstream soon.

Reinette

2008-10-06 18:30:31

by Tomas Winkler

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, Oct 6, 2008 at 6:54 PM, Johannes Berg <[email protected]> wrote:
> On Mon, 2008-10-06 at 18:51 +0200, Johannes Berg wrote:
>
>> > > * therefore, all the is_odd junk goes away
>> > > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
>> > > fragments of a descriptor rather than all descriptors (this may be
>> > > the cause of the dma unmapping problem I reported)
>> > > * some more cleanups
>> >
>> > > Signed-off-by: Johannes Berg <[email protected]>
>> > > --
>> >
>> >
>> > > Tested on 5000 hw, please apply.
>> >
>> > Great job, however do not apply this before I review it I had strong
>> > feeling this will not
>> > work with aggregation flows.
>>
>> I cannot imagine why you think that, care to explain?

Yep, no connection I thought at the first glance you've thatched more code.
>
> Of course, I would very much appreciate you review the actual bug fix in
> iwl_hcmd_queue_reclaim, which consist of the addition of the line
>
> + bd = &txq->bd[index];

Okay I found the source of the evil, with your big pointer :)
The bug caused by this memory optimization juggling we've done and
very bad cut and paste coding
We moved for command buffers from pcI_alloc consitent to kmalloc which
required pci mapping that that was coded wrongly. Now this kmalloc
stuff is not good as well since we have this 36 bit memory limitation
on 64 bit platforms.
This is the issue I'm trying to address recently I'm not sure what yet
what allocation schema would be best yet.
Tomas

2008-10-06 21:37:12

by Tomas Winkler

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, Oct 6, 2008 at 9:23 PM, John W. Linville <[email protected]> wrote:
> On Mon, Oct 06, 2008 at 08:30:29PM +0200, Tomas Winkler wrote:
>> On Mon, Oct 6, 2008 at 6:54 PM, Johannes Berg <[email protected]> wrote:
>> > On Mon, 2008-10-06 at 18:51 +0200, Johannes Berg wrote:
>> >
>> >> > > * therefore, all the is_odd junk goes away
>> >> > > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
>> >> > > fragments of a descriptor rather than all descriptors (this may be
>> >> > > the cause of the dma unmapping problem I reported)
>> >> > > * some more cleanups
>> >> >
>> >> > > Signed-off-by: Johannes Berg <[email protected]>
>> >> > > --
>> >> >
>> >> >
>> >> > > Tested on 5000 hw, please apply.
>> >> >
>> >> > Great job, however do not apply this before I review it I had strong
>> >> > feeling this will not
>> >> > work with aggregation flows.
>> >>
>> >> I cannot imagine why you think that, care to explain?
>>
>> Yep, no connection I thought at the first glance you've thatched more code.
>> >
>> > Of course, I would very much appreciate you review the actual bug fix in
>> > iwl_hcmd_queue_reclaim, which consist of the addition of the line
>> >
>> > + bd = &txq->bd[index];
>>
>> Okay I found the source of the evil, with your big pointer :)
>> The bug caused by this memory optimization juggling we've done and
>> very bad cut and paste coding
>> We moved for command buffers from pcI_alloc consitent to kmalloc which
>> required pci mapping that that was coded wrongly. Now this kmalloc
>> stuff is not good as well since we have this 36 bit memory limitation
>> on 64 bit platforms.
>> This is the issue I'm trying to address recently I'm not sure what yet
>> what allocation schema would be best yet.
>
> What if we just revert that patch?
>
Won't help there is a real problem here
Tomas

2008-10-06 16:49:01

by Tomas Winkler

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, Oct 6, 2008 at 6:10 PM, Johannes Berg <[email protected]> wrote:
> This patch cleans up the DMA code to be understandable and not
> completely wrong. In particular:
> * there is no need to have a weird iwl_tfd_frame_data struct that is
> used 10 times, just use an address struct 20 times

In the hardware this is defined as couples

> * therefore, all the is_odd junk goes away
> * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> fragments of a descriptor rather than all descriptors (this may be
> the cause of the dma unmapping problem I reported)
> * some more cleanups

> Signed-off-by: Johannes Berg <[email protected]>
> --


> Tested on 5000 hw, please apply.

Great job, however do not apply this before I review it I had strong
feeling this will not
work with aggregation flows.
Thanks
Tomas

2008-10-06 19:23:55

by John W. Linville

[permalink] [raw]
Subject: Re: [PATCH v2] iwlwifi: fix DMA code and bugs

On Mon, Oct 06, 2008 at 08:30:29PM +0200, Tomas Winkler wrote:
> On Mon, Oct 6, 2008 at 6:54 PM, Johannes Berg <[email protected]> wrote:
> > On Mon, 2008-10-06 at 18:51 +0200, Johannes Berg wrote:
> >
> >> > > * therefore, all the is_odd junk goes away
> >> > > * fix a bug in iwl_hcmd_queue_reclaim where it would reclaim all the
> >> > > fragments of a descriptor rather than all descriptors (this may be
> >> > > the cause of the dma unmapping problem I reported)
> >> > > * some more cleanups
> >> >
> >> > > Signed-off-by: Johannes Berg <[email protected]>
> >> > > --
> >> >
> >> >
> >> > > Tested on 5000 hw, please apply.
> >> >
> >> > Great job, however do not apply this before I review it I had strong
> >> > feeling this will not
> >> > work with aggregation flows.
> >>
> >> I cannot imagine why you think that, care to explain?
>
> Yep, no connection I thought at the first glance you've thatched more code.
> >
> > Of course, I would very much appreciate you review the actual bug fix in
> > iwl_hcmd_queue_reclaim, which consist of the addition of the line
> >
> > + bd = &txq->bd[index];
>
> Okay I found the source of the evil, with your big pointer :)
> The bug caused by this memory optimization juggling we've done and
> very bad cut and paste coding
> We moved for command buffers from pcI_alloc consitent to kmalloc which
> required pci mapping that that was coded wrongly. Now this kmalloc
> stuff is not good as well since we have this 36 bit memory limitation
> on 64 bit platforms.
> This is the issue I'm trying to address recently I'm not sure what yet
> what allocation schema would be best yet.

What if we just revert that patch?

---

From: John W. Linville <[email protected]>
Date: Mon, 6 Oct 2008 15:22:27 -0400
Subject: [PATCH] Revert "iwlwifi: memory allocation optimization"

This reverts commit da99c4b6c25964b90c79f19beccda208df1a865a.

Conflicts:

drivers/net/wireless/iwlwifi/iwl-tx.c
---
drivers/net/wireless/iwlwifi/iwl-5000.c | 6 +-
drivers/net/wireless/iwlwifi/iwl-dev.h | 3 +-
drivers/net/wireless/iwlwifi/iwl-hcmd.c | 2 +-
drivers/net/wireless/iwlwifi/iwl-tx.c | 94 +++++++++----------------------
4 files changed, 33 insertions(+), 72 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index b08036a..429f839 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -938,8 +938,8 @@ static void iwl5000_txq_update_byte_cnt_tbl(struct iwl_priv *priv,
len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;

if (txq_id != IWL_CMD_QUEUE_NUM) {
- sta = txq->cmd[txq->q.write_ptr]->cmd.tx.sta_id;
- sec_ctl = txq->cmd[txq->q.write_ptr]->cmd.tx.sec_ctl;
+ sta = txq->cmd[txq->q.write_ptr].cmd.tx.sta_id;
+ sec_ctl = txq->cmd[txq->q.write_ptr].cmd.tx.sec_ctl;

switch (sec_ctl & TX_CMD_SEC_MSK) {
case TX_CMD_SEC_CCM:
@@ -978,7 +978,7 @@ static void iwl5000_txq_inval_byte_cnt_tbl(struct iwl_priv *priv,
u8 sta = 0;

if (txq_id != IWL_CMD_QUEUE_NUM)
- sta = txq->cmd[txq->q.read_ptr]->cmd.tx.sta_id;
+ sta = txq->cmd[txq->q.read_ptr].cmd.tx.sta_id;

shared_data->queues_byte_cnt_tbls[txq_id].tfd_offset[txq->q.read_ptr].
val = cpu_to_le16(1 | (sta << 12));
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h
index cdfb343..8416cab 100644
--- a/drivers/net/wireless/iwlwifi/iwl-dev.h
+++ b/drivers/net/wireless/iwlwifi/iwl-dev.h
@@ -135,7 +135,8 @@ struct iwl_tx_info {
struct iwl_tx_queue {
struct iwl_queue q;
struct iwl_tfd_frame *bd;
- struct iwl_cmd *cmd[TFD_TX_CMD_SLOTS];
+ struct iwl_cmd *cmd;
+ dma_addr_t dma_addr_cmd;
struct iwl_tx_info *txb;
int need_update;
int sched_retry;
diff --git a/drivers/net/wireless/iwlwifi/iwl-hcmd.c b/drivers/net/wireless/iwlwifi/iwl-hcmd.c
index 2eb03ee..986d79c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-hcmd.c
+++ b/drivers/net/wireless/iwlwifi/iwl-hcmd.c
@@ -227,7 +227,7 @@ cancel:
* TX cmd queue. Otherwise in case the cmd comes
* in later, it will possibly set an invalid
* address (cmd->meta.source). */
- qcmd = priv->txq[IWL_CMD_QUEUE_NUM].cmd[cmd_idx];
+ qcmd = &priv->txq[IWL_CMD_QUEUE_NUM].cmd[cmd_idx];
qcmd->meta.flags &= ~CMD_WANT_SKB;
}
fail:
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 78b1a7a..fa88d8e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -208,12 +208,11 @@ EXPORT_SYMBOL(iwl_txq_update_write_ptr);
* Free all buffers.
* 0-fill, but do not free "txq" descriptor structure.
*/
-static void iwl_tx_queue_free(struct iwl_priv *priv, int txq_id)
+static void iwl_tx_queue_free(struct iwl_priv *priv, struct iwl_tx_queue *txq)
{
- struct iwl_tx_queue *txq = &priv->txq[txq_id];
struct iwl_queue *q = &txq->q;
struct pci_dev *dev = priv->pci_dev;
- int i, slots_num, len;
+ int len;

if (q->n_bd == 0)
return;
@@ -228,12 +227,7 @@ static void iwl_tx_queue_free(struct iwl_priv *priv, int txq_id)
len += IWL_MAX_SCAN_SIZE;

/* De-alloc array of command/tx buffers */
- slots_num = (txq_id == IWL_CMD_QUEUE_NUM) ?
- TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
- for (i = 0; i < slots_num; i++)
- kfree(txq->cmd[i]);
- if (txq_id == IWL_CMD_QUEUE_NUM)
- kfree(txq->cmd[slots_num]);
+ pci_free_consistent(dev, len, txq->cmd, txq->dma_addr_cmd);

/* De-alloc circular buffer of TFDs */
if (txq->q.n_bd)
@@ -405,8 +399,9 @@ static int iwl_hw_tx_queue_init(struct iwl_priv *priv,
static int iwl_tx_queue_init(struct iwl_priv *priv, struct iwl_tx_queue *txq,
int slots_num, u32 txq_id)
{
- int i, len;
- int ret;
+ struct pci_dev *dev = priv->pci_dev;
+ int len;
+ int rc = 0;

/*
* Alloc buffer array for commands (Tx or other types of commands).
@@ -416,24 +411,19 @@ static int iwl_tx_queue_init(struct iwl_priv *priv, struct iwl_tx_queue *txq,
* For normal Tx queues (all other queues), no super-size command
* space is needed.
*/
- len = sizeof(struct iwl_cmd);
- for (i = 0; i <= slots_num; i++) {
- if (i == slots_num) {
- if (txq_id == IWL_CMD_QUEUE_NUM)
- len += IWL_MAX_SCAN_SIZE;
- else
- continue;
- }
-
- txq->cmd[i] = kmalloc(len, GFP_KERNEL);
- if (!txq->cmd[i])
- goto err;
- }
+ len = sizeof(struct iwl_cmd) * slots_num;
+ if (txq_id == IWL_CMD_QUEUE_NUM)
+ len += IWL_MAX_SCAN_SIZE;
+ txq->cmd = pci_alloc_consistent(dev, len, &txq->dma_addr_cmd);
+ if (!txq->cmd)
+ return -ENOMEM;

/* Alloc driver data array and TFD circular buffer */
- ret = iwl_tx_queue_alloc(priv, txq, txq_id);
- if (ret)
- goto err;
+ rc = iwl_tx_queue_alloc(priv, txq, txq_id);
+ if (rc) {
+ pci_free_consistent(dev, len, txq->cmd, txq->dma_addr_cmd);
+ return -ENOMEM;
+ }

txq->need_update = 0;

@@ -448,17 +438,6 @@ static int iwl_tx_queue_init(struct iwl_priv *priv, struct iwl_tx_queue *txq,
iwl_hw_tx_queue_init(priv, txq);

return 0;
-err:
- for (i = 0; i < slots_num; i++) {
- kfree(txq->cmd[i]);
- txq->cmd[i] = NULL;
- }
-
- if (txq_id == IWL_CMD_QUEUE_NUM) {
- kfree(txq->cmd[slots_num]);
- txq->cmd[slots_num] = NULL;
- }
- return -ENOMEM;
}
/**
* iwl_hw_txq_ctx_free - Free TXQ Context
@@ -471,7 +450,7 @@ void iwl_hw_txq_ctx_free(struct iwl_priv *priv)

/* Tx queues */
for (txq_id = 0; txq_id < priv->hw_params.max_txq_num; txq_id++)
- iwl_tx_queue_free(priv, txq_id);
+ iwl_tx_queue_free(priv, &priv->txq[txq_id]);

/* Keep-warm buffer */
iwl_kw_free(priv);
@@ -878,7 +857,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
txq->txb[q->write_ptr].skb[0] = skb;

/* Set up first empty entry in queue's array of Tx/cmd buffers */
- out_cmd = txq->cmd[idx];
+ out_cmd = &txq->cmd[idx];
tx_cmd = &out_cmd->cmd.tx;
memset(&out_cmd->hdr, 0, sizeof(out_cmd->hdr));
memset(tx_cmd, 0, sizeof(struct iwl_tx_cmd));
@@ -918,9 +897,8 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)

/* Physical address of this Tx command's header (not MAC header!),
* within command buffer array. */
- txcmd_phys = pci_map_single(priv->pci_dev, out_cmd,
- sizeof(struct iwl_cmd), PCI_DMA_TODEVICE);
- txcmd_phys += offsetof(struct iwl_cmd, hdr);
+ txcmd_phys = txq->dma_addr_cmd + sizeof(struct iwl_cmd) * idx +
+ offsetof(struct iwl_cmd, hdr);

/* Add buffer containing Tx command and MAC(!) header to TFD's
* first entry */
@@ -1021,7 +999,7 @@ int iwl_enqueue_hcmd(struct iwl_priv *priv, struct iwl_host_cmd *cmd)
struct iwl_cmd *out_cmd;
dma_addr_t phys_addr;
unsigned long flags;
- int len, ret;
+ int ret;
u32 idx;
u16 fix_size;

@@ -1051,7 +1029,7 @@ int iwl_enqueue_hcmd(struct iwl_priv *priv, struct iwl_host_cmd *cmd)


idx = get_cmd_index(q, q->write_ptr, cmd->meta.flags & CMD_SIZE_HUGE);
- out_cmd = txq->cmd[idx];
+ out_cmd = &txq->cmd[idx];

out_cmd->hdr.cmd = cmd->id;
memcpy(&out_cmd->meta, &cmd->meta, sizeof(cmd->meta));
@@ -1065,11 +1043,9 @@ int iwl_enqueue_hcmd(struct iwl_priv *priv, struct iwl_host_cmd *cmd)
INDEX_TO_SEQ(q->write_ptr));
if (out_cmd->meta.flags & CMD_SIZE_HUGE)
out_cmd->hdr.sequence |= cpu_to_le16(SEQ_HUGE_FRAME);
- len = (idx == TFD_CMD_SLOTS) ?
- IWL_MAX_SCAN_SIZE : sizeof(struct iwl_cmd);
- phys_addr = pci_map_single(priv->pci_dev, out_cmd, len,
- PCI_DMA_TODEVICE);
- phys_addr += offsetof(struct iwl_cmd, hdr);
+
+ phys_addr = txq->dma_addr_cmd + sizeof(txq->cmd[0]) * idx +
+ offsetof(struct iwl_cmd, hdr);
iwl_hw_txq_attach_buf_to_tfd(priv, tfd, phys_addr, fix_size);

IWL_DEBUG_HC("Sending command %s (#%x), seq: 0x%04X, "
@@ -1134,9 +1110,6 @@ static void iwl_hcmd_queue_reclaim(struct iwl_priv *priv, int txq_id, int index)
{
struct iwl_tx_queue *txq = &priv->txq[txq_id];
struct iwl_queue *q = &txq->q;
- struct iwl_tfd_frame *bd = &txq->bd[index];
- dma_addr_t dma_addr;
- int is_odd, buf_len;
int nfreed = 0;

if ((index >= q->n_bd) || (iwl_queue_used(q, index) == 0)) {
@@ -1154,19 +1127,6 @@ static void iwl_hcmd_queue_reclaim(struct iwl_priv *priv, int txq_id, int index)
q->write_ptr, q->read_ptr);
queue_work(priv->workqueue, &priv->restart);
}
- is_odd = (index/2) & 0x1;
- if (is_odd) {
- dma_addr = IWL_GET_BITS(bd->pa[index], tb2_addr_lo16) |
- (IWL_GET_BITS(bd->pa[index],
- tb2_addr_hi20) << 16);
- buf_len = IWL_GET_BITS(bd->pa[index], tb2_len);
- } else {
- dma_addr = le32_to_cpu(bd->pa[index].tb1_addr);
- buf_len = IWL_GET_BITS(bd->pa[index], tb1_len);
- }
-
- pci_unmap_single(priv->pci_dev, dma_addr, buf_len,
- PCI_DMA_TODEVICE);
nfreed++;
}
}
@@ -1198,7 +1158,7 @@ void iwl_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb)
BUG_ON(txq_id != IWL_CMD_QUEUE_NUM);

cmd_index = get_cmd_index(&priv->txq[IWL_CMD_QUEUE_NUM].q, index, huge);
- cmd = priv->txq[IWL_CMD_QUEUE_NUM].cmd[cmd_index];
+ cmd = &priv->txq[IWL_CMD_QUEUE_NUM].cmd[cmd_index];

/* Input error checking is done when commands are added to queue. */
if (cmd->meta.flags & CMD_WANT_SKB) {
--
1.5.4.3

--
John W. Linville Linux should be at the core
[email protected] of your literate lifestyle.