2006-10-18 23:37:59

by Chris Leech

[permalink] [raw]
Subject: [PATCH 0/7] drivers/dma & I/OAT fixes

Various fixes for the hardware memcpy engine code and ioatdma

Most of these I've posted before, except for the patch to handle sysfs
errors from Jeff Garzik. I've dropped the controversial change to not
offload loopback traffic.

These changes can be pulled from
git://lost.foo-projects.org/~cleech/linux-2.6 master

--
Chris Leech <[email protected]>
I/O Acceleration Technology Software Development
LAN Access Division / Digital Enterprise Group


2006-10-18 23:38:32

by Chris Leech

[permalink] [raw]
Subject: [PATCH 4/7] I/OAT: Remove the use of writeq from the ioatdma driver

There's only one now anyway, and it's not in a performance path,
so make it behave the same on 32-bit and 64-bit CPUs.

Signed-off-by: Chris Leech <[email protected]>
---

drivers/dma/ioatdma.c | 10 ++++------
1 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index ceb03ee..2800c19 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -608,13 +608,11 @@ static void ioat_start_null_desc(struct
list_add_tail(&desc->node, &ioat_chan->used_desc);
spin_unlock_bh(&ioat_chan->desc_lock);

-#if (BITS_PER_LONG == 64)
- writeq(desc->phys, ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET);
-#else
- writel((u32) desc->phys,
+ writel(((u64) desc->phys) & 0x00000000FFFFFFFF,
ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
- writel(0, ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);
-#endif
+ writel(((u64) desc->phys) >> 32,
+ ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);
+
writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}


2006-10-18 23:38:05

by Chris Leech

[permalink] [raw]
Subject: [PATCH 1/7] I/OAT: Push pending transactions to hardware more frequently

Every 20 descriptors turns out to be to few append commands with
newer/faster CPUs. Pushing every 4 still cuts down on MMIO writes to an
acceptable level without letting the DMA engine run out of work.

Signed-off-by: Chris Leech <[email protected]>
---

drivers/dma/ioatdma.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index 0358419..f3b34b5 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -310,7 +310,7 @@ static dma_cookie_t do_ioat_dma_memcpy(s
list_splice_init(&new_chain, ioat_chan->used_desc.prev);

ioat_chan->pending += desc_count;
- if (ioat_chan->pending >= 20) {
+ if (ioat_chan->pending >= 4) {
append = 1;
ioat_chan->pending = 0;
}
@@ -818,7 +818,7 @@ static void __devexit ioat_remove(struct
}

/* MODULE API */
-MODULE_VERSION("1.7");
+MODULE_VERSION("1.9");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");


2006-10-18 23:38:33

by Chris Leech

[permalink] [raw]
Subject: [PATCH 3/7] I/OAT: Remove the wrappers around read(bwl)/write(bwl) in ioatdma

Signed-off-by: Chris Leech <[email protected]>
---

drivers/dma/ioatdma.c | 60 +++++++++++------------
drivers/dma/ioatdma_io.h | 118 ----------------------------------------------
2 files changed, 28 insertions(+), 150 deletions(-)

diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index f3b34b5..ceb03ee 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -32,7 +32,6 @@
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include "ioatdma.h"
-#include "ioatdma_io.h"
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"

@@ -51,8 +50,8 @@ static int enumerate_dma_channels(struct
int i;
struct ioat_dma_chan *ioat_chan;

- device->common.chancnt = ioatdma_read8(device, IOAT_CHANCNT_OFFSET);
- xfercap_scale = ioatdma_read8(device, IOAT_XFERCAP_OFFSET);
+ device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+ xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));

for (i = 0; i < device->common.chancnt; i++) {
@@ -123,7 +122,7 @@ static int ioat_dma_alloc_chan_resources
* In-use bit automatically set by reading chanctrl
* If 0, we got it, if 1, someone else did
*/
- chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
+ chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE)
return -EBUSY;

@@ -132,12 +131,12 @@ static int ioat_dma_alloc_chan_resources
IOAT_CHANCTRL_ERR_INT_EN |
IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
IOAT_CHANCTRL_ERR_COMPLETION_EN;
- ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+ writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);

- chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET);
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
if (chanerr) {
printk("IOAT: CHANERR = %x, clearing\n", chanerr);
- ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET, chanerr);
+ writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
}

/* Allocate descriptors */
@@ -161,10 +160,10 @@ static int ioat_dma_alloc_chan_resources
&ioat_chan->completion_addr);
memset(ioat_chan->completion_virt, 0,
sizeof(*ioat_chan->completion_virt));
- ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_LOW,
- ((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF);
- ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_HIGH,
- ((u64) ioat_chan->completion_addr) >> 32);
+ writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64) ioat_chan->completion_addr) >> 32,
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

ioat_start_null_desc(ioat_chan);
return i;
@@ -182,7 +181,7 @@ static void ioat_dma_free_chan_resources

ioat_dma_memcpy_cleanup(ioat_chan);

- ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
+ writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);

spin_lock_bh(&ioat_chan->desc_lock);
list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
@@ -210,9 +209,9 @@ static void ioat_dma_free_chan_resources
ioat_chan->last_completion = ioat_chan->completion_addr = 0;

/* Tell hw the chan is free */
- chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
+ chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE;
- ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+ writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
}

/**
@@ -318,9 +317,8 @@ static dma_cookie_t do_ioat_dma_memcpy(s
spin_unlock_bh(&ioat_chan->desc_lock);

if (append)
- ioatdma_chan_write8(ioat_chan,
- IOAT_CHANCMD_OFFSET,
- IOAT_CHANCMD_APPEND);
+ writeb(IOAT_CHANCMD_APPEND,
+ ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
return cookie;
}

@@ -417,9 +415,8 @@ static void ioat_dma_memcpy_issue_pendin

if (ioat_chan->pending != 0) {
ioat_chan->pending = 0;
- ioatdma_chan_write8(ioat_chan,
- IOAT_CHANCMD_OFFSET,
- IOAT_CHANCMD_APPEND);
+ writeb(IOAT_CHANCMD_APPEND,
+ ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}
}

@@ -449,7 +446,7 @@ static void ioat_dma_memcpy_cleanup(stru
if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
printk("IOAT: Channel halted, chanerr = %x\n",
- ioatdma_chan_read32(chan, IOAT_CHANERR_OFFSET));
+ readl(chan->reg_base + IOAT_CHANERR_OFFSET));

/* TODO do something to salvage the situation */
}
@@ -569,21 +566,21 @@ static irqreturn_t ioat_do_interrupt(int
unsigned long attnstatus;
u8 intrctrl;

- intrctrl = ioatdma_read8(instance, IOAT_INTRCTRL_OFFSET);
+ intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
return IRQ_NONE;

if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
- ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
return IRQ_NONE;
}

- attnstatus = ioatdma_read32(instance, IOAT_ATTNSTATUS_OFFSET);
+ attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);

printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);

- ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
return IRQ_HANDLED;
}

@@ -612,14 +609,13 @@ static void ioat_start_null_desc(struct
spin_unlock_bh(&ioat_chan->desc_lock);

#if (BITS_PER_LONG == 64)
- ioatdma_chan_write64(ioat_chan, IOAT_CHAINADDR_OFFSET, desc->phys);
+ writeq(desc->phys, ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET);
#else
- ioatdma_chan_write32(ioat_chan,
- IOAT_CHAINADDR_OFFSET_LOW,
- (u32) desc->phys);
- ioatdma_chan_write32(ioat_chan, IOAT_CHAINADDR_OFFSET_HIGH, 0);
+ writel((u32) desc->phys,
+ ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
+ writel(0, ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);
#endif
- ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_START);
+ writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}

/*
@@ -748,7 +744,7 @@ static int __devinit ioat_probe(struct p

device->reg_base = reg_base;

- ioatdma_write8(device, IOAT_INTRCTRL_OFFSET, IOAT_INTRCTRL_MASTER_INT_EN);
+ writeb(IOAT_INTRCTRL_MASTER_INT_EN, device->reg_base + IOAT_INTRCTRL_OFFSET);
pci_set_master(pdev);

INIT_LIST_HEAD(&device->common.channels);
diff --git a/drivers/dma/ioatdma_io.h b/drivers/dma/ioatdma_io.h
deleted file mode 100644
index c0b4bf6..0000000
--- a/drivers/dma/ioatdma_io.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef IOATDMA_IO_H
-#define IOATDMA_IO_H
-
-#include <asm/io.h>
-
-/*
- * device and per-channel MMIO register read and write functions
- * this is a lot of anoying inline functions, but it's typesafe
- */
-
-static inline u8 ioatdma_read8(struct ioat_device *device,
- unsigned int offset)
-{
- return readb(device->reg_base + offset);
-}
-
-static inline u16 ioatdma_read16(struct ioat_device *device,
- unsigned int offset)
-{
- return readw(device->reg_base + offset);
-}
-
-static inline u32 ioatdma_read32(struct ioat_device *device,
- unsigned int offset)
-{
- return readl(device->reg_base + offset);
-}
-
-static inline void ioatdma_write8(struct ioat_device *device,
- unsigned int offset, u8 value)
-{
- writeb(value, device->reg_base + offset);
-}
-
-static inline void ioatdma_write16(struct ioat_device *device,
- unsigned int offset, u16 value)
-{
- writew(value, device->reg_base + offset);
-}
-
-static inline void ioatdma_write32(struct ioat_device *device,
- unsigned int offset, u32 value)
-{
- writel(value, device->reg_base + offset);
-}
-
-static inline u8 ioatdma_chan_read8(struct ioat_dma_chan *chan,
- unsigned int offset)
-{
- return readb(chan->reg_base + offset);
-}
-
-static inline u16 ioatdma_chan_read16(struct ioat_dma_chan *chan,
- unsigned int offset)
-{
- return readw(chan->reg_base + offset);
-}
-
-static inline u32 ioatdma_chan_read32(struct ioat_dma_chan *chan,
- unsigned int offset)
-{
- return readl(chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write8(struct ioat_dma_chan *chan,
- unsigned int offset, u8 value)
-{
- writeb(value, chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write16(struct ioat_dma_chan *chan,
- unsigned int offset, u16 value)
-{
- writew(value, chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write32(struct ioat_dma_chan *chan,
- unsigned int offset, u32 value)
-{
- writel(value, chan->reg_base + offset);
-}
-
-#if (BITS_PER_LONG == 64)
-static inline u64 ioatdma_chan_read64(struct ioat_dma_chan *chan,
- unsigned int offset)
-{
- return readq(chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write64(struct ioat_dma_chan *chan,
- unsigned int offset, u64 value)
-{
- writeq(value, chan->reg_base + offset);
-}
-#endif
-
-#endif /* IOATDMA_IO_H */
-

2006-10-18 23:39:33

by Chris Leech

[permalink] [raw]
Subject: [PATCH 5/7] I/OAT: Add documentation for the tcp_dma_copybreak sysctl

Signed-off-by: Chris Leech <[email protected]>
---

Documentation/networking/ip-sysctl.txt | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fd3c0c0..e9ee102 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -375,6 +375,12 @@ tcp_slow_start_after_idle - BOOLEAN
be timed out after an idle period.
Default: 1

+tcp_dma_copybreak - INTEGER
+ Lower limit, in bytes, of the size of socket reads that will be
+ offloaded to a DMA copy engine, if one is present in the system
+ and CONFIG_NET_DMA is enabled.
+ Default: 4096
+
CIPSOv4 Variables:

cipso_cache_enable - BOOLEAN

2006-10-18 23:39:28

by Chris Leech

[permalink] [raw]
Subject: [PATCH 7/7] I/OAT: Only offload copies for TCP when there will be a context switch

The performance wins come with having the DMA copy engine doing the copies
in parallel with the context switch. If there is enough data ready on the
socket at recv time just use a regular copy.

Signed-off-by: Chris Leech <[email protected]>
---

net/ipv4/tcp.c | 10 +++++++---
1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 66e9a72..ef0a6cd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1108,6 +1108,8 @@ int tcp_recvmsg(struct kiocb *iocb, stru
long timeo;
struct task_struct *user_recv = NULL;
int copied_early = 0;
+ int available = 0;
+ struct sk_buff *skb;

lock_sock(sk);

@@ -1134,7 +1136,11 @@ int tcp_recvmsg(struct kiocb *iocb, stru
#ifdef CONFIG_NET_DMA
tp->ucopy.dma_chan = NULL;
preempt_disable();
- if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
+ skb = skb_peek_tail(&sk->sk_receive_queue);
+ if (skb)
+ available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
+ if ((available < target) &&
+ (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
!sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) {
preempt_enable_no_resched();
tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
@@ -1143,7 +1149,6 @@ int tcp_recvmsg(struct kiocb *iocb, stru
#endif

do {
- struct sk_buff *skb;
u32 offset;

/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
@@ -1431,7 +1436,6 @@ skip_copy:

#ifdef CONFIG_NET_DMA
if (tp->ucopy.dma_chan) {
- struct sk_buff *skb;
dma_cookie_t done, used;

dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);

2006-10-18 23:39:42

by Chris Leech

[permalink] [raw]
Subject: [PATCH 6/7] I/OAT: Add entries to MAINTAINERS for the DMA memcpy subsystem and ioatdma

Signed-off-by: Chris Leech <[email protected]>
---

MAINTAINERS | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5305dd6..533adbe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -915,6 +915,12 @@ M: [email protected]
L: [email protected]
S: Maintained

+DMA GENERIC MEMCPY SUBSYSTEM
+P: Chris Leech
+M: [email protected]
+L: [email protected]
+S: Maintained
+
DOCBOOK FOR DOCUMENTATION
P: Martin Waitz
M: [email protected]
@@ -1516,6 +1522,12 @@ P: Tigran Aivazian
M: [email protected]
S: Maintained

+INTEL I/OAT DMA DRIVER
+P: Chris Leech
+M: [email protected]
+L: [email protected]
+S: Supported
+
INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT
P: Deepak Saxena
M: [email protected]

2006-10-18 23:38:08

by Chris Leech

[permalink] [raw]
Subject: [PATCH 2/7] drivers/dma: handle sysfs errors

From: Jeff Garzik <[email protected]>

Signed-off-by: Jeff Garzik <[email protected]>
Signed-off-by: Chris Leech <[email protected]>
---

drivers/dma/dmaengine.c | 22 ++++++++++++++++++++--
1 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 1527804..dc65773 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -312,7 +312,7 @@ void dma_async_client_chan_request(struc
int dma_async_device_register(struct dma_device *device)
{
static int id;
- int chancnt = 0;
+ int chancnt = 0, rc;
struct dma_chan* chan;

if (!device)
@@ -334,8 +334,15 @@ int dma_async_device_register(struct dma
snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d",
device->dev_id, chan->chan_id);

+ rc = class_device_register(&chan->class_dev);
+ if (rc) {
+ chancnt--;
+ free_percpu(chan->local);
+ chan->local = NULL;
+ goto err_out;
+ }
+
kref_get(&device->refcount);
- class_device_register(&chan->class_dev);
}

mutex_lock(&dma_list_mutex);
@@ -345,6 +352,17 @@ int dma_async_device_register(struct dma
dma_chans_rebalance();

return 0;
+
+err_out:
+ list_for_each_entry(chan, &device->channels, device_node) {
+ if (chan->local == NULL)
+ continue;
+ kref_put(&device->refcount, dma_async_device_cleanup);
+ class_device_unregister(&chan->class_dev);
+ chancnt--;
+ free_percpu(chan->local);
+ }
+ return rc;
}

/**