2007-10-18 23:20:43

by Shannon Nelson

[permalink] [raw]
Subject: [PATCH] I/OAT: Add support for version 2 of ioatdma device

Add support for version 2 of the ioatdma device. This device handles
the descriptor chain and DCA services slightly differently:
- Instead of moving the dma descriptors between a busy and an idle chain,
this new version uses a single circular chain so that we don't have
rewrite the next_descriptor pointers as we add new requests, and the
device doesn't need to re-read the last descriptor.
- The new device has the DCA tags defined internally instead of needing
them defined statically.

Signed-off-by: Shannon Nelson <[email protected]>
---

drivers/dma/ioat.c | 11 +
drivers/dma/ioat_dca.c | 164 +++++++++++
drivers/dma/ioat_dma.c | 578 ++++++++++++++++++++++++++++++++-------
drivers/dma/ioatdma.h | 32 +-
drivers/dma/ioatdma_hw.h | 33 ++
drivers/dma/ioatdma_registers.h | 106 ++++++-
include/linux/pci_ids.h | 1
7 files changed, 780 insertions(+), 145 deletions(-)

diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
index f204c39..16e0fd8 100644
--- a/drivers/dma/ioat.c
+++ b/drivers/dma/ioat.c
@@ -39,10 +39,14 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");

static struct pci_device_id ioat_pci_tbl[] = {
+ /* I/OAT v1 platforms */
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+ /* I/OAT v2 platforms */
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
{ 0, }
};

@@ -74,10 +78,17 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase)
if (device->dma && ioat_dca_enabled)
device->dca = ioat_dca_init(pdev, iobase);
break;
+ case IOAT_VER_2_0:
+ device->dma = ioat_dma_probe(pdev, iobase);
+ if (device->dma && ioat_dca_enabled)
+ device->dca = ioat2_dca_init(pdev, iobase);
+ break;
default:
err = -ENODEV;
break;
}
+ if (!device->dma)
+ err = -ENODEV;
return err;
}

diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
index ba98571..0fa8a98 100644
--- a/drivers/dma/ioat_dca.c
+++ b/drivers/dma/ioat_dca.c
@@ -261,3 +261,167 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
return dca;
}

+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu)
+{
+ u8 tag;
+
+ tag = ioat_dca_get_tag(dca, cpu);
+ tag = (~tag) & 0x1F;
+ return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+ .add_requester = ioat2_dca_add_requester,
+ .remove_requester = ioat2_dca_remove_requester,
+ .get_tag = ioat2_dca_get_tag,
+};
+
+static int ioat2_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u32 tag_map;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat2_dca_ops,
+ sizeof(*ioatdca)
+ + (sizeof(struct ioat_dca_slot) * slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+ pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+ for (i = 0; i < 5; i++) {
+ bit = (tag_map >> (4 * i)) & 0x0f;
+ if (bit < 8)
+ ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+ else
+ ioatdca->tag_map[i] = 0;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index fa4e56d..6a79b43 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -36,18 +36,24 @@
#include "ioatdma_registers.h"
#include "ioatdma_hw.h"

-#define INITIAL_IOAT_DESC_COUNT 128
-
#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)

+static int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+ "high-water mark for pushing ioat descriptors (default: 4)");
+
/* internal functions */
static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
+
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
static struct ioat_desc_sw *
-ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
+ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);

static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
struct ioatdma_device *device,
@@ -130,6 +136,12 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
ioat_chan->device = device;
ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
ioat_chan->xfercap = xfercap;
+ ioat_chan->desccount = 0;
+ if (ioat_chan->device->version != IOAT_VER_1_2) {
+ writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
+ | IOAT_DMA_DCA_ANY_CPU,
+ ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
spin_lock_init(&ioat_chan->cleanup_lock);
spin_lock_init(&ioat_chan->desc_lock);
INIT_LIST_HEAD(&ioat_chan->free_desc);
@@ -161,13 +173,17 @@ static void ioat_set_dest(dma_addr_t addr,
tx_to_ioat_desc(tx)->dst = addr;
}

-static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx)
+static inline void __ioat1_dma_memcpy_issue_pending(
+ struct ioat_dma_chan *ioat_chan);
+static inline void __ioat2_dma_memcpy_issue_pending(
+ struct ioat_dma_chan *ioat_chan);
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
struct ioat_desc_sw *prev, *new;
struct ioat_dma_descriptor *hw;
- int append = 0;
dma_cookie_t cookie;
LIST_HEAD(new_chain);
u32 copy;
@@ -209,7 +225,7 @@ static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx)
list_add_tail(&new->node, &new_chain);
desc_count++;
prev = new;
- } while (len && (new = ioat_dma_get_next_descriptor(ioat_chan)));
+ } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));

hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
if (new->async_tx.callback) {
@@ -246,20 +262,98 @@ static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx)
first->async_tx.phys;
__list_splice(&new_chain, ioat_chan->used_desc.prev);

+ ioat_chan->dmacount += desc_count;
ioat_chan->pending += desc_count;
- if (ioat_chan->pending >= 4) {
- append = 1;
- ioat_chan->pending = 0;
- }
+ if (ioat_chan->pending >= ioat_pending_level)
+ __ioat1_dma_memcpy_issue_pending(ioat_chan);
spin_unlock_bh(&ioat_chan->desc_lock);

- if (append)
- writeb(IOAT_CHANCMD_APPEND,
- ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
+ return cookie;
+}
+
+static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
+ struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
+ struct ioat_desc_sw *new;
+ struct ioat_dma_descriptor *hw;
+ dma_cookie_t cookie;
+ u32 copy;
+ size_t len;
+ dma_addr_t src, dst;
+ int orig_ack;
+ unsigned int desc_count = 0;
+
+ /* src and dest and len are stored in the initial descriptor */
+ len = first->len;
+ src = first->src;
+ dst = first->dst;
+ orig_ack = first->async_tx.ack;
+ new = first;
+
+ /* ioat_chan->desc_lock is still in force in version 2 path */
+
+ do {
+ copy = min((u32) len, ioat_chan->xfercap);
+
+ new->async_tx.ack = 1;
+
+ hw = new->hw;
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dst;
+
+ len -= copy;
+ dst += copy;
+ src += copy;
+ desc_count++;
+ } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
+
+ hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+ if (new->async_tx.callback) {
+ hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
+ if (first != new) {
+ /* move callback into to last desc */
+ new->async_tx.callback = first->async_tx.callback;
+ new->async_tx.callback_param
+ = first->async_tx.callback_param;
+ first->async_tx.callback = NULL;
+ first->async_tx.callback_param = NULL;
+ }
+ }
+
+ new->tx_cnt = desc_count;
+ new->async_tx.ack = orig_ack; /* client is in control of this ack */
+
+ /* store the original values for use in later cleanup */
+ if (new != first) {
+ new->src = first->src;
+ new->dst = first->dst;
+ new->len = first->len;
+ }
+
+ /* cookie incr and addition to used_list must be atomic */
+ cookie = ioat_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ ioat_chan->common.cookie = new->async_tx.cookie = cookie;
+
+ ioat_chan->dmacount += desc_count;
+ ioat_chan->pending += desc_count;
+ if (ioat_chan->pending >= ioat_pending_level)
+ __ioat2_dma_memcpy_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);

return cookie;
}

+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat_chan: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
struct ioat_dma_chan *ioat_chan,
gfp_t flags)
@@ -284,15 +378,57 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
desc_sw->async_tx.tx_set_src = ioat_set_src;
desc_sw->async_tx.tx_set_dest = ioat_set_dest;
- desc_sw->async_tx.tx_submit = ioat_tx_submit;
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ desc_sw->async_tx.tx_submit = ioat1_tx_submit;
+ break;
+ case IOAT_VER_2_0:
+ desc_sw->async_tx.tx_submit = ioat2_tx_submit;
+ break;
+ }
INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
+
desc_sw->hw = desc;
desc_sw->async_tx.phys = phys;

return desc_sw;
}

-/* returns the actual number of allocated descriptors */
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+ "initial descriptors per channel (default: 256)");
+
+/**
+ * ioat2_dma_massage_chan_desc - link the descriptors into a circle
+ * @ioat_chan: the channel to be massaged
+ */
+static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
+{
+ struct ioat_desc_sw *desc, *_desc;
+
+ /* setup used_desc */
+ ioat_chan->used_desc.next = ioat_chan->free_desc.next;
+ ioat_chan->used_desc.prev = NULL;
+
+ /* pull free_desc out of the circle so that every node is a hw
+ * descriptor, but leave it pointing to the list
+ */
+ ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
+ ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
+
+ /* circle link the hw descriptors */
+ desc = to_ioat_desc(ioat_chan->free_desc.next);
+ desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
+ list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
+ desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
+ }
+}
+
+/**
+ * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: the channel to be filled out
+ */
static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
{
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
@@ -304,7 +440,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)

/* have we already been set up? */
if (!list_empty(&ioat_chan->free_desc))
- return INITIAL_IOAT_DESC_COUNT;
+ return ioat_chan->desccount;

/* Setup register to interrupt and write completion status on error */
chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
@@ -320,7 +456,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
}

/* Allocate descriptors */
- for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) {
+ for (i = 0; i < ioat_initial_desc_count; i++) {
desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
if (!desc) {
dev_err(&ioat_chan->device->pdev->dev,
@@ -330,7 +466,10 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
list_add_tail(&desc->node, &tmp_list);
}
spin_lock_bh(&ioat_chan->desc_lock);
+ ioat_chan->desccount = i;
list_splice(&tmp_list, &ioat_chan->free_desc);
+ if (ioat_chan->device->version != IOAT_VER_1_2)
+ ioat2_dma_massage_chan_desc(ioat_chan);
spin_unlock_bh(&ioat_chan->desc_lock);

/* allocate a completion writeback area */
@@ -347,10 +486,14 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

tasklet_enable(&ioat_chan->cleanup_task);
- ioat_dma_start_null_desc(ioat_chan);
- return i;
+ ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
+ return ioat_chan->desccount;
}

+/**
+ * ioat_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
static void ioat_dma_free_chan_resources(struct dma_chan *chan)
{
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
@@ -364,22 +507,45 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
/* Delay 100ms after reset to allow internal DMA logic to quiesce
* before removing DMA descriptor resources.
*/
- writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
+ writeb(IOAT_CHANCMD_RESET,
+ ioat_chan->reg_base
+ + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
mdelay(100);

spin_lock_bh(&ioat_chan->desc_lock);
- list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
- in_use_descs++;
- list_del(&desc->node);
- pci_pool_free(ioatdma_device->dma_pool, desc->hw,
- desc->async_tx.phys);
- kfree(desc);
- }
- list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
- list_del(&desc->node);
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ list_for_each_entry_safe(desc, _desc,
+ &ioat_chan->used_desc, node) {
+ in_use_descs++;
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->async_tx.phys);
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc,
+ &ioat_chan->free_desc, node) {
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->async_tx.phys);
+ kfree(desc);
+ }
+ break;
+ case IOAT_VER_2_0:
+ list_for_each_entry_safe(desc, _desc,
+ ioat_chan->free_desc.next, node) {
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->async_tx.phys);
+ kfree(desc);
+ }
+ desc = to_ioat_desc(ioat_chan->free_desc.next);
pci_pool_free(ioatdma_device->dma_pool, desc->hw,
desc->async_tx.phys);
kfree(desc);
+ INIT_LIST_HEAD(&ioat_chan->free_desc);
+ INIT_LIST_HEAD(&ioat_chan->used_desc);
+ break;
}
spin_unlock_bh(&ioat_chan->desc_lock);

@@ -395,6 +561,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)

ioat_chan->last_completion = ioat_chan->completion_addr = 0;
ioat_chan->pending = 0;
+ ioat_chan->dmacount = 0;
}

/**
@@ -406,7 +573,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
* has run out.
*/
static struct ioat_desc_sw *
-ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
{
struct ioat_desc_sw *new = NULL;

@@ -425,7 +592,82 @@ ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
return new;
}

-static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy(
+static struct ioat_desc_sw *
+ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
+{
+ struct ioat_desc_sw *new = NULL;
+
+ /*
+ * used.prev points to where to start processing
+ * used.next points to next free descriptor
+ * if used.prev == NULL, there are none waiting to be processed
+ * if used.next == used.prev.prev, there is only one free descriptor,
+ * and we need to use it to as a noop descriptor before
+ * linking in a new set of descriptors, since the device
+ * has probably already read the pointer to it
+ */
+ if (ioat_chan->used_desc.prev &&
+ ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
+
+ struct ioat_desc_sw *desc = NULL;
+ struct ioat_desc_sw *noop_desc = NULL;
+ int i;
+
+ /* set up the noop descriptor */
+ noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
+ noop_desc->hw->size = 0;
+ noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
+ noop_desc->hw->src_addr = 0;
+ noop_desc->hw->dst_addr = 0;
+
+ ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
+ ioat_chan->pending++;
+ ioat_chan->dmacount++;
+
+ /* get a few more descriptors */
+ for (i = 16; i; i--) {
+ desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
+ BUG_ON(!desc);
+ list_add_tail(&desc->node, ioat_chan->used_desc.next);
+
+ desc->hw->next
+ = to_ioat_desc(desc->node.next)->async_tx.phys;
+ to_ioat_desc(desc->node.prev)->hw->next
+ = desc->async_tx.phys;
+ ioat_chan->desccount++;
+ }
+
+ ioat_chan->used_desc.next = noop_desc->node.next;
+ }
+ new = to_ioat_desc(ioat_chan->used_desc.next);
+ prefetch(new);
+ ioat_chan->used_desc.next = new->node.next;
+
+ if (ioat_chan->used_desc.prev == NULL)
+ ioat_chan->used_desc.prev = &new->node;
+
+ prefetch(new->hw);
+ return new;
+}
+
+static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
+ struct ioat_dma_chan *ioat_chan)
+{
+ if (!ioat_chan)
+ return NULL;
+
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ return ioat1_dma_get_next_descriptor(ioat_chan);
+ break;
+ case IOAT_VER_2_0:
+ return ioat2_dma_get_next_descriptor(ioat_chan);
+ break;
+ }
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
struct dma_chan *chan,
size_t len,
int int_en)
@@ -441,19 +683,62 @@ static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy(
return new ? &new->async_tx : NULL;
}

+static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
+ struct dma_chan *chan,
+ size_t len,
+ int int_en)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+ struct ioat_desc_sw *new;
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+ new = ioat2_dma_get_next_descriptor(ioat_chan);
+ new->len = len;
+
+ /* leave ioat_chan->desc_lock set in version 2 path */
+ return new ? &new->async_tx : NULL;
+}
+
+
/**
* ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
* descriptors to hw
* @chan: DMA channel handle
*/
-static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
+static inline void __ioat1_dma_memcpy_issue_pending(
+ struct ioat_dma_chan *ioat_chan)
+{
+ ioat_chan->pending = 0;
+ writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
{
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);

if (ioat_chan->pending != 0) {
- ioat_chan->pending = 0;
- writeb(IOAT_CHANCMD_APPEND,
- ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
+ spin_lock_bh(&ioat_chan->desc_lock);
+ __ioat1_dma_memcpy_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ }
+}
+
+static inline void __ioat2_dma_memcpy_issue_pending(
+ struct ioat_dma_chan *ioat_chan)
+{
+ ioat_chan->pending = 0;
+ writew(ioat_chan->dmacount,
+ ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+}
+
+static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (ioat_chan->pending != 0) {
+ spin_lock_bh(&ioat_chan->desc_lock);
+ __ioat2_dma_memcpy_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
}
}

@@ -465,11 +750,17 @@ static void ioat_dma_cleanup_tasklet(unsigned long data)
chan->reg_base + IOAT_CHANCTRL_OFFSET);
}

+/**
+ * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
+ * @chan: ioat channel to be cleaned up
+ */
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
{
unsigned long phys_complete;
struct ioat_desc_sw *desc, *_desc;
dma_cookie_t cookie = 0;
+ unsigned long desc_phys;
+ struct ioat_desc_sw *latest_desc;

prefetch(ioat_chan->completion_virt);

@@ -507,56 +798,115 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)

cookie = 0;
spin_lock_bh(&ioat_chan->desc_lock);
- list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
-
- /*
- * Incoming DMA requests may use multiple descriptors, due to
- * exceeding xfercap, perhaps. If so, only the last one will
- * have a cookie, and require unmapping.
- */
- if (desc->async_tx.cookie) {
- cookie = desc->async_tx.cookie;
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ list_for_each_entry_safe(desc, _desc,
+ &ioat_chan->used_desc, node) {

/*
- * yes we are unmapping both _page and _single alloc'd
- * regions with unmap_page. Is this *really* that bad?
+ * Incoming DMA requests may use multiple descriptors,
+ * due to exceeding xfercap, perhaps. If so, only the
+ * last one will have a cookie, and require unmapping.
*/
- pci_unmap_page(ioat_chan->device->pdev,
- pci_unmap_addr(desc, dst),
- pci_unmap_len(desc, len),
- PCI_DMA_FROMDEVICE);
- pci_unmap_page(ioat_chan->device->pdev,
- pci_unmap_addr(desc, src),
- pci_unmap_len(desc, len),
- PCI_DMA_TODEVICE);
- if (desc->async_tx.callback) {
- desc->async_tx.callback(
- desc->async_tx.callback_param);
- desc->async_tx.callback = NULL;
+ if (desc->async_tx.cookie) {
+ cookie = desc->async_tx.cookie;
+
+ /*
+ * yes we are unmapping both _page and _single
+ * alloc'd regions with unmap_page. Is this
+ * *really* that bad?
+ */
+ pci_unmap_page(ioat_chan->device->pdev,
+ pci_unmap_addr(desc, dst),
+ pci_unmap_len(desc, len),
+ PCI_DMA_FROMDEVICE);
+ pci_unmap_page(ioat_chan->device->pdev,
+ pci_unmap_addr(desc, src),
+ pci_unmap_len(desc, len),
+ PCI_DMA_TODEVICE);
+
+ if (desc->async_tx.callback) {
+ desc->async_tx.callback(desc->async_tx.callback_param);
+ desc->async_tx.callback = NULL;
+ }
}
- }

- if (desc->async_tx.phys != phys_complete) {
- /*
- * a completed entry, but not the last, so cleanup
- * if the client is done with the descriptor
- */
- if (desc->async_tx.ack) {
- list_del(&desc->node);
- list_add_tail(&desc->node,
- &ioat_chan->free_desc);
- } else
+ if (desc->async_tx.phys != phys_complete) {
+ /*
+ * a completed entry, but not the last, so clean
+ * up if the client is done with the descriptor
+ */
+ if (desc->async_tx.ack) {
+ list_del(&desc->node);
+ list_add_tail(&desc->node,
+ &ioat_chan->free_desc);
+ } else
+ desc->async_tx.cookie = 0;
+ } else {
+ /*
+ * last used desc. Do not remove, so we can
+ * append from it, but don't look at it next
+ * time, either
+ */
desc->async_tx.cookie = 0;
- } else {
- /*
- * last used desc. Do not remove, so we can append from
- * it, but don't look at it next time, either
- */
- desc->async_tx.cookie = 0;

- /* TODO check status bits? */
+ /* TODO check status bits? */
+ break;
+ }
+ }
+ break;
+ case IOAT_VER_2_0:
+ /* has some other thread has already cleaned up? */
+ if (ioat_chan->used_desc.prev == NULL)
break;
+
+ /* work backwards to find latest finished desc */
+ desc = to_ioat_desc(ioat_chan->used_desc.next);
+ latest_desc = NULL;
+ do {
+ desc = to_ioat_desc(desc->node.prev);
+ desc_phys = (unsigned long)desc->async_tx.phys
+ & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+ if (desc_phys == phys_complete) {
+ latest_desc = desc;
+ break;
+ }
+ } while (&desc->node != ioat_chan->used_desc.prev);
+
+ if (latest_desc != NULL) {
+
+ /* work forwards to clear finished descriptors */
+ for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
+ &desc->node != latest_desc->node.next &&
+ &desc->node != ioat_chan->used_desc.next;
+ desc = to_ioat_desc(desc->node.next)) {
+ if (desc->async_tx.cookie) {
+ cookie = desc->async_tx.cookie;
+ desc->async_tx.cookie = 0;
+
+ pci_unmap_page(ioat_chan->device->pdev,
+ pci_unmap_addr(desc, dst),
+ pci_unmap_len(desc, len),
+ PCI_DMA_FROMDEVICE);
+ pci_unmap_page(ioat_chan->device->pdev,
+ pci_unmap_addr(desc, src),
+ pci_unmap_len(desc, len),
+ PCI_DMA_TODEVICE);
+
+ if (desc->async_tx.callback) {
+ desc->async_tx.callback(desc->async_tx.callback_param);
+ desc->async_tx.callback = NULL;
+ }
+ }
+ }
+
+ /* move used.prev up beyond those that are finished */
+ if (&desc->node == ioat_chan->used_desc.next)
+ ioat_chan->used_desc.prev = NULL;
+ else
+ ioat_chan->used_desc.prev = &desc->node;
}
+ break;
}

spin_unlock_bh(&ioat_chan->desc_lock);
@@ -621,8 +971,6 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
return dma_async_is_complete(cookie, last_complete, last_used);
}

-/* PCI API */
-
static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
{
struct ioat_desc_sw *desc;
@@ -633,21 +981,34 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
| IOAT_DMA_DESCRIPTOR_CTL_INT_GN
| IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
- desc->hw->next = 0;
desc->hw->size = 0;
desc->hw->src_addr = 0;
desc->hw->dst_addr = 0;
desc->async_tx.ack = 1;
-
- list_add_tail(&desc->node, &ioat_chan->used_desc);
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ desc->hw->next = 0;
+ list_add_tail(&desc->node, &ioat_chan->used_desc);
+
+ writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+ writel(((u64) desc->async_tx.phys) >> 32,
+ ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+
+ writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
+ + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+ break;
+ case IOAT_VER_2_0:
+ writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+ writel(((u64) desc->async_tx.phys) >> 32,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+
+ ioat_chan->dmacount++;
+ __ioat2_dma_memcpy_issue_pending(ioat_chan);
+ break;
+ }
spin_unlock_bh(&ioat_chan->desc_lock);
-
- writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
- ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
- writel(((u64) desc->async_tx.phys) >> 32,
- ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);
-
- writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
}

/*
@@ -693,14 +1054,14 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
dma_chan = container_of(device->common.channels.next,
struct dma_chan,
device_node);
- if (ioat_dma_alloc_chan_resources(dma_chan) < 1) {
+ if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
dev_err(&device->pdev->dev,
"selftest cannot allocate chan resource\n");
err = -ENODEV;
goto out;
}

- tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
+ tx = device->common.device_prep_dma_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
if (!tx) {
dev_err(&device->pdev->dev,
"Self-test prep failed, disabling\n");
@@ -710,24 +1071,25 @@ static int ioat_dma_self_test(struct ioatdma_device *device)

async_tx_ack(tx);
addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
- DMA_TO_DEVICE);
- ioat_set_src(addr, tx, 0);
+ DMA_TO_DEVICE);
+ tx->tx_set_src(addr, tx, 0);
addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
- DMA_FROM_DEVICE);
- ioat_set_dest(addr, tx, 0);
+ DMA_FROM_DEVICE);
+ tx->tx_set_dest(addr, tx, 0);
tx->callback = ioat_dma_test_callback;
tx->callback_param = (void *)0x8086;
- cookie = ioat_tx_submit(tx);
+ cookie = tx->tx_submit(tx);
if (cookie < 0) {
dev_err(&device->pdev->dev,
"Self-test setup failed, disabling\n");
err = -ENODEV;
goto free_resources;
}
- ioat_dma_memcpy_issue_pending(dma_chan);
+ device->common.device_issue_pending(dma_chan);
msleep(1);

- if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+ != DMA_SUCCESS) {
dev_err(&device->pdev->dev,
"Self-test copy timed out, disabling\n");
err = -ENODEV;
@@ -741,7 +1103,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
}

free_resources:
- ioat_dma_free_chan_resources(dma_chan);
+ device->common.device_free_chan_resources(dma_chan);
out:
kfree(src);
kfree(dest);
@@ -941,16 +1303,28 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
INIT_LIST_HEAD(&device->common.channels);
ioat_dma_enumerate_channels(device);

- dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
device->common.device_alloc_chan_resources =
ioat_dma_alloc_chan_resources;
device->common.device_free_chan_resources =
ioat_dma_free_chan_resources;
- device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
+ device->common.dev = &pdev->dev;
+
+ dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
device->common.device_is_tx_complete = ioat_dma_is_complete;
- device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
device->common.device_dependency_added = ioat_dma_dependency_added;
- device->common.dev = &pdev->dev;
+ switch (device->version) {
+ case IOAT_VER_1_2:
+ device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+ device->common.device_issue_pending =
+ ioat1_dma_memcpy_issue_pending;
+ break;
+ case IOAT_VER_2_0:
+ device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
+ device->common.device_issue_pending =
+ ioat2_dma_memcpy_issue_pending;
+ break;
+ }
+
dev_err(&device->pdev->dev,
"Intel(R) I/OAT DMA Engine found,"
" %d channels, device version 0x%02x, driver version %s\n",
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
index 5f9881e..b668234 100644
--- a/drivers/dma/ioatdma.h
+++ b/drivers/dma/ioatdma.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -28,7 +28,7 @@
#include <linux/cache.h>
#include <linux/pci_ids.h>

-#define IOAT_DMA_VERSION "1.26"
+#define IOAT_DMA_VERSION "2.04"

enum ioat_interrupt {
none = 0,
@@ -39,6 +39,8 @@ enum ioat_interrupt {
};

#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU ~0
+

/**
* struct ioatdma_device - internal representation of a IOAT device
@@ -47,6 +49,9 @@ enum ioat_interrupt {
* @dma_pool: for allocating DMA descriptors
* @common: embedded struct dma_device
* @version: version of ioatdma device
+ * @irq_mode: which style irq to use
+ * @msix_entries: irq handlers
+ * @idx: per channel data
*/

struct ioatdma_device {
@@ -63,23 +68,7 @@ struct ioatdma_device {

/**
* struct ioat_dma_chan - internal representation of a DMA channel
- * @device:
- * @reg_base:
- * @sw_in_use:
- * @completion:
- * @completion_low:
- * @completion_high:
- * @completed_cookie: last cookie seen completed on cleanup
- * @cookie: value of last cookie given to client
- * @last_completion:
- * @xfercap:
- * @desc_lock:
- * @free_desc:
- * @used_desc:
- * @resource:
- * @device_node:
*/
-
struct ioat_dma_chan {

void __iomem *reg_base;
@@ -95,6 +84,8 @@ struct ioat_dma_chan {
struct list_head used_desc;

int pending;
+ int dmacount;
+ int desccount;

struct ioatdma_device *device;
struct dma_chan common;
@@ -134,12 +125,13 @@ struct ioat_desc_sw {
struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
void __iomem *iobase);
void ioat_dma_remove(struct ioatdma_device *device);
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev,
- void __iomem *iobase);
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
#else
#define ioat_dma_probe(pdev, iobase) NULL
#define ioat_dma_remove(device) do { } while (0)
#define ioat_dca_init(pdev, iobase) NULL
+#define ioat2_dca_init(pdev, iobase) NULL
#endif

#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
index 9e7434e..dd470fa 100644
--- a/drivers/dma/ioatdma_hw.h
+++ b/drivers/dma/ioatdma_hw.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -22,12 +22,19 @@
#define _IOAT_HW_H_

/* PCI Configuration Space Values */
-#define IOAT_PCI_VID 0x8086
-#define IOAT_PCI_DID 0x1A38
-#define IOAT_PCI_RID 0x00
-#define IOAT_PCI_SVID 0x8086
-#define IOAT_PCI_SID 0x8086
-#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_PCI_VID 0x8086
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000 0x1A38
+#define IOAT_PCI_DID_CNB 0x360B
+#define IOAT_PCI_DID_SCNB 0x65FF
+#define IOAT_PCI_DID_SNB 0x402F
+
+#define IOAT_PCI_RID 0x00
+#define IOAT_PCI_SVID 0x8086
+#define IOAT_PCI_SID 0x8086
+#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_VER_2_0 0x20 /* Version 2.0 */

struct ioat_dma_descriptor {
uint32_t size;
@@ -47,6 +54,16 @@ struct ioat_dma_descriptor {
#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010
#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020
-#define IOAT_DMA_DESCRIPTOR_OPCODE 0xFF000000
+#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
+#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
+#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100
+#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200
+#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400
+
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000
+
+#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000

#endif
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
index baaab5e..9832d7e 100644
--- a/drivers/dma/ioatdma_registers.h
+++ b/drivers/dma/ioatdma_registers.h
@@ -42,26 +42,25 @@
#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
-#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */

#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */

#define IOAT_VER_OFFSET 0x08 /* 8-bit */
#define IOAT_VER_MAJOR_MASK 0xF0
#define IOAT_VER_MINOR_MASK 0x0F
-#define GET_IOAT_VER_MAJOR(x) ((x) & IOAT_VER_MAJOR_MASK)
+#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)

#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */

#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */
-#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalesing Supported */
+#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */

#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001

-
#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */

/* DMA Channel Registers */
@@ -74,25 +73,101 @@
#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
#define IOAT_CHANCTRL_INT_DISABLE 0x0001

-#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatability */
-#define IOAT_DMA_COMP_V1 0x0001 /* Compatability with DMA version 1 */
-
-#define IOAT_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET_LOW 0x04
-#define IOAT_CHANSTS_OFFSET_HIGH 0x08
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR 0xFFFFFFFFFFFFFFC0UL
+#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW 0x04
+#define IOAT2_CHANSTS_OFFSET_LOW 0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH 0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F
#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010
+#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2
#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3

-#define IOAT_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
-#define IOAT_CHAINADDR_OFFSET_LOW 0x0C
-#define IOAT_CHAINADDR_OFFSET_HIGH 0x10

-#define IOAT_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
+
+#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET 0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET 0x00
+#define IOAT_DCA_VER_MAJOR_MASK 0xF0
+#define IOAT_DCA_VER_MINOR_MASK 0x0F
+
+#define IOAT_DCA_COMP_OFFSET 0x02
+#define IOAT_DCA_COMP_V1 0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET 0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET 0x06
+#define IOAT_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
+#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET 0x10
+#define IOAT_DCA_GREQID_SIZE 0x04
+#define IOAT_DCA_GREQID_MASK 0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
+#define IOAT_DCA_GREQID_VALID 0x20000000
+#define IOAT_DCA_GREQID_LASTID 0x80000000
+
+
+
+#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
#define IOAT_CHANCMD_RESET 0x20
#define IOAT_CHANCMD_RESUME 0x10
#define IOAT_CHANCMD_ABORT 0x08
@@ -124,6 +199,7 @@
#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
#define IOAT_CHANERR_SOFT_ERR 0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000

#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index df948b4..61a3023 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2327,6 +2327,7 @@
#define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a
#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff
#define PCI_DEVICE_ID_INTEL_TOLAPAI_0 0x5031
#define PCI_DEVICE_ID_INTEL_TOLAPAI_1 0x5032


2007-10-23 19:56:24

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] I/OAT: Add support for version 2 of ioatdma device

On Fri, 19 Oct 2007 00:20:25 -0700
Shannon Nelson <[email protected]> wrote:

> Add support for version 2 of the ioatdma device. This device handles
> the descriptor chain and DCA services slightly differently:
> - Instead of moving the dma descriptors between a busy and an idle chain,
> this new version uses a single circular chain so that we don't have
> rewrite the next_descriptor pointers as we add new requests, and the
> device doesn't need to re-read the last descriptor.
> - The new device has the DCA tags defined internally instead of needing
> them defined statically.
>
> ...
>
> +static inline void __ioat1_dma_memcpy_issue_pending(
> + struct ioat_dma_chan *ioat_chan);
> +static inline void __ioat2_dma_memcpy_issue_pending(
> + struct ioat_dma_chan *ioat_chan);

It's somewhat unusual to forward-declare an inline like this. My version
of gcc does do the right thing with it, but for the sake of
conventionality, readability and cleanliness, please consider just moving
the implementations higher up the file sometime, thanks.


> +static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
> + struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
> + struct ioat_desc_sw *new;
> + struct ioat_dma_descriptor *hw;
> + dma_cookie_t cookie;
> + u32 copy;
> + size_t len;
> + dma_addr_t src, dst;
> + int orig_ack;
> + unsigned int desc_count = 0;
> +
> + /* src and dest and len are stored in the initial descriptor */
> + len = first->len;
> + src = first->src;
> + dst = first->dst;
> + orig_ack = first->async_tx.ack;
> + new = first;
> +
> + /* ioat_chan->desc_lock is still in force in version 2 path */
> +
> + do {
> + copy = min((u32) len, ioat_chan->xfercap);

min_t is the preferred tool for this.

> + new->async_tx.ack = 1;
> +
> + hw = new->hw;
> + hw->size = copy;
> + hw->ctl = 0;
> + hw->src_addr = src;
> + hw->dst_addr = dst;
> +
> + len -= copy;
> + dst += copy;
> + src += copy;
> + desc_count++;
> + } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
> +
> + hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
> + if (new->async_tx.callback) {
> + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
> + if (first != new) {
> + /* move callback into to last desc */
> + new->async_tx.callback = first->async_tx.callback;
> + new->async_tx.callback_param
> + = first->async_tx.callback_param;
> + first->async_tx.callback = NULL;
> + first->async_tx.callback_param = NULL;
> + }
> + }
> +
> + new->tx_cnt = desc_count;
> + new->async_tx.ack = orig_ack; /* client is in control of this ack */
> +
> + /* store the original values for use in later cleanup */
> + if (new != first) {
> + new->src = first->src;
> + new->dst = first->dst;
> + new->len = first->len;
> + }
> +
> + /* cookie incr and addition to used_list must be atomic */
> + cookie = ioat_chan->common.cookie;
> + cookie++;
> + if (cookie < 0)
> + cookie = 1;
> + ioat_chan->common.cookie = new->async_tx.cookie = cookie;
> +
> + ioat_chan->dmacount += desc_count;
> + ioat_chan->pending += desc_count;
> + if (ioat_chan->pending >= ioat_pending_level)
> + __ioat2_dma_memcpy_issue_pending(ioat_chan);
> + spin_unlock_bh(&ioat_chan->desc_lock);
>
> return cookie;
> }
>
> +{
> + struct ioat_desc_sw *new = NULL;
> +
> + /*
> + * used.prev points to where to start processing
> + * used.next points to next free descriptor
> + * if used.prev == NULL, there are none waiting to be processed
> + * if used.next == used.prev.prev, there is only one free descriptor,
> + * and we need to use it to as a noop descriptor before
> + * linking in a new set of descriptors, since the device
> + * has probably already read the pointer to it
> + */
> + if (ioat_chan->used_desc.prev &&
> + ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
> +
> + struct ioat_desc_sw *desc = NULL;
> + struct ioat_desc_sw *noop_desc = NULL;

The noop_desc initialisation is unneeded and so too is the initialisation
of `desc', I suspect.

The compiler should avoid any additional code generation, but unneeded
initialisations like this can cause useful warnings to be suppressed and
they're a bit misleading to the reader of the code.


> + int i;
> +
> + /* set up the noop descriptor */
> + noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
> + noop_desc->hw->size = 0;
> + noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
> + noop_desc->hw->src_addr = 0;
> + noop_desc->hw->dst_addr = 0;
> +
> + ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
> + ioat_chan->pending++;
> + ioat_chan->dmacount++;
> +
> + /* get a few more descriptors */
> + for (i = 16; i; i--) {
> + desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
> + BUG_ON(!desc);

You can't do that!

GFP_ATOMIC allocations can and do fail under regular (albeit heavy) use.
Code _must_ be able to recover from an allocation failure. Taking out the
whole machine is not acceptable.

An exception to this is when the GFP_ATOMIC allocation is happening at
initial boot time (ie: in __init in code which cannot be loaded as a
module).


> + list_add_tail(&desc->node, ioat_chan->used_desc.next);
> +
> + desc->hw->next
> + = to_ioat_desc(desc->node.next)->async_tx.phys;
> + to_ioat_desc(desc->node.prev)->hw->next
> + = desc->async_tx.phys;
> + ioat_chan->desccount++;
> + }
> +
> + ioat_chan->used_desc.next = noop_desc->node.next;
> + }
> + new = to_ioat_desc(ioat_chan->used_desc.next);
> + prefetch(new);
> + ioat_chan->used_desc.next = new->node.next;
> +
> + if (ioat_chan->used_desc.prev == NULL)
> + ioat_chan->used_desc.prev = &new->node;
> +
> + prefetch(new->hw);
> + return new;
> +}
> +
>
> +static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
> + struct dma_chan *chan,
> + size_t len,
> + int int_en)
> +{
> + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
> + struct ioat_desc_sw *new;
> +
> + spin_lock_bh(&ioat_chan->desc_lock);
> + new = ioat2_dma_get_next_descriptor(ioat_chan);
> + new->len = len;
> +
> + /* leave ioat_chan->desc_lock set in version 2 path */
> + return new ? &new->async_tx : NULL;
> +}

I assume that there's a spin_unlock_bh() somewhere..

> +static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
> +{
> + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
> +
> + if (ioat_chan->pending != 0) {
> + spin_lock_bh(&ioat_chan->desc_lock);
> + __ioat2_dma_memcpy_issue_pending(ioat_chan);
> + spin_unlock_bh(&ioat_chan->desc_lock);
> }
> }

Is it non-racy to test ->pending outside the lock?

>
> #define IOAT_LOW_COMPLETION_MASK 0xffffffc0
> +#define IOAT_DMA_DCA_ANY_CPU ~0
> +

The code is refreshingly free from usable comments. A reader might wonder what
things like IOAT_DMA_DCA_ANY_CPU actually represent. I can't work it out
from the bare C implementation.

> void ioat_dma_remove(struct ioatdma_device *device);
> -struct dca_provider *ioat_dca_init(struct pci_dev *pdev,
> - void __iomem *iobase);
> +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
> +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
> #else
> #define ioat_dma_probe(pdev, iobase) NULL
> #define ioat_dma_remove(device) do { } while (0)
> #define ioat_dca_init(pdev, iobase) NULL
> +#define ioat2_dca_init(pdev, iobase) NULL
> #endif

grumble. All these could be implemented as inlines. Please only use
macros when the code cannot be implemented in C.

Reasons:

- we get typechecking even when CONFIG_YOUR_STUFF=n

- can still take the address of the functions (dubious)

- looks nicer


2007-10-24 01:19:17

by Shannon Nelson

[permalink] [raw]
Subject: RE: [PATCH] I/OAT: Add support for version 2 of ioatdma device

>From: Andrew Morton [mailto:[email protected]]
>On Fri, 19 Oct 2007 00:20:25 -0700
>Shannon Nelson <[email protected]> wrote:
>
>> Add support for version 2 of the ioatdma device. This device handles
>> the descriptor chain and DCA services slightly differently:
>> - Instead of moving the dma descriptors between a busy and
>an idle chain,
>> this new version uses a single circular chain so that we
>don't have
>> rewrite the next_descriptor pointers as we add new
>requests, and the
>> device doesn't need to re-read the last descriptor.
>> - The new device has the DCA tags defined internally
>instead of needing
>> them defined statically.
>>

Andrew,

Thank you for your time and the review comments. I'll attend to the
details and repost.

sln