2005-04-01 20:53:46

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][1/27] IB/mthca: map MPT/MTT context in mem-free mode

In mem-free mode, when allocating memory regions, make sure that the
HCA has context memory mapped to cover the virtual space occupied by
the MPT and MTT entries being used.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-03-31 19:06:51.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:19.884644268 -0800
@@ -390,7 +390,7 @@
}

mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
- init_hca->mtt_seg_sz,
+ dev_lim->mtt_seg_sz,
mdev->limits.num_mtt_segs,
mdev->limits.reserved_mtts, 1);
if (!mdev->mr_table.mtt_table) {
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-03-31 19:06:42.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:19.911638409 -0800
@@ -192,6 +192,38 @@
up(&table->mutex);
}

+int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end)
+{
+ int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size;
+ int i, err;
+
+ for (i = start; i <= end; i += inc) {
+ err = mthca_table_get(dev, table, i);
+ if (err)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ while (i > start) {
+ i -= inc;
+ mthca_table_put(dev, table, i);
+ }
+
+ return err;
+}
+
+void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end)
+{
+ int i;
+
+ for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
+ mthca_table_put(dev, table, i);
+}
+
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-03-31 19:06:56.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-04-01 12:38:19.895641881 -0800
@@ -85,6 +85,10 @@
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
+int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end);
+void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end);

static inline void mthca_icm_first(struct mthca_icm *icm,
struct mthca_icm_iter *iter)
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2005-03-31 19:07:06.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:19.903640145 -0800
@@ -38,6 +38,7 @@

#include "mthca_dev.h"
#include "mthca_cmd.h"
+#include "mthca_memfree.h"

/*
* Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
@@ -71,7 +72,7 @@
* through the bitmaps)
*/

-static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order)
+static u32 __mthca_alloc_mtt(struct mthca_dev *dev, int order)
{
int o;
int m;
@@ -105,7 +106,7 @@
return seg;
}

-static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order)
+static void __mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order)
{
seg >>= order;

@@ -122,6 +123,32 @@
spin_unlock(&dev->mr_table.mpt_alloc.lock);
}

+static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order)
+{
+ u32 seg = __mthca_alloc_mtt(dev, order);
+
+ if (seg == -1)
+ return -1;
+
+ if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
+ seg + (1 << order) - 1)) {
+ __mthca_free_mtt(dev, seg, order);
+ seg = -1;
+ }
+
+ return seg;
+}
+
+static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order)
+{
+ __mthca_free_mtt(dev, seg, order);
+
+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
+ seg + (1 << order) - 1);
+}
+
static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
if (dev->hca_type == ARBEL_NATIVE)
@@ -141,7 +168,7 @@
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_mr *mr)
{
- void *mailbox;
+ void *mailbox = NULL;
struct mthca_mpt_entry *mpt_entry;
u32 key;
int err;
@@ -155,11 +182,17 @@
return -ENOMEM;
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

+ if (dev->hca_type == ARBEL_NATIVE) {
+ err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
+ if (err)
+ goto err_out_mpt_free;
+ }
+
mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
GFP_KERNEL);
if (!mailbox) {
- mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_out_table;
}
mpt_entry = MAILBOX_ALIGN(mailbox);

@@ -180,16 +213,27 @@
err = mthca_SW2HW_MPT(dev, mpt_entry,
key & (dev->limits.num_mpts - 1),
&status);
- if (err)
+ if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
- else if (status) {
+ goto err_out_table;
+ } else if (status) {
mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
status);
err = -EINVAL;
+ goto err_out_table;
}

kfree(mailbox);
return err;
+
+err_out_table:
+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
+
+err_out_mpt_free:
+ mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+ kfree(mailbox);
+ return err;
}

int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
@@ -213,6 +257,12 @@
return -ENOMEM;
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

+ if (dev->hca_type == ARBEL_NATIVE) {
+ err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
+ if (err)
+ goto err_out_mpt_free;
+ }
+
for (i = dev->limits.mtt_seg_size / 8, mr->order = 0;
i < list_len;
i <<= 1, ++mr->order)
@@ -220,7 +270,7 @@

mr->first_seg = mthca_alloc_mtt(dev, mr->order);
if (mr->first_seg == -1)
- goto err_out_mpt_free;
+ goto err_out_table;

/*
* If list_len is odd, we add one more dummy entry for
@@ -307,13 +357,17 @@
kfree(mailbox);
return err;

- err_out_mailbox_free:
+err_out_mailbox_free:
kfree(mailbox);

- err_out_free_mtt:
+err_out_free_mtt:
mthca_free_mtt(dev, mr->first_seg, mr->order);

- err_out_mpt_free:
+err_out_table:
+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
+
+err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
return err;
}
@@ -338,6 +392,9 @@
if (mr->order >= 0)
mthca_free_mtt(dev, mr->first_seg, mr->order);

+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_table_put(dev, dev->mr_table.mpt_table,
+ key_to_hw_index(dev, mr->ibmr.lkey));
mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
}



2005-04-01 20:51:36

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][2/27] IB/mthca: fill in more device query fields

Implement more of the device_query method in mthca.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-03-31 19:07:00.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:20.843436141 -0800
@@ -987,6 +987,8 @@
if (dev->hca_type == ARBEL_NATIVE) {
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
dev_lim->hca.arbel.resize_srq = field & 1;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
+ dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET);
dev_lim->mtt_seg_sz = size;
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-03-31 19:07:00.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:20.839437009 -0800
@@ -52,6 +52,8 @@
if (!in_mad || !out_mad)
goto out;

+ memset(props, 0, sizeof props);
+
props->fw_ver = mdev->fw_ver;

memset(in_mad, 0, sizeof *in_mad);
@@ -71,14 +73,26 @@
goto out;
}

- props->device_cap_flags = mdev->device_cap_flags;
- props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) &
+ props->device_cap_flags = mdev->device_cap_flags;
+ props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) &
0xffffff;
- props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30));
- props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32));
+ props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30));
+ props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
memcpy(&props->node_guid, out_mad->data + 12, 8);

+ props->max_mr_size = ~0ull;
+ props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
+ props->max_qp_wr = 0xffff;
+ props->max_sge = mdev->limits.max_sg;
+ props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
+ props->max_cqe = 0xffff;
+ props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
+ props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds;
+ props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift;
+ props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
+ props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
+
err = 0;
out:
kfree(in_mad);

2005-04-01 20:55:28

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][12/27] IB/mthca: fix format of CQ number for CQ events

CQ numbers are only 24 bits, so print only 6 hex digits and mask off
the reserved part when reporting a CQ event.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_eq.c 2005-03-31 19:06:55.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_eq.c 2005-04-01 12:38:24.575625986 -0800
@@ -344,10 +344,10 @@
break;

case MTHCA_EVENT_TYPE_CQ_ERROR:
- mthca_warn(dev, "CQ %s on CQN %08x\n",
+ mthca_warn(dev, "CQ %s on CQN %06x\n",
eqe->event.cq_err.syndrome == 1 ?
"overrun" : "access violation",
- be32_to_cpu(eqe->event.cq_err.cqn));
+ be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
break;

case MTHCA_EVENT_TYPE_EQ_OVERFLOW:

2005-04-01 20:55:31

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][8/27] IB/mthca: fix MR allocation error path

From: Michael S. Tsirkin <[email protected]>

Fix error handling in MR allocation for mem-free mode:
mthca_free must get an MR index, not a key.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:19.903640145 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:22.968974746 -0800
@@ -231,7 +231,7 @@
mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
- mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+ mthca_free(&dev->mr_table.mpt_alloc, key);
kfree(mailbox);
return err;
}
@@ -368,7 +368,7 @@
mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
- mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+ mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}


2005-04-01 20:57:30

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][13/27] IB/mthca: implement RDMA/atomic operations for mem-free mode

Add code to support RDMA and atomic send work requests in mem-free mode.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:21.580276194 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:25.023528759 -0800
@@ -1775,6 +1775,53 @@
size = sizeof (struct mthca_next_seg) / 16;

switch (qp->transport) {
+ case RC:
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ ((struct mthca_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.atomic.remote_addr);
+ ((struct mthca_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.atomic.rkey);
+ ((struct mthca_raddr_seg *) wqe)->reserved = 0;
+
+ wqe += sizeof (struct mthca_raddr_seg);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ ((struct mthca_atomic_seg *) wqe)->swap_add =
+ cpu_to_be64(wr->wr.atomic.swap);
+ ((struct mthca_atomic_seg *) wqe)->compare =
+ cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ ((struct mthca_atomic_seg *) wqe)->swap_add =
+ cpu_to_be64(wr->wr.atomic.compare_add);
+ ((struct mthca_atomic_seg *) wqe)->compare = 0;
+ }
+
+ wqe += sizeof (struct mthca_atomic_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16 +
+ sizeof (struct mthca_atomic_seg);
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ ((struct mthca_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.rdma.remote_addr);
+ ((struct mthca_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.rdma.rkey);
+ ((struct mthca_raddr_seg *) wqe)->reserved = 0;
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
case UD:
memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);

2005-04-01 21:00:29

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][16/27] IB/mthca: allow address handle creation in interrupt context

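Make address handle verbs usable from interrupt context by using
atomic allocations (GFP_ATOMIC/SLAB_ATOMIC instead of
GFP_KERNEL/SLAB_KERNEL) when creating an address handle.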

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_av.c 2005-03-31 19:07:01.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_av.c 2005-04-01 12:38:26.648176093 -0800
@@ -63,7 +63,7 @@
ah->type = MTHCA_AH_PCI_POOL;

if (dev->hca_type == ARBEL_NATIVE) {
- ah->av = kmalloc(sizeof *ah->av, GFP_KERNEL);
+ ah->av = kmalloc(sizeof *ah->av, GFP_ATOMIC);
if (!ah->av)
return -ENOMEM;

@@ -77,7 +77,7 @@
if (index == -1)
goto on_hca_fail;

- av = kmalloc(sizeof *av, GFP_KERNEL);
+ av = kmalloc(sizeof *av, GFP_ATOMIC);
if (!av)
goto on_hca_fail;

@@ -89,7 +89,7 @@
on_hca_fail:
if (ah->type == MTHCA_AH_PCI_POOL) {
ah->av = pci_pool_alloc(dev->av_table.pool,
- SLAB_KERNEL, &ah->avdma);
+ SLAB_ATOMIC, &ah->avdma);
if (!ah->av)
return -ENOMEM;

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:22.630048317 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:26.644176961 -0800
@@ -315,7 +315,7 @@
int err;
struct mthca_ah *ah;

- ah = kmalloc(sizeof *ah, GFP_KERNEL);
+ ah = kmalloc(sizeof *ah, GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);


2005-04-01 21:01:45

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][20/27] IB/mthca: add mthca_table_find() function

From: Michael S. Tsirkin <[email protected]>

Add mthca_table_find() function, which returns the lowmem address of
an entry in a mem-free HCA's context tables. This will be used by the
FMR implementation.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:23.500859288 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:28.285820606 -0800
@@ -192,6 +192,40 @@
up(&table->mutex);
}

+void *mthca_table_find(struct mthca_icm_table *table, int obj)
+{
+ int idx, offset, i;
+ struct mthca_icm_chunk *chunk;
+ struct mthca_icm *icm;
+ struct page *page = NULL;
+
+ if (!table->lowmem)
+ return NULL;
+
+ down(&table->mutex);
+
+ idx = (obj & (table->num_obj - 1)) * table->obj_size;
+ icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
+ offset = idx % MTHCA_TABLE_CHUNK_SIZE;
+
+ if (!icm)
+ goto out;
+
+ list_for_each_entry(chunk, &icm->chunk_list, list) {
+ for (i = 0; i < chunk->npages; ++i) {
+ if (chunk->mem[i].length >= offset) {
+ page = chunk->mem[i].page;
+ break;
+ }
+ offset -= chunk->mem[i].length;
+ }
+ }
+
+out:
+ up(&table->mutex);
+ return page ? lowmem_page_address(page) + offset : NULL;
+}
+
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end)
{
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-04-01 12:38:19.895641881 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-04-01 12:38:28.280821691 -0800
@@ -85,6 +85,7 @@
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
+void *mthca_table_find(struct mthca_icm_table *table, int obj);
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end);
void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,

2005-04-01 21:00:27

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][15/27] IB/mthca: fill in opcode field for send completions

From: Michael S. Tsirkin <[email protected]>

Fill in the missing opcode, wc_flags and byte_len fields in send
completions.

Signed-off-by: Itamar Rabenstein <[email protected]>
Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-01 12:38:24.207705852 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-01 12:38:26.177278312 -0800
@@ -473,7 +473,41 @@
}

if (is_send) {
- entry->opcode = IB_WC_SEND; /* XXX */
+ entry->wc_flags = 0;
+ switch (cqe->opcode) {
+ case MTHCA_OPCODE_RDMA_WRITE:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case MTHCA_OPCODE_RDMA_WRITE_IMM:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ entry->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case MTHCA_OPCODE_SEND:
+ entry->opcode = IB_WC_SEND;
+ break;
+ case MTHCA_OPCODE_SEND_IMM:
+ entry->opcode = IB_WC_SEND;
+ entry->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case MTHCA_OPCODE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ break;
+ case MTHCA_OPCODE_ATOMIC_CS:
+ entry->opcode = IB_WC_COMP_SWAP;
+ entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ break;
+ case MTHCA_OPCODE_ATOMIC_FA:
+ entry->opcode = IB_WC_FETCH_ADD;
+ entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ break;
+ case MTHCA_OPCODE_BIND_MW:
+ entry->opcode = IB_WC_BIND_MW;
+ break;
+ default:
+ entry->opcode = MTHCA_OPCODE_INVALID;
+ break;
+ }
} else {
entry->byte_len = be32_to_cpu(cqe->byte_cnt);
switch (cqe->opcode & 0x1f) {
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:25.561412000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:26.173279180 -0800
@@ -88,6 +88,19 @@
MTHCA_NUM_EQ
};

+enum {
+ MTHCA_OPCODE_NOP = 0x00,
+ MTHCA_OPCODE_RDMA_WRITE = 0x08,
+ MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
+ MTHCA_OPCODE_SEND = 0x0a,
+ MTHCA_OPCODE_SEND_IMM = 0x0b,
+ MTHCA_OPCODE_RDMA_READ = 0x10,
+ MTHCA_OPCODE_ATOMIC_CS = 0x11,
+ MTHCA_OPCODE_ATOMIC_FA = 0x12,
+ MTHCA_OPCODE_BIND_MW = 0x18,
+ MTHCA_OPCODE_INVALID = 0xff
+};
+
struct mthca_cmd {
int use_events;
struct semaphore hcr_sem;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:25.023528759 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:26.181277444 -0800
@@ -171,19 +171,6 @@
};

enum {
- MTHCA_OPCODE_NOP = 0x00,
- MTHCA_OPCODE_RDMA_WRITE = 0x08,
- MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
- MTHCA_OPCODE_SEND = 0x0a,
- MTHCA_OPCODE_SEND_IMM = 0x0b,
- MTHCA_OPCODE_RDMA_READ = 0x10,
- MTHCA_OPCODE_ATOMIC_CS = 0x11,
- MTHCA_OPCODE_ATOMIC_FA = 0x12,
- MTHCA_OPCODE_BIND_MW = 0x18,
- MTHCA_OPCODE_INVALID = 0xff
-};
-
-enum {
MTHCA_NEXT_DBD = 1 << 7,
MTHCA_NEXT_FENCE = 1 << 6,
MTHCA_NEXT_CQ_UPDATE = 1 << 3,

2005-04-01 21:09:40

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][23/27] IB/mthca: tweaks to mthca_cmd.c

Minor tweaks to firmware command handling: kill off an unused get of
the MTT entry size in the QUERY_DEV_LIM handling, and add the DMA
address to the ICM mapping debug output.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:27.495992056 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:30.084430178 -0800
@@ -989,7 +989,6 @@
dev_lim->hca.arbel.resize_srq = field & 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
- MTHCA_GET(size, outbox, QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET);
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
dev_lim->mpt_entry_sz = size;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
@@ -1297,8 +1296,8 @@
pci_free_consistent(dev->pdev, 16, inbox, indma);

if (!err)
- mthca_dbg(dev, "Mapped page at %llx for ICM.\n",
- (unsigned long long) virt);
+ mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
+ (unsigned long long) dma_addr, (unsigned long long) virt);

return err;
}

2005-04-01 21:09:41

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][22/27] IB/mthca: add fast memory region implementation

From: Michael S. Tsirkin <[email protected]>

Implement fast memory regions (FMRs), where the driver writes directly
into the HCA's translation tables rather than requiring a firmware
command. For Tavor, MTTs for FMR are separate from regular MTTs, and
are reserved at driver initialization. This is done to limit the
amount of virtual memory needed to map the MTTs. For Arbel, there's
no such limitation, and all MTTs and MPTs may be used for FMR or for
regular MR.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:27.068084943 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:29.460565601 -0800
@@ -61,7 +61,8 @@
MTHCA_FLAG_SRQ = 1 << 2,
MTHCA_FLAG_MSI = 1 << 3,
MTHCA_FLAG_MSI_X = 1 << 4,
- MTHCA_FLAG_NO_LAM = 1 << 5
+ MTHCA_FLAG_NO_LAM = 1 << 5,
+ MTHCA_FLAG_FMR = 1 << 6
};

enum {
@@ -134,6 +135,7 @@
int reserved_eqs;
int num_mpts;
int num_mtt_segs;
+ int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
int reserved_uars;
@@ -178,10 +180,17 @@

struct mthca_mr_table {
struct mthca_alloc mpt_alloc;
- struct mthca_buddy mtt_buddy;
+ struct mthca_buddy mtt_buddy;
+ struct mthca_buddy *fmr_mtt_buddy;
u64 mtt_base;
+ u64 mpt_base;
struct mthca_icm_table *mtt_table;
struct mthca_icm_table *mpt_table;
+ struct {
+ void __iomem *mpt_base;
+ void __iomem *mtt_base;
+ struct mthca_buddy mtt_buddy;
+ } tavor_fmr;
};

struct mthca_eq_table {
@@ -380,7 +389,17 @@
u64 *buffer_list, int buffer_size_shift,
int list_len, u64 iova, u64 total_size,
u32 access, struct mthca_mr *mr);
-void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
+void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
+
+int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_fmr *fmr);
+int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
+int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
+int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);

int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
void mthca_unmap_eq_icm(struct mthca_dev *dev);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:25.566410914 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:29.466564299 -0800
@@ -73,14 +73,15 @@
DRV_VERSION " (" DRV_RELDATE ")\n";

static struct mthca_profile default_profile = {
- .num_qp = 1 << 16,
- .rdb_per_qp = 4,
- .num_cq = 1 << 16,
- .num_mcg = 1 << 13,
- .num_mpt = 1 << 17,
- .num_mtt = 1 << 20,
- .num_udav = 1 << 15, /* Tavor only */
- .uarc_size = 1 << 18, /* Arbel only */
+ .num_qp = 1 << 16,
+ .rdb_per_qp = 4,
+ .num_cq = 1 << 16,
+ .num_mcg = 1 << 13,
+ .num_mpt = 1 << 17,
+ .num_mtt = 1 << 20,
+ .num_udav = 1 << 15, /* Tavor only */
+ .fmr_reserved_mtts = 1 << 18, /* Tavor only */
+ .uarc_size = 1 << 18, /* Arbel only */
};

static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:28.676735749 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:29.493558440 -0800
@@ -66,6 +66,9 @@

#define MTHCA_MTT_FLAG_PRESENT 1

+#define MTHCA_MPT_STATUS_SW 0xF0
+#define MTHCA_MPT_STATUS_HW 0x00
+
/*
* Buddy allocator for MTT segments (currently not very efficient
* since it doesn't keep a free list and just searches linearly
@@ -442,6 +445,20 @@
return err;
}

+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
+ u32 first_seg, struct mthca_buddy *buddy)
+{
+ if (order >= 0)
+ mthca_free_mtt(dev, first_seg, order, buddy);
+
+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_table_put(dev, dev->mr_table.mpt_table,
+ arbel_key_to_hw_index(lkey));
+
+ mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
+}
+
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
int err;
@@ -459,18 +476,288 @@
mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
status);

- if (mr->order >= 0)
- mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
+ mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
+ &dev->mr_table.mtt_buddy);
+}
+
+int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_fmr *mr)
+{
+ struct mthca_mpt_entry *mpt_entry;
+ void *mailbox;
+ u64 mtt_seg;
+ u32 key, idx;
+ u8 status;
+ int list_len = mr->attr.max_pages;
+ int err = -ENOMEM;
+ int i;
+
+ might_sleep();
+
+ if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
+ return -EINVAL;
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ if (dev->hca_type == ARBEL_NATIVE &&
+ mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
+ return -EINVAL;
+
+ mr->maps = 0;
+
+ key = mthca_alloc(&dev->mr_table.mpt_alloc);
+ if (key == -1)
+ return -ENOMEM;
+
+ idx = key & (dev->limits.num_mpts - 1);
+ mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
+ if (err)
+ goto err_out_mpt_free;
+
+ mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
+ BUG_ON(!mr->mem.arbel.mpt);
+ } else
+ mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
+ sizeof *(mr->mem.tavor.mpt) * idx;
+
+ for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
+ i < list_len;
+ i <<= 1, ++mr->order)
+ ; /* nothing */
+
+ mr->first_seg = mthca_alloc_mtt(dev, mr->order,
+ dev->mr_table.fmr_mtt_buddy);
+ if (mr->first_seg == -1)
+ goto err_out_table;
+
+ mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
+ mr->first_seg);
+ BUG_ON(!mr->mem.arbel.mtts);
+ } else
+ mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
+
+ mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
+ GFP_KERNEL);
+ if (!mailbox)
+ goto err_out_free_mtt;
+
+ mpt_entry = MAILBOX_ALIGN(mailbox);
+
+ mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
+ MTHCA_MPT_FLAG_MIO |
+ MTHCA_MPT_FLAG_REGION |
+ access);
+
+ mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
+ mpt_entry->key = cpu_to_be32(key);
+ mpt_entry->pd = cpu_to_be32(pd);
+ memset(&mpt_entry->start, 0,
+ sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
+ mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
+
+ if (0) {
+ mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
+ for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
+ if (i % 4 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+ if ((i + 1) % 4 == 0)
+ printk("\n");
+ }
+ }
+
+ err = mthca_SW2HW_MPT(dev, mpt_entry,
+ key & (dev->limits.num_mpts - 1),
+ &status);
+ if (err) {
+ mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ goto err_out_mailbox_free;
+ }
+ if (status) {
+ mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_mailbox_free;
+ }
+
+ kfree(mailbox);
+ return 0;
+
+err_out_mailbox_free:
+ kfree(mailbox);
+
+err_out_free_mtt:
+ mthca_free_mtt(dev, mr->first_seg, mr->order,
+ dev->mr_table.fmr_mtt_buddy);

+err_out_table:
if (dev->hca_type == ARBEL_NATIVE)
- mthca_table_put(dev, dev->mr_table.mpt_table,
- key_to_hw_index(dev, mr->ibmr.lkey));
- mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
+
+err_out_mpt_free:
+ mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+ return err;
+}
+
+int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+ if (fmr->maps)
+ return -EBUSY;
+
+ mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
+ dev->mr_table.fmr_mtt_buddy);
+ return 0;
+}
+
+static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ int i, page_mask;
+
+ if (list_len > fmr->attr.max_pages)
+ return -EINVAL;
+
+ page_mask = (1 << fmr->attr.page_size) - 1;
+
+ /* We are getting page lists, so va must be page aligned. */
+ if (iova & page_mask)
+ return -EINVAL;
+
+ /* Trust the user not to pass misaligned data in page_list */
+ if (0)
+ for (i = 0; i < list_len; ++i) {
+ if (page_list[i] & ~page_mask)
+ return -EINVAL;
+ }
+
+ if (fmr->maps >= fmr->attr.max_maps)
+ return -EINVAL;
+
+ return 0;
+}
+
+
+int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ struct mthca_fmr *fmr = to_mfmr(ibfmr);
+ struct mthca_dev *dev = to_mdev(ibfmr->device);
+ struct mthca_mpt_entry mpt_entry;
+ u32 key;
+ int i, err;
+
+ err = mthca_check_fmr(fmr, page_list, list_len, iova);
+ if (err)
+ return err;
+
+ ++fmr->maps;
+
+ key = tavor_key_to_hw_index(fmr->ibmr.lkey);
+ key += dev->limits.num_mpts;
+ fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
+
+ writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
+
+ for (i = 0; i < list_len; ++i) {
+ __be64 mtt_entry = cpu_to_be64(page_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+ mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
+ }
+
+ mpt_entry.lkey = cpu_to_be32(key);
+ mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+ mpt_entry.start = cpu_to_be64(iova);
+
+ writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
+ memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
+ offsetof(struct mthca_mpt_entry, window_count) -
+ offsetof(struct mthca_mpt_entry, start));
+
+ writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
+
+ return 0;
+}
+
+int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ struct mthca_fmr *fmr = to_mfmr(ibfmr);
+ struct mthca_dev *dev = to_mdev(ibfmr->device);
+ u32 key;
+ int i, err;
+
+ err = mthca_check_fmr(fmr, page_list, list_len, iova);
+ if (err)
+ return err;
+
+ ++fmr->maps;
+
+ key = arbel_key_to_hw_index(fmr->ibmr.lkey);
+ key += dev->limits.num_mpts;
+ fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
+
+ *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
+
+ wmb();
+
+ for (i = 0; i < list_len; ++i)
+ fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+
+ fmr->mem.arbel.mpt->key = cpu_to_be32(key);
+ fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
+ fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+ fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
+
+ wmb();
+
+ *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
+
+ wmb();
+
+ return 0;
+}
+
+void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+ u32 key;
+
+ if (!fmr->maps)
+ return;
+
+ key = tavor_key_to_hw_index(fmr->ibmr.lkey);
+ key &= dev->limits.num_mpts - 1;
+ fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
+
+ fmr->maps = 0;
+
+ writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
+}
+
+void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+ u32 key;
+
+ if (!fmr->maps)
+ return;
+
+ key = arbel_key_to_hw_index(fmr->ibmr.lkey);
+ key &= dev->limits.num_mpts - 1;
+ fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
+
+ fmr->maps = 0;
+
+ *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
}

int __devinit mthca_init_mr_table(struct mthca_dev *dev)
{
- int err;
+ int err, i;

err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
dev->limits.num_mpts,
@@ -478,23 +765,93 @@
if (err)
return err;

+ if (dev->hca_type != ARBEL_NATIVE &&
+ (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
+ dev->limits.fmr_reserved_mtts = 0;
+ else
+ dev->mthca_flags |= MTHCA_FLAG_FMR;
+
err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
fls(dev->limits.num_mtt_segs - 1));
+
if (err)
goto err_mtt_buddy;

+ dev->mr_table.tavor_fmr.mpt_base = NULL;
+ dev->mr_table.tavor_fmr.mtt_base = NULL;
+
+ if (dev->limits.fmr_reserved_mtts) {
+ i = fls(dev->limits.fmr_reserved_mtts - 1);
+
+ if (i >= 31) {
+ mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
+ err = -EINVAL;
+ goto err_fmr_mpt;
+ }
+
+ dev->mr_table.tavor_fmr.mpt_base =
+ ioremap(dev->mr_table.mpt_base,
+ (1 << i) * sizeof (struct mthca_mpt_entry));
+
+ if (!dev->mr_table.tavor_fmr.mpt_base) {
+ mthca_warn(dev, "MPT ioremap for FMR failed.\n");
+ err = -ENOMEM;
+ goto err_fmr_mpt;
+ }
+
+ dev->mr_table.tavor_fmr.mtt_base =
+ ioremap(dev->mr_table.mtt_base,
+ (1 << i) * MTHCA_MTT_SEG_SIZE);
+ if (!dev->mr_table.tavor_fmr.mtt_base) {
+ mthca_warn(dev, "MTT ioremap for FMR failed.\n");
+ err = -ENOMEM;
+ goto err_fmr_mtt;
+ }
+
+ err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
+ if (err)
+ goto err_fmr_mtt_buddy;
+
+ /* Prevent regular MRs from using FMR keys */
+ err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
+ if (err)
+ goto err_reserve_fmr;
+
+ dev->mr_table.fmr_mtt_buddy =
+ &dev->mr_table.tavor_fmr.mtt_buddy;
+ } else
+ dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
+
+ /* FMR table is always the first, take reserved MTTs out of there */
if (dev->limits.reserved_mtts) {
- if (mthca_alloc_mtt(dev, fls(dev->limits.reserved_mtts - 1),
- &dev->mr_table.mtt_buddy) == -1) {
+ i = fls(dev->limits.reserved_mtts - 1);
+
+ if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
mthca_warn(dev, "MTT table of order %d is too small.\n",
- dev->mr_table.mtt_buddy.max_order);
+ dev->mr_table.fmr_mtt_buddy->max_order);
err = -ENOMEM;
- goto err_mtt_buddy;
+ goto err_reserve_mtts;
}
}

return 0;

+err_reserve_mtts:
+err_reserve_fmr:
+ if (dev->limits.fmr_reserved_mtts)
+ mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
+
+err_fmr_mtt_buddy:
+ if (dev->mr_table.tavor_fmr.mtt_base)
+ iounmap(dev->mr_table.tavor_fmr.mtt_base);
+
+err_fmr_mtt:
+ if (dev->mr_table.tavor_fmr.mpt_base)
+ iounmap(dev->mr_table.tavor_fmr.mpt_base);
+
+err_fmr_mpt:
+ mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
+
err_mtt_buddy:
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);

@@ -504,6 +861,15 @@
void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
{
/* XXX check if any MRs are still allocated? */
+ if (dev->limits.fmr_reserved_mtts)
+ mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
+
mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
+
+ if (dev->mr_table.tavor_fmr.mtt_base)
+ iounmap(dev->mr_table.tavor_fmr.mtt_base);
+ if (dev->mr_table.tavor_fmr.mpt_base)
+ iounmap(dev->mr_table.tavor_fmr.mpt_base);
+
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_profile.c 2005-04-01 12:38:25.570410046 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_profile.c 2005-04-01 12:38:29.480561261 -0800
@@ -223,9 +223,10 @@
init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
break;
case MTHCA_RES_MPT:
- dev->limits.num_mpts = profile[i].num;
- init_hca->mpt_base = profile[i].start;
- init_hca->log_mpt_sz = profile[i].log_num;
+ dev->limits.num_mpts = profile[i].num;
+ dev->mr_table.mpt_base = profile[i].start;
+ init_hca->mpt_base = profile[i].start;
+ init_hca->log_mpt_sz = profile[i].log_num;
break;
case MTHCA_RES_MTT:
dev->limits.num_mtt_segs = profile[i].num;
@@ -259,6 +260,18 @@
*/
dev->limits.num_pds = MTHCA_NUM_PDS;

+ /*
+ * For Tavor, FMRs use ioremapped PCI memory. For 32 bit
+ * systems it may use too much vmalloc space to map all MTT
+ * memory, so we reserve some MTTs for FMR access, taking them
+ * out of the MR pool. They don't use additional memory, but
+ * we assign them as part of the HCA profile anyway.
+ */
+ if (dev->hca_type == ARBEL_NATIVE)
+ dev->limits.fmr_reserved_mtts = 0;
+ else
+ dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
+
kfree(profile);
return total_size;
}
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_profile.h 2005-03-31 19:07:01.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_profile.h 2005-04-01 12:38:29.484560393 -0800
@@ -48,6 +48,7 @@
int num_udav;
int num_uar;
int uarc_size;
+ int fmr_reserved_mtts;
};

u64 mthca_make_profile(struct mthca_dev *mdev,
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:26.644176961 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:29.471563214 -0800
@@ -574,6 +574,74 @@
return 0;
}

+static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct mthca_fmr *fmr;
+ int err;
+
+ fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
+ if (!fmr)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
+ err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
+ convert_access(mr_access_flags), fmr);
+
+ if (err) {
+ kfree(fmr);
+ return ERR_PTR(err);
+ }
+
+ return &fmr->ibmr;
+}
+
+static int mthca_dealloc_fmr(struct ib_fmr *fmr)
+{
+ struct mthca_fmr *mfmr = to_mfmr(fmr);
+ int err;
+
+ err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
+ if (err)
+ return err;
+
+ kfree(mfmr);
+ return 0;
+}
+
+static int mthca_unmap_fmr(struct list_head *fmr_list)
+{
+ struct ib_fmr *fmr;
+ int err;
+ u8 status;
+ struct mthca_dev *mdev = NULL;
+
+ list_for_each_entry(fmr, fmr_list, list) {
+ if (mdev && to_mdev(fmr->device) != mdev)
+ return -EINVAL;
+ mdev = to_mdev(fmr->device);
+ }
+
+ if (!mdev)
+ return 0;
+
+ if (mdev->hca_type == ARBEL_NATIVE) {
+ list_for_each_entry(fmr, fmr_list, list)
+ mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
+
+ wmb();
+ } else
+ list_for_each_entry(fmr, fmr_list, list)
+ mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
+
+ err = mthca_SYNC_TPT(mdev, &status);
+ if (err)
+ return err;
+ if (status)
+ return -EINVAL;
+ return 0;
+}
+
static ssize_t show_rev(struct class_device *cdev, char *buf)
{
struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
@@ -637,6 +705,17 @@
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
+
+ if (dev->mthca_flags & MTHCA_FLAG_FMR) {
+ dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
+ dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
+ dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
+ if (dev->hca_type == ARBEL_NATIVE)
+ dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
+ else
+ dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
+ }
+
dev->ib_dev.attach_mcast = mthca_multicast_attach;
dev->ib_dev.detach_mcast = mthca_multicast_detach;
dev->ib_dev.process_mad = mthca_process_mad;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.h 2005-03-31 19:06:47.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.h 2005-04-01 12:38:29.475562346 -0800
@@ -60,6 +60,24 @@
u32 first_seg;
};

+struct mthca_fmr {
+ struct ib_fmr ibmr;
+ struct ib_fmr_attr attr;
+ int order;
+ u32 first_seg;
+ int maps;
+ union {
+ struct {
+ struct mthca_mpt_entry __iomem *mpt;
+ u64 __iomem *mtts;
+ } tavor;
+ struct {
+ struct mthca_mpt_entry *mpt;
+ __be64 *mtts;
+ } arbel;
+ } mem;
+};
+
struct mthca_pd {
struct ib_pd ibpd;
u32 pd_num;
@@ -218,6 +236,11 @@
dma_addr_t header_dma;
};

+static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
+{
+ return container_of(ibmr, struct mthca_fmr, ibmr);
+}
+
static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mthca_mr, ibmr);

2005-04-01 21:15:49

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][25/27] IB/mthca: map context for RDMA responder in mem-free mode

Fix RDMA in mem-free mode: we need to make sure that the RDMA context
memory is mapped for the HCA.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:30.772280864 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:31.661087929 -0800
@@ -222,6 +222,7 @@
struct mthca_array qp;
struct mthca_icm_table *qp_table;
struct mthca_icm_table *eqp_table;
+ struct mthca_icm_table *rdb_table;
};

struct mthca_av_table {
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:30.776279996 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:31.666086844 -0800
@@ -430,14 +430,25 @@
goto err_unmap_qp;
}

- mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
+ mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base,
+ MTHCA_RDB_ENTRY_SIZE,
+ mdev->limits.num_qps <<
+ mdev->qp_table.rdb_shift,
+ 0, 0);
+ if (!mdev->qp_table.rdb_table) {
+ mthca_err(mdev, "Failed to map RDB context memory, aborting\n");
+ err = -ENOMEM;
+ goto err_unmap_eqp;
+ }
+
+ mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
dev_lim->cqc_entry_sz,
mdev->limits.num_cqs,
mdev->limits.reserved_cqs, 0);
if (!mdev->cq_table.table) {
mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
err = -ENOMEM;
- goto err_unmap_eqp;
+ goto err_unmap_rdb;
}

/*
@@ -463,6 +474,9 @@
err_unmap_cq:
mthca_free_icm_table(mdev, mdev->cq_table.table);

+err_unmap_rdb:
+ mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
+
err_unmap_eqp:
mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:30.827268928 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:31.673085325 -0800
@@ -1025,11 +1025,16 @@
if (ret)
goto err_qpc;

+ ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ if (ret)
+ goto err_eqpc;
+
qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
qp->qpn, &qp->rq.db);
if (qp->rq.db_index < 0) {
ret = -ENOMEM;
- goto err_eqpc;
+ goto err_rdb;
}

qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
@@ -1045,6 +1050,10 @@
err_rq_db:
mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);

+err_rdb:
+ mthca_table_put(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+
err_eqpc:
mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);

@@ -1060,6 +1069,8 @@
if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ mthca_table_put(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
}

2005-04-01 21:05:42

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][21/27] IB/mthca: split MR key munging routines

From: Michael S. Tsirkin <[email protected]>

Split the Tavor and Arbel/mem-free index<->hw key munging routines, so that the FMR implementation
can call the correct routine without testing the HCA type (which it already knows).

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:27.075083423 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:28.676735749 -0800
@@ -198,20 +198,40 @@
seg + (1 << order) - 1);
}

+static inline u32 tavor_hw_index_to_key(u32 ind)
+{
+ return ind;
+}
+
+static inline u32 tavor_key_to_hw_index(u32 key)
+{
+ return key;
+}
+
+static inline u32 arbel_hw_index_to_key(u32 ind)
+{
+ return (ind >> 24) | (ind << 8);
+}
+
+static inline u32 arbel_key_to_hw_index(u32 key)
+{
+ return (key << 24) | (key >> 8);
+}
+
static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
if (dev->hca_type == ARBEL_NATIVE)
- return (ind >> 24) | (ind << 8);
+ return arbel_hw_index_to_key(ind);
else
- return ind;
+ return tavor_hw_index_to_key(ind);
}

static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
{
if (dev->hca_type == ARBEL_NATIVE)
- return (key << 24) | (key >> 8);
+ return arbel_key_to_hw_index(key);
else
- return key;
+ return tavor_key_to_hw_index(key);
}

int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,

2005-04-01 21:16:19

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][27/27] IB/mthca: add support for new MT25204 HCA

Decouple the table of HCA features from the exact HCA device type. Add a
latest FW version field to that table so we can warn when someone is using
older FW. Add support for the new MT25204 HCA.

Remove the warning about mem-free support, since it should be pretty
solid at this point.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:31.661087929 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:32.606882623 -0800
@@ -49,20 +49,15 @@
#define DRV_VERSION "0.06-pre"
#define DRV_RELDATE "November 8, 2004"

-/* Types of supported HCA */
-enum {
- TAVOR, /* MT23108 */
- ARBEL_COMPAT, /* MT25208 in Tavor compat mode */
- ARBEL_NATIVE /* MT25208 with extended features */
-};
-
enum {
MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
MTHCA_FLAG_SRQ = 1 << 2,
MTHCA_FLAG_MSI = 1 << 3,
MTHCA_FLAG_MSI_X = 1 << 4,
MTHCA_FLAG_NO_LAM = 1 << 5,
- MTHCA_FLAG_FMR = 1 << 6
+ MTHCA_FLAG_FMR = 1 << 6,
+ MTHCA_FLAG_MEMFREE = 1 << 7,
+ MTHCA_FLAG_PCIE = 1 << 8
};

enum {
@@ -473,7 +468,7 @@

static inline int mthca_is_memfree(struct mthca_dev *dev)
{
- return dev->hca_type == ARBEL_NATIVE;
+ return dev->mthca_flags & MTHCA_FLAG_MEMFREE;
}

#endif /* MTHCA_DEV_H */
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:31.666086844 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:32.611881538 -0800
@@ -103,7 +103,7 @@
"aborting.\n");
return -ENODEV;
}
- } else if (mdev->hca_type == TAVOR)
+ } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE))
mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");

cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
@@ -119,8 +119,7 @@
"register, aborting.\n");
return -ENODEV;
}
- } else if (mdev->hca_type == ARBEL_NATIVE ||
- mdev->hca_type == ARBEL_COMPAT)
+ } else if (mdev->mthca_flags & MTHCA_FLAG_PCIE)
mthca_info(mdev, "No PCI Express capability, "
"not setting Max Read Request Size.\n");

@@ -438,7 +437,7 @@
if (!mdev->qp_table.rdb_table) {
mthca_err(mdev, "Failed to map RDB context memory, aborting\n");
err = -ENOMEM;
- goto err_unmap_eqp;
+ goto err_unmap_rdb;
}

mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
@@ -593,6 +592,7 @@

err_free_icm:
mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
@@ -851,6 +851,7 @@

if (mthca_is_memfree(mdev)) {
mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
@@ -869,11 +870,32 @@
mthca_SYS_DIS(mdev, &status);
}

+/* Types of supported HCA */
+enum {
+ TAVOR, /* MT23108 */
+ ARBEL_COMPAT, /* MT25208 in Tavor compat mode */
+ ARBEL_NATIVE, /* MT25208 with extended features */
+ SINAI /* MT25204 */
+};
+
+#define MTHCA_FW_VER(major, minor, subminor) \
+ (((u64) (major) << 32) | ((u64) (minor) << 16) | (u64) (subminor))
+
+static struct {
+ u64 latest_fw;
+ int is_memfree;
+ int is_pcie;
+} mthca_hca_table[] = {
+ [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 2), .is_memfree = 0, .is_pcie = 0 },
+ [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 6, 2), .is_memfree = 0, .is_pcie = 1 },
+ [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 0, 1), .is_memfree = 1, .is_pcie = 1 },
+ [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 }
+};
+
static int __devinit mthca_init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
static int mthca_version_printed = 0;
- static int mthca_memfree_warned = 0;
int ddr_hidden = 0;
int err;
struct mthca_dev *mdev;
@@ -886,6 +908,12 @@
printk(KERN_INFO PFX "Initializing %s (%s)\n",
pci_pretty_name(pdev), pci_name(pdev));

+ if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+ printk(KERN_ERR PFX "%s (%s) has invalid driver data %lx\n",
+ pci_pretty_name(pdev), pci_name(pdev), id->driver_data);
+ return -ENODEV;
+ }
+
err = pci_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "Cannot enable PCI device, "
@@ -950,15 +978,14 @@
goto err_free_res;
}

- mdev->pdev = pdev;
- mdev->hca_type = id->driver_data;
-
- if (mthca_is_memfree(mdev) && !mthca_memfree_warned++)
- mthca_warn(mdev, "Warning: native MT25208 mode support is incomplete. "
- "Your HCA may not work properly.\n");
+ mdev->pdev = pdev;

if (ddr_hidden)
mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
+ if (mthca_hca_table[id->driver_data].is_memfree)
+ mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
+ if (mthca_hca_table[id->driver_data].is_pcie)
+ mdev->mthca_flags |= MTHCA_FLAG_PCIE;

/*
* Now reset the HCA before we touch the PCI capabilities or
@@ -997,6 +1024,16 @@
if (err)
goto err_iounmap;

+ if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
+ mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
+ (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
+ (int) (mdev->fw_ver & 0xffff),
+ (int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
+ (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff,
+ (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff));
+ mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
+ }
+
err = mthca_setup_hca(mdev);
if (err)
goto err_close;
@@ -1112,6 +1149,14 @@
.driver_data = ARBEL_NATIVE },
{ PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL),
.driver_data = ARBEL_NATIVE },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI),
+ .driver_data = SINAI },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI),
+ .driver_data = SINAI },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI_OLD),
+ .driver_data = SINAI },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI_OLD),
+ .driver_data = SINAI },
{ 0, }
};

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:30.780279128 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:32.615880670 -0800
@@ -659,11 +659,18 @@
static ssize_t show_hca(struct class_device *cdev, char *buf)
{
struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
- switch (dev->hca_type) {
- case TAVOR: return sprintf(buf, "MT23108\n");
- case ARBEL_COMPAT: return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
- case ARBEL_NATIVE: return sprintf(buf, "MT25208\n");
- default: return sprintf(buf, "unknown\n");
+ switch (dev->pdev->device) {
+ case PCI_DEVICE_ID_MELLANOX_TAVOR:
+ return sprintf(buf, "MT23108\n");
+ case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
+ return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
+ case PCI_DEVICE_ID_MELLANOX_ARBEL:
+ return sprintf(buf, "MT25208\n");
+ case PCI_DEVICE_ID_MELLANOX_SINAI:
+ case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
+ return sprintf(buf, "MT25204\n");
+ default:
+ return sprintf(buf, "unknown\n");
}
}

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_reset.c 2005-03-31 19:06:41.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_reset.c 2005-04-01 12:38:32.594885228 -0800
@@ -63,7 +63,7 @@
* header as well.
*/

- if (mdev->hca_type == TAVOR) {
+ if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) {
/* Look for the bridge -- its device ID will be 2 more
than HCA's device ID. */
while ((bridge = pci_get_device(mdev->pdev->vendor,

2005-04-01 21:15:49

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][24/27] IB/mthca: encapsulate mem-free check into mthca_is_memfree()

Clean up mem-free mode support by introducing the mthca_is_memfree() function,
which encapsulates the logic of deciding whether a device is mem-free.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_av.c 2005-04-01 12:38:26.648176093 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_av.c 2005-04-01 12:38:30.803274137 -0800
@@ -62,7 +62,7 @@

ah->type = MTHCA_AH_PCI_POOL;

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
ah->av = kmalloc(sizeof *ah->av, GFP_ATOMIC);
if (!ah->av)
return -ENOMEM;
@@ -192,7 +192,7 @@
{
int err;

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
return 0;

err = mthca_alloc_init(&dev->av_table.alloc,
@@ -231,7 +231,7 @@

void __devexit mthca_cleanup_av_table(struct mthca_dev *dev)
{
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
return;

if (dev->av_table.av_map)
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:30.084430178 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:30.790276958 -0800
@@ -651,7 +651,7 @@
mthca_dbg(dev, "FW version %012llx, max commands %d\n",
(unsigned long long) dev->fw_ver, dev->cmd.max_cmds);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET);
@@ -984,7 +984,7 @@

mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
dev_lim->hca.arbel.resize_srq = field & 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
@@ -1148,7 +1148,7 @@
/* TPT attributes */

MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET);
- if (dev->hca_type != ARBEL_NATIVE)
+ if (!mthca_is_memfree(dev))
MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET);
MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
@@ -1161,7 +1161,7 @@

MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
MTHCA_PUT(inbox, param->log_uarc_sz, INIT_HCA_UARC_SZ_OFFSET);
MTHCA_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-01 12:38:26.177278312 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-01 12:38:30.794276090 -0800
@@ -180,7 +180,7 @@
{
u32 doorbell[2];

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
*cq->set_ci_db = cpu_to_be32(cq->cons_index);
wmb();
} else {
@@ -760,7 +760,7 @@
if (cq->cqn == -1)
return -ENOMEM;

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
cq->arm_sn = 1;

err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
@@ -811,7 +811,7 @@
cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
cq_context->ci_db = cpu_to_be32(cq->set_ci_db_index);
cq_context->state_db = cpu_to_be32(cq->arm_db_index);
}
@@ -851,11 +851,11 @@
err_out_mailbox:
kfree(mailbox);

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);

err_out_ci:
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);

err_out_icm:
@@ -916,7 +916,7 @@
mthca_free_mr(dev, &cq->mr);
mthca_free_cq_buf(dev, cq);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
mthca_table_put(dev, dev->cq_table.table, cq->cqn);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:29.460565601 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:30.772280864 -0800
@@ -470,4 +470,9 @@
return container_of(ibdev, struct mthca_dev, ib_dev);
}

+static inline int mthca_is_memfree(struct mthca_dev *dev)
+{
+ return dev->hca_type == ARBEL_NATIVE;
+}
+
#endif /* MTHCA_DEV_H */
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_eq.c 2005-04-01 12:38:24.575625986 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_eq.c 2005-04-01 12:38:30.799275005 -0800
@@ -198,7 +198,7 @@

static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
arbel_set_eq_ci(dev, eq, ci);
else
tavor_set_eq_ci(dev, eq, ci);
@@ -223,7 +223,7 @@

static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
{
- if (dev->hca_type != ARBEL_NATIVE) {
+ if (!mthca_is_memfree(dev)) {
u32 doorbell[2];

doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
@@ -535,11 +535,11 @@
MTHCA_EQ_OWNER_HW |
MTHCA_EQ_STATE_ARMED |
MTHCA_EQ_FLAG_TR);
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);

eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
} else {
eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
@@ -686,7 +686,7 @@

mthca_base = pci_resource_start(dev->pdev, 0);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
/*
* We assume that the EQ arm and EQ set CI registers
* fall within the first BAR. We can't trust the
@@ -756,7 +756,7 @@

static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev)
{
- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.eq_set_ci_base,
MTHCA_EQ_SET_CI_SIZE,
@@ -880,7 +880,7 @@

for (i = 0; i < MTHCA_NUM_EQ; ++i) {
err = request_irq(dev->eq_table.eq[i].msi_x_vector,
- dev->hca_type == ARBEL_NATIVE ?
+ mthca_is_memfree(dev) ?
mthca_arbel_msi_x_interrupt :
mthca_tavor_msi_x_interrupt,
0, eq_name[i], dev->eq_table.eq + i);
@@ -890,7 +890,7 @@
}
} else {
err = request_irq(dev->pdev->irq,
- dev->hca_type == ARBEL_NATIVE ?
+ mthca_is_memfree(dev) ?
mthca_arbel_interrupt :
mthca_tavor_interrupt,
SA_SHIRQ, DRV_NAME, dev);
@@ -918,7 +918,7 @@
dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);

for (i = 0; i < MTHCA_EQ_CMD; ++i)
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
else
tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:29.466564299 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:30.776279996 -0800
@@ -601,7 +601,7 @@

static int __devinit mthca_init_hca(struct mthca_dev *mdev)
{
- if (mdev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(mdev))
return mthca_init_arbel(mdev);
else
return mthca_init_tavor(mdev);
@@ -835,7 +835,7 @@

mthca_CLOSE_HCA(mdev, 0, &status);

- if (mdev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(mdev)) {
mthca_free_icm_table(mdev, mdev->cq_table.table);
mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
@@ -939,7 +939,7 @@
mdev->pdev = pdev;
mdev->hca_type = id->driver_data;

- if (mdev->hca_type == ARBEL_NATIVE && !mthca_memfree_warned++)
+ if (mthca_is_memfree(mdev) && !mthca_memfree_warned++)
mthca_warn(mdev, "Warning: native MT25208 mode support is incomplete. "
"Your HCA may not work properly.\n");

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:28.285820606 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:30.831268060 -0800
@@ -472,7 +472,7 @@
{
int i;

- if (dev->hca_type != ARBEL_NATIVE)
+ if (!mthca_is_memfree(dev))
return 0;

dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
@@ -504,7 +504,7 @@
int i;
u8 status;

- if (dev->hca_type != ARBEL_NATIVE)
+ if (!mthca_is_memfree(dev))
return;

/*
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:29.493558440 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:30.822270013 -0800
@@ -181,7 +181,7 @@
if (seg == -1)
return -1;

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
seg + (1 << order) - 1)) {
mthca_buddy_free(buddy, seg, order);
@@ -196,7 +196,7 @@
{
mthca_buddy_free(buddy, seg, order);

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
seg + (1 << order) - 1);
}
@@ -223,7 +223,7 @@

static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
return arbel_hw_index_to_key(ind);
else
return tavor_hw_index_to_key(ind);
@@ -231,7 +231,7 @@

static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
{
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
return arbel_key_to_hw_index(key);
else
return tavor_key_to_hw_index(key);
@@ -254,7 +254,7 @@
return -ENOMEM;
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
if (err)
goto err_out_mpt_free;
@@ -299,7 +299,7 @@
return err;

err_out_table:
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
@@ -329,7 +329,7 @@
return -ENOMEM;
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
if (err)
goto err_out_mpt_free;
@@ -437,7 +437,7 @@
mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);

err_out_table:
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
@@ -452,7 +452,7 @@
if (order >= 0)
mthca_free_mtt(dev, first_seg, order, buddy);

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_table_put(dev, dev->mr_table.mpt_table,
arbel_key_to_hw_index(lkey));

@@ -498,7 +498,7 @@
return -EINVAL;

/* For Arbel, all MTTs must fit in the same page. */
- if (dev->hca_type == ARBEL_NATIVE &&
+ if (mthca_is_memfree(dev) &&
mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
return -EINVAL;

@@ -511,7 +511,7 @@
idx = key & (dev->limits.num_mpts - 1);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
if (err)
goto err_out_mpt_free;
@@ -534,7 +534,7 @@

mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
mr->first_seg);
BUG_ON(!mr->mem.arbel.mtts);
@@ -596,7 +596,7 @@
dev->mr_table.fmr_mtt_buddy);

err_out_table:
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
@@ -765,7 +765,7 @@
if (err)
return err;

- if (dev->hca_type != ARBEL_NATIVE &&
+ if (!mthca_is_memfree(dev) &&
(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
dev->limits.fmr_reserved_mtts = 0;
else
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_profile.c 2005-04-01 12:38:29.480561261 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_profile.c 2005-04-01 12:38:30.785278043 -0800
@@ -116,11 +116,11 @@
profile[i].type = i;
profile[i].log_num = max(ffs(profile[i].num) - 1, 0);
profile[i].size *= profile[i].num;
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
}

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
mem_base = 0;
mem_avail = dev_lim->hca.arbel.max_icm_sz;
} else {
@@ -165,7 +165,7 @@
(unsigned long long) profile[i].size);
}

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
mthca_dbg(dev, "HCA context memory: reserving %d KB\n",
(int) (total_size >> 10));
else
@@ -267,7 +267,7 @@
* out of the MR pool. They don't use additional memory, but
* we assign them as part of the HCA profile anyway.
*/
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
dev->limits.fmr_reserved_mtts = 0;
else
dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:29.471563214 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:30.780279128 -0800
@@ -625,7 +625,7 @@
if (!mdev)
return 0;

- if (mdev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(mdev)) {
list_for_each_entry(fmr, fmr_list, list)
mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));

@@ -710,7 +710,7 @@
dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
else
dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
@@ -720,7 +720,7 @@
dev->ib_dev.detach_mcast = mthca_multicast_detach;
dev->ib_dev.process_mad = mthca_process_mad;

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
dev->ib_dev.post_send = mthca_arbel_post_send;
dev->ib_dev.post_recv = mthca_arbel_post_receive;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:26.181277444 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:30.827268928 -0800
@@ -639,7 +639,7 @@
else if (attr_mask & IB_QP_PATH_MTU)
qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
qp_context->rq_size_stride =
((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
qp_context->sq_size_stride =
@@ -731,7 +731,7 @@
qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index);
}
@@ -822,7 +822,7 @@

qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
qp_context->rcv_db_index = cpu_to_be32(qp->rq.db_index);

if (attr_mask & IB_QP_QKEY) {
@@ -897,7 +897,7 @@
size += 2 * sizeof (struct mthca_data_seg);
break;
case UD:
- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
size += sizeof (struct mthca_arbel_ud_seg);
else
size += sizeof (struct mthca_tavor_ud_seg);
@@ -1016,7 +1016,7 @@
{
int ret = 0;

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
if (ret)
return ret;
@@ -1057,7 +1057,7 @@
static void mthca_free_memfree(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
@@ -1104,7 +1104,7 @@
return ret;
}

- if (dev->hca_type == ARBEL_NATIVE) {
+ if (mthca_is_memfree(dev)) {
for (i = 0; i < qp->rq.max; ++i) {
wqe = get_recv_wqe(qp, i);
wqe->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
@@ -1127,7 +1127,7 @@
{
int i;

- if (dev->hca_type != ARBEL_NATIVE)
+ if (!mthca_is_memfree(dev))
return;

for (i = 0; 1 << i < qp->rq.max; ++i)
@@ -2011,7 +2011,7 @@
else
next = get_recv_wqe(qp, index);

- if (dev->hca_type == ARBEL_NATIVE)
+ if (mthca_is_memfree(dev))
*dbd = 1;
else
*dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));

2005-04-01 21:15:44

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][26/27] IB/mthca: update receive queue initialization for new HCAs

Update initialization of the receive queue to match the new documentation.
This change is required to support the new MT25204 HCA.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:31.673085325 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:32.124987229 -0800
@@ -181,6 +181,10 @@
MTHCA_MLX_SLR = 1 << 16
};

+enum {
+ MTHCA_INVAL_LKEY = 0x100
+};
+
struct mthca_next_seg {
u32 nda_op; /* [31:6] next WQE [4:0] next opcode */
u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
@@ -1093,7 +1097,6 @@
enum ib_sig_type send_policy,
struct mthca_qp *qp)
{
- struct mthca_next_seg *wqe;
int ret;
int i;

@@ -1116,18 +1119,28 @@
}

if (mthca_is_memfree(dev)) {
+ struct mthca_next_seg *next;
+ struct mthca_data_seg *scatter;
+ int size = (sizeof (struct mthca_next_seg) +
+ qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
+
for (i = 0; i < qp->rq.max; ++i) {
- wqe = get_recv_wqe(qp, i);
- wqe->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
- qp->rq.wqe_shift);
- wqe->ee_nds = cpu_to_be32(1 << (qp->rq.wqe_shift - 4));
+ next = get_recv_wqe(qp, i);
+ next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
+ qp->rq.wqe_shift);
+ next->ee_nds = cpu_to_be32(size);
+
+ for (scatter = (void *) (next + 1);
+ (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
+ ++scatter)
+ scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
}

for (i = 0; i < qp->sq.max; ++i) {
- wqe = get_send_wqe(qp, i);
- wqe->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
- qp->sq.wqe_shift) +
- qp->send_wqe_offset);
+ next = get_send_wqe(qp, i);
+ next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
+ qp->sq.wqe_shift) +
+ qp->send_wqe_offset);
}
}

@@ -1986,7 +1999,7 @@

if (i < qp->rq.max_gs) {
((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(0x100);
+ ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
((struct mthca_data_seg *) wqe)->addr = 0;
}


2005-04-01 21:41:09

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][18/27] IB/mthca: add SYNC_TPT firmware command

From: Michael S. Tsirkin <[email protected]>

Add code for the SYNC_TPT firmware command, which will be used by the FMR implementation.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:25.574409178 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:27.495992056 -0800
@@ -1404,6 +1404,11 @@
return err;
}

+int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status);
+}
+
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
int eq_num, u8 *status)
{
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-04-01 12:38:25.578408310 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-04-01 12:38:27.500990971 -0800
@@ -276,6 +276,7 @@
int mpt_index, u8 *status);
int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
int num_mtt, u8 *status);
+int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
int eq_num, u8 *status);
int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,

2005-04-01 21:41:08

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][19/27] IB/mthca: add mthca_write64_raw() for writing to MTT table directly

From: Michael S. Tsirkin <[email protected]>

Add mthca_write64_raw() function, which will be used to write FMR
entries that are in ioremapped PCI memory.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-03-31 19:06:52.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-04-01 12:38:27.898904595 -0800
@@ -51,6 +51,11 @@
#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)

+static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
+{
+ __raw_writeq((__force u64) val, dest);
+}
+
static inline void mthca_write64(u32 val[2], void __iomem *dest,
spinlock_t *doorbell_lock)
{
@@ -74,6 +79,12 @@
#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)

+static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
+{
+ __raw_writel(((__force u32 *) &val)[0], dest);
+ __raw_writel(((__force u32 *) &val)[1], dest + 4);
+}
+
static inline void mthca_write64(u32 val[2], void __iomem *dest,
spinlock_t *doorbell_lock)
{

2005-04-01 20:55:31

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][7/27] IB/mthca: clean up mthca_dereg_mr()

From: Michael S. Tsirkin <[email protected]>

It's cleaner to kfree mthca_mr, and not rely on the fact
that ib_mr is the first field in mthca_mr.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:21.926201103 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:22.630048317 -0800
@@ -568,8 +568,9 @@

static int mthca_dereg_mr(struct ib_mr *mr)
{
- mthca_free_mr(to_mdev(mr->device), to_mmr(mr));
- kfree(mr);
+ struct mthca_mr *mmr = to_mmr(mr);
+ mthca_free_mr(to_mdev(mr->device), mmr);
+ kfree(mmr);
return 0;
}


2005-04-01 20:57:29

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][14/27] IB/mthca: fix MTT allocation in mem-free mode

Fix bug in MTT allocation in mem-free mode.

I misunderstood the MTT size value returned by the firmware -- it is
really the size of a single MTT entry, since mem-free mode does not
segment the MTT as the original firmware did. This meant that our MTT
addresses ended up being off by a factor of 8. As a result, our
MTT allocations might overlap, and so we could overwrite and corrupt
earlier memory regions when writing new MTT entries.

We fix this by always using our 64-byte MTT segment size. This allows
some simplification of the code as well, since there's no reason to
put the MTT segment size in a variable -- we can always use our enum
value directly.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:20.843436141 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-04-01 12:38:25.574409178 -0800
@@ -990,7 +990,6 @@
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET);
- dev_lim->mtt_seg_sz = size;
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
dev_lim->mpt_entry_sz = size;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
@@ -1018,7 +1017,6 @@
} else {
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
- dev_lim->mtt_seg_sz = MTHCA_MTT_SEG_SIZE;
dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;
}

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-03-31 19:06:42.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-04-01 12:38:25.578408310 -0800
@@ -162,7 +162,6 @@
int cqc_entry_sz;
int srq_entry_sz;
int uar_scratch_entry_sz;
- int mtt_seg_sz;
int mpt_entry_sz;
union {
struct {
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-03-31 19:06:41.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:25.561412000 -0800
@@ -121,7 +121,6 @@
int reserved_eqs;
int num_mpts;
int num_mtt_segs;
- int mtt_seg_size;
int reserved_mtts;
int reserved_mrws;
int reserved_uars;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:23.852782896 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:25.566410914 -0800
@@ -390,7 +390,7 @@
}

mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
- dev_lim->mtt_seg_sz,
+ MTHCA_MTT_SEG_SIZE,
mdev->limits.num_mtt_segs,
mdev->limits.reserved_mtts, 1);
if (!mdev->mr_table.mtt_table) {
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:22.968974746 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:25.582407442 -0800
@@ -263,7 +263,7 @@
goto err_out_mpt_free;
}

- for (i = dev->limits.mtt_seg_size / 8, mr->order = 0;
+ for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
i < list_len;
i <<= 1, ++mr->order)
; /* nothing */
@@ -286,7 +286,7 @@
mtt_entry = MAILBOX_ALIGN(mailbox);

mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
- mr->first_seg * dev->limits.mtt_seg_size);
+ mr->first_seg * MTHCA_MTT_SEG_SIZE);
mtt_entry[1] = 0;
for (i = 0; i < list_len; ++i)
mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
@@ -330,7 +330,7 @@
memset(&mpt_entry->lkey, 0,
sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base +
- mr->first_seg * dev->limits.mtt_seg_size);
+ mr->first_seg * MTHCA_MTT_SEG_SIZE);

if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_profile.c 2005-04-01 12:38:21.237350633 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_profile.c 2005-04-01 12:38:25.570410046 -0800
@@ -95,7 +95,7 @@
profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
- profile[MTHCA_RES_MTT].size = dev_lim->mtt_seg_sz;
+ profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE;
profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
profile[MTHCA_RES_UARC].size = request->uarc_size;
@@ -229,10 +229,9 @@
break;
case MTHCA_RES_MTT:
dev->limits.num_mtt_segs = profile[i].num;
- dev->limits.mtt_seg_size = dev_lim->mtt_seg_sz;
dev->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
- init_hca->mtt_seg_sz = ffs(dev_lim->mtt_seg_sz) - 7;
+ init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7;
break;
case MTHCA_RES_UAR:
dev->limits.num_uars = profile[i].num;

2005-04-01 21:45:48

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][17/27] IB/mthca: encapsulate MTT buddy allocator

From: Michael S. Tsirkin <[email protected]>

Encapsulate the buddy allocator used for MTT segments. This cleans up
the code and also gets us ready to add FMR support.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:26.173279180 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-01 12:38:27.068084943 -0800
@@ -170,10 +170,15 @@
struct mthca_alloc alloc;
};

+struct mthca_buddy {
+ unsigned long **bits;
+ int max_order;
+ spinlock_t lock;
+};
+
struct mthca_mr_table {
struct mthca_alloc mpt_alloc;
- int max_mtt_order;
- unsigned long **mtt_buddy;
+ struct mthca_buddy mtt_buddy;
u64 mtt_base;
struct mthca_icm_table *mtt_table;
struct mthca_icm_table *mpt_table;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:25.582407442 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_mr.c 2005-04-01 12:38:27.075083423 -0800
@@ -72,60 +72,108 @@
* through the bitmaps)
*/

-static u32 __mthca_alloc_mtt(struct mthca_dev *dev, int order)
+static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
int o;
int m;
u32 seg;

- spin_lock(&dev->mr_table.mpt_alloc.lock);
+ spin_lock(&buddy->lock);

- for (o = order; o <= dev->mr_table.max_mtt_order; ++o) {
- m = 1 << (dev->mr_table.max_mtt_order - o);
- seg = find_first_bit(dev->mr_table.mtt_buddy[o], m);
+ for (o = order; o <= buddy->max_order; ++o) {
+ m = 1 << (buddy->max_order - o);
+ seg = find_first_bit(buddy->bits[o], m);
if (seg < m)
goto found;
}

- spin_unlock(&dev->mr_table.mpt_alloc.lock);
+ spin_unlock(&buddy->lock);
return -1;

found:
- clear_bit(seg, dev->mr_table.mtt_buddy[o]);
+ clear_bit(seg, buddy->bits[o]);

while (o > order) {
--o;
seg <<= 1;
- set_bit(seg ^ 1, dev->mr_table.mtt_buddy[o]);
+ set_bit(seg ^ 1, buddy->bits[o]);
}

- spin_unlock(&dev->mr_table.mpt_alloc.lock);
+ spin_unlock(&buddy->lock);

seg <<= order;

return seg;
}

-static void __mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order)
+static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
{
seg >>= order;

- spin_lock(&dev->mr_table.mpt_alloc.lock);
+ spin_lock(&buddy->lock);

- while (test_bit(seg ^ 1, dev->mr_table.mtt_buddy[order])) {
- clear_bit(seg ^ 1, dev->mr_table.mtt_buddy[order]);
+ while (test_bit(seg ^ 1, buddy->bits[order])) {
+ clear_bit(seg ^ 1, buddy->bits[order]);
seg >>= 1;
++order;
}

- set_bit(seg, dev->mr_table.mtt_buddy[order]);
+ set_bit(seg, buddy->bits[order]);

- spin_unlock(&dev->mr_table.mpt_alloc.lock);
+ spin_unlock(&buddy->lock);
}

-static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order)
+static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
- u32 seg = __mthca_alloc_mtt(dev, order);
+ int i, s;
+
+ buddy->max_order = max_order;
+ spin_lock_init(&buddy->lock);
+
+ buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *),
+ GFP_KERNEL);
+ if (!buddy->bits)
+ goto err_out;
+
+ memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *));
+
+ for (i = 0; i <= buddy->max_order; ++i) {
+ s = BITS_TO_LONGS(1 << (buddy->max_order - i));
+ buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
+ if (!buddy->bits[i])
+ goto err_out_free;
+ bitmap_zero(buddy->bits[i],
+ 1 << (buddy->max_order - i));
+ }
+
+ set_bit(0, buddy->bits[buddy->max_order]);
+
+ return 0;
+
+err_out_free:
+ for (i = 0; i <= buddy->max_order; ++i)
+ kfree(buddy->bits[i]);
+
+ kfree(buddy->bits);
+
+err_out:
+ return -ENOMEM;
+}
+
+static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
+{
+ int i;
+
+ for (i = 0; i <= buddy->max_order; ++i)
+ kfree(buddy->bits[i]);
+
+ kfree(buddy->bits);
+}
+
+static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
+ struct mthca_buddy *buddy)
+{
+ u32 seg = mthca_buddy_alloc(buddy, order);

if (seg == -1)
return -1;
@@ -133,16 +181,17 @@
if (dev->hca_type == ARBEL_NATIVE)
if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
seg + (1 << order) - 1)) {
- __mthca_free_mtt(dev, seg, order);
+ mthca_buddy_free(buddy, seg, order);
seg = -1;
}

return seg;
}

-static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order)
+static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order,
+ struct mthca_buddy* buddy)
{
- __mthca_free_mtt(dev, seg, order);
+ mthca_buddy_free(buddy, seg, order);

if (dev->hca_type == ARBEL_NATIVE)
mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
@@ -268,7 +317,8 @@
i <<= 1, ++mr->order)
; /* nothing */

- mr->first_seg = mthca_alloc_mtt(dev, mr->order);
+ mr->first_seg = mthca_alloc_mtt(dev, mr->order,
+ &dev->mr_table.mtt_buddy);
if (mr->first_seg == -1)
goto err_out_table;

@@ -361,7 +411,7 @@
kfree(mailbox);

err_out_free_mtt:
- mthca_free_mtt(dev, mr->first_seg, mr->order);
+ mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);

err_out_table:
if (dev->hca_type == ARBEL_NATIVE)
@@ -390,7 +440,7 @@
status);

if (mr->order >= 0)
- mthca_free_mtt(dev, mr->first_seg, mr->order);
+ mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);

if (dev->hca_type == ARBEL_NATIVE)
mthca_table_put(dev, dev->mr_table.mpt_table,
@@ -401,7 +451,6 @@
int __devinit mthca_init_mr_table(struct mthca_dev *dev)
{
int err;
- int i, s;

err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
dev->limits.num_mpts,
@@ -409,53 +458,24 @@
if (err)
return err;

- err = -ENOMEM;
-
- for (i = 1, dev->mr_table.max_mtt_order = 0;
- i < dev->limits.num_mtt_segs;
- i <<= 1, ++dev->mr_table.max_mtt_order)
- ; /* nothing */
-
- dev->mr_table.mtt_buddy = kmalloc((dev->mr_table.max_mtt_order + 1) *
- sizeof (long *),
- GFP_KERNEL);
- if (!dev->mr_table.mtt_buddy)
- goto err_out;
-
- for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
- dev->mr_table.mtt_buddy[i] = NULL;
-
- for (i = 0; i <= dev->mr_table.max_mtt_order; ++i) {
- s = BITS_TO_LONGS(1 << (dev->mr_table.max_mtt_order - i));
- dev->mr_table.mtt_buddy[i] = kmalloc(s * sizeof (long),
- GFP_KERNEL);
- if (!dev->mr_table.mtt_buddy[i])
- goto err_out_free;
- bitmap_zero(dev->mr_table.mtt_buddy[i],
- 1 << (dev->mr_table.max_mtt_order - i));
- }
-
- set_bit(0, dev->mr_table.mtt_buddy[dev->mr_table.max_mtt_order]);
-
- for (i = 0; i < dev->mr_table.max_mtt_order; ++i)
- if (1 << i >= dev->limits.reserved_mtts)
- break;
+ err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
+ fls(dev->limits.num_mtt_segs - 1));
+ if (err)
+ goto err_mtt_buddy;

- if (i == dev->mr_table.max_mtt_order) {
- mthca_err(dev, "MTT table of order %d is "
- "too small.\n", i);
- goto err_out_free;
+ if (dev->limits.reserved_mtts) {
+ if (mthca_alloc_mtt(dev, fls(dev->limits.reserved_mtts - 1),
+ &dev->mr_table.mtt_buddy) == -1) {
+ mthca_warn(dev, "MTT table of order %d is too small.\n",
+ dev->mr_table.mtt_buddy.max_order);
+ err = -ENOMEM;
+ goto err_mtt_buddy;
+ }
}

- (void) mthca_alloc_mtt(dev, i);
-
return 0;

- err_out_free:
- for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
- kfree(dev->mr_table.mtt_buddy[i]);
-
- err_out:
+err_mtt_buddy:
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);

return err;
@@ -463,11 +483,7 @@

void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
{
- int i;
-
/* XXX check if any MRs are still allocated? */
- for (i = 0; i <= dev->mr_table.max_mtt_order; ++i)
- kfree(dev->mr_table.mtt_buddy[i]);
- kfree(dev->mr_table.mtt_buddy);
+ mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}

2005-04-01 20:55:30

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][9/27] IB/mthca: release mutex on doorbell alloc error path

Release mutex on error return path from mthca_alloc_db().

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:22.274125578 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:23.500859288 -0800
@@ -337,7 +337,8 @@
break;

default:
- return -1;
+ ret = -EINVAL;
+ goto out;
}

for (i = start; i != end; i += dir)

2005-04-01 20:55:29

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][10/27] IB/mthca: print assigned IRQ when interrupt test fails

Print IRQ number when NOP command interrupt test fails to help debugging.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:19.884644268 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_main.c 2005-04-01 12:38:23.852782896 -0800
@@ -672,7 +672,10 @@

err = mthca_NOP(dev, &status);
if (err || status) {
- mthca_err(dev, "NOP command failed to generate interrupt, aborting.\n");
+ mthca_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting.\n",
+ dev->mthca_flags & MTHCA_FLAG_MSI_X ?
+ dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector :
+ dev->pdev->irq);
if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
else

2005-04-01 20:55:29

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][11/27] IB/mthca: only free doorbell records in mem-free mode

On error path, only free doorbell records if we're in mem-free mode.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cq.c 2005-03-31 19:06:42.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-01 12:38:24.207705852 -0800
@@ -817,10 +817,12 @@
err_out_mailbox:
kfree(mailbox);

- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);

err_out_ci:
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ if (dev->hca_type == ARBEL_NATIVE)
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);

err_out_icm:
mthca_table_put(dev, dev->cq_table.table, cq->cqn);

2005-04-01 22:05:58

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][6/27] IB/mthca: allocate correct number of doorbell pages

Doorbell record pages are allocated in HCA page size chunks (always
4096 bytes), so we need to divide by 4096 and not PAGE_SIZE when
figuring out how many pages we'll need space for.

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:19.911638409 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-01 12:38:22.274125578 -0800
@@ -446,7 +446,7 @@

init_MUTEX(&dev->db_tab->mutex);

- dev->db_tab->npages = dev->uar_table.uarc_size / PAGE_SIZE;
+ dev->db_tab->npages = dev->uar_table.uarc_size / 4096;
dev->db_tab->max_group1 = 0;
dev->db_tab->min_group2 = dev->db_tab->npages - 1;


2005-04-01 22:06:00

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][4/27] IB/mthca: fix posting sends with immediate data

When posting a work request with immediate data, put the immediate
data in the immediate data field of the hardware's work request
(rather than overwriting the flags field).

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-03-31 19:06:41.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-01 12:38:21.580276194 -0800
@@ -1465,7 +1465,7 @@
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ((struct mthca_next_seg *) wqe)->flags = wr->imm_data;
+ ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;

wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;
@@ -1769,7 +1769,7 @@
cpu_to_be32(1);
if (wr->opcode == IB_WR_SEND_WITH_IMM ||
wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ((struct mthca_next_seg *) wqe)->flags = wr->imm_data;
+ ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;

wqe += sizeof (struct mthca_next_seg);
size = sizeof (struct mthca_next_seg) / 16;

2005-04-01 22:06:01

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][3/27] IB/mthca: fix calculation of RDB shift

Fix calculation of rdb_shift by using original number of QPs, not
their slot in profile[] (which will be rearranged when we sort it).

Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_profile.c 2005-03-31 19:07:14.000000000 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_profile.c 2005-04-01 12:38:21.237350633 -0800
@@ -208,8 +208,7 @@
break;
case MTHCA_RES_RDB:
for (dev->qp_table.rdb_shift = 0;
- profile[MTHCA_RES_QP].num << dev->qp_table.rdb_shift <
- profile[i].num;
+ request->num_qp << dev->qp_table.rdb_shift < profile[i].num;
++dev->qp_table.rdb_shift)
; /* nothing */
dev->qp_table.rdb_base = (u32) profile[i].start;

2005-04-01 22:05:59

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][5/27] IB/mthca: allow unaligned memory regions

From: Michael S. Tsirkin <[email protected]>

The first buffer of a memory region is not required to be
page-aligned, so don't return an error if it's not.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:20.839437009 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-01 12:38:21.926201103 -0800
@@ -494,7 +494,7 @@
mask = 0;
total_size = 0;
for (i = 0; i < num_phys_buf; ++i) {
- if (buffer_list[i].addr & ~PAGE_MASK)
+ if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
return ERR_PTR(-EINVAL);
if (i != 0 && i != num_phys_buf - 1 &&
(buffer_list[i].size & ~PAGE_MASK))

2005-04-01 22:14:18

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][26.5/27] Add MT25204 PCI IDs

Ugh, this patch is required to build support for the new Mellanox
HCAs. Greg K-H applied it to his tree a while ago but it hasn't made
it to Linus yet.

Sorry,
Roland

Add PCI device IDs for new Mellanox MT25204 "Sinai" InfiniHost III Lx HCA.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-export.orig/include/linux/pci_ids.h 2005-03-31 19:07:14.000000000 -0800
+++ linux-export/include/linux/pci_ids.h 2005-04-01 14:03:16.468519075 -0800
@@ -2122,6 +2122,8 @@
#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44
#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278
#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282
+#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
+#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274

#define PCI_VENDOR_ID_PDC 0x15e9
#define PCI_DEVICE_ID_PDC_1841 0x1841

2005-04-08 00:10:47

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH][26.5/27] Add MT25204 PCI IDs

On Fri, Apr 01, 2005 at 02:06:50PM -0800, Roland Dreier wrote:
> Ugh, this patch is required to build support for the new Mellanox
> HCAs. Greg K-H applied it to his tree a while ago but it hasn't made
> it to Linus yet.
>
> Sorry,
> Roland
>
> Add PCI device IDs for new Mellanox MT25204 "Sinai" InfiniHost III Lx HCA.
>
> Signed-off-by: Roland Dreier <[email protected]>

Already in 2.6.12-rc2.

thanks,

greg k-h