2005-01-24 06:14:28

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][0/12] InfiniBand: updates for 2.6.11-rc2

Here are updates since the last merge of drivers/infiniband taken from
the OpenIB repository. A couple small fixes, the addition of "issm"
device support to allow userspace to set the IsSM port capability bit,
and a bunch of mthca driver improvements. There shouldn't be anything
risky (and it's all confined to drivers/infiniband).

Thanks,
Roland


2005-01-24 06:14:37

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][1/12] InfiniBand/core: compat_ioctl conversion minor fixes

Slightly tidy up Andi Kleen's compat_ioctl conversion for the
InfiniBand MAD driver by removing the no-longer-needed include of
ioctl32.h, killing unreachable code and doing some really anal
whitespace fixing.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/core/user_mad.c 2005-01-23 21:48:45.709546808 -0800
+++ linux-bk/drivers/infiniband/core/user_mad.c 2005-01-23 21:49:32.872376968 -0800
@@ -43,7 +43,6 @@
#include <linux/poll.h>
#include <linux/rwsem.h>
#include <linux/kref.h>
-#include <linux/ioctl32.h>

#include <asm/uaccess.h>

@@ -502,14 +501,14 @@
}

static struct file_operations umad_fops = {
- .owner = THIS_MODULE,
- .read = ib_umad_read,
- .write = ib_umad_write,
- .poll = ib_umad_poll,
+ .owner = THIS_MODULE,
+ .read = ib_umad_read,
+ .write = ib_umad_write,
+ .poll = ib_umad_poll,
.unlocked_ioctl = ib_umad_ioctl,
- .compat_ioctl = ib_umad_ioctl,
- .open = ib_umad_open,
- .release = ib_umad_close
+ .compat_ioctl = ib_umad_ioctl,
+ .open = ib_umad_open,
+ .release = ib_umad_close
};

static struct ib_client umad_client = {
@@ -705,8 +704,6 @@

return 0;

- ib_unregister_client(&umad_client);
-
out_class:
class_unregister(&umad_class);


2005-01-24 06:20:07

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][4/12] InfiniBand/core: fix port capability enums bit order

Correct defines of port capability mask enum values (bits were ordered
backwards) and add new capability bits from IBA spec version 1.2.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/include/ib_verbs.h 2005-01-23 08:30:22.000000000 -0800
+++ linux-bk/drivers/infiniband/include/ib_verbs.h 2005-01-23 20:46:23.606432952 -0800
@@ -154,25 +154,28 @@
};

enum ib_port_cap_flags {
- IB_PORT_SM = (1<<31),
- IB_PORT_NOTICE_SUP = (1<<30),
- IB_PORT_TRAP_SUP = (1<<29),
- IB_PORT_AUTO_MIGR_SUP = (1<<27),
- IB_PORT_SL_MAP_SUP = (1<<26),
- IB_PORT_MKEY_NVRAM = (1<<25),
- IB_PORT_PKEY_NVRAM = (1<<24),
- IB_PORT_LED_INFO_SUP = (1<<23),
- IB_PORT_SM_DISABLED = (1<<22),
- IB_PORT_SYS_IMAGE_GUID_SUP = (1<<21),
- IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = (1<<20),
- IB_PORT_CM_SUP = (1<<16),
- IB_PORT_SNMP_TUNNEL_SUP = (1<<15),
- IB_PORT_REINIT_SUP = (1<<14),
- IB_PORT_DEVICE_MGMT_SUP = (1<<13),
- IB_PORT_VENDOR_CLASS_SUP = (1<<12),
- IB_PORT_DR_NOTICE_SUP = (1<<11),
- IB_PORT_PORT_NOTICE_SUP = (1<<10),
- IB_PORT_BOOT_MGMT_SUP = (1<<9)
+ IB_PORT_SM = 1 << 1,
+ IB_PORT_NOTICE_SUP = 1 << 2,
+ IB_PORT_TRAP_SUP = 1 << 3,
+ IB_PORT_OPT_IPD_SUP = 1 << 4,
+ IB_PORT_AUTO_MIGR_SUP = 1 << 5,
+ IB_PORT_SL_MAP_SUP = 1 << 6,
+ IB_PORT_MKEY_NVRAM = 1 << 7,
+ IB_PORT_PKEY_NVRAM = 1 << 8,
+ IB_PORT_LED_INFO_SUP = 1 << 9,
+ IB_PORT_SM_DISABLED = 1 << 10,
+ IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+ IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ IB_PORT_CM_SUP = 1 << 16,
+ IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+ IB_PORT_REINIT_SUP = 1 << 18,
+ IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
+ IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
+ IB_PORT_DR_NOTICE_SUP = 1 << 21,
+ IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+ IB_PORT_BOOT_MGMT_SUP = 1 << 23,
+ IB_PORT_LINK_LATENCY_SUP = 1 << 24,
+ IB_PORT_CLIENT_REG_SUP = 1 << 25
};

enum ib_port_width {

2005-01-24 06:19:55

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][2/12] InfiniBand/mthca: more Arbel Mem-Free support

Continue development of Arbel Mem-Free support: we now compute a valid
profile, allocate context memory, map sufficient aux memory for HCA
page tables, map sufficient context memory to cover all reserved
firmware resources and successfully call the INIT_HCA and
QUERY_ADAPTER firmware commands. Fix a few error gotos that unwound
the wrong things.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 08:30:23.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 20:26:07.036379712 -0800
@@ -174,27 +174,30 @@

struct mthca_init_hca_param {
u64 qpc_base;
- u8 log_num_qps;
u64 eec_base;
- u8 log_num_eecs;
u64 srqc_base;
- u8 log_num_srqs;
u64 cqc_base;
- u8 log_num_cqs;
u64 eqpc_base;
u64 eeec_base;
u64 eqc_base;
- u8 log_num_eqs;
u64 rdb_base;
u64 mc_base;
+ u64 mpt_base;
+ u64 mtt_base;
+ u64 uar_scratch_base;
+ u64 uarc_base;
u16 log_mc_entry_sz;
u16 mc_hash_sz;
+ u8 log_num_qps;
+ u8 log_num_eecs;
+ u8 log_num_srqs;
+ u8 log_num_cqs;
+ u8 log_num_eqs;
u8 log_mc_table_sz;
- u64 mpt_base;
u8 mtt_seg_sz;
u8 log_mpt_sz;
- u64 mtt_base;
- u64 uar_scratch_base;
+ u8 log_uar_sz;
+ u8 log_uarc_sz;
};

struct mthca_init_ib_param {
@@ -238,6 +241,13 @@
int port, u8 *status);
int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
+ u8 *status);
int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
int mpt_index, u8 *status);
int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_profile.c 2005-01-23 08:32:07.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_profile.c 2005-01-23 20:26:07.033380168 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -60,7 +60,7 @@
MTHCA_NUM_PDS = 1 << 15
};

-int mthca_make_profile(struct mthca_dev *dev,
+u64 mthca_make_profile(struct mthca_dev *dev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca)
@@ -116,6 +116,8 @@
profile[i].type = i;
profile[i].log_num = max(ffs(profile[i].num) - 1, 0);
profile[i].size *= profile[i].num;
+ if (dev->hca_type == ARBEL_NATIVE)
+ profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
}

if (dev->hca_type == ARBEL_NATIVE) {
@@ -239,6 +241,10 @@
case MTHCA_RES_UDAV:
dev->av_table.ddr_av_base = profile[i].start;
dev->av_table.num_ddr_avs = profile[i].num;
+ case MTHCA_RES_UARC:
+ init_hca->uarc_base = profile[i].start;
+ init_hca->log_uarc_sz = ffs(request->uarc_size) - 13;
+ init_hca->log_uar_sz = ffs(request->num_uar) - 1;
default:
break;
}
@@ -251,5 +257,5 @@
dev->limits.num_pds = MTHCA_NUM_PDS;

kfree(profile);
- return 0;
+ return total_size;
}
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-01-23 08:30:22.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-01-23 20:26:07.037379560 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,6 +34,16 @@

#include "mthca_memfree.h"
#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+/*
+ * We allocate in as big chunks as we can, up to a maximum of 256 KB
+ * per chunk.
+ */
+enum {
+ MTHCA_ICM_ALLOC_SIZE = 1 << 18,
+ MTHCA_TABLE_CHUNK_SIZE = 1 << 18
+};

void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
{
@@ -71,11 +81,7 @@

INIT_LIST_HEAD(&icm->chunk_list);

- /*
- * We allocate in as big chunks as we can, up to a maximum of
- * 256 KB per chunk.
- */
- cur_order = get_order(1 << 18);
+ cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);

while (npages > 0) {
if (!chunk) {
@@ -131,3 +137,70 @@
mthca_free_icm(dev, icm);
return NULL;
}
+
+struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
+ u64 virt, unsigned size,
+ unsigned reserved,
+ int use_lowmem)
+{
+ struct mthca_icm_table *table;
+ int num_icm;
+ int i;
+ u8 status;
+
+ num_icm = size / MTHCA_TABLE_CHUNK_SIZE;
+
+ table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ table->virt = virt;
+ table->num_icm = num_icm;
+ init_MUTEX(&table->sem);
+
+ for (i = 0; i < num_icm; ++i)
+ table->icm[i] = NULL;
+
+ for (i = 0; i < (reserved + MTHCA_TABLE_CHUNK_SIZE - 1) / MTHCA_TABLE_CHUNK_SIZE; ++i) {
+ table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+ (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+ __GFP_NOWARN);
+ if (!table->icm[i])
+ goto err;
+ if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ &status) || status) {
+ mthca_free_icm(dev, table->icm[i]);
+ table->icm[i] = NULL;
+ goto err;
+ }
+ }
+
+ return table;
+
+err:
+ for (i = 0; i < num_icm; ++i)
+ if (table->icm[i]) {
+ mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ mthca_free_icm(dev, table->icm[i]);
+ }
+
+ kfree(table);
+
+ return NULL;
+}
+
+void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
+{
+ int i;
+ u8 status;
+
+ for (i = 0; i < table->num_icm; ++i)
+ if (table->icm[i]) {
+ mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+ mthca_free_icm(dev, table->icm[i]);
+ }
+
+ kfree(table);
+}
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 08:30:57.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 20:26:07.036379712 -0800
@@ -574,6 +574,50 @@
dev->eq_table.eq + i);
}

+int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
+{
+ int ret;
+ u8 status;
+
+ /*
+ * We assume that mapping one page is enough for the whole EQ
+ * context table. This is fine with all current HCAs, because
+ * we only use 32 EQs and each EQ uses 32 bytes of context
+ * memory, or 1 KB total.
+ */
+ dev->eq_table.icm_virt = icm_virt;
+ dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
+ if (!dev->eq_table.icm_page)
+ return -ENOMEM;
+ dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
+ __free_page(dev->eq_table.icm_page);
+ return -ENOMEM;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_page(dev->eq_table.icm_page);
+ }
+
+ return ret;
+}
+
+void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
+{
+ u8 status;
+
+ mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
+ pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_page(dev->eq_table.icm_page);
+}
+
int __devinit mthca_init_eq_table(struct mthca_dev *dev)
{
int err;
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 08:30:33.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:26:07.032380320 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -82,12 +82,9 @@
.num_cq = 1 << 16,
.num_mcg = 1 << 13,
.num_mpt = 1 << 17,
- .num_mtt = 1 << 20
-};
-
-enum {
- MTHCA_TAVOR_NUM_UDAV = 1 << 15,
- MTHCA_ARBEL_UARC_SIZE = 1 << 18
+ .num_mtt = 1 << 20,
+ .num_udav = 1 << 15, /* Tavor only */
+ .uarc_size = 1 << 18, /* Arbel only */
};

static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
@@ -207,58 +204,58 @@
err = mthca_QUERY_FW(mdev, &status);
if (err) {
mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
- goto err_out_disable;
+ goto err_disable;
}
if (status) {
mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
"aborting.\n", status);
err = -EINVAL;
- goto err_out_disable;
+ goto err_disable;
}
err = mthca_QUERY_DDR(mdev, &status);
if (err) {
mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
- goto err_out_disable;
+ goto err_disable;
}
if (status) {
mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
"aborting.\n", status);
err = -EINVAL;
- goto err_out_disable;
+ goto err_disable;
}

err = mthca_dev_lim(mdev, &dev_lim);

profile = default_profile;
- profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
- profile.num_udav = MTHCA_TAVOR_NUM_UDAV;
+ profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
+ profile.uarc_size = 0;

err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
- if (err)
- goto err_out_disable;
+ if (err < 0)
+ goto err_disable;

err = mthca_INIT_HCA(mdev, &init_hca, &status);
if (err) {
mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
- goto err_out_disable;
+ goto err_disable;
}
if (status) {
mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
"aborting.\n", status);
err = -EINVAL;
- goto err_out_disable;
+ goto err_disable;
}

err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
if (err) {
mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
- goto err_out_disable;
+ goto err_close;
}
if (status) {
mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
"aborting.\n", status);
err = -EINVAL;
- goto err_out_close;
+ goto err_close;
}

mdev->eq_table.inta_pin = adapter.inta_pin;
@@ -266,10 +263,10 @@

return 0;

-err_out_close:
+err_close:
mthca_CLOSE_HCA(mdev, 0, &status);

-err_out_disable:
+err_disable:
mthca_SYS_DIS(mdev, &status);

return err;
@@ -282,15 +279,15 @@

/* FIXME: use HCA-attached memory for FW if present */

- mdev->fw.arbel.icm =
+ mdev->fw.arbel.fw_icm =
mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
GFP_HIGHUSER | __GFP_NOWARN);
- if (!mdev->fw.arbel.icm) {
+ if (!mdev->fw.arbel.fw_icm) {
mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
return -ENOMEM;
}

- err = mthca_MAP_FA(mdev, mdev->fw.arbel.icm, &status);
+ err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status);
if (err) {
mthca_err(mdev, "MAP_FA command failed, aborting.\n");
goto err_free;
@@ -317,13 +314,146 @@
mthca_UNMAP_FA(mdev, &status);

err_free:
- mthca_free_icm(mdev, mdev->fw.arbel.icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+ return err;
+}
+
+static int __devinit mthca_init_icm(struct mthca_dev *mdev,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca,
+ u64 icm_size)
+{
+ u64 aux_pages;
+ u8 status;
+ int err;
+
+ err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status);
+ if (err) {
+ mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n",
+ (unsigned long long) icm_size >> 10,
+ (unsigned long long) aux_pages << 2);
+
+ mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
+ GFP_HIGHUSER | __GFP_NOWARN);
+ if (!mdev->fw.arbel.aux_icm) {
+ mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
+ return -ENOMEM;
+ }
+
+ err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status);
+ if (err) {
+ mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n");
+ goto err_free_aux;
+ }
+ if (status) {
+ mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_aux;
+ }
+
+ err = mthca_map_eq_icm(mdev, init_hca->eqc_base);
+ if (err) {
+ mthca_err(mdev, "Failed to map EQ context memory, aborting.\n");
+ goto err_unmap_aux;
+ }
+
+ mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
+ mdev->limits.num_mtt_segs *
+ init_hca->mtt_seg_sz,
+ mdev->limits.reserved_mtts *
+ init_hca->mtt_seg_sz, 1);
+ if (!mdev->mr_table.mtt_table) {
+ mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_eq;
+ }
+
+ mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
+ mdev->limits.num_mpts *
+ dev_lim->mpt_entry_sz,
+ mdev->limits.reserved_mrws *
+ dev_lim->mpt_entry_sz, 1);
+ if (!mdev->mr_table.mpt_table) {
+ mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_mtt;
+ }
+
+ mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
+ mdev->limits.num_qps *
+ dev_lim->qpc_entry_sz,
+ mdev->limits.reserved_qps *
+ dev_lim->qpc_entry_sz, 1);
+ if (!mdev->qp_table.qp_table) {
+ mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_mpt;
+ }
+
+ mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
+ mdev->limits.num_qps *
+ dev_lim->eqpc_entry_sz,
+ mdev->limits.reserved_qps *
+ dev_lim->eqpc_entry_sz, 1);
+ if (!mdev->qp_table.eqp_table) {
+ mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_qp;
+ }
+
+ mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
+ mdev->limits.num_cqs *
+ dev_lim->cqc_entry_sz,
+ mdev->limits.reserved_cqs *
+ dev_lim->cqc_entry_sz, 1);
+ if (!mdev->cq_table.table) {
+ mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_eqp;
+ }
+
+ return 0;
+
+err_unmap_eqp:
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+
+err_unmap_qp:
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+
+err_unmap_mpt:
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+
+err_unmap_mtt:
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+
+err_unmap_eq:
+ mthca_unmap_eq_icm(mdev);
+
+err_unmap_aux:
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+
+err_free_aux:
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+
return err;
}

static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
{
- struct mthca_dev_lim dev_lim;
+ struct mthca_dev_lim dev_lim;
+ struct mthca_profile profile;
+ struct mthca_init_hca_param init_hca;
+ struct mthca_adapter adapter;
+ u64 icm_size;
u8 status;
int err;

@@ -355,26 +485,77 @@
err = mthca_load_fw(mdev);
if (err) {
mthca_err(mdev, "Failed to start FW, aborting.\n");
- goto err_out_disable;
+ goto err_disable;
}

err = mthca_dev_lim(mdev, &dev_lim);
if (err) {
mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
- goto err_out_stop_fw;
+ goto err_stop_fw;
}

- mthca_warn(mdev, "Sorry, native MT25208 mode support is not done, "
- "aborting.\n");
- err = -ENODEV;
+ profile = default_profile;
+ profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
+ profile.num_udav = 0;

-err_out_stop_fw:
+ icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
+ if ((int) icm_size < 0) {
+ err = icm_size;
+ goto err_stop_fw;
+ }
+
+ err = mthca_init_icm(mdev, &dev_lim, &init_hca, icm_size);
+ if (err)
+ goto err_stop_fw;
+
+ err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ if (err) {
+ mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
+ goto err_free_icm;
+ }
+ if (status) {
+ mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_icm;
+ }
+
+ err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
+ goto err_free_icm;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_icm;
+ }
+
+ mdev->eq_table.inta_pin = adapter.inta_pin;
+ mdev->rev_id = adapter.revision_id;
+
+ return 0;
+
+err_free_icm:
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+ mthca_unmap_eq_icm(mdev);
+
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+
+err_stop_fw:
mthca_UNMAP_FA(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);

-err_out_disable:
+err_disable:
if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
mthca_DISABLE_LAM(mdev, &status);
+
return err;
}

@@ -403,82 +584,89 @@
if (err) {
mthca_err(dev, "Failed to initialize "
"memory region table, aborting.\n");
- goto err_out_pd_table_free;
+ goto err_pd_table_free;
}

err = mthca_pd_alloc(dev, &dev->driver_pd);
if (err) {
mthca_err(dev, "Failed to create driver PD, "
"aborting.\n");
- goto err_out_mr_table_free;
+ goto err_mr_table_free;
+ }
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ mthca_warn(dev, "Sorry, native MT25208 mode support is not done, "
+ "aborting.\n");
+ err = -ENODEV;
+ goto err_pd_free;
}

err = mthca_init_eq_table(dev);
if (err) {
mthca_err(dev, "Failed to initialize "
"event queue table, aborting.\n");
- goto err_out_pd_free;
+ goto err_pd_free;
}

err = mthca_cmd_use_events(dev);
if (err) {
mthca_err(dev, "Failed to switch to event-driven "
"firmware commands, aborting.\n");
- goto err_out_eq_table_free;
+ goto err_eq_table_free;
}

err = mthca_init_cq_table(dev);
if (err) {
mthca_err(dev, "Failed to initialize "
"completion queue table, aborting.\n");
- goto err_out_cmd_poll;
+ goto err_cmd_poll;
}

err = mthca_init_qp_table(dev);
if (err) {
mthca_err(dev, "Failed to initialize "
"queue pair table, aborting.\n");
- goto err_out_cq_table_free;
+ goto err_cq_table_free;
}

err = mthca_init_av_table(dev);
if (err) {
mthca_err(dev, "Failed to initialize "
"address vector table, aborting.\n");
- goto err_out_qp_table_free;
+ goto err_qp_table_free;
}

err = mthca_init_mcg_table(dev);
if (err) {
mthca_err(dev, "Failed to initialize "
"multicast group table, aborting.\n");
- goto err_out_av_table_free;
+ goto err_av_table_free;
}

return 0;

-err_out_av_table_free:
+err_av_table_free:
mthca_cleanup_av_table(dev);

-err_out_qp_table_free:
+err_qp_table_free:
mthca_cleanup_qp_table(dev);

-err_out_cq_table_free:
+err_cq_table_free:
mthca_cleanup_cq_table(dev);

-err_out_cmd_poll:
+err_cmd_poll:
mthca_cmd_use_polling(dev);

-err_out_eq_table_free:
+err_eq_table_free:
mthca_cleanup_eq_table(dev);

-err_out_pd_free:
+err_pd_free:
mthca_pd_free(dev, &dev->driver_pd);

-err_out_mr_table_free:
+err_mr_table_free:
mthca_cleanup_mr_table(dev);

-err_out_pd_table_free:
+err_pd_table_free:
mthca_cleanup_pd_table(dev);
return err;
}
@@ -507,32 +695,32 @@
MTHCA_CLR_INT_SIZE,
DRV_NAME)) {
err = -EBUSY;
- goto err_out_bar0_beg;
+ goto err_bar0_beg;
}

err = pci_request_region(pdev, 2, DRV_NAME);
if (err)
- goto err_out_bar0_end;
+ goto err_bar0_end;

if (!ddr_hidden) {
err = pci_request_region(pdev, 4, DRV_NAME);
if (err)
- goto err_out_bar2;
+ goto err_bar2;
}

return 0;

-err_out_bar0_beg:
+err_bar0_beg:
release_mem_region(pci_resource_start(pdev, 0) +
MTHCA_HCR_BASE,
MTHCA_MAP_HCR_SIZE);

-err_out_bar0_end:
+err_bar0_end:
release_mem_region(pci_resource_start(pdev, 0) +
MTHCA_CLR_INT_BASE,
MTHCA_CLR_INT_SIZE);

-err_out_bar2:
+err_bar2:
pci_release_region(pdev, 2);
return err;
}
@@ -582,8 +770,18 @@
mthca_CLOSE_HCA(mdev, 0, &status);

if (mdev->hca_type == ARBEL_NATIVE) {
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+ mthca_unmap_eq_icm(mdev);
+
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+
mthca_UNMAP_FA(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);

if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
mthca_DISABLE_LAM(mdev, &status);
@@ -623,13 +821,13 @@
pci_resource_len(pdev, 0) != 1 << 20) {
dev_err(&pdev->dev, "Missing DCS, aborting.");
err = -ENODEV;
- goto err_out_disable_pdev;
+ goto err_disable_pdev;
}
if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
pci_resource_len(pdev, 2) != 1 << 23) {
dev_err(&pdev->dev, "Missing UAR, aborting.");
err = -ENODEV;
- goto err_out_disable_pdev;
+ goto err_disable_pdev;
}
if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
ddr_hidden = 1;
@@ -638,7 +836,7 @@
if (err) {
dev_err(&pdev->dev, "Cannot obtain PCI resources, "
"aborting.\n");
- goto err_out_disable_pdev;
+ goto err_disable_pdev;
}

pci_set_master(pdev);
@@ -649,7 +847,7 @@
err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
- goto err_out_free_res;
+ goto err_free_res;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
@@ -660,7 +858,7 @@
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
- goto err_out_free_res;
+ goto err_free_res;
}
}

@@ -669,7 +867,7 @@
dev_err(&pdev->dev, "Device struct alloc failed, "
"aborting.\n");
err = -ENOMEM;
- goto err_out_free_res;
+ goto err_free_res;
}

mdev->pdev = pdev;
@@ -686,7 +884,7 @@
err = mthca_reset(mdev);
if (err) {
mthca_err(mdev, "Failed to reset HCA, aborting.\n");
- goto err_out_free_dev;
+ goto err_free_dev;
}

if (msi_x && !mthca_enable_msi_x(mdev))
@@ -705,7 +903,7 @@
mthca_err(mdev, "Couldn't map command register, "
"aborting.\n");
err = -ENOMEM;
- goto err_out_free_dev;
+ goto err_free_dev;
}
mdev->clr_base = ioremap(mthca_base + MTHCA_CLR_INT_BASE,
MTHCA_CLR_INT_SIZE);
@@ -713,7 +911,7 @@
mthca_err(mdev, "Couldn't map command register, "
"aborting.\n");
err = -ENOMEM;
- goto err_out_iounmap;
+ goto err_iounmap;
}

mthca_base = pci_resource_start(pdev, 2);
@@ -722,37 +920,37 @@
mthca_err(mdev, "Couldn't map kernel access region, "
"aborting.\n");
err = -ENOMEM;
- goto err_out_iounmap_clr;
+ goto err_iounmap_clr;
}

err = mthca_tune_pci(mdev);
if (err)
- goto err_out_iounmap_kar;
+ goto err_iounmap_kar;

err = mthca_init_hca(mdev);
if (err)
- goto err_out_iounmap_kar;
+ goto err_iounmap_kar;

err = mthca_setup_hca(mdev);
if (err)
- goto err_out_close;
+ goto err_close;

err = mthca_register_device(mdev);
if (err)
- goto err_out_cleanup;
+ goto err_cleanup;

err = mthca_create_agents(mdev);
if (err)
- goto err_out_unregister;
+ goto err_unregister;

pci_set_drvdata(pdev, mdev);

return 0;

-err_out_unregister:
+err_unregister:
mthca_unregister_device(mdev);

-err_out_cleanup:
+err_cleanup:
mthca_cleanup_mcg_table(mdev);
mthca_cleanup_av_table(mdev);
mthca_cleanup_qp_table(mdev);
@@ -765,19 +963,19 @@
mthca_cleanup_mr_table(mdev);
mthca_cleanup_pd_table(mdev);

-err_out_close:
+err_close:
mthca_close_hca(mdev);

-err_out_iounmap_kar:
+err_iounmap_kar:
iounmap(mdev->kar);

-err_out_iounmap_clr:
+err_iounmap_clr:
iounmap(mdev->clr_base);

-err_out_iounmap:
+err_iounmap:
iounmap(mdev->hcr);

-err_out_free_dev:
+err_free_dev:
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
pci_disable_msix(pdev);
if (mdev->mthca_flags & MTHCA_FLAG_MSI)
@@ -785,10 +983,10 @@

ib_dealloc_device(&mdev->ib_dev);

-err_out_free_res:
+err_free_res:
mthca_release_regions(pdev, ddr_hidden);

-err_out_disable_pdev:
+err_disable_pdev:
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
return err;
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_profile.h 2005-01-23 08:31:33.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_profile.h 2005-01-23 20:26:07.035379864 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -50,7 +50,7 @@
int uarc_size;
};

-int mthca_make_profile(struct mthca_dev *mdev,
+u64 mthca_make_profile(struct mthca_dev *mdev,
struct mthca_profile *request,
struct mthca_dev_lim *dev_lim,
struct mthca_init_hca_param *init_hca);
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 08:31:13.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 20:26:07.034380016 -0800
@@ -509,7 +509,8 @@
return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status);
}

-int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
+ u64 virt, u8 *status)
{
u32 *inbox;
dma_addr_t indma;
@@ -518,12 +519,17 @@
int nent = 0;
int i;
int err = 0;
- int ts = 0;
+ int ts = 0, tc = 0;

inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
memset(inbox, 0, PAGE_SIZE);

- for (mthca_icm_first(icm, &iter); !mthca_icm_last(&iter); mthca_icm_next(&iter)) {
+ for (mthca_icm_first(icm, &iter);
+ !mthca_icm_last(&iter);
+ mthca_icm_next(&iter)) {
/*
* We have to pass pages that are aligned to their
* size, so find the least significant 1 in the
@@ -538,13 +544,20 @@
goto out;
}
for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
+ if (virt != -1) {
+ *((__be64 *) (inbox + nent * 4)) =
+ cpu_to_be64(virt);
+ virt += 1 << lg;
+ }
+
*((__be64 *) (inbox + nent * 4 + 2)) =
cpu_to_be64((mthca_icm_addr(&iter) +
- (i << lg)) |
- (lg - 12));
+ (i << lg)) | (lg - 12));
ts += 1 << (lg - 10);
+ ++tc;
+
if (nent == PAGE_SIZE / 16) {
- err = mthca_cmd(dev, indma, nent, 0, CMD_MAP_FA,
+ err = mthca_cmd(dev, indma, nent, 0, op,
CMD_TIME_CLASS_B, status);
if (err || *status)
goto out;
@@ -553,18 +566,33 @@
}
}

- if (nent) {
- err = mthca_cmd(dev, indma, nent, 0, CMD_MAP_FA,
+ if (nent)
+ err = mthca_cmd(dev, indma, nent, 0, op,
CMD_TIME_CLASS_B, status);
- }

- mthca_dbg(dev, "Mapped %d KB of host memory for FW.\n", ts);
+ switch (op) {
+ case CMD_MAP_FA:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts);
+ break;
+ case CMD_MAP_ICM_AUX:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts);
+ break;
+ case CMD_MAP_ICM:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n",
+ tc, ts, (unsigned long long) virt - (ts << 10));
+ break;
+ }

out:
pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma);
return err;
}

+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1, status);
+}
+
int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status)
{
return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status);
@@ -1068,8 +1096,11 @@
#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
#define INIT_HCA_UAR_OFFSET 0x120
#define INIT_HCA_UAR_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x00)
+#define INIT_HCA_UARC_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x09)
+#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a)
#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
+#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18)

inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
if (!inbox)
@@ -1117,7 +1148,8 @@
/* TPT attributes */

MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET);
- MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET);
+ if (dev->hca_type != ARBEL_NATIVE)
+ MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET);
MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);

@@ -1125,7 +1157,14 @@
{
u8 uar_page_sz = PAGE_SHIFT - 12;
MTHCA_PUT(inbox, uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
- MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET);
+ }
+
+ MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET);
+
+ if (dev->hca_type == ARBEL_NATIVE) {
+ MTHCA_PUT(inbox, param->log_uarc_sz, INIT_HCA_UARC_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
}

err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA,
@@ -1199,6 +1238,68 @@
return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
}

+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status);
+}
+
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
+{
+ u64 *inbox;
+ dma_addr_t indma;
+ int err;
+
+ inbox = pci_alloc_consistent(dev->pdev, 16, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ inbox[0] = cpu_to_be64(virt);
+ inbox[1] = cpu_to_be64(dma_addr | (PAGE_SHIFT - 12));
+
+ err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
+
+ pci_free_consistent(dev->pdev, 16, inbox, indma);
+
+ if (!err)
+ mthca_dbg(dev, "Mapped page at %llx for ICM.\n",
+ (unsigned long long) virt);
+
+ return err;
+}
+
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status)
+{
+ return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1, status);
+}
+
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
+ u8 *status)
+{
+ int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE,
+ CMD_TIME_CLASS_A, status);
+
+ if (ret || status)
+ return ret;
+
+ /*
+ * Arbel page size is always 4 KB; round up number of system
+ * pages needed.
+ */
+ *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12);
+
+ return 0;
+}
+
int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
int mpt_index, u8 *status)
{
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-01-23 08:31:06.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-01-23 20:26:07.032380320 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,8 +38,10 @@
#include <linux/list.h>
#include <linux/pci.h>

+#include <asm/semaphore.h>
+
#define MTHCA_ICM_CHUNK_LEN \
- ((512 - sizeof (struct list_head) - 2 * sizeof (int)) / \
+ ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
(sizeof (struct scatterlist)))

struct mthca_icm_chunk {
@@ -53,6 +55,13 @@
struct list_head chunk_list;
};

+struct mthca_icm_table {
+ u64 virt;
+ int num_icm;
+ struct semaphore sem;
+ struct mthca_icm *icm[0];
+};
+
struct mthca_icm_iter {
struct mthca_icm *icm;
struct mthca_icm_chunk *chunk;
@@ -65,6 +74,12 @@
unsigned int gfp_mask);
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);

+struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
+ u64 virt, unsigned size,
+ unsigned reserved,
+ int use_lowmem);
+void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
+
static inline void mthca_icm_first(struct mthca_icm *icm,
struct mthca_icm_iter *iter)
{
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-01-23 08:30:19.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_dev.h 2005-01-23 20:26:07.030380624 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -153,10 +153,12 @@
};

struct mthca_mr_table {
- struct mthca_alloc mpt_alloc;
- int max_mtt_order;
- unsigned long **mtt_buddy;
- u64 mtt_base;
+ struct mthca_alloc mpt_alloc;
+ int max_mtt_order;
+ unsigned long **mtt_buddy;
+ u64 mtt_base;
+ struct mthca_icm_table *mtt_table;
+ struct mthca_icm_table *mpt_table;
};

struct mthca_eq_table {
@@ -164,23 +166,29 @@
void __iomem *clr_int;
u32 clr_mask;
struct mthca_eq eq[MTHCA_NUM_EQ];
+ u64 icm_virt;
+ struct page *icm_page;
+ dma_addr_t icm_dma;
int have_irq;
u8 inta_pin;
};

struct mthca_cq_table {
- struct mthca_alloc alloc;
- spinlock_t lock;
- struct mthca_array cq;
+ struct mthca_alloc alloc;
+ spinlock_t lock;
+ struct mthca_array cq;
+ struct mthca_icm_table *table;
};

struct mthca_qp_table {
- struct mthca_alloc alloc;
- u32 rdb_base;
- int rdb_shift;
- int sqp_start;
- spinlock_t lock;
- struct mthca_array qp;
+ struct mthca_alloc alloc;
+ u32 rdb_base;
+ int rdb_shift;
+ int sqp_start;
+ spinlock_t lock;
+ struct mthca_array qp;
+ struct mthca_icm_table *qp_table;
+ struct mthca_icm_table *eqp_table;
};

struct mthca_av_table {
@@ -216,7 +224,8 @@
u64 clr_int_base;
u64 eq_arm_base;
u64 eq_set_ci_base;
- struct mthca_icm *icm;
+ struct mthca_icm *fw_icm;
+ struct mthca_icm *aux_icm;
u16 fw_pages;
} arbel;
} fw;
@@ -329,6 +338,9 @@
u32 access, struct mthca_mr *mr);
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);

+int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
+void mthca_unmap_eq_icm(struct mthca_dev *dev);
+
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *entry);
void mthca_arm_cq(struct mthca_dev *dev, struct mthca_cq *cq,

2005-01-24 06:22:57

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][9/12] InfiniBand/ipoib: remove uses of yield()

Replace uses of yield() with msleep(1) as suggested by kernel janitors.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2005-01-23 08:31:58.000000000 -0800
+++ linux-bk/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2005-01-23 20:52:46.294255560 -0800
@@ -509,7 +509,7 @@
goto timeout;
}

- yield();
+ msleep(1);
}

ipoib_dbg(priv, "All sends and receives done.\n");
@@ -535,7 +535,7 @@
break;
}

- yield();
+ msleep(1);
}

return 0;

2005-01-24 06:22:48

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][5/12] InfiniBand/mthca: don't write ECR in MSI-X mode

From: "Michael S. Tsirkin" <[email protected]>

We don't need to write to the ECR to clear events when using MSI-X,
since we never read the ECR anyway.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 20:38:50.946247760 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 20:47:40.946675448 -0800
@@ -381,7 +381,6 @@
struct mthca_eq *eq = eq_ptr;
struct mthca_dev *dev = eq->dev;

- writel(eq->ecr_mask, dev->hcr + MTHCA_ECR_CLR_OFFSET + 4);
mthca_eq_int(dev, eq);

/* MSI-X vectors always belong to us */

2005-01-24 06:27:32

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][10/12] InfiniBand/core: add IsSM userspace support

Implement setting/clearing IsSM port capability bit from userspace via
"issm" special files (set IsSM bit on open, clear on close).

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/core/user_mad.c 2005-01-23 20:57:19.946654072 -0800
+++ linux-bk/drivers/infiniband/core/user_mad.c 2005-01-23 20:57:56.183145288 -0800
@@ -45,6 +45,7 @@
#include <linux/kref.h>

#include <asm/uaccess.h>
+#include <asm/semaphore.h>

#include <ib_mad.h>
#include <ib_user_mad.h>
@@ -54,7 +55,7 @@
MODULE_LICENSE("Dual BSD/GPL");

enum {
- IB_UMAD_MAX_PORTS = 256,
+ IB_UMAD_MAX_PORTS = 64,
IB_UMAD_MAX_AGENTS = 32
};

@@ -62,6 +63,12 @@
int devnum;
struct cdev dev;
struct class_device class_dev;
+
+ int sm_devnum;
+ struct cdev sm_dev;
+ struct class_device sm_class_dev;
+ struct semaphore sm_sem;
+
struct ib_device *ib_dev;
struct ib_umad_device *umad_dev;
u8 port_num;
@@ -92,7 +99,7 @@

static dev_t base_dev;
static spinlock_t map_lock;
-static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
+static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);

static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
@@ -511,6 +518,54 @@
.release = ib_umad_close
};

+static int ib_umad_sm_open(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port =
+ container_of(inode->i_cdev, struct ib_umad_port, sm_dev);
+ struct ib_port_modify props = {
+ .set_port_cap_mask = IB_PORT_SM
+ };
+ int ret;
+
+ if (filp->f_flags & O_NONBLOCK) {
+ if (down_trylock(&port->sm_sem))
+ return -EAGAIN;
+ } else {
+ if (down_interruptible(&port->sm_sem))
+ return -ERESTARTSYS;
+ }
+
+ ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+ if (ret) {
+ up(&port->sm_sem);
+ return ret;
+ }
+
+ filp->private_data = port;
+
+ return 0;
+}
+
+static int ib_umad_sm_close(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port = filp->private_data;
+ struct ib_port_modify props = {
+ .clr_port_cap_mask = IB_PORT_SM
+ };
+ int ret;
+
+ ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+ up(&port->sm_sem);
+
+ return ret;
+}
+
+static struct file_operations umad_sm_fops = {
+ .owner = THIS_MODULE,
+ .open = ib_umad_sm_open,
+ .release = ib_umad_sm_close
+};
+
static struct ib_client umad_client = {
.name = "umad",
.add = ib_umad_add_one,
@@ -519,17 +574,18 @@

static ssize_t show_dev(struct class_device *class_dev, char *buf)
{
- struct ib_umad_port *port =
- container_of(class_dev, struct ib_umad_port, class_dev);
+ struct ib_umad_port *port = class_get_devdata(class_dev);

- return print_dev_t(buf, port->dev.dev);
+ if (class_dev == &port->class_dev)
+ return print_dev_t(buf, port->dev.dev);
+ else
+ return print_dev_t(buf, port->sm_dev.dev);
}
static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);

static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
- struct ib_umad_port *port =
- container_of(class_dev, struct ib_umad_port, class_dev);
+ struct ib_umad_port *port = class_get_devdata(class_dev);

return sprintf(buf, "%s\n", port->ib_dev->name);
}
@@ -537,8 +593,7 @@

static ssize_t show_port(struct class_device *class_dev, char *buf)
{
- struct ib_umad_port *port =
- container_of(class_dev, struct ib_umad_port, class_dev);
+ struct ib_umad_port *port = class_get_devdata(class_dev);

return sprintf(buf, "%d\n", port->port_num);
}
@@ -554,11 +609,16 @@

static void ib_umad_release_port(struct class_device *class_dev)
{
- struct ib_umad_port *port =
- container_of(class_dev, struct ib_umad_port, class_dev);
+ struct ib_umad_port *port = class_get_devdata(class_dev);
+
+ if (class_dev == &port->class_dev) {
+ cdev_del(&port->dev);
+ clear_bit(port->devnum, dev_map);
+ } else {
+ cdev_del(&port->sm_dev);
+ clear_bit(port->sm_devnum, dev_map);
+ }

- cdev_del(&port->dev);
- clear_bit(port->devnum, dev_map);
kref_put(&port->umad_dev->ref, ib_umad_release_dev);
}

@@ -573,6 +633,94 @@
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

+static int ib_umad_init_port(struct ib_device *device, int port_num,
+ struct ib_umad_port *port)
+{
+ spin_lock(&map_lock);
+ port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (port->devnum >= IB_UMAD_MAX_PORTS) {
+ spin_unlock(&map_lock);
+ return -1;
+ }
+ port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS);
+ if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) {
+ spin_unlock(&map_lock);
+ return -1;
+ }
+ set_bit(port->devnum, dev_map);
+ set_bit(port->sm_devnum, dev_map);
+ spin_unlock(&map_lock);
+
+ port->ib_dev = device;
+ port->port_num = port_num;
+ init_MUTEX(&port->sm_sem);
+
+ cdev_init(&port->dev, &umad_fops);
+ port->dev.owner = THIS_MODULE;
+ kobject_set_name(&port->dev.kobj, "umad%d", port->devnum);
+ if (cdev_add(&port->dev, base_dev + port->devnum, 1))
+ return -1;
+
+ port->class_dev.class = &umad_class;
+ port->class_dev.dev = device->dma_device;
+
+ snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
+
+ if (class_device_register(&port->class_dev))
+ goto err_cdev;
+
+ class_set_devdata(&port->class_dev, port);
+ kref_get(&port->umad_dev->ref);
+
+ if (class_device_create_file(&port->class_dev, &class_device_attr_dev))
+ goto err_class;
+ if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
+ goto err_class;
+ if (class_device_create_file(&port->class_dev, &class_device_attr_port))
+ goto err_class;
+
+ cdev_init(&port->sm_dev, &umad_sm_fops);
+ port->sm_dev.owner = THIS_MODULE;
+ kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
+ if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1))
+ return -1;
+
+ port->sm_class_dev.class = &umad_class;
+ port->sm_class_dev.dev = device->dma_device;
+
+ snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
+
+ if (class_device_register(&port->sm_class_dev))
+ goto err_sm_cdev;
+
+ class_set_devdata(&port->sm_class_dev, port);
+ kref_get(&port->umad_dev->ref);
+
+ if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev))
+ goto err_sm_class;
+ if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
+ goto err_sm_class;
+ if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port))
+ goto err_sm_class;
+
+ return 0;
+
+err_sm_class:
+ class_device_unregister(&port->sm_class_dev);
+
+err_sm_cdev:
+ cdev_del(&port->sm_dev);
+
+err_class:
+ class_device_unregister(&port->class_dev);
+
+err_cdev:
+ cdev_del(&port->dev);
+ clear_bit(port->devnum, dev_map);
+
+ return -1;
+}
+
static void ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
@@ -601,58 +749,20 @@

for (i = s; i <= e; ++i) {
umad_dev->port[i - s].umad_dev = umad_dev;
- kref_get(&umad_dev->ref);
-
- spin_lock(&map_lock);
- umad_dev->port[i - s].devnum =
- find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (umad_dev->port[i - s].devnum >= IB_UMAD_MAX_PORTS) {
- spin_unlock(&map_lock);
- goto err;
- }
- set_bit(umad_dev->port[i - s].devnum, dev_map);
- spin_unlock(&map_lock);

- umad_dev->port[i - s].ib_dev = device;
- umad_dev->port[i - s].port_num = i;
-
- cdev_init(&umad_dev->port[i - s].dev, &umad_fops);
- umad_dev->port[i - s].dev.owner = THIS_MODULE;
- kobject_set_name(&umad_dev->port[i - s].dev.kobj,
- "umad%d", umad_dev->port[i - s].devnum);
- if (cdev_add(&umad_dev->port[i - s].dev, base_dev +
- umad_dev->port[i - s].devnum, 1))
+ if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
goto err;
-
- umad_dev->port[i - s].class_dev.class = &umad_class;
- umad_dev->port[i - s].class_dev.dev = device->dma_device;
- snprintf(umad_dev->port[i - s].class_dev.class_id,
- BUS_ID_SIZE, "umad%d", umad_dev->port[i - s].devnum);
- if (class_device_register(&umad_dev->port[i - s].class_dev))
- goto err_class;
-
- if (class_device_create_file(&umad_dev->port[i - s].class_dev,
- &class_device_attr_dev))
- goto err_class;
- if (class_device_create_file(&umad_dev->port[i - s].class_dev,
- &class_device_attr_ibdev))
- goto err_class;
- if (class_device_create_file(&umad_dev->port[i - s].class_dev,
- &class_device_attr_port))
- goto err_class;
}

ib_set_client_data(device, &umad_client, umad_dev);

return;

-err_class:
- cdev_del(&umad_dev->port[i - s].dev);
- clear_bit(umad_dev->port[i - s].devnum, dev_map);
-
err:
- while (--i >= s)
+ while (--i >= s) {
class_device_unregister(&umad_dev->port[i - s].class_dev);
+ class_device_unregister(&umad_dev->port[i - s].sm_class_dev);
+ }

kref_put(&umad_dev->ref, ib_umad_release_dev);
}
@@ -665,8 +775,10 @@
if (!umad_dev)
return;

- for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
+ for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) {
class_device_unregister(&umad_dev->port[i].class_dev);
+ class_device_unregister(&umad_dev->port[i].sm_class_dev);
+ }

kref_put(&umad_dev->ref, ib_umad_release_dev);
}
@@ -677,7 +789,7 @@

spin_lock_init(&map_lock);

- ret = alloc_chrdev_region(&base_dev, 0, IB_UMAD_MAX_PORTS,
+ ret = alloc_chrdev_region(&base_dev, 0, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
printk(KERN_ERR "user_mad: couldn't get device number\n");
@@ -708,7 +820,7 @@
class_unregister(&umad_class);

out_chrdev:
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS);
+ unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);

out:
return ret;
@@ -718,7 +830,7 @@
{
ib_unregister_client(&umad_client);
class_unregister(&umad_class);
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS);
+ unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
}

module_init(ib_umad_init);
--- linux-bk.orig/Documentation/infiniband/user_mad.txt 2005-01-23 08:30:27.000000000 -0800
+++ linux-bk/Documentation/infiniband/user_mad.txt 2005-01-23 20:57:46.505616496 -0800
@@ -2,9 +2,10 @@

Device files

- Each port of each InfiniBand device has a "umad" device attached.
- For example, a two-port HCA will have two devices, while a switch
- will have one device (for switch port 0).
+ Each port of each InfiniBand device has a "umad" device and an
+ "issm" device attached. For example, a two-port HCA will have two
+ umad devices and two issm devices, while a switch will have one
+ device of each type (for switch port 0).

Creating MAD agents

@@ -63,19 +64,36 @@
if (ret != sizeof mad)
perror("write");

+Setting IsSM Capability Bit
+
+ To set the IsSM capability bit for a port, simply open the
+ corresponding issm device file. If the IsSM bit is already set,
+ then the open call will block until the bit is cleared (or return
+ immediately with errno set to EAGAIN if the O_NONBLOCK flag is
+ passed to open()). The IsSM bit will be cleared when the issm file
+ is closed. No read, write or other operations can be performed on
+ the issm file.
+
/dev files

To create the appropriate character device files automatically with
udev, a rule like

KERNEL="umad*", NAME="infiniband/%k"
+ KERNEL="issm*", NAME="infiniband/%k"

- can be used. This will create a device node named
+ can be used. This will create device nodes named

/dev/infiniband/umad0
+ /dev/infiniband/issm0

for the first port, and so on. The InfiniBand device and port
- associated with this device can be determined from the files
+ associated with these devices can be determined from the files

/sys/class/infiniband_mad/umad0/ibdev
/sys/class/infiniband_mad/umad0/port
+
+ and
+
+ /sys/class/infiniband_mad/issm0/ibdev
+ /sys/class/infiniband_mad/issm0/port

2005-01-24 06:27:33

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][11/12] InfiniBand/mthca: clean up ioremap()/request_region() usage

From: "Michael S. Tsirkin" <[email protected]>

Here are misc fixes for mthca mapping:

1. Conceivably, MSI tables or another region could fall between the HCR
and ECR tables.
Thus it's arguably wrong to map both tables in one region.
So, do it separately.
I think it's also more readable to have ecr_base and access the ECR there,
rather than accessing the ECR through the hcr pointer.

2. mthca_request_regions error handling was broken
(wrong order of cleanups). For example, on all errors
pci_release_region was called, which is wrong if the region
was not yet mapped. And other such cleanups.

3. Fixed some error messages too.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 20:51:23.740805592 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 20:58:55.772086392 -0800
@@ -366,10 +366,11 @@
if (dev->eq_table.clr_mask)
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);

- if ((ecr = readl(dev->hcr + MTHCA_ECR_OFFSET + 4)) != 0) {
+ if ((ecr = readl(dev->ecr_base + 4)) != 0) {
work = 1;

- writel(ecr, dev->hcr + MTHCA_ECR_CLR_OFFSET + 4);
+ writel(ecr, dev->ecr_base +
+ MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);

for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (ecr & dev->eq_table.eq[i].ecr_mask)
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:52:01.962994936 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:58:55.771086544 -0800
@@ -699,57 +699,83 @@
*/
if (!request_mem_region(pci_resource_start(pdev, 0) +
MTHCA_HCR_BASE,
- MTHCA_MAP_HCR_SIZE,
- DRV_NAME))
- return -EBUSY;
+ MTHCA_HCR_SIZE,
+ DRV_NAME)) {
+ err = -EBUSY;
+ goto err_hcr_failed;
+ }
+
+ if (!request_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_ECR_BASE,
+ MTHCA_MAP_ECR_SIZE,
+ DRV_NAME)) {
+ err = -EBUSY;
+ goto err_ecr_failed;
+ }

if (!request_mem_region(pci_resource_start(pdev, 0) +
MTHCA_CLR_INT_BASE,
MTHCA_CLR_INT_SIZE,
DRV_NAME)) {
err = -EBUSY;
- goto err_bar0_beg;
+ goto err_int_failed;
}

+
err = pci_request_region(pdev, 2, DRV_NAME);
if (err)
- goto err_bar0_end;
+ goto err_bar2_failed;

if (!ddr_hidden) {
err = pci_request_region(pdev, 4, DRV_NAME);
if (err)
- goto err_bar2;
+ goto err_bar4_failed;
}

return 0;

-err_bar0_beg:
- release_mem_region(pci_resource_start(pdev, 0) +
- MTHCA_HCR_BASE,
- MTHCA_MAP_HCR_SIZE);
+err_bar4_failed:
+
+ pci_release_region(pdev, 2);
+err_bar2_failed:

-err_bar0_end:
release_mem_region(pci_resource_start(pdev, 0) +
MTHCA_CLR_INT_BASE,
MTHCA_CLR_INT_SIZE);
+err_int_failed:
+
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_ECR_BASE,
+ MTHCA_MAP_ECR_SIZE);
+err_ecr_failed:
+
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE);
+err_hcr_failed:

-err_bar2:
- pci_release_region(pdev, 2);
return err;
}

static void mthca_release_regions(struct pci_dev *pdev,
int ddr_hidden)
{
- release_mem_region(pci_resource_start(pdev, 0) +
- MTHCA_HCR_BASE,
- MTHCA_MAP_HCR_SIZE);
+ if (!ddr_hidden)
+ pci_release_region(pdev, 4);
+
+ pci_release_region(pdev, 2);
+
release_mem_region(pci_resource_start(pdev, 0) +
MTHCA_CLR_INT_BASE,
MTHCA_CLR_INT_SIZE);
- pci_release_region(pdev, 2);
- if (!ddr_hidden)
- pci_release_region(pdev, 4);
+
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_ECR_BASE,
+ MTHCA_MAP_ECR_SIZE);
+
+ release_mem_region(pci_resource_start(pdev, 0) +
+ MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE);
}

static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
@@ -911,29 +937,39 @@
mdev->cmd.use_events = 0;

mthca_base = pci_resource_start(pdev, 0);
- mdev->hcr = ioremap(mthca_base + MTHCA_HCR_BASE, MTHCA_MAP_HCR_SIZE);
+ mdev->hcr = ioremap(mthca_base + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
if (!mdev->hcr) {
mthca_err(mdev, "Couldn't map command register, "
"aborting.\n");
err = -ENOMEM;
goto err_free_dev;
}
+
mdev->clr_base = ioremap(mthca_base + MTHCA_CLR_INT_BASE,
MTHCA_CLR_INT_SIZE);
if (!mdev->clr_base) {
- mthca_err(mdev, "Couldn't map command register, "
+ mthca_err(mdev, "Couldn't map interrupt clear register, "
"aborting.\n");
err = -ENOMEM;
goto err_iounmap;
}

+ mdev->ecr_base = ioremap(mthca_base + MTHCA_ECR_BASE,
+ MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE);
+ if (!mdev->ecr_base) {
+ mthca_err(mdev, "Couldn't map ecr register, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_iounmap_clr;
+ }
+
mthca_base = pci_resource_start(pdev, 2);
mdev->kar = ioremap(mthca_base + PAGE_SIZE * MTHCA_KAR_PAGE, PAGE_SIZE);
if (!mdev->kar) {
mthca_err(mdev, "Couldn't map kernel access region, "
"aborting.\n");
err = -ENOMEM;
- goto err_iounmap_clr;
+ goto err_iounmap_ecr;
}

err = mthca_tune_pci(mdev);
@@ -982,6 +1018,9 @@
err_iounmap_kar:
iounmap(mdev->kar);

+err_iounmap_ecr:
+ iounmap(mdev->ecr_base);
+
err_iounmap_clr:
iounmap(mdev->clr_base);

@@ -1033,6 +1072,7 @@
mthca_close_hca(mdev);

iounmap(mdev->hcr);
+ iounmap(mdev->ecr_base);
iounmap(mdev->clr_base);

if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_config_reg.h 2005-01-23 08:30:41.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_config_reg.h 2005-01-23 20:58:55.772086392 -0800
@@ -43,13 +43,8 @@
#define MTHCA_ECR_SIZE 0x00008
#define MTHCA_ECR_CLR_BASE 0x80708
#define MTHCA_ECR_CLR_SIZE 0x00008
-#define MTHCA_ECR_OFFSET (MTHCA_ECR_BASE - MTHCA_HCR_BASE)
-#define MTHCA_ECR_CLR_OFFSET (MTHCA_ECR_CLR_BASE - MTHCA_HCR_BASE)
+#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE)
#define MTHCA_CLR_INT_BASE 0xf00d8
#define MTHCA_CLR_INT_SIZE 0x00008

-#define MTHCA_MAP_HCR_SIZE (MTHCA_ECR_CLR_BASE + \
- MTHCA_ECR_CLR_SIZE - \
- MTHCA_HCR_BASE)
-
#endif /* MTHCA_CONFIG_REG_H */
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-01-23 20:39:02.036561776 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_dev.h 2005-01-23 20:58:55.770086696 -0800
@@ -237,6 +237,7 @@
struct semaphore cap_mask_mutex;

void __iomem *hcr;
+ void __iomem *ecr_base;
void __iomem *clr_base;
void __iomem *kar;


2005-01-24 06:31:51

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][12/12] InfiniBand/mthca: remove x86 SSE pessimization

Get rid of the x86 SSE code for atomic 64-bit writes to doorbell
registers. Saving/setting CR0 plus a clts instruction are too
expensive for it to ever be a win, and the config option was just
confusing.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/Kconfig 2005-01-23 08:30:27.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/Kconfig 2005-01-23 21:00:44.744520064 -0800
@@ -14,13 +14,3 @@
This option causes the mthca driver produce a bunch of debug
messages. Select this is you are developing the driver or
trying to diagnose a problem.
-
-config INFINIBAND_MTHCA_SSE_DOORBELL
- bool "SSE doorbell code"
- depends on INFINIBAND_MTHCA && X86 && !X86_64
- default n
- ---help---
- This option will have the mthca driver use SSE instructions
- to ring hardware doorbell registers. This may improve
- performance for some workloads, but the driver will not run
- on processors without SSE instructions.
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:58:55.771086544 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 21:00:44.745519912 -0800
@@ -40,10 +40,6 @@
#include <linux/pci.h>
#include <linux/interrupt.h>

-#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
-#include <asm/cpufeature.h>
-#endif
-
#include "mthca_dev.h"
#include "mthca_config_reg.h"
#include "mthca_cmd.h"
@@ -1117,22 +1113,6 @@
{
int ret;

- /*
- * TODO: measure whether dynamically choosing doorbell code at
- * runtime affects our performance. Is there a "magic" way to
- * choose without having to follow a function pointer every
- * time we ring a doorbell?
- */
-#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
- if (!cpu_has_xmm) {
- printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n");
- printk(KERN_ERR PFX "the current CPU does not support SSE.\n");
- printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL "
- "and recompile.\n");
- return -ENODEV;
- }
-#endif
-
ret = pci_register_driver(&mthca_driver);
return ret < 0 ? ret : 0;
}
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-01-23 08:30:38.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-01-23 21:00:44.746519760 -0800
@@ -32,9 +32,7 @@
* $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
*/

-#include <linux/config.h>
#include <linux/types.h>
-#include <linux/preempt.h>

#define MTHCA_RD_DOORBELL 0x00
#define MTHCA_SEND_DOORBELL 0x10
@@ -59,51 +57,13 @@
__raw_writeq(*(u64 *) val, dest);
}

-#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL)
-/* Use SSE to write 64 bits atomically without a lock. */
-
-#define MTHCA_DECLARE_DOORBELL_LOCK(name)
-#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
-#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
-
-static inline unsigned long mthca_get_fpu(void)
-{
- unsigned long cr0;
-
- preempt_disable();
- asm volatile("mov %%cr0,%0; clts" : "=r" (cr0));
- return cr0;
-}
-
-static inline void mthca_put_fpu(unsigned long cr0)
-{
- asm volatile("mov %0,%%cr0" : : "r" (cr0));
- preempt_enable();
-}
-
-static inline void mthca_write64(u32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
-{
- /* i386 stack is aligned to 8 bytes, so this should be OK: */
- u8 xmmsave[8] __attribute__((aligned(8)));
- unsigned long cr0;
-
- cr0 = mthca_get_fpu();
-
- asm volatile (
- "movlps %%xmm0,(%0); \n\t"
- "movlps (%1),%%xmm0; \n\t"
- "movlps %%xmm0,(%2); \n\t"
- "movlps (%0),%%xmm0; \n\t"
- :
- : "r" (xmmsave), "r" (val), "r" (dest)
- : "memory" );
-
- mthca_put_fpu(cr0);
-}
-
#else
-/* Just fall back to a spinlock to protect the doorbell */
+
+/*
+ * Just fall back to a spinlock to protect the doorbell if
+ * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
+ * MMIO writes.
+ */

#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)

2005-01-24 06:22:56

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][7/12] InfiniBand/mthca: optimize event queue handling

From: "Michael S. Tsirkin" <[email protected]>

Event queue handling performance improvements:
- Only calculate EQ entry address once, and don't truncate the
consumer index until we really need to.
- Only read ECR once. If a new event occurs while we're in the
interrupt handler, we'll get another interrupt anyway, since we
only clear events once.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_provider.h 2005-01-23 08:30:27.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_provider.h 2005-01-23 20:51:23.739805744 -0800
@@ -66,11 +66,11 @@
struct mthca_dev *dev;
int eqn;
u32 ecr_mask;
+ u32 cons_index;
u16 msi_x_vector;
u16 msi_x_entry;
int have_irq;
int nent;
- int cons_index;
struct mthca_buf_list *page_list;
struct mthca_mr mr;
};
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 20:47:40.946675448 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_eq.c 2005-01-23 20:51:23.740805592 -0800
@@ -164,12 +164,12 @@
MTHCA_ASYNC_EVENT_MASK;
}

-static inline void set_eq_ci(struct mthca_dev *dev, int eqn, int ci)
+static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
u32 doorbell[2];

- doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eqn);
- doorbell[1] = cpu_to_be32(ci);
+ doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
+ doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));

mthca_write64(doorbell,
dev->kar + MTHCA_EQ_DOORBELL,
@@ -200,21 +200,22 @@
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}

-static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, int entry)
+static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
{
- return eq->page_list[entry * MTHCA_EQ_ENTRY_SIZE / PAGE_SIZE].buf
- + (entry * MTHCA_EQ_ENTRY_SIZE) % PAGE_SIZE;
+ unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
+ return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
}

-static inline int next_eqe_sw(struct mthca_eq *eq)
+static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
{
- return !(MTHCA_EQ_ENTRY_OWNER_HW &
- get_eqe(eq, eq->cons_index)->owner);
+ struct mthca_eqe* eqe;
+ eqe = get_eqe(eq, eq->cons_index);
+ return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
}

-static inline void set_eqe_hw(struct mthca_eq *eq, int entry)
+static inline void set_eqe_hw(struct mthca_eqe *eqe)
{
- get_eqe(eq, entry)->owner = MTHCA_EQ_ENTRY_OWNER_HW;
+ eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW;
}

static void port_change(struct mthca_dev *dev, int port, int active)
@@ -235,10 +236,10 @@
{
struct mthca_eqe *eqe;
int disarm_cqn;
+ int eqes_found = 0;

- while (next_eqe_sw(eq)) {
+ while ((eqe = next_eqe_sw(eq))) {
int set_ci = 0;
- eqe = get_eqe(eq, eq->cons_index);

/*
* Make sure we read EQ entry contents after we've
@@ -328,12 +329,13 @@
break;
};

- set_eqe_hw(eq, eq->cons_index);
- eq->cons_index = (eq->cons_index + 1) & (eq->nent - 1);
+ set_eqe_hw(eqe);
+ ++eq->cons_index;
+ eqes_found = 1;

if (set_ci) {
wmb(); /* see comment below */
- set_eq_ci(dev, eq->eqn, eq->cons_index);
+ set_eq_ci(dev, eq, eq->cons_index);
set_ci = 0;
}
}
@@ -347,8 +349,10 @@
* possibility of the HCA writing an entry and then
* having set_eqe_hw() overwrite the owner field.
*/
- wmb();
- set_eq_ci(dev, eq->eqn, eq->cons_index);
+ if (likely(eqes_found)) {
+ wmb();
+ set_eq_ci(dev, eq, eq->cons_index);
+ }
eq_req_not(dev, eq->eqn);
}

@@ -362,7 +366,7 @@
if (dev->eq_table.clr_mask)
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);

- while ((ecr = readl(dev->hcr + MTHCA_ECR_OFFSET + 4)) != 0) {
+ if ((ecr = readl(dev->hcr + MTHCA_ECR_OFFSET + 4)) != 0) {
work = 1;

writel(ecr, dev->hcr + MTHCA_ECR_CLR_OFFSET + 4);
@@ -440,7 +444,7 @@
}

for (i = 0; i < nent; ++i)
- set_eqe_hw(eq, i);
+ set_eqe_hw(get_eqe(eq, i));

eq->eqn = mthca_alloc(&dev->eq_table.alloc);
if (eq->eqn == -1)

2005-01-24 06:22:54

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][8/12] InfiniBand/mthca: test IRQ routing during initialization

When we switch to interrupt-driven command mode, test interrupt
generation with a NOP firmware command. Broken MSI/MSI-X and
interrupt line routing problems seem to be very common, and this makes
the error message much clearer -- before this change we would
mysteriously fail when initializing the QP table.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 20:49:34.829362648 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 20:52:01.964994632 -0800
@@ -289,6 +289,7 @@
u8 *status);
int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
u8 *status);
+int mthca_NOP(struct mthca_dev *dev, u8 *status);

#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:38:50.947247608 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:52:01.962994936 -0800
@@ -570,6 +570,7 @@
static int __devinit mthca_setup_hca(struct mthca_dev *dev)
{
int err;
+ u8 status;

MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);

@@ -615,6 +616,18 @@
goto err_eq_table_free;
}

+ err = mthca_NOP(dev, &status);
+ if (err || status) {
+ mthca_err(dev, "NOP command failed to generate interrupt, aborting.\n");
+ if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
+ mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
+ else
+ mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
+
+ goto err_cmd_poll;
+ } else
+ mthca_dbg(dev, "NOP command IRQ test passed\n");
+
err = mthca_init_cq_table(dev);
if (err) {
mthca_err(dev, "Failed to initialize "
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 20:49:34.828362800 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 20:52:01.963994784 -0800
@@ -1757,3 +1757,8 @@
pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
return err;
}
+
+int mthca_NOP(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100), status);
+}

2005-01-24 06:22:53

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][6/12] InfiniBand/mthca: pass full process_mad info to firmware

From: "Michael S. Tsirkin" <[email protected]>

Pass full extended MAD information to firmware when a work completion is supplied to
the MAD_IFC command. This allows B_Key checking/trap generation.

Signed-off-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 20:39:02.039561320 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 20:49:34.829362648 -0800
@@ -280,7 +280,8 @@
void *qp_context, u8 *status);
int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
u8 *status);
-int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int port,
+int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
void *in_mad, void *response_mad, u8 *status);
int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
u8 *status);
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-01-23 20:39:02.036561776 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_provider.c 2005-01-23 20:49:34.826363104 -0800
@@ -59,8 +59,8 @@
in_mad->method = IB_MGMT_METHOD_GET;
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

- err = mthca_MAD_IFC(to_mdev(ibdev), 1,
- 1, in_mad, out_mad,
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ 1, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
@@ -104,8 +104,8 @@
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);

- err = mthca_MAD_IFC(to_mdev(ibdev), 1,
- port, in_mad, out_mad,
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
@@ -189,8 +189,8 @@
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);

- err = mthca_MAD_IFC(to_mdev(ibdev), 1,
- port, in_mad, out_mad,
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
@@ -228,8 +228,8 @@
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);

- err = mthca_MAD_IFC(to_mdev(ibdev), 1,
- port, in_mad, out_mad,
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
@@ -248,8 +248,8 @@
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);

- err = mthca_MAD_IFC(to_mdev(ibdev), 1,
- port, in_mad, out_mad,
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 20:39:02.038561472 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 20:49:34.828362800 -0800
@@ -36,6 +36,7 @@
#include <linux/pci.h>
#include <linux/errno.h>
#include <asm/io.h>
+#include <ib_mad.h>

#include "mthca_dev.h"
#include "mthca_config_reg.h"
@@ -1626,13 +1627,24 @@
CMD_TIME_CLASS_B, status);
}

-int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int port,
- void *in_mad, void *response_mad, u8 *status) {
+int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+ void *in_mad, void *response_mad, u8 *status)
+{
void *box;
dma_addr_t dma;
int err;
+ u32 in_modifier = port;
+ u8 op_modifier = 0;

-#define MAD_IFC_BOX_SIZE 512
+#define MAD_IFC_BOX_SIZE 0x400
+#define MAD_IFC_MY_QPN_OFFSET 0x100
+#define MAD_IFC_RQPN_OFFSET 0x104
+#define MAD_IFC_SL_OFFSET 0x108
+#define MAD_IFC_G_PATH_OFFSET 0x109
+#define MAD_IFC_RLID_OFFSET 0x10a
+#define MAD_IFC_PKEY_OFFSET 0x10e
+#define MAD_IFC_GRH_OFFSET 0x140

box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma);
if (!box)
@@ -1640,11 +1652,46 @@

memcpy(box, in_mad, 256);

- err = mthca_cmd_box(dev, dma, dma + 256, port, !!ignore_mkey,
+ /*
+ * Key check traps can't be generated unless we have in_wc to
+ * tell us where to send the trap.
+ */
+ if (ignore_mkey || !in_wc)
+ op_modifier |= 0x1;
+ if (ignore_bkey || !in_wc)
+ op_modifier |= 0x2;
+
+ if (in_wc) {
+ u8 val;
+
+ memset(box + 256, 0, 256);
+
+ MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
+ MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
+
+ val = in_wc->sl << 4;
+ MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET);
+
+ val = in_wc->dlid_path_bits |
+ (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
+ MTHCA_PUT(box, val, MAD_IFC_G_PATH_OFFSET);
+
+ MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET);
+ MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
+
+ if (in_grh)
+ memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40);
+
+ op_modifier |= 0x10;
+
+ in_modifier |= in_wc->slid << 16;
+ }
+
+ err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier,
CMD_MAD_IFC, CMD_TIME_CLASS_C, status);

if (!err && !*status)
- memcpy(response_mad, box + 256, 256);
+ memcpy(response_mad, box + 512, 256);

pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma);
return err;
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_mad.c 2005-01-23 08:31:45.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_mad.c 2005-01-23 20:49:34.827362952 -0800
@@ -232,8 +232,9 @@
return IB_MAD_RESULT_SUCCESS;

err = mthca_MAD_IFC(to_mdev(ibdev),
- !!(mad_flags & IB_MAD_IGNORE_MKEY),
- port_num, in_mad, out_mad,
+ mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY,
+ port_num, in_wc, in_grh, in_mad, out_mad,
&status);
if (err) {
mthca_err(to_mdev(ibdev), "MAD_IFC failed\n");

2005-01-24 06:22:49

by Roland Dreier

[permalink] [raw]
Subject: [PATCH][3/12] InfiniBand/mthca: implement modifying port attributes

Implement the port_modify() device method for mthca using the SET_IB
firmware command. In particular this allows changing the port
capability mask.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 20:38:50.944248064 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.h 2005-01-23 20:39:02.039561320 -0800
@@ -215,6 +215,13 @@
u64 si_guid;
};

+struct mthca_set_ib_param {
+ int set_si_guid;
+ int reset_qkey_viol;
+ u64 si_guid;
+ u32 cap_mask;
+};
+
int mthca_cmd_use_events(struct mthca_dev *dev);
void mthca_cmd_use_polling(struct mthca_dev *dev);
void mthca_cmd_event(struct mthca_dev *dev, u16 token,
@@ -241,6 +248,8 @@
int port, u8 *status);
int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
+int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
+ int port, u8 *status);
int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-01-23 20:38:25.991041528 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_provider.c 2005-01-23 20:39:02.036561776 -0800
@@ -137,7 +137,35 @@
u8 port, int port_modify_mask,
struct ib_port_modify *props)
{
- return 0;
+ struct mthca_set_ib_param set_ib;
+ struct ib_port_attr attr;
+ int err;
+ u8 status;
+
+ if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+ return -ERESTARTSYS;
+
+ err = mthca_query_port(ibdev, port, &attr);
+ if (err)
+ goto out;
+
+ set_ib.set_si_guid = 0;
+ set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
+
+ set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
+ ~props->clr_port_cap_mask;
+
+ err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ up(&to_mdev(ibdev)->cap_mask_mutex);
+ return err;
}

static int mthca_query_pkey(struct ib_device *ibdev,
@@ -619,6 +647,8 @@
}
}

+ init_MUTEX(&dev->cap_mask_mutex);
+
return 0;
}

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 20:38:50.949247304 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_cmd.c 2005-01-23 20:39:02.038561472 -0800
@@ -1238,6 +1238,41 @@
return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
}

+int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
+ int port, u8 *status)
+{
+ u32 *inbox;
+ dma_addr_t indma;
+ int err;
+ u32 flags = 0;
+
+#define SET_IB_IN_SIZE 0x40
+#define SET_IB_FLAGS_OFFSET 0x00
+#define SET_IB_FLAG_SIG (1 << 18)
+#define SET_IB_FLAG_RQK (1 << 0)
+#define SET_IB_CAP_MASK_OFFSET 0x04
+#define SET_IB_SI_GUID_OFFSET 0x08
+
+ inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma);
+ if (!inbox)
+ return -ENOMEM;
+
+ memset(inbox, 0, SET_IB_IN_SIZE);
+
+ flags |= param->set_si_guid ? SET_IB_FLAG_SIG : 0;
+ flags |= param->reset_qkey_viol ? SET_IB_FLAG_RQK : 0;
+ MTHCA_PUT(inbox, flags, SET_IB_FLAGS_OFFSET);
+
+ MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
+ MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
+
+ err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB,
+ CMD_TIME_CLASS_B, status);
+
+ pci_free_consistent(dev->pdev, SET_IB_IN_SIZE, inbox, indma);
+ return err;
+}
+
int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status)
{
return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status);
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-01-23 20:38:50.950247152 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_dev.h 2005-01-23 20:39:02.036561776 -0800
@@ -234,6 +234,7 @@
u64 ddr_end;

MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
+ struct semaphore cap_mask_mutex;

void __iomem *hcr;
void __iomem *clr_base;

2005-01-24 18:44:57

by Roland Dreier

[permalink] [raw]
Subject: [openib-general] [PATCH][13/12] InfiniBand/mthca: initialize mutex earlier

One more bug that slipped in...


The cap_mask_mutex needs to be initialized before
ib_register_device(), because device registration will call client
init functions that may try to modify the capability mask.

Signed-off-by: Roland Dreier <[email protected]>

--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-01-23 21:51:46.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_provider.c 2005-01-24 10:39:12.623987624 -0800
@@ -634,6 +634,8 @@
dev->ib_dev.detach_mcast = mthca_multicast_detach;
dev->ib_dev.process_mad = mthca_process_mad;

+ init_MUTEX(&dev->cap_mask_mutex);
+
ret = ib_register_device(&dev->ib_dev);
if (ret)
return ret;
@@ -647,8 +649,6 @@
}
}

- init_MUTEX(&dev->cap_mask_mutex);
-
return 0;
}