2015-06-29 14:50:58

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 0/9] NTB: Fix and cleanup ntb drivers

This series includes the following changes to fix bugs, add new hardware
support, and improve usability and performance of the ntb drivers.

Bug Fixes (issues preexisting the new ntb api):
NTB: Fix ntb_transport out-of-order RX update
NTB: Schedule to receive on QP link up
NTB: ntb_netdev not covering all receive errors
NTB: Fix oops in debugfs when transport is half-up

Bug Fixes (regressions since the new ntb api):
NTB: Fix transport stats for multiple devices

Additions:
NTB: Add PCI Device IDs for Broadwell Xeon
NTB: Add flow control to the ntb_netdev
NTB: Make the transport list in order of discovery
NTB: Clean up QP stats info


Allen Hubbe (2):
NTB: Fix ntb_transport out-of-order RX update
NTB: Schedule to receive on QP link up

Dave Jiang (7):
NTB: Add flow control to the ntb_netdev
NTB: Fix transport stats for multiple devices
NTB: ntb_netdev not covering all receive errors
NTB: Fix oops in debugfs when transport is half-up
NTB: Add PCI Device IDs for Broadwell Xeon
NTB: Make the transport list in order of discovery
NTB: Clean up QP stats info

drivers/net/ntb_netdev.c | 80 ++++++++++++-
drivers/ntb/hw/intel/ntb_hw_intel.c | 15 +++
drivers/ntb/hw/intel/ntb_hw_intel.h | 3 +
drivers/ntb/ntb_transport.c | 228 +++++++++++++++++++++++-------------
include/linux/ntb_transport.h | 1 +
5 files changed, 246 insertions(+), 81 deletions(-)

--
2.4.0.rc0.43.gcf8a8c6


2015-06-29 14:52:49

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 1/9] NTB: Fix ntb_transport out-of-order RX update

It was possible for a synchronous update of the RX index in the error
case to get ahead of the asynchronous RX index update in the normal
case. Change the RX processing to preserve an RX completion order.

There were two error cases. First, if a buffer is not present to
receive data, there would be no queue entry to preserve the RX
completion order. Instead of dropping the RX frame, leave the RX frame
in the ring. Schedule RX processing when RX entries are enqueued, in
case there are RX frames waiting in the ring to be received.

Second, if a buffer is too small to receive data, drop the frame in the
ring, mark the RX entry as done, and indicate the error in the RX entry
length. Check for a negative length in the receive callback in
ntb_netdev, and count occurances as rx_length_errors.

Signed-off-by: Allen Hubbe <[email protected]>
---
drivers/net/ntb_netdev.c | 7 ++
drivers/ntb/ntb_transport.c | 168 ++++++++++++++++++++++++++------------------
2 files changed, 106 insertions(+), 69 deletions(-)

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 3cc316cb7e6b..5f1ee7c05f68 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -102,6 +102,12 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,

netdev_dbg(ndev, "%s: %d byte payload received\n", __func__, len);

+ if (len < 0) {
+ ndev->stats.rx_errors++;
+ ndev->stats.rx_length_errors++;
+ goto enqueue_again;
+ }
+
skb_put(skb, len);
skb->protocol = eth_type_trans(skb, ndev);
skb->ip_summed = CHECKSUM_NONE;
@@ -121,6 +127,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
return;
}

+enqueue_again:
rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN);
if (rc) {
dev_kfree_skb(skb);
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index efe3ad4122f2..b87578fb91d6 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -142,10 +142,10 @@ struct ntb_transport_qp {

void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
+ struct list_head rx_post_q;
struct list_head rx_pend_q;
struct list_head rx_free_q;
- spinlock_t ntb_rx_pend_q_lock;
- spinlock_t ntb_rx_free_q_lock;
+ spinlock_t ntb_rx_q_lock;
void *rx_buff;
unsigned int rx_index;
unsigned int rx_max_entry;
@@ -534,6 +534,27 @@ out:
return entry;
}

+static struct ntb_queue_entry *ntb_list_mv(spinlock_t *lock,
+ struct list_head *list,
+ struct list_head *to_list)
+{
+ struct ntb_queue_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(lock, flags);
+
+ if (list_empty(list)) {
+ entry = NULL;
+ } else {
+ entry = list_first_entry(list, struct ntb_queue_entry, entry);
+ list_move(&entry->entry, to_list);
+ }
+
+ spin_unlock_irqrestore(lock, flags);
+
+ return entry;
+}
+
static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
unsigned int qp_num)
{
@@ -941,10 +962,10 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work);
INIT_WORK(&qp->link_cleanup, ntb_qp_link_cleanup_work);

- spin_lock_init(&qp->ntb_rx_pend_q_lock);
- spin_lock_init(&qp->ntb_rx_free_q_lock);
+ spin_lock_init(&qp->ntb_rx_q_lock);
spin_lock_init(&qp->ntb_tx_free_q_lock);

+ INIT_LIST_HEAD(&qp->rx_post_q);
INIT_LIST_HEAD(&qp->rx_pend_q);
INIT_LIST_HEAD(&qp->rx_free_q);
INIT_LIST_HEAD(&qp->tx_free_q);
@@ -1107,22 +1128,47 @@ static void ntb_transport_free(struct ntb_client *self, struct ntb_dev *ndev)
kfree(nt);
}

-static void ntb_rx_copy_callback(void *data)
+static void ntb_complete_rxc(struct ntb_transport_qp *qp)
{
- struct ntb_queue_entry *entry = data;
- struct ntb_transport_qp *qp = entry->qp;
- void *cb_data = entry->cb_data;
- unsigned int len = entry->len;
- struct ntb_payload_header *hdr = entry->rx_hdr;
+ struct ntb_queue_entry *entry;
+ void *cb_data;
+ unsigned int len;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags);
+
+ while (!list_empty(&qp->rx_post_q)) {
+ entry = list_first_entry(&qp->rx_post_q,
+ struct ntb_queue_entry, entry);
+ if (!(entry->flags & DESC_DONE_FLAG))
+ break;
+
+ entry->rx_hdr->flags = 0;
+ iowrite32(entry->index, &qp->rx_info->entry);
+
+ cb_data = entry->cb_data;
+ len = entry->len;
+
+ list_move_tail(&entry->entry, &qp->rx_free_q);

- hdr->flags = 0;
+ spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags);

- iowrite32(entry->index, &qp->rx_info->entry);
+ if (qp->rx_handler && qp->client_ready)
+ qp->rx_handler(qp, qp->cb_data, cb_data, len);

- ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q);
+ spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags);
+ }

- if (qp->rx_handler && qp->client_ready)
- qp->rx_handler(qp, qp->cb_data, cb_data, len);
+ spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags);
+}
+
+static void ntb_rx_copy_callback(void *data)
+{
+ struct ntb_queue_entry *entry = data;
+
+ entry->flags |= DESC_DONE_FLAG;
+
+ ntb_complete_rxc(entry->qp);
}

static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset)
@@ -1138,19 +1184,18 @@ static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset)
ntb_rx_copy_callback(entry);
}

-static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
- size_t len)
+static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
{
struct dma_async_tx_descriptor *txd;
struct ntb_transport_qp *qp = entry->qp;
struct dma_chan *chan = qp->dma_chan;
struct dma_device *device;
- size_t pay_off, buff_off;
+ size_t pay_off, buff_off, len;
struct dmaengine_unmap_data *unmap;
dma_cookie_t cookie;
void *buf = entry->buf;

- entry->len = len;
+ len = entry->len;

if (!chan)
goto err;
@@ -1226,7 +1271,6 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp)
struct ntb_payload_header *hdr;
struct ntb_queue_entry *entry;
void *offset;
- int rc;

offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
hdr = offset + qp->rx_max_frame - sizeof(struct ntb_payload_header);
@@ -1255,65 +1299,43 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp)
return -EIO;
}

- entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q);
+ entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
if (!entry) {
dev_dbg(&qp->ndev->pdev->dev, "no receive buffer\n");
qp->rx_err_no_buf++;
-
- rc = -ENOMEM;
- goto err;
+ return -EAGAIN;
}

+ entry->rx_hdr = hdr;
+ entry->index = qp->rx_index;
+
if (hdr->len > entry->len) {
dev_dbg(&qp->ndev->pdev->dev,
"receive buffer overflow! Wanted %d got %d\n",
hdr->len, entry->len);
qp->rx_err_oflow++;

- rc = -EIO;
- goto err;
- }
+ entry->len = -EIO;
+ entry->flags |= DESC_DONE_FLAG;

- dev_dbg(&qp->ndev->pdev->dev,
- "RX OK index %u ver %u size %d into buf size %d\n",
- qp->rx_index, hdr->ver, hdr->len, entry->len);
+ ntb_complete_rxc(qp);
+ } else {
+ dev_dbg(&qp->ndev->pdev->dev,
+ "RX OK index %u ver %u size %d into buf size %d\n",
+ qp->rx_index, hdr->ver, hdr->len, entry->len);

- qp->rx_bytes += hdr->len;
- qp->rx_pkts++;
+ qp->rx_bytes += hdr->len;
+ qp->rx_pkts++;

- entry->index = qp->rx_index;
- entry->rx_hdr = hdr;
+ entry->len = hdr->len;

- ntb_async_rx(entry, offset, hdr->len);
+ ntb_async_rx(entry, offset);
+ }

qp->rx_index++;
qp->rx_index %= qp->rx_max_entry;

return 0;
-
-err:
- /* FIXME: if this syncrhonous update of the rx_index gets ahead of
- * asyncrhonous ntb_rx_copy_callback of previous entry, there are three
- * scenarios:
- *
- * 1) The peer might miss this update, but observe the update
- * from the memcpy completion callback. In this case, the buffer will
- * not be freed on the peer to be reused for a different packet. The
- * successful rx of a later packet would clear the condition, but the
- * condition could persist if several rx fail in a row.
- *
- * 2) The peer may observe this update before the asyncrhonous copy of
- * prior packets is completed. The peer may overwrite the buffers of
- * the prior packets before they are copied.
- *
- * 3) Both: the peer may observe the update, and then observe the index
- * decrement by the asynchronous completion callback. Who knows what
- * badness that will cause.
- */
- hdr->flags = 0;
- iowrite32(qp->rx_index, &qp->rx_info->entry);
-
- return rc;
}

static void ntb_transport_rxc_db(unsigned long data)
@@ -1333,7 +1355,7 @@ static void ntb_transport_rxc_db(unsigned long data)
break;
}

- if (qp->dma_chan)
+ if (i && qp->dma_chan)
dma_async_issue_pending(qp->dma_chan);

if (i == qp->rx_max_entry) {
@@ -1609,7 +1631,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
goto err1;

entry->qp = qp;
- ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry,
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
&qp->rx_free_q);
}

@@ -1634,7 +1656,7 @@ err2:
while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
kfree(entry);
err1:
- while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q)))
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry);
if (qp->dma_chan)
dma_release_channel(qp->dma_chan);
@@ -1689,11 +1711,16 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
qp->tx_handler = NULL;
qp->event_handler = NULL;

- while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q)))
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry);

- while ((entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q))) {
- dev_warn(&pdev->dev, "Freeing item from a non-empty queue\n");
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) {
+ dev_warn(&pdev->dev, "Freeing item from non-empty rx_pend_q\n");
+ kfree(entry);
+ }
+
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) {
+ dev_warn(&pdev->dev, "Freeing item from non-empty rx_post_q\n");
kfree(entry);
}

@@ -1724,14 +1751,14 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len)
if (!qp || qp->client_ready)
return NULL;

- entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q);
+ entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q);
if (!entry)
return NULL;

buf = entry->cb_data;
*len = entry->len;

- ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q);
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);

return buf;
}
@@ -1757,15 +1784,18 @@ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
if (!qp)
return -EINVAL;

- entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q);
+ entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q);
if (!entry)
return -ENOMEM;

entry->cb_data = cb;
entry->buf = data;
entry->len = len;
+ entry->flags = 0;
+
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);

- ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, &qp->rx_pend_q);
+ tasklet_schedule(&qp->rxc_db_work);

return 0;
}
--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 14:50:47

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 3/9] NTB: Fix transport stats for multiple devices

From: Dave Jiang <[email protected]>

Currently the debugfs does not have files for all NTB transport queue
pairs. When there are multiple NTBs present in a system, the QP names
of the last transport clobber the names of previously added transport
QPs. Only the last added QPs can be observed via debugfs.

Create a directory per NTB transport to associate the QPs with that
transport. Name the directory the same as the PCI device.

Signed-off-by: Dave Jiang <[email protected]>
---
drivers/ntb/ntb_transport.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 3c257f1d235f..30d8b52421ec 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -211,6 +211,8 @@ struct ntb_transport_ctx {
bool link_is_up;
struct delayed_work link_work;
struct work_struct link_cleanup;
+
+ struct dentry *debugfs_node_dir;
};

enum {
@@ -951,12 +953,12 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
qp->tx_max_frame = min(transport_mtu, tx_size / 2);
qp->tx_max_entry = tx_size / qp->tx_max_frame;

- if (nt_debugfs_dir) {
+ if (nt->debugfs_node_dir) {
char debugfs_name[4];

snprintf(debugfs_name, 4, "qp%d", qp_num);
qp->debugfs_dir = debugfs_create_dir(debugfs_name,
- nt_debugfs_dir);
+ nt->debugfs_node_dir);

qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR,
qp->debugfs_dir, qp,
@@ -1059,6 +1061,11 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
goto err2;
}

+ if (nt_debugfs_dir) {
+ nt->debugfs_node_dir =
+ debugfs_create_dir(pci_name(ndev->pdev), nt_debugfs_dir);
+ }
+
for (i = 0; i < qp_count; i++) {
rc = ntb_transport_init_queue(nt, i);
if (rc)
--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 14:51:30

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 5/9] NTB: Fix oops in debugfs when transport is half-up

From: Dave Jiang <[email protected]>

When the remote side is not up, we do not have all the context for the
transport, and that causes NULL ptr access. Have the debugfs reads check
to see if transport is up before we make access.

Signed-off-by: Dave Jiang <[email protected]>
---
drivers/ntb/ntb_transport.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 30d8b52421ec..1764bb2fd265 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -438,13 +438,17 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
char *buf;
ssize_t ret, out_offset, out_count;

+ qp = filp->private_data;
+
+ if (!qp || !qp->link_is_up)
+ return 0;
+
out_count = 1000;

buf = kmalloc(out_count, GFP_KERNEL);
if (!buf)
return -ENOMEM;

- qp = filp->private_data;
out_offset = 0;
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"NTB QP stats\n");
--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 14:51:26

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 6/9] NTB: Schedule to receive on QP link up

Schedule to receive on QP link up, to make sure that the doorbell is
properly cleared for interrupts.

Signed-off-by: Allen Hubbe <[email protected]>
---
drivers/ntb/ntb_transport.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 1764bb2fd265..a040a4ee6f33 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -901,6 +901,8 @@ static void ntb_qp_link_work(struct work_struct *work)

if (qp->event_handler)
qp->event_handler(qp->cb_data, qp->link_is_up);
+
+ tasklet_schedule(&qp->rxc_db_work);
} else if (nt->link_is_up)
schedule_delayed_work(&qp->link_work,
msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 14:52:34

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 7/9] NTB: Add PCI Device IDs for Broadwell Xeon

From: Dave Jiang <[email protected]>

Adding PCI Device IDs for B2B (back to back), RP (root port, primary),
and TB (transparent bridge, secondary) devices.

Signed-off-by: Dave Jiang <[email protected]>
---
drivers/ntb/hw/intel/ntb_hw_intel.c | 15 +++++++++++++++
drivers/ntb/hw/intel/ntb_hw_intel.h | 3 +++
2 files changed, 18 insertions(+)

diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 0ba625781197..211177f9eeff 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -190,14 +190,17 @@ static inline int pdev_is_xeon(struct pci_dev *pdev)
case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
return 1;
}
return 0;
@@ -1844,6 +1847,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP;
break;
}
@@ -1858,6 +1864,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP;
break;
}
@@ -1879,6 +1888,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14;
break;
}
@@ -2235,14 +2247,17 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = {
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BDX)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_BDX)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)},
{0}
};
MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl);
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index 7ddaf387b679..ea0612f797df 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -67,6 +67,9 @@
#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E
#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F
#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX 0x6F0D
+#define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E
+#define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F

/* Intel Xeon hardware */

--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 14:51:17

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 8/9] NTB: Make the transport list in order of discovery

From: Dave Jiang <[email protected]>

The list should be added from the bottom and not the top in order to
ensure the transport is provided in the same order to clients as ntb
devices are discovered.

Signed-off-by: Dave Jiang <[email protected]>
---
drivers/ntb/ntb_transport.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index a040a4ee6f33..713be97be95f 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -296,7 +296,7 @@ static LIST_HEAD(ntb_transport_list);

static int ntb_bus_init(struct ntb_transport_ctx *nt)
{
- list_add(&nt->entry, &ntb_transport_list);
+ list_add_tail(&nt->entry, &ntb_transport_list);
return 0;
}

--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 14:51:07

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 9/9] NTB: Clean up QP stats info

From: Dave Jiang <[email protected]>

Make QP stats info more readable for debugging purposes. Also add an
entry to indicate whether DMA is being used.

Signed-off-by: Dave Jiang <[email protected]>
---
drivers/ntb/ntb_transport.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 713be97be95f..b617cf218e6d 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -451,7 +451,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,

out_offset = 0;
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "NTB QP stats\n");
+ "\nNTB QP stats:\n\n");
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_bytes - \t%llu\n", qp->rx_bytes);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
@@ -469,11 +469,11 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_err_ver - \t%llu\n", qp->rx_err_ver);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "rx_buff - \t%p\n", qp->rx_buff);
+ "rx_buff - \t0x%p\n", qp->rx_buff);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_index - \t%u\n", qp->rx_index);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "rx_max_entry - \t%u\n", qp->rx_max_entry);
+ "rx_max_entry - \t%u\n\n", qp->rx_max_entry);

out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_bytes - \t%llu\n", qp->tx_bytes);
@@ -488,21 +488,28 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "tx_mw - \t%p\n", qp->tx_mw);
+ "tx_mw - \t0x%p\n", qp->tx_mw);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "tx_index - \t%u\n", qp->tx_index);
+ "tx_index (H) - \t%u\n", qp->tx_index);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "tx_max_entry - \t%u\n", qp->tx_max_entry);
- out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "qp->remote_rx_info->entry - \t%u\n",
+ "RRI (T) - \t%u\n",
qp->remote_rx_info->entry);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "tx_max_entry - \t%u\n", qp->tx_max_entry);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
"free tx - \t%u\n",
ntb_transport_tx_free_entry(qp));

out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "\nQP Link %s\n",
+ "\n");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "Using DMA - \t%s\n", use_dma ? "Yes" : "No");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "QP Link - \t%s\n",
qp->link_is_up ? "Up" : "Down");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "\n");
+
if (out_offset > out_count)
out_offset = out_count;

--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 15:07:04

by Allen Hubbe

[permalink] [raw]
Subject: RE: [PATCH 4/9] NTB: ntb_netdev not covering all receive errors

> From: Dave Jiang <mailto:[email protected]>

I will fix to remove "mailto:" in the commit author.
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m???? ????????I?

2015-06-29 15:11:12

by Allen Hubbe

[permalink] [raw]
Subject: RE: [PATCH 0/9] NTB: Fix and cleanup ntb drivers

> NTB: Add flow control to the ntb_netdev

PATCH 2/9 tripped some permission and was rejected by my mail server.

I will ask Dave to email this patch separately.

Allen
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m???? ????????I?

2015-06-29 15:29:48

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 2/9] NTB: Add flow control to the ntb_netdev

From: Dave Jiang <[email protected]>

Right now if we push the NTB really hard, we start dropping packets due to
not able to process the packets fast enough. We need to stop the upper layer
from flooding us when that happens.

A timer is necessary in order to restart the queue once the resource has
been processed on the receive side. Due to the way NTB is setup, the
resources on the tx side are tied to the processing of the rx side and
there's no async way to know when the rx side has released those resources.

The following module parameters are added:
tx_time: The time in usecs for us to wait before we check if we restart
transmit queue. (default: 1)
tx_start: The number of descriptors we wait for to free up before resuming
transmit queue. (default: 10)
tx_stop: The low water mark of free descriptors before we stop tx queue.
(default: 5)

Signed-off-by: Dave Jiang <[email protected]>
---
drivers/net/ntb_netdev.c | 71 +++++++++++++++++++++++++++++++++++++++++++
drivers/ntb/ntb_transport.c | 20 +++++++++++-
include/linux/ntb_transport.h | 1 +
3 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 5f1ee7c05f68..7a0aeac00c24 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -61,11 +61,24 @@ MODULE_VERSION(NTB_NETDEV_VER);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");

+static unsigned int tx_time = 1;
+module_param(tx_time, uint, 0644);
+MODULE_PARM_DESC(tx_time, "Time in usecs for tx resource reaper");
+
+static unsigned int tx_start = 10;
+module_param(tx_start, uint, 0644);
+MODULE_PARM_DESC(tx_start, "Number of descriptors to free before resume tx");
+
+static unsigned int tx_stop = 5;
+module_param(tx_stop, uint, 0644);
+MODULE_PARM_DESC(tx_stop, "Number of descriptors before stop upper layer tx");
+
struct ntb_netdev {
struct list_head list;
struct pci_dev *pdev;
struct net_device *ndev;
struct ntb_transport_qp *qp;
+ struct timer_list tx_timer;
};

#define NTB_TX_TIMEOUT_MS 1000
@@ -136,11 +149,39 @@ enqueue_again:
}
}

+static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev,
+ struct ntb_transport_qp *qp, int size)
+{
+ struct ntb_netdev *dev = netdev_priv(netdev);
+
+ netif_stop_queue(netdev);
+ smp_mb();
+
+ if (likely(ntb_transport_tx_free_entry(qp) < size)) {
+ mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
+ return -EBUSY;
+ }
+
+ netif_start_queue(netdev);
+ return 0;
+}
+
+static int ntb_netdev_maybe_stop_tx(struct net_device *ndev,
+ struct ntb_transport_qp *qp, int size)
+{
+ if (netif_queue_stopped(ndev) ||
+ (ntb_transport_tx_free_entry(qp) >= size))
+ return 0;
+
+ return __ntb_netdev_maybe_stop_tx(ndev, qp, size);
+}
+
static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len)
{
struct net_device *ndev = qp_data;
struct sk_buff *skb;
+ struct ntb_netdev *dev = netdev_priv(ndev);

skb = data;
if (!skb || !ndev)
@@ -155,6 +196,12 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
}

dev_kfree_skb(skb);
+
+ if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) {
+ smp_mb();
+ if (netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
+ }
}

static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
@@ -163,10 +210,15 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
struct ntb_netdev *dev = netdev_priv(ndev);
int rc;

+ ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+
rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
if (rc)
goto err;

+ /* check for next submit */
+ ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+
return NETDEV_TX_OK;

err:
@@ -175,6 +227,20 @@ err:
return NETDEV_TX_BUSY;
}

+static void ntb_netdev_tx_timer(unsigned long data)
+{
+ struct net_device *ndev = (struct net_device *)data;
+ struct ntb_netdev *dev = netdev_priv(ndev);
+
+ if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) {
+ mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time));
+ } else {
+ smp_mb();
+ if (netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
+ }
+}
+
static int ntb_netdev_open(struct net_device *ndev)
{
struct ntb_netdev *dev = netdev_priv(ndev);
@@ -197,8 +263,11 @@ static int ntb_netdev_open(struct net_device *ndev)
}
}

+ setup_timer(&dev->tx_timer, ntb_netdev_tx_timer, (unsigned long)ndev);
+
netif_carrier_off(ndev);
ntb_transport_link_up(dev->qp);
+ netif_start_queue(ndev);

return 0;

@@ -219,6 +288,8 @@ static int ntb_netdev_close(struct net_device *ndev)
while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
dev_kfree_skb(skb);

+ del_timer_sync(&dev->tx_timer);
+
return 0;
}

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index b87578fb91d6..3c257f1d235f 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -487,6 +487,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
"tx_index - \t%u\n", qp->tx_index);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_max_entry - \t%u\n", qp->tx_max_entry);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "qp->remote_rx_info->entry - \t%u\n",
+ qp->remote_rx_info->entry);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "free tx - \t%u\n",
+ ntb_transport_tx_free_entry(qp));

out_offset += snprintf(buf + out_offset, out_count - out_offset,
"\nQP Link %s\n",
@@ -528,6 +534,7 @@ static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock,
}
entry = list_first_entry(list, struct ntb_queue_entry, entry);
list_del(&entry->entry);
+
out:
spin_unlock_irqrestore(lock, flags);

@@ -1826,7 +1833,7 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
if (!entry) {
qp->tx_err_no_buf++;
- return -ENOMEM;
+ return -EBUSY;
}

entry->cb_data = cb;
@@ -1952,6 +1959,17 @@ unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp)
}
EXPORT_SYMBOL_GPL(ntb_transport_max_size);

+#define NTBQ_SPACE(tail, head, size) ((tail) > (head)) ? ((tail) - (head)) : ((size) - (head) + (tail))
+
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+{
+ unsigned int head = qp->tx_index;
+ unsigned int tail = qp->remote_rx_info->entry;
+
+ return NTBQ_SPACE(tail, head, qp->tx_max_entry);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
+
static void ntb_transport_doorbell_callback(void *data, int vector)
{
struct ntb_transport_ctx *nt = data;
diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h
index 2862861366a5..7243eb98a722 100644
--- a/include/linux/ntb_transport.h
+++ b/include/linux/ntb_transport.h
@@ -83,3 +83,4 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
void ntb_transport_link_up(struct ntb_transport_qp *qp);
void ntb_transport_link_down(struct ntb_transport_qp *qp);
bool ntb_transport_link_query(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);
--
2.4.0.rc0.43.gcf8a8c6

2015-06-29 15:30:02

by Allen Hubbe

[permalink] [raw]
Subject: [PATCH 4/9] NTB: ntb_netdev not covering all receive errors

From: Dave Jiang <[email protected]>

ntb_netdev is allowing the link to come up even when -ENOMEM is returned
from ntb_transport_rx_enqueue. Fix to cover all possible errors.

Signed-off-by: Dave Jiang <[email protected]>
---
drivers/net/ntb_netdev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 7a0aeac00c24..4d4d4f19049d 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -257,7 +257,7 @@ static int ntb_netdev_open(struct net_device *ndev)

rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
ndev->mtu + ETH_HLEN);
- if (rc == -EINVAL) {
+ if (rc) {
dev_kfree_skb(skb);
goto err;
}
--
2.4.0.rc0.43.gcf8a8c6