From: Atul Gupta Subject: [PATCH v2 1/2] cxgb4: Add support for Inline IPSec Tx Date: Thu, 9 Nov 2017 16:58:39 +0530 Message-ID: <1510226919-20431-1-git-send-email-atul.gupta@chelsio.com> Cc: Harsh Jain , Ganesh Goudar To: herbert@gondor.apana.org.au, linux-crypto@vger.kernel.org, netdev@vger.kernel.org, steffen.klassert@secunet.com Return-path: Received: from stargate.chelsio.com ([12.32.117.8]:24198 "EHLO stargate.chelsio.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753451AbdKIL3G (ORCPT ); Thu, 9 Nov 2017 06:29:06 -0500 Sender: linux-crypto-owner@vger.kernel.org List-ID: Added Tx routine for ULD - define interface for ULD Tx. Export routines used for Tx data - Routines common for data transmit are used by cxgb4 and chcr drivers. - EXPORT routines enable transmit from chcr driver. Signed-off-by: Atul Gupta Signed-off-by: Harsh Jain Signed-off-by: Ganesh Goudar --- V2: Fixed the build warnings and created patch against cryptodev to avoid possible merge conflicts --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 23 +++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 2 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 3 + drivers/net/ethernet/chelsio/cxgb4/sge.c | 102 ++++++++++----------- 6 files changed, 81 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ea72d2d..0aa681e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -58,6 +58,13 @@ extern struct list_head adapter_list; extern struct mutex uld_mutex; +/* Suspend an Ethernet Tx queue with fewer available descriptors than this. + * This is the same as calc_tx_descs() for a TSO packet with + * nr_frags == MAX_SKB_FRAGS. + */ +#define ETHTXQ_STOP_THRES \ + (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) + enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ @@ -553,6 +560,7 @@ enum { /* adapter flags */ enum { ULP_CRYPTO_LOOKASIDE = 1 << 0, + ULP_CRYPTO_IPSEC_INLINE = 1 << 1, }; struct rx_sw_desc; @@ -947,6 +955,11 @@ enum { SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */ }; +struct tx_sw_desc { /* SW state per Tx descriptor */ + struct sk_buff *skb; + struct ulptx_sgl *sgl; +}; + /* Support for "sched_queue" command to allow one or more NIC TX Queues * to be bound to a TX Scheduling Class. */ @@ -1627,4 +1640,14 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, void free_tx_desc(struct adapter *adap, struct sge_txq *q, unsigned int n, bool unmap); void free_txq(struct adapter *adap, struct sge_txq *q); +void cxgb4_reclaim_completed_tx(struct adapter *adap, + struct sge_txq *q, bool unmap); +int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb, + dma_addr_t *addr); +void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, + void *pos); +void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, unsigned int start, + const dma_addr_t *addr); +void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 76540b0..b45bea9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3095,6 +3095,8 @@ static int chcr_show(struct seq_file *seq, void *v) atomic_read(&adap->chcr_stats.error)); seq_printf(seq, "Fallback: %10u \n", atomic_read(&adap->chcr_stats.fallback)); + seq_printf(seq, "IPSec PDU: %10u\n", + atomic_read(&adap->chcr_stats.ipsec_cnt)); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 92d9d79..2243e31 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4005,7 +4005,7 @@ static int adap_init0(struct adapter *adap) } else { adap->vres.ncrypto_fc = val[0]; } - adap->params.crypto |= ULP_CRYPTO_LOOKASIDE; + adap->params.crypto = ntohs(caps_cmd.cryptocaps); adap->num_uld += 1; } #undef FW_PARAM_PFVF diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 71a315b..6b5fea4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -637,6 +637,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->nchan = adap->params.nports; lld->nports = adap->params.nports; lld->wr_cred = adap->params.ofldq_wr_cred; + lld->crypto = adap->params.crypto; lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 84541fc..a91b295 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -292,6 +292,7 @@ struct chcr_stats_debug { atomic_t complete; atomic_t error; atomic_t fallback; + atomic_t ipsec_cnt; }; #define OCQ_WIN_OFFSET(pdev, vres) \ @@ -317,6 +318,7 @@ struct cxgb4_lld_info { unsigned char wr_cred; /* WR 16-byte credits */ unsigned char adapter_type; /* type of adapter */ unsigned char fw_api_ver; /* FW API version */ + unsigned char crypto; /* crypto support */ unsigned int fw_vers; /* FW version */ unsigned int iscsi_iolen; /* iSCSI max I/O length */ unsigned int cclk_ps; /* Core clock period in psec */ @@ -365,6 +367,7 @@ struct cxgb4_uld_info { struct t4_lro_mgr *lro_mgr, struct napi_struct *napi); void (*lro_flush)(struct t4_lro_mgr *); + int (*tx_handler)(struct sk_buff *skb, struct net_device *dev); }; int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 6a934c7..7b4f61b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,7 @@ #include "t4_msg.h" #include "t4fw_api.h" #include "cxgb4_ptp.h" +#include "cxgb4_uld.h" /* * Rx buffer size. We use largish buffers if possible but settle for single @@ -110,14 +112,6 @@ #define NOMEM_TMR_IDX (SGE_NTIMERS - 1) /* - * Suspend an Ethernet Tx queue with fewer available descriptors than this. - * This is the same as calc_tx_descs() for a TSO packet with - * nr_frags == MAX_SKB_FRAGS. - */ -#define ETHTXQ_STOP_THRES \ - (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) - -/* * Suspension threshold for non-Ethernet Tx queues. We require enough room * for a full sized WR. */ @@ -134,11 +128,6 @@ */ #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN -struct tx_sw_desc { /* SW state per Tx descriptor */ - struct sk_buff *skb; - struct ulptx_sgl *sgl; -}; - struct rx_sw_desc { /* SW state per Rx descriptor */ struct page *page; dma_addr_t dma_addr; @@ -248,8 +237,8 @@ static inline bool fl_starving(const struct adapter *adapter, return fl->avail - fl->pend_cred <= s->fl_starve_thres; } -static int map_skb(struct device *dev, const struct sk_buff *skb, - dma_addr_t *addr) +int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb, + dma_addr_t *addr) { const skb_frag_t *fp, *end; const struct skb_shared_info *si; @@ -277,6 +266,7 @@ static int map_skb(struct device *dev, const struct sk_buff *skb, out_err: return -ENOMEM; } +EXPORT_SYMBOL(cxgb4_map_skb); #ifdef CONFIG_NEED_DMA_MAP_STATE static void unmap_skb(struct device *dev, const struct sk_buff *skb, @@ -411,7 +401,7 @@ static inline int reclaimable(const struct sge_txq *q) } /** - * reclaim_completed_tx - reclaims completed Tx descriptors + * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors * @adap: the adapter * @q: the Tx queue to reclaim completed descriptors from * @unmap: whether the buffers should be unmapped for DMA @@ -420,7 +410,7 @@ static inline int reclaimable(const struct sge_txq *q) * and frees the associated buffers if possible. Called with the Tx * queue locked. */ -static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, +inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, bool unmap) { int avail = reclaimable(q); @@ -437,6 +427,7 @@ static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, q->in_use -= avail; } } +EXPORT_SYMBOL(cxgb4_reclaim_completed_tx); static inline int get_buf_size(struct adapter *adapter, const struct rx_sw_desc *d) @@ -833,7 +824,7 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb) } /** - * write_sgl - populate a scatter/gather list for a packet + * cxgb4_write_sgl - populate a scatter/gather list for a packet * @skb: the packet * @q: the Tx queue we are writing into * @sgl: starting location for writing the SGL @@ -849,9 +840,9 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb) * right after the end of the SGL but does not account for any potential * wrap around, i.e., @end > @sgl. */ -static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, - struct ulptx_sgl *sgl, u64 *end, unsigned int start, - const dma_addr_t *addr) +void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, unsigned int start, + const dma_addr_t *addr) { unsigned int i, len; struct ulptx_sge_pair *to; @@ -903,6 +894,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, if ((uintptr_t)end & 8) /* 0-pad to multiple of 16 */ *end = 0; } +EXPORT_SYMBOL(cxgb4_write_sgl); /* This function copies 64 byte coalesced work request to * memory mapped BAR2 space. For coalesced WR SGE fetches @@ -921,14 +913,14 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) } /** - * ring_tx_db - check and potentially ring a Tx queue's doorbell + * cxgb4_ring_tx_db - check and potentially ring a Tx queue's doorbell * @adap: the adapter * @q: the Tx queue * @n: number of new descriptors to give to HW * * Ring the doorbel for a Tx queue. */ -static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) +inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { /* Make sure that all writes to the TX Descriptors are committed * before we tell the hardware about them. @@ -995,9 +987,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) wmb(); } } +EXPORT_SYMBOL(cxgb4_ring_tx_db); /** - * inline_tx_skb - inline a packet's data into Tx descriptors + * cxgb4_inline_tx_skb - inline a packet's data into Tx descriptors * @skb: the packet * @q: the Tx queue where the packet will be inlined * @pos: starting position in the Tx queue where to inline the packet @@ -1007,8 +1000,8 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) * Most of the complexity of this operation is dealing with wrap arounds * in the middle of the packet we want to inline. */ -static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, - void *pos) +void cxgb4_inline_tx_skb(const struct sk_buff *skb, + const struct sge_txq *q, void *pos) { u64 *p; int left = (void *)q->stat - pos; @@ -1030,6 +1023,7 @@ static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, if ((uintptr_t)p & 8) *p = 0; } +EXPORT_SYMBOL(cxgb4_inline_tx_skb); static void *inline_tx_skb_header(const struct sk_buff *skb, const struct sge_txq *q, void *pos, @@ -1199,6 +1193,12 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) pi = netdev_priv(dev); adap = pi->adapter; + ssi = skb_shinfo(skb); +#ifdef CONFIG_CHELSIO_IPSEC_INLINE + if (xfrm_offload(skb) && !ssi->gso_size) + return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev); +#endif /* CHELSIO_IPSEC_INLINE */ + qidx = skb_get_queue_mapping(skb); if (ptp_enabled) { spin_lock(&adap->ptp_lock); @@ -1215,7 +1215,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); - reclaim_completed_tx(adap, &q->q, true); + cxgb4_reclaim_completed_tx(adap, &q->q, true); cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; #ifdef CONFIG_CHELSIO_T4_FCOE @@ -1245,7 +1245,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) immediate = true; if (!immediate && - unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) { + unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) { q->mapping_err++; if (ptp_enabled) spin_unlock(&adap->ptp_lock); @@ -1264,7 +1264,6 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) end = (u64 *)wr + flits; len = immediate ? skb->len : 0; - ssi = skb_shinfo(skb); if (ssi->gso_size) { struct cpl_tx_pkt_lso *lso = (void *)wr; bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0; @@ -1341,13 +1340,13 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) cpl->ctrl1 = cpu_to_be64(cntrl); if (immediate) { - inline_tx_skb(skb, &q->q, cpl + 1); + cxgb4_inline_tx_skb(skb, &q->q, cpl + 1); dev_consume_skb_any(skb); } else { int last_desc; - write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0, - addr); + cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), + end, 0, addr); skb_orphan(skb); last_desc = q->q.pidx + ndesc - 1; @@ -1359,7 +1358,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) txq_advance(&q->q, ndesc); - ring_tx_db(adap, &q->q, ndesc); + cxgb4_ring_tx_db(adap, &q->q, ndesc); if (ptp_enabled) spin_unlock(&adap->ptp_lock); return NETDEV_TX_OK; @@ -1369,9 +1368,9 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs * @q: the SGE control Tx queue * - * This is a variant of reclaim_completed_tx() that is used for Tx queues - * that send only immediate data (presently just the control queues) and - * thus do not have any sk_buffs to release. + * This is a variant of cxgb4_reclaim_completed_tx() that is used + * for Tx queues that send only immediate data (presently just + * the control queues) and thus do not have any sk_buffs to release. */ static inline void reclaim_completed_tx_imm(struct sge_txq *q) { @@ -1446,13 +1445,13 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb) } wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx]; - inline_tx_skb(skb, &q->q, wr); + cxgb4_inline_tx_skb(skb, &q->q, wr); txq_advance(&q->q, ndesc); if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) ctrlq_check_stop(q, wr); - ring_tx_db(q->adap, &q->q, ndesc); + cxgb4_ring_tx_db(q->adap, &q->q, ndesc); spin_unlock(&q->sendq.lock); kfree_skb(skb); @@ -1487,7 +1486,7 @@ static void restart_ctrlq(unsigned long data) txq_advance(&q->q, ndesc); spin_unlock(&q->sendq.lock); - inline_tx_skb(skb, &q->q, wr); + cxgb4_inline_tx_skb(skb, &q->q, wr); kfree_skb(skb); if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) { @@ -1500,14 +1499,15 @@ static void restart_ctrlq(unsigned long data) } } if (written > 16) { - ring_tx_db(q->adap, &q->q, written); + cxgb4_ring_tx_db(q->adap, &q->q, written); written = 0; } spin_lock(&q->sendq.lock); } q->full = 0; -ringdb: if (written) - ring_tx_db(q->adap, &q->q, written); +ringdb: + if (written) + cxgb4_ring_tx_db(q->adap, &q->q, written); spin_unlock(&q->sendq.lock); } @@ -1650,7 +1650,7 @@ static void service_ofldq(struct sge_uld_txq *q) */ spin_unlock(&q->sendq.lock); - reclaim_completed_tx(q->adap, &q->q, false); + cxgb4_reclaim_completed_tx(q->adap, &q->q, false); flits = skb->priority; /* previously saved */ ndesc = flits_to_desc(flits); @@ -1661,9 +1661,9 @@ static void service_ofldq(struct sge_uld_txq *q) pos = (u64 *)&q->q.desc[q->q.pidx]; if (is_ofld_imm(skb)) - inline_tx_skb(skb, &q->q, pos); - else if (map_skb(q->adap->pdev_dev, skb, - (dma_addr_t *)skb->head)) { + cxgb4_inline_tx_skb(skb, &q->q, pos); + else if (cxgb4_map_skb(q->adap->pdev_dev, skb, + (dma_addr_t *)skb->head)) { txq_stop_maperr(q); spin_lock(&q->sendq.lock); break; @@ -1694,9 +1694,9 @@ static void service_ofldq(struct sge_uld_txq *q) pos = (void *)txq->desc; } - write_sgl(skb, &q->q, (void *)pos, - end, hdr_len, - (dma_addr_t *)skb->head); + cxgb4_write_sgl(skb, &q->q, (void *)pos, + end, hdr_len, + (dma_addr_t *)skb->head); #ifdef CONFIG_NEED_DMA_MAP_STATE skb->dev = q->adap->port[0]; skb->destructor = deferred_unmap_destructor; @@ -1710,7 +1710,7 @@ static void service_ofldq(struct sge_uld_txq *q) txq_advance(&q->q, ndesc); written += ndesc; if (unlikely(written > 32)) { - ring_tx_db(q->adap, &q->q, written); + cxgb4_ring_tx_db(q->adap, &q->q, written); written = 0; } @@ -1725,7 +1725,7 @@ static void service_ofldq(struct sge_uld_txq *q) kfree_skb(skb); } if (likely(written)) - ring_tx_db(q->adap, &q->q, written); + cxgb4_ring_tx_db(q->adap, &q->q, written); /*Indicate that no thread is processing the Pending Send Queue * currently. -- 1.8.3.1