2020-04-01 04:00:09

by Long Li

[permalink] [raw]
Subject: [PATCH 1/2] cifs: smbd: Properly process errors on ib_post_send

From: Long Li <[email protected]>

When processing errors from ib_post_send(), the transport state needs to be
rolled back to the condition before the error.

Refactor the old code to make it easy to roll back on IB errors, and fix this.

Signed-off-by: Long Li <[email protected]>
---
fs/cifs/smbdirect.c | 196 +++++++++++++++++---------------------------
1 file changed, 77 insertions(+), 119 deletions(-)

diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index bdae6d41748c..b961260a6336 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -800,27 +800,55 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info)
return 0;
}

-/*
- * Build and prepare the SMBD packet header
- * This function waits for avaialbe send credits and build a SMBD packet
- * header. The caller then optional append payload to the packet after
- * the header
- * intput values
- * size: the size of the payload
- * remaining_data_length: remaining data to send if this is part of a
- * fragmented packet
- * output values
- * request_out: the request allocated from this function
- * return values: 0 on success, otherwise actual error code returned
- */
-static int smbd_create_header(struct smbd_connection *info,
- int size, int remaining_data_length,
- struct smbd_request **request_out)
+/* Post the send request */
+static int smbd_post_send(struct smbd_connection *info,
+ struct smbd_request *request)
+{
+ struct ib_send_wr send_wr;
+ int rc, i;
+
+ for (i = 0; i < request->num_sge; i++) {
+ log_rdma_send(INFO,
+ "rdma_request sge[%d] addr=%llu length=%u\n",
+ i, request->sge[i].addr, request->sge[i].length);
+ ib_dma_sync_single_for_device(
+ info->id->device,
+ request->sge[i].addr,
+ request->sge[i].length,
+ DMA_TO_DEVICE);
+ }
+
+ request->cqe.done = send_done;
+
+ send_wr.next = NULL;
+ send_wr.wr_cqe = &request->cqe;
+ send_wr.sg_list = request->sge;
+ send_wr.num_sge = request->num_sge;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ rc = ib_post_send(info->id->qp, &send_wr, NULL);
+ if (rc) {
+ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
+ smbd_disconnect_rdma_connection(info);
+ rc = -EAGAIN;
+ } else
+ /* Reset timer for idle connection after packet is sent */
+ mod_delayed_work(info->workqueue, &info->idle_timer_work,
+ info->keep_alive_interval*HZ);
+
+ return rc;
+}
+
+static int smbd_post_send_sgl(struct smbd_connection *info,
+ struct scatterlist *sgl, int data_length, int remaining_data_length)
{
+ int num_sgs;
+ int i, rc;
+ int header_length;
struct smbd_request *request;
struct smbd_data_transfer *packet;
- int header_length;
- int rc;
+ struct scatterlist *sg;

/* Wait for send credits. A SMBD packet needs one credit */
rc = wait_event_interruptible(info->wait_send_queue,
@@ -835,6 +863,15 @@ static int smbd_create_header(struct smbd_connection *info,
}
atomic_dec(&info->send_credits);

+wait_sq:
+ wait_event(info->wait_post_send,
+ atomic_read(&info->send_pending) < info->send_credit_target);
+ if (unlikely(atomic_inc_return(&info->send_pending) >
+ info->send_credit_target)) {
+ atomic_dec(&info->send_pending);
+ goto wait_sq;
+ }
+
request = mempool_alloc(info->request_mempool, GFP_KERNEL);
if (!request) {
rc = -ENOMEM;
@@ -856,11 +893,11 @@ static int smbd_create_header(struct smbd_connection *info,
packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);

packet->reserved = 0;
- if (!size)
+ if (!data_length)
packet->data_offset = 0;
else
packet->data_offset = cpu_to_le32(24);
- packet->data_length = cpu_to_le32(size);
+ packet->data_length = cpu_to_le32(data_length);
packet->remaining_data_length = cpu_to_le32(remaining_data_length);
packet->padding = 0;

@@ -875,7 +912,7 @@ static int smbd_create_header(struct smbd_connection *info,
/* Map the packet to DMA */
header_length = sizeof(struct smbd_data_transfer);
/* If this is a packet without payload, don't send padding */
- if (!size)
+ if (!data_length)
header_length = offsetof(struct smbd_data_transfer, padding);

request->num_sge = 1;
@@ -884,107 +921,15 @@ static int smbd_create_header(struct smbd_connection *info,
header_length,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
- mempool_free(request, info->request_mempool);
rc = -EIO;
+ request->sge[0].addr = 0;
goto err_dma;
}

request->sge[0].length = header_length;
request->sge[0].lkey = info->pd->local_dma_lkey;

- *request_out = request;
- return 0;
-
-err_dma:
- /* roll back receive credits */
- spin_lock(&info->lock_new_credits_offered);
- info->new_credits_offered += packet->credits_granted;
- spin_unlock(&info->lock_new_credits_offered);
- atomic_sub(packet->credits_granted, &info->receive_credits);
-
-err_alloc:
- /* roll back send credits */
- atomic_inc(&info->send_credits);
-
- return rc;
-}
-
-static void smbd_destroy_header(struct smbd_connection *info,
- struct smbd_request *request)
-{
-
- ib_dma_unmap_single(info->id->device,
- request->sge[0].addr,
- request->sge[0].length,
- DMA_TO_DEVICE);
- mempool_free(request, info->request_mempool);
- atomic_inc(&info->send_credits);
-}
-
-/* Post the send request */
-static int smbd_post_send(struct smbd_connection *info,
- struct smbd_request *request)
-{
- struct ib_send_wr send_wr;
- int rc, i;
-
- for (i = 0; i < request->num_sge; i++) {
- log_rdma_send(INFO,
- "rdma_request sge[%d] addr=%llu length=%u\n",
- i, request->sge[i].addr, request->sge[i].length);
- ib_dma_sync_single_for_device(
- info->id->device,
- request->sge[i].addr,
- request->sge[i].length,
- DMA_TO_DEVICE);
- }
-
- request->cqe.done = send_done;
-
- send_wr.next = NULL;
- send_wr.wr_cqe = &request->cqe;
- send_wr.sg_list = request->sge;
- send_wr.num_sge = request->num_sge;
- send_wr.opcode = IB_WR_SEND;
- send_wr.send_flags = IB_SEND_SIGNALED;
-
-wait_sq:
- wait_event(info->wait_post_send,
- atomic_read(&info->send_pending) < info->send_credit_target);
- if (unlikely(atomic_inc_return(&info->send_pending) >
- info->send_credit_target)) {
- atomic_dec(&info->send_pending);
- goto wait_sq;
- }
-
- rc = ib_post_send(info->id->qp, &send_wr, NULL);
- if (rc) {
- log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
- if (atomic_dec_and_test(&info->send_pending))
- wake_up(&info->wait_send_pending);
- smbd_disconnect_rdma_connection(info);
- rc = -EAGAIN;
- } else
- /* Reset timer for idle connection after packet is sent */
- mod_delayed_work(info->workqueue, &info->idle_timer_work,
- info->keep_alive_interval*HZ);
-
- return rc;
-}
-
-static int smbd_post_send_sgl(struct smbd_connection *info,
- struct scatterlist *sgl, int data_length, int remaining_data_length)
-{
- int num_sgs;
- int i, rc;
- struct smbd_request *request;
- struct scatterlist *sg;
-
- rc = smbd_create_header(
- info, data_length, remaining_data_length, &request);
- if (rc)
- return rc;
-
+ /* Fill in the packet data payload */
num_sgs = sgl ? sg_nents(sgl) : 0;
for_each_sg(sgl, sg, num_sgs, i) {
request->sge[i+1].addr =
@@ -994,7 +939,7 @@ static int smbd_post_send_sgl(struct smbd_connection *info,
info->id->device, request->sge[i+1].addr)) {
rc = -EIO;
request->sge[i+1].addr = 0;
- goto dma_mapping_failure;
+ goto err_dma;
}
request->sge[i+1].length = sg->length;
request->sge[i+1].lkey = info->pd->local_dma_lkey;
@@ -1005,14 +950,27 @@ static int smbd_post_send_sgl(struct smbd_connection *info,
if (!rc)
return 0;

-dma_mapping_failure:
- for (i = 1; i < request->num_sge; i++)
+err_dma:
+ for (i = 0; i < request->num_sge; i++)
if (request->sge[i].addr)
ib_dma_unmap_single(info->id->device,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
- smbd_destroy_header(info, request);
+ mempool_free(request, info->request_mempool);
+
+ /* roll back receive credits and credits to be offered */
+ spin_lock(&info->lock_new_credits_offered);
+ info->new_credits_offered += packet->credits_granted;
+ spin_unlock(&info->lock_new_credits_offered);
+ atomic_sub(packet->credits_granted, &info->receive_credits);
+
+err_alloc:
+ /* roll back send credits and pending */
+ atomic_inc(&info->send_credits);
+ if (atomic_dec_and_test(&info->send_pending))
+ wake_up(&info->wait_send_pending);
+
return rc;
}

--
2.17.1


2020-04-01 04:01:33

by Long Li

[permalink] [raw]
Subject: [PATCH 2/2] cifs: smbd: Do not schedule work to send immediate packet on every receive

From: Long Li <[email protected]>

Immediate packets should only be sent to peer when there are new
receive credits made available. New credits show up on freeing
receive buffer, not on receiving data.

Fix this by avoid unnenecessary work schedules.

Signed-off-by: Long Li <[email protected]>
---
fs/cifs/smbdirect.c | 61 ++++++++-------------------------------------
fs/cifs/smbdirect.h | 1 -
2 files changed, 10 insertions(+), 52 deletions(-)

diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index b961260a6336..6d94bc6525b7 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -380,27 +380,6 @@ static bool process_negotiation_response(
return true;
}

-/*
- * Check and schedule to send an immediate packet
- * This is used to extend credtis to remote peer to keep the transport busy
- */
-static void check_and_send_immediate(struct smbd_connection *info)
-{
- if (info->transport_status != SMBD_CONNECTED)
- return;
-
- info->send_immediate = true;
-
- /*
- * Promptly send a packet if our peer is running low on receive
- * credits
- */
- if (atomic_read(&info->receive_credits) <
- info->receive_credit_target - 1)
- queue_delayed_work(
- info->workqueue, &info->send_immediate_work, 0);
-}
-
static void smbd_post_send_credits(struct work_struct *work)
{
int ret = 0;
@@ -450,8 +429,16 @@ static void smbd_post_send_credits(struct work_struct *work)
info->new_credits_offered += ret;
spin_unlock(&info->lock_new_credits_offered);

- /* Check if we can post new receive and grant credits to peer */
- check_and_send_immediate(info);
+ /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */
+ info->send_immediate = true;
+ if (atomic_read(&info->receive_credits) <
+ info->receive_credit_target - 1) {
+ if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
+ info->send_immediate) {
+ log_keep_alive(INFO, "send an empty message\n");
+ smbd_post_send_empty(info);
+ }
+ }
}

/* Called from softirq, when recv is done */
@@ -546,12 +533,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
info->keep_alive_requested = KEEP_ALIVE_PENDING;
}

- /*
- * Check if we need to send something to remote peer to
- * grant more credits or respond to KEEP_ALIVE packet
- */
- check_and_send_immediate(info);
-
return;

default:
@@ -1292,25 +1273,6 @@ static void destroy_receive_buffers(struct smbd_connection *info)
mempool_free(response, info->response_mempool);
}

-/*
- * Check and send an immediate or keep alive packet
- * The condition to send those packets are defined in [MS-SMBD] 3.1.1.1
- * Connection.KeepaliveRequested and Connection.SendImmediate
- * The idea is to extend credits to server as soon as it becomes available
- */
-static void send_immediate_work(struct work_struct *work)
-{
- struct smbd_connection *info = container_of(
- work, struct smbd_connection,
- send_immediate_work.work);
-
- if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
- info->send_immediate) {
- log_keep_alive(INFO, "send an empty message\n");
- smbd_post_send_empty(info);
- }
-}
-
/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
static void idle_connection_timer(struct work_struct *work)
{
@@ -1365,8 +1327,6 @@ void smbd_destroy(struct TCP_Server_Info *server)

log_rdma_event(INFO, "cancelling idle timer\n");
cancel_delayed_work_sync(&info->idle_timer_work);
- log_rdma_event(INFO, "cancelling send immediate work\n");
- cancel_delayed_work_sync(&info->send_immediate_work);

log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
wait_event(info->wait_send_pending,
@@ -1700,7 +1660,6 @@ static struct smbd_connection *_smbd_get_connection(

init_waitqueue_head(&info->wait_send_queue);
INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
- INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work);
queue_delayed_work(info->workqueue, &info->idle_timer_work,
info->keep_alive_interval*HZ);

diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
index 07c8f5638c39..a87fca82a796 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/cifs/smbdirect.h
@@ -153,7 +153,6 @@ struct smbd_connection {

struct workqueue_struct *workqueue;
struct delayed_work idle_timer_work;
- struct delayed_work send_immediate_work;

/* Memory pool for preallocating buffers */
/* request pool for RDMA send */
--
2.17.1