Hi Anna-
This is the second series of NFS/RDMA client patches for v4.16.
This is a bunch of minor fixes and clean-ups in preparation for
adding static trace points to the RPC-over-RDMA transport
implementation. Please consider these for v4.16.
---
Chuck Lever (16):
xprtrdma: Fix buffer leak after transport set up failure
xprtrdma: Fix backchannel allocation of extra rpcrdma_reps
xprtrdma: Eliminate unnecessary lock cycle in xprt_rdma_send_request
xprtrdma: Per-mode handling for Remote Invalidation
xprtrdma: Remove ri_reminv_expected
xprtrdma: Remove unused padding variables
xprtrdma: Initialize the xprt address string array earlier
xprtrdma: Remove another sockaddr_storage field (cdata::addr)
xprtrdma: Support IPv6 in xprt_rdma_set_port
xprtrdma: Move unmap-safe logic to rpcrdma_marshal_req
xprtrdma: buf_free not called for CB replies
xprtrdma: Split xprt_rdma_send_request
xprtrdma: Don't clear RPC_BC_PA_IN_USE on pre-allocated rpc_rqst's
xprtrdma: Replace all usage of "frmr" with "frwr"
xprtrdma: Remove usage of "mw"
xprtrdma: Introduce rpcrdma_mw_unmap_and_put
include/linux/sunrpc/xprtrdma.h | 2
net/sunrpc/xprtrdma/backchannel.c | 68 +++++---
net/sunrpc/xprtrdma/fmr_ops.c | 155 +++++++++---------
net/sunrpc/xprtrdma/frwr_ops.c | 317 +++++++++++++++++++------------------
net/sunrpc/xprtrdma/rpc_rdma.c | 98 +++++------
net/sunrpc/xprtrdma/transport.c | 102 +++++-------
net/sunrpc/xprtrdma/verbs.c | 212 ++++++++++++++-----------
net/sunrpc/xprtrdma/xprt_rdma.h | 112 +++++++------
8 files changed, 552 insertions(+), 514 deletions(-)
--
Chuck Lever
Refactoring change: Remote Invalidation is particular to the memory
registration mode that is in use. Use a callout instead of a generic
function to handle Remote Invalidation.
This gets rid of the 8-byte flags field in struct rpcrdma_mw, of
which only a single bit flag has been allocated.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/frwr_ops.c | 24 +++++++++++++++++++++---
net/sunrpc/xprtrdma/rpc_rdma.c | 24 ++++--------------------
net/sunrpc/xprtrdma/verbs.c | 1 -
net/sunrpc/xprtrdma/xprt_rdma.h | 8 ++------
4 files changed, 27 insertions(+), 30 deletions(-)
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e..e1f7303 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -450,6 +450,26 @@
return ERR_PTR(-ENOTCONN);
}
+/* Handle a remotely invalidated mw on the @mws list
+ */
+static void
+frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mws)
+{
+ struct rpcrdma_mw *mw;
+
+ list_for_each_entry(mw, mws, mw_list)
+ if (mw->mw_handle == rep->rr_inv_rkey) {
+ struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+
+ list_del(&mw->mw_list);
+ mw->frmr.fr_state = FRMR_IS_INVALID;
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ rpcrdma_put_mw(r_xprt, mw);
+ break; /* only one invalidated MR per RPC */
+ }
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
@@ -478,9 +498,6 @@
list_for_each_entry(mw, mws, mw_list) {
mw->frmr.fr_state = FRMR_IS_INVALID;
- if (mw->mw_flags & RPCRDMA_MW_F_RI)
- continue;
-
f = &mw->frmr;
dprintk("RPC: %s: invalidating frmr %p\n",
__func__, f);
@@ -553,6 +570,7 @@
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a3f2ab2..d7463bc 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -984,24 +984,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return fixup_copy_count;
}
-/* Caller must guarantee @rep remains stable during this call.
- */
-static void
-rpcrdma_mark_remote_invalidation(struct list_head *mws,
- struct rpcrdma_rep *rep)
-{
- struct rpcrdma_mw *mw;
-
- if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
- return;
-
- list_for_each_entry(mw, mws, mw_list)
- if (mw->mw_handle == rep->rr_inv_rkey) {
- mw->mw_flags = RPCRDMA_MW_F_RI;
- break; /* only one invalidated MR per RPC */
- }
-}
-
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
@@ -1339,9 +1321,11 @@ void rpcrdma_deferred_completion(struct work_struct *work)
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- rpcrdma_release_rqst(rep->rr_rxprt, req);
+ if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+ r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep);
}
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 6eecd97..1cf1eb4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1307,7 +1307,6 @@ struct rpcrdma_mw *
if (!mw)
goto out_nomws;
- mw->mw_flags = 0;
return mw;
out_nomws:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 3b63e61..e787dda 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -272,7 +272,6 @@ struct rpcrdma_mw {
struct scatterlist *mw_sg;
int mw_nents;
enum dma_data_direction mw_dir;
- unsigned long mw_flags;
union {
struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr;
@@ -284,11 +283,6 @@ struct rpcrdma_mw {
struct list_head mw_all;
};
-/* mw_flags */
-enum {
- RPCRDMA_MW_F_RI = 1,
-};
-
/*
* struct rpcrdma_req -- structure central to the request/reply sequence.
*
@@ -485,6 +479,8 @@ struct rpcrdma_memreg_ops {
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **);
+ void (*ro_reminv)(struct rpcrdma_rep *rep,
+ struct list_head *mws);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
void (*ro_recover_mr)(struct rpcrdma_mw *);
Clean up. Remove fields that should have been removed by
commit b3221d6a53c4 ("xprtrdma: Remove logic that constructs
RDMA_MSGP type calls").
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/transport.c | 9 +++------
net/sunrpc/xprtrdma/xprt_rdma.h | 1 -
2 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 7db063f..dc9000d 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,7 +67,6 @@
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_inline_write_padding;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize;
@@ -81,6 +80,7 @@
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
+static unsigned int dummy;
static struct ctl_table_header *sunrpc_table_header;
@@ -114,7 +114,7 @@
},
{
.procname = "rdma_inline_write_padding",
- .data = &xprt_rdma_inline_write_padding,
+ .data = &dummy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -387,8 +387,6 @@
if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize;
- cdata.padding = xprt_rdma_inline_write_padding;
-
/*
* Create new transport instance, which includes initialized
* o ia
@@ -895,8 +893,7 @@ int xprt_rdma_init(void)
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
- dprintk("\tPadding %d\n\tMemreg %d\n",
- xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+ dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header)
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 80ea3db..375df3d 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -437,7 +437,6 @@ struct rpcrdma_create_data_internal {
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
- unsigned int padding; /* non-rdma write header padding */
};
/*
Save more space in struct rpcrdma_xprt by removing the redundant
"addr" field from struct rpcrdma_create_data_internal. Wherever
we have rpcrdma_xprt, we also have the rpc_xprt, which has a
sockaddr_storage field with the same content.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/transport.c | 8 ++------
net/sunrpc/xprtrdma/verbs.c | 20 +++++++++-----------
net/sunrpc/xprtrdma/xprt_rdma.h | 3 +--
3 files changed, 12 insertions(+), 19 deletions(-)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index f6d171e..8ba0aa8 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -361,9 +361,7 @@
/*
* Set up RDMA-specific connect data.
*/
-
- sap = (struct sockaddr *)&cdata.addr;
- memcpy(sap, args->dstaddr, args->addrlen);
+ sap = args->dstaddr;
/* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */
@@ -397,7 +395,7 @@
new_xprt = rpcx_to_rdmax(xprt);
- rc = rpcrdma_ia_open(new_xprt, sap);
+ rc = rpcrdma_ia_open(new_xprt);
if (rc)
goto out1;
@@ -483,8 +481,6 @@
sap = (struct sockaddr_in *)&xprt->addr;
sap->sin_port = htons(port);
- sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
- sap->sin_port = htons(port);
dprintk("RPC: %s: %u\n", __func__, port);
}
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 0b4d6a3..d6c737d 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -294,8 +294,7 @@
}
static struct rdma_cm_id *
-rpcrdma_create_id(struct rpcrdma_xprt *xprt,
- struct rpcrdma_ia *ia, struct sockaddr *addr)
+rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
{
unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
struct rdma_cm_id *id;
@@ -314,7 +313,9 @@
}
ia->ri_async_rc = -ETIMEDOUT;
- rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+ rc = rdma_resolve_addr(id, NULL,
+ (struct sockaddr *)&xprt->rx_xprt.addr,
+ RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc);
@@ -361,19 +362,18 @@
/**
* rpcrdma_ia_open - Open and initialize an Interface Adapter.
- * @xprt: controlling transport
- * @addr: IP address of remote peer
+ * @xprt: transport with IA to (re)initialize
*
* Returns 0 on success, negative errno if an appropriate
* Interface Adapter could not be found and opened.
*/
int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
int rc;
- ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+ ia->ri_id = rpcrdma_create_id(xprt, ia);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
goto out_err;
@@ -649,13 +649,12 @@
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
int rc, err;
pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
rc = -EHOSTUNREACH;
- if (rpcrdma_ia_open(r_xprt, sap))
+ if (rpcrdma_ia_open(r_xprt))
goto out1;
rc = -ENOMEM;
@@ -687,7 +686,6 @@
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
struct rdma_cm_id *id, *old;
int err, rc;
@@ -696,7 +694,7 @@
rpcrdma_ep_disconnect(ep, ia);
rc = -EHOSTUNREACH;
- id = rpcrdma_create_id(r_xprt, ia, sap);
+ id = rpcrdma_create_id(r_xprt, ia);
if (IS_ERR(id))
goto out;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 0b28026..7c09e2a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -430,7 +430,6 @@ struct rpcrdma_buffer {
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
- struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
@@ -543,7 +542,7 @@ struct rpcrdma_xprt {
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
-int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
The rpcrdma_req is not shared yet, and its associated Send hasn't
been posted, thus RMW should be safe. There's no need for the
expense of a lock cycle here.
Fixes: 0ba6f37012db ("xprtrdma: Refactor rpcrdma_deferred_completion")
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/transport.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 7f9b628..7db063f 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -735,7 +735,7 @@
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
- set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
+ __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
Clean up.
Commit b5f0afbea4f2 ("xprtrdma: Per-connection pad optimization")
should have removed this.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/verbs.c | 2 --
net/sunrpc/xprtrdma/xprt_rdma.h | 1 -
2 files changed, 3 deletions(-)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1cf1eb4..4a9b6f8 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -192,7 +192,6 @@
unsigned int rsize, wsize;
/* Default settings for RPC-over-RDMA Version One */
- r_xprt->rx_ia.ri_reminv_expected = false;
r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -200,7 +199,6 @@
if (pmsg &&
pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
- r_xprt->rx_ia.ri_reminv_expected = true;
r_xprt->rx_ia.ri_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e787dda..80ea3db 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -77,7 +77,6 @@ struct rpcrdma_ia {
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
- bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
This leak has been around forever, and is exceptionally rare.
EINVAL causes mount to fail with "an incorrect mount option was
specified" although it's not likely that one of the mount
options is incorrect. Instead, return ENODEV in this case, as this
appears to be an issue with system or device configuration rather
than a specific mount option.
Some obsolete comments are also removed.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/transport.c | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 6ee1ad8..7f9b628 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -414,20 +414,10 @@
if (rc)
goto out2;
- /*
- * Allocate pre-registered send and receive buffers for headers and
- * any inline data. Also specify any padding which will be provided
- * from a preregistered zero buffer.
- */
rc = rpcrdma_buffer_create(new_xprt);
if (rc)
goto out3;
- /*
- * Register a callback for connection events. This is necessary because
- * connection loss notification is async. We also catch connection loss
- * when reaping receives.
- */
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker);
@@ -448,8 +438,9 @@
return xprt;
out4:
+ rpcrdma_buffer_destroy(&new_xprt->rx_buf);
xprt_rdma_free_addresses(xprt);
- rc = -EINVAL;
+ rc = -ENODEV;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
This makes the address strings available for debugging messages in
earlier stages of transport set up.
The first benefit is to get rid of the single-use rep_remote_addr
field, saving 128+ bytes in struct rpcrdma_ep.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/transport.c | 5 ++---
net/sunrpc/xprtrdma/verbs.c | 16 +++++++---------
net/sunrpc/xprtrdma/xprt_rdma.h | 13 ++++++++++++-
3 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index dc9000d..f6d171e 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -373,6 +373,7 @@
if (rpc_get_port(sap))
xprt_set_bound(xprt);
+ xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt->max_reqs;
@@ -405,7 +406,6 @@
*/
new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep;
- new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data);
@@ -419,7 +419,6 @@
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker);
- xprt_rdma_format_addresses(xprt, sap);
xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
if (xprt->max_payload == 0)
goto out4;
@@ -437,13 +436,13 @@
out4:
rpcrdma_buffer_destroy(&new_xprt->rx_buf);
- xprt_rdma_free_addresses(xprt);
rc = -ENODEV;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
+ xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
return ERR_PTR(rc);
}
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4a9b6f8..0b4d6a3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -219,9 +219,6 @@
struct rpcrdma_xprt *xprt = id->context;
struct rpcrdma_ia *ia = &xprt->rx_ia;
struct rpcrdma_ep *ep = &xprt->rx_ep;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
-#endif
int connstate = 0;
switch (event->event) {
@@ -244,9 +241,9 @@
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- pr_info("rpcrdma: removing device %s for %pIS:%u\n",
+ pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name,
- sap, rpc_get_port(sap));
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
ep->rep_connected = -ENODEV;
@@ -269,8 +266,8 @@
connstate = -ENETDOWN;
goto connected;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
- sap, rpc_get_port(sap),
+ dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
rdma_reject_msg(id, event->status));
connstate = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
@@ -285,8 +282,9 @@
wake_up_all(&ep->rep_connect_wait);
/*FALLTHROUGH*/
default:
- dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
- __func__, sap, rpc_get_port(sap),
+ dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
+ __func__,
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
ia->ri_device->name, ia->ri_ops->ro_displayname,
ep, rdma_event_msg(event->event));
break;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 375df3d..0b28026 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -100,7 +100,6 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
};
@@ -519,6 +518,18 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+static inline const char *
+rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
+}
+
+static inline const char *
+rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
+}
+
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
The backchannel code uses rpcrdma_recv_buffer_put to add new reps
to the free rep list. This also decrements rb_recv_count, which
spoofs the receive overrun logic in rpcrdma_buffer_get_rep.
Commit 9b06688bc3b9 ("xprtrdma: Fix additional uses of
spin_lock_irqsave(rb_lock)") replaced the original open-coded
list_add with a call to rpcrdma_recv_buffer_put(), but then a year
later, commit 05c974669ece ("xprtrdma: Fix receive buffer
accounting") added rep accounting to rpcrdma_recv_buffer_put.
It was an oversight to let the backchannel continue to use this
function.
To fix this, let's combine the "add to free list" logic with
rpcrdma_create_rep.
Also, do not allocate RPCRDMA_MAX_BC_REQUESTS rpcrdma_reps in
rpcrdma_buffer_create and then allocate additional rpcrdma_reps in
rpcrdma_bc_setup_reps. Allocating the extra reps during backchannel
set-up is sufficient.
Fixes: 05c974669ece ("xprtrdma: Fix receive buffer accounting")
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/backchannel.c | 12 ++----------
net/sunrpc/xprtrdma/verbs.c | 32 +++++++++++++++++++-------------
net/sunrpc/xprtrdma/xprt_rdma.h | 2 +-
3 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 8b818bb..256c67b 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -74,21 +74,13 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
- struct rpcrdma_rep *rep;
int rc = 0;
while (count--) {
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- pr_err("RPC: %s: reply buffer alloc failed\n",
- __func__);
- rc = PTR_ERR(rep);
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
break;
- }
-
- rpcrdma_recv_buffer_put(rep);
}
-
return rc;
}
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8607c02..6eecd97 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1093,10 +1093,17 @@ struct rpcrdma_req *
return req;
}
-struct rpcrdma_rep *
+/**
+ * rpcrdma_create_rep - Allocate an rpcrdma_rep object
+ * @r_xprt: controlling transport
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
int rc;
@@ -1121,12 +1128,18 @@ struct rpcrdma_rep *
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
- return rep;
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_list, &buf->rb_recv_bufs);
+ spin_unlock(&buf->rb_lock);
+ return 0;
out_free:
kfree(rep);
out:
- return ERR_PTR(rc);
+ dprintk("RPC: %s: reply buffer %d alloc failed\n",
+ __func__, rc);
+ return rc;
}
int
@@ -1167,17 +1180,10 @@ struct rpcrdma_rep *
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
- for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
- struct rpcrdma_rep *rep;
-
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- dprintk("RPC: %s: reply buffer %d alloc failed\n",
- __func__, i);
- rc = PTR_ERR(rep);
+ for (i = 0; i <= buf->rb_max_requests; i++) {
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
goto out;
- }
- list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
rc = rpcrdma_sendctxs_create(r_xprt);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1342f743..3b63e61 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -564,8 +564,8 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
-struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
+int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
Clean up a harmless oversight. xprtrdma's ->set_port method has
never properly supported IPv6.
This issue has never been a problem because NFS/RDMA mounts have
always required "port=20049"; thus, so far, rpcbind has not been
invoked for these mounts.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/transport.c | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8ba0aa8..cebcd02 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -474,14 +474,34 @@
rpcrdma_ep_disconnect(ep, ia);
}
+/**
+ * xprt_rdma_set_port - update server port with rpcbind result
+ * @xprt: controlling RPC transport
+ * @port: new port value
+ *
+ * Transport connect status is unchanged.
+ */
static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{
- struct sockaddr_in *sap;
+ struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
+ char buf[8];
+
+ dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
+ __func__, xprt,
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PORT],
+ port);
+
+ rpc_set_port(sap, port);
- sap = (struct sockaddr_in *)&xprt->addr;
- sap->sin_port = htons(port);
- dprintk("RPC: %s: %u\n", __func__, port);
+ kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+ snprintf(buf, sizeof(buf), "%u", port);
+ xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
+
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+ snprintf(buf, sizeof(buf), "%4hx", port);
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
}
/**
Since commit 5a6d1db45569 ("SUNRPC: Add a transport-specific private
field in rpc_rqst"), the rpc_rqst's for RPC-over-RDMA backchannel
operations leave rq_buffer set to NULL.
xprt_release does not invoke ->op->buf_free when rq_buffer is NULL.
The RPCRDMA_REQ_F_BACKCHANNEL check in xprt_rdma_free is therefore
redundant because xprt_rdma_free is not invoked for backchannel
requests.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/backchannel.c | 1 -
net/sunrpc/xprtrdma/transport.c | 3 ---
net/sunrpc/xprtrdma/xprt_rdma.h | 3 +--
3 files changed, 1 insertion(+), 6 deletions(-)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 256c67b..11fb38f 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
- __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index d77dee5..d0cd6d4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -686,9 +686,6 @@
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
- return;
-
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 7c09e2a..ed7e513 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -354,8 +354,7 @@ struct rpcrdma_req {
/* rl_flags */
enum {
- RPCRDMA_REQ_F_BACKCHANNEL = 0,
- RPCRDMA_REQ_F_PENDING,
+ RPCRDMA_REQ_F_PENDING = 0,
RPCRDMA_REQ_F_TX_RESOURCES,
};
Clean up. @rqst is set up differently for backchannel Replies. For
example, rqst->rq_task and task->tk_client are both NULL. So it is
easier to understand and maintain this code path if it is separated.
Also, we can get rid of the confusing rl_connect_cookie hack in
rpcrdma_bc_receive_call.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/backchannel.c | 48 +++++++++++++++++++++++++++++--------
net/sunrpc/xprtrdma/rpc_rdma.c | 5 ----
net/sunrpc/xprtrdma/transport.c | 27 ++++++++-------------
net/sunrpc/xprtrdma/xprt_rdma.h | 2 +-
4 files changed, 50 insertions(+), 32 deletions(-)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 11fb38f..6c66a4f 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -187,13 +187,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
-/**
- * rpcrdma_bc_marshal_reply - Send backwards direction reply
- * @rqst: buffer containing RPC reply data
- *
- * Returns zero on success.
- */
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -221,6 +215,43 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
}
/**
+ * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
+ * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
+ *
+ * Caller holds the transport's write lock.
+ *
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
+ */
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ int rc;
+
+ if (!xprt_connected(rqst->rq_xprt))
+ goto drop_connection;
+
+ rc = rpcrdma_bc_marshal_reply(rqst);
+ if (rc < 0)
+ goto failed_marshal;
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ goto drop_connection;
+ return 0;
+
+failed_marshal:
+ if (rc != -ENOTCONN)
+ return rc;
+drop_connection:
+ xprt_disconnect_done(rqst->rq_xprt);
+ return -ENOTCONN;
+}
+
+/**
* xprt_rdma_bc_destroy - Release resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources
* @reqs: number of incoming requests to destroy; ignored
@@ -330,9 +361,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
__func__, rep, req);
req->rl_reply = rep;
- /* Defeat the retransmit detection logic in send_request */
- req->rl_connect_cookie = 0;
-
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
spin_lock(&bc_serv->sv_cb_lock);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index dd7c0aa..9207aea 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -754,11 +754,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
__be32 *p;
int ret;
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
- return rpcrdma_bc_marshal_reply(rqst);
-#endif
-
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index d0cd6d4..be8c4e6 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -699,22 +699,12 @@
*
* Caller holds the transport's write lock.
*
- * Return values:
- * 0: The request has been sent
- * ENOTCONN: Caller needs to invoke connect logic then call again
- * ENOBUFS: Call again later to send the request
- * EIO: A permanent error occurred. The request was not sent,
- * and don't try it again
- *
- * send_request invokes the meat of RPC RDMA. It must do the following:
- *
- * 1. Marshal the RPC request into an RPC RDMA request, which means
- * putting a header in front of data, and creating IOVs for RDMA
- * from those in the request.
- * 2. In marshaling, detect opportunities for RDMA, and use them.
- * 3. Post a recv message to set up asynch completion, then send
- * the request (rpcrdma_ep_post).
- * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-ENOBUFS if the caller should call again later
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
*/
static int
xprt_rdma_send_request(struct rpc_task *task)
@@ -725,6 +715,11 @@
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (unlikely(!rqst->rq_buffer))
+ return xprt_rdma_bc_send_reply(rqst);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
if (!xprt_connected(xprt))
goto drop_connection;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ed7e513..e084130 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -666,7 +666,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
Clean up. This logic is related to marshaling the request, and I'd
like to keep everything that touches req->rl_registered close
together, for CPU cache efficiency.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/rpc_rdma.c | 11 +++++++++++
net/sunrpc/xprtrdma/transport.c | 5 -----
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index d7463bc..dd7c0aa 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -821,6 +821,17 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
rtype = rpcrdma_areadch;
}
+ /* If this is a retransmit, discard previously registered
+ * chunks. Very likely the connection has been replaced,
+ * so these registrations are invalid and unusable.
+ */
+ while (unlikely(!list_empty(&req->rl_registered))) {
+ struct rpcrdma_mw *mw;
+
+ mw = rpcrdma_pop_mw(&req->rl_registered);
+ rpcrdma_defer_mr_recovery(mw);
+ }
+
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index cebcd02..d77dee5 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -731,11 +731,6 @@
if (!xprt_connected(xprt))
goto drop_connection;
- /* On retransmit, remove any previously registered chunks */
- if (unlikely(!list_empty(&req->rl_registered)))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
-
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
No need for the overhead of atomically setting and clearing this bit
flag for every use of a pre-allocated backchannel rpc_rqst. These
are a distinct pool of rpc_rqsts that are used only for callback
operations, so it is safe to simply leave the bit set.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/backchannel.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 6c66a4f..3c7998a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -120,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
+ __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
goto out_free;
@@ -284,11 +285,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
- smp_mb__before_atomic();
- WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
- clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
- smp_mb__after_atomic();
-
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
@@ -343,7 +339,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
- set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf;
memset(buf, 0, sizeof(*buf));
Clean up: Over time, the industry has adopted the term "frwr"
instead of "frmr". The term "frwr" is now more widely recognized.
For the past couple of years I've attempted to add new code using
"frwr", but there remains plenty of older code that still
uses "frmr". Replace all usage of "frmr" to avoid confusion.
While we're churning code, rename variables unhelpfully called "f"
to "frwr", to improve code clarity.
Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/xprtrdma.h | 2
net/sunrpc/xprtrdma/frwr_ops.c | 176 ++++++++++++++++++++-------------------
net/sunrpc/xprtrdma/transport.c | 2
net/sunrpc/xprtrdma/verbs.c | 2
net/sunrpc/xprtrdma/xprt_rdma.h | 18 ++--
5 files changed, 100 insertions(+), 100 deletions(-)
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 221b7a2..5859563 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -64,7 +64,7 @@ enum rpcrdma_memreg {
RPCRDMA_MEMWINDOWS,
RPCRDMA_MEMWINDOWS_ASYNC,
RPCRDMA_MTHCAFMR,
- RPCRDMA_FRMR,
+ RPCRDMA_FRWR,
RPCRDMA_ALLPHYSICAL,
RPCRDMA_LAST
};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index e1f7303..185eb69 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
/* Lightweight memory registration using Fast Registration Work
- * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ * Requests (FRWR).
*
* FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frmr_op_map). When the RDMA operation is finished, this
+ * Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
- * (frmr_op_unmap).
+ * (frwr_op_unmap).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
@@ -98,12 +98,12 @@
static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
- unsigned int depth = ia->ri_max_frmr_depth;
- struct rpcrdma_frmr *f = &r->frmr;
+ unsigned int depth = ia->ri_max_frwr_depth;
+ struct rpcrdma_frwr *frwr = &r->frwr;
int rc;
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
- if (IS_ERR(f->fr_mr))
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+ if (IS_ERR(frwr->fr_mr))
goto out_mr_err;
r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
@@ -111,11 +111,11 @@
goto out_list_err;
sg_init_table(r->mw_sg, depth);
- init_completion(&f->fr_linv_done);
+ init_completion(&frwr->fr_linv_done);
return 0;
out_mr_err:
- rc = PTR_ERR(f->fr_mr);
+ rc = PTR_ERR(frwr->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
@@ -124,7 +124,7 @@
rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n",
__func__);
- ib_dereg_mr(f->fr_mr);
+ ib_dereg_mr(frwr->fr_mr);
return rc;
}
@@ -137,7 +137,7 @@
if (!list_empty(&r->mw_list))
list_del(&r->mw_list);
- rc = ib_dereg_mr(r->frmr.fr_mr);
+ rc = ib_dereg_mr(r->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
r, rc);
@@ -148,41 +148,41 @@
static int
__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
- struct rpcrdma_frmr *f = &r->frmr;
+ struct rpcrdma_frwr *frwr = &r->frwr;
int rc;
- rc = ib_dereg_mr(f->fr_mr);
+ rc = ib_dereg_mr(frwr->fr_mr);
if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
rc, r);
return rc;
}
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
- ia->ri_max_frmr_depth);
- if (IS_ERR(f->fr_mr)) {
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
+ ia->ri_max_frwr_depth);
+ if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(f->fr_mr), r);
- return PTR_ERR(f->fr_mr);
+ PTR_ERR(frwr->fr_mr), r);
+ return PTR_ERR(frwr->fr_mr);
}
- dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
- f->fr_state = FRMR_IS_INVALID;
+ dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
+ frwr->fr_state = FRWR_IS_INVALID;
return 0;
}
-/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
+/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/
static void
frwr_op_recover_mr(struct rpcrdma_mw *mw)
{
- enum rpcrdma_frmr_state state = mw->frmr.fr_state;
+ enum rpcrdma_frwr_state state = mw->frwr.fr_state;
struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc;
rc = __frwr_reset_mr(ia, mw);
- if (state != FRMR_FLUSHED_LI)
+ if (state != FRWR_FLUSHED_LI)
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
if (rc)
@@ -193,7 +193,7 @@
return;
out_release:
- pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw);
+ pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mw);
r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mwlock);
@@ -214,31 +214,31 @@
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
- ia->ri_max_frmr_depth =
+ ia->ri_max_frwr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
- __func__, ia->ri_max_frmr_depth);
-
- /* Add room for frmr register and invalidate WRs.
- * 1. FRMR reg WR for head
- * 2. FRMR invalidate WR for head
- * 3. N FRMR reg WRs for pagelist
- * 4. N FRMR invalidate WRs for pagelist
- * 5. FRMR reg WR for tail
- * 6. FRMR invalidate WR for tail
+ __func__, ia->ri_max_frwr_depth);
+
+ /* Add room for frwr register and invalidate WRs.
+ * 1. FRWR reg WR for head
+ * 2. FRWR invalidate WR for head
+ * 3. N FRWR reg WRs for pagelist
+ * 4. N FRWR invalidate WRs for pagelist
+ * 5. FRWR reg WR for tail
+ * 6. FRWR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
depth = 7;
- /* Calculate N if the device max FRMR depth is smaller than
+ /* Calculate N if the device max FRWR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS.
*/
- if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
- delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+ if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
do {
- depth += 2; /* FRMR reg + invalidate */
- delta -= ia->ri_max_frmr_depth;
+ depth += 2; /* FRWR reg + invalidate */
+ delta -= ia->ri_max_frwr_depth;
} while (delta > 0);
}
@@ -252,7 +252,7 @@
}
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
- ia->ri_max_frmr_depth);
+ ia->ri_max_frwr_depth);
return 0;
}
@@ -265,7 +265,7 @@
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
+ RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
}
static void
@@ -286,14 +286,14 @@
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
+ struct rpcrdma_frwr *frwr;
struct ib_cqe *cqe;
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_FR;
+ frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ frwr->fr_state = FRWR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg");
}
}
@@ -307,14 +307,14 @@
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
+ struct rpcrdma_frwr *frwr;
struct ib_cqe *cqe;
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
}
@@ -329,17 +329,17 @@
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
+ struct rpcrdma_frwr *frwr;
struct ib_cqe *cqe;
/* WARNING: Only wr_cqe and status are reliable at this point */
cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+ frwr = container_of(cqe, struct rpcrdma_frwr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
- complete(&frmr->fr_linv_done);
+ complete(&frwr->fr_linv_done);
}
/* Post a REG_MR Work Request to register a memory region
@@ -351,8 +351,8 @@
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
+ struct rpcrdma_frwr *frwr;
struct rpcrdma_mw *mw;
- struct rpcrdma_frmr *frmr;
struct ib_mr *mr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
@@ -366,14 +366,13 @@
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
return ERR_PTR(-ENOBUFS);
- } while (mw->frmr.fr_state != FRMR_IS_INVALID);
- frmr = &mw->frmr;
- frmr->fr_state = FRMR_IS_VALID;
- mr = frmr->fr_mr;
- reg_wr = &frmr->fr_regwr;
-
- if (nsegs > ia->ri_max_frmr_depth)
- nsegs = ia->ri_max_frmr_depth;
+ } while (mw->frwr.fr_state != FRWR_IS_INVALID);
+ frwr = &mw->frwr;
+ frwr->fr_state = FRWR_IS_VALID;
+ mr = frwr->fr_mr;
+
+ if (nsegs > ia->ri_max_frwr_depth)
+ nsegs = ia->ri_max_frwr_depth;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
sg_set_page(&mw->mw_sg[i],
@@ -402,16 +401,17 @@
if (unlikely(n != mw->mw_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
- __func__, frmr, mw->mw_nents, mr->length);
+ dprintk("RPC: %s: Using frwr %p to map %u segments (%llu bytes)\n",
+ __func__, frwr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
+ reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR;
- frmr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frmr->fr_cqe;
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0;
reg_wr->mr = mr;
@@ -434,18 +434,18 @@
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
- frmr->fr_state = FRMR_IS_INVALID;
+ frwr->fr_state = FRWR_IS_INVALID;
rpcrdma_put_mw(r_xprt, mw);
return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
- frmr->fr_mr, n, mw->mw_nents);
+ frwr->fr_mr, n, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
return ERR_PTR(-EIO);
out_senderr:
- pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
+ pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
rpcrdma_defer_mr_recovery(mw);
return ERR_PTR(-ENOTCONN);
}
@@ -462,7 +462,7 @@
struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
list_del(&mw->mw_list);
- mw->frmr.fr_state = FRMR_IS_INVALID;
+ mw->frwr.fr_state = FRWR_IS_INVALID;
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
@@ -483,7 +483,7 @@
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_frmr *f;
+ struct rpcrdma_frwr *frwr;
struct rpcrdma_mw *mw;
int count, rc;
@@ -492,20 +492,20 @@
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- f = NULL;
+ frwr = NULL;
count = 0;
prev = &first;
list_for_each_entry(mw, mws, mw_list) {
- mw->frmr.fr_state = FRMR_IS_INVALID;
+ mw->frwr.fr_state = FRWR_IS_INVALID;
- f = &mw->frmr;
- dprintk("RPC: %s: invalidating frmr %p\n",
- __func__, f);
+ frwr = &mw->frwr;
+ dprintk("RPC: %s: invalidating frwr %p\n",
+ __func__, frwr);
- f->fr_cqe.done = frwr_wc_localinv;
- last = &f->fr_invwr;
+ frwr->fr_cqe.done = frwr_wc_localinv;
+ last = &frwr->fr_invwr;
memset(last, 0, sizeof(*last));
- last->wr_cqe = &f->fr_cqe;
+ last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV;
last->ex.invalidate_rkey = mw->mw_handle;
count++;
@@ -513,7 +513,7 @@
*prev = last;
prev = &last->next;
}
- if (!f)
+ if (!frwr)
goto unmap;
/* Strong send queue ordering guarantees that when the
@@ -521,8 +521,8 @@
* are complete.
*/
last->send_flags = IB_SEND_SIGNALED;
- f->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&f->fr_linv_done);
+ frwr->fr_cqe.done = frwr_wc_localinv_wake;
+ reinit_completion(&frwr->fr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
@@ -532,7 +532,7 @@
bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first)
- wait_for_completion(&f->fr_linv_done);
+ wait_for_completion(&frwr->fr_linv_done);
if (rc)
goto reset_mrs;
@@ -542,8 +542,8 @@
unmap:
while (!list_empty(mws)) {
mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping frmr %p\n",
- __func__, &mw->frmr);
+ dprintk("RPC: %s: DMA unmapping frwr %p\n",
+ __func__, &mw->frwr);
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
@@ -551,15 +551,15 @@
return;
reset_mrs:
- pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+ pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
while (bad_wr) {
- f = container_of(bad_wr, struct rpcrdma_frmr,
- fr_invwr);
- mw = container_of(f, struct rpcrdma_mw, frmr);
+ frwr = container_of(bad_wr, struct rpcrdma_frwr,
+ fr_invwr);
+ mw = container_of(frwr, struct rpcrdma_mw, frwr);
__frwr_reset_mr(ia, mw);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index be8c4e6..ddf0d87 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,7 +67,7 @@
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
-unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index d6c737d..8405799 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -388,7 +388,7 @@
}
switch (xprt_rdma_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRWR:
if (frwr_is_supported(ia)) {
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
break;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e084130..f52269a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,7 +73,7 @@ struct rpcrdma_ia {
struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
- unsigned int ri_max_frmr_depth;
+ unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
@@ -242,17 +242,17 @@ enum {
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
-enum rpcrdma_frmr_state {
- FRMR_IS_INVALID, /* ready to be used */
- FRMR_IS_VALID, /* in use */
- FRMR_FLUSHED_FR, /* flushed FASTREG WR */
- FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
+enum rpcrdma_frwr_state {
+ FRWR_IS_INVALID, /* ready to be used */
+ FRWR_IS_VALID, /* in use */
+ FRWR_FLUSHED_FR, /* flushed FASTREG WR */
+ FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
};
-struct rpcrdma_frmr {
+struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
- enum rpcrdma_frmr_state fr_state;
+ enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
@@ -272,7 +272,7 @@ struct rpcrdma_mw {
enum dma_data_direction mw_dir;
union {
struct rpcrdma_fmr fmr;
- struct rpcrdma_frmr frmr;
+ struct rpcrdma_frwr frwr;
};
struct rpcrdma_xprt *mw_xprt;
u32 mw_handle;
Clean up: struct rpcrdma_mw was named after Memory Windows, but
xprtrdma no longer supports a Memory Window registration mode.
Rename rpcrdma_mw and its fields to reduce confusion and make
the code easier to read.
Renaming "mw" was suggested by Tom Talpey, the author of the
original xprtrdma implementation. It's a good idea, but I haven't
done this until now because it's a huge diffstat for no benefit
other than code readability.
However, I'm about to introduce static trace points that expose
a few of xprtrdma's internal data structures. They should make sense
in the trace report, and it's reasonable to treat trace points as a
kernel API contract which might be difficult to change later.
While I'm churning things up, two additional changes:
- rename variables unhelpfully called "r" to "mr", to improve code
clarity, and
- rename the MR-related helper functions using the form
"rpcrdma_mr_<verb>", to be consistent with other areas of the
code.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/fmr_ops.c | 148 ++++++++++++++++-----------------
net/sunrpc/xprtrdma/frwr_ops.c | 177 ++++++++++++++++++++-------------------
net/sunrpc/xprtrdma/rpc_rdma.c | 64 +++++++-------
net/sunrpc/xprtrdma/verbs.c | 119 +++++++++++++++-----------
net/sunrpc/xprtrdma/xprt_rdma.h | 62 +++++++-------
5 files changed, 292 insertions(+), 278 deletions(-)
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 29fc84c..8bd0399 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
@@ -47,7 +47,7 @@ enum {
}
static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
static struct ib_fmr_attr fmr_attr = {
.max_pages = RPCRDMA_MAX_FMR_SGES,
@@ -55,106 +55,106 @@ enum {
.page_shift = PAGE_SHIFT
};
- mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(u64), GFP_KERNEL);
- if (!mw->fmr.fm_physaddrs)
+ if (!mr->fmr.fm_physaddrs)
goto out_free;
- mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(*mw->mw_sg), GFP_KERNEL);
- if (!mw->mw_sg)
+ mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_free;
- sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);
+ sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
- mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+ mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
&fmr_attr);
- if (IS_ERR(mw->fmr.fm_mr))
+ if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err;
return 0;
out_fmr_err:
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
- PTR_ERR(mw->fmr.fm_mr));
+ PTR_ERR(mr->fmr.fm_mr));
out_free:
- kfree(mw->mw_sg);
- kfree(mw->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
+ kfree(mr->fmr.fm_physaddrs);
return -ENOMEM;
}
static int
-__fmr_unmap(struct rpcrdma_mw *mw)
+__fmr_unmap(struct rpcrdma_mr *mr)
{
LIST_HEAD(l);
int rc;
- list_add(&mw->fmr.fm_mr->list, &l);
+ list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
- list_del(&mw->fmr.fm_mr->list);
+ list_del(&mr->fmr.fm_mr->list);
return rc;
}
static void
-fmr_op_release_mr(struct rpcrdma_mw *r)
+fmr_op_release_mr(struct rpcrdma_mr *mr)
{
LIST_HEAD(unmap_list);
int rc;
/* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- kfree(r->fmr.fm_physaddrs);
- kfree(r->mw_sg);
+ kfree(mr->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
/* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY
*/
- rc = __fmr_unmap(r);
+ rc = __fmr_unmap(mr);
if (rc)
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
- r, rc);
+ mr, rc);
- rc = ib_dealloc_fmr(r->fmr.fm_mr);
+ rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc)
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
- r, rc);
+ mr, rc);
- kfree(r);
+ kfree(mr);
}
/* Reset of a single FMR.
*/
static void
-fmr_op_recover_mr(struct rpcrdma_mw *mw)
+fmr_op_recover_mr(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
int rc;
/* ORDER: invalidate first */
- rc = __fmr_unmap(mw);
+ rc = __fmr_unmap(mr);
/* ORDER: then DMA unmap */
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw);
+ pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- fmr_op_release_mr(mw);
+ fmr_op_release_mr(mr);
}
static int
@@ -180,15 +180,15 @@ enum {
*/
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_mr_seg *seg1 = seg;
int len, pageoff, i, rc;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
u64 *dma_pages;
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset);
@@ -199,12 +199,12 @@ enum {
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
len += seg->mr_len;
++seg;
@@ -214,40 +214,40 @@ enum {
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
- dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
- rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
+ for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
+ dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
+ rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
dma_pages[0]);
if (rc)
goto out_maperr;
- mw->mw_handle = mw->fmr.fm_mr->rkey;
- mw->mw_length = len;
- mw->mw_offset = dma_pages[0] + pageoff;
+ mr->mr_handle = mr->fmr.fm_mr->rkey;
+ mr->mr_length = len;
+ mr->mr_offset = dma_pages[0] + pageoff;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
- pageoff, mw->mw_nents, rc);
+ pageoff, mr->mr_nents, rc);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
}
@@ -256,13 +256,13 @@ enum {
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
LIST_HEAD(unmap_list);
int rc;
@@ -271,10 +271,10 @@ enum {
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR.
*/
- list_for_each_entry(mw, mws, mw_list) {
+ list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+ __func__, &mr->fmr);
+ list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
@@ -284,14 +284,14 @@ enum {
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
dprintk("RPC: %s: DMA unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_del(&mw->fmr.fm_mr->list);
+ __func__, &mr->fmr);
+ list_del(&mr->fmr.fm_mr->list);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ rpcrdma_mr_put(mr);
}
return;
@@ -299,10 +299,10 @@ enum {
out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- list_del(&mw->fmr.fm_mr->list);
- fmr_op_recover_mr(mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ list_del(&mr->fmr.fm_mr->list);
+ fmr_op_recover_mr(mr);
}
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 185eb69..8ba4b33 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -17,7 +17,7 @@
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
* Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
- * (frwr_op_unmap).
+ * (frwr_op_unmap_sync).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
*
* As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mws immediately so that no work (like managing a linked list
+ * rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall.
*
* But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mws list when recovery is
+ * recovered MRs are placed back on the rb_mrs list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset.
*
@@ -96,21 +96,21 @@
}
static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
unsigned int depth = ia->ri_max_frwr_depth;
- struct rpcrdma_frwr *frwr = &r->frwr;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
if (IS_ERR(frwr->fr_mr))
goto out_mr_err;
- r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
- if (!r->mw_sg)
+ mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_list_err;
- sg_init_table(r->mw_sg, depth);
+ sg_init_table(mr->mr_sg, depth);
init_completion(&frwr->fr_linv_done);
return 0;
@@ -129,32 +129,32 @@
}
static void
-frwr_op_release_mr(struct rpcrdma_mw *r)
+frwr_op_release_mr(struct rpcrdma_mr *mr)
{
int rc;
- /* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ /* Ensure MR is not on any rl_registered list */
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- rc = ib_dereg_mr(r->frwr.fr_mr);
+ rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- r, rc);
- kfree(r->mw_sg);
- kfree(r);
+ mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static int
-__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- struct rpcrdma_frwr *frwr = &r->frwr;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
rc = ib_dereg_mr(frwr->fr_mr);
if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
- rc, r);
+ rc, mr);
return rc;
}
@@ -162,7 +162,7 @@
ia->ri_max_frwr_depth);
if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(frwr->fr_mr), r);
+ PTR_ERR(frwr->fr_mr), mr);
return PTR_ERR(frwr->fr_mr);
}
@@ -174,33 +174,33 @@
/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/
static void
-frwr_op_recover_mr(struct rpcrdma_mw *mw)
+frwr_op_recover_mr(struct rpcrdma_mr *mr)
{
- enum rpcrdma_frwr_state state = mw->frwr.fr_state;
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc;
- rc = __frwr_reset_mr(ia, mw);
+ rc = __frwr_mr_reset(ia, mr);
if (state != FRWR_FLUSHED_LI)
ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mw);
+ pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- frwr_op_release_mr(mw);
+ frwr_op_release_mr(mr);
}
static int
@@ -347,40 +347,39 @@
*/
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
struct rpcrdma_frwr *frwr;
- struct rpcrdma_mw *mw;
- struct ib_mr *mr;
+ struct rpcrdma_mr *mr;
+ struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n;
u8 key;
- mw = NULL;
+ mr = NULL;
do {
- if (mw)
- rpcrdma_defer_mr_recovery(mw);
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ if (mr)
+ rpcrdma_mr_defer_recovery(mr);
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
- } while (mw->frwr.fr_state != FRWR_IS_INVALID);
- frwr = &mw->frwr;
+ } while (mr->frwr.fr_state != FRWR_IS_INVALID);
+ frwr = &mr->frwr;
frwr->fr_state = FRWR_IS_VALID;
- mr = frwr->fr_mr;
if (nsegs > ia->ri_max_frwr_depth)
nsegs = ia->ri_max_frwr_depth;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
++seg;
@@ -391,21 +390,22 @@
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mw->mw_nents))
+ ibmr = frwr->fr_mr;
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
+ if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
dprintk("RPC: %s: Using frwr %p to map %u segments (%llu bytes)\n",
- __func__, frwr, mw->mw_nents, mr->length);
+ __func__, frwr, mr->mr_nents, ibmr->length);
- key = (u8)(mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(mr, ++key);
+ key = (u8)(ibmr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(ibmr, ++key);
reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL;
@@ -414,8 +414,8 @@
reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0;
- reg_wr->mr = mr;
- reg_wr->key = mr->rkey;
+ reg_wr->mr = ibmr;
+ reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
@@ -424,48 +424,48 @@
if (rc)
goto out_senderr;
- mw->mw_handle = mr->rkey;
- mw->mw_length = mr->length;
- mw->mw_offset = mr->iova;
+ mr->mr_handle = ibmr->rkey;
+ mr->mr_length = ibmr->length;
+ mr->mr_offset = ibmr->iova;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
+ mr->mr_sg, i);
frwr->fr_state = FRWR_IS_INVALID;
- rpcrdma_put_mw(r_xprt, mw);
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
- frwr->fr_mr, n, mw->mw_nents);
- rpcrdma_defer_mr_recovery(mw);
+ frwr->fr_mr, n, mr->mr_nents);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
out_senderr:
pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
- rpcrdma_defer_mr_recovery(mw);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-ENOTCONN);
}
-/* Handle a remotely invalidated mw on the @mws list
+/* Handle a remotely invalidated mr on the @mrs list
*/
static void
-frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mws)
+frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
- list_for_each_entry(mw, mws, mw_list)
- if (mw->mw_handle == rep->rr_inv_rkey) {
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ list_for_each_entry(mr, mrs, mr_list)
+ if (mr->mr_handle == rep->rr_inv_rkey) {
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- list_del(&mw->mw_list);
- mw->frwr.fr_state = FRWR_IS_INVALID;
+ list_del(&mr->mr_list);
+ mr->frwr.fr_state = FRWR_IS_INVALID;
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ rpcrdma_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
}
@@ -475,16 +475,16 @@
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_frwr *frwr;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int count, rc;
/* ORDER: Invalidate all of the MRs first
@@ -495,10 +495,11 @@
frwr = NULL;
count = 0;
prev = &first;
- list_for_each_entry(mw, mws, mw_list) {
- mw->frwr.fr_state = FRWR_IS_INVALID;
+ list_for_each_entry(mr, mrs, mr_list) {
+ mr->frwr.fr_state = FRWR_IS_INVALID;
+
+ frwr = &mr->frwr;
- frwr = &mw->frwr;
dprintk("RPC: %s: invalidating frwr %p\n",
__func__, frwr);
@@ -507,7 +508,7 @@
memset(last, 0, sizeof(*last));
last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV;
- last->ex.invalidate_rkey = mw->mw_handle;
+ last->ex.invalidate_rkey = mr->mr_handle;
count++;
*prev = last;
@@ -537,16 +538,16 @@
goto reset_mrs;
/* ORDER: Now DMA unmap all of the MRs, and return
- * them to the free MW list.
+ * them to the free MR list.
*/
unmap:
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
dprintk("RPC: %s: DMA unmapping frwr %p\n",
- __func__, &mw->frwr);
+ __func__, &mr->frwr);
ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ rpcrdma_mr_put(mr);
}
return;
@@ -559,9 +560,9 @@
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr);
- mw = container_of(frwr, struct rpcrdma_mw, frwr);
+ mr = container_of(frwr, struct rpcrdma_mr, frwr);
- __frwr_reset_mr(ia, mw);
+ __frwr_mr_reset(ia, mr);
bad_wr = bad_wr->next;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 9207aea..9601af0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -292,15 +292,15 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
}
static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
+xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
- *iptr++ = cpu_to_be32(mw->mw_handle);
- *iptr++ = cpu_to_be32(mw->mw_length);
- xdr_encode_hyper(iptr, mw->mw_offset);
+ *iptr++ = cpu_to_be32(mr->mr_handle);
+ *iptr++ = cpu_to_be32(mr->mr_length);
+ xdr_encode_hyper(iptr, mr->mr_offset);
}
static int
-encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
__be32 *p;
@@ -308,12 +308,12 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
if (unlikely(!p))
return -EMSGSIZE;
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
static int
-encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
u32 position)
{
__be32 *p;
@@ -324,7 +324,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
*p++ = xdr_one; /* Item present */
*p++ = cpu_to_be32(position);
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
@@ -348,7 +348,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int pos;
int nsegs;
@@ -363,21 +363,21 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- false, &mw);
+ false, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_read_segment(xdr, mw, pos) < 0)
+ if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE;
dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
rqst->rq_task->tk_pid, __func__, pos,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
+ mr->mr_length, (unsigned long long)mr->mr_offset,
+ mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.read_chunk_count++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
return 0;
@@ -404,7 +404,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -425,23 +425,23 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
+ mr->mr_length, (unsigned long long)mr->mr_offset,
+ mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.write_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in this Write chunk */
@@ -468,7 +468,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -487,23 +487,23 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
+ mr->mr_length, (unsigned long long)mr->mr_offset,
+ mr->mr_handle, mr->mr_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.reply_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in the Reply chunk */
@@ -821,10 +821,10 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
* so these registrations are invalid and unusable.
*/
while (unlikely(!list_empty(&req->rl_registered))) {
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
- mw = rpcrdma_pop_mw(&req->rl_registered);
- rpcrdma_defer_mr_recovery(mw);
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ rpcrdma_mr_defer_recovery(mr);
}
/* This implementation supports the following combinations
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8405799..2582729 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -71,8 +71,8 @@
/*
* internal functions
*/
-static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
-static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -458,7 +458,7 @@
rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
}
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
/* Allow waiters to continue */
complete(&ia->ri_remove_done);
@@ -671,7 +671,7 @@
goto out3;
}
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
return 0;
out3:
@@ -992,15 +992,15 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
{
struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
rb_recovery_worker.work);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
spin_lock(&buf->rb_recovery_lock);
while (!list_empty(&buf->rb_stale_mrs)) {
- mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
+ mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
- dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
- mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);
+ dprintk("RPC: %s: recovering MR %p\n", __func__, mr);
+ mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
spin_lock(&buf->rb_recovery_lock);
}
@@ -1008,20 +1008,20 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
}
void
-rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
+rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
spin_lock(&buf->rb_recovery_lock);
- rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
+ rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
schedule_delayed_work(&buf->rb_recovery_worker, 0);
}
static void
-rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
+rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1030,30 +1030,30 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
LIST_HEAD(all);
for (count = 0; count < 32; count++) {
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int rc;
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
break;
- rc = ia->ri_ops->ro_init_mr(ia, mw);
+ rc = ia->ri_ops->ro_init_mr(ia, mr);
if (rc) {
- kfree(mw);
+ kfree(mr);
break;
}
- mw->mw_xprt = r_xprt;
+ mr->mr_xprt = r_xprt;
- list_add(&mw->mw_list, &free);
- list_add(&mw->mw_all, &all);
+ list_add(&mr->mr_list, &free);
+ list_add(&mr->mr_all, &all);
}
- spin_lock(&buf->rb_mwlock);
- list_splice(&free, &buf->rb_mws);
+ spin_lock(&buf->rb_mrlock);
+ list_splice(&free, &buf->rb_mrs);
list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count;
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
dprintk("RPC: %s: created %u MRs\n", __func__, count);
}
@@ -1066,7 +1066,7 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
}
struct rpcrdma_req *
@@ -1144,10 +1144,10 @@ struct rpcrdma_req *
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
- spin_lock_init(&buf->rb_mwlock);
+ spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock);
- INIT_LIST_HEAD(&buf->rb_mws);
+ INIT_LIST_HEAD(&buf->rb_mrs);
INIT_LIST_HEAD(&buf->rb_all);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
@@ -1155,7 +1155,7 @@ struct rpcrdma_req *
INIT_DELAYED_WORK(&buf->rb_recovery_worker,
rpcrdma_mr_recovery_worker);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
@@ -1229,26 +1229,26 @@ struct rpcrdma_req *
}
static void
-rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
+rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int count;
count = 0;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
while (!list_empty(&buf->rb_all)) {
- mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
- list_del(&mw->mw_all);
+ mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
+ list_del(&mr->mr_all);
- spin_unlock(&buf->rb_mwlock);
- ia->ri_ops->ro_release_mr(mw);
+ spin_unlock(&buf->rb_mrlock);
+ ia->ri_ops->ro_release_mr(mr);
count++;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
}
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
r_xprt->rx_stats.mrs_allocated = 0;
dprintk("RPC: %s: released %u MRs\n", __func__, count);
@@ -1285,26 +1285,33 @@ struct rpcrdma_req *
spin_unlock(&buf->rb_reqslock);
buf->rb_recv_count = 0;
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
}
-struct rpcrdma_mw *
-rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_mr_get - Allocate an rpcrdma_mr object
+ * @r_xprt: controlling transport
+ *
+ * Returns an initialized rpcrdma_mr or NULL if no free
+ * rpcrdma_mr objects are available.
+ */
+struct rpcrdma_mr *
+rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_mw *mw = NULL;
+ struct rpcrdma_mr *mr = NULL;
- spin_lock(&buf->rb_mwlock);
- if (!list_empty(&buf->rb_mws))
- mw = rpcrdma_pop_mw(&buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
+ if (!list_empty(&buf->rb_mrs))
+ mr = rpcrdma_mr_pop(&buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
- if (!mw)
- goto out_nomws;
- return mw;
+ if (!mr)
+ goto out_nomrs;
+ return mr;
-out_nomws:
- dprintk("RPC: %s: no MWs available\n", __func__);
+out_nomrs:
+ dprintk("RPC: %s: no MRs available\n", __func__);
if (r_xprt->rx_ep.rep_connected != -ENODEV)
schedule_delayed_work(&buf->rb_refresh_worker, 0);
@@ -1314,14 +1321,20 @@ struct rpcrdma_mw *
return NULL;
}
+/**
+ * rpcrdma_mr_put - Release an rpcrdma_mr object
+ * @mr: object to release
+ *
+ */
void
-rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
+rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- spin_lock(&buf->rb_mwlock);
- rpcrdma_push_mw(mw, &buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
+ rpcrdma_mr_push(mr, &buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
}
static struct rpcrdma_rep *
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f52269a..530ace6 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -230,12 +230,12 @@ enum {
};
/*
- * struct rpcrdma_mw - external memory region metadata
+ * struct rpcrdma_mr - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*
- * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
+ * Each rpcrdma_buffer has a list of free MRs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
@@ -265,20 +265,20 @@ struct rpcrdma_fmr {
u64 *fm_physaddrs;
};
-struct rpcrdma_mw {
- struct list_head mw_list;
- struct scatterlist *mw_sg;
- int mw_nents;
- enum dma_data_direction mw_dir;
+struct rpcrdma_mr {
+ struct list_head mr_list;
+ struct scatterlist *mr_sg;
+ int mr_nents;
+ enum dma_data_direction mr_dir;
union {
struct rpcrdma_fmr fmr;
struct rpcrdma_frwr frwr;
};
- struct rpcrdma_xprt *mw_xprt;
- u32 mw_handle;
- u32 mw_length;
- u64 mw_offset;
- struct list_head mw_all;
+ struct rpcrdma_xprt *mr_xprt;
+ u32 mr_handle;
+ u32 mr_length;
+ u64 mr_offset;
+ struct list_head mr_all;
};
/*
@@ -371,19 +371,19 @@ enum {
}
static inline void
-rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
+rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{
- list_add_tail(&mw->mw_list, list);
+ list_add_tail(&mr->mr_list, list);
}
-static inline struct rpcrdma_mw *
-rpcrdma_pop_mw(struct list_head *list)
+static inline struct rpcrdma_mr *
+rpcrdma_mr_pop(struct list_head *list)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
- mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
- list_del(&mw->mw_list);
- return mw;
+ mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
+ list_del(&mr->mr_list);
+ return mr;
}
/*
@@ -393,8 +393,8 @@ enum {
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
- spinlock_t rb_mwlock; /* protect rb_mws list */
- struct list_head rb_mws;
+ spinlock_t rb_mrlock; /* protect rb_mrs list */
+ struct list_head rb_mrs;
struct list_head rb_all;
unsigned long rb_sc_head;
@@ -473,19 +473,19 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
- struct rpcrdma_mw **);
+ struct rpcrdma_mr **);
void (*ro_reminv)(struct rpcrdma_rep *rep,
- struct list_head *mws);
+ struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_recover_mr)(struct rpcrdma_mw *);
+ void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init_mr)(struct rpcrdma_ia *,
- struct rpcrdma_mw *);
- void (*ro_release_mr)(struct rpcrdma_mw *);
+ struct rpcrdma_mr *);
+ void (*ro_release_mr)(struct rpcrdma_mr *mr);
const char *ro_displayname;
const int ro_send_w_inv_ok;
};
@@ -574,15 +574,15 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
-struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
-void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
+struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_mr_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
-void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
-
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
Clean up: Code review suggested that a common bit of code can be
placed into a helper function, and this gives us fewer places to
stick an "I DMA unmapped something" trace point.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/fmr_ops.c | 19 ++++++++-----------
net/sunrpc/xprtrdma/frwr_ops.c | 10 ++--------
net/sunrpc/xprtrdma/verbs.c | 26 ++++++++++++++++++++++----
net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
4 files changed, 33 insertions(+), 23 deletions(-)
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 8bd0399..7f2f2b7 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -135,14 +135,12 @@ enum {
/* ORDER: invalidate first */
rc = __fmr_unmap(mr);
-
- /* ORDER: then DMA unmap */
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
if (rc)
goto out_release;
- rpcrdma_mr_put(mr);
+ /* ORDER: then DMA unmap */
+ rpcrdma_mr_unmap_and_put(mr);
+
r_xprt->rx_stats.mrs_recovered++;
return;
@@ -150,6 +148,9 @@ enum {
pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+
spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mr->mr_all);
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
@@ -245,9 +246,7 @@ enum {
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
pageoff, mr->mr_nents, rc);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- rpcrdma_mr_put(mr);
+ rpcrdma_mr_unmap_and_put(mr);
return ERR_PTR(-EIO);
}
@@ -289,9 +288,7 @@ enum {
dprintk("RPC: %s: DMA unmapping fmr %p\n",
__func__, &mr->fmr);
list_del(&mr->fmr.fm_mr->list);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- rpcrdma_mr_put(mr);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 8ba4b33..35e3a54 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -459,13 +459,9 @@
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
list_del(&mr->mr_list);
mr->frwr.fr_state = FRWR_IS_INVALID;
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- rpcrdma_mr_put(mr);
+ rpcrdma_mr_unmap_and_put(mr);
break; /* only one invalidated MR per RPC */
}
}
@@ -545,9 +541,7 @@
mr = rpcrdma_mr_pop(mrs);
dprintk("RPC: %s: DMA unmapping frwr %p\n",
__func__, &mr->frwr);
- ib_dma_unmap_sg(ia->ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- rpcrdma_mr_put(mr);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 2582729..9cc8abc 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1321,6 +1321,14 @@ struct rpcrdma_mr *
return NULL;
}
+static void
+__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
+{
+ spin_lock(&buf->rb_mrlock);
+ rpcrdma_mr_push(mr, &buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
+}
+
/**
* rpcrdma_mr_put - Release an rpcrdma_mr object
* @mr: object to release
@@ -1329,12 +1337,22 @@ struct rpcrdma_mr *
void
rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
+ __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
+}
+
+/**
+ * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
+ * @mr: object to release
+ *
+ */
+void
+rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
+{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- spin_lock(&buf->rb_mrlock);
- rpcrdma_mr_push(mr, &buf->rb_mrs);
- spin_unlock(&buf->rb_mrlock);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
}
static struct rpcrdma_rep *
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 530ace6..28ae1fb 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -576,6 +576,7 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
Hi Chuck,
On 12/14/2017 08:55 PM, Chuck Lever wrote:
> Hi Anna-
>
> This is the second series of NFS/RDMA client patches for v4.16.
>
> This is a bunch of minor fixes and clean-ups in preparation for
> adding static trace points to the RPC-over-RDMA transport
> implementation. Please consider these for v4.16.
These patches look okay to me. Thanks!
Anna
>
> ---
>
> Chuck Lever (16):
> xprtrdma: Fix buffer leak after transport set up failure
> xprtrdma: Fix backchannel allocation of extra rpcrdma_reps
> xprtrdma: Eliminate unnecessary lock cycle in xprt_rdma_send_request
> xprtrdma: Per-mode handling for Remote Invalidation
> xprtrdma: Remove ri_reminv_expected
> xprtrdma: Remove unused padding variables
> xprtrdma: Initialize the xprt address string array earlier
> xprtrdma: Remove another sockaddr_storage field (cdata::addr)
> xprtrdma: Support IPv6 in xprt_rdma_set_port
> xprtrdma: Move unmap-safe logic to rpcrdma_marshal_req
> xprtrdma: buf_free not called for CB replies
> xprtrdma: Split xprt_rdma_send_request
> xprtrdma: Don't clear RPC_BC_PA_IN_USE on pre-allocated rpc_rqst's
> xprtrdma: Replace all usage of "frmr" with "frwr"
> xprtrdma: Remove usage of "mw"
> xprtrdma: Introduce rpcrdma_mw_unmap_and_put
>
>
> include/linux/sunrpc/xprtrdma.h | 2
> net/sunrpc/xprtrdma/backchannel.c | 68 +++++---
> net/sunrpc/xprtrdma/fmr_ops.c | 155 +++++++++---------
> net/sunrpc/xprtrdma/frwr_ops.c | 317 +++++++++++++++++++------------------
> net/sunrpc/xprtrdma/rpc_rdma.c | 98 +++++------
> net/sunrpc/xprtrdma/transport.c | 102 +++++-------
> net/sunrpc/xprtrdma/verbs.c | 212 ++++++++++++++-----------
> net/sunrpc/xprtrdma/xprt_rdma.h | 112 +++++++------
> 8 files changed, 552 insertions(+), 514 deletions(-)
>
> --
> Chuck Lever
>
> On Dec 19, 2017, at 10:28 AM, Anna Schumaker <[email protected]> wrote:
>
> Hi Chuck,
>
> On 12/14/2017 08:55 PM, Chuck Lever wrote:
>> Hi Anna-
>>
>> This is the second series of NFS/RDMA client patches for v4.16.
>>
>> This is a bunch of minor fixes and clean-ups in preparation for
>> adding static trace points to the RPC-over-RDMA transport
>> implementation. Please consider these for v4.16.
>
> These patches look okay to me. Thanks!
Excellent, I will prepare and send the next series very soon.
> Anna
>
>>
>> ---
>>
>> Chuck Lever (16):
>> xprtrdma: Fix buffer leak after transport set up failure
>> xprtrdma: Fix backchannel allocation of extra rpcrdma_reps
>> xprtrdma: Eliminate unnecessary lock cycle in xprt_rdma_send_request
>> xprtrdma: Per-mode handling for Remote Invalidation
>> xprtrdma: Remove ri_reminv_expected
>> xprtrdma: Remove unused padding variables
>> xprtrdma: Initialize the xprt address string array earlier
>> xprtrdma: Remove another sockaddr_storage field (cdata::addr)
>> xprtrdma: Support IPv6 in xprt_rdma_set_port
>> xprtrdma: Move unmap-safe logic to rpcrdma_marshal_req
>> xprtrdma: buf_free not called for CB replies
>> xprtrdma: Split xprt_rdma_send_request
>> xprtrdma: Don't clear RPC_BC_PA_IN_USE on pre-allocated rpc_rqst's
>> xprtrdma: Replace all usage of "frmr" with "frwr"
>> xprtrdma: Remove usage of "mw"
>> xprtrdma: Introduce rpcrdma_mw_unmap_and_put
>>
>>
>> include/linux/sunrpc/xprtrdma.h | 2
>> net/sunrpc/xprtrdma/backchannel.c | 68 +++++---
>> net/sunrpc/xprtrdma/fmr_ops.c | 155 +++++++++---------
>> net/sunrpc/xprtrdma/frwr_ops.c | 317 +++++++++++++++++++------------------
>> net/sunrpc/xprtrdma/rpc_rdma.c | 98 +++++------
>> net/sunrpc/xprtrdma/transport.c | 102 +++++-------
>> net/sunrpc/xprtrdma/verbs.c | 212 ++++++++++++++-----------
>> net/sunrpc/xprtrdma/xprt_rdma.h | 112 +++++++------
>> 8 files changed, 552 insertions(+), 514 deletions(-)
>>
>> --
>> Chuck Lever
>>
--
Chuck Lever