From: "J. Bruce Fields" Subject: [PATCH 092/100] rdma: ONCRPC RDMA protocol marshalling Date: Fri, 25 Jan 2008 18:17:12 -0500 Message-ID: <1201303040-7779-92-git-send-email-bfields@citi.umich.edu> References: <20080125231521.GG25141@fieldses.org> <1201303040-7779-1-git-send-email-bfields@citi.umich.edu> <1201303040-7779-2-git-send-email-bfields@citi.umich.edu> <1201303040-7779-3-git-send-email-bfields@citi.umich.edu> <1201303040-7779-4-git-send-email-bfields@citi.umich.edu> <1201303040-7779-5-git-send-email-bfields@citi.umich.edu> <1201303040-7779-6-git-send-email-bfields@citi.umich.edu> <1201303040-7779-7-git-send-email-bfields@citi.umich.edu> <1201303040-7779-8-git-send-email-bfields@citi.umich.edu> <1201303040-7779-9-git-send-email-bfields@citi.umich.edu> <1201303040-7779-10-git-send-email-bfields@citi.umich.edu> <1201303040-7779-11-git-send-email-bfields@citi.umich.edu> <1201303040-7779-12-git-send-email-bfields@citi.umich.edu> <1201303040-7779-13-git-send-email-bfields@citi.umich.edu> <1201303040-7779-14-git-send-email-bfields@citi.umich.edu> <1201303040-7779-15-git-send-email-bfields@citi.umich.edu> <1201303040-7779-16-git-send-email-bfields@citi.umich.edu> <1201303040-7779-17-git-send-email-bfields@citi.umich.edu> <1201303040-7779-18-git-send-email-bfields@citi.umich.edu> <1201303040-7779-19-git-send-email-bfields@citi.umich.edu> <1201303040-7779-20-git-send-email-bfields@citi.umich.edu> <1201303040-7779-21-git-send-email-bfields@citi.umich.edu> <1201303040-7779-22-git-send-email-bfields@citi.umich.edu> <1201303040-7779-23-git-send-email-bfields@citi.umich.edu> <1201303040-7779-24-git-send-email-bfields@citi.umich.edu> <1201303040-7779-25-git-send-email-bfields@citi.umich.edu> <1201303040-7779-26-git-send-email-bfields@citi.umich.edu> <1201303040-7779-27-git-send-email-bfields@citi.umich.edu> <1201303040-7779-28-git-send-email-bfields@citi.umich.edu> <1201303040-7779-29-git-send-email-bfields@citi.umich.edu> <1201303040-7779-30-git-send-email-bfields@citi.umich.edu> <1201303040-7779-31-git-send-email-bfields@citi.umich.edu> <1201303040-7779-32-git-send-email-bfields@citi.umich.edu> <1201303040-7779-33-git-send-email-bfields@citi.umich.edu> <1201303040-7779-34-git-send-email-bfields@citi.umich.edu> <1201303040-7779-35-git-send-email-bfields@citi.umich.edu> <1201303040-7779-36-git-send-email-bfields@citi.umich.edu> <1201303040-7779-37-git-send-email-bfields@citi.umich.edu> <1201303040-7779-38-git-send-email-bfields@citi.umich.edu> <1201303040-7779-39-git-send-email-bfields@citi.umich.edu> <1201303040-7779-40-git-send-email-bfields@citi.umich.edu> <1201303040-7779-41-git-send-email-bfields@citi.umich.edu> <1201303040-7779-42-git-send-email-bfields@citi.umich.edu> <1201303040-7779-43-git-send-email-bfields@citi.umich.edu> <1201303040-7779-44-git-send-email-bfields@citi.umich.edu> <1201303040-7779-45-git-send-email-bfields@citi.umich.edu> <1201303040-7779-46-git-send-email-bfields@citi.umich.edu> <1201303040-7779-47-git-send-email-bfields@citi.umich.edu> <1201303040-7779-48-git-send-email-bfields@citi.umich.edu> <1201303040-7779-49-git-send-email-bfields@citi.umich.edu> <1201303040-7779-50-git-send-email-bfields@citi.umich.edu> <1201303040-7779-51-git-send-email-bfields@citi.umich.edu> <1201303040-7779-52-git-send-email-bfields@citi.umich.edu> <1201303040-7779-53-git-send-email-bfields@citi.umich.edu> <1201303040-7779-54-git-send-email-bfields@citi.umich.edu> <1201303040-7779-55-git-send-email-bfields@citi.umich.edu> <1201303040-7779-56-git-send-email-bfields@citi.umich.edu> <1201303040-7779-57-git-send-email-bfields@citi.umich.edu> <1201303040-7779-58-git-send-email-bfields@citi.umich.edu> <1201303040-7779-59-git-send-email-bfields@citi.umich.edu> <1201303040-7779-60-git-send-email-bfields@citi.umich.edu> <1201303040-7779-61-git-send-email-bfields@citi.umich.edu> <1201303040-7779-62-git-send-email-bfields@citi.umich.edu> <1201303040-7779-63-git-send-email-bfields@citi.umich.edu> <1201303040-7779-64-git-send-email-bfields@citi.umich.edu> <1201303040-7779-65-git-send-email-bfields@citi.umich.edu> <1201303040-7779-66-git-send-email-bfields@citi.umich.edu> <1201303040-7779-67-git-send-email-bfields@citi.umich.edu> <1201303040-7779-68-git-send-email-bfields@citi.umich.edu> <1201303040-7779-69-git-send-email-bfields@citi.umich.edu> <1201303040-7779-70-git-send-email-bfields@citi.umich.edu> <1201303040-7779-71-git-send-email-bfields@citi.umich.edu> <1201303040-7779-72-git-send-email-bfields@citi.umich.edu> <1201303040-7779-73-git-send-email-bfields@citi.umich.edu> <1201303040-7779-74-git-send-email-bfields@citi.umich.edu> <1201303040-7779-75-git-send-email-bfields@citi.umich.edu> <1201303040-7779-76-git-send-email-bfields@citi.umich.edu> <1201303040-7779-77-git-send-email-bfields@citi.umich.edu> <1201303040-7779-78-git-send-email-bfields@citi.umich.edu> <1201303040-7779-79-git-send-email-bfields@citi.umich.edu> <1201303040-7779-80-git-send-email-bfields@citi.umich.edu> <1201303040-7779-81-git-send-email-bfields@citi.umich.edu> <1201303040-7779-82-git-send-email-bfields@citi.umich.edu> <1201303040-7779-83-git-send-email-bfields@citi.umich.edu> <1201303040-7779-84-git-send-email-bfields@citi.umich.edu> <1201303040-7779-85-git-send-email-bfields@citi.umich.edu> <1201303040-7779-86-git-send-email-bfields@citi.umich.edu> <1201303040-7779-87-git-send-email-bfields@citi.umich.edu> <1201303040-7779-88-git-send-email-bfields@citi.umich.edu> <1201303040-7779-89-git-send-email-bfields@citi.umich.edu> <1201303040-7779-90-git-send-email-bfields@citi.umich.edu> <1201303040-7779-91-git-send-email-bfields@citi.umich.edu> Cc: Tom Tucker , "J. Bruce Fields" To: linux-nfs@vger.kernel.org Return-path: Received: from mail.fieldses.org ([66.93.2.214]:47561 "EHLO fieldses.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1762002AbYAYXSc (ORCPT ); Fri, 25 Jan 2008 18:18:32 -0500 In-Reply-To: <1201303040-7779-91-git-send-email-bfields@citi.umich.edu> Sender: linux-nfs-owner@vger.kernel.org List-ID: From: Tom Tucker This logic parses the ONCRDMA protocol headers that precede the actual RPC header. It is placed in a separate file to keep all protocol aware code in a single place. Signed-off-by: Tom Tucker Acked-by: Neil Brown Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 412 ++++++++++++++++++++++++++++++++ 1 files changed, 412 insertions(+), 0 deletions(-) create mode 100644 net/sunrpc/xprtrdma/svc_rdma_marshal.c diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c new file mode 100644 index 0000000..9530ef2 --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -0,0 +1,412 @@ +/* + * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Author: Tom Tucker + */ + +#include +#include +#include +#include +#include + +#define RPCDBG_FACILITY RPCDBG_SVCXPRT + +/* + * Decodes a read chunk list. The expected format is as follows: + * descrim : xdr_one + * position : u32 offset into XDR stream + * handle : u32 RKEY + * . . . + * end-of-list: xdr_zero + */ +static u32 *decode_read_list(u32 *va, u32 *vaend) +{ + struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; + + while (ch->rc_discrim != xdr_zero) { + u64 ch_offset; + + if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > + (unsigned long)vaend) { + dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); + return NULL; + } + + ch->rc_discrim = ntohl(ch->rc_discrim); + ch->rc_position = ntohl(ch->rc_position); + ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle); + ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length); + va = (u32 *)&ch->rc_target.rs_offset; + xdr_decode_hyper(va, &ch_offset); + put_unaligned(ch_offset, (u64 *)va); + ch++; + } + return (u32 *)&ch->rc_position; +} + +/* + * Determine number of chunks and total bytes in chunk list. The chunk + * list has already been verified to fit within the RPCRDMA header. + */ +void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, + int *ch_count, int *byte_count) +{ + /* compute the number of bytes represented by read chunks */ + *byte_count = 0; + *ch_count = 0; + for (; ch->rc_discrim != 0; ch++) { + *byte_count = *byte_count + ch->rc_target.rs_length; + *ch_count = *ch_count + 1; + } +} + +/* + * Decodes a write chunk list. The expected format is as follows: + * descrim : xdr_one + * nchunks : + * handle : u32 RKEY ---+ + * length : u32 | + * offset : remove va + + * . . . | + * ---+ + */ +static u32 *decode_write_list(u32 *va, u32 *vaend) +{ + int ch_no; + struct rpcrdma_write_array *ary = + (struct rpcrdma_write_array *)va; + + /* Check for not write-array */ + if (ary->wc_discrim == xdr_zero) + return (u32 *)&ary->wc_nchunks; + + if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > + (unsigned long)vaend) { + dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); + return NULL; + } + ary->wc_discrim = ntohl(ary->wc_discrim); + ary->wc_nchunks = ntohl(ary->wc_nchunks); + if (((unsigned long)&ary->wc_array[0] + + (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > + (unsigned long)vaend) { + dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", + ary, ary->wc_nchunks, vaend); + return NULL; + } + for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { + u64 ch_offset; + + ary->wc_array[ch_no].wc_target.rs_handle = + ntohl(ary->wc_array[ch_no].wc_target.rs_handle); + ary->wc_array[ch_no].wc_target.rs_length = + ntohl(ary->wc_array[ch_no].wc_target.rs_length); + va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; + xdr_decode_hyper(va, &ch_offset); + put_unaligned(ch_offset, (u64 *)va); + } + + /* + * rs_length is the 2nd 4B field in wc_target and taking its + * address skips the list terminator + */ + return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length; +} + +static u32 *decode_reply_array(u32 *va, u32 *vaend) +{ + int ch_no; + struct rpcrdma_write_array *ary = + (struct rpcrdma_write_array *)va; + + /* Check for no reply-array */ + if (ary->wc_discrim == xdr_zero) + return (u32 *)&ary->wc_nchunks; + + if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > + (unsigned long)vaend) { + dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); + return NULL; + } + ary->wc_discrim = ntohl(ary->wc_discrim); + ary->wc_nchunks = ntohl(ary->wc_nchunks); + if (((unsigned long)&ary->wc_array[0] + + (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > + (unsigned long)vaend) { + dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", + ary, ary->wc_nchunks, vaend); + return NULL; + } + for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { + u64 ch_offset; + + ary->wc_array[ch_no].wc_target.rs_handle = + ntohl(ary->wc_array[ch_no].wc_target.rs_handle); + ary->wc_array[ch_no].wc_target.rs_length = + ntohl(ary->wc_array[ch_no].wc_target.rs_length); + va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; + xdr_decode_hyper(va, &ch_offset); + put_unaligned(ch_offset, (u64 *)va); + } + + return (u32 *)&ary->wc_array[ch_no]; +} + +int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, + struct svc_rqst *rqstp) +{ + struct rpcrdma_msg *rmsgp = NULL; + u32 *va; + u32 *vaend; + u32 hdr_len; + + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + + /* Verify that there's enough bytes for header + something */ + if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { + dprintk("svcrdma: header too short = %d\n", + rqstp->rq_arg.len); + return -EINVAL; + } + + /* Decode the header */ + rmsgp->rm_xid = ntohl(rmsgp->rm_xid); + rmsgp->rm_vers = ntohl(rmsgp->rm_vers); + rmsgp->rm_credit = ntohl(rmsgp->rm_credit); + rmsgp->rm_type = ntohl(rmsgp->rm_type); + + if (rmsgp->rm_vers != RPCRDMA_VERSION) + return -ENOSYS; + + /* Pull in the extra for the padded case and bump our pointer */ + if (rmsgp->rm_type == RDMA_MSGP) { + int hdrlen; + rmsgp->rm_body.rm_padded.rm_align = + ntohl(rmsgp->rm_body.rm_padded.rm_align); + rmsgp->rm_body.rm_padded.rm_thresh = + ntohl(rmsgp->rm_body.rm_padded.rm_thresh); + + va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; + rqstp->rq_arg.head[0].iov_base = va; + hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); + rqstp->rq_arg.head[0].iov_len -= hdrlen; + if (hdrlen > rqstp->rq_arg.len) + return -EINVAL; + return hdrlen; + } + + /* The chunk list may contain either a read chunk list or a write + * chunk list and a reply chunk list. + */ + va = &rmsgp->rm_body.rm_chunks[0]; + vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); + va = decode_read_list(va, vaend); + if (!va) + return -EINVAL; + va = decode_write_list(va, vaend); + if (!va) + return -EINVAL; + va = decode_reply_array(va, vaend); + if (!va) + return -EINVAL; + + rqstp->rq_arg.head[0].iov_base = va; + hdr_len = (unsigned long)va - (unsigned long)rmsgp; + rqstp->rq_arg.head[0].iov_len -= hdr_len; + + *rdma_req = rmsgp; + return hdr_len; +} + +int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp) +{ + struct rpcrdma_msg *rmsgp = NULL; + struct rpcrdma_read_chunk *ch; + struct rpcrdma_write_array *ary; + u32 *va; + u32 hdrlen; + + dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n", + rqstp); + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + + /* Pull in the extra for the padded case and bump our pointer */ + if (rmsgp->rm_type == RDMA_MSGP) { + va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; + rqstp->rq_arg.head[0].iov_base = va; + hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); + rqstp->rq_arg.head[0].iov_len -= hdrlen; + return hdrlen; + } + + /* + * Skip all chunks to find RPC msg. These were previously processed + */ + va = &rmsgp->rm_body.rm_chunks[0]; + + /* Skip read-list */ + for (ch = (struct rpcrdma_read_chunk *)va; + ch->rc_discrim != xdr_zero; ch++); + va = (u32 *)&ch->rc_position; + + /* Skip write-list */ + ary = (struct rpcrdma_write_array *)va; + if (ary->wc_discrim == xdr_zero) + va = (u32 *)&ary->wc_nchunks; + else + /* + * rs_length is the 2nd 4B field in wc_target and taking its + * address skips the list terminator + */ + va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length; + + /* Skip reply-array */ + ary = (struct rpcrdma_write_array *)va; + if (ary->wc_discrim == xdr_zero) + va = (u32 *)&ary->wc_nchunks; + else + va = (u32 *)&ary->wc_array[ary->wc_nchunks]; + + rqstp->rq_arg.head[0].iov_base = va; + hdrlen = (unsigned long)va - (unsigned long)rmsgp; + rqstp->rq_arg.head[0].iov_len -= hdrlen; + + return hdrlen; +} + +int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, + struct rpcrdma_msg *rmsgp, + enum rpcrdma_errcode err, u32 *va) +{ + u32 *startp = va; + + *va++ = htonl(rmsgp->rm_xid); + *va++ = htonl(rmsgp->rm_vers); + *va++ = htonl(xprt->sc_max_requests); + *va++ = htonl(RDMA_ERROR); + *va++ = htonl(err); + if (err == ERR_VERS) { + *va++ = htonl(RPCRDMA_VERSION); + *va++ = htonl(RPCRDMA_VERSION); + } + + return (int)((unsigned long)va - (unsigned long)startp); +} + +int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_write_array *wr_ary; + + /* There is no read-list in a reply */ + + /* skip write list */ + wr_ary = (struct rpcrdma_write_array *) + &rmsgp->rm_body.rm_chunks[1]; + if (wr_ary->wc_discrim) + wr_ary = (struct rpcrdma_write_array *) + &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. + wc_target.rs_length; + else + wr_ary = (struct rpcrdma_write_array *) + &wr_ary->wc_nchunks; + + /* skip reply array */ + if (wr_ary->wc_discrim) + wr_ary = (struct rpcrdma_write_array *) + &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; + else + wr_ary = (struct rpcrdma_write_array *) + &wr_ary->wc_nchunks; + + return (unsigned long) wr_ary - (unsigned long) rmsgp; +} + +void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) +{ + struct rpcrdma_write_array *ary; + + /* no read-list */ + rmsgp->rm_body.rm_chunks[0] = xdr_zero; + + /* write-array discrim */ + ary = (struct rpcrdma_write_array *) + &rmsgp->rm_body.rm_chunks[1]; + ary->wc_discrim = xdr_one; + ary->wc_nchunks = htonl(chunks); + + /* write-list terminator */ + ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; + + /* reply-array discriminator */ + ary->wc_array[chunks].wc_target.rs_length = xdr_zero; +} + +void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, + int chunks) +{ + ary->wc_discrim = xdr_one; + ary->wc_nchunks = htonl(chunks); +} + +void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, + int chunk_no, + u32 rs_handle, u64 rs_offset, + u32 write_len) +{ + struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; + seg->rs_handle = htonl(rs_handle); + seg->rs_length = htonl(write_len); + xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset); +} + +void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, + struct rpcrdma_msg *rdma_argp, + struct rpcrdma_msg *rdma_resp, + enum rpcrdma_proc rdma_type) +{ + rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); + rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); + rdma_resp->rm_credit = htonl(xprt->sc_max_requests); + rdma_resp->rm_type = htonl(rdma_type); + + /* Encode chunks lists */ + rdma_resp->rm_body.rm_chunks[0] = xdr_zero; + rdma_resp->rm_body.rm_chunks[1] = xdr_zero; + rdma_resp->rm_body.rm_chunks[2] = xdr_zero; +} -- 1.5.4.rc2.60.gb2e62