2015-09-04 19:08:09

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 0/5] NFS: Add READ_PLUS support

This is an updated posting of my NFS v4.2 READ_PLUS patches from several
months ago, and it should apply against current development trees.

I tested this code using dd to read a file between two KVMs, and I compared
the reported transfer rates against various NFS versions and the local XFS
filesystem. I tested using four 5 GB files, the first two simply entirely
data or entirely hole. The others alternate between data and hole pages, at
either 4 KB (one page) or 8 KB (two page) intervals.

I found that dd uses a default block size of 512 bytes, which could cause
reads over NFS to take forever. I bumped this up to 128K during my testing
by passing the argument "bs=128K" to dd. My results are below:

| XFS | NFS v3 | NFS v4.0 | NFS v4.1 | NFS v4.2
-------|------------|------------|------------|------------|------------
Data | 2.7 GB/s | 845 MB/s | 864 MB/s | 847 MB/s | 807 MB/s
Hole | 4.1 GB/s | 980 MB/s | 1.1 GB/s | 989 MB/s | 2.3 GB/s
1 Page | 1.6 GB/s | 681 MB/s | 683 MB/s | 688 MB/s | 672 MB/s
2 Page | 2.5 GB/s | 760 MB/s | 760 MB/s | 755 MB/s | 836 MB/s

The pure data case is slightly slower on NFS v4.2, most likely due to
additional server-side lseeks while reading. The alternating 1 page
regions test didn't see as large a slowdown, most likely because of to
the additional savings by not transferring zeroes over the wire. Any
slowdown caused by additional seeks is more than made up for by the time
we reach 2 page intervals.

These patches and the corresponding server changes are available in the
[read_plus] branch of

git://git.linux-nfs.org/projects/anna/linux-nfs.git

Questions? Comments? Thoughts?

Anna


Anna Schumaker (5):
SUNRPC: Split out a function for setting current page
SUNRPC: Add the ability to expand holes in data pages
NFS: Add basic READ_PLUS support
SUNRPC: Add the ability to shift data to a specific offset
NFS: Add support for decoding multiple segments

fs/nfs/nfs42xdr.c | 162 ++++++++++++++++++++++++++++++
fs/nfs/nfs4proc.c | 32 +++++-
fs/nfs/nfs4xdr.c | 1 +
include/linux/nfs4.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 2 +-
include/linux/sunrpc/xdr.h | 2 +
net/sunrpc/xdr.c | 243 ++++++++++++++++++++++++++++++++++++++++++++-
8 files changed, 438 insertions(+), 6 deletions(-)

--
2.5.1



2015-09-04 19:07:54

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 1/5] SUNRPC: Split out a function for setting current page

I'm going to need this bit of code in a few places for READ_PLUS
decoding, so let's make it a helper function.

Signed-off-by: Anna Schumaker <[email protected]>
---
net/sunrpc/xdr.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 4439ac4..b1c4ffb 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -759,6 +759,12 @@ static int xdr_set_page_base(struct xdr_stream *xdr,
return 0;
}

+static void xdr_set_page(struct xdr_stream *xdr, unsigned int base)
+{
+ if (xdr_set_page_base(xdr, base, PAGE_SIZE) < 0)
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
+}
+
static void xdr_set_next_page(struct xdr_stream *xdr)
{
unsigned int newbase;
@@ -766,8 +772,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
newbase -= xdr->buf->page_base;

- if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
+ xdr_set_page(xdr, newbase);
}

static bool xdr_set_next_buffer(struct xdr_stream *xdr)
--
2.5.1


2015-09-04 19:07:55

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 2/5] SUNRPC: Add the ability to expand holes in data pages

This patch adds the ability to "read a hole" into a set of XDR data
pages by taking the following steps:

1) Shift all data after the current xdr->p to the right, possibly into
the tail,
2) Zero the specified range, and
3) Update xdr->p to point beyond the hole.

Signed-off-by: Anna Schumaker <[email protected]>
---
include/linux/sunrpc/xdr.h | 1 +
net/sunrpc/xdr.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 70c6b92..81c5a3f 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -229,6 +229,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
+extern size_t xdr_expand_hole(struct xdr_stream *, size_t, size_t);

#endif /* __KERNEL__ */

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index b1c4ffb..be154e7 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -219,6 +219,40 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
} while ((len -= copy) != 0);
}

+static void
+_shift_data_right_tail(struct xdr_buf *buf, size_t pgfrom_base, size_t len)
+{
+ struct kvec *tail = buf->tail;
+
+ /* Make room for new data. */
+ if (tail->iov_len > 0)
+ memmove((char *)tail->iov_base + len, tail->iov_base, len);
+
+ _copy_from_pages((char *)tail->iov_base,
+ buf->pages,
+ buf->page_base + pgfrom_base,
+ len);
+
+ tail->iov_len += len;
+}
+
+static void
+_shift_data_right(struct xdr_buf *buf, size_t to, size_t from, size_t len)
+{
+ size_t shift = len;
+
+ if ((to + len) > buf->page_len) {
+ shift = (to + len) - buf->page_len;
+ _shift_data_right_tail(buf, (from + len) - shift, shift);
+ shift = len - shift;
+ }
+
+ _shift_data_right_pages(buf->pages,
+ buf->page_base + to,
+ buf->page_base + from,
+ shift);
+}
+
/**
* _copy_to_pages
* @pages: array of pages
@@ -304,6 +338,33 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
EXPORT_SYMBOL_GPL(_copy_from_pages);

/**
+ * _zero_data_pages
+ * @pages: array of pages
+ * @pgbase: beginning page vector address
+ * @len: length
+ */
+static void
+_zero_data_pages(struct page **pages, size_t pgbase, size_t len)
+{
+ struct page **page;
+ size_t zero;
+
+ page = pages + (pgbase >> PAGE_CACHE_SHIFT);
+ pgbase &= ~PAGE_CACHE_MASK;
+
+ do {
+ zero = len;
+ if (pgbase + zero > PAGE_SIZE)
+ zero = PAGE_SIZE - pgbase;
+
+ zero_user_segment(*page, pgbase, pgbase + zero);
+ page++;
+ pgbase = 0;
+
+ } while ((len -= zero) != 0);
+}
+
+/**
* xdr_shrink_bufhead
* @buf: xdr_buf
* @len: bytes to remove from buf->head[0]
@@ -445,6 +506,24 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
EXPORT_SYMBOL_GPL(xdr_stream_pos);

/**
+ * xdr_page_pos - Return the current offset from the start of the xdr->buf->pages
+ * @xdr: pointer to struct xdr_stream
+ */
+static size_t xdr_page_pos(const struct xdr_stream *xdr)
+{
+ unsigned int offset;
+ unsigned int base = xdr->buf->page_len;
+ void *kaddr = xdr->buf->tail->iov_base;;
+
+ if (xdr->page_ptr) {
+ base = (xdr->page_ptr - xdr->buf->pages) * PAGE_SIZE;
+ kaddr = page_address(*xdr->page_ptr);
+ }
+ offset = xdr->p - (__be32 *)kaddr;
+ return base + (offset * sizeof(__be32));
+}
+
+/**
* xdr_init_encode - Initialize a struct xdr_stream for sending data.
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer in which to encode data
@@ -976,6 +1055,26 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
}
EXPORT_SYMBOL_GPL(xdr_read_pages);

+size_t xdr_expand_hole(struct xdr_stream *xdr, size_t offset, size_t length)
+{
+ struct xdr_buf *buf = xdr->buf;
+ size_t from = 0;
+
+ if ((offset + length) > buf->page_len)
+ length = buf->page_len - offset;
+
+ if (offset == 0)
+ xdr_align_pages(xdr, xdr->nwords << 2);
+ else
+ from = xdr_page_pos(xdr);
+
+ _shift_data_right(buf, offset + length, from, xdr->nwords << 2);
+ _zero_data_pages(buf->pages, buf->page_base + offset, length);
+ xdr_set_page(xdr, offset + length);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_expand_hole);
+
/**
* xdr_enter_page - decode data from the XDR page
* @xdr: pointer to xdr_stream struct
--
2.5.1


2015-09-04 19:07:56

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 3/5] NFS: Add basic READ_PLUS support

This patch adds support for decoding a single NFS4_CONTENT_DATA or
NFS4_CONTENT_HOLE segment returned by the server. This gives a simple
implementation that does not need to spent a lot of time shifting data
arount.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfs/nfs42xdr.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++
fs/nfs/nfs4proc.c | 32 +++++++++-
fs/nfs/nfs4xdr.c | 1 +
include/linux/nfs4.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 2 +-
6 files changed, 191 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0eb29e1..333da85 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -15,6 +15,14 @@
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
+#define encode_read_plus_maxsz (op_encode_hdr_maxsz + \
+ encode_stateid_maxsz + 3)
+#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
+ 1 /* rpr_eof */ + \
+ 1 /* rpr_contents count */ + \
+ 1 /* data_content4 */ + \
+ 2 /* data_info4.di_offset */ + \
+ 2 /* data_info4.di_length */)
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -51,6 +59,12 @@
decode_putfh_maxsz + \
decode_deallocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_read_plus_maxsz)
+#define NFS4_dec_read_plus_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_read_plus_maxsz)
#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_seek_maxsz)
@@ -91,6 +105,16 @@ static void encode_deallocate(struct xdr_stream *xdr,
encode_fallocate(xdr, args);
}

+static void encode_read_plus(struct xdr_stream *xdr,
+ struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->stateid);
+ encode_uint64(xdr, args->offset);
+ encode_uint32(xdr, args->count);
+}
+
static void encode_seek(struct xdr_stream *xdr,
struct nfs42_seek_args *args,
struct compound_hdr *hdr)
@@ -167,6 +191,28 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
}

/*
+ * Encode READ_PLUS request
+ */
+static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_pgio_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_read_plus(xdr, args, &hdr);
+
+ xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
+ args->pages, args->pgbase, args->count);
+ req->rq_rcv_buf.flags |= XDRBUF_READ;
+ encode_nops(&hdr);
+}
+
+/*
* Encode SEEK request
*/
static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
@@ -216,6 +262,92 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
return decode_op_hdr(xdr, OP_DEALLOCATE);
}

+static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res)
+{
+ __be32 *p;
+ uint32_t count, recvd;
+ uint64_t offset;
+
+ p = xdr_inline_decode(xdr, 8 + 4);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ p = xdr_decode_hyper(p, &offset);
+ count = be32_to_cpup(p);
+
+ recvd = xdr_read_pages(xdr, count);
+ if (recvd < count)
+ res->eof = 0;
+
+ res->count = recvd;
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res)
+{
+ __be32 *p;
+ uint64_t offset, length;
+ size_t recvd;
+
+ p = xdr_inline_decode(xdr, 8 + 8);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ p = xdr_decode_hyper(p, &offset);
+ p = xdr_decode_hyper(p, &length);
+
+ recvd = xdr_expand_hole(xdr, 0, length);
+ if (recvd < length)
+ res->eof = 0;
+
+ res->count = recvd;
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
+{
+ __be32 *p;
+ int status, type;
+ uint32_t segments;
+
+ status = decode_op_hdr(xdr, OP_READ_PLUS);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ res->count = 0;
+ res->eof = be32_to_cpup(p++);
+ segments = be32_to_cpup(p++);
+ if (segments == 0)
+ return 0;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ type = be32_to_cpup(p++);
+ if (type == NFS4_CONTENT_DATA)
+ status = decode_read_plus_data(xdr, res);
+ else
+ status = decode_read_plus_hole(xdr, res);
+
+ if (segments > 1)
+ res->eof = 0;
+ return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
{
int status;
@@ -298,6 +430,32 @@ out:
}

/*
+ * Decode READ_PLUS request
+ */
+static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_pgio_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_read_plus(xdr, res);
+ if (!status)
+ status = res->count;
+out:
+ return status;
+}
+
+/*
* Decode SEEK request
*/
static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 693b903..54066e1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -69,6 +69,10 @@

#include "nfs4trace.h"

+#ifdef CONFIG_NFS_V4_2
+#include "nfs42.h"
+#endif /* CONFIG_NFS_V4_2 */
+
#define NFSDBG_FACILITY NFSDBG_PROC

#define NFS4_POLL_RETRY_MIN (HZ/10)
@@ -4239,9 +4243,15 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,

static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
-
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
dprintk("--> %s\n", __func__);

+ if ((server->caps & NFS_CAP_READ_PLUS) && (task->tk_status == -ENOTSUPP)) {
+ server->caps &= ~NFS_CAP_READ_PLUS;
+ if (rpc_restart_call_prepare(task))
+ task->tk_status = 0;
+ return -EAGAIN;
+ }
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
@@ -4250,12 +4260,27 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
nfs4_read_done_cb(task, hdr);
}

+#ifdef CONFIG_NFS_V4_2
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ if (server->caps & NFS_CAP_READ_PLUS)
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
+ else
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#else
+static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+{
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+}
+#endif /* CONFIG_NFS_V4_2 */
+
static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
hdr->timestamp = jiffies;
hdr->pgio_done_cb = nfs4_read_done_cb;
- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
}

@@ -8681,7 +8706,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_ALLOCATE
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
- | NFS_CAP_LAYOUTSTATS,
+ | NFS_CAP_LAYOUTSTATS
+ | NFS_CAP_READ_PLUS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ff4784c..fe4ac5b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7461,6 +7461,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(ALLOCATE, enc_allocate, dec_allocate),
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
+ PROC(READ_PLUS, enc_read_plus, dec_read_plus),
#endif /* CONFIG_NFS_V4_2 */
};

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 00121f2..44c009c 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -501,6 +501,7 @@ enum {
NFSPROC4_CLNT_ALLOCATE,
NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS,
+ NFSPROC4_CLNT_READ_PLUS,
};

/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 570a7df..dd67d6f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -243,5 +243,6 @@ struct nfs_server {
#define NFS_CAP_ALLOCATE (1U << 20)
#define NFS_CAP_DEALLOCATE (1U << 21)
#define NFS_CAP_LAYOUTSTATS (1U << 22)
+#define NFS_CAP_READ_PLUS (1U << 23)

#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b4392d8..ca9ea32 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -563,7 +563,7 @@ struct nfs_pgio_args {
struct nfs_pgio_res {
struct nfs4_sequence_res seq_res;
struct nfs_fattr * fattr;
- __u32 count;
+ __u64 count;
__u32 op_status;
int eof; /* used by read */
struct nfs_writeverf * verf; /* used by write */
--
2.5.1


2015-09-04 19:07:57

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 5/5] NFS: Add support for decoding multiple segments

We now have everything we need to read holes and then shift data to
where it's supposed to be.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfs/nfs42xdr.c | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 333da85..b0e6a35 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -275,11 +275,11 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re
p = xdr_decode_hyper(p, &offset);
count = be32_to_cpup(p);

- recvd = xdr_read_pages(xdr, count);
+ recvd = xdr_align_data(xdr, res->count, count);
if (recvd < count)
res->eof = 0;

- res->count = recvd;
+ res->count += recvd;
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -299,11 +299,11 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re
p = xdr_decode_hyper(p, &offset);
p = xdr_decode_hyper(p, &length);

- recvd = xdr_expand_hole(xdr, 0, length);
+ recvd = xdr_expand_hole(xdr, res->count, length);
if (recvd < length)
res->eof = 0;

- res->count = recvd;
+ res->count += recvd;
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -314,7 +314,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
__be32 *p;
int status, type;
- uint32_t segments;
+ uint32_t i, segments;

status = decode_op_hdr(xdr, OP_READ_PLUS);
if (status)
@@ -327,20 +327,24 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
res->count = 0;
res->eof = be32_to_cpup(p++);
segments = be32_to_cpup(p++);
- if (segments == 0)
- return 0;

- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
+ for (i = 0; i < segments; i++) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;

- type = be32_to_cpup(p++);
- if (type == NFS4_CONTENT_DATA)
- status = decode_read_plus_data(xdr, res);
- else
- status = decode_read_plus_hole(xdr, res);
+ type = be32_to_cpup(p);
+ if (type == NFS4_CONTENT_DATA)
+ status = decode_read_plus_data(xdr, res);
+ else
+ status = decode_read_plus_hole(xdr, res);
+ if (status)
+ break;
+ if (res->count == xdr->buf->page_len)
+ break;
+ }

- if (segments > 1)
+ if (i < segments)
res->eof = 0;
return status;
out_overflow:
--
2.5.1


2015-09-04 19:07:57

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 4/5] SUNRPC: Add the ability to shift data to a specific offset

Expanding holes tends to put the data content a few bytes to the right
of where we want it. This patch implements a left-shift operation to
line everything up properly.

Signed-off-by: Anna Schumaker <[email protected]>
---
include/linux/sunrpc/xdr.h | 1 +
net/sunrpc/xdr.c | 135 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 136 insertions(+)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 81c5a3f..3670bf6 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -230,6 +230,7 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
extern size_t xdr_expand_hole(struct xdr_stream *, size_t, size_t);
+extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint64_t);

#endif /* __KERNEL__ */

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index be154e7..7f64274 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -16,6 +16,9 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>

+static void _copy_to_pages(struct page **, size_t, const char *, size_t);
+
+
/*
* XDR functions for basic NFS types
*/
@@ -253,6 +256,117 @@ _shift_data_right(struct xdr_buf *buf, size_t to, size_t from, size_t len)
shift);
}

+
+/**
+ * _shift_data_left_pages
+ * @pages: vector of pages containing both the source and dest memory area.
+ * @pgto_base: page vector address of destination
+ * @pgfrom_base: page vector address of source
+ * @len: number of bytes to copy
+ *
+ * Note: the addresses pgto_base and pgfrom_base are both calculated in
+ * the same way:
+ * if a memory area starts at byte 'base' in page 'pages[i]',
+ * then its address is given as (i << PAGE_CACHE_SHIFT) + base
+ * Alse note: pgto_base must be < pgfrom_base, but the memory areas
+ * they point to may overlap.
+ */
+static void
+_shift_data_left_pages(struct page **pages, size_t pgto_base,
+ size_t pgfrom_base, size_t len)
+{
+ struct page **pgfrom, **pgto;
+ char *vfrom, *vto;
+ size_t copy;
+
+ BUG_ON(pgfrom_base <= pgto_base);
+
+ pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT);
+ pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT);
+
+ pgto_base = pgto_base % PAGE_CACHE_SIZE;
+ pgfrom_base = pgfrom_base % PAGE_CACHE_SIZE;
+
+ do {
+ if (pgto_base >= PAGE_CACHE_SIZE) {
+ pgto_base = 0;
+ pgto++;
+ }
+ if (pgfrom_base >= PAGE_CACHE_SIZE){
+ pgfrom_base = 0;
+ pgfrom++;
+ }
+
+ copy = len;
+ if (copy > (PAGE_CACHE_SIZE - pgto_base))
+ copy = PAGE_CACHE_SIZE - pgto_base;
+ if (copy > (PAGE_CACHE_SIZE - pgfrom_base))
+ copy = PAGE_CACHE_SIZE - pgfrom_base;
+
+ if (pgto_base == 131056)
+ break;
+
+ vto = kmap_atomic(*pgto);
+ if (*pgto != *pgfrom) {
+ vfrom = kmap_atomic(*pgfrom);
+ memcpy(vto + pgto_base, vfrom + pgfrom_base, copy);
+ kunmap_atomic(vfrom);
+ } else
+ memmove(vto + pgto_base, vto + pgfrom_base, copy);
+ flush_dcache_page(*pgto);
+ kunmap_atomic(vto);
+
+ pgto_base += copy;
+ pgfrom_base += copy;
+
+ } while ((len -= copy) != 0);
+}
+
+static void
+_shift_data_left_tail(struct xdr_buf *buf, size_t pgto_base,
+ size_t tail_from, size_t len)
+{
+ struct kvec *tail = buf->tail;
+ size_t shift = len;
+
+ if (len == 0)
+ return;
+ if (pgto_base + len > buf->page_len)
+ shift = buf->page_len - pgto_base;
+
+ _copy_to_pages(buf->pages,
+ buf->page_base + pgto_base,
+ (char *)(tail->iov_base + tail_from),
+ shift);
+
+ memmove((char *)tail->iov_base, tail->iov_base + tail_from + shift, shift);
+ tail->iov_len -= (tail_from + shift);
+}
+
+static void
+_shift_data_left(struct xdr_buf *buf, size_t to, size_t from, size_t len)
+{
+ size_t shift = len;
+
+ if (from < buf->page_len) {
+ shift = min(len, buf->page_len - from);
+ _shift_data_left_pages(buf->pages,
+ buf->page_base + to,
+ buf->page_base + from,
+ shift);
+ to += shift;
+ from += shift;
+ shift = len - shift;
+ }
+
+ if (shift == 0)
+ return;
+ if (from >= buf->page_len)
+ from -= buf->page_len;
+
+ _shift_data_left_tail(buf, to, from, shift);
+}
+
/**
* _copy_to_pages
* @pages: array of pages
@@ -1075,6 +1189,27 @@ size_t xdr_expand_hole(struct xdr_stream *xdr, size_t offset, size_t length)
}
EXPORT_SYMBOL_GPL(xdr_expand_hole);

+uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint64_t length)
+{
+ struct xdr_buf *buf = xdr->buf;
+ size_t from = offset;
+
+ if (offset + length > buf->page_len)
+ length = buf->page_len - offset;
+
+ if (offset == 0)
+ xdr_align_pages(xdr, xdr->nwords << 2);
+ else {
+ from = xdr_page_pos(xdr);
+ _shift_data_left(buf, offset, from, length);
+ }
+
+ xdr->nwords -= XDR_QUADLEN(length);
+ xdr_set_page(xdr, from + length);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_align_data);
+
/**
* xdr_enter_page - decode data from the XDR page
* @xdr: pointer to xdr_stream struct
--
2.5.1