Return-Path: Received: from metis.ext.pengutronix.de ([92.198.50.35]:53314 "EHLO metis.ext.pengutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752768Ab1AJKu6 (ORCPT ); Mon, 10 Jan 2011 05:50:58 -0500 Date: Mon, 10 Jan 2011 11:50:19 +0100 From: Uwe =?iso-8859-1?Q?Kleine-K=F6nig?= To: Trond Myklebust Cc: James Bottomley , Russell King - ARM Linux , Linus Torvalds , linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org, Marc Kleine-Budde , Marc Kleine-Budde , linux-arm-kernel@lists.infradead.org, Parisc List , linux-arch@vger.kernel.org Subject: Re: still nfs problems [Was: Linux 2.6.37-rc8] Message-ID: <20110110105019.GC24920@pengutronix.de> References: <1294268808.2952.18.camel@heimdal.trondhjem.org> <1294270104.16957.73.camel@mulgrave.site> <1294335614.22825.154.camel@mulgrave.site> <1294336054.2905.1.camel@heimdal.trondhjem.org> <1294426405.2929.23.camel@heimdal.trondhjem.org> <20110107190229.GX31708@n2100.arm.linux.org.uk> <1294427467.4895.66.camel@mulgrave.site> <1294505384.4181.14.camel@heimdal.trondhjem.org> <1294528551.4181.19.camel@heimdal.trondhjem.org> Content-Type: text/plain; charset=iso-8859-1 In-Reply-To: <1294528551.4181.19.camel@heimdal.trondhjem.org> Sender: linux-nfs-owner@vger.kernel.org List-ID: MIME-Version: 1.0 Hi Trond, On Sat, Jan 08, 2011 at 06:15:51PM -0500, Trond Myklebust wrote: > ----------------------------------------------------------------------------------- It would be great if you could add a "8<" in the line above next time. Then git-am -c does the right thing (at least I think so). > From 8b2e60cef5c65eef41ab61286f62dec6bfb1ac27 Mon Sep 17 00:00:00 2001 > From: Trond Myklebust > Date: Sat, 8 Jan 2011 17:45:38 -0500 > Subject: [PATCH] NFS: Don't use vm_map_ram() in readdir > > vm_map_ram() is not available on NOMMU platforms, and causes trouble > on incoherrent architectures such as ARM when we access the page data > through both the direct and the virtual mapping. > > The alternative is to use the direct mapping to access page data > for the case when we are not crossing a page boundary, but to copy > the data into a linear scratch buffer when we are accessing data > that spans page boundaries. > > Signed-off-by: Trond Myklebust Tested-by: Uwe Kleine-K?nig on tx28. Thanks Uwe > --- > fs/nfs/dir.c | 44 ++++++------- > fs/nfs/nfs2xdr.c | 6 -- > fs/nfs/nfs3xdr.c | 6 -- > fs/nfs/nfs4xdr.c | 6 -- > include/linux/sunrpc/xdr.h | 4 +- > net/sunrpc/xdr.c | 155 +++++++++++++++++++++++++++++++++++--------- > 6 files changed, 148 insertions(+), 73 deletions(-) > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > index 996dd89..0108cf4 100644 > --- a/fs/nfs/dir.c > +++ b/fs/nfs/dir.c > @@ -33,7 +33,6 @@ > #include > #include > #include > -#include > #include > > #include "delegation.h" > @@ -459,25 +458,26 @@ out: > /* Perform conversion from xdr to cache array */ > static > int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, > - void *xdr_page, struct page *page, unsigned int buflen) > + struct page **xdr_pages, struct page *page, unsigned int buflen) > { > struct xdr_stream stream; > - struct xdr_buf buf; > - __be32 *ptr = xdr_page; > + struct xdr_buf buf = { > + .pages = xdr_pages, > + .page_len = buflen, > + .buflen = buflen, > + .len = buflen, > + }; > + struct page *scratch; > struct nfs_cache_array *array; > unsigned int count = 0; > int status; > > - buf.head->iov_base = xdr_page; > - buf.head->iov_len = buflen; > - buf.tail->iov_len = 0; > - buf.page_base = 0; > - buf.page_len = 0; > - buf.buflen = buf.head->iov_len; > - buf.len = buf.head->iov_len; > - > - xdr_init_decode(&stream, &buf, ptr); > + scratch = alloc_page(GFP_KERNEL); > + if (scratch == NULL) > + return -ENOMEM; > > + xdr_init_decode(&stream, &buf, NULL); > + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); > > do { > status = xdr_decode(desc, entry, &stream); > @@ -506,6 +506,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en > } else > status = PTR_ERR(array); > } > + > + put_page(scratch); > return status; > } > > @@ -521,7 +523,6 @@ static > void nfs_readdir_free_large_page(void *ptr, struct page **pages, > unsigned int npages) > { > - vm_unmap_ram(ptr, npages); > nfs_readdir_free_pagearray(pages, npages); > } > > @@ -530,9 +531,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages, > * to nfs_readdir_free_large_page > */ > static > -void *nfs_readdir_large_page(struct page **pages, unsigned int npages) > +int nfs_readdir_large_page(struct page **pages, unsigned int npages) > { > - void *ptr; > unsigned int i; > > for (i = 0; i < npages; i++) { > @@ -541,13 +541,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages) > goto out_freepages; > pages[i] = page; > } > + return 0; > > - ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL); > - if (!IS_ERR_OR_NULL(ptr)) > - return ptr; > out_freepages: > nfs_readdir_free_pagearray(pages, i); > - return NULL; > + return -ENOMEM; > } > > static > @@ -577,8 +575,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, > memset(array, 0, sizeof(struct nfs_cache_array)); > array->eof_index = -1; > > - pages_ptr = nfs_readdir_large_page(pages, array_size); > - if (!pages_ptr) > + status = nfs_readdir_large_page(pages, array_size); > + if (status < 0) > goto out_release_array; > do { > unsigned int pglen; > @@ -587,7 +585,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, > if (status < 0) > break; > pglen = status; > - status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen); > + status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); > if (status < 0) { > if (status == -ENOSPC) > status = 0; > diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c > index 5914a19..b382a1b 100644 > --- a/fs/nfs/nfs2xdr.c > +++ b/fs/nfs/nfs2xdr.c > @@ -487,12 +487,6 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se > > entry->d_type = DT_UNKNOWN; > > - p = xdr_inline_peek(xdr, 8); > - if (p != NULL) > - entry->eof = !p[0] && p[1]; > - else > - entry->eof = 0; > - > return p; > > out_overflow: > diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c > index f6cc60f..ba91236 100644 > --- a/fs/nfs/nfs3xdr.c > +++ b/fs/nfs/nfs3xdr.c > @@ -647,12 +647,6 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s > memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); > } > > - p = xdr_inline_peek(xdr, 8); > - if (p != NULL) > - entry->eof = !p[0] && p[1]; > - else > - entry->eof = 0; > - > return p; > > out_overflow: > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c > index 9f1826b..0662a98 100644 > --- a/fs/nfs/nfs4xdr.c > +++ b/fs/nfs/nfs4xdr.c > @@ -6215,12 +6215,6 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, > if (verify_attr_len(xdr, p, len) < 0) > goto out_overflow; > > - p = xdr_inline_peek(xdr, 8); > - if (p != NULL) > - entry->eof = !p[0] && p[1]; > - else > - entry->eof = 0; > - > return p; > > out_overflow: > diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h > index 498ab93..7783c68 100644 > --- a/include/linux/sunrpc/xdr.h > +++ b/include/linux/sunrpc/xdr.h > @@ -201,6 +201,8 @@ struct xdr_stream { > > __be32 *end; /* end of available buffer space */ > struct kvec *iov; /* pointer to the current kvec */ > + struct kvec scratch; /* Scratch buffer */ > + struct page **page_ptr; /* pointer to the current page */ > }; > > extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); > @@ -208,7 +210,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); > extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, > unsigned int base, unsigned int len); > extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); > -extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes); > +extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); > extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); > extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); > extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); > diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c > index cd9e841..679cd67 100644 > --- a/net/sunrpc/xdr.c > +++ b/net/sunrpc/xdr.c > @@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b > } > EXPORT_SYMBOL_GPL(xdr_write_pages); > > +static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov, > + __be32 *p, unsigned int len) > +{ > + if (len > iov->iov_len) > + len = iov->iov_len; > + if (p == NULL) > + p = (__be32*)iov->iov_base; > + xdr->p = p; > + xdr->end = (__be32*)(iov->iov_base + len); > + xdr->iov = iov; > + xdr->page_ptr = NULL; > +} > + > +static int xdr_set_page_base(struct xdr_stream *xdr, > + unsigned int base, unsigned int len) > +{ > + unsigned int pgnr; > + unsigned int maxlen; > + unsigned int pgoff; > + unsigned int pgend; > + void *kaddr; > + > + maxlen = xdr->buf->page_len; > + if (base >= maxlen) > + return -EINVAL; > + maxlen -= base; > + if (len > maxlen) > + len = maxlen; > + > + base += xdr->buf->page_base; > + > + pgnr = base >> PAGE_SHIFT; > + xdr->page_ptr = &xdr->buf->pages[pgnr]; > + kaddr = page_address(*xdr->page_ptr); > + > + pgoff = base & ~PAGE_MASK; > + xdr->p = (__be32*)(kaddr + pgoff); > + > + pgend = pgoff + len; > + if (pgend > PAGE_SIZE) > + pgend = PAGE_SIZE; > + xdr->end = (__be32*)(kaddr + pgend); > + xdr->iov = NULL; > + return 0; > +} > + > +static void xdr_set_next_page(struct xdr_stream *xdr) > +{ > + unsigned int newbase; > + > + newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT; > + newbase -= xdr->buf->page_base; > + > + if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) > + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); > +} > + > +static bool xdr_set_next_buffer(struct xdr_stream *xdr) > +{ > + if (xdr->page_ptr != NULL) > + xdr_set_next_page(xdr); > + else if (xdr->iov == xdr->buf->head) { > + if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) > + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); > + } > + return xdr->p != xdr->end; > +} > + > /** > * xdr_init_decode - Initialize an xdr_stream for decoding data. > * @xdr: pointer to xdr_stream struct > @@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages); > */ > void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) > { > - struct kvec *iov = buf->head; > - unsigned int len = iov->iov_len; > - > - if (len > buf->len) > - len = buf->len; > xdr->buf = buf; > - xdr->iov = iov; > - xdr->p = p; > - xdr->end = (__be32 *)((char *)iov->iov_base + len); > + xdr->scratch.iov_base = NULL; > + xdr->scratch.iov_len = 0; > + if (buf->head[0].iov_len != 0) > + xdr_set_iov(xdr, buf->head, p, buf->len); > + else if (buf->page_len != 0) > + xdr_set_page_base(xdr, 0, buf->len); > } > EXPORT_SYMBOL_GPL(xdr_init_decode); > > -/** > - * xdr_inline_peek - Allow read-ahead in the XDR data stream > - * @xdr: pointer to xdr_stream struct > - * @nbytes: number of bytes of data to decode > - * > - * Check if the input buffer is long enough to enable us to decode > - * 'nbytes' more bytes of data starting at the current position. > - * If so return the current pointer without updating the current > - * pointer position. > - */ > -__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes) > +static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) > { > __be32 *p = xdr->p; > __be32 *q = p + XDR_QUADLEN(nbytes); > > if (unlikely(q > xdr->end || q < p)) > return NULL; > + xdr->p = q; > return p; > } > -EXPORT_SYMBOL_GPL(xdr_inline_peek); > > /** > - * xdr_inline_decode - Retrieve non-page XDR data to decode > + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. > + * @xdr: pointer to xdr_stream struct > + * @buf: pointer to an empty buffer > + * @buflen: size of 'buf' > + * > + * The scratch buffer is used when decoding from an array of pages. > + * If an xdr_inline_decode() call spans across page boundaries, then > + * we copy the data into the scratch buffer in order to allow linear > + * access. > + */ > +void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) > +{ > + xdr->scratch.iov_base = buf; > + xdr->scratch.iov_len = buflen; > +} > +EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); > + > +static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) > +{ > + __be32 *p; > + void *cpdest = xdr->scratch.iov_base; > + size_t cplen = (char *)xdr->end - (char *)xdr->p; > + > + if (nbytes > xdr->scratch.iov_len) > + return NULL; > + memcpy(cpdest, xdr->p, cplen); > + cpdest += cplen; > + nbytes -= cplen; > + if (!xdr_set_next_buffer(xdr)) > + return NULL; > + p = __xdr_inline_decode(xdr, nbytes); > + if (p == NULL) > + return NULL; > + memcpy(cpdest, p, nbytes); > + return xdr->scratch.iov_base; > +} > + > +/** > + * xdr_inline_decode - Retrieve XDR data to decode > * @xdr: pointer to xdr_stream struct > * @nbytes: number of bytes of data to decode > * > @@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek); > */ > __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) > { > - __be32 *p = xdr->p; > - __be32 *q = p + XDR_QUADLEN(nbytes); > + __be32 *p; > > - if (unlikely(q > xdr->end || q < p)) > + if (nbytes == 0) > + return xdr->p; > + if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) > return NULL; > - xdr->p = q; > - return p; > + p = __xdr_inline_decode(xdr, nbytes); > + if (p != NULL) > + return p; > + return xdr_copy_to_scratch(xdr, nbytes); > } > EXPORT_SYMBOL_GPL(xdr_inline_decode); > > @@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages); > */ > void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) > { > - char * kaddr = page_address(xdr->buf->pages[0]); > xdr_read_pages(xdr, len); > /* > * Position current pointer at beginning of tail, and > * set remaining message length. > */ > - if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) > - len = PAGE_CACHE_SIZE - xdr->buf->page_base; > - xdr->p = (__be32 *)(kaddr + xdr->buf->page_base); > - xdr->end = (__be32 *)((char *)xdr->p + len); > + xdr_set_page_base(xdr, 0, len); > } > EXPORT_SYMBOL_GPL(xdr_enter_page); > > -- > 1.7.3.4 > > > > -- > Trond Myklebust > Linux NFS client maintainer > > NetApp > Trond.Myklebust@netapp.com > www.netapp.com > > -- Pengutronix e.K. | Uwe Kleine-K?nig | Industrial Linux Solutions | http://www.pengutronix.de/ |