2022-06-17 20:30:22

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH] NFS: Allow setting rsize / wsize to a multiple of PAGE_SIZE

From: Anna Schumaker <[email protected]>

Previously, we required this to value to be a power of 2 for UDP related
reasons. This patch keeps the power of 2 rule for UDP but allows more
flexibility for TCP and RDMA.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfs/client.c | 13 +++++++------
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 ++++--
fs/nfs/internal.h | 18 ++++++++++++++++++
fs/nfs/nfs4client.c | 4 ++--
4 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e828504cc396..da8da5cdbbc1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -708,9 +708,9 @@ static int nfs_init_server(struct nfs_server *server,
}

if (ctx->rsize)
- server->rsize = nfs_block_size(ctx->rsize, NULL);
+ server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto);
if (ctx->wsize)
- server->wsize = nfs_block_size(ctx->wsize, NULL);
+ server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto);

server->acregmin = ctx->acregmin * HZ;
server->acregmax = ctx->acregmax * HZ;
@@ -755,18 +755,19 @@ static int nfs_init_server(struct nfs_server *server,
static void nfs_server_set_fsinfo(struct nfs_server *server,
struct nfs_fsinfo *fsinfo)
{
+ struct nfs_client *clp = server->nfs_client;
unsigned long max_rpc_payload, raw_max_rpc_payload;

/* Work out a lot of parameters */
if (server->rsize == 0)
- server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
+ server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto);
if (server->wsize == 0)
- server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
+ server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto);

if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
- server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
+ server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto);
if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
- server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
+ server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto);

raw_max_rpc_payload = rpc_max_payload(server->client);
max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index bfa7202ca7be..e028f5a0ef5f 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -113,8 +113,10 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_drain_dsaddrs;
ds_versions[i].version = be32_to_cpup(p++);
ds_versions[i].minor_version = be32_to_cpup(p++);
- ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL);
- ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL);
+ ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++),
+ server->nfs_client->cl_proto);
+ ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++),
+ server->nfs_client->cl_proto);
ds_versions[i].tightly_coupled = be32_to_cpup(p);

if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8f8cd6e2d4db..af6d261241ff 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -704,6 +704,24 @@ unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
return nfs_block_bits(bsize, nrbitsp);
}

+/*
+ * Compute and set NFS server rsize / wsize
+ */
+static inline
+unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto)
+{
+ if (iosize < NFS_MIN_FILE_IO_SIZE)
+ iosize = NFS_DEF_FILE_IO_SIZE;
+ else if (iosize >= NFS_MAX_FILE_IO_SIZE)
+ iosize = NFS_MAX_FILE_IO_SIZE;
+ else
+ iosize = iosize & PAGE_MASK;
+
+ if (proto == XPRT_TRANSPORT_UDP)
+ return nfs_block_bits(iosize, NULL);
+ return iosize;
+}
+
/*
* Determine the maximum file size for a superblock
*/
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 47a6cf892c95..3c5678aec006 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1161,9 +1161,9 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
return error;

if (ctx->rsize)
- server->rsize = nfs_block_size(ctx->rsize, NULL);
+ server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto);
if (ctx->wsize)
- server->wsize = nfs_block_size(ctx->wsize, NULL);
+ server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto);

server->acregmin = ctx->acregmin * HZ;
server->acregmax = ctx->acregmax * HZ;
--
2.36.1


2022-11-08 10:11:39

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH] NFS: Allow setting rsize / wsize to a multiple of PAGE_SIZE

On Fri, 2022-06-17 at 16:23 -0400, Anna Schumaker wrote:
> From: Anna Schumaker <[email protected]>
>
> Previously, we required this to value to be a power of 2 for UDP related
> reasons. This patch keeps the power of 2 rule for UDP but allows more
> flexibility for TCP and RDMA.
>
> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> fs/nfs/client.c | 13 +++++++------
> fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 ++++--
> fs/nfs/internal.h | 18 ++++++++++++++++++
> fs/nfs/nfs4client.c | 4 ++--
> 4 files changed, 31 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index e828504cc396..da8da5cdbbc1 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -708,9 +708,9 @@ static int nfs_init_server(struct nfs_server *server,
> }
>
> if (ctx->rsize)
> - server->rsize = nfs_block_size(ctx->rsize, NULL);
> + server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto);
> if (ctx->wsize)
> - server->wsize = nfs_block_size(ctx->wsize, NULL);
> + server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto);
>
> server->acregmin = ctx->acregmin * HZ;
> server->acregmax = ctx->acregmax * HZ;
> @@ -755,18 +755,19 @@ static int nfs_init_server(struct nfs_server *server,
> static void nfs_server_set_fsinfo(struct nfs_server *server,
> struct nfs_fsinfo *fsinfo)
> {
> + struct nfs_client *clp = server->nfs_client;
> unsigned long max_rpc_payload, raw_max_rpc_payload;
>
> /* Work out a lot of parameters */
> if (server->rsize == 0)
> - server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
> + server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto);
> if (server->wsize == 0)
> - server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
> + server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto);
>
> if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
> - server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
> + server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto);
> if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
> - server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
> + server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto);
>
> raw_max_rpc_payload = rpc_max_payload(server->client);
> max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
> diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> index bfa7202ca7be..e028f5a0ef5f 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> @@ -113,8 +113,10 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
> goto out_err_drain_dsaddrs;
> ds_versions[i].version = be32_to_cpup(p++);
> ds_versions[i].minor_version = be32_to_cpup(p++);
> - ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL);
> - ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL);
> + ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++),
> + server->nfs_client->cl_proto);
> + ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++),
> + server->nfs_client->cl_proto);
> ds_versions[i].tightly_coupled = be32_to_cpup(p);
>
> if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 8f8cd6e2d4db..af6d261241ff 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -704,6 +704,24 @@ unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
> return nfs_block_bits(bsize, nrbitsp);
> }
>
> +/*
> + * Compute and set NFS server rsize / wsize
> + */
> +static inline
> +unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto)
> +{
> + if (iosize < NFS_MIN_FILE_IO_SIZE)
> + iosize = NFS_DEF_FILE_IO_SIZE;
> + else if (iosize >= NFS_MAX_FILE_IO_SIZE)
> + iosize = NFS_MAX_FILE_IO_SIZE;
> + else
> + iosize = iosize & PAGE_MASK;
> +
> + if (proto == XPRT_TRANSPORT_UDP)
> + return nfs_block_bits(iosize, NULL);
> + return iosize;
> +}
> +
> /*
> * Determine the maximum file size for a superblock
> */
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index 47a6cf892c95..3c5678aec006 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -1161,9 +1161,9 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
> return error;
>
> if (ctx->rsize)
> - server->rsize = nfs_block_size(ctx->rsize, NULL);
> + server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto);
> if (ctx->wsize)
> - server->wsize = nfs_block_size(ctx->wsize, NULL);
> + server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto);
>
> server->acregmin = ctx->acregmin * HZ;
> server->acregmax = ctx->acregmax * HZ;

This patch seems to have caused a regression. With this patch in place,
I can't set an rsize/wsize value that is less than 4k:

# mount server:/export /mnt -o rsize=1024,wsize=1024

...now yields:

server:/export on /mnt type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.210,local_lock=none,addr=192.168.1.3)

...such that the requested sizes were ignored.

Was this an intended effect? If we really do want to deprecate the use
of small rsize/wsize with TCP/RDMA, then we probably ought to reject
these mount attempts with -EINVAL.
--
Jeff Layton <[email protected]>

2022-11-08 16:34:13

by Tom Talpey

[permalink] [raw]
Subject: Re: [PATCH] NFS: Allow setting rsize / wsize to a multiple of PAGE_SIZE

On 11/8/2022 5:01 AM, Jeff Layton wrote:
> On Fri, 2022-06-17 at 16:23 -0400, Anna Schumaker wrote:
>> From: Anna Schumaker <[email protected]>
>>
>> Previously, we required this to value to be a power of 2 for UDP related
>> reasons. This patch keeps the power of 2 rule for UDP but allows more
>> flexibility for TCP and RDMA.
>>
>> Signed-off-by: Anna Schumaker <[email protected]>
>> ---
>> fs/nfs/client.c | 13 +++++++------
>> fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 ++++--
>> fs/nfs/internal.h | 18 ++++++++++++++++++
>> fs/nfs/nfs4client.c | 4 ++--
>> 4 files changed, 31 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>> index e828504cc396..da8da5cdbbc1 100644
>> --- a/fs/nfs/client.c
>> +++ b/fs/nfs/client.c
>> @@ -708,9 +708,9 @@ static int nfs_init_server(struct nfs_server *server,
>> }
>>
>> if (ctx->rsize)
>> - server->rsize = nfs_block_size(ctx->rsize, NULL);
>> + server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto);
>> if (ctx->wsize)
>> - server->wsize = nfs_block_size(ctx->wsize, NULL);
>> + server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto);
>>
>> server->acregmin = ctx->acregmin * HZ;
>> server->acregmax = ctx->acregmax * HZ;
>> @@ -755,18 +755,19 @@ static int nfs_init_server(struct nfs_server *server,
>> static void nfs_server_set_fsinfo(struct nfs_server *server,
>> struct nfs_fsinfo *fsinfo)
>> {
>> + struct nfs_client *clp = server->nfs_client;
>> unsigned long max_rpc_payload, raw_max_rpc_payload;
>>
>> /* Work out a lot of parameters */
>> if (server->rsize == 0)
>> - server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
>> + server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto);
>> if (server->wsize == 0)
>> - server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
>> + server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto);
>>
>> if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
>> - server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
>> + server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto);
>> if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
>> - server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
>> + server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto);
>>
>> raw_max_rpc_payload = rpc_max_payload(server->client);
>> max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
>> diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
>> index bfa7202ca7be..e028f5a0ef5f 100644
>> --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
>> +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
>> @@ -113,8 +113,10 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
>> goto out_err_drain_dsaddrs;
>> ds_versions[i].version = be32_to_cpup(p++);
>> ds_versions[i].minor_version = be32_to_cpup(p++);
>> - ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL);
>> - ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL);
>> + ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++),
>> + server->nfs_client->cl_proto);
>> + ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++),
>> + server->nfs_client->cl_proto);
>> ds_versions[i].tightly_coupled = be32_to_cpup(p);
>>
>> if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
>> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
>> index 8f8cd6e2d4db..af6d261241ff 100644
>> --- a/fs/nfs/internal.h
>> +++ b/fs/nfs/internal.h
>> @@ -704,6 +704,24 @@ unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
>> return nfs_block_bits(bsize, nrbitsp);
>> }
>>
>> +/*
>> + * Compute and set NFS server rsize / wsize
>> + */
>> +static inline
>> +unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto)
>> +{
>> + if (iosize < NFS_MIN_FILE_IO_SIZE)
>> + iosize = NFS_DEF_FILE_IO_SIZE;
>> + else if (iosize >= NFS_MAX_FILE_IO_SIZE)
>> + iosize = NFS_MAX_FILE_IO_SIZE;
>> + else
>> + iosize = iosize & PAGE_MASK;
>> +
>> + if (proto == XPRT_TRANSPORT_UDP)
>> + return nfs_block_bits(iosize, NULL);
>> + return iosize;
>> +}
>> +
>> /*
>> * Determine the maximum file size for a superblock
>> */
>> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
>> index 47a6cf892c95..3c5678aec006 100644
>> --- a/fs/nfs/nfs4client.c
>> +++ b/fs/nfs/nfs4client.c
>> @@ -1161,9 +1161,9 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
>> return error;
>>
>> if (ctx->rsize)
>> - server->rsize = nfs_block_size(ctx->rsize, NULL);
>> + server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto);
>> if (ctx->wsize)
>> - server->wsize = nfs_block_size(ctx->wsize, NULL);
>> + server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto);
>>
>> server->acregmin = ctx->acregmin * HZ;
>> server->acregmax = ctx->acregmax * HZ;
>
> This patch seems to have caused a regression. With this patch in place,
> I can't set an rsize/wsize value that is less than 4k:
>
> # mount server:/export /mnt -o rsize=1024,wsize=1024
>
> ...now yields:
>
> server:/export on /mnt type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.210,local_lock=none,addr=192.168.1.3)
>
> ...such that the requested sizes were ignored.
>
> Was this an intended effect? If we really do want to deprecate the use
> of small rsize/wsize with TCP/RDMA, then we probably ought to reject
> these mount attempts with -EINVAL.

I hope that's not the intent! Small r/w sizes can be quite useful for
many deployments, for example where network bandwidth is limited or
highly contended. And RDMA below 4KB shifts to inline-only (no direct
placement), which is useful for constrained environments and can
actually improve performance in certain cases.

It seems as if there should be some sort of notice if the sizes are
ignored, in any case. Asking for 1KB and getting 1MB is a rather
unfriendly result.

Tom.

2022-11-08 16:35:24

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH] NFS: Allow setting rsize / wsize to a multiple of PAGE_SIZE

On Tue, Nov 8, 2022 at 11:11 AM Tom Talpey <[email protected]> wrote:
>
> On 11/8/2022 5:01 AM, Jeff Layton wrote:
> > On Fri, 2022-06-17 at 16:23 -0400, Anna Schumaker wrote:
> >> From: Anna Schumaker <[email protected]>
> >>
> >> Previously, we required this to value to be a power of 2 for UDP related
> >> reasons. This patch keeps the power of 2 rule for UDP but allows more
> >> flexibility for TCP and RDMA.
> >>
> >> Signed-off-by: Anna Schumaker <[email protected]>
> >> ---
> >> fs/nfs/client.c | 13 +++++++------
> >> fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 ++++--
> >> fs/nfs/internal.h | 18 ++++++++++++++++++
> >> fs/nfs/nfs4client.c | 4 ++--
> >> 4 files changed, 31 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> >> index e828504cc396..da8da5cdbbc1 100644
> >> --- a/fs/nfs/client.c
> >> +++ b/fs/nfs/client.c
> >> @@ -708,9 +708,9 @@ static int nfs_init_server(struct nfs_server *server,
> >> }
> >>
> >> if (ctx->rsize)
> >> - server->rsize = nfs_block_size(ctx->rsize, NULL);
> >> + server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto);
> >> if (ctx->wsize)
> >> - server->wsize = nfs_block_size(ctx->wsize, NULL);
> >> + server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto);
> >>
> >> server->acregmin = ctx->acregmin * HZ;
> >> server->acregmax = ctx->acregmax * HZ;
> >> @@ -755,18 +755,19 @@ static int nfs_init_server(struct nfs_server *server,
> >> static void nfs_server_set_fsinfo(struct nfs_server *server,
> >> struct nfs_fsinfo *fsinfo)
> >> {
> >> + struct nfs_client *clp = server->nfs_client;
> >> unsigned long max_rpc_payload, raw_max_rpc_payload;
> >>
> >> /* Work out a lot of parameters */
> >> if (server->rsize == 0)
> >> - server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
> >> + server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto);
> >> if (server->wsize == 0)
> >> - server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
> >> + server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto);
> >>
> >> if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
> >> - server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
> >> + server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto);
> >> if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
> >> - server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
> >> + server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto);
> >>
> >> raw_max_rpc_payload = rpc_max_payload(server->client);
> >> max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
> >> diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> >> index bfa7202ca7be..e028f5a0ef5f 100644
> >> --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> >> +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> >> @@ -113,8 +113,10 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
> >> goto out_err_drain_dsaddrs;
> >> ds_versions[i].version = be32_to_cpup(p++);
> >> ds_versions[i].minor_version = be32_to_cpup(p++);
> >> - ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL);
> >> - ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL);
> >> + ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++),
> >> + server->nfs_client->cl_proto);
> >> + ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++),
> >> + server->nfs_client->cl_proto);
> >> ds_versions[i].tightly_coupled = be32_to_cpup(p);
> >>
> >> if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
> >> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> >> index 8f8cd6e2d4db..af6d261241ff 100644
> >> --- a/fs/nfs/internal.h
> >> +++ b/fs/nfs/internal.h
> >> @@ -704,6 +704,24 @@ unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
> >> return nfs_block_bits(bsize, nrbitsp);
> >> }
> >>
> >> +/*
> >> + * Compute and set NFS server rsize / wsize
> >> + */
> >> +static inline
> >> +unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto)
> >> +{
> >> + if (iosize < NFS_MIN_FILE_IO_SIZE)
> >> + iosize = NFS_DEF_FILE_IO_SIZE;
> >> + else if (iosize >= NFS_MAX_FILE_IO_SIZE)
> >> + iosize = NFS_MAX_FILE_IO_SIZE;
> >> + else
> >> + iosize = iosize & PAGE_MASK;
> >> +
> >> + if (proto == XPRT_TRANSPORT_UDP)
> >> + return nfs_block_bits(iosize, NULL);
> >> + return iosize;
> >> +}
> >> +
> >> /*
> >> * Determine the maximum file size for a superblock
> >> */
> >> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> >> index 47a6cf892c95..3c5678aec006 100644
> >> --- a/fs/nfs/nfs4client.c
> >> +++ b/fs/nfs/nfs4client.c
> >> @@ -1161,9 +1161,9 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
> >> return error;
> >>
> >> if (ctx->rsize)
> >> - server->rsize = nfs_block_size(ctx->rsize, NULL);
> >> + server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto);
> >> if (ctx->wsize)
> >> - server->wsize = nfs_block_size(ctx->wsize, NULL);
> >> + server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto);
> >>
> >> server->acregmin = ctx->acregmin * HZ;
> >> server->acregmax = ctx->acregmax * HZ;
> >
> > This patch seems to have caused a regression. With this patch in place,
> > I can't set an rsize/wsize value that is less than 4k:
> >
> > # mount server:/export /mnt -o rsize=1024,wsize=1024
> >
> > ...now yields:
> >
> > server:/export on /mnt type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.210,local_lock=none,addr=192.168.1.3)
> >
> > ...such that the requested sizes were ignored.
> >
> > Was this an intended effect? If we really do want to deprecate the use
> > of small rsize/wsize with TCP/RDMA, then we probably ought to reject
> > these mount attempts with -EINVAL.
>
> I hope that's not the intent! Small r/w sizes can be quite useful for
> many deployments, for example where network bandwidth is limited or
> highly contended. And RDMA below 4KB shifts to inline-only (no direct
> placement), which is useful for constrained environments and can
> actually improve performance in certain cases.
>
> It seems as if there should be some sort of notice if the sizes are
> ignored, in any case. Asking for 1KB and getting 1MB is a rather
> unfriendly result.

That definitely wasn't the intent! I was trying to allow for rsizes
that aren't necessarily a power of two, but didn't think about the
small rsize case when I did it. I'll work on a fix!

Anna
>
> Tom.