2023-01-10 19:00:20

by Jorge Mora

[permalink] [raw]
Subject: [PATCH v1] NFSD: add IO_ADVISE operation

If multiple bits are set, select just one using a predetermined
order of precedence. If there are no bits which correspond to
any of the advice values (POSIX_FADV_*), the server simply
replies with NFS4ERR_UNION_NOTSUPP.

If a client sends a bitmap mask with multiple words, ignore all
but the first word in the bitmap. The response is always the
same first word of the bitmap mask given in the request.

Signed-off-by: Jorge Mora <[email protected]>
---
fs/nfsd/nfs4proc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4xdr.c | 27 +++++++++++++++++++--
fs/nfsd/xdr4.h | 11 +++++++++
3 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8beb2bc4c328..65179a3946f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -35,6 +35,7 @@
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/falloc.h>
+#include <linux/fadvise.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/namei.h>
@@ -1995,6 +1996,53 @@ nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
}

+static __be32
+nfsd4_io_advise(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_io_advise *io_advise = &u->io_advise;
+ struct nfsd_file *nf;
+ __be32 status;
+ int advice;
+ int ret;
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &io_advise->stateid,
+ RD_STATE, &nf, NULL);
+ if (status) {
+ dprintk("NFSD: %s: couldn't process stateid!\n", __func__);
+ return status;
+ }
+
+ /*
+ * If multiple bits are set, select just one using the following
+ * order of precedence
+ */
+ if (io_advise->hints & NFS_IO_ADVISE4_NORMAL)
+ advice = POSIX_FADV_NORMAL;
+ else if (io_advise->hints & NFS_IO_ADVISE4_RANDOM)
+ advice = POSIX_FADV_RANDOM;
+ else if (io_advise->hints & NFS_IO_ADVISE4_SEQUENTIAL)
+ advice = POSIX_FADV_SEQUENTIAL;
+ else if (io_advise->hints & NFS_IO_ADVISE4_WILLNEED)
+ advice = POSIX_FADV_WILLNEED;
+ else if (io_advise->hints & NFS_IO_ADVISE4_DONTNEED)
+ advice = POSIX_FADV_DONTNEED;
+ else if (io_advise->hints & NFS_IO_ADVISE4_NOREUSE)
+ advice = POSIX_FADV_NOREUSE;
+ else {
+ status = nfserr_union_notsupp;
+ goto out;
+ }
+
+ ret = vfs_fadvise(nf->nf_file, io_advise->offset, io_advise->count, advice);
+ if (ret < 0)
+ status = nfserrno(ret);
+out:
+ nfsd_file_put(nf);
+ return status;
+}
+
static __be32
nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -3096,6 +3144,12 @@ static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
#endif /* CONFIG_NFSD_PNFS */


+static u32 nfsd4_io_advise_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 2) * sizeof(__be32);
+}
+
static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
const struct nfsd4_op *op)
{
@@ -3479,6 +3533,11 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
+ [OP_IO_ADVISE] = {
+ .op_func = nfsd4_io_advise,
+ .op_name = "OP_IO_ADVISE",
+ .op_rsize_bop = nfsd4_io_advise_rsize,
+ },
[OP_CLONE] = {
.op_func = nfsd4_clone,
.op_flags = OP_MODIFIES_SOMETHING,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bcfeb1a922c0..8814c24047ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1882,6 +1882,22 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
return nfs_ok;
}

+static __be32
+nfsd4_decode_io_advise(struct nfsd4_compoundargs *argp,
+ struct nfsd4_io_advise *io_advise)
+{
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &io_advise->stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u64(argp->xdr, &io_advise->offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &io_advise->count) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_bitmap4(argp, &io_advise->hints, 1);
+}
+
static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
struct nl4_server *ns)
{
@@ -2338,7 +2354,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
[OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
- [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_io_advise,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
@@ -4948,6 +4964,13 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr;
}

+static __be32
+nfsd4_encode_io_advise(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_io_advise *io_advise)
+{
+ return nfsd4_encode_bitmap(resp->xdr, io_advise->hints, 0, 0);
+}
+
static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
@@ -5282,7 +5305,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
[OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
[OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_io_advise,
[OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0eb00105d845..9b8ba4d6e3ab 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -518,6 +518,16 @@ struct nfsd4_fallocate {
u64 falloc_length;
};

+struct nfsd4_io_advise {
+ /* request */
+ stateid_t stateid;
+ loff_t offset;
+ u64 count;
+
+ /* both */
+ u32 hints;
+};
+
struct nfsd4_clone {
/* request */
stateid_t cl_src_stateid;
@@ -688,6 +698,7 @@ struct nfsd4_op {
/* NFSv4.2 */
struct nfsd4_fallocate allocate;
struct nfsd4_fallocate deallocate;
+ struct nfsd4_io_advise io_advise;
struct nfsd4_clone clone;
struct nfsd4_copy copy;
struct nfsd4_offload_status offload_status;
--
2.31.1


2023-01-10 19:04:39

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1] NFSD: add IO_ADVISE operation



> On Jan 10, 2023, at 1:47 PM, Jorge Mora <[email protected]> wrote:
>
> If multiple bits are set, select just one using a predetermined
> order of precedence. If there are no bits which correspond to
> any of the advice values (POSIX_FADV_*), the server simply
> replies with NFS4ERR_UNION_NOTSUPP.
>
> If a client sends a bitmap mask with multiple words, ignore all
> but the first word in the bitmap. The response is always the
> same first word of the bitmap mask given in the request.

Hi Jorge-

I'd rather not add this operation just because it happens to be
missing. Is there a reason you need it? Does it provide some
kind of performance benefit, for instance? The patch description
really does need to provide this kind of rationale, and hopefully
some performance measurements.

Do the POSIX_FADV_* settings map to behavior that a client can
expect in other server implementations? That is, do you expect
the proposed implementation below to interoperate properly?


> Signed-off-by: Jorge Mora <[email protected]>
> ---
> fs/nfsd/nfs4proc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfsd/nfs4xdr.c | 27 +++++++++++++++++++--
> fs/nfsd/xdr4.h | 11 +++++++++
> 3 files changed, 95 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 8beb2bc4c328..65179a3946f5 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -35,6 +35,7 @@
> #include <linux/fs_struct.h>
> #include <linux/file.h>
> #include <linux/falloc.h>
> +#include <linux/fadvise.h>
> #include <linux/slab.h>
> #include <linux/kthread.h>
> #include <linux/namei.h>
> @@ -1995,6 +1996,53 @@ nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
> }
>
> +static __be32
> +nfsd4_io_advise(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> + union nfsd4_op_u *u)
> +{
> + struct nfsd4_io_advise *io_advise = &u->io_advise;
> + struct nfsd_file *nf;
> + __be32 status;
> + int advice;
> + int ret;
> +
> + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> + &io_advise->stateid,
> + RD_STATE, &nf, NULL);
> + if (status) {
> + dprintk("NFSD: %s: couldn't process stateid!\n", __func__);
> + return status;
> + }
> +
> + /*
> + * If multiple bits are set, select just one using the following
> + * order of precedence
> + */
> + if (io_advise->hints & NFS_IO_ADVISE4_NORMAL)
> + advice = POSIX_FADV_NORMAL;
> + else if (io_advise->hints & NFS_IO_ADVISE4_RANDOM)
> + advice = POSIX_FADV_RANDOM;
> + else if (io_advise->hints & NFS_IO_ADVISE4_SEQUENTIAL)
> + advice = POSIX_FADV_SEQUENTIAL;
> + else if (io_advise->hints & NFS_IO_ADVISE4_WILLNEED)
> + advice = POSIX_FADV_WILLNEED;
> + else if (io_advise->hints & NFS_IO_ADVISE4_DONTNEED)
> + advice = POSIX_FADV_DONTNEED;
> + else if (io_advise->hints & NFS_IO_ADVISE4_NOREUSE)
> + advice = POSIX_FADV_NOREUSE;
> + else {
> + status = nfserr_union_notsupp;
> + goto out;
> + }
> +
> + ret = vfs_fadvise(nf->nf_file, io_advise->offset, io_advise->count, advice);
> + if (ret < 0)
> + status = nfserrno(ret);
> +out:
> + nfsd_file_put(nf);
> + return status;
> +}
> +
> static __be32
> nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> union nfsd4_op_u *u)
> @@ -3096,6 +3144,12 @@ static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
> #endif /* CONFIG_NFSD_PNFS */
>
>
> +static u32 nfsd4_io_advise_rsize(const struct svc_rqst *rqstp,
> + const struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + 2) * sizeof(__be32);
> +}
> +
> static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
> const struct nfsd4_op *op)
> {
> @@ -3479,6 +3533,11 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> .op_name = "OP_DEALLOCATE",
> .op_rsize_bop = nfsd4_only_status_rsize,
> },
> + [OP_IO_ADVISE] = {
> + .op_func = nfsd4_io_advise,
> + .op_name = "OP_IO_ADVISE",
> + .op_rsize_bop = nfsd4_io_advise_rsize,
> + },
> [OP_CLONE] = {
> .op_func = nfsd4_clone,
> .op_flags = OP_MODIFIES_SOMETHING,
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index bcfeb1a922c0..8814c24047ff 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1882,6 +1882,22 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
> return nfs_ok;
> }
>
> +static __be32
> +nfsd4_decode_io_advise(struct nfsd4_compoundargs *argp,
> + struct nfsd4_io_advise *io_advise)
> +{
> + __be32 status;
> +
> + status = nfsd4_decode_stateid4(argp, &io_advise->stateid);
> + if (status)
> + return status;
> + if (xdr_stream_decode_u64(argp->xdr, &io_advise->offset) < 0)
> + return nfserr_bad_xdr;
> + if (xdr_stream_decode_u64(argp->xdr, &io_advise->count) < 0)
> + return nfserr_bad_xdr;
> + return nfsd4_decode_bitmap4(argp, &io_advise->hints, 1);
> +}
> +
> static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
> struct nl4_server *ns)
> {
> @@ -2338,7 +2354,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
> [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
> [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
> - [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
> + [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_io_advise,
> [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
> @@ -4948,6 +4964,13 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
> return nfserr;
> }
>
> +static __be32
> +nfsd4_encode_io_advise(struct nfsd4_compoundres *resp, __be32 nfserr,
> + struct nfsd4_io_advise *io_advise)
> +{
> + return nfsd4_encode_bitmap(resp->xdr, io_advise->hints, 0, 0);
> +}
> +
> static __be32
> nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
> struct nfsd4_seek *seek)
> @@ -5282,7 +5305,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
> [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
> [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
> - [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
> + [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_io_advise,
> [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 0eb00105d845..9b8ba4d6e3ab 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -518,6 +518,16 @@ struct nfsd4_fallocate {
> u64 falloc_length;
> };
>
> +struct nfsd4_io_advise {
> + /* request */
> + stateid_t stateid;
> + loff_t offset;
> + u64 count;
> +
> + /* both */
> + u32 hints;
> +};
> +
> struct nfsd4_clone {
> /* request */
> stateid_t cl_src_stateid;
> @@ -688,6 +698,7 @@ struct nfsd4_op {
> /* NFSv4.2 */
> struct nfsd4_fallocate allocate;
> struct nfsd4_fallocate deallocate;
> + struct nfsd4_io_advise io_advise;
> struct nfsd4_clone clone;
> struct nfsd4_copy copy;
> struct nfsd4_offload_status offload_status;
> --
> 2.31.1
>

--
Chuck Lever



2023-01-11 14:23:12

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1] NFSD: add IO_ADVISE operation

On Tue, Jan 10, 2023 at 2:04 PM Chuck Lever III <[email protected]> wrote:
>
>
>
> > On Jan 10, 2023, at 1:47 PM, Jorge Mora <[email protected]> wrote:
> >
> > If multiple bits are set, select just one using a predetermined
> > order of precedence. If there are no bits which correspond to
> > any of the advice values (POSIX_FADV_*), the server simply
> > replies with NFS4ERR_UNION_NOTSUPP.
> >
> > If a client sends a bitmap mask with multiple words, ignore all
> > but the first word in the bitmap. The response is always the
> > same first word of the bitmap mask given in the request.
>
> Hi Jorge-
>
> I'd rather not add this operation just because it happens to be
> missing. Is there a reason you need it?

The motivation was to implement the not implement and hope to serve as
a foundation for others to expand on and do something. Also because
typically if we put in a client piece support there is supposed to be
server side as well.

> Does it provide some
> kind of performance benefit, for instance? The patch description
> really does need to provide this kind of rationale, and hopefully
> some performance measurements.
>
> Do the POSIX_FADV_* settings map to behavior that a client can
> expect in other server implementations?

I thought the purpose of IO_ADVISE is to advise but not expect. If the
server wants to do something with the knowledge it can.

> That is, do you expect
> the proposed implementation below to interoperate properly?
>
>
> > Signed-off-by: Jorge Mora <[email protected]>
> > ---
> > fs/nfsd/nfs4proc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
> > fs/nfsd/nfs4xdr.c | 27 +++++++++++++++++++--
> > fs/nfsd/xdr4.h | 11 +++++++++
> > 3 files changed, 95 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index 8beb2bc4c328..65179a3946f5 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -35,6 +35,7 @@
> > #include <linux/fs_struct.h>
> > #include <linux/file.h>
> > #include <linux/falloc.h>
> > +#include <linux/fadvise.h>
> > #include <linux/slab.h>
> > #include <linux/kthread.h>
> > #include <linux/namei.h>
> > @@ -1995,6 +1996,53 @@ nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
> > }
> >
> > +static __be32
> > +nfsd4_io_advise(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > + union nfsd4_op_u *u)
> > +{
> > + struct nfsd4_io_advise *io_advise = &u->io_advise;
> > + struct nfsd_file *nf;
> > + __be32 status;
> > + int advice;
> > + int ret;
> > +
> > + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> > + &io_advise->stateid,
> > + RD_STATE, &nf, NULL);
> > + if (status) {
> > + dprintk("NFSD: %s: couldn't process stateid!\n", __func__);
> > + return status;
> > + }
> > +
> > + /*
> > + * If multiple bits are set, select just one using the following
> > + * order of precedence
> > + */
> > + if (io_advise->hints & NFS_IO_ADVISE4_NORMAL)
> > + advice = POSIX_FADV_NORMAL;
> > + else if (io_advise->hints & NFS_IO_ADVISE4_RANDOM)
> > + advice = POSIX_FADV_RANDOM;
> > + else if (io_advise->hints & NFS_IO_ADVISE4_SEQUENTIAL)
> > + advice = POSIX_FADV_SEQUENTIAL;
> > + else if (io_advise->hints & NFS_IO_ADVISE4_WILLNEED)
> > + advice = POSIX_FADV_WILLNEED;
> > + else if (io_advise->hints & NFS_IO_ADVISE4_DONTNEED)
> > + advice = POSIX_FADV_DONTNEED;
> > + else if (io_advise->hints & NFS_IO_ADVISE4_NOREUSE)
> > + advice = POSIX_FADV_NOREUSE;
> > + else {
> > + status = nfserr_union_notsupp;
> > + goto out;
> > + }
> > +
> > + ret = vfs_fadvise(nf->nf_file, io_advise->offset, io_advise->count, advice);
> > + if (ret < 0)
> > + status = nfserrno(ret);
> > +out:
> > + nfsd_file_put(nf);
> > + return status;
> > +}
> > +
> > static __be32
> > nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > union nfsd4_op_u *u)
> > @@ -3096,6 +3144,12 @@ static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
> > #endif /* CONFIG_NFSD_PNFS */
> >
> >
> > +static u32 nfsd4_io_advise_rsize(const struct svc_rqst *rqstp,
> > + const struct nfsd4_op *op)
> > +{
> > + return (op_encode_hdr_size + 2) * sizeof(__be32);
> > +}
> > +
> > static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
> > const struct nfsd4_op *op)
> > {
> > @@ -3479,6 +3533,11 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> > .op_name = "OP_DEALLOCATE",
> > .op_rsize_bop = nfsd4_only_status_rsize,
> > },
> > + [OP_IO_ADVISE] = {
> > + .op_func = nfsd4_io_advise,
> > + .op_name = "OP_IO_ADVISE",
> > + .op_rsize_bop = nfsd4_io_advise_rsize,
> > + },
> > [OP_CLONE] = {
> > .op_func = nfsd4_clone,
> > .op_flags = OP_MODIFIES_SOMETHING,
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index bcfeb1a922c0..8814c24047ff 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1882,6 +1882,22 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
> > return nfs_ok;
> > }
> >
> > +static __be32
> > +nfsd4_decode_io_advise(struct nfsd4_compoundargs *argp,
> > + struct nfsd4_io_advise *io_advise)
> > +{
> > + __be32 status;
> > +
> > + status = nfsd4_decode_stateid4(argp, &io_advise->stateid);
> > + if (status)
> > + return status;
> > + if (xdr_stream_decode_u64(argp->xdr, &io_advise->offset) < 0)
> > + return nfserr_bad_xdr;
> > + if (xdr_stream_decode_u64(argp->xdr, &io_advise->count) < 0)
> > + return nfserr_bad_xdr;
> > + return nfsd4_decode_bitmap4(argp, &io_advise->hints, 1);
> > +}
> > +
> > static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
> > struct nl4_server *ns)
> > {
> > @@ -2338,7 +2354,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> > [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
> > [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
> > [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
> > - [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
> > + [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_io_advise,
> > [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
> > [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
> > [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
> > @@ -4948,6 +4964,13 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
> > return nfserr;
> > }
> >
> > +static __be32
> > +nfsd4_encode_io_advise(struct nfsd4_compoundres *resp, __be32 nfserr,
> > + struct nfsd4_io_advise *io_advise)
> > +{
> > + return nfsd4_encode_bitmap(resp->xdr, io_advise->hints, 0, 0);
> > +}
> > +
> > static __be32
> > nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
> > struct nfsd4_seek *seek)
> > @@ -5282,7 +5305,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> > [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
> > [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
> > [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
> > - [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
> > + [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_io_advise,
> > [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
> > [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
> > [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 0eb00105d845..9b8ba4d6e3ab 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -518,6 +518,16 @@ struct nfsd4_fallocate {
> > u64 falloc_length;
> > };
> >
> > +struct nfsd4_io_advise {
> > + /* request */
> > + stateid_t stateid;
> > + loff_t offset;
> > + u64 count;
> > +
> > + /* both */
> > + u32 hints;
> > +};
> > +
> > struct nfsd4_clone {
> > /* request */
> > stateid_t cl_src_stateid;
> > @@ -688,6 +698,7 @@ struct nfsd4_op {
> > /* NFSv4.2 */
> > struct nfsd4_fallocate allocate;
> > struct nfsd4_fallocate deallocate;
> > + struct nfsd4_io_advise io_advise;
> > struct nfsd4_clone clone;
> > struct nfsd4_copy copy;
> > struct nfsd4_offload_status offload_status;
> > --
> > 2.31.1
> >
>
> --
> Chuck Lever
>
>
>

2023-01-12 02:05:44

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1] NFSD: add IO_ADVISE operation


> On Jan 11, 2023, at 9:15 AM, Olga Kornievskaia <[email protected]> wrote:
>
> On Tue, Jan 10, 2023 at 2:04 PM Chuck Lever III <[email protected]> wrote:
>>
>>> On Jan 10, 2023, at 1:47 PM, Jorge Mora <[email protected]> wrote:
>>>
>>> If multiple bits are set, select just one using a predetermined
>>> order of precedence. If there are no bits which correspond to
>>> any of the advice values (POSIX_FADV_*), the server simply
>>> replies with NFS4ERR_UNION_NOTSUPP.
>>>
>>> If a client sends a bitmap mask with multiple words, ignore all
>>> but the first word in the bitmap. The response is always the
>>> same first word of the bitmap mask given in the request.

>> Does it provide some
>> kind of performance benefit, for instance? The patch description
>> really does need to provide this kind of rationale, and hopefully
>> some performance measurements.
>>
>> Do the POSIX_FADV_* settings map to behavior that a client can
>> expect in other server implementations?
>
> I thought the purpose of IO_ADVISE is to advise but not expect. If the
> server wants to do something with the knowledge it can.

Fair enough: what should the Linux server do with these hints?
Show some data that demonstrates a specific benefit to application
workloads for the server implementation choices that Jorge proposed.

I'm not saying NAK, more like "not yet." This work doesn't look ready
to be merged without a way to evaluate whether the proposed design
choices are reasonable and will do little or no harm. There's no
discussion of that in either a cover letter or the patch descriptions,
so it's really hard for me to tell whether this has been thought
through.

In the meantime, the client side is supposed to work correctly
whether the server implements IO_ADVISE or not... so I don't see
a pressing need to merge a server IO_ADVISE implementation until we
have a sensible architectural direction and one or more use cases
that can actually benefit.


--
Chuck Lever



2023-01-13 17:50:14

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1] NFSD: add IO_ADVISE operation



> On Jan 12, 2023, at 11:47 AM, Jorge Mora <[email protected]> wrote:
>
> Hello Chuck,
>
> Thank you for your comments, I will look into these issues and get some data to justify the implementation.

Awesome. That will be great!


--
Chuck Lever