2021-10-25 20:49:50

by Ioannis Angelakopoulos

[permalink] [raw]
Subject: [RFC PATCH 1/7] FUSE: Add the fsnotify opcode and in/out structs to FUSE

Since fsnotify is the backend for the inotify subsystem all the backend
code implementation we add is related to fsnotify.

To support an fsnotify request in FUSE and specifically virtiofs we add a
new opcode for the FSNOTIFY (51) operation request in the "fuse.h" header.

Also add the "fuse_notify_fsnotify_in" and "fuse_notify_fsnotify_out"
structs that are responsible for passing the fsnotify/inotify related data
to and from the FUSE server.

Signed-off-by: Ioannis Angelakopoulos <[email protected]>
---
include/uapi/linux/fuse.h | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 46838551ea84..418b7fc72417 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -186,6 +186,9 @@
* - add FUSE_SYNCFS
* 7.35
* - add FUSE_NOTIFY_LOCK
+ * 7.36
+ * - add FUSE_HAVE_FSNOTIFY
+ * - add fuse_notify_fsnotify_(in,out)
*/

#ifndef _LINUX_FUSE_H
@@ -221,7 +224,7 @@
#define FUSE_KERNEL_VERSION 7

/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 35
+#define FUSE_KERNEL_MINOR_VERSION 36

/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -338,6 +341,7 @@ struct fuse_file_lock {
* write/truncate sgid is killed only if file has group
* execute permission. (Same as Linux VFS behavior).
* FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in
+ * FUSE_HAVE_FSNOTIFY: remote fsnotify/inotify event subsystem support
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -369,6 +373,7 @@ struct fuse_file_lock {
#define FUSE_SUBMOUNTS (1 << 27)
#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
#define FUSE_SETXATTR_EXT (1 << 29)
+#define FUSE_HAVE_FSNOTIFY (1 << 30)

/**
* CUSE INIT request/reply flags
@@ -515,6 +520,7 @@ enum fuse_opcode {
FUSE_SETUPMAPPING = 48,
FUSE_REMOVEMAPPING = 49,
FUSE_SYNCFS = 50,
+ FUSE_FSNOTIFY = 51,

/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -532,6 +538,7 @@ enum fuse_notify_code {
FUSE_NOTIFY_RETRIEVE = 5,
FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_LOCK = 7,
+ FUSE_NOTIFY_FSNOTIFY = 8,
FUSE_NOTIFY_CODE_MAX,
};

@@ -571,6 +578,20 @@ struct fuse_getattr_in {
uint64_t fh;
};

+struct fuse_notify_fsnotify_out {
+ uint64_t inode;
+ uint64_t mask;
+ uint32_t namelen;
+ uint32_t cookie;
+};
+
+struct fuse_notify_fsnotify_in {
+ uint64_t mask;
+ uint64_t group;
+ uint32_t action;
+ uint32_t padding;
+};
+
#define FUSE_COMPAT_ATTR_OUT_SIZE 96

struct fuse_attr_out {
--
2.33.0


2021-10-26 20:19:39

by Amir Goldstein

[permalink] [raw]
Subject: Re: [RFC PATCH 1/7] FUSE: Add the fsnotify opcode and in/out structs to FUSE

On Mon, Oct 25, 2021 at 11:47 PM Ioannis Angelakopoulos
<[email protected]> wrote:
>
> Since fsnotify is the backend for the inotify subsystem all the backend
> code implementation we add is related to fsnotify.
>
> To support an fsnotify request in FUSE and specifically virtiofs we add a
> new opcode for the FSNOTIFY (51) operation request in the "fuse.h" header.
>
> Also add the "fuse_notify_fsnotify_in" and "fuse_notify_fsnotify_out"
> structs that are responsible for passing the fsnotify/inotify related data
> to and from the FUSE server.
>
> Signed-off-by: Ioannis Angelakopoulos <[email protected]>
> ---
> include/uapi/linux/fuse.h | 23 ++++++++++++++++++++++-
> 1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 46838551ea84..418b7fc72417 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -186,6 +186,9 @@
> * - add FUSE_SYNCFS
> * 7.35
> * - add FUSE_NOTIFY_LOCK
> + * 7.36
> + * - add FUSE_HAVE_FSNOTIFY
> + * - add fuse_notify_fsnotify_(in,out)
> */
>
> #ifndef _LINUX_FUSE_H
> @@ -221,7 +224,7 @@
> #define FUSE_KERNEL_VERSION 7
>
> /** Minor version number of this interface */
> -#define FUSE_KERNEL_MINOR_VERSION 35
> +#define FUSE_KERNEL_MINOR_VERSION 36
>
> /** The node ID of the root inode */
> #define FUSE_ROOT_ID 1
> @@ -338,6 +341,7 @@ struct fuse_file_lock {
> * write/truncate sgid is killed only if file has group
> * execute permission. (Same as Linux VFS behavior).
> * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in
> + * FUSE_HAVE_FSNOTIFY: remote fsnotify/inotify event subsystem support
> */
> #define FUSE_ASYNC_READ (1 << 0)
> #define FUSE_POSIX_LOCKS (1 << 1)
> @@ -369,6 +373,7 @@ struct fuse_file_lock {
> #define FUSE_SUBMOUNTS (1 << 27)
> #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
> #define FUSE_SETXATTR_EXT (1 << 29)
> +#define FUSE_HAVE_FSNOTIFY (1 << 30)
>
> /**
> * CUSE INIT request/reply flags
> @@ -515,6 +520,7 @@ enum fuse_opcode {
> FUSE_SETUPMAPPING = 48,
> FUSE_REMOVEMAPPING = 49,
> FUSE_SYNCFS = 50,
> + FUSE_FSNOTIFY = 51,
>
> /* CUSE specific operations */
> CUSE_INIT = 4096,
> @@ -532,6 +538,7 @@ enum fuse_notify_code {
> FUSE_NOTIFY_RETRIEVE = 5,
> FUSE_NOTIFY_DELETE = 6,
> FUSE_NOTIFY_LOCK = 7,
> + FUSE_NOTIFY_FSNOTIFY = 8,
> FUSE_NOTIFY_CODE_MAX,
> };
>
> @@ -571,6 +578,20 @@ struct fuse_getattr_in {
> uint64_t fh;
> };
>
> +struct fuse_notify_fsnotify_out {
> + uint64_t inode;

64bit inode is not a good unique identifier of the object.
you need to either include the generation in object identifier
or much better use the object's nfs file handle, the same way
that fanotify stores object identifiers.

> + uint64_t mask;
> + uint32_t namelen;
> + uint32_t cookie;

I object to persisting with the two-events-joined-by-cookie design.
Any new design should include a single event for rename
with information about src and dst.

I know this is inconvenient, but we are NOT going to create a "remote inotify"
interface, we need to create a "remote fsnotify" interface and if server wants
to use inotify, it will need to join the disjoined MOVE_FROM/TO event into
a single "remote event", that FUSE will use to call fsnotify_move().

Thanks,
Amir.

2021-10-27 07:07:30

by Amir Goldstein

[permalink] [raw]
Subject: Re: [RFC PATCH 1/7] FUSE: Add the fsnotify opcode and in/out structs to FUSE

On Tue, Oct 26, 2021 at 5:56 PM Amir Goldstein <[email protected]> wrote:
>
> On Mon, Oct 25, 2021 at 11:47 PM Ioannis Angelakopoulos
> <[email protected]> wrote:
> >
> > Since fsnotify is the backend for the inotify subsystem all the backend
> > code implementation we add is related to fsnotify.
> >
> > To support an fsnotify request in FUSE and specifically virtiofs we add a
> > new opcode for the FSNOTIFY (51) operation request in the "fuse.h" header.
> >
> > Also add the "fuse_notify_fsnotify_in" and "fuse_notify_fsnotify_out"
> > structs that are responsible for passing the fsnotify/inotify related data
> > to and from the FUSE server.
> >
> > Signed-off-by: Ioannis Angelakopoulos <[email protected]>
> > ---
> > include/uapi/linux/fuse.h | 23 ++++++++++++++++++++++-
> > 1 file changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > index 46838551ea84..418b7fc72417 100644
> > --- a/include/uapi/linux/fuse.h
> > +++ b/include/uapi/linux/fuse.h
> > @@ -186,6 +186,9 @@
> > * - add FUSE_SYNCFS
> > * 7.35
> > * - add FUSE_NOTIFY_LOCK
> > + * 7.36
> > + * - add FUSE_HAVE_FSNOTIFY
> > + * - add fuse_notify_fsnotify_(in,out)
> > */
> >
> > #ifndef _LINUX_FUSE_H
> > @@ -221,7 +224,7 @@
> > #define FUSE_KERNEL_VERSION 7
> >
> > /** Minor version number of this interface */
> > -#define FUSE_KERNEL_MINOR_VERSION 35
> > +#define FUSE_KERNEL_MINOR_VERSION 36
> >
> > /** The node ID of the root inode */
> > #define FUSE_ROOT_ID 1
> > @@ -338,6 +341,7 @@ struct fuse_file_lock {
> > * write/truncate sgid is killed only if file has group
> > * execute permission. (Same as Linux VFS behavior).
> > * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in
> > + * FUSE_HAVE_FSNOTIFY: remote fsnotify/inotify event subsystem support
> > */
> > #define FUSE_ASYNC_READ (1 << 0)
> > #define FUSE_POSIX_LOCKS (1 << 1)
> > @@ -369,6 +373,7 @@ struct fuse_file_lock {
> > #define FUSE_SUBMOUNTS (1 << 27)
> > #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
> > #define FUSE_SETXATTR_EXT (1 << 29)
> > +#define FUSE_HAVE_FSNOTIFY (1 << 30)
> >
> > /**
> > * CUSE INIT request/reply flags
> > @@ -515,6 +520,7 @@ enum fuse_opcode {
> > FUSE_SETUPMAPPING = 48,
> > FUSE_REMOVEMAPPING = 49,
> > FUSE_SYNCFS = 50,
> > + FUSE_FSNOTIFY = 51,
> >
> > /* CUSE specific operations */
> > CUSE_INIT = 4096,
> > @@ -532,6 +538,7 @@ enum fuse_notify_code {
> > FUSE_NOTIFY_RETRIEVE = 5,
> > FUSE_NOTIFY_DELETE = 6,
> > FUSE_NOTIFY_LOCK = 7,
> > + FUSE_NOTIFY_FSNOTIFY = 8,
> > FUSE_NOTIFY_CODE_MAX,
> > };
> >
> > @@ -571,6 +578,20 @@ struct fuse_getattr_in {
> > uint64_t fh;
> > };
> >
> > +struct fuse_notify_fsnotify_out {
> > + uint64_t inode;
>
> 64bit inode is not a good unique identifier of the object.
> you need to either include the generation in object identifier
> or much better use the object's nfs file handle, the same way
> that fanotify stores object identifiers.
>
> > + uint64_t mask;
> > + uint32_t namelen;
> > + uint32_t cookie;
>
> I object to persisting with the two-events-joined-by-cookie design.
> Any new design should include a single event for rename
> with information about src and dst.
>
> I know this is inconvenient, but we are NOT going to create a "remote inotify"
> interface, we need to create a "remote fsnotify" interface and if server wants
> to use inotify, it will need to join the disjoined MOVE_FROM/TO event into
> a single "remote event", that FUSE will use to call fsnotify_move().
>

TBH, the disjoint vs. joint from/to event is an unfinished business
for fanotify.
So my objection above is more of a strong wish.
But I admit that if existing network protocols already encode the disjoint
from/to events semantics, I may need to fold back on that objection.

Thanks,
Amir.

2021-10-27 17:36:58

by Amir Goldstein

[permalink] [raw]
Subject: Re: [RFC PATCH 1/7] FUSE: Add the fsnotify opcode and in/out structs to FUSE

>> > + uint64_t mask;
>> > + uint32_t namelen;
>> > + uint32_t cookie;
>>
>> I object to persisting with the two-events-joined-by-cookie design.
>> Any new design should include a single event for rename
>> with information about src and dst.
>>
>> I know this is inconvenient, but we are NOT going to create a "remote inotify"
>> interface, we need to create a "remote fsnotify" interface and if server wants
>> to use inotify, it will need to join the disjoined MOVE_FROM/TO event into
>> a single "remote event", that FUSE will use to call fsnotify_move().
>>
> I do not fully understand this. Should we define a new "remote" event
> that corresponds to the merged MOVE_FROM/TO events and send that to the guest instead?

Yes.

First of all, my objection to the old cookie API stems from seeing applications
that do not use it correctly and because I find that it can be hard to use it.

For those reasons, when we extended fanotify to provide "inotify compatible"
semantics, the cookie API was not carried over to fanotify.

You can see an example of "inotify compatible" API in the implementation of
fsnotifywatch [1].

That said, the functionality of joining to/from events is still missing in
fanotify and I have posted several proposals for an API extension that
will solve it using an event that contains information on both src and dst [2].

> If that is the case, wouldn't that require that user space processes be aware of this newly added "remote"
> event?

No, the application will not be aware of the FUSE notify protocol at all.
If application is using inotify, it will get the old from+to+cookie events, but
FUSE server will notify the guest {MOVE, inode, from_path, to_path}
and FUSE client will call fsnotify_mode() or a variant.
Then inotify watchers will get what they are used to and fsnotify watchers
will get whatever information the existing or future API will provide.

To explain this from another perspective, you currently implemented the
virtiofsd watch using inotify, but you should not limit virtiofsd to inotify.
Once you learn the benefits of fanotify, you may consider implementing
the virtiofsd watches using fanotify. It should not matter for the inotify
application in the guest at all.

With the current implementation of watch in virtiofsd using inotify, that
will cause the inconvenience of having to match the from/to events
before reporting the event to guest, but inconvenience for a specific
implementation is not what should be driving the API design.

> Also in the inotify man page there is a limitations case that states that it is possible for one of these events
> to be missing. What should the server do in this case?
>

The protocol should be able to carry src and dst information, but
you could leave src (of moved_to) or dst (of moved_from) empty.
In that case, FUSE client cannot call fsnotify_move() as is, so we
can either change fsnotify_move() or use a variant for reporting remote
rename events.

The extra info for rename events with the FAN_REPORT_TARGET_FID
proposal is also designed to be optional.

Thanks,
Amir.

[1] https://man7.org/linux/man-pages/man1/fsnotifywatch.1.html
[2] https://lore.kernel.org/linux-fsdevel/CAOQ4uxjYDDk00VPdWtRB1_tf+gCoPFgSQ9O0p0fGaW_JiFUUKA@mail.gmail.com/

2021-10-27 22:17:52

by Vivek Goyal

[permalink] [raw]
Subject: Re: [RFC PATCH 1/7] FUSE: Add the fsnotify opcode and in/out structs to FUSE

On Tue, Oct 26, 2021 at 05:56:03PM +0300, Amir Goldstein wrote:
> On Mon, Oct 25, 2021 at 11:47 PM Ioannis Angelakopoulos
> <[email protected]> wrote:
> >
> > Since fsnotify is the backend for the inotify subsystem all the backend
> > code implementation we add is related to fsnotify.
> >
> > To support an fsnotify request in FUSE and specifically virtiofs we add a
> > new opcode for the FSNOTIFY (51) operation request in the "fuse.h" header.
> >
> > Also add the "fuse_notify_fsnotify_in" and "fuse_notify_fsnotify_out"
> > structs that are responsible for passing the fsnotify/inotify related data
> > to and from the FUSE server.
> >
> > Signed-off-by: Ioannis Angelakopoulos <[email protected]>
> > ---
> > include/uapi/linux/fuse.h | 23 ++++++++++++++++++++++-
> > 1 file changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > index 46838551ea84..418b7fc72417 100644
> > --- a/include/uapi/linux/fuse.h
> > +++ b/include/uapi/linux/fuse.h
> > @@ -186,6 +186,9 @@
> > * - add FUSE_SYNCFS
> > * 7.35
> > * - add FUSE_NOTIFY_LOCK
> > + * 7.36
> > + * - add FUSE_HAVE_FSNOTIFY
> > + * - add fuse_notify_fsnotify_(in,out)
> > */
> >
> > #ifndef _LINUX_FUSE_H
> > @@ -221,7 +224,7 @@
> > #define FUSE_KERNEL_VERSION 7
> >
> > /** Minor version number of this interface */
> > -#define FUSE_KERNEL_MINOR_VERSION 35
> > +#define FUSE_KERNEL_MINOR_VERSION 36
> >
> > /** The node ID of the root inode */
> > #define FUSE_ROOT_ID 1
> > @@ -338,6 +341,7 @@ struct fuse_file_lock {
> > * write/truncate sgid is killed only if file has group
> > * execute permission. (Same as Linux VFS behavior).
> > * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in
> > + * FUSE_HAVE_FSNOTIFY: remote fsnotify/inotify event subsystem support
> > */
> > #define FUSE_ASYNC_READ (1 << 0)
> > #define FUSE_POSIX_LOCKS (1 << 1)
> > @@ -369,6 +373,7 @@ struct fuse_file_lock {
> > #define FUSE_SUBMOUNTS (1 << 27)
> > #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
> > #define FUSE_SETXATTR_EXT (1 << 29)
> > +#define FUSE_HAVE_FSNOTIFY (1 << 30)
> >
> > /**
> > * CUSE INIT request/reply flags
> > @@ -515,6 +520,7 @@ enum fuse_opcode {
> > FUSE_SETUPMAPPING = 48,
> > FUSE_REMOVEMAPPING = 49,
> > FUSE_SYNCFS = 50,
> > + FUSE_FSNOTIFY = 51,
> >
> > /* CUSE specific operations */
> > CUSE_INIT = 4096,
> > @@ -532,6 +538,7 @@ enum fuse_notify_code {
> > FUSE_NOTIFY_RETRIEVE = 5,
> > FUSE_NOTIFY_DELETE = 6,
> > FUSE_NOTIFY_LOCK = 7,
> > + FUSE_NOTIFY_FSNOTIFY = 8,
> > FUSE_NOTIFY_CODE_MAX,
> > };
> >
> > @@ -571,6 +578,20 @@ struct fuse_getattr_in {
> > uint64_t fh;
> > };
> >
> > +struct fuse_notify_fsnotify_out {
> > + uint64_t inode;
>
> 64bit inode is not a good unique identifier of the object.

I think he wants to store 64bit nodeid (internal to fuse so that client
and server can figure out which inode they are talking about). But I
think you are concerned about what happens if an event arrived for an
inode after inode has been released and nodeid possibly used for some
other inode. And then we will find that new inode in guest cache and
end up associating event with wrong inode.

Generation number will help in the sense that server has a chance
to always update generation number on lookup. So even if nodeid
is reused, generation number will make make sure we don't end
up associating this event with reused node id inode. I guess
makes sense.

> you need to either include the generation in object identifier
> or much better use the object's nfs file handle, the same way
> that fanotify stores object identifiers.

I think nfs file handle is much more complicated and its a separate
project altogether. I am assuming we are talking about persistent
nfs file handle as generated by host. I think biggest issue we faced
with that is that guest is untrusted and we don't want to resolve
file handle provided by guest on host otherwise guest can craft
file handles and possibly be able to open other files on same filesystem
outside shared dir.

>
> > + uint64_t mask;
> > + uint32_t namelen;
> > + uint32_t cookie;
>
> I object to persisting with the two-events-joined-by-cookie design.
> Any new design should include a single event for rename
> with information about src and dst.
>
> I know this is inconvenient, but we are NOT going to create a "remote inotify"
> interface, we need to create a "remote fsnotify" interface and if server wants
> to use inotify, it will need to join the disjoined MOVE_FROM/TO event into
> a single "remote event", that FUSE will use to call fsnotify_move().

man inotify says following.

" Matching up the IN_MOVED_FROM and IN_MOVED_TO event pair generated by
rename(2) is thus inherently racy. (Don't forget that if an object is
renamed outside of a monitored directory, there may not even be an
IN_MOVED_TO event.)"

So if guest is no monitoring target dir of renamed file, then we will not
even get IN_MOVED_TO. In that case we can't merge two events into one.

And this sounds like inotify/fanotify interface needs to come up with
an merged event and in that case remote filesystem will simply propagate
that event. (Instead of coming up with a new event only for remote
filesystems. Sounds like this is not a problem limited to remote
filesystems only).

Thanks
Vivek

2021-10-28 04:16:10

by Amir Goldstein

[permalink] [raw]
Subject: Re: [RFC PATCH 1/7] FUSE: Add the fsnotify opcode and in/out structs to FUSE

> > you need to either include the generation in object identifier
> > or much better use the object's nfs file handle, the same way
> > that fanotify stores object identifiers.
>
> I think nfs file handle is much more complicated and its a separate
> project altogether. I am assuming we are talking about persistent
> nfs file handle as generated by host. I think biggest issue we faced
> with that is that guest is untrusted and we don't want to resolve
> file handle provided by guest on host otherwise guest can craft
> file handles and possibly be able to open other files on same filesystem
> outside shared dir.
>

Right now, virtiofsd keeps all inodes and dentries of live client inodes
pinned in cache on the server.
If you switch to file handles design, virtiofsd only need to keep a map
of all the file handles that server handed out to client to address
this concern.

For directories, this practice is not even needed for security, because
a decoded directory file handle can be verified to be within the shared dir.
It is only needed to prevent DoS, because a crafted directory file handle
(outside of shared dir) can be used to generate extra IO and thrash the
inode/dentry cache on the server.

> >
> > > + uint64_t mask;
> > > + uint32_t namelen;
> > > + uint32_t cookie;
> >
> > I object to persisting with the two-events-joined-by-cookie design.
> > Any new design should include a single event for rename
> > with information about src and dst.
> >
> > I know this is inconvenient, but we are NOT going to create a "remote inotify"
> > interface, we need to create a "remote fsnotify" interface and if server wants
> > to use inotify, it will need to join the disjoined MOVE_FROM/TO event into
> > a single "remote event", that FUSE will use to call fsnotify_move().
>
> man inotify says following.
>
> " Matching up the IN_MOVED_FROM and IN_MOVED_TO event pair generated by
> rename(2) is thus inherently racy. (Don't forget that if an object is
> renamed outside of a monitored directory, there may not even be an
> IN_MOVED_TO event.)"
>
> So if guest is no monitoring target dir of renamed file, then we will not
> even get IN_MOVED_TO. In that case we can't merge two events into one.
>
> And this sounds like inotify/fanotify interface needs to come up with
> an merged event and in that case remote filesystem will simply propagate
> that event. (Instead of coming up with a new event only for remote
> filesystems. Sounds like this is not a problem limited to remote
> filesystems only).
>

I don't see it that way.
I see the "internal protocol" for filesystems/vfs to report rename is
fsnotify_move() which carries information about both src and target.
I would like the remote protocol to carry the same information.
It is then up to the userspace API whether to report the rename
as two events or a unified event.

For example, debugfs, calls fsnotify_move() when an object is renamed
from underneath the vfs. It does not call fsnotify() twice with a cookie,
because we would not want to change local filesystem nor remote protocols
when we want to add new userspace APIs for reporting fsevents.

That comes down to my feeling that this claims to be a proposal
for "remote fsnotify", but it looks and sounds like a proposal for
"remote inotify" and this is the core of my objection to passing
the rename cookie in the protocol.

Regarding the issue that the src/dst path may not be known
to the server, as I wrote, it is fine if either the src/dst path information
is omitted from an event, but the protocol should provide the
placeholder to report them.

After sufficient arguing, I might be convinced that the cookie may be included
as an optional field in addition to the fields that I requested.

I understand why you write that this sounds like an fanotify interface
that needs to be resolved and you are correct, but the reason that the
fanotify interface issue was not yet resolved is that we are trying to not
repeat the mistakes of the past and for that same reason, I am insisting
on the protocol.

Thanks,
Amir.

2021-10-28 14:23:55

by Vivek Goyal

[permalink] [raw]
Subject: Re: [RFC PATCH 1/7] FUSE: Add the fsnotify opcode and in/out structs to FUSE

On Thu, Oct 28, 2021 at 07:13:10AM +0300, Amir Goldstein wrote:
> > > you need to either include the generation in object identifier
> > > or much better use the object's nfs file handle, the same way
> > > that fanotify stores object identifiers.
> >
> > I think nfs file handle is much more complicated and its a separate
> > project altogether. I am assuming we are talking about persistent
> > nfs file handle as generated by host. I think biggest issue we faced
> > with that is that guest is untrusted and we don't want to resolve
> > file handle provided by guest on host otherwise guest can craft
> > file handles and possibly be able to open other files on same filesystem
> > outside shared dir.
> >
>
> Right now, virtiofsd keeps all inodes and dentries of live client inodes
> pinned in cache on the server.
> If you switch to file handles design, virtiofsd only need to keep a map
> of all the file handles that server handed out to client to address
> this concern.

Keeping a map of all file handles server handed out to client, should work.
But that means these file handles are not persistent and can't be used after
a vritiofsd restart (virtiofsd will lose its map over restart). And that
means one can not pass these file handles to user space. And that rules out
giving file handle to user space as part of fanotify API, IIUC.

So in practice it is as good as (nodeid, generation) solution. And using
file handles means giving CAP_DAC_READ_SEARCH to virtiofsd daemon. Will
really like to run virtiofsd unrpviliged (in a user namespace) and be
able to support as many features as possible.

>
> For directories, this practice is not even needed for security, because
> a decoded directory file handle can be verified to be within the shared dir.
> It is only needed to prevent DoS, because a crafted directory file handle
> (outside of shared dir) can be used to generate extra IO and thrash the
> inode/dentry cache on the server.

Good to know that for directories only DOS is a concern.

>
> > >
> > > > + uint64_t mask;
> > > > + uint32_t namelen;
> > > > + uint32_t cookie;
> > >
> > > I object to persisting with the two-events-joined-by-cookie design.
> > > Any new design should include a single event for rename
> > > with information about src and dst.
> > >
> > > I know this is inconvenient, but we are NOT going to create a "remote inotify"
> > > interface, we need to create a "remote fsnotify" interface and if server wants
> > > to use inotify, it will need to join the disjoined MOVE_FROM/TO event into
> > > a single "remote event", that FUSE will use to call fsnotify_move().
> >
> > man inotify says following.
> >
> > " Matching up the IN_MOVED_FROM and IN_MOVED_TO event pair generated by
> > rename(2) is thus inherently racy. (Don't forget that if an object is
> > renamed outside of a monitored directory, there may not even be an
> > IN_MOVED_TO event.)"
> >
> > So if guest is no monitoring target dir of renamed file, then we will not
> > even get IN_MOVED_TO. In that case we can't merge two events into one.
> >
> > And this sounds like inotify/fanotify interface needs to come up with
> > an merged event and in that case remote filesystem will simply propagate
> > that event. (Instead of coming up with a new event only for remote
> > filesystems. Sounds like this is not a problem limited to remote
> > filesystems only).
> >
>
> I don't see it that way.
> I see the "internal protocol" for filesystems/vfs to report rename is
> fsnotify_move() which carries information about both src and target.
> I would like the remote protocol to carry the same information.
> It is then up to the userspace API whether to report the rename
> as two events or a unified event.

Sure not a bad idea and allowing "cookie" does not stop remote filesystems
from reporting single rename event.

In this case virtiofs is just a passthrough filesystem. It simply reports
back what underlying filesystem is reporting. So if inotify interface
reports two events, it will simply send that. In fact old users will
expect that.

I understand that you don't like two separate events and hence wants
to kill cookie. But how will we fix that when underlying inotify API
reports two separate events. If we try to merge these there is no
guarantee that we can do that because we might get only one events
as we might not be watching either source/target directory. We place
watches only as specified by client.

>
> For example, debugfs, calls fsnotify_move() when an object is renamed
> from underneath the vfs. It does not call fsnotify() twice with a cookie,
> because we would not want to change local filesystem nor remote protocols
> when we want to add new userspace APIs for reporting fsevents.
>
> That comes down to my feeling that this claims to be a proposal
> for "remote fsnotify", but it looks and sounds like a proposal for
> "remote inotify" and this is the core of my objection to passing
> the rename cookie in the protocol.

We are starting with remote inotify support and hoping it can be extended
to remote fanotify in limited feature form. So want to make modifications
in such a way so that one can easily extend it for fanotify as well.

Now question is, that should there be separate fuse commands for inotify
and fanotify request/events. Or we should try to merge these two and
design fuse_notify_fsnotify_out{} and fuse_notify_fsnotify_in{} in such
a way so that it could support both inotify/fanotify. I guess later will
make more sense. Just that it will be more complicated as well because
fanotify API can send much more information to user space. Like, file
handles, "fd", "pid" etc.

I think "pid" might not make much sense in the context of remote filesystem.
In case of virtiofsd, it will simply be pid of another virtiofsd instance
most likely which does not mean anything for guest process. In fact,
there is a chance that it could be same pid as of guest process.

>
> Regarding the issue that the src/dst path may not be known
> to the server, as I wrote, it is fine if either the src/dst path information
> is omitted from an event, but the protocol should provide the
> placeholder to report them.

This kind of makes more sense. That protocol should have capability to report
a rename event with both src/dst path to future proof it. That way when
inotify/fanotify APIs start reporting a single rename event, we will be
able to simply send it back to client without any fuse protocol
modifications.

One risk of adding space for src/dst path info in fuse_notify_fsnotify_out{}
is that we don't know how this information will end up looking like when
inotify/fanotify actually start supporting single rename event. It is
possible that we add some fields now and final single event format looks
different and now we have unused fields.

>
> After sufficient arguing, I might be convinced that the cookie may be included
> as an optional field in addition to the fields that I requested.

But we should allow cookie as well so that older inotify API continues
to be supported as well.
>
> I understand why you write that this sounds like an fanotify interface
> that needs to be resolved and you are correct, but the reason that the
> fanotify interface issue was not yet resolved is that we are trying to not
> repeat the mistakes of the past and for that same reason, I am insisting
> on the protocol.

Right now we are writing protocol fields based on what inotify (and
possibly fanotify) are supporting. But if you want to extend it further
so that it can report something which you intend to introduce in future,
I think that's fine too.

So how will additional fields will look like to support this signle
rename event.

Vivek