2022-12-19 00:04:59

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v2 00/10] Performance fixes for 9p filesystem

This is the second version of a patch series which adds a number
of features to improve read/write performance in the 9p filesystem.
Mostly it focuses on fixing caching to help utilize the recently
increased MSIZE limits and also fixes some problematic behavior
within the writeback code.

Altogether, these show roughly 10x speed increases on simple
file transfers. Future patch sets will improve cache consistency
and directory caching.

These patches are also available on github:
https://github.com/v9fs/linux/tree/ericvh/9p-next

Tested against qemu, cpu, and diod with fsx, dbench, and some
simple benchmarks.

Signed-off-by: Eric Van Hensbergen <[email protected]>



2022-12-19 00:08:00

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v2 04/10] Remove unnecessary superblock flags

These flags just add unnecessary extra operations.
When 9p is run without cache, it inherently implements
these options so we don't need them in the superblock
(which ends up sending extraneous fsyncs, etc.). User
can still request these options on mount, but we don't
need to set them as default.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/vfs_super.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 266c4693e20c..65d96fa94ba2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -84,9 +84,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_bdi->io_pages = v9ses->maxdata >> PAGE_SHIFT;
}

- sb->s_flags |= SB_ACTIVE | SB_DIRSYNC;
- if (!v9ses->cache)
- sb->s_flags |= SB_SYNCHRONOUS;
+ sb->s_flags |= SB_ACTIVE;

#ifdef CONFIG_9P_FS_POSIX_ACL
if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
--
2.37.2

2022-12-19 00:15:34

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v2 02/10] Expand setup of writeback cache to all levels

If cache is enabled, make sure we are putting the right things
in place (mainly impacts mmap). This also sets us up for more
cache levels.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 2 +-
fs/9p/vfs_addr.c | 2 --
fs/9p/vfs_file.c | 7 ++++---
fs/9p/vfs_inode.c | 3 +--
fs/9p/vfs_inode_dotl.c | 7 ++++---
5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 3a9c4517265f..61a51b90600d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -468,7 +468,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,

#ifdef CONFIG_9P_FSCACHE
/* register the session for caching */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ if (v9ses->cache == CACHE_FSCACHE) {
rc = v9fs_cache_session_get_cookie(v9ses, dev_name);
if (rc < 0)
goto err_clnt;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 93373486ab04..9da47465e568 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -279,8 +279,6 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,

p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);

- BUG_ON(!v9inode->writeback_fid);
-
/* Prefetch area to be written into the cache if we're caching this
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b740017634ef..3b6458846a0b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -73,8 +73,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}

mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((file->f_flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -92,9 +91,11 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9inode->writeback_fid = (void *) writeback_fid;
}
mutex_unlock(&v9inode->v_mutex);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+#endif
v9fs_open_fid_add(inode, &fid);
return 0;
out_error:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 27a04a226d97..33e521c60e2c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -843,8 +843,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 8696e8899c27..9fde73ffadaa 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -316,8 +316,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,

v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -340,9 +339,11 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err)
goto out;
file->private_data = ofid;
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+#endif
v9fs_open_fid_add(inode, &ofid);
file->f_mode |= FMODE_CREATED;
out:
--
2.37.2

2022-12-19 00:16:27

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v2 10/10] writeback mode fixes

This fixes several detected problems from preivous
patches when running with writeback mode. In
particular this fixes issues with files which are opened
as write only and getattr on files which dirty caches.

This patch makes sure that cache behavior for an open file is stored in
the client copy of fid->mode. This allows us to reflect cache behavior
from mount flags, open mode, and information from the server to
inform readahead and writeback behavior.

This includes adding support for a 9p semantic that qid.version==0
is used to mark a file as non-cachable which is important for
synthetic files. This may have a side-effect of not supporting
caching on certain legacy file servers that do not properly set
qid.version. There is also now a mount flag which can disable
the qid.version behavior.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
Documentation/filesystems/9p.rst | 24 ++++-----
fs/9p/fid.c | 23 +++++++--
fs/9p/fid.h | 32 ++++++++++++
fs/9p/v9fs.h | 1 -
fs/9p/vfs_addr.c | 22 ++++----
fs/9p/vfs_file.c | 88 +++++++++++---------------------
fs/9p/vfs_inode.c | 45 ++++++----------
fs/9p/vfs_inode_dotl.c | 44 ++++++----------
fs/9p/vfs_super.c | 21 +++++---
9 files changed, 150 insertions(+), 150 deletions(-)

diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
index 0e800b8f73cc..0c2c7a181d85 100644
--- a/Documentation/filesystems/9p.rst
+++ b/Documentation/filesystems/9p.rst
@@ -79,18 +79,14 @@ Options

cache=mode specifies a caching policy. By default, no caches are used.

- none
- default no cache policy, metadata and data
- alike are synchronous.
- loose
- no attempts are made at consistency,
- intended for exclusive, read-only mounts
- fscache
- use FS-Cache for a persistent, read-only
- cache backend.
- mmap
- minimal cache that is only used for read-write
- mmap. Northing else is cached, like cache=none
+ ========= =============================================
+ none no cache of file or metadata
+ readahead readahead caching of files
+ writeback delayed writeback of files
+ mmap support mmap operations read/write with cache
+ loose meta-data and file cache with no coherency
+ fscache use FS-Cache for a persistent cache backend
+ ========= =============================================

debug=n specifies debug level. The debug level is a bitmask.

@@ -137,6 +133,10 @@ Options
This can be used to share devices/named pipes/sockets between
hosts. This functionality will be expanded in later versions.

+ directio bypass page cache on all read/write operations
+
+ ignoreqv ignore qid.version==0 as a marker to ignore cache
+
noxattr do not offer xattr functions on this mount.

access there are four access modes.
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 805151114e96..4f326a7019d3 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -41,14 +41,24 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid **pfid)
*pfid = NULL;
}

+static bool v9fs_is_writeable(int mode)
+{
+ if ((mode & P9_OWRITE) || (mode & P9_ORDWR))
+ return true;
+ else
+ return false;
+}
+
/**
* v9fs_fid_find_inode - search for an open fid off of the inode list
* @inode: return a fid pointing to a specific inode
+ * @writeable: only consider fids which are writeable
* @uid: return a fid belonging to the specified user
+ * @any: ignore uid as a selection criteria
*
*/
-
-static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any)
{
struct hlist_head *h;
struct p9_fid *fid, *ret = NULL;
@@ -58,7 +68,12 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
spin_lock(&inode->i_lock);
h = (struct hlist_head *)&inode->i_private;
hlist_for_each_entry(fid, h, ilist) {
- if (uid_eq(fid->uid, uid)) {
+ if (any || uid_eq(fid->uid, uid)) {
+ if (want_writeable && !v9fs_is_writeable(fid->mode)) {
+ p9_debug(P9_DEBUG_VFS, " mode: %x not writeable?\n",
+ fid->mode);
+ continue;
+ }
p9_fid_get(fid);
ret = fid;
break;
@@ -118,7 +133,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
spin_unlock(&dentry->d_lock);
} else {
if (dentry->d_inode)
- ret = v9fs_fid_find_inode(dentry->d_inode, uid);
+ ret = v9fs_fid_find_inode(dentry->d_inode, false, uid, any);
}

return ret;
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 8a4e8cd12ca2..b248221e7906 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -7,7 +7,10 @@
#ifndef FS_9P_FID_H
#define FS_9P_FID_H
#include <linux/list.h>
+#include "v9fs.h"

+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any);
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry)
{
@@ -32,4 +35,33 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
p9_fid_put(fid);
return nfid;
}
+/**
+ * v9fs_fid_addmodes - add cache flags to fid mode (for client use only)
+ * @fid: fid to augment
+ * @s_flags: session info mount flags
+ * @s_cache: session info cache flags
+ * @f_flags: unix open flags
+ *
+ * make sure mode reflects flags of underlying mounts
+ * also qid.version == 0 reflects a synthetic or legacy file system
+ * NOTE: these are set after open so only reflect 9p client not
+ * underlying file system on server.
+ */
+static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
+ int s_cache, unsigned int f_flags)
+{
+ if (fid->qid.type != P9_QTFILE)
+ return;
+
+ if ((!s_cache) ||
+ ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) ||
+ (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
+ fid->mode |= P9L_DIRECT; /* no read or write cache */
+ } else if ((s_cache < CACHE_WRITEBACK) ||
+ (f_flags & O_DSYNC) | (s_flags & V9FS_SYNC)) {
+ fid->mode |= P9L_NOWRITECACHE;
+ } else if (s_cache == CACHE_LOOSE) {
+ fid->mode |= P9L_LOOSE; /* noncoherent cache */
+ }
+}
#endif
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c80c318ff31c..9c6bc57512bf 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -118,7 +118,6 @@ struct v9fs_inode {
struct netfs_inode netfs; /* Netfslib context and vfs inode */
struct p9_qid qid;
unsigned int cache_validity;
- struct p9_fid *writeback_fid;
struct mutex v_mutex;
};

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9da47465e568..b9f857f15757 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -57,8 +57,6 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
*/
static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- struct inode *inode = file_inode(file);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid = file->private_data;

BUG_ON(!fid);
@@ -66,11 +64,8 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
/* we might need to read from a fid that was opened write-only
* for read-modify-write of page cache, use the writeback fid
* for that */
- if (rreq->origin == NETFS_READ_FOR_WRITE &&
- (fid->mode & O_ACCMODE) == O_WRONLY) {
- fid = v9inode->writeback_fid;
- BUG_ON(!fid);
- }
+ WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE &&
+ !(fid->mode & P9_ORDWR));

p9_fid_get(fid);
rreq->netfs_priv = fid;
@@ -164,6 +159,7 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
loff_t i_size = i_size_read(inode);
struct iov_iter from;
size_t len = folio_size(folio);
+ struct p9_fid *writeback_fid;
int err;

if (start >= i_size)
@@ -173,13 +169,17 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)

iov_iter_xarray(&from, WRITE, &folio_mapping(folio)->i_pages, start, len);

- /* We should have writeback_fid always set */
- BUG_ON(!v9inode->writeback_fid);
+ writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true);
+ if (!writeback_fid) {
+ WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n",
+ inode->i_private);
+ return -EINVAL;
+ }

folio_wait_fscache(folio);
folio_start_writeback(folio);

- p9_client_write(v9inode->writeback_fid, start, &from, &err);
+ p9_client_write(writeback_fid, start, &from, &err);

if (err == 0 &&
fscache_cookie_enabled(cookie) &&
@@ -192,6 +192,8 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
}

folio_end_writeback(folio);
+ p9_fid_put(writeback_fid);
+
return err;
}

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 64158664dcb4..b9873e81215e 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -43,7 +43,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
int err;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *writeback_fid;
+ struct p9_fid *fid;
int omode;

p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
@@ -60,7 +60,19 @@ int v9fs_file_open(struct inode *inode, struct file *file)
if (IS_ERR(fid))
return PTR_ERR(fid);

- err = p9_client_open(fid, omode);
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
+ int writeback_omode = (omode & !P9_OWRITE) | P9_ORDWR;
+
+ p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n");
+ err = p9_client_open(fid, writeback_omode);
+ if (err < 0) {
+ p9_debug(P9_DEBUG_CACHE, "could not open O_RDWR, disabling caches\n");
+ err = p9_client_open(fid, omode);
+ fid->mode |= P9L_DIRECT;
+ }
+ } else {
+ err = p9_client_open(fid, omode);
+ }
if (err < 0) {
p9_fid_put(fid);
return err;
@@ -72,36 +84,14 @@ int v9fs_file_open(struct inode *inode, struct file *file)
file->private_data = fid;
}

- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- writeback_fid = v9fs_writeback_fid(file_dentry(file));
- if (IS_ERR(writeback_fid)) {
- err = PTR_ERR(writeback_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out_error;
- }
- v9inode->writeback_fid = (void *) writeback_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
#endif
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);
return 0;
-out_error:
- p9_fid_put(file->private_data);
- file->private_data = NULL;
- return err;
}

/**
@@ -367,14 +357,14 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
int ret, err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);

- p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
- iov_iter_count(to), iocb->ki_pos);
+ p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n",
+ fid->fid, iov_iter_count(to), iocb->ki_pos);

- if (v9ses->cache > CACHE_MMAP)
+ if (!(fid->mode & P9L_DIRECT)) {
+ p9_debug(P9_DEBUG_VFS, "(cached)\n");
return generic_file_read_iter(iocb, to);
+ }

if (iocb->ki_filp->f_flags & O_NONBLOCK)
ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
@@ -397,14 +387,17 @@ static ssize_t
v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct p9_fid *fid = file->private_data;
ssize_t retval;
loff_t origin;
int err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);

- if (v9ses->cache >= CACHE_WRITEBACK)
+ p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid);
+
+ if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) {
+ p9_debug(P9_DEBUG_CACHE, "(cached)\n");
return generic_file_write_iter(iocb, from);
+ }

retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -488,36 +481,18 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
struct inode *inode = file_inode(filp);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- struct p9_fid *fid;
+
+ p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp);

if (v9ses->cache < CACHE_MMAP) {
+ p9_debug(P9_DEBUG_CACHE, "(no mmap mode)");
+ if (vma->vm_flags & VM_MAYSHARE)
+ return -ENODEV;
invalidate_inode_pages2(filp->f_mapping);
return generic_file_readonly_mmap(filp, vma);
}

- mutex_lock(&v9inode->v_mutex);
- if (!v9inode->writeback_fid &&
- (vma->vm_flags & VM_SHARED) &&
- (vma->vm_flags & VM_WRITE)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during mmap instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- fid = v9fs_writeback_fid(file_dentry(filp));
- if (IS_ERR(fid)) {
- retval = PTR_ERR(fid);
- mutex_unlock(&v9inode->v_mutex);
- return retval;
- }
- v9inode->writeback_fid = (void *) fid;
- }
- mutex_unlock(&v9inode->v_mutex);
-
retval = generic_file_mmap(filp, vma);
if (!retval)
vma->vm_ops = &v9fs_mmap_file_vm_ops;
@@ -551,7 +526,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
/* Update file times before taking page lock */
file_update_time(filp);

- BUG_ON(!v9inode->writeback_fid);
if (folio_lock_killable(folio) < 0)
return VM_FAULT_RETRY;
if (folio_mapping(folio) != inode->i_mapping)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index de99f9275a94..c61709d98934 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -230,7 +230,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL);
if (!v9inode)
return NULL;
- v9inode->writeback_fid = NULL;
v9inode->cache_validity = 0;
mutex_init(&v9inode->v_mutex);
return &v9inode->netfs.inode;
@@ -383,9 +382,6 @@ void v9fs_evict_inode(struct inode *inode)
filemap_fdatawrite(&inode->i_data);

fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
- /* clunk the fid stashed in writeback_fid */
- p9_fid_put(v9inode->writeback_fid);
- v9inode->writeback_fid = NULL;
}

static int v9fs_test_inode(struct inode *inode, void *data)
@@ -796,9 +792,10 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
u32 perm;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *inode_fid;
+ struct p9_fid *fid;
struct dentry *res = NULL;
struct inode *inode;
+ int p9_omode;

if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
@@ -817,9 +814,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,

v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
- fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
- v9fs_uflags2omode(flags,
- v9fs_proto_dotu(v9ses)));
+ p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
goto error;
@@ -828,25 +830,6 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9fs_invalidate_inode_attr(dir);
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto error;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
err = finish_open(file, dentry, generic_file_open);
if (err)
goto error;
@@ -855,6 +838,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);

file->f_mode |= FMODE_CREATED;
@@ -1024,7 +1009,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
} else if (v9ses->cache >= CACHE_WRITEBACK) {
if (S_ISREG(inode->i_mode))
@@ -1119,10 +1104,10 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
- else
- invalidate_mapping_pages(&inode->i_data, 0, -1);
}

v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 708cd728cc70..36e7ccdf11df 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -232,12 +232,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
int err = 0;
kgid_t gid;
umode_t mode;
+ int p9_omode = v9fs_open_to_dotl_flags(flags);
const unsigned char *name = NULL;
struct p9_qid qid;
struct inode *inode;
struct p9_fid *fid = NULL;
struct v9fs_inode *v9inode;
- struct p9_fid *dfid = NULL, *ofid = NULL, *inode_fid = NULL;
+ struct p9_fid *dfid = NULL, *ofid = NULL;
struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
struct dentry *res = NULL;
@@ -282,14 +283,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
/* Update mode based on ACL value */
err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
if (err) {
- p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n",
err);
goto out;
}
- err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
- mode, gid, &qid);
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid);
if (err < 0) {
- p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n",
err);
goto out;
}
@@ -315,25 +321,6 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);

v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
/* Since we are opening a file, assign the open fid to the file */
err = finish_open(file, dentry, generic_file_open);
if (err)
@@ -344,6 +331,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
#endif
+ v9fs_fid_add_modes(ofid, v9ses->flags, v9ses->cache, flags);
v9fs_open_fid_add(inode, &ofid);
file->f_mode |= FMODE_CREATED;
out:
@@ -464,9 +452,9 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
- } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ } else if (v9ses->cache) {
if (S_ISREG(inode->i_mode))
filemap_write_and_wait(inode->i_mapping);
}
@@ -604,10 +592,10 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
- else
- invalidate_mapping_pages(&inode->i_data, 0, -1);
}

v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 5fc6a945bfff..797f717e1a91 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -292,23 +292,26 @@ static int v9fs_write_inode(struct inode *inode,
{
int ret;
struct p9_wstat wstat;
+ struct p9_fid *fid = v9fs_fid_find_inode(inode, false, INVALID_UID, true);
struct v9fs_inode *v9inode;
+
/*
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
- p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+ p9_debug(P9_DEBUG_VFS, "%s: inode %p writeback_fid: %p\n", __func__, inode, fid);
v9inode = V9FS_I(inode);
- if (!v9inode->writeback_fid)
- return 0;
+ if (!fid)
+ return -EINVAL;
v9fs_blank_wstat(&wstat);

- ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+ ret = p9_client_wstat(fid, &wstat);
if (ret < 0) {
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+ p9_fid_put(fid);
return 0;
}

@@ -316,6 +319,7 @@ static int v9fs_write_inode_dotl(struct inode *inode,
struct writeback_control *wbc)
{
int ret;
+ struct p9_fid *fid = v9fs_fid_find_inode(inode, FMODE_WRITE, INVALID_UID, 1);
struct v9fs_inode *v9inode;
/*
* send an fsync request to server irrespective of
@@ -323,16 +327,17 @@ static int v9fs_write_inode_dotl(struct inode *inode,
*/
v9inode = V9FS_I(inode);
p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
- __func__, inode, v9inode->writeback_fid);
- if (!v9inode->writeback_fid)
- return 0;
+ __func__, inode, fid);
+ if (!fid)
+ return -EINVAL;

- ret = p9_client_fsync(v9inode->writeback_fid, 0);
+ ret = p9_client_fsync(fid, 0);
if (ret < 0) {
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+ p9_fid_put(fid);
return 0;
}

--
2.37.2

2022-12-19 00:25:23

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v2 08/10] Add new mount modes

Add some additional mount modes for cache management including
specifying directio as a mount option and an option for ignore
qid.version for determining whether or not a file is cacheable.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 16 ++++++++++++++--
fs/9p/v9fs.h | 5 ++++-
2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8e952c013f9..43d3806150a9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -38,7 +38,7 @@ enum {
/* String options */
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
- Opt_nodevmap, Opt_noxattr,
+ Opt_nodevmap, Opt_noxattr, Opt_directio, Opt_ignoreqv,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -56,6 +56,8 @@ static const match_table_t tokens = {
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
{Opt_noxattr, "noxattr"},
+ {Opt_directio, "directio"},
+ {Opt_ignoreqv, "ignoreqv"},
{Opt_cache, "cache=%s"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
@@ -125,7 +127,7 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
if (v9ses->nodev)
seq_puts(m, ",nodevmap");
if (v9ses->cache)
- seq_printf(m, ",%s", v9fs_cache_modes[v9ses->cache]);
+ seq_printf(m, ",cache=%s", v9fs_cache_modes[v9ses->cache]);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cachetag && v9ses->cache == CACHE_FSCACHE)
seq_printf(m, ",cachetag=%s", v9ses->cachetag);
@@ -147,6 +149,10 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
break;
}

+ if (v9ses->flags & V9FS_IGNORE_QV)
+ seq_puts(m, ",ignoreqv");
+ if (v9ses->flags & V9FS_DIRECT_IO)
+ seq_puts(m, ",directio");
if (v9ses->flags & V9FS_POSIX_ACL)
seq_puts(m, ",posixacl");

@@ -276,6 +282,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_noxattr:
v9ses->flags |= V9FS_NO_XATTR;
break;
+ case Opt_directio:
+ v9ses->flags |= V9FS_DIRECT_IO;
+ break;
+ case Opt_ignoreqv:
+ v9ses->flags |= V9FS_IGNORE_QV;
+ break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a08cf6618c86..c80c318ff31c 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -37,7 +37,10 @@ enum p9_session_flags {
V9FS_ACCESS_USER = 0x08,
V9FS_ACCESS_CLIENT = 0x10,
V9FS_POSIX_ACL = 0x20,
- V9FS_NO_XATTR = 0x40
+ V9FS_NO_XATTR = 0x40,
+ V9FS_IGNORE_QV = 0x80,
+ V9FS_DIRECT_IO = 0x100,
+ V9FS_SYNC = 0x200
};

/* possible values of ->cache */
--
2.37.2

2022-12-19 00:25:43

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v2 06/10] fix bug in client create for .L

We are supposed to set fid->mode to reflect the flags
that were used to open the file. We were actually setting
it to the creation mode which is the default perms of the
file not the flags the file was opened with.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
net/9p/client.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index f982d36b55b8..4ae41f8f7286 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1293,7 +1293,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
qid->type, qid->path, qid->version, iounit);

memmove(&ofid->qid, qid, sizeof(struct p9_qid));
- ofid->mode = mode;
+ ofid->mode = flags;
ofid->iounit = iounit;

free_and_error:
--
2.37.2

2022-12-19 00:26:54

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v2 07/10] Add additional debug flags and open modes

Add some additional debug flags to assist with debugging
cache changes. Also add some additional open modes so we
can track cache state in fids more directly.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
include/net/9p/9p.h | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 429adf6be29c..61c20b89becd 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -42,6 +42,8 @@ enum p9_debug_flags {
P9_DEBUG_PKT = (1<<10),
P9_DEBUG_FSC = (1<<11),
P9_DEBUG_VPKT = (1<<12),
+ P9_DEBUG_CACHE = (1<<13),
+ P9_DEBUG_MMAP = (1<<14),
};

#ifdef CONFIG_NET_9P_DEBUG
@@ -213,6 +215,9 @@ enum p9_open_mode_t {
P9_ORCLOSE = 0x40,
P9_OAPPEND = 0x80,
P9_OEXCL = 0x1000,
+ P9L_DIRECT = 0x2000, /* cache disabled */
+ P9L_NOWRITECACHE = 0x4000, /* no write caching */
+ P9L_LOOSE = 0x8000, /* loose cache */
};

/**
--
2.37.2

2023-01-23 17:26:45

by Christian Schoenebeck

[permalink] [raw]
Subject: Re: [PATCH v2 00/10] Performance fixes for 9p filesystem

On Monday, December 19, 2022 12:22:07 AM CET Eric Van Hensbergen wrote:
> This is the second version of a patch series which adds a number
> of features to improve read/write performance in the 9p filesystem.
> Mostly it focuses on fixing caching to help utilize the recently
> increased MSIZE limits and also fixes some problematic behavior
> within the writeback code.
>
> Altogether, these show roughly 10x speed increases on simple
> file transfers. Future patch sets will improve cache consistency
> and directory caching.
>
> These patches are also available on github:
> https://github.com/v9fs/linux/tree/ericvh/9p-next
>
> Tested against qemu, cpu, and diod with fsx, dbench, and some
> simple benchmarks.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>

Hi Eric,

what's your plan on this series? I just had a look at your github repo and saw
there is a lot of stuff marked as WIP.

Best regards,
Christian Schoenebeck



2023-01-24 02:39:12

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 00/11] Performance fixes for 9p filesystem

This is the third version of a patch series which adds a number
of features to improve read/write performance in the 9p filesystem.
Mostly it focuses on fixing caching to help utilize the recently
increased MSIZE limits and also fixes some problematic behavior
within the writeback code.

All together, these show roughly 10x speed increases on simple
file transfers. Future patch sets will improve cache consistency
and directory caching.

These patches are also available on github:
https://github.com/v9fs/linux/tree/ericvh/for-next
and on kernel.org:
https://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git

Tested against qemu, cpu, and diod with fsx, dbench, and some
simple benchmarks.

Eric Van Hensbergen (11):
Adjust maximum MSIZE to account for p9 header
Expand setup of writeback cache to all levels
Consolidate file operations and add readahead and writeback
Remove unnecessary superblock flags
allow disable of xattr support on mount
fix bug in client create for .L
Add additional debug flags and open modes
Add new mount modes
fix error reporting in v9fs_dir_release
writeback mode fixes
Fix revalidate

Documentation/filesystems/9p.rst | 26 +++--
fs/9p/fid.c | 52 ++++-----
fs/9p/fid.h | 33 +++++-
fs/9p/v9fs.c | 49 +++++---
fs/9p/v9fs.h | 9 +-
fs/9p/v9fs_vfs.h | 4 -
fs/9p/vfs_addr.c | 24 ++--
fs/9p/vfs_dentry.c | 3 +-
fs/9p/vfs_dir.c | 16 ++-
fs/9p/vfs_file.c | 194 +++++++------------------------
fs/9p/vfs_inode.c | 71 ++++-------
fs/9p/vfs_inode_dotl.c | 62 +++++-----
fs/9p/vfs_super.c | 28 +++--
include/net/9p/9p.h | 5 +
net/9p/client.c | 8 +-
15 files changed, 256 insertions(+), 328 deletions(-)

--
2.37.2


2023-01-24 02:39:14

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 01/11] Adjust maximum MSIZE to account for p9 header

Add maximum p9 header size to MSIZE to make sure we can
have page aligned data.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
net/9p/client.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 622ec6a586ee..6c2a768a6ab1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -28,7 +28,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/9p.h>

-#define DEFAULT_MSIZE (128 * 1024)
+/* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ +
+ * room for write (16 extra) or read (11 extra) operands.
+ */
+
+#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)

/* Client Option Parsing (code inspired by NFS code)
* - a little lazy - parse all client options
--
2.37.2


2023-01-24 02:39:26

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 04/11] Remove unnecessary superblock flags

These flags just add unnecessary extra operations.
When 9p is run without cache, it inherently implements
these options so we don't need them in the superblock
(which ends up sending extraneous fsyncs, etc.). User
can still request these options on mount, but we don't
need to set them as default.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/vfs_super.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 266c4693e20c..65d96fa94ba2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -84,9 +84,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_bdi->io_pages = v9ses->maxdata >> PAGE_SHIFT;
}

- sb->s_flags |= SB_ACTIVE | SB_DIRSYNC;
- if (!v9ses->cache)
- sb->s_flags |= SB_SYNCHRONOUS;
+ sb->s_flags |= SB_ACTIVE;

#ifdef CONFIG_9P_FS_POSIX_ACL
if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
--
2.37.2


2023-01-24 02:39:28

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 02/11] Expand setup of writeback cache to all levels

If cache is enabled, make sure we are putting the right things
in place (mainly impacts mmap). This also sets us up for more
cache levels.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 2 +-
fs/9p/vfs_addr.c | 2 --
fs/9p/vfs_file.c | 7 ++++---
fs/9p/vfs_inode.c | 3 +--
fs/9p/vfs_inode_dotl.c | 7 ++++---
5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 3a9c4517265f..61a51b90600d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -468,7 +468,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,

#ifdef CONFIG_9P_FSCACHE
/* register the session for caching */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ if (v9ses->cache == CACHE_FSCACHE) {
rc = v9fs_cache_session_get_cookie(v9ses, dev_name);
if (rc < 0)
goto err_clnt;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 97599edbc300..6f46d7e4c750 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -279,8 +279,6 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,

p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);

- BUG_ON(!v9inode->writeback_fid);
-
/* Prefetch area to be written into the cache if we're caching this
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b740017634ef..3b6458846a0b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -73,8 +73,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}

mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((file->f_flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -92,9 +91,11 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9inode->writeback_fid = (void *) writeback_fid;
}
mutex_unlock(&v9inode->v_mutex);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+#endif
v9fs_open_fid_add(inode, &fid);
return 0;
out_error:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 27a04a226d97..33e521c60e2c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -843,8 +843,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index f806b3f11649..bff37a312e64 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -316,8 +316,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,

v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -340,9 +339,11 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err)
goto out;
file->private_data = ofid;
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+#endif
v9fs_open_fid_add(inode, &ofid);
file->f_mode |= FMODE_CREATED;
out:
--
2.37.2


2023-01-24 02:39:32

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 05/11] allow disable of xattr support on mount

xattr creates a lot of additional messages for 9p in
the current implementation. This allows users to
conditionalize xattr support on 9p mount if they
are on a connection with bad latency. Using this
flag is also useful when debugging other aspects
of 9p as it reduces the noise in the trace files.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
Documentation/filesystems/9p.rst | 2 ++
fs/9p/v9fs.c | 9 ++++++++-
fs/9p/v9fs.h | 3 ++-
fs/9p/vfs_super.c | 3 ++-
4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
index 7b5964bc8865..0e800b8f73cc 100644
--- a/Documentation/filesystems/9p.rst
+++ b/Documentation/filesystems/9p.rst
@@ -137,6 +137,8 @@ Options
This can be used to share devices/named pipes/sockets between
hosts. This functionality will be expanded in later versions.

+ noxattr do not offer xattr functions on this mount.
+
access there are four access modes.
user
if a user tries to access a file on v9fs
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index a46bf9121f11..f8e952c013f9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -38,7 +38,7 @@ enum {
/* String options */
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
- Opt_nodevmap,
+ Opt_nodevmap, Opt_noxattr,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -55,6 +55,7 @@ static const match_table_t tokens = {
{Opt_uname, "uname=%s"},
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
+ {Opt_noxattr, "noxattr"},
{Opt_cache, "cache=%s"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
@@ -149,6 +150,9 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
if (v9ses->flags & V9FS_POSIX_ACL)
seq_puts(m, ",posixacl");

+ if (v9ses->flags & V9FS_NO_XATTR)
+ seq_puts(m, ",noxattr");
+
return p9_show_client_options(m, v9ses->clnt);
}

@@ -269,6 +273,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_nodevmap:
v9ses->nodev = 1;
break;
+ case Opt_noxattr:
+ v9ses->flags |= V9FS_NO_XATTR;
+ break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 5813967ecdf0..a08cf6618c86 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -36,7 +36,8 @@ enum p9_session_flags {
V9FS_ACCESS_SINGLE = 0x04,
V9FS_ACCESS_USER = 0x08,
V9FS_ACCESS_CLIENT = 0x10,
- V9FS_POSIX_ACL = 0x20
+ V9FS_POSIX_ACL = 0x20,
+ V9FS_NO_XATTR = 0x40
};

/* possible values of ->cache */
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 65d96fa94ba2..5fc6a945bfff 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -64,7 +64,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_magic = V9FS_MAGIC;
if (v9fs_proto_dotl(v9ses)) {
sb->s_op = &v9fs_super_ops_dotl;
- sb->s_xattr = v9fs_xattr_handlers;
+ if (!(v9ses->flags & V9FS_NO_XATTR))
+ sb->s_xattr = v9fs_xattr_handlers;
} else {
sb->s_op = &v9fs_super_ops;
sb->s_time_max = U32_MAX;
--
2.37.2


2023-01-24 02:39:35

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 03/11] Consolidate file operations and add readahead and writeback

We had 3 different sets of file operations across 2 different protocol
variants differentiated by cache which really only changed 3
functions. But the real problem is that certain file modes, mount
options, and other factors weren't being considered when we
decided whether or not to use caches.

This consolidates all the operations and switches
to conditionals within a common set to decide whether or not
to do different aspects of caching.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 30 ++++------
fs/9p/v9fs.h | 2 +
fs/9p/v9fs_vfs.h | 4 --
fs/9p/vfs_dir.c | 9 +++
fs/9p/vfs_file.c | 123 +++++++----------------------------------
fs/9p/vfs_inode.c | 31 ++++-------
fs/9p/vfs_inode_dotl.c | 19 ++++++-
7 files changed, 71 insertions(+), 147 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 61a51b90600d..a46bf9121f11 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,8 +39,6 @@ enum {
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
Opt_nodevmap,
- /* Cache options */
- Opt_cache_loose, Opt_fscache, Opt_mmap,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -58,9 +56,6 @@ static const match_table_t tokens = {
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
{Opt_cache, "cache=%s"},
- {Opt_cache_loose, "loose"},
- {Opt_fscache, "fscache"},
- {Opt_mmap, "mmap"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_posixacl, "posixacl"},
@@ -69,10 +64,12 @@ static const match_table_t tokens = {
};

static const char *const v9fs_cache_modes[nr__p9_cache_modes] = {
- [CACHE_NONE] = "none",
- [CACHE_MMAP] = "mmap",
- [CACHE_LOOSE] = "loose",
- [CACHE_FSCACHE] = "fscache",
+ [CACHE_NONE] = "none",
+ [CACHE_READAHEAD] = "readahead",
+ [CACHE_WRITEBACK] = "writeback",
+ [CACHE_MMAP] = "mmap",
+ [CACHE_LOOSE] = "loose",
+ [CACHE_FSCACHE] = "fscache",
};

/* Interpret mount options for cache mode */
@@ -89,6 +86,12 @@ static int get_cache_mode(char *s)
} else if (!strcmp(s, "mmap")) {
version = CACHE_MMAP;
p9_debug(P9_DEBUG_9P, "Cache mode: mmap\n");
+ } else if (!strcmp(s, "writeback")) {
+ version = CACHE_WRITEBACK;
+ p9_debug(P9_DEBUG_9P, "Cache mode: writeback\n");
+ } else if (!strcmp(s, "readahead")) {
+ version = CACHE_READAHEAD;
+ p9_debug(P9_DEBUG_9P, "Cache mode: readahead\n");
} else if (!strcmp(s, "none")) {
version = CACHE_NONE;
p9_debug(P9_DEBUG_9P, "Cache mode: none\n");
@@ -266,15 +269,6 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_nodevmap:
v9ses->nodev = 1;
break;
- case Opt_cache_loose:
- v9ses->cache = CACHE_LOOSE;
- break;
- case Opt_fscache:
- v9ses->cache = CACHE_FSCACHE;
- break;
- case Opt_mmap:
- v9ses->cache = CACHE_MMAP;
- break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 6acabc2e7dc9..5813967ecdf0 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -51,6 +51,8 @@ enum p9_session_flags {
enum p9_cache_modes {
CACHE_NONE,
CACHE_MMAP,
+ CACHE_READAHEAD,
+ CACHE_WRITEBACK,
CACHE_LOOSE,
CACHE_FSCACHE,
nr__p9_cache_modes
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index bc417da7e9c1..cce87c9bdd8b 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -36,10 +36,6 @@ extern const struct file_operations v9fs_dir_operations;
extern const struct file_operations v9fs_dir_operations_dotl;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
-extern const struct file_operations v9fs_cached_file_operations;
-extern const struct file_operations v9fs_cached_file_operations_dotl;
-extern const struct file_operations v9fs_mmap_file_operations;
-extern const struct file_operations v9fs_mmap_file_operations_dotl;
extern struct kmem_cache *v9fs_inode_cache;

struct inode *v9fs_alloc_inode(struct super_block *sb);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 59b0e8948f78..ec831c27a58e 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -214,6 +214,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
if (fid) {
+ if ((fid->qid.type == P9_QTFILE) && (filp->f_mode & FMODE_WRITE)) {
+ int retval = file_write_and_wait_range(filp, 0, -1);
+
+ if (retval != 0) {
+ p9_debug(P9_DEBUG_ERROR,
+ "trying to flush filp %p failed with error code %d\n",
+ filp, retval);
+ }
+ }
spin_lock(&inode->i_lock);
hlist_del(&fid->ilist);
spin_unlock(&inode->i_lock);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3b6458846a0b..64158664dcb4 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -73,7 +73,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}

mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
+ if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
((file->f_flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -367,10 +367,15 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
int ret, err = 0;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);

p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
iov_iter_count(to), iocb->ki_pos);

+ if (v9ses->cache > CACHE_MMAP)
+ return generic_file_read_iter(iocb, to);
+
if (iocb->ki_filp->f_flags & O_NONBLOCK)
ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
else
@@ -395,6 +400,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t retval;
loff_t origin;
int err = 0;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+
+ if (v9ses->cache >= CACHE_WRITEBACK)
+ return generic_file_write_iter(iocb, from);

retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -477,25 +487,16 @@ static int
v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
-
-
- retval = generic_file_mmap(filp, vma);
- if (!retval)
- vma->vm_ops = &v9fs_file_vm_ops;
-
- return retval;
-}
-
-static int
-v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- int retval;
- struct inode *inode;
- struct v9fs_inode *v9inode;
+ struct inode *inode = file_inode(filp);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
struct p9_fid *fid;

- inode = file_inode(filp);
- v9inode = V9FS_I(inode);
+ if (v9ses->cache < CACHE_MMAP) {
+ invalidate_inode_pages2(filp->f_mapping);
+ return generic_file_readonly_mmap(filp, vma);
+ }
+
mutex_lock(&v9inode->v_mutex);
if (!v9inode->writeback_fid &&
(vma->vm_flags & VM_SHARED) &&
@@ -563,35 +564,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}

-/**
- * v9fs_mmap_file_read_iter - read from a file
- * @iocb: The operation parameters
- * @to: The buffer to read into
- *
- */
-static ssize_t
-v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- /* TODO: Check if there are dirty pages */
- return v9fs_file_read_iter(iocb, to);
-}
-
-/**
- * v9fs_mmap_file_write_iter - write to a file
- * @iocb: The operation parameters
- * @from: The data to write
- *
- */
-static ssize_t
-v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- /*
- * TODO: invalidate mmaps on filp's inode between
- * offset and offset+count
- */
- return v9fs_file_write_iter(iocb, from);
-}
-
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
{
struct inode *inode;
@@ -628,34 +600,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
.page_mkwrite = v9fs_vm_page_mkwrite,
};

-
-const struct file_operations v9fs_cached_file_operations = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock,
- .mmap = v9fs_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync,
-};
-
-const struct file_operations v9fs_cached_file_operations_dotl = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock_dotl,
- .flock = v9fs_file_flock_dotl,
- .mmap = v9fs_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync_dotl,
-};
-
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
.read_iter = v9fs_file_read_iter,
@@ -677,34 +621,7 @@ const struct file_operations v9fs_file_operations_dotl = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
- .mmap = generic_file_readonly_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync_dotl,
-};
-
-const struct file_operations v9fs_mmap_file_operations = {
- .llseek = generic_file_llseek,
- .read_iter = v9fs_mmap_file_read_iter,
- .write_iter = v9fs_mmap_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock,
- .mmap = v9fs_mmap_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync,
-};
-
-const struct file_operations v9fs_mmap_file_operations_dotl = {
- .llseek = generic_file_llseek,
- .read_iter = v9fs_mmap_file_read_iter,
- .write_iter = v9fs_mmap_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock_dotl,
- .flock = v9fs_file_flock_dotl,
- .mmap = v9fs_mmap_file_mmap,
+ .mmap = v9fs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 33e521c60e2c..de99f9275a94 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -287,24 +287,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
case S_IFREG:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- if (v9ses->cache == CACHE_LOOSE ||
- v9ses->cache == CACHE_FSCACHE)
- inode->i_fop =
- &v9fs_cached_file_operations_dotl;
- else if (v9ses->cache == CACHE_MMAP)
- inode->i_fop = &v9fs_mmap_file_operations_dotl;
- else
- inode->i_fop = &v9fs_file_operations_dotl;
+ inode->i_fop = &v9fs_file_operations_dotl;
} else {
inode->i_op = &v9fs_file_inode_operations;
- if (v9ses->cache == CACHE_LOOSE ||
- v9ses->cache == CACHE_FSCACHE)
- inode->i_fop =
- &v9fs_cached_file_operations;
- else if (v9ses->cache == CACHE_MMAP)
- inode->i_fop = &v9fs_mmap_file_operations;
- else
- inode->i_fop = &v9fs_file_operations;
+ inode->i_fop = &v9fs_file_operations;
}

break;
@@ -843,7 +829,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
+ if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -1030,6 +1016,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
struct p9_wstat *st;
@@ -1039,6 +1026,9 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
+ } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ if (S_ISREG(inode->i_mode))
+ filemap_write_and_wait(inode->i_mapping);
}
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
@@ -1127,9 +1117,12 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
return retval;

if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(inode)) {
+ iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
- fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ if (v9ses->cache == CACHE_FSCACHE)
+ fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ else
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
}

v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index bff37a312e64..8e104ba544ad 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -458,6 +458,7 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
struct dentry *dentry = path->dentry;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct inode *inode = d_inode(dentry);
struct p9_stat_dotl *st;

p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
@@ -465,6 +466,9 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
+ } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ if (S_ISREG(inode->i_mode))
+ filemap_write_and_wait(inode->i_mapping);
}
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
@@ -540,12 +544,14 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *iattr)
{
int retval, use_dentry = 0;
+ struct inode *inode = d_inode(dentry);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL;
struct p9_iattr_dotl p9attr = {
.uid = INVALID_UID,
.gid = INVALID_GID,
};
- struct inode *inode = d_inode(dentry);

p9_debug(P9_DEBUG_VFS, "\n");

@@ -553,6 +559,8 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
if (retval)
return retval;

+ v9ses = v9fs_dentry2v9ses(dentry);
+
p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
if (iattr->ia_valid & ATTR_MODE)
p9attr.mode = iattr->ia_mode;
@@ -593,9 +601,14 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
return retval;
}

- if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(inode))
+ if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
+ i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ if (v9ses->cache == CACHE_FSCACHE)
+ fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ else
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
+ }

v9fs_invalidate_inode_attr(inode);
setattr_copy(&init_user_ns, inode, iattr);
--
2.37.2


2023-01-24 02:39:37

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 07/11] Add additional debug flags and open modes

Add some additional debug flags to assist with debugging
cache changes. Also add some additional open modes so we
can track cache state in fids more directly.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
include/net/9p/9p.h | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 429adf6be29c..61c20b89becd 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -42,6 +42,8 @@ enum p9_debug_flags {
P9_DEBUG_PKT = (1<<10),
P9_DEBUG_FSC = (1<<11),
P9_DEBUG_VPKT = (1<<12),
+ P9_DEBUG_CACHE = (1<<13),
+ P9_DEBUG_MMAP = (1<<14),
};

#ifdef CONFIG_NET_9P_DEBUG
@@ -213,6 +215,9 @@ enum p9_open_mode_t {
P9_ORCLOSE = 0x40,
P9_OAPPEND = 0x80,
P9_OEXCL = 0x1000,
+ P9L_DIRECT = 0x2000, /* cache disabled */
+ P9L_NOWRITECACHE = 0x4000, /* no write caching */
+ P9L_LOOSE = 0x8000, /* loose cache */
};

/**
--
2.37.2


2023-01-24 02:39:41

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 06/11] fix bug in client create for .L

We are supposed to set fid->mode to reflect the flags
that were used to open the file. We were actually setting
it to the creation mode which is the default perms of the
file not the flags the file was opened with.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
net/9p/client.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 6c2a768a6ab1..2adcb5e7b0e2 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1293,7 +1293,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
qid->type, qid->path, qid->version, iounit);

memmove(&ofid->qid, qid, sizeof(struct p9_qid));
- ofid->mode = mode;
+ ofid->mode = flags;
ofid->iounit = iounit;

free_and_error:
--
2.37.2


2023-01-24 02:39:56

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 09/11] fix error reporting in v9fs_dir_release

Checking the p9_fid_put value allows us to pass back errors
involved if we end up clunking the fid as part of dir_release.

This can help with more graceful response to errors in writeback
among other things.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/vfs_dir.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index ec831c27a58e..1fc07bb86e6f 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -197,7 +197,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx)


/**
- * v9fs_dir_release - close a directory
+ * v9fs_dir_release - called on a close of a file or directory
* @inode: inode of the directory
* @filp: file pointer to a directory
*
@@ -209,6 +209,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
struct p9_fid *fid;
__le32 version;
loff_t i_size;
+ int retval = 0;

fid = filp->private_data;
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
@@ -226,7 +227,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
spin_lock(&inode->i_lock);
hlist_del(&fid->ilist);
spin_unlock(&inode->i_lock);
- p9_fid_put(fid);
+ retval = p9_fid_put(fid);
}

if ((filp->f_mode & FMODE_WRITE)) {
@@ -237,7 +238,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
} else {
fscache_unuse_cookie(v9fs_inode_cookie(v9inode), NULL, NULL);
}
- return 0;
+ return retval;
}

const struct file_operations v9fs_dir_operations = {
--
2.37.2


2023-01-24 02:40:03

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 08/11] Add new mount modes

Add some additional mount modes for cache management including
specifying directio as a mount option and an option for ignore
qid.version for determining whether or not a file is cacheable.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 16 ++++++++++++++--
fs/9p/v9fs.h | 5 ++++-
2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8e952c013f9..43d3806150a9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -38,7 +38,7 @@ enum {
/* String options */
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
- Opt_nodevmap, Opt_noxattr,
+ Opt_nodevmap, Opt_noxattr, Opt_directio, Opt_ignoreqv,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -56,6 +56,8 @@ static const match_table_t tokens = {
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
{Opt_noxattr, "noxattr"},
+ {Opt_directio, "directio"},
+ {Opt_ignoreqv, "ignoreqv"},
{Opt_cache, "cache=%s"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
@@ -125,7 +127,7 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
if (v9ses->nodev)
seq_puts(m, ",nodevmap");
if (v9ses->cache)
- seq_printf(m, ",%s", v9fs_cache_modes[v9ses->cache]);
+ seq_printf(m, ",cache=%s", v9fs_cache_modes[v9ses->cache]);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cachetag && v9ses->cache == CACHE_FSCACHE)
seq_printf(m, ",cachetag=%s", v9ses->cachetag);
@@ -147,6 +149,10 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
break;
}

+ if (v9ses->flags & V9FS_IGNORE_QV)
+ seq_puts(m, ",ignoreqv");
+ if (v9ses->flags & V9FS_DIRECT_IO)
+ seq_puts(m, ",directio");
if (v9ses->flags & V9FS_POSIX_ACL)
seq_puts(m, ",posixacl");

@@ -276,6 +282,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_noxattr:
v9ses->flags |= V9FS_NO_XATTR;
break;
+ case Opt_directio:
+ v9ses->flags |= V9FS_DIRECT_IO;
+ break;
+ case Opt_ignoreqv:
+ v9ses->flags |= V9FS_IGNORE_QV;
+ break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a08cf6618c86..c80c318ff31c 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -37,7 +37,10 @@ enum p9_session_flags {
V9FS_ACCESS_USER = 0x08,
V9FS_ACCESS_CLIENT = 0x10,
V9FS_POSIX_ACL = 0x20,
- V9FS_NO_XATTR = 0x40
+ V9FS_NO_XATTR = 0x40,
+ V9FS_IGNORE_QV = 0x80,
+ V9FS_DIRECT_IO = 0x100,
+ V9FS_SYNC = 0x200
};

/* possible values of ->cache */
--
2.37.2


2023-01-24 02:40:19

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 10/11] writeback mode fixes

This fixes several detected problems from preivous
patches when running with writeback mode. In
particular this fixes issues with files which are opened
as write only and getattr on files which dirty caches.

This patch makes sure that cache behavior for an open file is stored in
the client copy of fid->mode. This allows us to reflect cache behavior
from mount flags, open mode, and information from the server to
inform readahead and writeback behavior.

This includes adding support for a 9p semantic that qid.version==0
is used to mark a file as non-cachable which is important for
synthetic files. This may have a side-effect of not supporting
caching on certain legacy file servers that do not properly set
qid.version. There is also now a mount flag which can disable
the qid.version behavior.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
Documentation/filesystems/9p.rst | 24 ++++-----
fs/9p/fid.c | 49 +++++++-----------
fs/9p/fid.h | 33 +++++++++++-
fs/9p/v9fs.h | 1 -
fs/9p/vfs_addr.c | 22 ++++----
fs/9p/vfs_file.c | 88 +++++++++++---------------------
fs/9p/vfs_inode.c | 45 ++++++----------
fs/9p/vfs_inode_dotl.c | 44 ++++++----------
fs/9p/vfs_super.c | 21 +++++---
9 files changed, 150 insertions(+), 177 deletions(-)

diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
index 0e800b8f73cc..0c2c7a181d85 100644
--- a/Documentation/filesystems/9p.rst
+++ b/Documentation/filesystems/9p.rst
@@ -79,18 +79,14 @@ Options

cache=mode specifies a caching policy. By default, no caches are used.

- none
- default no cache policy, metadata and data
- alike are synchronous.
- loose
- no attempts are made at consistency,
- intended for exclusive, read-only mounts
- fscache
- use FS-Cache for a persistent, read-only
- cache backend.
- mmap
- minimal cache that is only used for read-write
- mmap. Northing else is cached, like cache=none
+ ========= =============================================
+ none no cache of file or metadata
+ readahead readahead caching of files
+ writeback delayed writeback of files
+ mmap support mmap operations read/write with cache
+ loose meta-data and file cache with no coherency
+ fscache use FS-Cache for a persistent cache backend
+ ========= =============================================

debug=n specifies debug level. The debug level is a bitmask.

@@ -137,6 +133,10 @@ Options
This can be used to share devices/named pipes/sockets between
hosts. This functionality will be expanded in later versions.

+ directio bypass page cache on all read/write operations
+
+ ignoreqv ignore qid.version==0 as a marker to ignore cache
+
noxattr do not offer xattr functions on this mount.

access there are four access modes.
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 805151114e96..8c1697619f3d 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -23,7 +23,6 @@ static inline void __add_fid(struct dentry *dentry, struct p9_fid *fid)
hlist_add_head(&fid->dlist, (struct hlist_head *)&dentry->d_fsdata);
}

-
/**
* v9fs_fid_add - add a fid to a dentry
* @dentry: dentry that the fid is being added to
@@ -41,14 +40,24 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid **pfid)
*pfid = NULL;
}

+static bool v9fs_is_writeable(int mode)
+{
+ if ((mode & P9_OWRITE) || (mode & P9_ORDWR))
+ return true;
+ else
+ return false;
+}
+
/**
* v9fs_fid_find_inode - search for an open fid off of the inode list
* @inode: return a fid pointing to a specific inode
+ * @writeable: only consider fids which are writeable
* @uid: return a fid belonging to the specified user
+ * @any: ignore uid as a selection criteria
*
*/
-
-static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any)
{
struct hlist_head *h;
struct p9_fid *fid, *ret = NULL;
@@ -58,7 +67,12 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
spin_lock(&inode->i_lock);
h = (struct hlist_head *)&inode->i_private;
hlist_for_each_entry(fid, h, ilist) {
- if (uid_eq(fid->uid, uid)) {
+ if (any || uid_eq(fid->uid, uid)) {
+ if (want_writeable && !v9fs_is_writeable(fid->mode)) {
+ p9_debug(P9_DEBUG_VFS, " mode: %x not writeable?\n",
+ fid->mode);
+ continue;
+ }
p9_fid_get(fid);
ret = fid;
break;
@@ -118,7 +132,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
spin_unlock(&dentry->d_lock);
} else {
if (dentry->d_inode)
- ret = v9fs_fid_find_inode(dentry->d_inode, uid);
+ ret = v9fs_fid_find_inode(dentry->d_inode, false, uid, any);
}

return ret;
@@ -299,28 +313,3 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
return v9fs_fid_lookup_with_uid(dentry, uid, any);
}

-struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
-{
- int err;
- struct p9_fid *fid, *ofid;
-
- ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0);
- fid = clone_fid(ofid);
- if (IS_ERR(fid))
- goto error_out;
- p9_fid_put(ofid);
- /*
- * writeback fid will only be used to write back the
- * dirty pages. We always request for the open fid in read-write
- * mode so that a partial page write which result in page
- * read can work.
- */
- err = p9_client_open(fid, O_RDWR);
- if (err < 0) {
- p9_fid_put(fid);
- fid = ERR_PTR(err);
- goto error_out;
- }
-error_out:
- return fid;
-}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 8a4e8cd12ca2..11576e1364bf 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -7,14 +7,16 @@
#ifndef FS_9P_FID_H
#define FS_9P_FID_H
#include <linux/list.h>
+#include "v9fs.h"

+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any);
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry)
{
return v9fs_fid_lookup(dentry->d_parent);
}
void v9fs_fid_add(struct dentry *dentry, struct p9_fid **fid);
-struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
void v9fs_open_fid_add(struct inode *inode, struct p9_fid **fid);
static inline struct p9_fid *clone_fid(struct p9_fid *fid)
{
@@ -32,4 +34,33 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
p9_fid_put(fid);
return nfid;
}
+/**
+ * v9fs_fid_addmodes - add cache flags to fid mode (for client use only)
+ * @fid: fid to augment
+ * @s_flags: session info mount flags
+ * @s_cache: session info cache flags
+ * @f_flags: unix open flags
+ *
+ * make sure mode reflects flags of underlying mounts
+ * also qid.version == 0 reflects a synthetic or legacy file system
+ * NOTE: these are set after open so only reflect 9p client not
+ * underlying file system on server.
+ */
+static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
+ int s_cache, unsigned int f_flags)
+{
+ if (fid->qid.type != P9_QTFILE)
+ return;
+
+ if ((!s_cache) ||
+ ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) ||
+ (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
+ fid->mode |= P9L_DIRECT; /* no read or write cache */
+ } else if ((s_cache < CACHE_WRITEBACK) ||
+ (f_flags & O_DSYNC) | (s_flags & V9FS_SYNC)) {
+ fid->mode |= P9L_NOWRITECACHE;
+ } else if (s_cache == CACHE_LOOSE) {
+ fid->mode |= P9L_LOOSE; /* noncoherent cache */
+ }
+}
#endif
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c80c318ff31c..9c6bc57512bf 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -118,7 +118,6 @@ struct v9fs_inode {
struct netfs_inode netfs; /* Netfslib context and vfs inode */
struct p9_qid qid;
unsigned int cache_validity;
- struct p9_fid *writeback_fid;
struct mutex v_mutex;
};

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6f46d7e4c750..211165430a8a 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -57,8 +57,6 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
*/
static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- struct inode *inode = file_inode(file);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid = file->private_data;

BUG_ON(!fid);
@@ -66,11 +64,8 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
/* we might need to read from a fid that was opened write-only
* for read-modify-write of page cache, use the writeback fid
* for that */
- if (rreq->origin == NETFS_READ_FOR_WRITE &&
- (fid->mode & O_ACCMODE) == O_WRONLY) {
- fid = v9inode->writeback_fid;
- BUG_ON(!fid);
- }
+ WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE &&
+ !(fid->mode & P9_ORDWR));

p9_fid_get(fid);
rreq->netfs_priv = fid;
@@ -164,6 +159,7 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
loff_t i_size = i_size_read(inode);
struct iov_iter from;
size_t len = folio_size(folio);
+ struct p9_fid *writeback_fid;
int err;

if (start >= i_size)
@@ -173,13 +169,17 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)

iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len);

- /* We should have writeback_fid always set */
- BUG_ON(!v9inode->writeback_fid);
+ writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true);
+ if (!writeback_fid) {
+ WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n",
+ inode->i_private);
+ return -EINVAL;
+ }

folio_wait_fscache(folio);
folio_start_writeback(folio);

- p9_client_write(v9inode->writeback_fid, start, &from, &err);
+ p9_client_write(writeback_fid, start, &from, &err);

if (err == 0 &&
fscache_cookie_enabled(cookie) &&
@@ -192,6 +192,8 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
}

folio_end_writeback(folio);
+ p9_fid_put(writeback_fid);
+
return err;
}

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 64158664dcb4..b9873e81215e 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -43,7 +43,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
int err;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *writeback_fid;
+ struct p9_fid *fid;
int omode;

p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
@@ -60,7 +60,19 @@ int v9fs_file_open(struct inode *inode, struct file *file)
if (IS_ERR(fid))
return PTR_ERR(fid);

- err = p9_client_open(fid, omode);
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
+ int writeback_omode = (omode & !P9_OWRITE) | P9_ORDWR;
+
+ p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n");
+ err = p9_client_open(fid, writeback_omode);
+ if (err < 0) {
+ p9_debug(P9_DEBUG_CACHE, "could not open O_RDWR, disabling caches\n");
+ err = p9_client_open(fid, omode);
+ fid->mode |= P9L_DIRECT;
+ }
+ } else {
+ err = p9_client_open(fid, omode);
+ }
if (err < 0) {
p9_fid_put(fid);
return err;
@@ -72,36 +84,14 @@ int v9fs_file_open(struct inode *inode, struct file *file)
file->private_data = fid;
}

- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- writeback_fid = v9fs_writeback_fid(file_dentry(file));
- if (IS_ERR(writeback_fid)) {
- err = PTR_ERR(writeback_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out_error;
- }
- v9inode->writeback_fid = (void *) writeback_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
#endif
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);
return 0;
-out_error:
- p9_fid_put(file->private_data);
- file->private_data = NULL;
- return err;
}

/**
@@ -367,14 +357,14 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
int ret, err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);

- p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
- iov_iter_count(to), iocb->ki_pos);
+ p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n",
+ fid->fid, iov_iter_count(to), iocb->ki_pos);

- if (v9ses->cache > CACHE_MMAP)
+ if (!(fid->mode & P9L_DIRECT)) {
+ p9_debug(P9_DEBUG_VFS, "(cached)\n");
return generic_file_read_iter(iocb, to);
+ }

if (iocb->ki_filp->f_flags & O_NONBLOCK)
ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
@@ -397,14 +387,17 @@ static ssize_t
v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct p9_fid *fid = file->private_data;
ssize_t retval;
loff_t origin;
int err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);

- if (v9ses->cache >= CACHE_WRITEBACK)
+ p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid);
+
+ if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) {
+ p9_debug(P9_DEBUG_CACHE, "(cached)\n");
return generic_file_write_iter(iocb, from);
+ }

retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -488,36 +481,18 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
struct inode *inode = file_inode(filp);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- struct p9_fid *fid;
+
+ p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp);

if (v9ses->cache < CACHE_MMAP) {
+ p9_debug(P9_DEBUG_CACHE, "(no mmap mode)");
+ if (vma->vm_flags & VM_MAYSHARE)
+ return -ENODEV;
invalidate_inode_pages2(filp->f_mapping);
return generic_file_readonly_mmap(filp, vma);
}

- mutex_lock(&v9inode->v_mutex);
- if (!v9inode->writeback_fid &&
- (vma->vm_flags & VM_SHARED) &&
- (vma->vm_flags & VM_WRITE)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during mmap instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- fid = v9fs_writeback_fid(file_dentry(filp));
- if (IS_ERR(fid)) {
- retval = PTR_ERR(fid);
- mutex_unlock(&v9inode->v_mutex);
- return retval;
- }
- v9inode->writeback_fid = (void *) fid;
- }
- mutex_unlock(&v9inode->v_mutex);
-
retval = generic_file_mmap(filp, vma);
if (!retval)
vma->vm_ops = &v9fs_mmap_file_vm_ops;
@@ -551,7 +526,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
/* Update file times before taking page lock */
file_update_time(filp);

- BUG_ON(!v9inode->writeback_fid);
if (folio_lock_killable(folio) < 0)
return VM_FAULT_RETRY;
if (folio_mapping(folio) != inode->i_mapping)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index de99f9275a94..c61709d98934 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -230,7 +230,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL);
if (!v9inode)
return NULL;
- v9inode->writeback_fid = NULL;
v9inode->cache_validity = 0;
mutex_init(&v9inode->v_mutex);
return &v9inode->netfs.inode;
@@ -383,9 +382,6 @@ void v9fs_evict_inode(struct inode *inode)
filemap_fdatawrite(&inode->i_data);

fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
- /* clunk the fid stashed in writeback_fid */
- p9_fid_put(v9inode->writeback_fid);
- v9inode->writeback_fid = NULL;
}

static int v9fs_test_inode(struct inode *inode, void *data)
@@ -796,9 +792,10 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
u32 perm;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *inode_fid;
+ struct p9_fid *fid;
struct dentry *res = NULL;
struct inode *inode;
+ int p9_omode;

if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
@@ -817,9 +814,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,

v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
- fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
- v9fs_uflags2omode(flags,
- v9fs_proto_dotu(v9ses)));
+ p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
goto error;
@@ -828,25 +830,6 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9fs_invalidate_inode_attr(dir);
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto error;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
err = finish_open(file, dentry, generic_file_open);
if (err)
goto error;
@@ -855,6 +838,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);

file->f_mode |= FMODE_CREATED;
@@ -1024,7 +1009,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
} else if (v9ses->cache >= CACHE_WRITEBACK) {
if (S_ISREG(inode->i_mode))
@@ -1119,10 +1104,10 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
- else
- invalidate_mapping_pages(&inode->i_data, 0, -1);
}

v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 8e104ba544ad..dcc5fd0567b8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -232,12 +232,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
int err = 0;
kgid_t gid;
umode_t mode;
+ int p9_omode = v9fs_open_to_dotl_flags(flags);
const unsigned char *name = NULL;
struct p9_qid qid;
struct inode *inode;
struct p9_fid *fid = NULL;
struct v9fs_inode *v9inode;
- struct p9_fid *dfid = NULL, *ofid = NULL, *inode_fid = NULL;
+ struct p9_fid *dfid = NULL, *ofid = NULL;
struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
struct dentry *res = NULL;
@@ -282,14 +283,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
/* Update mode based on ACL value */
err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
if (err) {
- p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n",
err);
goto out;
}
- err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
- mode, gid, &qid);
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid);
if (err < 0) {
- p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n",
err);
goto out;
}
@@ -315,25 +321,6 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);

v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
/* Since we are opening a file, assign the open fid to the file */
err = finish_open(file, dentry, generic_file_open);
if (err)
@@ -344,6 +331,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
#endif
+ v9fs_fid_add_modes(ofid, v9ses->flags, v9ses->cache, flags);
v9fs_open_fid_add(inode, &ofid);
file->f_mode |= FMODE_CREATED;
out:
@@ -464,9 +452,9 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
- } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ } else if (v9ses->cache) {
if (S_ISREG(inode->i_mode))
filemap_write_and_wait(inode->i_mapping);
}
@@ -604,10 +592,10 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
- else
- invalidate_mapping_pages(&inode->i_data, 0, -1);
}

v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 5fc6a945bfff..797f717e1a91 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -292,23 +292,26 @@ static int v9fs_write_inode(struct inode *inode,
{
int ret;
struct p9_wstat wstat;
+ struct p9_fid *fid = v9fs_fid_find_inode(inode, false, INVALID_UID, true);
struct v9fs_inode *v9inode;
+
/*
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
- p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+ p9_debug(P9_DEBUG_VFS, "%s: inode %p writeback_fid: %p\n", __func__, inode, fid);
v9inode = V9FS_I(inode);
- if (!v9inode->writeback_fid)
- return 0;
+ if (!fid)
+ return -EINVAL;
v9fs_blank_wstat(&wstat);

- ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+ ret = p9_client_wstat(fid, &wstat);
if (ret < 0) {
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+ p9_fid_put(fid);
return 0;
}

@@ -316,6 +319,7 @@ static int v9fs_write_inode_dotl(struct inode *inode,
struct writeback_control *wbc)
{
int ret;
+ struct p9_fid *fid = v9fs_fid_find_inode(inode, FMODE_WRITE, INVALID_UID, 1);
struct v9fs_inode *v9inode;
/*
* send an fsync request to server irrespective of
@@ -323,16 +327,17 @@ static int v9fs_write_inode_dotl(struct inode *inode,
*/
v9inode = V9FS_I(inode);
p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
- __func__, inode, v9inode->writeback_fid);
- if (!v9inode->writeback_fid)
- return 0;
+ __func__, inode, fid);
+ if (!fid)
+ return -EINVAL;

- ret = p9_client_fsync(v9inode->writeback_fid, 0);
+ ret = p9_client_fsync(fid, 0);
if (ret < 0) {
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+ p9_fid_put(fid);
return 0;
}

--
2.37.2


2023-01-24 02:40:22

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v3 11/11] Fix revalidate

Unclear if this case ever happens, but if no inode in dentry, then
the dentry is definitely invalid. Seemed to be the opposite in the
existing code.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/vfs_dentry.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 65fa2df5e49b..b0c3f8e8ea00 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -68,7 +68,7 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)

inode = d_inode(dentry);
if (!inode)
- goto out_valid;
+ return 0;

v9inode = V9FS_I(inode);
if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
@@ -91,7 +91,6 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if (retval < 0)
return retval;
}
-out_valid:
return 1;
}

--
2.37.2


2023-01-24 02:50:09

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v2 00/10] Performance fixes for 9p filesystem

[email protected] wrote on Mon, Jan 23, 2023 at 08:33:46PM -0600:
> I’m fine with funneling these through Dominique since he’s currently
> the active maintainer, but I’ve also re-established kernel.org
> <http://kernel.org/> credentials so I can field the pull-request if
> desired.

I'm happy either way; I've had a (too quick to really call review) look
at the code itself and it mostly makes sense to me, and as you pointed
out some would warrant a Cc stable@ and not waiting if I had time to do
this seriously, but I'm not sure I'll make it if this needs to wait for
me.

Do you also have a tree that goes in -next ? I think I asked before but
lost your reply, sorry.
If not it'll probably be easier for me to pick it up this cycle, but
that's about the only reason I'd see for me to take the patches as
things stand.

--
Dominique

2023-01-24 02:52:01

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v2 00/10] Performance fixes for 9p filesystem

Well timed prompt, sorry — I had been out of pocket while traveling. The WIPs in the development branch on GitHub are me working my way through the dir-cache patches (which was intended as the next set of patches after this one) — but those are complimentary to this set, so I’m about send out a [V3] without those so we can get this into linux-next with enough time for some more exhaustive testing before the next merge window.

I’m fine with funneling these through Dominique since he’s currently the active maintainer, but I’ve also re-established kernel.org <http://kernel.org/> credentials so I can field the pull-request if desired.

-Eric


> On Jan 23, 2023, at 10:31 AM, Christian Schoenebeck <[email protected]> wrote:
>
> On Monday, December 19, 2022 12:22:07 AM CET Eric Van Hensbergen wrote:
>> This is the second version of a patch series which adds a number
>> of features to improve read/write performance in the 9p filesystem.
>> Mostly it focuses on fixing caching to help utilize the recently
>> increased MSIZE limits and also fixes some problematic behavior
>> within the writeback code.
>>
>> Altogether, these show roughly 10x speed increases on simple
>> file transfers. Future patch sets will improve cache consistency
>> and directory caching.
>>
>> These patches are also available on github:
>> https://github.com/v9fs/linux/tree/ericvh/9p-next
>>
>> Tested against qemu, cpu, and diod with fsx, dbench, and some
>> simple benchmarks.
>>
>> Signed-off-by: Eric Van Hensbergen <[email protected]>
>
> Hi Eric,
>
> what's your plan on this series? I just had a look at your github repo and saw
> there is a lot of stuff marked as WIP.
>
> Best regards,
> Christian Schoenebeck
>
>


2023-01-24 03:35:20

by Amir Goldstein

[permalink] [raw]
Subject: Re: [PATCH v2 08/10] Add new mount modes

On Mon, Dec 19, 2022 at 2:22 AM Eric Van Hensbergen
<[email protected]> wrote:
>
> Add some additional mount modes for cache management including
> specifying directio as a mount option and an option for ignore
> qid.version for determining whether or not a file is cacheable.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>


Eric,

A comment related to the entire series.

It is quite odd to see patches titled "Add new mount modes"
on fsdevel and possibly in git log later.

Would you mind adding 9p: prefix to your 9p patches
and the vast majority of filesystem patches do?

Thanks,
Amir.

2023-02-02 11:27:51

by Christian Schoenebeck

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] Performance fixes for 9p filesystem

On Tuesday, January 24, 2023 3:38:23 AM CET Eric Van Hensbergen wrote:
> This is the third version of a patch series which adds a number
> of features to improve read/write performance in the 9p filesystem.
> Mostly it focuses on fixing caching to help utilize the recently
> increased MSIZE limits and also fixes some problematic behavior
> within the writeback code.
>
> All together, these show roughly 10x speed increases on simple
> file transfers. Future patch sets will improve cache consistency
> and directory caching.
>
> These patches are also available on github:
> https://github.com/v9fs/linux/tree/ericvh/for-next
> and on kernel.org:
> https://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
>
> Tested against qemu, cpu, and diod with fsx, dbench, and some
> simple benchmarks.

Looks like this needs more work.

I only had a glimpse on your patches yet, but made some tests by doing
compilations on guest on top of a 9p root fs [1], msize=500k. Under that
scenario:

* loose: this is suprisingly the only mode where I can see some performance
increase, over "loose" on master it compiled ~5% faster, but I also got some
misbehaviours on guest.

* writeahead: no performance results, as compilation already aborts when
trying to detect a compiler. I had to restore a previous snapshot to repair
things after this test.

* readahead: significant performance drop. In comparison to "loose" on master
compilation takes 6 times longer with "readahead". There are some severe
misbehaviours on guest as well, and after boot I get this warning:

[ 5.782846] folio expected an open fid inode->i_private=0000000000000000
[ 5.786641] WARNING: CPU: 0 PID: 321 at fs/9p/vfs_addr.c:174 v9fs_vfs_write_folio_locked (fs/9p/vfs_addr.c:174 (discriminator 3)) 9p
[ 5.792496] Modules linked in: ppdev(E) bochs(E) sg(E) drm_vram_helper(E) joydev(E) evdev(E) drm_kms_helper(E) serio_raw(E) drm_ttm_helper(E) pcsp)
[ 5.816806] CPU: 0 PID: 321 Comm: chown Tainted: G E 6.2.0-rc6+ #61
[ 5.821694] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014
[ 5.827362] RIP: 0010:v9fs_vfs_write_folio_locked (fs/9p/vfs_addr.c:174 (discriminator 3)) 9p

Code starting with the faulting instruction
===========================================
...
[ 5.835360] RSP: 0018:ffffc900007d3a38 EFLAGS: 00010282
[ 5.836982] RAX: 0000000000000000 RBX: ffff888106c86680 RCX: 0000000000000000
[ 5.838877] RDX: 0000000000000001 RSI: ffffffff821eb1e6 RDI: 00000000ffffffff
[ 5.841179] RBP: ffffea0004279300 R08: 0000000000000000 R09: 00000000ffffefff
[ 5.843039] R10: ffffc900007d38e8 R11: ffffffff824bede8 R12: 0000000000000000
[ 5.844850] R13: 00000000ffffffea R14: 0000000000000014 R15: 0000000000000014
[ 5.846366] FS: 00007fd0fc4a0580(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000
[ 5.848250] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5.849386] CR2: 00007fd0fc38f4f0 CR3: 0000000100302000 CR4: 00000000000006f0
[ 5.850824] Call Trace:
[ 5.851622] <TASK>
[ 5.852052] v9fs_vfs_writepage (fs/9p/vfs_addr.c:207) 9p
[ 5.852841] __writepage (mm/page-writeback.c:2537)
[ 5.853438] write_cache_pages (mm/page-writeback.c:2473)
[ 5.854205] ? __pfx___writepage (mm/page-writeback.c:2535)
[ 5.855309] ? delete_node (lib/radix-tree.c:575)
[ 5.856122] ? radix_tree_delete_item (lib/radix-tree.c:1432)
[ 5.857101] do_writepages (mm/page-writeback.c:2564 mm/page-writeback.c:2552 mm/page-writeback.c:2583)
[ 5.857954] ? radix_tree_delete_item (lib/radix-tree.c:1432)
[ 5.859103] filemap_fdatawrite_wbc (mm/filemap.c:389 mm/filemap.c:378)
[ 5.860043] __filemap_fdatawrite_range (mm/filemap.c:422)
[ 5.861050] filemap_write_and_wait_range (mm/filemap.c:682 mm/filemap.c:665)
[ 5.862132] v9fs_vfs_setattr_dotl (./include/linux/pagemap.h:60 fs/9p/vfs_inode_dotl.c:583) 9p
[ 5.863312] notify_change (fs/attr.c:486)
[ 5.864043] ? chown_common (fs/open.c:736)
[ 5.864793] chown_common (fs/open.c:736)
[ 5.865538] ? preempt_count_add (./include/linux/ftrace.h:977 kernel/sched/core.c:5737 kernel/sched/core.c:5734 kernel/sched/core.c:5762)
[ 5.866420] do_fchownat (fs/open.c:768)
[ 5.867419] __x64_sys_fchownat (fs/open.c:782 fs/open.c:779 fs/open.c:779)
[ 5.868319] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
[ 5.869116] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
[ 5.871008] RIP: 0033:0x7fd0fc3b7b9a

Best regards,
Christian Schoenebeck

[1] https://wiki.qemu.org/Documentation/9p_root_fs




2023-02-03 19:12:33

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] Performance fixes for 9p filesystem

Hi Christian, thanks for the feedback -- will dig in and see if I can
find what's gone south here. Clearly my approach to writeback without
writeback_fid didn't cover all the corner cases and thats the cause of
the fault. Can I get a better idea of how to reproduce - you booted
with a root 9p file system, and then tried to build...what?

Performance degradation is interesting, runs counter to the
unit-testing and benchmarking I did, but I didn't do something as
logical as a build to check -- need to tease apart whether this is a
read problem, a write problem...or both. My intuition is that its on
the write side, but as part of going through the code I made the cache
code a lot more pessimistic so its possible I inadvertently killed an
optimistic optimization.

Finally, just to clarify, the panic you had at the end happened with
readahead? Seems interesting because clearly it thought it was
writing back something that it shouldn't have been writing back (since
writeback caches weren't enabled). I'm thinking something was marked
as dirty even though the underlying system just wrote-through the
change and so the writeback isn't actually required. This may also be
an indicator of the performance issue if we are actually writing
through the data in addition to an unnecessary write-back (which I
also worry is writing back bad data in the second case).

Can you give me an idea of what the other misbehaviors were?

-eric

On Thu, Feb 2, 2023 at 5:27 AM Christian Schoenebeck
<[email protected]> wrote:
>
> On Tuesday, January 24, 2023 3:38:23 AM CET Eric Van Hensbergen wrote:
> > This is the third version of a patch series which adds a number
> > of features to improve read/write performance in the 9p filesystem.
> > Mostly it focuses on fixing caching to help utilize the recently
> > increased MSIZE limits and also fixes some problematic behavior
> > within the writeback code.
> >
> > All together, these show roughly 10x speed increases on simple
> > file transfers. Future patch sets will improve cache consistency
> > and directory caching.
> >
> > These patches are also available on github:
> > https://github.com/v9fs/linux/tree/ericvh/for-next
> > and on kernel.org:
> > https://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
> >
> > Tested against qemu, cpu, and diod with fsx, dbench, and some
> > simple benchmarks.
>
> Looks like this needs more work.
>
> I only had a glimpse on your patches yet, but made some tests by doing
> compilations on guest on top of a 9p root fs [1], msize=500k. Under that
> scenario:
>
> * loose: this is suprisingly the only mode where I can see some performance
> increase, over "loose" on master it compiled ~5% faster, but I also got some
> misbehaviours on guest.
>
> * writeahead: no performance results, as compilation already aborts when
> trying to detect a compiler. I had to restore a previous snapshot to repair
> things after this test.
>
> * readahead: significant performance drop. In comparison to "loose" on master
> compilation takes 6 times longer with "readahead". There are some severe
> misbehaviours on guest as well, and after boot I get this warning:
>
> [ 5.782846] folio expected an open fid inode->i_private=0000000000000000
> [ 5.786641] WARNING: CPU: 0 PID: 321 at fs/9p/vfs_addr.c:174 v9fs_vfs_write_folio_locked (fs/9p/vfs_addr.c:174 (discriminator 3)) 9p
> [ 5.792496] Modules linked in: ppdev(E) bochs(E) sg(E) drm_vram_helper(E) joydev(E) evdev(E) drm_kms_helper(E) serio_raw(E) drm_ttm_helper(E) pcsp)
> [ 5.816806] CPU: 0 PID: 321 Comm: chown Tainted: G E 6.2.0-rc6+ #61
> [ 5.821694] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014
> [ 5.827362] RIP: 0010:v9fs_vfs_write_folio_locked (fs/9p/vfs_addr.c:174 (discriminator 3)) 9p
>
> Code starting with the faulting instruction
> ===========================================
> ...
> [ 5.835360] RSP: 0018:ffffc900007d3a38 EFLAGS: 00010282
> [ 5.836982] RAX: 0000000000000000 RBX: ffff888106c86680 RCX: 0000000000000000
> [ 5.838877] RDX: 0000000000000001 RSI: ffffffff821eb1e6 RDI: 00000000ffffffff
> [ 5.841179] RBP: ffffea0004279300 R08: 0000000000000000 R09: 00000000ffffefff
> [ 5.843039] R10: ffffc900007d38e8 R11: ffffffff824bede8 R12: 0000000000000000
> [ 5.844850] R13: 00000000ffffffea R14: 0000000000000014 R15: 0000000000000014
> [ 5.846366] FS: 00007fd0fc4a0580(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000
> [ 5.848250] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 5.849386] CR2: 00007fd0fc38f4f0 CR3: 0000000100302000 CR4: 00000000000006f0
> [ 5.850824] Call Trace:
> [ 5.851622] <TASK>
> [ 5.852052] v9fs_vfs_writepage (fs/9p/vfs_addr.c:207) 9p
> [ 5.852841] __writepage (mm/page-writeback.c:2537)
> [ 5.853438] write_cache_pages (mm/page-writeback.c:2473)
> [ 5.854205] ? __pfx___writepage (mm/page-writeback.c:2535)
> [ 5.855309] ? delete_node (lib/radix-tree.c:575)
> [ 5.856122] ? radix_tree_delete_item (lib/radix-tree.c:1432)
> [ 5.857101] do_writepages (mm/page-writeback.c:2564 mm/page-writeback.c:2552 mm/page-writeback.c:2583)
> [ 5.857954] ? radix_tree_delete_item (lib/radix-tree.c:1432)
> [ 5.859103] filemap_fdatawrite_wbc (mm/filemap.c:389 mm/filemap.c:378)
> [ 5.860043] __filemap_fdatawrite_range (mm/filemap.c:422)
> [ 5.861050] filemap_write_and_wait_range (mm/filemap.c:682 mm/filemap.c:665)
> [ 5.862132] v9fs_vfs_setattr_dotl (./include/linux/pagemap.h:60 fs/9p/vfs_inode_dotl.c:583) 9p
> [ 5.863312] notify_change (fs/attr.c:486)
> [ 5.864043] ? chown_common (fs/open.c:736)
> [ 5.864793] chown_common (fs/open.c:736)
> [ 5.865538] ? preempt_count_add (./include/linux/ftrace.h:977 kernel/sched/core.c:5737 kernel/sched/core.c:5734 kernel/sched/core.c:5762)
> [ 5.866420] do_fchownat (fs/open.c:768)
> [ 5.867419] __x64_sys_fchownat (fs/open.c:782 fs/open.c:779 fs/open.c:779)
> [ 5.868319] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
> [ 5.869116] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
> [ 5.871008] RIP: 0033:0x7fd0fc3b7b9a
>
> Best regards,
> Christian Schoenebeck
>
> [1] https://wiki.qemu.org/Documentation/9p_root_fs
>
>
>

2023-02-04 13:41:12

by Christian Schoenebeck

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] Performance fixes for 9p filesystem

On Friday, February 3, 2023 8:12:14 PM CET Eric Van Hensbergen wrote:
> Hi Christian, thanks for the feedback -- will dig in and see if I can
> find what's gone south here. Clearly my approach to writeback without
> writeback_fid didn't cover all the corner cases and thats the cause of
> the fault. Can I get a better idea of how to reproduce - you booted
> with a root 9p file system, and then tried to build...what?

KDE, which builds numerous packages, multi-threaded by default. In the past we
had 9p issues which triggered only after hours of compiling, however in this
case I don't think that you need to build something fancy. Because it already
fails at the very beginning of any build process, just when detecting a
compiler.

May I ask what kind of scenario you have tested so far? It was not a multi-
threaded context, right? Large chunk or small chunk I/O?

> Performance degradation is interesting, runs counter to the
> unit-testing and benchmarking I did, but I didn't do something as
> logical as a build to check -- need to tease apart whether this is a
> read problem, a write problem...or both. My intuition is that its on
> the write side, but as part of going through the code I made the cache
> code a lot more pessimistic so its possible I inadvertently killed an
> optimistic optimization.

I have not walked down the road to investigate individual I/O errors or even
their cause yet, but from my feeling it could also be related to fid vs.
writeback_fid. I saw you dropped a fix we made there last year, but haven't
checked yet whether your changes would handle it correctly in another way.

> Finally, just to clarify, the panic you had at the end happened with
> readahead? Seems interesting because clearly it thought it was
> writing back something that it shouldn't have been writing back (since
> writeback caches weren't enabled). I'm thinking something was marked
> as dirty even though the underlying system just wrote-through the
> change and so the writeback isn't actually required. This may also be
> an indicator of the performance issue if we are actually writing
> through the data in addition to an unnecessary write-back (which I
> also worry is writing back bad data in the second case).

It was not a kernel panic. It's a warning that appears right after boot, but
the system continues to run. So that warning is printed before starting the
actual build process. And yes, the warning is printed with "readahead".

> Can you give me an idea of what the other misbehaviors were?

There were really all sorts of misbheaviour on application level, e.g. no
command history being available from shell (arrow up/down), things hanging on
the shell for a long time, error messages. And after the writeahead test the
build directory was screwed, i.e. even after rebooting with a regular kernel
things no longer built correctly, so I had to restore a snapshot.

Best regards,
Christian Schoenebeck



2023-02-04 21:39:20

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] Performance fixes for 9p filesystem

Okay, thanks for the additional detail - I have an idea of what the
problem might be.

As far as my tests - I've predominantly tested with dbench, fsx (with
and without mmap tests), postmark, and bonnie -- running single and
multithreaded. All seem to work fine and didn't report errors. I
thought the dbench trace was based on a build, but perhaps that's
inaccurate, or perhaps there's something peculiar with it being the
root file system (I have always just mounted it after boot, not tried
booting with it as root).

(thinking out loud)
In any case, I think the fact that we see that error when in readahead
mode is the key hint, because it means it thinks something is
writeback cached when it shouldn't be. The writeback is triggered by
the setattr, which always calls filemap_write_and_wait -- this is all
old code, not something added by the change. My assumption was that
if the inode had no dirty data (writebacks) then it would just not do
anything -- this should be the case in readahead mode. So we gotta
figure out why it thinks it has dirty data. Looking at the code where
the warning is printed, its a WARN_ONCE so its quite possible we are
hitting this left and right -- we can probably switch that to always
print to get an idea of this being the root cause. Need to add some
more debug code to understand what we think we are writing back as
anything there should have been flushed when the file was closed.
To your multithreaded concern, I suppose there could be a race between
flushing mmap writes and the setattr also calling writeback, but the
folio is supposed to be locked at this point so you think there would
only be one writeback. This will be easier to understand once I
reproduce and have a full trace and we know what file we are talking
about and what other operations might have been in flight.

There is a case in mmap that I was worried always required writeback,
but I did enough unit testing to convince myself that wasn't the case
-- so could be something down that path but will reproduce your
environment first and see if I can get the same types of error (I'm
most of the way there at this point, it is just we are digging out
from an ice storm here in texas so there's been more chainsawing than
coding....).

-eric

On Sat, Feb 4, 2023 at 7:40 AM Christian Schoenebeck
<[email protected]> wrote:
>
> On Friday, February 3, 2023 8:12:14 PM CET Eric Van Hensbergen wrote:
> > Hi Christian, thanks for the feedback -- will dig in and see if I can
> > find what's gone south here. Clearly my approach to writeback without
> > writeback_fid didn't cover all the corner cases and thats the cause of
> > the fault. Can I get a better idea of how to reproduce - you booted
> > with a root 9p file system, and then tried to build...what?
>
> KDE, which builds numerous packages, multi-threaded by default. In the past we
> had 9p issues which triggered only after hours of compiling, however in this
> case I don't think that you need to build something fancy. Because it already
> fails at the very beginning of any build process, just when detecting a
> compiler.
>
> May I ask what kind of scenario you have tested so far? It was not a multi-
> threaded context, right? Large chunk or small chunk I/O?
>
> > Performance degradation is interesting, runs counter to the
> > unit-testing and benchmarking I did, but I didn't do something as
> > logical as a build to check -- need to tease apart whether this is a
> > read problem, a write problem...or both. My intuition is that its on
> > the write side, but as part of going through the code I made the cache
> > code a lot more pessimistic so its possible I inadvertently killed an
> > optimistic optimization.
>
> I have not walked down the road to investigate individual I/O errors or even
> their cause yet, but from my feeling it could also be related to fid vs.
> writeback_fid. I saw you dropped a fix we made there last year, but haven't
> checked yet whether your changes would handle it correctly in another way.
>
> > Finally, just to clarify, the panic you had at the end happened with
> > readahead? Seems interesting because clearly it thought it was
> > writing back something that it shouldn't have been writing back (since
> > writeback caches weren't enabled). I'm thinking something was marked
> > as dirty even though the underlying system just wrote-through the
> > change and so the writeback isn't actually required. This may also be
> > an indicator of the performance issue if we are actually writing
> > through the data in addition to an unnecessary write-back (which I
> > also worry is writing back bad data in the second case).
>
> It was not a kernel panic. It's a warning that appears right after boot, but
> the system continues to run. So that warning is printed before starting the
> actual build process. And yes, the warning is printed with "readahead".
>
> > Can you give me an idea of what the other misbehaviors were?
>
> There were really all sorts of misbheaviour on application level, e.g. no
> command history being available from shell (arrow up/down), things hanging on
> the shell for a long time, error messages. And after the writeahead test the
> build directory was screwed, i.e. even after rebooting with a regular kernel
> things no longer built correctly, so I had to restore a snapshot.
>
> Best regards,
> Christian Schoenebeck
>
>

2023-02-05 16:38:16

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] Performance fixes for 9p filesystem

On Thu, Feb 2, 2023 at 5:27 AM Christian Schoenebeck
<[email protected]> wrote:
>
> Looks like this needs more work.
>
> I only had a glimpse on your patches yet, but made some tests by doing
> compilations on guest on top of a 9p root fs [1], msize=500k. Under that
> scenario:
>
> * loose: this is suprisingly the only mode where I can see some performance
> increase, over "loose" on master it compiled ~5% faster, but I also got some
> misbehaviours on guest.
>

I was so focused on the bugs that I forgot to respond to the
performance concerns -- just to be clear, readahead and writeback
aren't meant to be more performant than loose, they are meant to have
stronger guarantees of consistency with the server file system. Loose
is inclusive of readahead and writeback, and it keeps the caches
around for longer, and it does some dir caching as well -- so its
always going to win, but it does so with risk of being more
inconsistent with the server file system and should only be done when
the guest/client has exclusive access or the filesystem itself is
read-only. I've a design for a "tight" cache, which will also not be
as performant as loose but will add consistent dir-caching on top of
readahead and writeback -- once we've properly vetted that it should
likely be the default cache option and any fscache should be built on
top of it. I was also thinking of augmenting "tight" and "loose" with
a "temporal" cache that works more like NFS and bounds consistency to
a particular time quanta. Loose was always a bit of a "hack" for some
particular use cases and has always been a bit problematic in my mind.

So, to make sure we are on the same page, was your performance
uplifts/penalties versus cache=none or versus legacy cache=loose? The
10x perf improvement in the patch series was in streaming reads over
cache=none. I'll add the cache=loose datapoints to my performance
notebook (on github) for the future as points of reference, but I'd
always expect cache=loose to be the upper bound (although I have seen
some things in the code to do with directory reads/etc. that could be
improved there and should benefit from some of the changes I have
planned once I get to the dir caching).

-eric

2023-02-06 13:23:04

by Christian Schoenebeck

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] Performance fixes for 9p filesystem

On Sunday, February 5, 2023 5:37:53 PM CET Eric Van Hensbergen wrote:
> I was so focused on the bugs that I forgot to respond to the
> performance concerns -- just to be clear, readahead and writeback
> aren't meant to be more performant than loose, they are meant to have
> stronger guarantees of consistency with the server file system. Loose
> is inclusive of readahead and writeback, and it keeps the caches
> around for longer, and it does some dir caching as well -- so its
> always going to win, but it does so with risk of being more
> inconsistent with the server file system and should only be done when
> the guest/client has exclusive access or the filesystem itself is
> read-only.

Okay, that's surprising to me indeed. My expecation was that "loose" would
still retain its previous behaviour, i.e. loose consistency cache but without
any readahead or writeback. I already wondered about the transitivity you used
in code for cache selection with direct `<=` comparison of user's cache
option.

Having said that, I wonder whether it would make sense to handle these as
options independent of each other (e.g. cache=loose,readahead), but not sure,
maybe it would overcomplicate things unnecessarily.

> I've a design for a "tight" cache, which will also not be
> as performant as loose but will add consistent dir-caching on top of
> readahead and writeback -- once we've properly vetted that it should
> likely be the default cache option and any fscache should be built on
> top of it. I was also thinking of augmenting "tight" and "loose" with
> a "temporal" cache that works more like NFS and bounds consistency to
> a particular time quanta. Loose was always a bit of a "hack" for some
> particular use cases and has always been a bit problematic in my mind.

Or we could add notifications on file changes from server side, because that's
what this is actually about, right?

> So, to make sure we are on the same page, was your performance
> uplifts/penalties versus cache=none or versus legacy cache=loose?

I have not tested cache=none at all, because in the scenario of 9p being a
root fs, you need at least cache=mmap, otherwise you won't even be able to
boot a minimum system.

I compared:

* master(cache=loose) vs. this(cache=loose)

* master(cache=loose) vs. this(cache=readahead)

* master(cache=loose) vs. this(cache=writeback)

> The 10x perf improvement in the patch series was in streaming reads over
> cache=none.

OK, that's an important information to mention in the first place. Because
when say you measured a performance plus of x times, I would assume you
compared it to at least a somewhat similar setup. I mean cache=loose was
always much faster than cache=none before.

> I'll add the cache=loose datapoints to my performance
> notebook (on github) for the future as points of reference, but I'd
> always expect cache=loose to be the upper bound (although I have seen
> some things in the code to do with directory reads/etc. that could be
> improved there and should benefit from some of the changes I have
> planned once I get to the dir caching).
>
> -eric



2023-02-06 13:37:48

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] Performance fixes for 9p filesystem

On Mon, Feb 6, 2023 at 7:20 AM Christian Schoenebeck
<[email protected]> wrote:
>
> Okay, that's surprising to me indeed. My expecation was that "loose" would
> still retain its previous behaviour, i.e. loose consistency cache but without
> any readahead or writeback. I already wondered about the transitivity you used
> in code for cache selection with direct `<=` comparison of user's cache
> option.
>
> Having said that, I wonder whether it would make sense to handle these as
> options independent of each other (e.g. cache=loose,readahead), but not sure,
> maybe it would overcomplicate things unnecessarily.
>

That's fair and I've considered it, but was waiting until I get to the
dir cache changes to figure out which way I wanted to go. I imagine
the way that would play out is there are three types of caching
(readahead, writeback, dir) with writeback inclusive of readahead
still though. Then there would be three cache policies (tight,
temporal, loose) and finally there'd be a seperate option for fscache
(open question as to whether or not fscache with < dir makes sense..I
think probably not).

> > I've a design for a "tight" cache, which will also not be
> > as performant as loose but will add consistent dir-caching on top of
> > readahead and writeback -- once we've properly vetted that it should
> > likely be the default cache option and any fscache should be built on
> > top of it. I was also thinking of augmenting "tight" and "loose" with
> > a "temporal" cache that works more like NFS and bounds consistency to
> > a particular time quanta. Loose was always a bit of a "hack" for some
> > particular use cases and has always been a bit problematic in my mind.
>
> Or we could add notifications on file changes from server side, because that's
> what this is actually about, right?
>

Yeah, that's always an option, but would be tricky to work out the 9p
model for this as model is explicitly RPC so we'd have to post a read
for file changes. We had the same discussion for locks and decided to
keep it simple for now. I'm not opposed to exploring this, but we'd
want to keep it as a invalidate log with a single open posted read --
could use a synthetic or something similar to the Tauth messages to
have that. That's gonna go on the end-of-the-backlog for
consideration, but happy to review if someone else wants to go after
it.

> > So, to make sure we are on the same page, was your performance
> > uplifts/penalties versus cache=none or versus legacy cache=loose?
>
> I have not tested cache=none at all, because in the scenario of 9p being a
> root fs, you need at least cache=mmap, otherwise you won't even be able to
> boot a minimum system.
>

Yeah, understood -- mmap ~= writeback so the writeback issues would
persist there. FWIW, I continue to see no problems with cache=none,
but that makes sense as all the changes are in the cache code. Will
keep crunching on getting this fixed.

> I compared:
>
> * master(cache=loose) vs. this(cache=loose)
>
> * master(cache=loose) vs. this(cache=readahead)
>
> * master(cache=loose) vs. this(cache=writeback)
>
> > The 10x perf improvement in the patch series was in streaming reads over
> > cache=none.
>
> OK, that's an important information to mention in the first place. Because
> when say you measured a performance plus of x times, I would assume you
> compared it to at least a somewhat similar setup. I mean cache=loose was
> always much faster than cache=none before.
>

Sorry that I didn't make that more clear. The original motivation for
the patch series was the cpu project that Ron and I have been
collaborating on and cache==loose was problematic for that use case so
we wanted something that approached the performance of cache==loose
but with tighter consistency (in particular the ability to actually do
read-ahead with open-to-close consistency). As you pointed out
though, there was a 5% improvement in loose (probably due to reduction
of messages associated with management of the writeback_fid). In any
case, the hope is to make cache=mmap (and eventually cache=tight) the
default cache mode versus cache=none -- but have to get this stable
first.

As I said, the dir-cache changes in the WIP patch series are expected
to benefit loose a bit more (particularly around the dir-read pain
points) and I spotted several cases where loose appears to be
re-requesting files it already has in cache -- so there may be more to
it. But that being said, I don't expected to get 10x out of those
changes (although depends on the types of operations being performed).
Will know better when I get further along.

-eric

2023-02-18 00:43:56

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 00/11] Performance fixes for 9p filesystem

This is the fourth version of a patch series which adds a number
of features to improve read/write performance in the 9p filesystem.
Mostly it focuses on fixing caching to help utilize the recently
increased MSIZE limits and also fixes some problematic behavior
within the writeback code.

All together, these show roughly 10x speed increases on simple
file transfers over no caching for readahead mode. Future patch
sets will improve cache consistency and directory caching, which
should benefit loose mode.

This iteration of the patch incorporates an important fix for
writeback which uses a stronger mechanism to flush writeback on
close of files and addresses observed bugs in previous versions of
the patch for writeback, mmap, and loose cache modes.

These patches are also available on github:
https://github.com/v9fs/linux/tree/ericvh/for-next
and on kernel.org:
https://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git

Tested against qemu, cpu, and diod with fsx, dbench, and postmark
in every caching mode.

I'm gonna definitely submit the first couple patches as they are
fairly harmless - but would like to submit the whole series to the
upcoming merge window. Would appreciate reviews.

Eric Van Hensbergen (11):
net/9p: Adjust maximum MSIZE to account for p9 header
fs/9p: Expand setup of writeback cache to all levels
fs/9p: Consolidate file operations and add readahead and writeback
fs/9p: Remove unnecessary superblock flags
fs/9p: allow disable of xattr support on mount
net/9p: fix bug in client create for .L
9p: Add additional debug flags and open modes
fs/9p: Add new mount modes
fs/9p: fix error reporting in v9fs_dir_release
fs/9p: writeback mode fixes
fs/9p: Fix revalidate

Documentation/filesystems/9p.rst | 26 ++--
fs/9p/fid.c | 49 +++-----
fs/9p/fid.h | 33 ++++-
fs/9p/v9fs.c | 49 +++++---
fs/9p/v9fs.h | 10 +-
fs/9p/v9fs_vfs.h | 4 -
fs/9p/vfs_addr.c | 24 ++--
fs/9p/vfs_dentry.c | 3 +-
fs/9p/vfs_dir.c | 10 +-
fs/9p/vfs_file.c | 205 +++++++------------------------
fs/9p/vfs_inode.c | 102 +++++++--------
fs/9p/vfs_inode_dotl.c | 69 ++++++-----
fs/9p/vfs_super.c | 28 +++--
include/net/9p/9p.h | 5 +
net/9p/client.c | 8 +-
15 files changed, 284 insertions(+), 341 deletions(-)

--
2.37.2


2023-02-18 00:44:15

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 03/11] fs/9p: Consolidate file operations and add readahead and writeback

We had 3 different sets of file operations across 2 different protocol
variants differentiated by cache which really only changed 3
functions. But the real problem is that certain file modes, mount
options, and other factors weren't being considered when we
decided whether or not to use caches.

This consolidates all the operations and switches
to conditionals within a common set to decide whether or not
to do different aspects of caching.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 30 ++++------
fs/9p/v9fs.h | 3 +
fs/9p/v9fs_vfs.h | 4 --
fs/9p/vfs_dir.c | 3 +
fs/9p/vfs_file.c | 131 +++++++----------------------------------
fs/9p/vfs_inode.c | 62 ++++++++++++-------
fs/9p/vfs_inode_dotl.c | 22 +++++--
7 files changed, 98 insertions(+), 157 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 61a51b90600d..a46bf9121f11 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,8 +39,6 @@ enum {
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
Opt_nodevmap,
- /* Cache options */
- Opt_cache_loose, Opt_fscache, Opt_mmap,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -58,9 +56,6 @@ static const match_table_t tokens = {
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
{Opt_cache, "cache=%s"},
- {Opt_cache_loose, "loose"},
- {Opt_fscache, "fscache"},
- {Opt_mmap, "mmap"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_posixacl, "posixacl"},
@@ -69,10 +64,12 @@ static const match_table_t tokens = {
};

static const char *const v9fs_cache_modes[nr__p9_cache_modes] = {
- [CACHE_NONE] = "none",
- [CACHE_MMAP] = "mmap",
- [CACHE_LOOSE] = "loose",
- [CACHE_FSCACHE] = "fscache",
+ [CACHE_NONE] = "none",
+ [CACHE_READAHEAD] = "readahead",
+ [CACHE_WRITEBACK] = "writeback",
+ [CACHE_MMAP] = "mmap",
+ [CACHE_LOOSE] = "loose",
+ [CACHE_FSCACHE] = "fscache",
};

/* Interpret mount options for cache mode */
@@ -89,6 +86,12 @@ static int get_cache_mode(char *s)
} else if (!strcmp(s, "mmap")) {
version = CACHE_MMAP;
p9_debug(P9_DEBUG_9P, "Cache mode: mmap\n");
+ } else if (!strcmp(s, "writeback")) {
+ version = CACHE_WRITEBACK;
+ p9_debug(P9_DEBUG_9P, "Cache mode: writeback\n");
+ } else if (!strcmp(s, "readahead")) {
+ version = CACHE_READAHEAD;
+ p9_debug(P9_DEBUG_9P, "Cache mode: readahead\n");
} else if (!strcmp(s, "none")) {
version = CACHE_NONE;
p9_debug(P9_DEBUG_9P, "Cache mode: none\n");
@@ -266,15 +269,6 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_nodevmap:
v9ses->nodev = 1;
break;
- case Opt_cache_loose:
- v9ses->cache = CACHE_LOOSE;
- break;
- case Opt_fscache:
- v9ses->cache = CACHE_FSCACHE;
- break;
- case Opt_mmap:
- v9ses->cache = CACHE_MMAP;
- break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 6acabc2e7dc9..517b2201ad24 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -51,6 +51,8 @@ enum p9_session_flags {
enum p9_cache_modes {
CACHE_NONE,
CACHE_MMAP,
+ CACHE_READAHEAD,
+ CACHE_WRITEBACK,
CACHE_LOOSE,
CACHE_FSCACHE,
nr__p9_cache_modes
@@ -155,6 +157,7 @@ extern int v9fs_vfs_rename(struct user_namespace *mnt_userns,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags);
+extern int v9fs_flush_inode_writeback(struct inode *inode);
extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
struct p9_fid *fid,
struct super_block *sb, int new);
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index bc417da7e9c1..cce87c9bdd8b 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -36,10 +36,6 @@ extern const struct file_operations v9fs_dir_operations;
extern const struct file_operations v9fs_dir_operations_dotl;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
-extern const struct file_operations v9fs_cached_file_operations;
-extern const struct file_operations v9fs_cached_file_operations_dotl;
-extern const struct file_operations v9fs_mmap_file_operations;
-extern const struct file_operations v9fs_mmap_file_operations_dotl;
extern struct kmem_cache *v9fs_inode_cache;

struct inode *v9fs_alloc_inode(struct super_block *sb);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 59b0e8948f78..bd31593437f3 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -214,6 +214,9 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
if (fid) {
+ if ((fid->qid.type == P9_QTFILE) && (filp->f_mode & FMODE_WRITE))
+ v9fs_flush_inode_writeback(inode);
+
spin_lock(&inode->i_lock);
hlist_del(&fid->ilist);
spin_unlock(&inode->i_lock);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3b6458846a0b..20e4bd299fc2 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -28,7 +28,6 @@
#include "fid.h"
#include "cache.h"

-static const struct vm_operations_struct v9fs_file_vm_ops;
static const struct vm_operations_struct v9fs_mmap_file_vm_ops;

/**
@@ -73,7 +72,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}

mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
+ if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
((file->f_flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -367,10 +366,15 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
int ret, err = 0;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);

p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
iov_iter_count(to), iocb->ki_pos);

+ if (v9ses->cache > CACHE_MMAP)
+ return generic_file_read_iter(iocb, to);
+
if (iocb->ki_filp->f_flags & O_NONBLOCK)
ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
else
@@ -395,6 +399,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t retval;
loff_t origin;
int err = 0;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+
+ if (v9ses->cache >= CACHE_WRITEBACK)
+ return generic_file_write_iter(iocb, from);

retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -477,25 +486,16 @@ static int
v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
-
-
- retval = generic_file_mmap(filp, vma);
- if (!retval)
- vma->vm_ops = &v9fs_file_vm_ops;
-
- return retval;
-}
-
-static int
-v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- int retval;
- struct inode *inode;
- struct v9fs_inode *v9inode;
+ struct inode *inode = file_inode(filp);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
struct p9_fid *fid;

- inode = file_inode(filp);
- v9inode = V9FS_I(inode);
+ if (v9ses->cache < CACHE_MMAP) {
+ invalidate_inode_pages2(filp->f_mapping);
+ return generic_file_readonly_mmap(filp, vma);
+ }
+
mutex_lock(&v9inode->v_mutex);
if (!v9inode->writeback_fid &&
(vma->vm_flags & VM_SHARED) &&
@@ -563,35 +563,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}

-/**
- * v9fs_mmap_file_read_iter - read from a file
- * @iocb: The operation parameters
- * @to: The buffer to read into
- *
- */
-static ssize_t
-v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- /* TODO: Check if there are dirty pages */
- return v9fs_file_read_iter(iocb, to);
-}
-
-/**
- * v9fs_mmap_file_write_iter - write to a file
- * @iocb: The operation parameters
- * @from: The data to write
- *
- */
-static ssize_t
-v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- /*
- * TODO: invalidate mmaps on filp's inode between
- * offset and offset+count
- */
- return v9fs_file_write_iter(iocb, from);
-}
-
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
{
struct inode *inode;
@@ -614,13 +585,6 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
}

-
-static const struct vm_operations_struct v9fs_file_vm_ops = {
- .fault = filemap_fault,
- .map_pages = filemap_map_pages,
- .page_mkwrite = v9fs_vm_page_mkwrite,
-};
-
static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
.close = v9fs_mmap_vm_close,
.fault = filemap_fault,
@@ -628,34 +592,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
.page_mkwrite = v9fs_vm_page_mkwrite,
};

-
-const struct file_operations v9fs_cached_file_operations = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock,
- .mmap = v9fs_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync,
-};
-
-const struct file_operations v9fs_cached_file_operations_dotl = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock_dotl,
- .flock = v9fs_file_flock_dotl,
- .mmap = v9fs_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync_dotl,
-};
-
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
.read_iter = v9fs_file_read_iter,
@@ -677,34 +613,7 @@ const struct file_operations v9fs_file_operations_dotl = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
- .mmap = generic_file_readonly_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync_dotl,
-};
-
-const struct file_operations v9fs_mmap_file_operations = {
- .llseek = generic_file_llseek,
- .read_iter = v9fs_mmap_file_read_iter,
- .write_iter = v9fs_mmap_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock,
- .mmap = v9fs_mmap_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync,
-};
-
-const struct file_operations v9fs_mmap_file_operations_dotl = {
- .llseek = generic_file_llseek,
- .read_iter = v9fs_mmap_file_read_iter,
- .write_iter = v9fs_mmap_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock_dotl,
- .flock = v9fs_file_flock_dotl,
- .mmap = v9fs_mmap_file_mmap,
+ .mmap = v9fs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 33e521c60e2c..8ffa6631b1fd 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -219,6 +219,35 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
wstat->extension = NULL;
}

+/**
+ * v9fs_flush_inode_writeback - writeback any data associated with inode
+ * @inode: inode to writeback
+ *
+ * This is used to make sure anything that needs to be written
+ * to server gets flushed before we do certain operations (setattr, getattr, close)
+ *
+ */
+
+int v9fs_flush_inode_writeback(struct inode *inode)
+{
+ struct writeback_control wbc = {
+ .nr_to_write = LONG_MAX,
+ .sync_mode = WB_SYNC_ALL,
+ .range_start = 0,
+ .range_end = -1,
+ };
+
+ int retval = filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
+
+ if (retval != 0) {
+ p9_debug(P9_DEBUG_ERROR,
+ "trying to flush inode %p failed with error code %d\n",
+ inode, retval);
+ }
+
+ return retval;
+}
+
/**
* v9fs_alloc_inode - helper function to allocate an inode
* @sb: The superblock to allocate the inode from
@@ -287,24 +316,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
case S_IFREG:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- if (v9ses->cache == CACHE_LOOSE ||
- v9ses->cache == CACHE_FSCACHE)
- inode->i_fop =
- &v9fs_cached_file_operations_dotl;
- else if (v9ses->cache == CACHE_MMAP)
- inode->i_fop = &v9fs_mmap_file_operations_dotl;
- else
- inode->i_fop = &v9fs_file_operations_dotl;
+ inode->i_fop = &v9fs_file_operations_dotl;
} else {
inode->i_op = &v9fs_file_inode_operations;
- if (v9ses->cache == CACHE_LOOSE ||
- v9ses->cache == CACHE_FSCACHE)
- inode->i_fop =
- &v9fs_cached_file_operations;
- else if (v9ses->cache == CACHE_MMAP)
- inode->i_fop = &v9fs_mmap_file_operations;
- else
- inode->i_fop = &v9fs_file_operations;
+ inode->i_fop = &v9fs_file_operations;
}

break;
@@ -843,7 +858,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
+ if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -1030,6 +1045,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
struct p9_wstat *st;
@@ -1039,6 +1055,9 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
+ } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ if (S_ISREG(inode->i_mode))
+ v9fs_flush_inode_writeback(inode);
}
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
@@ -1116,7 +1135,7 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,

/* Write all dirty data */
if (d_is_reg(dentry))
- filemap_write_and_wait(inode->i_mapping);
+ v9fs_flush_inode_writeback(inode);

retval = p9_client_wstat(fid, &wstat);

@@ -1127,9 +1146,12 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
return retval;

if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(inode)) {
+ iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
- fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ if (v9ses->cache == CACHE_FSCACHE)
+ fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ else
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
}

v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index bff37a312e64..4f01808c3bae 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -458,6 +458,7 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
struct dentry *dentry = path->dentry;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct inode *inode = d_inode(dentry);
struct p9_stat_dotl *st;

p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
@@ -465,6 +466,10 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
+ } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ if (S_ISREG(inode->i_mode))
+ v9fs_flush_inode_writeback(inode);
+
}
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
@@ -540,12 +545,14 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *iattr)
{
int retval, use_dentry = 0;
+ struct inode *inode = d_inode(dentry);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL;
struct p9_iattr_dotl p9attr = {
.uid = INVALID_UID,
.gid = INVALID_GID,
};
- struct inode *inode = d_inode(dentry);

p9_debug(P9_DEBUG_VFS, "\n");

@@ -553,6 +560,8 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
if (retval)
return retval;

+ v9ses = v9fs_dentry2v9ses(dentry);
+
p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
if (iattr->ia_valid & ATTR_MODE)
p9attr.mode = iattr->ia_mode;
@@ -584,7 +593,7 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,

/* Write all dirty data */
if (S_ISREG(inode->i_mode))
- filemap_write_and_wait(inode->i_mapping);
+ v9fs_flush_inode_writeback(inode);

retval = p9_client_setattr(fid, &p9attr);
if (retval < 0) {
@@ -593,9 +602,14 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
return retval;
}

- if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(inode))
+ if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
+ i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ if (v9ses->cache == CACHE_FSCACHE)
+ fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ else
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
+ }

v9fs_invalidate_inode_attr(inode);
setattr_copy(&init_user_ns, inode, iattr);
--
2.37.2


2023-02-18 00:44:20

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 09/11] fs/9p: fix error reporting in v9fs_dir_release

Checking the p9_fid_put value allows us to pass back errors
involved if we end up clunking the fid as part of dir_release.

This can help with more graceful response to errors in writeback
among other things.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/vfs_dir.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index bd31593437f3..44918c60357f 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -197,7 +197,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx)


/**
- * v9fs_dir_release - close a directory
+ * v9fs_dir_release - called on a close of a file or directory
* @inode: inode of the directory
* @filp: file pointer to a directory
*
@@ -209,6 +209,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
struct p9_fid *fid;
__le32 version;
loff_t i_size;
+ int retval = 0;

fid = filp->private_data;
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
@@ -220,7 +221,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
spin_lock(&inode->i_lock);
hlist_del(&fid->ilist);
spin_unlock(&inode->i_lock);
- p9_fid_put(fid);
+ retval = p9_fid_put(fid);
}

if ((filp->f_mode & FMODE_WRITE)) {
@@ -231,7 +232,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
} else {
fscache_unuse_cookie(v9fs_inode_cookie(v9inode), NULL, NULL);
}
- return 0;
+ return retval;
}

const struct file_operations v9fs_dir_operations = {
--
2.37.2


2023-02-18 00:45:23

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 08/11] fs/9p: Add new mount modes

Add some additional mount modes for cache management including
specifying directio as a mount option and an option for ignore
qid.version for determining whether or not a file is cacheable.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 16 ++++++++++++++--
fs/9p/v9fs.h | 5 ++++-
2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8e952c013f9..43d3806150a9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -38,7 +38,7 @@ enum {
/* String options */
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
- Opt_nodevmap, Opt_noxattr,
+ Opt_nodevmap, Opt_noxattr, Opt_directio, Opt_ignoreqv,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -56,6 +56,8 @@ static const match_table_t tokens = {
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
{Opt_noxattr, "noxattr"},
+ {Opt_directio, "directio"},
+ {Opt_ignoreqv, "ignoreqv"},
{Opt_cache, "cache=%s"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
@@ -125,7 +127,7 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
if (v9ses->nodev)
seq_puts(m, ",nodevmap");
if (v9ses->cache)
- seq_printf(m, ",%s", v9fs_cache_modes[v9ses->cache]);
+ seq_printf(m, ",cache=%s", v9fs_cache_modes[v9ses->cache]);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cachetag && v9ses->cache == CACHE_FSCACHE)
seq_printf(m, ",cachetag=%s", v9ses->cachetag);
@@ -147,6 +149,10 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
break;
}

+ if (v9ses->flags & V9FS_IGNORE_QV)
+ seq_puts(m, ",ignoreqv");
+ if (v9ses->flags & V9FS_DIRECT_IO)
+ seq_puts(m, ",directio");
if (v9ses->flags & V9FS_POSIX_ACL)
seq_puts(m, ",posixacl");

@@ -276,6 +282,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_noxattr:
v9ses->flags |= V9FS_NO_XATTR;
break;
+ case Opt_directio:
+ v9ses->flags |= V9FS_DIRECT_IO;
+ break;
+ case Opt_ignoreqv:
+ v9ses->flags |= V9FS_IGNORE_QV;
+ break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index d90141d25d0d..48c7614c9333 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -37,7 +37,10 @@ enum p9_session_flags {
V9FS_ACCESS_USER = 0x08,
V9FS_ACCESS_CLIENT = 0x10,
V9FS_POSIX_ACL = 0x20,
- V9FS_NO_XATTR = 0x40
+ V9FS_NO_XATTR = 0x40,
+ V9FS_IGNORE_QV = 0x80,
+ V9FS_DIRECT_IO = 0x100,
+ V9FS_SYNC = 0x200
};

/* possible values of ->cache */
--
2.37.2


2023-02-18 00:48:28

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 11/11] fs/9p: Fix revalidate

Unclear if this case ever happens, but if no inode in dentry, then
the dentry is definitely invalid. Seemed to be the opposite in the
existing code.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/vfs_dentry.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 65fa2df5e49b..b0c3f8e8ea00 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -68,7 +68,7 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)

inode = d_inode(dentry);
if (!inode)
- goto out_valid;
+ return 0;

v9inode = V9FS_I(inode);
if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
@@ -91,7 +91,6 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if (retval < 0)
return retval;
}
-out_valid:
return 1;
}

--
2.37.2


2023-02-18 00:49:01

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 01/11] net/9p: Adjust maximum MSIZE to account for p9 header

Add maximum p9 header size to MSIZE to make sure we can
have page aligned data.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
net/9p/client.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 622ec6a586ee..6c2a768a6ab1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -28,7 +28,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/9p.h>

-#define DEFAULT_MSIZE (128 * 1024)
+/* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ +
+ * room for write (16 extra) or read (11 extra) operands.
+ */
+
+#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)

/* Client Option Parsing (code inspired by NFS code)
* - a little lazy - parse all client options
--
2.37.2


2023-02-18 00:49:12

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 02/11] fs/9p: Expand setup of writeback cache to all levels

If cache is enabled, make sure we are putting the right things
in place (mainly impacts mmap). This also sets us up for more
cache levels.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 2 +-
fs/9p/vfs_addr.c | 2 --
fs/9p/vfs_file.c | 7 ++++---
fs/9p/vfs_inode.c | 3 +--
fs/9p/vfs_inode_dotl.c | 7 ++++---
5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 3a9c4517265f..61a51b90600d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -468,7 +468,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,

#ifdef CONFIG_9P_FSCACHE
/* register the session for caching */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ if (v9ses->cache == CACHE_FSCACHE) {
rc = v9fs_cache_session_get_cookie(v9ses, dev_name);
if (rc < 0)
goto err_clnt;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 97599edbc300..6f46d7e4c750 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -279,8 +279,6 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,

p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);

- BUG_ON(!v9inode->writeback_fid);
-
/* Prefetch area to be written into the cache if we're caching this
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b740017634ef..3b6458846a0b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -73,8 +73,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}

mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((file->f_flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -92,9 +91,11 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9inode->writeback_fid = (void *) writeback_fid;
}
mutex_unlock(&v9inode->v_mutex);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+#endif
v9fs_open_fid_add(inode, &fid);
return 0;
out_error:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 27a04a226d97..33e521c60e2c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -843,8 +843,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index f806b3f11649..bff37a312e64 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -316,8 +316,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,

v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
- !v9inode->writeback_fid &&
+ if ((v9ses->cache) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -340,9 +339,11 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err)
goto out;
file->private_data = ofid;
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+#endif
v9fs_open_fid_add(inode, &ofid);
file->f_mode |= FMODE_CREATED;
out:
--
2.37.2


2023-02-18 00:49:14

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v4 05/11] fs/9p: allow disable of xattr support on mount

xattr creates a lot of additional messages for 9p in
the current implementation. This allows users to
conditionalize xattr support on 9p mount if they
are on a connection with bad latency. Using this
flag is also useful when debugging other aspects
of 9p as it reduces the noise in the trace files.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
Documentation/filesystems/9p.rst | 2 ++
fs/9p/v9fs.c | 9 ++++++++-
fs/9p/v9fs.h | 3 ++-
fs/9p/vfs_super.c | 3 ++-
4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
index 7b5964bc8865..0e800b8f73cc 100644
--- a/Documentation/filesystems/9p.rst
+++ b/Documentation/filesystems/9p.rst
@@ -137,6 +137,8 @@ Options
This can be used to share devices/named pipes/sockets between
hosts. This functionality will be expanded in later versions.

+ noxattr do not offer xattr functions on this mount.
+
access there are four access modes.
user
if a user tries to access a file on v9fs
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index a46bf9121f11..f8e952c013f9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -38,7 +38,7 @@ enum {
/* String options */
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
- Opt_nodevmap,
+ Opt_nodevmap, Opt_noxattr,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -55,6 +55,7 @@ static const match_table_t tokens = {
{Opt_uname, "uname=%s"},
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
+ {Opt_noxattr, "noxattr"},
{Opt_cache, "cache=%s"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
@@ -149,6 +150,9 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
if (v9ses->flags & V9FS_POSIX_ACL)
seq_puts(m, ",posixacl");

+ if (v9ses->flags & V9FS_NO_XATTR)
+ seq_puts(m, ",noxattr");
+
return p9_show_client_options(m, v9ses->clnt);
}

@@ -269,6 +273,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_nodevmap:
v9ses->nodev = 1;
break;
+ case Opt_noxattr:
+ v9ses->flags |= V9FS_NO_XATTR;
+ break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 517b2201ad24..d90141d25d0d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -36,7 +36,8 @@ enum p9_session_flags {
V9FS_ACCESS_SINGLE = 0x04,
V9FS_ACCESS_USER = 0x08,
V9FS_ACCESS_CLIENT = 0x10,
- V9FS_POSIX_ACL = 0x20
+ V9FS_POSIX_ACL = 0x20,
+ V9FS_NO_XATTR = 0x40
};

/* possible values of ->cache */
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 65d96fa94ba2..5fc6a945bfff 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -64,7 +64,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_magic = V9FS_MAGIC;
if (v9fs_proto_dotl(v9ses)) {
sb->s_op = &v9fs_super_ops_dotl;
- sb->s_xattr = v9fs_xattr_handlers;
+ if (!(v9ses->flags & V9FS_NO_XATTR))
+ sb->s_xattr = v9fs_xattr_handlers;
} else {
sb->s_op = &v9fs_super_ops;
sb->s_time_max = U32_MAX;
--
2.37.2


2023-02-18 07:49:15

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 00/11] Performance fixes for 9p filesystem

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:12AM +0000:
> I'm gonna definitely submit the first couple patches as they are
> fairly harmless - but would like to submit the whole series to the
> upcoming merge window.

Could you take the three patches I have in my 9p-next branch:
https://github.com/martinetd/linux/commits/9p-next

If you're going to submit some?
The async stuff still isn't good, but there three patches have had
reviews and should be good to go.

(I guess we can just send Linus two pull requests for 9p, but it doesn't
really make sense given the low number of patches)

> Would appreciate reviews.

Just one first review on the form: let's start a new thread for every
new revision of the patchset.

I also used to relink from the pervious cover letter and thought that
made more sense at the time, but I was told to split threads a while ago
and now I'm trying some new tools based on lkml.kernel.org's public
inbox thread view I can agree it's much simpler to grab a batch of patch
if older versions aren't mixed in the thread.
(For the curious, I'm just grabbing the thread to review on an e-ink
reader for my eyes, but there's also b4 that I've been meaning to try at
some point -- https://b4.docs.kernel.org/en/latest/ -- that will likely
work the same)

Anyway, off to look at patches a bit.

--
Dominiquee

2023-02-18 07:51:08

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 01/11] net/9p: Adjust maximum MSIZE to account for p9 header

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:13AM +0000:
> Add maximum p9 header size to MSIZE to make sure we can
> have page aligned data.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>

I'd be tempted to round that up to the next PAGE_SIZE, but it likely
won't make any difference in practice if we want to round the actual
payload. Let's go for this.

Reviewed-by: Dominique Martinet <[email protected]>

> ---
> net/9p/client.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/net/9p/client.c b/net/9p/client.c
> index 622ec6a586ee..6c2a768a6ab1 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -28,7 +28,11 @@
> #define CREATE_TRACE_POINTS
> #include <trace/events/9p.h>
>
> -#define DEFAULT_MSIZE (128 * 1024)
> +/* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ +
> + * room for write (16 extra) or read (11 extra) operands.
> + */
> +
> +#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)
>
> /* Client Option Parsing (code inspired by NFS code)
> * - a little lazy - parse all client options

2023-02-18 07:52:27

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 05/11] fs/9p: allow disable of xattr support on mount

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:17AM +0000:
> xattr creates a lot of additional messages for 9p in
> the current implementation. This allows users to
> conditionalize xattr support on 9p mount if they
> are on a connection with bad latency. Using this
> flag is also useful when debugging other aspects
> of 9p as it reduces the noise in the trace files.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>

Reviewed-by: Dominique Martinet <[email protected]>

--
Dominique

2023-02-18 08:47:21

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 08/11] fs/9p: Add new mount modes

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:20AM +0000:
> Add some additional mount modes for cache management including
> specifying directio as a mount option and an option for ignore
> qid.version for determining whether or not a file is cacheable.

direct io is standard enough but ignore QV probably warrants a comment
in the code and not just a word in the commit message.

I see you've added these in Documentation/filesystems/9p.rst in
the "writeback mode fixes" -- I guess we can live with commits
introducing options not being 100% coherent within the series (the
implementation also comes in that fixes commit), but perhaps a '/*
ignore qid.version */' comment in the enum?

> diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
> index d90141d25d0d..48c7614c9333 100644
> --- a/fs/9p/v9fs.h
> +++ b/fs/9p/v9fs.h
> @@ -37,7 +37,10 @@ enum p9_session_flags {
> V9FS_ACCESS_USER = 0x08,
> V9FS_ACCESS_CLIENT = 0x10,
> V9FS_POSIX_ACL = 0x20,
> - V9FS_NO_XATTR = 0x40
> + V9FS_NO_XATTR = 0x40,
> + V9FS_IGNORE_QV = 0x80,
> + V9FS_DIRECT_IO = 0x100,
> + V9FS_SYNC = 0x200

... And while we're here, indentation seems off on sync

--
Dominique

2023-02-18 08:50:18

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 09/11] fs/9p: fix error reporting in v9fs_dir_release

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:21AM +0000:
> Checking the p9_fid_put value allows us to pass back errors
> involved if we end up clunking the fid as part of dir_release.
>
> This can help with more graceful response to errors in writeback
> among other things.

That is good!

Note if there are other refs we won't see any error :/ But I think we
should check p9_fid_put return value way more often, even if all we do
with it is print a warning at some debug level for context.

Reviewed-by: Dominique Martinet <[email protected]>

--
Dominique

2023-02-18 08:55:44

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 11/11] fs/9p: Fix revalidate

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:23AM +0000:
> Unclear if this case ever happens, but if no inode in dentry, then
> the dentry is definitely invalid. Seemed to be the opposite in the
> existing code.

Looking at other implementations of d_revalidate (ecryptfs, cifs, vfat)
it seems to be assumed that the inode is always valid.

I'd just remove the if, or if we keep it add a WARN or something for a
while so we can remove it in a few releases?

(That said, it's better to return 0 than 1 here, so don't take this for
a no -- progress is progress)
--
Dominique

2023-02-18 08:57:57

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 02/11] fs/9p: Expand setup of writeback cache to all levels

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:14AM +0000:
> If cache is enabled, make sure we are putting the right things
> in place (mainly impacts mmap). This also sets us up for more
> cache levels.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>

Reviewed-by: Dominique Martinet <[email protected]>

--
Dominique

2023-02-18 09:25:05

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 03/11] fs/9p: Consolidate file operations and add readahead and writeback

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:15AM +0000:
> We had 3 different sets of file operations across 2 different protocol
> variants differentiated by cache which really only changed 3
> functions. But the real problem is that certain file modes, mount
> options, and other factors weren't being considered when we
> decided whether or not to use caches.
>
> This consolidates all the operations and switches
> to conditionals within a common set to decide whether or not
> to do different aspects of caching.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>

Reviewed-by: Dominique Martinet <[email protected]>

> ---
> struct inode *v9fs_alloc_inode(struct super_block *sb);
> diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
> index 59b0e8948f78..bd31593437f3 100644
> --- a/fs/9p/vfs_dir.c
> +++ b/fs/9p/vfs_dir.c
> @@ -214,6 +214,9 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
> p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
> inode, filp, fid ? fid->fid : -1);
> if (fid) {
> + if ((fid->qid.type == P9_QTFILE) && (filp->f_mode & FMODE_WRITE))
> + v9fs_flush_inode_writeback(inode);
> +

Ok so this bugged me to no end; that seems to be because we use the same
v9fs_dir_release for v9fs_file_operations's .release and not just
v9fs_dir_operations... So it's to be expected we'll get files here.

At this point I'd suggest to use two functions, but that's probably
overdoing it.
Let's check S_ISREG(inode->i_mode) instead of fid->qid though; it
shouldn't make any difference but that's what you use in other parts of
the code and it will be easier to understand for people familiar with
the vfs.


> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> index 33e521c60e2c..8ffa6631b1fd 100644
> --- a/fs/9p/vfs_inode.c
> +++ b/fs/9p/vfs_inode.c
> @@ -219,6 +219,35 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
> wstat->extension = NULL;
> }
>
> +/**
> + * v9fs_flush_inode_writeback - writeback any data associated with inode
> + * @inode: inode to writeback
> + *
> + * This is used to make sure anything that needs to be written
> + * to server gets flushed before we do certain operations (setattr, getattr, close)
> + *
> + */
> +
> +int v9fs_flush_inode_writeback(struct inode *inode)
> +{
> + struct writeback_control wbc = {
> + .nr_to_write = LONG_MAX,
> + .sync_mode = WB_SYNC_ALL,
> + .range_start = 0,
> + .range_end = -1,
> + };
> +
> + int retval = filemap_fdatawrite_wbc(inode->i_mapping, &wbc);

Hmm, that function only starts the writeback, but doesn't wait for it.

Wasn't the point to replace 'filemap_write_and_wait' with
v9fs_flush_inode_writeback?
I don't think it's a good idea to remove the wait before setattrs and
the like; if you don't want to wait on close()'s release (but we
probably should too) perhaps split this in two?

> diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
> index bff37a312e64..4f01808c3bae 100644
> --- a/fs/9p/vfs_inode_dotl.c
> +++ b/fs/9p/vfs_inode_dotl.c
> @@ -593,9 +602,14 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
> return retval;
> }
>
> - if ((iattr->ia_valid & ATTR_SIZE) &&
> - iattr->ia_size != i_size_read(inode))
> + if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
> + i_size_read(inode)) {
> truncate_setsize(inode, iattr->ia_size);
> + if (v9ses->cache == CACHE_FSCACHE)
> + fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
> + else
> + invalidate_mapping_pages(&inode->i_data, 0, -1);

Hm, I don't think these are exclusive; resize_cookie doesn't seem to do
much about the page cache.

However, truncate_setsize calls trucate_pagecache which I believe does
the right thing; and I don't see why we would need to invalidate
[0;size[ here? We didn't before.

. . . . . . .
Ah, you've replaced it preciesly with that in "fs/9p: writeback mode
fixes"; this is annoying to review :/

So with that problem gone, I think I'm ok with this patch with the
exception of the flush inode writeback that doesn't wait (and the
nitpick on S_ISREG)

--
Dominique

2023-02-18 16:17:37

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v4 03/11] fs/9p: Consolidate file operations and add readahead and writeback

On Sat, Feb 18, 2023 at 3:25 AM <[email protected]> wrote:
>
> Ok so this bugged me to no end; that seems to be because we use the same
> v9fs_dir_release for v9fs_file_operations's .release and not just
> v9fs_dir_operations... So it's to be expected we'll get files here.
>
> At this point I'd suggest to use two functions, but that's probably
> overdoing it.
> Let's check S_ISREG(inode->i_mode) instead of fid->qid though; it
> shouldn't make any difference but that's what you use in other parts of
> the code and it will be easier to understand for people familiar with
> the vfs.
>

I can rename the function as part of the patch since it would be a bit
more accurate,
but then it is still in vfs_dir. I think there did used to be two
functions but there
was so much overlap we collapsed into one.

>
> > diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> > index 33e521c60e2c..8ffa6631b1fd 100644
> > --- a/fs/9p/vfs_inode.c
> > +++ b/fs/9p/vfs_inode.c
> > @@ -219,6 +219,35 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
> > wstat->extension = NULL;
> > }
> >
> > +/**
> > + * v9fs_flush_inode_writeback - writeback any data associated with inode
> > + * @inode: inode to writeback
> > + *
> > + * This is used to make sure anything that needs to be written
> > + * to server gets flushed before we do certain operations (setattr, getattr, close)
> > + *
> > + */
> > +
> > +int v9fs_flush_inode_writeback(struct inode *inode)
> > +{
> > + struct writeback_control wbc = {
> > + .nr_to_write = LONG_MAX,
> > + .sync_mode = WB_SYNC_ALL,
> > + .range_start = 0,
> > + .range_end = -1,
> > + };
> > +
> > + int retval = filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
>
> Hmm, that function only starts the writeback, but doesn't wait for it.
>
> Wasn't the point to replace 'filemap_write_and_wait' with
> v9fs_flush_inode_writeback?
> I don't think it's a good idea to remove the wait before setattrs and
> the like; if you don't want to wait on close()'s release (but we
> probably should too) perhaps split this in two?
>

I had thought that this is what it does, of course I could just be getting
lucky. The filemap_fdatawrite_wbc doesn't say anything about whether
WBC_SYNC_ALL forces a wait, but the next function (__filemap_fdatawrite_range)
does: (it it calls filemap_fdatawrite_wbc)

* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
* opposed to a regular memory cleansing writeback. The difference between
* these two operations is that if a dirty page/buffer is encountered, it must
* be waited upon, and not just skipped over.

So I think we are good? Happy to use a different function if it makes sense,
but this was the one that seemed to trigger the correct behavior.

-eric

2023-02-18 16:20:05

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v4 03/11] fs/9p: Consolidate file operations and add readahead and writeback

...of course, relooking at the functions in mm/filemap.c it seems like
I can probably just use filemap_fdatawrite
instead of having my own flush function since it basically sets up wbc
the same way....

On Sat, Feb 18, 2023 at 10:17 AM Eric Van Hensbergen <[email protected]> wrote:
>
> On Sat, Feb 18, 2023 at 3:25 AM <[email protected]> wrote:
> >
> > Ok so this bugged me to no end; that seems to be because we use the same
> > v9fs_dir_release for v9fs_file_operations's .release and not just
> > v9fs_dir_operations... So it's to be expected we'll get files here.
> >
> > At this point I'd suggest to use two functions, but that's probably
> > overdoing it.
> > Let's check S_ISREG(inode->i_mode) instead of fid->qid though; it
> > shouldn't make any difference but that's what you use in other parts of
> > the code and it will be easier to understand for people familiar with
> > the vfs.
> >
>
> I can rename the function as part of the patch since it would be a bit
> more accurate,
> but then it is still in vfs_dir. I think there did used to be two
> functions but there
> was so much overlap we collapsed into one.
>
> >
> > > diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> > > index 33e521c60e2c..8ffa6631b1fd 100644
> > > --- a/fs/9p/vfs_inode.c
> > > +++ b/fs/9p/vfs_inode.c
> > > @@ -219,6 +219,35 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
> > > wstat->extension = NULL;
> > > }
> > >
> > > +/**
> > > + * v9fs_flush_inode_writeback - writeback any data associated with inode
> > > + * @inode: inode to writeback
> > > + *
> > > + * This is used to make sure anything that needs to be written
> > > + * to server gets flushed before we do certain operations (setattr, getattr, close)
> > > + *
> > > + */
> > > +
> > > +int v9fs_flush_inode_writeback(struct inode *inode)
> > > +{
> > > + struct writeback_control wbc = {
> > > + .nr_to_write = LONG_MAX,
> > > + .sync_mode = WB_SYNC_ALL,
> > > + .range_start = 0,
> > > + .range_end = -1,
> > > + };
> > > +
> > > + int retval = filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
> >
> > Hmm, that function only starts the writeback, but doesn't wait for it.
> >
> > Wasn't the point to replace 'filemap_write_and_wait' with
> > v9fs_flush_inode_writeback?
> > I don't think it's a good idea to remove the wait before setattrs and
> > the like; if you don't want to wait on close()'s release (but we
> > probably should too) perhaps split this in two?
> >
>
> I had thought that this is what it does, of course I could just be getting
> lucky. The filemap_fdatawrite_wbc doesn't say anything about whether
> WBC_SYNC_ALL forces a wait, but the next function (__filemap_fdatawrite_range)
> does: (it it calls filemap_fdatawrite_wbc)
>
> * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
> * opposed to a regular memory cleansing writeback. The difference between
> * these two operations is that if a dirty page/buffer is encountered, it must
> * be waited upon, and not just skipped over.
>
> So I think we are good? Happy to use a different function if it makes sense,
> but this was the one that seemed to trigger the correct behavior.
>
> -eric

2023-02-18 20:36:13

by Dominique Martinet

[permalink] [raw]
Subject: Re: [PATCH v4 03/11] fs/9p: Consolidate file operations and add readahead and writeback

Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 10:19:47AM -0600:
> ...of course, relooking at the functions in mm/filemap.c it seems like
> I can probably just use filemap_fdatawrite
> instead of having my own flush function since it basically sets up wbc
> the same way....

hmm, I was basing myself off file_write_and_wait_range that also calls
file_check_and_advance_wb_err before returning, but the wait actually
comes from fdatawrite in there...

So, right:
- WB_SYNC is probably ok, but if we go that way let's use
filemap_fdatawrite -- less things to think about :)
- if we want any sort of error reporting
file_check_and_advance_wb_err() is probably useful, at which point
keeping the old function is just as good. That doesn't do any wait, just
checks f_wb_err ... in a really complicated way... I don't want to have
to think about.

--
Dominique

2023-02-19 21:36:37

by Christian Schoenebeck

[permalink] [raw]
Subject: Re: [PATCH v4 00/11] Performance fixes for 9p filesystem

On Saturday, February 18, 2023 1:33:12 AM CET Eric Van Hensbergen wrote:
> This is the fourth version of a patch series which adds a number
> of features to improve read/write performance in the 9p filesystem.
> Mostly it focuses on fixing caching to help utilize the recently
> increased MSIZE limits and also fixes some problematic behavior
> within the writeback code.
>
> All together, these show roughly 10x speed increases on simple
> file transfers over no caching for readahead mode. Future patch
> sets will improve cache consistency and directory caching, which
> should benefit loose mode.
>
> This iteration of the patch incorporates an important fix for
> writeback which uses a stronger mechanism to flush writeback on
> close of files and addresses observed bugs in previous versions of
> the patch for writeback, mmap, and loose cache modes.
>
> These patches are also available on github:
> https://github.com/v9fs/linux/tree/ericvh/for-next
> and on kernel.org:
> https://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
>
> Tested against qemu, cpu, and diod with fsx, dbench, and postmark
> in every caching mode.
>
> I'm gonna definitely submit the first couple patches as they are
> fairly harmless - but would like to submit the whole series to the
> upcoming merge window. Would appreciate reviews.

I tested this version thoroughly today (msize=512k in all tests). Good news
first: the previous problems of v3 are gone. Great! But I'm still trying to
make sense of the performance numbers I get with these patches.

So when doing some compilations with 9p, performance of mmap, writeback and
readahead are basically all the same, and only loose being 6x faster than the
other cache modes. Expected performance results? No errors at least. Good!

Then I tested simple linear file I/O. First linear writing a 12GB file
(time dd if=/dev/zero of=test.data bs=1G count=12):

writeback 3m10s [this series - v4]
readahead 0m11s [this series - v4]
mmap 0m11s [this series - v4]
mmap 0m11s [master]
loose 2m50s [this series - v4]
loose 2m19s [master]

That's a bit surprising. Why is loose and writeback slower?

Next linear reading a 12GB file
(time cat test.data > /dev/null):

writeback 0m24s [this series - v4]
readahead 0m25s [this series - v4]
mmap 0m25s [this series - v4]
mmap 0m9s [master]
loose 0m24s [this series - v4]
loose 0m24s [master]

mmap degredation sticks out here, and no improvement with the other modes?

I always performed a guest reboot between each run BTW.




2023-02-20 01:14:03

by Eric Van Hensbergen

[permalink] [raw]
Subject: Re: [PATCH v4 00/11] Performance fixes for 9p filesystem

Glad to hear bugs disappeared. writeback having a different
performance than mmap is confusing as they should be equivalent.

The huge blocksize on your dd is an interesting choice -- it will
completely get rid of any impact of readahead. To see impact of
readahead, choose a blocksize of
less than msize (like 4k) to actually see the perf of readahead. The
mmap degradation is likely due to stricter coherence (open-to-close
consistency means we wait on writeout), but I'd probably need to go in
and trace to verify (which probably isn't a bad idea overall).
probably a similar situation for loose and writeback. Essentially,
before close consistency it didn't have to wait for the final write to
complete before it returns so you see a faster time (even though data
wasn't actually written all the way through so you aren't measuring
the last little bit of the write (which can be quite large of a big
msize).

I'm going to take a pass through tomorrow making some fixups that
Dominiquee suggested and trying to reproduce/fix the fscache problems.

-eric

On Sun, Feb 19, 2023 at 3:36 PM Christian Schoenebeck
<[email protected]> wrote:
>
> On Saturday, February 18, 2023 1:33:12 AM CET Eric Van Hensbergen wrote:
> > This is the fourth version of a patch series which adds a number
> > of features to improve read/write performance in the 9p filesystem.
> > Mostly it focuses on fixing caching to help utilize the recently
> > increased MSIZE limits and also fixes some problematic behavior
> > within the writeback code.
> >
> > All together, these show roughly 10x speed increases on simple
> > file transfers over no caching for readahead mode. Future patch
> > sets will improve cache consistency and directory caching, which
> > should benefit loose mode.
> >
> > This iteration of the patch incorporates an important fix for
> > writeback which uses a stronger mechanism to flush writeback on
> > close of files and addresses observed bugs in previous versions of
> > the patch for writeback, mmap, and loose cache modes.
> >
> > These patches are also available on github:
> > https://github.com/v9fs/linux/tree/ericvh/for-next
> > and on kernel.org:
> > https://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
> >
> > Tested against qemu, cpu, and diod with fsx, dbench, and postmark
> > in every caching mode.
> >
> > I'm gonna definitely submit the first couple patches as they are
> > fairly harmless - but would like to submit the whole series to the
> > upcoming merge window. Would appreciate reviews.
>
> I tested this version thoroughly today (msize=512k in all tests). Good news
> first: the previous problems of v3 are gone. Great! But I'm still trying to
> make sense of the performance numbers I get with these patches.
>
> So when doing some compilations with 9p, performance of mmap, writeback and
> readahead are basically all the same, and only loose being 6x faster than the
> other cache modes. Expected performance results? No errors at least. Good!
>
> Then I tested simple linear file I/O. First linear writing a 12GB file
> (time dd if=/dev/zero of=test.data bs=1G count=12):
>
> writeback 3m10s [this series - v4]
> readahead 0m11s [this series - v4]
> mmap 0m11s [this series - v4]
> mmap 0m11s [master]
> loose 2m50s [this series - v4]
> loose 2m19s [master]
>
> That's a bit surprising. Why is loose and writeback slower?
>
> Next linear reading a 12GB file
> (time cat test.data > /dev/null):
>
> writeback 0m24s [this series - v4]
> readahead 0m25s [this series - v4]
> mmap 0m25s [this series - v4]
> mmap 0m9s [master]
> loose 0m24s [this series - v4]
> loose 0m24s [master]
>
> mmap degredation sticks out here, and no improvement with the other modes?
>
> I always performed a guest reboot between each run BTW.
>
>
>

2023-02-27 02:51:41

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v5 3/11] fs/9p: Consolidate file operations and add readahead and writeback

We had 3 different sets of file operations across 2 different protocol
variants differentiated by cache which really only changed 3
functions. But the real problem is that certain file modes, mount
options, and other factors weren't being considered when we
decided whether or not to use caches.

This consolidates all the operations and switches
to conditionals within a common set to decide whether or not
to do different aspects of caching.

Signed-off-by: Eric Van Hensbergen <[email protected]>
Reviewed-by: Dominique Martinet <[email protected]>
---
fs/9p/v9fs.c | 30 ++++------
fs/9p/v9fs.h | 2 +
fs/9p/v9fs_vfs.h | 4 --
fs/9p/vfs_dir.c | 10 +++-
fs/9p/vfs_file.c | 131 +++++++----------------------------------
fs/9p/vfs_inode.c | 41 +++++++------
fs/9p/vfs_inode_dotl.c | 28 +++++++--
7 files changed, 84 insertions(+), 162 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 61a51b90600d..a46bf9121f11 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,8 +39,6 @@ enum {
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
Opt_nodevmap,
- /* Cache options */
- Opt_cache_loose, Opt_fscache, Opt_mmap,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -58,9 +56,6 @@ static const match_table_t tokens = {
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
{Opt_cache, "cache=%s"},
- {Opt_cache_loose, "loose"},
- {Opt_fscache, "fscache"},
- {Opt_mmap, "mmap"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_posixacl, "posixacl"},
@@ -69,10 +64,12 @@ static const match_table_t tokens = {
};

static const char *const v9fs_cache_modes[nr__p9_cache_modes] = {
- [CACHE_NONE] = "none",
- [CACHE_MMAP] = "mmap",
- [CACHE_LOOSE] = "loose",
- [CACHE_FSCACHE] = "fscache",
+ [CACHE_NONE] = "none",
+ [CACHE_READAHEAD] = "readahead",
+ [CACHE_WRITEBACK] = "writeback",
+ [CACHE_MMAP] = "mmap",
+ [CACHE_LOOSE] = "loose",
+ [CACHE_FSCACHE] = "fscache",
};

/* Interpret mount options for cache mode */
@@ -89,6 +86,12 @@ static int get_cache_mode(char *s)
} else if (!strcmp(s, "mmap")) {
version = CACHE_MMAP;
p9_debug(P9_DEBUG_9P, "Cache mode: mmap\n");
+ } else if (!strcmp(s, "writeback")) {
+ version = CACHE_WRITEBACK;
+ p9_debug(P9_DEBUG_9P, "Cache mode: writeback\n");
+ } else if (!strcmp(s, "readahead")) {
+ version = CACHE_READAHEAD;
+ p9_debug(P9_DEBUG_9P, "Cache mode: readahead\n");
} else if (!strcmp(s, "none")) {
version = CACHE_NONE;
p9_debug(P9_DEBUG_9P, "Cache mode: none\n");
@@ -266,15 +269,6 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_nodevmap:
v9ses->nodev = 1;
break;
- case Opt_cache_loose:
- v9ses->cache = CACHE_LOOSE;
- break;
- case Opt_fscache:
- v9ses->cache = CACHE_FSCACHE;
- break;
- case Opt_mmap:
- v9ses->cache = CACHE_MMAP;
- break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 6acabc2e7dc9..972cfc681fe0 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -50,6 +50,8 @@ enum p9_session_flags {

enum p9_cache_modes {
CACHE_NONE,
+ CACHE_READAHEAD,
+ CACHE_WRITEBACK,
CACHE_MMAP,
CACHE_LOOSE,
CACHE_FSCACHE,
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index bc417da7e9c1..cce87c9bdd8b 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -36,10 +36,6 @@ extern const struct file_operations v9fs_dir_operations;
extern const struct file_operations v9fs_dir_operations_dotl;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
-extern const struct file_operations v9fs_cached_file_operations;
-extern const struct file_operations v9fs_cached_file_operations_dotl;
-extern const struct file_operations v9fs_mmap_file_operations;
-extern const struct file_operations v9fs_mmap_file_operations_dotl;
extern struct kmem_cache *v9fs_inode_cache;

struct inode *v9fs_alloc_inode(struct super_block *sb);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 3d74b04fe0de..289b58cb896e 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -197,9 +197,9 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx)


/**
- * v9fs_dir_release - called on a close of a file or directory
- * @inode: inode of the directory
- * @filp: file pointer to a directory
+ * v9fs_dir_release - close a directory or a file
+ * @inode: inode of the directory or file
+ * @filp: file pointer to a directory or file
*
*/

@@ -214,7 +214,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
fid = filp->private_data;
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
+
if (fid) {
+ if ((S_ISREG(inode->i_mode)) && (filp->f_mode & FMODE_WRITE))
+ retval = filemap_fdatawrite(inode->i_mapping);
+
spin_lock(&inode->i_lock);
hlist_del(&fid->ilist);
spin_unlock(&inode->i_lock);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3b6458846a0b..20e4bd299fc2 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -28,7 +28,6 @@
#include "fid.h"
#include "cache.h"

-static const struct vm_operations_struct v9fs_file_vm_ops;
static const struct vm_operations_struct v9fs_mmap_file_vm_ops;

/**
@@ -73,7 +72,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}

mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
+ if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
((file->f_flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -367,10 +366,15 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
int ret, err = 0;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);

p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
iov_iter_count(to), iocb->ki_pos);

+ if (v9ses->cache > CACHE_MMAP)
+ return generic_file_read_iter(iocb, to);
+
if (iocb->ki_filp->f_flags & O_NONBLOCK)
ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
else
@@ -395,6 +399,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t retval;
loff_t origin;
int err = 0;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+
+ if (v9ses->cache >= CACHE_WRITEBACK)
+ return generic_file_write_iter(iocb, from);

retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -477,25 +486,16 @@ static int
v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
-
-
- retval = generic_file_mmap(filp, vma);
- if (!retval)
- vma->vm_ops = &v9fs_file_vm_ops;
-
- return retval;
-}
-
-static int
-v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- int retval;
- struct inode *inode;
- struct v9fs_inode *v9inode;
+ struct inode *inode = file_inode(filp);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
struct p9_fid *fid;

- inode = file_inode(filp);
- v9inode = V9FS_I(inode);
+ if (v9ses->cache < CACHE_MMAP) {
+ invalidate_inode_pages2(filp->f_mapping);
+ return generic_file_readonly_mmap(filp, vma);
+ }
+
mutex_lock(&v9inode->v_mutex);
if (!v9inode->writeback_fid &&
(vma->vm_flags & VM_SHARED) &&
@@ -563,35 +563,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}

-/**
- * v9fs_mmap_file_read_iter - read from a file
- * @iocb: The operation parameters
- * @to: The buffer to read into
- *
- */
-static ssize_t
-v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- /* TODO: Check if there are dirty pages */
- return v9fs_file_read_iter(iocb, to);
-}
-
-/**
- * v9fs_mmap_file_write_iter - write to a file
- * @iocb: The operation parameters
- * @from: The data to write
- *
- */
-static ssize_t
-v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- /*
- * TODO: invalidate mmaps on filp's inode between
- * offset and offset+count
- */
- return v9fs_file_write_iter(iocb, from);
-}
-
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
{
struct inode *inode;
@@ -614,13 +585,6 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
}

-
-static const struct vm_operations_struct v9fs_file_vm_ops = {
- .fault = filemap_fault,
- .map_pages = filemap_map_pages,
- .page_mkwrite = v9fs_vm_page_mkwrite,
-};
-
static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
.close = v9fs_mmap_vm_close,
.fault = filemap_fault,
@@ -628,34 +592,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
.page_mkwrite = v9fs_vm_page_mkwrite,
};

-
-const struct file_operations v9fs_cached_file_operations = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock,
- .mmap = v9fs_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync,
-};
-
-const struct file_operations v9fs_cached_file_operations_dotl = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock_dotl,
- .flock = v9fs_file_flock_dotl,
- .mmap = v9fs_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync_dotl,
-};
-
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
.read_iter = v9fs_file_read_iter,
@@ -677,34 +613,7 @@ const struct file_operations v9fs_file_operations_dotl = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
- .mmap = generic_file_readonly_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync_dotl,
-};
-
-const struct file_operations v9fs_mmap_file_operations = {
- .llseek = generic_file_llseek,
- .read_iter = v9fs_mmap_file_read_iter,
- .write_iter = v9fs_mmap_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock,
- .mmap = v9fs_mmap_file_mmap,
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fsync = v9fs_file_fsync,
-};
-
-const struct file_operations v9fs_mmap_file_operations_dotl = {
- .llseek = generic_file_llseek,
- .read_iter = v9fs_mmap_file_read_iter,
- .write_iter = v9fs_mmap_file_write_iter,
- .open = v9fs_file_open,
- .release = v9fs_dir_release,
- .lock = v9fs_file_lock_dotl,
- .flock = v9fs_file_flock_dotl,
- .mmap = v9fs_mmap_file_mmap,
+ .mmap = v9fs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 33e521c60e2c..61da885e4d40 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -287,24 +287,10 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
case S_IFREG:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- if (v9ses->cache == CACHE_LOOSE ||
- v9ses->cache == CACHE_FSCACHE)
- inode->i_fop =
- &v9fs_cached_file_operations_dotl;
- else if (v9ses->cache == CACHE_MMAP)
- inode->i_fop = &v9fs_mmap_file_operations_dotl;
- else
- inode->i_fop = &v9fs_file_operations_dotl;
+ inode->i_fop = &v9fs_file_operations_dotl;
} else {
inode->i_op = &v9fs_file_inode_operations;
- if (v9ses->cache == CACHE_LOOSE ||
- v9ses->cache == CACHE_FSCACHE)
- inode->i_fop =
- &v9fs_cached_file_operations;
- else if (v9ses->cache == CACHE_MMAP)
- inode->i_fop = &v9fs_mmap_file_operations;
- else
- inode->i_fop = &v9fs_file_operations;
+ inode->i_fop = &v9fs_file_operations;
}

break;
@@ -843,7 +829,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
+ if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -1030,6 +1016,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
struct p9_wstat *st;
@@ -1039,6 +1026,12 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
+ } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ if (S_ISREG(inode->i_mode)) {
+ int retval = filemap_fdatawrite(inode->i_mapping);
+ if (retval)
+ p9_debug(P9_DEBUG_ERROR, "flushing writeback during getattr returned %d\n", retval);
+ }
}
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
@@ -1115,8 +1108,11 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
}

/* Write all dirty data */
- if (d_is_reg(dentry))
- filemap_write_and_wait(inode->i_mapping);
+ if (d_is_reg(dentry)) {
+ retval = filemap_fdatawrite(inode->i_mapping);
+ if (retval)
+ p9_debug(P9_DEBUG_ERROR, "flushing writeback during setattr returned %d\n", retval);
+ }

retval = p9_client_wstat(fid, &wstat);

@@ -1127,9 +1123,12 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
return retval;

if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(inode)) {
+ iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
- fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ if (v9ses->cache == CACHE_FSCACHE)
+ fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ else
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
}

v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index bff37a312e64..84c1619e091d 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -458,6 +458,7 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
struct dentry *dentry = path->dentry;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct inode *inode = d_inode(dentry);
struct p9_stat_dotl *st;

p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
@@ -465,6 +466,12 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
generic_fillattr(&init_user_ns, d_inode(dentry), stat);
return 0;
+ } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ if (S_ISREG(inode->i_mode)) {
+ int retval = filemap_fdatawrite(inode->i_mapping);
+ if (retval)
+ p9_debug(P9_DEBUG_ERROR, "flushing writeback during getattr returned %d\n", retval);
+ }
}
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
@@ -540,12 +547,14 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *iattr)
{
int retval, use_dentry = 0;
+ struct inode *inode = d_inode(dentry);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL;
struct p9_iattr_dotl p9attr = {
.uid = INVALID_UID,
.gid = INVALID_GID,
};
- struct inode *inode = d_inode(dentry);

p9_debug(P9_DEBUG_VFS, "\n");

@@ -553,6 +562,8 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
if (retval)
return retval;

+ v9ses = v9fs_dentry2v9ses(dentry);
+
p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
if (iattr->ia_valid & ATTR_MODE)
p9attr.mode = iattr->ia_mode;
@@ -583,8 +594,12 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
return PTR_ERR(fid);

/* Write all dirty data */
- if (S_ISREG(inode->i_mode))
- filemap_write_and_wait(inode->i_mapping);
+ if (S_ISREG(inode->i_mode)) {
+ retval = filemap_fdatawrite(inode->i_mapping);
+ if (retval < 0) {
+ p9_debug(P9_DEBUG_ERROR, "Flushing file prior to setattr failed: %d\n", retval);
+ }
+ }

retval = p9_client_setattr(fid, &p9attr);
if (retval < 0) {
@@ -593,9 +608,12 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
return retval;
}

- if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(inode))
+ if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
+ i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ if (v9ses->cache == CACHE_FSCACHE)
+ fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
+ }

v9fs_invalidate_inode_attr(inode);
setattr_copy(&init_user_ns, inode, iattr);
--
2.37.2


2023-02-27 02:55:53

by Eric Van Hensbergen

[permalink] [raw]
Subject: [PATCH v5 8/11] fs/9p: Add new mount modes

Add some additional mount modes for cache management including
specifying directio as a mount option and an option for ignore
qid.version for determining whether or not a file is cacheable.

Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/v9fs.c | 16 ++++++++++++++--
fs/9p/v9fs.h | 5 ++++-
2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8e952c013f9..43d3806150a9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -38,7 +38,7 @@ enum {
/* String options */
Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag,
/* Options that take no arguments */
- Opt_nodevmap, Opt_noxattr,
+ Opt_nodevmap, Opt_noxattr, Opt_directio, Opt_ignoreqv,
/* Access options */
Opt_access, Opt_posixacl,
/* Lock timeout option */
@@ -56,6 +56,8 @@ static const match_table_t tokens = {
{Opt_remotename, "aname=%s"},
{Opt_nodevmap, "nodevmap"},
{Opt_noxattr, "noxattr"},
+ {Opt_directio, "directio"},
+ {Opt_ignoreqv, "ignoreqv"},
{Opt_cache, "cache=%s"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
@@ -125,7 +127,7 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
if (v9ses->nodev)
seq_puts(m, ",nodevmap");
if (v9ses->cache)
- seq_printf(m, ",%s", v9fs_cache_modes[v9ses->cache]);
+ seq_printf(m, ",cache=%s", v9fs_cache_modes[v9ses->cache]);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cachetag && v9ses->cache == CACHE_FSCACHE)
seq_printf(m, ",cachetag=%s", v9ses->cachetag);
@@ -147,6 +149,10 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root)
break;
}

+ if (v9ses->flags & V9FS_IGNORE_QV)
+ seq_puts(m, ",ignoreqv");
+ if (v9ses->flags & V9FS_DIRECT_IO)
+ seq_puts(m, ",directio");
if (v9ses->flags & V9FS_POSIX_ACL)
seq_puts(m, ",posixacl");

@@ -276,6 +282,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_noxattr:
v9ses->flags |= V9FS_NO_XATTR;
break;
+ case Opt_directio:
+ v9ses->flags |= V9FS_DIRECT_IO;
+ break;
+ case Opt_ignoreqv:
+ v9ses->flags |= V9FS_IGNORE_QV;
+ break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
kfree(v9ses->cachetag);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a93327aed0d2..8e79b5b619af 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -37,7 +37,10 @@ enum p9_session_flags {
V9FS_ACCESS_USER = 0x08,
V9FS_ACCESS_CLIENT = 0x10,
V9FS_POSIX_ACL = 0x20,
- V9FS_NO_XATTR = 0x40
+ V9FS_NO_XATTR = 0x40,
+ V9FS_IGNORE_QV = 0x80, /* ignore qid.version for cache hints */
+ V9FS_DIRECT_IO = 0x100,
+ V9FS_SYNC = 0x200
};

/* possible values of ->cache */
--
2.37.2