This fixes several detected problems from preivous
patches when running with writeback mode. In
particular this fixes issues with files which are opened
as write only and getattr on files which dirty caches.
This patch makes sure that cache behavior for an open file is stored in
the client copy of fid->mode. This allows us to reflect cache behavior
from mount flags, open mode, and information from the server to
inform readahead and writeback behavior.
This includes adding support for a 9p semantic that qid.version==0
is used to mark a file as non-cachable which is important for
synthetic files. This may have a side-effect of not supporting
caching on certain legacy file servers that do not properly set
qid.version. There is also now a mount flag which can disable
the qid.version behavior.
Signed-off-by: Eric Van Hensbergen <[email protected]>
---
Documentation/filesystems/9p.rst | 24 ++++-----
fs/9p/fid.c | 49 +++++++----------
fs/9p/fid.h | 33 +++++++++++-
fs/9p/v9fs.h | 1 -
fs/9p/vfs_addr.c | 22 ++++----
fs/9p/vfs_file.c | 91 +++++++++++---------------------
fs/9p/vfs_inode.c | 45 ++++++----------
fs/9p/vfs_inode_dotl.c | 50 +++++++-----------
fs/9p/vfs_super.c | 21 +++++---
9 files changed, 153 insertions(+), 183 deletions(-)
diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
index 0e800b8f73cc..0c2c7a181d85 100644
--- a/Documentation/filesystems/9p.rst
+++ b/Documentation/filesystems/9p.rst
@@ -79,18 +79,14 @@ Options
cache=mode specifies a caching policy. By default, no caches are used.
- none
- default no cache policy, metadata and data
- alike are synchronous.
- loose
- no attempts are made at consistency,
- intended for exclusive, read-only mounts
- fscache
- use FS-Cache for a persistent, read-only
- cache backend.
- mmap
- minimal cache that is only used for read-write
- mmap. Northing else is cached, like cache=none
+ ========= =============================================
+ none no cache of file or metadata
+ readahead readahead caching of files
+ writeback delayed writeback of files
+ mmap support mmap operations read/write with cache
+ loose meta-data and file cache with no coherency
+ fscache use FS-Cache for a persistent cache backend
+ ========= =============================================
debug=n specifies debug level. The debug level is a bitmask.
@@ -137,6 +133,10 @@ Options
This can be used to share devices/named pipes/sockets between
hosts. This functionality will be expanded in later versions.
+ directio bypass page cache on all read/write operations
+
+ ignoreqv ignore qid.version==0 as a marker to ignore cache
+
noxattr do not offer xattr functions on this mount.
access there are four access modes.
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 805151114e96..8c1697619f3d 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -23,7 +23,6 @@ static inline void __add_fid(struct dentry *dentry, struct p9_fid *fid)
hlist_add_head(&fid->dlist, (struct hlist_head *)&dentry->d_fsdata);
}
-
/**
* v9fs_fid_add - add a fid to a dentry
* @dentry: dentry that the fid is being added to
@@ -41,14 +40,24 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid **pfid)
*pfid = NULL;
}
+static bool v9fs_is_writeable(int mode)
+{
+ if ((mode & P9_OWRITE) || (mode & P9_ORDWR))
+ return true;
+ else
+ return false;
+}
+
/**
* v9fs_fid_find_inode - search for an open fid off of the inode list
* @inode: return a fid pointing to a specific inode
+ * @writeable: only consider fids which are writeable
* @uid: return a fid belonging to the specified user
+ * @any: ignore uid as a selection criteria
*
*/
-
-static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any)
{
struct hlist_head *h;
struct p9_fid *fid, *ret = NULL;
@@ -58,7 +67,12 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
spin_lock(&inode->i_lock);
h = (struct hlist_head *)&inode->i_private;
hlist_for_each_entry(fid, h, ilist) {
- if (uid_eq(fid->uid, uid)) {
+ if (any || uid_eq(fid->uid, uid)) {
+ if (want_writeable && !v9fs_is_writeable(fid->mode)) {
+ p9_debug(P9_DEBUG_VFS, " mode: %x not writeable?\n",
+ fid->mode);
+ continue;
+ }
p9_fid_get(fid);
ret = fid;
break;
@@ -118,7 +132,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
spin_unlock(&dentry->d_lock);
} else {
if (dentry->d_inode)
- ret = v9fs_fid_find_inode(dentry->d_inode, uid);
+ ret = v9fs_fid_find_inode(dentry->d_inode, false, uid, any);
}
return ret;
@@ -299,28 +313,3 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
return v9fs_fid_lookup_with_uid(dentry, uid, any);
}
-struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
-{
- int err;
- struct p9_fid *fid, *ofid;
-
- ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0);
- fid = clone_fid(ofid);
- if (IS_ERR(fid))
- goto error_out;
- p9_fid_put(ofid);
- /*
- * writeback fid will only be used to write back the
- * dirty pages. We always request for the open fid in read-write
- * mode so that a partial page write which result in page
- * read can work.
- */
- err = p9_client_open(fid, O_RDWR);
- if (err < 0) {
- p9_fid_put(fid);
- fid = ERR_PTR(err);
- goto error_out;
- }
-error_out:
- return fid;
-}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 8a4e8cd12ca2..11576e1364bf 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -7,14 +7,16 @@
#ifndef FS_9P_FID_H
#define FS_9P_FID_H
#include <linux/list.h>
+#include "v9fs.h"
+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any);
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry)
{
return v9fs_fid_lookup(dentry->d_parent);
}
void v9fs_fid_add(struct dentry *dentry, struct p9_fid **fid);
-struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
void v9fs_open_fid_add(struct inode *inode, struct p9_fid **fid);
static inline struct p9_fid *clone_fid(struct p9_fid *fid)
{
@@ -32,4 +34,33 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
p9_fid_put(fid);
return nfid;
}
+/**
+ * v9fs_fid_addmodes - add cache flags to fid mode (for client use only)
+ * @fid: fid to augment
+ * @s_flags: session info mount flags
+ * @s_cache: session info cache flags
+ * @f_flags: unix open flags
+ *
+ * make sure mode reflects flags of underlying mounts
+ * also qid.version == 0 reflects a synthetic or legacy file system
+ * NOTE: these are set after open so only reflect 9p client not
+ * underlying file system on server.
+ */
+static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
+ int s_cache, unsigned int f_flags)
+{
+ if (fid->qid.type != P9_QTFILE)
+ return;
+
+ if ((!s_cache) ||
+ ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) ||
+ (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
+ fid->mode |= P9L_DIRECT; /* no read or write cache */
+ } else if ((s_cache < CACHE_WRITEBACK) ||
+ (f_flags & O_DSYNC) | (s_flags & V9FS_SYNC)) {
+ fid->mode |= P9L_NOWRITECACHE;
+ } else if (s_cache == CACHE_LOOSE) {
+ fid->mode |= P9L_LOOSE; /* noncoherent cache */
+ }
+}
#endif
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 48c7614c9333..8be9c0f59679 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -118,7 +118,6 @@ struct v9fs_inode {
struct netfs_inode netfs; /* Netfslib context and vfs inode */
struct p9_qid qid;
unsigned int cache_validity;
- struct p9_fid *writeback_fid;
struct mutex v_mutex;
};
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6f46d7e4c750..211165430a8a 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -57,8 +57,6 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
*/
static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- struct inode *inode = file_inode(file);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid = file->private_data;
BUG_ON(!fid);
@@ -66,11 +64,8 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
/* we might need to read from a fid that was opened write-only
* for read-modify-write of page cache, use the writeback fid
* for that */
- if (rreq->origin == NETFS_READ_FOR_WRITE &&
- (fid->mode & O_ACCMODE) == O_WRONLY) {
- fid = v9inode->writeback_fid;
- BUG_ON(!fid);
- }
+ WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE &&
+ !(fid->mode & P9_ORDWR));
p9_fid_get(fid);
rreq->netfs_priv = fid;
@@ -164,6 +159,7 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
loff_t i_size = i_size_read(inode);
struct iov_iter from;
size_t len = folio_size(folio);
+ struct p9_fid *writeback_fid;
int err;
if (start >= i_size)
@@ -173,13 +169,17 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len);
- /* We should have writeback_fid always set */
- BUG_ON(!v9inode->writeback_fid);
+ writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true);
+ if (!writeback_fid) {
+ WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n",
+ inode->i_private);
+ return -EINVAL;
+ }
folio_wait_fscache(folio);
folio_start_writeback(folio);
- p9_client_write(v9inode->writeback_fid, start, &from, &err);
+ p9_client_write(writeback_fid, start, &from, &err);
if (err == 0 &&
fscache_cookie_enabled(cookie) &&
@@ -192,6 +192,8 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
}
folio_end_writeback(folio);
+ p9_fid_put(writeback_fid);
+
return err;
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 20e4bd299fc2..936daff9f948 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -42,7 +42,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
int err;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *writeback_fid;
+ struct p9_fid *fid;
int omode;
p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
@@ -59,7 +59,19 @@ int v9fs_file_open(struct inode *inode, struct file *file)
if (IS_ERR(fid))
return PTR_ERR(fid);
- err = p9_client_open(fid, omode);
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
+ int writeback_omode = (omode & !P9_OWRITE) | P9_ORDWR;
+
+ p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n");
+ err = p9_client_open(fid, writeback_omode);
+ if (err < 0) {
+ p9_debug(P9_DEBUG_CACHE, "could not open O_RDWR, disabling caches\n");
+ err = p9_client_open(fid, omode);
+ fid->mode |= P9L_DIRECT;
+ }
+ } else {
+ err = p9_client_open(fid, omode);
+ }
if (err < 0) {
p9_fid_put(fid);
return err;
@@ -71,36 +83,14 @@ int v9fs_file_open(struct inode *inode, struct file *file)
file->private_data = fid;
}
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- writeback_fid = v9fs_writeback_fid(file_dentry(file));
- if (IS_ERR(writeback_fid)) {
- err = PTR_ERR(writeback_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out_error;
- }
- v9inode->writeback_fid = (void *) writeback_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
#endif
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);
return 0;
-out_error:
- p9_fid_put(file->private_data);
- file->private_data = NULL;
- return err;
}
/**
@@ -366,14 +356,14 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
int ret, err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
- iov_iter_count(to), iocb->ki_pos);
+ p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n",
+ fid->fid, iov_iter_count(to), iocb->ki_pos);
- if (v9ses->cache > CACHE_MMAP)
+ if (!(fid->mode & P9L_DIRECT)) {
+ p9_debug(P9_DEBUG_VFS, "(cached)\n");
return generic_file_read_iter(iocb, to);
+ }
if (iocb->ki_filp->f_flags & O_NONBLOCK)
ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
@@ -396,14 +386,17 @@ static ssize_t
v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct p9_fid *fid = file->private_data;
ssize_t retval;
loff_t origin;
int err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- if (v9ses->cache >= CACHE_WRITEBACK)
+ p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid);
+
+ if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) {
+ p9_debug(P9_DEBUG_CACHE, "(cached)\n");
return generic_file_write_iter(iocb, from);
+ }
retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -487,36 +480,18 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
struct inode *inode = file_inode(filp);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- struct p9_fid *fid;
+
+ p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp);
if (v9ses->cache < CACHE_MMAP) {
+ p9_debug(P9_DEBUG_CACHE, "(no mmap mode)");
+ if (vma->vm_flags & VM_MAYSHARE)
+ return -ENODEV;
invalidate_inode_pages2(filp->f_mapping);
return generic_file_readonly_mmap(filp, vma);
}
- mutex_lock(&v9inode->v_mutex);
- if (!v9inode->writeback_fid &&
- (vma->vm_flags & VM_SHARED) &&
- (vma->vm_flags & VM_WRITE)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during mmap instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- fid = v9fs_writeback_fid(file_dentry(filp));
- if (IS_ERR(fid)) {
- retval = PTR_ERR(fid);
- mutex_unlock(&v9inode->v_mutex);
- return retval;
- }
- v9inode->writeback_fid = (void *) fid;
- }
- mutex_unlock(&v9inode->v_mutex);
-
retval = generic_file_mmap(filp, vma);
if (!retval)
vma->vm_ops = &v9fs_mmap_file_vm_ops;
@@ -527,7 +502,6 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
static vm_fault_t
v9fs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct v9fs_inode *v9inode;
struct folio *folio = page_folio(vmf->page);
struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
@@ -536,8 +510,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
folio, (unsigned long)filp->private_data);
- v9inode = V9FS_I(inode);
-
/* Wait for the page to be written to the cache before we allow it to
* be modified. We then assume the entire page will need writing back.
*/
@@ -550,7 +522,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
/* Update file times before taking page lock */
file_update_time(filp);
- BUG_ON(!v9inode->writeback_fid);
if (folio_lock_killable(folio) < 0)
return VM_FAULT_RETRY;
if (folio_mapping(folio) != inode->i_mapping)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8ffa6631b1fd..d53475e1ba27 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -259,7 +259,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL);
if (!v9inode)
return NULL;
- v9inode->writeback_fid = NULL;
v9inode->cache_validity = 0;
mutex_init(&v9inode->v_mutex);
return &v9inode->netfs.inode;
@@ -412,9 +411,6 @@ void v9fs_evict_inode(struct inode *inode)
filemap_fdatawrite(&inode->i_data);
fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
- /* clunk the fid stashed in writeback_fid */
- p9_fid_put(v9inode->writeback_fid);
- v9inode->writeback_fid = NULL;
}
static int v9fs_test_inode(struct inode *inode, void *data)
@@ -825,9 +821,10 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
u32 perm;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *inode_fid;
+ struct p9_fid *fid;
struct dentry *res = NULL;
struct inode *inode;
+ int p9_omode;
if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
@@ -846,9 +843,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
- fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
- v9fs_uflags2omode(flags,
- v9fs_proto_dotu(v9ses)));
+ p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
goto error;
@@ -857,25 +859,6 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9fs_invalidate_inode_attr(dir);
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto error;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
err = finish_open(file, dentry, generic_file_open);
if (err)
goto error;
@@ -884,6 +867,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);
file->f_mode |= FMODE_CREATED;
@@ -1053,7 +1038,7 @@ v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
} else if (v9ses->cache >= CACHE_WRITEBACK) {
if (S_ISREG(inode->i_mode))
@@ -1148,10 +1133,10 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
- else
- invalidate_mapping_pages(&inode->i_data, 0, -1);
}
v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 4f01808c3bae..f5f15878cbb7 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -232,12 +232,12 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
int err = 0;
kgid_t gid;
umode_t mode;
+ int p9_omode = v9fs_open_to_dotl_flags(flags);
const unsigned char *name = NULL;
struct p9_qid qid;
struct inode *inode;
struct p9_fid *fid = NULL;
- struct v9fs_inode *v9inode;
- struct p9_fid *dfid = NULL, *ofid = NULL, *inode_fid = NULL;
+ struct p9_fid *dfid = NULL, *ofid = NULL;
struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
struct dentry *res = NULL;
@@ -282,14 +282,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
/* Update mode based on ACL value */
err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
if (err) {
- p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n",
err);
goto out;
}
- err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
- mode, gid, &qid);
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid);
if (err < 0) {
- p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n",
err);
goto out;
}
@@ -314,36 +319,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
v9fs_fid_add(dentry, &fid);
d_instantiate(dentry, inode);
- v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
/* Since we are opening a file, assign the open fid to the file */
err = finish_open(file, dentry, generic_file_open);
if (err)
goto out;
file->private_data = ofid;
#ifdef CONFIG_9P_FSCACHE
- if (v9ses->cache == CACHE_FSCACHE)
+ if (v9ses->cache == CACHE_FSCACHE) {
+ struct v9fs_inode *v9inode = V9FS_I(inode);
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+ }
#endif
+ v9fs_fid_add_modes(ofid, v9ses->flags, v9ses->cache, flags);
v9fs_open_fid_add(inode, &ofid);
file->f_mode |= FMODE_CREATED;
out:
@@ -464,9 +452,9 @@ v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&init_user_ns, d_inode(dentry), stat);
+ generic_fillattr(&init_user_ns, inode, stat);
return 0;
- } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ } else if (v9ses->cache) {
if (S_ISREG(inode->i_mode))
v9fs_flush_inode_writeback(inode);
@@ -605,10 +593,10 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
- else
- invalidate_mapping_pages(&inode->i_data, 0, -1);
}
v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 5fc6a945bfff..797f717e1a91 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -292,23 +292,26 @@ static int v9fs_write_inode(struct inode *inode,
{
int ret;
struct p9_wstat wstat;
+ struct p9_fid *fid = v9fs_fid_find_inode(inode, false, INVALID_UID, true);
struct v9fs_inode *v9inode;
+
/*
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
- p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+ p9_debug(P9_DEBUG_VFS, "%s: inode %p writeback_fid: %p\n", __func__, inode, fid);
v9inode = V9FS_I(inode);
- if (!v9inode->writeback_fid)
- return 0;
+ if (!fid)
+ return -EINVAL;
v9fs_blank_wstat(&wstat);
- ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+ ret = p9_client_wstat(fid, &wstat);
if (ret < 0) {
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+ p9_fid_put(fid);
return 0;
}
@@ -316,6 +319,7 @@ static int v9fs_write_inode_dotl(struct inode *inode,
struct writeback_control *wbc)
{
int ret;
+ struct p9_fid *fid = v9fs_fid_find_inode(inode, FMODE_WRITE, INVALID_UID, 1);
struct v9fs_inode *v9inode;
/*
* send an fsync request to server irrespective of
@@ -323,16 +327,17 @@ static int v9fs_write_inode_dotl(struct inode *inode,
*/
v9inode = V9FS_I(inode);
p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
- __func__, inode, v9inode->writeback_fid);
- if (!v9inode->writeback_fid)
- return 0;
+ __func__, inode, fid);
+ if (!fid)
+ return -EINVAL;
- ret = p9_client_fsync(v9inode->writeback_fid, 0);
+ ret = p9_client_fsync(fid, 0);
if (ret < 0) {
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+ p9_fid_put(fid);
return 0;
}
--
2.37.2
(not reviewed this yet, just a kerneldoc warning first before I forget)
Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:22AM +0000:
> diff --git a/fs/9p/fid.c b/fs/9p/fid.c
> index 805151114e96..8c1697619f3d 100644
> --- a/fs/9p/fid.c
> +++ b/fs/9p/fid.c
> @@ -41,14 +40,24 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid **pfid)
> *pfid = NULL;
> }
>
> +static bool v9fs_is_writeable(int mode)
> +{
> + if ((mode & P9_OWRITE) || (mode & P9_ORDWR))
> + return true;
> + else
> + return false;
> +}
> +
> /**
> * v9fs_fid_find_inode - search for an open fid off of the inode list
> * @inode: return a fid pointing to a specific inode
> + * @writeable: only consider fids which are writeable
`make M=fs/9p W=1` complains about doc discreptancy here,
writeable vs. want_writeable.
These are a pain, but let's make sure new ones don't creep in...
(I just wish we could make W=1 the default for part of the subtree, but
I didn't find an easy way to do so last time I checked -- perhaps you'll
have more luck if you have time to look)
> * @uid: return a fid belonging to the specified user
> + * @any: ignore uid as a selection criteria
> *
> */
> -
> -static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
> +struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
> + kuid_t uid, bool any)
--
Dominique
Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:22AM +0000:
> This fixes several detected problems from preivous
> patches when running with writeback mode. In
> particular this fixes issues with files which are opened
> as write only and getattr on files which dirty caches.
>
> This patch makes sure that cache behavior for an open file is stored in
> the client copy of fid->mode. This allows us to reflect cache behavior
> from mount flags, open mode, and information from the server to
> inform readahead and writeback behavior.
>
> This includes adding support for a 9p semantic that qid.version==0
> is used to mark a file as non-cachable which is important for
> synthetic files. This may have a side-effect of not supporting
> caching on certain legacy file servers that do not properly set
> qid.version. There is also now a mount flag which can disable
> the qid.version behavior.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>
Didn't have time to review it all thoroughly, sending what I have
anyway...
> diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
> index 0e800b8f73cc..0c2c7a181d85 100644
> --- a/Documentation/filesystems/9p.rst
> +++ b/Documentation/filesystems/9p.rst
> @@ -79,18 +79,14 @@ Options
>
> cache=mode specifies a caching policy. By default, no caches are used.
>
> - none
> - default no cache policy, metadata and data
> - alike are synchronous.
> - loose
> - no attempts are made at consistency,
> - intended for exclusive, read-only mounts
> - fscache
> - use FS-Cache for a persistent, read-only
> - cache backend.
> - mmap
> - minimal cache that is only used for read-write
> - mmap. Northing else is cached, like cache=none
> + ========= =============================================
> + none no cache of file or metadata
> + readahead readahead caching of files
> + writeback delayed writeback of files
> + mmap support mmap operations read/write with cache
> + loose meta-data and file cache with no coherency
> + fscache use FS-Cache for a persistent cache backend
> + ========= =============================================
perhaps a word saying the caches are incremental, only one can be used,
and listing them in order?
e.g. it's not clear from this that writeback also enables readahead,
and as a user I'd try to use cache=readahead,cache=writeback and wonder
why that doesn't work (well, I guess it would in that order...)
> diff --git a/fs/9p/fid.c b/fs/9p/fid.c
> index 805151114e96..8c1697619f3d 100644
> --- a/fs/9p/fid.c
> +++ b/fs/9p/fid.c
> @@ -41,14 +40,24 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid **pfid)
> *pfid = NULL;
> }
>
> +static bool v9fs_is_writeable(int mode)
> +{
> + if ((mode & P9_OWRITE) || (mode & P9_ORDWR))
(style) that's usually written 'if (mode & (P9_OWRITE | P9_ORDWR))'
(I don't really care, the compiler will likely generate the same more
efficient check)
> @@ -32,4 +34,33 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
> p9_fid_put(fid);
> return nfid;
> }
> +/**
> + * v9fs_fid_addmodes - add cache flags to fid mode (for client use only)
> + * @fid: fid to augment
> + * @s_flags: session info mount flags
> + * @s_cache: session info cache flags
> + * @f_flags: unix open flags
> + *
> + * make sure mode reflects flags of underlying mounts
> + * also qid.version == 0 reflects a synthetic or legacy file system
> + * NOTE: these are set after open so only reflect 9p client not
> + * underlying file system on server.
Ok, so ignore my comment about that in other commit; but that note
really should also be in the header or commits should make sense in
order...
Rand aside, what's the point? It saves a lookup for the session in
v9fs_file_read/write_iter ? We don't support changing cache mode for new
fids with `mount -o remount` do we...
Ah, I see you're adding DIRECT to the mode if you fail opening the
writeback fid; ok that makes more sense.
I'd appreciate a comment as well for that, around the enum definition
rather than here, if you want to humor me on this.
> v9fs_file.c
> @@ -59,7 +59,19 @@ int v9fs_file_open(struct inode *inode, struct file *file)
> if (IS_ERR(fid))
> return PTR_ERR(fid);
>
> - err = p9_client_open(fid, omode);
> + if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
> + int writeback_omode = (omode & !P9_OWRITE) | P9_ORDWR;
omode & ~P9_OWRITE ?
`!P9_OWRITE` will be 0...
> diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
> index 5fc6a945bfff..797f717e1a91 100644
> --- a/fs/9p/vfs_super.c
> +++ b/fs/9p/vfs_super.c
> @@ -323,16 +327,17 @@ static int v9fs_write_inode_dotl(struct inode *inode,
> */
> v9inode = V9FS_I(inode);
> p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
> - __func__, inode, v9inode->writeback_fid);
> - if (!v9inode->writeback_fid)
> - return 0;
> + __func__, inode, fid);
> + if (!fid)
> + return -EINVAL;
Hmm, what happens if we return EINVAL here?
Might want a WARN_ONCE or something?
[email protected] wrote on Sat, Feb 18, 2023 at 07:01:22PM +0900:
> > diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
> > index 5fc6a945bfff..797f717e1a91 100644
> > --- a/fs/9p/vfs_super.c
> > +++ b/fs/9p/vfs_super.c
>
> > @@ -323,16 +327,17 @@ static int v9fs_write_inode_dotl(struct inode *inode,
> > */
> > v9inode = V9FS_I(inode);
> > p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
> > - __func__, inode, v9inode->writeback_fid);
> > - if (!v9inode->writeback_fid)
> > - return 0;
> > + __func__, inode, fid);
> > + if (!fid)
> > + return -EINVAL;
>
> Hmm, what happens if we return EINVAL here?
> Might want a WARN_ONCE or something?
Answering myself on this: No idea what happens, but it's fairly
common...
(I saw it from wb_writeback which considers any non-zero return value as
'progress', so the error is progress as well... Might make more sense to
return 0 here actually? need more thorough checking, didn't take time to
dig through this either...)
That aside I ran with my comments addressed and cache=fscache, and
things blew up during ./configure of coreutils-9.1 in qemu:
(I ran it as root without setting the env var so it failed, that much is
expected -- the evict_inode blowing up isn't)
-------
checking whether mknod can create fifo without root privileges... configure: error: in `/mnt/coreutils-9.1':
configure: error: you should not run configure as root (set FORCE_UNSAFE_CONFIGURE=1 in environment to bypass this check)
See `config.log' for more details
FS-Cache:
FS-Cache: Assertion failed
FS-Cache: 2 == 0 is false
------------[ cut here ]------------
kernel BUG at fs/fscache/cookie.c:985!
invalid opcode: 0000 [#3] SMP PTI
CPU: 0 PID: 9707 Comm: rm Tainted: G D 6.2.0-rc2+ #37
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
RIP: 0010:__fscache_relinquish_cookie.cold+0x5a/0x8f
Code: 48 c7 c7 21 5e b8 81 e8 34 87 ff ff 48 c7 c7 2f 5e b8 81 e8 28 87 ff ff 48 63 73 04 31 d2 48 c7 c7 00 61 b8 81 e8 16 87 ff ff <0f> 0b 44 8b 47 04 8b 4f 0c 45 0f b8
RSP: 0018:ffffc90002697e08 EFLAGS: 00010286
RAX: 0000000000000019 RBX: ffff8880077de210 RCX: 00000000ffffefff
RDX: 00000000ffffffea RSI: 00000000ffffefff RDI: 0000000000000001
RBP: ffffc90002697e18 R08: 0000000000004ffb R09: 00000000ffffefff
R10: ffffffff8264ea20 R11: ffffffff8264ea20 R12: 0000000000000000
R13: ffffffffc00870e0 R14: ffff88800308cd20 R15: ffff8880046a0020
FS: 00007fec5aa33000(0000) GS:ffff88807cc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000009af4d8 CR3: 0000000007490000 CR4: 00000000000006b0
Call Trace:
<TASK>
v9fs_evict_inode+0x78/0x90 [9p]
evict+0xc0/0x160
iput+0x171/0x220
do_unlinkat+0x197/0x280
__x64_sys_unlinkat+0x37/0x60
do_syscall_64+0x3c/0x80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7fec5ab33fdb
Code: 73 01 c3 48 8b 0d 55 9e 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 07 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 08
RSP: 002b:00007ffd460b1858 EFLAGS: 00000246 ORIG_RAX: 0000000000000107
RAX: ffffffffffffffda RBX: 00000000009af830 RCX: 00007fec5ab33fdb
RDX: 0000000000000000 RSI: 00000000009ae3d0 RDI: 00000000ffffff9c
RBP: 00000000009ae340 R08: 0000000000000003 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007ffd460b1a40 R14: 0000000000000000 R15: 00000000009af830
</TASK>
Modules linked in: 9pnet_virtio 9p 9pnet siw ib_core
---[ end trace 0000000000000000 ]---
RIP: 0010:__fscache_relinquish_cookie.cold+0x5a/0x8f
Code: 48 c7 c7 21 5e b8 81 e8 34 87 ff ff 48 c7 c7 2f 5e b8 81 e8 28 87 ff ff 48 63 73 04 31 d2 48 c7 c7 00 61 b8 81 e8 16 87 ff ff <0f> 0b 44 8b 47 04 8b 4f 0c 45 0f b8
RSP: 0018:ffffc90002237e08 EFLAGS: 00010286
RAX: 0000000000000019 RBX: ffff8880077de9a0 RCX: 00000000ffffefff
RDX: 00000000ffffffea RSI: 00000000ffffefff RDI: 0000000000000001
RBP: ffffc90002237e18 R08: 0000000000004ffb R09: 00000000ffffefff
R10: ffffffff8264ea20 R11: ffffffff8264ea20 R12: 0000000000000000
R13: ffffffffc00870e0 R14: ffff888003a6b500 R15: ffff8880046a0020
FS: 00007fec5aa33000(0000) GS:ffff88807cc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000009af4d8 CR3: 0000000007490000 CR4: 00000000000006b0
./configure: line 88: 9707 Segmentation fault exit $1
-----------
I don't have time to investigate but I'm afraid this needs a bit more
time as well, sorry :/
For reference, here's how I addressed my comments. I don't think that's
related to the problem at hand but can retry later without it if you
think something's fishy:
---------
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 44918c60357f..c16c39ba55d6 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -215,7 +215,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
if (fid) {
- if ((fid->qid.type == P9_QTFILE) && (filp->f_mode & FMODE_WRITE))
+ if ((S_ISREG(inode->i_mode)) && (filp->f_mode & FMODE_WRITE))
v9fs_flush_inode_writeback(inode);
spin_lock(&inode->i_lock);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 936daff9f948..e322d4196be6 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -60,7 +60,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
return PTR_ERR(fid);
if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
- int writeback_omode = (omode & !P9_OWRITE) | P9_ORDWR;
+ int writeback_omode = (omode & ~P9_OWRITE) | P9_ORDWR;
p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n");
err = p9_client_open(fid, writeback_omode);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index d53475e1ba27..062c34524b1f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -230,22 +230,7 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
int v9fs_flush_inode_writeback(struct inode *inode)
{
- struct writeback_control wbc = {
- .nr_to_write = LONG_MAX,
- .sync_mode = WB_SYNC_ALL,
- .range_start = 0,
- .range_end = -1,
- };
-
- int retval = filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
-
- if (retval != 0) {
- p9_debug(P9_DEBUG_ERROR,
- "trying to flush inode %p failed with error code %d\n",
- inode, retval);
- }
-
- return retval;
+ return filemap_write_and_wait(inode->i_mapping);
}
/**
------
--
Dominique
This is stupidity on my part, but can you send me your setup for
fscache so I can test the way you are testing it? It is the one thing
I still haven't incorporated into my test matrix so definitely a blind
spot I appreciate you exposing.
-eric
On Sat, Feb 18, 2023 at 6:16 AM <[email protected]> wrote:
>
> [email protected] wrote on Sat, Feb 18, 2023 at 07:01:22PM +0900:
> > > diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
> > > index 5fc6a945bfff..797f717e1a91 100644
> > > --- a/fs/9p/vfs_super.c
> > > +++ b/fs/9p/vfs_super.c
> >
> > > @@ -323,16 +327,17 @@ static int v9fs_write_inode_dotl(struct inode *inode,
> > > */
> > > v9inode = V9FS_I(inode);
> > > p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
> > > - __func__, inode, v9inode->writeback_fid);
> > > - if (!v9inode->writeback_fid)
> > > - return 0;
> > > + __func__, inode, fid);
> > > + if (!fid)
> > > + return -EINVAL;
> >
> > Hmm, what happens if we return EINVAL here?
> > Might want a WARN_ONCE or something?
>
> Answering myself on this: No idea what happens, but it's fairly
> common...
> (I saw it from wb_writeback which considers any non-zero return value as
> 'progress', so the error is progress as well... Might make more sense to
> return 0 here actually? need more thorough checking, didn't take time to
> dig through this either...)
>
> That aside I ran with my comments addressed and cache=fscache, and
> things blew up during ./configure of coreutils-9.1 in qemu:
> (I ran it as root without setting the env var so it failed, that much is
> expected -- the evict_inode blowing up isn't)
> -------
> checking whether mknod can create fifo without root privileges... configure: error: in `/mnt/coreutils-9.1':
> configure: error: you should not run configure as root (set FORCE_UNSAFE_CONFIGURE=1 in environment to bypass this check)
> See `config.log' for more details
> FS-Cache:
> FS-Cache: Assertion failed
> FS-Cache: 2 == 0 is false
> ------------[ cut here ]------------
> kernel BUG at fs/fscache/cookie.c:985!
> invalid opcode: 0000 [#3] SMP PTI
> CPU: 0 PID: 9707 Comm: rm Tainted: G D 6.2.0-rc2+ #37
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
> RIP: 0010:__fscache_relinquish_cookie.cold+0x5a/0x8f
> Code: 48 c7 c7 21 5e b8 81 e8 34 87 ff ff 48 c7 c7 2f 5e b8 81 e8 28 87 ff ff 48 63 73 04 31 d2 48 c7 c7 00 61 b8 81 e8 16 87 ff ff <0f> 0b 44 8b 47 04 8b 4f 0c 45 0f b8
> RSP: 0018:ffffc90002697e08 EFLAGS: 00010286
> RAX: 0000000000000019 RBX: ffff8880077de210 RCX: 00000000ffffefff
> RDX: 00000000ffffffea RSI: 00000000ffffefff RDI: 0000000000000001
> RBP: ffffc90002697e18 R08: 0000000000004ffb R09: 00000000ffffefff
> R10: ffffffff8264ea20 R11: ffffffff8264ea20 R12: 0000000000000000
> R13: ffffffffc00870e0 R14: ffff88800308cd20 R15: ffff8880046a0020
> FS: 00007fec5aa33000(0000) GS:ffff88807cc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000009af4d8 CR3: 0000000007490000 CR4: 00000000000006b0
> Call Trace:
> <TASK>
> v9fs_evict_inode+0x78/0x90 [9p]
> evict+0xc0/0x160
> iput+0x171/0x220
> do_unlinkat+0x197/0x280
> __x64_sys_unlinkat+0x37/0x60
> do_syscall_64+0x3c/0x80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> RIP: 0033:0x7fec5ab33fdb
> Code: 73 01 c3 48 8b 0d 55 9e 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 07 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 08
> RSP: 002b:00007ffd460b1858 EFLAGS: 00000246 ORIG_RAX: 0000000000000107
> RAX: ffffffffffffffda RBX: 00000000009af830 RCX: 00007fec5ab33fdb
> RDX: 0000000000000000 RSI: 00000000009ae3d0 RDI: 00000000ffffff9c
> RBP: 00000000009ae340 R08: 0000000000000003 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
> R13: 00007ffd460b1a40 R14: 0000000000000000 R15: 00000000009af830
> </TASK>
> Modules linked in: 9pnet_virtio 9p 9pnet siw ib_core
> ---[ end trace 0000000000000000 ]---
> RIP: 0010:__fscache_relinquish_cookie.cold+0x5a/0x8f
> Code: 48 c7 c7 21 5e b8 81 e8 34 87 ff ff 48 c7 c7 2f 5e b8 81 e8 28 87 ff ff 48 63 73 04 31 d2 48 c7 c7 00 61 b8 81 e8 16 87 ff ff <0f> 0b 44 8b 47 04 8b 4f 0c 45 0f b8
> RSP: 0018:ffffc90002237e08 EFLAGS: 00010286
> RAX: 0000000000000019 RBX: ffff8880077de9a0 RCX: 00000000ffffefff
> RDX: 00000000ffffffea RSI: 00000000ffffefff RDI: 0000000000000001
> RBP: ffffc90002237e18 R08: 0000000000004ffb R09: 00000000ffffefff
> R10: ffffffff8264ea20 R11: ffffffff8264ea20 R12: 0000000000000000
> R13: ffffffffc00870e0 R14: ffff888003a6b500 R15: ffff8880046a0020
> FS: 00007fec5aa33000(0000) GS:ffff88807cc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000009af4d8 CR3: 0000000007490000 CR4: 00000000000006b0
> ./configure: line 88: 9707 Segmentation fault exit $1
> -----------
>
> I don't have time to investigate but I'm afraid this needs a bit more
> time as well, sorry :/
>
>
>
>
>
>
>
>
>
>
> For reference, here's how I addressed my comments. I don't think that's
> related to the problem at hand but can retry later without it if you
> think something's fishy:
> ---------
> diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
> index 44918c60357f..c16c39ba55d6 100644
> --- a/fs/9p/vfs_dir.c
> +++ b/fs/9p/vfs_dir.c
> @@ -215,7 +215,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
> p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
> inode, filp, fid ? fid->fid : -1);
> if (fid) {
> - if ((fid->qid.type == P9_QTFILE) && (filp->f_mode & FMODE_WRITE))
> + if ((S_ISREG(inode->i_mode)) && (filp->f_mode & FMODE_WRITE))
> v9fs_flush_inode_writeback(inode);
>
> spin_lock(&inode->i_lock);
> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
> index 936daff9f948..e322d4196be6 100644
> --- a/fs/9p/vfs_file.c
> +++ b/fs/9p/vfs_file.c
> @@ -60,7 +60,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
> return PTR_ERR(fid);
>
> if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
> - int writeback_omode = (omode & !P9_OWRITE) | P9_ORDWR;
> + int writeback_omode = (omode & ~P9_OWRITE) | P9_ORDWR;
>
> p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n");
> err = p9_client_open(fid, writeback_omode);
> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> index d53475e1ba27..062c34524b1f 100644
> --- a/fs/9p/vfs_inode.c
> +++ b/fs/9p/vfs_inode.c
> @@ -230,22 +230,7 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
>
> int v9fs_flush_inode_writeback(struct inode *inode)
> {
> - struct writeback_control wbc = {
> - .nr_to_write = LONG_MAX,
> - .sync_mode = WB_SYNC_ALL,
> - .range_start = 0,
> - .range_end = -1,
> - };
> -
> - int retval = filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
> -
> - if (retval != 0) {
> - p9_debug(P9_DEBUG_ERROR,
> - "trying to flush inode %p failed with error code %d\n",
> - inode, retval);
> - }
> -
> - return retval;
> + return filemap_write_and_wait(inode->i_mapping);
> }
>
> /**
> ------
> --
> Dominique
On Saturday, February 18, 2023 11:01:22 AM CET [email protected] wrote:
> Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:22AM +0000:
> > This fixes several detected problems from preivous
> > patches when running with writeback mode. In
> > particular this fixes issues with files which are opened
> > as write only and getattr on files which dirty caches.
> >
> > This patch makes sure that cache behavior for an open file is stored in
> > the client copy of fid->mode. This allows us to reflect cache behavior
> > from mount flags, open mode, and information from the server to
> > inform readahead and writeback behavior.
> >
> > This includes adding support for a 9p semantic that qid.version==0
> > is used to mark a file as non-cachable which is important for
> > synthetic files. This may have a side-effect of not supporting
> > caching on certain legacy file servers that do not properly set
> > qid.version. There is also now a mount flag which can disable
> > the qid.version behavior.
> >
> > Signed-off-by: Eric Van Hensbergen <[email protected]>
>
> Didn't have time to review it all thoroughly, sending what I have
> anyway...
>
> > diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
> > index 0e800b8f73cc..0c2c7a181d85 100644
> > --- a/Documentation/filesystems/9p.rst
> > +++ b/Documentation/filesystems/9p.rst
> > @@ -79,18 +79,14 @@ Options
> >
> > cache=mode specifies a caching policy. By default, no caches are used.
> >
> > - none
> > - default no cache policy, metadata and data
> > - alike are synchronous.
> > - loose
> > - no attempts are made at consistency,
> > - intended for exclusive, read-only mounts
> > - fscache
> > - use FS-Cache for a persistent, read-only
> > - cache backend.
> > - mmap
> > - minimal cache that is only used for read-write
> > - mmap. Northing else is cached, like cache=none
> > + ========= =============================================
> > + none no cache of file or metadata
> > + readahead readahead caching of files
> > + writeback delayed writeback of files
> > + mmap support mmap operations read/write with cache
> > + loose meta-data and file cache with no coherency
> > + fscache use FS-Cache for a persistent cache backend
> > + ========= =============================================
>
> perhaps a word saying the caches are incremental, only one can be used,
> and listing them in order?
> e.g. it's not clear from this that writeback also enables readahead,
> and as a user I'd try to use cache=readahead,cache=writeback and wonder
> why that doesn't work (well, I guess it would in that order...)
+1 on docs
The question was also whether to make these true separate options before being
merged.
I give these patches a spin tomorrow.
Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 10:40:27AM -0600:
> This is stupidity on my part, but can you send me your setup for
> fscache so I can test the way you are testing it? It is the one thing
> I still haven't incorporated into my test matrix so definitely a blind
> spot I appreciate you exposing.
I mounted with fscache but I didn't actually have a backend running when
testing, so there's really not setup involved -- just qemu's
'-virtfs local,path=/tmp/linux-test,mount_tag=tmp,security_model=mapped-file'
and
'mount -t 9p -o debug=1,cache=fscache,trans=virtio tmp /mnt'
In that case, fscache basically acts like loose, except it also does all
its cookies stuff so it's sort of useful to test anyway.
I also have a setup that runs cachefilesd on top but it wasn't involved
here; for completeness if you want to test:
- add an extra disk to qemu
qemu-img create -f qcow2 /tmp/disk 1G
'-device virtio-blk-pci,drive=hd0 -drive if=none,file=/tmp/disk,snapshot=on,id=hd0'
- just runs cachefilesd (ignore hardcoded nix paths, that's my easy way out
of running straight out of initrd without making it huge):
---
if [ -e /dev/vda ]; then
/run/current-system/sw/bin/mkfs.ext4 -q /dev/vda
mkdir /cache /var/run
mount /dev/vda /cache
echo -e 'dir /cache\ntag cache' > /etc/cachefilesd.conf
echo 'modprobe cachefiles'
echo '/nix/store/4vy0z1ygfidfmbzaxblkmzv7j0irhhwc-cachefilesd-0.10.10/bin/cachefilesd -s -d > /cache/log 2>&1'
fi
---
And that's basically it; you should see files poping up in /dev/vda when
you do reads (iirc writes weren't cached through last I looked)
--
Dominique
Yeah, I guess it depends on what options we want to separate,
writeback == mmap so we can eliminate one option and just use mmap I
suppose. I feel like readahead has value as it maintains the most
consistency on the host file system since it shouldn't be doing any
writeback buffering. readahead and mmap are different than loose in
that they don't do any do any dir cache. To your earlier comments (in
a different thread) it very well may be that eventually we separate
these into file_cache=[ readahead | mmap | loose ] and dir_cache = [
tight | temporal | loose ] and fscache is its own beast. It struck me
as well with xattr enabled we may want to have separate caches for
xattr caching since it generates a load of traffic with security on.
On Sat, Feb 18, 2023 at 1:58 PM Christian Schoenebeck
<[email protected]> wrote:
>
> On Saturday, February 18, 2023 11:01:22 AM CET [email protected] wrote:
> > Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 12:33:22AM +0000:
> > > This fixes several detected problems from preivous
> > > patches when running with writeback mode. In
> > > particular this fixes issues with files which are opened
> > > as write only and getattr on files which dirty caches.
> > >
> > > This patch makes sure that cache behavior for an open file is stored in
> > > the client copy of fid->mode. This allows us to reflect cache behavior
> > > from mount flags, open mode, and information from the server to
> > > inform readahead and writeback behavior.
> > >
> > > This includes adding support for a 9p semantic that qid.version==0
> > > is used to mark a file as non-cachable which is important for
> > > synthetic files. This may have a side-effect of not supporting
> > > caching on certain legacy file servers that do not properly set
> > > qid.version. There is also now a mount flag which can disable
> > > the qid.version behavior.
> > >
> > > Signed-off-by: Eric Van Hensbergen <[email protected]>
> >
> > Didn't have time to review it all thoroughly, sending what I have
> > anyway...
> >
> > > diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst
> > > index 0e800b8f73cc..0c2c7a181d85 100644
> > > --- a/Documentation/filesystems/9p.rst
> > > +++ b/Documentation/filesystems/9p.rst
> > > @@ -79,18 +79,14 @@ Options
> > >
> > > cache=mode specifies a caching policy. By default, no caches are used.
> > >
> > > - none
> > > - default no cache policy, metadata and data
> > > - alike are synchronous.
> > > - loose
> > > - no attempts are made at consistency,
> > > - intended for exclusive, read-only mounts
> > > - fscache
> > > - use FS-Cache for a persistent, read-only
> > > - cache backend.
> > > - mmap
> > > - minimal cache that is only used for read-write
> > > - mmap. Northing else is cached, like cache=none
> > > + ========= =============================================
> > > + none no cache of file or metadata
> > > + readahead readahead caching of files
> > > + writeback delayed writeback of files
> > > + mmap support mmap operations read/write with cache
> > > + loose meta-data and file cache with no coherency
> > > + fscache use FS-Cache for a persistent cache backend
> > > + ========= =============================================
> >
> > perhaps a word saying the caches are incremental, only one can be used,
> > and listing them in order?
> > e.g. it's not clear from this that writeback also enables readahead,
> > and as a user I'd try to use cache=readahead,cache=writeback and wonder
> > why that doesn't work (well, I guess it would in that order...)
>
> +1 on docs
>
> The question was also whether to make these true separate options before being
> merged.
>
> I give these patches a spin tomorrow.
>
>
>
Eric Van Hensbergen wrote on Sat, Feb 18, 2023 at 04:24:08PM -0600:
> Yeah, I guess it depends on what options we want to separate,
> writeback == mmap so we can eliminate one option and just use mmap I
> suppose.
For history (since I implemented it for CEA back then), mmap was added
because we had applications relying on mmap (so wanted that to somehow
work) without turning to cache=loose as that doesn't behave well with
nfs-ganesha (in particular, fids not closed until memory pressure comes
reclaiming them, which can be very late or never come, doesn't work well
with ganesha that used to (and probably still does, didn't check) cap
the maximum number of fid active per client.
I think writeback would be acceptable for this usecase, especially since
with your patches we now flush on close.
For clarity though I'd use writeback in the documentation, and keep mmap
as a legacy mapping just for mount opts parsing; the behaviour is
slightly different than it used to be (normal read/writes were sync) so
it's good to be clear about that.
> I feel like readahead has value as it maintains the most
> consistency on the host file system since it shouldn't be doing any
> writeback buffering. readahead and mmap are different than loose in
> that they don't do any do any dir cache. To your earlier comments (in
> a different thread) it very well may be that eventually we separate
> these into file_cache=[ readahead | mmap | loose ] and dir_cache = [
> tight | temporal | loose ] and fscache is its own beast.
Separating the two makes sense implementation-wise as well, I like this
idea.
What would the difference be between file_cache=writeback and loose?
Do you plan some form of revalidation with writeback, e.g. using qid
version that loose wouldn't do? (sorry if it's already done, I don't
recall seeing that)
fscache is currently a cache option but it's pretty much unrelated, we
can have it as a separate option and alias cache=fscache to
`file_cache=writeback(loose),dir_cache=loose,fscache=on`
but on its own it ought to work with any level of file_cache and no
dir_cache...
The test matrix will be fun, though :|
> It struck me as well with xattr enabled we may want to have separate
> caches for xattr caching since it generates a load of traffic with
> security on.
xattr caching currently isn't done at all afaik, and it'd definitely
make sense with any kind of dir_cache... That'd likely halve the
requests for some find-like workloads.
Probably another new option as well..
--
Dominique
On Sat, Feb 18, 2023 at 5:41 PM <[email protected]> wrote:
>
> For clarity though I'd use writeback in the documentation, and keep mmap
> as a legacy mapping just for mount opts parsing; the behaviour is
> slightly different than it used to be (normal read/writes were sync) so
> it's good to be clear about that.
>
Yeah, that was my intent in the current form -- although it did occur to me
that at some point we might want something subtly different for mmap,
however, I couldn't ever work out in my head if there would be any way to
be consistent if we had mmap but no transient caching -- I guess one form
would be to only cache/buffer while file is mmapped which we should be
able to track and in that way it could be distinct from the others.
>
> Separating the two makes sense implementation-wise as well, I like this
> idea.
> What would the difference be between file_cache=writeback and loose?
> Do you plan some form of revalidation with writeback, e.g. using qid
> version that loose wouldn't do? (sorry if it's already done, I don't
> recall seeing that)
>
It would mostly have to do with when we validate the cache. Loose
essentially means NEVER validate the cache and assume the cache
is always correct. But in the file_cache= case we would only do it
for file contents
and not directory.
> fscache is currently a cache option but it's pretty much unrelated, we
> can have it as a separate option and alias cache=fscache to
> `file_cache=writeback(loose),dir_cache=loose,fscache=on`
> but on its own it ought to work with any level of file_cache and no
> dir_cache...
> The test matrix will be fun, though :|
>
Yeah - feels like fscache should just be seperate, but then it can follow
the consistency policies of file and/or dir as to when to revalidate with
server.
Test matrix is already a nightmare :). Right now I have a simple one
with multiple fstabs for various options (which I feed in with cpu), but
I'm gonna add this into my python notebook script so I can explore
all options (and all config options for 9p in the kernel configs) -- but
probably keep a smoke test "quick" regression as well.
> > It struck me as well with xattr enabled we may want to have separate
> > caches for xattr caching since it generates a load of traffic with
> > security on.
>
> xattr caching currently isn't done at all afaik, and it'd definitely
> make sense with any kind of dir_cache... That'd likely halve the
> requests for some find-like workloads.
> Probably another new option as well..
Yeah, I was going to tackle this after the dir cache stuff is fixed up.
-eric
I chased my tail on this for like the last month - but I finally found
the problem with fscache and it was related to one of the other
problems you pointed out - but I got distracted by a dozen red
herrings (which are probably bugs in fscache, but not caused by my
patch) -- in any case, it turns out the assert in fscache is due to an
imbalance in unuse_cookie -- and its because dirty_folio calls
use_cookie under the hood and fscache_unpin_writeback is the balance
and it calls unuse cookie under the hood. Because there was no fid,
we were erroring out and never balancing - and it gets caught when we
close things down.
Secondarily, I'm not sure wtf we were doing in write_inode --
everybody else just calls fscache_unpin_writeback and nothing else.
The whole fsync stuff appears to be unnecessary code -- which means we
don't need a fid (certainly not an open fid) so I just removed that
code, called unpin_writeback and all my tests pass with fscache. I'm
going to run a longer suite of tests just to make sure I didn't
accidently perturb anything else, but then I'll consolidate, clean-up
and repost the patches.
-eric
On Sat, Feb 18, 2023 at 6:16 AM <[email protected]> wrote:
>
> [email protected] wrote on Sat, Feb 18, 2023 at 07:01:22PM +0900:
> > > diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
> > > index 5fc6a945bfff..797f717e1a91 100644
> > > --- a/fs/9p/vfs_super.c
> > > +++ b/fs/9p/vfs_super.c
> >
> > > @@ -323,16 +327,17 @@ static int v9fs_write_inode_dotl(struct inode *inode,
> > > */
> > > v9inode = V9FS_I(inode);
> > > p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
> > > - __func__, inode, v9inode->writeback_fid);
> > > - if (!v9inode->writeback_fid)
> > > - return 0;
> > > + __func__, inode, fid);
> > > + if (!fid)
> > > + return -EINVAL;
> >
> > Hmm, what happens if we return EINVAL here?
> > Might want a WARN_ONCE or something?
>
> Answering myself on this: No idea what happens, but it's fairly
> common...
> (I saw it from wb_writeback which considers any non-zero return value as
> 'progress', so the error is progress as well... Might make more sense to
> return 0 here actually? need more thorough checking, didn't take time to
> dig through this either...)
>
> That aside I ran with my comments addressed and cache=fscache, and
> things blew up during ./configure of coreutils-9.1 in qemu:
> (I ran it as root without setting the env var so it failed, that much is
> expected -- the evict_inode blowing up isn't)
> -------
> checking whether mknod can create fifo without root privileges... configure: error: in `/mnt/coreutils-9.1':
> configure: error: you should not run configure as root (set FORCE_UNSAFE_CONFIGURE=1 in environment to bypass this check)
> See `config.log' for more details
> FS-Cache:
> FS-Cache: Assertion failed
> FS-Cache: 2 == 0 is false
> ------------[ cut here ]------------
> kernel BUG at fs/fscache/cookie.c:985!
> invalid opcode: 0000 [#3] SMP PTI
> CPU: 0 PID: 9707 Comm: rm Tainted: G D 6.2.0-rc2+ #37
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
> RIP: 0010:__fscache_relinquish_cookie.cold+0x5a/0x8f
> Code: 48 c7 c7 21 5e b8 81 e8 34 87 ff ff 48 c7 c7 2f 5e b8 81 e8 28 87 ff ff 48 63 73 04 31 d2 48 c7 c7 00 61 b8 81 e8 16 87 ff ff <0f> 0b 44 8b 47 04 8b 4f 0c 45 0f b8
> RSP: 0018:ffffc90002697e08 EFLAGS: 00010286
> RAX: 0000000000000019 RBX: ffff8880077de210 RCX: 00000000ffffefff
> RDX: 00000000ffffffea RSI: 00000000ffffefff RDI: 0000000000000001
> RBP: ffffc90002697e18 R08: 0000000000004ffb R09: 00000000ffffefff
> R10: ffffffff8264ea20 R11: ffffffff8264ea20 R12: 0000000000000000
> R13: ffffffffc00870e0 R14: ffff88800308cd20 R15: ffff8880046a0020
> FS: 00007fec5aa33000(0000) GS:ffff88807cc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000009af4d8 CR3: 0000000007490000 CR4: 00000000000006b0
> Call Trace:
> <TASK>
> v9fs_evict_inode+0x78/0x90 [9p]
> evict+0xc0/0x160
> iput+0x171/0x220
> do_unlinkat+0x197/0x280
> __x64_sys_unlinkat+0x37/0x60
> do_syscall_64+0x3c/0x80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> RIP: 0033:0x7fec5ab33fdb
> Code: 73 01 c3 48 8b 0d 55 9e 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 07 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 08
> RSP: 002b:00007ffd460b1858 EFLAGS: 00000246 ORIG_RAX: 0000000000000107
> RAX: ffffffffffffffda RBX: 00000000009af830 RCX: 00007fec5ab33fdb
> RDX: 0000000000000000 RSI: 00000000009ae3d0 RDI: 00000000ffffff9c
> RBP: 00000000009ae340 R08: 0000000000000003 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
> R13: 00007ffd460b1a40 R14: 0000000000000000 R15: 00000000009af830
> </TASK>
> Modules linked in: 9pnet_virtio 9p 9pnet siw ib_core
> ---[ end trace 0000000000000000 ]---
> RIP: 0010:__fscache_relinquish_cookie.cold+0x5a/0x8f
> Code: 48 c7 c7 21 5e b8 81 e8 34 87 ff ff 48 c7 c7 2f 5e b8 81 e8 28 87 ff ff 48 63 73 04 31 d2 48 c7 c7 00 61 b8 81 e8 16 87 ff ff <0f> 0b 44 8b 47 04 8b 4f 0c 45 0f b8
> RSP: 0018:ffffc90002237e08 EFLAGS: 00010286
> RAX: 0000000000000019 RBX: ffff8880077de9a0 RCX: 00000000ffffefff
> RDX: 00000000ffffffea RSI: 00000000ffffefff RDI: 0000000000000001
> RBP: ffffc90002237e18 R08: 0000000000004ffb R09: 00000000ffffefff
> R10: ffffffff8264ea20 R11: ffffffff8264ea20 R12: 0000000000000000
> R13: ffffffffc00870e0 R14: ffff888003a6b500 R15: ffff8880046a0020
> FS: 00007fec5aa33000(0000) GS:ffff88807cc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000009af4d8 CR3: 0000000007490000 CR4: 00000000000006b0
> ./configure: line 88: 9707 Segmentation fault exit $1
> -----------
>
> I don't have time to investigate but I'm afraid this needs a bit more
> time as well, sorry :/
>
>
>
>
>
>
>
>
>
>
> For reference, here's how I addressed my comments. I don't think that's
> related to the problem at hand but can retry later without it if you
> think something's fishy:
> ---------
> diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
> index 44918c60357f..c16c39ba55d6 100644
> --- a/fs/9p/vfs_dir.c
> +++ b/fs/9p/vfs_dir.c
> @@ -215,7 +215,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
> p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
> inode, filp, fid ? fid->fid : -1);
> if (fid) {
> - if ((fid->qid.type == P9_QTFILE) && (filp->f_mode & FMODE_WRITE))
> + if ((S_ISREG(inode->i_mode)) && (filp->f_mode & FMODE_WRITE))
> v9fs_flush_inode_writeback(inode);
>
> spin_lock(&inode->i_lock);
> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
> index 936daff9f948..e322d4196be6 100644
> --- a/fs/9p/vfs_file.c
> +++ b/fs/9p/vfs_file.c
> @@ -60,7 +60,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
> return PTR_ERR(fid);
>
> if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
> - int writeback_omode = (omode & !P9_OWRITE) | P9_ORDWR;
> + int writeback_omode = (omode & ~P9_OWRITE) | P9_ORDWR;
>
> p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n");
> err = p9_client_open(fid, writeback_omode);
> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
> index d53475e1ba27..062c34524b1f 100644
> --- a/fs/9p/vfs_inode.c
> +++ b/fs/9p/vfs_inode.c
> @@ -230,22 +230,7 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
>
> int v9fs_flush_inode_writeback(struct inode *inode)
> {
> - struct writeback_control wbc = {
> - .nr_to_write = LONG_MAX,
> - .sync_mode = WB_SYNC_ALL,
> - .range_start = 0,
> - .range_end = -1,
> - };
> -
> - int retval = filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
> -
> - if (retval != 0) {
> - p9_debug(P9_DEBUG_ERROR,
> - "trying to flush inode %p failed with error code %d\n",
> - inode, retval);
> - }
> -
> - return retval;
> + return filemap_write_and_wait(inode->i_mapping);
> }
>
> /**
> ------
> --
> Dominique
This patch removes the creating of an additional writeback_fid
for opened files. The patch addresses problems when files
were opened write-only or getattr on files with dirty caches.
This patch also incorporates information about cache behavior
in the fid for every file. This allows us to reflect cache
behavior from mount flags, open mode, and information from
the server to inform readahead and writeback behavior.
This includes adding support for a 9p semantic that qid.version==0
is used to mark a file as non-cachable which is important for
synthetic files. This may have a side-effect of not supporting
caching on certain legacy file servers that do not properly set
qid.version. There is also now a mount flag which can disable
the qid.version behavior.
Signed-off-by: Eric Van Hensbergen <[email protected]>
---
fs/9p/fid.c | 48 +++++++++-------------
fs/9p/fid.h | 33 ++++++++++++++-
fs/9p/v9fs.h | 1 -
fs/9p/vfs_addr.c | 22 +++++-----
fs/9p/vfs_file.c | 91 ++++++++++++++----------------------------
fs/9p/vfs_inode.c | 45 +++++++--------------
fs/9p/vfs_inode_dotl.c | 48 +++++++++-------------
fs/9p/vfs_super.c | 33 ++++-----------
8 files changed, 135 insertions(+), 186 deletions(-)
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 805151114e96..de009a33e0e2 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -41,14 +41,24 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid **pfid)
*pfid = NULL;
}
+static bool v9fs_is_writeable(int mode)
+{
+ if (mode & (P9_OWRITE|P9_ORDWR))
+ return true;
+ else
+ return false;
+}
+
/**
* v9fs_fid_find_inode - search for an open fid off of the inode list
* @inode: return a fid pointing to a specific inode
+ * @want_writeable: only consider fids which are writeable
* @uid: return a fid belonging to the specified user
+ * @any: ignore uid as a selection criteria
*
*/
-
-static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any)
{
struct hlist_head *h;
struct p9_fid *fid, *ret = NULL;
@@ -58,7 +68,12 @@ static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
spin_lock(&inode->i_lock);
h = (struct hlist_head *)&inode->i_private;
hlist_for_each_entry(fid, h, ilist) {
- if (uid_eq(fid->uid, uid)) {
+ if (any || uid_eq(fid->uid, uid)) {
+ if (want_writeable && !v9fs_is_writeable(fid->mode)) {
+ p9_debug(P9_DEBUG_VFS, " mode: %x not writeable?\n",
+ fid->mode);
+ continue;
+ }
p9_fid_get(fid);
ret = fid;
break;
@@ -118,7 +133,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
spin_unlock(&dentry->d_lock);
} else {
if (dentry->d_inode)
- ret = v9fs_fid_find_inode(dentry->d_inode, uid);
+ ret = v9fs_fid_find_inode(dentry->d_inode, false, uid, any);
}
return ret;
@@ -299,28 +314,3 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
return v9fs_fid_lookup_with_uid(dentry, uid, any);
}
-struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
-{
- int err;
- struct p9_fid *fid, *ofid;
-
- ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0);
- fid = clone_fid(ofid);
- if (IS_ERR(fid))
- goto error_out;
- p9_fid_put(ofid);
- /*
- * writeback fid will only be used to write back the
- * dirty pages. We always request for the open fid in read-write
- * mode so that a partial page write which result in page
- * read can work.
- */
- err = p9_client_open(fid, O_RDWR);
- if (err < 0) {
- p9_fid_put(fid);
- fid = ERR_PTR(err);
- goto error_out;
- }
-error_out:
- return fid;
-}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 8a4e8cd12ca2..11576e1364bf 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -7,14 +7,16 @@
#ifndef FS_9P_FID_H
#define FS_9P_FID_H
#include <linux/list.h>
+#include "v9fs.h"
+struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable,
+ kuid_t uid, bool any);
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry)
{
return v9fs_fid_lookup(dentry->d_parent);
}
void v9fs_fid_add(struct dentry *dentry, struct p9_fid **fid);
-struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
void v9fs_open_fid_add(struct inode *inode, struct p9_fid **fid);
static inline struct p9_fid *clone_fid(struct p9_fid *fid)
{
@@ -32,4 +34,33 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
p9_fid_put(fid);
return nfid;
}
+/**
+ * v9fs_fid_addmodes - add cache flags to fid mode (for client use only)
+ * @fid: fid to augment
+ * @s_flags: session info mount flags
+ * @s_cache: session info cache flags
+ * @f_flags: unix open flags
+ *
+ * make sure mode reflects flags of underlying mounts
+ * also qid.version == 0 reflects a synthetic or legacy file system
+ * NOTE: these are set after open so only reflect 9p client not
+ * underlying file system on server.
+ */
+static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
+ int s_cache, unsigned int f_flags)
+{
+ if (fid->qid.type != P9_QTFILE)
+ return;
+
+ if ((!s_cache) ||
+ ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) ||
+ (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
+ fid->mode |= P9L_DIRECT; /* no read or write cache */
+ } else if ((s_cache < CACHE_WRITEBACK) ||
+ (f_flags & O_DSYNC) | (s_flags & V9FS_SYNC)) {
+ fid->mode |= P9L_NOWRITECACHE;
+ } else if (s_cache == CACHE_LOOSE) {
+ fid->mode |= P9L_LOOSE; /* noncoherent cache */
+ }
+}
#endif
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 12401f1be5d6..999cdbcbfed9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -118,7 +118,6 @@ struct v9fs_inode {
struct netfs_inode netfs; /* Netfslib context and vfs inode */
struct p9_qid qid;
unsigned int cache_validity;
- struct p9_fid *writeback_fid;
struct mutex v_mutex;
};
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6f46d7e4c750..211165430a8a 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -57,8 +57,6 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
*/
static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- struct inode *inode = file_inode(file);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid = file->private_data;
BUG_ON(!fid);
@@ -66,11 +64,8 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
/* we might need to read from a fid that was opened write-only
* for read-modify-write of page cache, use the writeback fid
* for that */
- if (rreq->origin == NETFS_READ_FOR_WRITE &&
- (fid->mode & O_ACCMODE) == O_WRONLY) {
- fid = v9inode->writeback_fid;
- BUG_ON(!fid);
- }
+ WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE &&
+ !(fid->mode & P9_ORDWR));
p9_fid_get(fid);
rreq->netfs_priv = fid;
@@ -164,6 +159,7 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
loff_t i_size = i_size_read(inode);
struct iov_iter from;
size_t len = folio_size(folio);
+ struct p9_fid *writeback_fid;
int err;
if (start >= i_size)
@@ -173,13 +169,17 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len);
- /* We should have writeback_fid always set */
- BUG_ON(!v9inode->writeback_fid);
+ writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true);
+ if (!writeback_fid) {
+ WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n",
+ inode->i_private);
+ return -EINVAL;
+ }
folio_wait_fscache(folio);
folio_start_writeback(folio);
- p9_client_write(v9inode->writeback_fid, start, &from, &err);
+ p9_client_write(writeback_fid, start, &from, &err);
if (err == 0 &&
fscache_cookie_enabled(cookie) &&
@@ -192,6 +192,8 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
}
folio_end_writeback(folio);
+ p9_fid_put(writeback_fid);
+
return err;
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index cd1f3f4079d7..9f1d464bc1b5 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -43,7 +43,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
int err;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *writeback_fid;
+ struct p9_fid *fid;
int omode;
p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
@@ -60,7 +60,19 @@ int v9fs_file_open(struct inode *inode, struct file *file)
if (IS_ERR(fid))
return PTR_ERR(fid);
- err = p9_client_open(fid, omode);
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (omode & P9_OWRITE)) {
+ int writeback_omode = (omode & ~P9_OWRITE) | P9_ORDWR;
+
+ p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, try opening O_RDWR\n");
+ err = p9_client_open(fid, writeback_omode);
+ if (err < 0) {
+ p9_debug(P9_DEBUG_CACHE, "could not open O_RDWR, disabling caches\n");
+ err = p9_client_open(fid, omode);
+ fid->mode |= P9L_DIRECT;
+ }
+ } else {
+ err = p9_client_open(fid, omode);
+ }
if (err < 0) {
p9_fid_put(fid);
return err;
@@ -72,36 +84,14 @@ int v9fs_file_open(struct inode *inode, struct file *file)
file->private_data = fid;
}
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- writeback_fid = v9fs_writeback_fid(file_dentry(file));
- if (IS_ERR(writeback_fid)) {
- err = PTR_ERR(writeback_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out_error;
- }
- v9inode->writeback_fid = (void *) writeback_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
#endif
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);
return 0;
-out_error:
- p9_fid_put(file->private_data);
- file->private_data = NULL;
- return err;
}
/**
@@ -367,14 +357,14 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
int ret, err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
- iov_iter_count(to), iocb->ki_pos);
+ p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n",
+ fid->fid, iov_iter_count(to), iocb->ki_pos);
- if (v9ses->cache > CACHE_MMAP)
+ if (!(fid->mode & P9L_DIRECT)) {
+ p9_debug(P9_DEBUG_VFS, "(cached)\n");
return generic_file_read_iter(iocb, to);
+ }
if (iocb->ki_filp->f_flags & O_NONBLOCK)
ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
@@ -397,14 +387,17 @@ static ssize_t
v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct p9_fid *fid = file->private_data;
ssize_t retval;
loff_t origin;
int err = 0;
- struct inode *inode = file_inode(iocb->ki_filp);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- if (v9ses->cache >= CACHE_WRITEBACK)
+ p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid);
+
+ if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) {
+ p9_debug(P9_DEBUG_CACHE, "(cached)\n");
return generic_file_write_iter(iocb, from);
+ }
retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -488,36 +481,18 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
struct inode *inode = file_inode(filp);
- struct v9fs_inode *v9inode = V9FS_I(inode);
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
- struct p9_fid *fid;
+
+ p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp);
if (v9ses->cache < CACHE_MMAP) {
+ p9_debug(P9_DEBUG_CACHE, "(no mmap mode)");
+ if (vma->vm_flags & VM_MAYSHARE)
+ return -ENODEV;
invalidate_inode_pages2(filp->f_mapping);
return generic_file_readonly_mmap(filp, vma);
}
- mutex_lock(&v9inode->v_mutex);
- if (!v9inode->writeback_fid &&
- (vma->vm_flags & VM_SHARED) &&
- (vma->vm_flags & VM_WRITE)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during mmap instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- fid = v9fs_writeback_fid(file_dentry(filp));
- if (IS_ERR(fid)) {
- retval = PTR_ERR(fid);
- mutex_unlock(&v9inode->v_mutex);
- return retval;
- }
- v9inode->writeback_fid = (void *) fid;
- }
- mutex_unlock(&v9inode->v_mutex);
-
retval = generic_file_mmap(filp, vma);
if (!retval)
vma->vm_ops = &v9fs_mmap_file_vm_ops;
@@ -528,7 +503,6 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
static vm_fault_t
v9fs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct v9fs_inode *v9inode;
struct folio *folio = page_folio(vmf->page);
struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
@@ -537,8 +511,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
folio, (unsigned long)filp->private_data);
- v9inode = V9FS_I(inode);
-
/* Wait for the page to be written to the cache before we allow it to
* be modified. We then assume the entire page will need writing back.
*/
@@ -551,7 +523,6 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
/* Update file times before taking page lock */
file_update_time(filp);
- BUG_ON(!v9inode->writeback_fid);
if (folio_lock_killable(folio) < 0)
return VM_FAULT_RETRY;
if (folio_mapping(folio) != inode->i_mapping)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 63590f55363b..fb5e5c0e41e4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -230,7 +230,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL);
if (!v9inode)
return NULL;
- v9inode->writeback_fid = NULL;
v9inode->cache_validity = 0;
mutex_init(&v9inode->v_mutex);
return &v9inode->netfs.inode;
@@ -383,9 +382,6 @@ void v9fs_evict_inode(struct inode *inode)
filemap_fdatawrite(&inode->i_data);
fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
- /* clunk the fid stashed in writeback_fid */
- p9_fid_put(v9inode->writeback_fid);
- v9inode->writeback_fid = NULL;
}
static int v9fs_test_inode(struct inode *inode, void *data)
@@ -796,9 +792,10 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
u32 perm;
struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *inode_fid;
+ struct p9_fid *fid;
struct dentry *res = NULL;
struct inode *inode;
+ int p9_omode;
if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
@@ -817,9 +814,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
- fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
- v9fs_uflags2omode(flags,
- v9fs_proto_dotu(v9ses)));
+ p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
goto error;
@@ -828,25 +830,6 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9fs_invalidate_inode_attr(dir);
inode = d_inode(dentry);
v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache >= CACHE_WRITEBACK) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto error;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
err = finish_open(file, dentry, generic_file_open);
if (err)
goto error;
@@ -855,6 +838,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+
+ v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags);
v9fs_open_fid_add(inode, &fid);
file->f_mode |= FMODE_CREATED;
@@ -1024,7 +1009,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
return 0;
} else if (v9ses->cache >= CACHE_WRITEBACK) {
if (S_ISREG(inode->i_mode)) {
@@ -1128,10 +1113,10 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
- else
- invalidate_mapping_pages(&inode->i_data, 0, -1);
}
v9fs_invalidate_inode_attr(inode);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a28eb3aeab29..4b9488cb7a56 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -232,12 +232,12 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
int err = 0;
kgid_t gid;
umode_t mode;
+ int p9_omode = v9fs_open_to_dotl_flags(flags);
const unsigned char *name = NULL;
struct p9_qid qid;
struct inode *inode;
struct p9_fid *fid = NULL;
- struct v9fs_inode *v9inode;
- struct p9_fid *dfid = NULL, *ofid = NULL, *inode_fid = NULL;
+ struct p9_fid *dfid = NULL, *ofid = NULL;
struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
struct dentry *res = NULL;
@@ -282,14 +282,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
/* Update mode based on ACL value */
err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
if (err) {
- p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n",
err);
goto out;
}
- err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
- mode, gid, &qid);
+
+ if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
+ p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
+ p9_debug(P9_DEBUG_CACHE,
+ "write-only file with writeback enabled, creating w/ O_RDWR\n");
+ }
+ err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid);
if (err < 0) {
- p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
+ p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n",
err);
goto out;
}
@@ -314,36 +319,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
v9fs_fid_add(dentry, &fid);
d_instantiate(dentry, inode);
- v9inode = V9FS_I(inode);
- mutex_lock(&v9inode->v_mutex);
- if ((v9ses->cache) && !v9inode->writeback_fid &&
- ((flags & O_ACCMODE) != O_RDONLY)) {
- /*
- * clone a fid and add it to writeback_fid
- * we do it during open time instead of
- * page dirty time via write_begin/page_mkwrite
- * because we want write after unlink usecase
- * to work.
- */
- inode_fid = v9fs_writeback_fid(dentry);
- if (IS_ERR(inode_fid)) {
- err = PTR_ERR(inode_fid);
- mutex_unlock(&v9inode->v_mutex);
- goto out;
- }
- v9inode->writeback_fid = (void *) inode_fid;
- }
- mutex_unlock(&v9inode->v_mutex);
/* Since we are opening a file, assign the open fid to the file */
err = finish_open(file, dentry, generic_file_open);
if (err)
goto out;
file->private_data = ofid;
#ifdef CONFIG_9P_FSCACHE
- if (v9ses->cache == CACHE_FSCACHE)
+ if (v9ses->cache == CACHE_FSCACHE) {
+ struct v9fs_inode *v9inode = V9FS_I(inode);
fscache_use_cookie(v9fs_inode_cookie(v9inode),
file->f_mode & FMODE_WRITE);
+ }
#endif
+ v9fs_fid_add_modes(ofid, v9ses->flags, v9ses->cache, flags);
v9fs_open_fid_add(inode, &ofid);
file->f_mode |= FMODE_CREATED;
out:
@@ -464,9 +452,9 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
return 0;
- } else if (v9ses->cache >= CACHE_WRITEBACK) {
+ } else if (v9ses->cache) {
if (S_ISREG(inode->i_mode)) {
int retval = filemap_fdatawrite(inode->i_mapping);
@@ -613,6 +601,8 @@ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
+ truncate_pagecache(inode, iattr->ia_size);
+
if (v9ses->cache == CACHE_FSCACHE)
fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size);
}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 5fc6a945bfff..af83b39e340c 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -290,49 +290,30 @@ static int v9fs_drop_inode(struct inode *inode)
static int v9fs_write_inode(struct inode *inode,
struct writeback_control *wbc)
{
- int ret;
- struct p9_wstat wstat;
struct v9fs_inode *v9inode;
+
/*
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
- v9inode = V9FS_I(inode);
- if (!v9inode->writeback_fid)
- return 0;
- v9fs_blank_wstat(&wstat);
- ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
- if (ret < 0) {
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return ret;
- }
+ v9inode = V9FS_I(inode);
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+
return 0;
}
static int v9fs_write_inode_dotl(struct inode *inode,
struct writeback_control *wbc)
{
- int ret;
struct v9fs_inode *v9inode;
- /*
- * send an fsync request to server irrespective of
- * wbc->sync_mode.
- */
+
v9inode = V9FS_I(inode);
- p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
- __func__, inode, v9inode->writeback_fid);
- if (!v9inode->writeback_fid)
- return 0;
-
- ret = p9_client_fsync(v9inode->writeback_fid, 0);
- if (ret < 0) {
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return ret;
- }
+ p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+
fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
+
return 0;
}
--
2.39.2
Le 27/03/2023 à 04:59, Eric Van Hensbergen a écrit :
> This patch removes the creating of an additional writeback_fid
> for opened files. The patch addresses problems when files
> were opened write-only or getattr on files with dirty caches.
>
> This patch also incorporates information about cache behavior
> in the fid for every file. This allows us to reflect cache
> behavior from mount flags, open mode, and information from
> the server to inform readahead and writeback behavior.
>
> This includes adding support for a 9p semantic that qid.version==0
> is used to mark a file as non-cachable which is important for
> synthetic files. This may have a side-effect of not supporting
> caching on certain legacy file servers that do not properly set
> qid.version. There is also now a mount flag which can disable
> the qid.version behavior.
>
> Signed-off-by: Eric Van Hensbergen <[email protected]>
> ---
> fs/9p/fid.c | 48 +++++++++-------------
> fs/9p/fid.h | 33 ++++++++++++++-
> fs/9p/v9fs.h | 1 -
> fs/9p/vfs_addr.c | 22 +++++-----
> fs/9p/vfs_file.c | 91 ++++++++++++++----------------------------
> fs/9p/vfs_inode.c | 45 +++++++--------------
> fs/9p/vfs_inode_dotl.c | 48 +++++++++-------------
> fs/9p/vfs_super.c | 33 ++++-----------
> 8 files changed, 135 insertions(+), 186 deletions(-)
>
Hi,
this patch has already reached -next, but there is some spurious code.
As, I'm not sure what the real intent is, I prefer to reply here instead
of sending a patch.
[...]
> @@ -817,9 +814,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
>
> v9ses = v9fs_inode2v9ses(dir);
> perm = unixmode2p9mode(v9ses, mode);
> - fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
> - v9fs_uflags2omode(flags,
> - v9fs_proto_dotu(v9ses)));
> + p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
> +
> + if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
> + p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
This code looks strange.
P9_OWRITE is 0x01, so !P9_OWRITE is 0.
So the code is equivalent to "p9_omode = P9_ORDWR;"
Is it what is expexted?
Maybe
p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR;
?
> + p9_debug(P9_DEBUG_CACHE,
> + "write-only file with writeback enabled, creating w/ O_RDWR\n");
> + }
> + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode);
> if (IS_ERR(fid)) {
> err = PTR_ERR(fid);
> goto error;
[...]
> diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
> index a28eb3aeab29..4b9488cb7a56 100644
> --- a/fs/9p/vfs_inode_dotl.c
> +++ b/fs/9p/vfs_inode_dotl.c
> @@ -232,12 +232,12 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
> int err = 0;
> kgid_t gid;
> umode_t mode;
> + int p9_omode = v9fs_open_to_dotl_flags(flags);
> const unsigned char *name = NULL;
> struct p9_qid qid;
> struct inode *inode;
> struct p9_fid *fid = NULL;
> - struct v9fs_inode *v9inode;
> - struct p9_fid *dfid = NULL, *ofid = NULL, *inode_fid = NULL;
> + struct p9_fid *dfid = NULL, *ofid = NULL;
> struct v9fs_session_info *v9ses;
> struct posix_acl *pacl = NULL, *dacl = NULL;
> struct dentry *res = NULL;
> @@ -282,14 +282,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
> /* Update mode based on ACL value */
> err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
> if (err) {
> - p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
> + p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n",
> err);
> goto out;
> }
> - err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
> - mode, gid, &qid);
> +
> + if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
> + p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
Same here.
CJ
> + p9_debug(P9_DEBUG_CACHE,
> + "write-only file with writeback enabled, creating w/ O_RDWR\n");
> + }
> + err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid);
> if (err < 0) {
> - p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
> + p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n",
> err);
> goto out;
> }
Christophe JAILLET wrote on Tue, Apr 25, 2023 at 09:11:01AM +0200:
> This code looks strange.
> P9_OWRITE is 0x01, so !P9_OWRITE is 0.
> So the code is equivalent to "p9_omode = P9_ORDWR;"
>
> Is it what is expexted?
>
> Maybe
> p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR;
> ?
Since we're discussing tooling, sparse caught this one:
fs/9p/vfs_inode.c:826:38: warning: dubious: x & !y
fs/9p/vfs_inode_dotl.c:291:38: warning: dubious: x & !y
(runing with make `M=fs/9p W=1 C=2` ; unfortunately error code doesn't
reflect a problem so that'll require inspecting output to automate...)
I've tried running scan-build in a very old-fashioned way (getting the
gcc lines from make V=1 and re-running with scan-build) and it had some
arguable warnings (setting `ret = 0` before it is overwritten again is
considered a dead store), but it had some real problems as well so it
might be worth fixing these just to reduce the clutter and run it all
the time.
I'll post a couple of patches tomorrow (unrelated to this)
--
Dominique
I swear I fixed that, must have been one of my fixes got dropped in
the process of churning over this patch. I'm quite concerned that
this is coming up during the merge window because I'd really rather
not punt this patch series another two months. I'm going to apply the
fix as an additional patch which hopefully Linus will accept with the
rest of the series.
On Tue, Apr 25, 2023 at 12:11 AM Christophe JAILLET
<[email protected]> wrote:
>
> Le 27/03/2023 à 04:59, Eric Van Hensbergen a écrit :
> > This patch removes the creating of an additional writeback_fid
> > for opened files. The patch addresses problems when files
> > were opened write-only or getattr on files with dirty caches.
> >
> > This patch also incorporates information about cache behavior
> > in the fid for every file. This allows us to reflect cache
> > behavior from mount flags, open mode, and information from
> > the server to inform readahead and writeback behavior.
> >
> > This includes adding support for a 9p semantic that qid.version==0
> > is used to mark a file as non-cachable which is important for
> > synthetic files. This may have a side-effect of not supporting
> > caching on certain legacy file servers that do not properly set
> > qid.version. There is also now a mount flag which can disable
> > the qid.version behavior.
> >
> > Signed-off-by: Eric Van Hensbergen <[email protected]>
> > ---
> > fs/9p/fid.c | 48 +++++++++-------------
> > fs/9p/fid.h | 33 ++++++++++++++-
> > fs/9p/v9fs.h | 1 -
> > fs/9p/vfs_addr.c | 22 +++++-----
> > fs/9p/vfs_file.c | 91 ++++++++++++++----------------------------
> > fs/9p/vfs_inode.c | 45 +++++++--------------
> > fs/9p/vfs_inode_dotl.c | 48 +++++++++-------------
> > fs/9p/vfs_super.c | 33 ++++-----------
> > 8 files changed, 135 insertions(+), 186 deletions(-)
> >
>
> Hi,
>
> this patch has already reached -next, but there is some spurious code.
>
> As, I'm not sure what the real intent is, I prefer to reply here instead
> of sending a patch.
>
>
> [...]
>
> > @@ -817,9 +814,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
> >
> > v9ses = v9fs_inode2v9ses(dir);
> > perm = unixmode2p9mode(v9ses, mode);
> > - fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
> > - v9fs_uflags2omode(flags,
> > - v9fs_proto_dotu(v9ses)));
> > + p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
> > +
> > + if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
> > + p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
>
> This code looks strange.
> P9_OWRITE is 0x01, so !P9_OWRITE is 0.
> So the code is equivalent to "p9_omode = P9_ORDWR;"
>
> Is it what is expexted?
>
> Maybe
> p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR;
> ?
>
> > + p9_debug(P9_DEBUG_CACHE,
> > + "write-only file with writeback enabled, creating w/ O_RDWR\n");
> > + }
> > + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode);
> > if (IS_ERR(fid)) {
> > err = PTR_ERR(fid);
> > goto error;
>
> [...]
>
> > diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
> > index a28eb3aeab29..4b9488cb7a56 100644
> > --- a/fs/9p/vfs_inode_dotl.c
> > +++ b/fs/9p/vfs_inode_dotl.c
> > @@ -232,12 +232,12 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
> > int err = 0;
> > kgid_t gid;
> > umode_t mode;
> > + int p9_omode = v9fs_open_to_dotl_flags(flags);
> > const unsigned char *name = NULL;
> > struct p9_qid qid;
> > struct inode *inode;
> > struct p9_fid *fid = NULL;
> > - struct v9fs_inode *v9inode;
> > - struct p9_fid *dfid = NULL, *ofid = NULL, *inode_fid = NULL;
> > + struct p9_fid *dfid = NULL, *ofid = NULL;
> > struct v9fs_session_info *v9ses;
> > struct posix_acl *pacl = NULL, *dacl = NULL;
> > struct dentry *res = NULL;
> > @@ -282,14 +282,19 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
> > /* Update mode based on ACL value */
> > err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
> > if (err) {
> > - p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
> > + p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n",
> > err);
> > goto out;
> > }
> > - err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
> > - mode, gid, &qid);
> > +
> > + if ((v9ses->cache >= CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) {
> > + p9_omode = (p9_omode & !P9_OWRITE) | P9_ORDWR;
>
> Same here.
>
> CJ
>
> > + p9_debug(P9_DEBUG_CACHE,
> > + "write-only file with writeback enabled, creating w/ O_RDWR\n");
> > + }
> > + err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid);
> > if (err < 0) {
> > - p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
> > + p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n",
> > err);
> > goto out;
> > }
>