2018-09-28 15:43:23

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 0/9] fuse: readdir caching

Allow caching readdir in fuse. Use the page cache for this, which solves
cache shrinking on memory pressure.

I think it might make sense to extract something like this into a set of
VFS helpers, so that other filesystems can also make use of this facility.

Thanks,
Miklos

---
Miklos Szeredi (9):
fuse: split out readdir.c
fuse: add FOPEN_CACHE_DIR
fuse: extract fuse_emit() helper
fuse: allow caching readdir
fuse: allow using readdir cache
fuse: add readdir cache version
fuse: use mtime for readdir cache verification
fuse: use iversion for readdir cache verification
fuse: reduce size of struct fuse_inode

fs/fuse/Makefile | 2 +-
fs/fuse/dir.c | 293 +++---------------------
fs/fuse/file.c | 10 +
fs/fuse/fuse_i.h | 83 ++++++-
fs/fuse/inode.c | 11 +-
fs/fuse/readdir.c | 567 ++++++++++++++++++++++++++++++++++++++++++++++
include/uapi/linux/fuse.h | 7 +-
7 files changed, 688 insertions(+), 285 deletions(-)
create mode 100644 fs/fuse/readdir.c

--
2.14.3



2018-09-28 15:43:31

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 7/9] fuse: use mtime for readdir cache verification

Store the modification time of the directory in the cache, obtained before
starting to fill the cache.

When reading the cache, verify that the directory hasn't changed, by
checking if current modification time is the same as the one stored in the
cache.

This only needs to be done when the current file position is at the
beginning of the directory, as mandated by POSIX.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/fuse_i.h | 3 +++
fs/fuse/readdir.c | 38 ++++++++++++++++++++++++++++++++++----
2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ef018ea5bcd9..e498f9edf01f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -117,6 +117,9 @@ struct fuse_inode {
/** version of the cache */
u64 version;

+ /** modification time of directory when cache was started */
+ struct timespec64 mtime;
+
/** protects above fields */
spinlock_t lock;
} rdc;
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index edb445c4cfbd..f96525cef518 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -397,8 +397,10 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
return res;
}

-static void fuse_rdc_reset(struct fuse_inode *fi)
+static void fuse_rdc_reset(struct inode *inode)
{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
fi->rdc.cached = false;
fi->rdc.version++;
fi->rdc.size = 0;
@@ -411,6 +413,7 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
enum fuse_parse_result res;
pgoff_t index;
@@ -424,12 +427,40 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
ff->readdir.cache_off = 0;
}

+ /*
+ * We're just about to start reading into the cache or reading the
+ * cache; both cases require an up-to-date mtime value.
+ */
+ if (!ctx->pos && fc->auto_inval_data) {
+ int err = fuse_update_attributes(inode, file);
+
+ if (err)
+ return err;
+ }
+
retry:
spin_lock(&fi->rdc.lock);
+retry_locked:
if (!fi->rdc.cached) {
+ /* Starting cache? Set cache mtime. */
+ if (!ctx->pos && !fi->rdc.size) {
+ fi->rdc.mtime = inode->i_mtime;
+ }
spin_unlock(&fi->rdc.lock);
return UNCACHED;
}
+ /*
+ * When at the beginning of the directory (i.e. just after opendir(3) or
+ * rewinddir(3)), then need to check whether directory contents have
+ * changed, and reset the cache if so.
+ */
+ if (!ctx->pos) {
+ if (!timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
+ fuse_rdc_reset(inode);
+ goto retry_locked;
+ }
+ }
+
/*
* If cache version changed since the last getdents() call, then reset
* the cache stream.
@@ -467,9 +498,8 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
* Uh-oh: page gone missing, cache is useless
*/
if (fi->rdc.version == ff->readdir.version)
- fuse_rdc_reset(fi);
- spin_unlock(&fi->rdc.lock);
- return UNCACHED;
+ fuse_rdc_reset(inode);
+ goto retry_locked;
}

/* Make sure it's still the same version after getting the page. */
--
2.14.3


2018-09-28 15:43:33

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 9/9] fuse: reduce size of struct fuse_inode

Do this by grouping fields used for cached writes and putting them into a
union with fields used for cached readdir (with obviously no overlap, since
we don't have hybrid objects).

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/dir.c | 13 +++++++++++-
fs/fuse/file.c | 8 ++++++++
fs/fuse/fuse_i.h | 62 +++++++++++++++++++++++++++++++-------------------------
fs/fuse/inode.c | 16 ++++-----------
4 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 6800fdc3e730..d1b2f42d746e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1414,8 +1414,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
file = NULL;
}

- if (attr->ia_valid & ATTR_SIZE)
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (WARN_ON(!S_ISREG(inode->i_mode)))
+ return -EIO;
is_truncate = true;
+ }

if (is_truncate) {
fuse_set_nowrite(inode);
@@ -1619,8 +1622,16 @@ void fuse_init_common(struct inode *inode)

void fuse_init_dir(struct inode *inode)
{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
inode->i_op = &fuse_dir_inode_operations;
inode->i_fop = &fuse_dir_operations;
+
+ spin_lock_init(&fi->rdc.lock);
+ fi->rdc.cached = false;
+ fi->rdc.size = 0;
+ fi->rdc.pos = 0;
+ fi->rdc.version = 0;
}

void fuse_init_symlink(struct inode *inode)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 101e64897b5f..1ca3fe5be3e4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3064,6 +3064,14 @@ static const struct address_space_operations fuse_file_aops = {

void fuse_init_file_inode(struct inode *inode)
{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
+
+ INIT_LIST_HEAD(&fi->write_files);
+ INIT_LIST_HEAD(&fi->queued_writes);
+ fi->writectr = 0;
+ init_waitqueue_head(&fi->page_waitq);
+ INIT_LIST_HEAD(&fi->writepages);
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1e8d20e86760..41cc9fb33d02 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -87,45 +87,51 @@ struct fuse_inode {
/** Version of last attribute change */
u64 attr_version;

- /** Files usable in writepage. Protected by fc->lock */
- struct list_head write_files;
+ union {
+ /* Write related fields (regular file only) */
+ struct {
+ /** Files usable in writepage. Protected by fc->lock */
+ struct list_head write_files;

- /** Writepages pending on truncate or fsync */
- struct list_head queued_writes;
+ /** Writepages pending on truncate or fsync */
+ struct list_head queued_writes;

- /** Number of sent writes, a negative bias (FUSE_NOWRITE)
- * means more writes are blocked */
- int writectr;
+ /** Number of sent writes, a negative bias
+ * (FUSE_NOWRITE) means more writes are blocked */
+ int writectr;

- /** Waitq for writepage completion */
- wait_queue_head_t page_waitq;
+ /** Waitq for writepage completion */
+ wait_queue_head_t page_waitq;

- /** List of writepage requestst (pending or sent) */
- struct list_head writepages;
+ /** List of writepage requestst (pending or sent) */
+ struct list_head writepages;
+ };

- /** readdir cache */
- struct {
- /** true if fully cached */
- bool cached;
+ /** readdir cache (directory only) */
+ struct {
+ /** true if fully cached */
+ bool cached;

- /** size of cache */
- loff_t size;
+ /** size of cache */
+ loff_t size;

- /** position at end of cache (position of next entry) */
- loff_t pos;
+ /** position at end of cache (position of next entry) */
+ loff_t pos;

- /** version of the cache */
- u64 version;
+ /** version of the cache */
+ u64 version;

- /** modification time of directory when cache was started */
- struct timespec64 mtime;
+ /** modification time of directory when cache was
+ * started */
+ struct timespec64 mtime;

- /** iversion of directory when cache was started */
- u64 iversion;
+ /** iversion of directory when cache was started */
+ u64 iversion;

- /** protects above fields */
- spinlock_t lock;
- } rdc;
+ /** protects above fields */
+ spinlock_t lock;
+ } rdc;
+ };

/** Miscellaneous bits describing inode state */
unsigned long state;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6d0a87308f86..bd58ab7e1858 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -93,18 +93,8 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->nodeid = 0;
fi->nlookup = 0;
fi->attr_version = 0;
- fi->writectr = 0;
fi->orig_ino = 0;
fi->state = 0;
- INIT_LIST_HEAD(&fi->write_files);
- INIT_LIST_HEAD(&fi->queued_writes);
- INIT_LIST_HEAD(&fi->writepages);
- init_waitqueue_head(&fi->page_waitq);
- spin_lock_init(&fi->rdc.lock);
- fi->rdc.cached = false;
- fi->rdc.size = 0;
- fi->rdc.pos = 0;
- fi->rdc.version = 0;
mutex_init(&fi->mutex);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
@@ -124,8 +114,10 @@ static void fuse_i_callback(struct rcu_head *head)
static void fuse_destroy_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
- BUG_ON(!list_empty(&fi->write_files));
- BUG_ON(!list_empty(&fi->queued_writes));
+ if (S_ISREG(inode->i_mode)) {
+ WARN_ON(!list_empty(&fi->write_files));
+ WARN_ON(!list_empty(&fi->queued_writes));
+ }
mutex_destroy(&fi->mutex);
kfree(fi->forget);
call_rcu(&inode->i_rcu, fuse_i_callback);
--
2.14.3


2018-09-28 15:43:47

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 8/9] fuse: use iversion for readdir cache verification

Use the internal iversion counter to make sure modifications of the
directory through this filesystem are not missed by the mtime check (due to
mtime granularity).

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/dir.c | 21 ++++++++++++++-------
fs/fuse/fuse_i.h | 3 +++
fs/fuse/readdir.c | 5 ++++-
3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 3a333b0ea9ad..6800fdc3e730 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,7 @@
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/xattr.h>
+#include <linux/iversion.h>
#include <linux/posix_acl.h>

static void fuse_advise_use_readdirplus(struct inode *dir)
@@ -89,6 +90,12 @@ void fuse_invalidate_attr(struct inode *inode)
get_fuse_inode(inode)->i_time = 0;
}

+static void fuse_dir_changed(struct inode *dir)
+{
+ fuse_invalidate_attr(dir);
+ inode_maybe_inc_iversion(dir, false);
+}
+
/**
* Mark the attributes as stale due to an atime change. Avoid the invalidate if
* atime is not used.
@@ -447,7 +454,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
kfree(forget);
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
- fuse_invalidate_attr(dir);
+ fuse_dir_changed(dir);
err = finish_open(file, entry, generic_file_open);
if (err) {
fuse_sync_release(ff, flags);
@@ -561,7 +568,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
} else {
fuse_change_entry_timeout(entry, &outarg);
}
- fuse_invalidate_attr(dir);
+ fuse_dir_changed(dir);
return 0;

out_put_forget_req:
@@ -671,7 +678,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
drop_nlink(inode);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
- fuse_invalidate_attr(dir);
+ fuse_dir_changed(dir);
fuse_invalidate_entry_cache(entry);
fuse_update_ctime(inode);
} else if (err == -EINTR)
@@ -693,7 +700,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
err = fuse_simple_request(fc, &args);
if (!err) {
clear_nlink(d_inode(entry));
- fuse_invalidate_attr(dir);
+ fuse_dir_changed(dir);
fuse_invalidate_entry_cache(entry);
} else if (err == -EINTR)
fuse_invalidate_entry(entry);
@@ -732,9 +739,9 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
fuse_update_ctime(d_inode(newent));
}

- fuse_invalidate_attr(olddir);
+ fuse_dir_changed(olddir);
if (olddir != newdir)
- fuse_invalidate_attr(newdir);
+ fuse_dir_changed(newdir);

/* newent will end up negative */
if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
@@ -967,7 +974,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
if (!entry)
goto unlock;

- fuse_invalidate_attr(parent);
+ fuse_dir_changed(parent);
fuse_invalidate_entry(entry);

if (child_nodeid != 0 && d_really_is_positive(entry)) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e498f9edf01f..1e8d20e86760 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -120,6 +120,9 @@ struct fuse_inode {
/** modification time of directory when cache was started */
struct timespec64 mtime;

+ /** iversion of directory when cache was started */
+ u64 iversion;
+
/** protects above fields */
spinlock_t lock;
} rdc;
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index f96525cef518..d91ae7449b73 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -8,6 +8,7 @@


#include "fuse_i.h"
+#include <linux/iversion.h>
#include <linux/posix_acl.h>

static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
@@ -445,6 +446,7 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
/* Starting cache? Set cache mtime. */
if (!ctx->pos && !fi->rdc.size) {
fi->rdc.mtime = inode->i_mtime;
+ fi->rdc.iversion = inode_query_iversion(inode);
}
spin_unlock(&fi->rdc.lock);
return UNCACHED;
@@ -455,7 +457,8 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
* changed, and reset the cache if so.
*/
if (!ctx->pos) {
- if (!timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
+ if (inode_peek_iversion(inode) != fi->rdc.iversion ||
+ !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
fuse_rdc_reset(inode);
goto retry_locked;
}
--
2.14.3


2018-09-28 15:43:57

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 4/9] fuse: allow caching readdir

This patch just adds the cache filling functions, which are invoked if
FOPEN_CACHE_DIR flag is set in the OPENDIR reply.

Cache reading and cache invalidation are added by subsequent patches.

The directory cache uses the page cache. Directory entries are packed into
a page in the same format as in the READDIR reply. A page only contains
whole entries, the space at the end of the page is cleared. The page is
locked while being modified.

Multiple parallel readdirs on the same directory can fill the cache; the
only constraint is that continuity must be maintained (d_off of last entry
points to position of current entry).

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/fuse_i.h | 15 ++++++++++
fs/fuse/inode.c | 4 +++
fs/fuse/readdir.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5e01ea3d137d..d01c4606c149 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -103,6 +103,21 @@ struct fuse_inode {
/** List of writepage requestst (pending or sent) */
struct list_head writepages;

+ /** readdir cache */
+ struct {
+ /** true if fully cached */
+ bool cached;
+
+ /** size of cache */
+ loff_t size;
+
+ /** position at end of cache (position of next entry) */
+ loff_t pos;
+
+ /** protects above fields */
+ spinlock_t lock;
+ } rdc;
+
/** Miscellaneous bits describing inode state */
unsigned long state;

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index db9e60b7eb69..03d8105a851d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -100,6 +100,10 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
init_waitqueue_head(&fi->page_waitq);
+ spin_lock_init(&fi->rdc.lock);
+ fi->rdc.cached = false;
+ fi->rdc.size = 0;
+ fi->rdc.pos = 0;
mutex_init(&fi->mutex);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 65336c93c1f4..180f336b933f 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -26,9 +26,91 @@ static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
return false;
}

+static void fuse_add_dirent_to_cache(struct file *file,
+ struct fuse_dirent *dirent, loff_t pos)
+{
+ struct fuse_inode *fi = get_fuse_inode(file_inode(file));
+ size_t reclen = FUSE_DIRENT_SIZE(dirent);
+ pgoff_t index;
+ struct page *page;
+ loff_t size;
+ unsigned int offset;
+ void *addr;
+
+ spin_lock(&fi->rdc.lock);
+ /*
+ * Is cache already completed? Or this entry does not go at the end of
+ * cache?
+ */
+ if (fi->rdc.cached || pos != fi->rdc.pos) {
+ spin_unlock(&fi->rdc.lock);
+ return;
+ }
+ size = fi->rdc.size;
+ offset = size & ~PAGE_MASK;
+ index = size >> PAGE_SHIFT;
+ /* Dirent doesn't fit in current page? Jump to next page. */
+ if (offset + reclen > PAGE_SIZE) {
+ index++;
+ offset = 0;
+ }
+ spin_unlock(&fi->rdc.lock);
+
+ if (offset) {
+ page = find_lock_page(file->f_mapping, index);
+ } else {
+ page = find_or_create_page(file->f_mapping, index,
+ mapping_gfp_mask(file->f_mapping));
+ }
+ if (!page)
+ return;
+
+ spin_lock(&fi->rdc.lock);
+ /* Raced with another readdir */
+ if (fi->rdc.size != size || WARN_ON(fi->rdc.pos != pos))
+ goto unlock;
+
+ addr = kmap_atomic(page);
+ if (!offset)
+ clear_page(addr);
+ memcpy(addr + offset, dirent, reclen);
+ kunmap_atomic(addr);
+ fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
+ fi->rdc.pos = dirent->off;
+unlock:
+ spin_unlock(&fi->rdc.lock);
+ unlock_page(page);
+ put_page(page);
+}
+
+static void fuse_readdir_cache_end(struct file *file, loff_t pos)
+{
+ struct fuse_inode *fi = get_fuse_inode(file_inode(file));
+ loff_t end;
+
+ spin_lock(&fi->rdc.lock);
+ /* does cache end position match current position? */
+ if (fi->rdc.pos != pos) {
+ spin_unlock(&fi->rdc.lock);
+ return;
+ }
+
+ fi->rdc.cached = true;
+ end = ALIGN(fi->rdc.size, PAGE_SIZE);
+ spin_unlock(&fi->rdc.lock);
+
+ /* truncate unused tail of cache */
+ truncate_inode_pages(file->f_mapping, end);
+}
+
static bool fuse_emit(struct file *file, struct dir_context *ctx,
struct fuse_dirent *dirent)
{
+ struct fuse_file *ff = file->private_data;
+
+ if (ff->open_flags & FOPEN_CACHE_DIR)
+ fuse_add_dirent_to_cache(file, dirent, ctx->pos);
+
return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
dirent->type);
}
@@ -249,7 +331,12 @@ int fuse_readdir(struct file *file, struct dir_context *ctx)
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) {
- if (plus) {
+ if (!nbytes) {
+ struct fuse_file *ff = file->private_data;
+
+ if (ff->open_flags & FOPEN_CACHE_DIR)
+ fuse_readdir_cache_end(file, ctx->pos);
+ } else if (plus) {
err = parse_dirplusfile(page_address(page), nbytes,
file, ctx, attr_version);
} else {
--
2.14.3


2018-09-28 15:44:36

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 1/9] fuse: split out readdir.c

Directory reading code is about to grow larger, so split it out from dir.c
into a new source file.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/Makefile | 2 +-
fs/fuse/dir.c | 259 +-----------------------------------------------------
fs/fuse/fuse_i.h | 12 +++
fs/fuse/readdir.c | 259 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 274 insertions(+), 258 deletions(-)
create mode 100644 fs/fuse/readdir.c

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 60da84a86dab..f7b807bc1027 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,4 +5,4 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o

-fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o
+fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0979609d6eba..3a333b0ea9ad 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -16,22 +16,6 @@
#include <linux/xattr.h>
#include <linux/posix_acl.h>

-static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
-{
- struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_inode *fi = get_fuse_inode(dir);
-
- if (!fc->do_readdirplus)
- return false;
- if (!fc->readdirplus_auto)
- return true;
- if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
- return true;
- if (ctx->pos == 0)
- return true;
- return false;
-}
-
static void fuse_advise_use_readdirplus(struct inode *dir)
{
struct fuse_inode *fi = get_fuse_inode(dir);
@@ -80,8 +64,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec)
* Set dentry and possibly attribute timeouts from the lookup/mk*
* replies
*/
-static void fuse_change_entry_timeout(struct dentry *entry,
- struct fuse_entry_out *o)
+void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
{
fuse_dentry_settime(entry,
time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
@@ -92,7 +75,7 @@ static u64 attr_timeout(struct fuse_attr_out *o)
return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
}

-static u64 entry_attr_timeout(struct fuse_entry_out *o)
+u64 entry_attr_timeout(struct fuse_entry_out *o)
{
return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
}
@@ -262,11 +245,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto out;
}

-static int invalid_nodeid(u64 nodeid)
-{
- return !nodeid || nodeid == FUSE_ROOT_ID;
-}
-
static int fuse_dentry_init(struct dentry *dentry)
{
dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
@@ -1165,239 +1143,6 @@ static int fuse_permission(struct inode *inode, int mask)
return err;
}

-static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
- struct dir_context *ctx)
-{
- while (nbytes >= FUSE_NAME_OFFSET) {
- struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
- size_t reclen = FUSE_DIRENT_SIZE(dirent);
- if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
- return -EIO;
- if (reclen > nbytes)
- break;
- if (memchr(dirent->name, '/', dirent->namelen) != NULL)
- return -EIO;
-
- if (!dir_emit(ctx, dirent->name, dirent->namelen,
- dirent->ino, dirent->type))
- break;
-
- buf += reclen;
- nbytes -= reclen;
- ctx->pos = dirent->off;
- }
-
- return 0;
-}
-
-static int fuse_direntplus_link(struct file *file,
- struct fuse_direntplus *direntplus,
- u64 attr_version)
-{
- struct fuse_entry_out *o = &direntplus->entry_out;
- struct fuse_dirent *dirent = &direntplus->dirent;
- struct dentry *parent = file->f_path.dentry;
- struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
- struct dentry *dentry;
- struct dentry *alias;
- struct inode *dir = d_inode(parent);
- struct fuse_conn *fc;
- struct inode *inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-
- if (!o->nodeid) {
- /*
- * Unlike in the case of fuse_lookup, zero nodeid does not mean
- * ENOENT. Instead, it only means the userspace filesystem did
- * not want to return attributes/handle for this entry.
- *
- * So do nothing.
- */
- return 0;
- }
-
- if (name.name[0] == '.') {
- /*
- * We could potentially refresh the attributes of the directory
- * and its parent?
- */
- if (name.len == 1)
- return 0;
- if (name.name[1] == '.' && name.len == 2)
- return 0;
- }
-
- if (invalid_nodeid(o->nodeid))
- return -EIO;
- if (!fuse_valid_type(o->attr.mode))
- return -EIO;
-
- fc = get_fuse_conn(dir);
-
- name.hash = full_name_hash(parent, name.name, name.len);
- dentry = d_lookup(parent, &name);
- if (!dentry) {
-retry:
- dentry = d_alloc_parallel(parent, &name, &wq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- }
- if (!d_in_lookup(dentry)) {
- struct fuse_inode *fi;
- inode = d_inode(dentry);
- if (!inode ||
- get_node_id(inode) != o->nodeid ||
- ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
- d_invalidate(dentry);
- dput(dentry);
- goto retry;
- }
- if (is_bad_inode(inode)) {
- dput(dentry);
- return -EIO;
- }
-
- fi = get_fuse_inode(inode);
- spin_lock(&fc->lock);
- fi->nlookup++;
- spin_unlock(&fc->lock);
-
- forget_all_cached_acls(inode);
- fuse_change_attributes(inode, &o->attr,
- entry_attr_timeout(o),
- attr_version);
- /*
- * The other branch comes via fuse_iget()
- * which bumps nlookup inside
- */
- } else {
- inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
- &o->attr, entry_attr_timeout(o),
- attr_version);
- if (!inode)
- inode = ERR_PTR(-ENOMEM);
-
- alias = d_splice_alias(inode, dentry);
- d_lookup_done(dentry);
- if (alias) {
- dput(dentry);
- dentry = alias;
- }
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- }
- if (fc->readdirplus_auto)
- set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
- fuse_change_entry_timeout(dentry, o);
-
- dput(dentry);
- return 0;
-}
-
-static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
- struct dir_context *ctx, u64 attr_version)
-{
- struct fuse_direntplus *direntplus;
- struct fuse_dirent *dirent;
- size_t reclen;
- int over = 0;
- int ret;
-
- while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
- direntplus = (struct fuse_direntplus *) buf;
- dirent = &direntplus->dirent;
- reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
-
- if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
- return -EIO;
- if (reclen > nbytes)
- break;
- if (memchr(dirent->name, '/', dirent->namelen) != NULL)
- return -EIO;
-
- if (!over) {
- /* We fill entries into dstbuf only as much as
- it can hold. But we still continue iterating
- over remaining entries to link them. If not,
- we need to send a FORGET for each of those
- which we did not link.
- */
- over = !dir_emit(ctx, dirent->name, dirent->namelen,
- dirent->ino, dirent->type);
- if (!over)
- ctx->pos = dirent->off;
- }
-
- buf += reclen;
- nbytes -= reclen;
-
- ret = fuse_direntplus_link(file, direntplus, attr_version);
- if (ret)
- fuse_force_forget(file, direntplus->entry_out.nodeid);
- }
-
- return 0;
-}
-
-static int fuse_readdir(struct file *file, struct dir_context *ctx)
-{
- int plus, err;
- size_t nbytes;
- struct page *page;
- struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
- u64 attr_version = 0;
- bool locked;
-
- if (is_bad_inode(inode))
- return -EIO;
-
- req = fuse_get_req(fc, 1);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- fuse_put_request(fc, req);
- return -ENOMEM;
- }
-
- plus = fuse_use_readdirplus(inode, ctx);
- req->out.argpages = 1;
- req->num_pages = 1;
- req->pages[0] = page;
- req->page_descs[0].length = PAGE_SIZE;
- if (plus) {
- attr_version = fuse_get_attr_version(fc);
- fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
- FUSE_READDIRPLUS);
- } else {
- fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
- FUSE_READDIR);
- }
- locked = fuse_lock_inode(inode);
- fuse_request_send(fc, req);
- fuse_unlock_inode(inode, locked);
- nbytes = req->out.args[0].size;
- err = req->out.h.error;
- fuse_put_request(fc, req);
- if (!err) {
- if (plus) {
- err = parse_dirplusfile(page_address(page), nbytes,
- file, ctx,
- attr_version);
- } else {
- err = parse_dirfile(page_address(page), nbytes, file,
- ctx);
- }
- }
-
- __free_page(page);
- fuse_invalidate_atime(inode);
- return err;
-}
-
static const char *fuse_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index f78e9614bb5f..5e01ea3d137d 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -697,6 +697,11 @@ static inline u64 get_node_id(struct inode *inode)
return get_fuse_inode(inode)->nodeid;
}

+static inline int invalid_nodeid(u64 nodeid)
+{
+ return !nodeid || nodeid == FUSE_ROOT_ID;
+}
+
/** Device operations */
extern const struct file_operations fuse_dev_operations;

@@ -873,6 +878,9 @@ void fuse_invalidate_entry_cache(struct dentry *entry);

void fuse_invalidate_atime(struct inode *inode);

+u64 entry_attr_timeout(struct fuse_entry_out *o);
+void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
+
/**
* Acquire reference to fuse_conn
*/
@@ -992,4 +1000,8 @@ struct posix_acl;
struct posix_acl *fuse_get_acl(struct inode *inode, int type);
int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);

+
+/* readdir.c */
+int fuse_readdir(struct file *file, struct dir_context *ctx);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
new file mode 100644
index 000000000000..3e100e00e21e
--- /dev/null
+++ b/fs/fuse/readdir.c
@@ -0,0 +1,259 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2018 Miklos Szeredi <[email protected]>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+
+#include "fuse_i.h"
+#include <linux/posix_acl.h>
+
+static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_inode *fi = get_fuse_inode(dir);
+
+ if (!fc->do_readdirplus)
+ return false;
+ if (!fc->readdirplus_auto)
+ return true;
+ if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
+ return true;
+ if (ctx->pos == 0)
+ return true;
+ return false;
+}
+
+static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
+ struct dir_context *ctx)
+{
+ while (nbytes >= FUSE_NAME_OFFSET) {
+ struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+ size_t reclen = FUSE_DIRENT_SIZE(dirent);
+ if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+ return -EIO;
+ if (reclen > nbytes)
+ break;
+ if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+ return -EIO;
+
+ if (!dir_emit(ctx, dirent->name, dirent->namelen,
+ dirent->ino, dirent->type))
+ break;
+
+ buf += reclen;
+ nbytes -= reclen;
+ ctx->pos = dirent->off;
+ }
+
+ return 0;
+}
+
+static int fuse_direntplus_link(struct file *file,
+ struct fuse_direntplus *direntplus,
+ u64 attr_version)
+{
+ struct fuse_entry_out *o = &direntplus->entry_out;
+ struct fuse_dirent *dirent = &direntplus->dirent;
+ struct dentry *parent = file->f_path.dentry;
+ struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
+ struct dentry *dentry;
+ struct dentry *alias;
+ struct inode *dir = d_inode(parent);
+ struct fuse_conn *fc;
+ struct inode *inode;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ if (!o->nodeid) {
+ /*
+ * Unlike in the case of fuse_lookup, zero nodeid does not mean
+ * ENOENT. Instead, it only means the userspace filesystem did
+ * not want to return attributes/handle for this entry.
+ *
+ * So do nothing.
+ */
+ return 0;
+ }
+
+ if (name.name[0] == '.') {
+ /*
+ * We could potentially refresh the attributes of the directory
+ * and its parent?
+ */
+ if (name.len == 1)
+ return 0;
+ if (name.name[1] == '.' && name.len == 2)
+ return 0;
+ }
+
+ if (invalid_nodeid(o->nodeid))
+ return -EIO;
+ if (!fuse_valid_type(o->attr.mode))
+ return -EIO;
+
+ fc = get_fuse_conn(dir);
+
+ name.hash = full_name_hash(parent, name.name, name.len);
+ dentry = d_lookup(parent, &name);
+ if (!dentry) {
+retry:
+ dentry = d_alloc_parallel(parent, &name, &wq);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ }
+ if (!d_in_lookup(dentry)) {
+ struct fuse_inode *fi;
+ inode = d_inode(dentry);
+ if (!inode ||
+ get_node_id(inode) != o->nodeid ||
+ ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+ d_invalidate(dentry);
+ dput(dentry);
+ goto retry;
+ }
+ if (is_bad_inode(inode)) {
+ dput(dentry);
+ return -EIO;
+ }
+
+ fi = get_fuse_inode(inode);
+ spin_lock(&fc->lock);
+ fi->nlookup++;
+ spin_unlock(&fc->lock);
+
+ forget_all_cached_acls(inode);
+ fuse_change_attributes(inode, &o->attr,
+ entry_attr_timeout(o),
+ attr_version);
+ /*
+ * The other branch comes via fuse_iget()
+ * which bumps nlookup inside
+ */
+ } else {
+ inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
+ &o->attr, entry_attr_timeout(o),
+ attr_version);
+ if (!inode)
+ inode = ERR_PTR(-ENOMEM);
+
+ alias = d_splice_alias(inode, dentry);
+ d_lookup_done(dentry);
+ if (alias) {
+ dput(dentry);
+ dentry = alias;
+ }
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ }
+ if (fc->readdirplus_auto)
+ set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
+ fuse_change_entry_timeout(dentry, o);
+
+ dput(dentry);
+ return 0;
+}
+
+static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
+ struct dir_context *ctx, u64 attr_version)
+{
+ struct fuse_direntplus *direntplus;
+ struct fuse_dirent *dirent;
+ size_t reclen;
+ int over = 0;
+ int ret;
+
+ while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
+ direntplus = (struct fuse_direntplus *) buf;
+ dirent = &direntplus->dirent;
+ reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
+
+ if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+ return -EIO;
+ if (reclen > nbytes)
+ break;
+ if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+ return -EIO;
+
+ if (!over) {
+ /* We fill entries into dstbuf only as much as
+ it can hold. But we still continue iterating
+ over remaining entries to link them. If not,
+ we need to send a FORGET for each of those
+ which we did not link.
+ */
+ over = !dir_emit(ctx, dirent->name, dirent->namelen,
+ dirent->ino, dirent->type);
+ if (!over)
+ ctx->pos = dirent->off;
+ }
+
+ buf += reclen;
+ nbytes -= reclen;
+
+ ret = fuse_direntplus_link(file, direntplus, attr_version);
+ if (ret)
+ fuse_force_forget(file, direntplus->entry_out.nodeid);
+ }
+
+ return 0;
+}
+
+int fuse_readdir(struct file *file, struct dir_context *ctx)
+{
+ int plus, err;
+ size_t nbytes;
+ struct page *page;
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ u64 attr_version = 0;
+ bool locked;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ req = fuse_get_req(fc, 1);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ fuse_put_request(fc, req);
+ return -ENOMEM;
+ }
+
+ plus = fuse_use_readdirplus(inode, ctx);
+ req->out.argpages = 1;
+ req->num_pages = 1;
+ req->pages[0] = page;
+ req->page_descs[0].length = PAGE_SIZE;
+ if (plus) {
+ attr_version = fuse_get_attr_version(fc);
+ fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
+ FUSE_READDIRPLUS);
+ } else {
+ fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
+ FUSE_READDIR);
+ }
+ locked = fuse_lock_inode(inode);
+ fuse_request_send(fc, req);
+ fuse_unlock_inode(inode, locked);
+ nbytes = req->out.args[0].size;
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ if (plus) {
+ err = parse_dirplusfile(page_address(page), nbytes,
+ file, ctx, attr_version);
+ } else {
+ err = parse_dirfile(page_address(page), nbytes, file,
+ ctx);
+ }
+ }
+
+ __free_page(page);
+ fuse_invalidate_atime(inode);
+ return err;
+}
--
2.14.3


2018-09-28 15:45:01

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 2/9] fuse: add FOPEN_CACHE_DIR

Add flag returned by OPENDIR request to allow kernel to cache directory
contents in page cache. The effect of FOPEN_CACHE_DIR is twofold:

a) if not already cached, it writes entries into the cache

b) if already cached, it allows reading entries from the cache

The FOPEN_KEEP_CACHE has the same effect as on regular files: unless this
flag is given the cache is cleared upon completion of open.

So FOPEN_CACHE_DIR and FOPEN_KEEP_CACHE flags should be used together to
make use of the directory caching facility introduced in the following
patches.

The FUSE_AUTO_INVAL_DATA flag returned in INIT reply also has the same
effect on the directory cache as it has on data cache for regular files.

Signed-off-by: Miklos Szeredi <[email protected]>
---
include/uapi/linux/fuse.h | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 92fa24c24c92..e30e3a6868cd 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -116,6 +116,9 @@
*
* 7.27
* - add FUSE_ABORT_ERROR
+ *
+ * 7.28
+ * - add FOPEN_CACHE_DIR
*/

#ifndef _LINUX_FUSE_H
@@ -151,7 +154,7 @@
#define FUSE_KERNEL_VERSION 7

/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 27
+#define FUSE_KERNEL_MINOR_VERSION 28

/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -219,10 +222,12 @@ struct fuse_file_lock {
* FOPEN_DIRECT_IO: bypass page cache for this open file
* FOPEN_KEEP_CACHE: don't invalidate the data cache on open
* FOPEN_NONSEEKABLE: the file is not seekable
+ * FOPEN_CACHE_DIR: allow caching this directory
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
#define FOPEN_NONSEEKABLE (1 << 2)
+#define FOPEN_CACHE_DIR (1 << 3)

/**
* INIT request/reply flags
--
2.14.3


2018-09-28 15:45:22

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 6/9] fuse: add readdir cache version

Allow the cache to be invalidated when page(s) have gone missing. In this
case increment the version of the cache and reset to an empty state.

Add a version number to the directory stream in struct fuse_file as well,
indicating the version of the cache it's supposed to be reading. If the
cache version doesn't match the stream's version, then reset the stream to
the beginning of the cache.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/fuse_i.h | 7 +++++++
fs/fuse/inode.c | 1 +
fs/fuse/readdir.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 116fe14053f1..ef018ea5bcd9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -114,6 +114,9 @@ struct fuse_inode {
/** position at end of cache (position of next entry) */
loff_t pos;

+ /** version of the cache */
+ u64 version;
+
/** protects above fields */
spinlock_t lock;
} rdc;
@@ -176,6 +179,10 @@ struct fuse_file {

/** Offset in cache */
loff_t cache_off;
+
+ /** Version of cache we are reading */
+ u64 version;
+
} readdir;

/** RB node to be linked on fuse_conn->polled_files */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 03d8105a851d..6d0a87308f86 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -104,6 +104,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->rdc.cached = false;
fi->rdc.size = 0;
fi->rdc.pos = 0;
+ fi->rdc.version = 0;
mutex_init(&fi->mutex);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index e6ae82f2df9d..edb445c4cfbd 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -34,6 +34,7 @@ static void fuse_add_dirent_to_cache(struct file *file,
pgoff_t index;
struct page *page;
loff_t size;
+ u64 version;
unsigned int offset;
void *addr;

@@ -46,6 +47,7 @@ static void fuse_add_dirent_to_cache(struct file *file,
spin_unlock(&fi->rdc.lock);
return;
}
+ version = fi->rdc.version;
size = fi->rdc.size;
offset = size & ~PAGE_MASK;
index = size >> PAGE_SHIFT;
@@ -67,7 +69,8 @@ static void fuse_add_dirent_to_cache(struct file *file,

spin_lock(&fi->rdc.lock);
/* Raced with another readdir */
- if (fi->rdc.size != size || WARN_ON(fi->rdc.pos != pos))
+ if (fi->rdc.version != version || fi->rdc.size != size ||
+ WARN_ON(fi->rdc.pos != pos))
goto unlock;

addr = kmap_atomic(page);
@@ -394,6 +397,14 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
return res;
}

+/*
+ * Return the readdir cache of @fi to the empty state and bump its version,
+ * so that streams still holding the old version number can notice the
+ * change.  The call site in fuse_readdir_cached() invokes this with
+ * fi->rdc.lock held.
+ */
+static void fuse_rdc_reset(struct fuse_inode *fi)
+{
+	/* Cache is no longer complete... */
+	fi->rdc.cached = false;
+	/* ...and holds no entries. */
+	fi->rdc.size = 0;
+	fi->rdc.pos = 0;
+	/* New generation of the cache. */
+	fi->rdc.version++;
+}
+
#define UNCACHED 1

static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
@@ -419,6 +430,21 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
spin_unlock(&fi->rdc.lock);
return UNCACHED;
}
+ /*
+ * If cache version changed since the last getdents() call, then reset
+ * the cache stream.
+ */
+ if (ff->readdir.version != fi->rdc.version) {
+ ff->readdir.pos = 0;
+ ff->readdir.cache_off = 0;
+ }
+ /*
+ * If at the beginning of the cache, then reset version to
+ * current.
+ */
+ if (ff->readdir.pos == 0)
+ ff->readdir.version = fi->rdc.version;
+
WARN_ON(fi->rdc.size < ff->readdir.cache_off);

index = ff->readdir.cache_off >> PAGE_SHIFT;
@@ -435,13 +461,30 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)

page = find_get_page_flags(file->f_mapping, index,
FGP_ACCESSED | FGP_LOCK);
+ spin_lock(&fi->rdc.lock);
if (!page) {
/*
* Uh-oh: page gone missing, cache is useless
*/
+ if (fi->rdc.version == ff->readdir.version)
+ fuse_rdc_reset(fi);
+ spin_unlock(&fi->rdc.lock);
return UNCACHED;
}

+ /* Make sure it's still the same version after getting the page. */
+ if (ff->readdir.version != fi->rdc.version) {
+ spin_unlock(&fi->rdc.lock);
+ unlock_page(page);
+ put_page(page);
+ goto retry;
+ }
+ spin_unlock(&fi->rdc.lock);
+
+ /*
+ * Contents of the page are now protected against changing by holding
+ * the page lock.
+ */
addr = kmap(page);
res = fuse_parse_cache(ff, addr, size, ctx);
kunmap(page);
--
2.14.3


2018-09-28 15:45:32

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 5/9] fuse: allow using readdir cache

The cache is only used if it's completed, not while it's still being
filled; this constraint could be lifted later, if it turns out to be
useful.

Introduce state in struct fuse_file that indicates the position within the
cache. After a seek, reset the position to the beginning of the cache and
search the cache for the current position. If the current position is not
found in the cache, then fall back to uncached readdir.

It can also happen that page(s) disappear from the cache, in which case we
must also fall back to uncached readdir.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/file.c | 2 +
fs/fuse/fuse_i.h | 15 ++++++
fs/fuse/readdir.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 161 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 32d0b883e74f..101e64897b5f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -59,6 +59,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
}

INIT_LIST_HEAD(&ff->write_entry);
+ mutex_init(&ff->readdir.lock);
refcount_set(&ff->count, 1);
RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait);
@@ -73,6 +74,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
void fuse_file_free(struct fuse_file *ff)
{
fuse_request_free(ff->reserved_req);
+ mutex_destroy(&ff->readdir.lock);
kfree(ff);
}

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d01c4606c149..116fe14053f1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -163,6 +163,21 @@ struct fuse_file {
/** Entry on inode's write_files list */
struct list_head write_entry;

+ /** Readdir related */
+ struct {
+ /**
+ * Protects below fields against (crazy) parallel readdir on
+ * same open file. Uncontended in the normal case.
+ */
+ struct mutex lock;
+
+ /** Dir stream position */
+ loff_t pos;
+
+ /** Offset in cache */
+ loff_t cache_off;
+ } readdir;
+
/** RB node to be linked on fuse_conn->polled_files */
struct rb_node polled_node;

diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 180f336b933f..e6ae82f2df9d 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -287,7 +287,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
return 0;
}

-int fuse_readdir(struct file *file, struct dir_context *ctx)
+static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus, err;
size_t nbytes;
@@ -298,9 +298,6 @@ int fuse_readdir(struct file *file, struct dir_context *ctx)
u64 attr_version = 0;
bool locked;

- if (is_bad_inode(inode))
- return -EIO;
-
req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -349,3 +346,146 @@ int fuse_readdir(struct file *file, struct dir_context *ctx)
fuse_invalidate_atime(inode);
return err;
}
+
+/* Result of scanning one page of the readdir cache (fuse_parse_cache()) */
+enum fuse_parse_result {
+	/* a cached entry failed validation */
+	FOUND_ERR = -1,
+	/* no entry at the requested position was seen */
+	FOUND_NONE = 0,
+	/* requested position was reached; end of the valid data was hit */
+	FOUND_SOME = 1,
+	/* dir_emit() returned false; stop iterating */
+	FOUND_ALL = 2,
+};
+
+/*
+ * Walk the cached directory entries stored in one page of the readdir cache.
+ *
+ * @ff:   open directory; ff->readdir.pos and ff->readdir.cache_off are
+ *        advanced past every entry that is consumed
+ * @addr: mapped address of the cache page
+ * @size: number of valid bytes in this page
+ * @ctx:  dir context to emit into; entries are emitted only once the stream
+ *        position (ff->readdir.pos) equals ctx->pos
+ *
+ * Returns FOUND_ERR if an entry fails validation, FOUND_ALL if dir_emit()
+ * returned false, FOUND_SOME if the requested position was reached before
+ * running out of valid data, and FOUND_NONE otherwise.
+ */
+static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
+					       void *addr, unsigned int size,
+					       struct dir_context *ctx)
+{
+	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
+	enum fuse_parse_result res = FOUND_NONE;
+
+	WARN_ON(offset >= size);
+
+	for (;;) {
+		struct fuse_dirent *dirent = addr + offset;
+		unsigned int nbytes = size - offset;
+		size_t reclen;
+
+		/* Not enough room for a dirent header, or end marker: stop. */
+		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
+			break;
+
+		/*
+		 * FUSE_DIRENT_SIZE() dereferences dirent->namelen, so it must
+		 * not be evaluated until the header is known to lie within
+		 * the valid part of the page (checked just above).
+		 */
+		reclen = FUSE_DIRENT_SIZE(dirent);
+
+		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
+			return FOUND_ERR;
+		if (WARN_ON(reclen > nbytes))
+			return FOUND_ERR;
+		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
+			return FOUND_ERR;
+
+		/* Only emit once the stream has caught up with ctx->pos. */
+		if (ff->readdir.pos == ctx->pos) {
+			res = FOUND_SOME;
+			if (!dir_emit(ctx, dirent->name, dirent->namelen,
+				      dirent->ino, dirent->type))
+				return FOUND_ALL;
+			ctx->pos = dirent->off;
+		}
+		ff->readdir.pos = dirent->off;
+		ff->readdir.cache_off += reclen;
+
+		offset += reclen;
+	}
+
+	return res;
+}
+
+#define UNCACHED 1
+
+/*
+ * Try to satisfy a readdir from the directory's cached pages.
+ *
+ * Called from fuse_readdir() with ff->readdir.lock held.
+ *
+ * Returns 0 when the cache served the request (including EOF), -EIO when a
+ * cached entry failed validation, and UNCACHED when the cache cannot be used
+ * (not fully cached, a page is missing, or the requested position was not
+ * found), in which case the caller falls back to the uncached path.
+ */
+static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
+{
+ struct fuse_file *ff = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ enum fuse_parse_result res;
+ pgoff_t index;
+ unsigned int size;
+ struct page *page;
+ void *addr;
+
+ /* Seeked? If so, reset the cache stream */
+ if (ff->readdir.pos != ctx->pos) {
+ ff->readdir.pos = 0;
+ ff->readdir.cache_off = 0;
+ }
+
+retry:
+ /* rdc.cached and rdc.size are sampled under fi->rdc.lock */
+ spin_lock(&fi->rdc.lock);
+ if (!fi->rdc.cached) {
+ spin_unlock(&fi->rdc.lock);
+ return UNCACHED;
+ }
+ WARN_ON(fi->rdc.size < ff->readdir.cache_off);
+
+ index = ff->readdir.cache_off >> PAGE_SHIFT;
+
+ /* The last page of the cache is only valid up to rdc.size */
+ if (index == (fi->rdc.size >> PAGE_SHIFT))
+ size = fi->rdc.size & ~PAGE_MASK;
+ else
+ size = PAGE_SIZE;
+ spin_unlock(&fi->rdc.lock);
+
+ /* EOF? */
+ if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
+ return 0;
+
+ /* FGP_LOCK: the page is returned locked and is unlocked below */
+ page = find_get_page_flags(file->f_mapping, index,
+ FGP_ACCESSED | FGP_LOCK);
+ if (!page) {
+ /*
+ * Uh-oh: page gone missing, cache is useless
+ */
+ return UNCACHED;
+ }
+
+ addr = kmap(page);
+ res = fuse_parse_cache(ff, addr, size, ctx);
+ kunmap(page);
+ unlock_page(page);
+ put_page(page);
+
+ if (res == FOUND_ERR)
+ return -EIO;
+
+ /* dir_emit() refused an entry: nothing more to do for this call */
+ if (res == FOUND_ALL)
+ return 0;
+
+ if (size == PAGE_SIZE) {
+ /* We hit end of page: skip to next page. */
+ ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
+ goto retry;
+ }
+
+ /*
+ * End of cache reached. If found position, then we are done, otherwise
+ * need to fall back to uncached, since the position we were looking for
+ * wasn't in the cache.
+ */
+ return res == FOUND_SOME ? 0 : UNCACHED;
+}
+
+/*
+ * readdir entry point: serve the request from the readdir cache when the
+ * file was opened with FOPEN_CACHE_DIR and the cache is usable, otherwise
+ * send a request through the uncached path.
+ *
+ * Returns -EIO for a bad inode, otherwise the result of whichever path
+ * handled the request.
+ */
+int fuse_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct fuse_file *ff = file->private_data;
+	int ret = UNCACHED;
+
+	if (is_bad_inode(file_inode(file)))
+		return -EIO;
+
+	/* Serialize readdir calls on the same open file. */
+	mutex_lock(&ff->readdir.lock);
+	if (ff->open_flags & FOPEN_CACHE_DIR)
+		ret = fuse_readdir_cached(file, ctx);
+	/* Cache not enabled or not usable: fall back. */
+	if (ret == UNCACHED)
+		ret = fuse_readdir_uncached(file, ctx);
+	mutex_unlock(&ff->readdir.lock);
+
+	return ret;
+}
--
2.14.3


2018-09-28 15:45:37

by Miklos Szeredi

[permalink] [raw]
Subject: [PATCH 3/9] fuse: extract fuse_emit() helper

Prepare for cache filling by introducing a helper for emitting a single
directory entry.

Signed-off-by: Miklos Szeredi <[email protected]>
---
fs/fuse/readdir.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 3e100e00e21e..65336c93c1f4 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -26,6 +26,13 @@ static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
return false;
}

+/*
+ * Emit a single directory entry to @ctx.
+ *
+ * @file is not used in this version of the helper.  Returns whatever
+ * dir_emit() returns.
+ */
+static bool fuse_emit(struct file *file, struct dir_context *ctx,
+		      struct fuse_dirent *dirent)
+{
+	bool emitted;
+
+	emitted = dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
+			   dirent->type);
+	return emitted;
+}
+
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx)
{
@@ -39,8 +46,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;

- if (!dir_emit(ctx, dirent->name, dirent->namelen,
- dirent->ino, dirent->type))
+ if (!fuse_emit(file, ctx, dirent))
break;

buf += reclen;
@@ -183,8 +189,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
we need to send a FORGET for each of those
which we did not link.
*/
- over = !dir_emit(ctx, dirent->name, dirent->namelen,
- dirent->ino, dirent->type);
+ over = !fuse_emit(file, ctx, dirent);
if (!over)
ctx->pos = dirent->off;
}
--
2.14.3


2018-09-28 16:43:00

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH 4/9] fuse: allow caching readdir

On Fri, Sep 28, 2018 at 05:42:29PM +0200, Miklos Szeredi wrote:
> @@ -103,6 +103,21 @@ struct fuse_inode {
> /** List of writepage requestst (pending or sent) */
> struct list_head writepages;
>
> + /** readdir cache */
> + struct {
> + /** true if fully cached */
> + bool cached;

umm, you're using kernel-doc markers without formatting this comment as
kernel-doc. See Documentation/doc-guide/kernel-doc.rst but I believe
you should be writing this as:

/** @rdc.cached: true if fully cached */

or just use /* */ if you don't want kernel-doc.