I have posted this before, but have extended the patch into a few more
functions. The intent of the code is as before -- to improve JFFS2 lookups
by not locking i_mutex for long periods when files are not in cache. For
our embedded environment, we see a *five second* improvement in boot time.
This patch is an attempt to improve the speed of JFFS2 at startup. Our
particular problem is that we have a 30MB file on NOR flash which takes
about five seconds to read and test the data CRCs. During this time access
to other files in the same directory is blocked, due to
parent->d_inode->i_mutex being locked.
This patch solves this problem by adding a 'pre-lookup' call down to JFFS2,
which can be called without this mutex held. When the actual lookup is
performed, the results are in JFFS2's cache, and the dentry can be filled
in quickly.
However, given that I do not have experience at Linux filesystem code,
I can't be sure that this is a correct solution, or that there isn't a
better way of achieving what I'm trying to do. I feel there must be a way
to do this without creating a new VFS function call.
I suspect other filesystems could benefit from this too, as a lot of them
call the same d_splice_alias() function to fill in the dentry. JFFS2
already seems to have all the lower-level locks that are needed for this to
work; I don't know if that's true in other filesystems which could be
relying on the directory's i_mutex being locked. Because JFFS2 needs to
walk the entire file, there are big gains to be made here; other filesystems
may gain little to nothing.
I'm not expecting that this patch will get applied as-is, but please let me
know if there is any merit to it, whether it should work, and what still
needs to be done to if this is to be made part of the kernel.
At startup, reading a directory (for example to do an ls), requires finding
information on every file. To support JFFS2 recovery from partially written
blocks, the JFFS2 driver must scan all data blocks to verify the checksums
are correct just to be able to correctly report the length of a file. This
will take some time, and will be dependent on the amount of data on the
filesystem.
What makes this worse is that any path lookup will lock the dentry cache
to add the new entry. The JFFS2 driver then spends time finding the file
information (reading the entire file), before it returns the new dentry
information allowing the cache to be unlocked. During this time, no other
files in the same directory can be opened or even tested for existence.
However, there is no need for the dentry cache to be locked for the scan of
the file. The JFFS2 driver already locks the file, so the file will not be
deleted or modified. It also ensures that if another process tries to scan
the same file, the second process will be blocked and the scan only proceed
once.
To make the scan occur without locking the cache, a new vfs call has been
added which allows a filesystem to scan the file, but not return anything.
When the lookup occurs after this, the JFFS2 driver will find this
information and can quickly return the filled-in dentry.
Signed-off-by: Mark Tomlinson <[email protected]>
---
fs/jffs2/dir.c | 41 +++++++++++++++++++++++++++++------
fs/namei.c | 63 +++++++++++++++++++++++++++++++++++++++++++-----------
include/linux/fs.h | 1 +
3 files changed, 85 insertions(+), 20 deletions(-)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 1ba5c97..69c0ec4 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -36,6 +36,7 @@ static int jffs2_rmdir (struct inode *,struct dentry *);
static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
static int jffs2_rename (struct inode *, struct dentry *,
struct inode *, struct dentry *);
+static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name);
const struct file_operations jffs2_dir_operations =
{
@@ -51,6 +52,7 @@ const struct inode_operations jffs2_dir_inode_operations =
{
.create = jffs2_create,
.lookup = jffs2_lookup,
+ .prescan = jffs2_prescan,
.link = jffs2_link,
.unlink = jffs2_unlink,
.symlink = jffs2_symlink,
@@ -74,8 +76,12 @@ const struct inode_operations jffs2_dir_inode_operations =
and we use the same hash function as the dentries. Makes this
nice and simple
*/
-static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
- unsigned int flags)
+/* The prescan function does not have a dentry to fill in, so create this common function
+ * which is just passed the name and the inode for the directory.
+ * This function is very similar to the original jffs2_lookup, except for the arguments
+ * and the fact that the dentry (now not passed) is not updated.
+ */
+static struct inode *jffs2_lookup_common(struct inode *dir_i, struct qstr *d_name)
{
struct jffs2_inode_info *dir_f;
struct jffs2_full_dirent *fd = NULL, *fd_list;
@@ -84,7 +90,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
jffs2_dbg(1, "jffs2_lookup()\n");
- if (target->d_name.len > JFFS2_MAX_NAME_LEN)
+ if (d_name->len > JFFS2_MAX_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -92,11 +98,11 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
mutex_lock(&dir_f->sem);
/* NB: The 2.2 backport will need to explicitly check for '.' and '..' here */
- for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= target->d_name.hash; fd_list = fd_list->next) {
- if (fd_list->nhash == target->d_name.hash &&
+ for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= d_name->hash; fd_list = fd_list->next) {
+ if (fd_list->nhash == d_name->hash &&
(!fd || fd_list->version > fd->version) &&
- strlen(fd_list->name) == target->d_name.len &&
- !strncmp(fd_list->name, target->d_name.name, target->d_name.len)) {
+ strlen(fd_list->name) == d_name->len &&
+ !strncmp(fd_list->name, d_name->name, d_name->len)) {
fd = fd_list;
}
}
@@ -108,6 +114,27 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
if (IS_ERR(inode))
pr_warn("iget() failed for ino #%u\n", ino);
}
+ return inode;
+}
+
+/* Fill in an inode, and store the information in cache. This allows a
+ * subsequent jffs2_lookup() call to proceed quickly, which is useful
+ * since the jffs2_lookup() call will have the directory entry cache
+ * locked.
+ */
+static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name)
+{
+ (void)jffs2_lookup_common(dir_i, d_name);
+}
+
+/* jffs2_lookup function has the same functionality as before,
+ * just refactored */
+static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
+ unsigned int flags)
+{
+ struct inode *inode;
+
+ inode = jffs2_lookup_common(dir_i, &target->d_name);
return d_splice_alias(inode, target);
}
diff --git a/fs/namei.c b/fs/namei.c
index fe30d3b..36a0d84 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1322,13 +1322,12 @@ static void follow_dotdot(struct nameidata *nd)
*
* dir->d_inode->i_mutex must be held
*/
-static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
- unsigned int flags, bool *need_lookup)
+static struct dentry *lookup_dcache_no_alloc(struct qstr *name, struct dentry *dir,
+ unsigned int flags)
{
struct dentry *dentry;
int error;
- *need_lookup = false;
dentry = d_lookup(dir, name);
if (dentry) {
if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
@@ -1345,6 +1344,16 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
}
}
}
+ return dentry;
+}
+
+static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
+ unsigned int flags, bool *need_lookup)
+{
+ struct dentry *dentry;
+
+ *need_lookup = false;
+ dentry = lookup_dcache_no_alloc(name, dir, flags);
if (!dentry) {
dentry = d_alloc(dir, name);
@@ -1394,6 +1403,37 @@ static struct dentry *__lookup_hash(struct qstr *name,
return lookup_real(base->d_inode, dentry, flags);
}
+static struct dentry *__lookup_hash_lock(struct nameidata *nd, unsigned int subclass)
+{
+ struct dentry *parent = nd->path.dentry;
+ struct inode *d_inode = parent->d_inode;
+ struct dentry *dentry;
+
+ if (d_inode->i_op->prescan) {
+ /* First, just try to find the dentry in the cache.
+ * If it is not present, don't do anything.
+ */
+ mutex_lock_nested(&d_inode->i_mutex, subclass);
+ dentry = lookup_dcache_no_alloc(&nd->last, parent, nd->flags);
+ if (likely(dentry)) {
+ return dentry;
+ }
+ mutex_unlock(&d_inode->i_mutex);
+
+ /* Not in cache. Warn filesystem layer that a lookup is coming.
+ * This can be done without the directory's mutex held, since
+ * no dentry is being filled in here.
+ */
+ d_inode->i_op->prescan(d_inode, &nd->last);
+ }
+ /* Actually perform the lookup via the filesystem. The prescan
+ * done above will hopefully ensure this is now a quick operation,
+ * so the directory's mutex is not held for a long time.
+ */
+ mutex_lock_nested(&d_inode->i_mutex, subclass);
+ return __lookup_hash(&nd->last, parent, nd->flags);
+}
+
/*
* It's more convoluted than I'd like it to be, but... it's still fairly
* small and for now I'd prefer to have fast path as straight as possible.
@@ -1505,9 +1545,9 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
parent = nd->path.dentry;
BUG_ON(nd->inode != parent->d_inode);
- mutex_lock(&parent->d_inode->i_mutex);
- dentry = __lookup_hash(&nd->last, parent, nd->flags);
+ dentry = __lookup_hash_lock(nd, I_MUTEX_NORMAL);
mutex_unlock(&parent->d_inode->i_mutex);
+
if (IS_ERR(dentry))
return PTR_ERR(dentry);
path->mnt = nd->path.mnt;
@@ -2068,8 +2108,8 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
d = ERR_PTR(-EINVAL);
goto out;
}
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- d = __lookup_hash(&nd.last, nd.path.dentry, 0);
+ nd.flags = 0;
+ d = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
if (IS_ERR(d)) {
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
@@ -3355,8 +3395,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
/*
* Do the final lookup.
*/
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- dentry = lookup_hash(&nd);
+ dentry = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
if (IS_ERR(dentry))
goto unlock;
@@ -3669,8 +3708,7 @@ retry:
if (error)
goto exit1;
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- dentry = lookup_hash(&nd);
+ dentry = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit2;
@@ -3789,8 +3827,7 @@ retry:
if (error)
goto exit1;
retry_deleg:
- mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
- dentry = lookup_hash(&nd);
+ dentry = __lookup_hash_lock(&nd, I_MUTEX_PARENT);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e..8d68867 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1639,6 +1639,7 @@ struct inode_operations {
umode_t create_mode, int *opened);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
int (*set_acl)(struct inode *, struct posix_acl *, int);
+ void (*prescan) (struct inode *dir, struct qstr *name);
/* WARNING: probably going away soon, do not use! */
int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
--
1.9.1