shmem_symlink error path forgot to unwind. The failed symlink left
over could be unlinked, but long ones were left on the shmem_inodes
list, causing oops in swapoff at shutdown if not earlier.
--- 2.4.19-pre10/mm/shmem.c Tue Jun 4 13:54:20 2002
+++ linux/mm/shmem.c Tue Jun 11 19:02:30 2002
@@ -1123,17 +1123,20 @@
return error;
len = strlen(symname) + 1;
- if (len > PAGE_CACHE_SIZE)
- return -ENAMETOOLONG;
-
inode = dentry->d_inode;
info = SHMEM_I(inode);
inode->i_size = len-1;
+ inode->i_op = &shmem_symlink_inline_operations;
+
if (len <= sizeof(struct shmem_inode_info)) {
/* do it inline */
memcpy(info, symname, len);
- inode->i_op = &shmem_symlink_inline_operations;
} else {
+ if (len > PAGE_CACHE_SIZE) {
+ error = -ENAMETOOLONG;
+ goto rmnod;
+ }
+ inode->i_op = &shmem_symlink_inode_operations;
spin_lock (&shmem_ilock);
list_add (&info->list, &shmem_inodes);
spin_unlock (&shmem_ilock);
@@ -1141,7 +1144,8 @@
page = shmem_getpage_locked(info, inode, 0);
if (IS_ERR(page)) {
up(&info->sem);
- return PTR_ERR(page);
+ error = PTR_ERR(page);
+ goto rmnod;
}
kaddr = kmap(page);
memcpy(kaddr, symname, len);
@@ -1150,10 +1154,14 @@
UnlockPage(page);
page_cache_release(page);
up(&info->sem);
- inode->i_op = &shmem_symlink_inode_operations;
}
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
return 0;
+
+rmnod:
+ if (!shmem_unlink(dir, dentry))
+ d_delete(dentry);
+ return error;
}
static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
shmem_mknod should not update directory times if it cannot get an inode.
--- 2.4.19-pre10/mm/shmem.c Tue Jun 11 19:02:30 2002
+++ linux/mm/shmem.c Tue Jun 11 19:02:30 2002
@@ -987,8 +987,8 @@
struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
int error = -ENOSPC;
- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode) {
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
error = 0;
shmem_truncate omitted to clear final partial page if that was on swap.
Found by Andrew's fsx testing on 2.5, this fix already went into 2.5.20.
--- 2.4.19-pre10/mm/shmem.c Tue Jun 11 19:02:30 2002
+++ linux/mm/shmem.c Tue Jun 11 19:02:30 2002
@@ -49,6 +49,8 @@
static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */
+static struct page *shmem_getpage_locked(struct shmem_inode_info *, struct inode *, unsigned long);
+
#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
/*
@@ -313,6 +315,7 @@
static void shmem_truncate (struct inode * inode)
{
unsigned long index;
+ unsigned long partial;
unsigned long freed = 0;
struct shmem_inode_info * info = SHMEM_I(inode);
@@ -320,6 +323,28 @@
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (&info->lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ partial = inode->i_size & ~PAGE_CACHE_MASK;
+
+ if (partial) {
+ swp_entry_t *entry = shmem_swp_entry(info, index-1, 0);
+ struct page *page;
+ /*
+ * This check is racy: it's faintly possible that page
+ * was assigned to swap during truncate_inode_pages,
+ * and now assigned to file; but better than nothing.
+ */
+ if (!IS_ERR(entry) && entry->val) {
+ spin_unlock(&info->lock);
+ page = shmem_getpage_locked(info, inode, index-1);
+ if (!IS_ERR(page)) {
+ memclear_highpage_flush(page, partial,
+ PAGE_CACHE_SIZE - partial);
+ UnlockPage(page);
+ page_cache_release(page);
+ }
+ spin_lock(&info->lock);
+ }
+ }
while (index < info->next_index)
freed += shmem_truncate_indirect(info, index);
Several simple speedups to tmpfs swapoff: without patch, swapoff of a
kernel tree in tmpfs might take 2 minutes, with patch 4 seconds.
Inline search goes no further than necessary; only search inode when it
has swapped pages; start next search from same inode; list in order.
--- 2.4.19-pre10/mm/shmem.c Tue Jun 11 19:02:30 2002
+++ linux/mm/shmem.c Tue Jun 11 19:02:30 2002
@@ -372,28 +372,30 @@
clear_inode(inode);
}
-static int shmem_clear_swp (swp_entry_t entry, swp_entry_t *ptr, int size) {
+static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *ptr, swp_entry_t *eptr)
+{
swp_entry_t *test;
- for (test = ptr; test < ptr + size; test++) {
- if (test->val == entry.val) {
- swap_free (entry);
- *test = (swp_entry_t) {0};
+ for (test = ptr; test < eptr; test++) {
+ if (test->val == entry.val)
return test - ptr;
- }
}
return -1;
}
-static int shmem_unuse_inode (struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
+static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
swp_entry_t *ptr;
unsigned long idx;
int offset;
-
+
idx = 0;
+ ptr = info->i_direct;
spin_lock (&info->lock);
- offset = shmem_clear_swp (entry, info->i_direct, SHMEM_NR_DIRECT);
+ offset = info->next_index;
+ if (offset > SHMEM_NR_DIRECT)
+ offset = SHMEM_NR_DIRECT;
+ offset = shmem_find_swp(entry, ptr, ptr + offset);
if (offset >= 0)
goto found;
@@ -402,13 +404,18 @@
ptr = shmem_swp_entry(info, idx, 0);
if (IS_ERR(ptr))
continue;
- offset = shmem_clear_swp (entry, ptr, ENTRIES_PER_PAGE);
+ offset = info->next_index - idx;
+ if (offset > ENTRIES_PER_PAGE)
+ offset = ENTRIES_PER_PAGE;
+ offset = shmem_find_swp(entry, ptr, ptr + offset);
if (offset >= 0)
goto found;
}
spin_unlock (&info->lock);
return 0;
found:
+ swap_free(entry);
+ ptr[offset] = (swp_entry_t) {0};
delete_from_swap_cache(page);
add_to_page_cache(page, info->inode->i_mapping, offset + idx);
SetPageDirty(page);
@@ -419,7 +426,7 @@
}
/*
- * unuse_shmem() search for an eventually swapped out shmem page.
+ * shmem_unuse() search for an eventually swapped out shmem page.
*/
void shmem_unuse(swp_entry_t entry, struct page *page)
{
@@ -430,8 +437,12 @@
list_for_each(p, &shmem_inodes) {
info = list_entry(p, struct shmem_inode_info, list);
- if (shmem_unuse_inode(info, entry, page))
+ if (info->swapped && shmem_unuse_inode(info, entry, page)) {
+ /* move head to start search for next from here */
+ list_del(&shmem_inodes);
+ list_add_tail(&shmem_inodes, p);
break;
+ }
}
spin_unlock (&shmem_ilock);
}
@@ -721,7 +732,7 @@
inode->i_op = &shmem_inode_operations;
inode->i_fop = &shmem_file_operations;
spin_lock (&shmem_ilock);
- list_add (&SHMEM_I(inode)->list, &shmem_inodes);
+ list_add_tail(&SHMEM_I(inode)->list, &shmem_inodes);
spin_unlock (&shmem_ilock);
break;
case S_IFDIR:
@@ -1163,7 +1174,7 @@
}
inode->i_op = &shmem_symlink_inode_operations;
spin_lock (&shmem_ilock);
- list_add (&info->list, &shmem_inodes);
+ list_add_tail(&info->list, &shmem_inodes);
spin_unlock (&shmem_ilock);
down(&info->sem);
page = shmem_getpage_locked(info, inode, 0);
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On June 11, 2002 20:25, Hugh Dickins wrote:
> On Tue, 11 Jun 2002, Ryan Cumming wrote:
> > On June 11, 2002 19:54, Hugh Dickins wrote:
> > > + dir->i_ctime = dir->i_mtime = CURRENT_TIME;
> >
> > I'm probably misreading this, but why does shmem_mknod modify the
> > directory's ctime?
>
> Hmmm, good question. Perhaps I'll have dreamt up an answer by morning.
Well, lets see if the list has any ideas while you're sleeping.
- -Ryan
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.0.7 (GNU/Linux)
iD8DBQE9BsCLLGMzRzbJfbQRAic3AJ9hh76od28Ic5OzB2PU8hLsV5vogACfbITB
8FyDd6i5BeMtk3xLzt1Y5ns=
=7+h9
-----END PGP SIGNATURE-----
On Tue, Jun 11, 2002 at 08:31:20PM -0700, Ryan Cumming wrote:
> On June 11, 2002 20:25, Hugh Dickins wrote:
> > On Tue, 11 Jun 2002, Ryan Cumming wrote:
> > > On June 11, 2002 19:54, Hugh Dickins wrote:
> > > > + dir->i_ctime = dir->i_mtime = CURRENT_TIME;
> > >
> > > I'm probably misreading this, but why does shmem_mknod modify the
> > > directory's ctime?
> >
> > Hmmm, good question. Perhaps I'll have dreamt up an answer by morning.
> Well, lets see if the list has any ideas while you're sleeping.
My guess would be that 'mtime' should always be updated because the
directory contents are changed (a new name is added). And 'ctime' has to
change whenever this new name causes a change in the size of the
directory, because the i_size attribute has changed.
As tmpfs doesn't really have on-disk blocks for its directories, every
create will change the directory size (as well as every unlink).
Jan