2008-08-18 12:28:19

by Takashi Sato

[permalink] [raw]
Subject: [PATCH 1/3] Implement generic freeze feature

The ioctls for the generic freeze feature are below.
o Freeze the filesystem
int ioctl(int fd, int FIFREEZE, arg)
fd: The file descriptor of the mountpoint
FIFREEZE: request code for the freeze
arg: Ignored
Return value: 0 if the operation succeeds. Otherwise, -1

o Unfreeze the filesystem
int ioctl(int fd, int FITHAW, arg)
fd: The file descriptor of the mountpoint
FITHAW: request code for unfreeze
arg: Ignored
Return value: 0 if the operation succeeds. Otherwise, -1

Signed-off-by: Takashi Sato <[email protected]>
Signed-off-by: Masayuki Hamaguchi <[email protected]>
---
fs/block_dev.c | 2 +
fs/buffer.c | 27 ++++++++++++++++++-
fs/ioctl.c | 61 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/buffer_head.h | 2 -
include/linux/fs.h | 6 ++++
5 files changed, 96 insertions(+), 2 deletions(-)

diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2.org/fs/block_dev.c linux-2.6.27-rc2-freeze/fs/block_dev.c
--- linux-2.6.27-rc2.org/fs/block_dev.c 2008-08-06 13:49:54.000000000 +0900
+++ linux-2.6.27-rc2-freeze/fs/block_dev.c 2008-08-07 08:59:54.000000000 +0900
@@ -285,6 +285,8 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&bdev->bd_holder_list);
#endif
inode_init_once(&ei->vfs_inode);
+ /* Initialize semaphore for freeze. */
+ sema_init(&bdev->bd_freeze_sem, 1);
}

static inline void __bd_forget(struct inode *inode)
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2.org/fs/buffer.c linux-2.6.27-rc2-freeze/fs/buffer.c
--- linux-2.6.27-rc2.org/fs/buffer.c 2008-08-06 13:49:54.000000000 +0900
+++ linux-2.6.27-rc2-freeze/fs/buffer.c 2008-08-07 08:59:54.000000000 +0900
@@ -201,6 +201,15 @@ struct super_block *freeze_bdev(struct b
{
struct super_block *sb;

+ down(&bdev->bd_freeze_sem);
+ bdev->bd_freeze_count++;
+ if (bdev->bd_freeze_count > 1) {
+ sb = get_super(bdev);
+ drop_super(sb);
+ up(&bdev->bd_freeze_sem);
+ return sb;
+ }
+
down(&bdev->bd_mount_sem);
sb = get_super(bdev);
if (sb && !(sb->s_flags & MS_RDONLY)) {
@@ -219,6 +228,8 @@ struct super_block *freeze_bdev(struct b
}

sync_blockdev(bdev);
+ up(&bdev->bd_freeze_sem);
+
return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
}
EXPORT_SYMBOL(freeze_bdev);
@@ -230,8 +241,20 @@ EXPORT_SYMBOL(freeze_bdev);
*
* Unlocks the filesystem and marks it writeable again after freeze_bdev().
*/
-void thaw_bdev(struct block_device *bdev, struct super_block *sb)
+int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
+
+ down(&bdev->bd_freeze_sem);
+ if (!bdev->bd_freeze_count) {
+ up(&bdev->bd_freeze_sem);
+ return 0;
+ }
+ bdev->bd_freeze_count--;
+ if (bdev->bd_freeze_count > 0) {
+ up(&bdev->bd_freeze_sem);
+ return 0;
+ }
+
if (sb) {
BUG_ON(sb->s_bdev != bdev);

@@ -244,6 +267,8 @@ void thaw_bdev(struct block_device *bdev
}

up(&bdev->bd_mount_sem);
+ up(&bdev->bd_freeze_sem);
+ return 0;
}
EXPORT_SYMBOL(thaw_bdev);

diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2.org/fs/ioctl.c linux-2.6.27-rc2-freeze/fs/ioctl.c
--- linux-2.6.27-rc2.org/fs/ioctl.c 2008-08-06 13:49:54.000000000 +0900
+++ linux-2.6.27-rc2-freeze/fs/ioctl.c 2008-08-07 08:59:54.000000000 +0900
@@ -13,6 +13,7 @@
#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <linux/buffer_head.h>

#include <asm/ioctls.h>

@@ -141,6 +142,57 @@ static int ioctl_fioasync(unsigned int f
}

/*
+ * ioctl_freeze - Freeze the filesystem.
+ *
+ * @filp: target file
+ *
+ * Call freeze_bdev() to freeze the filesystem.
+ */
+static int ioctl_freeze(struct file *filp)
+{
+ struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* If filesystem doesn't support freeze feature, return. */
+ if (sb->s_op->write_super_lockfs == NULL)
+ return -EOPNOTSUPP;
+
+ /* If a regular file or a directory isn't specified, return. */
+ if (sb->s_bdev == NULL)
+ return -EINVAL;
+
+ /* Freeze */
+ sb = freeze_bdev(sb->s_bdev);
+ if (IS_ERR(sb))
+ return PTR_ERR(sb);
+ return 0;
+}
+
+/*
+ * ioctl_thaw - Thaw the filesystem.
+ *
+ * @filp: target file
+ *
+ * Call thaw_bdev() to thaw the filesystem.
+ */
+static int ioctl_thaw(struct file *filp)
+{
+ struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* If a regular file or a directory isn't specified, return EINVAL. */
+ if (sb->s_bdev == NULL)
+ return -EINVAL;
+
+ /* Thaw */
+ return thaw_bdev(sb->s_bdev, sb);
+}
+
+/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
*
@@ -181,6 +233,15 @@ int do_vfs_ioctl(struct file *filp, unsi
} else
error = -ENOTTY;
break;
+
+ case FIFREEZE:
+ error = ioctl_freeze(filp);
+ break;
+
+ case FITHAW:
+ error = ioctl_thaw(filp);
+ break;
+
default:
if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
error = file_ioctl(filp, cmd, arg);
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2.org/include/linux/buffer_head.h linux-2.6.27-rc2-freeze/include/linux/buffer_head.h
--- linux-2.6.27-rc2.org/include/linux/buffer_head.h 2008-08-06 13:49:54.000000000 +0900
+++ linux-2.6.27-rc2-freeze/include/linux/buffer_head.h 2008-08-07 08:59:54.000000000 +0900
@@ -170,7 +170,7 @@ void __wait_on_buffer(struct buffer_head
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
int fsync_bdev(struct block_device *);
struct super_block *freeze_bdev(struct block_device *);
-void thaw_bdev(struct block_device *, struct super_block *);
+int thaw_bdev(struct block_device *, struct super_block *);
int fsync_super(struct super_block *);
int fsync_no_super(struct block_device *);
struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2.org/include/linux/fs.h linux-2.6.27-rc2-freeze/include/linux/fs.h
--- linux-2.6.27-rc2.org/include/linux/fs.h 2008-08-06 13:49:54.000000000 +0900
+++ linux-2.6.27-rc2-freeze/include/linux/fs.h 2008-08-07 08:59:54.000000000 +0900
@@ -226,6 +226,8 @@ extern int dir_notify_enable;
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
+#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
+#define FITHAW _IOWR('X', 120, int) /* Thaw */

#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
@@ -574,6 +576,10 @@ struct block_device {
* care to not mess up bd_private for that case.
*/
unsigned long bd_private;
+ /* The counter of freeze processes */
+ int bd_freeze_count;
+ /* Semaphore for freeze */
+ struct semaphore bd_freeze_sem;
};

/*


2008-08-21 19:59:51

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/3] Implement generic freeze feature

On Mon, 18 Aug 2008 21:28:19 +0900
Takashi Sato <[email protected]> wrote:

> The ioctls for the generic freeze feature are below.
> o Freeze the filesystem
> int ioctl(int fd, int FIFREEZE, arg)
> fd: The file descriptor of the mountpoint
> FIFREEZE: request code for the freeze
> arg: Ignored
> Return value: 0 if the operation succeeds. Otherwise, -1
>
> o Unfreeze the filesystem
> int ioctl(int fd, int FITHAW, arg)
> fd: The file descriptor of the mountpoint
> FITHAW: request code for unfreeze
> arg: Ignored
> Return value: 0 if the operation succeeds. Otherwise, -1
>
>
> ...
>
> --- linux-2.6.27-rc2.org/include/linux/fs.h 2008-08-06 13:49:54.000000000 +0900
> +++ linux-2.6.27-rc2-freeze/include/linux/fs.h 2008-08-07 08:59:54.000000000 +0900
> @@ -226,6 +226,8 @@ extern int dir_notify_enable;
> #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
> #define FIBMAP _IO(0x00,1) /* bmap access */
> #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
> +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
> +#define FITHAW _IOWR('X', 120, int) /* Thaw */

FIFREEZE is 119, but a few lines above we have

#define BLKDISCARD _IO(0x12,119)

Should we be using 120 and 121 here?

> #define FS_IOC_GETFLAGS _IOR('f', 1, long)
> #define FS_IOC_SETFLAGS _IOW('f', 2, long)
> @@ -574,6 +576,10 @@ struct block_device {
> * care to not mess up bd_private for that case.
> */
> unsigned long bd_private;
> + /* The counter of freeze processes */
> + int bd_freeze_count;
> + /* Semaphore for freeze */
> + struct semaphore bd_freeze_sem;

"freeze" is not an adequate description of what this protects. I think
it's only the modification and testing of bd_freeze_count, isn't it?

If so, all this could be done more neatly by removing the lock,
switching to atomic_t and using our (rich) atomic_t operations.

otoh, perhaps it protects more than this, in which case the lock
can/should be switched to a `struct mutex'?



2008-08-22 07:10:11

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 1/3] Implement generic freeze feature

On Aug 21, 2008 12:58 -0700, Andrew Morton wrote:
> > #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
> > +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
> > +#define FITHAW _IOWR('X', 120, int) /* Thaw */
>
> FIFREEZE is 119, but a few lines above we have
>
> #define BLKDISCARD _IO(0x12,119)
>
> Should we be using 120 and 121 here?

No, because 'X' != 0x12... The 'X' is used because this ioctl is compatible
with the XFS implementation of this feature.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2008-08-22 18:15:03

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/3] Implement generic freeze feature

On Mon, Aug 18, 2008 at 09:28:19PM +0900, Takashi Sato wrote:
> + down(&bdev->bd_freeze_sem);
> + bdev->bd_freeze_count++;
> + if (bdev->bd_freeze_count > 1) {
> + sb = get_super(bdev);
> + drop_super(sb);
> + up(&bdev->bd_freeze_sem);
> + return sb;
> + }
> +
> down(&bdev->bd_mount_sem);

Now you have a reference counter of freezes which actually is pretty
sensible, but also needs some documentation. What I don't understand
here at all is why you do the get_super/drop_super in the already frozen
case.

Now that the freeze_count has replaced one of the uses of bd_mount_sem
you should also replace the other use in the unmount path by simply
checking for the freeze_count and aborting if it's set. To do so you'll
need to hold the bd_mount_sem over the whole unmount operation to
prevent new freezes from coming in.

As others noted it should be a mutex and not a semaphore.

> /*
> + * ioctl_freeze - Freeze the filesystem.
> + *
> + * @filp: target file
> + *
> + * Call freeze_bdev() to freeze the filesystem.
> + */
> +static int ioctl_freeze(struct file *filp)

This is not quite kernel-doc format, which would use /** as the comment
start. But I don't think the comment is actually needed; it's a pretty
obvious file-scope function. (The same comment also applies to ioctl_thaw.)

> + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + /* If filesystem doesn't support freeze feature, return. */
> + if (sb->s_op->write_super_lockfs == NULL)
> + return -EOPNOTSUPP;
> +
> + /* If a regular file or a directory isn't specified, return. */
> + if (sb->s_bdev == NULL)
> + return -EINVAL;

I don't understand this comment. What you are checking is that the
filesystem has a non-NULL s_bdev; a NULL s_bdev implies a filesystem
that is not blockdevice-backed.


2008-08-29 09:38:51

by Takashi Sato

[permalink] [raw]
Subject: Re: [PATCH 1/3] Implement generic freeze feature

Hi,

Christoph Hellwig wrote:
>On Mon, Aug 18, 2008 at 09:28:19PM +0900, Takashi Sato wrote:
>> + down(&bdev->bd_freeze_sem);
>> + bdev->bd_freeze_count++;
>> + if (bdev->bd_freeze_count > 1) {
>> + sb = get_super(bdev);
>> + drop_super(sb);
>> + up(&bdev->bd_freeze_sem);
>> + return sb;
>> + }
>> +
>> down(&bdev->bd_mount_sem);
>
>Now you have a reference counter of freezes which actually is pretty
>sensible, but also needs some documentation. What I don't understand
>here at all is why you do the get_super/drop_super in the already frozen
>case.

Even if the filesystem has already been frozen, the superblock
should be returned, because the caller needs to recognize that
freeze_bdev() succeeded and call thaw_bdev() to decrease the reference
count. But I will remove drop_super(), as it should be called in thaw_bdev().

>
>Now that the freeze_count has replaced one of the uses of bd_mount_sem
>you should also replace the other use in the unmount path by simply
>checking for the freez_count and abort if it's set. To do so you'll
>need to hold the bd_mount_sem over the whole unmount operation to
>prevent new frezes from coming in.

In the original implementation, unmount is protected by s_umount
(a semaphore), not bd_mount_sem, so an unmount task waits for the
unfreeze. I think this original behavior shouldn't be changed,
so keeping the existing s_umount lock is better.

>
>As others noted it should be a mutex and not a semaphore.

As you said, we should use the mutex.
I will replace it.

>
>> /*
>> + * ioctl_freeze - Freeze the filesystem.
>> + *
>> + * @filp: target file
>> + *
>> + * Call freeze_bdev() to freeze the filesystem.
>> + */
>> +static int ioctl_freeze(struct file *filp)
>
>This is not quite kerneldcoc format, which would ne a /** as commnt
>start. But I don't think the comment is actually needed, it's a pretty
>obvious file scope function. (Same commnt also applies to ioctl_thaw)

I will remove these comments.

>
>> + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
>> +
>> + if (!capable(CAP_SYS_ADMIN))
>> + return -EPERM;
>> +
>> + /* If filesystem doesn't support freeze feature, return. */
>> + if (sb->s_op->write_super_lockfs == NULL)
>> + return -EOPNOTSUPP;
>> +
>> + /* If a regular file or a directory isn't specified, return. */
>> + if (sb->s_bdev == NULL)
>> + return -EINVAL;
>
>I don't understand this commnt. What you are checking is that the
>filesystem has a non-NULL s_bdev, which implies a not blockdevice-backed
>filesystem.

I will change the comment to:
"If a blockdevice-backed filesystem isn't specified, return."

Cheers, Takashi

2008-08-29 09:36:56

by Takashi Sato

[permalink] [raw]
Subject: Re: [PATCH 1/3] Implement generic freeze feature

Hi,

Andrew Morton wrote:
>> --- linux-2.6.27-rc2.org/include/linux/fs.h 2008-08-06 13:49:54.000000000 +0900
>> +++ linux-2.6.27-rc2-freeze/include/linux/fs.h 2008-08-07 08:59:54.000000000 +0900
>> @@ -226,6 +226,8 @@ extern int dir_notify_enable;
>> #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
>> #define FIBMAP _IO(0x00,1) /* bmap access */
>> #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
>> +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
>> +#define FITHAW _IOWR('X', 120, int) /* Thaw */
>
>FIFREEZE is 119, but a few lines above we have
>
>#define BLKDISCARD _IO(0x12,119)
>
>Should we be using 120 and 121 here?

As Andreas said, we need to use 'X' to keep compatibility with
XFS's freeze ioctl.

>
>> #define FS_IOC_GETFLAGS _IOR('f', 1, long)
>> #define FS_IOC_SETFLAGS _IOW('f', 2, long)
>> @@ -574,6 +576,10 @@ struct block_device {
>> * care to not mess up bd_private for that case.
>> */
>> unsigned long bd_private;
>> + /* The counter of freeze processes */
>> + int bd_freeze_count;
>> + /* Semaphore for freeze */
>> + struct semaphore bd_freeze_sem;
>
>"freeze" is not an adequate description of what this protects. I think
>it's only the modification and testing of bd_freeze_count, isn't it?
>
>If so, all this could be done more neatly by removing the lock,
>switching to atomic_t and using our (rich) atomic_t operations.
>
>otoh, perhaps it protects more than this, in which case the lock
>can/should be switched to a `struct mutex'?

bd_freeze_sem protects the following two sequences.
1. freeze_bdev()
- Test of bd_freeze_count
- Increment of bd_freeze_count
- s_op->write_super_lockfs
- Set unfreeze timer

2. thaw_bdev()
- Test of bd_freeze_count
- Decrement of bd_freeze_count
- s_op->unlockfs
- Unset unfreeze timer
Because the journal sync in ext3's write_super_lockfs might
take a long time, we should use a mutex (not atomic_t).
If bd_freeze_sem protected only the modification and
testing of bd_freeze_count, freeze_bdev() and thaw_bdev() could
run simultaneously and unexpected problems could occur.
(For example, after we run the freeze ioctl with a timeout period,
the filesystem is frozen, but the unfreeze timer isn't set.)

Cheers, Takashi

2008-09-04 16:55:15

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH 1/3] Implement generic freeze feature

Takashi Sato wrote:

> @@ -141,6 +142,57 @@ static int ioctl_fioasync(unsigned int f
> }
>
> /*
> + * ioctl_freeze - Freeze the filesystem.
> + *
> + * @filp: target file
> + *
> + * Call freeze_bdev() to freeze the filesystem.
> + */
> +static int ioctl_freeze(struct file *filp)
> +{
> + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + /* If filesystem doesn't support freeze feature, return. */
> + if (sb->s_op->write_super_lockfs == NULL)
> + return -EOPNOTSUPP;
> +
> + /* If a regular file or a directory isn't specified, return. */
> + if (sb->s_bdev == NULL)
> + return -EINVAL;
> +
> + /* Freeze */
> + sb = freeze_bdev(sb->s_bdev);
> + if (IS_ERR(sb))
> + return PTR_ERR(sb);
> + return 0;
> +}

Not a problem with your patch exactly, but I was just wondering; you
check here whether the sb returned from freeze_bdev is an ERR_PTR (as
does lock_fs()) - but, freeze_bdev never returns an error, does it?
->write_super_lockfs is a void...

It really seems that at least we should be able to handle IO errors on
the freeze request, and tell the user "No, your filesystem was not
frozen..."?

Maybe I'll whip up a patch to see about propagating freeze errors up
from the filesystems that implement it, unless I'm missing some reason
not to do so...?

Also, should this be checking for a NULL returned from freeze_bdev as
well? I guess this should never happen if we have a file open on which
we are calling the ioctl ...

-Eric

2008-09-11 10:58:08

by Takashi Sato

[permalink] [raw]
Subject: Re: [PATCH 1/3] Implement generic freeze feature

Hi,

Eric Sandeen:
>> +static int ioctl_freeze(struct file *filp)
>> +{
>> + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
>> +
>> + if (!capable(CAP_SYS_ADMIN))
>> + return -EPERM;
>> +
>> + /* If filesystem doesn't support freeze feature, return. */
>> + if (sb->s_op->write_super_lockfs == NULL)
>> + return -EOPNOTSUPP;
>> +
>> + /* If a regular file or a directory isn't specified, return. */
>> + if (sb->s_bdev == NULL)
>> + return -EINVAL;
>> +
>> + /* Freeze */
>> + sb = freeze_bdev(sb->s_bdev);
>> + if (IS_ERR(sb))
>> + return PTR_ERR(sb);
>> + return 0;
>> +}
>
> Not a problem with your patch exactly, but I was just wondering; you
> check here whether the sb returned from freeze_bdev is an ERR_PTR (as
> does lock_fs()) - but, freeze_bdev never returns an error, does it?
> ->write_super_lockfs is a void...
>
> It really seems that at least we should be able to handle IO errors on
> the freeze request, and tell the user "No, your filesystem was not
> frozen..."?
>
> Maybe I'll whip up a patch to see about propagating freeze errors up
> from the filesystems that implement it, unless I'm missing some reason
> not to do so...?

Right.
We should handle an IO error which occurs in write_super_lockfs.
I will change the return type of write_super_lockfs to "int" so that it can return an error.
I will also consider returning an error from ext3_write_super_lockfs(), because
journal_flush() in ext3_write_super_lockfs() doesn't handle an IO error.

> Also, should this be checking for a NULL returned from freeze_bdev as
> well? I guess this should never happen if we have a file open on which
> we are calling the ioctl ...

I think ioctl_freeze doesn't need to check for NULL because, as you said,
it never happens.

Cheers, Takashi