2008-08-18 12:28:56

by Takashi Sato

[permalink] [raw]
Subject: [PATCH 3/3] Add timeout feature

The timeout feature is added to freeze ioctl. And new ioctl
to reset the timeout period is added.
o Freeze the filesystem
int ioctl(int fd, int FIFREEZE, long *timeout_sec)
fd: The file descriptor of the mountpoint
FIFREEZE: request code for the freeze
timeout_sec: the timeout period in seconds
If it's 0 or 1, the timeout isn't set.
This special case of "1" is implemented to keep
the compatibility with XFS applications.
Return value: 0 if the operation succeeds. Otherwise, -1

o Reset the timeout period
int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
fd:file descriptor of mountpoint
FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
timeout_sec: new timeout period in seconds
Return value: 0 if the operation succeeds. Otherwise, -1
Error number: If the filesystem has already been unfrozen,
errno is set to EINVAL.

Signed-off-by: Takashi Sato <[email protected]>
Signed-off-by: Masayuki Hamaguchi <[email protected]>
---
drivers/md/dm.c | 2 -
fs/block_dev.c | 2 +
fs/buffer.c | 16 +++++++--
fs/ioctl.c | 77 ++++++++++++++++++++++++++++++++++++++++++--
fs/super.c | 57 ++++++++++++++++++++++++++++++++
fs/xfs/xfs_fsops.c | 2 -
include/linux/buffer_head.h | 3 +
include/linux/fs.h | 10 +++++
8 files changed, 160 insertions(+), 9 deletions(-)

diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/drivers/md/dm.c linux-2.6.27-rc2-timeout/
drivers/md/dm.c
--- linux-2.6.27-rc2-xfs/drivers/md/dm.c 2008-08-07 09:00:27.000000000 +0900
+++ linux-2.6.27-rc2-timeout/drivers/md/dm.c 2008-08-07 09:14:30.000000000 +0900
@@ -1451,7 +1451,7 @@ static int lock_fs(struct mapped_device

WARN_ON(md->frozen_sb);

- md->frozen_sb = freeze_bdev(md->suspended_bdev);
+ md->frozen_sb = freeze_bdev(md->suspended_bdev, 0);
if (IS_ERR(md->frozen_sb)) {
r = PTR_ERR(md->frozen_sb);
md->frozen_sb = NULL;
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/block_dev.c linux-2.6.27-rc2-timeout/f
s/block_dev.c
--- linux-2.6.27-rc2-xfs/fs/block_dev.c 2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/block_dev.c 2008-08-07 09:14:30.000000000 +0900
@@ -287,6 +287,8 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
/* Initialize semaphore for freeze. */
sema_init(&bdev->bd_freeze_sem, 1);
+ /* Setup freeze timeout function. */
+ INIT_DELAYED_WORK(&bdev->bd_freeze_timeout, freeze_timeout);
}

static inline void __bd_forget(struct inode *inode)
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/buffer.c linux-2.6.27-rc2-timeout/fs/b
uffer.c
--- linux-2.6.27-rc2-xfs/fs/buffer.c 2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/buffer.c 2008-08-07 09:14:30.000000000 +0900
@@ -190,14 +190,18 @@ int fsync_bdev(struct block_device *bdev

/**
* freeze_bdev -- lock a filesystem and force it into a consistent state
- * @bdev: blockdevice to lock
+ * @bdev: blockdevice to lock
+ * @timeout_msec: timeout period
*
* This takes the block device bd_mount_sem to make sure no new mounts
* happen on bdev until thaw_bdev() is called.
* If a superblock is found on this device, we take the s_umount semaphore
* on it to make sure nobody unmounts until the snapshot creation is done.
+ * If timeout_msec is bigger than 0, this registers the delayed work for
+ * timeout of the freeze feature.
*/
-struct super_block *freeze_bdev(struct block_device *bdev)
+struct super_block *freeze_bdev(struct block_device *bdev,
+ unsigned int timeout_msec)
{
struct super_block *sb;

@@ -228,8 +232,11 @@ struct super_block *freeze_bdev(struct b
}

sync_blockdev(bdev);
- up(&bdev->bd_freeze_sem);
+ /* Setup unfreeze timer. */
+ if (timeout_msec > 0)
+ add_freeze_timeout(bdev, timeout_msec);

+ up(&bdev->bd_freeze_sem);
return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
}
EXPORT_SYMBOL(freeze_bdev);
@@ -255,6 +262,9 @@ int thaw_bdev(struct block_device *bdev,
return 0;
}

+ /* Delete unfreeze timer. */
+ del_freeze_timeout(bdev);
+
if (sb) {
BUG_ON(sb->s_bdev != bdev);

diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/ioctl.c linux-2.6.27-rc2-timeout/fs/io
ctl.c
--- linux-2.6.27-rc2-xfs/fs/ioctl.c 2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/ioctl.c 2008-08-07 09:14:30.000000000 +0900
@@ -145,12 +145,16 @@ static int ioctl_fioasync(unsigned int f
* ioctl_freeze - Freeze the filesystem.
*
* @filp: target file
+ * @argp: timeout value(sec)
*
* Call freeze_bdev() to freeze the filesystem.
*/
-static int ioctl_freeze(struct file *filp)
+static int ioctl_freeze(struct file *filp, int __user *argp)
{
+ int timeout_sec;
+ unsigned int timeout_msec;
struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+ int error;

if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -163,8 +167,25 @@ static int ioctl_freeze(struct file *fil
if (sb->s_bdev == NULL)
return -EINVAL;

+ /* arg(sec) to tick value. */
+ error = get_user(timeout_sec, argp);
+ if (error != 0)
+ return error;
+
+ if (timeout_sec < 0 || timeout_sec > UINT_MAX/1000)
+ return -EINVAL;
+
+ /*
+ * If 1 is specified as the timeout period it is changed into 0
+ * to retain compatibility with XFS's xfs_freeze.
+ */
+ if (timeout_sec == 1)
+ timeout_sec = 0;
+
+ timeout_msec = timeout_sec * 1000;
+
/* Freeze */
- sb = freeze_bdev(sb->s_bdev);
+ sb = freeze_bdev(sb->s_bdev, timeout_msec);
if (IS_ERR(sb))
return PTR_ERR(sb);
return 0;
@@ -193,6 +214,52 @@ static int ioctl_thaw(struct file *filp)
}

/*
+ * ioctl_freeze_reset_timeout - Reset timeout for freeze.
+ *
+ * @filp: target file
+ * @argp: timeout value(sec)
+ *
+ * Reset timeout for freeze.
+ */
+static int
+ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
+{
+ int timeout_sec;
+ unsigned int timeout_msec;
+ struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+ struct block_device *bdev = sb->s_bdev;
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* If a regular file or a directory isn't specified, return EINVAL. */
+ if (bdev == NULL)
+ return -EINVAL;
+
+ /* arg(sec) to tick value */
+ error = get_user(timeout_sec, argp);
+ if (error)
+ return error;
+
+ if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
+ return -EINVAL;
+
+ timeout_msec = timeout_sec * 1000;
+
+ down(&bdev->bd_freeze_sem);
+ if (!bdev->bd_freeze_count) {
+ up(&bdev->bd_freeze_sem);
+ return -EINVAL;
+ }
+ /* setup unfreeze timer */
+ add_freeze_timeout(bdev, timeout_msec);
+ up(&bdev->bd_freeze_sem);
+
+ return 0;
+}
+
+/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
*
@@ -235,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
break;

case FIFREEZE:
- error = ioctl_freeze(filp);
+ error = ioctl_freeze(filp, argp);
break;

case FITHAW:
error = ioctl_thaw(filp);
break;

+ case FIFREEZE_RESET_TIMEOUT:
+ error = ioctl_freeze_reset_timeout(filp, argp);
+ break;
+
default:
if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
error = file_ioctl(filp, cmd, arg);
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/super.c linux-2.6.27-rc2-timeout/fs/su
per.c
--- linux-2.6.27-rc2-xfs/fs/super.c 2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/super.c 2008-08-07 09:14:30.000000000 +0900
@@ -981,3 +981,60 @@ struct vfsmount *kern_mount_data(struct
}

EXPORT_SYMBOL_GPL(kern_mount_data);
+
+/*
+ * freeze_timeout - Thaw the filesystem.
+ *
+ * @work: work queue (delayed_work.work)
+ *
+ * Called by the delayed work when elapsing the timeout period.
+ * Thaw the filesystem.
+ */
+void freeze_timeout(struct work_struct *work)
+{
+ struct block_device *bd = container_of(work,
+ struct block_device, bd_freeze_timeout.work);
+ struct super_block *sb = get_super(bd);
+
+ thaw_bdev(bd, sb);
+
+ if (sb)
+ drop_super(sb);
+}
+EXPORT_SYMBOL_GPL(freeze_timeout);
+
+/*
+ * add_freeze_timeout - Add timeout for freeze.
+ *
+ * @bdev: block device struct
+ * @timeout_msec: timeout period
+ *
+ * Add the delayed work for freeze timeout to the delayed work queue.
+ */
+void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
+{
+ s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
+
+ /* Set delayed work queue */
+ cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+ schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
+}
+
+/*
+ * del_freeze_timeout - Delete timeout for freeze.
+ *
+ * @bdev: block device struct
+ *
+ * Delete the delayed work for freeze timeout from the delayed work queue.
+ */
+void del_freeze_timeout(struct block_device *bdev)
+{
+ /*
+ * It's possible that the delayed work task (freeze_timeout()) calls
+ * del_freeze_timeout(). If the delayed work task calls
+ * cancel_delayed_work_sync((), the deadlock will occur.
+ * So we need this check (delayed_work_pending()).
+ */
+ if (delayed_work_pending(&bdev->bd_freeze_timeout))
+ cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
+}
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/fs/xfs/xfs_fsops.c linux-2.6.27-rc2-timeo
ut/fs/xfs/xfs_fsops.c
--- linux-2.6.27-rc2-xfs/fs/xfs/xfs_fsops.c 2008-08-07 09:00:29.000000000 +0900
+++ linux-2.6.27-rc2-timeout/fs/xfs/xfs_fsops.c 2008-08-07 09:14:30.000000000 +0900
@@ -621,7 +621,7 @@ xfs_fs_goingdown(
{
switch (inflags) {
case XFS_FSOP_GOING_FLAGS_DEFAULT: {
- struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
+ struct super_block *sb = freeze_bdev(mp->m_super->s_bdev, 0);

if (sb && !IS_ERR(sb)) {
xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/include/linux/buffer_head.h linux-2.6.27-
rc2-timeout/include/linux/buffer_head.h
--- linux-2.6.27-rc2-xfs/include/linux/buffer_head.h 2008-08-07 09:00:39.000000000 +0900
+++ linux-2.6.27-rc2-timeout/include/linux/buffer_head.h 2008-08-07 09:14:30.000000000 +0900
@@ -169,7 +169,8 @@ int sync_blockdev(struct block_device *b
void __wait_on_buffer(struct buffer_head *);
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
int fsync_bdev(struct block_device *);
-struct super_block *freeze_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *,
+ unsigned int timeout_msec);
int thaw_bdev(struct block_device *, struct super_block *);
int fsync_super(struct super_block *);
int fsync_no_super(struct block_device *);
diff -uprN -X linux-2.6.27-rc2.org/Documentation/dontdiff linux-2.6.27-rc2-xfs/include/linux/fs.h linux-2.6.27-rc2-timeo
ut/include/linux/fs.h
--- linux-2.6.27-rc2-xfs/include/linux/fs.h 2008-08-07 09:00:39.000000000 +0900
+++ linux-2.6.27-rc2-timeout/include/linux/fs.h 2008-08-07 09:14:30.000000000 +0900
@@ -8,6 +8,7 @@

#include <linux/limits.h>
#include <linux/ioctl.h>
+#include <linux/workqueue.h>

/*
* It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -228,6 +229,7 @@ extern int dir_notify_enable;
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
#define FITHAW _IOWR('X', 120, int) /* Thaw */
+#define FIFREEZE_RESET_TIMEOUT _IO(0x00, 3) /* Reset freeze timeout */

#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
@@ -576,10 +578,13 @@ struct block_device {
* care to not mess up bd_private for that case.
*/
unsigned long bd_private;
+
/* The counter of freeze processes */
int bd_freeze_count;
/* Semaphore for freeze */
struct semaphore bd_freeze_sem;
+ /* Delayed work for freeze */
+ struct delayed_work bd_freeze_timeout;
};

/*
@@ -2159,5 +2164,10 @@ int proc_nr_files(struct ctl_table *tabl

int get_filesystem_list(char * buf);

+extern void add_freeze_timeout(struct block_device *bdev,
+ unsigned int timeout_msec);
+extern void del_freeze_timeout(struct block_device *bdev);
+extern void freeze_timeout(struct work_struct *work);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_FS_H */


2008-08-21 20:20:06

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 3/3] Add timeout feature

On Mon, 18 Aug 2008 21:28:56 +0900
Takashi Sato <[email protected]> wrote:

> The timeout feature is added to freeze ioctl. And new ioctl
> to reset the timeout period is added.
> o Freeze the filesystem
> int ioctl(int fd, int FIFREEZE, long *timeout_sec)
> fd: The file descriptor of the mountpoint
> FIFREEZE: request code for the freeze
> timeout_sec: the timeout period in seconds
> If it's 0 or 1, the timeout isn't set.
> This special case of "1" is implemented to keep
> the compatibility with XFS applications.
> Return value: 0 if the operation succeeds. Otherwise, -1
>
> o Reset the timeout period
> int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
> fd:file descriptor of mountpoint
> FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
> timeout_sec: new timeout period in seconds
> Return value: 0 if the operation succeeds. Otherwise, -1
> Error number: If the filesystem has already been unfrozen,
> errno is set to EINVAL.

I don't think the changelogs actually explained why this feature is
being added?

Which userspace tools are expected to send these ioctls? Something in
util-linux? dm-utils? Are patches to those packages planned?

>
> ...
>
> /*
> + * ioctl_freeze_reset_timeout - Reset timeout for freeze.
> + *
> + * @filp: target file
> + * @argp: timeout value(sec)
> + *
> + * Reset timeout for freeze.
> + */
> +static int
> +ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
> +{
> + int timeout_sec;
> + unsigned int timeout_msec;
> + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> + struct block_device *bdev = sb->s_bdev;
> + int error;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + /* If a regular file or a directory isn't specified, return EINVAL. */
> + if (bdev == NULL)
> + return -EINVAL;
> +
> + /* arg(sec) to tick value */
> + error = get_user(timeout_sec, argp);
> + if (error)
> + return error;
> +
> + if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
> + return -EINVAL;
> +
> + timeout_msec = timeout_sec * 1000;
> +
> + down(&bdev->bd_freeze_sem);
> + if (!bdev->bd_freeze_count) {
> + up(&bdev->bd_freeze_sem);
> + return -EINVAL;
> + }
> + /* setup unfreeze timer */
> + add_freeze_timeout(bdev, timeout_msec);
> + up(&bdev->bd_freeze_sem);
> +
> + return 0;
> +}

This duplicates quite a bit of code from ioctl_freeze(). Can this be
cleaned up?


> +/*
> * When you add any new common ioctls to the switches above and below
> * please update compat_sys_ioctl() too.
> *
> @@ -235,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
> break;
>
> case FIFREEZE:
> - error = ioctl_freeze(filp);
> + error = ioctl_freeze(filp, argp);
> break;
>
> case FITHAW:
> error = ioctl_thaw(filp);
> break;
>
> + case FIFREEZE_RESET_TIMEOUT:
> + error = ioctl_freeze_reset_timeout(filp, argp);
> + break;
> +
> default:
> if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
> error = file_ioctl(filp, cmd, arg);
>
> ...
>
> EXPORT_SYMBOL_GPL(kern_mount_data);
> +
> +/*
> + * freeze_timeout - Thaw the filesystem.
> + *
> + * @work: work queue (delayed_work.work)
> + *
> + * Called by the delayed work when elapsing the timeout period.
> + * Thaw the filesystem.
> + */
> +void freeze_timeout(struct work_struct *work)
> +{
> + struct block_device *bd = container_of(work,
> + struct block_device, bd_freeze_timeout.work);
> + struct super_block *sb = get_super(bd);
> +
> + thaw_bdev(bd, sb);
> +
> + if (sb)
> + drop_super(sb);
> +}
> +EXPORT_SYMBOL_GPL(freeze_timeout);

I can't see why this was exported.

> +/*
> + * add_freeze_timeout - Add timeout for freeze.
> + *
> + * @bdev: block device struct
> + * @timeout_msec: timeout period
> + *
> + * Add the delayed work for freeze timeout to the delayed work queue.
> + */
> +void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
> +{
> + s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
> +
> + /* Set delayed work queue */
> + cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
> + schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
> +}

I don't particularly like the names of these new global symbols. The
kernel already has a "freezer" thing, part of power-management.
Introducing another one is a bit confusing.

otoh, freezer seems to have consistently used "freezer", so the 'r'
arguable saves us.

Still, I'd have thought that "fsfreeze" would have been a clearer, more
specific identifier for the whole project.

> +/*
> + * del_freeze_timeout - Delete timeout for freeze.
> + *
> + * @bdev: block device struct
> + *
> + * Delete the delayed work for freeze timeout from the delayed work queue.
> + */
> +void del_freeze_timeout(struct block_device *bdev)
> +{
> + /*
> + * It's possible that the delayed work task (freeze_timeout()) calls
> + * del_freeze_timeout(). If the delayed work task calls
> + * cancel_delayed_work_sync((), the deadlock will occur.
> + * So we need this check (delayed_work_pending()).
> + */
> + if (delayed_work_pending(&bdev->bd_freeze_timeout))
> + cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
> +}

So if the calling task is keventd via run_workqueue() then
delayed_work_pending() should return false due to run_workqueue()
ordering, so we avoid the deadlock.

Seems a bit racy if some other process starts the delayed-work while
this function is running but I guess the new semaphore prevents that.

Perhaps cancel_delayed_work_sync() shouldn't hang up if called from the
work handler?

2008-08-22 18:16:56

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 3/3] Add timeout feature

On Thu, Aug 21, 2008 at 01:20:06PM -0700, Andrew Morton wrote:
> I don't think the changelogs actually explained why this feature is
> being added?
>
> Which userspace tools are expected to send these ioctls? Something in
> util-linux? dm-utils? Are patches to those packages planned?

Currently the only surspace using freeze and thaw is xfs_freeze from
xfsprogs, which would work for various other filesystems that implement
->write_super_lockfs now instead of just XFS with patch 1.

The freeze stuff in this third patch isn't and won't be used by
xfs_freeze and doesn't make all that much sense (and we already had a
lot of previous discussion on this..)

2008-08-24 17:03:57

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH 3/3] Add timeout feature

On 08/21, Andrew Morton wrote:
>
> On Mon, 18 Aug 2008 21:28:56 +0900
> Takashi Sato <[email protected]> wrote:
>
> > +void del_freeze_timeout(struct block_device *bdev)
> > +{
> > + /*
> > + * It's possible that the delayed work task (freeze_timeout()) calls
> > + * del_freeze_timeout(). If the delayed work task calls
> > + * cancel_delayed_work_sync((), the deadlock will occur.
> > + * So we need this check (delayed_work_pending()).
> > + */
> > + if (delayed_work_pending(&bdev->bd_freeze_timeout))
> > + cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
> > +}

I don't understand this patch, but the code above looks strange to me...

Let's suppose del_freeze_timeout() is called by ioctl_thaw()->thaw_bdev().
Now,

IF delayed_work_pending() == T

we can deadlock if the timer expires before
cancel_delayed_work_sync() cancels it?
in that case we are going to wait for this work,
but freeze_timeout()->thaw_bdev() will block
on ->bd_freeze_sem, no?

ELSE

we don't really flush the work, it is possible
the timer has already expired and the work
is pending. It will run later.

Perhaps this all is correct, but in that case, why can't we just do

void del_freeze_timeout(struct block_device *bdev)
{
cancel_delayed_work(&bdev->bd_freeze_timeout);
}

?

> Perhaps cancel_delayed_work_sync() shouldn't hang up if called from the
> work handler?

This is trivial,

--- kernel/workqueue.c
+++ kernel/workqueue.c
@@ -516,6 +516,9 @@ static void wait_on_cpu_work(struct cpu_
struct wq_barrier barr;
int running = 0;

+ if (cwq->thread == current)
+ return;
+
spin_lock_irq(&cwq->lock);
if (unlikely(cwq->current_work == work)) {
insert_wq_barrier(cwq, &barr, cwq->worklist.next);

but do we really need this?

We have a similar hack in flush_cpu_workqueue(), and we are going
to kill it once we fix the callers.

I dunno.

Oleg.


2008-08-29 09:39:03

by Takashi Sato

[permalink] [raw]
Subject: Re: [PATCH 3/3] Add timeout feature

Hi, Andrew and Oleg.

Andrew Morton wrote:
>On Mon, 18 Aug 2008 21:28:56 +0900
>Takashi Sato <[email protected]> wrote:
>
>> The timeout feature is added to freeze ioctl. And new ioctl
>> to reset the timeout period is added.
>> o Freeze the filesystem
>> int ioctl(int fd, int FIFREEZE, long *timeout_sec)
>> fd: The file descriptor of the mountpoint
>> FIFREEZE: request code for the freeze
>> timeout_sec: the timeout period in seconds
>> If it's 0 or 1, the timeout isn't set.
>> This special case of "1" is implemented to keep
>> the compatibility with XFS applications.
>> Return value: 0 if the operation succeeds. Otherwise, -1
>>
>> o Reset the timeout period
>> int ioctl(int fd, int FIFREEZE_RESET_TIMEOUT, long *timeout_sec)
>> fd:file descriptor of mountpoint
>> FIFREEZE_RESET_TIMEOUT: request code for reset of timeout period
>> timeout_sec: new timeout period in seconds
>> Return value: 0 if the operation succeeds. Otherwise, -1
>> Error number: If the filesystem has already been unfrozen,
>> errno is set to EINVAL.
>
>I don't think the changelogs actually explained why this feature is
>being added?

I will fix the changelogs as following.

-----------------------------------------------------------
The timeout feature is added to "freeze ioctl" to solve a deadlock
when the freezer accesses a frozen filesystem. And new ioctl
to reset the timeout period is added to extend the timeout period.
For example, the freezer resets the timeout period to 10 seconds every 5
seconds. In this approach, even if the freezer causes a deadlock by
accessing the frozen filesystem, it will be solved by the timeout
in 10 seconds and the freezer will be able to recognize that
at the next reset of timeout period.
-----------------------------------------------------------

>
>Which userspace tools are expected to send these ioctls? Something in
>util-linux? dm-utils? Are patches to those packages planned?

I think the management software for the storage device
will use these ioctls to take a consistent backup.

>>
>> ...
>>
>> /*
>> + * ioctl_freeze_reset_timeout - Reset timeout for freeze.
>> + *
>> + * @filp: target file
>> + * @argp: timeout value(sec)
>> + *
>> + * Reset timeout for freeze.
>> + */
>> +static int
>> +ioctl_freeze_reset_timeout(struct file *filp, int __user *argp)
>> +{
>> + int timeout_sec;
>> + unsigned int timeout_msec;
>> + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
>> + struct block_device *bdev = sb->s_bdev;
>> + int error;
>> +
>> + if (!capable(CAP_SYS_ADMIN))
>> + return -EPERM;
>> +
>> + /* If a regular file or a directory isn't specified, return EINVAL. */
>> + if (bdev == NULL)
>> + return -EINVAL;
>> +
>> + /* arg(sec) to tick value */
>> + error = get_user(timeout_sec, argp);
>> + if (error)
>> + return error;
>> +
>> + if (timeout_sec <= 0 || timeout_sec > UINT_MAX/1000)
>> + return -EINVAL;
>> +
>> + timeout_msec = timeout_sec * 1000;
>> +
>> + down(&bdev->bd_freeze_sem);
>> + if (!bdev->bd_freeze_count) {
>> + up(&bdev->bd_freeze_sem);
>> + return -EINVAL;
>> + }
>> + /* setup unfreeze timer */
>> + add_freeze_timeout(bdev, timeout_msec);
>> + up(&bdev->bd_freeze_sem);
>> +
>> + return 0;
>> +}
>
>This duplicates quite a bit of code from ioctl_freeze(). Can this be
>cleaned up?

I will clean it up.

>> +/*
>> * When you add any new common ioctls to the switches above and below
>> * please update compat_sys_ioctl() too.
>> *
>> @@ -235,13 +302,17 @@ int do_vfs_ioctl(struct file *filp, unsi
>> break;
>>
>> case FIFREEZE:
>> - error = ioctl_freeze(filp);
>> + error = ioctl_freeze(filp, argp);
>> break;
>>
>> case FITHAW:
>> error = ioctl_thaw(filp);
>> break;
>>
>> + case FIFREEZE_RESET_TIMEOUT:
>> + error = ioctl_freeze_reset_timeout(filp, argp);
>> + break;
>> +
>> default:
>> if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
>> error = file_ioctl(filp, cmd, arg);
>>
>> ...
>>
>> EXPORT_SYMBOL_GPL(kern_mount_data);
>> +
>> +/*
>> + * freeze_timeout - Thaw the filesystem.
>> + *
>> + * @work: work queue (delayed_work.work)
>> + *
>> + * Called by the delayed work when elapsing the timeout period.
>> + * Thaw the filesystem.
>> + */
>> +void freeze_timeout(struct work_struct *work)
>> +{
>> + struct block_device *bd = container_of(work,
>> + struct block_device, bd_freeze_timeout.work);
>> + struct super_block *sb = get_super(bd);
>> +
>> + thaw_bdev(bd, sb);
>> +
>> + if (sb)
>> + drop_super(sb);
>> +}
>> +EXPORT_SYMBOL_GPL(freeze_timeout);
>
>I can't see why this was exported.

It isn't needed, I will delete it.

>> +/*
>> + * add_freeze_timeout - Add timeout for freeze.
>> + *
>> + * @bdev: block device struct
>> + * @timeout_msec: timeout period
>> + *
>> + * Add the delayed work for freeze timeout to the delayed work queue.
>> + */
>> +void add_freeze_timeout(struct block_device *bdev, unsigned int timeout_msec)
>> +{
>> + s64 timeout_jiffies = msecs_to_jiffies(timeout_msec);
>> +
>> + /* Set delayed work queue */
>> + cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
>> + schedule_delayed_work(&bdev->bd_freeze_timeout, timeout_jiffies);
>> +}
>
>I don't particularly like the names of these new global symbols. The
>kernel already has a "freezer" thing, part of power-management.
>Introducing another one is a bit confusing.
>
>otoh, freezer seems to have consistently used "freezer", so the 'r'
>arguable saves us.
>
>Still, I'd have thought that "fsfreeze" would have been a clearer, more
>specific identifier for the whole project.

I will rename the names of these global symbols.
For example, "add_fsfreeze_timeout"...

>> +/*
>> + * del_freeze_timeout - Delete timeout for freeze.
>> + *
>> + * @bdev: block device struct
>> + *
>> + * Delete the delayed work for freeze timeout from the delayed work queue.
>> + */
>> +void del_freeze_timeout(struct block_device *bdev)
>> +{
>> + /*
>> + * It's possible that the delayed work task (freeze_timeout()) calls
>> + * del_freeze_timeout(). If the delayed work task calls
>> + * cancel_delayed_work_sync((), the deadlock will occur.
>> + * So we need this check (delayed_work_pending()).
>> + */
>> + if (delayed_work_pending(&bdev->bd_freeze_timeout))
>> + cancel_delayed_work_sync(&bdev->bd_freeze_timeout);
>> +}
>
>So if the calling task is keventd via run_workqueue() then
>delayed_work_pending() should return false due to run_workqueue()
>ordering, so we avoid the deadlock.
>
>Seems a bit racy if some other process starts the delayed-work while
>this function is running but I guess the new semaphore prevents that.
>
>Perhaps cancel_delayed_work_sync() shouldn't hang up if called from the
>work handler?

I think so.

In my current implementation,
the delayed work task calls thaw_bdev() to unfreeze a filesystem
and it calls del_freeze_timeout(). So, the deadlock occurs.
But I've found that the delayed work task doesn't need to call
del_freeze_timeout() because it is removed
from the delayed work queue automatically after the work finishes.
So I will fix thaw_bdev() so that it doesn't call
del_freeze_timeout() only when it's called by the delayed work task.
And I will delete delayed_work_pending() in del_freeze_timeout().

Cheers, Takashi