Miles Lane wrote:
>
> Hi,
>
> Is there any possibility we could:
>
> 1) Add support to the boot/mounting process
> so that, if a machine is being powered by
> battery, EXT3 partitions are mounted with
> EXT2, instead?
>
> 2) While the machine is running, notice when the
> power source switches between AC and battery
> or vice versa and remount partitions EXT3
> partitions to use EXT2 whenever a battery is
> being used?
>
umm, why?
If it's because of the disk-spins-up-too-much problem then
that can be addressed by allowing the commit interval to be
set to larger values.
--- 2.4.19-pre10/fs/jbd/journal.c~ext3-commit-interval Fri Jun 7 22:56:37 2002
+++ 2.4.19-pre10-akpm/fs/jbd/journal.c Sat Jun 8 00:30:32 2002
@@ -34,6 +34,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/sysctl.h>
#include <asm/uaccess.h>
#include <linux/proc_fs.h>
@@ -85,6 +86,8 @@ EXPORT_SYMBOL(journal_force_commit);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
+int jbd_commit_interval = 5; /* /proc/sys/fs/jbd/jbd-commit-interval */
+
/*
* journal_datalist_lock is used to protect data buffers:
*
@@ -223,8 +226,8 @@ int kjournald(void *arg)
journal->j_task = current;
wake_up(&journal->j_wait_done_commit);
- printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n",
- journal->j_commit_interval / HZ);
+ printk(KERN_INFO "kjournald starting. Commit interval %d seconds\n",
+ jbd_commit_interval);
list_add(&journal->j_all_journals, &all_journals);
/* And now, wait forever for commit wakeup events. */
@@ -708,8 +711,6 @@ static journal_t * journal_init_common (
init_MUTEX(&journal->j_checkpoint_sem);
init_MUTEX(&journal->j_sem);
- journal->j_commit_interval = (HZ * 5);
-
/* The journal is marked for error until we succeed with recovery! */
journal->j_flags = JFS_ABORT;
@@ -1775,59 +1776,41 @@ int journal_enable_debug;
EXPORT_SYMBOL(journal_enable_debug);
#endif
-#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
-
-static struct proc_dir_entry *proc_jbd_debug;
-
-int read_jbd_debug(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int ret;
-
- ret = sprintf(page + off, "%d\n", journal_enable_debug);
- *eof = 1;
- return ret;
-}
+static ctl_table jbd_table[] = {
+ { 1, "jbd-commit-interval", &jbd_commit_interval,
+ sizeof(jbd_commit_interval), 0644, NULL,
+ &proc_dointvec, NULL, },
+#ifdef CONFIG_JBD_DEBUG
+ { 2, "jbd-debug", &journal_enable_debug,
+ sizeof(journal_enable_debug), 0644, NULL,
+ &proc_dointvec, NULL, },
+#endif
+ { 0, },
+};
-int write_jbd_debug(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- char buf[32];
+static ctl_table jbd_root[] = {
+ { FS_JBD, "jbd", NULL, 0, 0755, jbd_table, },
+ { 0, },
+};
- if (count > ARRAY_SIZE(buf) - 1)
- count = ARRAY_SIZE(buf) - 1;
- if (copy_from_user(buf, buffer, count))
- return -EFAULT;
- buf[ARRAY_SIZE(buf) - 1] = '\0';
- journal_enable_debug = simple_strtoul(buf, NULL, 10);
- return count;
-}
+static ctl_table fs_root[] = {
+ { CTL_FS, "fs", NULL, 0, 0755, jbd_root, },
+ { 0, },
+};
-#define JBD_PROC_NAME "sys/fs/jbd-debug"
+static struct ctl_table_header *sysctl_header;
-static void __init create_jbd_proc_entry(void)
+static void __init create_jbd_sysctls(void)
{
- proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
- if (proc_jbd_debug) {
- /* Why is this so hard? */
- proc_jbd_debug->read_proc = read_jbd_debug;
- proc_jbd_debug->write_proc = write_jbd_debug;
- }
+ sysctl_header = register_sysctl_table(fs_root, 0);
}
-static void __exit remove_jbd_proc_entry(void)
+static void __exit remove_jbd_sysctls(void)
{
- if (proc_jbd_debug)
- remove_proc_entry(JBD_PROC_NAME, NULL);
+ if (sysctl_header)
+ unregister_sysctl_table(sysctl_header);
}
-#else
-
-#define create_jbd_proc_entry() do {} while (0)
-#define remove_jbd_proc_entry() do {} while (0)
-
-#endif
-
/*
* Module startup and shutdown
*/
@@ -1856,7 +1839,7 @@ static int __init journal_init(void)
ret = journal_init_caches();
if (ret != 0)
journal_destroy_caches();
- create_jbd_proc_entry();
+ create_jbd_sysctls();
return ret;
}
@@ -1867,7 +1850,7 @@ static void __exit journal_exit(void)
if (n)
printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
#endif
- remove_jbd_proc_entry();
+ remove_jbd_sysctls();
journal_destroy_caches();
}
--- 2.4.19-pre10/fs/jbd/transaction.c~ext3-commit-interval Fri Jun 7 22:56:37 2002
+++ 2.4.19-pre10-akpm/fs/jbd/transaction.c Fri Jun 7 22:56:37 2002
@@ -56,7 +56,7 @@ static transaction_t * get_transaction (
transaction->t_journal = journal;
transaction->t_state = T_RUNNING;
transaction->t_tid = journal->j_transaction_sequence++;
- transaction->t_expires = jiffies + journal->j_commit_interval;
+ transaction->t_expires = jiffies + jbd_commit_interval * HZ;
/* Set up the commit timer for the new transaction. */
J_ASSERT (!journal->j_commit_timer_active);
--- 2.4.19-pre10/include/linux/ext3_fs_sb.h~ext3-commit-interval Fri Jun 7 22:56:37 2002
+++ 2.4.19-pre10-akpm/include/linux/ext3_fs_sb.h Fri Jun 7 22:57:32 2002
@@ -67,7 +67,6 @@ struct ext3_sb_info {
struct inode * s_journal_inode;
struct journal_s * s_journal;
struct list_head s_orphan;
- unsigned long s_commit_interval;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
--- 2.4.19-pre10/include/linux/jbd.h~ext3-commit-interval Fri Jun 7 22:56:37 2002
+++ 2.4.19-pre10-akpm/include/linux/jbd.h Fri Jun 7 22:58:07 2002
@@ -522,10 +522,6 @@ struct journal_s
* compound commit transaction */
int j_max_transaction_buffers;
- /* What is the maximum transaction lifetime before we begin a
- * commit? */
- unsigned long j_commit_interval;
-
/* The timer used to wakeup the commit thread: */
struct timer_list * j_commit_timer;
int j_commit_timer_active;
@@ -864,6 +860,8 @@ static inline int buffer_jbd_data(struct
#endif /* CONFIG_JBD || CONFIG_JBD_MODULE || !__KERNEL__ */
+extern int jbd_commit_interval;
+
/*
* Compatibility no-ops which allow the kernel to compile without CONFIG_JBD
* go here.
--- 2.4.19-pre10/include/linux/sysctl.h~ext3-commit-interval Sat Jun 8 00:03:48 2002
+++ 2.4.19-pre10-akpm/include/linux/sysctl.h Sat Jun 8 00:04:22 2002
@@ -546,6 +546,7 @@ enum
FS_LEASES=13, /* int: leases enabled */
FS_DIR_NOTIFY=14, /* int: directory notification enabled */
FS_LEASE_TIME=15, /* int: maximum time to wait for a lease break */
+ FS_JBD, /* JBD subdir */
};
/* CTL_DEBUG names: */
--- 2.4.19-pre10/Documentation/sysctl/fs.txt~ext3-commit-interval Sat Jun 8 00:30:58 2002
+++ 2.4.19-pre10-akpm/Documentation/sysctl/fs.txt Sat Jun 8 00:39:50 2002
@@ -27,6 +27,7 @@ Currently, these files are in /proc/sys/
- overflowgid
- super-max
- super-nr
+- jbd/
Documentation for the files in /proc/sys/fs/binfmt_misc is
in Documentation/binfmt_misc.txt.
@@ -138,3 +139,40 @@ thus the maximum number of mounted files
can have. You only need to increase super-max if you need to
mount more filesystems than the current value in super-max
allows you to.
+
+==============================================================
+
+jbd/jbd-commit-interval:
+
+Defines, in seconds, the largest period of time for which the
+Journalled Block Device driver (JBD) will allow dirty data to remain in
+memory. JBD is used by the ext3 filesystem.
+
+The default value is five seconds. Increasing this value will provide
+"longer" transactions, and may be used to avoid repetitive spinup of
+disk drives.
+
+Note that `kupdate' activity will also cause a JBD commit, so it is
+necessary to also increase the bdflush `interval' parameter. This is
+the fifth field in /proc/sys/vm/bdflush.
+
+It should be noted that increasing the value of `jbd-commit-interval'
+will increase the potential for data loss in the event of a system
+crash. ext3 recovery will only restore the filesystem state to that
+which pertained at the time of the last commit. So setting this to
+five minutes means that you can lose up to five minutes' worth of data.
+
+==============================================================
+
+jbd/jbd-debug:
+
+If the kernel was compiled for JBD debugging then this sysctl will
+cause status information to be generated by the JBD driver. The
+default value is zero (no debugging). Larger values cause more
+information to be emitted into the system logs.
+
+Note that system logging messages can themselves generate disk activity
+which will trigger more JBD debug messages. So this option can cause a
+rapid growth in logfile usage if it is used while a kernel logging
+daemon is in operation.
+
-
On Tue, 2002-06-25 at 10:03, Andrew Morton wrote:
> Miles Lane wrote:
> >
> > Hi,
> >
> > Is there any possibility we could:
> >
> > 1) Add support to the boot/mounting process
> > so that, if a machine is being powered by
> > battery, EXT3 partitions are mounted with
> > EXT2, instead?
> >
> > 2) While the machine is running, notice when the
> > power source switches between AC and battery
> > or vice versa and remount partitions EXT3
> > partitions to use EXT2 whenever a battery is
> > being used?
> >
>
> umm, why?
>
> If it's because of the disk-spins-up-too-much problem then
> that can be addressed by allowing the commit interval to be
> set to larger values.
Thanks Andrew,
Yes, the concern is the syncing every few seconds.
Would it be possible and make sense to have this
setting get adjusted dynamically when a laptop goes
onto battery power?
Miles
Hi,
On Tue, Jun 25, 2002 at 10:03:47AM -0700, Andrew Morton
<[email protected]> wrote:
> If it's because of the disk-spins-up-too-much problem then
> that can be addressed by allowing the commit interval to be
> set to larger values.
> +int jbd_commit_interval = 5; /* /proc/sys/fs/jbd_commit_interval */
I suspect you want this to be per-mount, not system-wide (although
filesystems could easily just inherit the system default dynamically
if there's no per-fs override.) I could easily imagine a user wanting
a different interval for a scratch disk, for example.
Cheers,
Stephen
On Tue, Jun 25, 2002 at 10:03:47AM -0700, Andrew Morton wrote:
> If it's because of the disk-spins-up-too-much problem then
> that can be addressed by allowing the commit interval to be
> set to larger values.
The updated commit interval will only affect new transactions, correct?
In other words, when changing the commit interval from t_old to t_new,
it will take t_old seconds until we can be certain there are only
transactions with a t_new expiry interval in the queue? Or is there a
way to flush the current queue of transactions, eg. by fsync()ing the
underlying block device, or by sending a magic signal to kjournald? If
such manual interaction is possible, it'd also be handy to have the
opposite: a be-anal mode (eg. if commit interval==0) meaning 'do not
write any transaction to disk until explicitly told to'. This parallels
the way kupdated can be tuned for traditional write-back.
Regards,
Daniel.
Hi,
On Fri, Jun 28, 2002 at 11:59:42PM +0200, Daniel Kobras wrote:
> On Tue, Jun 25, 2002 at 10:03:47AM -0700, Andrew Morton wrote:
> > If it's because of the disk-spins-up-too-much problem then
> > that can be addressed by allowing the commit interval to be
> > set to larger values.
>
> The updated commit interval will only affect new transactions, correct?
> In other words, when changing the commit interval from t_old to t_new,
> it will take t_old seconds until we can be certain there are only
> transactions with a t_new expiry interval in the queue?
Yes, unless:
> Or is there a
> way to flush the current queue of transactions, eg. by fsync()ing the
> underlying block device, or by sending a magic signal to kjournald?
an fsync() on any file or directory on the filesystem will ensure that
all old transactions have completed, and a sync() will ensure that any
old transactions are at least on their way to disk.
Cheers,
Stephen
Hi!
> > > If it's because of the disk-spins-up-too-much problem then
> > > that can be addressed by allowing the commit interval to be
> > > set to larger values.
> >
> > The updated commit interval will only affect new transactions, correct?
> > In other words, when changing the commit interval from t_old to t_new,
> > it will take t_old seconds until we can be certain there are only
> > transactions with a t_new expiry interval in the queue?
>
> Yes, unless:
> > Or is there a
> > way to flush the current queue of transactions, eg. by fsync()ing the
> > underlying block device, or by sending a magic signal to kjournald?
>
> an fsync() on any file or directory on the filesystem will ensure that
> all old transactions have completed, and a sync() will ensure that any
> old transactions are at least on their way to disk.
Ugh, does that mean that if I
"sync ; poweroff"
my data are not safe?
Pavel
--
(about SSSCA) "I don't say this lightly. However, I really think that the U.S.
no longer is classifiable as a democracy, but rather as a plutocracy." --hpa
Hi,
On Wed, Jul 03, 2002 at 05:04:48AM +0200, Pavel Machek <[email protected]> wrote:
> > an fsync() on any file or directory on the filesystem will ensure that
> > all old transactions have completed, and a sync() will ensure that any
> > old transactions are at least on their way to disk.
>
> Ugh, does that mean that if I
>
> "sync ; poweroff"
>
> my data are not safe?
Right --- sync only guarantees that the writes have started; you're
not safe until the disk light is off.
The VFS kernel core syncs each filesystem sequentially during sync and
bdflush. If we do each one synchronously, we end up serialising IO
and performance with multiple disks goes _way_ down. However, you can
choose synchronous completion of ext3_write_super() by giving modular
ext3 the module option "do_sync_supers=1".
--Stephen
On Tue, Jul 02, 2002 at 01:13:14PM +0100, Stephen C. Tweedie wrote:
> On Fri, Jun 28, 2002 at 11:59:42PM +0200, Daniel Kobras wrote:
> > Or is there a
> > way to flush the current queue of transactions, eg. by fsync()ing the
> > underlying block device, or by sending a magic signal to kjournald?
>
> an fsync() on any file or directory on the filesystem will ensure that
> all old transactions have completed, and a sync() will ensure that any
> old transactions are at least on their way to disk.
With emphasis on 'on the filesystem', I suppose? In other words, if we
have an ext3 fs on /dev/hda1 mounted on /mnt, it is not sufficient to
fsync("/dev/hda1") to flush the transactions, but fsync("/mnt") will do?
(Excuse the sloppy notation.)
Regards,
Daniel.
Hi,
On Fri, Jul 05, 2002 at 12:05:11AM +0200, Daniel Kobras
<[email protected]> wrote:
> On Tue, Jul 02, 2002 at 01:13:14PM +0100, Stephen C. Tweedie wrote:
> > an fsync() on any file or directory on the filesystem will ensure that
> > all old transactions have completed, and a sync() will ensure that any
> > old transactions are at least on their way to disk.
>
> With emphasis on 'on the filesystem', I suppose? In other words, if we
> have an ext3 fs on /dev/hda1 mounted on /mnt, it is not sufficient to
> fsync("/dev/hda1") to flush the transactions, but fsync("/mnt") will do?
> (Excuse the sloppy notation.)
Right.
--Stephen
Hi!
> > > an fsync() on any file or directory on the filesystem will ensure that
> > > all old transactions have completed, and a sync() will ensure that any
> > > old transactions are at least on their way to disk.
> >
> > With emphasis on 'on the filesystem', I suppose? In other words, if we
> > have an ext3 fs on /dev/hda1 mounted on /mnt, it is not sufficient to
> > fsync("/dev/hda1") to flush the transactions, but fsync("/mnt") will do?
> > (Excuse the sloppy notation.)
>
> Right.
So... If I do fsync("/"), will it flush everything? Probably not.
Is there some easy way to sync everything to disk and wait for
completion? [On suspend-to-something I'd like to do that for
additional safety.]
Pavel
--
Worst form of spam? Adding advertisment signatures ala sourceforge.net.
What goes next? Inserting advertisment *into* email?
Hi,
On Sat, Jul 06, 2002 at 02:58:35AM +0200, Pavel Machek wrote:
> So... If I do fsync("/"), will it flush everything? Probably not.
Right, it will only do the root fs.
> Is there some easy way to sync everything to disk and wait for
> completion? [On suspend-to-something I'd like to do that for
> additional safety.]
No, the VFS write_super() method currently has no wait-for-completion
mechanism.
Cheers,
Stephen