2005-09-09 08:45:56

by Akinobu Mita

[permalink] [raw]
Subject: [PATCH 0/6] jbd cleanup

The following 6 patches cleanup the jbd code and kill about 200 lines.
First of 4 patches can apply to 2.6.13-git8 and 2.6.13-mm2.
The rest of them can apply to 2.6.13-mm2.

fs/jbd/checkpoint.c | 179 +++++++++++--------------------------------
fs/jbd/commit.c | 101 ++++++++++--------------
fs/jbd/journal.c | 11 +-
fs/jbd/revoke.c | 158 ++++++++++++++-----------------------
fs/jbd/transaction.c | 113 +++++----------------------
include/linux/jbd.h | 28 +++---
include/linux/journal-head.h | 4
7 files changed, 201 insertions(+), 393 deletions(-)


2005-09-09 08:47:12

by Akinobu Mita

[permalink] [raw]
Subject: [PATCH 1/6] jbd: remove duplicated debug print

remove duplicated debug print

Signed-off-by: Akinobu Mita <[email protected]>

---

commit.c | 2 --
1 files changed, 2 deletions(-)

--- 2.6-mm/fs/jbd/commit.c.orig 2005-09-02 00:53:49.000000000 +0900
+++ 2.6-mm/fs/jbd/commit.c 2005-09-02 00:54:11.000000000 +0900
@@ -425,8 +425,6 @@ write_out_data:

journal_write_revoke_records(journal, commit_transaction);

- jbd_debug(3, "JBD: commit phase 2\n");
-
/*
* If we found any dirty or locked buffers, then we should have
* looped back up to the write_out_data label. If there weren't

2005-09-09 08:48:13

by Akinobu Mita

[permalink] [raw]
Subject: [PATCH 2/6] jbd: use hlist for the revoke tables

use struct hlist_head and hlist_node for the revoke tables.

Signed-off-by: Akinobu Mita <[email protected]>

---

revoke.c | 56 ++++++++++++++++++++++++++------------------------------
1 files changed, 26 insertions(+), 30 deletions(-)

diff -Nurp 2.6.13-mm1.old/fs/jbd/revoke.c 2.6.13-mm1/fs/jbd/revoke.c
--- 2.6.13-mm1.old/fs/jbd/revoke.c 2005-09-04 21:46:35.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/revoke.c 2005-09-04 21:50:25.000000000 +0900
@@ -79,7 +79,7 @@ static kmem_cache_t *revoke_table_cache;

struct jbd_revoke_record_s
{
- struct list_head hash;
+ struct hlist_node hash;
tid_t sequence; /* Used for recovery only */
unsigned long blocknr;
};
@@ -92,7 +92,7 @@ struct jbd_revoke_table_s
* for recovery. Must be a power of two. */
int hash_size;
int hash_shift;
- struct list_head *hash_table;
+ struct hlist_head *hash_table;
};


@@ -119,7 +119,6 @@ static inline int hash(journal_t *journa
static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
tid_t seq)
{
- struct list_head *hash_list;
struct jbd_revoke_record_s *record;

repeat:
@@ -129,9 +128,9 @@ repeat:

record->sequence = seq;
record->blocknr = blocknr;
- hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
spin_lock(&journal->j_revoke_lock);
- list_add(&record->hash, hash_list);
+ hlist_add_head(&record->hash,
+ &journal->j_revoke->hash_table[hash(journal, blocknr)]);
spin_unlock(&journal->j_revoke_lock);
return 0;

@@ -148,19 +147,16 @@ oom:
static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
unsigned long blocknr)
{
- struct list_head *hash_list;
+ struct hlist_node *node;
struct jbd_revoke_record_s *record;

- hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
-
spin_lock(&journal->j_revoke_lock);
- record = (struct jbd_revoke_record_s *) hash_list->next;
- while (&(record->hash) != hash_list) {
+ hlist_for_each_entry(record, node,
+ &journal->j_revoke->hash_table[hash(journal, blocknr)], hash) {
if (record->blocknr == blocknr) {
spin_unlock(&journal->j_revoke_lock);
return record;
}
- record = (struct jbd_revoke_record_s *) record->hash.next;
}
spin_unlock(&journal->j_revoke_lock);
return NULL;
@@ -219,7 +215,7 @@ int journal_init_revoke(journal_t *journ
journal->j_revoke->hash_shift = shift;

journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
+ kmalloc(hash_size * sizeof(struct hlist_head), GFP_KERNEL);
if (!journal->j_revoke->hash_table) {
kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
journal->j_revoke = NULL;
@@ -227,7 +223,7 @@ int journal_init_revoke(journal_t *journ
}

for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ INIT_HLIST_HEAD(&journal->j_revoke->hash_table[tmp]);

journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
if (!journal->j_revoke_table[1]) {
@@ -246,7 +242,7 @@ int journal_init_revoke(journal_t *journ
journal->j_revoke->hash_shift = shift;

journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
+ kmalloc(hash_size * sizeof(struct hlist_head), GFP_KERNEL);
if (!journal->j_revoke->hash_table) {
kfree(journal->j_revoke_table[0]->hash_table);
kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
@@ -256,7 +252,7 @@ int journal_init_revoke(journal_t *journ
}

for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ INIT_HLIST_HEAD(&journal->j_revoke->hash_table[tmp]);

spin_lock_init(&journal->j_revoke_lock);

@@ -268,7 +264,7 @@ int journal_init_revoke(journal_t *journ
void journal_destroy_revoke(journal_t *journal)
{
struct jbd_revoke_table_s *table;
- struct list_head *hash_list;
+ struct hlist_head *hash_list;
int i;

table = journal->j_revoke_table[0];
@@ -277,7 +273,7 @@ void journal_destroy_revoke(journal_t *j

for (i=0; i<table->hash_size; i++) {
hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
+ J_ASSERT (hlist_empty(hash_list));
}

kfree(table->hash_table);
@@ -290,7 +286,7 @@ void journal_destroy_revoke(journal_t *j

for (i=0; i<table->hash_size; i++) {
hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
+ J_ASSERT (hlist_empty(hash_list));
}

kfree(table->hash_table);
@@ -445,7 +441,7 @@ int journal_cancel_revoke(handle_t *hand
jbd_debug(4, "cancelled existing revoke on "
"blocknr %llu\n", (unsigned long long)bh->b_blocknr);
spin_lock(&journal->j_revoke_lock);
- list_del(&record->hash);
+ hlist_del(&record->hash);
spin_unlock(&journal->j_revoke_lock);
kmem_cache_free(revoke_record_cache, record);
did_revoke = 1;
@@ -488,7 +484,7 @@ void journal_switch_revoke_table(journal
journal->j_revoke = journal->j_revoke_table[0];

for (i = 0; i < journal->j_revoke->hash_size; i++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
+ INIT_HLIST_HEAD(&journal->j_revoke->hash_table[i]);
}

/*
@@ -504,7 +500,6 @@ void journal_write_revoke_records(journa
struct journal_head *descriptor;
struct jbd_revoke_record_s *record;
struct jbd_revoke_table_s *revoke;
- struct list_head *hash_list;
int i, offset, count;

descriptor = NULL;
@@ -516,16 +511,16 @@ void journal_write_revoke_records(journa
journal->j_revoke_table[1] : journal->j_revoke_table[0];

for (i = 0; i < revoke->hash_size; i++) {
- hash_list = &revoke->hash_table[i];
+ struct hlist_head *hash_list = &revoke->hash_table[i];

- while (!list_empty(hash_list)) {
- record = (struct jbd_revoke_record_s *)
- hash_list->next;
+ while (!hlist_empty(hash_list)) {
+ record = hlist_entry(hash_list->first,
+ struct jbd_revoke_record_s, hash);
write_one_revoke_record(journal, transaction,
&descriptor, &offset,
record);
count++;
- list_del(&record->hash);
+ hlist_del(&record->hash);
kmem_cache_free(revoke_record_cache, record);
}
}
@@ -686,7 +681,7 @@ int journal_test_revoke(journal_t *journ
void journal_clear_revoke(journal_t *journal)
{
int i;
- struct list_head *hash_list;
+ struct hlist_head *hash_list;
struct jbd_revoke_record_s *record;
struct jbd_revoke_table_s *revoke;

@@ -694,9 +689,10 @@ void journal_clear_revoke(journal_t *jou

for (i = 0; i < revoke->hash_size; i++) {
hash_list = &revoke->hash_table[i];
- while (!list_empty(hash_list)) {
- record = (struct jbd_revoke_record_s*) hash_list->next;
- list_del(&record->hash);
+ while (!hlist_empty(hash_list)) {
+ record = hlist_entry(hash_list->first,
+ struct jbd_revoke_record_s, hash);
+ hlist_del(&record->hash);
kmem_cache_free(revoke_record_cache, record);
}
}

2005-09-09 08:49:30

by Akinobu Mita

[permalink] [raw]
Subject: [PATCH 3/6] jbd: cleanup for initializing/destroying the revoke tables

use loop counter for initializing/destroying a pair of the revoke tables.

Signed-off-by: Akinobu Mita <[email protected]>

---

revoke.c | 116 ++++++++++++++++++++++-----------------------------------------
1 files changed, 42 insertions(+), 74 deletions(-)

diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/revoke.c 2.6.13-mm1/fs/jbd/revoke.c
--- 2.6.13-mm1.old/fs/jbd/revoke.c 2005-09-05 03:21:00.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/revoke.c 2005-09-05 11:16:04.000000000 +0900
@@ -193,108 +193,76 @@ void journal_destroy_revoke_caches(void)

int journal_init_revoke(journal_t *journal, int hash_size)
{
- int shift, tmp;
+ int shift = 0;
+ int tmp = hash_size;
+ int i;

+ /* Check that the hash_size is a power of two */
+ J_ASSERT ((hash_size & (hash_size-1)) == 0);
J_ASSERT (journal->j_revoke_table[0] == NULL);

- shift = 0;
- tmp = hash_size;
- while((tmp >>= 1UL) != 0UL)
+ while ((tmp >>= 1UL) != 0UL)
shift++;

- journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[0])
- return -ENOMEM;
- journal->j_revoke = journal->j_revoke_table[0];
-
- /* Check that the hash_size is a power of two */
- J_ASSERT ((hash_size & (hash_size-1)) == 0);
+ for (i = 0; i < 2; i++) {
+ struct jbd_revoke_table_s *table;

- journal->j_revoke->hash_size = hash_size;
+ table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+ if (!table)
+ goto nomem;
+
+ table->hash_size = hash_size;
+ table->hash_shift = shift;
+ table->hash_table = kmalloc(hash_size * sizeof(struct hlist_head), GFP_KERNEL);
+ if (!table->hash_table) {
+ kmem_cache_free(revoke_table_cache, table);
+ goto nomem;
+ }

- journal->j_revoke->hash_shift = shift;
+ for (tmp = 0; tmp < hash_size; tmp++)
+ INIT_HLIST_HEAD(&table->hash_table[tmp]);

- journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct hlist_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- journal->j_revoke = NULL;
- return -ENOMEM;
- }
-
- for (tmp = 0; tmp < hash_size; tmp++)
- INIT_HLIST_HEAD(&journal->j_revoke->hash_table[tmp]);
-
- journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[1]) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- return -ENOMEM;
+ journal->j_revoke_table[i] = table;
}
-
journal->j_revoke = journal->j_revoke_table[1];
+ spin_lock_init(&journal->j_revoke_lock);

- /* Check that the hash_size is a power of two */
- J_ASSERT ((hash_size & (hash_size-1)) == 0);
-
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
+ return 0;

- journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct hlist_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
- journal->j_revoke = NULL;
- return -ENOMEM;
+nomem:
+ while (i--) {
+ kfree(journal->j_revoke_table[i]->hash_table);
+ kmem_cache_free(revoke_table_cache, journal->j_revoke_table[i]);
}

- for (tmp = 0; tmp < hash_size; tmp++)
- INIT_HLIST_HEAD(&journal->j_revoke->hash_table[tmp]);
-
- spin_lock_init(&journal->j_revoke_lock);
-
- return 0;
+ return -ENOMEM;
}

/* Destoy a journal's revoke table. The table must already be empty! */

void journal_destroy_revoke(journal_t *journal)
{
- struct jbd_revoke_table_s *table;
- struct hlist_head *hash_list;
- int i;
+ int j;

- table = journal->j_revoke_table[0];
- if (!table)
- return;
+ journal->j_revoke = NULL;

- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (hlist_empty(hash_list));
- }
+ for (j = 0; j < 2; j++) {
+ int i;
+ struct jbd_revoke_table_s *table = journal->j_revoke_table[j];

- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
- journal->j_revoke = NULL;
+ if (!table)
+ return;

- table = journal->j_revoke_table[1];
- if (!table)
- return;
+ for (i = 0; i < table->hash_size; i++) {
+ struct hlist_head *hash_list = &table->hash_table[i];
+ J_ASSERT (hlist_empty(hash_list));
+ }

- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (hlist_empty(hash_list));
+ kfree(table->hash_table);
+ kmem_cache_free(revoke_table_cache, table);
}
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
- journal->j_revoke = NULL;
}

-
#ifdef __KERNEL__

/*

2005-09-09 08:50:53

by Akinobu Mita

[permalink] [raw]
Subject: [PATCH 4/6] jbd: use list_head for the list of buffers on a transaction's data

use struct list_head for doubly-linked list of buffers on a transaction's
data, metadata or forget queue.

Signed-off-by: Akinobu Mita <[email protected]>

---

fs/jbd/checkpoint.c | 12 ++--
fs/jbd/commit.c | 79 ++++++++++++++++--------------
fs/jbd/journal.c | 1
fs/jbd/transaction.c | 110 ++++++++-----------------------------------
include/linux/jbd.h | 20 +++----
include/linux/journal-head.h | 2
6 files changed, 80 insertions(+), 144 deletions(-)

diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/checkpoint.c 2.6.13-mm1/fs/jbd/checkpoint.c
--- 2.6.13-mm1.old/fs/jbd/checkpoint.c 2005-09-05 03:15:17.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/checkpoint.c 2005-09-05 03:15:35.000000000 +0900
@@ -684,12 +684,12 @@ void __journal_drop_transaction(journal_
}

J_ASSERT(transaction->t_state == T_FINISHED);
- J_ASSERT(transaction->t_buffers == NULL);
- J_ASSERT(transaction->t_sync_datalist == NULL);
- J_ASSERT(transaction->t_forget == NULL);
- J_ASSERT(transaction->t_iobuf_list == NULL);
- J_ASSERT(transaction->t_shadow_list == NULL);
- J_ASSERT(transaction->t_log_list == NULL);
+ J_ASSERT(list_empty(&transaction->t_metadata_list));
+ J_ASSERT(list_empty(&transaction->t_syncdata_list));
+ J_ASSERT(list_empty(&transaction->t_forget_list));
+ J_ASSERT(list_empty(&transaction->t_io_list));
+ J_ASSERT(list_empty(&transaction->t_shadow_list));
+ J_ASSERT(list_empty(&transaction->t_logctl_list));
J_ASSERT(transaction->t_checkpoint_list == NULL);
J_ASSERT(transaction->t_checkpoint_io_list == NULL);
J_ASSERT(transaction->t_updates == 0);
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/commit.c 2.6.13-mm1/fs/jbd/commit.c
--- 2.6.13-mm1.old/fs/jbd/commit.c 2005-09-05 03:16:12.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/commit.c 2005-09-05 03:15:35.000000000 +0900
@@ -250,8 +250,9 @@ void journal_commit_transaction(journal_
* that multiple journal_get_write_access() calls to the same
* buffer are perfectly permissable.
*/
- while (commit_transaction->t_reserved_list) {
- jh = commit_transaction->t_reserved_list;
+ while (!list_empty(&commit_transaction->t_reserved_list)) {
+ jh = list_entry(commit_transaction->t_reserved_list.next,
+ struct journal_head, b_list);
JBUFFER_TRACE(jh, "reserved, unused: refile");
/*
* A journal_get_undo_access()+journal_release_buffer() may
@@ -300,14 +301,9 @@ void journal_commit_transaction(journal_
* will be tracked for a new trasaction only -bzzz
*/
spin_lock(&journal->j_list_lock);
- if (commit_transaction->t_buffers) {
- new_jh = jh = commit_transaction->t_buffers->b_tnext;
- do {
- J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
- new_jh->b_modified == 0);
- new_jh->b_modified = 0;
- new_jh = new_jh->b_tnext;
- } while (new_jh != jh);
+ list_for_each_entry(jh, &commit_transaction->t_metadata_list, b_list) {
+ J_ASSERT_JH(jh, jh->b_modified == 1 || jh->b_modified == 0);
+ jh->b_modified = 0;
}
spin_unlock(&journal->j_list_lock);

@@ -319,7 +315,7 @@ void journal_commit_transaction(journal_
err = 0;
/*
* Whenever we unlock the journal and sleep, things can get added
- * onto ->t_sync_datalist, so we have to keep looping back to
+ * onto ->t_syncdata_list, so we have to keep looping back to
* write_out_data until we *know* that the list is empty.
*/
bufs = 0;
@@ -331,11 +327,12 @@ write_out_data:
cond_resched();
spin_lock(&journal->j_list_lock);

- while (commit_transaction->t_sync_datalist) {
+ while (!list_empty(&commit_transaction->t_syncdata_list)) {
struct buffer_head *bh;

- jh = commit_transaction->t_sync_datalist;
- commit_transaction->t_sync_datalist = jh->b_tnext;
+ jh = list_entry(commit_transaction->t_syncdata_list.next,
+ struct journal_head, b_list);
+ list_move_tail(&jh->b_list, &commit_transaction->t_syncdata_list);
bh = jh2bh(jh);
if (buffer_locked(bh)) {
BUFFER_TRACE(bh, "locked");
@@ -389,10 +386,11 @@ write_out_data:
/*
* Wait for all previously submitted IO to complete.
*/
- while (commit_transaction->t_locked_list) {
+ while (!list_empty(&commit_transaction->t_locked_list)) {
struct buffer_head *bh;

- jh = commit_transaction->t_locked_list->b_tprev;
+ jh = list_entry(commit_transaction->t_locked_list.prev,
+ struct journal_head, b_list);
bh = jh2bh(jh);
get_bh(bh);
if (buffer_locked(bh)) {
@@ -431,7 +429,7 @@ write_out_data:
* any then journal_clean_data_list should have wiped the list
* clean by now, so check that it is in fact empty.
*/
- J_ASSERT (commit_transaction->t_sync_datalist == NULL);
+ J_ASSERT (list_empty(&commit_transaction->t_syncdata_list));

jbd_debug (3, "JBD: commit phase 3\n");

@@ -444,11 +442,12 @@ write_out_data:

descriptor = NULL;
bufs = 0;
- while (commit_transaction->t_buffers) {
+ while (!list_empty(&commit_transaction->t_metadata_list)) {

/* Find the next buffer to be journaled... */

- jh = commit_transaction->t_buffers;
+ jh = list_entry(commit_transaction->t_metadata_list.next,
+ struct journal_head, b_list);

/* If we're in abort mode, we just un-journal the buffer and
release it for background writing. */
@@ -460,7 +459,7 @@ write_out_data:
* any descriptor buffers which may have been
* already allocated, even if we are now
* aborting. */
- if (!commit_transaction->t_buffers)
+ if (list_empty(&commit_transaction->t_metadata_list))
goto start_journal_io;
continue;
}
@@ -569,7 +568,7 @@ write_out_data:
let the IO rip! */

if (bufs == journal->j_wbufsize ||
- commit_transaction->t_buffers == NULL ||
+ list_empty(&commit_transaction->t_metadata_list) ||
space_left < sizeof(journal_block_tag_t) + 16) {

jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
@@ -601,8 +600,8 @@ start_journal_io:
/* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to
complete. Control buffers being written are on the
- transaction's t_log_list queue, and metadata buffers are on
- the t_iobuf_list queue.
+ transaction's t_logctl_list queue, and metadata buffers are on
+ the t_io_list queue.

Wait for the buffers in reverse order. That way we are
less likely to be woken up until all IOs have completed, and
@@ -616,10 +615,11 @@ start_journal_io:
* See __journal_try_to_free_buffer.
*/
wait_for_iobuf:
- while (commit_transaction->t_iobuf_list != NULL) {
+ while (!list_empty(&commit_transaction->t_io_list)) {
struct buffer_head *bh;

- jh = commit_transaction->t_iobuf_list->b_tprev;
+ jh = list_entry(commit_transaction->t_io_list.prev,
+ struct journal_head, b_list);
bh = jh2bh(jh);
if (buffer_locked(bh)) {
wait_on_buffer(bh);
@@ -637,7 +637,7 @@ wait_for_iobuf:
journal_unfile_buffer(journal, jh);

/*
- * ->t_iobuf_list should contain only dummy buffer_heads
+ * ->t_io_list should contain only dummy buffer_heads
* which were created by journal_write_metadata_buffer().
*/
BUFFER_TRACE(bh, "dumping temporary bh");
@@ -648,7 +648,8 @@ wait_for_iobuf:

/* We also have to unlock and free the corresponding
shadowed buffer */
- jh = commit_transaction->t_shadow_list->b_tprev;
+ jh = list_entry(commit_transaction->t_shadow_list.prev,
+ struct journal_head, b_list);
bh = jh2bh(jh);
clear_bit(BH_JWrite, &bh->b_state);
J_ASSERT_BH(bh, buffer_jbddirty(bh));
@@ -666,16 +667,17 @@ wait_for_iobuf:
__brelse(bh);
}

- J_ASSERT (commit_transaction->t_shadow_list == NULL);
+ J_ASSERT (list_empty(&commit_transaction->t_shadow_list));

jbd_debug(3, "JBD: commit phase 5\n");

/* Here we wait for the revoke record and descriptor record buffers */
wait_for_ctlbuf:
- while (commit_transaction->t_log_list != NULL) {
+ while (!list_empty(&commit_transaction->t_logctl_list)) {
struct buffer_head *bh;

- jh = commit_transaction->t_log_list->b_tprev;
+ jh = list_entry(commit_transaction->t_logctl_list.prev,
+ struct journal_head, b_list);
bh = jh2bh(jh);
if (buffer_locked(bh)) {
wait_on_buffer(bh);
@@ -710,12 +712,12 @@ wait_for_iobuf:

jbd_debug(3, "JBD: commit phase 7\n");

- J_ASSERT(commit_transaction->t_sync_datalist == NULL);
- J_ASSERT(commit_transaction->t_buffers == NULL);
+ J_ASSERT(list_empty(&commit_transaction->t_syncdata_list));
+ J_ASSERT(list_empty(&commit_transaction->t_metadata_list));
J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
- J_ASSERT(commit_transaction->t_iobuf_list == NULL);
- J_ASSERT(commit_transaction->t_shadow_list == NULL);
- J_ASSERT(commit_transaction->t_log_list == NULL);
+ J_ASSERT(list_empty(&commit_transaction->t_io_list));
+ J_ASSERT(list_empty(&commit_transaction->t_shadow_list));
+ J_ASSERT(list_empty(&commit_transaction->t_logctl_list));

restart_loop:
/*
@@ -723,11 +725,12 @@ restart_loop:
* to this list we have to be careful and hold the j_list_lock.
*/
spin_lock(&journal->j_list_lock);
- while (commit_transaction->t_forget) {
+ while (!list_empty(&commit_transaction->t_forget_list)) {
transaction_t *cp_transaction;
struct buffer_head *bh;

- jh = commit_transaction->t_forget;
+ jh = list_entry(commit_transaction->t_forget_list.next,
+ struct journal_head, b_list);
spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh);
jbd_lock_bh_state(bh);
@@ -811,7 +814,7 @@ restart_loop:
* Now recheck if some buffers did not get attached to the transaction
* while the lock was dropped...
*/
- if (commit_transaction->t_forget) {
+ if (!list_empty(&commit_transaction->t_forget_list)) {
spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock);
goto restart_loop;
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/journal.c 2.6.13-mm1/fs/jbd/journal.c
--- 2.6.13-mm1.old/fs/jbd/journal.c 2005-09-05 03:15:17.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/journal.c 2005-09-05 03:15:39.000000000 +0900
@@ -1761,6 +1761,7 @@ repeat:
set_buffer_jbd(bh);
bh->b_private = jh;
jh->b_bh = bh;
+ INIT_LIST_HEAD(&jh->b_list);
get_bh(bh);
BUFFER_TRACE(bh, "added journal_head");
}
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/transaction.c 2.6.13-mm1/fs/jbd/transaction.c
--- 2.6.13-mm1.old/fs/jbd/transaction.c 2005-09-05 03:15:17.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/transaction.c 2005-09-05 03:15:35.000000000 +0900
@@ -51,6 +51,14 @@ get_transaction(journal_t *journal, tran
transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval;
spin_lock_init(&transaction->t_handle_lock);
+ INIT_LIST_HEAD(&transaction->t_reserved_list);
+ INIT_LIST_HEAD(&transaction->t_locked_list);
+ INIT_LIST_HEAD(&transaction->t_metadata_list);
+ INIT_LIST_HEAD(&transaction->t_syncdata_list);
+ INIT_LIST_HEAD(&transaction->t_forget_list);
+ INIT_LIST_HEAD(&transaction->t_io_list);
+ INIT_LIST_HEAD(&transaction->t_shadow_list);
+ INIT_LIST_HEAD(&transaction->t_logctl_list);

/* Set up the commit timer for the new transaction. */
journal->j_commit_timer->expires = transaction->t_expires;
@@ -1414,64 +1422,12 @@ int journal_force_commit(journal_t *jour
return ret;
}

-/*
- *
- * List management code snippets: various functions for manipulating the
- * transaction buffer lists.
- *
- */
-
-/*
- * Append a buffer to a transaction list, given the transaction's list head
- * pointer.
- *
- * j_list_lock is held.
- *
- * jbd_lock_bh_state(jh2bh(jh)) is held.
- */
-
-static inline void
-__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
-{
- if (!*list) {
- jh->b_tnext = jh->b_tprev = jh;
- *list = jh;
- } else {
- /* Insert at the tail of the list to preserve order */
- struct journal_head *first = *list, *last = first->b_tprev;
- jh->b_tprev = last;
- jh->b_tnext = first;
- last->b_tnext = first->b_tprev = jh;
- }
-}
-
-/*
- * Remove a buffer from a transaction list, given the transaction's list
- * head pointer.
- *
- * Called with j_list_lock held, and the journal may not be locked.
- *
- * jbd_lock_bh_state(jh2bh(jh)) is held.
- */
-
-static inline void
-__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
-{
- if (*list == jh) {
- *list = jh->b_tnext;
- if (*list == jh)
- *list = NULL;
- }
- jh->b_tprev->b_tnext = jh->b_tnext;
- jh->b_tnext->b_tprev = jh->b_tprev;
-}
-
/*
* Remove a buffer from the appropriate transaction list.
*
* Note that this function can *change* the value of
- * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
- * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller
+ * bh->b_transaction->t_syncdata_list, t_metadata_list, t_forget_list,
+ * t_io_list, t_shadow_list, t_logctl_list or t_reserved_list. If the caller
* is holding onto a copy of one of thee pointers, it could go bad.
* Generally the caller needs to re-read the pointer from the transaction_t.
*
@@ -1479,7 +1435,6 @@ __blist_del_buffer(struct journal_head *
*/
void __journal_temp_unlink_buffer(struct journal_head *jh)
{
- struct journal_head **list = NULL;
transaction_t *transaction;
struct buffer_head *bh = jh2bh(jh);

@@ -1495,35 +1450,12 @@ void __journal_temp_unlink_buffer(struct
switch (jh->b_jlist) {
case BJ_None:
return;
- case BJ_SyncData:
- list = &transaction->t_sync_datalist;
- break;
case BJ_Metadata:
- transaction->t_nr_buffers--;
- J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
- list = &transaction->t_buffers;
- break;
- case BJ_Forget:
- list = &transaction->t_forget;
- break;
- case BJ_IO:
- list = &transaction->t_iobuf_list;
- break;
- case BJ_Shadow:
- list = &transaction->t_shadow_list;
- break;
- case BJ_LogCtl:
- list = &transaction->t_log_list;
- break;
- case BJ_Reserved:
- list = &transaction->t_reserved_list;
- break;
- case BJ_Locked:
- list = &transaction->t_locked_list;
+ transaction->t_nr_metadata--;
+ J_ASSERT_JH(jh, transaction->t_nr_metadata >= 0);
break;
}
-
- __blist_del_buffer(list, jh);
+ list_del(&jh->b_list);
jh->b_jlist = BJ_None;
if (test_clear_buffer_jbddirty(bh))
mark_buffer_dirty(bh); /* Expose it to the VM */
@@ -1924,7 +1856,7 @@ int journal_invalidatepage(journal_t *jo
void __journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist)
{
- struct journal_head **list = NULL;
+ struct list_head *list = NULL;
int was_dirty = 0;
struct buffer_head *bh = jh2bh(jh);

@@ -1959,23 +1891,23 @@ void __journal_file_buffer(struct journa
J_ASSERT_JH(jh, !jh->b_frozen_data);
return;
case BJ_SyncData:
- list = &transaction->t_sync_datalist;
+ list = &transaction->t_syncdata_list;
break;
case BJ_Metadata:
- transaction->t_nr_buffers++;
- list = &transaction->t_buffers;
+ transaction->t_nr_metadata++;
+ list = &transaction->t_metadata_list;
break;
case BJ_Forget:
- list = &transaction->t_forget;
+ list = &transaction->t_forget_list;
break;
case BJ_IO:
- list = &transaction->t_iobuf_list;
+ list = &transaction->t_io_list;
break;
case BJ_Shadow:
list = &transaction->t_shadow_list;
break;
case BJ_LogCtl:
- list = &transaction->t_log_list;
+ list = &transaction->t_logctl_list;
break;
case BJ_Reserved:
list = &transaction->t_reserved_list;
@@ -1985,7 +1917,7 @@ void __journal_file_buffer(struct journa
break;
}

- __blist_add_buffer(list, jh);
+ list_add_tail(&jh->b_list, list);
jh->b_jlist = jlist;

if (was_dirty)
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/include/linux/jbd.h 2.6.13-mm1/include/linux/jbd.h
--- 2.6.13-mm1.old/include/linux/jbd.h 2005-09-05 03:15:24.000000000 +0900
+++ 2.6.13-mm1/include/linux/jbd.h 2005-09-05 03:15:35.000000000 +0900
@@ -459,39 +459,39 @@ struct transaction_s
*/
unsigned long t_log_start;

- /* Number of buffers on the t_buffers list [j_list_lock] */
- int t_nr_buffers;
+ /* Number of buffers on the t_metadata_list [j_list_lock] */
+ int t_nr_metadata;

/*
* Doubly-linked circular list of all buffers reserved but not yet
* modified by this transaction [j_list_lock]
*/
- struct journal_head *t_reserved_list;
+ struct list_head t_reserved_list;

/*
* Doubly-linked circular list of all buffers under writeout during
* commit [j_list_lock]
*/
- struct journal_head *t_locked_list;
+ struct list_head t_locked_list;

/*
* Doubly-linked circular list of all metadata buffers owned by this
* transaction [j_list_lock]
*/
- struct journal_head *t_buffers;
+ struct list_head t_metadata_list;

/*
* Doubly-linked circular list of all data buffers still to be
* flushed before this transaction can be committed [j_list_lock]
*/
- struct journal_head *t_sync_datalist;
+ struct list_head t_syncdata_list;

/*
* Doubly-linked circular list of all forget buffers (superseded
* buffers which we can un-checkpoint once this transaction commits)
* [j_list_lock]
*/
- struct journal_head *t_forget;
+ struct list_head t_forget_list;

/*
* Doubly-linked circular list of all buffers still to be flushed before
@@ -509,20 +509,20 @@ struct transaction_s
* Doubly-linked circular list of temporary buffers currently undergoing
* IO in the log [j_list_lock]
*/
- struct journal_head *t_iobuf_list;
+ struct list_head t_io_list;

/*
* Doubly-linked circular list of metadata buffers being shadowed by log
* IO. The IO buffers on the iobuf list and the shadow buffers on this
* list match each other one for one at all times. [j_list_lock]
*/
- struct journal_head *t_shadow_list;
+ struct list_head t_shadow_list;

/*
* Doubly-linked circular list of control buffers being written to the
* log. [j_list_lock]
*/
- struct journal_head *t_log_list;
+ struct list_head t_logctl_list;

/*
* Protects info related to handles
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/include/linux/journal-head.h 2.6.13-mm1/include/linux/journal-head.h
--- 2.6.13-mm1.old/include/linux/journal-head.h 2005-09-05 03:15:24.000000000 +0900
+++ 2.6.13-mm1/include/linux/journal-head.h 2005-09-05 03:15:35.000000000 +0900
@@ -72,7 +72,7 @@ struct journal_head {
* Doubly-linked list of buffers on a transaction's data, metadata or
* forget queue. [t_list_lock] [jbd_lock_bh_state()]
*/
- struct journal_head *b_tnext, *b_tprev;
+ struct list_head b_list;

/*
* Pointer to the compound transaction against which this buffer

2005-09-09 08:52:37

by Akinobu Mita

[permalink] [raw]
Subject: [-mm PATCH 5/6] jbd: use list_head for the list of all transactions waiting for

use struct list_head for a linked circular list of all transactions
waiting for checkpointing on a journal control structure.

Signed-off-by: Akinobu Mita <[email protected]>

---

fs/jbd/checkpoint.c | 48 ++++++++++++++++++++----------------------------
fs/jbd/commit.c | 16 ++--------------
fs/jbd/journal.c | 9 +++++----
fs/jbd/transaction.c | 1 +
include/linux/jbd.h | 4 ++--
5 files changed, 30 insertions(+), 48 deletions(-)

diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/checkpoint.c 2.6.13-mm1/fs/jbd/checkpoint.c
--- 2.6.13-mm1.old/fs/jbd/checkpoint.c 2005-09-04 23:31:48.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/checkpoint.c 2005-09-05 00:23:28.000000000 +0900
@@ -180,8 +180,10 @@ static void __wait_cp_io(journal_t *jour
this_tid = transaction->t_tid;
restart:
/* Didn't somebody clean up the transaction in the meanwhile */
- if (journal->j_checkpoint_transactions != transaction ||
- transaction->t_tid != this_tid)
+ if (list_empty(&journal->j_checkpoint_transactions) ||
+ list_entry(journal->j_checkpoint_transactions.next, transaction_t,
+ t_cplist) != transaction ||
+ transaction->t_tid != this_tid)
return;
while (!released && transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
@@ -328,9 +330,10 @@ int log_do_checkpoint(journal_t *journal
* and write it.
*/
spin_lock(&journal->j_list_lock);
- if (!journal->j_checkpoint_transactions)
+ if (list_empty(&journal->j_checkpoint_transactions))
goto out;
- transaction = journal->j_checkpoint_transactions;
+ transaction = list_entry(journal->j_checkpoint_transactions.next,
+ transaction_t, t_cplist);
this_tid = transaction->t_tid;
restart:
/*
@@ -338,8 +341,10 @@ restart:
* done (maybe it's a new transaction, but it fell at the same
* address).
*/
- if (journal->j_checkpoint_transactions == transaction ||
- transaction->t_tid == this_tid) {
+ if ((!list_empty(&journal->j_checkpoint_transactions) &&
+ list_entry(journal->j_checkpoint_transactions.next,
+ transaction_t, t_cplist) == transaction) ||
+ transaction->t_tid == this_tid) {
int batch_count = 0;
struct buffer_head *bhs[NR_BATCH];
struct journal_head *jh;
@@ -410,7 +415,7 @@ out:

int cleanup_journal_tail(journal_t *journal)
{
- transaction_t * transaction;
+ transaction_t * transaction = NULL;
tid_t first_tid;
unsigned long blocknr, freed;

@@ -423,7 +428,9 @@ int cleanup_journal_tail(journal_t *jour

spin_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
- transaction = journal->j_checkpoint_transactions;
+ if (!list_empty(&journal->j_checkpoint_transactions))
+ transaction = list_entry(journal->j_checkpoint_transactions.next,
+ transaction_t, t_cplist);
if (transaction) {
first_tid = transaction->t_tid;
blocknr = transaction->t_log_start;
@@ -530,18 +537,11 @@ static int journal_clean_one_cp_list(str

int __journal_clean_checkpoint_list(journal_t *journal)
{
- transaction_t *transaction, *last_transaction, *next_transaction;
+ transaction_t *transaction, *next_transaction;
int ret = 0, released;

- transaction = journal->j_checkpoint_transactions;
- if (!transaction)
- goto out;
-
- last_transaction = transaction->t_cpprev;
- next_transaction = transaction;
- do {
- transaction = next_transaction;
- next_transaction = transaction->t_cpnext;
+ list_for_each_entry_safe(transaction, next_transaction,
+ &journal->j_checkpoint_transactions, t_cplist) {
ret += journal_clean_one_cp_list(transaction->
t_checkpoint_list, &released);
if (need_resched())
@@ -557,7 +557,7 @@ int __journal_clean_checkpoint_list(jour
t_checkpoint_io_list, &released);
if (need_resched())
goto out;
- } while (transaction != last_transaction);
+ }
out:
return ret;
}
@@ -673,15 +673,7 @@ void __journal_insert_checkpoint(struct
void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
{
assert_spin_locked(&journal->j_list_lock);
- if (transaction->t_cpnext) {
- transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
- transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
- if (journal->j_checkpoint_transactions == transaction)
- journal->j_checkpoint_transactions =
- transaction->t_cpnext;
- if (journal->j_checkpoint_transactions == transaction)
- journal->j_checkpoint_transactions = NULL;
- }
+ list_del(&transaction->t_cplist);

J_ASSERT(transaction->t_state == T_FINISHED);
J_ASSERT(list_empty(&transaction->t_metadata_list));
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/commit.c 2.6.13-mm1/fs/jbd/commit.c
--- 2.6.13-mm1.old/fs/jbd/commit.c 2005-09-04 23:31:48.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/commit.c 2005-09-04 23:41:01.000000000 +0900
@@ -835,20 +835,8 @@ restart_loop:
if (commit_transaction->t_checkpoint_list == NULL) {
__journal_drop_transaction(journal, commit_transaction);
} else {
- if (journal->j_checkpoint_transactions == NULL) {
- journal->j_checkpoint_transactions = commit_transaction;
- commit_transaction->t_cpnext = commit_transaction;
- commit_transaction->t_cpprev = commit_transaction;
- } else {
- commit_transaction->t_cpnext =
- journal->j_checkpoint_transactions;
- commit_transaction->t_cpprev =
- commit_transaction->t_cpnext->t_cpprev;
- commit_transaction->t_cpnext->t_cpprev =
- commit_transaction;
- commit_transaction->t_cpprev->t_cpnext =
- commit_transaction;
- }
+ list_add_tail(&commit_transaction->t_cplist,
+ &journal->j_checkpoint_transactions);
}
spin_unlock(&journal->j_list_lock);

diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/journal.c 2.6.13-mm1/fs/jbd/journal.c
--- 2.6.13-mm1.old/fs/jbd/journal.c 2005-09-04 23:31:48.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/journal.c 2005-09-04 23:33:19.000000000 +0900
@@ -653,6 +653,7 @@ static journal_t * journal_init_common (
goto fail;
memset(journal, 0, sizeof(*journal));

+ INIT_LIST_HEAD(&journal->j_checkpoint_transactions);
init_waitqueue_head(&journal->j_wait_transaction_locked);
init_waitqueue_head(&journal->j_wait_logspace);
init_waitqueue_head(&journal->j_wait_done_commit);
@@ -1130,7 +1131,7 @@ void journal_destroy(journal_t *journal)

/* Totally anal locking here... */
spin_lock(&journal->j_list_lock);
- while (journal->j_checkpoint_transactions != NULL) {
+ while (!list_empty(&journal->j_checkpoint_transactions)) {
spin_unlock(&journal->j_list_lock);
log_do_checkpoint(journal);
spin_lock(&journal->j_list_lock);
@@ -1138,7 +1139,7 @@ void journal_destroy(journal_t *journal)

J_ASSERT(journal->j_running_transaction == NULL);
J_ASSERT(journal->j_committing_transaction == NULL);
- J_ASSERT(journal->j_checkpoint_transactions == NULL);
+ J_ASSERT(list_empty(&journal->j_checkpoint_transactions));
spin_unlock(&journal->j_list_lock);

/* We can now mark the journal as empty. */
@@ -1352,7 +1353,7 @@ int journal_flush(journal_t *journal)

/* ...and flush everything in the log out to disk. */
spin_lock(&journal->j_list_lock);
- while (!err && journal->j_checkpoint_transactions != NULL) {
+ while (!err && !list_empty(&journal->j_checkpoint_transactions)) {
spin_unlock(&journal->j_list_lock);
err = log_do_checkpoint(journal);
spin_lock(&journal->j_list_lock);
@@ -1375,7 +1376,7 @@ int journal_flush(journal_t *journal)

J_ASSERT(!journal->j_running_transaction);
J_ASSERT(!journal->j_committing_transaction);
- J_ASSERT(!journal->j_checkpoint_transactions);
+ J_ASSERT(list_empty(&journal->j_checkpoint_transactions));
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
spin_unlock(&journal->j_state_lock);
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/transaction.c 2.6.13-mm1/fs/jbd/transaction.c
--- 2.6.13-mm1.old/fs/jbd/transaction.c 2005-09-04 23:31:47.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/transaction.c 2005-09-04 23:33:19.000000000 +0900
@@ -59,6 +59,7 @@ get_transaction(journal_t *journal, tran
INIT_LIST_HEAD(&transaction->t_io_list);
INIT_LIST_HEAD(&transaction->t_shadow_list);
INIT_LIST_HEAD(&transaction->t_logctl_list);
+ INIT_LIST_HEAD(&transaction->t_cplist);

/* Set up the commit timer for the new transaction. */
journal->j_commit_timer->expires = transaction->t_expires;
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/include/linux/jbd.h 2.6.13-mm1/include/linux/jbd.h
--- 2.6.13-mm1.old/include/linux/jbd.h 2005-09-04 23:32:35.000000000 +0900
+++ 2.6.13-mm1/include/linux/jbd.h 2005-09-04 23:33:15.000000000 +0900
@@ -545,7 +545,7 @@ struct transaction_s
* Forward and backward links for the circular list of all transactions
* awaiting checkpoint. [j_list_lock]
*/
- transaction_t *t_cpnext, *t_cpprev;
+ struct list_head t_cplist;

/*
* When will the transaction expire (become due for commit), in jiffies?
@@ -667,7 +667,7 @@ struct journal_s
* ... and a linked circular list of all transactions waiting for
* checkpointing. [j_list_lock]
*/
- transaction_t *j_checkpoint_transactions;
+ struct list_head j_checkpoint_transactions;

/*
* Wait queue for waiting for a locked transaction to start committing,

2005-09-09 08:53:38

by Akinobu Mita

[permalink] [raw]
Subject: [-mm PATCH 6/6] jbd: use list_head for a transaction checkpoint list

use struct list_head for doubly-linked list of buffers still remaining to be
flushed before an old transaction can be checkpointed.

Signed-off-by: Akinobu Mita <[email protected]>

---

fs/jbd/checkpoint.c | 119 +++++++------------------------------------
fs/jbd/commit.c | 4 -
fs/jbd/journal.c | 1
fs/jbd/transaction.c | 2
include/linux/jbd.h | 4 -
include/linux/journal-head.h | 2
6 files changed, 30 insertions(+), 102 deletions(-)

diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/checkpoint.c 2.6.13-mm1/fs/jbd/checkpoint.c
--- 2.6.13-mm1.old/fs/jbd/checkpoint.c 2005-09-05 03:21:20.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/checkpoint.c 2005-09-05 03:21:33.000000000 +0900
@@ -22,71 +22,7 @@
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
-
-/*
- * Unlink a buffer from a transaction checkpoint list.
- *
- * Called with j_list_lock held.
- */
-
-static void __buffer_unlink_first(struct journal_head *jh)
-{
- transaction_t *transaction;
-
- transaction = jh->b_cp_transaction;
-
- jh->b_cpnext->b_cpprev = jh->b_cpprev;
- jh->b_cpprev->b_cpnext = jh->b_cpnext;
- if (transaction->t_checkpoint_list == jh) {
- transaction->t_checkpoint_list = jh->b_cpnext;
- if (transaction->t_checkpoint_list == jh)
- transaction->t_checkpoint_list = NULL;
- }
-}
-
-/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-
-static inline void __buffer_unlink(struct journal_head *jh)
-{
- transaction_t *transaction;
-
- transaction = jh->b_cp_transaction;
-
- __buffer_unlink_first(jh);
- if (transaction->t_checkpoint_io_list == jh) {
- transaction->t_checkpoint_io_list = jh->b_cpnext;
- if (transaction->t_checkpoint_io_list == jh)
- transaction->t_checkpoint_io_list = NULL;
- }
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
- transaction_t *transaction;
-
- transaction = jh->b_cp_transaction;
- __buffer_unlink_first(jh);
-
- if (!transaction->t_checkpoint_io_list) {
- jh->b_cpnext = jh->b_cpprev = jh;
- } else {
- jh->b_cpnext = transaction->t_checkpoint_io_list;
- jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
- jh->b_cpprev->b_cpnext = jh;
- jh->b_cpnext->b_cpprev = jh;
- }
- transaction->t_checkpoint_io_list = jh;
-}
+#include <linux/list.h>

/*
* Try to release a checkpointed buffer from its transaction.
@@ -185,8 +121,9 @@ restart:
t_cplist) != transaction ||
transaction->t_tid != this_tid)
return;
- while (!released && transaction->t_checkpoint_io_list) {
- jh = transaction->t_checkpoint_io_list;
+ while (!released && !list_empty(&transaction->t_checkpoint_io_list)) {
+ jh = list_entry(transaction->t_checkpoint_io_list.next,
+ struct journal_head, b_cplist);
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
@@ -288,7 +225,9 @@ static int __process_buffer(journal_t *j
J_ASSERT_BH(bh, !buffer_jwrite(bh));
set_buffer_jwrite(bh);
bhs[*batch_count] = bh;
- __buffer_relink_io(jh);
+ list_del(&jh->b_cplist);
+ list_add(&jh->b_cplist,
+ &jh->b_cp_transaction->t_checkpoint_io_list);
jbd_unlock_bh_state(bh);
(*batch_count)++;
if (*batch_count == NR_BATCH) {
@@ -350,10 +289,11 @@ restart:
struct journal_head *jh;
int retry = 0;

- while (!retry && transaction->t_checkpoint_list) {
+ while (!retry && !list_empty(&transaction->t_checkpoint_list)) {
struct buffer_head *bh;

- jh = transaction->t_checkpoint_list;
+ jh = list_entry(transaction->t_checkpoint_list.next,
+ struct journal_head, b_cplist);
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
@@ -488,20 +428,14 @@ int cleanup_journal_tail(journal_t *jour
* Returns number of bufers reaped (for debug)
*/

-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+static int journal_clean_one_cp_list(struct list_head *head, int *released)
{
- struct journal_head *last_jh;
- struct journal_head *next_jh = jh;
+ struct journal_head *jh, *next_jh;
int ret, freed = 0;

*released = 0;
- if (!jh)
- return 0;

- last_jh = jh->b_cpprev;
- do {
- jh = next_jh;
- next_jh = jh->b_cpnext;
+ list_for_each_entry_safe(jh, next_jh, head, b_cplist) {
/* Use trylock because of the ranking */
if (jbd_trylock_bh_state(jh2bh(jh))) {
ret = __try_to_free_cp_buf(jh);
@@ -520,7 +454,7 @@ static int journal_clean_one_cp_list(str
*/
if (need_resched())
return freed;
- } while (jh != last_jh);
+ }

return freed;
}
@@ -542,7 +476,7 @@ int __journal_clean_checkpoint_list(jour

list_for_each_entry_safe(transaction, next_transaction,
&journal->j_checkpoint_transactions, t_cplist) {
- ret += journal_clean_one_cp_list(transaction->
+ ret += journal_clean_one_cp_list(&transaction->
t_checkpoint_list, &released);
if (need_resched())
goto out;
@@ -553,7 +487,7 @@ int __journal_clean_checkpoint_list(jour
* t_checkpoint_list with removing the buffer from the list as
* we can possibly see not yet submitted buffers on io_list
*/
- ret += journal_clean_one_cp_list(transaction->
+ ret += journal_clean_one_cp_list(&transaction->
t_checkpoint_io_list, &released);
if (need_resched())
goto out;
@@ -596,11 +530,11 @@ int __journal_remove_checkpoint(struct j
}
journal = transaction->t_journal;

- __buffer_unlink(jh);
+ list_del(&jh->b_cplist);
jh->b_cp_transaction = NULL;

- if (transaction->t_checkpoint_list != NULL ||
- transaction->t_checkpoint_io_list != NULL)
+ if (!list_empty(&transaction->t_checkpoint_list) ||
+ !list_empty(&transaction->t_checkpoint_io_list))
goto out;
JBUFFER_TRACE(jh, "transaction has no more buffers");

@@ -648,16 +582,7 @@ void __journal_insert_checkpoint(struct
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

jh->b_cp_transaction = transaction;
-
- if (!transaction->t_checkpoint_list) {
- jh->b_cpnext = jh->b_cpprev = jh;
- } else {
- jh->b_cpnext = transaction->t_checkpoint_list;
- jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
- jh->b_cpprev->b_cpnext = jh;
- jh->b_cpnext->b_cpprev = jh;
- }
- transaction->t_checkpoint_list = jh;
+ list_add(&jh->b_cplist, &transaction->t_checkpoint_list);
}

/*
@@ -682,8 +607,8 @@ void __journal_drop_transaction(journal_
J_ASSERT(list_empty(&transaction->t_io_list));
J_ASSERT(list_empty(&transaction->t_shadow_list));
J_ASSERT(list_empty(&transaction->t_logctl_list));
- J_ASSERT(transaction->t_checkpoint_list == NULL);
- J_ASSERT(transaction->t_checkpoint_io_list == NULL);
+ J_ASSERT(list_empty(&transaction->t_checkpoint_list));
+ J_ASSERT(list_empty(&transaction->t_checkpoint_io_list));
J_ASSERT(transaction->t_updates == 0);
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/commit.c 2.6.13-mm1/fs/jbd/commit.c
--- 2.6.13-mm1.old/fs/jbd/commit.c 2005-09-05 03:21:20.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/commit.c 2005-09-05 03:21:33.000000000 +0900
@@ -714,7 +714,7 @@ wait_for_iobuf:

J_ASSERT(list_empty(&commit_transaction->t_syncdata_list));
J_ASSERT(list_empty(&commit_transaction->t_metadata_list));
- J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
+ J_ASSERT(list_empty(&commit_transaction->t_checkpoint_list));
J_ASSERT(list_empty(&commit_transaction->t_io_list));
J_ASSERT(list_empty(&commit_transaction->t_shadow_list));
J_ASSERT(list_empty(&commit_transaction->t_logctl_list));
@@ -832,7 +832,7 @@ restart_loop:
journal->j_committing_transaction = NULL;
spin_unlock(&journal->j_state_lock);

- if (commit_transaction->t_checkpoint_list == NULL) {
+ if (list_empty(&commit_transaction->t_checkpoint_list)) {
__journal_drop_transaction(journal, commit_transaction);
} else {
list_add_tail(&commit_transaction->t_cplist,
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/journal.c 2.6.13-mm1/fs/jbd/journal.c
--- 2.6.13-mm1.old/fs/jbd/journal.c 2005-09-05 03:21:20.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/journal.c 2005-09-05 03:21:36.000000000 +0900
@@ -1763,6 +1763,7 @@ repeat:
bh->b_private = jh;
jh->b_bh = bh;
INIT_LIST_HEAD(&jh->b_list);
+ INIT_LIST_HEAD(&jh->b_cplist);
get_bh(bh);
BUFFER_TRACE(bh, "added journal_head");
}
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/fs/jbd/transaction.c 2.6.13-mm1/fs/jbd/transaction.c
--- 2.6.13-mm1.old/fs/jbd/transaction.c 2005-09-05 03:21:20.000000000 +0900
+++ 2.6.13-mm1/fs/jbd/transaction.c 2005-09-05 03:21:36.000000000 +0900
@@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran
INIT_LIST_HEAD(&transaction->t_shadow_list);
INIT_LIST_HEAD(&transaction->t_logctl_list);
INIT_LIST_HEAD(&transaction->t_cplist);
+ INIT_LIST_HEAD(&transaction->t_checkpoint_list);
+ INIT_LIST_HEAD(&transaction->t_checkpoint_io_list);

/* Set up the commit timer for the new transaction. */
journal->j_commit_timer->expires = transaction->t_expires;
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/include/linux/jbd.h 2.6.13-mm1/include/linux/jbd.h
--- 2.6.13-mm1.old/include/linux/jbd.h 2005-09-05 03:21:20.000000000 +0900
+++ 2.6.13-mm1/include/linux/jbd.h 2005-09-05 03:21:33.000000000 +0900
@@ -497,13 +497,13 @@ struct transaction_s
* Doubly-linked circular list of all buffers still to be flushed before
* this transaction can be checkpointed. [j_list_lock]
*/
- struct journal_head *t_checkpoint_list;
+ struct list_head t_checkpoint_list;

/*
* Doubly-linked circular list of all buffers submitted for IO while
* checkpointing. [j_list_lock]
*/
- struct journal_head *t_checkpoint_io_list;
+ struct list_head t_checkpoint_io_list;

/*
* Doubly-linked circular list of temporary buffers currently undergoing
diff -X 2.6.13-mm1/Documentation/dontdiff -Nurp 2.6.13-mm1.old/include/linux/journal-head.h 2.6.13-mm1/include/linux/journal-head.h
--- 2.6.13-mm1.old/include/linux/journal-head.h 2005-09-05 03:20:41.000000000 +0900
+++ 2.6.13-mm1/include/linux/journal-head.h 2005-09-05 03:21:33.000000000 +0900
@@ -86,7 +86,7 @@ struct journal_head {
* before an old transaction can be checkpointed.
* [j_list_lock]
*/
- struct journal_head *b_cpnext, *b_cpprev;
+ struct list_head b_cplist;
};

#endif /* JOURNAL_HEAD_H_INCLUDED */

2005-09-09 09:16:06

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 0/6] jbd cleanup

Akinobu Mita <[email protected]> wrote:
>
> The following 6 patches cleanup the jbd code and kill about 200 lines.
>

Thanks, but I'm not inclined to apply them.

a) Maybe 70-80% of the Linux world uses this filesystem. We need to be
very cautious in making changes to it.

b) A relatively large number of people are carrying quite large
out-of-tree patches, some of which they're hoping to merge sometime.
Admittedly more against ext3 than JBD, but there is potential here to
cause those people trouble.

Plus the switch to list_heads in journal_s has some impact on type safety
and debuggability - I considered doing it years ago but decided not to
because I found I _used_ those pointers fairly commonly in development.
list_heads are a bit of a pain in gdb (kgdb and kernel core dumps), for
example.

2005-09-09 18:16:55

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/6] jbd: remove duplicated debug print

On Fri, Sep 09, 2005 at 05:43:42PM +0900, Akinobu Mita wrote:
> remove duplicated debug print

> - jbd_debug(3, "JBD: commit phase 2\n");
> -

If you're going to do this, please renumber the rest of the "commit
phase n" messages. Or the debugging messages will look very funny.

- Ted

2005-09-10 14:36:53

by Akinobu Mita

[permalink] [raw]
Subject: Re: [PATCH 1/6] jbd: remove duplicated debug print

On Fri, Sep 09, 2005 at 02:16:49PM -0400, Theodore Ts'o wrote:
> On Fri, Sep 09, 2005 at 05:43:42PM +0900, Akinobu Mita wrote:
> > remove duplicated debug print
>
> > - jbd_debug(3, "JBD: commit phase 2\n");
> > -
>
> If you're going to do this, please renumber the rest of the "commit
> phase n" messages. Or the debugging messages will look very funny.

The second duplicated "commit phase 2" only does:

J_ASSERT (commit_transaction->t_sync_datalist == NULL);

So I thought it might be accidentaly inserted.
diff -U 9 :

--- ./fs/jbd/commit.c.orig 2005-09-10 22:09:05.000000000 +0900
+++ ./fs/jbd/commit.c 2005-09-10 22:09:25.000000000 +0900
@@ -419,20 +419,18 @@ write_out_data:
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);

if (err)
__journal_abort_hard(journal);

journal_write_revoke_records(journal, commit_transaction);

- jbd_debug(3, "JBD: commit phase 2\n");
-
/*
* If we found any dirty or locked buffers, then we should have
* looped back up to the write_out_data label. If there weren't
* any then journal_clean_data_list should have wiped the list
* clean by now, so check that it is in fact empty.
*/
J_ASSERT (commit_transaction->t_sync_datalist == NULL);

jbd_debug (3, "JBD: commit phase 3\n");

2005-09-10 14:56:09

by Akinobu Mita

[permalink] [raw]
Subject: Re: [PATCH 0/6] jbd cleanup

On Fri, Sep 09, 2005 at 02:15:22AM -0700, Andrew Morton wrote:
> Akinobu Mita <[email protected]> wrote:
> >
> > The following 6 patches cleanup the jbd code and kill about 200 lines.
> >
>
> Thanks, but I'm not inclined to apply them.
>
> a) Maybe 70-80% of the Linux world uses this filesystem. We need to be
> very cautious in making changes to it.

And we need many eyeballs.
(I've tried to understand how the jbd works several times.
But I always failed.)

> b) A relatively large number of people are carrying quite large
> out-of-tree patches, some of which they're hoping to merge sometime.
> Admittedly more against ext3 than JBD, but there is potential here to
> cause those people trouble.
>
> Plus the switch to list_heads in journal_s has some impact on type safety
> and debuggability - I considered doing it years ago but decided not to
> because I found I _used_ those pointers fairly commonly in development.
> list_heads are a bit of a pain in gdb (kgdb and kernel core dumps), for
> example.

About the debuggability of list_heads, how about adding the kind of
the following gdb macros in .gdbinit?

---

define list_entry
set $ptr=$arg0
p ($arg1 *)((char *)$ptr - (size_t) &(($arg1 *)0)->$arg2)
end

define list_entry_s
set $ptr=$arg0
p (struct $arg1 *)((char *)$ptr - (size_t) &((struct $arg1 *)0)->$arg2)
end

define to_journal_head
list_entry_s $arg0 journal_head b_list
end

2005-09-10 21:59:29

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 0/6] jbd cleanup

Akinobu Mita <[email protected]> wrote:
>
> On Fri, Sep 09, 2005 at 02:15:22AM -0700, Andrew Morton wrote:
> > Akinobu Mita <[email protected]> wrote:
> > >
> > > The following 6 patches cleanup the jbd code and kill about 200 lines.
> > >
> >
> > Thanks, but I'm not inclined to apply them.
> >
> > a) Maybe 70-80% of the Linux world uses this filesystem. We need to be
> > very cautious in making changes to it.
>
> And we need many eyeballs.

True. And the only way to really learn code is to make changes to it.

> (I've tried to understand how the jbd works several times.
> But I always failed.)

It's very hard to reverse engineer the high-level design concepts from the
implementation. And the design concepts in JBD are really complex, which
is a problem fo us.

When I first had to learn the thing 4-5 years back I sat down for a solid
week and wrote a 40-odd page how-it-works document for myself, just to
force it into my head. It was probably about 50% accurate, but it was a
useful exercise.

> About the debuggability of list_heads, how about adding the kind of
> the following gdb macros in .gdbinit?
>
> ---
>
> define list_entry
> set $ptr=$arg0
> p ($arg1 *)((char *)$ptr - (size_t) &(($arg1 *)0)->$arg2)
> end
>
> define list_entry_s
> set $ptr=$arg0
> p (struct $arg1 *)((char *)$ptr - (size_t) &((struct $arg1 *)0)->$arg2)
> end
>
> define to_journal_head
> list_entry_s $arg0 journal_head b_list
> end

Here's mine ;)

# list_entry list type member
define list_entry
set $off = (int)&(((struct $arg1 *)0)->$arg2)
set $addr = (int)$arg0
set $res = $addr - $off
printf "0x%x\n", (struct $arg1 *)$res
end