jbd: fix a cause of __schedule_bug via blkdev_releasepage
From: Toshiyuki Okajima <[email protected]>
A cause of this problem is calling log_wait_commit() on
journal_try_to_free_buffers() with a read-lock via blkdev_releasepage(). This
logic is for uncommitted data buffers. And a read/write-lock is required for a
client usage of blkdev_releasepage.
By the way, we want to release only metadata buffers on ext3_release_metadata().
Because a page which binds to blkdev is used as metadata for ext3.
Therefore we don't need to wait for a commit on journal_try_to_free_buffers()
via ext3_release_matadata().
As a result, we add a journal_try_to_free_metadata_buffers() almost same
as journal_try_to_free_buffers() except not calling log_wait_commit.
This issue was reported by Aneesh Kumar K.V.
http://marc.info/?l=linux-ext4&m=122814568309893&w=2
Reported-by: "Aneesh Kumar K.V" <[email protected]>
Signed-off-by: Toshiyuki Okajima <[email protected]>
Cc: Balbir Singh <[email protected]>
Cc: "Theodore Ts'o" <[email protected]>
--
fs/jbd/journal.c | 1 +
fs/jbd/transaction.c | 34 ++++++++++++++++++++++++++++++----
include/linux/jbd.h | 1 +
3 files changed, 32 insertions(+), 4 deletions(-)
diff -Nurp linux-2.6.28-rc6/fs/jbd/journal.c linux-2.6.28-rc6.2/fs/jbd/journal.c
--- linux-2.6.28-rc6/fs/jbd/journal.c 2008-11-21 08:19:22.000000000 +0900
+++ linux-2.6.28-rc6.2/fs/jbd/journal.c 2008-12-02 09:54:26.000000000 +0900
@@ -79,6 +79,7 @@ EXPORT_SYMBOL(journal_wipe);
EXPORT_SYMBOL(journal_blocks_per_page);
EXPORT_SYMBOL(journal_invalidatepage);
EXPORT_SYMBOL(journal_try_to_free_buffers);
+EXPORT_SYMBOL(journal_try_to_free_metadata_buffers);
EXPORT_SYMBOL(journal_force_commit);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
diff -Nurp linux-2.6.28-rc6/fs/jbd/transaction.c linux-2.6.28-rc6.2/fs/jbd/transaction.c
--- linux-2.6.28-rc6/fs/jbd/transaction.c 2008-11-21 08:19:22.000000000 +0900
+++ linux-2.6.28-rc6.2/fs/jbd/transaction.c 2008-12-02 10:21:45.000000000 +0900
@@ -1687,12 +1687,14 @@ static void journal_wait_for_transaction
}
/**
- * int journal_try_to_free_buffers() - try to free page buffers.
+ * int __journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
* @gfp_mask: we use the mask to detect how hard should we try to release
* buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
* release the buffers.
+ * @is_metadata: If true, we won't wait for commit though __GFP_WAIT
+ * and __GFP_FS is set.
*
*
* For all the buffers on this page,
@@ -1718,14 +1720,14 @@ static void journal_wait_for_transaction
*
* Who else is affected by this? hmm... Really the only contender
* is do_get_write_access() - it could be looking at the buffer while
- * journal_try_to_free_buffer() is changing its state. But that
+ * __journal_try_to_free_buffer() is changing its state. But that
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
*
* Return 0 on failure, 1 on success
*/
-int journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t gfp_mask)
+static int __journal_try_to_free_buffers(journal_t *journal,
+ struct page *page, gfp_t gfp_mask, bool is_metadata)
{
struct buffer_head *head;
struct buffer_head *bh;
@@ -1756,6 +1758,8 @@ int journal_try_to_free_buffers(journal_
} while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
+ if (is_metadata)
+ return ret;
/*
* There are a number of places where journal_try_to_free_buffers()
@@ -1781,6 +1785,28 @@ busy:
}
/*
+ * journal_try_to_free_buffers:
+ * This is a wrapper function for __journal_try_to_free_buffers to try to
+ * release data.
+ */
+int journal_try_to_free_buffers(journal_t *journal,
+ struct page *page, gfp_t gfp_mask)
+{
+ return __journal_try_to_free_buffers(journal, page, gfp_mask, false);
+}
+
+/*
+ * journal_try_to_free_metadata_buffers:
+ * This is a wrapper function for __journal_try_to_free_buffers to try to
+ * release metadata.
+ */
+int journal_try_to_free_metadata_buffers(journal_t *journal,
+ struct page *page, gfp_t gfp_mask)
+{
+ return __journal_try_to_free_buffers(journal, page, gfp_mask, true);
+}
+
+/*
* This buffer is no longer needed. If it is on an older transaction's
* checkpoint list we need to record it on this transaction's forget list
* to pin this buffer (and hence its checkpointing transaction) down until
diff -Nurp linux-2.6.28-rc6/include/linux/jbd.h linux-2.6.28-rc6.2/include/linux/jbd.h
--- linux-2.6.28-rc6/include/linux/jbd.h 2008-11-21 08:19:22.000000000 +0900
+++ linux-2.6.28-rc6.2/include/linux/jbd.h 2008-12-02 09:58:59.000000000 +0900
@@ -893,6 +893,7 @@ extern void journal_sync_buffer (struct
extern void journal_invalidatepage(journal_t *,
struct page *, unsigned long);
extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
+extern int journal_try_to_free_metadata_buffers(journal_t *, struct page *, gfp_t);
extern int journal_stop(handle_t *);
extern int journal_flush (journal_t *);
extern void journal_lock_updates (journal_t *);