Sometimes process will be stalled in "wait_on_bit_io(&bh->b_state,
BH_Shadow, TASK_UNINTERRUPTIBLE)" for a while, and in order to analyse
app's latency thoroughly, add a new tracepoint to track this delay.
Trace info likes below:
kworker/u32:1-100 [004] .... 690.217690: jbd2_wait_on_shadow: dev 254,17 latency(us) 3363
fsstress-2139 [013] .... 690.217830: jbd2_wait_on_shadow: dev 254,17 latency(us) 2403
fsstress-2130 [005] .... 690.218241: jbd2_wait_on_shadow: dev 254,17 latency(us) 3589
fsstress-2131 [009] .... 690.230933: jbd2_wait_on_shadow: dev 254,17 latency(us) 11799
fsstress-2132 [007] .... 690.230961: jbd2_wait_on_shadow: dev 254,17 latency(us) 11540
fsstress-2130 [005] .... 690.230965: jbd2_wait_on_shadow: dev 254,17 latency(us) 3577
fsstress-2139 [005] .... 690.230979: jbd2_wait_on_shadow: dev 254,17 latency(us) 11716
fsstress-2137 [003] .... 690.230980: jbd2_wait_on_shadow: dev 254,17 latency(us) 836
fsstress-2133 [015] .... 690.230981: jbd2_wait_on_shadow: dev 254,17 latency(us) 11341
fsstress-2130 [005] .... 690.230988: jbd2_wait_on_shadow: dev 254,17 latency(us) 21
Signed-off-by: Xiaoguang Wang <[email protected]>
---
fs/jbd2/transaction.c | 4 ++++
include/trace/events/jbd2.h | 21 +++++++++++++++++++++
2 files changed, 25 insertions(+)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index afc06daee5bb..5d7a96e10133 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -864,6 +864,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
int error;
char *frozen_buffer = NULL;
unsigned long start_lock, time_lock;
+ s64 start_us;
if (is_handle_aborted(handle))
return -EROFS;
@@ -994,7 +995,10 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
jbd_unlock_bh_state(bh);
+ start_us = ktime_to_us(ktime_get());
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
+ trace_jbd2_wait_on_shadow(bh->b_bdev->bd_dev,
+ ktime_to_us(ktime_get()) - start_us);
goto repeat;
}
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 2310b259329f..e42072c74ce9 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -380,6 +380,27 @@ TRACE_EVENT(jbd2_lock_buffer_stall,
__entry->stall_ms)
);
+TRACE_EVENT(jbd2_wait_on_shadow,
+
+ TP_PROTO(dev_t dev, unsigned long wait_us),
+
+ TP_ARGS(dev, wait_us),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field(unsigned long, wait_us )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->wait_us = wait_us;
+ ),
+
+ TP_printk("dev %d,%d latency(us) %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait_us)
+);
+
#endif /* _TRACE_JBD2_H */
/* This part must be outside protection */
--
2.17.2
In current jbd2's implemention, jbd2 won't reclaim journal space unless
free journal space is lower than specified threshold, sometimes there'll
be many processes blocked on waiting for free journal space, so here
also add a tracepoint to trace this delay:
Trace info likes below:
rm-1609 [012] .... 232.134012: jbd2_wait_on_credits: dev 254,17 latency(us) 30249
rm-1609 [012] .... 232.540123: jbd2_wait_on_credits: dev 254,17 latency(us) 45491
rm-1609 [012] .... 233.074011: jbd2_wait_on_credits: dev 254,17 latency(us) 111308
rm-1609 [012] .... 233.798669: jbd2_wait_on_credits: dev 254,17 latency(us) 54398
rm-1609 [011] .... 234.311211: jbd2_wait_on_credits: dev 254,17 latency(us) 48049
rm-1609 [011] .... 234.917501: jbd2_wait_on_credits: dev 254,17 latency(us) 51491
rm-1609 [011] .... 235.776268: jbd2_wait_on_credits: dev 254,17 latency(us) 51248
Signed-off-by: Xiaoguang Wang <[email protected]>
---
fs/jbd2/transaction.c | 6 +++++-
include/trace/events/jbd2.h | 20 ++++++++++++++++++++
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6757911a4a17..15f613ae32de 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -266,8 +266,12 @@ static int add_transaction_credits(journal_t *journal, int blocks,
read_unlock(&journal->j_state_lock);
jbd2_might_wait_for_commit(journal);
write_lock(&journal->j_state_lock);
- if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
+ if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
+ s64 start_us = ktime_to_us(ktime_get());
__jbd2_log_wait_for_space(journal);
+ trace_jbd2_wait_on_credits(journal->j_fs_dev->bd_dev,
+ ktime_to_us(ktime_get()) - start_us);
+ }
write_unlock(&journal->j_state_lock);
return 1;
}
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index f2eebd839bf2..108f320ef362 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -422,6 +422,26 @@ TRACE_EVENT(jbd2_wait_on_transaction_locked,
__entry->wait_us)
);
+TRACE_EVENT(jbd2_wait_on_credits,
+
+ TP_PROTO(dev_t dev, unsigned long wait_us),
+
+ TP_ARGS(dev, wait_us),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field(unsigned long, wait_us )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->wait_us = wait_us;
+ ),
+
+ TP_printk("dev %d,%d latency(us) %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait_us)
+);
#endif /* _TRACE_JBD2_H */
/* This part must be outside protection */
--
2.17.2
Sometimes process will be stalled in wait_transaction_locked() for a while,
also add a new tracepoint to track this delay.
Trace info likes below:
fsstress-1793 [009] .... 519.967867: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 73442
fsstress-1788 [002] .... 519.967877: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 75189
fsstress-1792 [012] .... 519.967882: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 148260
fsstress-1786 [011] .... 519.967885: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 143292
fsstress-1796 [004] .... 519.967889: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 147945
fsstress-1791 [015] .... 519.967892: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 148126
fsstress-1794 [009] .... 519.967938: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 148347
fsstress-1787 [003] .... 519.967990: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 148152
fsstress-1795 [004] .... 519.967999: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 141676
fsstress-1800 [000] .... 519.968001: jbd2_wait_on_transaction_locked: dev 254,17 latency(us) 148141
Signed-off-by: Xiaoguang Wang <[email protected]>
---
fs/jbd2/transaction.c | 3 +++
include/trace/events/jbd2.h | 21 +++++++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 5d7a96e10133..6757911a4a17 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -148,6 +148,7 @@ static void wait_transaction_locked(journal_t *journal)
DEFINE_WAIT(wait);
int need_to_start;
tid_t tid = journal->j_running_transaction->t_tid;
+ s64 start_us = ktime_to_us(ktime_get());
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
@@ -158,6 +159,8 @@ static void wait_transaction_locked(journal_t *journal)
jbd2_might_wait_for_commit(journal);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
+ trace_jbd2_wait_on_transaction_locked(journal->j_fs_dev->bd_dev,
+ ktime_to_us(ktime_get()) - start_us);
}
/*
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index e42072c74ce9..f2eebd839bf2 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -401,6 +401,27 @@ TRACE_EVENT(jbd2_wait_on_shadow,
__entry->wait_us)
);
+TRACE_EVENT(jbd2_wait_on_transaction_locked,
+
+ TP_PROTO(dev_t dev, unsigned long wait_us),
+
+ TP_ARGS(dev, wait_us),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field(unsigned long, wait_us )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev;
+ __entry->wait_us = wait_us;
+ ),
+
+ TP_printk("dev %d,%d latency(us) %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait_us)
+);
+
#endif /* _TRACE_JBD2_H */
/* This part must be outside protection */
--
2.17.2