pnfs_update_layout is really the "nexus" of layout handling. If it
returns NULL then we end up going through the MDS. This patch adds
some tracepoints to that function that allow us to determine the
cause when we end up going through the MDS unexpectedly.
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/nfs4trace.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfs/pnfs.c | 38 +++++++++++++++++++++++++++++------
include/linux/nfs4.h | 14 +++++++++++++
3 files changed, 102 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 671cf68fe56b..26a78f48c2d6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1198,6 +1198,62 @@ DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit);
DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn);
DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close);
+#define show_pnfs_update_layout_reason(reason) \
+ __print_symbolic(reason, \
+ { PNFS_UPDATE_LAYOUT_UNKNOWN, "unknown" }, \
+ { PNFS_UPDATE_LAYOUT_NO_PNFS, "no pnfs" }, \
+ { PNFS_UPDATE_LAYOUT_RD_ZEROLEN, "read+zerolen" }, \
+ { PNFS_UPDATE_LAYOUT_MDSTHRESH, "mdsthresh" }, \
+ { PNFS_UPDATE_LAYOUT_NOMEM, "nomem" }, \
+ { PNFS_UPDATE_LAYOUT_BULK_RECALL, "bulk recall" }, \
+ { PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, "io test fail" }, \
+ { PNFS_UPDATE_LAYOUT_FOUND_CACHED, "found cached" }, \
+ { PNFS_UPDATE_LAYOUT_RETURN, "layoutreturn" }, \
+ { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \
+ { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" })
+
+TRACE_EVENT(pnfs_update_layout,
+ TP_PROTO(struct inode *inode,
+ loff_t pos,
+ u64 count,
+ enum pnfs_iomode iomode,
+ struct pnfs_layout_segment *lseg,
+ enum pnfs_update_layout_reason reason
+ ),
+ TP_ARGS(inode, pos, count, iomode, lseg, reason),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, fileid)
+ __field(u32, fhandle)
+ __field(loff_t, pos)
+ __field(u64, count)
+ __field(enum pnfs_iomode, iomode)
+ __field(struct pnfs_layout_segment *, lseg)
+ __field(enum pnfs_update_layout_reason, reason)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->pos = pos;
+ __entry->count = count;
+ __entry->iomode = iomode;
+ __entry->lseg = lseg;
+ __entry->reason = reason;
+ ),
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "iomode=%s pos=%llu count=%llu lseg=%p (%s)",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ show_pnfs_iomode(__entry->iomode),
+ (unsigned long long)__entry->pos,
+ (unsigned long long)__entry->count, __entry->lseg,
+ show_pnfs_update_layout_reason(__entry->reason)
+ )
+);
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* _TRACE_NFS4_H */
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5a8ae2125b50..3688189ac2b2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1520,14 +1520,23 @@ pnfs_update_layout(struct inode *ino,
struct pnfs_layout_segment *lseg = NULL;
bool first;
- if (!pnfs_enabled_sb(NFS_SERVER(ino)))
+ if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_NO_PNFS);
goto out;
+ }
- if (iomode == IOMODE_READ && i_size_read(ino) == 0)
+ if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
goto out;
+ }
- if (pnfs_within_mdsthreshold(ctx, ino, iomode))
+ if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_MDSTHRESH);
goto out;
+ }
lookup_again:
first = false;
@@ -1535,19 +1544,26 @@ lookup_again:
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
spin_unlock(&ino->i_lock);
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_NOMEM);
goto out;
}
/* Do we even need to bother with this? */
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_BULK_RECALL);
dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
}
/* if LAYOUTGET already failed once we don't try again */
if (pnfs_layout_io_test_failed(lo, iomode) &&
- !pnfs_should_retry_layoutget(lo))
+ !pnfs_should_retry_layoutget(lo)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
goto out_unlock;
+ }
first = list_empty(&lo->plh_segs);
if (first) {
@@ -1567,8 +1583,11 @@ lookup_again:
* already exists
*/
lseg = pnfs_find_lseg(lo, &arg);
- if (lseg)
+ if (lseg) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_FOUND_CACHED);
goto out_unlock;
+ }
}
/*
@@ -1585,11 +1604,16 @@ lookup_again:
dprintk("%s retrying\n", __func__);
goto lookup_again;
}
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_RETURN);
goto out_put_layout_hdr;
}
- if (pnfs_layoutgets_blocked(lo))
+ if (pnfs_layoutgets_blocked(lo)) {
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_BLOCKED);
goto out_unlock;
+ }
atomic_inc(&lo->plh_outstanding);
spin_unlock(&ino->i_lock);
@@ -1614,6 +1638,8 @@ lookup_again:
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
pnfs_clear_retry_layoutget(lo);
atomic_dec(&lo->plh_outstanding);
+ trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
out_put_layout_hdr:
if (first)
pnfs_clear_first_layoutget(lo);
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index e7e78537aea2..0e30f2c5ff49 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -592,4 +592,18 @@ enum data_content4 {
NFS4_CONTENT_HOLE = 1,
};
+enum pnfs_update_layout_reason {
+ PNFS_UPDATE_LAYOUT_UNKNOWN = 0,
+ PNFS_UPDATE_LAYOUT_NO_PNFS,
+ PNFS_UPDATE_LAYOUT_RD_ZEROLEN,
+ PNFS_UPDATE_LAYOUT_MDSTHRESH,
+ PNFS_UPDATE_LAYOUT_NOMEM,
+ PNFS_UPDATE_LAYOUT_BULK_RECALL,
+ PNFS_UPDATE_LAYOUT_IO_TEST_FAIL,
+ PNFS_UPDATE_LAYOUT_FOUND_CACHED,
+ PNFS_UPDATE_LAYOUT_RETURN,
+ PNFS_UPDATE_LAYOUT_BLOCKED,
+ PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET,
+};
+
#endif
--
2.5.0
Instead of displaying a layout segment pointer in these tracepoints,
let's display the layout stateid, now that Olga gave us a set of tools
for displaying stateids.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4trace.h | 23 +++++++++++++++++------
fs/nfs/pnfs.c | 20 ++++++++++----------
2 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 238925c1aafd..de5469a5b4f7 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1461,10 +1461,10 @@ TRACE_EVENT(pnfs_update_layout,
loff_t pos,
u64 count,
enum pnfs_iomode iomode,
- struct pnfs_layout_segment *lseg,
+ struct pnfs_layout_hdr *lo,
enum pnfs_update_layout_reason reason
),
- TP_ARGS(inode, pos, count, iomode, lseg, reason),
+ TP_ARGS(inode, pos, count, iomode, lo, reason),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, fileid)
@@ -1472,7 +1472,8 @@ TRACE_EVENT(pnfs_update_layout,
__field(loff_t, pos)
__field(u64, count)
__field(enum pnfs_iomode, iomode)
- __field(struct pnfs_layout_segment *, lseg)
+ __field(int, layoutstateid_seq)
+ __field(u32, layoutstateid_hash)
__field(enum pnfs_update_layout_reason, reason)
),
TP_fast_assign(
@@ -1482,18 +1483,28 @@ TRACE_EVENT(pnfs_update_layout,
__entry->pos = pos;
__entry->count = count;
__entry->iomode = iomode;
- __entry->lseg = lseg;
__entry->reason = reason;
+ if (lo != NULL) {
+ __entry->layoutstateid_seq =
+ be32_to_cpu(lo->plh_stateid->seqid);
+ __entry->layoutstateid_hash =
+ nfs_stateid_hash(lo->plh_stateid);
+ } else {
+ __entry->layoutstateid_seq = 0;
+ __entry->layoutstateid_hash = 0;
+ }
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "iomode=%s pos=%llu count=%llu lseg=%p (%s)",
+ "iomode=%s pos=%llu count=%llu "
+ "layoutstateid=%d:0x%08x (%s)",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
show_pnfs_iomode(__entry->iomode),
(unsigned long long)__entry->pos,
- (unsigned long long)__entry->count, __entry->lseg,
+ (unsigned long long)__entry->count,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash,
show_pnfs_update_layout_reason(__entry->reason)
)
);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1489065bb051..6095a8d42766 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1521,19 +1521,19 @@ pnfs_update_layout(struct inode *ino,
bool first;
if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_NO_PNFS);
goto out;
}
if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
goto out;
}
if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_MDSTHRESH);
goto out;
}
@@ -1544,14 +1544,14 @@ lookup_again:
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
spin_unlock(&ino->i_lock);
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_NOMEM);
goto out;
}
/* Do we even need to bother with this? */
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_BULK_RECALL);
dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
@@ -1560,7 +1560,7 @@ lookup_again:
/* if LAYOUTGET already failed once we don't try again */
if (pnfs_layout_io_test_failed(lo, iomode) &&
!pnfs_should_retry_layoutget(lo)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
goto out_unlock;
}
@@ -1584,7 +1584,7 @@ lookup_again:
*/
lseg = pnfs_find_lseg(lo, &arg);
if (lseg) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_FOUND_CACHED);
goto out_unlock;
}
@@ -1604,13 +1604,13 @@ lookup_again:
dprintk("%s retrying\n", __func__);
goto lookup_again;
}
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_RETURN);
goto out_put_layout_hdr;
}
if (pnfs_layoutgets_blocked(lo)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_BLOCKED);
goto out_unlock;
}
@@ -1638,7 +1638,7 @@ lookup_again:
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
pnfs_clear_retry_layoutget(lo);
atomic_dec(&lo->plh_outstanding);
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
out_put_layout_hdr:
if (first)
--
2.5.0
Instead of displaying a layout segment pointer in these tracepoints,
let's display the layout stateid, now that Olga gave us a set of tools
for displaying stateids.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4trace.c | 1 +
fs/nfs/nfs4trace.h | 23 +++++++++++++++++------
fs/nfs/pnfs.c | 20 ++++++++++----------
3 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index d774335cc8bc..2850bce19244 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -6,6 +6,7 @@
#include "internal.h"
#include "nfs4session.h"
#include "callback.h"
+#include "pnfs.h"
#define CREATE_TRACE_POINTS
#include "nfs4trace.h"
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 238925c1aafd..d08d0c84b778 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1461,10 +1461,10 @@ TRACE_EVENT(pnfs_update_layout,
loff_t pos,
u64 count,
enum pnfs_iomode iomode,
- struct pnfs_layout_segment *lseg,
+ struct pnfs_layout_hdr *lo,
enum pnfs_update_layout_reason reason
),
- TP_ARGS(inode, pos, count, iomode, lseg, reason),
+ TP_ARGS(inode, pos, count, iomode, lo, reason),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, fileid)
@@ -1472,7 +1472,8 @@ TRACE_EVENT(pnfs_update_layout,
__field(loff_t, pos)
__field(u64, count)
__field(enum pnfs_iomode, iomode)
- __field(struct pnfs_layout_segment *, lseg)
+ __field(int, layoutstateid_seq)
+ __field(u32, layoutstateid_hash)
__field(enum pnfs_update_layout_reason, reason)
),
TP_fast_assign(
@@ -1482,18 +1483,28 @@ TRACE_EVENT(pnfs_update_layout,
__entry->pos = pos;
__entry->count = count;
__entry->iomode = iomode;
- __entry->lseg = lseg;
__entry->reason = reason;
+ if (lo != NULL) {
+ __entry->layoutstateid_seq =
+ be32_to_cpu(lo->plh_stateid.seqid);
+ __entry->layoutstateid_hash =
+ nfs_stateid_hash(&lo->plh_stateid);
+ } else {
+ __entry->layoutstateid_seq = 0;
+ __entry->layoutstateid_hash = 0;
+ }
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "iomode=%s pos=%llu count=%llu lseg=%p (%s)",
+ "iomode=%s pos=%llu count=%llu "
+ "layoutstateid=%d:0x%08x (%s)",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
show_pnfs_iomode(__entry->iomode),
(unsigned long long)__entry->pos,
- (unsigned long long)__entry->count, __entry->lseg,
+ (unsigned long long)__entry->count,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash,
show_pnfs_update_layout_reason(__entry->reason)
)
);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1489065bb051..6095a8d42766 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1521,19 +1521,19 @@ pnfs_update_layout(struct inode *ino,
bool first;
if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_NO_PNFS);
goto out;
}
if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
goto out;
}
if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_MDSTHRESH);
goto out;
}
@@ -1544,14 +1544,14 @@ lookup_again:
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
spin_unlock(&ino->i_lock);
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, NULL,
PNFS_UPDATE_LAYOUT_NOMEM);
goto out;
}
/* Do we even need to bother with this? */
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_BULK_RECALL);
dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
@@ -1560,7 +1560,7 @@ lookup_again:
/* if LAYOUTGET already failed once we don't try again */
if (pnfs_layout_io_test_failed(lo, iomode) &&
!pnfs_should_retry_layoutget(lo)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
goto out_unlock;
}
@@ -1584,7 +1584,7 @@ lookup_again:
*/
lseg = pnfs_find_lseg(lo, &arg);
if (lseg) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_FOUND_CACHED);
goto out_unlock;
}
@@ -1604,13 +1604,13 @@ lookup_again:
dprintk("%s retrying\n", __func__);
goto lookup_again;
}
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_RETURN);
goto out_put_layout_hdr;
}
if (pnfs_layoutgets_blocked(lo)) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_BLOCKED);
goto out_unlock;
}
@@ -1638,7 +1638,7 @@ lookup_again:
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
pnfs_clear_retry_layoutget(lo);
atomic_dec(&lo->plh_outstanding);
- trace_pnfs_update_layout(ino, pos, count, iomode, lseg,
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
out_put_layout_hdr:
if (first)
--
2.5.0