2024-05-22 09:02:11

by Dongliang Cui

[permalink] [raw]
Subject: [PATCH v3] block: Add ioprio to block_rq tracepoint

Sometimes we need to track the processing order of requests that have
an ioprio set, so the ioprio of a request is useful information to trace.

Example:

block_rq_insert: 8,0 WS 4096 () 16573296 + 8 rt,4 [highpool[1]]
block_rq_issue: 8,0 WS 4096 () 16573296 + 8 rt,4 [kworker/7:0H]
block_rq_complete: 8,0 WS () 16573296 + 8 rt,4 [0]

Signed-off-by: Dongliang Cui <[email protected]>
---
Changes in v3:
- Change the location of the priority macro definition.
---
---
include/trace/events/block.h | 43 +++++++++++++++++++++++++++---------
1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0e128ad51460..4563c852ad65 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -9,9 +9,17 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/tracepoint.h>
+#include <uapi/linux/ioprio.h>

#define RWBS_LEN 8

+#define IOPRIO_CLASS_STRINGS \
+ { IOPRIO_CLASS_NONE, "none" }, \
+ { IOPRIO_CLASS_RT, "rt" }, \
+ { IOPRIO_CLASS_BE, "be" }, \
+ { IOPRIO_CLASS_IDLE, "idle" }, \
+ { IOPRIO_CLASS_INVALID, "invalid"}
+
#ifdef CONFIG_BUFFER_HEAD
DECLARE_EVENT_CLASS(block_buffer,

@@ -82,6 +90,8 @@ TRACE_EVENT(block_rq_requeue,
__field( dev_t, dev )
__field( sector_t, sector )
__field( unsigned int, nr_sector )
+ __field( unsigned int, ioprio_class )
+ __field( unsigned int, ioprio_value )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -90,16 +100,19 @@ TRACE_EVENT(block_rq_requeue,
__entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0;
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
+ __entry->ioprio_class = rq->ioprio >> IOPRIO_CLASS_SHIFT & 0x3;
+ __entry->ioprio_value = rq->ioprio & 0xff;

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),

- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, 0)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(__entry->ioprio_class, IOPRIO_CLASS_STRINGS),
+ __entry->ioprio_value, 0)
);

DECLARE_EVENT_CLASS(block_rq_completion,
@@ -113,6 +126,8 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( int , error )
+ __field( unsigned int, ioprio_class )
+ __field( unsigned int, ioprio_value )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -122,16 +137,19 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__entry->sector = blk_rq_pos(rq);
__entry->nr_sector = nr_bytes >> 9;
__entry->error = blk_status_to_errno(error);
+ __entry->ioprio_class = rq->ioprio >> IOPRIO_CLASS_SHIFT & 0x3;
+ __entry->ioprio_value = rq->ioprio & 0xff;

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),

- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->error)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(__entry->ioprio_class, IOPRIO_CLASS_STRINGS),
+ __entry->ioprio_value, __entry->error)
);

/**
@@ -180,8 +198,10 @@ DECLARE_EVENT_CLASS(block_rq,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( unsigned int, bytes )
+ __field( unsigned int, ioprio_class )
+ __field( unsigned int, ioprio_value )
__array( char, rwbs, RWBS_LEN )
- __array( char, comm, TASK_COMM_LEN )
+ __array( char, comm, TASK_COMM_LEN )
__dynamic_array( char, cmd, 1 )
),

@@ -190,17 +210,20 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->bytes = blk_rq_bytes(rq);
+ __entry->ioprio_class = rq->ioprio >> IOPRIO_CLASS_SHIFT & 0x3;
+ __entry->ioprio_value = rq->ioprio & 0xff;

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),

- TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+ TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u [%s]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __entry->bytes, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(__entry->ioprio_class, IOPRIO_CLASS_STRINGS),
+ __entry->ioprio_value, __entry->comm)
);

/**
--
2.25.1



2024-05-22 16:51:56

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH v3] block: Add ioprio to block_rq tracepoint

On 5/22/24 02:01, Dongliang Cui wrote:
> +#define IOPRIO_CLASS_STRINGS \
> + { IOPRIO_CLASS_NONE, "none" }, \
> + { IOPRIO_CLASS_RT, "rt" }, \
> + { IOPRIO_CLASS_BE, "be" }, \
> + { IOPRIO_CLASS_IDLE, "idle" }, \
> + { IOPRIO_CLASS_INVALID, "invalid"}
> +
> #ifdef CONFIG_BUFFER_HEAD
> DECLARE_EVENT_CLASS(block_buffer,
>
> @@ -82,6 +90,8 @@ TRACE_EVENT(block_rq_requeue,
> __field( dev_t, dev )
> __field( sector_t, sector )
> __field( unsigned int, nr_sector )
> + __field( unsigned int, ioprio_class )
> + __field( unsigned int, ioprio_value )
> __array( char, rwbs, RWBS_LEN )
> __dynamic_array( char, cmd, 1 )
> ),
> @@ -90,16 +100,19 @@ TRACE_EVENT(block_rq_requeue,
> __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0;
> __entry->sector = blk_rq_trace_sector(rq);
> __entry->nr_sector = blk_rq_trace_nr_sectors(rq);
> + __entry->ioprio_class = rq->ioprio >> IOPRIO_CLASS_SHIFT & 0x3;
> + __entry->ioprio_value = rq->ioprio & 0xff;

Why split the I/O priority field when storing it in __entry instead of when
the values are printed? Combined, the ioprio bitfields occupy 16 bits. The above
patch reserves 64 bits in __entry. I think that's overkill. Additionally, some
of the I/O priority bits are discarded by the above code before the I/O
priority information is reported.

Please split the I/O priority information into the three fields defined in
include/uapi/linux/ioprio.h (class, hint, prio) and use the macros from that
header file for splitting I/O priority information.

Thanks,

Bart.