2024-04-15 06:09:09

by Dongliang Cui

[permalink] [raw]
Subject: [PATCH RESEND] block: Add ioprio to block_rq tracepoint

Sometimes we need to track the processing order of requests with
ioprio set. So the ioprio of request can be useful information.

Example:

block_rq_insert: 8,0 WS 4096 () 16573296 + 8 rt,4 [highpool[1]]
block_rq_issue: 8,0 WS 4096 () 16573296 + 8 rt,4 [kworker/7:0H]
block_rq_complete: 8,0 WS () 16573296 + 8 rt,4 [0]

Signed-off-by: Dongliang Cui <[email protected]>
---
include/linux/blktrace_api.h | 2 ++
include/trace/events/block.h | 63 ++++++++++++++++++++++--------------
kernel/trace/blktrace.c | 11 +++++++
3 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 122c62e561fc..adb0333efbdb 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -112,6 +112,8 @@ struct compat_blk_user_trace_setup {

void blk_fill_rwbs(char *rwbs, blk_opf_t opf);

+void blk_fill_ioprio(u32 ioprio, char *ioprio_class, u32 *ioprio_value);
+
static inline sector_t blk_rq_trace_sector(struct request *rq)
{
/*
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0e128ad51460..1d41fade160a 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -10,7 +10,8 @@
#include <linux/buffer_head.h>
#include <linux/tracepoint.h>

-#define RWBS_LEN 8
+#define RWBS_LEN 8
+#define IOPRIO_CLASS_LEN 8

#ifdef CONFIG_BUFFER_HEAD
DECLARE_EVENT_CLASS(block_buffer,
@@ -79,11 +80,13 @@ TRACE_EVENT(block_rq_requeue,
TP_ARGS(rq),

TP_STRUCT__entry(
- __field( dev_t, dev )
- __field( sector_t, sector )
- __field( unsigned int, nr_sector )
- __array( char, rwbs, RWBS_LEN )
- __dynamic_array( char, cmd, 1 )
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __array( char, rwbs, RWBS_LEN )
+ __array( char, ioprio_class, IOPRIO_CLASS_LEN )
+ __field( unsigned int, ioprio_value )
+ __dynamic_array( char, cmd, 1 )
),

TP_fast_assign(
@@ -92,14 +95,16 @@ TRACE_EVENT(block_rq_requeue,
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
+ blk_fill_ioprio(rq->ioprio, __entry->ioprio_class, &__entry->ioprio_value);
__get_str(cmd)[0] = '\0';
),

- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
(unsigned long long)__entry->sector,
- __entry->nr_sector, 0)
+ __entry->nr_sector, __entry->ioprio_class,
+ __entry->ioprio_value, 0)
);

DECLARE_EVENT_CLASS(block_rq_completion,
@@ -109,12 +114,14 @@ DECLARE_EVENT_CLASS(block_rq_completion,
TP_ARGS(rq, error, nr_bytes),

TP_STRUCT__entry(
- __field( dev_t, dev )
- __field( sector_t, sector )
- __field( unsigned int, nr_sector )
- __field( int , error )
- __array( char, rwbs, RWBS_LEN )
- __dynamic_array( char, cmd, 1 )
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( int, error )
+ __array( char, rwbs, RWBS_LEN )
+ __array( char, ioprio_class, IOPRIO_CLASS_LEN )
+ __field( unsigned int, ioprio_value )
+ __dynamic_array( char, cmd, 1 )
),

TP_fast_assign(
@@ -124,14 +131,16 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__entry->error = blk_status_to_errno(error);

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
+ blk_fill_ioprio(rq->ioprio, __entry->ioprio_class, &__entry->ioprio_value);
__get_str(cmd)[0] = '\0';
),

- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
(unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->error)
+ __entry->nr_sector, __entry->ioprio_class,
+ __entry->ioprio_value, __entry->error)
);

/**
@@ -176,13 +185,15 @@ DECLARE_EVENT_CLASS(block_rq,
TP_ARGS(rq),

TP_STRUCT__entry(
- __field( dev_t, dev )
- __field( sector_t, sector )
- __field( unsigned int, nr_sector )
- __field( unsigned int, bytes )
- __array( char, rwbs, RWBS_LEN )
- __array( char, comm, TASK_COMM_LEN )
- __dynamic_array( char, cmd, 1 )
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( unsigned int, bytes )
+ __array( char, rwbs, RWBS_LEN )
+ __array( char, ioprio_class, IOPRIO_CLASS_LEN )
+ __field( unsigned int, ioprio_value )
+ __array( char, comm, TASK_COMM_LEN )
+ __dynamic_array( char, cmd, 1 )
),

TP_fast_assign(
@@ -192,15 +203,17 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->bytes = blk_rq_bytes(rq);

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
+ blk_fill_ioprio(rq->ioprio, __entry->ioprio_class, &__entry->ioprio_value);
__get_str(cmd)[0] = '\0';
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),

- TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+ TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u [%s]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __entry->bytes, __get_str(cmd),
(unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm)
+ __entry->nr_sector, __entry->ioprio_class,
+ __entry->ioprio_value, __entry->comm)
);

/**
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d5d94510afd3..e55aa49f94db 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -19,6 +19,7 @@
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/blk-cgroup.h>
+#include <linux/ioprio.h>

#include "../../block/blk.h"

@@ -26,6 +27,9 @@

#include "trace_output.h"

+/* Type of ioprio */
+static char *classes[] = {"none", "rt", "be", "idle"};
+
#ifdef CONFIG_BLK_DEV_IO_TRACE

static unsigned int blktrace_seq __read_mostly = 1;
@@ -1914,5 +1918,12 @@ void blk_fill_rwbs(char *rwbs, blk_opf_t opf)
}
EXPORT_SYMBOL_GPL(blk_fill_rwbs);

+void blk_fill_ioprio(u32 ioprio, char *ioprio_class, u32 *ioprio_value)
+{
+ memcpy(ioprio_class, classes[(ioprio >> IOPRIO_CLASS_SHIFT) & 0x3], IOPRIO_CLASS_LEN);
+ *ioprio_value = ioprio & 0xff;
+}
+EXPORT_SYMBOL_GPL(blk_fill_ioprio);
+
#endif /* CONFIG_EVENT_TRACING */

--
2.25.1



2024-04-15 07:16:15

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH RESEND] block: Add ioprio to block_rq tracepoint

On Mon, 15 Apr 2024 14:07:10 +0800
Dongliang Cui <[email protected]> wrote:

> Sometimes we need to track the processing order of requests with
> ioprio set. So the ioprio of request can be useful information.
>
> Example:
>
> block_rq_insert: 8,0 WS 4096 () 16573296 + 8 rt,4 [highpool[1]]
> block_rq_issue: 8,0 WS 4096 () 16573296 + 8 rt,4 [kworker/7:0H]
> block_rq_complete: 8,0 WS () 16573296 + 8 rt,4 [0]

Note, it's up to the subsystem to take trace event patches.

>
> Signed-off-by: Dongliang Cui <[email protected]>
> ---
> include/linux/blktrace_api.h | 2 ++
> include/trace/events/block.h | 63 ++++++++++++++++++++++--------------
> kernel/trace/blktrace.c | 11 +++++++
> 3 files changed, 51 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
> index 122c62e561fc..adb0333efbdb 100644
> --- a/include/linux/blktrace_api.h
> +++ b/include/linux/blktrace_api.h
> @@ -112,6 +112,8 @@ struct compat_blk_user_trace_setup {
>
> void blk_fill_rwbs(char *rwbs, blk_opf_t opf);
>
> +void blk_fill_ioprio(u32 ioprio, char *ioprio_class, u32 *ioprio_value);
> +
> static inline sector_t blk_rq_trace_sector(struct request *rq)
> {
> /*
> diff --git a/include/trace/events/block.h b/include/trace/events/block.h
> index 0e128ad51460..1d41fade160a 100644
> --- a/include/trace/events/block.h
> +++ b/include/trace/events/block.h
> @@ -10,7 +10,8 @@
> #include <linux/buffer_head.h>
> #include <linux/tracepoint.h>
>
> -#define RWBS_LEN 8
> +#define RWBS_LEN 8
> +#define IOPRIO_CLASS_LEN 8
>
> #ifdef CONFIG_BUFFER_HEAD
> DECLARE_EVENT_CLASS(block_buffer,
> @@ -79,11 +80,13 @@ TRACE_EVENT(block_rq_requeue,
> TP_ARGS(rq),
>
> TP_STRUCT__entry(
> - __field( dev_t, dev )
> - __field( sector_t, sector )
> - __field( unsigned int, nr_sector )
> - __array( char, rwbs, RWBS_LEN )
> - __dynamic_array( char, cmd, 1 )
> + __field( dev_t, dev )
> + __field( sector_t, sector )
> + __field( unsigned int, nr_sector )
> + __array( char, rwbs, RWBS_LEN )
> + __array( char, ioprio_class, IOPRIO_CLASS_LEN )
> + __field( unsigned int, ioprio_value )
> + __dynamic_array( char, cmd, 1 )
> ),
>
> TP_fast_assign(
> @@ -92,14 +95,16 @@ TRACE_EVENT(block_rq_requeue,
> __entry->nr_sector = blk_rq_trace_nr_sectors(rq);
>
> blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
> + blk_fill_ioprio(rq->ioprio, __entry->ioprio_class, &__entry->ioprio_value);
> __get_str(cmd)[0] = '\0';
> ),
>
> - TP_printk("%d,%d %s (%s) %llu + %u [%d]",
> + TP_printk("%d,%d %s (%s) %llu + %u %s,%u [%d]",
> MAJOR(__entry->dev), MINOR(__entry->dev),
> __entry->rwbs, __get_str(cmd),
> (unsigned long long)__entry->sector,
> - __entry->nr_sector, 0)
> + __entry->nr_sector, __entry->ioprio_class,
> + __entry->ioprio_value, 0)
> );


> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index d5d94510afd3..e55aa49f94db 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -19,6 +19,7 @@
> #include <linux/uaccess.h>
> #include <linux/list.h>
> #include <linux/blk-cgroup.h>
> +#include <linux/ioprio.h>
>
> #include "../../block/blk.h"
>
> @@ -26,6 +27,9 @@
>
> #include "trace_output.h"
>
> +/* Type of ioprio */
> +static char *classes[] = {"none", "rt", "be", "idle"};
> +
> #ifdef CONFIG_BLK_DEV_IO_TRACE
>
> static unsigned int blktrace_seq __read_mostly = 1;
> @@ -1914,5 +1918,12 @@ void blk_fill_rwbs(char *rwbs, blk_opf_t opf)
> }
> EXPORT_SYMBOL_GPL(blk_fill_rwbs);
>
> +void blk_fill_ioprio(u32 ioprio, char *ioprio_class, u32 *ioprio_value)
> +{
> + memcpy(ioprio_class, classes[(ioprio >> IOPRIO_CLASS_SHIFT) & 0x3], IOPRIO_CLASS_LEN);
> + *ioprio_value = ioprio & 0xff;
> +}
> +EXPORT_SYMBOL_GPL(blk_fill_ioprio);
> +
> #endif /* CONFIG_EVENT_TRACING */
>

Instead of doing:

blk_fill_ioprio(rq->ioprio, __entry->ioprio_class, &__entry->ioprio_value);


Why not do:

__field( int, ioprio_class )

[..]

__entry->ioprio_class = rq->ioprio >> IOPRIO_CLASS_SHIFT & 0x3;

[..]

TP_printk("%d,%d %s (%s) %llu + %u %s,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
(unsigned long long)__entry->sector,
__entry->nr_sector,
__print_symbolic(__entry->ioprio_class,
{ 0, "none" }, { 1, "rt" }, { 2, "be" }, { 3, "idle" }),
__entry->ioprio_value, 0)
);

?

Then you don't need to save a string into the buffer; you can just print
the mapping when the buffer is read.

-- Steve