Commit 83a712e0afef ("sunrpc: add some tracepoints around enqueue and
dequeue of svc_xprt") merged in v3.19-rc1 added some new trace events,
however a couple of them printed data from dereferenced pointers rather
than storing the data in the struct. In general this isn't safe as the
print may not happen until later when the data may have changed or been
freed, and nor is it portable as userland won't have access to that
other data in order to interpret the trace data itself.
Fix by copying the data into the struct and printing from there.
Fixes: 83a712e0afef ("sunrpc: add some tracepoints around enqueue ...")
Signed-off-by: James Hogan <[email protected]>
Cc: Jeff Layton <[email protected]>
Cc: J. Bruce Fields <[email protected]>
Cc: Trond Myklebust <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: <[email protected]> # v3.19+
---
Build tested only. Perhaps somebody familiar with the code could give it
a spin to sanity check the trace output.
---
include/trace/events/sunrpc.h | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index b9c1dc6c825a..47dfcaebfaaf 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -503,18 +503,22 @@ TRACE_EVENT(svc_xprt_do_enqueue,
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
- __field(struct svc_rqst *, rqst)
+ __field_struct(struct sockaddr_storage, ss)
+ __field(unsigned long, flags);
+ __field(int, pid)
),
TP_fast_assign(
__entry->xprt = xprt;
- __entry->rqst = rqst;
+ xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
+ __entry->flags = xprt ? xprt->xpt_flags : 0;
+ __entry->pid = rqst ? rqst->rq_task->pid : 0;
),
TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
- (struct sockaddr *)&__entry->xprt->xpt_remote,
- __entry->rqst ? __entry->rqst->rq_task->pid : 0,
- show_svc_xprt_flags(__entry->xprt->xpt_flags))
+ (struct sockaddr *)&__entry->ss,
+ __entry->pid,
+ show_svc_xprt_flags(__entry->flags))
);
TRACE_EVENT(svc_xprt_dequeue,
@@ -562,17 +566,21 @@ TRACE_EVENT(svc_handle_xprt,
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
+ __field_struct(struct sockaddr_storage, ss)
+ __field(unsigned long, flags);
__field(int, len)
),
TP_fast_assign(
__entry->xprt = xprt;
+ xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
+ __entry->flags = xprt ? xprt->xpt_flags : 0;
__entry->len = len;
),
TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
- (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len,
- show_svc_xprt_flags(__entry->xprt->xpt_flags))
+ (struct sockaddr *)&__entry->ss, __entry->len,
+ show_svc_xprt_flags(__entry->flags))
);
#endif /* _TRACE_SUNRPC_H */
--
2.0.5
On Tue, Feb 24, 2015 at 6:47 AM, James Hogan <[email protected]> wrote:
> Commit 83a712e0afef ("sunrpc: add some tracepoints around enqueue and
> dequeue of svc_xprt") merged in v3.19-rc1 added some new trace events,
> however a couple of them printed data from dereferenced pointers rather
> than storing the data in the struct. In general this isn't safe as the
> print may not happen until later when the data may have changed or been
> freed, and nor is it portable as userland won't have access to that
> other data in order to interpret the trace data itself.
>
> Fix by copying the data into the struct and printing from there.
>
> Fixes: 83a712e0afef ("sunrpc: add some tracepoints around enqueue ...")
> Signed-off-by: James Hogan <[email protected]>
> Cc: Jeff Layton <[email protected]>
> Cc: J. Bruce Fields <[email protected]>
> Cc: Trond Myklebust <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: <[email protected]> # v3.19+
> ---
> Build tested only. Perhaps somebody familiar with the code could give it
> a spin to sanity check the trace output.
> ---
> include/trace/events/sunrpc.h | 22 +++++++++++++++-------
> 1 file changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index b9c1dc6c825a..47dfcaebfaaf 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -503,18 +503,22 @@ TRACE_EVENT(svc_xprt_do_enqueue,
>
> TP_STRUCT__entry(
> __field(struct svc_xprt *, xprt)
> - __field(struct svc_rqst *, rqst)
> + __field_struct(struct sockaddr_storage, ss)
> + __field(unsigned long, flags);
> + __field(int, pid)
> ),
>
> TP_fast_assign(
> __entry->xprt = xprt;
> - __entry->rqst = rqst;
> + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
How could xprt ever be NULL here, and even if it was, why the esoteric
C instead of a simple 'if' statement?
> + __entry->flags = xprt ? xprt->xpt_flags : 0;
> + __entry->pid = rqst ? rqst->rq_task->pid : 0;
> ),
>
> TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
> - (struct sockaddr *)&__entry->xprt->xpt_remote,
> - __entry->rqst ? __entry->rqst->rq_task->pid : 0,
> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
> + (struct sockaddr *)&__entry->ss,
> + __entry->pid,
> + show_svc_xprt_flags(__entry->flags))
> );
>
> TRACE_EVENT(svc_xprt_dequeue,
> @@ -562,17 +566,21 @@ TRACE_EVENT(svc_handle_xprt,
>
> TP_STRUCT__entry(
> __field(struct svc_xprt *, xprt)
> + __field_struct(struct sockaddr_storage, ss)
> + __field(unsigned long, flags);
> __field(int, len)
> ),
>
> TP_fast_assign(
> __entry->xprt = xprt;
> + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
Ditto.
> + __entry->flags = xprt ? xprt->xpt_flags : 0;
> __entry->len = len;
> ),
>
> TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
> - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len,
> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
> + (struct sockaddr *)&__entry->ss, __entry->len,
> + show_svc_xprt_flags(__entry->flags))
> );
> #endif /* _TRACE_SUNRPC_H */
>
> --
> 2.0.5
>
--
Trond Myklebust
Linux NFS client maintainer, PrimaryData
[email protected]
Hi Trond,
On 24/02/15 13:36, Trond Myklebust wrote:
> On Tue, Feb 24, 2015 at 6:47 AM, James Hogan <[email protected]> wrote:
>> Commit 83a712e0afef ("sunrpc: add some tracepoints around enqueue and
>> dequeue of svc_xprt") merged in v3.19-rc1 added some new trace events,
>> however a couple of them printed data from dereferenced pointers rather
>> than storing the data in the struct. In general this isn't safe as the
>> print may not happen until later when the data may have changed or been
>> freed, and nor is it portable as userland won't have access to that
>> other data in order to interpret the trace data itself.
>>
>> Fix by copying the data into the struct and printing from there.
>>
>> Fixes: 83a712e0afef ("sunrpc: add some tracepoints around enqueue ...")
>> Signed-off-by: James Hogan <[email protected]>
>> Cc: Jeff Layton <[email protected]>
>> Cc: J. Bruce Fields <[email protected]>
>> Cc: Trond Myklebust <[email protected]>
>> Cc: Steven Rostedt <[email protected]>
>> Cc: Ingo Molnar <[email protected]>
>> Cc: <[email protected]> # v3.19+
>> ---
>> Build tested only. Perhaps somebody familiar with the code could give it
>> a spin to sanity check the trace output.
>> ---
>> include/trace/events/sunrpc.h | 22 +++++++++++++++-------
>> 1 file changed, 15 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
>> index b9c1dc6c825a..47dfcaebfaaf 100644
>> --- a/include/trace/events/sunrpc.h
>> +++ b/include/trace/events/sunrpc.h
>> @@ -503,18 +503,22 @@ TRACE_EVENT(svc_xprt_do_enqueue,
>>
>> TP_STRUCT__entry(
>> __field(struct svc_xprt *, xprt)
>> - __field(struct svc_rqst *, rqst)
>> + __field_struct(struct sockaddr_storage, ss)
>> + __field(unsigned long, flags);
>> + __field(int, pid)
>> ),
>>
>> TP_fast_assign(
>> __entry->xprt = xprt;
>> - __entry->rqst = rqst;
>> + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
>
> How could xprt ever be NULL here, and even if it was, why the esoteric
> C instead of a simple 'if' statement?
Yeh, I had a straight forward unconditional assignment before, but I
changed it purely for consistency with the svc_xprt_dequeue trace event.
I don't pretend to understand the details of what is being traced
though, so I'm happy to change it if required.
Thanks
James
>
>> + __entry->flags = xprt ? xprt->xpt_flags : 0;
>> + __entry->pid = rqst ? rqst->rq_task->pid : 0;
>> ),
>>
>> TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
>> - (struct sockaddr *)&__entry->xprt->xpt_remote,
>> - __entry->rqst ? __entry->rqst->rq_task->pid : 0,
>> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
>> + (struct sockaddr *)&__entry->ss,
>> + __entry->pid,
>> + show_svc_xprt_flags(__entry->flags))
>> );
>>
>> TRACE_EVENT(svc_xprt_dequeue,
>> @@ -562,17 +566,21 @@ TRACE_EVENT(svc_handle_xprt,
>>
>> TP_STRUCT__entry(
>> __field(struct svc_xprt *, xprt)
>> + __field_struct(struct sockaddr_storage, ss)
>> + __field(unsigned long, flags);
>> __field(int, len)
>> ),
>>
>> TP_fast_assign(
>> __entry->xprt = xprt;
>> + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
>
> Ditto.
>
>> + __entry->flags = xprt ? xprt->xpt_flags : 0;
>> __entry->len = len;
>> ),
>>
>> TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
>> - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len,
>> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
>> + (struct sockaddr *)&__entry->ss, __entry->len,
>> + show_svc_xprt_flags(__entry->flags))
>> );
>> #endif /* _TRACE_SUNRPC_H */
>>
>> --
>> 2.0.5
>>
>
>
>
On Tue, 24 Feb 2015 11:47:56 +0000
James Hogan <[email protected]> wrote:
> TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
> - (struct sockaddr *)&__entry->xprt->xpt_remote,
There's actually nothing wrong with the above even if xprt is NULL.
It's not dereferencing the structure, it is just getting the address of
what would be dereference.
> - __entry->rqst ? __entry->rqst->rq_task->pid : 0,
> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
> + (struct sockaddr *)&__entry->ss,
The above is meaningless. You just printed the address of the ring
buffer and this will be different (and useless) every time.
> + __entry->pid,
> + show_svc_xprt_flags(__entry->flags))
> );
>
> TRACE_EVENT(svc_xprt_dequeue,
> @@ -562,17 +566,21 @@ TRACE_EVENT(svc_handle_xprt,
>
> TP_STRUCT__entry(
> __field(struct svc_xprt *, xprt)
> + __field_struct(struct sockaddr_storage, ss)
> + __field(unsigned long, flags);
> __field(int, len)
> ),
>
> TP_fast_assign(
> __entry->xprt = xprt;
> + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
> + __entry->flags = xprt ? xprt->xpt_flags : 0;
> __entry->len = len;
> ),
>
> TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
> - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len,
> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
> + (struct sockaddr *)&__entry->ss, __entry->len,
Ditto.
Don't use field_struct() unless you really know what you are doing.
This is copying the entire struct into the ring buffer and only using
the address of that struct. Which not only is useless, but wastes a lot
of space in the ring buffer.
-- Steve
> + show_svc_xprt_flags(__entry->flags))
> );
> #endif /* _TRACE_SUNRPC_H */
>
Hi Steven,
On 24/02/15 14:09, Steven Rostedt wrote:
> On Tue, 24 Feb 2015 11:47:56 +0000
> James Hogan <[email protected]> wrote:
>
>
>
>> TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
>> - (struct sockaddr *)&__entry->xprt->xpt_remote,
>
> There's actually nothing wrong with the above even if xprt is NULL.
> It's not dereferencing the structure, it is just getting the address of
> what would be dereference.
I think that corresponds to the %pIScp format which I presumed does
dereference the pointer?
Looking at Documentation/printk-formats.txt I see:
> IPv4/IPv6 addresses (generic, with port, flowinfo, scope):
> ...
> %pISpc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345
Same applies below. Should these formats still be avoided?
Thanks for reviewing,
Cheers
James
>
>> - __entry->rqst ? __entry->rqst->rq_task->pid : 0,
>> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
>> + (struct sockaddr *)&__entry->ss,
>
> The above is meaningless. You just printed the address of the ring
> buffer and this will be different (and useless) every time.
>
>> + __entry->pid,
>> + show_svc_xprt_flags(__entry->flags))
>> );
>>
>> TRACE_EVENT(svc_xprt_dequeue,
>> @@ -562,17 +566,21 @@ TRACE_EVENT(svc_handle_xprt,
>>
>> TP_STRUCT__entry(
>> __field(struct svc_xprt *, xprt)
>> + __field_struct(struct sockaddr_storage, ss)
>> + __field(unsigned long, flags);
>> __field(int, len)
>> ),
>>
>> TP_fast_assign(
>> __entry->xprt = xprt;
>> + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
>> + __entry->flags = xprt ? xprt->xpt_flags : 0;
>> __entry->len = len;
>> ),
>>
>> TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
>> - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len,
>> - show_svc_xprt_flags(__entry->xprt->xpt_flags))
>> + (struct sockaddr *)&__entry->ss, __entry->len,
>
> Ditto.
>
> Don't use field_struct() unless you really know what you are doing.
> This is copying the entire struct into the ring buffer and only using
> the address of that struct. Which not only is useless, but wastes a lot
> of space in the ring buffer.
>
> -- Steve
>
>> + show_svc_xprt_flags(__entry->flags))
>> );
>> #endif /* _TRACE_SUNRPC_H */
>>
>
On Tue, 24 Feb 2015 14:19:07 +0000
James Hogan <[email protected]> wrote:
> Hi Steven,
>
> On 24/02/15 14:09, Steven Rostedt wrote:
> > On Tue, 24 Feb 2015 11:47:56 +0000
> > James Hogan <[email protected]> wrote:
> >
> >
> >
> >> TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
> >> - (struct sockaddr *)&__entry->xprt->xpt_remote,
> >
> > There's actually nothing wrong with the above even if xprt is NULL.
> > It's not dereferencing the structure, it is just getting the address of
> > what would be dereference.
>
> I think that corresponds to the %pIScp format which I presumed does
> dereference the pointer?
Ah, I missed the "__entry->xprt" part :-p
>
> Looking at Documentation/printk-formats.txt I see:
>
> > IPv4/IPv6 addresses (generic, with port, flowinfo, scope):
> > ...
> > %pISpc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345
>
> Same applies below. Should these formats still be avoided?
No, we can still use them.
I assume that the %pISpc expects a "struct sockaddr" passed to it as
that is what is typecast in the print. We might as well make the ss into
that structure instead of a struct sockaddr_storage, as it looks like
the storage one is much larger, and we only care about the sockaddr
part. Let's not waste the ring buffer if we don't need to.
-- Steve
On Tue, 24 Feb 2015 08:36:48 -0500
Trond Myklebust <[email protected]> wrote:
> > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> > index b9c1dc6c825a..47dfcaebfaaf 100644
> > --- a/include/trace/events/sunrpc.h
> > +++ b/include/trace/events/sunrpc.h
> > @@ -503,18 +503,22 @@ TRACE_EVENT(svc_xprt_do_enqueue,
> >
> > TP_STRUCT__entry(
> > __field(struct svc_xprt *, xprt)
> > - __field(struct svc_rqst *, rqst)
> > + __field_struct(struct sockaddr_storage, ss)
> > + __field(unsigned long, flags);
> > + __field(int, pid)
> > ),
> >
> > TP_fast_assign(
> > __entry->xprt = xprt;
> > - __entry->rqst = rqst;
> > + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
>
> How could xprt ever be NULL here, and even if it was, why the esoteric
> C instead of a simple 'if' statement?
If it can never be NULL here, then we do not need the conditional.
-- Steve
On 2/24/15 7:48 AM, Steven Rostedt wrote:
> I assume that the %pISpc expects a "struct sockaddr" passed to it as
> that is what is typecast in the print. We might as well make the ss into
> that structure instead of a struct sockaddr_storage, as it looks like
> the storage one is much larger, and we only care about the sockaddr
> part. Let's not waste the ring buffer if we don't need to.
Per lib/vsprintf.c, it expects either a sockaddr_in or sockaddr_in6:
case 'S': {
const union {
struct sockaddr raw;
struct sockaddr_in v4;
struct sockaddr_in6 v6;
} *sa = ptr;
sockaddr_in6 > sockaddr so ss should be declared accordingly.
David
On Tue, 24 Feb 2015 09:03:09 -0700
David Ahern <[email protected]> wrote:
> On 2/24/15 7:48 AM, Steven Rostedt wrote:
> > I assume that the %pISpc expects a "struct sockaddr" passed to it as
> > that is what is typecast in the print. We might as well make the ss into
> > that structure instead of a struct sockaddr_storage, as it looks like
> > the storage one is much larger, and we only care about the sockaddr
> > part. Let's not waste the ring buffer if we don't need to.
>
> Per lib/vsprintf.c, it expects either a sockaddr_in or sockaddr_in6:
>
> case 'S': {
> const union {
> struct sockaddr raw;
> struct sockaddr_in v4;
> struct sockaddr_in6 v6;
> } *sa = ptr;
>
> sockaddr_in6 > sockaddr so ss should be declared accordingly.
Agreed. Thanks for pointing that out.
-- Steve