2023-02-02 20:23:44

by Christophe JAILLET

[permalink] [raw]
Subject: [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()

There is no need to zero 'pktsize' bytes of 'buf', only the header needs
to be cleared, to be safe.
All the other bytes are already written with some memcpy() at the end of
the function.

Doing so also gives the opportunity to the compiler to avoid the memset()
call. It can be inlined now that the length is known as compile time.

Signed-off-by: Christophe JAILLET <[email protected]>
---
Just in case, here is the diff of what is generated by gcc 11.3.0 before
and after the patch.

.L736:
-# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, pktsize);
+# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
call __sanitizer_cov_trace_pc #
- xorl %esi, %esi #
- movzwl %r13w, %edx # _194, __fortify_size
- movq %rbp, %rdi # buf,
- call memset #
- leaq 104(%r12), %rax #, _259
+ movl $0, 16(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
+ leaq 104(%r12), %rax #, _295
+# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
+ movzwl %r13w, %r13d # _192, _192
+# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
+ movq $0, 0(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
+# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
+ movq %rax, %rdi # _295,
+# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
+ movq $0, 8(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
+ movq %rax, 64(%rsp) # _295, %sfp
# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
---
drivers/infiniband/hw/irdma/cm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 195aa9ea18b6..48c2a303e9ec 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -337,7 +337,7 @@ static struct irdma_puda_buf *irdma_form_ah_cm_frame(struct irdma_cm_node *cm_no

pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len;

- memset(buf, 0, pktsize);
+ memset(buf, 0, sizeof(*tcph));

sqbuf->totallen = pktsize;
sqbuf->tcphlen = sizeof(*tcph) + opts_len;
--
2.34.1



2023-04-12 16:04:07

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()

On Thu, Feb 02, 2023 at 09:23:24PM +0100, Christophe JAILLET wrote:
> There is no need to zero 'pktsize' bytes of 'buf', only the header needs
> to be cleared, to be safe.
> All the other bytes are already written with some memcpy() at the end of
> the function.
>
> Doing so also gives the opportunity to the compiler to avoid the memset()
> call. It can be inlined now that the length is known as compile time.
>
> Signed-off-by: Christophe JAILLET <[email protected]>
> ---
> Just in case, here is the diff of what is generated by gcc 11.3.0 before
> and after the patch.
>
> .L736:
> -# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, pktsize);
> +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> call __sanitizer_cov_trace_pc #
> - xorl %esi, %esi #
> - movzwl %r13w, %edx # _194, __fortify_size
> - movq %rbp, %rdi # buf,
> - call memset #
> - leaq 104(%r12), %rax #, _259
> + movl $0, 16(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> + leaq 104(%r12), %rax #, _295
> +# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> + movzwl %r13w, %r13d # _192, _192
> +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> + movq $0, 0(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> +# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> + movq %rax, %rdi # _295,
> +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> + movq $0, 8(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> + movq %rax, 64(%rsp) # _295, %sfp
> # drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> ---
> drivers/infiniband/hw/irdma/cm.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)

Shiraz??

Jason

2023-04-12 17:12:55

by Shiraz Saleem

[permalink] [raw]
Subject: RE: [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()

> Subject: Re: [PATCH] RDMA/irdma: Slightly optimize
> irdma_form_ah_cm_frame()
>
> On Thu, Feb 02, 2023 at 09:23:24PM +0100, Christophe JAILLET wrote:
> > There is no need to zero 'pktsize' bytes of 'buf', only the header
> > needs to be cleared, to be safe.
> > All the other bytes are already written with some memcpy() at the end
> > of the function.
> >
> > Doing so also gives the opportunity to the compiler to avoid the
> > memset() call. It can be inlined now that the length is known as compile time.
> >
> > Signed-off-by: Christophe JAILLET <[email protected]>
> > ---
> > Just in case, here is the diff of what is generated by gcc 11.3.0
> > before and after the patch.
> >
> > .L736:
> > -# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, pktsize);
> > +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> > call __sanitizer_cov_trace_pc #
> > - xorl %esi, %esi #
> > - movzwl %r13w, %edx # _194, __fortify_size
> > - movq %rbp, %rdi # buf,
> > - call memset #
> > - leaq 104(%r12), %rax #, _259
> > + movl $0, 16(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> > + leaq 104(%r12), %rax #, _295
> > +# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> > + movzwl %r13w, %r13d # _192, _192
> > +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> > + movq $0, 0(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> > +# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> > + movq %rax, %rdi # _295,
> > +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> > + movq $0, 8(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> > + movq %rax, 64(%rsp) # _295, %sfp
> > # drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> > ---
> > drivers/infiniband/hw/irdma/cm.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
>
> Shiraz??
>

Sorry for the delay as I didn't see this earlier.

Reviewed-by: Shiraz Saleem <[email protected]>


2023-04-12 23:08:58

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()

On Thu, Feb 02, 2023 at 09:23:24PM +0100, Christophe JAILLET wrote:
> There is no need to zero 'pktsize' bytes of 'buf', only the header needs
> to be cleared, to be safe.
> All the other bytes are already written with some memcpy() at the end of
> the function.
>
> Doing so also gives the opportunity to the compiler to avoid the memset()
> call. It can be inlined now that the length is known as compile time.
>
> Signed-off-by: Christophe JAILLET <[email protected]>
> Reviewed-by: Shiraz Saleem <[email protected]>
> ---
> Just in case, here is the diff of what is generated by gcc 11.3.0 before
> and after the patch.
>
> .L736:
> -# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, pktsize);
> +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> call __sanitizer_cov_trace_pc #
> - xorl %esi, %esi #
> - movzwl %r13w, %edx # _194, __fortify_size
> - movq %rbp, %rdi # buf,
> - call memset #
> - leaq 104(%r12), %rax #, _259
> + movl $0, 16(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> + leaq 104(%r12), %rax #, _295
> +# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> + movzwl %r13w, %r13d # _192, _192
> +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> + movq $0, 0(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> +# drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> + movq %rax, %rdi # _295,
> +# drivers/infiniband/hw/irdma/cm.c:340: memset(buf, 0, sizeof(*tcph));
> + movq $0, 8(%rbp) #, MEM <char[1:20]> [(void *)buf_114]
> + movq %rax, 64(%rsp) # _295, %sfp
> # drivers/infiniband/hw/irdma/cm.c:342: sqbuf->totallen = pktsize;
> ---
> drivers/infiniband/hw/irdma/cm.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)

Applied to for-next

Thanks,
Jason