2021-08-07 00:19:38

by Kees Cook

[permalink] [raw]
Subject: [PATCH] net/mlx5e: Avoid field-overflowing memcpy()

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

Use flexible arrays instead of zero-element arrays (which look like they
are always overflowing) and split the cross-field memcpy() into two halves
that can be appropriately bounds-checked by the compiler.

We were doing:

#define ETH_HLEN 14
#define VLAN_HLEN 4
...
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
...
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
...
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg = wqe->data;
...
memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);

target is wqe->eth.inline_hdr.start (which the compiler sees as being
2 bytes in size), but copying 18, intending to write across start
(really vlan_tci, 2 bytes). The remaining 16 bytes get written into
wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr
(8 bytes).

struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl; /* 0 16 */
struct mlx5_wqe_eth_seg eth; /* 16 16 */
struct mlx5_wqe_data_seg data[]; /* 32 0 */

/* size: 32, cachelines: 1, members: 3 */
/* last cacheline: 32 bytes */
};

struct mlx5_wqe_eth_seg {
u8 swp_outer_l4_offset; /* 0 1 */
u8 swp_outer_l3_offset; /* 1 1 */
u8 swp_inner_l4_offset; /* 2 1 */
u8 swp_inner_l3_offset; /* 3 1 */
u8 cs_flags; /* 4 1 */
u8 swp_flags; /* 5 1 */
__be16 mss; /* 6 2 */
__be32 flow_table_metadata; /* 8 4 */
union {
struct {
__be16 sz; /* 12 2 */
u8 start[2]; /* 14 2 */
} inline_hdr; /* 12 4 */
struct {
__be16 type; /* 12 2 */
__be16 vlan_tci; /* 14 2 */
} insert; /* 12 4 */
__be32 trailer; /* 12 4 */
}; /* 12 4 */

/* size: 16, cachelines: 1, members: 9 */
/* last cacheline: 16 bytes */
};

struct mlx5_wqe_data_seg {
__be32 byte_count; /* 0 4 */
__be32 lkey; /* 4 4 */
__be64 addr; /* 8 8 */

/* size: 16, cachelines: 1, members: 3 */
/* last cacheline: 16 bytes */
};

So, split the memcpy() so the compiler can reason about the buffer
sizes.

"pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe
nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
code changes (i.e. only source line number induced differences and
optimizations).

Cc: Saeed Mahameed <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: Jakub Kicinski <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Kees Cook <[email protected]>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++--
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++-
2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 35668986878a..40af561c98d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -201,7 +201,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth;
- struct mlx5_wqe_data_seg data[0];
+ struct mlx5_wqe_data_seg data[];
};

struct mlx5e_rx_wqe_ll {
@@ -217,7 +217,7 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
- struct mlx5_mtt inline_mtts[0];
+ struct mlx5_mtt inline_mtts[];
};

extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 2f0df5cc1a2d..efae2444c26f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,

/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
dseg++;
--
2.30.2


2021-08-07 00:21:47

by Saeed Mahameed

[permalink] [raw]
Subject: Re: [PATCH] net/mlx5e: Avoid field-overflowing memcpy()

On Fri, 2021-08-06 at 14:50 -0700, Kees Cook wrote:
> In preparation for FORTIFY_SOURCE performing compile-time and run-
> time
> field bounds checking for memcpy(), memmove(), and memset(), avoid
> intentionally writing across neighboring fields.
>
> Use flexible arrays instead of zero-element arrays (which look like
> they
> are always overflowing) and split the cross-field memcpy() into two
> halves
> that can be appropriately bounds-checked by the compiler.
>
> We were doing:
>
>         #define ETH_HLEN  14
>         #define VLAN_HLEN  4
>         ...
>         #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
>         ...
>         struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
>         ...
>         struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
>         struct mlx5_wqe_data_seg *dseg = wqe->data;
>         ...
>         memcpy(eseg->inline_hdr.start, xdptxd->data,
> MLX5E_XDP_MIN_INLINE);
>
> target is wqe->eth.inline_hdr.start (which the compiler sees as being
> 2 bytes in size), but copying 18, intending to write across start
> (really vlan_tci, 2 bytes). The remaining 16 bytes get written into
> wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr
> (8 bytes).
>
> struct mlx5e_tx_wqe {
>         struct mlx5_wqe_ctrl_seg   ctrl;                 /*     0   
> 16 */
>         struct mlx5_wqe_eth_seg    eth;                  /*    16   
> 16 */
>         struct mlx5_wqe_data_seg   data[];               /*    32    
> 0 */
>
>         /* size: 32, cachelines: 1, members: 3 */
>         /* last cacheline: 32 bytes */
> };
>
> struct mlx5_wqe_eth_seg {
>         u8                         swp_outer_l4_offset;  /*     0    
> 1 */
>         u8                         swp_outer_l3_offset;  /*     1    
> 1 */
>         u8                         swp_inner_l4_offset;  /*     2    
> 1 */
>         u8                         swp_inner_l3_offset;  /*     3    
> 1 */
>         u8                         cs_flags;             /*     4    
> 1 */
>         u8                         swp_flags;            /*     5    
> 1 */
>         __be16                     mss;                  /*     6    
> 2 */
>         __be32                     flow_table_metadata;  /*     8    
> 4 */
>         union {
>                 struct {
>                         __be16     sz;                   /*    12    
> 2 */
>                         u8         start[2];             /*    14    
> 2 */
>                 } inline_hdr;                            /*    12    
> 4 */
>                 struct {
>                         __be16     type;                 /*    12    
> 2 */
>                         __be16     vlan_tci;             /*    14    
> 2 */
>                 } insert;                                /*    12    
> 4 */
>                 __be32             trailer;              /*    12    
> 4 */
>         };                                               /*    12    
> 4 */
>
>         /* size: 16, cachelines: 1, members: 9 */
>         /* last cacheline: 16 bytes */
> };
>
> struct mlx5_wqe_data_seg {
>         __be32                     byte_count;           /*     0    
> 4 */
>         __be32                     lkey;                 /*     4    
> 4 */
>         __be64                     addr;                 /*     8    
> 8 */
>
>         /* size: 16, cachelines: 1, members: 3 */
>         /* last cacheline: 16 bytes */
> };
>
> So, split the memcpy() so the compiler can reason about the buffer
> sizes.
>
> "pahole" shows no size nor member offset changes to struct
> mlx5e_tx_wqe
> nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
> code changes (i.e. only source line number induced differences and
> optimizations).
>
>

spiting the memcpy doesn't induce any performance degradation ? extra
instruction to copy the 1st 2 bytes ?


[...]
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c

why only here ? mlx5 has at least 3 other places where we use this
unbound memcpy ..

> @@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
> struct mlx5e_xmit_data *xdptxd,
>  
>         /* copy the inline part if required */
>         if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
> -               memcpy(eseg->inline_hdr.start, xdptxd->data,
> MLX5E_XDP_MIN_INLINE);
> +               memcpy(eseg->inline_hdr.start, xdptxd->data,
> sizeof(eseg->inline_hdr.start));
>                 eseg->inline_hdr.sz =
> cpu_to_be16(MLX5E_XDP_MIN_INLINE);
> +               memcpy(dseg, xdptxd->data + sizeof(eseg-
> >inline_hdr.start),
> +                      MLX5E_XDP_MIN_INLINE - sizeof(eseg-
> >inline_hdr.start));
>                 dma_len  -= MLX5E_XDP_MIN_INLINE;
>                 dma_addr += MLX5E_XDP_MIN_INLINE;
>                 dseg++;


2021-08-07 00:24:20

by Saeed Mahameed

[permalink] [raw]
Subject: Re: [PATCH] net/mlx5e: Avoid field-overflowing memcpy()

On Fri, 2021-08-06 at 15:45 -0700, Kees Cook wrote:
> On Fri, Aug 06, 2021 at 03:17:56PM -0700, Saeed Mahameed wrote:
> > On Fri, 2021-08-06 at 14:50 -0700, Kees Cook wrote:
> > > [...]
> > > So, split the memcpy() so the compiler can reason about the buffer
> > > sizes.
> > >
> > > "pahole" shows no size nor member offset changes to struct > >
> > > mlx5e_tx_wqe
> > > nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
> > > code changes (i.e. only source line number induced differences and
> > > optimizations).
> >
> > spiting the memcpy doesn't induce any performance degradation ? extra
> > instruction to copy the 1st 2 bytes ?
>
> Not meaningfully, but strictly speaking, yes, it's a different series
> of
> instructions.
>
> > [...]
> > > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> > > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> >
> > why only here ? mlx5 has at least 3 other places where we use this
> > unbound memcpy ..
>
> Can you point them out? I've been fixing only the ones I've been able
> to
> find through instrumentation (generally speaking, those with fixed
> sizes).
>

we will need to examine each change carefully to look for performance
degradation and maybe run some micro-benchmark tests in house before i
can ack this patch.

$ git grep -n "eseg->inline_hdr.start"
drivers/infiniband/hw/mlx5/wr.c:129: copysz = min_t(u64,
*cur_edge - (void *)eseg->inline_hdr.start,
drivers/infiniband/hw/mlx5/wr.c:131: memcpy(eseg-
>inline_hdr.start, pdata, copysz);
drivers/infiniband/hw/mlx5/wr.c:133:
sizeof(eseg->inline_hdr.start) + copysz, 16);
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c:344:
memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:510:
mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:514:
memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c:1033:
memcpy(eseg->inline_hdr.start, skb->data, attr.ihs);


2021-08-07 00:24:24

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH] net/mlx5e: Avoid field-overflowing memcpy()

On Fri, Aug 06, 2021 at 03:17:56PM -0700, Saeed Mahameed wrote:
> On Fri, 2021-08-06 at 14:50 -0700, Kees Cook wrote:
> > [...]
> > So, split the memcpy() so the compiler can reason about the buffer
> > sizes.
> >
> > "pahole" shows no size nor member offset changes to struct > > mlx5e_tx_wqe
> > nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
> > code changes (i.e. only source line number induced differences and
> > optimizations).
>
> spiting the memcpy doesn't induce any performance degradation ? extra
> instruction to copy the 1st 2 bytes ?

Not meaningfully, but strictly speaking, yes, it's a different series of
instructions.

> [...]
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
>
> why only here ? mlx5 has at least 3 other places where we use this
> unbound memcpy ..

Can you point them out? I've been fixing only the ones I've been able to
find through instrumentation (generally speaking, those with fixed
sizes).

--
Kees Cook