2022-05-09 17:15:44

by Devid Antonio Filoni

[permalink] [raw]
Subject: [PATCH RESEND 0/2] j1939: make sure that sent DAT/CTL frames are marked as TX

Hello,

If candump -x is used to dump CAN bus traffic on an interface while a J1939
socket is sending multi-packet messages, then the DAT and CTL frames
show up as RX instead of TX.

This patch series sets the owning struct sock pointer on the generated
struct sk_buff so that the recv functions can set the MSG_DONTROUTE flag.

I'm not sure that j1939_session_skb_get is needed; I think that session->sk
could be passed directly as the can_skb_set_owner parameter. This patch
is based on the j1939_simple_txnext function, which uses j1939_session_skb_get.
I can provide an additional patch to remove the calls to the
j1939_session_skb_get function if you think they are not needed.

Thank you,
Devid

Devid Antonio Filoni (2):
can: j1939: make sure that sent DAT frames are marked as TX
can: j1939: make sure that sent CTL frames are marked as TX

net/can/j1939/transport.c | 69 ++++++++++++++++++++++++++++++++-------
1 file changed, 57 insertions(+), 12 deletions(-)

--
2.25.1



2022-05-09 17:15:50

by Devid Antonio Filoni

[permalink] [raw]
Subject: [PATCH RESEND 1/2] can: j1939: make sure that sent DAT frames are marked as TX

Fixes: 9d71dd0 ("can: add support of SAE J1939 protocol")
Signed-off-by: Devid Antonio Filoni <[email protected]>
---
net/can/j1939/transport.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 307ee1174a6e..030f5fe901e1 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -621,18 +621,30 @@ static int j1939_tp_tx_dat(struct j1939_session *session,
const u8 *dat, int len)
{
struct j1939_priv *priv = session->priv;
+ struct sk_buff *se_skb;
struct sk_buff *skb;
+ int ret;

skb = j1939_tp_tx_dat_new(priv, &session->skcb,
false, false);
if (IS_ERR(skb))
return PTR_ERR(skb);

+ se_skb = j1939_session_skb_get(session);
+ if (se_skb)
+ can_skb_set_owner(skb, se_skb->sk);
+
skb_put_data(skb, dat, len);
if (j1939_tp_padding && len < 8)
memset(skb_put(skb, 8 - len), 0xff, 8 - len);

- return j1939_send_one(priv, skb);
+ ret = j1939_send_one(priv, skb);
+
+ if (ret)
+ kfree_skb(se_skb);
+ else
+ consume_skb(se_skb);
+ return ret;
}

static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
--
2.25.1


2022-05-09 17:15:49

by Devid Antonio Filoni

[permalink] [raw]
Subject: [PATCH RESEND 2/2] can: j1939: make sure that sent CTL frames are marked as TX

Fixes: 9d71dd0 ("can: add support of SAE J1939 protocol")
Signed-off-by: Devid Antonio Filoni <[email protected]>
---
net/can/j1939/transport.c | 55 +++++++++++++++++++++++++++++++--------
1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 030f5fe901e1..b8368f9c78c2 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -648,6 +648,7 @@ static int j1939_tp_tx_dat(struct j1939_session *session,
}

static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
+ struct sock *re_sk,
const struct j1939_sk_buff_cb *re_skcb,
bool swap_src_dst, pgn_t pgn, const u8 *dat)
{
@@ -661,6 +662,8 @@ static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
if (IS_ERR(skb))
return PTR_ERR(skb);

+ can_skb_set_owner(skb, re_sk);
+
skdat = skb_put(skb, 8);
memcpy(skdat, dat, 5);
skdat[5] = (pgn >> 0);
@@ -674,13 +677,26 @@ static inline int j1939_tp_tx_ctl(struct j1939_session *session,
bool swap_src_dst, const u8 *dat)
{
struct j1939_priv *priv = session->priv;
+ struct sk_buff *se_skb = j1939_session_skb_get(session);
+ struct sock *se_skb_sk = NULL;
+ int ret;
+
+ if (se_skb)
+ se_skb_sk = se_skb->sk;

- return j1939_xtp_do_tx_ctl(priv, &session->skcb,
- swap_src_dst,
- session->skcb.addr.pgn, dat);
+ ret = j1939_xtp_do_tx_ctl(priv, se_skb_sk, &session->skcb,
+ swap_src_dst,
+ session->skcb.addr.pgn, dat);
+
+ if (ret)
+ kfree_skb(se_skb);
+ else
+ consume_skb(se_skb);
+ return ret;
}

static int j1939_xtp_tx_abort(struct j1939_priv *priv,
+ struct sock *re_sk,
const struct j1939_sk_buff_cb *re_skcb,
bool swap_src_dst,
enum j1939_xtp_abort err,
@@ -694,7 +710,7 @@ static int j1939_xtp_tx_abort(struct j1939_priv *priv,
memset(dat, 0xff, sizeof(dat));
dat[0] = J1939_TP_CMD_ABORT;
dat[1] = err;
- return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat);
+ return j1939_xtp_do_tx_ctl(priv, re_sk, re_skcb, swap_src_dst, pgn, dat);
}

void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec)
@@ -1117,6 +1133,8 @@ static void __j1939_session_cancel(struct j1939_session *session,
enum j1939_xtp_abort err)
{
struct j1939_priv *priv = session->priv;
+ struct sk_buff *se_skb;
+ struct sock *se_skb_sk = NULL;

WARN_ON_ONCE(!err);
lockdep_assert_held(&session->priv->active_session_list_lock);
@@ -1125,9 +1143,15 @@ static void __j1939_session_cancel(struct j1939_session *session,
session->state = J1939_SESSION_WAITING_ABORT;
/* do not send aborts on incoming broadcasts */
if (!j1939_cb_is_broadcast(&session->skcb)) {
- j1939_xtp_tx_abort(priv, &session->skcb,
- !session->transmission,
- err, session->skcb.addr.pgn);
+ se_skb = j1939_session_skb_get(session);
+ if (se_skb)
+ se_skb_sk = se_skb->sk;
+ if (j1939_xtp_tx_abort(priv, se_skb_sk, &session->skcb,
+ !session->transmission,
+ err, session->skcb.addr.pgn))
+ kfree_skb(se_skb);
+ else
+ consume_skb(se_skb);
}

if (session->sk)
@@ -1274,6 +1298,8 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data);
struct j1939_priv *priv = session->priv;
+ struct sk_buff *se_skb;
+ struct sock *se_skb_sk = NULL;
enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
u8 cmd = skb->data[0];

@@ -1318,8 +1344,15 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,

netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n",
__func__, session, cmd, pgn, session->skcb.addr.pgn);
- if (abort != J1939_XTP_NO_ABORT)
- j1939_xtp_tx_abort(priv, skcb, true, abort, pgn);
+ if (abort != J1939_XTP_NO_ABORT) {
+ se_skb = j1939_session_skb_get(session);
+ if (se_skb)
+ se_skb_sk = se_skb->sk;
+ if (j1939_xtp_tx_abort(priv, se_skb_sk, skcb, true, abort, pgn))
+ kfree_skb(se_skb);
+ else
+ consume_skb(se_skb);
+ }

return true;
}
@@ -1625,13 +1658,13 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
}

if (abort != J1939_XTP_NO_ABORT) {
- j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn);
+ j1939_xtp_tx_abort(priv, skb->sk, &skcb, true, abort, pgn);
return NULL;
}

session = j1939_session_fresh_new(priv, len, &skcb);
if (!session) {
- j1939_xtp_tx_abort(priv, &skcb, true,
+ j1939_xtp_tx_abort(priv, skb->sk, &skcb, true,
J1939_XTP_ABORT_RESOURCE, pgn);
return NULL;
}
--
2.25.1


2022-05-10 11:57:59

by Oleksij Rempel

[permalink] [raw]
Subject: Re: [PATCH RESEND 0/2] j1939: make sure that sent DAT/CTL frames are marked as TX

Hi Devid,

On Mon, May 09, 2022 at 07:07:44PM +0200, Devid Antonio Filoni wrote:
> Hello,
>
> If candump -x is used to dump CAN bus traffic on an interface while a J1939
> socket is sending multi-packet messages, then the DAT and CTL frames
> show up as RX instead of TX.
>
> This patch series sets to generated struct sk_buff the owning struct sock
> pointer so that the MSG_DONTROUTE flag can be set by recv functions.
>
> I'm not sure that j1939_session_skb_get is needed, I think that session->sk
> could be directly passed as can_skb_set_owner parameter. This patch
> is based on j1939_simple_txnext function which uses j1939_session_skb_get.
> I can provide an additional patch to remove the calls to
> j1939_session_skb_get function if you think they are not needed.

Thank you for your patches. While testing them I noticed that there is a memory
leak in the current kernel, and it seems to be even worse after these patches.
Found by this test:
https://github.com/linux-can/can-tests/blob/master/j1939/run_all.sh#L13

Can you please investigate it (or wait until I get time to do it).

Regards,
Oleksij
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |

2022-05-10 19:32:50

by Devid Antonio Filoni

[permalink] [raw]
Subject: Re: [PATCH RESEND 0/2] j1939: make sure that sent DAT/CTL frames are marked as TX

Hi Oleksij,

On Tue, 2022-05-10 at 06:34 +0200, Oleksij Rempel wrote:
> Hi Devid,
>
> On Mon, May 09, 2022 at 07:07:44PM +0200, Devid Antonio Filoni wrote:
> > Hello,
> >
> > If candump -x is used to dump CAN bus traffic on an interface while a J1939
> > socket is sending multi-packet messages, then the DAT and CTL frames
> > show up as RX instead of TX.
> >
> > This patch series sets to generated struct sk_buff the owning struct sock
> > pointer so that the MSG_DONTROUTE flag can be set by recv functions.
> >
> > I'm not sure that j1939_session_skb_get is needed, I think that session->sk
> > could be directly passed as can_skb_set_owner parameter. This patch
> > is based on j1939_simple_txnext function which uses j1939_session_skb_get.
> > I can provide an additional patch to remove the calls to
> > j1939_session_skb_get function if you think they are not needed.
>
> Thank you for your patches. By testing it I noticed that there is a memory
> leak in current kernel and it seems to be even worse after this patches.
> Found by this test:
> https://github.com/linux-can/can-tests/blob/master/j1939/run_all.sh#L13
>
>
> Can you please investigate it (or wait until I get time to do it).
>
> Regards,
> Oleksij
>

I checked the test you linked and I can see that the number of the
instances of the can_j1939 module increases on each
j1939_ac_100k_dual_can.sh test execution (then the script exits),
however this doesn't seem to be worse with my patches, I have the same
results with the original kernel. Did you execute a particular test to
verify that the memory leak is worse with my patches?
I looked at all the code that I changed in my patches, but the
ref counters seem to be handled correctly in the called functions. I
suspected that the issue might be caused by the ref counter being increased
in the can_skb_set_owner() function but, even if I remove that call from the
j1939_simple_txnext() function in the original kernel, I can still reproduce
the memory leak.
I think the issue is somewhere else. I'll try to take another look, but I
can't promise anything.

Best Regards,
Devid


2022-05-11 12:10:27

by Oleksij Rempel

[permalink] [raw]
Subject: Re: [PATCH RESEND 0/2] j1939: make sure that sent DAT/CTL frames are marked as TX

Hi Devid,

On Tue, May 10, 2022 at 08:12:32PM +0200, Devid Antonio Filoni wrote:
> Hi Oleksij,
>
> On Tue, 2022-05-10 at 06:34 +0200, Oleksij Rempel wrote:
> > Hi Devid,
> >
> > On Mon, May 09, 2022 at 07:07:44PM +0200, Devid Antonio Filoni wrote:
> > > Hello,
> > >
> > > If candump -x is used to dump CAN bus traffic on an interface while a J1939
> > > socket is sending multi-packet messages, then the DAT and CTL frames
> > > show up as RX instead of TX.
> > >
> > > This patch series sets to generated struct sk_buff the owning struct sock
> > > pointer so that the MSG_DONTROUTE flag can be set by recv functions.
> > >
> > > I'm not sure that j1939_session_skb_get is needed, I think that session->sk
> > > could be directly passed as can_skb_set_owner parameter. This patch
> > > is based on j1939_simple_txnext function which uses j1939_session_skb_get.
> > > I can provide an additional patch to remove the calls to
> > > j1939_session_skb_get function if you think they are not needed.
> >
> > Thank you for your patches. By testing it I noticed that there is a memory
> > leak in current kernel and it seems to be even worse after this patches.
> > Found by this test:
> > https://github.com/linux-can/can-tests/blob/master/j1939/run_all.sh#L13
> >
> >
> > Can you please investigate it (or wait until I get time to do it).
> >
> > Regards,
> > Oleksij
> >
>
> I checked the test you linked and I can see that the number of the
> instances of the can_j1939 module increases on each
> j1939_ac_100k_dual_can.sh test execution (then the script exits),
> however this doesn't seem to be worse with my patches, I have the same
> results with the original kernel. Did you execute a particular test to
> verify that the memory leak is worse with my patches?
> I tried to take a look at all code that I changed in my patches but the
> used ref counters seem to be handled correctly in called functions. I
> suspected that the issue may be caused by the ref counter increased
> in can_skb_set_owner() function but, even if I remove that call from the
> j1939_simple_txnext() function in original kernel, I can still reproduce
> the memory leak.
> I think the issue is somewhere else, I'll try to give another look but I
> can't assure nothing.

Unfortunately, detecting local frames by skb->sk will not work for all control
packets. I'll send a different patch solving it for all j1939 and raw
variants.

Regards,
Oleksij
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |