Return-Path: MIME-Version: 1.0 In-Reply-To: <1328279789-10870-5-git-send-email-Andrei.Emeltchenko.news@gmail.com> References: <1328279789-10870-1-git-send-email-Andrei.Emeltchenko.news@gmail.com> <1328279789-10870-5-git-send-email-Andrei.Emeltchenko.news@gmail.com> Date: Fri, 3 Feb 2012 19:14:24 -0200 Message-ID: Subject: Re: [RFCv2 04/10] Bluetooth: Revert to mutexes from RCU list From: Ulisses Furquim To: Emeltchenko Andrei Cc: linux-bluetooth@vger.kernel.org Content-Type: text/plain; charset=ISO-8859-1 Sender: linux-bluetooth-owner@vger.kernel.org List-ID: Hi Andrei, On Fri, Feb 3, 2012 at 12:36 PM, Emeltchenko Andrei wrote: > From: Andrei Emeltchenko > > Usage of RCU list looks not reasonalbe for a number of reasons: > our code sleep and we have to use socket spinlocks, some parts > of code are updaters thus we need to use mutexes anyway. Just a comment. RCU updater side should be locked in most cases so that's not really a big reason for removing RCU usage here. > Signed-off-by: Andrei Emeltchenko > --- > ?net/bluetooth/l2cap_core.c | ?110 ++++++++++++++++++++++--------------------- > ?1 files changed, 56 insertions(+), 54 deletions(-) > > diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c > index f1a6b3c..010ef75 100644 > --- a/net/bluetooth/l2cap_core.c > +++ b/net/bluetooth/l2cap_core.c > @@ -77,36 +77,24 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, > > ?static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) > ?{ > - ? ? ? struct l2cap_chan *c, *r = NULL; > - > - ? ? ? rcu_read_lock(); > + ? ? ? struct l2cap_chan *c; > > - ? ? ? list_for_each_entry_rcu(c, &conn->chan_l, list) { > - ? ? ? ? ? ? ? if (c->dcid == cid) { > - ? ? ? ? ? ? ? ? ? ? ? r = c; > - ? ? ? ? ? ? ? ? ? ? ? break; > - ? ? ? ? ? ? ? } > + ? ? ? list_for_each_entry(c, &conn->chan_l, list) { > + ? ? ? ? ? ? ? if (c->dcid == cid) > + ? ? ? ? ? ? ? ? ? ? ? return c; > ? ? ? ?} > - > - ? ? ? rcu_read_unlock(); > - ? ? ? return r; > + ? ? ? return NULL; > ?} > > ?static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) > ?{ > - ? ? ? struct l2cap_chan *c, *r = NULL; > - > - ? ? ? rcu_read_lock(); > + ? ? ? struct l2cap_chan *c; > > - ? ? ? list_for_each_entry_rcu(c, &conn->chan_l, list) { > - ? ? ? ? ? ? ? if (c->scid == cid) { > - ? ? ? ? ? ? ? ? ? ? ? r = c; > - ? ? ? ? ? ? ? ? ? ? ? break; > - ? ? ? ? ? ? ? } > + ? ? ? list_for_each_entry(c, &conn->chan_l, list) { > + ? ? ? ? ? ? ? if (c->scid == cid) > + ? ? ? ? ? ? ? ? ? ? ? return c; > ? ? ? ?} > - > - ? ? ? rcu_read_unlock(); > - ? ? ? return r; > + ? ? ? return NULL; > ?} > > ?/* Find channel with given SCID. > @@ -115,36 +103,34 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 ci > ?{ > ? ? ? ?struct l2cap_chan *c; > > + ? ? ? mutex_lock(&conn->chan_lock); > ? ? ? ?c = __l2cap_get_chan_by_scid(conn, cid); > ? ? ? ?if (c) > ? ? ? ? ? ? ? ?lock_sock(c->sk); > + ? ? ? mutex_unlock(&conn->chan_lock); > ? ? ? ?return c; > ?} Ok. We have now chan_lock around the unlocked versions and we still return a locked sock. However, we do have to be careful not to open windows where we can deadlock. See my other comments below about that. > ?static struct l2cap_chan *__l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident) > ?{ > - ? ? ? struct l2cap_chan *c, *r = NULL; > - > - ? ? ? rcu_read_lock(); > + ? ? ? struct l2cap_chan *c; > > - ? ? ? list_for_each_entry_rcu(c, &conn->chan_l, list) { > - ? ? ? ? ? ? ? if (c->ident == ident) { > - ? ? ? ? ? ? ? ? ? ? ? r = c; > - ? ? ? ? ? ? ? ? ? ? ? break; > - ? ? ? ? ? ? ? } > + ? ? ? list_for_each_entry(c, &conn->chan_l, list) { > + ? ? ? ? ? ? ? if (c->ident == ident) > + ? ? ? ? ? ? ? ? ? ? ? return c; > ? ? ? ?} > - > - ? ? ? rcu_read_unlock(); > - ? ? ? return r; > + ? ? ? return NULL; > ?} > > ?static inline struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident) > ?{ > ? ? ? ?struct l2cap_chan *c; > > + ? ? ? mutex_lock(&conn->chan_lock); > ? ? ? ?c = __l2cap_get_chan_by_ident(conn, ident); > ? ? ? ?if (c) > ? ? ? ? ? ? ? ?lock_sock(c->sk); > + ? ? ? mutex_unlock(&conn->chan_lock); > ? ? ? ?return c; > ?} > > @@ -259,6 +245,7 @@ static void l2cap_chan_timeout(struct work_struct *work) > > ? ? ? ?BT_DBG("chan %p state %d", chan, chan->state); > > + ? ? ? mutex_lock(&chan->conn->chan_lock); > ? ? ? ?lock_sock(sk); > > ? ? ? ?if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG) > @@ -272,6 +259,7 @@ static void l2cap_chan_timeout(struct work_struct *work) > ? ? ? ?l2cap_chan_close(chan, reason); > > ? ? ? ?release_sock(sk); > + ? ? ? mutex_unlock(&chan->conn->chan_lock); > > ? ? ? ?chan->ops->close(chan->data); > ? ? ? ?l2cap_chan_put(chan); > @@ -357,7 +345,9 @@ static void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) > > ? ? ? ?l2cap_chan_hold(chan); > > - ? ? ? list_add_rcu(&chan->list, &conn->chan_l); > + ? ? ? mutex_lock(&conn->chan_lock); > + ? ? ? list_add(&chan->list, &conn->chan_l); > + ? ? ? mutex_unlock(&conn->chan_lock); > ?} Hmm, I'm not sure this is correct. Have you seen l2cap_connect_req() calls __l2cap_get_chan_by_dcid() without the chan_lock? It's like that because it was relying on RCU but now we do need to lock chan_lock there as well. I'd recommend we turn l2cap_chan_add() into __l2cap_chan_add() and lock chan_lock in the callers. Do you agree? > ?/* Delete channel. > @@ -374,8 +364,7 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) > > ? ? ? ?if (conn) { > ? ? ? ? ? ? ? ?/* Delete from channel list */ > - ? ? ? ? ? ? ? list_del_rcu(&chan->list); > - ? ? ? ? ? ? ? synchronize_rcu(); > + ? ? ? ? ? ? ? list_del(&chan->list); > > ? ? ? ? ? ? ? ?l2cap_chan_put(chan); > > @@ -426,10 +415,14 @@ static void l2cap_chan_cleanup_listen(struct sock *parent) > ? ? ? ?/* Close not yet accepted channels */ > ? ? ? ?while ((sk = bt_accept_dequeue(parent, NULL))) { > ? ? ? ? ? ? ? ?struct l2cap_chan *chan = l2cap_pi(sk)->chan; > + > + ? ? ? ? ? ? ? mutex_lock(&chan->conn->chan_lock); > ? ? ? ? ? ? ? ?__clear_chan_timer(chan); > ? ? ? ? ? ? ? ?lock_sock(sk); > ? ? ? ? ? ? ? ?l2cap_chan_close(chan, ECONNRESET); > ? ? ? ? ? ? ? ?release_sock(sk); > + ? ? ? ? ? ? ? mutex_unlock(&chan->conn->chan_lock); > + > ? ? ? ? ? ? ? ?chan->ops->close(chan->data); > ? ? ? ?} > ?} I see you're adding locks around l2cap_chan_close() instead of just locking list_del() which might be a good approach. Is that what you want? Before we had to drop the lock to call l2cap_chan_close() because it would be locked again to change the list (which I thought wasn't good). And could you please remove the leftover comment in l2cap_conn_start() about releasing the lock before calling l2cap_chan_close()? Have you seen l2cap_sock_shutdown() in l2cap_sock.c also calls l2cap_chan_close()? Why don't we need to lock chan_lock there? If that's what you wanted then please describe this kind of change in the commit message (at least). This way we can refer to it if anything happens or if we need to understand the change. I do think you write short commit messages so please be more verbose. > ?/* ---- L2CAP signalling commands ---- */ > @@ -2755,6 +2753,7 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd > ? ? ? ? ? ? ? ? ? ? ? ?return -EFAULT; > ? ? ? ?} > > + ? ? ? mutex_lock(&conn->chan_lock); > ? ? ? ?sk = chan->sk; > > ? ? ? ?switch (result) { > @@ -2782,6 +2781,8 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd > ? ? ? ?} > > ? ? ? ?release_sock(sk); > + ? ? ? mutex_unlock(&conn->chan_lock); > + > ? ? ? ?return 0; > ?} > @@ -3032,6 +3033,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd > ? ? ? ?if (!chan) > ? ? ? ? ? ? ? ?return 0; > > + ? ? ? mutex_lock(&conn->chan_lock); > ? ? ? ?sk = chan->sk; > > ? ? ? ?rsp.dcid = cpu_to_le16(chan->scid); > @@ -3042,6 +3044,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd > > ? ? ? ?l2cap_chan_del(chan, ECONNRESET); > ? ? ? ?release_sock(sk); > + ? ? ? mutex_unlock(&conn->chan_lock); > > ? ? ? ?chan->ops->close(chan->data); > ? ? ? ?return 0; Here in l2cap_connect_rsp() and l2cap_disconnect_req() it seems we have a window where we can deadlock chan_lock and sock lock. Have you seen that? It can be that it may never happen but it'd be good to avoid that IMO. We receive a locked sock from either l2cap_get_chan_by_scid() or l2cap_get_chan_by_ident() and then we try to lock chan_lock. That's a different order you've been locking them. Maybe we can lock chan_lock then use the unlocked versions and explicitly lock sock after receiving chan? > @@ -3063,6 +3066,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd > ? ? ? ?if (!chan) > ? ? ? ? ? ? ? ?return 0; > > + ? ? ? mutex_lock(&conn->chan_lock); > ? ? ? ?sk = chan->sk; > > ? ? ? ?l2cap_chan_del(chan, 0); The last comment also applies here. Besides, aren't we missing an unlock? How much have you been testing these changes? I'd like to avoid introducing more bugs with changes are supposed to fix things so please do test this as much as you can. Regards, -- Ulisses Furquim ProFUSION embedded systems http://profusion.mobi Mobile: +55 19 9250 0942 Skype: ulissesffs