The pppoatm_send() uses vcc->send() directly and does not check if vcc
is ready for send(). This causes Oops when send() is used after
vcc_destroy_socket() at least with usbatm driver:
Oops: 0000 [#1] PREEMPT
Pid: 0, comm: swapper Not tainted 3.6.0-krzysiek-00001-gb7cd93b-dirty #60 /AK32
EIP: 0060:[<c01413c6>] EFLAGS: 00010082 CPU: 0
EIP is at __wake_up_common+0x16/0x70
EAX: 30707070 EBX: 00000292 ECX: 00000001 EDX: dca75fc0
ESI: 00000000 EDI: de7f500f EBP: df409f24 ESP: df409f08
DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
CR0: 8005003b CR2: 30707070 CR3: 1c920000 CR4: 000007d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
Process swapper (pid: 0, ti=df408000 task=c07bd4e0 task.ti=c07b0000)
Stack:
00000000 00000001 00000001 dca75fc0 00000292 00000000 de7f500f df409f3c
c0143299 00000000 00000000 dc84f000 dc84f000 df409f4c c0602bf0 00000000
dc84f000 df409f58 c0604301 dc840cc0 df409fb4 c04672e5 c076a240 00000000
Call Trace:
[<c0143299>] __wake_up+0x29/0x50
[<c0602bf0>] vcc_write_space+0x40/0x80
[<c0604301>] atm_pop_raw+0x21/0x30
[<c04672e5>] usbatm_tx_process+0x2a5/0x380
[<c0126cf9>] tasklet_action+0x39/0x70
[<c0126f1f>] __do_softirq+0x7f/0x120
[<c0126ea0>] ? local_bh_enable_ip+0xa0/0xa0
<IRQ>
Now pppoatm_send(), like vcc_sendmsg(), checks for vcc flags that
indicate that vcc is not ready.
Signed-off-by: Krzysztof Mazur <[email protected]>
Cc: David Woodhouse <[email protected]>
---
Previously sent with more details as:
http://marc.info/?l=linux-kernel&m=134952646810580&w=2
This patch extends race window between pppoatm_send() and vcc_destroy_socket().
This race exists also without this patch and it's fixed in patch 2.
net/atm/pppoatm.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 226dca9..0dcb5dc 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -269,6 +269,8 @@ static inline int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
{
struct pppoatm_vcc *pvcc = chan_to_pvcc(chan);
+ struct atm_vcc *vcc;
+
ATM_SKB(skb)->vcc = pvcc->atmvcc;
pr_debug("(skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc);
if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT))
@@ -301,6 +303,12 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
return 1;
}
+ vcc = ATM_SKB(skb)->vcc;
+ if (test_bit(ATM_VF_RELEASED, &vcc->flags)
+ || test_bit(ATM_VF_CLOSE, &vcc->flags)
+ || !test_bit(ATM_VF_READY, &vcc->flags))
+ goto nospace;
+
atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
--
1.8.0.2.g35080e9
The pppoatm_send() calls vcc->send() and now also checks for
some vcc flags that indicate destroyed vcc without proper locking.
The vcc_sendmsg() uses lock_sock(sk). This lock is used by
vcc_release(), so vcc_destroy_socket() will not be called between
check and during ->send(). The vcc_release_async() sets ATM_VF_CLOSE,
but it should be safe to call ->send() after it, because
vcc->dev->ops->close() is not called.
The pppoatm_send() is called with BH disabled, so bh_lock_sock()
should be used instead of lock_sock().
Signed-off-by: Krzysztof Mazur <[email protected]>
Cc: David Woodhouse <[email protected]>
---
net/atm/pppoatm.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 0dcb5dc..e3b2d69 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -270,6 +270,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
{
struct pppoatm_vcc *pvcc = chan_to_pvcc(chan);
struct atm_vcc *vcc;
+ int ret;
ATM_SKB(skb)->vcc = pvcc->atmvcc;
pr_debug("(skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc);
@@ -304,17 +305,24 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
}
vcc = ATM_SKB(skb)->vcc;
+ bh_lock_sock(sk_atm(vcc));
+ if (sock_owned_by_user(sk_atm(vcc)))
+ goto nospace_unlock_sock;
if (test_bit(ATM_VF_RELEASED, &vcc->flags)
|| test_bit(ATM_VF_CLOSE, &vcc->flags)
|| !test_bit(ATM_VF_READY, &vcc->flags))
- goto nospace;
+ goto nospace_unlock_sock;
atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
- return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
+ ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
? DROP_PACKET : 1;
+ bh_unlock_sock(sk_atm(vcc));
+ return ret;
+nospace_unlock_sock:
+ bh_unlock_sock(sk_atm(vcc));
nospace:
/*
* We don't have space to send this SKB now, but we might have
--
1.8.0.2.g35080e9
The pppoatm gets a reference to atmvcc, but does not increment vcc
usage count. The vcc uses vcc->sk socket for reference counting,
so sock_hold() and sock_put() should be used by pppoatm.
Signed-off-by: Krzysztof Mazur <[email protected]>
Cc: David Woodhouse <[email protected]>
---
net/atm/pppoatm.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index e3b2d69..a766d96 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -154,6 +154,7 @@ static void pppoatm_unassign_vcc(struct atm_vcc *atmvcc)
tasklet_kill(&pvcc->wakeup_tasklet);
ppp_unregister_channel(&pvcc->chan);
atmvcc->user_back = NULL;
+ sock_put(sk_atm(pvcc->atmvcc));
kfree(pvcc);
/* Gee, I hope we have the big kernel lock here... */
module_put(THIS_MODULE);
@@ -373,6 +374,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
if (pvcc == NULL)
return -ENOMEM;
pvcc->atmvcc = atmvcc;
+ sock_hold(sk_atm(atmvcc));
/* Maximum is zero, so that we can use atomic_inc_not_zero() */
atomic_set(&pvcc->inflight, NONE_INFLIGHT);
@@ -387,6 +389,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
pvcc->wakeup_tasklet.data = (unsigned long) &pvcc->chan;
err = ppp_register_channel(&pvcc->chan);
if (err != 0) {
+ sock_put(sk_atm(atmvcc));
kfree(pvcc);
return err;
}
--
1.8.0.2.g35080e9
David, if you could review this series I'd really appreciate it.
On Tue, 2012-10-23 at 02:52 -0400, David Miller wrote:
>
> David, if you could review this series I'd really appreciate it.
Will do. I glanced at it last night but need to be in the right frame of
mind for thinking about ATM locking.
I know I have a bottle of vodka *somewhere* around here... I saw it
after we moved...
--
dwmw2
On Mon, 2012-10-22 at 19:14 +0200, Krzysztof Mazur wrote:
> Now pppoatm_send(), like vcc_sendmsg(), checks for vcc flags that
> indicate that vcc is not ready.
I note that vcc_sendmsg() also checks for sock->state == SS_CONNECTED.
Is that check not needed here? Otherwise, looks sane enough.
Acked-By: David Woodhouse <[email protected]>
--
dwmw2
On Mon, 2012-10-22 at 19:14 +0200, Krzysztof Mazur wrote:
> The pppoatm_send() calls vcc->send() and now also checks for
> some vcc flags that indicate destroyed vcc without proper locking.
>
> The vcc_sendmsg() uses lock_sock(sk). This lock is used by
> vcc_release(), so vcc_destroy_socket() will not be called between
> check and during ->send(). The vcc_release_async() sets ATM_VF_CLOSE,
> but it should be safe to call ->send() after it, because
> vcc->dev->ops->close() is not called.
>
> The pppoatm_send() is called with BH disabled, so bh_lock_sock()
> should be used instead of lock_sock().
Should we be locking it earlier, so that the atm_may_send() call is also
covered by the lock?
Either way, it's an obvious improvement on what we had before — and even
if the answer to my question above is 'yes', exceeding the configured
size by one packet is both harmless and almost never going to happen
since we now limit ourselves to two packets anyway. So:
Acked-By: David Woodhouse <[email protected]>
--
dwmw2
On Mon, 2012-10-22 at 19:14 +0200, Krzysztof Mazur wrote:
> The pppoatm gets a reference to atmvcc, but does not increment vcc
> usage count. The vcc uses vcc->sk socket for reference counting,
> so sock_hold() and sock_put() should be used by pppoatm.
>
> Signed-off-by: Krzysztof Mazur <[email protected]>
> Cc: David Woodhouse <[email protected]>
Acked-By: David Woodhouse <[email protected]>
But did you spot what's in the end of the context of the first hunk...?
> --- a/net/atm/pppoatm.c
> +++ b/net/atm/pppoatm.c
> @@ -154,6 +154,7 @@ static void pppoatm_unassign_vcc(struct atm_vcc
> *atmvcc)
> tasklet_kill(&pvcc->wakeup_tasklet);
> ppp_unregister_channel(&pvcc->chan);
> atmvcc->user_back = NULL;
> + sock_put(sk_atm(pvcc->atmvcc));
> kfree(pvcc);
> /* Gee, I hope we have the big kernel lock here... */
> module_put(THIS_MODULE);
Fairly sure that hope is unfounded these days... :)
--
dwmw2
In message <[email protected]>,Krzysztof Mazur writes:
>The pppoatm_send() calls vcc->send() and now also checks for
>some vcc flags that indicate destroyed vcc without proper locking.
...
>The vcc_sendmsg() uses lock_sock(sk). This lock is used by
>vcc_release(), so vcc_destroy_socket() will not be called between
>check and during ->send(). The vcc_release_async() sets ATM_VF_CLOSE,
>but it should be safe to call ->send() after it, because
>vcc->dev->ops->close() is not called.
as i recall from way back, this shouldnt be necessary. closing a vcc
for an attached protocol isnt supposed to require addtional locking
or synchronization.
vcc_release() locks the socket and vcc_destroy_socket() calls the device's
vcc close routine and pushes a NULL skb to the attached protocol.
this NULL push is supposed to let the attached protocol that no more
sends and recvs can be handled.
that said, the order for the device vcc close and push does seem
reversed. since i imagine there could be a pending pppoatm_send()
during this interval. the push of the NULL skb is allowed to wait for
the subprotocol to finish its cleanup/shutdown.
On Tue, Oct 30, 2012 at 10:26:46AM -0400, Chas Williams (CONTRACTOR) wrote:
> In message <[email protected]>,Krzysztof Mazur writes:
>
> as i recall from way back, this shouldnt be necessary. closing a vcc
> for an attached protocol isnt supposed to require addtional locking
> or synchronization.
Such locking is already used by vcc_sendmsg() and I think we should do here
exacly what vcc_sendmsg() does.
>
> vcc_release() locks the socket and vcc_destroy_socket() calls the device's
> vcc close routine and pushes a NULL skb to the attached protocol.
> this NULL push is supposed to let the attached protocol that no more
> sends and recvs can be handled.
>
> that said, the order for the device vcc close and push does seem
> reversed. since i imagine there could be a pending pppoatm_send()
> during this interval. the push of the NULL skb is allowed to wait for
> the subprotocol to finish its cleanup/shutdown.
Yes, this problem can be probably fixed by reversing close and push
and adding some synchronization to pppoatm_unassign_vcc(), but I think
we need that locking anyway, for instance for synchronization for
checking and incrementing sk->sk_wmem_alloc, between pppoatm_send()
and vcc_sendmsg().
Thanks.
Krzysiek
On Tue, Oct 30, 2012 at 09:37:48AM +0000, David Woodhouse wrote:
>
> Should we be locking it earlier, so that the atm_may_send() call is also
> covered by the lock?
Yes, but only to protect against concurent vcc_sendmsg().
>
> Either way, it's an obvious improvement on what we had before ??? and even
> if the answer to my question above is 'yes', exceeding the configured
> size by one packet is both harmless and almost never going to happen
> since we now limit ourselves to two packets anyway. So:
>
> Acked-By: David Woodhouse <[email protected]>
>
I'm sending proposed patch (not tested).
Should I squash it into original patch or send it later because it's
not really important?
Thanks.
Krzysiek
-- >8 --
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index a766d96..3081834 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -214,15 +214,7 @@ error:
static inline int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
{
- /*
- * It's not clear that we need to bother with using atm_may_send()
- * to check we don't exceed sk->sk_sndbuf. If userspace sets a
- * value of sk_sndbuf which is lower than the MTU, we're going to
- * block for ever. But the code always did that before we introduced
- * the packet count limit, so...
- */
- if (atm_may_send(pvcc->atmvcc, size) &&
- atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT))
+ if (atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT))
return 1;
/*
@@ -251,8 +243,7 @@ static inline int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
* code path that calls pppoatm_send(), and is thus going to
* wait for us to finish.
*/
- if (atm_may_send(pvcc->atmvcc, size) &&
- atomic_inc_not_zero(&pvcc->inflight))
+ if (atomic_inc_not_zero(&pvcc->inflight))
return 1;
return 0;
@@ -314,6 +305,16 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
|| !test_bit(ATM_VF_READY, &vcc->flags))
goto nospace_unlock_sock;
+ /*
+ * It's not clear that we need to bother with using atm_may_send()
+ * to check we don't exceed sk->sk_sndbuf. If userspace sets a
+ * value of sk_sndbuf which is lower than the MTU, we're going to
+ * block for ever. But the code always did that before we introduced
+ * the packet count limit, so...
+ */
+ if (!atm_may_send(vcc, skb->truesize))
+ goto nospace_unlock_sock;
+
atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
On Tue, Oct 30, 2012 at 09:39:22AM +0000, David Woodhouse wrote:
> On Mon, 2012-10-22 at 19:14 +0200, Krzysztof Mazur wrote:
> > The pppoatm gets a reference to atmvcc, but does not increment vcc
> > usage count. The vcc uses vcc->sk socket for reference counting,
> > so sock_hold() and sock_put() should be used by pppoatm.
> >
> > Signed-off-by: Krzysztof Mazur <[email protected]>
> > Cc: David Woodhouse <[email protected]>
>
> Acked-By: David Woodhouse <[email protected]>
This patch is not needed, because vcc_destroy_socket()
calls pppoatm_push(vcc, NULL) to indicate that vcc is now closed,
before vcc_release() calls sock_put() and it's properly handled
by pppoatm.
I will drop this patch.
>
> But did you spot what's in the end of the context of the first hunk...?
>
> > --- a/net/atm/pppoatm.c
> > +++ b/net/atm/pppoatm.c
> > @@ -154,6 +154,7 @@ static void pppoatm_unassign_vcc(struct atm_vcc
> > *atmvcc)
> > tasklet_kill(&pvcc->wakeup_tasklet);
> > ppp_unregister_channel(&pvcc->chan);
> > atmvcc->user_back = NULL;
> > + sock_put(sk_atm(pvcc->atmvcc));
> > kfree(pvcc);
> > /* Gee, I hope we have the big kernel lock here... */
> > module_put(THIS_MODULE);
>
> Fairly sure that hope is unfounded these days... :)
>
Yes, I saw that.
Thanks.
Krzysiek
On Tue, Oct 30, 2012 at 08:07:25PM +0100, Krzysztof Mazur wrote:
> On Tue, Oct 30, 2012 at 09:37:48AM +0000, David Woodhouse wrote:
> >
> > Should we be locking it earlier, so that the atm_may_send() call is also
> > covered by the lock?
>
> Yes, but only to protect against concurent vcc_sendmsg().
>
> >
> > Either way, it's an obvious improvement on what we had before ??? and even
> > if the answer to my question above is 'yes', exceeding the configured
> > size by one packet is both harmless and almost never going to happen
> > since we now limit ourselves to two packets anyway. So:
> >
> > Acked-By: David Woodhouse <[email protected]>
> >
>
David, I think we should also fix the issue with sk_sndbuf < MTU,
which is described in comment in pppoatm_may_send() added by
your "pppoatm: Fix excessive queue bloat" patch.
The vcc_sendmsg() already does that.
Krzysiek
-- >8 --
Subject: [PATCH] pppoatm: fix sending packets when sk_sndbuf < MTU
Now pppoatm_send() works, when sk_sndbuf is smaller than MTU. This
issue was already pointed in comment:
/*
* It's not clear that we need to bother with using atm_may_send()
* to check we don't exceed sk->sk_sndbuf. If userspace sets a
* value of sk_sndbuf which is lower than the MTU, we're going to
* block for ever. But the code always did that before we introduced
* the packet count limit, so...
*/
The test is copied from alloc_tx() which is used by vcc_sendmsg().
Signed-off-by: Krzysztof Mazur <[email protected]>
---
net/atm/pppoatm.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 4cc81b5..f25536b 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -306,12 +306,9 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
/*
* It's not clear that we need to bother with using atm_may_send()
- * to check we don't exceed sk->sk_sndbuf. If userspace sets a
- * value of sk_sndbuf which is lower than the MTU, we're going to
- * block for ever. But the code always did that before we introduced
- * the packet count limit, so...
+ * to check we don't exceed sk->sk_sndbuf.
*/
- if (!atm_may_send(vcc, skb->truesize))
+ if (sk_wmem_alloc_get(sk_atm(vcc)) && !atm_may_send(vcc, skb->truesize))
goto nospace_unlock_sock;
atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
--
1.8.0.172.g62af90c
On Tue, Oct 30, 2012 at 09:35:00AM +0000, David Woodhouse wrote:
> On Mon, 2012-10-22 at 19:14 +0200, Krzysztof Mazur wrote:
> > Now pppoatm_send(), like vcc_sendmsg(), checks for vcc flags that
> > indicate that vcc is not ready.
>
> I note that vcc_sendmsg() also checks for sock->state == SS_CONNECTED.
> Is that check not needed here? Otherwise, looks sane enough.
>
> Acked-By: David Woodhouse <[email protected]>
I don't think so. We never leave SS_CONNECTED state. This check is
done in vcc_sendmsg() because it's called from userspace.
However maybe we should check socket state before assigning vcc to
pppoatm (untested):
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 0dcb5dc..df06d14 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -414,6 +414,8 @@ static int pppoatm_ioctl(struct socket *sock, unsigned int cmd,
return -ENOIOCTLCMD;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
+ if (sock->state != SS_CONNECTED)
+ return -EINVAL;
return pppoatm_assign_vcc(atmvcc, argp);
}
case PPPIOCGCHAN:
Thanks.
Krzysiek
On Tue, Oct 30, 2012 at 07:20:01PM +0100, Krzysztof Mazur wrote:
> On Tue, Oct 30, 2012 at 10:26:46AM -0400, Chas Williams (CONTRACTOR) wrote:
> > In message <[email protected]>,Krzysztof Mazur writes:
> >
> > as i recall from way back, this shouldnt be necessary. closing a vcc
> > for an attached protocol isnt supposed to require addtional locking
> > or synchronization.
>
> Such locking is already used by vcc_sendmsg() and I think we should do here
> exacly what vcc_sendmsg() does.
>
> >
> > vcc_release() locks the socket and vcc_destroy_socket() calls the device's
> > vcc close routine and pushes a NULL skb to the attached protocol.
> > this NULL push is supposed to let the attached protocol that no more
> > sends and recvs can be handled.
> >
> > that said, the order for the device vcc close and push does seem
> > reversed. since i imagine there could be a pending pppoatm_send()
> > during this interval. the push of the NULL skb is allowed to wait for
> > the subprotocol to finish its cleanup/shutdown.
>
> Yes, this problem can be probably fixed by reversing close and push
> and adding some synchronization to pppoatm_unassign_vcc(), but I think
> we need that locking anyway, for instance for synchronization for
> checking and incrementing sk->sk_wmem_alloc, between pppoatm_send()
> and vcc_sendmsg().
>
I think that the same problem exists in other drivers (net/atm/br2684.c,
net/atm/clip.c, maybe other).
Reversing order of close() and push(vcc, NULL) operations seems to
be a good idea, but synchronization with push(vcc, NULL)
and function that calls vcc->send() must be added to all drivers.
I think it's better to just use ATM socket lock - lock_sock(sk_atm(vcc)),
it will fix also problems with synchronization with vcc_sendmsg()
and possibly other functions (ioctl?).
I think that we should add a wrapper to vcc->send(), based on
fixed pppoatm_send(), that performs required checks and takes the ATM socket
lock.
But I think we should reverse those operations anyway, because some
drivers may use other locks, not ATM socket lock, for proper
synchronization.
Krzysiek
-- >8 --
diff --git a/net/atm/common.c b/net/atm/common.c
index 0c0ad93..a0e4411 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -171,10 +171,10 @@ static void vcc_destroy_socket(struct sock *sk)
set_bit(ATM_VF_CLOSE, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
if (vcc->dev) {
- if (vcc->dev->ops->close)
- vcc->dev->ops->close(vcc);
if (vcc->push)
vcc->push(vcc, NULL); /* atmarpd has no push */
+ if (vcc->dev->ops->close)
+ vcc->dev->ops->close(vcc);
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
atm_return(vcc, skb->truesize);
On Tue, 2012-10-30 at 20:52 +0100, Krzysztof Mazur wrote:
>
> --- a/net/atm/pppoatm.c
> +++ b/net/atm/pppoatm.c
> @@ -306,12 +306,9 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
>
> /*
> * It's not clear that we need to bother with using atm_may_send()
> - * to check we don't exceed sk->sk_sndbuf. If userspace sets a
> - * value of sk_sndbuf which is lower than the MTU, we're going to
> - * block for ever. But the code always did that before we introduced
> - * the packet count limit, so...
> + * to check we don't exceed sk->sk_sndbuf.
> */
> - if (!atm_may_send(vcc, skb->truesize))
> + if (sk_wmem_alloc_get(sk_atm(vcc)) && !atm_may_send(vcc, skb->truesize))
> goto nospace_unlock_sock;
Does this break the pvcc->blocked handling that coordinates with
pppoatm_pop()?
If we have one packet in flight, so pppoatm_may_send() permits a new one
to be queued... but they're *large* packets to sk_wmem_alloc doesn't
permit it. Immediately after the check, pppoatm_pop() runs and leaves
the queue empty. We return zero, blocking the queue… which never gets
woken because we didn't set the BLOCKED flag and thus the tasklet never
runs.
In fact, I think we need the BLOCKED handling for the
sock_owned_by_user() case too? When the VCC is actually closed, I
suppose that's not recoverable and we don't care about waking the queue
anyway? But any time we end up returning zero from pppoatm_send(), we
*need* to ensure that a wakeup will happen in future unless the socket
is actually dead.
--
dwmw2
On Wed, Oct 31, 2012 at 10:41:47AM +0100, Krzysztof Mazur wrote:
>
> I think that we should add a wrapper to vcc->send(), based on
> fixed pppoatm_send(), that performs required checks and takes the ATM socket
> lock.
>
I'm sending initial version of such wrapper and update to pppoatm.
Untested but the code is just copied from pppoatm_send.
In final series I will fix some old &sk_atm(ATM_SKB(skb)->vcc)-like
code from original version, before moving to vcc_send_bh(), but
it's just an initial idea for some comments.
Krzysiek
diff --git a/net/atm/common.c b/net/atm/common.c
index 0c0ad93..e0602d2 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -558,6 +558,32 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
return copied;
}
+int vcc_send_bh(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ int ret;
+
+ bh_lock_sock(sk_atm(vcc));
+ ret = -EAGAIN;
+ if (sock_owned_by_user(sk_atm(vcc)))
+ goto out;
+ if (test_bit(ATM_VF_RELEASED, &vcc->flags)
+ || test_bit(ATM_VF_CLOSE, &vcc->flags)
+ || !test_bit(ATM_VF_READY, &vcc->flags))
+ goto out;
+
+ if (sk_wmem_alloc_get(sk_atm(vcc)) && !atm_may_send(vcc, skb->truesize))
+ goto out;
+
+ atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
+ ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
+ pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
+ skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
+ ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb);
+out:
+ bh_unlock_sock(sk_atm(vcc));
+ return ret;
+}
+
int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
size_t total_len)
{
diff --git a/net/atm/common.h b/net/atm/common.h
index cc3c2da..3a1c340 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -15,6 +15,7 @@ int vcc_release(struct socket *sock);
int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size, int flags);
+int vcc_send_bh(struct atm_vcc *vcc, struct sk_buff *skb);
int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
size_t total_len);
unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 5fc335a..7612f18 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -296,31 +296,10 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
}
vcc = ATM_SKB(skb)->vcc;
- bh_lock_sock(sk_atm(vcc));
- if (sock_owned_by_user(sk_atm(vcc)))
- goto nospace_unlock_sock;
- if (test_bit(ATM_VF_RELEASED, &vcc->flags)
- || test_bit(ATM_VF_CLOSE, &vcc->flags)
- || !test_bit(ATM_VF_READY, &vcc->flags))
- goto nospace_unlock_sock;
-
- /*
- * It's not clear that we need to bother with using atm_may_send()
- * to check we don't exceed sk->sk_sndbuf.
- */
- if (sk_wmem_alloc_get(sk_atm(vcc)) && !atm_may_send(vcc, skb->truesize))
- goto nospace_unlock_sock;
-
- atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
- ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
- pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
- skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
- ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
- ? DROP_PACKET : 1;
- bh_unlock_sock(sk_atm(vcc));
- return ret;
-nospace_unlock_sock:
- bh_unlock_sock(sk_atm(vcc));
+ ret = vcc_send_bh(vcc, skb);
+ if (ret == -EAGAIN)
+ goto nospace;
+ return ret ? DROP_PACKET : 1;
nospace:
/*
* We don't have space to send this SKB now, but we might have
On Wed, Oct 31, 2012 at 10:16:18AM +0000, David Woodhouse wrote:
>
> Does this break the pvcc->blocked handling that coordinates with
> pppoatm_pop()?
>
> If we have one packet in flight, so pppoatm_may_send() permits a new one
> to be queued... but they're *large* packets to sk_wmem_alloc doesn't
> permit it. Immediately after the check, pppoatm_pop() runs and leaves
> the queue empty. We return zero, blocking the queue??? which never gets
> woken because we didn't set the BLOCKED flag and thus the tasklet never
> runs.
>
> In fact, I think we need the BLOCKED handling for the
> sock_owned_by_user() case too? When the VCC is actually closed, I
> suppose that's not recoverable and we don't care about waking the queue
> anyway? But any time we end up returning zero from pppoatm_send(), we
> *need* to ensure that a wakeup will happen in future unless the socket
> is actually dead.
>
Yes, original patch had also the same problem with sock_owned_by_user(),
so I just incorrectly assumed that we can do "goto nospace" after
pppoatm_may_send(), but ppooatm_may_send() must be the last test.
So I just moved all other tests earlier and and now pppoatm_may_send()
is also protected by ATM socket lock as you suggested earlier.
Krzysiek
-- >8 --
Subject: [PATCH] pppoatm: fix race condition with destroying of vcc
The pppoatm_send() calls vcc->send() and now also checks for
some vcc flags that indicate destroyed vcc without proper locking.
The vcc_sendmsg() uses lock_sock(sk). This lock is used by
vcc_release(), so vcc_destroy_socket() will not be called between
check and during ->send(). The vcc_release_async() sets ATM_VF_CLOSE,
but it should be safe to call ->send() after it, because
vcc->dev->ops->close() is not called.
The pppoatm_send() is called with BH disabled, so bh_lock_sock()
should be used instead of lock_sock().
Signed-off-by: Krzysztof Mazur <[email protected]>
---
net/atm/pppoatm.c | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 0dcb5dc..eb76bd3 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -270,11 +270,22 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
{
struct pppoatm_vcc *pvcc = chan_to_pvcc(chan);
struct atm_vcc *vcc;
+ int ret;
ATM_SKB(skb)->vcc = pvcc->atmvcc;
pr_debug("(skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc);
if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT))
(void) skb_pull(skb, 1);
+
+ vcc = ATM_SKB(skb)->vcc;
+ bh_lock_sock(sk_atm(vcc));
+ if (sock_owned_by_user(sk_atm(vcc)))
+ goto nospace;
+ if (test_bit(ATM_VF_RELEASED, &vcc->flags)
+ || test_bit(ATM_VF_CLOSE, &vcc->flags)
+ || !test_bit(ATM_VF_READY, &vcc->flags))
+ goto nospace;
+
switch (pvcc->encaps) { /* LLC encapsulation needed */
case e_llc:
if (skb_headroom(skb) < LLC_LEN) {
@@ -287,8 +298,10 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
}
consume_skb(skb);
skb = n;
- if (skb == NULL)
+ if (skb == NULL) {
+ bh_unlock_sock(sk_atm(vcc));
return DROP_PACKET;
+ }
} else if (!pppoatm_may_send(pvcc, skb->truesize))
goto nospace;
memcpy(skb_push(skb, LLC_LEN), pppllc, LLC_LEN);
@@ -298,24 +311,22 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
goto nospace;
break;
case e_autodetect:
+ bh_unlock_sock(sk_atm(vcc));
pr_debug("Trying to send without setting encaps!\n");
kfree_skb(skb);
return 1;
}
- vcc = ATM_SKB(skb)->vcc;
- if (test_bit(ATM_VF_RELEASED, &vcc->flags)
- || test_bit(ATM_VF_CLOSE, &vcc->flags)
- || !test_bit(ATM_VF_READY, &vcc->flags))
- goto nospace;
-
atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
- return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
+ ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
? DROP_PACKET : 1;
+ bh_unlock_sock(sk_atm(vcc));
+ return ret;
nospace:
+ bh_unlock_sock(sk_atm(vcc));
/*
* We don't have space to send this SKB now, but we might have
* already applied SC_COMP_PROT compression, so may need to undo
--
1.8.0.172.g62af90c
On Wed, 2012-10-31 at 12:30 +0100, Krzysztof Mazur wrote:
> Yes, original patch had also the same problem with sock_owned_by_user(),
> so I just incorrectly assumed that we can do "goto nospace" after
> pppoatm_may_send(), but ppooatm_may_send() must be the last test.
>
> So I just moved all other tests earlier and and now pppoatm_may_send()
> is also protected by ATM socket lock as you suggested earlier.
I don't think that's sufficient. When we return zero from
pppoatm_send(), the generic PPP code considers the channel to be
blocked, and it won't send any more data to it, ever, until we call
ppp_output_wakeup(). Which we do from a tasklet, triggered in
pppoatm_pop() *iff* the BLOCKED flag is set.
So we play silly buggers in pppoatm_may_send() to ensure that *if* we're
going to return zero, we make damn sure the BLOCKED flag is set and that
pppoatm_pop() is going to see that it's set. There are extensive
comments in pppoatm_pop() and pppoatm_may_send() which try to explain
this. It works because there's *always* going to be packet in flight if
we say that the sk_wmem is full, so of course there's *always* going to
be a later call to pppoatm_pop() to wake things up.
However, if you're going to return zero from pppoatm_send() when
sock_owned_by_user() is true, what guarantees that ppp_output_wakeup()
will ever be called?
--
dwmw2
On Wed, 31 Oct 2012 10:41:47 +0100
Krzysztof Mazur <[email protected]> wrote:
> On Tue, Oct 30, 2012 at 07:20:01PM +0100, Krzysztof Mazur wrote:
> > Yes, this problem can be probably fixed by reversing close and push
> > and adding some synchronization to pppoatm_unassign_vcc(), but I think
> > we need that locking anyway, for instance for synchronization for
> > checking and incrementing sk->sk_wmem_alloc, between pppoatm_send()
> > and vcc_sendmsg().
> >
>
> I think that the same problem exists in other drivers (net/atm/br2684.c,
> net/atm/clip.c, maybe other).
>
> Reversing order of close() and push(vcc, NULL) operations seems to
> be a good idea, but synchronization with push(vcc, NULL)
> and function that calls vcc->send() must be added to all drivers.
this was the scheme that was (and is) currently in place. detaching a
protocol from the atm layer never had a separate function, so it was
decided at some point to just push a NULL skb as a signal to the next
layer that i needed to cleanly shutdown and detach. the push(vcc,
NULL) always happens in a sleepable context, so waiting for whatever
attached protocol scheduler to finish up is not a problem. after the
pushing of the skb NULL, the attached protocol should not send or recv
any data on that vcc.
reversing the order of the push and close certainly seems like the right
thing to do. i would like to see if it would fix your problem. making
the minimal change to get something working would be preferred before
adding additional complexity. i am just surprised we havent seen this
bug before.
> I think it's better to just use ATM socket lock - lock_sock(sk_atm(vcc)),
> it will fix also problems with synchronization with vcc_sendmsg()
> and possibly other functions (ioctl?).
>
> I think that we should add a wrapper to vcc->send(), based on
> fixed pppoatm_send(), that performs required checks and takes the ATM socket
> lock.
>
> But I think we should reverse those operations anyway, because some
> drivers may use other locks, not ATM socket lock, for proper
> synchronization.
i dont think this is a bad idea. vcc_release_async() could happen
(this would be a bit unusual for a pvc but removing the usbatm device
would do this) and there is no point in sending on a vcc that is
closing.
On Wed, Oct 31, 2012 at 04:03:52PM -0400, chas williams - CONTRACTOR wrote:
>
> reversing the order of the push and close certainly seems like the right
> thing to do. i would like to see if it would fix your problem. making
> the minimal change to get something working would be preferred before
> adding additional complexity. i am just surprised we havent seen this
> bug before.
Yes, it fixes the problem and it's probably the best fix for original
issue.
There are also some minor potential issues in pppoatm driver:
- locking issues, but now only between pppoatm_send() and
vcc_sendmsg() and maybe some other functions,
- missing check for SS_CONNECTED in pppoatm_ioctl,
- problem described in comment in pppoatm_may_send() when
sk->sk_sndbuf < MTU, sk_wmem_alloc_get() should be added
there
but I think that for now the patch that changes the order of push
and close is sufficient.
I probably saw that bug a log time ago (around 2.6.30), but it was
too rare to see what caused panic, but after
9d02daf754238adac48fa075ee79e7edd3d79ed3 (pppoatm: Fix excessive queue bloat)
this bug occurs much more frequently.
Thanks.
Krzysiek
-- >8 --
Subject: [PATCH] atm: detach protocol before closing vcc
The vcc_destroy_socket() closes vcc before the protocol is detached
from vcc by calling vcc->push() with NULL skb. This leaves some time
window, where the protocol may call vcc->send() on closed vcc.
It happens at least with pppoatm protocol and usbatm driver, and causes
an Oops:
Oops: 0000 [#1] PREEMPT
Pid: 0, comm: swapper Not tainted 3.6.0-krzysiek-00001-gb7cd93b-dirty #60 /AK32
EIP: 0060:[<c01413c6>] EFLAGS: 00010082 CPU: 0
EIP is at __wake_up_common+0x16/0x70
EAX: 30707070 EBX: 00000292 ECX: 00000001 EDX: dca75fc0
ESI: 00000000 EDI: de7f500f EBP: df409f24 ESP: df409f08
DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
CR0: 8005003b CR2: 30707070 CR3: 1c920000 CR4: 000007d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
Process swapper (pid: 0, ti=df408000 task=c07bd4e0 task.ti=c07b0000)
Stack:
00000000 00000001 00000001 dca75fc0 00000292 00000000 de7f500f df409f3c
c0143299 00000000 00000000 dc84f000 dc84f000 df409f4c c0602bf0 00000000
dc84f000 df409f58 c0604301 dc840cc0 df409fb4 c04672e5 c076a240 00000000
Call Trace:
[<c0143299>] __wake_up+0x29/0x50
[<c0602bf0>] vcc_write_space+0x40/0x80
[<c0604301>] atm_pop_raw+0x21/0x30
[<c04672e5>] usbatm_tx_process+0x2a5/0x380
[<c0126cf9>] tasklet_action+0x39/0x70
[<c0126f1f>] __do_softirq+0x7f/0x120
[<c0126ea0>] ? local_bh_enable_ip+0xa0/0xa0
<IRQ>
Now the protocol is detached before vcc is closed.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
net/atm/common.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/atm/common.c b/net/atm/common.c
index 0c0ad93..a0e4411 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -171,10 +171,10 @@ static void vcc_destroy_socket(struct sock *sk)
set_bit(ATM_VF_CLOSE, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
if (vcc->dev) {
- if (vcc->dev->ops->close)
- vcc->dev->ops->close(vcc);
if (vcc->push)
vcc->push(vcc, NULL); /* atmarpd has no push */
+ if (vcc->dev->ops->close)
+ vcc->dev->ops->close(vcc);
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
atm_return(vcc, skb->truesize);
--
1.8.0.172.g62af90c
On Wed, 31 Oct 2012 23:04:35 +0100
Krzysztof Mazur <[email protected]> wrote:
> There are also some minor potential issues in pppoatm driver:
>
> - locking issues, but now only between pppoatm_send() and
> vcc_sendmsg() and maybe some other functions,
these have been around for a while. i agree that something should be
done about it. just not sure what should be synchronizing this mess.
> - missing check for SS_CONNECTED in pppoatm_ioctl,
in practice you will never run into this because a pvc is immediately
put into SS_CONNECTED mode (right before the userspace open()
returns). however, should it check? yes. i dont see anything
preventing you from running ppp on svc's.
On Thu, Nov 01, 2012 at 10:26:28AM -0400, chas williams - CONTRACTOR wrote:
> On Wed, 31 Oct 2012 23:04:35 +0100
> Krzysztof Mazur <[email protected]> wrote:
>
> > There are also some minor potential issues in pppoatm driver:
> >
> > - locking issues, but now only between pppoatm_send() and
> > vcc_sendmsg() and maybe some other functions,
>
> these have been around for a while. i agree that something should be
> done about it. just not sure what should be synchronizing this mess.
I think the ATM socket lock should be used. I'm sending the latest
patch that adds this locking after David Woodhouse's comments. The vcc->flags
check is now probably unnecessary.
>
> > - missing check for SS_CONNECTED in pppoatm_ioctl,
>
> in practice you will never run into this because a pvc is immediately
> put into SS_CONNECTED mode (right before the userspace open()
> returns). however, should it check? yes. i dont see anything
> preventing you from running ppp on svc's.
I can confirm that the problem really exists, without connect() in pppoatm
plugin in pppd, I have seen an Oops and panic. I will send appropriate
patch.
Thanks.
Krzysiek
-- >8 --
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index f27a07a..ef19436 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -269,10 +269,23 @@ static inline int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
{
struct pppoatm_vcc *pvcc = chan_to_pvcc(chan);
+ struct atm_vcc *vcc;
+ int ret;
+
ATM_SKB(skb)->vcc = pvcc->atmvcc;
pr_debug("(skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc);
if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT))
(void) skb_pull(skb, 1);
+
+ vcc = ATM_SKB(skb)->vcc;
+ bh_lock_sock(sk_atm(vcc));
+ if (sock_owned_by_user(sk_atm(vcc)))
+ goto nospace;
+ if (test_bit(ATM_VF_RELEASED, &vcc->flags)
+ || test_bit(ATM_VF_CLOSE, &vcc->flags)
+ || !test_bit(ATM_VF_READY, &vcc->flags))
+ goto nospace;
+
switch (pvcc->encaps) { /* LLC encapsulation needed */
case e_llc:
if (skb_headroom(skb) < LLC_LEN) {
@@ -285,8 +298,10 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
}
consume_skb(skb);
skb = n;
- if (skb == NULL)
+ if (skb == NULL) {
+ bh_unlock_sock(sk_atm(vcc));
return DROP_PACKET;
+ }
} else if (!pppoatm_may_send(pvcc, skb->truesize))
goto nospace;
memcpy(skb_push(skb, LLC_LEN), pppllc, LLC_LEN);
@@ -296,6 +311,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
goto nospace;
break;
case e_autodetect:
+ bh_unlock_sock(sk_atm(vcc));
pr_debug("Trying to send without setting encaps!\n");
kfree_skb(skb);
return 1;
@@ -305,9 +321,12 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
- return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
+ ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
? DROP_PACKET : 1;
+ bh_unlock_sock(sk_atm(vcc));
+ return ret;
nospace:
+ bh_unlock_sock(sk_atm(vcc));
/*
* We don't have space to send this SKB now, but we might have
* already applied SC_COMP_PROT compression, so may need to undo
On Fri, Nov 02, 2012 at 10:40:18AM +0100, Krzysztof Mazur wrote:
> On Thu, Nov 01, 2012 at 10:26:28AM -0400, chas williams - CONTRACTOR wrote:
> > On Wed, 31 Oct 2012 23:04:35 +0100
> > Krzysztof Mazur <[email protected]> wrote:
> > > - missing check for SS_CONNECTED in pppoatm_ioctl,
> >
> > in practice you will never run into this because a pvc is immediately
> > put into SS_CONNECTED mode (right before the userspace open()
> > returns). however, should it check? yes. i dont see anything
> > preventing you from running ppp on svc's.
>
> I can confirm that the problem really exists, without connect() in pppoatm
> plugin in pppd, I have seen an Oops and panic. I will send appropriate
> patch.
I'm sending the patch that fixes this issue. Works correctly with original
pppd, and does not crash with pppd without connect() - the pppd just
logs:
pppd[3460]: ioctl(ATM_SETBACKEND): Invalid argument
and exits.
Krzysiek
-- >8 --
Subject: [PATCH] pppoatm: allow assign only on a connected socket
The pppoatm does not check if the used vcc is in connected state,
causing an Oops in pppoatm_send() when vcc->send() is called
on not fully connected socket.
Now pppoatm can be assigned only on connected sockets; otherwise
-EINVAL error is returned.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [< (null)>] (null)
*pde = 00000000
Oops: 0000 [#1] PREEMPT
Pid: 4154, comm: pppd Not tainted 3.6.0-krzysiek-00002-g3ff1093 #95 /AK32
EIP: 0060:[<00000000>] EFLAGS: 00010202 CPU: 0
EIP is at 0x0
EAX: d95f7800 EBX: d9d4ba80 ECX: d95f7800 EDX: d9d4ba80
ESI: ffffffff EDI: 00000001 EBP: 000001c0 ESP: d9823f34
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
CR0: 8005003b CR2: 00000000 CR3: 1e7b6000 CR4: 000007d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
Process pppd (pid: 4154, ti=d9822000 task=d99918a0 task.ti=d9822000)
Stack:
c060cd9b c043a2ca d99ed860 d99ed864 d9d4ba80 08094f22 c043a228 d9d4ba80
d99ed860 0000000c c043a347 ffffffff 0000000c d94311a0 08094f22 c043a290
c019f72e d9823f9c 00000003 09df1090 d94311a0 08094f22 00000008 d9822000
Call Trace:
[<c060cd9b>] ? pppoatm_send+0x6b/0x300
[<c043a2ca>] ? ppp_write+0x3a/0xe0
[<c043a228>] ? ppp_channel_push+0x38/0xa0
[<c043a347>] ? ppp_write+0xb7/0xe0
[<c043a290>] ? ppp_channel_push+0xa0/0xa0
[<c019f72e>] ? vfs_write+0x8e/0x140
[<c019f88c>] ? sys_write+0x3c/0x70
[<c062ab50>] ? sysenter_do_call+0x12/0x26
Code: Bad EIP value.
EIP: [<00000000>] 0x0 SS:ESP 0068:d9823f34
CR2: 0000000000000000
---[ end trace e29cf1805f576278 ]---
Kernel panic - not syncing: Fatal exception in interrupt
net/atm/pppoatm.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 226dca9..f27a07a 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -406,6 +406,8 @@ static int pppoatm_ioctl(struct socket *sock, unsigned int cmd,
return -ENOIOCTLCMD;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
+ if (sock->state != SS_CONNECTED)
+ return -EINVAL;
return pppoatm_assign_vcc(atmvcc, argp);
}
case PPPIOCGCHAN:
--
1.8.0.172.g62af90c
Krzysztof, you've fixed a bunch of races... but I think there's one
still left.
An ATM driver will often have code like this, which gets called from
arbitrary contexts:
if (vcc->pop)
vcc->pop(vcc, skb);
Now, what happens if pppoatm_send(vcc, NULL) happens after the address
of vcc->pop (currently pppoatm_pop) has been loaded, but before the
function is actually called?
You tear down all the setup and set vcc->user_back to NULL. And then
pppoatm_pop() gets called. And promptly crashes because pvcc is NULL.
A lot of these problems exist for br2684 too, and in prodding at it a
little I can consistently crash the system by sending a flood of
outbound packets while I kill the br2684ctl program. I end up in
br2684_pop() with vcc->user_back == NULL. In looking to see how you'd
fixed that in pppoatm, I realised that you haven't... :)
--
dwmw2
On Tue, Nov 27, 2012 at 05:16:32PM +0000, David Woodhouse wrote:
> Krzysztof, you've fixed a bunch of races... but I think there's one
> still left.
>
> An ATM driver will often have code like this, which gets called from
> arbitrary contexts:
> if (vcc->pop)
> vcc->pop(vcc, skb);
>
> Now, what happens if pppoatm_send(vcc, NULL) happens after the address
> of vcc->pop (currently pppoatm_pop) has been loaded, but before the
> function is actually called?
>
> You tear down all the setup and set vcc->user_back to NULL. And then
> pppoatm_pop() gets called. And promptly crashes because pvcc is NULL.
>
> A lot of these problems exist for br2684 too, and in prodding at it a
> little I can consistently crash the system by sending a flood of
> outbound packets while I kill the br2684ctl program. I end up in
> br2684_pop() with vcc->user_back == NULL. In looking to see how you'd
> fixed that in pppoatm, I realised that you haven't... :)
>
Yes, I missed that one - it's even worse, I introduced that bug
in "[PATCH 1/7] atm: detach protocol before closing vcc". Before that
patch that scenario shouldn't happen because vcc was closed before
calling pppoatm_send(vcc, NULL) - the driver should provide appropriate
synchronization.
I think that we should just drop that patch. With later changes it's not
necessary - the pppoatm_send() can be safely called while closing vcc.
Krzysiek
On Tue, 2012-11-27 at 18:39 +0100, Krzysztof Mazur wrote:
> Yes, I missed that one - it's even worse, I introduced that bug
> in "[PATCH 1/7] atm: detach protocol before closing vcc". Before that
> patch that scenario shouldn't happen because vcc was closed before
> calling pppoatm_send(vcc, NULL) - the driver should provide appropriate
> synchronization.
>
> I think that we should just drop that patch. With later changes it's not
> necessary - the pppoatm_send() can be safely called while closing vcc.
I'm not running with that patch. This bug exists for br2684 even before
it, and I think also for pppoatm.
In solos-pci at least, the ops->close() function doesn't flush all
pending skbs for this vcc before returning. So can be a tasklet
somewhere which has loaded the address of the vcc->pop function from one
of them, and is going to call it in some unspecified amount of time.
Should we make the device's ->close function wait for all TX and RX skbs
for this vcc to complete?
--
dwmw2
On Tue, Nov 27, 2012 at 06:02:29PM +0000, David Woodhouse wrote:
> On Tue, 2012-11-27 at 18:39 +0100, Krzysztof Mazur wrote:
> > Yes, I missed that one - it's even worse, I introduced that bug
> > in "[PATCH 1/7] atm: detach protocol before closing vcc". Before that
> > patch that scenario shouldn't happen because vcc was closed before
> > calling pppoatm_send(vcc, NULL) - the driver should provide appropriate
> > synchronization.
> >
> > I think that we should just drop that patch. With later changes it's not
> > necessary - the pppoatm_send() can be safely called while closing vcc.
>
> I'm not running with that patch. This bug exists for br2684 even before
> it, and I think also for pppoatm.
>
> In solos-pci at least, the ops->close() function doesn't flush all
> pending skbs for this vcc before returning. So can be a tasklet
> somewhere which has loaded the address of the vcc->pop function from one
> of them, and is going to call it in some unspecified amount of time.
>
> Should we make the device's ->close function wait for all TX and RX skbs
> for this vcc to complete?
Yes, the ->close() can sleep and after vcc is closed the ->pop() shouldn't be
called.
While reviewing your br2684 patch I also found that some ATM drivers does
not call ->pop() when ->send() fails, they should do:
if (vcc->pop)
vcc->pop(vcc, skb);
else
dev_kfree_skb(skb);
but some drivers just call dev_kfree_skb(skb).
I think that we should add atm_pop() function that does that and fix all
drivers.
Krzysiek
On Tue, Nov 27, 2012 at 06:02:29PM +0000, David Woodhouse wrote:
>
> I'm not running with that patch. This bug exists for br2684 even before
> it, and I think also for pppoatm.
>
Did you use your "atm: br2684: Fix excessive queue bloat" patch?
With that patch for pppoatm the dev->close()/pppoatm_send() race
was much easier to trigger. Maybe with an equivalent patch for br2684
the races are also easier triggerable.
Krzysiek
On Tue, 27 Nov 2012 18:02:29 +0000
David Woodhouse <[email protected]> wrote:
> In solos-pci at least, the ops->close() function doesn't flush all
> pending skbs for this vcc before returning. So can be a tasklet
> somewhere which has loaded the address of the vcc->pop function from one
> of them, and is going to call it in some unspecified amount of time.
>
> Should we make the device's ->close function wait for all TX and RX skbs
> for this vcc to complete?
the driver's close routine should wait for any of the pending tx and rx
to complete. take a look at the he.c in driver/atm
We should no longer be calling the old pop routine for the vcc, after
vcc_release() has completed. Make sure we wait for any pending TX skbs
to complete, by waiting for our own PKT_PCLOSE control skb to be sent.
Signed-off-by: David Woodhouse <[email protected]>
---
On Tue, 2012-11-27 at 13:54 -0500, chas williams - CONTRACTOR wrote:
> the driver's close routine should wait for any of the pending tx and
> rx to complete.
Nathan, does this help? I can test here to a certain extent, but when I
use it in PPPoE mode and then crash the router, the DSLAM tends to
refuse to talk to me for an arbitrary period of time after that. Which
is something of a PITA.
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 9851093..b5bb332 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -94,6 +94,7 @@ struct pkt_hdr {
struct solos_skb_cb {
struct atm_vcc *vcc;
uint32_t dma_addr;
+ struct completion *c;
};
@@ -868,6 +869,7 @@ static void pclose(struct atm_vcc *vcc)
struct solos_card *card = vcc->dev->dev_data;
struct sk_buff *skb;
struct pkt_hdr *header;
+ DECLARE_COMPLETION_ONSTACK(c);
skb = alloc_skb(sizeof(*header), GFP_ATOMIC);
if (!skb) {
@@ -881,11 +883,15 @@ static void pclose(struct atm_vcc *vcc)
header->vci = cpu_to_le16(vcc->vci);
header->type = cpu_to_le16(PKT_PCLOSE);
+ SKB_CB(skb)->c = &c;
+
fpga_queue(card, SOLOS_CHAN(vcc->dev), skb, NULL);
clear_bit(ATM_VF_ADDR, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
+ wait_for_completion(&c);
+
/* Hold up vcc_destroy_socket() (our caller) until solos_bh() in the
tasklet has finished processing any incoming packets (and, more to
the point, using the vcc pointer). */
@@ -1011,9 +1017,12 @@ static uint32_t fpga_tx(struct solos_card *card)
if (vcc) {
atomic_inc(&vcc->stats->tx);
solos_pop(vcc, oldskb);
- } else
+ } else {
+ struct pkt_hdr *header = (void *)oldskb->data;
+ if (le16_to_cpu(header->type) == PKT_PCLOSE)
+ complete(SKB_CB(oldskb)->c);
dev_kfree_skb_irq(oldskb);
-
+ }
}
}
/* For non-DMA TX, write the 'TX start' bit for all four ports simultaneously */
--
David Woodhouse Open Source Technology Centre
[email protected] Intel Corporation
Avoid submitting patches to a vcc which is being closed. Things go badly
wrong when the ->pop method gets later called after everything's been
torn down.
Signed-off-by: David Woodhouse <[email protected]>
---
On Tue, 2012-11-27 at 22:36 +0000, David Woodhouse wrote:
> Nathan, does this help?
I think that's necessary, but not sufficient. You'll want something like
this too... I can now kill br2684ctl while there's a flood of outgoing
packets, and get a handful of the printks that I had in here until a few
seconds ago when I edited it out of the patch in my mail client... and
no more panic.
I do also now have Krzysztof's patch 1/7 (detach protocol before closing
vcc) but I don't think it actually matters any more.
--- a/net/atm/br2684.c~ 2012-11-23 23:14:29.000000000 +0000
+++ b/net/atm/br2684.c 2012-11-27 23:09:18.502403881 +0000
@@ -249,6 +249,12 @@ static int br2684_xmit_vcc(struct sk_buf
skb_debug(skb);
ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
+ if (test_bit(ATM_VF_RELEASED, &atmvcc->flags)
+ || test_bit(ATM_VF_CLOSE, &atmvcc->flags)
+ || !test_bit(ATM_VF_READY, &atmvcc->flags)) {
+ dev_kfree_skb(skb);
+ return 0;
+ }
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = atmvcc->atm_options;
--
dwmw2
On Tue, Nov 27, 2012 at 11:28:36PM +0000, David Woodhouse wrote:
> Avoid submitting patches to a vcc which is being closed. Things go badly
> wrong when the ->pop method gets later called after everything's been
> torn down.
>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> On Tue, 2012-11-27 at 22:36 +0000, David Woodhouse wrote:
> > Nathan, does this help?
>
> I think that's necessary, but not sufficient. You'll want something like
> this too... I can now kill br2684ctl while there's a flood of outgoing
> packets, and get a handful of the printks that I had in here until a few
> seconds ago when I edited it out of the patch in my mail client... and
> no more panic.
>
> I do also now have Krzysztof's patch 1/7 (detach protocol before closing
> vcc) but I don't think it actually matters any more.
If you do this actually it's better to don't use patch 1/7 because
it introduces race condition that you found earlier.
>
> --- a/net/atm/br2684.c~ 2012-11-23 23:14:29.000000000 +0000
> +++ b/net/atm/br2684.c 2012-11-27 23:09:18.502403881 +0000
> @@ -249,6 +249,12 @@ static int br2684_xmit_vcc(struct sk_buf
> skb_debug(skb);
>
> ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
> + if (test_bit(ATM_VF_RELEASED, &atmvcc->flags)
> + || test_bit(ATM_VF_CLOSE, &atmvcc->flags)
> + || !test_bit(ATM_VF_READY, &atmvcc->flags)) {
> + dev_kfree_skb(skb);
> + return 0;
> + }
> pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
> atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
> ATM_SKB(skb)->atm_options = atmvcc->atm_options;
>
With this patch you have still theoretical race that was fixed in patches
5 and 8 in pppoatm series, but I never seen that in practice.
Acked-by: Krzysztof Mazur <[email protected]>
Krzysiek
On Wed, 2012-11-28 at 00:51 +0100, Krzysztof Mazur wrote:
> If you do this actually it's better to don't use patch 1/7 because
> it introduces race condition that you found earlier.
Right. I've omitted that from the git tree I just pushed out.
> With this patch you have still theoretical race that was fixed in patches
> 5 and 8 in pppoatm series, but I never seen that in practice.
And I think it's even less likely for br2684. At least with pppoatm you
might have had pppd sending frames. But for br2684 they *only* come from
its start_xmit function... which is serialised anyway.
I do get strange oopses when I try to add BQL to br2684, but that's not
something to be looking at at 1am...
I *do* need the equivalent of your patch 4, which is the module_put
race.
--
dwmw2
On Wed, Nov 28, 2012 at 12:54:46AM +0000, David Woodhouse wrote:
> On Wed, 2012-11-28 at 00:51 +0100, Krzysztof Mazur wrote:
> > If you do this actually it's better to don't use patch 1/7 because
> > it introduces race condition that you found earlier.
>
> Right. I've omitted that from the git tree I just pushed out.
>
> > With this patch you have still theoretical race that was fixed in patches
> > 5 and 8 in pppoatm series, but I never seen that in practice.
>
> And I think it's even less likely for br2684. At least with pppoatm you
> might have had pppd sending frames. But for br2684 they *only* come from
> its start_xmit function... which is serialised anyway.
>
> I do get strange oopses when I try to add BQL to br2684, but that's not
> something to be looking at at 1am...
>
> I *do* need the equivalent of your patch 4, which is the module_put
> race.
>
I think you might need also an equivalent of
"[PATCH v3 3/7] pppoatm: allow assign only on a connected socket".
I'm not sure yet. In will test if I can trigger that Oops on pppoatm
without that patch. Testing vcc flags might be sufficient - that's
what I did in the first patch, but you asked what about SOCK_CONNECTED,
and I think it was really needed.
Krzysiek
-- >8 --
Subject: [PATCH] br2684: allow assign only on a connected socket
The br2684 does not check if used vcc is in connected state,
causing potential Oops in pppoatm_send() when vcc->send() is called
on not fully connected socket.
Now br2684 can be assigned only on connected sockets; otherwise
-EINVAL error is returned.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
net/atm/br2684.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 59e8edb..e88998c 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -704,10 +704,13 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
return -ENOIOCTLCMD;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if (cmd == ATM_SETBACKEND)
+ if (cmd == ATM_SETBACKEND) {
+ if (sock->state != SS_CONNECTED)
+ return -EINVAL;
return br2684_regvcc(atmvcc, argp);
- else
+ } else {
return br2684_create(argp);
+ }
#ifdef CONFIG_ATM_BR2684_IPFILTER
case BR2684_SETFILT:
if (atmvcc->push != br2684_push)
--
1.8.0.411.g71a7da8
> On Tue, 27 Nov 2012 18:02:29 +0000
> David Woodhouse <[email protected]> wrote:
>
> > In solos-pci at least, the ops->close() function doesn't flush all
> > pending skbs for this vcc before returning. So can be a tasklet
> > somewhere which has loaded the address of the vcc->pop function from one
> > of them, and is going to call it in some unspecified amount of time.
> >
> > Should we make the device's ->close function wait for all TX and RX skbs
> > for this vcc to complete?
>
> the driver's close routine should wait for any of the pending tx and rx
> to complete. take a look at the he.c in driver/atm
I'm not sure that sleeping for long periods in close() is always a
good idea. If the process is event driven it will be unable to
handle events on other fd until the close completes.
This may be known not to be true in this case, but is more generally
a problem.
In this case the close should probably (IMHO at least) only sleep
while pending tx and rx are aborted/discarded.
Even when it might make sense to sleep in close until tx drains
there needs to be a finite timeout before it become abortive.
David
On Wed, 2012-11-28 at 09:08 +0100, Krzysztof Mazur wrote:
>
> I think you might need also an equivalent of
> "[PATCH v3 3/7] pppoatm: allow assign only on a connected socket".
>
> I'm not sure yet. In will test if I can trigger that Oops on pppoatm
> without that patch. Testing vcc flags might be sufficient - that's
> what I did in the first patch, but you asked what about SOCK_CONNECTED,
> and I think it was really needed.
Even if the READY check avoids the oops, I think the patch makes sense.
Especially as it makes things consistent with pppoatm. I've applied it
to the tree. Thanks.
--
dwmw2
On Wed, Nov 28, 2012 at 09:21:37AM -0000, David Laight wrote:
> > On Tue, 27 Nov 2012 18:02:29 +0000
> > David Woodhouse <[email protected]> wrote:
> >
> > > In solos-pci at least, the ops->close() function doesn't flush all
> > > pending skbs for this vcc before returning. So can be a tasklet
> > > somewhere which has loaded the address of the vcc->pop function from one
> > > of them, and is going to call it in some unspecified amount of time.
> > >
> > > Should we make the device's ->close function wait for all TX and RX skbs
> > > for this vcc to complete?
> >
> > the driver's close routine should wait for any of the pending tx and rx
> > to complete. take a look at the he.c in driver/atm
>
> I'm not sure that sleeping for long periods in close() is always a
> good idea. If the process is event driven it will be unable to
> handle events on other fd until the close completes.
> This may be known not to be true in this case, but is more generally
> a problem.
> In this case the close should probably (IMHO at least) only sleep
> while pending tx and rx are aborted/discarded.
>
> Even when it might make sense to sleep in close until tx drains
> there needs to be a finite timeout before it become abortive.
>
The ->close() routine can just abort any pending rx/tx and just wait
for completion of currently running rx/tx code. That shouldn't take
long.
Krzysiek
On Wed, 2012-11-28 at 11:04 +0100, Krzysztof Mazur wrote:
>
> The ->close() routine can just abort any pending rx/tx and just wait
> for completion of currently running rx/tx code. That shouldn't take
> long.
If it's been submitted to the hardware for DMA, it can't do that very
easily.
And if I can't be bothered to write code to go through the entire damn
queue and inspect every packet to see if it's a data packet and check
the VCI/VPI and try to steal it, it can't be done for the software queue
either :)
The queue ought to be short; if it isn't, then we already screwed up.
The close therefore should be quick, and it *doesn't* have to be
instant.
If someone wants to return immediately, there's always
vcc_release_async()...
--
dwmw2
On Wed, 28 Nov 2012 10:24:28 +0000
David Woodhouse <[email protected]> wrote:
> On Wed, 2012-11-28 at 11:04 +0100, Krzysztof Mazur wrote:
> >
> > The ->close() routine can just abort any pending rx/tx and just wait
> > for completion of currently running rx/tx code. That shouldn't take
> > long.
>
> If it's been submitted to the hardware for DMA, it can't do that very
> easily.
>
> And if I can't be bothered to write code to go through the entire damn
> queue and inspect every packet to see if it's a data packet and check
> the VCI/VPI and try to steal it, it can't be done for the software queue
> either :)
>
> The queue ought to be short; if it isn't, then we already screwed up.
> The close therefore should be quick, and it *doesn't* have to be
> instant.
>
> If someone wants to return immediately, there's always
> vcc_release_async()...
>
i dont think that would be quite the right way to do it.
vcc_release_async() just mark's the vcc for deletion--you still need to
go through and close it eventually. however, nothing would prevent you
from writing a close routine that could just reschedule something
periodically to check to see if the hardware finally finished closing
the vcc and can be reused. the part that needs fixed for this would be
marking the vcc for reuse. you would need to keep the vpi.vci marked
as busy so that someone else doesnt try to reuse it while it is
closing. right now, vcc_destroy_socket() always removes the vcc from
the vcc list -- regardless of whether or not close fully succeeded.
From: David Woodhouse <[email protected]>
Date: Tue, 27 Nov 2012 23:28:36 +0000
> + if (test_bit(ATM_VF_RELEASED, &atmvcc->flags)
> + || test_bit(ATM_VF_CLOSE, &atmvcc->flags)
> + || !test_bit(ATM_VF_READY, &atmvcc->flags)) {
Please:
if (X ||
Y ||
Z)
not:
if (X
|| Y
|| Z)
Thanks.
On Wed, 2012-11-28 at 11:41 -0500, David Miller wrote:
>
> Please:
>
> if (X ||
> Y ||
> Z)
>
> not:
>
> if (X
> || Y
> || Z)
Thanks. Fixed in both Krzysztof's original pppoatm version, and my
br2684 patch, in the git tree at git://git.infradead.org/~dwmw2/atm.git
I knew there was something else that offended me about the original,
other than just the whitespace (which is also fixed).
--
dwmw2
From: David Woodhouse <[email protected]>
Date: Wed, 28 Nov 2012 17:01:10 +0000
> On Wed, 2012-11-28 at 11:41 -0500, David Miller wrote:
>>
>> Please:
>>
>> if (X ||
>> Y ||
>> Z)
>>
>> not:
>>
>> if (X
>> || Y
>> || Z)
>
> Thanks. Fixed in both Krzysztof's original pppoatm version, and my
> br2684 patch, in the git tree at git://git.infradead.org/~dwmw2/atm.git
>
> I knew there was something else that offended me about the original,
> other than just the whitespace (which is also fixed).
Do you want me to pull that tree into net-next or is there a plan to
repost the entire series of work for a final submission?
On Wed, 2012-11-28 at 12:04 -0500, David Miller wrote:
> Do you want me to pull that tree into net-next or is there a plan to
> repost the entire series of work for a final submission?
I think it needs a little more testing/consensus first. I'd like an ack
from Chas on the atm ->release_cb() thing, at least. And I wouldn't mind
confirmation from Nathan's customer that they're no longer seeing the
panics.
And then I'll either send an explicit pull request, or submit it as
patches — whichever you prefer.
Hopefully in the next day or so; the merge window is approaching...
--
dwmw2
From: David Woodhouse <[email protected]>
Date: Wed, 28 Nov 2012 17:09:15 +0000
> And then I'll either send an explicit pull request, or submit it as
> patches $B!=(B whichever you prefer.
The canonical thing is to do both, send the pull request in the
"[PATCH 0/N]" email, and then the patches so everyone can see the
final form of the changes.
> Hopefully in the next day or so; the merge window is approaching...
Yep, thanks.
On Tue, Nov 27, 2012 at 07:28:43PM +0100, Krzysztof Mazur wrote:
>
> While reviewing your br2684 patch I also found that some ATM drivers does
> not call ->pop() when ->send() fails, they should do:
>
> if (vcc->pop)
> vcc->pop(vcc, skb);
> else
> dev_kfree_skb(skb);
>
> but some drivers just call dev_kfree_skb(skb).
>
> I think that we should add atm_pop() function that does that and fix all
> drivers.
>
I'm sending a patch that implements that idea.
Currently we need two arguments vcc and skb. However, we have reserved
ATM_SKB(skb)->vcc in skb control block for keeping vcc
and we can create single argument version vcc_pop(skb). In that case
we need to move:
ATM_SKB(skb)->vcc = vcc;
from ATM drivers to functions that call atmdev_ops->send().
Krzysiek
-- >8 --
Subject: [PATCH] atm: introduce vcc_pop_*()
The atm drivers to free skb, that they got from ->send(), cannot just use
dev_kfree_skb*(), but they must use something like:
if (vcc->pop)
vcc->pop(vcc, skb);
else
dev_kfree_skb_any(skb);
When vcc->pop is non-NULL, but they must in such case call vcc->pop().
This causes duplicated code in many drivers, and some drivers even forgot
to call vcc->pop() in some error handling code.
The new vcc_pop_*() functions are equivalent to dev_kfree_skb*().
Currently we always use dev_kfree_skb_any() to free, because using
other versions it's probably worthless optimization - in ->pop() we
already use only dev_kfree_skb_any(). The other functions we added
only to not loose information from converting existing code that
uses some non-any dev_kfree_skb*() variants.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
include/linux/atmdev.h | 11 +++++++++++
net/atm/common.c | 9 +++++++++
2 files changed, 20 insertions(+)
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index c1da539..dedccad 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -283,6 +283,17 @@ int atm_pcr_goal(const struct atm_trafprm *tp);
void vcc_release_async(struct atm_vcc *vcc, int reply);
+/*
+ * vcc_pop_*() functions should be used by ATM driver to free transmitted
+ * skbs - skbs that were sent to driver by atmdev_opt->send() function.
+ *
+ * We provide three functions that can be used in different contexts.
+ * See dev_kfree_skb*() documentation for details.
+ */
+void vcc_pop_any(struct atm_vcc *vcc, struct sk_buff *skb);
+#define vcc_pop(vcc, skb) vcc_pop_any(vcc, skb)
+#define vcc_pop_irq(vcc, skb) vcc_pop_any(vcc, skb)
+
struct atm_ioctl {
struct module *owner;
/* A module reference is kept if appropriate over this call.
diff --git a/net/atm/common.c b/net/atm/common.c
index 806fc0a..ad9c77d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -654,6 +654,15 @@ out:
return error;
}
+void vcc_pop_any(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ if (vcc->pop)
+ vcc->pop(vcc, skb);
+ else
+ dev_kfree_skb_any(skb);
+}
+EXPORT_SYMBOL(vcc_pop_any);
+
unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
--
1.8.0.411.g71a7da8
On Wed, 2012-11-28 at 21:18 +0100, Krzysztof Mazur wrote:
>
> +void vcc_pop_any(struct atm_vcc *vcc, struct sk_buff *skb)
> +{
> + if (vcc->pop)
> + vcc->pop(vcc, skb);
if (vcc && vcc->pop) perhaps?
--
dwmw2
On Wed, 28 Nov 2012 21:18:37 +0100
Krzysztof Mazur <[email protected]> wrote:
> On Tue, Nov 27, 2012 at 07:28:43PM +0100, Krzysztof Mazur wrote:
> > I think that we should add atm_pop() function that does that and fix all
> > drivers.
> >
>
> I'm sending a patch that implements that idea.
>
> Currently we need two arguments vcc and skb. However, we have reserved
> ATM_SKB(skb)->vcc in skb control block for keeping vcc
> and we can create single argument version vcc_pop(skb). In that case
> we need to move:
>
> ATM_SKB(skb)->vcc = vcc;
>
> from ATM drivers to functions that call atmdev_ops->send().
i dont like the vcc->pop() implementation and at one point i had the
crazy idea of using skb->destructors to handle it. however, i think it
would be necessary to clone the skb's so any existing destructor is
preserved.
> +#define vcc_pop(vcc, skb) vcc_pop_any(vcc, skb)
> +#define vcc_pop_irq(vcc, skb) vcc_pop_any(vcc, skb)
don't define these if you dont plan on using them anway.
On Wed, Nov 28, 2012 at 08:44:41PM +0000, David Woodhouse wrote:
> On Wed, 2012-11-28 at 21:18 +0100, Krzysztof Mazur wrote:
> >
> > +void vcc_pop_any(struct atm_vcc *vcc, struct sk_buff *skb)
> > +{
> > + if (vcc->pop)
> > + vcc->pop(vcc, skb);
>
> if (vcc && vcc->pop) perhaps?
>
yes, we can do that, at least "he" and "nicstar" use that in some cases.
Krzysiek
On Wed, Nov 28, 2012 at 04:20:01PM -0500, chas williams - CONTRACTOR wrote:
> On Wed, 28 Nov 2012 21:18:37 +0100
> Krzysztof Mazur <[email protected]> wrote:
>
> > On Tue, Nov 27, 2012 at 07:28:43PM +0100, Krzysztof Mazur wrote:
> > > I think that we should add atm_pop() function that does that and fix all
> > > drivers.
> > >
> >
> > I'm sending a patch that implements that idea.
> >
> > Currently we need two arguments vcc and skb. However, we have reserved
> > ATM_SKB(skb)->vcc in skb control block for keeping vcc
> > and we can create single argument version vcc_pop(skb). In that case
> > we need to move:
> >
> > ATM_SKB(skb)->vcc = vcc;
> >
> > from ATM drivers to functions that call atmdev_ops->send().
>
> i dont like the vcc->pop() implementation and at one point i had the
> crazy idea of using skb->destructors to handle it. however, i think it
> would be necessary to clone the skb's so any existing destructor is
> preserved.
With this patch we will kill vcc->pop() in drivers and in future
we can do that without changes in drivers.
>
> > +#define vcc_pop(vcc, skb) vcc_pop_any(vcc, skb)
> > +#define vcc_pop_irq(vcc, skb) vcc_pop_any(vcc, skb)
>
> don't define these if you dont plan on using them anway.
I removed them. I also added check if vcc is NULL, as David Woodhouse
suggested, some drivers use that.
Krzysiek
-- >8 --
Subject: [PATCH v2] atm: introduce vcc_pop()
The atm drivers to free skb, that they got from ->send(), cannot just use
dev_kfree_skb*(), but they must use something like:
if (vcc->pop)
vcc->pop(vcc, skb);
else
dev_kfree_skb_any(skb);
When vcc->pop() is non-NULL, but they must in such case call vcc->pop().
This causes duplicated code in many drivers, and some drivers even forgot
to call vcc->pop() in some error handling code.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
include/linux/atmdev.h | 8 ++++++++
net/atm/common.c | 9 +++++++++
2 files changed, 17 insertions(+)
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index c1da539..57bd93f 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -283,6 +283,14 @@ int atm_pcr_goal(const struct atm_trafprm *tp);
void vcc_release_async(struct atm_vcc *vcc, int reply);
+/**
+ * vcc_pop - free transmitted ATM skb
+ *
+ * vcc_pop() should be used by ATM driver to free skbs, that were sent
+ * to driver by atmdev_opt->send() function.
+ */
+void vcc_pop(struct atm_vcc *vcc, struct sk_buff *skb);
+
struct atm_ioctl {
struct module *owner;
/* A module reference is kept if appropriate over this call.
diff --git a/net/atm/common.c b/net/atm/common.c
index 806fc0a..76bf76c 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -654,6 +654,15 @@ out:
return error;
}
+void vcc_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ if (vcc && vcc->pop)
+ vcc->pop(vcc, skb);
+ else
+ dev_kfree_skb_any(skb);
+}
+EXPORT_SYMBOL(vcc_pop);
+
unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
--
1.8.0.411.g71a7da8
On Wed, 28 Nov 2012 22:45:34 +0100
Krzysztof Mazur <[email protected]> wrote:
> On Wed, Nov 28, 2012 at 04:20:01PM -0500, chas williams - CONTRACTOR wrote:
> > i dont like the vcc->pop() implementation and at one point i had the
> > crazy idea of using skb->destructors to handle it. however, i think it
> > would be necessary to clone the skb's so any existing destructor is
> > preserved.
>
> With this patch we will kill vcc->pop() in drivers and in future
> we can do that without changes in drivers.
ok
> >
> > > +#define vcc_pop(vcc, skb) vcc_pop_any(vcc, skb)
> > > +#define vcc_pop_irq(vcc, skb) vcc_pop_any(vcc, skb)
> >
> > don't define these if you dont plan on using them anway.
>
> I removed them. I also added check if vcc is NULL, as David Woodhouse
> suggested, some drivers use that.
it should probably be if (likely(vcc) && likely(vcc->pop)) since it
will almost always be the case.
On Wed, Nov 28, 2012 at 04:59:06PM -0500, chas williams - CONTRACTOR wrote:
> On Wed, 28 Nov 2012 22:45:34 +0100
> Krzysztof Mazur <[email protected]> wrote:
>
> > On Wed, Nov 28, 2012 at 04:20:01PM -0500, chas williams - CONTRACTOR wrote:
> > > i dont like the vcc->pop() implementation and at one point i had the
> > > crazy idea of using skb->destructors to handle it. however, i think it
> > > would be necessary to clone the skb's so any existing destructor is
> > > preserved.
> >
> > With this patch we will kill vcc->pop() in drivers and in future
> > we can do that without changes in drivers.
>
> ok
>
> > >
> > > > +#define vcc_pop(vcc, skb) vcc_pop_any(vcc, skb)
> > > > +#define vcc_pop_irq(vcc, skb) vcc_pop_any(vcc, skb)
> > >
> > > don't define these if you dont plan on using them anway.
> >
> > I removed them. I also added check if vcc is NULL, as David Woodhouse
> > suggested, some drivers use that.
>
> it should probably be if (likely(vcc) && likely(vcc->pop)) since it
> will almost always be the case.
Thanks,
Krzysiek
-- >8 --
Subject: [PATCH v3] atm: introduce vcc_pop()
The atm drivers to free skb, that they got from ->send(), cannot just use
dev_kfree_skb*(), but they must use something like:
if (vcc->pop)
vcc->pop(vcc, skb);
else
dev_kfree_skb_any(skb);
When vcc->pop() is non-NULL, but they must in such case call vcc->pop().
This causes duplicated code in many drivers, and some drivers even forgot
to call vcc->pop() in some error handling code.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
include/linux/atmdev.h | 8 ++++++++
net/atm/common.c | 9 +++++++++
2 files changed, 17 insertions(+)
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index c1da539..57bd93f 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -283,6 +283,14 @@ int atm_pcr_goal(const struct atm_trafprm *tp);
void vcc_release_async(struct atm_vcc *vcc, int reply);
+/**
+ * vcc_pop - free transmitted ATM skb
+ *
+ * vcc_pop() should be used by ATM driver to free skbs, that were sent
+ * to driver by atmdev_opt->send() function.
+ */
+void vcc_pop(struct atm_vcc *vcc, struct sk_buff *skb);
+
struct atm_ioctl {
struct module *owner;
/* A module reference is kept if appropriate over this call.
diff --git a/net/atm/common.c b/net/atm/common.c
index 806fc0a..c42ff62 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -654,6 +654,15 @@ out:
return error;
}
+void vcc_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ if (likely(vcc) && likely(vcc->pop))
+ vcc->pop(vcc, skb);
+ else
+ dev_kfree_skb_any(skb);
+}
+EXPORT_SYMBOL(vcc_pop);
+
unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
--
1.8.0.411.g71a7da8
On Wed, 2012-11-28 at 09:21 +0000, David Laight wrote:
> Even when it might make sense to sleep in close until tx drains
> there needs to be a finite timeout before it become abortive.
You are, of course, right. We should never wait for hardware for ever.
And just to serve me right, I seem to have hit a bug in the latest Solos
firmware (1.11) which makes it sometimes lock up when I reboot. So it
never responds to the PKT_PCLOSE packet... and thus it deadlocks when I
try to kill pppd and unload the module to reset it :)
New version...
From 53dd01c08fec5b26006a009b25e4210127fdb27a Mon Sep 17 00:00:00 2001
From: David Woodhouse <[email protected]>
Date: Tue, 27 Nov 2012 23:49:24 +0000
Subject: [PATCH] solos-pci: Wait for pending TX to complete when releasing
vcc
We should no longer be calling the old pop routine for the vcc, after
vcc_release() has completed. Make sure we wait for any pending TX skbs
to complete, by waiting for our own PKT_PCLOSE control skb to be sent.
Signed-off-by: David Woodhouse <[email protected]>
---
drivers/atm/solos-pci.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 9851093..3720670 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -92,6 +92,7 @@ struct pkt_hdr {
};
struct solos_skb_cb {
+ struct completion c;
struct atm_vcc *vcc;
uint32_t dma_addr;
};
@@ -881,11 +882,18 @@ static void pclose(struct atm_vcc *vcc)
header->vci = cpu_to_le16(vcc->vci);
header->type = cpu_to_le16(PKT_PCLOSE);
+ init_completion(&SKB_CB(skb)->c);
+
fpga_queue(card, SOLOS_CHAN(vcc->dev), skb, NULL);
clear_bit(ATM_VF_ADDR, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
+ if (!wait_for_completion_timeout(&SKB_CB(skb)->c,
+ jiffies + msecs_to_jiffies(5000)))
+ dev_warn(&card->dev->dev, "Timeout waiting for VCC close on port %d\n",
+ SOLOS_CHAN(vcc->dev));
+
/* Hold up vcc_destroy_socket() (our caller) until solos_bh() in the
tasklet has finished processing any incoming packets (and, more to
the point, using the vcc pointer). */
@@ -1011,9 +1019,12 @@ static uint32_t fpga_tx(struct solos_card *card)
if (vcc) {
atomic_inc(&vcc->stats->tx);
solos_pop(vcc, oldskb);
- } else
+ } else {
+ struct pkt_hdr *header = (void *)oldskb->data;
+ if (le16_to_cpu(header->type) == PKT_PCLOSE)
+ complete(&SKB_CB(oldskb)->c);
dev_kfree_skb_irq(oldskb);
-
+ }
}
}
/* For non-DMA TX, write the 'TX start' bit for all four ports simultaneously */
@@ -1345,6 +1356,8 @@ static struct pci_driver fpga_driver = {
static int __init solos_pci_init(void)
{
+ BUILD_BUG_ON(sizeof(struct solos_skb_cb) > sizeof(((struct sk_buff *)0)->cb));
+
printk(KERN_INFO "Solos PCI Driver Version %s\n", VERSION);
return pci_register_driver(&fpga_driver);
}
--
1.8.0
--
dwmw2
On Wed, Nov 28, 2012 at 11:10:40PM +0100, Krzysztof Mazur wrote:
> On Wed, Nov 28, 2012 at 04:59:06PM -0500, chas williams - CONTRACTOR wrote:
> > On Wed, 28 Nov 2012 22:45:34 +0100
> > Krzysztof Mazur <[email protected]> wrote:
> >
> > > On Wed, Nov 28, 2012 at 04:20:01PM -0500, chas williams - CONTRACTOR wrote:
> > > > i dont like the vcc->pop() implementation and at one point i had the
> > > > crazy idea of using skb->destructors to handle it. however, i think it
> > > > would be necessary to clone the skb's so any existing destructor is
> > > > preserved.
> > >
> > > With this patch we will kill vcc->pop() in drivers and in future
> > > we can do that without changes in drivers.
> >
> > ok
> >
> > > >
> > > > > +#define vcc_pop(vcc, skb) vcc_pop_any(vcc, skb)
> > > > > +#define vcc_pop_irq(vcc, skb) vcc_pop_any(vcc, skb)
> > > >
> > > > don't define these if you dont plan on using them anway.
> > >
> > > I removed them. I also added check if vcc is NULL, as David Woodhouse
> > > suggested, some drivers use that.
> >
> > it should probably be if (likely(vcc) && likely(vcc->pop)) since it
> > will almost always be the case.
>
I think that we should also add that single-argument skb-only version.
Currently it can be used only after the driver does ATM_SKB(skb)->vcc = vcc.
Most drivers do that.
Thanks,
Krzysiek
-- >8 --
Subject: [PATCH] atm: introduce atm_pop_skb()
Many ATM drivers store vcc in ATM_SKB(skb)->vcc and use it for
freeing skbs. Now they can just use atm_pop_skb() to free such
buffers.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
include/linux/atmdev.h | 8 ++++++++
net/atm/common.c | 6 ++++++
2 files changed, 14 insertions(+)
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 57bd93f..648fb79 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -291,6 +291,14 @@ void vcc_release_async(struct atm_vcc *vcc, int reply);
*/
void vcc_pop(struct atm_vcc *vcc, struct sk_buff *skb);
+/**
+ * vcc_pop_skb - free transmitted ATM skb
+ *
+ * This variant of vcc_pop() assumes that ATM_SKB(skb)->vcc is set
+ * by driver.
+ */
+void vcc_pop_skb(struct sk_buff *skb);
+
struct atm_ioctl {
struct module *owner;
/* A module reference is kept if appropriate over this call.
diff --git a/net/atm/common.c b/net/atm/common.c
index c42ff62..378c911 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -663,6 +663,12 @@ void vcc_pop(struct atm_vcc *vcc, struct sk_buff *skb)
}
EXPORT_SYMBOL(vcc_pop);
+void vcc_pop_skb(struct sk_buff *skb)
+{
+ vcc_pop(ATM_SKB(skb)->vcc, skb);
+}
+EXPORT_SYMBOL(vcc_pop_skb);
+
unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
--
1.8.0.411.g71a7da8
On Wed, Nov 28, 2012 at 10:18:35PM +0000, David Woodhouse wrote:
> On Wed, 2012-11-28 at 09:21 +0000, David Laight wrote:
> > Even when it might make sense to sleep in close until tx drains
> > there needs to be a finite timeout before it become abortive.
>
> You are, of course, right. We should never wait for hardware for ever.
> And just to serve me right, I seem to have hit a bug in the latest Solos
> firmware (1.11) which makes it sometimes lock up when I reboot. So it
> never responds to the PKT_PCLOSE packet... and thus it deadlocks when I
> try to kill pppd and unload the module to reset it :)
>
> New version...
>
> From 53dd01c08fec5b26006a009b25e4210127fdb27a Mon Sep 17 00:00:00 2001
> From: David Woodhouse <[email protected]>
> Date: Tue, 27 Nov 2012 23:49:24 +0000
> Subject: [PATCH] solos-pci: Wait for pending TX to complete when releasing
> vcc
>
> We should no longer be calling the old pop routine for the vcc, after
> vcc_release() has completed. Make sure we wait for any pending TX skbs
> to complete, by waiting for our own PKT_PCLOSE control skb to be sent.
>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> drivers/atm/solos-pci.c | 17 +++++++++++++++--
> 1 file changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
> index 9851093..3720670 100644
> --- a/drivers/atm/solos-pci.c
> +++ b/drivers/atm/solos-pci.c
> @@ -92,6 +92,7 @@ struct pkt_hdr {
> };
>
> struct solos_skb_cb {
> + struct completion c;
> struct atm_vcc *vcc;
> uint32_t dma_addr;
> };
> @@ -881,11 +882,18 @@ static void pclose(struct atm_vcc *vcc)
> header->vci = cpu_to_le16(vcc->vci);
> header->type = cpu_to_le16(PKT_PCLOSE);
>
> + init_completion(&SKB_CB(skb)->c);
> +
> fpga_queue(card, SOLOS_CHAN(vcc->dev), skb, NULL);
>
> clear_bit(ATM_VF_ADDR, &vcc->flags);
> clear_bit(ATM_VF_READY, &vcc->flags);
>
> + if (!wait_for_completion_timeout(&SKB_CB(skb)->c,
> + jiffies + msecs_to_jiffies(5000)))
> + dev_warn(&card->dev->dev, "Timeout waiting for VCC close on port %d\n",
> + SOLOS_CHAN(vcc->dev));
> +
do we really need to wait here?
Why don't just do something like that:
tasklet_disable(&card->tlet);
spin_lock(&card->tx_queue_lock);
for each skb in queue
SKB_CB(skb)->vcc = NULL;
spin_unlock(&card->tx_queue_lock);
tasklet_enable(&card->tlet);
or if we really want to call vcc->pop() for such skbs:
tasklet_disable(&card->tlet);
spin_lock(&card->tx_queue_lock);
for each skb in queue {
skb_get(skb);
solos_pop(SKB_CB(skb)->vcc, skb);
SKB_CB(skb)->vcc = NULL;
}
spin_unlock(&card->tx_queue_lock);
tasklet_enable(&card->tlet);
Krzysiek
On Thu, 2012-11-29 at 11:57 +0100, Krzysztof Mazur wrote:
> do we really need to wait here?
> Why don't just do something like that:
>
> tasklet_disable(&card->tlet);
> spin_lock(&card->tx_queue_lock);
> for each skb in queue
> SKB_CB(skb)->vcc = NULL;
> spin_unlock(&card->tx_queue_lock);
> tasklet_enable(&card->tlet);
>
> or if we really want to call vcc->pop() for such skbs:
>
> tasklet_disable(&card->tlet);
> spin_lock(&card->tx_queue_lock);
> for each skb in queue {
> skb_get(skb);
> solos_pop(SKB_CB(skb)->vcc, skb);
> SKB_CB(skb)->vcc = NULL;
> }
> spin_unlock(&card->tx_queue_lock);
> tasklet_enable(&card->tlet);
Yes, we could certainly remove the packets from the tx_queue first.
However, in the card->using_dma case there might be a skb for this vcc
*currently* being DMA'd, and we'd still need to wait for that one.
I suppose we could just have a waitqueue in *every* TX skb, and under
card->tx_lock we could add ourselves to *that* waitqueue. Or just a
global waitqueue for DMA tx_done, perhaps. But waiting for our own
PKT_PCLOSE skb is just 'cleaner' in my view. It's simpler, and it's much
easier to test. Even if I had DMA-capable hardware, I'd have to get the
right timing to properly test that TX-pending-DMA case.
So dequeuing the packets would only serve to make pclose() slightly
faster, rather than simplifying it. It's hardly a fast path that we care
about, and I've also already ensured that there should only be one or
two packets queued per vcc *anyway*. So I'm mostly inclined not to
bother.
(I did fix the timeout argument to wait_for_completion_timeout())
--
dwmw2
On Thu, Nov 29, 2012 at 11:55:43AM +0000, David Woodhouse wrote:
> On Thu, 2012-11-29 at 11:57 +0100, Krzysztof Mazur wrote:
> > do we really need to wait here?
> > Why don't just do something like that:
> >
> > tasklet_disable(&card->tlet);
> > spin_lock(&card->tx_queue_lock);
> > for each skb in queue
> > SKB_CB(skb)->vcc = NULL;
> > spin_unlock(&card->tx_queue_lock);
> > tasklet_enable(&card->tlet);
> >
> > or if we really want to call vcc->pop() for such skbs:
> >
> > tasklet_disable(&card->tlet);
> > spin_lock(&card->tx_queue_lock);
> > for each skb in queue {
> > skb_get(skb);
> > solos_pop(SKB_CB(skb)->vcc, skb);
> > SKB_CB(skb)->vcc = NULL;
> > }
> > spin_unlock(&card->tx_queue_lock);
> > tasklet_enable(&card->tlet);
>
> Yes, we could certainly remove the packets from the tx_queue first.
>
> However, in the card->using_dma case there might be a skb for this vcc
> *currently* being DMA'd, and we'd still need to wait for that one.
Removing packets from tx_queue is not needed. We can transmit packets
also after close. We just can't call vcc->pop() after close,
so we can just set SKB_CB(skb)->vcc of such packets to NULL so fpga_tx()
won't call vcc->pop().
Maybe I was not precise enough, I'm think that all we need is
something like that:
-- >8 --
Subject: [PATCH] solos-pci: don't call vcc->pop() after pclose()
After atmdev_ops->close() we cannot use vcc->pop() because the vcc may,
and probably will be destroyed.
We can just set vcc for such frames to NULL because fpga_tx() after
completion will call dev_kfree_skb() in that case.
Signed-off-by: Krzysztof Mazur <[email protected]>
---
drivers/atm/solos-pci.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 9851093..aabe021 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -868,6 +868,19 @@ static void pclose(struct atm_vcc *vcc)
struct solos_card *card = vcc->dev->dev_data;
struct sk_buff *skb;
struct pkt_hdr *header;
+ unsigned int port;
+
+ tasklet_disable(&card->tlet);
+ spin_lock(&card->tx_queue_lock);
+ for (port = 0; port < card->nr_ports; port++)
+ skb_queue_walk(&card->tx_queue[port], skb)
+ if (SKB_CB(skb)->vcc == vcc) {
+ skb_get(skb);
+ solos_pop(SKB_CB(skb)->vcc, skb);
+ SKB_CB(skb)->vcc = NULL;
+ }
+ spin_unlock(&card->tx_queue_lock);
+ tasklet_enable(&card->tlet);
skb = alloc_skb(sizeof(*header), GFP_ATOMIC);
if (!skb) {
--
1.8.0.411.g71a7da8
On Thu, 2012-11-29 at 13:43 +0100, Krzysztof Mazur wrote:
>
> Removing packets from tx_queue is not needed. We can transmit packets
> also after close. We just can't call vcc->pop() after close,
> so we can just set SKB_CB(skb)->vcc of such packets to NULL so
> fpga_tx() won't call vcc->pop().
Your patch doesn't do that, does it? You'd want something like
if (card->tx_skb[port] && SKB_CB(card->tx_skb[port]->vcc) == vcc)
SKB_CB(card->tx_skb[port]->vcc) = NULL;
Under card->tx_lock should suffice.
And do we just *not* call the ->pop() on that skb ever? And hope that it
doesn't screw up some other state somewhere? Like if we're doing MLPPP
and I've implemented BQL for PPP... we might never call
ppp_completed_queue() for that skb, so even though this *channel* is
going away, it might still contribute towards the perceived queue on the
overall PPP netdev?
Failing to call ->pop() could cause memory leaks and other issues; I
don't think it's reasonable. I think we *have* to wait for
card->tx_skb[port] if it's for the VCC we're closing.
--
dwmw2
On Thu, Nov 29, 2012 at 12:57:17PM +0000, David Woodhouse wrote:
> On Thu, 2012-11-29 at 13:43 +0100, Krzysztof Mazur wrote:
> >
> > Removing packets from tx_queue is not needed. We can transmit packets
> > also after close. We just can't call vcc->pop() after close,
> > so we can just set SKB_CB(skb)->vcc of such packets to NULL so
> > fpga_tx() won't call vcc->pop().
>
> Your patch doesn't do that, does it? You'd want something like
>
> if (card->tx_skb[port] && SKB_CB(card->tx_skb[port]->vcc) == vcc)
> SKB_CB(card->tx_skb[port]->vcc) = NULL;
No, I want to process all queued packets, not just only those that
were already sent do card.
In that case we will need to remove other packets with that vcc
from queue, of couse we can still do that in the same loop, something
like:
if (SKB_CB(skb)->vcc == vcc) {
if (card->tx_skb[port] == skb) {
skb_get(skb);
solos_pop(SKB_CB(skb)->vcc, skb);
SKB_CB(skb)->vcc = NULL;
} else {
skb_unlink(skb, &card->tx_queue[port]);
solos_pop(SKB_CB(skb)->vcc, skb);
}
}
But I don't think that this optization is needed.
>
> Under card->tx_lock should suffice.
>
> And do we just *not* call the ->pop() on that skb ever? And hope that it
> doesn't screw up some other state somewhere? Like if we're doing MLPPP
> and I've implemented BQL for PPP... we might never call
> ppp_completed_queue() for that skb, so even though this *channel* is
> going away, it might still contribute towards the perceived queue on the
> overall PPP netdev?
>
> Failing to call ->pop() could cause memory leaks and other issues; I
> don't think it's reasonable. I think we *have* to wait for
> card->tx_skb[port] if it's for the VCC we're closing.
We are calling ->pop() in solos_pop() just before SKB_CB(skb)->vcc = NULL,
but we are doing that before we really finish processing that packet,
that's why we do skb_get(skb).
Krzysiek
On Thu, 29 Nov 2012 11:57:15 +0100
Krzysztof Mazur <[email protected]> wrote:
> or if we really want to call vcc->pop() for such skbs:
you need to call ->pop() to cleaning up the wmem_alloc accounting.
otherwise you will get complaints from the atm stack later about
freeing a vcc that had outstanding data.
On Thu, 29 Nov 2012 13:43:44 +0100
Krzysztof Mazur <[email protected]> wrote:
> Removing packets from tx_queue is not needed. We can transmit packets
> also after close. We just can't call vcc->pop() after close,
> so we can just set SKB_CB(skb)->vcc of such packets to NULL so fpga_tx()
> won't call vcc->pop().
i dont think you can transmit packets after close(). you can transmit
packets during close() though. if you transmit after close that means
that you are using the vpi/vci pair that the atm stack thinks is no
longer in use. additionally after close(), the hardware should be in a
state such that you cannot transmit or receive on the vpi/vci that has
been closed.
close() needs to make sure that any pending tx packets are sent or
otherwise disposed of (like turning off the transmit segmentation
engine, clearing the packets, or whatever). any partially reassembled
pdu's also need to be cleared as well.
On Thu, 2012-11-29 at 14:20 +0100, Krzysztof Mazur wrote:
> if (card->tx_skb[port] == skb) {
> skb_get(skb);
> solos_pop(SKB_CB(skb)->vcc, skb);
> SKB_CB(skb)->vcc = NULL;
Um... yes, that would probably work. But it's subtle enough that it
bothers me. And if it *did* cause any strange issues, it's a rare case
and would be hard to reproduce/debug. Is it *really* necessary, just to
speed up the vcc close? I'm inclined to stick with the 'KISS' approach.
(Note that the card->tx_skb[port] skb isn't on the queue any more; you'd
do that bit separately and not in the skb_queue_walk() loop.)
--
dwmw2
On Thu, Nov 29, 2012 at 02:42:17PM +0000, David Woodhouse wrote:
> On Thu, 2012-11-29 at 14:20 +0100, Krzysztof Mazur wrote:
> > if (card->tx_skb[port] == skb) {
> > skb_get(skb);
> > solos_pop(SKB_CB(skb)->vcc, skb);
> > SKB_CB(skb)->vcc = NULL;
>
> Um... yes, that would probably work. But it's subtle enough that it
> bothers me. And if it *did* cause any strange issues, it's a rare case
> and would be hard to reproduce/debug. Is it *really* necessary, just to
> speed up the vcc close? I'm inclined to stick with the 'KISS' approach.
That's why I proposed that simple loop that just calls ->pop() and
sets vcc to NULL.
Forget about that, Chas said that we cannot leave close() until we
close that vcc, so we really need to wait.
Krzysiek
On Wed, Nov 28, 2012 at 10:18:35PM +0000, David Woodhouse wrote:
> On Wed, 2012-11-28 at 09:21 +0000, David Laight wrote:
> > Even when it might make sense to sleep in close until tx drains
> > there needs to be a finite timeout before it become abortive.
>
> You are, of course, right. We should never wait for hardware for ever.
> And just to serve me right, I seem to have hit a bug in the latest Solos
> firmware (1.11) which makes it sometimes lock up when I reboot. So it
> never responds to the PKT_PCLOSE packet... and thus it deadlocks when I
> try to kill pppd and unload the module to reset it :)
>
> New version...
>
> From 53dd01c08fec5b26006a009b25e4210127fdb27a Mon Sep 17 00:00:00 2001
> From: David Woodhouse <[email protected]>
> Date: Tue, 27 Nov 2012 23:49:24 +0000
> Subject: [PATCH] solos-pci: Wait for pending TX to complete when releasing
> vcc
>
> We should no longer be calling the old pop routine for the vcc, after
> vcc_release() has completed. Make sure we wait for any pending TX skbs
> to complete, by waiting for our own PKT_PCLOSE control skb to be sent.
>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> drivers/atm/solos-pci.c | 17 +++++++++++++++--
> 1 file changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
> index 9851093..3720670 100644
> --- a/drivers/atm/solos-pci.c
> +++ b/drivers/atm/solos-pci.c
> @@ -92,6 +92,7 @@ struct pkt_hdr {
> };
>
> struct solos_skb_cb {
> + struct completion c;
> struct atm_vcc *vcc;
> uint32_t dma_addr;
> };
> @@ -881,11 +882,18 @@ static void pclose(struct atm_vcc *vcc)
> header->vci = cpu_to_le16(vcc->vci);
> header->type = cpu_to_le16(PKT_PCLOSE);
>
> + init_completion(&SKB_CB(skb)->c);
> +
> fpga_queue(card, SOLOS_CHAN(vcc->dev), skb, NULL);
>
> clear_bit(ATM_VF_ADDR, &vcc->flags);
> clear_bit(ATM_VF_READY, &vcc->flags);
>
> + if (!wait_for_completion_timeout(&SKB_CB(skb)->c,
> + jiffies + msecs_to_jiffies(5000)))
> + dev_warn(&card->dev->dev, "Timeout waiting for VCC close on port %d\n",
> + SOLOS_CHAN(vcc->dev));
> +
I don't like two thinks about this patch:
- if allos_skb(sizeof(*header), GFP_ATOMIC) at beginning of
pclose() fails we will crash
- if card wakes up after this timeout we will probably crash too
That's why proposed different approach, but it has other problems.
Krzysiek
On Wed, 28 Nov 2012 22:18:35 +0000
David Woodhouse <[email protected]> wrote:
> diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
> index 9851093..3720670 100644
> --- a/drivers/atm/solos-pci.c
> +++ b/drivers/atm/solos-pci.c
> @@ -92,6 +92,7 @@ struct pkt_hdr {
> };
>
> struct solos_skb_cb {
> + struct completion c;
> struct atm_vcc *vcc;
> uint32_t dma_addr;
> };
> @@ -881,11 +882,18 @@ static void pclose(struct atm_vcc *vcc)
> header->vci = cpu_to_le16(vcc->vci);
> header->type = cpu_to_le16(PKT_PCLOSE);
>
> + init_completion(&SKB_CB(skb)->c);
> +
> fpga_queue(card, SOLOS_CHAN(vcc->dev), skb, NULL);
>
> clear_bit(ATM_VF_ADDR, &vcc->flags);
> clear_bit(ATM_VF_READY, &vcc->flags);
you shouldnt clear ATM_VF_ADDR until the vpi/vci is actually closed and
ready for reuse. at this point, it isnt. ATM_VF_READY should already
be clear at this point but you should set it before you queue your
PKT_CLOSE. these flags probably should be handled outside the drivers
since the context for them is pretty clear. just another patch i never
got around to writing...
checking for ATM_VF_READY in find_vcc() is probably going to give you
grief as well since ATM_VF_READY isnt entirely under your control. you
need to be able to find the vcc until after pclose() is finished since
your tasklet might have a few packets it is still processing?
On Thu, 2012-11-29 at 16:09 +0100, Krzysztof Mazur wrote:
>
> I don't like two thinks about this patch:
>
> - if allos_skb(sizeof(*header), GFP_ATOMIC) at beginning of
> pclose() fails we will crash
>
> - if card wakes up after this timeout we will probably crash too
>
> That's why proposed different approach, but it has other problems.
How about this variant on what you suggested. Yes, we can definitely
remove everything that's in the queue... as long as we use
skb_queue_walk_safe() instead of skb_queue_walk().
We can use GFP_KERNEL instead of GFP_ATOMIC, which at least reduces the
likelihood of failing to close the vcc.
We end up waiting *only* if there is a packet which is *currently* being
DMA'd to the card. And if the card doesn't take that within 5 seconds,
it almost certainly never will. So I can live with that.
I'd definitely want someone with a DMA-capable FPGA to test this
properly, adding printks to it to make sure the interesting path is
being exercised. Nathan, you should be able to trigger it with the same
test that used to just crash the system entirely — send a flood of
packets while you kill br2684ctl.
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 6258961..2006a39 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -92,7 +92,6 @@ struct pkt_hdr {
};
struct solos_skb_cb {
- struct completion c;
struct atm_vcc *vcc;
uint32_t dma_addr;
};
@@ -868,10 +867,11 @@ static int popen(struct atm_vcc *vcc)
static void pclose(struct atm_vcc *vcc)
{
struct solos_card *card = vcc->dev->dev_data;
- struct sk_buff *skb;
+ struct sk_buff *skb, *tmpskb;
struct pkt_hdr *header;
+ unsigned char port = SOLOS_CHAN(vcc->dev);
- skb = alloc_skb(sizeof(*header), GFP_ATOMIC);
+ skb = alloc_skb(sizeof(*header), GFP_KERNEL);
if (!skb) {
dev_warn(&card->dev->dev, "Failed to allocate sk_buff in pclose()\n");
return;
@@ -883,22 +883,50 @@ static void pclose(struct atm_vcc *vcc)
header->vci = cpu_to_le16(vcc->vci);
header->type = cpu_to_le16(PKT_PCLOSE);
- init_completion(&SKB_CB(skb)->c);
-
fpga_queue(card, SOLOS_CHAN(vcc->dev), skb, NULL);
clear_bit(ATM_VF_ADDR, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
- if (!wait_for_completion_timeout(&SKB_CB(skb)->c,
- msecs_to_jiffies(5000)))
- dev_warn(&card->dev->dev, "Timeout waiting for VCC close on port %d\n",
- SOLOS_CHAN(vcc->dev));
+ /* Remove any yet-to-be-transmitted packets from the pending queue */
+ spin_lock(&card->tx_queue_lock);
+ skb_queue_walk_safe(&card->tx_queue[port], skb, tmpskb) {
+ if (SKB_CB(skb)->vcc == vcc) {
+ skb_unlink(skb, &card->tx_queue[port]);
+ solos_pop(vcc, skb);
+ }
+ }
+ spin_unlock(&card->tx_queue_lock);
/* Hold up vcc_destroy_socket() (our caller) until solos_bh() in the
tasklet has finished processing any incoming packets (and, more to
the point, using the vcc pointer). */
tasklet_unlock_wait(&card->tlet);
+
+ /*
+ * If we're in DMA mode and a skb on this VCC is *currently* being
+ * submitted, wait for it to finish (using param_wq)
+ */
+ if (card->using_dma) {
+ DEFINE_WAIT(wait);
+
+ spin_lock(&card->tx_lock);
+ while (card->tx_skb[port] && SKB_CB(card->tx_skb[port])->vcc == vcc) {
+ prepare_to_wait(&card->param_wq, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&card->tx_lock);
+
+ if (schedule_timeout(5 * HZ)) {
+ dev_warn(&card->dev->dev,
+ "Timeout waiting for VCC close on port %d\n",
+ port);
+ goto done_waiting;
+ }
+ spin_lock(&card->tx_lock);
+ }
+ spin_unlock(&card->tx_lock);
+ done_waiting:
+ finish_wait(&card->param_wq, &wait);
+ }
return;
}
@@ -1020,12 +1048,15 @@ static uint32_t fpga_tx(struct solos_card *card)
if (vcc) {
atomic_inc(&vcc->stats->tx);
solos_pop(vcc, oldskb);
- } else {
- struct pkt_hdr *header = (void *)oldskb->data;
- if (le16_to_cpu(header->type) == PKT_PCLOSE)
- complete(&SKB_CB(oldskb)->c);
+
+ /*
+ * If it's a TX skb on a closed VCC, pclose()
+ * may be waiting for it...
+ */
+ if (!test_bit(ATM_VF_READY, &vcc->flags))
+ wake_up(&card->param_wq);
+ } else
dev_kfree_skb_irq(oldskb);
- }
}
}
/* For non-DMA TX, write the 'TX start' bit for all four ports simultaneously */
--
dwmw2
On Thu, 2012-11-29 at 10:37 -0500, chas williams - CONTRACTOR wrote:
> you shouldnt clear ATM_VF_ADDR until the vpi/vci is actually closed and
> ready for reuse. at this point, it isnt.
So I should always wait for the completion of my PKT_CLOSE and only
clear ATM_VF_ADDR when it's actually done?
But can you define 'ready for reuse'? From the moment I clear
ATM_VF_ADDR, another CPU may enter my popen() function to set up another
VCC with the same parameters, and everything should work fine. The
PKT_POPEN will end up on the queue *after* my PKT_PCLOSE for the old
VCC. Any received packets will be dropped until the new VCC gets
ATM_VF_READY set (by the popen function).
What's the actual failure mode, caused by me clearing ATM_VF_ADDR "too
early"?
> ATM_VF_READY should already be clear at this point but you should set
> it before you queue your PKT_CLOSE.
I should *set* it? Do you mean clear it? Yes, I see it's cleared by
vcc_destroy_socket()... but all the other ATM drivers also seem to clear
it for themselves, and that would appear to be harmless.
> checking for ATM_VF_READY in find_vcc() is probably going to give you
> grief as well since ATM_VF_READY isnt entirely under your control.
That's fine. If *anyone* has cleared ATM_VF_READY, I stop sending
packets up it. Or, more to the point, I stop using the damn thing at
all. See commit 1f6ea6e511e5ec730d8e88651da1b7b6e8fd1333.
> you need to be able to find the vcc until after pclose() is finished since
> your tasklet might have a few packets it is still processing?
The whole point of that check is that the tasklet *won't* be able to
find it any more, and it'll just discard incoming packets for the
obsolescent VCC.
--
dwmw2
On Thu, 29 Nov 2012 15:47:57 +0000
David Woodhouse <[email protected]> wrote:
> @@ -1020,12 +1048,15 @@ static uint32_t fpga_tx(struct solos_card *card)
> if (vcc) {
> atomic_inc(&vcc->stats->tx);
> solos_pop(vcc, oldskb);
> - } else {
> - struct pkt_hdr *header = (void *)oldskb->data;
> - if (le16_to_cpu(header->type) == PKT_PCLOSE)
> - complete(&SKB_CB(oldskb)->c);
> +
> + /*
> + * If it's a TX skb on a closed VCC, pclose()
> + * may be waiting for it...
> + */
> + if (!test_bit(ATM_VF_READY, &vcc->flags))
> + wake_up(&card->param_wq);
> + } else
> dev_kfree_skb_irq(oldskb);
> - }
the part that bothers me (and i dont have the programmer's guide for
the solos hardware) is that you are watching for the PKT_PCLOSE to be
sent to the card. shouldnt you be watching for the PKT_PCLOSE to be
returned from the card (assuming it does such a thing) so that you can
be assured that the tx/rx for this vpi/vci pair has been "stopped"?
On Thu, 29 Nov 2012 15:59:08 +0000
David Woodhouse <[email protected]> wrote:
> On Thu, 2012-11-29 at 10:37 -0500, chas williams - CONTRACTOR wrote:
> > you shouldnt clear ATM_VF_ADDR until the vpi/vci is actually closed and
> > ready for reuse. at this point, it isnt.
>
> So I should always wait for the completion of my PKT_CLOSE and only
> clear ATM_VF_ADDR when it's actually done?
>
> But can you define 'ready for reuse'? From the moment I clear
> ATM_VF_ADDR, another CPU may enter my popen() function to set up another
> VCC with the same parameters, and everything should work fine. The
> PKT_POPEN will end up on the queue *after* my PKT_PCLOSE for the old
> VCC. Any received packets will be dropped until the new VCC gets
> ATM_VF_READY set (by the popen function).
>
> What's the actual failure mode, caused by me clearing ATM_VF_ADDR "too
> early"?
there may not be one (due to serialization from other parts of the
atm stack) but you "shouldn't" clear ATM_VF_ADDR until the vpi/vci pair
is ready for reuse. by reuse, i mean that any previous rx/tx data in
the vpi/vci segmentation hardware has been removed/cleared.
> > ATM_VF_READY should already be clear at this point but you should set
> > it before you queue your PKT_CLOSE.
>
> I should *set* it? Do you mean clear it? Yes, I see it's cleared by
sorry, i did mean clear it.
> vcc_destroy_socket()... but all the other ATM drivers also seem to clear
> it for themselves, and that would appear to be harmless.
yeah, like i said, it is spuriously cleared in the drivers and should
probably just be moved to under the control of the next layer up
completely. drivers/atm should just handle the hardware side, not the
software side.
> > checking for ATM_VF_READY in find_vcc() is probably going to give you
> > grief as well since ATM_VF_READY isnt entirely under your control.
>
> That's fine. If *anyone* has cleared ATM_VF_READY, I stop sending
> packets up it. Or, more to the point, I stop using the damn thing at
> all. See commit 1f6ea6e511e5ec730d8e88651da1b7b6e8fd1333.
>
> > you need to be able to find the vcc until after pclose() is finished since
> > your tasklet might have a few packets it is still processing?
>
> The whole point of that check is that the tasklet *won't* be able to
> find it any more, and it'll just discard incoming packets for the
> obsolescent VCC.
that's fine as long as you understand this. in the case of the he, i
needed to be able to find the vcc until close() is finished so that i
can wakeup the sleeper in the close() routine that is waiting for the
reassembly queue to be cleared/reset. also, i still needed to find the
vcc for the tx side during close() since i still might need to pop()
skb's that are being sent during the close() while i am still trying to
get the hardware to shutdown the transmit dma engine.
On Thu, 2012-11-29 at 10:59 -0500, chas williams - CONTRACTOR wrote:
> the part that bothers me (and i dont have the programmer's guide for
> the solos hardware) is that you are watching for the PKT_PCLOSE to be
> sent to the card. shouldnt you be watching for the PKT_PCLOSE to be
> returned from the card (assuming it does such a thing) so that you can
> be assured that the tx/rx for this vpi/vci pair has been "stopped"?
Define "stopped".
For the RX case... the other end may *always* take it upon itself to
send us a packet marked with arbitrary VCI/VPI, right? There's no
connection setup for it "on the wire", in the case of PVC?
So bearing that in mind: from the moment ATM_VF_READY gets cleared, as
far as the ATM core is concerned, we will no longer receive packets on
the given VCC. If we receive any, we'll just complain about receiving
packets for an unknown VCI/VPI.
For the TX case ... yes, we need to be sure we aren't continuing to send
packets after our close() routine completes. We *used* to, but the
resulting ->pop() calls were causing problems, and that's why we're
looking at this code path closer. The currently proposed patches (except
one suggestion from Krzyztof that we both shouted down) would fix that.
--
dwmw2
On Thu, Nov 29, 2012 at 03:47:57PM +0000, David Woodhouse wrote:
> On Thu, 2012-11-29 at 16:09 +0100, Krzysztof Mazur wrote:
> >
> > I don't like two thinks about this patch:
> >
> > - if allos_skb(sizeof(*header), GFP_ATOMIC) at beginning of
> > pclose() fails we will crash
> >
> > - if card wakes up after this timeout we will probably crash too
> >
> > That's why proposed different approach, but it has other problems.
>
> How about this variant on what you suggested. Yes, we can definitely
> remove everything that's in the queue... as long as we use
> skb_queue_walk_safe() instead of skb_queue_walk().
>
> We can use GFP_KERNEL instead of GFP_ATOMIC, which at least reduces the
> likelihood of failing to close the vcc.
>
> We end up waiting *only* if there is a packet which is *currently* being
> DMA'd to the card. And if the card doesn't take that within 5 seconds,
> it almost certainly never will. So I can live with that.
>
Yeah, that shouldn't happen.
> + if (!test_bit(ATM_VF_READY, &vcc->flags))
> + wake_up(&card->param_wq);
> + } else
according to CodingStyle:
+ } else {
> dev_kfree_skb_irq(oldskb);
> - }
+ }
Krzysiek
On Thu, 29 Nov 2012 16:24:29 +0000
David Woodhouse <[email protected]> wrote:
> On Thu, 2012-11-29 at 10:59 -0500, chas williams - CONTRACTOR wrote:
> > the part that bothers me (and i dont have the programmer's guide for
> > the solos hardware) is that you are watching for the PKT_PCLOSE to be
> > sent to the card. shouldnt you be watching for the PKT_PCLOSE to be
> > returned from the card (assuming it does such a thing) so that you can
> > be assured that the tx/rx for this vpi/vci pair has been "stopped"?
>
> Define "stopped".
>
> For the RX case... the other end may *always* take it upon itself to
> send us a packet marked with arbitrary VCI/VPI, right? There's no
> connection setup for it "on the wire", in the case of PVC?
most atm hardware that i am familiar with, wont deliver vpi/vci data
unless you are actually trying to receive it. however, this hardware
is generally doing its reassembly in hardware and delivering aal5
pdu's and needs to manage its memory resources carefully. you might be
trying to reassemble 1000 pdu's from different vpi/vci's.
> So bearing that in mind: from the moment ATM_VF_READY gets cleared, as
> far as the ATM core is concerned, we will no longer receive packets on
> the given VCC. If we receive any, we'll just complain about receiving
> packets for an unknown VCI/VPI.
>
> For the TX case ... yes, we need to be sure we aren't continuing to send
> packets after our close() routine completes. We *used* to, but the
> resulting ->pop() calls were causing problems, and that's why we're
> looking at this code path closer. The currently proposed patches (except
> one suggestion from Krzyztof that we both shouted down) would fix that.
again part of this is poor synchronization. the detach protocol (i.e.
push of a NULL skb) should be flushing any pending transmits and
shutting down whatever in the protocol is doing any sending and
receiving. however, i release this might be difficult to do since the
detach protocol is invoked in such a strange way.
On Thu, 2012-11-29 at 12:17 -0500, chas williams - CONTRACTOR wrote:
> most atm hardware that i am familiar with, wont deliver vpi/vci data
> unless you are actually trying to receive it. however, this hardware
> is generally doing its reassembly in hardware and delivering aal5
> pdu's and needs to manage its memory resources carefully. you might be
> trying to reassemble 1000 pdu's from different vpi/vci's.
In almost *all* cases, there is only one VCC. So much so, in fact, that
I only just realised its firmware seems to crash if you send it a
PKT_POPEN for a new VPI/VCI pair while it's already got one open. But
yes, I think it does actually work in the same way.
We do see the 'packet received for unknown VCC' complaint, after we
reboot the host without resetting the card. And as shown in the commit I
just referenced, we rely on the !ATM_VF_READY check in order to prevent
a use-after-free when the tasklet is sending packets up a VCC that's
just been closed.
> > So bearing that in mind: from the moment ATM_VF_READY gets cleared, as
> > far as the ATM core is concerned, we will no longer receive packets on
> > the given VCC. If we receive any, we'll just complain about receiving
> > packets for an unknown VCI/VPI.
> >
> > For the TX case ... yes, we need to be sure we aren't continuing to send
> > packets after our close() routine completes. We *used* to, but the
> > resulting ->pop() calls were causing problems, and that's why we're
> > looking at this code path closer. The currently proposed patches (except
> > one suggestion from Krzyztof that we both shouted down) would fix that.
>
> again part of this is poor synchronization. the detach protocol (i.e.
> push of a NULL skb) should be flushing any pending transmits and
> shutting down whatever in the protocol is doing any sending and
> receiving. however, i release this might be difficult to do since the
> detach protocol is invoked in such a strange way.
You mean that (e.g.) pppoatm_push(vcc, NULL) should be waiting for any
pending TX skb (that has already been passed off to the driver) to
*complete*? How would it even do that?
--
dwmw2
On Thu, 29 Nov 2012 18:11:48 +0000
David Woodhouse <[email protected]> wrote:
> We do see the 'packet received for unknown VCC' complaint, after we
> reboot the host without resetting the card. And as shown in the commit I
> just referenced, we rely on the !ATM_VF_READY check in order to prevent
> a use-after-free when the tasklet is sending packets up a VCC that's
> just been closed.
well that behavior is just crap. why is it delivering cells for a
vpi/vci pair that is not open? regardless, i pointed out the behavior
of find_vcc() just in case you need to do some clean up on data that is
coming back while you are attempting to finish operations during your
driver's close() but this cleanup might not be happening since you
arent able to get a reference to the vcc so you can pop() the data or
whatever you might need to do.
> > again part of this is poor synchronization. the detach protocol (i.e.
> > push of a NULL skb) should be flushing any pending transmits and
> > shutting down whatever in the protocol is doing any sending and
> > receiving. however, i release this might be difficult to do since the
> > detach protocol is invoked in such a strange way.
>
> You mean that (e.g.) pppoatm_push(vcc, NULL) should be waiting for any
> pending TX skb (that has already been passed off to the driver) to
> *complete*? How would it even do that?
i think the order of the vcc_destroy_socket() operations is a bit
wrong. it should call detach protocol (i.e. push a NULL). this should
cause the attached protocol to stop any future sends and receives, and
it CAN sleep in this context (and only this context -- generally
sending cannot sleep which is why this might seem confusing) to do
whatever it needs to do to wait for the attached protocol to clean up
queues, flush data etc.
then the driver close() should be called. this takes care of cleaning
up any pending tx or rx that is in the hardware. and of course,
close() can sleep since it will be in a interrutible context.
the pop() might be screwed up here though. you might have skb's in the
driver that should be pop()'ed with the formerly attached protocol.
you could wait for pending tx's sent to the driver. you know that your
attached protocol's pop() will be called. keep count of the
outstanding transmit skb's. you cant do this though if you close() the
vcc before you detach the protocol.
On Thu, 2012-11-29 at 13:29 -0500, chas williams - CONTRACTOR wrote:
> On Thu, 29 Nov 2012 18:11:48 +0000
> David Woodhouse <[email protected]> wrote:
>
> > We do see the 'packet received for unknown VCC' complaint, after we
> > reboot the host without resetting the card. And as shown in the commit I
> > just referenced, we rely on the !ATM_VF_READY check in order to prevent
> > a use-after-free when the tasklet is sending packets up a VCC that's
> > just been closed.
>
> well that behavior is just crap. why is it delivering cells for a
> vpi/vci pair that is not open?
In the reboot case... Because it *was* open and the device has no way of
knowing that the host just rebooted. We don't reset the card on loading
the driver, because that would cause an ADSL resync.
Perhaps we could send a 'close all VCCs' command to the firmware though.
Nathan, could we add that to the firmware?
Or we could just respond to any unwanted incoming packet by sending a
close for that specific VCC. And be careful about potential races with
open().
In the close case... The *tasklet* is running to process an incoming
packet, finds an open and active VCC and is *about* to send a packet up
to it. Meanwhile, our close() runs and the VCC is destroyed. And then
the tasklet... oops, use-after-free and crash.
Hence commit 1f6ea6e51 which makes the tasklet refuse to process RX for
a VCC which doesn't have the ATM_VF_READY flag set. Because it knows
it's *being* closed. And the close() routine waits for any *existing*
tasklet run to finish, to make sure nobody's referencing the VCC, before
it returns and allows vcc_destroy_socket() to complete. It's the RCU
principle, basically.
> > You mean that (e.g.) pppoatm_push(vcc, NULL) should be waiting for any
> > pending TX skb (that has already been passed off to the driver) to
> > *complete*? How would it even do that?
>
> i think the order of the vcc_destroy_socket() operations is a bit
> wrong. it should call detach protocol (i.e. push a NULL). this should
> cause the attached protocol to stop any future sends and receives, and
> it CAN sleep in this context (and only this context -- generally
> sending cannot sleep which is why this might seem confusing) to do
> whatever it needs to do to wait for the attached protocol to clean up
> queues, flush data etc.
>
> then the driver close() should be called. this takes care of cleaning
> up any pending tx or rx that is in the hardware. and of course,
> close() can sleep since it will be in a interrutible context.
This is basically what Krzysztof's patch 1/7 was doing, which we've now
dropped from the series.
There are issues with *either* ordering.
The current case is that we call vcc->dev->ops->close(vcc) *first*,
before vcc->push(vcc, NULL). And that means that the device is told to
close the VCC while the protocol may still be pushing packets to it.
Hence the patches to both pppoatm and br2684 to make them check for
ATM_VF_READY and *stop* pushing packets if it's not set.
If we flip it round and tell the protocol first, then it tears down all
its data structures while the driver is still happily calling its
->pop() on transmitted skbs. Which leads to the panic in br2684_pop()
that we've *also* seen (because we weren't actually flushing the TX
packets in the driver's close(), which had the same effect). Yes, you
suggest that the protocol could keep track of the skbs it's sent down
and wait for them... but surely it's better to let the driver get called
first and *abort* them?
At this point, I think we're better off as we are (with Krzysztof's
patch 1/7 dropped, and leaving vcc->dev->ops->close() being called
before vcc->push(NULL). We've fairly much solved the issues with that
arrangement, by checking ATM_VF_READY in the protocols' ->push()
functions.
--
dwmw2
On Wed, 2012-11-28 at 17:09 +0000, David Woodhouse wrote:
> On Wed, 2012-11-28 at 12:04 -0500, David Miller wrote:
> > Do you want me to pull that tree into net-next or is there a plan to
> > repost the entire series of work for a final submission?
>
> I think it needs a little more testing/consensus first. I'd like an ack
> from Chas on the atm ->release_cb() thing, at least. And I wouldn't mind
> confirmation from Nathan's customer that they're no longer seeing the
> panics.
>
The customer has confirmed that they haven't seen any panics. I tested
these patches on OpenWrt with Kernel 3.3.8 and couldn't get a panic:
c118dc5 solos-pci: Fix leak of skb received for unknown vcc
e539793 br2684: fix module_put() race
3656320 br2684: don't send frames on not-ready vcc
753f920 solos-pci: Wait for pending TX to complete when releasing vcc
91ab2cf pppoatm: do not inline pppoatm_may_send()
85b48fa pppoatm: drop frames to not-ready vcc
3ac1080 pppoatm: take ATM socket lock in pppoatm_send()
e41faed pppoatm: fix module_put() race
3b1a914 pppoatm: allow assign only on a connected socket
ec809bd atm: add owner of push() callback to atmvcc
ae088d6 atm: br2684: Fix excessive queue bloat
I haven't tested these ones:
230a012 pppoatm: fix missing wakeup in pppoatm_send()
1c0c800 atm: Add release_cb() callback to vcc
On Fri, 2012-11-30 at 12:18 +1100, Nathan Williams wrote:
> The customer has confirmed that they haven't seen any panics. I tested
> these patches on OpenWrt with Kernel 3.3.8 and couldn't get a panic:
Thanks.
> I haven't tested these ones:
>
> 230a012 pppoatm: fix missing wakeup in pppoatm_send()
> 1c0c800 atm: Add release_cb() callback to vcc
The race that fixes is fairly unlikely, but if you do see transmit
stalls you can make them apply to your 3.3.8 kernel by applying the same
preliminary patch that is now in the OpenWRT Attitude Adjustment branch:
https://dev.openwrt.org/browser/branches/attitude_adjustment/target/linux/generic/patches-3.3/080-prot-release-cb.patch
--
dwmw2
In message <[email protected]>,David Woodhouse writes:
>At this point, I think we're better off as we are (with Krzysztof's
>patch 1/7 dropped, and leaving vcc->dev->ops->close() being called
>before vcc->push(NULL). We've fairly much solved the issues with that
>arrangement, by checking ATM_VF_READY in the protocols' ->push()
>functions.
it isnt clear to me that fixes the race entirely either.
vcc_destroy_socket() and any of the push()/sends()'s are not serialized.
while you may clear the ATM_VF_READY flag, you might not clear it soon
enough for any particular push() that is already running. so it still
seems like you are racing close() against push() at this point. the
window is greatly reduced, but it still exists.
On Thu, 2012-11-29 at 20:38 -0500, Chas Williams (CONTRACTOR) wrote:
> it isnt clear to me that fixes the race entirely either.
> vcc_destroy_socket() and any of the push()/sends()'s are not
> serialized.
> while you may clear the ATM_VF_READY flag, you might not clear it soon
> enough for any particular push() that is already running. so it still
> seems like you are racing close() against push() at this point. the
> window is greatly reduced, but it still exists.
I think it's actually fixed for pppoatm by the bh_lock_sock() and the
sock_owned_by_user() check. As soon as vcc_release() calls lock_sock(),
pppoatm stops accepting packets.
It should be simple enough to do the same in br2684.
--
dwmw2
On Fri, 2012-11-30 at 01:57 +0000, David Woodhouse wrote:
> I think it's actually fixed for pppoatm by the bh_lock_sock() and the
> sock_owned_by_user() check. As soon as vcc_release() calls lock_sock(),
> pppoatm stops accepting packets.
>
> It should be simple enough to do the same in br2684.
Um... but now I come to look at it... Krzysztof, doesn't your 'pppoatm:
take ATM socket lock in pppoatm_send()' patch actually *break* the case
of sending via vcc_sendmsg()?
Why did you include the sock_owned_by_user() check in there and not just
use bh_lock_sock()?
With the sock_owned_by_user() check, it'll *always* drop packets
submitted through vcc_sendmsg(), won't it?
Admittedly, for PPPoATM and BR2684 we never do want to have packets
submitted directly from userspace that way; they should all come via the
PPP channel or the netdev respectively. So we might want to keep the
sock_owned_by_user() check because it fixes the close race, and
explicitly document it.
But it doesn't necessarily work for other protocols, so we may need a
better solution for the general case. Perhaps drop the
sock_owned_by_user() check, and put bh_lock_sock() around the beginning
of vcc_destroy_socket() where it clears ATM_VF_READY? That'll ensure
that no ->push() is *currently* operating on a skb having seen that the
VCC is still open.
Or maybe we just make the *devices* check the ATM_VF_CLOSE flag and
refuse to send the skb? Put the entire thing into their domain. Although
that may involve extra locking in the driver to synchronise send() and
close() sufficiently.
I'm still reluctant to swap the order of the device/protocol close in
vcc_destroy_socket(). I think that'll just swap one set of problems
which is now fairly well-understood and mostly solved, for another set.
In particular, I think the device needs to see the close first, because
*it* can actually abort or flush any pending TX and RX (including
synchronising with its tasklet as solos-pci does, etc.). Only then does
the protocol tear its data structures down. But I suppose the new set of
problems could be found and overcome, if Chas wants to propose an
alternative patch set...
--
dwmw2
On Fri, Nov 30, 2012 at 08:25:22AM +0000, David Woodhouse wrote:
> On Fri, 2012-11-30 at 01:57 +0000, David Woodhouse wrote:
> > I think it's actually fixed for pppoatm by the bh_lock_sock() and the
> > sock_owned_by_user() check. As soon as vcc_release() calls lock_sock(),
> > pppoatm stops accepting packets.
> >
> > It should be simple enough to do the same in br2684.
>
> Um... but now I come to look at it... Krzysztof, doesn't your 'pppoatm:
> take ATM socket lock in pppoatm_send()' patch actually *break* the case
> of sending via vcc_sendmsg()?
no, in case of
pppoatm_send()
vcc_sendmsg()
the vcc_sendmsg() will just wait for releasing sk->sk_lock.slock.
When the vcc_sendmsg() gets lock first
vcc_sendmsg()
pppoatm_send()
The pppoatm_send() might spin for a while for sk->sk_lock.slock, but
after lock_sock() the vcc_sendmsg() releases that lock and
pppoatm_send() will acquire it and notice that locked is locked
(sock_owned_by_user() returns true) and will just block pppoatm,
and will be woken up in release_sock() (fix was fixed by your 10/17
patch).
>
> Why did you include the sock_owned_by_user() check in there and not just
> use bh_lock_sock()?
because bh_lock_sock() will succeeds even with concurrent vcc_sendmsg()
and will have some races in that case.
>
> With the sock_owned_by_user() check, it'll *always* drop packets
> submitted through vcc_sendmsg(), won't it?
No, sock_owned_by_user() is just in pppoatm_send() and instead of
dropping packets we block pppd.
>
> Admittedly, for PPPoATM and BR2684 we never do want to have packets
> submitted directly from userspace that way; they should all come via the
> PPP channel or the netdev respectively. So we might want to keep the
> sock_owned_by_user() check because it fixes the close race, and
> explicitly document it.
It fixes also races with vcc_sendmsg(). If we really don't wont
vcc_sendmsg() with pppoatm and br2684 we must do some protection
than vcc_sendmsg() will fail instead of racing with pppoatm_send()
and crashing with some drivers that does not support concurent
->send().
>
> But it doesn't necessarily work for other protocols, so we may need a
> better solution for the general case. Perhaps drop the
> sock_owned_by_user() check, and put bh_lock_sock() around the beginning
> of vcc_destroy_socket() where it clears ATM_VF_READY? That'll ensure
> that no ->push() is *currently* operating on a skb having seen that the
> VCC is still open.
>
> Or maybe we just make the *devices* check the ATM_VF_CLOSE flag and
> refuse to send the skb? Put the entire thing into their domain. Although
> that may involve extra locking in the driver to synchronise send() and
> close() sufficiently.
We need some additional synchronizization with pppoatm_send(), now
we use:
tasklet_kill(&pvcc->wakeup_tasklet);
ppp_unregister_channel(&pvcc->chan);
In ppp_unregister_channel() we will synchronize with the function
calling pppoatm_send() using "downl" lock.
And this must be done in pppoatm.
>
> I'm still reluctant to swap the order of the device/protocol close in
> vcc_destroy_socket(). I think that'll just swap one set of problems
> which is now fairly well-understood and mostly solved, for another set.
> In particular, I think the device needs to see the close first, because
> *it* can actually abort or flush any pending TX and RX (including
> synchronising with its tasklet as solos-pci does, etc.). Only then does
> the protocol tear its data structures down. But I suppose the new set of
> problems could be found and overcome, if Chas wants to propose an
> alternative patch set...
>
I think that the current order is good, now we have:
1. stop_sending_fames to protocol
now TX is shut down
(currently done by
set_bit(ATM_VF_CLOSE, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
)
2. close_device to device
now RX is shut down
3. device_was_closed to protocol
ugly push(NULL), but we can add some other callback.
we also can do:
1. disable RX to device
now RX is shut down
2. detach to protocol
now TX is shut down
(now protocol can fully detach because RX is disabled)
3. close_device to device
(device is not used anymore)
Krzysiek
On Fri, 2012-11-30 at 10:53 +0100, Krzysztof Mazur wrote:
> On Fri, Nov 30, 2012 at 08:25:22AM +0000, David Woodhouse wrote:
> > On Fri, 2012-11-30 at 01:57 +0000, David Woodhouse wrote:
> > > I think it's actually fixed for pppoatm by the bh_lock_sock() and the
> > > sock_owned_by_user() check. As soon as vcc_release() calls lock_sock(),
> > > pppoatm stops accepting packets.
> > >
> > > It should be simple enough to do the same in br2684.
> >
> > Um... but now I come to look at it... Krzysztof, doesn't your 'pppoatm:
> > take ATM socket lock in pppoatm_send()' patch actually *break* the case
> > of sending via vcc_sendmsg()?
>
> no,
... oops, sorry. My sleep-deprived brain thought that we were calling
pppoatm_send() *from* vcc_sendmsg() with the lock held. But of course
we're not; we're calling directly into the driver. So that's OK.
In that case I think we're fine. I'll just do the same thing in
br2684_push(), fix up the comment you just corrected, and we're all
good.
> I think that the current order is good, now we have:
>
> 1. stop_sending_fames to protocol
> now TX is shut down
> (currently done by
> set_bit(ATM_VF_CLOSE, &vcc->flags);
> clear_bit(ATM_VF_READY, &vcc->flags);
> )
Right, with the caveat the the socket lock is required for
synchronisation on this. But that's OK. Or we *could* perhaps introduce
an explicit call into the protocol for it, if we really wanted. But I'm
inclined not to.
> 2. close_device to device
> now RX is shut down
> 3. device_was_closed to protocol
> ugly push(NULL), but we can add some other callback.
> we also can do:
>
> 1. disable RX to device
> now RX is shut down
> 2. detach to protocol
> now TX is shut down
> (now protocol can fully detach because RX is disabled)
Careful. You have to flush the TX packets which are currently in-flight.
It's not sufficient just to stop sending any more. And you have to do it
*before* the data structures are torn down.
> 3. close_device to device
> (device is not used anymore)
Really, what we're saying is that *one* of the driver or protocol close
functions needs to be split, and we need to do DPD or PDP. Since the
device driver *can* abort/flush the TX queue and also any pending RX
being handled by a tasklet, I think it makes most sense to keep it in
the middle, with the protocol being handled first and last... which is
the current order, as long as we consider setting ATM_VF_CLOSE to be the
first part.
--
dwmw2
On Fri, 2012-11-30 at 12:10 +0000, David Woodhouse wrote:
> In that case I think we're fine. I'll just do the same thing in
> br2684_push(), fix up the comment you just corrected, and we're all
> good.
OK, here's an update to me my patch 8/17 'br2684: don't send frames on
not-ready vcc'. It takes the socket lock and does fairly much the same
thing as your pppoatm version. It returns NETDEV_TX_BUSY and stops the
queue if the socket is locked, and it gets woken from the ->release_cb
callback.
I've dropped your Acked-By: since it's mostly new, but feel free to give
me a fresh one. With this I think we're done.
Unless Chas has any objections, I'll ask Dave to pull it...
From 47d5ad4c98452bcddfd00da1c659dac85202f213 Mon Sep 17 00:00:00 2001
From: David Woodhouse <[email protected]>
Date: Tue, 27 Nov 2012 23:28:36 +0000
Subject: [PATCH] br2684: don't send frames on not-ready vcc
Avoid submitting packets to a vcc which is being closed. Things go badly
wrong when the ->pop method gets later called after everything's been
torn down.
Use the ATM socket lock for synchronisation with vcc_destroy_socket(),
which clears the ATM_VF_READY bit under the same lock. Otherwise, we
could end up submitting a packet to the device driver even after its
->ops->close method has been called. And it could call the vcc's ->pop
method after the protocol has been shut down. Which leads to a panic.
Signed-off-by: David Woodhouse <[email protected]>
---
net/atm/br2684.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 8eb6fbe..5ff145f 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -68,6 +68,7 @@ struct br2684_vcc {
/* keep old push, pop functions for chaining */
void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb);
void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
+ void (*old_release_cb)(struct atm_vcc *vcc);
enum br2684_encaps encaps;
struct list_head brvccs;
#ifdef CONFIG_ATM_BR2684_IPFILTER
@@ -269,6 +270,22 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
return !atmvcc->send(atmvcc, skb);
}
+static void br2684_release_cb(struct atm_vcc *atmvcc)
+{
+ struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
+
+ /*
+ * A race with br2684_xmit_vcc() might cause a spurious wakeup just
+ * after that function *stops* the queue, and qspace might actually
+ * go negative before the queue stops again. We cope with that.
+ */
+ if (atomic_read(&brvcc->qspace) > 0)
+ netif_wake_queue(brvcc->device);
+
+ if (brvcc->old_release_cb)
+ brvcc->old_release_cb(atmvcc);
+}
+
static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb,
const struct br2684_dev *brdev)
{
@@ -280,6 +297,8 @@ static netdev_tx_t br2684_start_xmit(struct sk_buff *skb,
{
struct br2684_dev *brdev = BRPRIV(dev);
struct br2684_vcc *brvcc;
+ struct atm_vcc *atmvcc;
+ netdev_tx_t ret = NETDEV_TX_OK;
pr_debug("skb_dst(skb)=%p\n", skb_dst(skb));
read_lock(&devs_lock);
@@ -290,9 +309,26 @@ static netdev_tx_t br2684_start_xmit(struct sk_buff *skb,
dev->stats.tx_carrier_errors++;
/* netif_stop_queue(dev); */
dev_kfree_skb(skb);
- read_unlock(&devs_lock);
- return NETDEV_TX_OK;
+ goto out_devs;
+ }
+ atmvcc = brvcc->atmvcc;
+
+ bh_lock_sock(sk_atm(atmvcc));
+
+ if (test_bit(ATM_VF_RELEASED, &atmvcc->flags) ||
+ test_bit(ATM_VF_CLOSE, &atmvcc->flags) ||
+ !test_bit(ATM_VF_READY, &atmvcc->flags)) {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ goto out;
}
+
+ if (sock_owned_by_user(sk_atm(atmvcc))) {
+ netif_stop_queue(brvcc->device);
+ ret = NETDEV_TX_BUSY;
+ goto out;
+ }
+
if (!br2684_xmit_vcc(skb, dev, brvcc)) {
/*
* We should probably use netif_*_queue() here, but that
@@ -304,8 +340,11 @@ static netdev_tx_t br2684_start_xmit(struct sk_buff *skb,
dev->stats.tx_errors++;
dev->stats.tx_fifo_errors++;
}
+ out:
+ bh_unlock_sock(sk_atm(atmvcc));
+ out_devs:
read_unlock(&devs_lock);
- return NETDEV_TX_OK;
+ return ret;
}
/*
@@ -378,6 +417,7 @@ static void br2684_close_vcc(struct br2684_vcc *brvcc)
list_del(&brvcc->brvccs);
write_unlock_irq(&devs_lock);
brvcc->atmvcc->user_back = NULL; /* what about vcc->recvq ??? */
+ brvcc->atmvcc->release_cb = brvcc->old_release_cb;
brvcc->old_push(brvcc->atmvcc, NULL); /* pass on the bad news */
kfree(brvcc);
module_put(THIS_MODULE);
@@ -554,9 +594,11 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
brvcc->encaps = (enum br2684_encaps)be.encaps;
brvcc->old_push = atmvcc->push;
brvcc->old_pop = atmvcc->pop;
+ brvcc->old_release_cb = atmvcc->release_cb;
barrier();
atmvcc->push = br2684_push;
atmvcc->pop = br2684_pop;
+ atmvcc->release_cb = br2684_release_cb;
/* initialize netdev carrier state */
if (atmvcc->dev->signal == ATM_PHY_SIG_LOST)
--
1.8.0
--
dwmw2
On Fri, Nov 30, 2012 at 04:23:46PM +0000, David Woodhouse wrote:
>
> +static void br2684_release_cb(struct atm_vcc *atmvcc)
> +{
> + struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
> +
> + /*
> + * A race with br2684_xmit_vcc() might cause a spurious wakeup just
> + * after that function *stops* the queue, and qspace might actually
> + * go negative before the queue stops again. We cope with that.
> + */
We cannot race with br2684_xmit_vcc() because both br2684_xmit_vcc()
and br2684_release_cb() are called with locked sk->sk_lock.slock.
> + if (atomic_read(&brvcc->qspace) > 0)
> + netif_wake_queue(brvcc->device);
> +
> + if (brvcc->old_release_cb)
> + brvcc->old_release_cb(atmvcc);
> +}
Except that comment, the patch looks good:
Acked-by: Krzysztof Mazur <[email protected]>
Krzysiek
On Fri, 30 Nov 2012 16:23:46 +0000
David Woodhouse <[email protected]> wrote:
> On Fri, 2012-11-30 at 12:10 +0000, David Woodhouse wrote:
> > In that case I think we're fine. I'll just do the same thing in
> > br2684_push(), fix up the comment you just corrected, and we're all
> > good.
>
> OK, here's an update to me my patch 8/17 'br2684: don't send frames on
> not-ready vcc'. It takes the socket lock and does fairly much the same
> thing as your pppoatm version. It returns NETDEV_TX_BUSY and stops the
> queue if the socket is locked, and it gets woken from the ->release_cb
> callback.
>
> I've dropped your Acked-By: since it's mostly new, but feel free to give
> me a fresh one. With this I think we're done.
>
> Unless Chas has any objections, I'll ask Dave to pull it...
no objections. i think this deals with my concerns. as for splitting
the close functions, from one of your previous messages:
>Really, what we're saying is that *one* of the driver or protocol close
>functions needs to be split, and we need to do DPD or PDP. Since the
>device driver *can* abort/flush the TX queue and also any pending RX
>being handled by a tasklet, I think it makes most sense to keep it in
>the middle, with the protocol being handled first and last... which is
>the current order, as long as we consider setting ATM_VF_CLOSE to be the
>first part.
i believe this is essentially already done with the release_cb()
implementation right? that is splitting the protocol detach/shutdown
into two parts.
On Fri, Nov 30, 2012 at 12:12:56PM -0500, chas williams - CONTRACTOR wrote:
> >Really, what we're saying is that *one* of the driver or protocol close
> >functions needs to be split, and we need to do DPD or PDP. Since the
> >device driver *can* abort/flush the TX queue and also any pending RX
> >being handled by a tasklet, I think it makes most sense to keep it in
> >the middle, with the protocol being handled first and last... which is
> >the current order, as long as we consider setting ATM_VF_CLOSE to be the
> >first part.
>
> i believe this is essentially already done with the release_cb()
> implementation right? that is splitting the protocol detach/shutdown
> into two parts.
partially, release_cb() is about ATM socket locking. To avoid some races
we need to take the ATM socket lock in protocol send function
(br2684_start_xmit, pppoatm_send, ...). That functions are executed
in bh context and we cannot sleep and wait for releasing the ATM socket
lock, so we just block sending and when the ATM socket is unlocked
release_cb() is called and we re-enabling sending.
Currently the first part of detach is just:
lock_sock(sk)
(without latest "br2684: don't send frames on not-ready vcc"
the first part was
set_bit(ATM_VF_CLOSE, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
for br2684)
After that the protocol stops sending new packets so the vcc may be
fully closed by ATM driver. The protocol is still ready to process
received packets. After vcc is closed the protocol can safely detach
knowing that no new packets will be received.
Krzysiek
On Fri, 2012-11-30 at 18:00 +0100, Krzysztof Mazur wrote:
> On Fri, Nov 30, 2012 at 04:23:46PM +0000, David Woodhouse wrote:
> >
> > +static void br2684_release_cb(struct atm_vcc *atmvcc)
> > +{
> > + struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
> > +
> > + /*
> > + * A race with br2684_xmit_vcc() might cause a spurious wakeup just
> > + * after that function *stops* the queue, and qspace might actually
> > + * go negative before the queue stops again. We cope with that.
> > + */
>
> We cannot race with br2684_xmit_vcc() because both br2684_xmit_vcc()
> and br2684_release_cb() are called with locked sk->sk_lock.slock.
Ah, right. For some reason I thought the lock was already dropped when
->release_cb() was called. In that case I'll remove the comment. Thanks.
--
dwmw2