"77c1090 net: fix infinite loop in __skb_recv_datagram()" (v3.8) introduced a
regression:
After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
__skb_recv_datagram() instead stops at the first skb with len == 0 and results
in the system call failing with -EFAULT via skb_copy_datagram_iovec().
Signed-off-by: Benjamin Poirier <[email protected]>
---
net/core/datagram.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3..02398ae 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
skb_queue_walk(queue, skb) {
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
- if (*off >= skb->len && skb->len) {
+ if (*off >= skb->len && (skb->len || *off)) {
*off -= skb->len;
continue;
}
--
1.7.10.4
Currently, peeking on a unix stream socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data.
This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.
Signed-off-by: Benjamin Poirier <[email protected]>
---
net/unix/af_unix.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d..1a02af0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1859,10 +1859,10 @@ out:
}
/*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
*/
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
{
DEFINE_WAIT(wait);
@@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (skb_peek_tail(&sk->sk_receive_queue) != last ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
return timeo;
}
-
-
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size,
int flags)
@@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
unix_state_lock(sk);
- skb = skb_peek(&sk->sk_receive_queue);
+ last = skb = skb_peek(&sk->sk_receive_queue);
again:
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
@@ -1966,7 +1962,7 @@ again:
break;
mutex_unlock(&u->readlock);
- timeo = unix_stream_data_wait(sk, timeo);
+ timeo = unix_stream_data_wait(sk, timeo, last);
if (signal_pending(current)
|| mutex_lock_interruptible(&u->readlock)) {
@@ -1980,10 +1976,13 @@ again:
break;
}
- if (skip >= skb->len) {
+ skip = sk_peek_offset(sk, flags);
+ while (skip >= skb->len) {
skip -= skb->len;
+ last = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- goto again;
+ if (!skb)
+ goto again;
}
unix_state_unlock(sk);
--
1.7.10.4
Currently, peeking on a unix datagram socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data. That's
because *off is not reset between each skb_queue_walk().
This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.
Signed-off-by: Benjamin Poirier <[email protected]>
---
net/core/datagram.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 02398ae..6c502b5 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn
return autoremove_wake_function(wait, mode, sync, key);
}
/*
- * Wait for a packet..
+ * Wait for the last received packet to be different from skb
*/
-static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+ struct sk_buff *skb)
{
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
if (error)
goto out_err;
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if ((struct sk_buff *)sk->sk_receive_queue.prev != skb)
goto out;
/* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
* __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
- * @off: an offset in bytes to peek skb from. Returns an offset
- * within an skb where data actually starts
* @peeked: returns non-zero if this packet has been seen before
+ * @_off: an offset in bytes to peek skb from. Returns an offset
+ * within an skb where data actually starts
* @err: error code returned
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -159,9 +160,9 @@ out_noerr:
* the standard around please.
*/
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
- int *peeked, int *off, int *err)
+ int *peeked, int *_off, int *err)
{
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -182,13 +183,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
*/
unsigned long cpu_flags;
struct sk_buff_head *queue = &sk->sk_receive_queue;
+ int off = *_off;
spin_lock_irqsave(&queue->lock, cpu_flags);
+ last = (struct sk_buff *)queue;
skb_queue_walk(queue, skb) {
+ last = skb;
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
- if (*off >= skb->len && (skb->len || *off)) {
- *off -= skb->len;
+ if (off >= skb->len && (skb->len || off)) {
+ off -= skb->len;
continue;
}
skb->peeked = 1;
@@ -197,6 +201,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
__skb_unlink(skb, queue);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ *_off = off;
return skb;
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
@@ -206,7 +211,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
if (!timeo)
goto no_packet;
- } while (!wait_for_packet(sk, err, &timeo));
+ } while (!wait_for_more_packets(sk, err, &timeo, last));
return NULL;
--
1.7.10.4
On Thu, 2013-04-25 at 09:47 -0400, Benjamin Poirier wrote:
> "77c1090 net: fix infinite loop in __skb_recv_datagram()" (v3.8) introduced a
> regression:
> After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
> __skb_recv_datagram() instead stops at the first skb with len == 0 and results
> in the system call failing with -EFAULT via skb_copy_datagram_iovec().
if MSG_PEEK is not used, what happens here ?
It doesn't look right to me that we return -EFAULT if skb->len is 0,
EFAULT is reserved to faulting (ie reading/writing at least one byte)
How are we telling the user message had 0 byte, but its not EOF ?
On Thu, 2013-04-25 at 09:47 -0400, Benjamin Poirier wrote:
> Currently, peeking on a unix datagram socket with an offset larger than len of
> the data in the sk receive queue returns immediately with bogus data. That's
> because *off is not reset between each skb_queue_walk().
>
> This patch fixes this so that the behavior is the same as peeking with no
> offset on an empty queue: the caller blocks.
>
> Signed-off-by: Benjamin Poirier <[email protected]>
> ---
> net/core/datagram.c | 25 +++++++++++++++----------
> 1 file changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/net/core/datagram.c b/net/core/datagram.c
> index 02398ae..6c502b5 100644
> --- a/net/core/datagram.c
> +++ b/net/core/datagram.c
> @@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn
> return autoremove_wake_function(wait, mode, sync, key);
> }
> /*
> - * Wait for a packet..
> + * Wait for the last received packet to be different from skb
> */
> -static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
> +static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
> + struct sk_buff *skb)
const struct sk_buff *skb
> {
> int error;
> DEFINE_WAIT_FUNC(wait, receiver_wake_function);
> @@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
> if (error)
> goto out_err;
>
> - if (!skb_queue_empty(&sk->sk_receive_queue))
> + if ((struct sk_buff *)sk->sk_receive_queue.prev != skb)
Why is the cast needed ?
> goto out;
>
> /* Socket shut down? */
> @@ -131,9 +132,9 @@ out_noerr:
> * __skb_recv_datagram - Receive a datagram skbuff
> * @sk: socket
> * @flags: MSG_ flags
> - * @off: an offset in bytes to peek skb from. Returns an offset
> - * within an skb where data actually starts
> * @peeked: returns non-zero if this packet has been seen before
> + * @_off: an offset in bytes to peek skb from. Returns an offset
> + * within an skb where data actually starts
> * @err: error code returned
> *
> * Get a datagram skbuff, understands the peeking, nonblocking wakeups
> @@ -159,9 +160,9 @@ out_noerr:
> * the standard around please.
> */
> struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
> - int *peeked, int *off, int *err)
> + int *peeked, int *_off, int *err)
> {
> - struct sk_buff *skb;
> + struct sk_buff *skb, *last;
> long timeo;
> /*
> * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
> @@ -182,13 +183,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
> */
> unsigned long cpu_flags;
> struct sk_buff_head *queue = &sk->sk_receive_queue;
> + int off = *_off;
>
> spin_lock_irqsave(&queue->lock, cpu_flags);
> + last = (struct sk_buff *)queue;
This could be done before spin_lock
> skb_queue_walk(queue, skb) {
> + last = skb;
> *peeked = skb->peeked;
> if (flags & MSG_PEEK) {
> - if (*off >= skb->len && (skb->len || *off)) {
> - *off -= skb->len;
> + if (off >= skb->len && (skb->len || off)) {
> + off -= skb->len;
> continue;
> }
> skb->peeked = 1;
> @@ -197,6 +201,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
> __skb_unlink(skb, queue);
>
> spin_unlock_irqrestore(&queue->lock, cpu_flags);
> + *_off = off;
> return skb;
> }
> spin_unlock_irqrestore(&queue->lock, cpu_flags);
> @@ -206,7 +211,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
> if (!timeo)
> goto no_packet;
>
> - } while (!wait_for_packet(sk, err, &timeo));
> + } while (!wait_for_more_packets(sk, err, &timeo, last));
>
> return NULL;
>
Other than that, patch seems fine.
On 2013/04/25 11:48, Eric Dumazet wrote:
> On Thu, 2013-04-25 at 09:47 -0400, Benjamin Poirier wrote:
> > "77c1090 net: fix infinite loop in __skb_recv_datagram()" (v3.8) introduced a
> > regression:
> > After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
> > __skb_recv_datagram() instead stops at the first skb with len == 0 and results
> > in the system call failing with -EFAULT via skb_copy_datagram_iovec().
>
>
> if MSG_PEEK is not used, what happens here ?
I'm not sure what you're question is aiming at, but if MSG_PEEK isn't used,
there's no difference with regards to this patch. It's all in the "if (flags &
MSG_PEEK)" block.
More generally, without MSG_PEEK, a sequence of
send(..., len=10, ...); send(len=0); send(len=20)
results in
recv()=10; recv()=0; recv()=20; recv()= /* blocks */
With flags=MSG_PEEK, a sequence of
send(len=10); send(len=0); send(len=20)
resulted (without any patch) in
setsockopt(..., SO_PEEK_OFF -> 0);
recv()=10; recv()=0; recv()=0; recv()=0; ...
and with v2 of the patch, results in
setsockopt(..., SO_PEEK_OFF -> 0);
recv()=10; recv()=0; recv()=20; recv()= /* blocks */
We could also have the following sequence
setsockopt(..., SO_PEEK_OFF -> 10);
recv()=0; recv()=20; recv()= /* blocks */
or
setsockopt(..., SO_PEEK_OFF -> 5);
recv()=5; recv()=0; recv()=20; recv()= /* blocks */
or the unfortunate
setsockopt(..., SO_PEEK_OFF -> 0);
recv()=10; recv()=0; recv()=20;
setsockopt(..., SO_PEEK_OFF -> 0);
recv()=10; ; recv()=20; recv()= /* blocks */
That last one could be changed by resetting the skb->peeked flag for all
buffers the queue during sock_setsockopt SO_PEEK_OFF. If you think it's better
that way.
>
> It doesn't look right to me that we return -EFAULT if skb->len is 0,
> EFAULT is reserved to faulting (ie reading/writing at least one byte)
That's what happens when skb_copy_datagram_iovec() is asked to copy > 0 bytes
out of a skb with len == 0.
Perhaps skb_copy_datagram_iovec() should be changed to use EINVAL in that case
but we can avoid that kind of call altogether by fixing the problem with
MSG_PEEK.
>
> How are we telling the user message had 0 byte, but its not EOF ?
>
We aren't, but what's EOF on a datagram socket?
Thank you for the review.
Subject: [PATCH net v2 1/3] unix/dgram: peek beyond 0-sized skbs
"77c1090 net: fix infinite loop in __skb_recv_datagram()" (v3.8) introduced a
regression:
After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
__skb_recv_datagram() instead stops at the first skb with len == 0 and results
in the system call failing with -EFAULT via skb_copy_datagram_iovec().
When peeking at an offset with 0-sized skb(s), each one of those is received
only once, in sequence. The offset starts moving forward again after receiving
datagrams with len > 0.
Signed-off-by: Benjamin Poirier <[email protected]>
---
* v2 also fix the situation when sk_peek_off must advance to and beyond a
0-sized skb
* v1 fix the case when SO_PEEK_OFF is used to set sk_peek_off beyond a
0-sized skb
net/core/datagram.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3..99c4f52 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
skb_queue_walk(queue, skb) {
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
- if (*off >= skb->len && skb->len) {
+ if (*off >= skb->len && (skb->len || *off ||
+ skb->peeked)) {
*off -= skb->len;
continue;
}
--
1.7.10.4
Currently, peeking on a unix stream socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data.
This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.
Signed-off-by: Benjamin Poirier <[email protected]>
---
net/unix/af_unix.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d..1a02af0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1859,10 +1859,10 @@ out:
}
/*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
*/
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
{
DEFINE_WAIT(wait);
@@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (skb_peek_tail(&sk->sk_receive_queue) != last ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
return timeo;
}
-
-
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size,
int flags)
@@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
unix_state_lock(sk);
- skb = skb_peek(&sk->sk_receive_queue);
+ last = skb = skb_peek(&sk->sk_receive_queue);
again:
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
@@ -1966,7 +1962,7 @@ again:
break;
mutex_unlock(&u->readlock);
- timeo = unix_stream_data_wait(sk, timeo);
+ timeo = unix_stream_data_wait(sk, timeo, last);
if (signal_pending(current)
|| mutex_lock_interruptible(&u->readlock)) {
@@ -1980,10 +1976,13 @@ again:
break;
}
- if (skip >= skb->len) {
+ skip = sk_peek_offset(sk, flags);
+ while (skip >= skb->len) {
skip -= skb->len;
+ last = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- goto again;
+ if (!skb)
+ goto again;
}
unix_state_unlock(sk);
--
1.7.10.4
Currently, peeking on a unix datagram socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data. That's
because *off is not reset between each skb_queue_walk().
This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.
Signed-off-by: Benjamin Poirier <[email protected]>
---
v2: address review feedback
net/core/datagram.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 99c4f52..1985c9a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn
return autoremove_wake_function(wait, mode, sync, key);
}
/*
- * Wait for a packet..
+ * Wait for the last received packet to be different from skb
*/
-static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+ const struct sk_buff *skb)
{
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
if (error)
goto out_err;
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (sk->sk_receive_queue.prev != skb)
goto out;
/* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
* __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
- * @off: an offset in bytes to peek skb from. Returns an offset
- * within an skb where data actually starts
* @peeked: returns non-zero if this packet has been seen before
+ * @_off: an offset in bytes to peek skb from. Returns an offset
+ * within an skb where data actually starts
* @err: error code returned
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -159,9 +160,9 @@ out_noerr:
* the standard around please.
*/
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
- int *peeked, int *off, int *err)
+ int *peeked, int *_off, int *err)
{
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -182,14 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
*/
unsigned long cpu_flags;
struct sk_buff_head *queue = &sk->sk_receive_queue;
+ int off = *_off;
+ last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
skb_queue_walk(queue, skb) {
+ last = skb;
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
- if (*off >= skb->len && (skb->len || *off ||
- skb->peeked)) {
- *off -= skb->len;
+ if (off >= skb->len && (skb->len || off ||
+ skb->peeked)) {
+ off -= skb->len;
continue;
}
skb->peeked = 1;
@@ -198,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
__skb_unlink(skb, queue);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ *_off = off;
return skb;
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
@@ -207,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
if (!timeo)
goto no_packet;
- } while (!wait_for_packet(sk, err, &timeo));
+ } while (!wait_for_more_packets(sk, err, &timeo, last));
return NULL;
--
1.7.10.4
On Fri, 26 Apr 2013 at 18:35 GMT, Benjamin Poirier <[email protected]> wrote:
> struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
> - int *peeked, int *off, int *err)
> + int *peeked, int *_off, int *err)
> {
> - struct sk_buff *skb;
> + struct sk_buff *skb, *last;
> long timeo;
> /*
> * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
> @@ -182,14 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
> */
> unsigned long cpu_flags;
> struct sk_buff_head *queue = &sk->sk_receive_queue;
> + int off = *_off;
>
Name the local variable '_off', not the function argument.
Or pick another name, e.g. 'offset'.
"77c1090 net: fix infinite loop in __skb_recv_datagram()" (v3.8) introduced a
regression:
After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
__skb_recv_datagram() instead stops at the first skb with len == 0 and results
in the system call failing with -EFAULT via skb_copy_datagram_iovec().
When peeking at an offset with 0-sized skb(s), each one of those is received
only once, in sequence. The offset starts moving forward again after receiving
datagrams with len > 0.
Signed-off-by: Benjamin Poirier <[email protected]>
---
* v1 fix the case when SO_PEEK_OFF is used to set sk_peek_off beyond a
0-sized skb
* v2 also fix the situation when sk_peek_off must advance to and beyond a
0-sized skb
net/core/datagram.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3..99c4f52 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
skb_queue_walk(queue, skb) {
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
- if (*off >= skb->len && skb->len) {
+ if (*off >= skb->len && (skb->len || *off ||
+ skb->peeked)) {
*off -= skb->len;
continue;
}
--
1.7.10.4
Currently, peeking on a unix datagram socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data. That's
because *off is not reset between each skb_queue_walk().
This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.
Signed-off-by: Benjamin Poirier <[email protected]>
---
v2: address review feedback from Eric Dumazet
v3: address review feedback from Cong Wang
net/core/datagram.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 99c4f52..b5d48ac 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn
return autoremove_wake_function(wait, mode, sync, key);
}
/*
- * Wait for a packet..
+ * Wait for the last received packet to be different from skb
*/
-static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+ const struct sk_buff *skb)
{
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
if (error)
goto out_err;
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (sk->sk_receive_queue.prev != skb)
goto out;
/* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
* __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
+ * @peeked: returns non-zero if this packet has been seen before
* @off: an offset in bytes to peek skb from. Returns an offset
* within an skb where data actually starts
- * @peeked: returns non-zero if this packet has been seen before
* @err: error code returned
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -161,7 +162,7 @@ out_noerr:
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
int *peeked, int *off, int *err)
{
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -182,14 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
*/
unsigned long cpu_flags;
struct sk_buff_head *queue = &sk->sk_receive_queue;
+ int _off = *off;
+ last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
skb_queue_walk(queue, skb) {
+ last = skb;
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
- if (*off >= skb->len && (skb->len || *off ||
+ if (_off >= skb->len && (skb->len || _off ||
skb->peeked)) {
- *off -= skb->len;
+ _off -= skb->len;
continue;
}
skb->peeked = 1;
@@ -198,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
__skb_unlink(skb, queue);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ *off = _off;
return skb;
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
@@ -207,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
if (!timeo)
goto no_packet;
- } while (!wait_for_packet(sk, err, &timeo));
+ } while (!wait_for_more_packets(sk, err, &timeo, last));
return NULL;
--
1.7.10.4
Currently, peeking on a unix stream socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data.
This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.
Signed-off-by: Benjamin Poirier <[email protected]>
---
net/unix/af_unix.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d..1a02af0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1859,10 +1859,10 @@ out:
}
/*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
*/
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
{
DEFINE_WAIT(wait);
@@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (skb_peek_tail(&sk->sk_receive_queue) != last ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
return timeo;
}
-
-
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size,
int flags)
@@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
unix_state_lock(sk);
- skb = skb_peek(&sk->sk_receive_queue);
+ last = skb = skb_peek(&sk->sk_receive_queue);
again:
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
@@ -1966,7 +1962,7 @@ again:
break;
mutex_unlock(&u->readlock);
- timeo = unix_stream_data_wait(sk, timeo);
+ timeo = unix_stream_data_wait(sk, timeo, last);
if (signal_pending(current)
|| mutex_lock_interruptible(&u->readlock)) {
@@ -1980,10 +1976,13 @@ again:
break;
}
- if (skip >= skb->len) {
+ skip = sk_peek_offset(sk, flags);
+ while (skip >= skb->len) {
skip -= skb->len;
+ last = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- goto again;
+ if (!skb)
+ goto again;
}
unix_state_unlock(sk);
--
1.7.10.4
On Mon, 2013-04-29 at 17:42 -0400, Benjamin Poirier wrote:
> "77c1090 net: fix infinite loop in __skb_recv_datagram()" (v3.8) introduced a
> regression:
> After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
> __skb_recv_datagram() instead stops at the first skb with len == 0 and results
> in the system call failing with -EFAULT via skb_copy_datagram_iovec().
>
> When peeking at an offset with 0-sized skb(s), each one of those is received
> only once, in sequence. The offset starts moving forward again after receiving
> datagrams with len > 0.
>
> Signed-off-by: Benjamin Poirier <[email protected]>
>
> ---
Acked-by: Eric Dumazet <[email protected]>
On Mon, 2013-04-29 at 17:42 -0400, Benjamin Poirier wrote:
> Currently, peeking on a unix datagram socket with an offset larger than len of
> the data in the sk receive queue returns immediately with bogus data. That's
> because *off is not reset between each skb_queue_walk().
>
> This patch fixes this so that the behavior is the same as peeking with no
> offset on an empty queue: the caller blocks.
>
> Signed-off-by: Benjamin Poirier <[email protected]>
>
> ---
>
> v2: address review feedback from Eric Dumazet
>
> v3: address review feedback from Cong Wang
>
> net/core/datagram.c | 21 +++++++++++++--------
> 1 file changed, 13 insertions(+), 8 deletions(-)
Acked-by: Eric Dumazet <[email protected]>
On Mon, 2013-04-29 at 17:42 -0400, Benjamin Poirier wrote:
> Currently, peeking on a unix stream socket with an offset larger than len of
> the data in the sk receive queue returns immediately with bogus data.
>
> This patch fixes this so that the behavior is the same as peeking with no
> offset on an empty queue: the caller blocks.
>
> Signed-off-by: Benjamin Poirier <[email protected]>
> ---
> net/unix/af_unix.c | 25 ++++++++++++-------------
> 1 file changed, 12 insertions(+), 13 deletions(-)
Acked-by: Eric Dumazet <[email protected]>
From: Benjamin Poirier <[email protected]>
Date: Mon, 29 Apr 2013 17:42:12 -0400
> "77c1090 net: fix infinite loop in __skb_recv_datagram()" (v3.8) introduced a
> regression:
> After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
> __skb_recv_datagram() instead stops at the first skb with len == 0 and results
> in the system call failing with -EFAULT via skb_copy_datagram_iovec().
>
> When peeking at an offset with 0-sized skb(s), each one of those is received
> only once, in sequence. The offset starts moving forward again after receiving
> datagrams with len > 0.
>
> Signed-off-by: Benjamin Poirier <[email protected]>
Applied.
From: Benjamin Poirier <[email protected]>
Date: Mon, 29 Apr 2013 17:42:13 -0400
> Currently, peeking on a unix datagram socket with an offset larger than len of
> the data in the sk receive queue returns immediately with bogus data. That's
> because *off is not reset between each skb_queue_walk().
>
> This patch fixes this so that the behavior is the same as peeking with no
> offset on an empty queue: the caller blocks.
>
> Signed-off-by: Benjamin Poirier <[email protected]>
Applied.
From: Benjamin Poirier <[email protected]>
Date: Mon, 29 Apr 2013 17:42:14 -0400
> Currently, peeking on a unix stream socket with an offset larger than len of
> the data in the sk receive queue returns immediately with bogus data.
>
> This patch fixes this so that the behavior is the same as peeking with no
> offset on an empty queue: the caller blocks.
>
> Signed-off-by: Benjamin Poirier <[email protected]>
Applied.