Here are three patches to fix timeouts handling in AF_RXRPC:
(1) The hard call timeout should be interpreted in seconds, not
milliseconds.
(2) Allow a waiting call to be aborted (thereby cancelling the call) in
the case a signal interrupts sendmsg() and leaves it hanging until it
is granted a channel on a connection.
(3) Kernel-generated calls get the timer started on them even if they're
still waiting to be attached to a connection. If the timer expires
before the wait is complete and a conn is attached, an oops will
occur.
David
---
The patches can be found here also:
http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=rxrpc-fixes
David Howells (3):
rxrpc: Fix hard call timeout units
rxrpc: Make it so that a waiting process can be aborted
rxrpc: Fix timeout of a call that hasn't yet been granted a channel
fs/afs/afs.h | 4 ++--
fs/afs/internal.h | 2 +-
fs/afs/rxrpc.c | 8 +++-----
include/net/af_rxrpc.h | 21 +++++++++++----------
net/rxrpc/af_rxrpc.c | 3 +++
net/rxrpc/ar-internal.h | 1 +
net/rxrpc/call_object.c | 9 ++++++++-
net/rxrpc/sendmsg.c | 10 +++++++---
8 files changed, 36 insertions(+), 22 deletions(-)
afs_make_call() calls rxrpc_kernel_begin_call() to begin a call (which may
get stalled in the background waiting for a connection to become
available); it then calls rxrpc_kernel_set_max_life() to set the timeouts -
but that starts the call timer so the call timer might then expire before
we get a connection assigned - leading to the following oops if the call
stalled:
BUG: kernel NULL pointer dereference, address: 0000000000000000
...
CPU: 1 PID: 5111 Comm: krxrpcio/0 Not tainted 6.3.0-rc7-build3+ #701
RIP: 0010:rxrpc_alloc_txbuf+0xc0/0x157
...
Call Trace:
<TASK>
rxrpc_send_ACK+0x50/0x13b
rxrpc_input_call_event+0x16a/0x67d
rxrpc_io_thread+0x1b6/0x45f
? _raw_spin_unlock_irqrestore+0x1f/0x35
? rxrpc_input_packet+0x519/0x519
kthread+0xe7/0xef
? kthread_complete_and_exit+0x1b/0x1b
ret_from_fork+0x22/0x30
Fix this by noting the timeouts in struct rxrpc_call when the call is
created. The timer will be started when the first packet is transmitted.
It shouldn't be possible to trigger this directly from userspace through
AF_RXRPC as sendmsg() will return EBUSY if the call is in the
waiting-for-conn state if it dropped out of the wait due to a signal.
Fixes: 9d35d880e0e4 ("rxrpc: Move client call connection to the I/O thread")
Reported-by: Marc Dionne <[email protected]>
Signed-off-by: David Howells <[email protected]>
cc: "David S. Miller" <[email protected]>
cc: Eric Dumazet <[email protected]>
cc: Jakub Kicinski <[email protected]>
cc: Paolo Abeni <[email protected]>
cc: [email protected]
cc: [email protected]
cc: [email protected]
---
fs/afs/afs.h | 4 ++--
fs/afs/internal.h | 2 +-
fs/afs/rxrpc.c | 8 +++-----
include/net/af_rxrpc.h | 21 +++++++++++----------
net/rxrpc/af_rxrpc.c | 3 +++
net/rxrpc/ar-internal.h | 1 +
net/rxrpc/call_object.c | 9 ++++++++-
net/rxrpc/sendmsg.c | 1 +
8 files changed, 30 insertions(+), 19 deletions(-)
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 432cb4b23961..81815724db6c 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -19,8 +19,8 @@
#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
-#define AFS_VL_MAX_LIFESPAN (120 * HZ)
-#define AFS_PROBE_MAX_LIFESPAN (30 * HZ)
+#define AFS_VL_MAX_LIFESPAN 120
+#define AFS_PROBE_MAX_LIFESPAN 30
typedef u64 afs_volid_t;
typedef u64 afs_vnodeid_t;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ad8523d0d038..68ae91d21b57 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -128,7 +128,7 @@ struct afs_call {
spinlock_t state_lock;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
- unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */
+ unsigned int max_lifespan; /* Maximum lifespan in secs to set if not 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned count2; /* count used in unmarshalling */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e08b850c3e6d..ed1644e7683f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -335,7 +335,9 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
/* create a call */
rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
(unsigned long)call,
- tx_total_len, gfp,
+ tx_total_len,
+ call->max_lifespan,
+ gfp,
(call->async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter),
@@ -350,10 +352,6 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
}
call->rxcall = rxcall;
-
- if (call->max_lifespan)
- rxrpc_kernel_set_max_life(call->net->socket, rxcall,
- call->max_lifespan);
call->issue_time = ktime_get_real();
/* send the request */
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 01a35e113ab9..5531dd08061e 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -40,16 +40,17 @@ typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long);
void rxrpc_kernel_new_call_notification(struct socket *,
rxrpc_notify_new_call_t,
rxrpc_discard_new_call_t);
-struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
- struct sockaddr_rxrpc *,
- struct key *,
- unsigned long,
- s64,
- gfp_t,
- rxrpc_notify_rx_t,
- bool,
- enum rxrpc_interruptibility,
- unsigned int);
+struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
+ struct sockaddr_rxrpc *srx,
+ struct key *key,
+ unsigned long user_call_ID,
+ s64 tx_total_len,
+ u32 hard_timeout,
+ gfp_t gfp,
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade,
+ enum rxrpc_interruptibility interruptibility,
+ unsigned int debug_id);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
rxrpc_notify_end_tx_t);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c32b164206f9..31f738d65f1c 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -265,6 +265,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
* @tx_total_len: Total length of data to transmit during the call (or -1)
+ * @hard_timeout: The maximum lifespan of the call in sec
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
* @upgrade: Request service upgrade for call
@@ -283,6 +284,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
+ u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
bool upgrade,
@@ -313,6 +315,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
p.tx_total_len = tx_total_len;
p.interruptibility = interruptibility;
p.kernel = true;
+ p.timeouts.hard = hard_timeout;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 67b0a894162d..5d44dc08f66d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -616,6 +616,7 @@ struct rxrpc_call {
unsigned long expect_term_by; /* When we expect call termination by */
u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
+ u32 hard_timo; /* Maximum lifetime or 0 (jif) */
struct timer_list timer; /* Combined event timer */
struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index e9f1f49d18c2..fecbc73054bc 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -226,6 +226,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
if (cp->exclusive)
__set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+ if (p->timeouts.normal)
+ call->next_rx_timo = min(msecs_to_jiffies(p->timeouts.normal), 1UL);
+ if (p->timeouts.idle)
+ call->next_req_timo = min(msecs_to_jiffies(p->timeouts.idle), 1UL);
+ if (p->timeouts.hard)
+ call->hard_timo = p->timeouts.hard * HZ;
+
ret = rxrpc_init_client_call_security(call);
if (ret < 0) {
rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret);
@@ -257,7 +264,7 @@ void rxrpc_start_call_timer(struct rxrpc_call *call)
call->keepalive_at = j;
call->expect_rx_by = j;
call->expect_req_by = j;
- call->expect_term_by = j;
+ call->expect_term_by = j + call->hard_timo;
call->timer.expires = now;
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index c1b074c17b33..8e0b94714e84 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -651,6 +651,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
if (IS_ERR(call))
return PTR_ERR(call);
/* ... and we have the call lock. */
+ p.call.nr_timeouts = 0;
ret = 0;
if (rxrpc_call_is_complete(call))
goto out_put_unlock;
Hello:
This series was applied to netdev/net.git (main)
by David S. Miller <[email protected]>:
On Fri, 28 Apr 2023 21:27:53 +0100 you wrote:
> Here are three patches to fix timeouts handling in AF_RXRPC:
>
> (1) The hard call timeout should be interpreted in seconds, not
> milliseconds.
>
> (2) Allow a waiting call to be aborted (thereby cancelling the call) in
> the case a signal interrupts sendmsg() and leaves it hanging until it
> is granted a channel on a connection.
>
> [...]
Here is the summary with links:
- [net,1/3] rxrpc: Fix hard call timeout units
https://git.kernel.org/netdev/net/c/0d098d83c5d9
- [net,2/3] rxrpc: Make it so that a waiting process can be aborted
https://git.kernel.org/netdev/net/c/0eb362d25481
- [net,3/3] rxrpc: Fix timeout of a call that hasn't yet been granted a channel
https://git.kernel.org/netdev/net/c/db099c625b13
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html