2023-03-20 14:24:26

by Chuck Lever

[permalink] [raw]
Subject: [PATCH RFC 0/5] NFSD support for RPC-with-TLS

Hi-

This is server-side support for RPC-with-TLS, to accompany similar
support in the Linux NFS client. This implementation can support
both the opportunistic use of transport layer security (it will be
used if the client cares to) and the required use of transport
layer security (the server requires the client to use it to access
a particular export).

The kernel patches, along with the the handshake upcall, are carried
in the topic-rpc-with-tls-upcall branch available from:

https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git

The user space componenet of the upcall can be found in the
netlink-v7 branch from:

https://github.com/oracle/ktls-utils

This work includes a man page, tlshd(8), that explains how to set
up certificates for the server to use. Currently, NFS support for
RPC-with-TLS does not implement support for pre-shared keys.

Without any other user space componentry, this implementation will
be able to handle clients that request the use of RPC-with-TLS. To
support security policies that restrict access to exports based on
the client's use of TLS, modifications to exportfs and mountd are
needed. These can be found here:

git://git.linux-nfs.org/projects/cel/nfs-utils.git

They include an update to exports(5) explaining how to use the new
"xprtsec=" export option. I will post these for review under
separate cover.

---

Chuck Lever (5):
SUNRPC: Revert 987c7b1d094d
SUNRPC: Recognize control messages in server-side TCP socket code
SUNRPC: Ensure server-side sockets have a sock->file
SUNRPC: Support TLS handshake in the server-side TCP socket code
NFSD: Handle new xprtsec= export option


fs/nfsd/export.c | 53 +++++++++-
fs/nfsd/export.h | 11 ++
include/linux/sunrpc/svc_xprt.h | 5 +-
include/linux/sunrpc/svcsock.h | 2 +
include/trace/events/sunrpc.h | 42 +++++++-
net/sunrpc/svc_xprt.c | 5 +-
net/sunrpc/svcauth_unix.c | 11 +-
net/sunrpc/svcsock.c | 177 +++++++++++++++++++++++++++++---
8 files changed, 284 insertions(+), 22 deletions(-)

--
Chuck Lever



2023-03-20 14:24:33

by Chuck Lever

[permalink] [raw]
Subject: [PATCH RFC 1/5] SUNRPC: Revert 987c7b1d094d

From: Chuck Lever <[email protected]>

I noticed that server-side TLS always sends the RPC record marker as
a separate 4-byte TLS record. This is très inefficace.

It seems to be due to commit 987c7b1d094d ("SUNRPC: Remove redundant
socket flags from svc_tcp_sendmsg()"), which removed the MSG_* flags
from svc_tcp_sendmsg() in favor of using tcp_sock_set_cork. TLS,
which rides on top of TCP, ignores the socket's cork setting.

Client-side RPC also uses tcp_sock_set_cork, but it still sets
MSG_MORE. I don't see similar uncoalesced TLS record traffic there.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/svcsock.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03a4f5615086..28deb4acc392 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1086,9 +1086,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
.iov_len = sizeof(marker),
};
struct msghdr msg = {
- .msg_flags = 0,
+ .msg_flags = MSG_MORE,
};
- int ret;
+ int flags, ret;

*sentp = 0;
ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
@@ -1101,8 +1101,8 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
*sentp += ret;
if (ret != rm.iov_len)
return -EAGAIN;
-
- ret = svc_tcp_send_kvec(sock, head, 0);
+ flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0;
+ ret = svc_tcp_send_kvec(sock, head, flags);
if (ret < 0)
return ret;
*sentp += ret;
@@ -1117,10 +1117,12 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
offset = offset_in_page(xdr->page_base);
remaining = xdr->page_len;
while (remaining > 0) {
+ if (remaining <= PAGE_SIZE && tail->iov_len == 0)
+ flags = 0;
len = min(remaining, bvec->bv_len - offset);
ret = kernel_sendpage(sock, bvec->bv_page,
bvec->bv_offset + offset,
- len, 0);
+ len, flags);
if (ret < 0)
return ret;
*sentp += ret;



2023-03-20 14:24:39

by Chuck Lever

[permalink] [raw]
Subject: [PATCH RFC 2/5] SUNRPC: Recognize control messages in server-side TCP socket code

From: Chuck Lever <[email protected]>

To support kTLS, the server-side TCP socket code needs to watch for
CMSGs, just like on the client side.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 26 ++++++++++++++++++++++
net/sunrpc/svcsock.c | 49 +++++++++++++++++++++++++++++++++++++++--
2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 280a31be3806..cf286a0a17d0 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -2370,6 +2370,32 @@ DECLARE_EVENT_CLASS(svcsock_accept_class,
DEFINE_ACCEPT_EVENT(accept);
DEFINE_ACCEPT_EVENT(getpeername);

+TRACE_EVENT(svcsock_tls_ctype,
+ TP_PROTO(
+ const struct svc_xprt *xprt,
+ unsigned char ctype
+ ),
+
+ TP_ARGS(xprt, ctype),
+
+ TP_STRUCT__entry(
+ SVC_XPRT_ENDPOINT_FIELDS(xprt)
+
+ __field(unsigned long, ctype)
+ ),
+
+ TP_fast_assign(
+ SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
+
+ __entry->ctype = ctype;
+ ),
+
+ TP_printk(SVC_XPRT_ENDPOINT_FORMAT " %s",
+ SVC_XPRT_ENDPOINT_VARARGS,
+ rpc_show_tls_content_type(__entry->ctype)
+ )
+);
+
DECLARE_EVENT_CLASS(cache_event,
TP_PROTO(
const struct cache_detail *cd,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 28deb4acc392..91a62bea6999 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -43,6 +43,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
+#include <net/tls.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/ioctls.h>
@@ -216,6 +217,50 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}

+static int
+svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg,
+ struct cmsghdr *cmsg, int ret)
+{
+ if (cmsg->cmsg_level == SOL_TLS &&
+ cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
+ u8 content_type = *((u8 *)CMSG_DATA(cmsg));
+
+ trace_svcsock_tls_ctype(&svsk->sk_xprt, content_type);
+ switch (content_type) {
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ ret = -ENOTCONN;
+ break;
+ default:
+ ret = -EAGAIN;
+ }
+ }
+ return ret;
+}
+
+static int
+svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
+{
+ union {
+ struct cmsghdr cmsg;
+ u8 buf[CMSG_SPACE(sizeof(u8))];
+ } u;
+ int ret;
+
+ msg->msg_control = &u;
+ msg->msg_controllen = sizeof(u);
+ ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT);
+ if (unlikely(msg->msg_controllen != sizeof(u)))
+ ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret);
+ return ret;
+}
+
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
{
@@ -263,7 +308,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
iov_iter_advance(&msg.msg_iter, seek);
buflen -= seek;
}
- len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ len = svc_tcp_sock_recv_cmsg(svsk, &msg);
if (len > 0)
svc_flush_bvec(bvec, len, seek);

@@ -877,7 +922,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
iov.iov_len = want;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
- len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ len = svc_tcp_sock_recv_cmsg(svsk, &msg);
if (len < 0)
return len;
svsk->sk_tcplen += len;



2023-03-20 14:24:48

by Chuck Lever

[permalink] [raw]
Subject: [PATCH RFC 3/5] SUNRPC: Ensure server-side sockets have a sock->file

From: Chuck Lever <[email protected]>

The TLS handshake upcall mechanism requires a non-NULL sock->file on
the socket it hands to user space. svc_sock_free() already releases
sock->file properly if one exists.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/svcsock.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 91a62bea6999..b6df73cb706a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1340,26 +1340,37 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct socket *sock,
int flags)
{
+ struct file *filp = NULL;
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
- int err = 0;

svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);

+ if (!sock->file) {
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ if (IS_ERR(filp)) {
+ kfree(svsk);
+ return ERR_CAST(filp);
+ }
+ }
+
inet = sock->sk;

- /* Register socket with portmapper */
- if (pmap_register)
+ if (pmap_register) {
+ int err;
+
err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
inet->sk_protocol,
ntohs(inet_sk(inet)->inet_sport));
-
- if (err < 0) {
- kfree(svsk);
- return ERR_PTR(err);
+ if (err < 0) {
+ if (filp)
+ fput(filp);
+ kfree(svsk);
+ return ERR_PTR(err);
+ }
}

svsk->sk_sock = sock;



2023-03-20 14:24:55

by Chuck Lever

[permalink] [raw]
Subject: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code

From: Chuck Lever <[email protected]>

This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel
NFS server. If the client requests RPC-with-TLS and the user space
handshake agent is running, the server will set up a TLS session.

There are no policy settings yet. For example, the server cannot
yet require the use of RPC-with-TLS to access its data.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/svc_xprt.h | 5 ++
include/linux/sunrpc/svcsock.h | 2 +
include/trace/events/sunrpc.h | 16 ++++++-
net/sunrpc/svc_xprt.c | 5 ++
net/sunrpc/svcauth_unix.c | 11 ++++-
net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++
6 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 775368802762..867479204840 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -27,7 +27,7 @@ struct svc_xprt_ops {
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
void (*xpo_kill_temp_xprt)(struct svc_xprt *);
- void (*xpo_start_tls)(struct svc_xprt *);
+ void (*xpo_handshake)(struct svc_xprt *xprt);
};

struct svc_xprt_class {
@@ -70,6 +70,9 @@ struct svc_xprt {
#define XPT_LOCAL 12 /* connection from loopback interface */
#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
#define XPT_CONG_CTRL 14 /* has congestion control */
+#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
+#define XPT_TLS_SESSION 16 /* transport-layer security established */
+#define XPT_PEER_AUTH 17 /* peer has been authenticated */

struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index bcc555c7ae9c..1175e1c38bac 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -38,6 +38,8 @@ struct svc_sock {
/* Number of queued send requests */
atomic_t sk_sendqlen;

+ struct completion sk_handshake_done;
+
struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
};

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index cf286a0a17d0..2667a8db4811 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency,
{ BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
{ BIT(XPT_LOCAL), "LOCAL" }, \
{ BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
- { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
+ { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
+ { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
+ { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
+ { BIT(XPT_PEER_AUTH), "PEER_AUTH" })

TRACE_EVENT(svc_xprt_create_err,
TP_PROTO(
@@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close);
DEFINE_SVC_XPRT_EVENT(detach);
DEFINE_SVC_XPRT_EVENT(free);

+#define DEFINE_SVC_TLS_EVENT(name) \
+ DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
+ TP_PROTO(const struct svc_xprt *xprt), \
+ TP_ARGS(xprt))
+
+DEFINE_SVC_TLS_EVENT(start);
+DEFINE_SVC_TLS_EVENT(upcall);
+DEFINE_SVC_TLS_EVENT(unavailable);
+DEFINE_SVC_TLS_EVENT(not_started);
+DEFINE_SVC_TLS_EVENT(timed_out);
+
TRACE_EVENT(svc_xprt_accept,
TP_PROTO(
const struct svc_xprt *xprt,
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index ba629297da4e..b68c04dbf876 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)

if (xpt_flags & BIT(XPT_BUSY))
return false;
- if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
+ if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
return true;
if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
@@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
module_put(xprt->xpt_class->xcl_owner);
}
svc_xprt_received(xprt);
+ } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
+ xprt->xpt_ops->xpo_handshake(xprt);
+ svc_xprt_received(xprt);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 983c5891cb56..374995201df4 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -17,8 +17,9 @@
#include <net/ipv6.h>
#include <linux/kernel.h>
#include <linux/user_namespace.h>
-#define RPCDBG_FACILITY RPCDBG_AUTH
+#include <trace/events/sunrpc.h>

+#define RPCDBG_FACILITY RPCDBG_AUTH

#include "netns.h"

@@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
u32 flavor, len;
void *body;
__be32 *p;
@@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
if (cred->cr_group_info == NULL)
return SVC_CLOSE;

- if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
+ if (xprt->xpt_ops->xpo_handshake) {
p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
if (!p)
return SVC_CLOSE;
+ trace_svc_tls_start(xprt);
*p++ = rpc_auth_null;
*p++ = cpu_to_be32(8);
memcpy(p, "STARTTLS", 8);
+
+ set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
} else {
+ trace_svc_tls_unavailable(xprt);
if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
RPC_AUTH_NULL, NULL, 0) < 0)
return SVC_CLOSE;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b6df73cb706a..16ba8d6ab20e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -44,9 +44,11 @@
#include <net/tcp.h>
#include <net/tcp_states.h>
#include <net/tls.h>
+#include <net/handshake.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/ioctls.h>
+#include <linux/key.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
@@ -64,6 +66,7 @@

#define RPCDBG_FACILITY RPCDBG_SVCXPRT

+#define SVC_HANDSHAKE_TO (20U * HZ)

static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
@@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk)
rmb();
svsk->sk_odata(sk);
trace_svcsock_data_ready(&svsk->sk_xprt, 0);
+ if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
+ return;
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
sock_no_linger(svsk->sk_sock->sk);
}

+/**
+ * svc_tcp_handshake_done - Handshake completion handler
+ * @data: address of xprt to wake
+ * @status: status of handshake
+ * @peerid: serial number of key containing the remote peer's identity
+ *
+ * If a security policy is specified as an export option, we don't
+ * have a specific export here to check. So we set a "TLS session
+ * is present" flag on the xprt and let an upper layer enforce local
+ * security policy.
+ */
+static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
+{
+ struct svc_xprt *xprt = data;
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+ if (!status) {
+ if (peerid != TLS_NO_PEERID)
+ set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
+ set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+ }
+ clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ complete_all(&svsk->sk_handshake_done);
+}
+
+/**
+ * svc_tcp_handshake - Perform a transport-layer security handshake
+ * @xprt: connected transport endpoint
+ *
+ */
+static void svc_tcp_handshake(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct tls_handshake_args args = {
+ .ta_sock = svsk->sk_sock,
+ .ta_done = svc_tcp_handshake_done,
+ .ta_data = xprt,
+ };
+ int ret;
+
+ trace_svc_tls_upcall(xprt);
+
+ clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+ init_completion(&svsk->sk_handshake_done);
+ smp_wmb();
+
+ ret = tls_server_hello_x509(&args, GFP_KERNEL);
+ if (ret) {
+ trace_svc_tls_not_started(xprt);
+ goto out_failed;
+ }
+
+ ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
+ SVC_HANDSHAKE_TO);
+ if (ret <= 0) {
+ if (tls_handshake_cancel(svsk->sk_sock)) {
+ trace_svc_tls_timed_out(xprt);
+ goto out_close;
+ }
+ }
+
+ if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
+ trace_svc_tls_unavailable(xprt);
+ goto out_close;
+ }
+
+ /*
+ * Mark the transport ready in case the remote sent RPC
+ * traffic before the kernel received the handshake
+ * completion downcall.
+ */
+ set_bit(XPT_DATA, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+ return;
+
+out_close:
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+out_failed:
+ clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ set_bit(XPT_DATA, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+}
+
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
@@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
+ .xpo_handshake = svc_tcp_handshake,
};

static struct svc_xprt_class svc_tcp_class = {
@@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);

+ tls_handshake_cancel(svsk->sk_sock);
+
if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
else



2023-03-20 14:24:58

by Chuck Lever

[permalink] [raw]
Subject: [PATCH RFC 5/5] NFSD: Handle new xprtsec= export option

From: Chuck Lever <[email protected]>

Enable administrators to require clients to use transport layer
security when accessing particular exports.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/export.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
fs/nfsd/export.h | 11 +++++++++++
2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 668c7527b17e..171ebc21bf07 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -439,7 +439,6 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid)
return -EINVAL;
}
return 0;
-
}

#ifdef CONFIG_NFSD_V4
@@ -546,6 +545,31 @@ static inline int
secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
#endif

+static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp)
+{
+ unsigned int i, mode, listsize;
+ int err;
+
+ err = get_uint(mesg, &listsize);
+ if (err)
+ return err;
+ if (listsize > 3)
+ return -EINVAL;
+
+ exp->ex_xprtsec_modes = 0;
+ for (i = 0; i < listsize; i++) {
+ err = get_uint(mesg, &mode);
+ if (err)
+ return err;
+ mode--;
+ if (mode > 2)
+ return -EINVAL;
+ /* Ad hoc */
+ exp->ex_xprtsec_modes |= 1 << mode;
+ }
+ return 0;
+}
+
static inline int
nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
{
@@ -608,6 +632,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_client = dom;
exp.cd = cd;
exp.ex_devid_map = NULL;
+ exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL;

/* expiry */
err = -EINVAL;
@@ -650,6 +675,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
+ else if (strcmp(buf, "xprtsec") == 0)
+ err = xprtsec_parse(&mesg, buf, &exp);
else
/* quietly ignore unknown words and anything
* following. Newer user-space can try to set
@@ -663,6 +690,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
if (err)
goto out4;
+
/*
* No point caching this if it would immediately expire.
* Also, this protects exportfs's dummy export from the
@@ -824,6 +852,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
for (i = 0; i < MAX_SECINFO_LIST; i++) {
new->ex_flavors[i] = item->ex_flavors[i];
}
+ new->ex_xprtsec_modes = item->ex_xprtsec_modes;
}

static struct cache_head *svc_export_alloc(void)
@@ -1035,9 +1064,26 @@ static struct svc_export *exp_find(struct cache_detail *cd,

__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
{
- struct exp_flavor_info *f;
- struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
+ struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
+ if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
+ goto ok;
+ }
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
+ if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
+ !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
+ goto ok;
+ }
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
+ if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
+ test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
+ goto ok;
+ }
+ goto denied;

+ok:
/* legacy gss-only clients are always OK: */
if (exp->ex_client == rqstp->rq_gssclient)
return 0;
@@ -1062,6 +1108,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
if (nfsd4_spo_must_allow(rqstp))
return 0;

+denied:
return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec;
}

diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index d03f7f6a8642..61e1e8383c3d 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -77,8 +77,19 @@ struct svc_export {
struct cache_detail *cd;
struct rcu_head ex_rcu;
struct export_stats ex_stats;
+ unsigned long ex_xprtsec_modes;
};

+enum {
+ NFSEXP_XPRTSEC_NONE = 0x01,
+ NFSEXP_XPRTSEC_TLS = 0x02,
+ NFSEXP_XPRTSEC_MTLS = 0x04,
+};
+
+#define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \
+ NFSEXP_XPRTSEC_TLS | \
+ NFSEXP_XPRTSEC_MTLS)
+
/* an "export key" (expkey) maps a filehandlefragement to an
* svc_export for a given client. There can be several per export,
* for the different fsid types.



2023-03-21 11:43:13

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code

On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
> From: Chuck Lever <[email protected]>
>
> This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel
> NFS server. If the client requests RPC-with-TLS and the user space
> handshake agent is running, the server will set up a TLS session.
>
> There are no policy settings yet. For example, the server cannot
> yet require the use of RPC-with-TLS to access its data.
>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
> include/linux/sunrpc/svc_xprt.h | 5 ++
> include/linux/sunrpc/svcsock.h | 2 +
> include/trace/events/sunrpc.h | 16 ++++++-
> net/sunrpc/svc_xprt.c | 5 ++
> net/sunrpc/svcauth_unix.c | 11 ++++-
> net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++
> 6 files changed, 125 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> index 775368802762..867479204840 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -27,7 +27,7 @@ struct svc_xprt_ops {
> void (*xpo_detach)(struct svc_xprt *);
> void (*xpo_free)(struct svc_xprt *);
> void (*xpo_kill_temp_xprt)(struct svc_xprt *);
> - void (*xpo_start_tls)(struct svc_xprt *);
> + void (*xpo_handshake)(struct svc_xprt *xprt);
> };
>
> struct svc_xprt_class {
> @@ -70,6 +70,9 @@ struct svc_xprt {
> #define XPT_LOCAL 12 /* connection from loopback interface */
> #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
> #define XPT_CONG_CTRL 14 /* has congestion control */
> +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
> +#define XPT_TLS_SESSION 16 /* transport-layer security established */
> +#define XPT_PEER_AUTH 17 /* peer has been authenticated */
>
> struct svc_serv *xpt_server; /* service for transport */
> atomic_t xpt_reserved; /* space on outq that is rsvd */
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index bcc555c7ae9c..1175e1c38bac 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -38,6 +38,8 @@ struct svc_sock {
> /* Number of queued send requests */
> atomic_t sk_sendqlen;
>
> + struct completion sk_handshake_done;
> +
> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
> };
>
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index cf286a0a17d0..2667a8db4811 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency,
> { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
> { BIT(XPT_LOCAL), "LOCAL" }, \
> { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
> - { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
> + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
> + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
> + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
> + { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
>
> TRACE_EVENT(svc_xprt_create_err,
> TP_PROTO(
> @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close);
> DEFINE_SVC_XPRT_EVENT(detach);
> DEFINE_SVC_XPRT_EVENT(free);
>
> +#define DEFINE_SVC_TLS_EVENT(name) \
> + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
> + TP_PROTO(const struct svc_xprt *xprt), \
> + TP_ARGS(xprt))
> +
> +DEFINE_SVC_TLS_EVENT(start);
> +DEFINE_SVC_TLS_EVENT(upcall);
> +DEFINE_SVC_TLS_EVENT(unavailable);
> +DEFINE_SVC_TLS_EVENT(not_started);
> +DEFINE_SVC_TLS_EVENT(timed_out);
> +
> TRACE_EVENT(svc_xprt_accept,
> TP_PROTO(
> const struct svc_xprt *xprt,
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index ba629297da4e..b68c04dbf876 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
>
> if (xpt_flags & BIT(XPT_BUSY))
> return false;
> - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
> + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
> return true;
> if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
> if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
> @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> module_put(xprt->xpt_class->xcl_owner);
> }
> svc_xprt_received(xprt);
> + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
> + xprt->xpt_ops->xpo_handshake(xprt);
> + svc_xprt_received(xprt);
> } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
> /* XPT_DATA|XPT_DEFERRED case: */
> dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
> index 983c5891cb56..374995201df4 100644
> --- a/net/sunrpc/svcauth_unix.c
> +++ b/net/sunrpc/svcauth_unix.c
> @@ -17,8 +17,9 @@
> #include <net/ipv6.h>
> #include <linux/kernel.h>
> #include <linux/user_namespace.h>
> -#define RPCDBG_FACILITY RPCDBG_AUTH
> +#include <trace/events/sunrpc.h>
>
> +#define RPCDBG_FACILITY RPCDBG_AUTH
>
> #include "netns.h"
>
> @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
> {
> struct xdr_stream *xdr = &rqstp->rq_arg_stream;
> struct svc_cred *cred = &rqstp->rq_cred;
> + struct svc_xprt *xprt = rqstp->rq_xprt;
> u32 flavor, len;
> void *body;
> __be32 *p;
> @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
> if (cred->cr_group_info == NULL)
> return SVC_CLOSE;
>
> - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
> + if (xprt->xpt_ops->xpo_handshake) {
> p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
> if (!p)
> return SVC_CLOSE;
> + trace_svc_tls_start(xprt);
> *p++ = rpc_auth_null;
> *p++ = cpu_to_be32(8);
> memcpy(p, "STARTTLS", 8);
> +
> + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> + svc_xprt_enqueue(xprt);
> } else {
> + trace_svc_tls_unavailable(xprt);
> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
> RPC_AUTH_NULL, NULL, 0) < 0)
> return SVC_CLOSE;
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index b6df73cb706a..16ba8d6ab20e 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -44,9 +44,11 @@
> #include <net/tcp.h>
> #include <net/tcp_states.h>
> #include <net/tls.h>
> +#include <net/handshake.h>
> #include <linux/uaccess.h>
> #include <linux/highmem.h>
> #include <asm/ioctls.h>
> +#include <linux/key.h>
>
> #include <linux/sunrpc/types.h>
> #include <linux/sunrpc/clnt.h>
> @@ -64,6 +66,7 @@
>
> #define RPCDBG_FACILITY RPCDBG_SVCXPRT
>
> +#define SVC_HANDSHAKE_TO (20U * HZ)
>
> static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
> int flags);
> @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk)
> rmb();
> svsk->sk_odata(sk);
> trace_svcsock_data_ready(&svsk->sk_xprt, 0);
> + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
> + return;
> if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
> svc_xprt_enqueue(&svsk->sk_xprt);
> }
> @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
> sock_no_linger(svsk->sk_sock->sk);
> }
>
> +/**
> + * svc_tcp_handshake_done - Handshake completion handler
> + * @data: address of xprt to wake
> + * @status: status of handshake
> + * @peerid: serial number of key containing the remote peer's identity
> + *
> + * If a security policy is specified as an export option, we don't
> + * have a specific export here to check. So we set a "TLS session
> + * is present" flag on the xprt and let an upper layer enforce local
> + * security policy.
> + */
> +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
> +{
> + struct svc_xprt *xprt = data;
> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> +
> + if (!status) {
> + if (peerid != TLS_NO_PEERID)
> + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
> + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
> + }
> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> + complete_all(&svsk->sk_handshake_done);
> +}
> +
> +/**
> + * svc_tcp_handshake - Perform a transport-layer security handshake
> + * @xprt: connected transport endpoint
> + *
> + */
> +static void svc_tcp_handshake(struct svc_xprt *xprt)
> +{
> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> + struct tls_handshake_args args = {
> + .ta_sock = svsk->sk_sock,
> + .ta_done = svc_tcp_handshake_done,
> + .ta_data = xprt,
> + };
> + int ret;
> +
> + trace_svc_tls_upcall(xprt);
> +
> + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
> + init_completion(&svsk->sk_handshake_done);
> + smp_wmb();
> +
> + ret = tls_server_hello_x509(&args, GFP_KERNEL);
> + if (ret) {
> + trace_svc_tls_not_started(xprt);
> + goto out_failed;
> + }
> +
> + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
> + SVC_HANDSHAKE_TO);

Just curious: is this 20s timeout mandated by the spec? It seems like a
long time to block a kernel thread if so. Do we need to be concerned
with DoS attacks here? Could a client initiate handshakes and then stop
communicating, forcing the server to tie up threads with these 20s
waits?


> + if (ret <= 0) {
> + if (tls_handshake_cancel(svsk->sk_sock)) {
> + trace_svc_tls_timed_out(xprt);
> + goto out_close;
> + }
> + }
> +
> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
> + trace_svc_tls_unavailable(xprt);
> + goto out_close;
> + }
> +
> + /*
> + * Mark the transport ready in case the remote sent RPC
> + * traffic before the kernel received the handshake
> + * completion downcall.
> + */
> + set_bit(XPT_DATA, &xprt->xpt_flags);
> + svc_xprt_enqueue(xprt);
> + return;
> +
> +out_close:
> + set_bit(XPT_CLOSE, &xprt->xpt_flags);
> +out_failed:
> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> + set_bit(XPT_DATA, &xprt->xpt_flags);
> + svc_xprt_enqueue(xprt);
> +}
> +
> /*
> * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
> */
> @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
> .xpo_has_wspace = svc_tcp_has_wspace,
> .xpo_accept = svc_tcp_accept,
> .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
> + .xpo_handshake = svc_tcp_handshake,
> };
>
> static struct svc_xprt_class svc_tcp_class = {
> @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
> {
> struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>
> + tls_handshake_cancel(svsk->sk_sock);
> +
> if (svsk->sk_sock->file)
> sockfd_put(svsk->sk_sock);
> else
>
>

--
Jeff Layton <[email protected]>

2023-03-21 11:50:46

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH RFC 5/5] NFSD: Handle new xprtsec= export option

On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
> From: Chuck Lever <[email protected]>
>
> Enable administrators to require clients to use transport layer
> security when accessing particular exports.

> Signed-off-by: Chuck Lever <[email protected]>
> ---
> fs/nfsd/export.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> fs/nfsd/export.h | 11 +++++++++++
> 2 files changed, 61 insertions(+), 3 deletions(-)
>
> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
> index 668c7527b17e..171ebc21bf07 100644
> --- a/fs/nfsd/export.c
> +++ b/fs/nfsd/export.c
> @@ -439,7 +439,6 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid)
> return -EINVAL;
> }
> return 0;
> -
> }
>
> #ifdef CONFIG_NFSD_V4
> @@ -546,6 +545,31 @@ static inline int
> secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
> #endif
>
> +static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp)
> +{
> + unsigned int i, mode, listsize;
> + int err;
> +
> + err = get_uint(mesg, &listsize);
> + if (err)
> + return err;
> + if (listsize > 3)
> + return -EINVAL;

Might want to make a note that the limit of 3 here is arbitrary, and
that it might need to be lifted in the future (if/when we grow other
xprtsec options).

> +
> + exp->ex_xprtsec_modes = 0;
> + for (i = 0; i < listsize; i++) {
> + err = get_uint(mesg, &mode);
> + if (err)
> + return err;
> + mode--;
> + if (mode > 2)
> + return -EINVAL;
> + /* Ad hoc */
> + exp->ex_xprtsec_modes |= 1 << mode;
> + }
> + return 0;
> +}
> +
> static inline int
> nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
> {
> @@ -608,6 +632,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
> exp.ex_client = dom;
> exp.cd = cd;
> exp.ex_devid_map = NULL;
> + exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL;
>
> /* expiry */
> err = -EINVAL;
> @@ -650,6 +675,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
> err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
> else if (strcmp(buf, "secinfo") == 0)
> err = secinfo_parse(&mesg, buf, &exp);
> + else if (strcmp(buf, "xprtsec") == 0)
> + err = xprtsec_parse(&mesg, buf, &exp);
> else
> /* quietly ignore unknown words and anything
> * following. Newer user-space can try to set
> @@ -663,6 +690,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
> err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
> if (err)
> goto out4;
> +
> /*
> * No point caching this if it would immediately expire.
> * Also, this protects exportfs's dummy export from the
> @@ -824,6 +852,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
> for (i = 0; i < MAX_SECINFO_LIST; i++) {
> new->ex_flavors[i] = item->ex_flavors[i];
> }
> + new->ex_xprtsec_modes = item->ex_xprtsec_modes;
> }
>
> static struct cache_head *svc_export_alloc(void)
> @@ -1035,9 +1064,26 @@ static struct svc_export *exp_find(struct cache_detail *cd,
>
> __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
> {
> - struct exp_flavor_info *f;
> - struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
> + struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
> + struct svc_xprt *xprt = rqstp->rq_xprt;
> +
> + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
> + goto ok;
> + }
> + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
> + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
> + !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
> + goto ok;
> + }
> + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
> + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
> + test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
> + goto ok;
> + }
> + goto denied;
>
> +ok:
> /* legacy gss-only clients are always OK: */
> if (exp->ex_client == rqstp->rq_gssclient)
> return 0;
> @@ -1062,6 +1108,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
> if (nfsd4_spo_must_allow(rqstp))
> return 0;
>
> +denied:
> return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec;
> }
>
> diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
> index d03f7f6a8642..61e1e8383c3d 100644
> --- a/fs/nfsd/export.h
> +++ b/fs/nfsd/export.h
> @@ -77,8 +77,19 @@ struct svc_export {
> struct cache_detail *cd;
> struct rcu_head ex_rcu;
> struct export_stats ex_stats;
> + unsigned long ex_xprtsec_modes;
> };
>
> +enum {
> + NFSEXP_XPRTSEC_NONE = 0x01,
> + NFSEXP_XPRTSEC_TLS = 0x02,
> + NFSEXP_XPRTSEC_MTLS = 0x04,
> +};
> +
> +#define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \
> + NFSEXP_XPRTSEC_TLS | \
> + NFSEXP_XPRTSEC_MTLS)
> +
> /* an "export key" (expkey) maps a filehandlefragement to an
> * svc_export for a given client. There can be several per export,
> * for the different fsid types.
>
>

--
Jeff Layton <[email protected]>

2023-03-21 14:03:27

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code



> On Mar 21, 2023, at 7:43 AM, Jeff Layton <[email protected]> wrote:
>
> On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
>> From: Chuck Lever <[email protected]>
>>
>> This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel
>> NFS server. If the client requests RPC-with-TLS and the user space
>> handshake agent is running, the server will set up a TLS session.
>>
>> There are no policy settings yet. For example, the server cannot
>> yet require the use of RPC-with-TLS to access its data.
>>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> include/linux/sunrpc/svc_xprt.h | 5 ++
>> include/linux/sunrpc/svcsock.h | 2 +
>> include/trace/events/sunrpc.h | 16 ++++++-
>> net/sunrpc/svc_xprt.c | 5 ++
>> net/sunrpc/svcauth_unix.c | 11 ++++-
>> net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++
>> 6 files changed, 125 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
>> index 775368802762..867479204840 100644
>> --- a/include/linux/sunrpc/svc_xprt.h
>> +++ b/include/linux/sunrpc/svc_xprt.h
>> @@ -27,7 +27,7 @@ struct svc_xprt_ops {
>> void (*xpo_detach)(struct svc_xprt *);
>> void (*xpo_free)(struct svc_xprt *);
>> void (*xpo_kill_temp_xprt)(struct svc_xprt *);
>> - void (*xpo_start_tls)(struct svc_xprt *);
>> + void (*xpo_handshake)(struct svc_xprt *xprt);
>> };
>>
>> struct svc_xprt_class {
>> @@ -70,6 +70,9 @@ struct svc_xprt {
>> #define XPT_LOCAL 12 /* connection from loopback interface */
>> #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
>> #define XPT_CONG_CTRL 14 /* has congestion control */
>> +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
>> +#define XPT_TLS_SESSION 16 /* transport-layer security established */
>> +#define XPT_PEER_AUTH 17 /* peer has been authenticated */
>>
>> struct svc_serv *xpt_server; /* service for transport */
>> atomic_t xpt_reserved; /* space on outq that is rsvd */
>> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
>> index bcc555c7ae9c..1175e1c38bac 100644
>> --- a/include/linux/sunrpc/svcsock.h
>> +++ b/include/linux/sunrpc/svcsock.h
>> @@ -38,6 +38,8 @@ struct svc_sock {
>> /* Number of queued send requests */
>> atomic_t sk_sendqlen;
>>
>> + struct completion sk_handshake_done;
>> +
>> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
>> };
>>
>> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
>> index cf286a0a17d0..2667a8db4811 100644
>> --- a/include/trace/events/sunrpc.h
>> +++ b/include/trace/events/sunrpc.h
>> @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency,
>> { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
>> { BIT(XPT_LOCAL), "LOCAL" }, \
>> { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
>> - { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
>> + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
>> + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
>> + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
>> + { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
>>
>> TRACE_EVENT(svc_xprt_create_err,
>> TP_PROTO(
>> @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close);
>> DEFINE_SVC_XPRT_EVENT(detach);
>> DEFINE_SVC_XPRT_EVENT(free);
>>
>> +#define DEFINE_SVC_TLS_EVENT(name) \
>> + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
>> + TP_PROTO(const struct svc_xprt *xprt), \
>> + TP_ARGS(xprt))
>> +
>> +DEFINE_SVC_TLS_EVENT(start);
>> +DEFINE_SVC_TLS_EVENT(upcall);
>> +DEFINE_SVC_TLS_EVENT(unavailable);
>> +DEFINE_SVC_TLS_EVENT(not_started);
>> +DEFINE_SVC_TLS_EVENT(timed_out);
>> +
>> TRACE_EVENT(svc_xprt_accept,
>> TP_PROTO(
>> const struct svc_xprt *xprt,
>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>> index ba629297da4e..b68c04dbf876 100644
>> --- a/net/sunrpc/svc_xprt.c
>> +++ b/net/sunrpc/svc_xprt.c
>> @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
>>
>> if (xpt_flags & BIT(XPT_BUSY))
>> return false;
>> - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
>> + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
>> return true;
>> if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
>> if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
>> @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
>> module_put(xprt->xpt_class->xcl_owner);
>> }
>> svc_xprt_received(xprt);
>> + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
>> + xprt->xpt_ops->xpo_handshake(xprt);
>> + svc_xprt_received(xprt);
>> } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
>> /* XPT_DATA|XPT_DEFERRED case: */
>> dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
>> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
>> index 983c5891cb56..374995201df4 100644
>> --- a/net/sunrpc/svcauth_unix.c
>> +++ b/net/sunrpc/svcauth_unix.c
>> @@ -17,8 +17,9 @@
>> #include <net/ipv6.h>
>> #include <linux/kernel.h>
>> #include <linux/user_namespace.h>
>> -#define RPCDBG_FACILITY RPCDBG_AUTH
>> +#include <trace/events/sunrpc.h>
>>
>> +#define RPCDBG_FACILITY RPCDBG_AUTH
>>
>> #include "netns.h"
>>
>> @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
>> {
>> struct xdr_stream *xdr = &rqstp->rq_arg_stream;
>> struct svc_cred *cred = &rqstp->rq_cred;
>> + struct svc_xprt *xprt = rqstp->rq_xprt;
>> u32 flavor, len;
>> void *body;
>> __be32 *p;
>> @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
>> if (cred->cr_group_info == NULL)
>> return SVC_CLOSE;
>>
>> - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
>> + if (xprt->xpt_ops->xpo_handshake) {
>> p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
>> if (!p)
>> return SVC_CLOSE;
>> + trace_svc_tls_start(xprt);
>> *p++ = rpc_auth_null;
>> *p++ = cpu_to_be32(8);
>> memcpy(p, "STARTTLS", 8);
>> +
>> + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>> + svc_xprt_enqueue(xprt);
>> } else {
>> + trace_svc_tls_unavailable(xprt);
>> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
>> RPC_AUTH_NULL, NULL, 0) < 0)
>> return SVC_CLOSE;
>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
>> index b6df73cb706a..16ba8d6ab20e 100644
>> --- a/net/sunrpc/svcsock.c
>> +++ b/net/sunrpc/svcsock.c
>> @@ -44,9 +44,11 @@
>> #include <net/tcp.h>
>> #include <net/tcp_states.h>
>> #include <net/tls.h>
>> +#include <net/handshake.h>
>> #include <linux/uaccess.h>
>> #include <linux/highmem.h>
>> #include <asm/ioctls.h>
>> +#include <linux/key.h>
>>
>> #include <linux/sunrpc/types.h>
>> #include <linux/sunrpc/clnt.h>
>> @@ -64,6 +66,7 @@
>>
>> #define RPCDBG_FACILITY RPCDBG_SVCXPRT
>>
>> +#define SVC_HANDSHAKE_TO (20U * HZ)
>>
>> static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
>> int flags);
>> @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk)
>> rmb();
>> svsk->sk_odata(sk);
>> trace_svcsock_data_ready(&svsk->sk_xprt, 0);
>> + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
>> + return;
>> if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
>> svc_xprt_enqueue(&svsk->sk_xprt);
>> }
>> @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
>> sock_no_linger(svsk->sk_sock->sk);
>> }
>>
>> +/**
>> + * svc_tcp_handshake_done - Handshake completion handler
>> + * @data: address of xprt to wake
>> + * @status: status of handshake
>> + * @peerid: serial number of key containing the remote peer's identity
>> + *
>> + * If a security policy is specified as an export option, we don't
>> + * have a specific export here to check. So we set a "TLS session
>> + * is present" flag on the xprt and let an upper layer enforce local
>> + * security policy.
>> + */
>> +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
>> +{
>> + struct svc_xprt *xprt = data;
>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>> +
>> + if (!status) {
>> + if (peerid != TLS_NO_PEERID)
>> + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
>> + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
>> + }
>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>> + complete_all(&svsk->sk_handshake_done);
>> +}
>> +
>> +/**
>> + * svc_tcp_handshake - Perform a transport-layer security handshake
>> + * @xprt: connected transport endpoint
>> + *
>> + */
>> +static void svc_tcp_handshake(struct svc_xprt *xprt)
>> +{
>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>> + struct tls_handshake_args args = {
>> + .ta_sock = svsk->sk_sock,
>> + .ta_done = svc_tcp_handshake_done,
>> + .ta_data = xprt,
>> + };
>> + int ret;
>> +
>> + trace_svc_tls_upcall(xprt);
>> +
>> + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
>> + init_completion(&svsk->sk_handshake_done);
>> + smp_wmb();
>> +
>> + ret = tls_server_hello_x509(&args, GFP_KERNEL);
>> + if (ret) {
>> + trace_svc_tls_not_started(xprt);
>> + goto out_failed;
>> + }
>> +
>> + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
>> + SVC_HANDSHAKE_TO);
>
> Just curious: is this 20s timeout mandated by the spec?

The spec doesn't mandate a timeout. I simply wanted
to guarantee forward progress.


> It seems like a long time to block a kernel thread if so.

It's about the same as the client side timeout, fwiw.


> Do we need to be concerned
> with DoS attacks here? Could a client initiate handshakes and then stop
> communicating, forcing the server to tie up threads with these 20s
> waits?

I think a malicious client can do all kinds of similar things
already. Do you have a particular timeout value in mind, or
is there some other mechanism we can use to better bullet-
proof this aspect of the handshake? I'm open to suggestion.


>> + if (ret <= 0) {
>> + if (tls_handshake_cancel(svsk->sk_sock)) {
>> + trace_svc_tls_timed_out(xprt);
>> + goto out_close;
>> + }
>> + }
>> +
>> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
>> + trace_svc_tls_unavailable(xprt);
>> + goto out_close;
>> + }
>> +
>> + /*
>> + * Mark the transport ready in case the remote sent RPC
>> + * traffic before the kernel received the handshake
>> + * completion downcall.
>> + */
>> + set_bit(XPT_DATA, &xprt->xpt_flags);
>> + svc_xprt_enqueue(xprt);
>> + return;
>> +
>> +out_close:
>> + set_bit(XPT_CLOSE, &xprt->xpt_flags);
>> +out_failed:
>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>> + set_bit(XPT_DATA, &xprt->xpt_flags);
>> + svc_xprt_enqueue(xprt);
>> +}
>> +
>> /*
>> * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
>> */
>> @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
>> .xpo_has_wspace = svc_tcp_has_wspace,
>> .xpo_accept = svc_tcp_accept,
>> .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
>> + .xpo_handshake = svc_tcp_handshake,
>> };
>>
>> static struct svc_xprt_class svc_tcp_class = {
>> @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
>> {
>> struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>>
>> + tls_handshake_cancel(svsk->sk_sock);
>> +
>> if (svsk->sk_sock->file)
>> sockfd_put(svsk->sk_sock);
>> else
>>
>>
>
> --
> Jeff Layton <[email protected]>

--
Chuck Lever



2023-03-21 14:06:35

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH RFC 5/5] NFSD: Handle new xprtsec= export option



> On Mar 21, 2023, at 7:50 AM, Jeff Layton <[email protected]> wrote:
>
> On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
>> From: Chuck Lever <[email protected]>
>>
>> Enable administrators to require clients to use transport layer
>> security when accessing particular exports.
>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> fs/nfsd/export.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>> fs/nfsd/export.h | 11 +++++++++++
>> 2 files changed, 61 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
>> index 668c7527b17e..171ebc21bf07 100644
>> --- a/fs/nfsd/export.c
>> +++ b/fs/nfsd/export.c
>> @@ -439,7 +439,6 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid)
>> return -EINVAL;
>> }
>> return 0;
>> -
>> }
>>
>> #ifdef CONFIG_NFSD_V4
>> @@ -546,6 +545,31 @@ static inline int
>> secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
>> #endif
>>
>> +static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp)
>> +{
>> + unsigned int i, mode, listsize;
>> + int err;
>> +
>> + err = get_uint(mesg, &listsize);
>> + if (err)
>> + return err;
>> + if (listsize > 3)
>> + return -EINVAL;
>
> Might want to make a note that the limit of 3 here is arbitrary, and
> that it might need to be lifted in the future (if/when we grow other
> xprtsec options).

Well I can easily add a symbolic constant for that too. I
missed this one in the final clean-up before posting.

The bigger question is whether the new downcall parameter is
sensible. If there's a nicer way for mountd to get this
information to the kernel, I'm open to suggestion.


>> +
>> + exp->ex_xprtsec_modes = 0;
>> + for (i = 0; i < listsize; i++) {
>> + err = get_uint(mesg, &mode);
>> + if (err)
>> + return err;
>> + mode--;
>> + if (mode > 2)
>> + return -EINVAL;
>> + /* Ad hoc */
>> + exp->ex_xprtsec_modes |= 1 << mode;
>> + }
>> + return 0;
>> +}
>> +
>> static inline int
>> nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
>> {
>> @@ -608,6 +632,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
>> exp.ex_client = dom;
>> exp.cd = cd;
>> exp.ex_devid_map = NULL;
>> + exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL;
>>
>> /* expiry */
>> err = -EINVAL;
>> @@ -650,6 +675,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
>> err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
>> else if (strcmp(buf, "secinfo") == 0)
>> err = secinfo_parse(&mesg, buf, &exp);
>> + else if (strcmp(buf, "xprtsec") == 0)
>> + err = xprtsec_parse(&mesg, buf, &exp);
>> else
>> /* quietly ignore unknown words and anything
>> * following. Newer user-space can try to set
>> @@ -663,6 +690,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
>> err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
>> if (err)
>> goto out4;
>> +
>> /*
>> * No point caching this if it would immediately expire.
>> * Also, this protects exportfs's dummy export from the
>> @@ -824,6 +852,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
>> for (i = 0; i < MAX_SECINFO_LIST; i++) {
>> new->ex_flavors[i] = item->ex_flavors[i];
>> }
>> + new->ex_xprtsec_modes = item->ex_xprtsec_modes;
>> }
>>
>> static struct cache_head *svc_export_alloc(void)
>> @@ -1035,9 +1064,26 @@ static struct svc_export *exp_find(struct cache_detail *cd,
>>
>> __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
>> {
>> - struct exp_flavor_info *f;
>> - struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
>> + struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
>> + struct svc_xprt *xprt = rqstp->rq_xprt;
>> +
>> + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
>> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
>> + goto ok;
>> + }
>> + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
>> + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
>> + !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
>> + goto ok;
>> + }
>> + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
>> + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
>> + test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
>> + goto ok;
>> + }
>> + goto denied;
>>
>> +ok:
>> /* legacy gss-only clients are always OK: */
>> if (exp->ex_client == rqstp->rq_gssclient)
>> return 0;
>> @@ -1062,6 +1108,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
>> if (nfsd4_spo_must_allow(rqstp))
>> return 0;
>>
>> +denied:
>> return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec;
>> }
>>
>> diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
>> index d03f7f6a8642..61e1e8383c3d 100644
>> --- a/fs/nfsd/export.h
>> +++ b/fs/nfsd/export.h
>> @@ -77,8 +77,19 @@ struct svc_export {
>> struct cache_detail *cd;
>> struct rcu_head ex_rcu;
>> struct export_stats ex_stats;
>> + unsigned long ex_xprtsec_modes;
>> };
>>
>> +enum {
>> + NFSEXP_XPRTSEC_NONE = 0x01,
>> + NFSEXP_XPRTSEC_TLS = 0x02,
>> + NFSEXP_XPRTSEC_MTLS = 0x04,
>> +};
>> +
>> +#define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \
>> + NFSEXP_XPRTSEC_TLS | \
>> + NFSEXP_XPRTSEC_MTLS)
>> +
>> /* an "export key" (expkey) maps a filehandlefragement to an
>> * svc_export for a given client. There can be several per export,
>> * for the different fsid types.
>>
>>
>
> --
> Jeff Layton <[email protected]>
>

--
Chuck Lever



2023-03-21 14:56:59

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code

On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote:
>
> > On Mar 21, 2023, at 7:43 AM, Jeff Layton <[email protected]> wrote:
> >
> > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
> > > From: Chuck Lever <[email protected]>
> > >
> > > This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel
> > > NFS server. If the client requests RPC-with-TLS and the user space
> > > handshake agent is running, the server will set up a TLS session.
> > >
> > > There are no policy settings yet. For example, the server cannot
> > > yet require the use of RPC-with-TLS to access its data.
> > >
> > > Signed-off-by: Chuck Lever <[email protected]>
> > > ---
> > > include/linux/sunrpc/svc_xprt.h | 5 ++
> > > include/linux/sunrpc/svcsock.h | 2 +
> > > include/trace/events/sunrpc.h | 16 ++++++-
> > > net/sunrpc/svc_xprt.c | 5 ++
> > > net/sunrpc/svcauth_unix.c | 11 ++++-
> > > net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++
> > > 6 files changed, 125 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> > > index 775368802762..867479204840 100644
> > > --- a/include/linux/sunrpc/svc_xprt.h
> > > +++ b/include/linux/sunrpc/svc_xprt.h
> > > @@ -27,7 +27,7 @@ struct svc_xprt_ops {
> > > void (*xpo_detach)(struct svc_xprt *);
> > > void (*xpo_free)(struct svc_xprt *);
> > > void (*xpo_kill_temp_xprt)(struct svc_xprt *);
> > > - void (*xpo_start_tls)(struct svc_xprt *);
> > > + void (*xpo_handshake)(struct svc_xprt *xprt);
> > > };
> > >
> > > struct svc_xprt_class {
> > > @@ -70,6 +70,9 @@ struct svc_xprt {
> > > #define XPT_LOCAL 12 /* connection from loopback interface */
> > > #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
> > > #define XPT_CONG_CTRL 14 /* has congestion control */
> > > +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
> > > +#define XPT_TLS_SESSION 16 /* transport-layer security established */
> > > +#define XPT_PEER_AUTH 17 /* peer has been authenticated */
> > >
> > > struct svc_serv *xpt_server; /* service for transport */
> > > atomic_t xpt_reserved; /* space on outq that is rsvd */
> > > diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> > > index bcc555c7ae9c..1175e1c38bac 100644
> > > --- a/include/linux/sunrpc/svcsock.h
> > > +++ b/include/linux/sunrpc/svcsock.h
> > > @@ -38,6 +38,8 @@ struct svc_sock {
> > > /* Number of queued send requests */
> > > atomic_t sk_sendqlen;
> > >
> > > + struct completion sk_handshake_done;
> > > +
> > > struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
> > > };
> > >
> > > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> > > index cf286a0a17d0..2667a8db4811 100644
> > > --- a/include/trace/events/sunrpc.h
> > > +++ b/include/trace/events/sunrpc.h
> > > @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency,
> > > { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
> > > { BIT(XPT_LOCAL), "LOCAL" }, \
> > > { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
> > > - { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
> > > + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
> > > + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
> > > + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
> > > + { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
> > >
> > > TRACE_EVENT(svc_xprt_create_err,
> > > TP_PROTO(
> > > @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close);
> > > DEFINE_SVC_XPRT_EVENT(detach);
> > > DEFINE_SVC_XPRT_EVENT(free);
> > >
> > > +#define DEFINE_SVC_TLS_EVENT(name) \
> > > + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
> > > + TP_PROTO(const struct svc_xprt *xprt), \
> > > + TP_ARGS(xprt))
> > > +
> > > +DEFINE_SVC_TLS_EVENT(start);
> > > +DEFINE_SVC_TLS_EVENT(upcall);
> > > +DEFINE_SVC_TLS_EVENT(unavailable);
> > > +DEFINE_SVC_TLS_EVENT(not_started);
> > > +DEFINE_SVC_TLS_EVENT(timed_out);
> > > +
> > > TRACE_EVENT(svc_xprt_accept,
> > > TP_PROTO(
> > > const struct svc_xprt *xprt,
> > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > > index ba629297da4e..b68c04dbf876 100644
> > > --- a/net/sunrpc/svc_xprt.c
> > > +++ b/net/sunrpc/svc_xprt.c
> > > @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
> > >
> > > if (xpt_flags & BIT(XPT_BUSY))
> > > return false;
> > > - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
> > > + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
> > > return true;
> > > if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
> > > if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
> > > @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> > > module_put(xprt->xpt_class->xcl_owner);
> > > }
> > > svc_xprt_received(xprt);
> > > + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
> > > + xprt->xpt_ops->xpo_handshake(xprt);
> > > + svc_xprt_received(xprt);
> > > } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
> > > /* XPT_DATA|XPT_DEFERRED case: */
> > > dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
> > > diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
> > > index 983c5891cb56..374995201df4 100644
> > > --- a/net/sunrpc/svcauth_unix.c
> > > +++ b/net/sunrpc/svcauth_unix.c
> > > @@ -17,8 +17,9 @@
> > > #include <net/ipv6.h>
> > > #include <linux/kernel.h>
> > > #include <linux/user_namespace.h>
> > > -#define RPCDBG_FACILITY RPCDBG_AUTH
> > > +#include <trace/events/sunrpc.h>
> > >
> > > +#define RPCDBG_FACILITY RPCDBG_AUTH
> > >
> > > #include "netns.h"
> > >
> > > @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
> > > {
> > > struct xdr_stream *xdr = &rqstp->rq_arg_stream;
> > > struct svc_cred *cred = &rqstp->rq_cred;
> > > + struct svc_xprt *xprt = rqstp->rq_xprt;
> > > u32 flavor, len;
> > > void *body;
> > > __be32 *p;
> > > @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
> > > if (cred->cr_group_info == NULL)
> > > return SVC_CLOSE;
> > >
> > > - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
> > > + if (xprt->xpt_ops->xpo_handshake) {
> > > p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
> > > if (!p)
> > > return SVC_CLOSE;
> > > + trace_svc_tls_start(xprt);
> > > *p++ = rpc_auth_null;
> > > *p++ = cpu_to_be32(8);
> > > memcpy(p, "STARTTLS", 8);
> > > +
> > > + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> > > + svc_xprt_enqueue(xprt);
> > > } else {
> > > + trace_svc_tls_unavailable(xprt);
> > > if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
> > > RPC_AUTH_NULL, NULL, 0) < 0)
> > > return SVC_CLOSE;
> > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > > index b6df73cb706a..16ba8d6ab20e 100644
> > > --- a/net/sunrpc/svcsock.c
> > > +++ b/net/sunrpc/svcsock.c
> > > @@ -44,9 +44,11 @@
> > > #include <net/tcp.h>
> > > #include <net/tcp_states.h>
> > > #include <net/tls.h>
> > > +#include <net/handshake.h>
> > > #include <linux/uaccess.h>
> > > #include <linux/highmem.h>
> > > #include <asm/ioctls.h>
> > > +#include <linux/key.h>
> > >
> > > #include <linux/sunrpc/types.h>
> > > #include <linux/sunrpc/clnt.h>
> > > @@ -64,6 +66,7 @@
> > >
> > > #define RPCDBG_FACILITY RPCDBG_SVCXPRT
> > >
> > > +#define SVC_HANDSHAKE_TO (20U * HZ)
> > >
> > > static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
> > > int flags);
> > > @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk)
> > > rmb();
> > > svsk->sk_odata(sk);
> > > trace_svcsock_data_ready(&svsk->sk_xprt, 0);
> > > + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
> > > + return;
> > > if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
> > > svc_xprt_enqueue(&svsk->sk_xprt);
> > > }
> > > @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
> > > sock_no_linger(svsk->sk_sock->sk);
> > > }
> > >
> > > +/**
> > > + * svc_tcp_handshake_done - Handshake completion handler
> > > + * @data: address of xprt to wake
> > > + * @status: status of handshake
> > > + * @peerid: serial number of key containing the remote peer's identity
> > > + *
> > > + * If a security policy is specified as an export option, we don't
> > > + * have a specific export here to check. So we set a "TLS session
> > > + * is present" flag on the xprt and let an upper layer enforce local
> > > + * security policy.
> > > + */
> > > +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
> > > +{
> > > + struct svc_xprt *xprt = data;
> > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> > > +
> > > + if (!status) {
> > > + if (peerid != TLS_NO_PEERID)
> > > + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
> > > + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
> > > + }
> > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> > > + complete_all(&svsk->sk_handshake_done);
> > > +}
> > > +
> > > +/**
> > > + * svc_tcp_handshake - Perform a transport-layer security handshake
> > > + * @xprt: connected transport endpoint
> > > + *
> > > + */
> > > +static void svc_tcp_handshake(struct svc_xprt *xprt)
> > > +{
> > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> > > + struct tls_handshake_args args = {
> > > + .ta_sock = svsk->sk_sock,
> > > + .ta_done = svc_tcp_handshake_done,
> > > + .ta_data = xprt,
> > > + };
> > > + int ret;
> > > +
> > > + trace_svc_tls_upcall(xprt);
> > > +
> > > + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
> > > + init_completion(&svsk->sk_handshake_done);
> > > + smp_wmb();
> > > +
> > > + ret = tls_server_hello_x509(&args, GFP_KERNEL);
> > > + if (ret) {
> > > + trace_svc_tls_not_started(xprt);
> > > + goto out_failed;
> > > + }
> > > +
> > > + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
> > > + SVC_HANDSHAKE_TO);
> >
> > Just curious: is this 20s timeout mandated by the spec?
>
> The spec doesn't mandate a timeout. I simply wanted
> to guarantee forward progress.
>
>
> > It seems like a long time to block a kernel thread if so.
>
> It's about the same as the client side timeout, fwiw.
>
>
> > Do we need to be concerned
> > with DoS attacks here? Could a client initiate handshakes and then stop
> > communicating, forcing the server to tie up threads with these 20s
> > waits?
>
> I think a malicious client can do all kinds of similar things
> already. Do you have a particular timeout value in mind, or
> is there some other mechanism we can use to better bullet-
> proof this aspect of the handshake? I'm open to suggestion.
>

I don't have any suggestions, just trying to speculate about ways this
could break. The only alternative I could see would be to defer the
connection somehow until the reply comes in so that the thread can do
other stuff in the meantime. That's something we can potentially add
later if we decide it's necessary though.

>
> > > + if (ret <= 0) {
> > > + if (tls_handshake_cancel(svsk->sk_sock)) {
> > > + trace_svc_tls_timed_out(xprt);
> > > + goto out_close;
> > > + }
> > > + }
> > > +
> > > + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
> > > + trace_svc_tls_unavailable(xprt);
> > > + goto out_close;
> > > + }
> > > +
> > > + /*
> > > + * Mark the transport ready in case the remote sent RPC
> > > + * traffic before the kernel received the handshake
> > > + * completion downcall.
> > > + */
> > > + set_bit(XPT_DATA, &xprt->xpt_flags);
> > > + svc_xprt_enqueue(xprt);
> > > + return;
> > > +
> > > +out_close:
> > > + set_bit(XPT_CLOSE, &xprt->xpt_flags);
> > > +out_failed:
> > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> > > + set_bit(XPT_DATA, &xprt->xpt_flags);
> > > + svc_xprt_enqueue(xprt);
> > > +}
> > > +
> > > /*
> > > * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
> > > */
> > > @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
> > > .xpo_has_wspace = svc_tcp_has_wspace,
> > > .xpo_accept = svc_tcp_accept,
> > > .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
> > > + .xpo_handshake = svc_tcp_handshake,
> > > };
> > >
> > > static struct svc_xprt_class svc_tcp_class = {
> > > @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
> > > {
> > > struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> > >
> > > + tls_handshake_cancel(svsk->sk_sock);
> > > +
> > > if (svsk->sk_sock->file)
> > > sockfd_put(svsk->sk_sock);
> > > else
> > >
> > >
> >
> > --
> > Jeff Layton <[email protected]>
>
> --
> Chuck Lever
>
>

--
Jeff Layton <[email protected]>

2023-03-21 15:10:40

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH RFC 5/5] NFSD: Handle new xprtsec= export option

On Tue, 2023-03-21 at 14:05 +0000, Chuck Lever III wrote:
>
> > On Mar 21, 2023, at 7:50 AM, Jeff Layton <[email protected]> wrote:
> >
> > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
> > > From: Chuck Lever <[email protected]>
> > >
> > > Enable administrators to require clients to use transport layer
> > > security when accessing particular exports.
> >
> > > Signed-off-by: Chuck Lever <[email protected]>
> > > ---
> > > fs/nfsd/export.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> > > fs/nfsd/export.h | 11 +++++++++++
> > > 2 files changed, 61 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
> > > index 668c7527b17e..171ebc21bf07 100644
> > > --- a/fs/nfsd/export.c
> > > +++ b/fs/nfsd/export.c
> > > @@ -439,7 +439,6 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid)
> > > return -EINVAL;
> > > }
> > > return 0;
> > > -
> > > }
> > >
> > > #ifdef CONFIG_NFSD_V4
> > > @@ -546,6 +545,31 @@ static inline int
> > > secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
> > > #endif
> > >
> > > +static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp)
> > > +{
> > > + unsigned int i, mode, listsize;
> > > + int err;
> > > +
> > > + err = get_uint(mesg, &listsize);
> > > + if (err)
> > > + return err;
> > > + if (listsize > 3)
> > > + return -EINVAL;
> >
> > Might want to make a note that the limit of 3 here is arbitrary, and
> > that it might need to be lifted in the future (if/when we grow other
> > xprtsec options).
>
> Well I can easily add a symbolic constant for that too. I
> missed this one in the final clean-up before posting.
>
> The bigger question is whether the new downcall parameter is
> sensible. If there's a nicer way for mountd to get this
> information to the kernel, I'm open to suggestion.
>

I don't know of one. Export options seem fine here, since that's how we
control all sorts of options in the nfs server.

>
> > > +
> > > + exp->ex_xprtsec_modes = 0;
> > > + for (i = 0; i < listsize; i++) {
> > > + err = get_uint(mesg, &mode);
> > > + if (err)
> > > + return err;
> > > + mode--;
> > > + if (mode > 2)
> > > + return -EINVAL;
> > > + /* Ad hoc */
> > > + exp->ex_xprtsec_modes |= 1 << mode;
> > > + }
> > > + return 0;
> > > +}
> > > +
> > > static inline int
> > > nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
> > > {
> > > @@ -608,6 +632,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
> > > exp.ex_client = dom;
> > > exp.cd = cd;
> > > exp.ex_devid_map = NULL;
> > > + exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL;
> > >
> > > /* expiry */
> > > err = -EINVAL;
> > > @@ -650,6 +675,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
> > > err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
> > > else if (strcmp(buf, "secinfo") == 0)
> > > err = secinfo_parse(&mesg, buf, &exp);
> > > + else if (strcmp(buf, "xprtsec") == 0)
> > > + err = xprtsec_parse(&mesg, buf, &exp);
> > > else
> > > /* quietly ignore unknown words and anything
> > > * following. Newer user-space can try to set
> > > @@ -663,6 +690,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
> > > err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
> > > if (err)
> > > goto out4;
> > > +
> > > /*
> > > * No point caching this if it would immediately expire.
> > > * Also, this protects exportfs's dummy export from the
> > > @@ -824,6 +852,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
> > > for (i = 0; i < MAX_SECINFO_LIST; i++) {
> > > new->ex_flavors[i] = item->ex_flavors[i];
> > > }
> > > + new->ex_xprtsec_modes = item->ex_xprtsec_modes;
> > > }
> > >
> > > static struct cache_head *svc_export_alloc(void)
> > > @@ -1035,9 +1064,26 @@ static struct svc_export *exp_find(struct cache_detail *cd,
> > >
> > > __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
> > > {
> > > - struct exp_flavor_info *f;
> > > - struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
> > > + struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
> > > + struct svc_xprt *xprt = rqstp->rq_xprt;
> > > +
> > > + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
> > > + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
> > > + goto ok;
> > > + }
> > > + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
> > > + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
> > > + !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
> > > + goto ok;
> > > + }
> > > + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
> > > + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
> > > + test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
> > > + goto ok;
> > > + }
> > > + goto denied;
> > >
> > > +ok:
> > > /* legacy gss-only clients are always OK: */
> > > if (exp->ex_client == rqstp->rq_gssclient)
> > > return 0;
> > > @@ -1062,6 +1108,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
> > > if (nfsd4_spo_must_allow(rqstp))
> > > return 0;
> > >
> > > +denied:
> > > return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec;
> > > }
> > >
> > > diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
> > > index d03f7f6a8642..61e1e8383c3d 100644
> > > --- a/fs/nfsd/export.h
> > > +++ b/fs/nfsd/export.h
> > > @@ -77,8 +77,19 @@ struct svc_export {
> > > struct cache_detail *cd;
> > > struct rcu_head ex_rcu;
> > > struct export_stats ex_stats;
> > > + unsigned long ex_xprtsec_modes;
> > > };
> > >
> > > +enum {
> > > + NFSEXP_XPRTSEC_NONE = 0x01,
> > > + NFSEXP_XPRTSEC_TLS = 0x02,
> > > + NFSEXP_XPRTSEC_MTLS = 0x04,
> > > +};
> > > +
> > > +#define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \
> > > + NFSEXP_XPRTSEC_TLS | \
> > > + NFSEXP_XPRTSEC_MTLS)
> > > +
> > > /* an "export key" (expkey) maps a filehandlefragement to an
> > > * svc_export for a given client. There can be several per export,
> > > * for the different fsid types.
> > >
> > >
> >
> > --
> > Jeff Layton <[email protected]>
> >
>
> --
> Chuck Lever
>
>

--
Jeff Layton <[email protected]>

2023-03-21 16:23:14

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code



> On Mar 21, 2023, at 10:56 AM, Jeff Layton <[email protected]> wrote:
>
> On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote:
>>
>>> On Mar 21, 2023, at 7:43 AM, Jeff Layton <[email protected]> wrote:
>>>
>>> On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
>>>> From: Chuck Lever <[email protected]>
>>>>
>>>> This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel
>>>> NFS server. If the client requests RPC-with-TLS and the user space
>>>> handshake agent is running, the server will set up a TLS session.
>>>>
>>>> There are no policy settings yet. For example, the server cannot
>>>> yet require the use of RPC-with-TLS to access its data.
>>>>
>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>> ---
>>>> include/linux/sunrpc/svc_xprt.h | 5 ++
>>>> include/linux/sunrpc/svcsock.h | 2 +
>>>> include/trace/events/sunrpc.h | 16 ++++++-
>>>> net/sunrpc/svc_xprt.c | 5 ++
>>>> net/sunrpc/svcauth_unix.c | 11 ++++-
>>>> net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++
>>>> 6 files changed, 125 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
>>>> index 775368802762..867479204840 100644
>>>> --- a/include/linux/sunrpc/svc_xprt.h
>>>> +++ b/include/linux/sunrpc/svc_xprt.h
>>>> @@ -27,7 +27,7 @@ struct svc_xprt_ops {
>>>> void (*xpo_detach)(struct svc_xprt *);
>>>> void (*xpo_free)(struct svc_xprt *);
>>>> void (*xpo_kill_temp_xprt)(struct svc_xprt *);
>>>> - void (*xpo_start_tls)(struct svc_xprt *);
>>>> + void (*xpo_handshake)(struct svc_xprt *xprt);
>>>> };
>>>>
>>>> struct svc_xprt_class {
>>>> @@ -70,6 +70,9 @@ struct svc_xprt {
>>>> #define XPT_LOCAL 12 /* connection from loopback interface */
>>>> #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
>>>> #define XPT_CONG_CTRL 14 /* has congestion control */
>>>> +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
>>>> +#define XPT_TLS_SESSION 16 /* transport-layer security established */
>>>> +#define XPT_PEER_AUTH 17 /* peer has been authenticated */
>>>>
>>>> struct svc_serv *xpt_server; /* service for transport */
>>>> atomic_t xpt_reserved; /* space on outq that is rsvd */
>>>> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
>>>> index bcc555c7ae9c..1175e1c38bac 100644
>>>> --- a/include/linux/sunrpc/svcsock.h
>>>> +++ b/include/linux/sunrpc/svcsock.h
>>>> @@ -38,6 +38,8 @@ struct svc_sock {
>>>> /* Number of queued send requests */
>>>> atomic_t sk_sendqlen;
>>>>
>>>> + struct completion sk_handshake_done;
>>>> +
>>>> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
>>>> };
>>>>
>>>> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
>>>> index cf286a0a17d0..2667a8db4811 100644
>>>> --- a/include/trace/events/sunrpc.h
>>>> +++ b/include/trace/events/sunrpc.h
>>>> @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency,
>>>> { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
>>>> { BIT(XPT_LOCAL), "LOCAL" }, \
>>>> { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
>>>> - { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
>>>> + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
>>>> + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
>>>> + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
>>>> + { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
>>>>
>>>> TRACE_EVENT(svc_xprt_create_err,
>>>> TP_PROTO(
>>>> @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close);
>>>> DEFINE_SVC_XPRT_EVENT(detach);
>>>> DEFINE_SVC_XPRT_EVENT(free);
>>>>
>>>> +#define DEFINE_SVC_TLS_EVENT(name) \
>>>> + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
>>>> + TP_PROTO(const struct svc_xprt *xprt), \
>>>> + TP_ARGS(xprt))
>>>> +
>>>> +DEFINE_SVC_TLS_EVENT(start);
>>>> +DEFINE_SVC_TLS_EVENT(upcall);
>>>> +DEFINE_SVC_TLS_EVENT(unavailable);
>>>> +DEFINE_SVC_TLS_EVENT(not_started);
>>>> +DEFINE_SVC_TLS_EVENT(timed_out);
>>>> +
>>>> TRACE_EVENT(svc_xprt_accept,
>>>> TP_PROTO(
>>>> const struct svc_xprt *xprt,
>>>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>>>> index ba629297da4e..b68c04dbf876 100644
>>>> --- a/net/sunrpc/svc_xprt.c
>>>> +++ b/net/sunrpc/svc_xprt.c
>>>> @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
>>>>
>>>> if (xpt_flags & BIT(XPT_BUSY))
>>>> return false;
>>>> - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
>>>> + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
>>>> return true;
>>>> if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
>>>> if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
>>>> @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
>>>> module_put(xprt->xpt_class->xcl_owner);
>>>> }
>>>> svc_xprt_received(xprt);
>>>> + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
>>>> + xprt->xpt_ops->xpo_handshake(xprt);
>>>> + svc_xprt_received(xprt);
>>>> } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
>>>> /* XPT_DATA|XPT_DEFERRED case: */
>>>> dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
>>>> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
>>>> index 983c5891cb56..374995201df4 100644
>>>> --- a/net/sunrpc/svcauth_unix.c
>>>> +++ b/net/sunrpc/svcauth_unix.c
>>>> @@ -17,8 +17,9 @@
>>>> #include <net/ipv6.h>
>>>> #include <linux/kernel.h>
>>>> #include <linux/user_namespace.h>
>>>> -#define RPCDBG_FACILITY RPCDBG_AUTH
>>>> +#include <trace/events/sunrpc.h>
>>>>
>>>> +#define RPCDBG_FACILITY RPCDBG_AUTH
>>>>
>>>> #include "netns.h"
>>>>
>>>> @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
>>>> {
>>>> struct xdr_stream *xdr = &rqstp->rq_arg_stream;
>>>> struct svc_cred *cred = &rqstp->rq_cred;
>>>> + struct svc_xprt *xprt = rqstp->rq_xprt;
>>>> u32 flavor, len;
>>>> void *body;
>>>> __be32 *p;
>>>> @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
>>>> if (cred->cr_group_info == NULL)
>>>> return SVC_CLOSE;
>>>>
>>>> - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
>>>> + if (xprt->xpt_ops->xpo_handshake) {
>>>> p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
>>>> if (!p)
>>>> return SVC_CLOSE;
>>>> + trace_svc_tls_start(xprt);
>>>> *p++ = rpc_auth_null;
>>>> *p++ = cpu_to_be32(8);
>>>> memcpy(p, "STARTTLS", 8);
>>>> +
>>>> + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>>>> + svc_xprt_enqueue(xprt);
>>>> } else {
>>>> + trace_svc_tls_unavailable(xprt);
>>>> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
>>>> RPC_AUTH_NULL, NULL, 0) < 0)
>>>> return SVC_CLOSE;
>>>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
>>>> index b6df73cb706a..16ba8d6ab20e 100644
>>>> --- a/net/sunrpc/svcsock.c
>>>> +++ b/net/sunrpc/svcsock.c
>>>> @@ -44,9 +44,11 @@
>>>> #include <net/tcp.h>
>>>> #include <net/tcp_states.h>
>>>> #include <net/tls.h>
>>>> +#include <net/handshake.h>
>>>> #include <linux/uaccess.h>
>>>> #include <linux/highmem.h>
>>>> #include <asm/ioctls.h>
>>>> +#include <linux/key.h>
>>>>
>>>> #include <linux/sunrpc/types.h>
>>>> #include <linux/sunrpc/clnt.h>
>>>> @@ -64,6 +66,7 @@
>>>>
>>>> #define RPCDBG_FACILITY RPCDBG_SVCXPRT
>>>>
>>>> +#define SVC_HANDSHAKE_TO (20U * HZ)
>>>>
>>>> static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
>>>> int flags);
>>>> @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk)
>>>> rmb();
>>>> svsk->sk_odata(sk);
>>>> trace_svcsock_data_ready(&svsk->sk_xprt, 0);
>>>> + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
>>>> + return;
>>>> if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
>>>> svc_xprt_enqueue(&svsk->sk_xprt);
>>>> }
>>>> @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
>>>> sock_no_linger(svsk->sk_sock->sk);
>>>> }
>>>>
>>>> +/**
>>>> + * svc_tcp_handshake_done - Handshake completion handler
>>>> + * @data: address of xprt to wake
>>>> + * @status: status of handshake
>>>> + * @peerid: serial number of key containing the remote peer's identity
>>>> + *
>>>> + * If a security policy is specified as an export option, we don't
>>>> + * have a specific export here to check. So we set a "TLS session
>>>> + * is present" flag on the xprt and let an upper layer enforce local
>>>> + * security policy.
>>>> + */
>>>> +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
>>>> +{
>>>> + struct svc_xprt *xprt = data;
>>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>>>> +
>>>> + if (!status) {
>>>> + if (peerid != TLS_NO_PEERID)
>>>> + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
>>>> + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
>>>> + }
>>>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>>>> + complete_all(&svsk->sk_handshake_done);
>>>> +}
>>>> +
>>>> +/**
>>>> + * svc_tcp_handshake - Perform a transport-layer security handshake
>>>> + * @xprt: connected transport endpoint
>>>> + *
>>>> + */
>>>> +static void svc_tcp_handshake(struct svc_xprt *xprt)
>>>> +{
>>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>>>> + struct tls_handshake_args args = {
>>>> + .ta_sock = svsk->sk_sock,
>>>> + .ta_done = svc_tcp_handshake_done,
>>>> + .ta_data = xprt,
>>>> + };
>>>> + int ret;
>>>> +
>>>> + trace_svc_tls_upcall(xprt);
>>>> +
>>>> + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
>>>> + init_completion(&svsk->sk_handshake_done);
>>>> + smp_wmb();
>>>> +
>>>> + ret = tls_server_hello_x509(&args, GFP_KERNEL);
>>>> + if (ret) {
>>>> + trace_svc_tls_not_started(xprt);
>>>> + goto out_failed;
>>>> + }
>>>> +
>>>> + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
>>>> + SVC_HANDSHAKE_TO);
>>>
>>> Just curious: is this 20s timeout mandated by the spec?
>>
>> The spec doesn't mandate a timeout. I simply wanted
>> to guarantee forward progress.
>>
>>
>>> It seems like a long time to block a kernel thread if so.
>>
>> It's about the same as the client side timeout, fwiw.
>>
>>
>>> Do we need to be concerned
>>> with DoS attacks here? Could a client initiate handshakes and then stop
>>> communicating, forcing the server to tie up threads with these 20s
>>> waits?
>>
>> I think a malicious client can do all kinds of similar things
>> already. Do you have a particular timeout value in mind, or
>> is there some other mechanism we can use to better bullet-
>> proof this aspect of the handshake? I'm open to suggestion.
>>
>
> I don't have any suggestions, just trying to speculate about ways this
> could break. The only alternative I could see would be to defer the
> connection somehow until the reply comes in so that the thread can do
> other stuff in the meantime.

The server has a deferral mechanism, funnily enough. ;-) But
we could also just use a system workqueue.

Note that the handshake upcall mechanism itself has a limit
on the concurrent number of pending handshakes it will allow.
That might be enough to prevent a DoS.


> That's something we can potentially add
> later if we decide it's necessary though.

I shortened the timeout and added a comment suggesting this
as future work.


>>>> + if (ret <= 0) {
>>>> + if (tls_handshake_cancel(svsk->sk_sock)) {
>>>> + trace_svc_tls_timed_out(xprt);
>>>> + goto out_close;
>>>> + }
>>>> + }
>>>> +
>>>> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
>>>> + trace_svc_tls_unavailable(xprt);
>>>> + goto out_close;
>>>> + }
>>>> +
>>>> + /*
>>>> + * Mark the transport ready in case the remote sent RPC
>>>> + * traffic before the kernel received the handshake
>>>> + * completion downcall.
>>>> + */
>>>> + set_bit(XPT_DATA, &xprt->xpt_flags);
>>>> + svc_xprt_enqueue(xprt);
>>>> + return;
>>>> +
>>>> +out_close:
>>>> + set_bit(XPT_CLOSE, &xprt->xpt_flags);
>>>> +out_failed:
>>>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
>>>> + set_bit(XPT_DATA, &xprt->xpt_flags);
>>>> + svc_xprt_enqueue(xprt);
>>>> +}
>>>> +
>>>> /*
>>>> * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
>>>> */
>>>> @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
>>>> .xpo_has_wspace = svc_tcp_has_wspace,
>>>> .xpo_accept = svc_tcp_accept,
>>>> .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
>>>> + .xpo_handshake = svc_tcp_handshake,
>>>> };
>>>>
>>>> static struct svc_xprt_class svc_tcp_class = {
>>>> @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
>>>> {
>>>> struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>>>>
>>>> + tls_handshake_cancel(svsk->sk_sock);
>>>> +
>>>> if (svsk->sk_sock->file)
>>>> sockfd_put(svsk->sk_sock);
>>>> else
>>>>
>>>>
>>>
>>> --
>>> Jeff Layton <[email protected]>
>>
>> --
>> Chuck Lever
>>
>>
>
> --
> Jeff Layton <[email protected]>

--
Chuck Lever


2023-03-21 16:58:22

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH RFC 4/5] SUNRPC: Support TLS handshake in the server-side TCP socket code

On Tue, 2023-03-21 at 16:09 +0000, Chuck Lever III wrote:
>
> > On Mar 21, 2023, at 10:56 AM, Jeff Layton <[email protected]> wrote:
> >
> > On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote:
> > >
> > > > On Mar 21, 2023, at 7:43 AM, Jeff Layton <[email protected]> wrote:
> > > >
> > > > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote:
> > > > > From: Chuck Lever <[email protected]>
> > > > >
> > > > > This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel
> > > > > NFS server. If the client requests RPC-with-TLS and the user space
> > > > > handshake agent is running, the server will set up a TLS session.
> > > > >
> > > > > There are no policy settings yet. For example, the server cannot
> > > > > yet require the use of RPC-with-TLS to access its data.
> > > > >
> > > > > Signed-off-by: Chuck Lever <[email protected]>
> > > > > ---
> > > > > include/linux/sunrpc/svc_xprt.h | 5 ++
> > > > > include/linux/sunrpc/svcsock.h | 2 +
> > > > > include/trace/events/sunrpc.h | 16 ++++++-
> > > > > net/sunrpc/svc_xprt.c | 5 ++
> > > > > net/sunrpc/svcauth_unix.c | 11 ++++-
> > > > > net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++
> > > > > 6 files changed, 125 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> > > > > index 775368802762..867479204840 100644
> > > > > --- a/include/linux/sunrpc/svc_xprt.h
> > > > > +++ b/include/linux/sunrpc/svc_xprt.h
> > > > > @@ -27,7 +27,7 @@ struct svc_xprt_ops {
> > > > > void (*xpo_detach)(struct svc_xprt *);
> > > > > void (*xpo_free)(struct svc_xprt *);
> > > > > void (*xpo_kill_temp_xprt)(struct svc_xprt *);
> > > > > - void (*xpo_start_tls)(struct svc_xprt *);
> > > > > + void (*xpo_handshake)(struct svc_xprt *xprt);
> > > > > };
> > > > >
> > > > > struct svc_xprt_class {
> > > > > @@ -70,6 +70,9 @@ struct svc_xprt {
> > > > > #define XPT_LOCAL 12 /* connection from loopback interface */
> > > > > #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
> > > > > #define XPT_CONG_CTRL 14 /* has congestion control */
> > > > > +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
> > > > > +#define XPT_TLS_SESSION 16 /* transport-layer security established */
> > > > > +#define XPT_PEER_AUTH 17 /* peer has been authenticated */
> > > > >
> > > > > struct svc_serv *xpt_server; /* service for transport */
> > > > > atomic_t xpt_reserved; /* space on outq that is rsvd */
> > > > > diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> > > > > index bcc555c7ae9c..1175e1c38bac 100644
> > > > > --- a/include/linux/sunrpc/svcsock.h
> > > > > +++ b/include/linux/sunrpc/svcsock.h
> > > > > @@ -38,6 +38,8 @@ struct svc_sock {
> > > > > /* Number of queued send requests */
> > > > > atomic_t sk_sendqlen;
> > > > >
> > > > > + struct completion sk_handshake_done;
> > > > > +
> > > > > struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
> > > > > };
> > > > >
> > > > > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> > > > > index cf286a0a17d0..2667a8db4811 100644
> > > > > --- a/include/trace/events/sunrpc.h
> > > > > +++ b/include/trace/events/sunrpc.h
> > > > > @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency,
> > > > > { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
> > > > > { BIT(XPT_LOCAL), "LOCAL" }, \
> > > > > { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
> > > > > - { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
> > > > > + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
> > > > > + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
> > > > > + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
> > > > > + { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
> > > > >
> > > > > TRACE_EVENT(svc_xprt_create_err,
> > > > > TP_PROTO(
> > > > > @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close);
> > > > > DEFINE_SVC_XPRT_EVENT(detach);
> > > > > DEFINE_SVC_XPRT_EVENT(free);
> > > > >
> > > > > +#define DEFINE_SVC_TLS_EVENT(name) \
> > > > > + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
> > > > > + TP_PROTO(const struct svc_xprt *xprt), \
> > > > > + TP_ARGS(xprt))
> > > > > +
> > > > > +DEFINE_SVC_TLS_EVENT(start);
> > > > > +DEFINE_SVC_TLS_EVENT(upcall);
> > > > > +DEFINE_SVC_TLS_EVENT(unavailable);
> > > > > +DEFINE_SVC_TLS_EVENT(not_started);
> > > > > +DEFINE_SVC_TLS_EVENT(timed_out);
> > > > > +
> > > > > TRACE_EVENT(svc_xprt_accept,
> > > > > TP_PROTO(
> > > > > const struct svc_xprt *xprt,
> > > > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > > > > index ba629297da4e..b68c04dbf876 100644
> > > > > --- a/net/sunrpc/svc_xprt.c
> > > > > +++ b/net/sunrpc/svc_xprt.c
> > > > > @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
> > > > >
> > > > > if (xpt_flags & BIT(XPT_BUSY))
> > > > > return false;
> > > > > - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
> > > > > + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
> > > > > return true;
> > > > > if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
> > > > > if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
> > > > > @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> > > > > module_put(xprt->xpt_class->xcl_owner);
> > > > > }
> > > > > svc_xprt_received(xprt);
> > > > > + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
> > > > > + xprt->xpt_ops->xpo_handshake(xprt);
> > > > > + svc_xprt_received(xprt);
> > > > > } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
> > > > > /* XPT_DATA|XPT_DEFERRED case: */
> > > > > dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
> > > > > diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
> > > > > index 983c5891cb56..374995201df4 100644
> > > > > --- a/net/sunrpc/svcauth_unix.c
> > > > > +++ b/net/sunrpc/svcauth_unix.c
> > > > > @@ -17,8 +17,9 @@
> > > > > #include <net/ipv6.h>
> > > > > #include <linux/kernel.h>
> > > > > #include <linux/user_namespace.h>
> > > > > -#define RPCDBG_FACILITY RPCDBG_AUTH
> > > > > +#include <trace/events/sunrpc.h>
> > > > >
> > > > > +#define RPCDBG_FACILITY RPCDBG_AUTH
> > > > >
> > > > > #include "netns.h"
> > > > >
> > > > > @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
> > > > > {
> > > > > struct xdr_stream *xdr = &rqstp->rq_arg_stream;
> > > > > struct svc_cred *cred = &rqstp->rq_cred;
> > > > > + struct svc_xprt *xprt = rqstp->rq_xprt;
> > > > > u32 flavor, len;
> > > > > void *body;
> > > > > __be32 *p;
> > > > > @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
> > > > > if (cred->cr_group_info == NULL)
> > > > > return SVC_CLOSE;
> > > > >
> > > > > - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
> > > > > + if (xprt->xpt_ops->xpo_handshake) {
> > > > > p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
> > > > > if (!p)
> > > > > return SVC_CLOSE;
> > > > > + trace_svc_tls_start(xprt);
> > > > > *p++ = rpc_auth_null;
> > > > > *p++ = cpu_to_be32(8);
> > > > > memcpy(p, "STARTTLS", 8);
> > > > > +
> > > > > + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> > > > > + svc_xprt_enqueue(xprt);
> > > > > } else {
> > > > > + trace_svc_tls_unavailable(xprt);
> > > > > if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
> > > > > RPC_AUTH_NULL, NULL, 0) < 0)
> > > > > return SVC_CLOSE;
> > > > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > > > > index b6df73cb706a..16ba8d6ab20e 100644
> > > > > --- a/net/sunrpc/svcsock.c
> > > > > +++ b/net/sunrpc/svcsock.c
> > > > > @@ -44,9 +44,11 @@
> > > > > #include <net/tcp.h>
> > > > > #include <net/tcp_states.h>
> > > > > #include <net/tls.h>
> > > > > +#include <net/handshake.h>
> > > > > #include <linux/uaccess.h>
> > > > > #include <linux/highmem.h>
> > > > > #include <asm/ioctls.h>
> > > > > +#include <linux/key.h>
> > > > >
> > > > > #include <linux/sunrpc/types.h>
> > > > > #include <linux/sunrpc/clnt.h>
> > > > > @@ -64,6 +66,7 @@
> > > > >
> > > > > #define RPCDBG_FACILITY RPCDBG_SVCXPRT
> > > > >
> > > > > +#define SVC_HANDSHAKE_TO (20U * HZ)
> > > > >
> > > > > static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
> > > > > int flags);
> > > > > @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk)
> > > > > rmb();
> > > > > svsk->sk_odata(sk);
> > > > > trace_svcsock_data_ready(&svsk->sk_xprt, 0);
> > > > > + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
> > > > > + return;
> > > > > if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
> > > > > svc_xprt_enqueue(&svsk->sk_xprt);
> > > > > }
> > > > > @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
> > > > > sock_no_linger(svsk->sk_sock->sk);
> > > > > }
> > > > >
> > > > > +/**
> > > > > + * svc_tcp_handshake_done - Handshake completion handler
> > > > > + * @data: address of xprt to wake
> > > > > + * @status: status of handshake
> > > > > + * @peerid: serial number of key containing the remote peer's identity
> > > > > + *
> > > > > + * If a security policy is specified as an export option, we don't
> > > > > + * have a specific export here to check. So we set a "TLS session
> > > > > + * is present" flag on the xprt and let an upper layer enforce local
> > > > > + * security policy.
> > > > > + */
> > > > > +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
> > > > > +{
> > > > > + struct svc_xprt *xprt = data;
> > > > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> > > > > +
> > > > > + if (!status) {
> > > > > + if (peerid != TLS_NO_PEERID)
> > > > > + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
> > > > > + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
> > > > > + }
> > > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> > > > > + complete_all(&svsk->sk_handshake_done);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * svc_tcp_handshake - Perform a transport-layer security handshake
> > > > > + * @xprt: connected transport endpoint
> > > > > + *
> > > > > + */
> > > > > +static void svc_tcp_handshake(struct svc_xprt *xprt)
> > > > > +{
> > > > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> > > > > + struct tls_handshake_args args = {
> > > > > + .ta_sock = svsk->sk_sock,
> > > > > + .ta_done = svc_tcp_handshake_done,
> > > > > + .ta_data = xprt,
> > > > > + };
> > > > > + int ret;
> > > > > +
> > > > > + trace_svc_tls_upcall(xprt);
> > > > > +
> > > > > + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
> > > > > + init_completion(&svsk->sk_handshake_done);
> > > > > + smp_wmb();
> > > > > +
> > > > > + ret = tls_server_hello_x509(&args, GFP_KERNEL);
> > > > > + if (ret) {
> > > > > + trace_svc_tls_not_started(xprt);
> > > > > + goto out_failed;
> > > > > + }
> > > > > +
> > > > > + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
> > > > > + SVC_HANDSHAKE_TO);
> > > >
> > > > Just curious: is this 20s timeout mandated by the spec?
> > >
> > > The spec doesn't mandate a timeout. I simply wanted
> > > to guarantee forward progress.
> > >
> > >
> > > > It seems like a long time to block a kernel thread if so.
> > >
> > > It's about the same as the client side timeout, fwiw.
> > >
> > >
> > > > Do we need to be concerned
> > > > with DoS attacks here? Could a client initiate handshakes and then stop
> > > > communicating, forcing the server to tie up threads with these 20s
> > > > waits?
> > >
> > > I think a malicious client can do all kinds of similar things
> > > already. Do you have a particular timeout value in mind, or
> > > is there some other mechanism we can use to better bullet-
> > > proof this aspect of the handshake? I'm open to suggestion.
> > >
> >
> > I don't have any suggestions, just trying to speculate about ways this
> > could break. The only alternative I could see would be to defer the
> > connection somehow until the reply comes in so that the thread can do
> > other stuff in the meantime.
>
> The server has a deferral mechanism, funnily enough. ;-) But
> we could also just use a system workqueue.
>

Yeah. I wasn't sure if it was suitable for use here, but using the
deferral infrastructure might be the way to go. That code is quite
difficult to follow though. It's one of sunrpc's dustier corners.

I'm not sure I follow what you're thinking on workqueues. Do you mean
"defer" the svc_rqst and then queue it to a workqueue when the reply
comes in? That might be ok too. We did have those nfsd workqueue patches
that we could base the design on.

> Note that the handshake upcall mechanism itself has a limit
> on the concurrent number of pending handshakes it will allow.
> That might be enough to prevent a DoS.
>

What happens when you hit the limit? Would it be possible for someone
malicious to block legitimate TLS connections from getting through by
stalling the handshake on a bunch of connections? It might be worthwhile
to consider some sort of per-client-address limit as well.

Nonetheless, I don't see this as a show stopper that would keep us from
merging a working implementation. We can add this sort of mitigation
later too.

> > That's something we can potentially add
> > later if we decide it's necessary though.
>
> I shortened the timeout and added a comment suggesting this
> as future work.
>

Sounds good.

>
> > > > > + if (ret <= 0) {
> > > > > + if (tls_handshake_cancel(svsk->sk_sock)) {
> > > > > + trace_svc_tls_timed_out(xprt);
> > > > > + goto out_close;
> > > > > + }
> > > > > + }
> > > > > +
> > > > > + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
> > > > > + trace_svc_tls_unavailable(xprt);
> > > > > + goto out_close;
> > > > > + }
> > > > > +
> > > > > + /*
> > > > > + * Mark the transport ready in case the remote sent RPC
> > > > > + * traffic before the kernel received the handshake
> > > > > + * completion downcall.
> > > > > + */
> > > > > + set_bit(XPT_DATA, &xprt->xpt_flags);
> > > > > + svc_xprt_enqueue(xprt);
> > > > > + return;
> > > > > +
> > > > > +out_close:
> > > > > + set_bit(XPT_CLOSE, &xprt->xpt_flags);
> > > > > +out_failed:
> > > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
> > > > > + set_bit(XPT_DATA, &xprt->xpt_flags);
> > > > > + svc_xprt_enqueue(xprt);
> > > > > +}
> > > > > +
> > > > > /*
> > > > > * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
> > > > > */
> > > > > @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
> > > > > .xpo_has_wspace = svc_tcp_has_wspace,
> > > > > .xpo_accept = svc_tcp_accept,
> > > > > .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
> > > > > + .xpo_handshake = svc_tcp_handshake,
> > > > > };
> > > > >
> > > > > static struct svc_xprt_class svc_tcp_class = {
> > > > > @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
> > > > > {
> > > > > struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> > > > >
> > > > > + tls_handshake_cancel(svsk->sk_sock);
> > > > > +
> > > > > if (svsk->sk_sock->file)
> > > > > sockfd_put(svsk->sk_sock);
> > > > > else
> > > > >
> > > > >
> > > >
> > > > --
> > > > Jeff Layton <[email protected]>
> > >
> > > --
> > > Chuck Lever
> > >
> > >
> >
> > --
> > Jeff Layton <[email protected]>
>
> --
> Chuck Lever
>
>
>

--
Jeff Layton <[email protected]>