Now that the initial v5.19 merge window has closed, it's time for
another round of review for RPC-with-TLS support in the Linux NFS
client. This is just the RPC-specific portions. The full series is
available in the "topic-rpc-with-tls-upcall" branch here:
https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
I've taken two or three steps towards implementing the architecture
Trond requested during the last review. There is now a two-stage
connection establishment process so that the upper level can use
XPRT_CONNECTED to determine when a TLS session is ready to use.
There are probably additional changes and simplifications that can
be made. Please review and provide feedback.
I wanted to make more progress on client-side authentication (ie,
passing an x.509 cert from the client to the server) but NFSD bugs
have taken all my time for the past few weeks.
Changes since v1:
- Rebased on v5.18
- Re-ordered so generic fixes come first
- Addressed some of Trond's review comments
---
Chuck Lever (15):
SUNRPC: Fail faster on bad verifier
SUNRPC: Widen rpc_task::tk_flags
SUNRPC: Replace dprintk() call site in xs_data_ready
NFS: Replace fs_context-related dprintk() call sites with tracepoints
SUNRPC: Plumb an API for setting transport layer security
SUNRPC: Trace the rpc_create_args
SUNRPC: Refactor rpc_call_null_helper()
SUNRPC: Add RPC client support for the RPC_AUTH_TLS auth flavor
SUNRPC: Ignore data_ready callbacks during TLS handshakes
SUNRPC: Capture cmsg metadata on client-side receive
SUNRPC: Add a connect worker function for TLS
SUNRPC: Add RPC-with-TLS support to xprtsock.c
SUNRPC: Add RPC-with-TLS tracepoints
NFS: Have struct nfs_client carry a TLS policy field
NFS: Add an "xprtsec=" NFS mount option
fs/nfs/client.c | 14 ++
fs/nfs/fs_context.c | 65 +++++--
fs/nfs/internal.h | 2 +
fs/nfs/nfs3client.c | 1 +
fs/nfs/nfs4client.c | 16 +-
fs/nfs/nfstrace.h | 77 ++++++++
fs/nfs/super.c | 7 +
include/linux/nfs_fs_sb.h | 5 +-
include/linux/sunrpc/auth.h | 1 +
include/linux/sunrpc/clnt.h | 15 +-
include/linux/sunrpc/sched.h | 32 ++--
include/linux/sunrpc/xprt.h | 2 +
include/linux/sunrpc/xprtsock.h | 4 +
include/net/tls.h | 2 +
include/trace/events/sunrpc.h | 157 ++++++++++++++--
net/sunrpc/Makefile | 2 +-
net/sunrpc/auth.c | 2 +-
net/sunrpc/auth_tls.c | 120 +++++++++++++
net/sunrpc/clnt.c | 34 ++--
net/sunrpc/debugfs.c | 2 +-
net/sunrpc/xprtsock.c | 310 +++++++++++++++++++++++++++++++-
21 files changed, 805 insertions(+), 65 deletions(-)
create mode 100644 net/sunrpc/auth_tls.c
--
Chuck Lever
Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 20 ++++++++++++++++++++
net/sunrpc/xprtsock.c | 6 ++++--
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 3995c58a1c51..a66aa1f59ed8 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1266,6 +1266,26 @@ TRACE_EVENT(xprt_reserve,
)
);
+TRACE_EVENT(xs_data_ready,
+ TP_PROTO(
+ const struct rpc_xprt *xprt
+ ),
+
+ TP_ARGS(xprt),
+
+ TP_STRUCT__entry(
+ __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR])
+ __string(port, xprt->address_strings[RPC_DISPLAY_PORT])
+ ),
+
+ TP_fast_assign(
+ __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]);
+ ),
+
+ TP_printk("peer=[%s]:%s", __get_str(addr), __get_str(port))
+);
+
TRACE_EVENT(xs_stream_read_data,
TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total),
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 650102a9c86a..73fab802996d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1378,7 +1378,7 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
}
/**
- * xs_data_ready - "data ready" callback for UDP sockets
+ * xs_data_ready - "data ready" callback for sockets
* @sk: socket with data to read
*
*/
@@ -1386,11 +1386,13 @@ static void xs_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
- dprintk("RPC: xs_data_ready...\n");
xprt = xprt_from_sock(sk);
if (xprt != NULL) {
struct sock_xprt *transport = container_of(xprt,
struct sock_xprt, xprt);
+
+ trace_xs_data_ready(xprt);
+
transport->old_data_ready(sk);
/* Any data means we had a useful conversation, so
* then we don't need to delay the next reconnect
Contributed as part of the long patch series that converts NFS from
using dprintk to tracepoints for observability.
Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfs/fs_context.c | 25 ++++++++++-------
fs/nfs/nfstrace.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 92 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 9a16897e8dc6..35e400a557b9 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -21,6 +21,8 @@
#include "nfs.h"
#include "internal.h"
+#include "nfstrace.h"
+
#define NFSDBG_FACILITY NFSDBG_MOUNT
#if IS_ENABLED(CONFIG_NFS_V3)
@@ -284,7 +286,7 @@ static int nfs_verify_server_address(struct sockaddr *addr)
}
}
- dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
+ trace_nfs_mount_addr_err(addr);
return 0;
}
@@ -378,7 +380,7 @@ static int nfs_parse_security_flavors(struct fs_context *fc,
char *string = param->string, *p;
int ret;
- dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string);
+ trace_nfs_mount_assign(param->key, string);
while ((p = strsep(&string, ":")) != NULL) {
if (!*p)
@@ -480,7 +482,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
unsigned int len;
int ret, opt;
- dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key);
+ trace_nfs_mount_option(param);
opt = fs_parse(fc, nfs_fs_parameters, param, &result);
if (opt < 0)
@@ -683,6 +685,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
return ret;
break;
case Opt_vers:
+ trace_nfs_mount_assign(param->key, param->string);
ret = nfs_parse_version_string(fc, param->string);
if (ret < 0)
return ret;
@@ -694,6 +697,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_proto:
+ trace_nfs_mount_assign(param->key, param->string);
protofamily = AF_INET;
switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
case Opt_xprt_udp6:
@@ -729,6 +733,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_mountproto:
+ trace_nfs_mount_assign(param->key, param->string);
mountfamily = AF_INET;
switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
case Opt_xprt_udp6:
@@ -751,6 +756,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_addr:
+ trace_nfs_mount_assign(param->key, param->string);
len = rpc_pton(fc->net_ns, param->string, param->size,
&ctx->nfs_server.address,
sizeof(ctx->nfs_server._address));
@@ -759,16 +765,19 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->nfs_server.addrlen = len;
break;
case Opt_clientaddr:
+ trace_nfs_mount_assign(param->key, param->string);
kfree(ctx->client_address);
ctx->client_address = param->string;
param->string = NULL;
break;
case Opt_mounthost:
+ trace_nfs_mount_assign(param->key, param->string);
kfree(ctx->mount_server.hostname);
ctx->mount_server.hostname = param->string;
param->string = NULL;
break;
case Opt_mountaddr:
+ trace_nfs_mount_assign(param->key, param->string);
len = rpc_pton(fc->net_ns, param->string, param->size,
&ctx->mount_server.address,
sizeof(ctx->mount_server._address));
@@ -846,7 +855,6 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
*/
case Opt_sloppy:
ctx->sloppy = true;
- dfprintk(MOUNT, "NFS: relaxing parsing rules\n");
break;
}
@@ -879,10 +887,8 @@ static int nfs_parse_source(struct fs_context *fc,
size_t len;
const char *end;
- if (unlikely(!dev_name || !*dev_name)) {
- dfprintk(MOUNT, "NFS: device name not specified\n");
+ if (unlikely(!dev_name || !*dev_name))
return -EINVAL;
- }
/* Is the host name protected with square brakcets? */
if (*dev_name == '[') {
@@ -922,7 +928,7 @@ static int nfs_parse_source(struct fs_context *fc,
if (!ctx->nfs_server.export_path)
goto out_nomem;
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path);
+ trace_nfs_mount_path(ctx->nfs_server.export_path);
return 0;
out_bad_devname:
@@ -1116,7 +1122,6 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS");
out_nomem:
- dfprintk(MOUNT, "NFS: not enough memory to handle mount options");
return -ENOMEM;
out_no_address:
@@ -1248,7 +1253,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
if (IS_ERR(c))
return PTR_ERR(c);
ctx->nfs_server.export_path = c;
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
+ trace_nfs_mount_path(c);
c = strndup_user(data->client_addr.data, 16);
if (IS_ERR(c))
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 012bd7339862..ccaeae42ee77 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -1609,6 +1609,83 @@ TRACE_EVENT(nfs_fh_to_dentry,
)
);
+TRACE_EVENT(nfs_mount_addr_err,
+ TP_PROTO(
+ const struct sockaddr *sap
+ ),
+
+ TP_ARGS(sap),
+
+ TP_STRUCT__entry(
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->addr, sap, sizeof(__entry->addr));
+ ),
+
+ TP_printk("addr=%pISpc", __entry->addr)
+);
+
+TRACE_EVENT(nfs_mount_assign,
+ TP_PROTO(
+ const char *option,
+ const char *value
+ ),
+
+ TP_ARGS(option, value),
+
+ TP_STRUCT__entry(
+ __string(option, option)
+ __string(value, value)
+ ),
+
+ TP_fast_assign(
+ __assign_str(option, option);
+ __assign_str(value, value);
+ ),
+
+ TP_printk("option %s=%s",
+ __get_str(option), __get_str(value)
+ )
+);
+
+TRACE_EVENT(nfs_mount_option,
+ TP_PROTO(
+ const struct fs_parameter *param
+ ),
+
+ TP_ARGS(param),
+
+ TP_STRUCT__entry(
+ __string(option, param->key)
+ ),
+
+ TP_fast_assign(
+ __assign_str(option, param->key);
+ ),
+
+ TP_printk("option %s", __get_str(option))
+);
+
+TRACE_EVENT(nfs_mount_path,
+ TP_PROTO(
+ const char *path
+ ),
+
+ TP_ARGS(path),
+
+ TP_STRUCT__entry(
+ __string(path, path)
+ ),
+
+ TP_fast_assign(
+ __assign_str(path, path);
+ ),
+
+ TP_printk("path='%s'", __get_str(path))
+);
+
DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
Pass the upper layer's rpc_create_args to the rpc_clnt_new()
tracepoint so additional parts of the upper layer's request can be
recorded.
Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 53 +++++++++++++++++++++++++++++++++--------
net/sunrpc/clnt.c | 2 +-
2 files changed, 44 insertions(+), 11 deletions(-)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index a66aa1f59ed8..986e135e314f 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -139,36 +139,69 @@ DEFINE_RPC_CLNT_EVENT(release);
DEFINE_RPC_CLNT_EVENT(replace_xprt);
DEFINE_RPC_CLNT_EVENT(replace_xprt_err);
+TRACE_DEFINE_ENUM(RPC_XPRTSEC_NONE);
+TRACE_DEFINE_ENUM(RPC_XPRTSEC_TLS_X509);
+TRACE_DEFINE_ENUM(RPC_XPRTSEC_TLS_PSK);
+
+#define rpc_show_xprtsec_policy(policy) \
+ __print_symbolic(policy, \
+ { RPC_XPRTSEC_NONE, "none" }, \
+ { RPC_XPRTSEC_TLS_X509, "tls-x509" }, \
+ { RPC_XPRTSEC_TLS_PSK, "tls-psk" })
+
+#define rpc_show_create_flags(flags) \
+ __print_flags(flags, "|", \
+ { RPC_CLNT_CREATE_HARDRTRY, "HARDRTRY" }, \
+ { RPC_CLNT_CREATE_AUTOBIND, "AUTOBIND" }, \
+ { RPC_CLNT_CREATE_NONPRIVPORT, "NONPRIVPORT" }, \
+ { RPC_CLNT_CREATE_NOPING, "NOPING" }, \
+ { RPC_CLNT_CREATE_DISCRTRY, "DISCRTRY" }, \
+ { RPC_CLNT_CREATE_QUIET, "QUIET" }, \
+ { RPC_CLNT_CREATE_INFINITE_SLOTS, \
+ "INFINITE_SLOTS" }, \
+ { RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, \
+ "NO_IDLE_TIMEOUT" }, \
+ { RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT, \
+ "NO_RETRANS_TIMEOUT" }, \
+ { RPC_CLNT_CREATE_SOFTERR, "SOFTERR" }, \
+ { RPC_CLNT_CREATE_REUSEPORT, "REUSEPORT" })
+
TRACE_EVENT(rpc_clnt_new,
TP_PROTO(
const struct rpc_clnt *clnt,
const struct rpc_xprt *xprt,
- const char *program,
- const char *server
+ const struct rpc_create_args *args
),
- TP_ARGS(clnt, xprt, program, server),
+ TP_ARGS(clnt, xprt, args),
TP_STRUCT__entry(
__field(unsigned int, client_id)
+ __field(unsigned long, xprtsec)
+ __field(unsigned long, flags)
+ __string(program, clnt->cl_program->name)
+ __string(server, xprt->servername)
__string(addr, xprt->address_strings[RPC_DISPLAY_ADDR])
__string(port, xprt->address_strings[RPC_DISPLAY_PORT])
- __string(program, program)
- __string(server, server)
),
TP_fast_assign(
__entry->client_id = clnt->cl_clid;
+ __entry->xprtsec = args->xprtsec;
+ __entry->flags = args->flags;
+ __assign_str(program, clnt->cl_program->name);
+ __assign_str(server, xprt->servername);
__assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]);
__assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]);
- __assign_str(program, program);
- __assign_str(server, server);
),
- TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER
- " peer=[%s]:%s program=%s server=%s",
+ TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER " peer=[%s]:%s"
+ " program=%s server=%s xprtsec=%s flags=%s",
__entry->client_id, __get_str(addr), __get_str(port),
- __get_str(program), __get_str(server))
+ __get_str(program), __get_str(server),
+ rpc_show_xprtsec_policy(__entry->xprtsec),
+ rpc_show_create_flags(__entry->flags)
+ )
);
TRACE_EVENT(rpc_clnt_new_err,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6dcc88d45f5d..0ca86c92968f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -435,7 +435,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (parent)
refcount_inc(&parent->cl_count);
- trace_rpc_clnt_new(clnt, xprt, program->name, args->servername);
+ trace_rpc_clnt_new(clnt, xprt, args);
return clnt;
out_no_path:
I'm about to add a use case that does not want RPC_TASK_NULLCREDS.
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/clnt.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0ca86c92968f..ade83ee13bac 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2751,8 +2751,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
.rpc_op_cred = cred,
.callback_ops = ops ?: &rpc_null_ops,
.callback_data = data,
- .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
- RPC_TASK_NULLCREDS,
+ .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
};
return rpc_run_task(&task_setup_data);
@@ -2760,7 +2759,8 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
{
- return rpc_call_null_helper(clnt, NULL, cred, flags, NULL, NULL);
+ return rpc_call_null_helper(clnt, NULL, cred, flags | RPC_TASK_NULLCREDS,
+ NULL, NULL);
}
EXPORT_SYMBOL_GPL(rpc_call_null);
@@ -2860,9 +2860,11 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
goto success;
}
- task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
- &rpc_cb_add_xprt_call_ops, data);
+ task = rpc_call_null_helper(clnt, xprt, NULL,
+ RPC_TASK_ASYNC | RPC_TASK_NULLCREDS,
+ &rpc_cb_add_xprt_call_ops, data);
data->xps->xps_nunique_destaddr_xprts++;
+
rpc_put_task(task);
success:
return 1;
@@ -2903,7 +2905,8 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
goto out_err;
/* Test the connection */
- task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
+ task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_NULLCREDS,
+ NULL, NULL);
if (IS_ERR(task)) {
status = PTR_ERR(task);
goto out_err;
The new authentication flavor is used only to discover peer support
for RPC-over-TLS.
Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/auth.h | 1
net/sunrpc/Makefile | 2 -
net/sunrpc/auth.c | 2 -
net/sunrpc/auth_tls.c | 120 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 123 insertions(+), 2 deletions(-)
create mode 100644 net/sunrpc/auth_tls.c
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 3e6ce288a7fc..1f13d923f439 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -144,6 +144,7 @@ struct rpc_credops {
extern const struct rpc_authops authunix_ops;
extern const struct rpc_authops authnull_ops;
+extern const struct rpc_authops authtls_ops;
int __init rpc_init_authunix(void);
int __init rpcauth_init_module(void);
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 1c8de397d6ad..f89c10fe7e6a 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
- auth.o auth_null.o auth_unix.o \
+ auth.o auth_null.o auth_tls.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 682fcd24bf43..91c21a1c7cdc 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -32,7 +32,7 @@ static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
[RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops,
[RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops,
- NULL, /* others can be loadable modules */
+ [RPC_AUTH_TLS] = (const struct rpc_authops __force __rcu *)&authtls_ops,
};
static LIST_HEAD(cred_unused);
diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c
new file mode 100644
index 000000000000..cf4185f188e9
--- /dev/null
+++ b/net/sunrpc/auth_tls.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, 2022 Oracle. All rights reserved.
+ *
+ * The AUTH_TLS credential is used only to probe a remote peer
+ * for RPC-over-TLS support.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sunrpc/clnt.h>
+
+static const char *starttls_token = "STARTTLS";
+static const size_t starttls_len = 8;
+
+static struct rpc_auth tls_auth;
+static struct rpc_cred tls_cred;
+
+static struct rpc_auth *tls_create(const struct rpc_auth_create_args *args,
+ struct rpc_clnt *clnt)
+{
+ refcount_inc(&tls_auth.au_count);
+ return &tls_auth;
+}
+
+static void tls_destroy(struct rpc_auth *auth)
+{
+}
+
+static struct rpc_cred *tls_lookup_cred(struct rpc_auth *auth,
+ struct auth_cred *acred, int flags)
+{
+ return get_rpccred(&tls_cred);
+}
+
+static void tls_destroy_cred(struct rpc_cred *cred)
+{
+}
+
+static int tls_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
+{
+ return 1;
+}
+
+static int tls_marshal(struct rpc_task *task, struct xdr_stream *xdr)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4 * XDR_UNIT);
+ if (!p)
+ return -EMSGSIZE;
+ /* Credential */
+ *p++ = rpc_auth_tls;
+ *p++ = xdr_zero;
+ /* Verifier */
+ *p++ = rpc_auth_null;
+ *p = xdr_zero;
+ return 0;
+}
+
+static int tls_refresh(struct rpc_task *task)
+{
+ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
+ return 0;
+}
+
+static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr)
+{
+ __be32 *p;
+ void *str;
+
+ p = xdr_inline_decode(xdr, XDR_UNIT);
+ if (!p)
+ return -EIO;
+ if (*p != rpc_auth_null)
+ return -EIO;
+ if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len)
+ return -EIO;
+ if (memcmp(str, starttls_token, starttls_len))
+ return -EIO;
+ return 0;
+}
+
+const struct rpc_authops authtls_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_TLS,
+ .au_name = "NULL",
+ .create = tls_create,
+ .destroy = tls_destroy,
+ .lookup_cred = tls_lookup_cred,
+};
+
+static struct rpc_auth tls_auth = {
+ .au_cslack = NUL_CALLSLACK,
+ .au_rslack = NUL_REPLYSLACK,
+ .au_verfsize = NUL_REPLYSLACK,
+ .au_ralign = NUL_REPLYSLACK,
+ .au_ops = &authtls_ops,
+ .au_flavor = RPC_AUTH_TLS,
+ .au_count = REFCOUNT_INIT(1),
+};
+
+static const struct rpc_credops tls_credops = {
+ .cr_name = "AUTH_TLS",
+ .crdestroy = tls_destroy_cred,
+ .crmatch = tls_match,
+ .crmarshal = tls_marshal,
+ .crwrap_req = rpcauth_wrap_req_encode,
+ .crrefresh = tls_refresh,
+ .crvalidate = tls_validate,
+ .crunwrap_resp = rpcauth_unwrap_resp_decode,
+};
+
+static struct rpc_cred tls_cred = {
+ .cr_lru = LIST_HEAD_INIT(tls_cred.cr_lru),
+ .cr_auth = &tls_auth,
+ .cr_ops = &tls_credops,
+ .cr_count = REFCOUNT_INIT(2),
+ .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
+};
The RPC header parser doesn't recognize TLS handshake traffic, so it
will close the connection prematurely with an error. To avoid that,
shunt the transport's data_ready callback when there is a TLS
handshake in progress.
The flag will be toggled by code added in a subsequent patch.
Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/xprtsock.h | 1 +
net/sunrpc/xprtsock.c | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 38284f25eddf..daef030f4848 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -90,5 +90,6 @@ struct sock_xprt {
#define XPRT_SOCK_WAKE_DISCONNECT (7)
#define XPRT_SOCK_CONNECT_SENT (8)
#define XPRT_SOCK_NOSPACE (9)
+#define XPRT_SOCK_IGNORE_RECV (10)
#endif /* _LINUX_SUNRPC_XPRTSOCK_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 73fab802996d..0a521aee0b2f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -703,6 +703,8 @@ static void xs_poll_check_readable(struct sock_xprt *transport)
{
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
+ return;
if (!xs_poll_socket_readable(transport))
return;
if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
@@ -1394,6 +1396,10 @@ static void xs_data_ready(struct sock *sk)
trace_xs_data_ready(xprt);
transport->old_data_ready(sk);
+
+ if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
+ return;
+
/* Any data means we had a useful conversation, so
* then we don't need to delay the next reconnect
*/
Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/xprtsock.h | 1
net/sunrpc/xprtsock.c | 243 ++++++++++++++++++++++++++++++++-------
2 files changed, 201 insertions(+), 43 deletions(-)
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index e0b6009f1f69..eaf3d705f758 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -57,6 +57,7 @@ struct sock_xprt {
struct work_struct error_worker;
struct work_struct recv_worker;
struct mutex recv_mutex;
+ struct completion handshake_done;
struct sockaddr_storage srcaddr;
unsigned short srcport;
int xprt_err;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a4fee00412d4..63fe97ede573 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -48,6 +48,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tls.h>
+#include <net/tlsh.h>
#include <linux/bvec.h>
#include <linux/highmem.h>
@@ -197,6 +198,11 @@ static struct ctl_table sunrpc_table[] = {
*/
#define XS_IDLE_DISC_TO (5U * 60 * HZ)
+/*
+ * TLS handshake timeout.
+ */
+#define XS_TLS_HANDSHAKE_TO (20U * HZ)
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# undef RPC_DEBUG_DATA
# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -1254,6 +1260,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (atomic_read(&transport->xprt.swapper))
sk_clear_memalloc(sk);
+ /* TODO: Maybe send a TLS Closure alert */
+
kernel_sock_shutdown(sock, SHUT_RDWR);
mutex_lock(&transport->recv_mutex);
@@ -2424,6 +2432,147 @@ static void xs_tcp_setup_socket(struct work_struct *work)
#if IS_ENABLED(CONFIG_TLS)
+/**
+ * xs_tls_handshake_done - TLS handshake completion handler
+ * @data: address of xprt to wake
+ * @status: status of handshake
+ *
+ */
+static void xs_tls_handshake_done(void *data, int status)
+{
+ struct rpc_xprt *xprt = data;
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+
+ transport->xprt_err = status ? -EACCES : 0;
+ complete(&transport->handshake_done);
+ xprt_put(xprt);
+}
+
+static int xs_tls_handshake_sync(struct rpc_xprt *xprt, unsigned int xprtsec)
+{
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+ int rc;
+
+ init_completion(&transport->handshake_done);
+ set_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state);
+
+ transport->xprt_err = -ETIMEDOUT;
+ switch (xprtsec) {
+ case RPC_XPRTSEC_TLS_X509:
+ rc = tls_client_hello_x509(transport->sock,
+ xs_tls_handshake_done, xprt_get(xprt),
+ TLSH_DEFAULT_PRIORITIES, TLSH_NO_PEERID,
+ TLSH_NO_CERT);
+ break;
+ case RPC_XPRTSEC_TLS_PSK:
+ rc = tls_client_hello_psk(transport->sock, xs_tls_handshake_done,
+ xprt_get(xprt), TLSH_DEFAULT_PRIORITIES,
+ TLSH_NO_PEERID);
+ break;
+ default:
+ rc = -EACCES;
+ }
+ if (rc)
+ goto out;
+
+ rc = wait_for_completion_interruptible_timeout(&transport->handshake_done,
+ XS_TLS_HANDSHAKE_TO);
+ if (rc < 0)
+ goto out;
+
+ rc = transport->xprt_err;
+
+out:
+ xs_stream_reset_connect(transport);
+ clear_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state);
+ return rc;
+}
+
+/*
+ * Transfer the connected socket to @dst_transport, then mark that
+ * xprt CONNECTED.
+ */
+static int xs_tls_finish_connecting(struct rpc_xprt *src_xprt,
+ struct sock_xprt *dst_transport)
+{
+ struct sock_xprt *src_transport =
+ container_of(src_xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *dst_xprt = &dst_transport->xprt;
+
+ if (!dst_transport->inet) {
+ struct socket *sock = src_transport->sock;
+ struct sock *sk = sock->sk;
+
+ /* Avoid temporary address, they are bad for long-lived
+ * connections such as NFS mounts.
+ * RFC4941, section 3.6 suggests that:
+ * Individual applications, which have specific
+ * knowledge about the normal duration of connections,
+ * MAY override this as appropriate.
+ */
+ if (xs_addr(dst_xprt)->sa_family == PF_INET6) {
+ ip6_sock_set_addr_preferences(sk,
+ IPV6_PREFER_SRC_PUBLIC);
+ }
+
+ xs_tcp_set_socket_timeouts(dst_xprt, sock);
+ tcp_sock_set_nodelay(sk);
+
+ lock_sock(sk);
+
+ /*
+ * @sk is already connected, so it now has the RPC callbacks.
+ * Reach into @src_transport to save the original ones.
+ */
+ dst_transport->old_data_ready = src_transport->old_data_ready;
+ dst_transport->old_state_change = src_transport->old_state_change;
+ dst_transport->old_write_space = src_transport->old_write_space;
+ dst_transport->old_error_report = src_transport->old_error_report;
+ sk->sk_user_data = dst_xprt;
+
+ /* socket options */
+ sock_reset_flag(sk, SOCK_LINGER);
+
+ xprt_clear_connected(dst_xprt);
+
+ dst_transport->sock = sock;
+ dst_transport->inet = sk;
+ dst_transport->file = src_transport->file;
+
+ release_sock(sk);
+
+ /* Reset src_transport before shutting down its clnt */
+ mutex_lock(&src_transport->recv_mutex);
+ src_transport->inet = NULL;
+ src_transport->sock = NULL;
+ src_transport->file = NULL;
+
+ xprt_clear_connected(src_xprt);
+ xs_sock_reset_connection_flags(src_xprt);
+ xs_stream_reset_connect(src_transport);
+ mutex_unlock(&src_transport->recv_mutex);
+ }
+
+ if (!xprt_bound(dst_xprt))
+ return -ENOTCONN;
+
+ xs_set_memalloc(dst_xprt);
+
+ if (!xprt_test_and_set_connected(dst_xprt)) {
+ dst_xprt->connect_cookie++;
+ clear_bit(XPRT_SOCK_CONNECTING, &dst_transport->sock_state);
+ xprt_clear_connecting(dst_xprt);
+
+ dst_xprt->stat.connect_count++;
+ dst_xprt->stat.connect_time += (long)jiffies -
+ dst_xprt->stat.connect_start;
+ xs_run_error_worker(dst_transport, XPRT_SOCK_WAKE_PENDING);
+ }
+ return 0;
+}
+
/**
* xs_tls_connect - establish a TLS session on a socket
* @work: queued work item
@@ -2433,61 +2582,70 @@ static void xs_tls_connect(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_create_args args = {
+ .net = transport->xprt.xprt_net,
+ .protocol = transport->xprt.prot,
+ .address = (struct sockaddr *)&transport->xprt.addr,
+ .addrsize = transport->xprt.addrlen,
+ .timeout = transport->xprtsec_clnt->cl_timeout,
+ .servername = transport->xprt.servername,
+ .nodename = transport->xprtsec_clnt->cl_nodename,
+ .program = transport->xprtsec_clnt->cl_program,
+ .prognumber = transport->xprtsec_clnt->cl_prog,
+ .version = transport->xprtsec_clnt->cl_vers,
+ .authflavor = RPC_AUTH_TLS,
+ .cred = transport->xprtsec_clnt->cl_cred,
+ .xprtsec = RPC_XPRTSEC_NONE,
+ };
+ unsigned int pflags = current->flags;
struct rpc_clnt *clnt;
+ struct rpc_xprt *xprt;
+ int status;
- clnt = transport->xprtsec_clnt;
- transport->xprtsec_clnt = NULL;
+ if (atomic_read(&transport->xprt.swapper))
+ current->flags |= PF_MEMALLOC;
+
+ xs_stream_start_connect(transport);
+
+ clnt = rpc_create(&args);
if (IS_ERR(clnt))
goto out_unlock;
+ rcu_read_lock();
+ xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ rcu_read_unlock();
- xs_tcp_setup_socket(work);
+ status = xs_tls_handshake_sync(xprt, transport->xprt.xprtsec);
+ if (status)
+ goto out_close;
+ status = xs_tls_finish_connecting(xprt, transport);
+ if (status)
+ goto out_close;
+ trace_rpc_socket_connect(&transport->xprt, transport->sock, 0);
+
+ xprt_put(xprt);
rpc_shutdown_client(clnt);
out_unlock:
+ xprt_unlock_connect(&transport->xprt, transport);
+ current_restore_flags(pflags, PF_MEMALLOC);
+ transport->xprtsec_clnt = NULL;
return;
-}
-static void xs_set_xprtsec_clnt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
-{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct rpc_create_args args = {
- .net = xprt->xprt_net,
- .protocol = xprt->prot,
- .address = (struct sockaddr *)&xprt->addr,
- .addrsize = xprt->addrlen,
- .timeout = clnt->cl_timeout,
- .servername = xprt->servername,
- .nodename = clnt->cl_nodename,
- .program = clnt->cl_program,
- .prognumber = clnt->cl_prog,
- .version = clnt->cl_vers,
- .authflavor = RPC_AUTH_TLS,
- .cred = clnt->cl_cred,
- .xprtsec = RPC_XPRTSEC_NONE,
- .flags = RPC_CLNT_CREATE_NOPING,
- };
-
- switch (xprt->xprtsec) {
- case RPC_XPRTSEC_TLS_X509:
- case RPC_XPRTSEC_TLS_PSK:
- transport->xprtsec_clnt = rpc_create(&args);
- break;
- default:
- transport->xprtsec_clnt = ERR_PTR(-ENOTCONN);
- }
-}
-
-#else
-
-static void xs_set_xprtsec_clnt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
-{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+out_close:
+ xprt_put(xprt);
+ rpc_shutdown_client(clnt);
- transport->xprtsec_clnt = ERR_PTR(-ENOTCONN);
+ /* xprt_force_disconnect() wakes tasks with a fixed tk_status code.
+ * Wake them first here to ensure they get our tk_status code.
+ */
+ xprt_wake_pending_tasks(&transport->xprt, status);
+ xs_tcp_force_close(&transport->xprt);
+ xprt_clear_connecting(&transport->xprt);
+ goto out_unlock;
}
-#endif /*CONFIG_TLS */
+#endif /* CONFIG_TLS */
/**
* xs_connect - connect a socket to a remote endpoint
@@ -2520,8 +2678,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
} else
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
- xs_set_xprtsec_clnt(task->tk_client, xprt);
-
+ transport->xprtsec_clnt = task->tk_client;
queue_delayed_work(xprtiod_workqueue,
&transport->connect_worker,
delay);
The new field is used to match struct nfs_clients that have the same
TLS policy setting.
Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfs/client.c | 6 ++++++
fs/nfs/internal.h | 1 +
fs/nfs/nfs3client.c | 1 +
fs/nfs/nfs4client.c | 16 +++++++++++-----
include/linux/nfs_fs_sb.h | 5 ++++-
5 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e828504cc396..0896e4f047d1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -184,6 +184,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_net = get_net(cl_init->net);
clp->cl_principal = "*";
+ clp->cl_xprtsec = cl_init->xprtsec_policy;
return clp;
error_cleanup:
@@ -326,6 +327,10 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
sap))
continue;
+ /* Match the xprt security policy */
+ if (clp->cl_xprtsec != data->xprtsec_policy)
+ continue;
+
refcount_inc(&clp->cl_count);
return clp;
}
@@ -675,6 +680,7 @@ static int nfs_init_server(struct nfs_server *server,
.cred = server->cred,
.nconnect = ctx->nfs_server.nconnect,
.init_flags = (1UL << NFS_CS_REUSEPORT),
+ .xprtsec_policy = NFS_CS_XPRTSEC_NONE,
};
struct nfs_client *clp;
int error;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7eefa16ed381..0a3512c39376 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -81,6 +81,7 @@ struct nfs_client_initdata {
struct net *net;
const struct rpc_timeout *timeparms;
const struct cred *cred;
+ unsigned int xprtsec_policy;
};
/*
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 5601e47360c2..d7c5db1f5825 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -93,6 +93,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
+ .xprtsec_policy = mds_clp->cl_xprtsec,
};
struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 47a6cf892c95..682d47e5977b 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -895,7 +895,8 @@ static int nfs4_set_client(struct nfs_server *server,
int proto, const struct rpc_timeout *timeparms,
u32 minorversion, unsigned int nconnect,
unsigned int max_connect,
- struct net *net)
+ struct net *net,
+ unsigned int xprtsec_policy)
{
struct nfs_client_initdata cl_init = {
.hostname = hostname,
@@ -908,6 +909,7 @@ static int nfs4_set_client(struct nfs_server *server,
.net = net,
.timeparms = timeparms,
.cred = server->cred,
+ .xprtsec_policy = xprtsec_policy,
};
struct nfs_client *clp;
@@ -1156,7 +1158,8 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
ctx->minorversion,
ctx->nfs_server.nconnect,
ctx->nfs_server.max_connect,
- fc->net_ns);
+ fc->net_ns,
+ NFS_CS_XPRTSEC_NONE);
if (error < 0)
return error;
@@ -1246,7 +1249,8 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
parent_client->cl_max_connect,
- parent_client->cl_net);
+ parent_client->cl_net,
+ parent_client->cl_xprtsec);
if (!error)
goto init_server;
#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
@@ -1262,7 +1266,8 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
parent_client->cl_max_connect,
- parent_client->cl_net);
+ parent_client->cl_net,
+ parent_client->cl_xprtsec);
if (error < 0)
goto error;
@@ -1335,7 +1340,8 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
error = nfs4_set_client(server, hostname, sap, salen, buf,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion,
- clp->cl_nconnect, clp->cl_max_connect, net);
+ clp->cl_nconnect, clp->cl_max_connect,
+ net, clp->cl_xprtsec);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
if (error != 0) {
nfs_server_insert_lists(server);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 157d2bd6b241..7af8fba6f531 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -63,7 +63,10 @@ struct nfs_client {
u32 cl_minorversion;/* NFSv4 minorversion */
unsigned int cl_nconnect; /* Number of connections */
unsigned int cl_max_connect; /* max number of xprts allowed */
- const char * cl_principal; /* used for machine cred */
+ const char * cl_principal; /* used for machine cred */
+ unsigned int cl_xprtsec; /* xprt security policy */
+#define NFS_CS_XPRTSEC_NONE (0)
+#define NFS_CS_XPRTSEC_TLS (1)
#if IS_ENABLED(CONFIG_NFS_V4)
struct list_head cl_ds_clients; /* auth flavor data servers */