Checking to see if this could fit in struct sock.
This goes against v5.8
I tried to make it tl;dr in commit 2/2 but motivation is also described
a bit in https://lore.kernel.org/linux-api/CAGbU3_nVvuzMn2wo4_ZKufWcGfmGsopVujzTWw-Bbeky=xS+GA@mail.gmail.com/
This command attaches the zero terminated string in optval to the
socket for troubleshooting purposes. The free string is displayed in the
process fdinfo file for that fd (/proc/<pid>/fdinfo/<fd>).
One intended usage is to allow processes to self-document sockets
for netstat and friends to report
We ignore optlen and constrain the string to a static max size
Signed-off-by: Pascal Bouchareine <[email protected]>
---
include/net/sock.h | 4 ++++
include/uapi/asm-generic/socket.h | 2 ++
net/core/sock.c | 23 +++++++++++++++++++++++
net/socket.c | 5 +++++
4 files changed, 34 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index 1183507df95b..6b4fd1383282 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -342,6 +342,7 @@ struct bpf_sk_storage;
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
+ * @sk_description: user supplied with SO_DESCRIPTION
*/
struct sock {
/*
@@ -519,6 +520,9 @@ struct sock {
struct bpf_sk_storage __rcu *sk_bpf_storage;
#endif
struct rcu_head sk_rcu;
+
+#define SK_MAX_DESC_SIZE 256
+ char *sk_description;
};
enum sk_pacing {
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 77f7c1638eb1..fb51c4bb7a12 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -119,6 +119,8 @@
#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_DESCRIPTION 69
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/sock.c b/net/core/sock.c
index 2e5b7870e5d3..2cb44a0e38b7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -828,6 +828,24 @@ void sock_set_rcvbuf(struct sock *sk, int val)
}
EXPORT_SYMBOL(sock_set_rcvbuf);
+int sock_set_description(struct sock *sk, char __user *user_desc)
+{
+ char *old, *desc;
+
+ desc = strndup_user(user_desc, SK_MAX_DESC_SIZE, GFP_KERNEL_ACCOUNT);
+ if (IS_ERR(desc))
+ return PTR_ERR(desc);
+
+ lock_sock(sk);
+ old = sk->sk_description;
+ sk->sk_description = desc;
+ release_sock(sk);
+
+ kfree(old);
+
+ return 0;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -850,6 +868,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optname == SO_BINDTODEVICE)
return sock_setbindtodevice(sk, optval, optlen);
+ if (optname == SO_DESCRIPTION)
+ return sock_set_description(sk, optval);
+
if (optlen < sizeof(int))
return -EINVAL;
@@ -1792,6 +1813,8 @@ static void __sk_destruct(struct rcu_head *head)
RCU_INIT_POINTER(sk->sk_filter, NULL);
}
+ kfree(sk->sk_description);
+
sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
#ifdef CONFIG_BPF_SYSCALL
diff --git a/net/socket.c b/net/socket.c
index 976426d03f09..4f2c1a7744b0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -134,6 +134,11 @@ static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
struct socket *sock = f->private_data;
+ lock_sock(sock->sk);
+ if (sock->sk->sk_description)
+ seq_printf(m, "desc:\t%s\n", sock->sk->sk_description);
+ release_sock(sock->sk);
+
if (sock->ops->show_fdinfo)
sock->ops->show_fdinfo(m, sock);
}
--
2.25.1
Let caller specify allocation.
Preserve existing calls with GFP_USER.
Signed-off-by: Pascal Bouchareine <[email protected]>
---
drivers/dma-buf/dma-buf.c | 2 +-
drivers/gpu/drm/i915/i915_debugfs_params.c | 2 +-
drivers/gpu/drm/vc4/vc4_bo.c | 3 +-
drivers/input/misc/uinput.c | 2 +-
drivers/s390/char/keyboard.c | 3 +-
drivers/vfio/vfio.c | 3 +-
drivers/virt/fsl_hypervisor.c | 4 +--
fs/f2fs/file.c | 3 +-
fs/fsopen.c | 6 ++--
fs/namespace.c | 2 +-
fs/nfs/fs_context.c | 8 +++--
fs/xfs/xfs_ioctl.c | 2 +-
include/linux/string.h | 2 +-
kernel/events/core.c | 2 +-
kernel/module.c | 2 +-
kernel/trace/trace_event_perf.c | 2 +-
mm/util.c | 34 +++++++++++++---------
net/core/pktgen.c | 2 +-
security/keys/dh.c | 3 +-
security/keys/keyctl.c | 17 +++++++----
security/keys/keyctl_pkey.c | 2 +-
21 files changed, 63 insertions(+), 43 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 1ca609f66fdf..3d94ba811f4b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -326,7 +326,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
*/
static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
{
- char *name = strndup_user(buf, DMA_BUF_NAME_LEN);
+ char *name = strndup_user(buf, DMA_BUF_NAME_LEN, GFP_USER);
long ret = 0;
if (IS_ERR(name))
diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c
index 62b2c5f0495d..4c0a77e15c09 100644
--- a/drivers/gpu/drm/i915/i915_debugfs_params.c
+++ b/drivers/gpu/drm/i915/i915_debugfs_params.c
@@ -142,7 +142,7 @@ static ssize_t i915_param_charp_write(struct file *file,
kernel_param_lock(THIS_MODULE);
old = *s;
- new = strndup_user(ubuf, PAGE_SIZE);
+ new = strndup_user(ubuf, PAGE_SIZE, GFP_USER);
if (IS_ERR(new)) {
len = PTR_ERR(new);
goto out;
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 72d30d90b856..deb2c4957a6f 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -1072,7 +1072,8 @@ int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
if (!args->len)
return -EINVAL;
- name = strndup_user(u64_to_user_ptr(args->name), args->len + 1);
+ name = strndup_user(u64_to_user_ptr(args->name), args->len + 1,
+ GFP_USER);
if (IS_ERR(name))
return PTR_ERR(name);
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index f2593133e524..11627a4b4d87 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -926,7 +926,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
goto out;
}
- phys = strndup_user(p, 1024);
+ phys = strndup_user(p, 1024, GFP_USER);
if (IS_ERR(phys)) {
retval = PTR_ERR(phys);
goto out;
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 567aedc03c76..8e58921d5db4 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -464,7 +464,8 @@ do_kdgkb_ioctl(struct kbd_data *kbd, struct kbsentry __user *u_kbs,
case KDSKBSENT:
if (!perm)
return -EPERM;
- p = strndup_user(u_kbs->kb_string, sizeof(u_kbs->kb_string));
+ p = strndup_user(u_kbs->kb_string,
+ sizeof(u_kbs->kb_string), GFP_USER);
if (IS_ERR(p))
return PTR_ERR(p);
kfree(kbd->func_table[kb_func]);
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 580099afeaff..d55aae6661eb 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1547,7 +1547,8 @@ static long vfio_group_fops_unl_ioctl(struct file *filep,
{
char *buf;
- buf = strndup_user((const char __user *)arg, PAGE_SIZE);
+ buf = strndup_user((const char __user *)arg, PAGE_SIZE,
+ GFP_USER);
if (IS_ERR(buf))
return PTR_ERR(buf);
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 1b0b11b55d2a..142c74aab2b0 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -346,11 +346,11 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
upropname = (char __user *)(uintptr_t)param.propname;
upropval = (void __user *)(uintptr_t)param.propval;
- path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
+ path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN, GFP_USER);
if (IS_ERR(path))
return PTR_ERR(path);
- propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
+ propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN, GFP_USER);
if (IS_ERR(propname)) {
ret = PTR_ERR(propname);
goto err_free_path;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3268f8dd59bb..ce37a97fbca7 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3395,7 +3395,8 @@ static int f2fs_set_volume_name(struct file *filp, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
+ vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX,
+ GFP_USER);
if (IS_ERR(vbuf))
return PTR_ERR(vbuf);
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 2fa3f241b762..c17ef9ee455c 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -125,7 +125,7 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
if (flags & ~FSOPEN_CLOEXEC)
return -EINVAL;
- fs_name = strndup_user(_fs_name, PAGE_SIZE);
+ fs_name = strndup_user(_fs_name, PAGE_SIZE, GFP_USER);
if (IS_ERR(fs_name))
return PTR_ERR(fs_name);
@@ -381,7 +381,7 @@ SYSCALL_DEFINE5(fsconfig,
}
if (_key) {
- param.key = strndup_user(_key, 256);
+ param.key = strndup_user(_key, 256, GFP_USER);
if (IS_ERR(param.key)) {
ret = PTR_ERR(param.key);
goto out_f;
@@ -394,7 +394,7 @@ SYSCALL_DEFINE5(fsconfig,
break;
case FSCONFIG_SET_STRING:
param.type = fs_value_is_string;
- param.string = strndup_user(_value, 256);
+ param.string = strndup_user(_value, 256, GFP_USER);
if (IS_ERR(param.string)) {
ret = PTR_ERR(param.string);
goto out_key;
diff --git a/fs/namespace.c b/fs/namespace.c
index 4a0f600a3328..1d9da91fbd2e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3099,7 +3099,7 @@ void *copy_mount_options(const void __user * data)
char *copy_mount_string(const void __user *data)
{
- return data ? strndup_user(data, PATH_MAX) : NULL;
+ return data ? strndup_user(data, PATH_MAX, GFP_USER) : NULL;
}
/*
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index ccc88be88d6a..fcdaeca51ca9 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -1077,18 +1077,20 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
} else
ctx->selected_flavor = RPC_AUTH_UNIX;
- c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+ c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN,
+ GFP_USER);
if (IS_ERR(c))
return PTR_ERR(c);
ctx->nfs_server.hostname = c;
- c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+ c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN,
+ GFP_USER);
if (IS_ERR(c))
return PTR_ERR(c);
ctx->nfs_server.export_path = c;
dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
- c = strndup_user(data->client_addr.data, 16);
+ c = strndup_user(data->client_addr.data, 16, GFP_USER);
if (IS_ERR(c))
return PTR_ERR(c);
ctx->client_address = c;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index a190212ca85d..216ab920c6b3 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -546,7 +546,7 @@ xfs_ioc_attrmulti_one(
if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE))
return -EINVAL;
- name = strndup_user(uname, MAXNAMELEN);
+ name = strndup_user(uname, MAXNAMELEN, GFP_USER);
if (IS_ERR(name))
return PTR_ERR(name);
diff --git a/include/linux/string.h b/include/linux/string.h
index 9b7a0632e87a..3eb69aee484d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -9,7 +9,7 @@
#include <stdarg.h>
#include <uapi/linux/string.h>
-extern char *strndup_user(const char __user *, long);
+extern char *strndup_user(const char __user *, long, gfp_t);
extern void *memdup_user(const void __user *, size_t);
extern void *vmemdup_user(const void __user *, size_t);
extern void *memdup_user_nul(const void __user *, size_t);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 856d98c36f56..3b0621563d7f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10072,7 +10072,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
int ret = -EINVAL;
char *filter_str;
- filter_str = strndup_user(arg, PAGE_SIZE);
+ filter_str = strndup_user(arg, PAGE_SIZE, GFP_USER);
if (IS_ERR(filter_str))
return PTR_ERR(filter_str);
diff --git a/kernel/module.c b/kernel/module.c
index aa183c9ac0a2..566c9ddb86d3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3872,7 +3872,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
flush_module_icache(mod);
/* Now copy in args */
- mod->args = strndup_user(uargs, ~0UL >> 1);
+ mod->args = strndup_user(uargs, ~0UL >> 1, GFP_USER);
if (IS_ERR(mod->args)) {
err = PTR_ERR(mod->args);
goto free_arch_cleanup;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 643e0b19920d..48569b39d1f2 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -310,7 +310,7 @@ int perf_uprobe_init(struct perf_event *p_event,
return -EINVAL;
path = strndup_user(u64_to_user_ptr(p_event->attr.uprobe_path),
- PATH_MAX);
+ PATH_MAX, GFP_USER);
if (IS_ERR(path)) {
ret = PTR_ERR(path);
return (ret == -EINVAL) ? -E2BIG : ret;
diff --git a/mm/util.c b/mm/util.c
index c63c8e47be57..cec32cc6d434 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -156,20 +156,13 @@ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
}
EXPORT_SYMBOL(kmemdup_nul);
-/**
- * memdup_user - duplicate memory region from user space
- *
- * @src: source address in user space
- * @len: number of bytes to copy
- *
- * Return: an ERR_PTR() on failure. Result is physically
- * contiguous, to be freed by kfree().
- */
-void *memdup_user(const void __user *src, size_t len)
+static inline void *__memdup_user_flags(const void __user *src, size_t len,
+ gfp_t gfp)
{
void *p;
- p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+ /* Don't let users spam with big allocs and use GFP_NOWARN */
+ p = kmalloc_track_caller(len, __GFP_NOWARN | gfp);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -180,6 +173,20 @@ void *memdup_user(const void __user *src, size_t len)
return p;
}
+
+/**
+ * memdup_user - duplicate memory region from user space
+ *
+ * @src: source address in user space
+ * @len: number of bytes to copy
+ *
+ * Return: an ERR_PTR() on failure. Result is physically
+ * contiguous, to be freed by kfree().
+ */
+void *memdup_user(const void __user *src, size_t len)
+{
+ return __memdup_user_flags(src, len, GFP_USER);
+}
EXPORT_SYMBOL(memdup_user);
/**
@@ -212,10 +219,11 @@ EXPORT_SYMBOL(vmemdup_user);
* strndup_user - duplicate an existing string from user space
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
*
* Return: newly allocated copy of @s or an ERR_PTR() in case of error
*/
-char *strndup_user(const char __user *s, long n)
+char *strndup_user(const char __user *s, long n, gfp_t gfp)
{
char *p;
long length;
@@ -228,7 +236,7 @@ char *strndup_user(const char __user *s, long n)
if (length > n)
return ERR_PTR(-EINVAL);
- p = memdup_user(s, length);
+ p = __memdup_user_flags(s, length, gfp);
if (IS_ERR(p))
return p;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b53b6d38c4df..ed12433e194a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -901,7 +901,7 @@ static ssize_t pktgen_if_write(struct file *file,
if (debug) {
size_t copy = min_t(size_t, count + 1, 1024);
- char *tp = strndup_user(user_buffer, copy);
+ char *tp = strndup_user(user_buffer, copy, GFP_USER);
if (IS_ERR(tp))
return PTR_ERR(tp);
diff --git a/security/keys/dh.c b/security/keys/dh.c
index c4c629bb1c03..fada6015b25b 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -266,7 +266,8 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
}
/* get KDF name string */
- hashname = strndup_user(kdfcopy->hashname, CRYPTO_MAX_ALG_NAME);
+ hashname = strndup_user(kdfcopy->hashname,
+ CRYPTO_MAX_ALG_NAME, GFP_USER);
if (IS_ERR(hashname)) {
ret = PTR_ERR(hashname);
goto out1;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9febd37a168f..0f74097cdcd1 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -93,7 +93,8 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
description = NULL;
if (_description) {
- description = strndup_user(_description, KEY_MAX_DESC_SIZE);
+ description = strndup_user(_description,
+ KEY_MAX_DESC_SIZE, GFP_USER);
if (IS_ERR(description)) {
ret = PTR_ERR(description);
goto error;
@@ -182,7 +183,8 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
goto error;
/* pull the description into kernel space */
- description = strndup_user(_description, KEY_MAX_DESC_SIZE);
+ description = strndup_user(_description, KEY_MAX_DESC_SIZE,
+ GFP_USER);
if (IS_ERR(description)) {
ret = PTR_ERR(description);
goto error;
@@ -192,7 +194,8 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
callout_info = NULL;
callout_len = 0;
if (_callout_info) {
- callout_info = strndup_user(_callout_info, PAGE_SIZE);
+ callout_info = strndup_user(_callout_info, PAGE_SIZE,
+ GFP_USER);
if (IS_ERR(callout_info)) {
ret = PTR_ERR(callout_info);
goto error2;
@@ -293,7 +296,7 @@ long keyctl_join_session_keyring(const char __user *_name)
/* fetch the name from userspace */
name = NULL;
if (_name) {
- name = strndup_user(_name, KEY_MAX_DESC_SIZE);
+ name = strndup_user(_name, KEY_MAX_DESC_SIZE, GFP_USER);
if (IS_ERR(name)) {
ret = PTR_ERR(name);
goto error;
@@ -728,7 +731,8 @@ long keyctl_keyring_search(key_serial_t ringid,
if (ret < 0)
goto error;
- description = strndup_user(_description, KEY_MAX_DESC_SIZE);
+ description = strndup_user(_description, KEY_MAX_DESC_SIZE,
+ GFP_USER);
if (IS_ERR(description)) {
ret = PTR_ERR(description);
goto error;
@@ -1742,7 +1746,8 @@ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type,
if (ret < 0)
goto error;
- restriction = strndup_user(_restriction, PAGE_SIZE);
+ restriction = strndup_user(_restriction, PAGE_SIZE,
+ GFP_USER);
if (IS_ERR(restriction)) {
ret = PTR_ERR(restriction);
goto error;
diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
index 931d8dfb4a7f..903792123a85 100644
--- a/security/keys/keyctl_pkey.c
+++ b/security/keys/keyctl_pkey.c
@@ -86,7 +86,7 @@ static int keyctl_pkey_params_get(key_serial_t id,
memset(params, 0, sizeof(*params));
params->encoding = "raw";
- p = strndup_user(_info, PAGE_SIZE);
+ p = strndup_user(_info, PAGE_SIZE, GFP_USER);
if (IS_ERR(p))
return PTR_ERR(p);
params->info = p;
--
2.25.1
On Sat, Aug 15, 2020 at 11:23:43AM -0700, Pascal Bouchareine wrote:
> Let caller specify allocation.
> Preserve existing calls with GFP_USER.
Bloody bad idea, unless you slap a BUG_ON(flags & GFP_ATOMIC) on it,
to make sure nobody tries _that_. Note that copying from userland
is an inherently blocking operation, and this interface invites
just that.
What do you need that flag for, anyway?
Signed-off-by: kernel test robot <[email protected]>
---
sock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index 2cb44a0e38b77d..f145a710974b48 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -828,7 +828,7 @@ void sock_set_rcvbuf(struct sock *sk, int val)
}
EXPORT_SYMBOL(sock_set_rcvbuf);
-int sock_set_description(struct sock *sk, char __user *user_desc)
+static int sock_set_description(struct sock *sk, char __user *user_desc)
{
char *old, *desc;
Hi Pascal,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on tip/perf/core]
[also build test WARNING on linux/master v5.8]
[cannot apply to security/next-testing linus/master next-20200814]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Pascal-Bouchareine/proc-socket-attach-description-to-sockets/20200816-090222
base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git d903b6d029d66e6478562d75ea18d89098f7b7e8
config: i386-randconfig-s002-20200816 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.2-168-g9554805c-dirty
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
sparse warnings: (new ones prefixed by >>)
>> net/core/sock.c:831:5: sparse: sparse: symbol 'sock_set_description' was not declared. Should it be static?
net/core/sock.c:2028:9: sparse: sparse: context imbalance in 'sk_clone_lock' - wrong count at exit
net/core/sock.c:2032:6: sparse: sparse: context imbalance in 'sk_free_unlock_clone' - unexpected unlock
net/core/sock.c:3117:6: sparse: sparse: context imbalance in 'lock_sock_fast' - different lock contexts for basic block
net/core/sock.c:3611:13: sparse: sparse: context imbalance in 'proto_seq_start' - wrong count at exit
net/core/sock.c:3623:13: sparse: sparse: context imbalance in 'proto_seq_stop' - wrong count at exit
Please review and possibly fold the followup patch.
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi Pascal,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on tip/perf/core]
[also build test WARNING on linux/master v5.8]
[cannot apply to security/next-testing linus/master next-20200814]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Pascal-Bouchareine/proc-socket-attach-description-to-sockets/20200816-090222
base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git d903b6d029d66e6478562d75ea18d89098f7b7e8
config: m68k-allmodconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
In file included from arch/m68k/include/asm/io_mm.h:25,
from arch/m68k/include/asm/io.h:8,
from include/linux/scatterlist.h:9,
from include/linux/dma-mapping.h:11,
from include/linux/skbuff.h:31,
from include/linux/ip.h:16,
from include/net/ip.h:22,
from include/linux/errqueue.h:6,
from net/core/sock.c:91:
arch/m68k/include/asm/raw_io.h: In function 'raw_rom_outsb':
arch/m68k/include/asm/raw_io.h:83:7: warning: variable '__w' set but not used [-Wunused-but-set-variable]
83 | ({u8 __w, __v = (b); u32 _addr = ((u32) (addr)); \
| ^~~
arch/m68k/include/asm/raw_io.h:430:3: note: in expansion of macro 'rom_out_8'
430 | rom_out_8(port, *buf++);
| ^~~~~~~~~
arch/m68k/include/asm/raw_io.h: In function 'raw_rom_outsw':
arch/m68k/include/asm/raw_io.h:86:8: warning: variable '__w' set but not used [-Wunused-but-set-variable]
86 | ({u16 __w, __v = (w); u32 _addr = ((u32) (addr)); \
| ^~~
arch/m68k/include/asm/raw_io.h:448:3: note: in expansion of macro 'rom_out_be16'
448 | rom_out_be16(port, *buf++);
| ^~~~~~~~~~~~
arch/m68k/include/asm/raw_io.h: In function 'raw_rom_outsw_swapw':
arch/m68k/include/asm/raw_io.h:90:8: warning: variable '__w' set but not used [-Wunused-but-set-variable]
90 | ({u16 __w, __v = (w); u32 _addr = ((u32) (addr)); \
| ^~~
arch/m68k/include/asm/raw_io.h:466:3: note: in expansion of macro 'rom_out_le16'
466 | rom_out_le16(port, *buf++);
| ^~~~~~~~~~~~
In file included from include/linux/kernel.h:11,
from include/linux/unaligned/access_ok.h:5,
from arch/m68k/include/asm/unaligned.h:18,
from net/core/sock.c:88:
include/linux/scatterlist.h: In function 'sg_set_buf':
arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
| ^~
include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
78 | # define unlikely(x) __builtin_expect(!!(x), 0)
| ^
include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON'
143 | BUG_ON(!virt_addr_valid(buf));
| ^~~~~~
include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid'
143 | BUG_ON(!virt_addr_valid(buf));
| ^~~~~~~~~~~~~~~
In file included from arch/m68k/include/asm/bug.h:32,
from include/linux/bug.h:5,
from include/linux/thread_info.h:12,
from include/asm-generic/preempt.h:5,
from ./arch/m68k/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/skbuff.h:15,
from include/linux/ip.h:16,
from include/net/ip.h:22,
from include/linux/errqueue.h:6,
from net/core/sock.c:91:
include/linux/dma-mapping.h: In function 'dma_map_resource':
arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
| ^~
include/asm-generic/bug.h:144:27: note: in definition of macro 'WARN_ON_ONCE'
144 | int __ret_warn_once = !!(condition); \
| ^~~~~~~~~
arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid'
170 | #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn))
| ^~~~~~~~~~~~~~~
include/linux/dma-mapping.h:352:19: note: in expansion of macro 'pfn_valid'
352 | if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr))))
| ^~~~~~~~~
net/core/sock.c: At top level:
>> net/core/sock.c:831:5: warning: no previous prototype for 'sock_set_description' [-Wmissing-prototypes]
831 | int sock_set_description(struct sock *sk, char __user *user_desc)
| ^~~~~~~~~~~~~~~~~~~~
vim +/sock_set_description +831 net/core/sock.c
830
> 831 int sock_set_description(struct sock *sk, char __user *user_desc)
832 {
833 char *old, *desc;
834
835 desc = strndup_user(user_desc, SK_MAX_DESC_SIZE, GFP_KERNEL_ACCOUNT);
836 if (IS_ERR(desc))
837 return PTR_ERR(desc);
838
839 lock_sock(sk);
840 old = sk->sk_description;
841 sk->sk_description = desc;
842 release_sock(sk);
843
844 kfree(old);
845
846 return 0;
847 }
848
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi Pascal,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on tip/perf/core]
[also build test WARNING on linux/master v5.8]
[cannot apply to security/next-testing linus/master next-20200814]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Pascal-Bouchareine/proc-socket-attach-description-to-sockets/20200816-090222
base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git d903b6d029d66e6478562d75ea18d89098f7b7e8
config: x86_64-randconfig-r006-20200816 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project ab9fc8bae805c785066779e76e7846aabad5609e)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
>> net/core/sock.c:831:5: warning: no previous prototype for function 'sock_set_description' [-Wmissing-prototypes]
int sock_set_description(struct sock *sk, char __user *user_desc)
^
net/core/sock.c:831:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int sock_set_description(struct sock *sk, char __user *user_desc)
^
static
1 warning generated.
vim +/sock_set_description +831 net/core/sock.c
830
> 831 int sock_set_description(struct sock *sk, char __user *user_desc)
832 {
833 char *old, *desc;
834
835 desc = strndup_user(user_desc, SK_MAX_DESC_SIZE, GFP_KERNEL_ACCOUNT);
836 if (IS_ERR(desc))
837 return PTR_ERR(desc);
838
839 lock_sock(sk);
840 old = sk->sk_description;
841 sk->sk_description = desc;
842 release_sock(sk);
843
844 kfree(old);
845
846 return 0;
847 }
848
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On Sat, Aug 15, 2020 at 11:42:10PM +0100, Al Viro wrote:
> On Sat, Aug 15, 2020 at 11:23:43AM -0700, Pascal Bouchareine wrote:
> > Let caller specify allocation.
> > Preserve existing calls with GFP_USER.
>
> Bloody bad idea, unless you slap a BUG_ON(flags & GFP_ATOMIC) on it,
> to make sure nobody tries _that_. Note that copying from userland
> is an inherently blocking operation, and this interface invites
> just that.
>
> What do you need that flag for, anyway?
You need it for kmem accounting.
On 8/15/20 11:23 AM, Pascal Bouchareine wrote:
> This command attaches the zero terminated string in optval to the
> socket for troubleshooting purposes. The free string is displayed in the
> process fdinfo file for that fd (/proc/<pid>/fdinfo/<fd>).
>
> One intended usage is to allow processes to self-document sockets
> for netstat and friends to report
>
> We ignore optlen and constrain the string to a static max size
>
>
1) You also ignored what would happen at accept() time.
Please test your patches with ASAN.
2) Also, why is that description specific to sockets ?
3) When a new socket option is added, it is customary to implement both setsockopt() and getsockopt()
for things like CRIU.
On Sun, Aug 16, 2020 at 11:15 AM Eric Dumazet <[email protected]> wrote:
>
> 1) You also ignored what would happen at accept() time.
>
> Please test your patches with ASAN.
Ouch, I will look into it - thanks for pointing that out & 3/
>
> 2) Also, why is that description specific to sockets ?
fcntl on struct file was deemed too intrusive - as far as fds are
concerned, regular files and pipes have names and local processes to
match against anyway
we're left with mostly remote targets - one example was to preserve
target host names after resolution (alas, I don't think there's a
clean flow to hook that transparently)
Let caller specify allocation.
Preserve existing calls with GFP_USER.
Signed-off-by: Pascal Bouchareine <[email protected]>
---
Updating patch 1/ and 2/ to address comments
drivers/dma-buf/dma-buf.c | 2 +-
drivers/gpu/drm/i915/i915_debugfs_params.c | 2 +-
drivers/gpu/drm/vc4/vc4_bo.c | 3 +-
drivers/input/misc/uinput.c | 2 +-
drivers/s390/char/keyboard.c | 3 +-
drivers/vfio/vfio.c | 3 +-
drivers/virt/fsl_hypervisor.c | 4 +--
fs/f2fs/file.c | 3 +-
fs/fsopen.c | 6 ++--
fs/namespace.c | 2 +-
fs/nfs/fs_context.c | 8 +++--
fs/xfs/xfs_ioctl.c | 2 +-
include/linux/string.h | 2 +-
kernel/events/core.c | 2 +-
kernel/module.c | 2 +-
kernel/trace/trace_event_perf.c | 2 +-
mm/util.c | 36 ++++++++++++++--------
net/core/pktgen.c | 2 +-
security/keys/dh.c | 3 +-
security/keys/keyctl.c | 17 ++++++----
security/keys/keyctl_pkey.c | 2 +-
21 files changed, 65 insertions(+), 43 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 1ca609f66fdf..3d94ba811f4b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -326,7 +326,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
*/
static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
{
- char *name = strndup_user(buf, DMA_BUF_NAME_LEN);
+ char *name = strndup_user(buf, DMA_BUF_NAME_LEN, GFP_USER);
long ret = 0;
if (IS_ERR(name))
diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c
index 62b2c5f0495d..4c0a77e15c09 100644
--- a/drivers/gpu/drm/i915/i915_debugfs_params.c
+++ b/drivers/gpu/drm/i915/i915_debugfs_params.c
@@ -142,7 +142,7 @@ static ssize_t i915_param_charp_write(struct file *file,
kernel_param_lock(THIS_MODULE);
old = *s;
- new = strndup_user(ubuf, PAGE_SIZE);
+ new = strndup_user(ubuf, PAGE_SIZE, GFP_USER);
if (IS_ERR(new)) {
len = PTR_ERR(new);
goto out;
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 72d30d90b856..deb2c4957a6f 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -1072,7 +1072,8 @@ int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
if (!args->len)
return -EINVAL;
- name = strndup_user(u64_to_user_ptr(args->name), args->len + 1);
+ name = strndup_user(u64_to_user_ptr(args->name), args->len + 1,
+ GFP_USER);
if (IS_ERR(name))
return PTR_ERR(name);
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index f2593133e524..11627a4b4d87 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -926,7 +926,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
goto out;
}
- phys = strndup_user(p, 1024);
+ phys = strndup_user(p, 1024, GFP_USER);
if (IS_ERR(phys)) {
retval = PTR_ERR(phys);
goto out;
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 567aedc03c76..8e58921d5db4 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -464,7 +464,8 @@ do_kdgkb_ioctl(struct kbd_data *kbd, struct kbsentry __user *u_kbs,
case KDSKBSENT:
if (!perm)
return -EPERM;
- p = strndup_user(u_kbs->kb_string, sizeof(u_kbs->kb_string));
+ p = strndup_user(u_kbs->kb_string,
+ sizeof(u_kbs->kb_string), GFP_USER);
if (IS_ERR(p))
return PTR_ERR(p);
kfree(kbd->func_table[kb_func]);
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 580099afeaff..d55aae6661eb 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1547,7 +1547,8 @@ static long vfio_group_fops_unl_ioctl(struct file *filep,
{
char *buf;
- buf = strndup_user((const char __user *)arg, PAGE_SIZE);
+ buf = strndup_user((const char __user *)arg, PAGE_SIZE,
+ GFP_USER);
if (IS_ERR(buf))
return PTR_ERR(buf);
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 1b0b11b55d2a..142c74aab2b0 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -346,11 +346,11 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
upropname = (char __user *)(uintptr_t)param.propname;
upropval = (void __user *)(uintptr_t)param.propval;
- path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
+ path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN, GFP_USER);
if (IS_ERR(path))
return PTR_ERR(path);
- propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
+ propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN, GFP_USER);
if (IS_ERR(propname)) {
ret = PTR_ERR(propname);
goto err_free_path;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3268f8dd59bb..ce37a97fbca7 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3395,7 +3395,8 @@ static int f2fs_set_volume_name(struct file *filp, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
+ vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX,
+ GFP_USER);
if (IS_ERR(vbuf))
return PTR_ERR(vbuf);
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 2fa3f241b762..c17ef9ee455c 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -125,7 +125,7 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
if (flags & ~FSOPEN_CLOEXEC)
return -EINVAL;
- fs_name = strndup_user(_fs_name, PAGE_SIZE);
+ fs_name = strndup_user(_fs_name, PAGE_SIZE, GFP_USER);
if (IS_ERR(fs_name))
return PTR_ERR(fs_name);
@@ -381,7 +381,7 @@ SYSCALL_DEFINE5(fsconfig,
}
if (_key) {
- param.key = strndup_user(_key, 256);
+ param.key = strndup_user(_key, 256, GFP_USER);
if (IS_ERR(param.key)) {
ret = PTR_ERR(param.key);
goto out_f;
@@ -394,7 +394,7 @@ SYSCALL_DEFINE5(fsconfig,
break;
case FSCONFIG_SET_STRING:
param.type = fs_value_is_string;
- param.string = strndup_user(_value, 256);
+ param.string = strndup_user(_value, 256, GFP_USER);
if (IS_ERR(param.string)) {
ret = PTR_ERR(param.string);
goto out_key;
diff --git a/fs/namespace.c b/fs/namespace.c
index 4a0f600a3328..1d9da91fbd2e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3099,7 +3099,7 @@ void *copy_mount_options(const void __user * data)
char *copy_mount_string(const void __user *data)
{
- return data ? strndup_user(data, PATH_MAX) : NULL;
+ return data ? strndup_user(data, PATH_MAX, GFP_USER) : NULL;
}
/*
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index ccc88be88d6a..fcdaeca51ca9 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -1077,18 +1077,20 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
} else
ctx->selected_flavor = RPC_AUTH_UNIX;
- c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+ c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN,
+ GFP_USER);
if (IS_ERR(c))
return PTR_ERR(c);
ctx->nfs_server.hostname = c;
- c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+ c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN,
+ GFP_USER);
if (IS_ERR(c))
return PTR_ERR(c);
ctx->nfs_server.export_path = c;
dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
- c = strndup_user(data->client_addr.data, 16);
+ c = strndup_user(data->client_addr.data, 16, GFP_USER);
if (IS_ERR(c))
return PTR_ERR(c);
ctx->client_address = c;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index a190212ca85d..216ab920c6b3 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -546,7 +546,7 @@ xfs_ioc_attrmulti_one(
if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE))
return -EINVAL;
- name = strndup_user(uname, MAXNAMELEN);
+ name = strndup_user(uname, MAXNAMELEN, GFP_USER);
if (IS_ERR(name))
return PTR_ERR(name);
diff --git a/include/linux/string.h b/include/linux/string.h
index 9b7a0632e87a..3eb69aee484d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -9,7 +9,7 @@
#include <stdarg.h>
#include <uapi/linux/string.h>
-extern char *strndup_user(const char __user *, long);
+extern char *strndup_user(const char __user *, long, gfp_t);
extern void *memdup_user(const void __user *, size_t);
extern void *vmemdup_user(const void __user *, size_t);
extern void *memdup_user_nul(const void __user *, size_t);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 856d98c36f56..3b0621563d7f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10072,7 +10072,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
int ret = -EINVAL;
char *filter_str;
- filter_str = strndup_user(arg, PAGE_SIZE);
+ filter_str = strndup_user(arg, PAGE_SIZE, GFP_USER);
if (IS_ERR(filter_str))
return PTR_ERR(filter_str);
diff --git a/kernel/module.c b/kernel/module.c
index aa183c9ac0a2..566c9ddb86d3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3872,7 +3872,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
flush_module_icache(mod);
/* Now copy in args */
- mod->args = strndup_user(uargs, ~0UL >> 1);
+ mod->args = strndup_user(uargs, ~0UL >> 1, GFP_USER);
if (IS_ERR(mod->args)) {
err = PTR_ERR(mod->args);
goto free_arch_cleanup;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 643e0b19920d..48569b39d1f2 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -310,7 +310,7 @@ int perf_uprobe_init(struct perf_event *p_event,
return -EINVAL;
path = strndup_user(u64_to_user_ptr(p_event->attr.uprobe_path),
- PATH_MAX);
+ PATH_MAX, GFP_USER);
if (IS_ERR(path)) {
ret = PTR_ERR(path);
return (ret == -EINVAL) ? -E2BIG : ret;
diff --git a/mm/util.c b/mm/util.c
index c63c8e47be57..14b56daa8fba 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -156,20 +156,15 @@ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
}
EXPORT_SYMBOL(kmemdup_nul);
-/**
- * memdup_user - duplicate memory region from user space
- *
- * @src: source address in user space
- * @len: number of bytes to copy
- *
- * Return: an ERR_PTR() on failure. Result is physically
- * contiguous, to be freed by kfree().
- */
-void *memdup_user(const void __user *src, size_t len)
+static inline void *__memdup_user_flags(const void __user *src, size_t len,
+ gfp_t gfp)
{
void *p;
- p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+ BUG_ON(gfp & __GFP_ATOMIC);
+
+ /* Don't let users spam with big allocs and use GFP_NOWARN */
+ p = kmalloc_track_caller(len, __GFP_NOWARN | gfp);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -180,6 +175,20 @@ void *memdup_user(const void __user *src, size_t len)
return p;
}
+
+/**
+ * memdup_user - duplicate memory region from user space
+ *
+ * @src: source address in user space
+ * @len: number of bytes to copy
+ *
+ * Return: an ERR_PTR() on failure. Result is physically
+ * contiguous, to be freed by kfree().
+ */
+void *memdup_user(const void __user *src, size_t len)
+{
+ return __memdup_user_flags(src, len, GFP_USER);
+}
EXPORT_SYMBOL(memdup_user);
/**
@@ -212,10 +221,11 @@ EXPORT_SYMBOL(vmemdup_user);
* strndup_user - duplicate an existing string from user space
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
*
* Return: newly allocated copy of @s or an ERR_PTR() in case of error
*/
-char *strndup_user(const char __user *s, long n)
+char *strndup_user(const char __user *s, long n, gfp_t gfp)
{
char *p;
long length;
@@ -228,7 +238,7 @@ char *strndup_user(const char __user *s, long n)
if (length > n)
return ERR_PTR(-EINVAL);
- p = memdup_user(s, length);
+ p = __memdup_user_flags(s, length, gfp);
if (IS_ERR(p))
return p;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b53b6d38c4df..ed12433e194a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -901,7 +901,7 @@ static ssize_t pktgen_if_write(struct file *file,
if (debug) {
size_t copy = min_t(size_t, count + 1, 1024);
- char *tp = strndup_user(user_buffer, copy);
+ char *tp = strndup_user(user_buffer, copy, GFP_USER);
if (IS_ERR(tp))
return PTR_ERR(tp);
diff --git a/security/keys/dh.c b/security/keys/dh.c
index c4c629bb1c03..fada6015b25b 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -266,7 +266,8 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
}
/* get KDF name string */
- hashname = strndup_user(kdfcopy->hashname, CRYPTO_MAX_ALG_NAME);
+ hashname = strndup_user(kdfcopy->hashname,
+ CRYPTO_MAX_ALG_NAME, GFP_USER);
if (IS_ERR(hashname)) {
ret = PTR_ERR(hashname);
goto out1;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9febd37a168f..0f74097cdcd1 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -93,7 +93,8 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
description = NULL;
if (_description) {
- description = strndup_user(_description, KEY_MAX_DESC_SIZE);
+ description = strndup_user(_description,
+ KEY_MAX_DESC_SIZE, GFP_USER);
if (IS_ERR(description)) {
ret = PTR_ERR(description);
goto error;
@@ -182,7 +183,8 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
goto error;
/* pull the description into kernel space */
- description = strndup_user(_description, KEY_MAX_DESC_SIZE);
+ description = strndup_user(_description, KEY_MAX_DESC_SIZE,
+ GFP_USER);
if (IS_ERR(description)) {
ret = PTR_ERR(description);
goto error;
@@ -192,7 +194,8 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
callout_info = NULL;
callout_len = 0;
if (_callout_info) {
- callout_info = strndup_user(_callout_info, PAGE_SIZE);
+ callout_info = strndup_user(_callout_info, PAGE_SIZE,
+ GFP_USER);
if (IS_ERR(callout_info)) {
ret = PTR_ERR(callout_info);
goto error2;
@@ -293,7 +296,7 @@ long keyctl_join_session_keyring(const char __user *_name)
/* fetch the name from userspace */
name = NULL;
if (_name) {
- name = strndup_user(_name, KEY_MAX_DESC_SIZE);
+ name = strndup_user(_name, KEY_MAX_DESC_SIZE, GFP_USER);
if (IS_ERR(name)) {
ret = PTR_ERR(name);
goto error;
@@ -728,7 +731,8 @@ long keyctl_keyring_search(key_serial_t ringid,
if (ret < 0)
goto error;
- description = strndup_user(_description, KEY_MAX_DESC_SIZE);
+ description = strndup_user(_description, KEY_MAX_DESC_SIZE,
+ GFP_USER);
if (IS_ERR(description)) {
ret = PTR_ERR(description);
goto error;
@@ -1742,7 +1746,8 @@ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type,
if (ret < 0)
goto error;
- restriction = strndup_user(_restriction, PAGE_SIZE);
+ restriction = strndup_user(_restriction, PAGE_SIZE,
+ GFP_USER);
if (IS_ERR(restriction)) {
ret = PTR_ERR(restriction);
goto error;
diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
index 931d8dfb4a7f..903792123a85 100644
--- a/security/keys/keyctl_pkey.c
+++ b/security/keys/keyctl_pkey.c
@@ -86,7 +86,7 @@ static int keyctl_pkey_params_get(key_serial_t id,
memset(params, 0, sizeof(*params));
params->encoding = "raw";
- p = strndup_user(_info, PAGE_SIZE);
+ p = strndup_user(_info, PAGE_SIZE, GFP_USER);
if (IS_ERR(p))
return PTR_ERR(p);
params->info = p;
--
2.25.1
This command attaches the zero terminated string in optval to the
socket for troubleshooting purposes. The free string is displayed in the
process fdinfo file for that fd (/proc/<pid>/fdinfo/<fd>).
One intended usage is to allow processes to self-document sockets
for netstat and friends to report
We ignore optlen and constrain the string to a static max size
Signed-off-by: Pascal Bouchareine <[email protected]>
---
include/net/sock.h | 4 +++
include/uapi/asm-generic/socket.h | 2 ++
net/core/sock.c | 53 +++++++++++++++++++++++++++++++
net/socket.c | 5 +++
4 files changed, 64 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index 1183507df95b..6b4fd1383282 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -342,6 +342,7 @@ struct bpf_sk_storage;
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
+ * @sk_description: user supplied with SO_DESCRIPTION
*/
struct sock {
/*
@@ -519,6 +520,9 @@ struct sock {
struct bpf_sk_storage __rcu *sk_bpf_storage;
#endif
struct rcu_head sk_rcu;
+
+#define SK_MAX_DESC_SIZE 256
+ char *sk_description;
};
enum sk_pacing {
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 77f7c1638eb1..fb51c4bb7a12 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -119,6 +119,8 @@
#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_DESCRIPTION 69
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/sock.c b/net/core/sock.c
index 2e5b7870e5d3..b8bad57338d8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -828,6 +828,49 @@ void sock_set_rcvbuf(struct sock *sk, int val)
}
EXPORT_SYMBOL(sock_set_rcvbuf);
+static int sock_set_description(struct sock *sk, char __user *user_desc)
+{
+ char *old, *desc;
+
+ desc = strndup_user(user_desc, SK_MAX_DESC_SIZE, GFP_KERNEL_ACCOUNT);
+ if (IS_ERR(desc))
+ return PTR_ERR(desc);
+
+ lock_sock(sk);
+ old = sk->sk_description;
+ sk->sk_description = desc;
+ release_sock(sk);
+
+ kfree(old);
+
+ return 0;
+}
+
+static int sock_get_description(struct sock *sk, char __user *optval,
+ int __user *optlen, int len)
+{
+ char desc[SK_MAX_DESC_SIZE];
+
+ lock_sock(sk);
+ if (sk->sk_description) {
+ /* strndup_user: len(desc + nul) <= SK_MAX_DESC_SIZE */
+ len = min_t(unsigned int, len,
+ strlen(sk->sk_description) + 1);
+ memcpy(desc, sk->sk_description, len);
+ } else {
+ len = 0;
+ }
+ release_sock(sk);
+
+ if (copy_to_user(optval, desc, len))
+ return -EFAULT;
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ return 0;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -850,6 +893,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optname == SO_BINDTODEVICE)
return sock_setbindtodevice(sk, optval, optlen);
+ if (optname == SO_DESCRIPTION)
+ return sock_set_description(sk, optval);
+
if (optlen < sizeof(int))
return -EINVAL;
@@ -1614,6 +1660,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_bound_dev_if;
break;
+ case SO_DESCRIPTION:
+ return sock_get_description(sk, optval, optlen, len);
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1792,6 +1841,8 @@ static void __sk_destruct(struct rcu_head *head)
RCU_INIT_POINTER(sk->sk_filter, NULL);
}
+ kfree(sk->sk_description);
+
sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
#ifdef CONFIG_BPF_SYSCALL
@@ -1964,6 +2015,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
if (sk_user_data_is_nocopy(newsk))
newsk->sk_user_data = NULL;
+ newsk->sk_description = NULL;
+
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
newsk->sk_priority = 0;
diff --git a/net/socket.c b/net/socket.c
index 976426d03f09..4f2c1a7744b0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -134,6 +134,11 @@ static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
struct socket *sock = f->private_data;
+ lock_sock(sock->sk);
+ if (sock->sk->sk_description)
+ seq_printf(m, "desc:\t%s\n", sock->sk->sk_description);
+ release_sock(sock->sk);
+
if (sock->ops->show_fdinfo)
sock->ops->show_fdinfo(m, sock);
}
--
2.25.1
On Fri, 21 Aug 2020 20:28:26 -0700 Pascal Bouchareine <[email protected]> wrote:
> Let caller specify allocation.
> Preserve existing calls with GFP_USER.
>
> 21 files changed, 65 insertions(+), 43 deletions(-)
Why change all existing callsites so that one callsite can pass in a
different gfp_t?
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 1ca609f66fdf..3d94ba811f4b 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -326,7 +326,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
> */
> static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
> {
> - char *name = strndup_user(buf, DMA_BUF_NAME_LEN);
> + char *name = strndup_user(buf, DMA_BUF_NAME_LEN, GFP_USER);
> long ret = 0;
Wouldn't
#include <linux/gfp.h>
char *__strndup_user(const char __user *s, long n, gfp_t gfp);
static inline char *strndup_user(const char __user *s, long n)
{
return __strndup_user(s, n, GFP_USER);
}
be simpler?
Also...
why does strndup_user() use GFP_USER? Nobody will be mapping the
resulting strings into user pagetables (will they?). This was done by
Al's 6c2c97a24f096e32, which doesn't have a changelog :(
In [patch 2/2],
+ desc = strndup_user(user_desc, SK_MAX_DESC_SIZE, GFP_KERNEL_ACCOUNT);
if GFP_USER is legit then shouldn't this be GFP_USER_ACCOUNT (ie,
GFP_USER|__GFP_ACCOUNT)?
Hi Pascal,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on security/next-testing]
[also build test ERROR on linux/master]
[cannot apply to mmotm/master tip/perf/core linus/master v5.9-rc1 next-20200821]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Pascal-Bouchareine/mm-add-GFP-mask-param-to-strndup_user/20200822-122903
base: https://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git next-testing
config: alpha-randconfig-r025-20200822 (attached as .config)
compiler: alpha-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=alpha
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All errors (new ones prefixed by >>):
net/core/sock.c: In function 'sock_setsockopt':
>> net/core/sock.c:896:17: error: 'SO_DESCRIPTION' undeclared (first use in this function); did you mean 'MODULE_DESCRIPTION'?
896 | if (optname == SO_DESCRIPTION)
| ^~~~~~~~~~~~~~
| MODULE_DESCRIPTION
net/core/sock.c:896:17: note: each undeclared identifier is reported only once for each function it appears in
net/core/sock.c: In function 'sock_getsockopt':
net/core/sock.c:1663:7: error: 'SO_DESCRIPTION' undeclared (first use in this function); did you mean 'MODULE_DESCRIPTION'?
1663 | case SO_DESCRIPTION:
| ^~~~~~~~~~~~~~
| MODULE_DESCRIPTION
# https://github.com/0day-ci/linux/commit/35dcbc957b52151274a9e06b2d6c4739b5061622
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Pascal-Bouchareine/mm-add-GFP-mask-param-to-strndup_user/20200822-122903
git checkout 35dcbc957b52151274a9e06b2d6c4739b5061622
vim +896 net/core/sock.c
873
874 /*
875 * This is meant for all protocols to use and covers goings on
876 * at the socket level. Everything here is generic.
877 */
878
879 int sock_setsockopt(struct socket *sock, int level, int optname,
880 char __user *optval, unsigned int optlen)
881 {
882 struct sock_txtime sk_txtime;
883 struct sock *sk = sock->sk;
884 int val;
885 int valbool;
886 struct linger ling;
887 int ret = 0;
888
889 /*
890 * Options without arguments
891 */
892
893 if (optname == SO_BINDTODEVICE)
894 return sock_setbindtodevice(sk, optval, optlen);
895
> 896 if (optname == SO_DESCRIPTION)
897 return sock_set_description(sk, optval);
898
899 if (optlen < sizeof(int))
900 return -EINVAL;
901
902 if (get_user(val, (int __user *)optval))
903 return -EFAULT;
904
905 valbool = val ? 1 : 0;
906
907 lock_sock(sk);
908
909 switch (optname) {
910 case SO_DEBUG:
911 if (val && !capable(CAP_NET_ADMIN))
912 ret = -EACCES;
913 else
914 sock_valbool_flag(sk, SOCK_DBG, valbool);
915 break;
916 case SO_REUSEADDR:
917 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
918 break;
919 case SO_REUSEPORT:
920 sk->sk_reuseport = valbool;
921 break;
922 case SO_TYPE:
923 case SO_PROTOCOL:
924 case SO_DOMAIN:
925 case SO_ERROR:
926 ret = -ENOPROTOOPT;
927 break;
928 case SO_DONTROUTE:
929 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
930 sk_dst_reset(sk);
931 break;
932 case SO_BROADCAST:
933 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
934 break;
935 case SO_SNDBUF:
936 /* Don't error on this BSD doesn't and if you think
937 * about it this is right. Otherwise apps have to
938 * play 'guess the biggest size' games. RCVBUF/SNDBUF
939 * are treated in BSD as hints
940 */
941 val = min_t(u32, val, sysctl_wmem_max);
942 set_sndbuf:
943 /* Ensure val * 2 fits into an int, to prevent max_t()
944 * from treating it as a negative value.
945 */
946 val = min_t(int, val, INT_MAX / 2);
947 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
948 WRITE_ONCE(sk->sk_sndbuf,
949 max_t(int, val * 2, SOCK_MIN_SNDBUF));
950 /* Wake up sending tasks if we upped the value. */
951 sk->sk_write_space(sk);
952 break;
953
954 case SO_SNDBUFFORCE:
955 if (!capable(CAP_NET_ADMIN)) {
956 ret = -EPERM;
957 break;
958 }
959
960 /* No negative values (to prevent underflow, as val will be
961 * multiplied by 2).
962 */
963 if (val < 0)
964 val = 0;
965 goto set_sndbuf;
966
967 case SO_RCVBUF:
968 /* Don't error on this BSD doesn't and if you think
969 * about it this is right. Otherwise apps have to
970 * play 'guess the biggest size' games. RCVBUF/SNDBUF
971 * are treated in BSD as hints
972 */
973 __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
974 break;
975
976 case SO_RCVBUFFORCE:
977 if (!capable(CAP_NET_ADMIN)) {
978 ret = -EPERM;
979 break;
980 }
981
982 /* No negative values (to prevent underflow, as val will be
983 * multiplied by 2).
984 */
985 __sock_set_rcvbuf(sk, max(val, 0));
986 break;
987
988 case SO_KEEPALIVE:
989 if (sk->sk_prot->keepalive)
990 sk->sk_prot->keepalive(sk, valbool);
991 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
992 break;
993
994 case SO_OOBINLINE:
995 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
996 break;
997
998 case SO_NO_CHECK:
999 sk->sk_no_check_tx = valbool;
1000 break;
1001
1002 case SO_PRIORITY:
1003 if ((val >= 0 && val <= 6) ||
1004 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1005 sk->sk_priority = val;
1006 else
1007 ret = -EPERM;
1008 break;
1009
1010 case SO_LINGER:
1011 if (optlen < sizeof(ling)) {
1012 ret = -EINVAL; /* 1003.1g */
1013 break;
1014 }
1015 if (copy_from_user(&ling, optval, sizeof(ling))) {
1016 ret = -EFAULT;
1017 break;
1018 }
1019 if (!ling.l_onoff)
1020 sock_reset_flag(sk, SOCK_LINGER);
1021 else {
1022 #if (BITS_PER_LONG == 32)
1023 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
1024 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
1025 else
1026 #endif
1027 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
1028 sock_set_flag(sk, SOCK_LINGER);
1029 }
1030 break;
1031
1032 case SO_BSDCOMPAT:
1033 sock_warn_obsolete_bsdism("setsockopt");
1034 break;
1035
1036 case SO_PASSCRED:
1037 if (valbool)
1038 set_bit(SOCK_PASSCRED, &sock->flags);
1039 else
1040 clear_bit(SOCK_PASSCRED, &sock->flags);
1041 break;
1042
1043 case SO_TIMESTAMP_OLD:
1044 __sock_set_timestamps(sk, valbool, false, false);
1045 break;
1046 case SO_TIMESTAMP_NEW:
1047 __sock_set_timestamps(sk, valbool, true, false);
1048 break;
1049 case SO_TIMESTAMPNS_OLD:
1050 __sock_set_timestamps(sk, valbool, false, true);
1051 break;
1052 case SO_TIMESTAMPNS_NEW:
1053 __sock_set_timestamps(sk, valbool, true, true);
1054 break;
1055 case SO_TIMESTAMPING_NEW:
1056 sock_set_flag(sk, SOCK_TSTAMP_NEW);
1057 /* fall through */
1058 case SO_TIMESTAMPING_OLD:
1059 if (val & ~SOF_TIMESTAMPING_MASK) {
1060 ret = -EINVAL;
1061 break;
1062 }
1063
1064 if (val & SOF_TIMESTAMPING_OPT_ID &&
1065 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
1066 if (sk->sk_protocol == IPPROTO_TCP &&
1067 sk->sk_type == SOCK_STREAM) {
1068 if ((1 << sk->sk_state) &
1069 (TCPF_CLOSE | TCPF_LISTEN)) {
1070 ret = -EINVAL;
1071 break;
1072 }
1073 sk->sk_tskey = tcp_sk(sk)->snd_una;
1074 } else {
1075 sk->sk_tskey = 0;
1076 }
1077 }
1078
1079 if (val & SOF_TIMESTAMPING_OPT_STATS &&
1080 !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
1081 ret = -EINVAL;
1082 break;
1083 }
1084
1085 sk->sk_tsflags = val;
1086 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
1087 sock_enable_timestamp(sk,
1088 SOCK_TIMESTAMPING_RX_SOFTWARE);
1089 else {
1090 if (optname == SO_TIMESTAMPING_NEW)
1091 sock_reset_flag(sk, SOCK_TSTAMP_NEW);
1092
1093 sock_disable_timestamp(sk,
1094 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
1095 }
1096 break;
1097
1098 case SO_RCVLOWAT:
1099 if (val < 0)
1100 val = INT_MAX;
1101 if (sock->ops->set_rcvlowat)
1102 ret = sock->ops->set_rcvlowat(sk, val);
1103 else
1104 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1105 break;
1106
1107 case SO_RCVTIMEO_OLD:
1108 case SO_RCVTIMEO_NEW:
1109 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
1110 break;
1111
1112 case SO_SNDTIMEO_OLD:
1113 case SO_SNDTIMEO_NEW:
1114 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
1115 break;
1116
1117 case SO_ATTACH_FILTER:
1118 ret = -EINVAL;
1119 if (optlen == sizeof(struct sock_fprog)) {
1120 struct sock_fprog fprog;
1121
1122 ret = -EFAULT;
1123 if (copy_from_user(&fprog, optval, sizeof(fprog)))
1124 break;
1125
1126 ret = sk_attach_filter(&fprog, sk);
1127 }
1128 break;
1129
1130 case SO_ATTACH_BPF:
1131 ret = -EINVAL;
1132 if (optlen == sizeof(u32)) {
1133 u32 ufd;
1134
1135 ret = -EFAULT;
1136 if (copy_from_user(&ufd, optval, sizeof(ufd)))
1137 break;
1138
1139 ret = sk_attach_bpf(ufd, sk);
1140 }
1141 break;
1142
1143 case SO_ATTACH_REUSEPORT_CBPF:
1144 ret = -EINVAL;
1145 if (optlen == sizeof(struct sock_fprog)) {
1146 struct sock_fprog fprog;
1147
1148 ret = -EFAULT;
1149 if (copy_from_user(&fprog, optval, sizeof(fprog)))
1150 break;
1151
1152 ret = sk_reuseport_attach_filter(&fprog, sk);
1153 }
1154 break;
1155
1156 case SO_ATTACH_REUSEPORT_EBPF:
1157 ret = -EINVAL;
1158 if (optlen == sizeof(u32)) {
1159 u32 ufd;
1160
1161 ret = -EFAULT;
1162 if (copy_from_user(&ufd, optval, sizeof(ufd)))
1163 break;
1164
1165 ret = sk_reuseport_attach_bpf(ufd, sk);
1166 }
1167 break;
1168
1169 case SO_DETACH_REUSEPORT_BPF:
1170 ret = reuseport_detach_prog(sk);
1171 break;
1172
1173 case SO_DETACH_FILTER:
1174 ret = sk_detach_filter(sk);
1175 break;
1176
1177 case SO_LOCK_FILTER:
1178 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1179 ret = -EPERM;
1180 else
1181 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1182 break;
1183
1184 case SO_PASSSEC:
1185 if (valbool)
1186 set_bit(SOCK_PASSSEC, &sock->flags);
1187 else
1188 clear_bit(SOCK_PASSSEC, &sock->flags);
1189 break;
1190 case SO_MARK:
1191 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1192 ret = -EPERM;
1193 } else if (val != sk->sk_mark) {
1194 sk->sk_mark = val;
1195 sk_dst_reset(sk);
1196 }
1197 break;
1198
1199 case SO_RXQ_OVFL:
1200 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1201 break;
1202
1203 case SO_WIFI_STATUS:
1204 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1205 break;
1206
1207 case SO_PEEK_OFF:
1208 if (sock->ops->set_peek_off)
1209 ret = sock->ops->set_peek_off(sk, val);
1210 else
1211 ret = -EOPNOTSUPP;
1212 break;
1213
1214 case SO_NOFCS:
1215 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1216 break;
1217
1218 case SO_SELECT_ERR_QUEUE:
1219 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1220 break;
1221
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
From: Pascal Bouchareine <[email protected]>
Date: Fri, 21 Aug 2020 20:28:27 -0700
> This command attaches the zero terminated string in optval to the
> socket for troubleshooting purposes. The free string is displayed in the
> process fdinfo file for that fd (/proc/<pid>/fdinfo/<fd>).
>
> One intended usage is to allow processes to self-document sockets
> for netstat and friends to report
>
> We ignore optlen and constrain the string to a static max size
>
> Signed-off-by: Pascal Bouchareine <[email protected]>
This change is really a non-starter unless the information gets
published somewhere where people actually look at dumped sockets, and
that's inet_diag and friends.
Thanks for taking a look!
On Fri, Aug 21, 2020 at 8:51 PM Andrew Morton <[email protected]> wrote:
> Why change all existing callsites so that one callsite can pass in a
> different gfp_t?
My initial thought was to change strndup_user to use
GFP_KERNEL_ACCOUNT (or GFP_USER | __GFP_ACCOUNT ?) unconditionally.
(Would that work? that would be a simpler change for sure)
In the case it was not wanted, I assumed a good proportion of callers
might do the same on a case-by-case basis (esp. with regards to
enabling accounting).
> Also...
>
> why does strndup_user() use GFP_USER? Nobody will be mapping the
> resulting strings into user pagetables (will they?). This was done by
> Al's 6c2c97a24f096e32, which doesn't have a changelog :(
FWIW, I believe related to this: https://lkml.org/lkml/2018/1/6/333
It's a bit over my head (is GFP_USER cheaper?) if strndup_user needs
to follow memdup_user
> In [patch 2/2],
>
> + desc = strndup_user(user_desc, SK_MAX_DESC_SIZE, GFP_KERNEL_ACCOUNT);
>
> if GFP_USER is legit then shouldn't this be GFP_USER_ACCOUNT (ie,
> GFP_USER|__GFP_ACCOUNT)?
Yes! I'll see clearer if I manage to wrap my head around what
strndup_user should do
Thanks!
Thank you,
On Sat, Aug 22, 2020 at 12:36 PM David Miller <[email protected]> wrote:
> > We ignore optlen and constrain the string to a static max size
> >
> > Signed-off-by: Pascal Bouchareine <[email protected]>
>
> This change is really a non-starter unless the information gets
> published somewhere where people actually look at dumped sockets, and
> that's inet_diag and friends.
Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
(And keep the existing change - setsockopt + show_fd_info via
/proc/.../fdinfo/..)
I would feel like adding a pid information (and what else am I missing
here) to inet_diag might also be a good improvement then?
I understand that users have to scan /proc to find the FDs, matching
the inode number for the socket to find the owning process today.
If that's of interest I can explore that too
On Sat, Aug 22, 2020 at 12:59 PM Pascal Bouchareine <[email protected]> wrote:
> Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
> (And keep the existing change - setsockopt + show_fd_info via
> /proc/.../fdinfo/..)
Ah,very wrong example - to be more precise, I suppose that'd be adding
a couple idiag_ext for sk_description and pid if possible instead
On Sat, Aug 22, 2020 at 1:19 PM Pascal Bouchareine <[email protected]> wrote:
>
> On Sat, Aug 22, 2020 at 12:59 PM Pascal Bouchareine <[email protected]> wrote:
>
> > Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
> > (And keep the existing change - setsockopt + show_fd_info via
> > /proc/.../fdinfo/..)
>
>
> Ah,very wrong example - to be more precise, I suppose that'd be adding
> a couple idiag_ext for sk_description and pid if possible instead
About the pid part -
On top of multiple pids to scan for a given socket, there's also the
security provided by /proc - I'm not sure what inet_diag does for that
So maybe users calling it will need to scan /proc for a long time anyway...
Or is that doable?
From: Pascal Bouchareine <[email protected]>
Date: Sat, 22 Aug 2020 13:53:03 -0700
> On Sat, Aug 22, 2020 at 1:19 PM Pascal Bouchareine <[email protected]> wrote:
>>
>> On Sat, Aug 22, 2020 at 12:59 PM Pascal Bouchareine <[email protected]> wrote:
>>
>> > Would it make sense to also make UDIAG_SHOW_NAME use sk_description?
>> > (And keep the existing change - setsockopt + show_fd_info via
>> > /proc/.../fdinfo/..)
>>
>>
>> Ah,very wrong example - to be more precise, I suppose that'd be adding
>> a couple idiag_ext for sk_description and pid if possible instead
>
> About the pid part -
> On top of multiple pids to scan for a given socket, there's also the
> security provided by /proc - I'm not sure what inet_diag does for that
> So maybe users calling it will need to scan /proc for a long time anyway...
>
> Or is that doable?
I'd like to kindly ask that you do more research into how this kind of
information is advertised to the user using modern interfaces, and what
kinds of permissions and checks are done for those.
You are proposing a new UAPI for the Linux kernel, and with that comes
some level of responsibility.
Thank you.
On Sat, Aug 22, 2020 at 2:01 PM David Miller <[email protected]> wrote:
> > About the pid part -
> > On top of multiple pids to scan for a given socket, there's also the
> > security provided by /proc - I'm not sure what inet_diag does for that
> > So maybe users calling it will need to scan /proc for a long time anyway...
> >
> > Or is that doable?
>
> I'd like to kindly ask that you do more research into how this kind of
> information is advertised to the user using modern interfaces, and what
> kinds of permissions and checks are done for those.
If we wanted to get rid of having to scan /proc from userland when
using sock_diag to identify associated processes,
I suppose scanning for pids would be the most annoying part?
I understand sock_diag uses CAP_NET_ADMIN for some sensitive bits.
I thought it would require an additional bit of logic to let an
unprivileged user access its own socket "sensitive" data.
Your message makes me think I need to read a lot more about it, so
I'll try that - but more importantly
as you mention APIs and modern interfaces, I think eBPF is going to be
of great help to try and hack
around this data without disturbing existing APIs.
Thanks for taking the time to look into it