Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751670AbdISVrh (ORCPT ); Tue, 19 Sep 2017 17:47:37 -0400 Received: from mx1.redhat.com ([209.132.183.28]:51472 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751594AbdISVrg (ORCPT ); Tue, 19 Sep 2017 17:47:36 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 5C22BC0587C9 Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=joe.lawrence@redhat.com Subject: Re: [PATCH RFC 2/3] pipe: protect pipe_max_size access with a mutex To: Mikulas Patocka Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Alexander Viro , "Luis R. Rodriguez" , Kees Cook , Michael Kerrisk References: <1504622676-2992-1-git-send-email-joe.lawrence@redhat.com> <1504622676-2992-3-git-send-email-joe.lawrence@redhat.com> From: Joe Lawrence Organization: Red Hat Message-ID: <8b2369ef-a537-974e-725b-f55d49a646b6@redhat.com> Date: Tue, 19 Sep 2017 17:47:34 -0400 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.2.0 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.32]); Tue, 19 Sep 2017 21:47:36 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9404 Lines: 300 On 09/14/2017 07:09 PM, Mikulas Patocka wrote: > On Tue, 5 Sep 2017, Joe Lawrence wrote: > >> pipe_max_size is assigned directly via procfs sysctl: >> >> static struct ctl_table fs_table[] = { >> ... >> { >> .procname = "pipe-max-size", >> .data = &pipe_max_size, >> .maxlen = sizeof(int), >> .mode = 0644, >> .proc_handler = &pipe_proc_fn, >> .extra1 = &pipe_min_size, >> }, >> ... >> >> int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, >> size_t *lenp, loff_t *ppos) >> { >> ... >> ret = proc_dointvec_minmax(table, write, buf, lenp, ppos) >> ... >> >> and then later rounded in-place a few statements later: >> >> ... >> pipe_max_size = round_pipe_size(pipe_max_size); >> ... >> >> This leaves a window of time between initial assignment and rounding >> that may be visible to other threads. (For example, one thread sets a >> non-rounded value to pipe_max_size while another reads its value.) >> >> Similar reads of pipe_max_size are potentially racey: >> >> pipe.c :: alloc_pipe_info() >> pipe.c :: pipe_set_size() >> >> Protect them and the procfs sysctl assignment with a mutex. >> >> Reported-by: Mikulas Patocka >> Signed-off-by: Joe Lawrence >> --- >> fs/pipe.c | 28 ++++++++++++++++++++++++---- >> 1 file changed, 24 insertions(+), 4 deletions(-) >> >> diff --git a/fs/pipe.c b/fs/pipe.c >> index fa28910b3c59..33bb11b0d78e 100644 >> --- a/fs/pipe.c >> +++ b/fs/pipe.c >> @@ -35,6 +35,11 @@ >> unsigned int pipe_max_size = 1048576; >> >> /* >> + * Provide mutual exclusion around access to pipe_max_size >> + */ >> +static DEFINE_MUTEX(pipe_max_mutex); >> + >> +/* >> * Minimum pipe size, as required by POSIX >> */ >> unsigned int pipe_min_size = PAGE_SIZE; >> @@ -623,13 +628,18 @@ struct pipe_inode_info *alloc_pipe_info(void) >> unsigned long pipe_bufs = PIPE_DEF_BUFFERS; >> struct user_struct *user = get_current_user(); >> unsigned long user_bufs; >> + unsigned int max_size; >> >> pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT); >> if (pipe == NULL) >> goto out_free_uid; >> >> - if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE)) >> - pipe_bufs = pipe_max_size >> PAGE_SHIFT; >> + mutex_lock(&pipe_max_mutex); >> + max_size = pipe_max_size; >> + mutex_unlock(&pipe_max_mutex); >> + >> + if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE)) >> + pipe_bufs = max_size >> PAGE_SHIFT; >> >> user_bufs = account_pipe_buffers(user, 0, pipe_bufs); >> >> @@ -1039,6 +1049,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) >> struct pipe_buffer *bufs; >> unsigned int size, nr_pages; >> unsigned long user_bufs; >> + unsigned int max_size; >> long ret = 0; >> >> size = round_pipe_size(arg); >> @@ -1056,8 +1067,11 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) >> * Decreasing the pipe capacity is always permitted, even >> * if the user is currently over a limit. >> */ >> + mutex_lock(&pipe_max_mutex); >> + max_size = pipe_max_size; >> + mutex_unlock(&pipe_max_mutex); >> if (nr_pages > pipe->buffers && >> - size > pipe_max_size && !capable(CAP_SYS_RESOURCE)) >> + size > max_size && !capable(CAP_SYS_RESOURCE)) >> return -EPERM; >> >> user_bufs = account_pipe_buffers(pipe->user, pipe->buffers, nr_pages); >> @@ -1131,18 +1145,24 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, >> unsigned int rounded_pipe_max_size; >> int ret; >> >> + mutex_lock(&pipe_max_mutex); >> orig_pipe_max_size = pipe_max_size; >> ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); >> - if (ret < 0 || !write) >> + if (ret < 0 || !write) { >> + mutex_unlock(&pipe_max_mutex); >> return ret; >> + } >> >> rounded_pipe_max_size = round_pipe_size(pipe_max_size); >> if (rounded_pipe_max_size == 0) { >> pipe_max_size = orig_pipe_max_size; >> + mutex_unlock(&pipe_max_mutex); >> return -EINVAL; >> } >> >> pipe_max_size = rounded_pipe_max_size; >> + mutex_unlock(&pipe_max_mutex); >> + >> return ret; >> } >> >> -- >> 1.8.3.1 >> > I think this mutex is too heavy - if multiple processes simultaneously > create a pipe, the mutex would cause performance degradation. > > You can call do_proc_dointvec with a custom callback "conv" function > that does the rounding of the pipe size value. Okay, that would consolidate the fiddling with pipe_max_size into a single place / assignment. Implementation wise, are you suggesting something like the following (work in progress). round_pipe_size() would still be used by fs/pipe.c::pipe_set_size() so it would need to be visible to both source files. The proc_do... function would be pipe-max-size specific as it's basically: - proc_douintvec_minmax, but - without the "max" check - with an added PAGE_SIZE floor - PAGE_SIZE increment and exported for pipe.c to call. Plumbing in the opposite direction (export do_proc_douintvec() and stick the new conv implementation in fs/pipe.c) might be a little easier, but seems like the same ick-factor in the end. Regards, -- Joe -->8-- -->8-- -->8-- -->8-- diff --git a/fs/pipe.c b/fs/pipe.c index 8cbc97d97753..4db3cd2d139c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1019,7 +1019,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * Currently we rely on the pipe array holding a power-of-2 number * of pages. Returns 0 on error. */ -static inline unsigned int round_pipe_size(unsigned int size) +unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; @@ -1130,19 +1130,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, size_t *lenp, loff_t *ppos) { - unsigned int rounded_pipe_max_size; - int ret; - - ret = proc_douintvec_minmax(table, write, buf, lenp, ppos); - if (ret < 0 || !write) - return ret; - - rounded_pipe_max_size = round_pipe_size(pipe_max_size); - if (rounded_pipe_max_size == 0) - return -EINVAL; - - pipe_max_size = rounded_pipe_max_size; - return ret; + return proc_dopipe_max_size(table, write, buf, lenp, ppos); } /* diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index e7497c9dde7f..485cf7a7aa8f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -190,5 +190,6 @@ static inline int pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_inode_info *get_pipe_info(struct file *file); int create_pipe_files(struct file **, int); +unsigned int round_pipe_size(unsigned int size); #endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1d4dba490fb6..ba24ca72800c 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -50,6 +50,9 @@ extern int proc_dointvec_minmax(struct ctl_table *, int, extern int proc_douintvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c976719bf37a..7a2913c5546e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -2631,6 +2632,47 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, do_proc_douintvec_minmax_conv, ¶m); } +struct do_proc_dopipe_max_size_conv_param { + unsigned int *min; +}; + +static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, + unsigned int *valp, + int write, void *data) +{ + struct do_proc_dopipe_max_size_conv_param *param = data; + + if (write) { + unsigned int val = round_pipe_size(*lvalp); + + if (val == 0) + return -EINVAL; + + if (param->min && *param->min > val) + return -ERANGE; + + if (*lvalp > UINT_MAX) + return -EINVAL; + + *valp = val; + } else { + unsigned int val = *valp; + *lvalp = (unsigned long) val; + } + + return 0; +} + +int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct do_proc_dopipe_max_size_conv_param param = { + .min = (unsigned int *) table->extra1, + }; + return do_proc_douintvec(table, write, buffer, lenp, ppos, + do_proc_dopipe_max_size_conv, ¶m); +} + static void validate_coredump_safety(void) { #ifdef CONFIG_COREDUMP @@ -3179,6 +3221,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, EXPORT_SYMBOL(proc_dointvec_jiffies); EXPORT_SYMBOL(proc_dointvec_minmax); EXPORT_SYMBOL_GPL(proc_douintvec_minmax); +EXPORT_SYMBOL_GPL(proc_dopipe_max_size); EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); -- 1.8.3.1