This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
allows users to reserve ports for third-party applications.
The reserved ports will not be used by automatic port assignments
(e.g. when calling connect() or bind() with port number 0). Explicit
port allocation behavior is unchanged.
Changes from the previous version:
- switch the /proc entry format to coma separated list of range ports
- treat -EFAULT just like any other error and acknowledge written values
- use isdigit() in proc_get_ulong
Octavian Purdila (3):
sysctl: refactor integer handling proc code
sysctl: add proc_do_large_bitmap
net: reserve ports for applications using fixed port numbers
Documentation/networking/ip-sysctl.txt | 14 +
drivers/infiniband/core/cma.c | 7 +-
include/linux/sysctl.h | 2 +
include/net/ip.h | 6 +
kernel/sysctl.c | 449 +++++++++++++++++++++-----------
net/ipv4/af_inet.c | 8 +-
net/ipv4/inet_connection_sock.c | 6 +
net/ipv4/inet_hashtables.c | 2 +
net/ipv4/sysctl_net_ipv4.c | 17 ++
net/ipv4/udp.c | 3 +-
net/sctp/socket.c | 2 +
11 files changed, 361 insertions(+), 155 deletions(-)
This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
allows users to reserve ports for third-party applications.
The reserved ports will not be used by automatic port assignments
(e.g. when calling connect() or bind() with port number 0). Explicit
port allocation behavior is unchanged.
Signed-off-by: Octavian Purdila <[email protected]>
Signed-off-by: WANG Cong <[email protected]>
Cc: Neil Horman <[email protected]>
Cc: Eric Dumazet <[email protected]>
Cc: Eric W. Biederman <[email protected]>
---
Documentation/networking/ip-sysctl.txt | 14 ++++++++++++++
drivers/infiniband/core/cma.c | 7 ++++++-
include/net/ip.h | 6 ++++++
net/ipv4/af_inet.c | 8 +++++++-
net/ipv4/inet_connection_sock.c | 6 ++++++
net/ipv4/inet_hashtables.c | 2 ++
net/ipv4/sysctl_net_ipv4.c | 17 +++++++++++++++++
net/ipv4/udp.c | 3 ++-
net/sctp/socket.c | 2 ++
9 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 2dc7a1d..6534ee7 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -564,6 +564,20 @@ ip_local_port_range - 2 INTEGERS
(i.e. by default) range 1024-4999 is enough to issue up to
2000 connections per second to systems supporting timestamps.
+ip_local_reserved_ports - list of comma separated ranges
+ Specify the ports which are reserved for known third-party
+ applications. These ports will not be used by automatic port
+ assignments (e.g. when calling connect() or bind() with port
+ number 0). Explicit port allocation behavior is unchanged.
+
+ The format used for both input and output is a comma separated
+ list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
+ 10). Writing to the file will clear all previously reserved
+ ports and update the current list with the one given in the
+ input.
+
+ Default: Empty
+
ip_nonlocal_bind - BOOLEAN
If set, allows processes to bind() to non-local IP addresses,
which can be quite useful - but may break some applications.
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 875e34e..06c9fa5 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1979,6 +1979,8 @@ retry:
/* FIXME: add proper port randomization per like inet_csk_get_port */
do {
ret = idr_get_new_above(ps, bind_list, next_port, &port);
+ if (inet_is_reserved_local_port(port))
+ ret = -EAGAIN;
} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
if (ret)
@@ -2995,10 +2997,13 @@ static int __init cma_init(void)
{
int ret, low, high, remaining;
- get_random_bytes(&next_port, sizeof next_port);
inet_get_local_port_range(&low, &high);
+again:
+ get_random_bytes(&next_port, sizeof next_port);
remaining = (high - low) + 1;
next_port = ((unsigned int) next_port % remaining) + low;
+ if (inet_is_reserved_local_port(next_port))
+ goto again;
cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
diff --git a/include/net/ip.h b/include/net/ip.h
index 503994a..3da9004 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -184,6 +184,12 @@ extern struct local_ports {
} sysctl_local_ports;
extern void inet_get_local_port_range(int *low, int *high);
+extern unsigned long *sysctl_local_reserved_ports;
+static inline int inet_is_reserved_local_port(int port)
+{
+ return test_bit(port, sysctl_local_reserved_ports);
+}
+
extern int sysctl_ip_default_ttl;
extern int sysctl_ip_nonlocal_bind;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 33b7dff..e283fbe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1546,9 +1546,13 @@ static int __init inet_init(void)
BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
+ sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
+ if (!sysctl_local_reserved_ports)
+ goto out;
+
rc = proto_register(&tcp_prot, 1);
if (rc)
- goto out;
+ goto out_free_reserved_ports;
rc = proto_register(&udp_prot, 1);
if (rc)
@@ -1647,6 +1651,8 @@ out_unregister_udp_proto:
proto_unregister(&udp_prot);
out_unregister_tcp_proto:
proto_unregister(&tcp_prot);
+out_free_reserved_ports:
+ kfree(sysctl_local_reserved_ports);
goto out;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8da6429..1acb462 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = {
.range = { 32768, 61000 },
};
+unsigned long *sysctl_local_reserved_ports;
+EXPORT_SYMBOL(sysctl_local_reserved_ports);
+
void inet_get_local_port_range(int *low, int *high)
{
unsigned seq;
@@ -108,6 +111,8 @@ again:
smallest_size = -1;
do {
+ if (inet_is_reserved_local_port(rover))
+ goto next_nolock;
head = &hashinfo->bhash[inet_bhashfn(net, rover,
hashinfo->bhash_size)];
spin_lock(&head->lock);
@@ -130,6 +135,7 @@ again:
break;
next:
spin_unlock(&head->lock);
+ next_nolock:
if (++rover > high)
rover = low;
} while (--remaining > 0);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2b79377..d3e160a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
+ if (inet_is_reserved_local_port(port))
+ continue;
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712c..072e193 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -298,6 +298,13 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
+ {
+ .procname = "ip_local_reserved_ports",
+ .data = NULL, /* initialized in sysctl_ipv4_init */
+ .maxlen = 65536,
+ .mode = 0644,
+ .proc_handler = proc_do_large_bitmap,
+ },
#ifdef CONFIG_IP_MULTICAST
{
.procname = "igmp_max_memberships",
@@ -721,6 +728,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
static __init int sysctl_ipv4_init(void)
{
struct ctl_table_header *hdr;
+ struct ctl_table *i;
+
+ for (i = ipv4_table; i->procname; i++) {
+ if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
+ i->data = sysctl_local_reserved_ports;
+ break;
+ }
+ }
+ if (!i->procname)
+ return -EINVAL;
hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
if (hdr == NULL)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 608a544..bfd0a6a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -232,7 +232,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
*/
do {
if (low <= snum && snum <= high &&
- !test_bit(snum >> udptable->log, bitmap))
+ !test_bit(snum >> udptable->log, bitmap) &&
+ !inet_is_reserved_local_port(snum))
goto found;
snum += rand;
} while (snum != first);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f6d1e59..1f839d0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5432,6 +5432,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover++;
if ((rover < low) || (rover > high))
rover = low;
+ if (inet_is_reserved_local_port(rover))
+ continue;
index = sctp_phashfn(rover);
head = &sctp_port_hashtable[index];
sctp_spin_lock(&head->lock);
--
1.5.6.5
As we are about to add another integer handling proc function a little
bit of cleanup is in order: add a few helper functions to improve code
readability and decrease code duplication.
In the process a bug is also fixed: if the user specifies a number
with more then 20 digits it will be interpreted as two integers
(e.g. 10000...13 will be interpreted as 100.... and 13).
Behavior for EFAULT handling was changed as well. Previous to this
patch, when an EFAULT error occurred in the middle of a write
operation, although some of the elements were set, that was not
acknowledged to the user (by shorting the write and returning the
number of bytes accepted). EFAULT is not treated just like any other
errors by acknowledging the amount of bytes accepted.
Signed-off-by: Octavian Purdila <[email protected]>
Cc: WANG Cong <[email protected]>
Cc: Eric W. Biederman <[email protected]>
---
kernel/sysctl.c | 337 ++++++++++++++++++++++++++++++-------------------------
1 files changed, 183 insertions(+), 154 deletions(-)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b24..5259727 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2039,8 +2039,131 @@ int proc_dostring(struct ctl_table *table, int write,
buffer, lenp, ppos);
}
+static int proc_skip_wspace(char __user **buf, size_t *size)
+{
+ char c;
+
+ while (*size) {
+ if (get_user(c, *buf))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ (*size)--; (*buf)++;
+ }
+
+ return 0;
+}
+
+#define TMPBUFLEN 22
+/**
+ * proc_get_ulong - reads an ASCII formated integer from a user buffer
+ *
+ * @buf - user buffer
+ * @size - size of the user buffer
+ * @perm_trailers - a NULL terminated string with trailers that are
+ * allowed to terminate this number (besides the standard whitespace ones)
+ * @val - this is where the number will be stored
+ * @neg - set to %TRUE if number is negative
+ * @tr - pointer to store the trailer character.
+ *
+ * In case of success 0 is returned and buf and size are updated with
+ * the amount of bytes read. If tr is non NULL and a trailing
+ * character exist (size is non zero after returning from this
+ * function) tr is updated with the trailing character.
+ */
+static int proc_get_ulong(char __user **buf, size_t *size, const char *perm_tr,
+ unsigned long *val, bool *neg, char *tr)
+{
+ int len;
+ char *p, tmp[TMPBUFLEN];
+ int err;
+
+ err = proc_skip_wspace(buf, size);
+ if (err)
+ return err;
+ if (!*size)
+ return -EINVAL;
+
+ len = *size;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+
+ if (copy_from_user(tmp, *buf, len))
+ return -EFAULT;
+
+ tmp[len] = 0;
+ p = tmp;
+ if (*p == '-' && *size > 1) {
+ *neg = 1;
+ p++;
+ } else
+ *neg = 0;
+ if (!isdigit(*p))
+ return -EINVAL;
+
+ *val = simple_strtoul(p, &p, 0);
+
+ len = p - tmp;
+ if (((len < *size) && *p && !isspace(*p) &&
+ perm_tr && !strchr(perm_tr, *p)) ||
+ /* We don't know if the next char is whitespace thus we may accept
+ * invalid integers (e.g. 1234...a) or two integers instead of one
+ * (e.g. 123...1). So lets not allow such large numbers. */
+ len == TMPBUFLEN - 1)
+ return -EINVAL;
-static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
+ if (tr && (len < *size))
+ *tr = *p;
+
+ *buf += len; *size -= len;
+
+ return 0;
+}
+
+/**
+ * proc_put_ulong - coverts an integer to a decimal ASCII formated string
+ *
+ * @buf - the user buffer
+ * @size - the size of the user buffer
+ * @val - the integer to be converted
+ * @neg - sign of the number, %TRUE for negative
+ * @first - if %FALSE will insert a separator character before the number
+ * @separator - the separator character
+ *
+ * In case of success 0 is returned and buf and size are updated with
+ * the amount of bytes read.
+ */
+static int proc_put_ulong(char __user **buf, size_t *size, unsigned long val,
+ bool neg, bool first, char separator)
+{
+ int len;
+ char tmp[TMPBUFLEN], *p = tmp;
+
+ if (!first)
+ *p++ = separator;
+ sprintf(p, "%s%lu", neg ? "-" : "", val);
+ len = strlen(tmp);
+ if (len > *size)
+ len = *size;
+ if (copy_to_user(*buf, tmp, len))
+ return -EFAULT;
+ *size -= len;
+ *buf += len;
+ return 0;
+}
+#undef TMPBUFLEN
+
+static int proc_put_char(char __user **buf, size_t *size, char c)
+{
+ if (*size) {
+ if (put_user(c, *buf))
+ return -EFAULT;
+ (*size)--, (*buf)++;
+ }
+ return 0;
+}
+
+static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2049,7 +2172,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
} else {
int val = *valp;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
*lvalp = (unsigned long)-val;
} else {
*negp = 0;
@@ -2060,19 +2183,15 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
}
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
- int write, void __user *buffer,
+ int write, void __user *_buffer,
size_t *lenp, loff_t *ppos,
- int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+ int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
int write, void *data),
void *data)
{
-#define TMPBUFLEN 21
- int *i, vleft, first = 1, neg;
- unsigned long lval;
- size_t left, len;
-
- char buf[TMPBUFLEN], *p;
- char __user *s = buffer;
+ int *i, vleft, first = 1, err = 0;
+ size_t left;
+ char __user *buffer = (char __user *) _buffer;
if (!tbl_data || !table->maxlen || !*lenp ||
(*ppos && !write)) {
@@ -2088,88 +2207,44 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
conv = do_proc_dointvec_conv;
for (; left && vleft--; i++, first=0) {
- if (write) {
- while (left) {
- char c;
- if (get_user(c, s))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
- s++;
- }
- if (!left)
- break;
- neg = 0;
- len = left;
- if (len > sizeof(buf) - 1)
- len = sizeof(buf) - 1;
- if (copy_from_user(buf, s, len))
- return -EFAULT;
- buf[len] = 0;
- p = buf;
- if (*p == '-' && left > 1) {
- neg = 1;
- p++;
- }
- if (*p < '0' || *p > '9')
- break;
-
- lval = simple_strtoul(p, &p, 0);
+ unsigned long lval;
+ bool neg;
- len = p-buf;
- if ((len < left) && *p && !isspace(*p))
+ if (write) {
+ err = proc_get_ulong(&buffer, &left, NULL, &lval, &neg,
+ NULL);
+ if (err)
break;
- s += len;
- left -= len;
-
- if (conv(&neg, &lval, i, 1, data))
+ if (conv(&neg, &lval, i, 1, data)) {
+ err = -EINVAL;
break;
+ }
} else {
- p = buf;
- if (!first)
- *p++ = '\t';
-
- if (conv(&neg, &lval, i, 0, data))
+ if (conv(&neg, &lval, i, 0, data)) {
+ err = -EINVAL;
break;
-
- sprintf(p, "%s%lu", neg ? "-" : "", lval);
- len = strlen(buf);
- if (len > left)
- len = left;
- if(copy_to_user(s, buf, len))
- return -EFAULT;
- left -= len;
- s += len;
- }
- }
-
- if (!write && !first && left) {
- if(put_user('\n', s))
- return -EFAULT;
- left--, s++;
- }
- if (write) {
- while (left) {
- char c;
- if (get_user(c, s++))
- return -EFAULT;
- if (!isspace(c))
+ }
+ err = proc_put_ulong(&buffer, &left, lval, neg, first,
+ '\t');
+ if (err)
break;
- left--;
}
}
+
+ if (!write && !first && left && !err)
+ err = proc_put_char(&buffer, &left, '\n');
+ if (write && !err)
+ err = proc_skip_wspace(&buffer, &left);
if (write && first)
- return -EINVAL;
+ return err ? : -EINVAL;
*lenp -= left;
*ppos += *lenp;
return 0;
-#undef TMPBUFLEN
}
static int do_proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos,
- int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+ int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
int write, void *data),
void *data)
{
@@ -2237,8 +2312,8 @@ struct do_proc_dointvec_minmax_conv_param {
int *max;
};
-static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
- int *valp,
+static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
int write, void *data)
{
struct do_proc_dointvec_minmax_conv_param *param = data;
@@ -2251,7 +2326,7 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
} else {
int val = *valp;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
*lvalp = (unsigned long)-val;
} else {
*negp = 0;
@@ -2289,17 +2364,15 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
}
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
- void __user *buffer,
+ void __user *_buffer,
size_t *lenp, loff_t *ppos,
unsigned long convmul,
unsigned long convdiv)
{
-#define TMPBUFLEN 21
- unsigned long *i, *min, *max, val;
- int vleft, first=1, neg;
- size_t len, left;
- char buf[TMPBUFLEN], *p;
- char __user *s = buffer;
+ unsigned long *i, *min, *max;
+ int vleft, first = 1, err = 0;
+ size_t left;
+ char __user *buffer = (char __user *) _buffer;
if (!data || !table->maxlen || !*lenp ||
(*ppos && !write)) {
@@ -2314,82 +2387,38 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
left = *lenp;
for (; left && vleft--; i++, min++, max++, first=0) {
+ unsigned long val;
+
if (write) {
- while (left) {
- char c;
- if (get_user(c, s))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
- s++;
- }
- if (!left)
- break;
- neg = 0;
- len = left;
- if (len > TMPBUFLEN-1)
- len = TMPBUFLEN-1;
- if (copy_from_user(buf, s, len))
- return -EFAULT;
- buf[len] = 0;
- p = buf;
- if (*p == '-' && left > 1) {
- neg = 1;
- p++;
- }
- if (*p < '0' || *p > '9')
- break;
- val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
- len = p-buf;
- if ((len < left) && *p && !isspace(*p))
+ bool neg;
+
+ err = proc_get_ulong(&buffer, &left, NULL,
+ &val, &neg, NULL);
+ if (err)
break;
if (neg)
- val = -val;
- s += len;
- left -= len;
-
- if(neg)
continue;
if ((min && val < *min) || (max && val > *max))
continue;
*i = val;
} else {
- p = buf;
- if (!first)
- *p++ = '\t';
- sprintf(p, "%lu", convdiv * (*i) / convmul);
- len = strlen(buf);
- if (len > left)
- len = left;
- if(copy_to_user(s, buf, len))
- return -EFAULT;
- left -= len;
- s += len;
- }
- }
-
- if (!write && !first && left) {
- if(put_user('\n', s))
- return -EFAULT;
- left--, s++;
- }
- if (write) {
- while (left) {
- char c;
- if (get_user(c, s++))
- return -EFAULT;
- if (!isspace(c))
+ val = convdiv * (*i) / convmul;
+ err = proc_put_ulong(&buffer, &left, val, 0, first,
+ '\t');
+ if (err)
break;
- left--;
}
}
+
+ if (!write && !first && left && !err)
+ err = proc_put_char(&buffer, &left, '\n');
+ if (write && !err)
+ err = proc_skip_wspace(&buffer, &left);
if (write && first)
- return -EINVAL;
+ return err ? : -EINVAL;
*lenp -= left;
*ppos += *lenp;
return 0;
-#undef TMPBUFLEN
}
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
@@ -2450,7 +2479,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
}
-static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
+static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2462,7 +2491,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
int val = *valp;
unsigned long lval;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
lval = (unsigned long)-val;
} else {
*negp = 0;
@@ -2473,7 +2502,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
return 0;
}
-static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
+static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2485,7 +2514,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
int val = *valp;
unsigned long lval;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
lval = (unsigned long)-val;
} else {
*negp = 0;
@@ -2496,7 +2525,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
return 0;
}
-static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
+static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2506,7 +2535,7 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
int val = *valp;
unsigned long lval;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
lval = (unsigned long)-val;
} else {
*negp = 0;
--
1.5.6.5
Octavian Purdila wrote:
> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
> allows users to reserve ports for third-party applications.
>
> The reserved ports will not be used by automatic port assignments
> (e.g. when calling connect() or bind() with port number 0). Explicit
> port allocation behavior is unchanged.
>
> Changes from the previous version:
> - switch the /proc entry format to coma separated list of range ports
> - treat -EFAULT just like any other error and acknowledge written values
> - use isdigit() in proc_get_ulong
>
> Octavian Purdila (3):
> sysctl: refactor integer handling proc code
> sysctl: add proc_do_large_bitmap
> net: reserve ports for applications using fixed port numbers
Hi,
This version looks fine for me, but I need to give them a test, and
I will put feedbacks asap. Thanks for your work!
Still two things:
1) bitops are always atomic on every arch, right? If yes, then ok.
2) I hope you could add some documentation to show the relations
between ip_local_port_range and ip_local_reserved_ports.
Thanks!
Octavian Purdila wrote:
> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
> allows users to reserve ports for third-party applications.
>
> The reserved ports will not be used by automatic port assignments
> (e.g. when calling connect() or bind() with port number 0). Explicit
> port allocation behavior is unchanged.
>
> Signed-off-by: Octavian Purdila <[email protected]>
> Signed-off-by: WANG Cong <[email protected]>
> Cc: Neil Horman <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Eric W. Biederman <[email protected]>
> ---
> Documentation/networking/ip-sysctl.txt | 14 ++++++++++++++
> drivers/infiniband/core/cma.c | 7 ++++++-
> include/net/ip.h | 6 ++++++
> net/ipv4/af_inet.c | 8 +++++++-
> net/ipv4/inet_connection_sock.c | 6 ++++++
> net/ipv4/inet_hashtables.c | 2 ++
> net/ipv4/sysctl_net_ipv4.c | 17 +++++++++++++++++
> net/ipv4/udp.c | 3 ++-
> net/sctp/socket.c | 2 ++
> 9 files changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
> index 2dc7a1d..6534ee7 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -564,6 +564,20 @@ ip_local_port_range - 2 INTEGERS
> (i.e. by default) range 1024-4999 is enough to issue up to
> 2000 connections per second to systems supporting timestamps.
>
> +ip_local_reserved_ports - list of comma separated ranges
> + Specify the ports which are reserved for known third-party
> + applications. These ports will not be used by automatic port
> + assignments (e.g. when calling connect() or bind() with port
> + number 0). Explicit port allocation behavior is unchanged.
> +
> + The format used for both input and output is a comma separated
> + list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
> + 10). Writing to the file will clear all previously reserved
> + ports and update the current list with the one given in the
> + input.
> +
> + Default: Empty
> +
> ip_nonlocal_bind - BOOLEAN
> If set, allows processes to bind() to non-local IP addresses,
> which can be quite useful - but may break some applications.
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 875e34e..06c9fa5 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -1979,6 +1979,8 @@ retry:
> /* FIXME: add proper port randomization per like inet_csk_get_port */
> do {
> ret = idr_get_new_above(ps, bind_list, next_port, &port);
> + if (inet_is_reserved_local_port(port))
> + ret = -EAGAIN;
> } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
>
> if (ret)
> @@ -2995,10 +2997,13 @@ static int __init cma_init(void)
> {
> int ret, low, high, remaining;
>
> - get_random_bytes(&next_port, sizeof next_port);
> inet_get_local_port_range(&low, &high);
> +again:
> + get_random_bytes(&next_port, sizeof next_port);
> remaining = (high - low) + 1;
> next_port = ((unsigned int) next_port % remaining) + low;
> + if (inet_is_reserved_local_port(next_port))
> + goto again;
>
> cma_wq = create_singlethread_workqueue("rdma_cm");
> if (!cma_wq)
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 503994a..3da9004 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -184,6 +184,12 @@ extern struct local_ports {
> } sysctl_local_ports;
> extern void inet_get_local_port_range(int *low, int *high);
>
> +extern unsigned long *sysctl_local_reserved_ports;
> +static inline int inet_is_reserved_local_port(int port)
> +{
> + return test_bit(port, sysctl_local_reserved_ports);
> +}
> +
> extern int sysctl_ip_default_ttl;
> extern int sysctl_ip_nonlocal_bind;
>
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 33b7dff..e283fbe 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1546,9 +1546,13 @@ static int __init inet_init(void)
>
> BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
>
> + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
> + if (!sysctl_local_reserved_ports)
> + goto out;
> +
> rc = proto_register(&tcp_prot, 1);
> if (rc)
> - goto out;
> + goto out_free_reserved_ports;
>
> rc = proto_register(&udp_prot, 1);
> if (rc)
> @@ -1647,6 +1651,8 @@ out_unregister_udp_proto:
> proto_unregister(&udp_prot);
> out_unregister_tcp_proto:
> proto_unregister(&tcp_prot);
> +out_free_reserved_ports:
> + kfree(sysctl_local_reserved_ports);
> goto out;
> }
>
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 8da6429..1acb462 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = {
> .range = { 32768, 61000 },
> };
>
> +unsigned long *sysctl_local_reserved_ports;
> +EXPORT_SYMBOL(sysctl_local_reserved_ports);
> +
Sorry, this looks somewhat weird, why not just export
inet_is_reserved_local_port()?
On Saturday 20 February 2010 10:20:53 you wrote:
> > +unsigned long *sysctl_local_reserved_ports;
> > +EXPORT_SYMBOL(sysctl_local_reserved_ports);
> > +
>
> Sorry, this looks somewhat weird, why not just export
> inet_is_reserved_local_port()?
>
My understanding is that if we do that than we won't be able to inline
inet_is_reserved_local_port(). And as David said previously that will have a
significant impact on performance.
On Saturday 20 February 2010 10:11:40 you wrote:
> Octavian Purdila wrote:
> > This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
> > allows users to reserve ports for third-party applications.
> >
> > The reserved ports will not be used by automatic port assignments
> > (e.g. when calling connect() or bind() with port number 0). Explicit
> > port allocation behavior is unchanged.
> >
> > Changes from the previous version:
> > - switch the /proc entry format to coma separated list of range ports
> > - treat -EFAULT just like any other error and acknowledge written values
> > - use isdigit() in proc_get_ulong
> >
> > Octavian Purdila (3):
> > sysctl: refactor integer handling proc code
> > sysctl: add proc_do_large_bitmap
> > net: reserve ports for applications using fixed port numbers
>
> Hi,
>
> This version looks fine for me, but I need to give them a test, and
> I will put feedbacks asap. Thanks for your work!
>
> Still two things:
>
> 1) bitops are always atomic on every arch, right? If yes, then ok.
AFAIK, yes.
> 2) I hope you could add some documentation to show the relations
> between ip_local_port_range and ip_local_reserved_ports.
>
How does this sound:
ip_local_reserved_ports - list of comma separated ranges
Specify the ports which are reserved for known third-party
applications. These ports will not be used by automatic port
assignments (e.g. when calling connect() or bind() with port
number 0). Explicit port allocation behavior is unchanged.
The format used for both input and output is a comma separated
list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
10). Writing to the file will clear all previously reserved
ports and update the current list with the one given in the
input.
Note that ip_local_port_range and ip_local_port_range settings
are independent and both are considered by the kernel when
determining which ports are available for automatic port
assignments.
You can reserve ports which are not in the current
ip_local_port_range, e.g.:
$ cat /proc/sys/net/ipv4/ip_local_port_range
32000 61000
$ cat /proc/sys/net/ipv4/ip_local_reserved_ports
8080,9148
although this is redundant. However such a setting is useful
if later the port range is changed to a value that will
include the reserved ports.
Octavian Purdila wrote:
> On Saturday 20 February 2010 10:11:40 you wrote:
>
>> 2) I hope you could add some documentation to show the relations
>> between ip_local_port_range and ip_local_reserved_ports.
>>
>
> How does this sound:
>
> ip_local_reserved_ports - list of comma separated ranges
> Specify the ports which are reserved for known third-party
> applications. These ports will not be used by automatic port
> assignments (e.g. when calling connect() or bind() with port
> number 0). Explicit port allocation behavior is unchanged.
>
> The format used for both input and output is a comma separated
> list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
> 10). Writing to the file will clear all previously reserved
> ports and update the current list with the one given in the
> input.
>
> Note that ip_local_port_range and ip_local_port_range settings
> are independent and both are considered by the kernel when
> determining which ports are available for automatic port
> assignments.
>
> You can reserve ports which are not in the current
> ip_local_port_range, e.g.:
>
> $ cat /proc/sys/net/ipv4/ip_local_port_range
> 32000 61000
> $ cat /proc/sys/net/ipv4/ip_local_reserved_ports
> 8080,9148
>
> although this is redundant. However such a setting is useful
> if later the port range is changed to a value that will
> include the reserved ports.
This looks fine for me.
Thanks.
Octavian Purdila wrote:
> On Saturday 20 February 2010 10:20:53 you wrote:
>
>>> +unsigned long *sysctl_local_reserved_ports;
>>> +EXPORT_SYMBOL(sysctl_local_reserved_ports);
>>> +
>> Sorry, this looks somewhat weird, why not just export
>> inet_is_reserved_local_port()?
>>
>
> My understanding is that if we do that than we won't be able to inline
> inet_is_reserved_local_port(). And as David said previously that will have a
> significant impact on performance.
Oh, that would be true probably, so just leave as it is.
Thanks.
On Sat, 20 Feb 2010, Octavian Purdila wrote:
> On Saturday 20 February 2010 10:11:40 you wrote:
> > Octavian Purdila wrote:
> > > This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
> > > allows users to reserve ports for third-party applications.
> > >
> > > The reserved ports will not be used by automatic port assignments
> > > (e.g. when calling connect() or bind() with port number 0). Explicit
> > > port allocation behavior is unchanged.
> > >
> > > Changes from the previous version:
> > > - switch the /proc entry format to coma separated list of range ports
> > > - treat -EFAULT just like any other error and acknowledge written values
> > > - use isdigit() in proc_get_ulong
> > >
> > > Octavian Purdila (3):
> > > sysctl: refactor integer handling proc code
> > > sysctl: add proc_do_large_bitmap
> > > net: reserve ports for applications using fixed port numbers
> >
> > Hi,
> >
> > This version looks fine for me, but I need to give them a test, and
> > I will put feedbacks asap. Thanks for your work!
> >
> > Still two things:
> >
> > 1) bitops are always atomic on every arch, right? If yes, then ok.
>
> AFAIK, yes.
>
> > 2) I hope you could add some documentation to show the relations
> > between ip_local_port_range and ip_local_reserved_ports.
> >
>
> How does this sound:
>
> ip_local_reserved_ports - list of comma separated ranges
> Specify the ports which are reserved for known third-party
> applications. These ports will not be used by automatic port
> assignments (e.g. when calling connect() or bind() with port
> number 0). Explicit port allocation behavior is unchanged.
>
> The format used for both input and output is a comma separated
> list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
> 10). Writing to the file will clear all previously reserved
> ports and update the current list with the one given in the
> input.
>
> Note that ip_local_port_range and ip_local_port_range settings
Change second ip_local_port_range to ip_local_reserved_ports.
-Bill
> are independent and both are considered by the kernel when
> determining which ports are available for automatic port
> assignments.
>
> You can reserve ports which are not in the current
> ip_local_port_range, e.g.:
>
> $ cat /proc/sys/net/ipv4/ip_local_port_range
> 32000 61000
> $ cat /proc/sys/net/ipv4/ip_local_reserved_ports
> 8080,9148
>
> although this is redundant. However such a setting is useful
> if later the port range is changed to a value that will
> include the reserved ports.
Octavian Purdila wrote:
> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
> allows users to reserve ports for third-party applications.
>
> The reserved ports will not be used by automatic port assignments
> (e.g. when calling connect() or bind() with port number 0). Explicit
> port allocation behavior is unchanged.
>
> Signed-off-by: Octavian Purdila <[email protected]>
> Signed-off-by: WANG Cong <[email protected]>
> Cc: Neil Horman <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> Cc: Eric W. Biederman <[email protected]>
My test case shows this works as expect, I mean reserving local ports.
So, for this one,
Acked-by: WANG Cong <[email protected]>
Thanks for your work!