2010-02-03 04:30:24

by Cong Wang

[permalink] [raw]
Subject: [RFC Patch] net: reserve ports for applications using fixed port numbers


This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
it can be used like ip_local_port_range, but this is used to
reserve ports for third-party applications which use fixed
port numbers within ip_local_port_range.

This only affects the applications which call socket functions
like bind(2) with port number 0, to prevent the kernel getting the ports
within the specified range for them. For applications which use fixed
port number, it will have no effects.

Any comments are welcome.

Signed-off-by: WANG Cong <[email protected]>
Cc: David Miller <[email protected]>
Cc: Neil Horman <[email protected]>
Cc: Eric Dumazet <[email protected]>

---
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index cc9b594..8248fc6 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1979,6 +1979,8 @@ retry:
/* FIXME: add proper port randomization per like inet_csk_get_port */
do {
ret = idr_get_new_above(ps, bind_list, next_port, &port);
+ if (inet_is_reserved_local_port(port))
+ ret = -EAGAIN;
} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));

if (ret)
@@ -2997,10 +2999,13 @@ static int __init cma_init(void)
{
int ret, low, high, remaining;

- get_random_bytes(&next_port, sizeof next_port);
inet_get_local_port_range(&low, &high);
+again:
+ get_random_bytes(&next_port, sizeof next_port);
remaining = (high - low) + 1;
next_port = ((unsigned int) next_port % remaining) + low;
+ if (inet_is_reserved_local_port(next_port))
+ goto again;

cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
diff --git a/include/net/ip.h b/include/net/ip.h
index fb63371..f70acad 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -181,8 +181,10 @@ extern void snmp_mib_free(void *ptr[2]);
extern struct local_ports {
seqlock_t lock;
int range[2];
-} sysctl_local_ports;
+} sysctl_local_ports, sysctl_local_reserved_ports;
extern void inet_get_local_port_range(int *low, int *high);
+extern void inet_get_local_reserved_ports(int *from, int *to);
+extern int inet_is_reserved_local_port(int port);

extern int sysctl_ip_default_ttl;
extern int sysctl_ip_nonlocal_bind;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ee16475..ee13e48 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,6 +37,11 @@ struct local_ports sysctl_local_ports __read_mostly = {
.range = { 32768, 61000 },
};

+struct local_ports sysctl_local_reserved_ports __read_mostly = {
+ .lock = SEQLOCK_UNLOCKED,
+ .range = { 0, 0 },
+};
+
void inet_get_local_port_range(int *low, int *high)
{
unsigned seq;
@@ -49,6 +54,28 @@ void inet_get_local_port_range(int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);

+void inet_get_local_reserved_ports(int *from, int *to)
+{
+ unsigned int seq;
+ do {
+ seq = read_seqbegin(&sysctl_local_reserved_ports.lock);
+
+ *from = sysctl_local_reserved_ports.range[0];
+ *to = sysctl_local_reserved_ports.range[1];
+ } while (read_seqretry(&sysctl_local_reserved_ports.lock, seq));
+}
+
+int inet_is_reserved_local_port(int port)
+{
+ int min, max;
+
+ inet_get_local_reserved_ports(&min, &max);
+ if (min && max)
+ return (port >= min && port <= max);
+ return 0;
+}
+EXPORT_SYMBOL(inet_is_reserved_local_port);
+
int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb)
{
@@ -105,6 +132,8 @@ again:
inet_get_local_port_range(&low, &high);
remaining = (high - low) + 1;
smallest_rover = rover = net_random() % remaining + low;
+ if (inet_is_reserved_local_port(rover))
+ goto again;

smallest_size = -1;
do {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2b79377..d3e160a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
+ if (inet_is_reserved_local_port(port))
+ continue;
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712c..9adf1a5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -23,6 +23,7 @@

static int zero;
static int tcp_retr1_max = 255;
+static int ip_local_reserved_ports_min[] = {0, 0 };
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };

@@ -63,6 +64,51 @@ static int ipv4_local_port_range(ctl_table *table, int write,
return ret;
}

+static void set_reserved_port_range(int range[2])
+{
+ write_seqlock(&sysctl_local_reserved_ports.lock);
+ sysctl_local_reserved_ports.range[0] = range[0];
+ sysctl_local_reserved_ports.range[1] = range[1];
+ write_sequnlock(&sysctl_local_reserved_ports.lock);
+}
+
+static int ipv4_local_reserved_ports(ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+ int range[2];
+ int reserved_range[2];
+ ctl_table tmp = {
+ .data = &reserved_range,
+ .maxlen = sizeof(reserved_range),
+ .mode = table->mode,
+ .extra1 = &ip_local_reserved_ports_min,
+ .extra2 = &ip_local_port_range_max,
+ };
+
+ inet_get_local_reserved_ports(reserved_range, reserved_range+1);
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ inet_get_local_port_range(range, range + 1);
+ if (!reserved_range[0] && !reserved_range[1]) {
+ set_reserved_port_range(reserved_range);
+ } else {
+ if (reserved_range[1] < reserved_range[0])
+ ret = -EINVAL;
+ else if (reserved_range[0] < range[0])
+ ret = -EINVAL;
+ else if (reserved_range[1] > range[1])
+ ret = -EINVAL;
+ else
+ set_reserved_port_range(reserved_range);
+ }
+ }
+
+ return ret;
+}
+
static int proc_tcp_congestion_control(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -298,6 +344,13 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
+ {
+ .procname = "ip_local_reserved_ports",
+ .data = &sysctl_local_reserved_ports.range,
+ .maxlen = sizeof(sysctl_local_reserved_ports.range),
+ .mode = 0644,
+ .proc_handler = ipv4_local_reserved_ports,
+ },
#ifdef CONFIG_IP_MULTICAST
{
.procname = "igmp_max_memberships",
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f0126fd..83045ca 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -210,8 +210,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
inet_get_local_port_range(&low, &high);
remaining = (high - low) + 1;

+again:
rand = net_random();
first = (((u64)rand * remaining) >> 32) + low;
+ if (inet_is_reserved_local_port(first))
+ goto again;
/*
* force rand to be an odd multiple of UDP_HTABLE_SIZE
*/
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 67fdac9..d685141 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5432,6 +5432,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover++;
if ((rover < low) || (rover > high))
rover = low;
+ if (inet_is_reserved_local_port(rover))
+ continue;
index = sctp_phashfn(rover);
head = &sctp_port_hashtable[index];
sctp_spin_lock(&head->lock);


2010-02-03 04:40:01

by Eric Dumazet

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Le mardi 02 février 2010 à 23:30 -0500, Amerigo Wang a écrit :
> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
> it can be used like ip_local_port_range, but this is used to
> reserve ports for third-party applications which use fixed
> port numbers within ip_local_port_range.
>
> This only affects the applications which call socket functions
> like bind(2) with port number 0, to prevent the kernel getting the ports
> within the specified range for them. For applications which use fixed
> port number, it will have no effects.
>
> Any comments are welcome.
>
> Signed-off-by: WANG Cong <[email protected]>
> Cc: David Miller <[email protected]>
> Cc: Neil Horman <[email protected]>
> Cc: Eric Dumazet <[email protected]>

> .procname = "igmp_max_memberships",
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index f0126fd..83045ca 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -210,8 +210,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
> inet_get_local_port_range(&low, &high);
> remaining = (high - low) + 1;
>
> +again:
> rand = net_random();
> first = (((u64)rand * remaining) >> 32) + low;
> + if (inet_is_reserved_local_port(first))
> + goto again;
> /*
> * force rand to be an odd multiple of UDP_HTABLE_SIZE
> */

Unless I misread the patch, you are checking only the 'first' port that
udp_lib_get_port() chose.

I would use inet_get_local_reserved_ports(&min_res, &max_res);
and check every port that we chose in the loop to avoid it if necessary.

2010-02-03 05:12:32

by Cong Wang

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Eric Dumazet wrote:
> Le mardi 02 février 2010 à 23:30 -0500, Amerigo Wang a écrit :
>> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
>> it can be used like ip_local_port_range, but this is used to
>> reserve ports for third-party applications which use fixed
>> port numbers within ip_local_port_range.
>>
>> This only affects the applications which call socket functions
>> like bind(2) with port number 0, to prevent the kernel getting the ports
>> within the specified range for them. For applications which use fixed
>> port number, it will have no effects.
>>
>> Any comments are welcome.
>>
>> Signed-off-by: WANG Cong <[email protected]>
>> Cc: David Miller <[email protected]>
>> Cc: Neil Horman <[email protected]>
>> Cc: Eric Dumazet <[email protected]>
>
>> .procname = "igmp_max_memberships",
>> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
>> index f0126fd..83045ca 100644
>> --- a/net/ipv4/udp.c
>> +++ b/net/ipv4/udp.c
>> @@ -210,8 +210,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
>> inet_get_local_port_range(&low, &high);
>> remaining = (high - low) + 1;
>>
>> +again:
>> rand = net_random();
>> first = (((u64)rand * remaining) >> 32) + low;
>> + if (inet_is_reserved_local_port(first))
>> + goto again;
>> /*
>> * force rand to be an odd multiple of UDP_HTABLE_SIZE
>> */
>
> Unless I misread the patch, you are checking only the 'first' port that
> udp_lib_get_port() chose.
>
> I would use inet_get_local_reserved_ports(&min_res, &max_res);
> and check every port that we chose in the loop to avoid it if necessary.
>

Hmm, right, 'first' is used to do iteration, but I did missed 'last'.
Thanks! I will fix this in the next update.

2010-02-03 11:16:15

by Octavian Purdila

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Wednesday 03 February 2010 06:30:07 you wrote:

> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
> it can be used like ip_local_port_range, but this is used to
> reserve ports for third-party applications which use fixed
> port numbers within ip_local_port_range.
>
> This only affects the applications which call socket functions
> like bind(2) with port number 0, to prevent the kernel getting the ports
> within the specified range for them. For applications which use fixed
> port number, it will have no effects.

It also affects the case where applications do connect, without previously
doing bind, right?

>
> Any comments are welcome.

I think it might be useful to allow setting individual ports as reserved, not
only ranges, for example by using a bitmap.

2010-02-04 03:20:51

by Cong Wang

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Octavian Purdila wrote:
> On Wednesday 03 February 2010 06:30:07 you wrote:
>
>> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
>> it can be used like ip_local_port_range, but this is used to
>> reserve ports for third-party applications which use fixed
>> port numbers within ip_local_port_range.
>>
>> This only affects the applications which call socket functions
>> like bind(2) with port number 0, to prevent the kernel getting the ports
>> within the specified range for them. For applications which use fixed
>> port number, it will have no effects.
>
> It also affects the case where applications do connect, without previously
> doing bind, right?


Yeah, I forgot to mention this, sorry.

>
>> Any comments are welcome.
>
> I think it might be useful to allow setting individual ports as reserved, not
> only ranges, for example by using a bitmap.
>

This is a good idea, but I am not sure if this will be overkill? :-/
Also, using bitmap is not friendly to sysctl interface, I am afraid.


Thanks!

2010-02-04 12:47:29

by Octavian Purdila

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Thursday 04 February 2010 05:23:38 you wrote:

> > I think it might be useful to allow setting individual ports as reserved,
> > not only ranges, for example by using a bitmap.
>
> This is a good idea, but I am not sure if this will be overkill? :-/
> Also, using bitmap is not friendly to sysctl interface, I am afraid.
>

My concern is that we can have multiple applications that require a fixed port
and if those ports are significantly apart we will decrease the port range
available for connect. And that will hurt the rate of which new connections
can be opened.

As for the sysctl interface I agree, I don't think it is even possible to
cleanly use a bitmap through sysctl.

The options I see are either enhance sysctl to support bitmaps or use a
dedicated /proc/net entry.

I want to give this a try, which one do you people think is better?

2010-02-04 17:41:00

by David Miller

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

From: Octavian Purdila <[email protected]>
Date: Thu, 4 Feb 2010 14:44:01 +0200

> My concern is that we can have multiple applications that require a
> fixed port and if those ports are significantly apart we will
> decrease the port range available for connect. And that will hurt
> the rate of which new connections can be opened.

I'm already uneasy about adding the simple check every time
we loop around in the bind port allocator.

Adding an LSM hook to this spot? I absolutely refuse to allow
that, it will completely kill bind performance.

2010-02-04 18:19:18

by Octavian Purdila

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Thursday 04 February 2010 19:41:10 you wrote:

> From: Octavian Purdila <[email protected]>
> Date: Thu, 4 Feb 2010 14:44:01 +0200
>
> > My concern is that we can have multiple applications that require a
> > fixed port and if those ports are significantly apart we will
> > decrease the port range available for connect. And that will hurt
> > the rate of which new connections can be opened.
>
> I'm already uneasy about adding the simple check every time
> we loop around in the bind port allocator.
>
> Adding an LSM hook to this spot? I absolutely refuse to allow
> that, it will completely kill bind performance.
>

I think Tetsuo was proposing the LSM hook, so I'll leave him the daunting task
of convincing you of the benefit of that :) - I have no opinion on this due to
massive lack of knowledge.

I was just proposing to use a discrete set of ports instead of a range. The
check in the current patch:

int inet_is_reserved_local_port(int port)
{
int min, max;

inet_get_local_reserved_ports(&min, &max);
if (min && max)
return (port >= min && port <= max);
return 0;
}

would become:

int inet_is_reserved_local_port(int port)
{
if (test_bit(port, reserved_ports))
return 1;
return 0;
}

In theory it might be slower because of the reserved_ports bitmap will have a
larger memory footprint than just a min/max, especially with random port
allocation. But is this an issue in practice?

2010-02-04 18:21:04

by David Miller

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

From: Octavian Purdila <[email protected]>
Date: Thu, 4 Feb 2010 20:15:51 +0200

> int inet_is_reserved_local_port(int port)
> {
> if (test_bit(port, reserved_ports))
> return 1;
> return 0;
> }
>
> In theory it might be slower because of the reserved_ports bitmap will have a
> larger memory footprint than just a min/max, especially with random port
> allocation. But is this an issue in practice?

No need to speculate, some simple benchmarks would confirm or deny
this.

2010-02-04 21:45:37

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Octavian Purdila wrote:
>
> int inet_is_reserved_local_port(int port)
> {
> if (test_bit(port, reserved_ports))
> return 1;
> return 0;
> }
>
Above check is exactly what I'm doing in the LSM hook.

2010-02-04 21:56:29

by David Miller

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

From: Tetsuo Handa <[email protected]>
Date: Fri, 5 Feb 2010 06:45:28 +0900

> Octavian Purdila wrote:
>>
>> int inet_is_reserved_local_port(int port)
>> {
>> if (test_bit(port, reserved_ports))
>> return 1;
>> return 0;
>> }
>>
> Above check is exactly what I'm doing in the LSM hook.

But his version can be done inline in 2 or 3 instructions.

An LSM hook will result in an indirect function call,
all live registers spilled to the stack, then all of
those reloaded when the function returns.

It will be much more expensive.

2010-02-05 00:41:16

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

David Miller wrote:
> > Octavian Purdila wrote:
> >>
> >> int inet_is_reserved_local_port(int port)
> >> {
> >> if (test_bit(port, reserved_ports))
> >> return 1;
> >> return 0;
> >> }
> >>
> > Above check is exactly what I'm doing in the LSM hook.
>
> But his version can be done inline in 2 or 3 instructions.
>
> An LSM hook will result in an indirect function call,
> all live registers spilled to the stack, then all of
> those reloaded when the function returns.
>
> It will be much more expensive.

If you can accept his version, I want to use his version (with an interface for
updating above "reserved_ports" by not only root user's sysctl() but also MAC's
policy configuration).

2010-02-05 01:08:48

by Octavian Purdila

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Friday 05 February 2010 02:41:12 you wrote:
> David Miller wrote:
> > > Octavian Purdila wrote:
> > >> int inet_is_reserved_local_port(int port)
> > >> {
> > >> if (test_bit(port, reserved_ports))
> > >> return 1;
> > >> return 0;
> > >> }
> > >
> > > Above check is exactly what I'm doing in the LSM hook.
> >
> > But his version can be done inline in 2 or 3 instructions.
> >
> > An LSM hook will result in an indirect function call,
> > all live registers spilled to the stack, then all of
> > those reloaded when the function returns.
> >
> > It will be much more expensive.
>
> If you can accept his version, I want to use his version (with an interface
> for updating above "reserved_ports" by not only root user's sysctl() but
> also MAC's policy configuration).
>

I think that simply using an interface to update the reserved_ports from MAC
policy configuration module wouldn't work, as root will be able to modify the
policy via sysctl.

I think that we might need to:

a) have a reserved_port updater

b) put a LSM hook into that

c) use the reserved_port updater from sysctl


2010-02-05 04:42:23

by Cong Wang

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Octavian Purdila wrote:
> On Thursday 04 February 2010 19:41:10 you wrote:
>
>> From: Octavian Purdila <[email protected]>
>> Date: Thu, 4 Feb 2010 14:44:01 +0200
>>
>>> My concern is that we can have multiple applications that require a
>>> fixed port and if those ports are significantly apart we will
>>> decrease the port range available for connect. And that will hurt
>>> the rate of which new connections can be opened.
>> I'm already uneasy about adding the simple check every time
>> we loop around in the bind port allocator.
>>
>> Adding an LSM hook to this spot? I absolutely refuse to allow
>> that, it will completely kill bind performance.
>>
>
> I think Tetsuo was proposing the LSM hook, so I'll leave him the daunting task
> of convincing you of the benefit of that :) - I have no opinion on this due to
> massive lack of knowledge.
>
> I was just proposing to use a discrete set of ports instead of a range. The
> check in the current patch:
>
> int inet_is_reserved_local_port(int port)
> {
> int min, max;
>
> inet_get_local_reserved_ports(&min, &max);
> if (min && max)
> return (port >= min && port <= max);
> return 0;
> }
>
> would become:
>
> int inet_is_reserved_local_port(int port)
> {
> if (test_bit(port, reserved_ports))
> return 1;
> return 0;
> }
>
> In theory it might be slower because of the reserved_ports bitmap will have a
> larger memory footprint than just a min/max, especially with random port
> allocation. But is this an issue in practice?

Again, using bitmap algorithm is not a problem and it's better, the
problem is sysctl interface, how would you plan to interact with users
via sysctl/proc if you use bitmap to handle this? I would like to hear
more details about this.

Thanks!

2010-02-05 05:58:36

by Cong Wang

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Octavian Purdila wrote:
> On Friday 05 February 2010 02:41:12 you wrote:
>> David Miller wrote:
>>>> Octavian Purdila wrote:
>>>>> int inet_is_reserved_local_port(int port)
>>>>> {
>>>>> if (test_bit(port, reserved_ports))
>>>>> return 1;
>>>>> return 0;
>>>>> }
>>>> Above check is exactly what I'm doing in the LSM hook.
>>> But his version can be done inline in 2 or 3 instructions.
>>>
>>> An LSM hook will result in an indirect function call,
>>> all live registers spilled to the stack, then all of
>>> those reloaded when the function returns.
>>>
>>> It will be much more expensive.
>> If you can accept his version, I want to use his version (with an interface
>> for updating above "reserved_ports" by not only root user's sysctl() but
>> also MAC's policy configuration).
>>
>
> I think that simply using an interface to update the reserved_ports from MAC
> policy configuration module wouldn't work, as root will be able to modify the
> policy via sysctl.
>
> I think that we might need to:
>
> a) have a reserved_port updater
>
> b) put a LSM hook into that
>
> c) use the reserved_port updater from sysctl
>
>

Ideally, you'd provide an interface for port allocator to use, so
doing port reservation will be easier.

Thanks.

2010-02-05 07:11:33

by Bart Van Assche

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Wed, Feb 3, 2010 at 5:30 AM, Amerigo Wang <[email protected]> wrote:
>
> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
> it can be used like ip_local_port_range, but this is used to
> reserve ports for third-party applications which use fixed
> port numbers within ip_local_port_range.
>
> This only affects the applications which call socket functions
> like bind(2) with port number 0, to prevent the kernel getting the ports
> within the specified range for them. For applications which use fixed
> port number, it will have no effects.
>
> Any comments are welcome.

Relying on fixed port numbers is generally considered as a shortcoming
in the application. It would be helpful if you could explain more in
detail why port number reservation is necessary. Maybe there exists
another solution that does not require modifying the bind() system
call.

A quote from the UNIX socket FAQ (http://www.faqs.org/faqs/unix-faq/socket/):

4.10. How should I choose a port number for my server?

The list of registered port assignments can be found in STD 2 or RFC
1700. Choose one that isn't already registered, and isn't in
/etc/services on your system. It is also a good idea to let users
customize the port number in case of conflicts with other un-
registered port numbers in other servers. The best way of doing this
is hardcoding a service name, and using getservbyname() to lookup the
actual port number. This method allows users to change the port your
server binds to by simply editing the /etc/services file.

Bart.

2010-02-05 07:22:11

by Cong Wang

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Bart Van Assche wrote:
> On Wed, Feb 3, 2010 at 5:30 AM, Amerigo Wang <[email protected]> wrote:
>> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
>> it can be used like ip_local_port_range, but this is used to
>> reserve ports for third-party applications which use fixed
>> port numbers within ip_local_port_range.
>>
>> This only affects the applications which call socket functions
>> like bind(2) with port number 0, to prevent the kernel getting the ports
>> within the specified range for them. For applications which use fixed
>> port number, it will have no effects.
>>
>> Any comments are welcome.
>
> Relying on fixed port numbers is generally considered as a shortcoming
> in the application. It would be helpful if you could explain more in
> detail why port number reservation is necessary. Maybe there exists
> another solution that does not require modifying the bind() system
> call.
>

The problem is that there are some existing applications which use
fixed port number, we don't have chances to change this for them,
thus making them working is desired, so they want to reserve these
port for those applications.

For example, if I have an appliction which uses port 40000, but
before this application starts, another application gets this port
number by bind() with port 0 (i.e. chosen by kernel), in this case,
that application will fail to start. Again, we don't have any chance
to change the source code of that application.

Hope this can make the problem clear.

Thanks.

2010-02-05 09:08:25

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed portnumbers

Cong Wang wrote:
> The problem is that there are some existing applications which use
> fixed port number, we don't have chances to change this for them,
> thus making them working is desired, so they want to reserve these
> port for those applications.
>
> For example, if I have an appliction which uses port 40000, but
> before this application starts, another application gets this port
> number by bind() with port 0 (i.e. chosen by kernel), in this case,
> that application will fail to start. Again, we don't have any chance
> to change the source code of that application.
>
And there is a utility called "portreserved" (port reserve daemon).
http://fedoraproject.org/wiki/Features/Portreserve

But that utility cannot close the race window between "portreserved stops
reserving local port numbers" and "applications starts using local port
numbers which portreserved was reserving".

Thus, I think people want to have port reservation mechanism inside kernel
(if it has little impact).

2010-02-05 12:09:21

by Octavian Purdila

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Friday 05 February 2010 06:45:38 you wrote:

> Again, using bitmap algorithm is not a problem and it's better, the
> problem is sysctl interface, how would you plan to interact with users
> via sysctl/proc if you use bitmap to handle this? I would like to hear
> more details about this.
>

We could use something like positive values for setting and negative for reset
(e.g. 3 would set the port in the bitmap and -3 would reset it).

But we would need new sysctl and proc handlers to handle the bitmap case (e.g.
sysctl_bitmap, proc_dobitmap_minmax).

2010-02-05 12:31:39

by Octavian Purdila

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Friday 05 February 2010 08:01:43 you wrote:

> >> If you can accept his version, I want to use his version (with an
> >> interface for updating above "reserved_ports" by not only root user's
> >> sysctl() but also MAC's policy configuration).
> >
> > I think that simply using an interface to update the reserved_ports from
> > MAC policy configuration module wouldn't work, as root will be able to
> > modify the policy via sysctl.
> >
> > I think that we might need to:
> >
> > a) have a reserved_port updater
> >
> > b) put a LSM hook into that
> >
> > c) use the reserved_port updater from sysctl
>
> Ideally, you'd provide an interface for port allocator to use, so
> doing port reservation will be easier.
>

If I understand the TOMOYO requirements correctly, we need a way to restrict a
user action based on some security policy (in this case the ability to clear
reserved ports). Traditionally that has been done with LSM hooks, so I think
that approach is preferable.

2010-02-08 03:18:37

by Cong Wang

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

Octavian Purdila wrote:
> On Friday 05 February 2010 06:45:38 you wrote:
>
>> Again, using bitmap algorithm is not a problem and it's better, the
>> problem is sysctl interface, how would you plan to interact with users
>> via sysctl/proc if you use bitmap to handle this? I would like to hear
>> more details about this.
>>
>
> We could use something like positive values for setting and negative for reset
> (e.g. 3 would set the port in the bitmap and -3 would reset it).


Hmm, then how do you output the info of those ports? Arrays of bitmaps?

>
> But we would need new sysctl and proc handlers to handle the bitmap case (e.g.
> sysctl_bitmap, proc_dobitmap_minmax).

Maybe.

Thanks.

2010-02-08 16:54:48

by Octavian Purdila

[permalink] [raw]
Subject: Re: [RFC Patch] net: reserve ports for applications using fixed port numbers

On Monday 08 February 2010 05:21:50 you wrote:
> Octavian Purdila wrote:
> > On Friday 05 February 2010 06:45:38 you wrote:
> >> Again, using bitmap algorithm is not a problem and it's better, the
> >> problem is sysctl interface, how would you plan to interact with users
> >> via sysctl/proc if you use bitmap to handle this? I would like to hear
> >> more details about this.
> >
> > We could use something like positive values for setting and negative for
> > reset (e.g. 3 would set the port in the bitmap and -3 would reset it).
>
> Hmm, then how do you output the info of those ports? Arrays of bitmaps?
>

See the patch bellow (work in progress).

BTW, while working on it I added some helpers, which we can use to rewrite the proc_doint/long stuff. I think it will help with readability and eliminates some code duplication as well. What do you guys think about that?

--- linux_2.6.32/main/src/kernel/sysctl.c
+++ linux_2.6.32/main/src/kernel/sysctl.c
@@ -250,6 +250,11 @@
static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
#endif

+static unsigned long test_bitmap[65535/sizeof(long)];
+static int proc_dobitmap(struct ctl_table *table, int write,
+ void __user *buf, size_t *lenp, loff_t *ppos);
+
+
static struct ctl_table kern_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
@@ -1032,6 +1037,15 @@
.proc_handler = &proc_dointvec,
},
#endif
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "bitmap_test",
+ .data = &test_bitmap,
+ .maxlen = 65535,
+ .mode = 0644,
+ .proc_handler = &proc_dobitmap,
+ },
+
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
@@ -2902,6 +2916,194 @@
return 0;
}

+static int proc_skip_wspace(char __user **buf, size_t *size)
+{
+ char c;
+
+ while (*size) {
+ if (get_user(c, *buf))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ *size--; *buf++;
+ }
+
+ return 0;
+}
+
+static inline int _proc_get_ulong(char __user **buf, size_t *size,
+ unsigned long *val, bool *neg)
+{
+#define TMPBUFLEN 21
+ int len = *size;
+ char *p, tmp[TMPBUFLEN];
+
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+
+ if (copy_from_user(tmp, *buf, len))
+ return -EFAULT;
+
+ tmp[len] = 0;
+ p = tmp;
+ if (*p == '-' && *size > 1) {
+ *neg = 1;
+ p++;
+ }
+ if (*p < '0' || *p > '9')
+ return -EINVAL;
+
+ *val = simple_strtoul(p, &p, 0);
+
+ len = p - tmp;
+ if ((len < *size) && *p && !isspace(*p))
+ return -EINVAL;
+
+ *buf += len; *size -= len;
+
+ return 0;
+#undef TMPBUFLEN
+}
+
+static int proc_get_long(char __user **buf, size_t *size, long *val)
+{
+ int err;
+ bool neg;
+ unsigned long uval;
+
+ err = _proc_get_ulong(buf, size, &uval, &neg);
+ if (err)
+ return err;
+
+ if (neg)
+ *val = -uval;
+ else
+ *val = uval;
+
+ return 0;
+}
+
+static int proc_get_ulong(char __user **buf, size_t *size, unsigned long *val)
+{
+ int err;
+ bool neg;
+
+ err = _proc_get_ulong(buf, size, val, &neg);
+ if (err)
+ return err;
+ if (neg)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int proc_put_ulong(char __user **buf, size_t *size, unsigned long val,
+ bool first)
+{
+#define TMPBUFLEN 21
+ int len;
+ char tmp[TMPBUFLEN], *p = tmp;
+
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%lu", val);
+ len = strlen(tmp);
+ if (len > *size)
+ len = *size;
+ if (copy_to_user(*buf, tmp, len))
+ return -EFAULT;
+ *size -= len;
+ *buf += len;
+ return 0;
+#undef TMPBUFLEN
+}
+
+static int proc_put_newline(char __user **buf, size_t *size)
+{
+ if (*size) {
+ if (put_user('\n', *buf))
+ return -EFAULT;
+ *size--, *buf++;
+ }
+ return 0;
+}
+
+static int proc_dobitmap(struct ctl_table *table, int write,
+ void __user *buf, size_t *lenp, loff_t *ppos)
+{
+ bool first = 1;
+ unsigned long *bitmap = (unsigned long *) table->data;
+ unsigned long bitmap_len = table->maxlen;
+ int left = *lenp, err = 0;
+ char __user *buffer = (char __user *) buf;
+
+ if (!bitmap_len || !left || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ while (left) {
+ long val;
+
+ err = proc_skip_wspace(&buffer, &left);
+ if (err)
+ break;
+ if (!left) {
+ err = -EINVAL;
+ break;
+ }
+ err = proc_get_long(&buffer, &left, &val);
+ if (err)
+ break;
+ if (abs(val) > bitmap_len) {
+ err = -EINVAL;
+ break;
+ }
+
+ if (val < 0)
+ clear_bit(-val, bitmap);
+ else
+ set_bit(val, bitmap);
+
+ first = 0;
+ }
+ if (!err)
+ err = proc_skip_wspace(&buffer, &left);
+ } else {
+ unsigned long bit = 0;
+
+ while (left) {
+ bit = find_next_bit(bitmap, bitmap_len, bit);
+ printk("%s:%d %lu\n", __func__, __LINE__, bit);
+ if (bit >= bitmap_len)
+ break;
+ err = proc_put_ulong(&buffer, &left, bit, first);
+ printk("%s:%d %d\n", __func__, __LINE__, err);
+ if (err)
+ break;
+ first = 0; bit++;
+ }
+ if (!err)
+ err = proc_put_newline(&buffer, &left);
+ }
+
+ if (first) {
+ if (write && !err)
+ err = -EINVAL;
+ if (err)
+ return err;
+ }
+
+ if (err == -EFAULT)
+ return err;
+
+ printk("%s:%d %d %d\n", __func__, __LINE__, *lenp, left);
+ *lenp -= left;
+ *ppos += *lenp;
+ return 0;
+}
+
#else /* CONFIG_PROC_FS */

int proc_dostring(struct ctl_table *table, int write,