Subject: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

thanks to someone for pointing out to me my flub when i errantly
converted a spin_unlock to rtnl_lock (it in a very rarely, never
in fact as far as i know, used section of the code. this following
should now be even more correct.

[ATM]: use rtnl_{lock,unlock} during device operations

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.1164 -> 1.1165
# net/atm/proc.c 1.17 -> 1.18
# net/atm/signaling.c 1.10 -> 1.11
# net/atm/resources.c 1.11 -> 1.12
# net/atm/common.c 1.28 -> 1.29
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/06/04 [email protected] 1.1165
# use rtnl_{lock,unlock} during device operations
# --------------------------------------------
#
diff -Nru a/net/atm/common.c b/net/atm/common.c
--- a/net/atm/common.c Wed Jun 4 20:20:31 2003
+++ b/net/atm/common.c Wed Jun 4 20:20:31 2003
@@ -376,19 +376,18 @@
struct atm_dev *dev = NULL;
struct list_head *p, *next;

- spin_lock(&atm_dev_lock);
+ rtnl_lock();
list_for_each_safe(p, next, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
atm_dev_hold(dev);
- spin_unlock(&atm_dev_lock);
if (!atm_do_connect_dev(vcc,dev,vpi,vci))
break;
atm_dev_release(dev);
dev = NULL;
- spin_lock(&atm_dev_lock);
}
- spin_unlock(&atm_dev_lock);
- if (!dev) return -ENODEV;
+ rtnl_unlock();
+ if (!dev)
+ return -ENODEV;
}
if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC)
set_bit(ATM_VF_PARTIAL,&vcc->flags);
@@ -642,17 +641,17 @@
goto done;
}
size = 0;
- spin_lock(&atm_dev_lock);
+ rtnl_lock();
list_for_each(p, &atm_devs)
size += sizeof(int);
if (size > len) {
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
ret_val = -E2BIG;
goto done;
}
tmp_buf = kmalloc(size, GFP_ATOMIC);
if (!tmp_buf) {
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
ret_val = -ENOMEM;
goto done;
}
@@ -661,7 +660,7 @@
dev = list_entry(p, struct atm_dev, dev_list);
*tmp_p++ = dev->number;
}
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
ret_val = ((copy_to_user(buf, tmp_buf, size)) ||
put_user(size, &((struct atm_iobuf *) arg)->length)
) ? -EFAULT : 0;
diff -Nru a/net/atm/proc.c b/net/atm/proc.c
--- a/net/atm/proc.c Wed Jun 4 20:20:31 2003
+++ b/net/atm/proc.c Wed Jun 4 20:20:31 2003
@@ -314,16 +314,16 @@
"AAL(TX,err,RX,err,drop) ... [refcnt]\n");
}
left = pos-1;
- spin_lock(&atm_dev_lock);
+ rtnl_lock();
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
if (left-- == 0) {
dev_info(dev,buf);
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
return strlen(buf);
}
}
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
return 0;
}

@@ -349,7 +349,7 @@
if (try_atm_clip_ops())
clip_info = 1;
#endif
- spin_lock(&atm_dev_lock);
+ rtnl_lock();
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
spin_lock_irqsave(&dev->lock, flags);
@@ -357,7 +357,7 @@
if (vcc->sk->family == PF_ATMPVC && vcc->dev && !left--) {
pvc_info(vcc,buf,clip_info);
spin_unlock_irqrestore(&dev->lock, flags);
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
if (clip_info)
module_put(atm_clip_ops->owner);
@@ -366,7 +366,7 @@
}
spin_unlock_irqrestore(&dev->lock, flags);
}
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
if (clip_info)
module_put(atm_clip_ops->owner);
@@ -388,7 +388,7 @@
"Address"," Itf VPI VCI Fam Flags Reply Send buffer"
" Recv buffer\n");
left = pos-1;
- spin_lock(&atm_dev_lock);
+ rtnl_lock();
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
spin_lock_irqsave(&dev->lock, flags);
@@ -396,12 +396,12 @@
if (!left--) {
vc_info(vcc,buf);
spin_unlock_irqrestore(&dev->lock, flags);
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
return strlen(buf);
}
spin_unlock_irqrestore(&dev->lock, flags);
}
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();

return 0;
}
@@ -418,7 +418,7 @@
if (!pos)
return sprintf(buf,"Itf VPI VCI State Remote\n");
left = pos-1;
- spin_lock(&atm_dev_lock);
+ rtnl_lock();
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
spin_lock_irqsave(&dev->lock, flags);
@@ -426,12 +426,12 @@
if (vcc->sk->family == PF_ATMSVC && !left--) {
svc_info(vcc,buf);
spin_unlock_irqrestore(&dev->lock, flags);
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
return strlen(buf);
}
spin_unlock_irqrestore(&dev->lock, flags);
}
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();

return 0;
}
diff -Nru a/net/atm/resources.c b/net/atm/resources.c
--- a/net/atm/resources.c Wed Jun 4 20:20:31 2003
+++ b/net/atm/resources.c Wed Jun 4 20:20:31 2003
@@ -27,7 +27,7 @@


LIST_HEAD(atm_devs);
-spinlock_t atm_dev_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t atm_dev_lock = SPIN_LOCK_UNLOCKED;

static struct atm_dev *__alloc_atm_dev(const char *type)
{
@@ -58,7 +58,6 @@
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
if ((dev->ops) && (dev->number == number)) {
- atm_dev_hold(dev);
return dev;
}
}
@@ -71,6 +70,8 @@

spin_lock(&atm_dev_lock);
dev = __atm_dev_lookup(number);
+ if (dev)
+ atm_dev_hold(dev);
spin_unlock(&atm_dev_lock);
return dev;
}
@@ -86,11 +87,9 @@
type);
return NULL;
}
- spin_lock(&atm_dev_lock);
+ rtnl_lock();
if (number != -1) {
if ((inuse = __atm_dev_lookup(number))) {
- atm_dev_release(inuse);
- spin_unlock(&atm_dev_lock);
__free_atm_dev(dev);
return NULL;
}
@@ -98,7 +97,6 @@
} else {
dev->number = 0;
while ((inuse = __atm_dev_lookup(dev->number))) {
- atm_dev_release(inuse);
dev->number++;
}
}
@@ -110,8 +108,6 @@
memset(&dev->flags, 0, sizeof(dev->flags));
memset(&dev->stats, 0, sizeof(dev->stats));
atomic_set(&dev->refcnt, 1);
- list_add_tail(&dev->dev_list, &atm_devs);
- spin_unlock(&atm_dev_lock);

#ifdef CONFIG_PROC_FS
if (ops->proc_read) {
@@ -119,15 +115,17 @@
printk(KERN_ERR "atm_dev_register: "
"atm_proc_dev_register failed for dev %s\n",
type);
- spin_lock(&atm_dev_lock);
- list_del(&dev->dev_list);
- spin_unlock(&atm_dev_lock);
__free_atm_dev(dev);
return NULL;
}
}
#endif

+ spin_lock(&atm_dev_lock);
+ list_add_tail(&dev->dev_list, &atm_devs);
+ spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
+
return dev;
}

@@ -136,6 +134,7 @@
{
unsigned long warning_time;

+ rtnl_lock();
#ifdef CONFIG_PROC_FS
if (dev->ops->proc_read)
atm_proc_dev_deregister(dev);
@@ -156,6 +155,7 @@
warning_time = jiffies;
}
}
+ rtnl_unlock();

__free_atm_dev(dev);
}
diff -Nru a/net/atm/signaling.c b/net/atm/signaling.c
--- a/net/atm/signaling.c Wed Jun 4 20:20:31 2003
+++ b/net/atm/signaling.c Wed Jun 4 20:20:31 2003
@@ -220,14 +220,14 @@
printk(KERN_ERR "sigd_close: closing with requests pending\n");
skb_queue_purge(&vcc->sk->receive_queue);

- spin_lock(&atm_dev_lock);
+ rtnl_lock();
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
spin_lock_irqsave(&dev->lock, flags);
purge_vccs(dev->vccs);
spin_unlock_irqrestore(&dev->lock, flags);
}
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
}



2003-06-06 09:25:17

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: chas williams <[email protected]>
Date: Thu, 05 Jun 2003 11:28:47 -0400

thanks to someone for pointing out to me my flub when i errantly
converted a spin_unlock to rtnl_lock (it in a very rarely, never
in fact as far as i know, used section of the code. this following
should now be even more correct.

[ATM]: use rtnl_{lock,unlock} during device operations

Are you sure nothing needs to walk the list in interrupt or softint
context? That's why you can't normally protect all of it using the
RTNL semaphore, because walks occur in non-sleepable contexts.

Read the comment above dev_base in drivers/net/Space.c to see what
the intended locking model is.

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,"David S. Miller" writes:
>Are you sure nothing needs to walk the list in interrupt or softint
>context? That's why you can't normally protect all of it using the
>RTNL semaphore, because walks occur in non-sleepable contexts.

oddly enough, i dont believe the list is iterated in interrupt
context.

>Read the comment above dev_base in drivers/net/Space.c to see what
>the intended locking model is.

yeah, i already read that. it has a bit of a typo (rtln indeed).
it looks like rtnl_lock() is also used to protect dev_ioctl's
(thus my usage in atm_ioctl) and protect lookup's like __dev_get_by_name.
i didnt get rid of atm_dev_lock, i just dont use it unless writing
or if i couldnt safely use rtnl when a reader is iterating (like
atm_dev_hold() which could be called at interrupt--though no one does).
i thought this was the idea.

2003-06-06 10:53:10

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: chas williams <[email protected]>
Date: Fri, 06 Jun 2003 06:58:20 -0400

In message <[email protected]>,"David S. Miller" writes:
>Read the comment above dev_base in drivers/net/Space.c to see what
>the intended locking model is.

yeah, i already read that. it has a bit of a typo (rtln indeed).
it looks like rtnl_lock() is also used to protect dev_ioctl's
(thus my usage in atm_ioctl) and protect lookup's like __dev_get_by_name.

RTNL also protects the rest of all networking administrative
via being acquired in the recvmsg() loop of rtnetlink.c

Basically it protects all networking administrative actions, add an
address for a device, up a device, down a device, add a route, attach
a packet scheduler to dev, etc. etc.

i didnt get rid of atm_dev_lock, i just dont use it unless writing
or if i couldnt safely use rtnl when a reader is iterating (like
atm_dev_hold() which could be called at interrupt--though no one does).
i thought this was the idea.

Hmmm, this is not how RTNL works on netdevs. The SMP lock is held
around all walking, and at the very precise moment where we are
doing the actual device unlink from dev_base. rtnl is acquired at
top-level when we will change something.

This is very different from how you are using the lock+rtnl scheme
for your ATM stuff.

2003-06-06 11:11:38

by David Anderson

[permalink] [raw]
Subject:

unsubscribe linux-kernel

2003-06-06 13:44:42

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> This is very different from how you are using the lock+rtnl scheme
> for your ATM stuff.

What should help in the ATM code is that it pushes synchronization
"down", i.e. "close" functions usually can't return until they are
truly done (or at least have made sure there is nothing externally
visible left).

In the case of devices, delayed removal is possible, but is then
initiated when closing a VCC (VCCs act as an implicit reference
count for devices), which is another synchronous operation.

VCC creation/removal in response to other asynchronous activity,
e.g. after ARP, is relayed through user-space demons, which then
make normal system calls to implement the change.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 13:56:49

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Fri, 6 Jun 2003 10:57:53 -0300

What should help in the ATM code is that it pushes synchronization
"down", i.e. "close" functions usually can't return until they are
truly done (or at least have made sure there is nothing externally
visible left).

If we move over to a more netdevice-based design for ATM,
this will not longer be acceptable.

Unregister of netdevices is %100 asynchronous, even if references
remain (and even if the device is UP!), we close then rip the device
out of the kernel. As references go away we finally get to zero
and thus can finally kfree() up the netdevice.

This has some problems currently which Al and myself are fixing. In
the final analysis we'll even handle things like stray SYSFS and
PROCFS references by marking the device "dead" at unregister time
and any post-unregister reference will see this and error out.

This is a much better model than synchronizing everything, you tie
your hands when you do it that way and it tends to lead to module
unload deadlocks.

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,"David S. Miller" writes:
>Basically it protects all networking administrative actions, add an
>address for a device, up a device, down a device, add a route, attach
>a packet scheduler to dev, etc. etc.

so should i hold rtnl across add/remove atm addresses on atm dev's?
(but iw ouldnt hold rtnl across people just reading the list of
atm addresses right?)

>Hmmm, this is not how RTNL works on netdevs. The SMP lock is held
>around all walking, and at the very precise moment where we are
>doing the actual device unlink from dev_base. rtnl is acquired at
>top-level when we will change something.

>This is very different from how you are using the lock+rtnl scheme
>for your ATM stuff.

ok, i was thinking i could use rtnl to protect readers. this makes the
connect(dev = ANY) rather icky. some of the abuse by me in other places
might be moot in the future. i am planning (or have done) to move all
the vcc's onto a global list (ala many of the other protocol stacks).
this makes the code for proc (and others) much cleaner since you just grab
a read lock on the global vcc sklist instead of locking and interating
each atm device. further, this will let atm interrupt handlers block
a race with vcc close/removal. is this a bad plan?

2003-06-06 14:57:28

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: chas williams <[email protected]>
Date: Fri, 06 Jun 2003 11:05:37 -0400

so should i hold rtnl across add/remove atm addresses on atm dev's?
(but iw ouldnt hold rtnl across people just reading the list of
atm addresses right?)

Correct.

i am planning (or have done) to move all the vcc's onto a global
list (ala many of the other protocol stacks). this makes the code
for proc (and others) much cleaner since you just grab a read lock
on the global vcc sklist instead of locking and interating each atm
device. further, this will let atm interrupt handlers block a race
with vcc close/removal. is this a bad plan?

Sounds good.

2003-06-06 15:01:00

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> If we move over to a more netdevice-based design for ATM,
> this will not longer be acceptable.

Why, what's wrong with that ? It's simple, efficient, and it works.

> Unregister of netdevices is %100 asynchronous, even if references
> remain (and even if the device is UP!), we close then rip the device
> out of the kernel. As references go away we finally get to zero
> and thus can finally kfree() up the netdevice.

Well, that's similar in a synchronous design. Only that you make
sure that the removal is only attempted from a context that can
sleep. (That's one of the things the demons do. And no, I don't
think you want them in the kernel :-)

> This is a much better model than synchronizing everything, you tie
> your hands when you do it that way and it tends to lead to module
> unload deadlocks.

Hmm, last time I tried to ignore circular dependencies in
ansynchronous designs, they failed just as badly as in
synchronous designs :-)

There are certainly places where a synchronous design doesn't
make sense. But in the case of ATM device and VCC handling, you
already have all the synchronous code paths (because things are
initiated by user space), they're not very timing-critical, and
reuse before destruction has completed is unlikely.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 15:05:22

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Fri, 6 Jun 2003 12:13:39 -0300

But in the case of ATM device and VCC handling, you
already have all the synchronous code paths (because things are
initiated by user space), they're not very timing-critical, and
reuse before destruction has completed is unlikely.

VCC's are just like routes, they are setup by a control
layer in userspace, and RTNL should therefore protect changes
to such things.

But regardless I should be able to yank an ATM device out of the
kernel (unregistering it) even if there are a thousand VCC's attached
to it. The user control process gets a netlink message saying to kill
them off, but that's entirely seperate from the unregistering act
itself.

2003-06-06 15:13:04

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> But regardless I should be able to yank an ATM device out of the
> kernel (unregistering it) even if there are a thousand VCC's attached
> to it.

Why ? A VCC is more like a network interface than a TCP
connection.

Worse yet, if you remove the device, it's unlikely that you
can use a new one with the same physical interface before
all the old VCCs are gone. (I.e. you almost always have
things on well-known VCCs, which are associated with physical
devices.)

Removing an ATM device while there are open VCCs isn't a lot
more useful than removing a telephone while there is still a
call in progress :-)

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 15:17:08

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Fri, 6 Jun 2003 12:26:16 -0300

David S. Miller wrote:
> But regardless I should be able to yank an ATM device out of the
> kernel (unregistering it) even if there are a thousand VCC's attached
> to it.

Why ? A VCC is more like a network interface than a TCP
connection.

It's more like an IP tunnel and a route, granted. And those are
similarly configured, and to me the same rules apply.

Removing an ATM device while there are open VCCs isn't a lot
more useful than removing a telephone while there is still a
call in progress :-)

It's like rmmod'ing TCP while you have TCP connections.
And it's like unregistering IP tunnels while you still have
routes going over them, the routes just get deleted.

And frankly I see absolutely NOTHING wrong with doing these
sorts of things.

2003-06-06 15:41:01

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> It's more like an IP tunnel and a route, granted.

Even with the route, the destination can remain "fixed".
The VCC only makes sense in the context of the device, which
is fully visible to the user. (It's different in the case of
SVCs, but they're managed by a user space demon. Besides, if
their device goes away, they die too.)

> And those are
> similarly configured, and to me the same rules apply.

Why do you care ? That part of the current design is
technically adequate and reasonably simple. Littering the
code with asynchronous code paths would only make it more
complex.

(If you want to keep Chas busy, the communication between
the kernel and its demons may be a much more interesting
topic ;-)

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 15:45:00

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Fri, 6 Jun 2003 12:54:16 -0300

David S. Miller wrote:
> And those are
> similarly configured, and to me the same rules apply.

Why do you care ?

Because ATM devices have always been "funny", and there
is so much infrastructure, and frankly sanity, they can
share by being more netdevice like.

All the lack of proper SMP locking and races are a result
of ATM devices being different and not being fixed up
when we did all of the SMP work for netdevices.

Chas has to do a lot of work now that would not have been
necessary.

(If you want to keep Chas busy, the communication between
the kernel and its demons may be a much more interesting
topic ;-)

Tell me it at least uses netlink ;(

2003-06-06 16:27:42

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> Because ATM devices have always been "funny", and there
> is so much infrastructure, and frankly sanity, they can
> share by being more netdevice like.

Yes, and I agree that it's important that this gets fixed.
Also, the code was basically unmaintained for about two
years, and that shows.

I'm just pointing out that "asynchronizing" some internal
process that is perfectly happy with being synchronous
(and consistent with the semantics of ATM, which themselves
are quite "funny" most of the time) strikes me as a not
very helpful move.

> Tell me it at least uses netlink ;(

Nope, pure DIY. Actually, I think at the time when I wrote
that stuff ('95), SIOC* still ruled the world ...

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 16:50:39

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> i am planning (or have done) to move all the vcc's onto a global list

At that point, you may also want to sanitize struct atmsvc_msg.vcc
and struct atmsvc_msg.listen_vcc through a hash, instead of blindly
trusting atmsigd.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 21:27:46

by Mitchell Blank Jr

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Werner Almesberger wrote:
> (It's different in the case of
> SVCs, but they're managed by a user space demon. Besides, if
> their device goes away, they die too.)

Yes, in theory it would be nice to have SVCs be able to reroute between
interfaces, but we certainly don't have the infrastructure to do that now
(and I doubt that they'll ever be a significant enough push to demand its
development) To do that you'd need the kernel to look like a simple
ATM switch and have atmsigd speak NNI to its neighbors (didn't someone have
some code for this WAY back in the dark ages... but it had some unfortunate
license issues... or am I remembering wrong?)

You still would have the the userspace-visible vcc pinned to a ATM device
but it would just be a pseudo-device managed by the virtual switch. Then
the switch would take care of how to route the SVC.

So in short I agree with you that the current method is best - it makes
no operational sense to deactivate an ATM interface while there are still
open VCs. Since VCs are intrinsically bound to a {dev,vpi,vci} tuple from
userlands perspective they would be uselss if their dev disappeared (even
if it reappeared later).

One thing I WOULD like to see is way to do something like "ifconfig up/down"
on ATM devices (once the VCs are all down) instead of having all of the
ATM cards with drivers automatically considered "up" like we currently do.
Some types of interfaces (esp *DSL) have rather complicated procedures for
setting up the interface and it would be nice to be able to control this
explicitly. It'd be really useful to have a third state called "auto"
which would bring it up when the first VCC is opened and down when the last
is closed. Not only would this be great for DSL (since it would establish
the DSL connection automatically when you run pppd or whatever) but it
would be a sane "least surprise" default.

[Very vaguely related - it would also be nice to have a per-itf flag to
require CAP_NET_BIND_SERVICE for all VCCs instead of just "vpi==0 &&
vci<ATM_NOT_RSV_VCI". In 99% of deployments there aren't any ATM-native
apps running so you don't want users establishing random ATM vcs]

-Mitch

2003-06-06 21:38:13

by Mitchell Blank Jr

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> (If you want to keep Chas busy, the communication between
> the kernel and its demons may be a much more interesting
> topic ;-)
>
> Tell me it at least uses netlink ;(

No.... not even close :-(

The really gross part is that it uses kernel-land pointers as "opaque" VC
descriptors, so basically if atmsigd goes nuts in can easily stomp all
over the kernel's memory. Way back when I added a CAP_SYS_RAWIO check to
the ATMSIGD_CTRL ioctl so at least there's no security hole but it's still
damn gross (no offense, Werner :-)

I agree that fixing that old cruft would be a lot more productive than
trying to shoehorn the ATM devices into the netdevice framework.

-Mitch

2003-06-06 22:43:39

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Mitchell Blank Jr wrote:
> ATM switch and have atmsigd speak NNI to its neighbors (didn't someone have
> some code for this WAY back in the dark ages... but it had some unfortunate
> license issues... or am I remembering wrong?)

I think it was Ascom who had some P-NNI code, but they never
made the effort of clearing it for release, even though it
was only gathering dust in some drawer.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 23:05:55

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Mitchell Blank Jr wrote:
> so basically if atmsigd goes nuts in can easily stomp all
> over the kernel's memory.

Naw, it isn't supposed to do that :-) I wonder if anyone
actually made functional changes to atmsigd (or qgen ;-) since
I last touched it ...

But yes, with a unified VCC table, it certainly makes sense to
add a hash to validate those pointers. I still think that using
pointers per se is a good idea, because they're naturally
unique numbers. Given that a VCC can be in all kinds of states,
it would be pretty hard to use anything in struct atm_vcc else
as a unique key. Also, it's not very common to have atmsigd
talk ISP (Internal Signaling Protocol - the thing used between
atmsigd and the kernel) to a different box.

> the ATMSIGD_CTRL ioctl so at least there's no security hole but it's still
> damn gross (no offense, Werner :-)

It could probably be used to leverage other security holes in
atmsigd. (Not that I'm aware of any, but I'd be surprised if
there were none.)

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Mitchell Blank Jr writes:
>The really gross part is that it uses kernel-land pointers as "opaque" VC
>descriptors, so basically if atmsigd goes nuts in can easily stomp all
>over the kernel's memory. Way back when I added a CAP_SYS_RAWIO check to
>the ATMSIGD_CTRL ioctl so at least there's no security hole but it's still
>damn gross (no offense, Werner :-)

yes, it pretty awful. the problem is that this will take some changes
to user space stuff as well to be done right. so i was hoping to put
it off till 2.7 and hopefully get something that's smp stable for
2.6.

>I agree that fixing that old cruft would be a lot more productive than
>trying to shoehorn the ATM devices into the netdevice framework.

i would be willing to review patches. my concern is to have something
stable for 2.6. making atm more compliant with the netdevice framework
will make it easier to maintain in the future (or so i hope).

2003-06-06 23:28:51

by Mitchell Blank Jr

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Werner Almesberger wrote:
> But yes, with a unified VCC table, it certainly makes sense to
> add a hash to validate those pointers. I still think that using
> pointers per se is a good idea, because they're naturally
> unique numbers.

True... it's gross when you have 32-bit userland and a 64-bit kernel but
we already dealt with that pain for sparc64.

> > the ATMSIGD_CTRL ioctl so at least there's no security hole but it's still
> > damn gross (no offense, Werner :-)
>
> It could probably be used to leverage other security holes in
> atmsigd.

Not really... since atmsigd runs as root it could just as easily
open("/proc/kcore") and go to town.

-Mitch

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>Even with the route, the destination can remain "fixed".
>The VCC only makes sense in the context of the device, which
>is fully visible to the user. (It's different in the case of
>SVCs, but they're managed by a user space demon. Besides, if
>their device goes away, they die too.)

actually i think its a good idea to uncouple the atm
devices and the vcc. while the vcc is useless w/o the
atm dev, there are quite a few resources tied to the vcc
(via sk side). not having them seperate orignally led
to the 'nodev' atm device as a place to keep track of
vcc that are going away.

once the atm device/vcc are uncoupled they can easily be
converted to a net device. this keeps me from needing to
replicate the net device code (particularly the sysfs
stuff -- or so i hope). netdevices already have a handy
register/unregister that works (and will keep working).
why would i want to duplicate the net device work?

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>make sense. But in the case of ATM device and VCC handling, you
>already have all the synchronous code paths (because things are
>initiated by user space), they're not very timing-critical, and
>reuse before destruction has completed is unlikely.

wrong! the biggest problem with the atm stack is because one
thing isnt synchronus. the atm recv side is completely async.
it can pop up at anytime and try to use a vcc.

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>Removing an ATM device while there are open VCCs isn't a lot
>more useful than removing a telephone while there is still a
>call in progress :-)

ever hang up in the middle of a call? ever want to hang up
in the middle of a phone call?

2003-06-06 23:53:10

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> ever hang up in the middle of a call? ever want to hang up
> in the middle of a phone call?

Yes, exactly: if you want to remove the phone, this also
removes the call. In TCP/IP, this isn't the case. Your
TCP connections will survive route changes, interface
removals, etc.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-06 23:56:41

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> wrong! the biggest problem with the atm stack is because one
> thing isnt synchronus. the atm recv side is completely async.
> it can pop up at anytime and try to use a vcc.

The data plane, yes. But the control/configuration plane is
synchronous. And it also makes sure that the driver stops
doing asynchronous things when removing a VCC.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-07 00:07:46

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> converted to a net device. this keeps me from needing to
> replicate the net device code (particularly the sysfs
> stuff -- or so i hope). netdevices already have a handy
> register/unregister that works (and will keep working).
> why would i want to duplicate the net device work?

Sure, particularly sysfs as "the next big thing" is a good reason
to align data structures and general semantics with the rest of
the stack.

The only thing that worries me in all this is Dave's request to
make device destruction asynchronous, because of the complexity
this is likely to add, for, IMHO, little or no gain.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>Naw, it isn't supposed to do that :-) I wonder if anyone
>actually made functional changes to atmsigd (or qgen ;-) since
>I last touched it ...

we (in particualr ekinize) added point to multipoint signalling.
its mostly handled in user space (atmsigd). some changes will be
needed when we support vbr.

>But yes, with a unified VCC table, it certainly makes sense to
>add a hash to validate those pointers. I still think that using
>pointers per se is a good idea, because they're naturally
>unique numbers. Given that a VCC can be in all kinds of states,

at that point we would probably just fix it to use the netlink
interface (or whatever is going to be the acceptable interface)

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>TCP connections will survive route changes, interface
>removals, etc.

really? if i remove my ethernet interface i expect all the
connections to die.

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>The only thing that worries me in all this is Dave's request to
>make device destruction asynchronous, because of the complexity
>this is likely to add, for, IMHO, little or no gain.

but its actually pretty easy to do. its similar to atmsigd
exiting. all the vcc's (on that device) would be purged and
eventually close allowing the atm device to be removed from the kernel.

2003-06-07 00:43:54

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> really? if i remove my ethernet interface i expect all the
> connections to die.

They survive at least ifconfig down, then ifconfig up. I have
to do this often enough :-( I think there was a time (probably
long long ago), when downing the interface killed them
immediately.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>The data plane, yes. But the control/configuration plane is
>synchronous. And it also makes sure that the driver stops
>doing asynchronous things when removing a VCC.

but parts of the control/config plane still arent synchronus.
how about atm_async_release()? actually i believe its up to
the driver author to make sure that a vcc isnt used after the
close completes. but very few (if any) try to do this.

2003-06-07 00:47:08

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> we (in particualr ekinize) added point to multipoint signalling.
> its mostly handled in user space (atmsigd).

Oh, that's pretty cool.

> at that point we would probably just fix it to use the netlink
> interface (or whatever is going to be the acceptable interface)

netlink is just the transport, plus a bit of addressing and such.
It's still largely up to you to decide what goes into these
messages.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-07 00:57:47

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> but parts of the control/config plane still arent synchronus.
> how about atm_async_release()?

That's just "tag and go away". If I understood Dave right, he
wants the device to actually disappear at that point (well, at
what would be the equivalent point for devices).

Of course, "tag and go away" is relatively easy, and you can
even make it partially look as if you'd destroy things
immediately, e.g. by putting the old "ATM device" layer under
a network device layer. When that dreaded async call comes,
you drop the network device layer part, leave the ATM device
intact, and start working towards getting rid of it too. (E.g.
by failing all system calls related to it, much like what
happens when atmsigd dies.)

> actually i believe its up to the driver author to make sure that
> a vcc isnt used after the close completes.

When a driver's "close" function returns, the driver must have
released all externally visible resources (e.g. VPI/VCI) that
belong to the VCC, and ideally all invisible resources (e.g.
buffers). There must be no more calls to "push".

In return, the stack must not make any other calls for that
VCC after invoking "close".

Hmm, and I should have written all this in the device driver
interface document :-)

> but very few (if any) try to do this.

Looks like trouble ...

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-07 01:00:55

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

chas williams wrote:
> but its actually pretty easy to do. its similar to atmsigd
> exiting. all the vcc's (on that device) would be purged and
> eventually close allowing the atm device to be removed from the kernel.

Ah, here you wrote it yourself. Yes, that would be a comprimise
that doesn't force you to make the removal itself asynchronous.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>> how about atm_async_release()?
>That's just "tag and go away". If I understood Dave right, he
>wants the device to actually disappear at that point (well, at
>what would be the equivalent point for devices).

that's ok. the sk list (of vcc's) will be seperate from
the atm dev so you can take the vcc w/o relying on a device
to hold the list.

btw, you might get async device 'releases' more often than
you would like. some atm devices are usb and could be
unplugged during operation (yes, that's really bad but it
would be wise to be prepared for this.) i actually have
a cardbus atm interface. i might eject it accidentally.

>In return, the stack must not make any other calls for that
>VCC after invoking "close".
>Hmm, and I should have written all this in the device driver
>interface document :-)
>Looks like trouble ...

right!

2003-06-07 06:42:51

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Fri, 6 Jun 2003 21:06:20 -0300

Yes, exactly: if you want to remove the phone, this also
removes the call. In TCP/IP, this isn't the case. Your
TCP connections will survive route changes, interface
removals, etc.

But not a change in IP address :-) TCP connections are
exactly like VCC's in this regard, the VCC here is defined
as saddr/sport/daddr/dport, and if any of these must change due
to an administrative act, it kills the "phone call" :-)

2003-06-07 06:48:52

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: chas williams <[email protected]>
Date: Fri, 06 Jun 2003 20:45:51 -0400

really? if i remove my ethernet interface i expect all the
connections to die.

Nope, this actually works.

Many a moon ago, we did this wrong and yes the TCP connections
died.

But these days, definitely if you bring the interface back up
with the same IP addresses, it just works and the connections
recover.

2003-06-07 06:47:21

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Fri, 6 Jun 2003 21:20:26 -0300

The only thing that worries me in all this is Dave's request to
make device destruction asynchronous,

Not a request, they already are asynchronous today in 2.5.x

unregister_netdevice() rips the device out and returns, and
the problems we need to fix to make this work %100 are problems
that exist regardless of whether things operate asynchronously
or not.

For example, crap like this was always busted:

rmmod eth0 </proc/sys/net/ipv4/conf/eth0/whatever

and now the asynchornous model forces us to fix this.

Werner, don't turn this into another one of those absolutely
rediculious discussions about module semantic threads you
guys all pile-drove into Rusty several months ago. That stuff
stunk like pure shit and really unfairly drove Rusty up a wall.

It really showed how pointless linux-kernel discussion can
be and how much such rediculious discussions can totally impede
real progress because someone LOUD disagrees with someone's
game plan.

2003-06-07 06:51:44

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Fri, 6 Jun 2003 22:11:00 -0300

When a driver's "close" function returns, the driver must have
released all externally visible resources (e.g. VPI/VCI) that
belong to the VCC, and ideally all invisible resources (e.g.
buffers). There must be no more calls to "push".

In return, the stack must not make any other calls for that
VCC after invoking "close".

This is exactly how netdevice's WON'T be acting anymore.

All netdevice objects are allocated dynamically, and unregister
just means "disconnect" not "free". It marks the device as
dead, and NULLs out all the callbacks in the netdevice struct.

Any further attempt to use the device (via some PROCFS file I/O
or whatever) will just error out.

So we return long before all references go away, and the final
netdevice reference put does the de-allocation of the netdevice
struct.

Totally asynchronous, and it just works.

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>The data plane, yes. But the control/configuration plane is
>synchronous. And it also makes sure that the driver stops
>doing asynchronous things when removing a VCC.

i forgot to mention that this is difficult with the way most of the
atm drivers are written. in smp kernels, you cant disable interrupts
across all processors to keep the drivers bottom halves from running.
(ok, you could but it would be very naughty). if the bottom halves were
tasklets this would be easy, but most drivers would need converted.

2003-06-07 10:55:59

by James Stevenson

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

On Fri, 6 Jun 2003, chas williams wrote:

> In message <[email protected]>,Werner Almesberger writes:
> >TCP connections will survive route changes, interface
> >removals, etc.
>
> really? if i remove my ethernet interface i expect all the
> connections to die.

Think of a latop with a normall ethernet card in it.
When you unplug the cable it wont disconnect all the tcp
connection on the interface so that you could re route everything though
a wireless card.



Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,Werner Almesberger writes:
>It could probably be used to leverage other security holes in
>atmsigd. (Not that I'm aware of any, but I'd be surprised if
>there were none.)

well various bits of the atm library used to use vsprintf. we changed
this to vsnprintf just to avoid trouble. i couldnt see how it might be
possible for someone to send a call setup that would overflow the debug
format buffer but better safe than sorry.

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,James Steven
son writes:
>Think of a latop with a normall ethernet card in it.
>When you unplug the cable it wont disconnect all the tcp
>connection on the interface so that you could re route everything though
>a wireless card.

if i have a single interface and i physically remove it (not just unplug
the cable) i would be willing to accept that certain tcp connections are
going to die. particularly tcp which might be using keep alives.

2003-06-07 14:23:00

by James Stevenson

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)


> In message <[email protected]>,James Steven
> son writes:
> >Think of a latop with a normall ethernet card in it.
> >When you unplug the cable it wont disconnect all the tcp
> >connection on the interface so that you could re route everything though
> >a wireless card.
>
> if i have a single interface and i physically remove it (not just unplug
> the cable) i would be willing to accept that certain tcp connections are
> going to die. particularly tcp which might be using keep alives.

>From how is understand it the tcp connections should be alive
until they try to send data. As soon as they try to send
data on a down interface as in they dont have a route any more
an icmp packet of host / network unreachable should be generated
then the connection can be killed.

James

2003-06-07 15:50:26

by Mr. James W. Laferriere

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Hello James ,

On Sat, 7 Jun 2003, James Stevenson wrote:
> On Fri, 6 Jun 2003, chas williams wrote:
> > In message <[email protected]>,Werner Almesberger writes:
> > >TCP connections will survive route changes, interface
> > >removals, etc.
> > really? if i remove my ethernet interface i expect all the
> > connections to die.

> Think of a latop with a normall ethernet card in it.
> When you unplug the cable it wont disconnect all the tcp
> connection on the interface so that you could re route everything though
> a wireless card.
This anology is incorrect or at least can be circumvented .
Ie: All physical interfaces could be unnumbered & th eloopback be
the only interface with THE ip . Then all routes lead to Rome as
it were .
Btw , This can & does work . Hth , JimL
--
+------------------------------------------------------------------+
| James W. Laferriere | System Techniques | Give me VMS |
| Network Engineer | P.O. Box 854 | Give me Linux |
| [email protected] | Coudersport PA 16915 | only on AXP |
+------------------------------------------------------------------+

2003-06-07 17:59:24

by Ryan Anderson

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

On Fri, Jun 06, 2003 at 11:59:46PM -0700, David S. Miller wrote:
> From: chas williams <[email protected]>
> Date: Fri, 06 Jun 2003 20:45:51 -0400
>
> really? if i remove my ethernet interface i expect all the
> connections to die.
>
> Nope, this actually works.
>
> Many a moon ago, we did this wrong and yes the TCP connections
> died.
>
> But these days, definitely if you bring the interface back up
> with the same IP addresses, it just works and the connections
> recover.

FWIW, I seem to recall Windows doing this in older version, but with XP
it's very good about noticing link death, and killing things off.

I like the "correct" behavior much better, especially when dealing with
a combination of ethernet cables that have finicky ends and laptops that
get moved a little bit. (Poof - cable slips out, all connections drop,
waste 5 minutes getting things retarted.)

--

Ryan Anderson
sometimes Pug Majere

2003-06-07 18:48:54

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Hi,

On Fri, 6 Jun 2003, David S. Miller wrote:

> unregister_netdevice() rips the device out and returns, and
> the problems we need to fix to make this work %100 are problems
> that exist regardless of whether things operate asynchronously
> or not.
>
> For example, crap like this was always busted:
>
> rmmod eth0 </proc/sys/net/ipv4/conf/eth0/whatever
>
> and now the asynchornous model forces us to fix this.

The basic problem is still that module_exit is a synchronous interface,
from where you can't call any asynchronous functions, unless you prevent
new callbacks via try_module_get() or you have to wait for all pending
events. This means you have to artificially turn an asynchronous interface
into a synchronous one, if you want to use it from module_exit.
The current problems can be of course fixed within the current module
model, but it certainly won't be simpler than the alternatives. An
asynchronous module model would greatly help to avoid lots of
try_module_get() in many code paths.

> It really showed how pointless linux-kernel discussion can
> be and how much such rediculious discussions can totally impede
> real progress because someone LOUD disagrees with someone's
> game plan.

Well, I certainly won't stand in the way, if people want to learn it the
hard way. I can try to explain the problems, but I can't force anyone to
listen. On the contrary I can only encourage everyone to fix the problems
within the current module framework, it's certainly possible. The high
level interfaces like file systems or network devices are rather simple,
but the more fine-grained the modules become, the more interesting it will
get. These problems need to be fixed anyway and once they are fixed, the
easier it will be for me to demonstrate alternative solutions (for 2.6
it's too late anyway and I don't have the time to fix all the problems
myself) and until then I have no problem to just shut up if nobody wants
to listen anyway.
The only thing I really regret are the complete new user utilities, this
was completely unnecessary and it will likely change again anyway (by
merging it with the hot plug interface).

bye, Roman

2003-06-08 03:18:18

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> But not a change in IP address :-)

Hmm yes, that might be a bit tricky to handle. Although one
could argue that a TCP connection could even exist through
this - you just couldn't send or receive traffic while the
local address isn't valid.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-08 03:32:18

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> For example, crap like this was always busted:
>
> rmmod eth0 </proc/sys/net/ipv4/conf/eth0/whatever
>
> and now the asynchornous model forces us to fix this.

Well, that's just the good old broken module API problem again.
Under the premise that modules can't be fixed, but the world
around them can, try_module_get is an adequate band-aid for
this API bug, but I wouldn't apply Kant's formula of universal
law quite so literally here :-)

> Werner, don't turn this into another one of those absolutely
> rediculious discussions about module semantic threads you
> guys all pile-drove into Rusty several months ago. That stuff
> stunk like pure shit and really unfairly drove Rusty up a wall.

Bah, what happened to appreciating the fine art of
single-handedly waging a flame war ? ;-))

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-08 03:52:11

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> So we return long before all references go away, and the final
> netdevice reference put does the de-allocation of the netdevice
> struct.
>
> Totally asynchronous, and it just works.

Hmm, don't you also need the condition that, after initiating
removal, only operations should succeed that generally work
towards eliminating references ? Otherwise, you could just
put a synchronous layer underneath net devices, and play
pretend.

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-08 06:30:00

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

On Sat, 2003-06-07 at 20:45, Werner Almesberger wrote:
> Well, that's just the good old broken module API problem again.
> Under the premise that modules can't be fixed, but the world
> around them can, try_module_get is an adequate band-aid for
> this API bug, but I wouldn't apply Kant's formula of universal
> law quite so literally here :-)

Netdevices NO LONGER use module refcounts in any way shape or form. They
are not needed to fix problems of this nature.

They way to fix it is to always dynamically allocate your netdevice
objects, and mark them dead. The final kfree() of the object can be
deferred until the final reference goes away, and that could be 10 years
from now, it doesn't matter and the module can be unloaded NOW and
without any delay.

--
David S. Miller <[email protected]>

2003-06-08 06:46:29

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Roman Zippel <[email protected]>
Date: Sat, 7 Jun 2003 21:01:54 +0200 (CEST)

The basic problem is still that module_exit is a synchronous interface,
from where you can't call any asynchronous functions, unless you prevent
new callbacks via try_module_get() or you have to wait for all pending
events.

We don't use module refcounts at all anymore for netdev objects.
That's the whole key.

Module refcounts are spurious when a subsystem does it's own sane
accounting and uses strictly dynamically allocated objects.

2003-06-08 22:19:31

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Hi,

On Sat, 7 Jun 2003, David S. Miller wrote:

> The basic problem is still that module_exit is a synchronous interface,
> from where you can't call any asynchronous functions, unless you prevent
> new callbacks via try_module_get() or you have to wait for all pending
> events.
>
> We don't use module refcounts at all anymore for netdev objects.
> That's the whole key.

If I read the source correctly, unregister_netdevice() simply does rip the
device out and sees what happens? It's a bit primitive, but should of
course work too.
I'm just not sure if that model works that well in other parts too, I
think file systems don't really like it, if the device under them
disappears. Any kind of automatic module unloading also becomes a bit
difficult without a module use count. I also hope no net driver starts
exporting it's own data via sysfs (I currently don't see anything that
would protect this).
BTW does anything protect the get_stats() call in netstat_attr_show()?

bye, Roman

2003-06-09 05:24:31

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Roman Zippel <[email protected]>
Date: Mon, 9 Jun 2003 00:32:49 +0200 (CEST)

On Sat, 7 Jun 2003, David S. Miller wrote:

> We don't use module refcounts at all anymore for netdev objects.
> That's the whole key.

If I read the source correctly, unregister_netdevice() simply does rip the
device out and sees what happens? It's a bit primitive, but should of
course work too.

The transition is half complete. Eventually even that
"wait for refcount to hit zero" part will go away, and
also we will add the logic to mark the device at "dead"
and then teach all the sysfs/procfs routines to error out
if they see the device they are examining is in this state.

2003-06-09 13:23:37

by Duncan Sands

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

> btw, you might get async device 'releases' more often than
> you would like. some atm devices are usb and could be
> unplugged during operation (yes, that's really bad but it
> would be wise to be prepared for this.) i actually have
> a cardbus atm interface. i might eject it accidentally.

Right. In the speedtouch (USB ATM DSL modem) driver, when the
device is unplugged I would have liked to be able to say to the
ATM layer: I've gone, don't call me any more. But since there
is no way to do this, instead I do:

- refuse to open any more vccs; fail all attempts to send packets
- call shutdown_atm_dev. This means that when the last vcc is
closed, atm_dev_close will be called in my driver.
- really shutdown in atm_dev_close

In practice that means that the vcc remains open until pppd realises
that the connection has gone down (no more echo requests getting
through, for example). Maybe I should push a NULL skb down into
each vcc to get it to close?

Another thing: if the modem is plugged in again, and pppd relaunched,
it would be nice if connections that were open when the modem was
unplugged automagically recovered. Is that possible?

Ciao,

Duncan.

2003-06-09 13:28:03

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Duncan Sands <[email protected]>
Date: Mon, 9 Jun 2003 15:37:13 +0200

- refuse to open any more vccs; fail all attempts to send packets

My personal feeling is that there should be a way to zombie
VCCs, precisely for such events.

ATM should unlink them immediately, and mark them dead.
Anything that tries to do something with a VCC should
check that it is still alive.

With something like this you can ensure that a driver does
not get called into anymore.

It is just my non-ATM-expert opinion :-)

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

>My personal feeling is that there should be a way to zombie
>VCCs, precisely for such events.
>
>ATM should unlink them immediately, and mark them dead.
>Anything that tries to do something with a VCC should
>check that it is still alive.

i imagine marking vcc->dev = NULL would be pretty close to the above.
most operations going into the driver check vcc->dev already and it
might actually handle this correct w/o too much extra effort. you
might also set some flags on the vcc's like ATM_VF_RELEASED etc etc.

of course, this sort of implies that the vcc's are on their own list
and not on a list that is actually part of the atm dev. (see latest
rfc patch).

2003-06-09 13:49:45

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: chas williams <[email protected]>
Date: Mon, 09 Jun 2003 09:58:45 -0400

>ATM should unlink them immediately, and mark them dead.
>Anything that tries to do something with a VCC should
>check that it is still alive.

i imagine marking vcc->dev = NULL would be pretty close to the above.

Yep. That'd work.

(see latest rfc patch).

You do know that won't compile with current 2.5.x right?
All the struct sock members got renamed to have a "sk_" prefix
2 days ago :-)

But I did like the general direction you were going in. I can't
comment on things like getting rid of ATM_*_ITF or whatever it's
called because I don't know what that thing does ;)

Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

In message <[email protected]>,"David S. Miller" write
s:
> (see latest rfc patch).
>
>You do know that won't compile with current 2.5.x right?
>All the struct sock members got renamed to have a "sk_" prefix
>2 days ago :-)

i do remember you telling me that. i pulled a new copy of the tree
and will get my patch fixed. in the meantime the curious should use
a slightly older kernel.

>But I did like the general direction you were going in. I can't
>comment on things like getting rid of ATM_*_ITF or whatever it's
>called because I don't know what that thing does ;)

you could say open vcp.vci but i dont care which interface you use.
typically you care which atm interface you might exit. they might
not be connected to the same network. i can see how this might be
useful for load balancing. but even then you still might something
smarter to do the selection. perhaps only two interfaces are load
balancing. the third might be your dsl uplink. forethought (the
marconi atm stack) has an idea of failover groups. you define these
as a list of adapters. any adapter in the group can handle a call
setup. this works ok for svc's since sigd can take care of the mess.
for pvc's i dont entirely see the point. regardless, i think the 'any'
interface idea should be handled in user space and the kernel always
passed a 'real' interface.

2003-06-09 14:46:40

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: chas williams <[email protected]>
Date: Mon, 09 Jun 2003 10:54:13 -0400

you could say open vcp.vci but i dont care which interface you use.

Ok, this reminds me that we need to figure out how MPLS
figures into all of this.. sorry I've fallen behind in my
studies in this area.

2003-06-09 22:46:05

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Hi,

On Sun, 8 Jun 2003, David S. Miller wrote:

> The transition is half complete. Eventually even that
> "wait for refcount to hit zero" part will go away, and
> also we will add the logic to mark the device at "dead"
> and then teach all the sysfs/procfs routines to error out
> if they see the device they are examining is in this state.

This would be basically the same as moving the try_module_get/module_put
calls from the open/close to the read/write functions.
You still need to synchronize with already running functions and if your
that far it's probably easier to simply replace the ops pointer to get rid
of the dead test.
This still leaves you with a very limited control of the module unloading
process...

bye, Roman

2003-06-09 22:49:50

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Roman Zippel <[email protected]>
Date: Tue, 10 Jun 2003 00:59:21 +0200 (CEST)

You still need to synchronize with already running functions

netdev->dead = 1;
netdev->op_this = NULL;
netdev->op_that = NULL;
netdev->op_whatever = NULL;
synchronize_kernel();

2003-06-09 23:01:51

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Hi,

On Mon, 9 Jun 2003, David S. Miller wrote:

> You still need to synchronize with already running functions
>
> netdev->dead = 1;
> netdev->op_this = NULL;
> netdev->op_that = NULL;
> netdev->op_whatever = NULL;
> synchronize_kernel();

That assumes of course that the functions don't sleep.
(RCU isn't really the answer to everything.)

bye, Roman

2003-06-09 23:04:14

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Roman Zippel <[email protected]>
Date: Tue, 10 Jun 2003 01:14:56 +0200 (CEST)

On Mon, 9 Jun 2003, David S. Miller wrote:

> netdev->dead = 1;
> netdev->op_this = NULL;
> netdev->op_that = NULL;
> netdev->op_whatever = NULL;
> synchronize_kernel();

That assumes of course that the functions don't sleep.
(RCU isn't really the answer to everything.)

They hold references to the object, it doesn't matter if
they sleep. Forget the "netdev->dead" line, it isn't necessary.

2003-06-09 23:21:11

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Hi,

On Mon, 9 Jun 2003, David S. Miller wrote:

> > netdev->dead = 1;
> > netdev->op_this = NULL;
> > netdev->op_that = NULL;
> > netdev->op_whatever = NULL;
> > synchronize_kernel();
>
> That assumes of course that the functions don't sleep.
> (RCU isn't really the answer to everything.)
>
> They hold references to the object, it doesn't matter if
> they sleep.

That's not the point. You also have to wait for the already running
operations to finish, before you can allow the module to unload.

bye, Roman

2003-06-09 23:28:49

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Roman Zippel <[email protected]>
Date: Tue, 10 Jun 2003 01:34:19 +0200 (CEST)

You also have to wait for the already running
operations to finish, before you can allow the module to unload.

These things run under dev_base_lock, so either they find the device
or they don't, and since they hold a spinlock they can't preempt.

2003-06-10 18:13:44

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

Hi,

On Mon, 9 Jun 2003, David S. Miller wrote:

> You also have to wait for the already running
> operations to finish, before you can allow the module to unload.
>
> These things run under dev_base_lock, so either they find the device
> or they don't, and since they hold a spinlock they can't preempt.

dev_base_lock mostly protects the device list, but it doesn't protect the
call of get_stats.
Anyway, I'm really not against these changes. Actually it's quite close to
what I already proposed months ago. The basic idea was always to replace
the global module lock with a device specific lock (which is needed for
dynamic device management anyway) and to let the driver provide a module
use count. This is not that different and it was rejected from Rusty and
"really unfairly drove Rusty up a wall".
I look forward to progress in this area and maybe then it's easier to
discuss how this can be generalized and applied to other parts of the
kernel and maybe we can also compare it to that "stuff" I proposed which
"stunk like pure shit". ;-)

bye, Roman

2003-06-10 21:23:40

by Werner Almesberger

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

David S. Miller wrote:
> Netdevices NO LONGER use module refcounts in any way shape or form. They
> are not needed to fix problems of this nature.

Oh, I see. I didn't notice that change, sorry. Indeed, that looks
quite promising.

Is this mature enough in 2.5.70 to make it worth looking for holes,
or should I rather wait a bit ?

- Werner

--
_________________________________________________________________________
/ Werner Almesberger, Buenos Aires, Argentina [email protected] /
/_http://www.almesberger.net/____________________________________________/

2003-06-10 22:06:37

by David Miller

[permalink] [raw]
Subject: Re: [PATCH][ATM] use rtnl_{lock,unlock} during device operations (take 2)

From: Werner Almesberger <[email protected]>
Date: Tue, 10 Jun 2003 18:34:09 -0300

Is this mature enough in 2.5.70 to make it worth looking for holes,
or should I rather wait a bit ?

Wait about a month or two :)