2006-09-01 04:39:33

by NeilBrown

[permalink] [raw]
Subject: [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle


From: Olaf Kirch <[email protected]>

This patch introduces the nsm_handle, which is shared by
all nlm_host objects referring to the same client.

With this patch applied, all nlm_hosts from the same address
will share the same nsm_handle. A future patch will add sharing
by name.

Note: this patch changes h_name so that it is no longer guaranteed
to be an IP address of the host. When the host represents an NFS server,
h_name will be the name passed in the mount call. When the host
represents a client, h_name will be the name presented in the lock
request received from the client. Since h_name is only used for printing
informational messages, this change should not be significant.

Signed-off-by: Olaf Kirch <[email protected]>
Signed-off-by: Neil Brown <[email protected]>

### Diffstat output
./fs/lockd/clntlock.c | 3 -
./fs/lockd/host.c | 121 ++++++++++++++++++++++++++++++++++++++----
./fs/lockd/mon.c | 14 +++-
./include/linux/lockd/lockd.h | 17 ++++-
4 files changed, 136 insertions(+), 19 deletions(-)

diff .prev/fs/lockd/clntlock.c ./fs/lockd/clntlock.c
--- .prev/fs/lockd/clntlock.c 2006-08-31 16:44:23.000000000 +1000
+++ ./fs/lockd/clntlock.c 2006-08-31 17:00:03.000000000 +1000
@@ -150,7 +150,8 @@ u32 nlmclnt_grant(const struct sockaddr_
static void nlmclnt_prepare_reclaim(struct nlm_host *host)
{
down_write(&host->h_rwsem);
- host->h_monitored = 0;
+ if (host->h_nsmhandle)
+ host->h_nsmhandle->sm_monitored = 0;
host->h_state++;
host->h_nextrebind = 0;
nlm_rebind_host(host);

diff .prev/fs/lockd/host.c ./fs/lockd/host.c
--- .prev/fs/lockd/host.c 2006-08-31 16:23:12.000000000 +1000
+++ ./fs/lockd/host.c 2006-08-31 17:00:03.000000000 +1000
@@ -34,6 +34,8 @@ static DEFINE_MUTEX(nlm_host_mutex);


static void nlm_gc_hosts(void);
+static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
+ const char *, int, int);

/*
* Find an NLM server handle in the cache. If there is none, create it.
@@ -68,7 +70,7 @@ nlm_lookup_host(int server, const struct
int hostname_len)
{
struct nlm_host *host, **hp;
- u32 addr;
+ struct nsm_handle *nsm = NULL;
int hash;

dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
@@ -86,7 +88,21 @@ nlm_lookup_host(int server, const struct
if (time_after_eq(jiffies, next_gc))
nlm_gc_hosts();

+ /* We may keep several nlm_host objects for a peer, because each
+ * nlm_host is identified by
+ * (address, protocol, version, server/client)
+ * We could probably simplify this a little by putting all those
+ * different NLM rpc_clients into one single nlm_host object.
+ * This would allow us to have one nlm_host per address.
+ */
for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) {
+ if (!nlm_cmp_addr(&host->h_addr, sin))
+ continue;
+
+ /* See if we have an NSM handle for this client */
+ if (!nsm && (nsm = host->h_nsmhandle) != 0)
+ atomic_inc(&nsm->sm_count);
+
if (host->h_proto != proto)
continue;
if (host->h_version != version)
@@ -94,7 +110,7 @@ nlm_lookup_host(int server, const struct
if (host->h_server != server)
continue;

- if (nlm_cmp_addr(&host->h_addr, sin)) {
+ {
if (hp != nlm_hosts + hash) {
*hp = host->h_next;
host->h_next = nlm_hosts[hash];
@@ -106,16 +122,18 @@ nlm_lookup_host(int server, const struct
}
}

- /* Ooops, no host found, create it */
- dprintk("lockd: creating host entry\n");
+ /* Sadly, the host isn't in our hash table yet. See if
+ * we have an NSM handle for it. If not, create one.
+ */
+ if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len)))
+ goto out;

host = kzalloc(sizeof(*host), GFP_KERNEL);
- if (!host)
- goto nohost;
-
- addr = sin->sin_addr.s_addr;
- sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr));
-
+ if (!host) {
+ nsm_release(nsm);
+ goto out;
+ }
+ host->h_name = nsm->sm_name;
host->h_addr = *sin;
host->h_addr.sin_port = 0; /* ouch! */
host->h_version = version;
@@ -129,6 +147,7 @@ nlm_lookup_host(int server, const struct
init_rwsem(&host->h_rwsem);
host->h_state = 0; /* pseudo NSM state */
host->h_nsmstate = 0; /* real NSM state */
+ host->h_nsmhandle = nsm;
host->h_server = server;
host->h_next = nlm_hosts[hash];
nlm_hosts[hash] = host;
@@ -140,7 +159,7 @@ nlm_lookup_host(int server, const struct
if (++nrhosts > NLM_HOST_MAX)
next_gc = 0;

-nohost:
+out:
mutex_unlock(&nlm_host_mutex);
return host;
}
@@ -393,3 +412,83 @@ nlm_gc_hosts(void)
next_gc = jiffies + NLM_HOST_COLLECT;
}

+
+/*
+ * Manage NSM handles
+ */
+static LIST_HEAD(nsm_handles);
+static DECLARE_MUTEX(nsm_sema);
+
+static struct nsm_handle *
+__nsm_find(const struct sockaddr_in *sin,
+ const char *hostname, int hostname_len,
+ int create)
+{
+ struct nsm_handle *nsm = NULL;
+ struct list_head *pos;
+
+ if (!sin)
+ return NULL;
+
+ if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
+ if (printk_ratelimit()) {
+ printk(KERN_WARNING "Invalid hostname \"%.*s\" "
+ "in NFS lock request\n",
+ hostname_len, hostname);
+ }
+ return NULL;
+ }
+
+ down(&nsm_sema);
+ list_for_each(pos, &nsm_handles) {
+ nsm = list_entry(pos, struct nsm_handle, sm_link);
+
+ if (!nlm_cmp_addr(&nsm->sm_addr, sin))
+ continue;
+ atomic_inc(&nsm->sm_count);
+ goto out;
+ }
+
+ if (!create) {
+ nsm = NULL;
+ goto out;
+ }
+
+ nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
+ if (nsm != NULL) {
+ nsm->sm_addr = *sin;
+ nsm->sm_name = (char *) (nsm + 1);
+ memcpy(nsm->sm_name, hostname, hostname_len);
+ nsm->sm_name[hostname_len] = '\0';
+ atomic_set(&nsm->sm_count, 1);
+
+ list_add(&nsm->sm_link, &nsm_handles);
+ }
+
+out: up(&nsm_sema);
+ return nsm;
+}
+
+struct nsm_handle *
+nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
+{
+ return __nsm_find(sin, hostname, hostname_len, 1);
+}
+
+/*
+ * Release an NSM handle
+ */
+void
+nsm_release(struct nsm_handle *nsm)
+{
+ if (!nsm)
+ return;
+ if (atomic_read(&nsm->sm_count) == 1) {
+ down(&nsm_sema);
+ if (atomic_dec_and_test(&nsm->sm_count)) {
+ list_del(&nsm->sm_link);
+ kfree(nsm);
+ }
+ up(&nsm_sema);
+ }
+}

diff .prev/fs/lockd/mon.c ./fs/lockd/mon.c
--- .prev/fs/lockd/mon.c 2006-08-31 16:12:30.000000000 +1000
+++ ./fs/lockd/mon.c 2006-08-31 17:00:03.000000000 +1000
@@ -70,11 +70,14 @@ nsm_mon_unmon(struct nlm_host *host, u32
int
nsm_monitor(struct nlm_host *host)
{
+ struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status;

dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
- if (host->h_monitored)
+ BUG_ON(nsm == NULL);
+
+ if (nsm->sm_monitored)
return 0;

status = nsm_mon_unmon(host, SM_MON, &res);
@@ -82,7 +85,7 @@ nsm_monitor(struct nlm_host *host)
if (status < 0 || res.status != 0)
printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
else
- host->h_monitored = 1;
+ nsm->sm_monitored = 1;
return status;
}

@@ -92,19 +95,22 @@ nsm_monitor(struct nlm_host *host)
int
nsm_unmonitor(struct nlm_host *host)
{
+ struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status = 0;

dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
- if (!host->h_monitored)
+ if (nsm == NULL)
return 0;
- host->h_monitored = 0;
+ host->h_nsmhandle = NULL;

if (!host->h_killed) {
status = nsm_mon_unmon(host, SM_UNMON, &res);
if (status < 0)
printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name);
+ nsm->sm_monitored = 0;
}
+ nsm_release(nsm);
return status;
}


diff .prev/include/linux/lockd/lockd.h ./include/linux/lockd/lockd.h
--- .prev/include/linux/lockd/lockd.h 2006-08-31 16:23:12.000000000 +1000
+++ ./include/linux/lockd/lockd.h 2006-08-31 17:00:03.000000000 +1000
@@ -40,14 +40,13 @@ struct nlm_host {
struct nlm_host * h_next; /* linked list (hash table) */
struct sockaddr_in h_addr; /* peer address */
struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */
- char h_name[20]; /* remote hostname */
+ char * h_name; /* remote hostname */
u32 h_version; /* interface version */
unsigned short h_proto; /* transport proto */
unsigned short h_reclaiming : 1,
h_server : 1, /* server side, not client side */
h_inuse : 1,
- h_killed : 1,
- h_monitored : 1;
+ h_killed : 1;
wait_queue_head_t h_gracewait; /* wait while reclaiming */
struct rw_semaphore h_rwsem; /* Reboot recovery lock */
u32 h_state; /* pseudo-state counter */
@@ -61,6 +60,16 @@ struct nlm_host {
spinlock_t h_lock;
struct list_head h_granted; /* Locks in GRANTED state */
struct list_head h_reclaim; /* Locks in RECLAIM state */
+ struct nsm_handle * h_nsmhandle; /* NSM status handle */
+};
+
+struct nsm_handle {
+ struct list_head sm_link;
+ atomic_t sm_count;
+ char * sm_name;
+ struct sockaddr_in sm_addr;
+ unsigned int sm_monitored : 1,
+ sm_sticky : 1; /* don't unmonitor */
};

/*
@@ -171,6 +180,8 @@ void nlm_release_host(struct nlm_host
void nlm_shutdown_hosts(void);
extern struct nlm_host *nlm_find_client(void);
extern void nlm_host_rebooted(const struct sockaddr_in *, const struct nlm_reboot *);
+struct nsm_handle *nsm_find(const struct sockaddr_in *, const char *, int);
+void nsm_release(struct nsm_handle *);


/*


2006-09-01 06:17:11

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Fri, 1 Sep 2006 14:38:25 +1000
NeilBrown <[email protected]> wrote:

> +static DECLARE_MUTEX(nsm_sema);
> ...
> + down(&nsm_sema);

Next you'll all be wearing bell-bottomed jeans?

2006-09-01 06:20:35

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Fri, 1 Sep 2006 14:38:25 +1000
NeilBrown <[email protected]> wrote:

> +nsm_release(struct nsm_handle *nsm)
> +{
> + if (!nsm)
> + return;
> + if (atomic_read(&nsm->sm_count) == 1) {
> + down(&nsm_sema);
> + if (atomic_dec_and_test(&nsm->sm_count)) {
> + list_del(&nsm->sm_link);
> + kfree(nsm);
> + }
> + up(&nsm_sema);
> + }
> +}

That's weird-looking. Are you sure? afaict, if ->sm_count ever gets a
value of 2 or more, it's unfreeable.

2006-09-01 15:50:55

by Trond Myklebust

[permalink] [raw]
Subject: Re: [NFS] [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Fri, 2006-09-01 at 14:38 +1000, NeilBrown wrote:
> From: Olaf Kirch <[email protected]>
>
> This patch introduces the nsm_handle, which is shared by
> all nlm_host objects referring to the same client.

> With this patch applied, all nlm_hosts from the same address
> will share the same nsm_handle. A future patch will add sharing
> by name.

<boggle>
Exactly why is it desirable to have > 1 nlm_host from the same address?
</boggle>

If we can map several clients into a single nsm_handle, then surely it
makes sense to map them into the same nlm_host too.

> Note: this patch changes h_name so that it is no longer guaranteed
> to be an IP address of the host. When the host represents an NFS server,
> h_name will be the name passed in the mount call. When the host
> represents a client, h_name will be the name presented in the lock
> request received from the client. A h_name is only used for printing
> informational messages, this change should not be significant.

> Signed-off-by: Olaf Kirch <[email protected]>
> Signed-off-by: Neil Brown <[email protected]>
>
> ### Diffstat output
> ./fs/lockd/clntlock.c | 3 -
> ./fs/lockd/host.c | 121 ++++++++++++++++++++++++++++++++++++++----
> ./fs/lockd/mon.c | 14 +++-
> ./include/linux/lockd/lockd.h | 17 ++++-
> 4 files changed, 136 insertions(+), 19 deletions(-)
>
> diff .prev/fs/lockd/clntlock.c ./fs/lockd/clntlock.c
> --- .prev/fs/lockd/clntlock.c 2006-08-31 16:44:23.000000000 +1000
> +++ ./fs/lockd/clntlock.c 2006-08-31 17:00:03.000000000 +1000
> @@ -150,7 +150,8 @@ u32 nlmclnt_grant(const struct sockaddr_
> static void nlmclnt_prepare_reclaim(struct nlm_host *host)
> {
> down_write(&host->h_rwsem);
> - host->h_monitored = 0;
> + if (host->h_nsmhandle)
> + host->h_nsmhandle->sm_monitored = 0;
> host->h_state++;
> host->h_nextrebind = 0;
> nlm_rebind_host(host);
>
> diff .prev/fs/lockd/host.c ./fs/lockd/host.c
> --- .prev/fs/lockd/host.c 2006-08-31 16:23:12.000000000 +1000
> +++ ./fs/lockd/host.c 2006-08-31 17:00:03.000000000 +1000
> @@ -34,6 +34,8 @@ static DEFINE_MUTEX(nlm_host_mutex);
>
>
> static void nlm_gc_hosts(void);
> +static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
> + const char *, int, int);
>
> /*
> * Find an NLM server handle in the cache. If there is none, create it.
> @@ -68,7 +70,7 @@ nlm_lookup_host(int server, const struct
> int hostname_len)
> {
> struct nlm_host *host, **hp;
> - u32 addr;
> + struct nsm_handle *nsm = NULL;
> int hash;
>
> dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
> @@ -86,7 +88,21 @@ nlm_lookup_host(int server, const struct
> if (time_after_eq(jiffies, next_gc))
> nlm_gc_hosts();
>
> + /* We may keep several nlm_host objects for a peer, because each
> + * nlm_host is identified by
> + * (address, protocol, version, server/client)
> + * We could probably simplify this a little by putting all those
> + * different NLM rpc_clients into one single nlm_host object.
> + * This would allow us to have one nlm_host per address.
> + */
> for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) {
> + if (!nlm_cmp_addr(&host->h_addr, sin))
> + continue;
> +
> + /* See if we have an NSM handle for this client */
> + if (!nsm && (nsm = host->h_nsmhandle) != 0)
> + atomic_inc(&nsm->sm_count);
> +
> if (host->h_proto != proto)
> continue;
> if (host->h_version != version)
> @@ -94,7 +110,7 @@ nlm_lookup_host(int server, const struct
> if (host->h_server != server)
> continue;
>
> - if (nlm_cmp_addr(&host->h_addr, sin)) {
> + {
> if (hp != nlm_hosts + hash) {
> *hp = host->h_next;
> host->h_next = nlm_hosts[hash];
> @@ -106,16 +122,18 @@ nlm_lookup_host(int server, const struct
> }
> }
>
> - /* Ooops, no host found, create it */
> - dprintk("lockd: creating host entry\n");
> + /* Sadly, the host isn't in our hash table yet. See if
> + * we have an NSM handle for it. If not, create one.
> + */
> + if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len)))
> + goto out;
>
> host = kzalloc(sizeof(*host), GFP_KERNEL);
> - if (!host)
> - goto nohost;
> -
> - addr = sin->sin_addr.s_addr;
> - sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr));
> -
> + if (!host) {
> + nsm_release(nsm);
> + goto out;
> + }
> + host->h_name = nsm->sm_name;
> host->h_addr = *sin;
> host->h_addr.sin_port = 0; /* ouch! */
> host->h_version = version;
> @@ -129,6 +147,7 @@ nlm_lookup_host(int server, const struct
> init_rwsem(&host->h_rwsem);
> host->h_state = 0; /* pseudo NSM state */
> host->h_nsmstate = 0; /* real NSM state */
> + host->h_nsmhandle = nsm;
> host->h_server = server;
> host->h_next = nlm_hosts[hash];
> nlm_hosts[hash] = host;
> @@ -140,7 +159,7 @@ nlm_lookup_host(int server, const struct
> if (++nrhosts > NLM_HOST_MAX)
> next_gc = 0;
>
> -nohost:
> +out:
> mutex_unlock(&nlm_host_mutex);
> return host;
> }
> @@ -393,3 +412,83 @@ nlm_gc_hosts(void)
> next_gc = jiffies + NLM_HOST_COLLECT;
> }
>
> +
> +/*
> + * Manage NSM handles
> + */
> +static LIST_HEAD(nsm_handles);
> +static DECLARE_MUTEX(nsm_sema);
> +
> +static struct nsm_handle *
> +__nsm_find(const struct sockaddr_in *sin,
> + const char *hostname, int hostname_len,
> + int create)
> +{
> + struct nsm_handle *nsm = NULL;
> + struct list_head *pos;
> +
> + if (!sin)
> + return NULL;
> +
> + if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
> + if (printk_ratelimit()) {
> + printk(KERN_WARNING "Invalid hostname \"%.*s\" "
> + "in NFS lock request\n",
> + hostname_len, hostname);
> + }
> + return NULL;
> + }
> +
> + down(&nsm_sema);
^^^^^^^^^^^^^^^^^^^^^^^^^^

Growl! Can we do this properly (i.e using spinlocks)? This semaphore
crap may help simplify the code for adding new entries, but it is
counterproductive as far as scalability goes, and leads straight down
the path of damnation in the form of the broken 'atomic_read()'
heuristics in nsm_release().

> + list_for_each(pos, &nsm_handles) {
> + nsm = list_entry(pos, struct nsm_handle, sm_link);
> +
> + if (!nlm_cmp_addr(&nsm->sm_addr, sin))
> + continue;
> + atomic_inc(&nsm->sm_count);
> + goto out;
> + }
> +
> + if (!create) {
> + nsm = NULL;
> + goto out;
> + }
> +
> + nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
> + if (nsm != NULL) {
> + nsm->sm_addr = *sin;
> + nsm->sm_name = (char *) (nsm + 1);
> + memcpy(nsm->sm_name, hostname, hostname_len);
> + nsm->sm_name[hostname_len] = '\0';
> + atomic_set(&nsm->sm_count, 1);
> +
> + list_add(&nsm->sm_link, &nsm_handles);
> + }
> +
> +out: up(&nsm_sema);
> + return nsm;
> +}
> +
> +struct nsm_handle *
> +nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
> +{
> + return __nsm_find(sin, hostname, hostname_len, 1);
> +}
> +
> +/*
> + * Release an NSM handle
> + */
> +void
> +nsm_release(struct nsm_handle *nsm)
> +{
> + if (!nsm)
> + return;
> + if (atomic_read(&nsm->sm_count) == 1) {
> + down(&nsm_sema);
> + if (atomic_dec_and_test(&nsm->sm_count)) {
> + list_del(&nsm->sm_link);
> + kfree(nsm);
> + }
> + up(&nsm_sema);
> + }
> +}

As Andrew pointed out, this function doesn't actually decrement sm_count
if there is more than one reference to it.

atomic_dec_and_lock() is your friend.

> diff .prev/fs/lockd/mon.c ./fs/lockd/mon.c
> --- .prev/fs/lockd/mon.c 2006-08-31 16:12:30.000000000 +1000
> +++ ./fs/lockd/mon.c 2006-08-31 17:00:03.000000000 +1000
> @@ -70,11 +70,14 @@ nsm_mon_unmon(struct nlm_host *host, u32
> int
> nsm_monitor(struct nlm_host *host)
> {
> + struct nsm_handle *nsm = host->h_nsmhandle;
> struct nsm_res res;
> int status;
>
> dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
> - if (host->h_monitored)
> + BUG_ON(nsm == NULL);
> +
> + if (nsm->sm_monitored)
> return 0;
>
> status = nsm_mon_unmon(host, SM_MON, &res);
> @@ -82,7 +85,7 @@ nsm_monitor(struct nlm_host *host)
> if (status < 0 || res.status != 0)
> printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
> else
> - host->h_monitored = 1;
> + nsm->sm_monitored = 1;
> return status;
> }
>
> @@ -92,19 +95,22 @@ nsm_monitor(struct nlm_host *host)
> int
> nsm_unmonitor(struct nlm_host *host)
> {
> + struct nsm_handle *nsm = host->h_nsmhandle;
> struct nsm_res res;
> int status = 0;
>
> dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
> - if (!host->h_monitored)
> + if (nsm == NULL)
> return 0;
> - host->h_monitored = 0;
> + host->h_nsmhandle = NULL;
>
> if (!host->h_killed) {
> status = nsm_mon_unmon(host, SM_UNMON, &res);
> if (status < 0)
> printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name);
> + nsm->sm_monitored = 0;
> }
> + nsm_release(nsm);
> return status;
> }
>
>
> diff .prev/include/linux/lockd/lockd.h ./include/linux/lockd/lockd.h
> --- .prev/include/linux/lockd/lockd.h 2006-08-31 16:23:12.000000000 +1000
> +++ ./include/linux/lockd/lockd.h 2006-08-31 17:00:03.000000000 +1000
> @@ -40,14 +40,13 @@ struct nlm_host {
> struct nlm_host * h_next; /* linked list (hash table) */
> struct sockaddr_in h_addr; /* peer address */
> struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */
> - char h_name[20]; /* remote hostname */
> + char * h_name; /* remote hostname */
> u32 h_version; /* interface version */
> unsigned short h_proto; /* transport proto */
> unsigned short h_reclaiming : 1,
> h_server : 1, /* server side, not client side */
> h_inuse : 1,
> - h_killed : 1,
> - h_monitored : 1;
> + h_killed : 1;
> wait_queue_head_t h_gracewait; /* wait while reclaiming */
> struct rw_semaphore h_rwsem; /* Reboot recovery lock */
> u32 h_state; /* pseudo-state counter */
> @@ -61,6 +60,16 @@ struct nlm_host {
> spinlock_t h_lock;
> struct list_head h_granted; /* Locks in GRANTED state */
> struct list_head h_reclaim; /* Locks in RECLAIM state */
> + struct nsm_handle * h_nsmhandle; /* NSM status handle */
> +};
> +
> +struct nsm_handle {
> + struct list_head sm_link;
> + atomic_t sm_count;
> + char * sm_name;
> + struct sockaddr_in sm_addr;
> + unsigned int sm_monitored : 1,
> + sm_sticky : 1; /* don't unmonitor */
> };
>
> /*
> @@ -171,6 +180,8 @@ void nlm_release_host(struct nlm_host
> void nlm_shutdown_hosts(void);
> extern struct nlm_host *nlm_find_client(void);
> extern void nlm_host_rebooted(const struct sockaddr_in *, const struct nlm_reboot *);
> +struct nsm_handle *nsm_find(const struct sockaddr_in *, const char *, int);
> +void nsm_release(struct nsm_handle *);
>
>
> /*
>
> -------------------------------------------------------------------------
> Using Tomcat but need to do more? Need to support web services, security?
> Get stuff done quickly with pre-integrated technology to make your job easier
> Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
> http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
> _______________________________________________
> NFS maillist - [email protected]
> https://lists.sourceforge.net/lists/listinfo/nfs

2006-09-01 16:11:14

by Olaf Kirch

[permalink] [raw]
Subject: Re: [NFS] [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Fri, Sep 01, 2006 at 11:50:20AM -0400, Trond Myklebust wrote:
> > With this patch applied, all nlm_hosts from the same address
> > will share the same nsm_handle. A future patch will add sharing
> > by name.
>
> <boggle>
> Exactly why is it desirable to have > 1 nlm_host from the same address?
> </boggle>
>
> If we can map several clients into a single nsm_handle, then surely it
> makes sense to map them into the same nlm_host too.

This is all related to the reasons for introducing NSM notification
by name in the first place.

On the client side, we may have mounted several volumes from a multi-homed
server, using different addresses, and you have several NLM client
handles, each with one of these addresses - and each in a different
nlm_host object.

Or you have an NFS server in a HA configuration, listening on a virtual
address. As the server fails over, the alias moves to the backup
machine.

Or (once we have IPv6) you may have a mix of v4 and v6 mounts.

Now when the server reboots, it will send you one or more SM_NOTIFY
messages. You do not know which addresses it will use. In the multihomed
case, you will get one SM_NOTIFY for each server address if the server
is a Linux box. Other server OSs will send you just one SM_NOTIFY,
and the sender address will be more or less random. In the HA case
described above, the sender address will not match the address
you used at all (since the UDP packet will have the interface's
primary IP address, not the alias).

This is the main motivation for introducing the nsm_handle, and this is
also the reason why there is potentially a 1-to-many relationship between
nsm_handles (representing a "host") and nlm_host, representing a tuple of
(NLM version, transport protocol, address).

Maybe we should rename nlm_host to something less confusing.

Olaf
--
Olaf Kirch | --- o --- Nous sommes du soleil we love when we play
[email protected] | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax

2006-09-01 16:41:50

by Trond Myklebust

[permalink] [raw]
Subject: Re: [NFS] [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Fri, 2006-09-01 at 18:11 +0200, Olaf Kirch wrote:
> This is all related to the reasons for introducing NSM notification
> by name in the first place.
>
> On the client side, we may have mounted several volumes from a multi-homed
> server, using different addresses, and you have several NLM client
> handles, each with one of these addresses - and each in a different
> nlm_host object.
>
> Or you have an NFS server in a HA configuration, listening on a virtual
> address. As the server fails over, the alias moves to the backup
> machine.
>
> Or (once we have IPv6) you may have a mix of v4 and v6 mounts.
>
> Now when the server reboots, it will send you one or more SM_NOTIFY
> messages. You do not know which addresses it will use. In the multihomed
> case, you will get one SM_NOTIFY for each server address if the server
> is a Linux box. Other server OSs will send you just one SM_NOTIFY,
> and the sender address will be more or less random. In the HA case
> described above, the sender address will not match the address
> you used at all (since the UDP packet will have the interface's
> primary IP address, not the alias).
>
> This is the main motivation for introducing the nsm_handle, and this is
> also the reason why there is potentially a 1-to-many relationship between
> nsm_handles (representing a "host") and nlm_host, representing a tuple of
> (NLM version, transport protocol, address).
>
> Maybe we should rename nlm_host to something less confusing.

The local statd process is supposed to decode the notification from the
remote client/server, and then notify the kernel. It already sends that
notification on a per-nlm_host basis (i.e. the call down to the
kernel contains the <address,version,transport protocol>).

If we need to mark more than one <address,version,transport protocol>
tuple as rebooting when we get a notification from the remote
client/server, then why not fix statd so that it does so. Why perform
these extra mappings in the kernel, which doesn't have the benefit of
reverse DNS lookups etc to help it out?

Cheers,
Trond

2006-09-01 23:48:52

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Thursday August 31, [email protected] wrote:
> On Fri, 1 Sep 2006 14:38:25 +1000
> NeilBrown <[email protected]> wrote:
>
> > +static DECLARE_MUTEX(nsm_sema);
> > ...
> > + down(&nsm_sema);
>
> Next you'll all be wearing bell-bottomed jeans?

You've been peeking in my wardrobe, haven't you :-)

NeilBrown


---
Subject: Convert lockd to use the newer mutex instead of the older semaphore

Both the (recently introduced) nsm_sema and the older f_sema are
converted over.

Cc: Olaf Kirch <[email protected]>
Signed-off-by: Neil Brown <[email protected]>

### Diffstat output
./fs/lockd/host.c | 11 ++++++-----
./fs/lockd/svclock.c | 22 +++++++++++-----------
./fs/lockd/svcsubs.c | 2 +-
./include/linux/lockd/lockd.h | 2 +-
4 files changed, 19 insertions(+), 18 deletions(-)

diff .prev/fs/lockd/host.c ./fs/lockd/host.c
--- .prev/fs/lockd/host.c 2006-09-01 12:26:45.000000000 +1000
+++ ./fs/lockd/host.c 2006-09-01 18:56:39.000000000 +1000
@@ -436,7 +436,7 @@ nlm_gc_hosts(void)
* Manage NSM handles
*/
static LIST_HEAD(nsm_handles);
-static DECLARE_MUTEX(nsm_sema);
+static DEFINE_MUTEX(nsm_mutex);

static struct nsm_handle *
__nsm_find(const struct sockaddr_in *sin,
@@ -458,7 +458,7 @@ __nsm_find(const struct sockaddr_in *sin
return NULL;
}

- down(&nsm_sema);
+ mutex_lock(&nsm_mutex);
list_for_each(pos, &nsm_handles) {
nsm = list_entry(pos, struct nsm_handle, sm_link);

@@ -488,7 +488,8 @@ __nsm_find(const struct sockaddr_in *sin
list_add(&nsm->sm_link, &nsm_handles);
}

-out: up(&nsm_sema);
+out:
+ mutex_unlock(&nsm_mutex);
return nsm;
}

@@ -507,11 +508,11 @@ nsm_release(struct nsm_handle *nsm)
if (!nsm)
return;
if (atomic_read(&nsm->sm_count) == 1) {
- down(&nsm_sema);
+ mutex_lock(&nsm_mutex);
if (atomic_dec_and_test(&nsm->sm_count)) {
list_del(&nsm->sm_link);
kfree(nsm);
}
- up(&nsm_sema);
+ mutex_unlock(&nsm_mutex);
}
}

diff .prev/fs/lockd/svclock.c ./fs/lockd/svclock.c
--- .prev/fs/lockd/svclock.c 2006-09-01 12:17:21.000000000 +1000
+++ ./fs/lockd/svclock.c 2006-09-01 18:54:53.000000000 +1000
@@ -254,9 +254,9 @@ static void nlmsvc_free_block(struct kre
dprintk("lockd: freeing block %p...\n", block);

/* Remove block from file's list of blocks */
- down(&file->f_sema);
+ mutex_lock(&file->f_mutex);
list_del_init(&block->b_flist);
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);

nlmsvc_freegrantargs(block->b_call);
nlm_release_call(block->b_call);
@@ -281,7 +281,7 @@ void nlmsvc_traverse_blocks(struct nlm_h
struct nlm_block *block, *next;

restart:
- down(&file->f_sema);
+ mutex_lock(&file->f_mutex);
list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
if (!match(block->b_host, host))
continue;
@@ -290,12 +290,12 @@ restart:
if (list_empty(&block->b_list))
continue;
kref_get(&block->b_count);
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
goto restart;
}
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
}

/*
@@ -354,7 +354,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, stru
lock->fl.fl_flags &= ~FL_SLEEP;
again:
/* Lock file against concurrent access */
- down(&file->f_sema);
+ mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting) */
block = nlmsvc_lookup_block(file, lock);
if (block == NULL) {
@@ -392,10 +392,10 @@ again:

/* If we don't have a block, create and initialize it. Then
* retry because we may have slept in kmalloc. */
- /* We have to release f_sema as nlmsvc_create_block may try to
+ /* We have to release f_mutex as nlmsvc_create_block may try to
* to claim it while doing host garbage collection */
if (newblock == NULL) {
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
dprintk("lockd: blocking on this lock (allocating).\n");
if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
return nlm_lck_denied_nolocks;
@@ -405,7 +405,7 @@ again:
/* Append to list of blocked */
nlmsvc_insert_block(newblock, NLM_NEVER);
out:
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
nlmsvc_release_block(newblock);
nlmsvc_release_block(block);
dprintk("lockd: nlmsvc_lock returned %u\n", ret);
@@ -489,9 +489,9 @@ nlmsvc_cancel_blocked(struct nlm_file *f
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);

- down(&file->f_sema);
+ mutex_lock(&file->f_mutex);
block = nlmsvc_lookup_block(file, lock);
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
if (block != NULL) {
status = nlmsvc_unlink_block(block);
nlmsvc_release_block(block);

diff .prev/fs/lockd/svcsubs.c ./fs/lockd/svcsubs.c
--- .prev/fs/lockd/svcsubs.c 2006-09-01 11:38:14.000000000 +1000
+++ ./fs/lockd/svcsubs.c 2006-09-01 18:55:56.000000000 +1000
@@ -106,7 +106,7 @@ nlm_lookup_file(struct svc_rqst *rqstp,
goto out_unlock;

memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
- init_MUTEX(&file->f_sema);
+ mutex_init(&file->f_mutex);
INIT_HLIST_NODE(&file->f_list);
INIT_LIST_HEAD(&file->f_blocks);


diff .prev/include/linux/lockd/lockd.h ./include/linux/lockd/lockd.h
--- .prev/include/linux/lockd/lockd.h 2006-09-01 12:26:43.000000000 +1000
+++ ./include/linux/lockd/lockd.h 2006-09-01 18:50:32.000000000 +1000
@@ -111,7 +111,7 @@ struct nlm_file {
struct list_head f_blocks; /* blocked locks */
unsigned int f_locks; /* guesstimate # of locks */
unsigned int f_count; /* reference count */
- struct semaphore f_sema; /* avoid concurrent access */
+ struct mutex f_mutex; /* avoid concurrent access */
};

/*

--
VGER BF report: H 2.88825e-13

2006-09-01 23:50:11

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Thursday August 31, [email protected] wrote:
> On Fri, 1 Sep 2006 14:38:25 +1000
> NeilBrown <[email protected]> wrote:
>
> > +nsm_release(struct nsm_handle *nsm)
> > +{
> > + if (!nsm)
> > + return;
> > + if (atomic_read(&nsm->sm_count) == 1) {
> > + down(&nsm_sema);
> > + if (atomic_dec_and_test(&nsm->sm_count)) {
> > + list_del(&nsm->sm_link);
> > + kfree(nsm);
> > + }
> > + up(&nsm_sema);
> > + }
> > +}
>
> That's weird-looking. Are you sure? afaict, if ->sm_count ever gets a
> value of 2 or more, it's unfreeable.

How do you *do* that? I thought I had already found the bugs...

Thanks :-)
NeilBrown


---
Subject: Fix locking in nsm_release

The locking is all backwards and broken.

We first atomic_dec_and_test.
If this fails, someone else has an active reference and we need do
no more.
If it succeeds, then the only ref is in the hash table, but someone
might be about to find and use that reference. nsm_mutex provides
exclusion against this. If sm_count is still 0 once the
mutex has been gained, then it is safe to discard the nsm.

Signed-off-by: Neil Brown <[email protected]>

### Diffstat output
./fs/lockd/host.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff .prev/fs/lockd/host.c ./fs/lockd/host.c
--- .prev/fs/lockd/host.c 2006-09-01 18:56:39.000000000 +1000
+++ ./fs/lockd/host.c 2006-09-02 09:44:04.000000000 +1000
@@ -507,9 +507,9 @@ nsm_release(struct nsm_handle *nsm)
{
if (!nsm)
return;
- if (atomic_read(&nsm->sm_count) == 1) {
+ if (atomic_dec_and_test(&nsm->sm_count)) {
mutex_lock(&nsm_mutex);
- if (atomic_dec_and_test(&nsm->sm_count)) {
+ if (atomic_read(&nsm->sm_count) == 0) {
list_del(&nsm->sm_link);
kfree(nsm);
}

--
VGER BF report: H 0

2006-09-04 08:48:50

by Olaf Kirch

[permalink] [raw]
Subject: Re: [NFS] [PATCH 004 of 19] knfsd: lockd: introduce nsm_handle

On Fri, Sep 01, 2006 at 12:41:33PM -0400, Trond Myklebust wrote:
> The local statd process is supposed to decode the notification from the
> remote client/server, and then notify the kernel. It already sends that
> notification on a per-nlm_host basis (i.e. it the call down to the
> kernel contains the <address,version,transport protocol>.

Why does statd need to send the full <address,version,transport protocol>
to the kernel in the first place?

All that's really needed is a unique identification of the host having
rebooted, nevermind how we have been talking to it, and in what role.

I consider the current practice a side effect of a bad implementation
which duplicates a lot of state by dumping everything into the nlm_host.

With the current code, we cannot even monitor IPv6 addresses, because
there's not enough room in the NSM_MON packet. With my proposed change,
we can ditch version and transport protocol, and all of a sudden we
have 16 bytes for the address - ie enough to make IPv6 happy.

In the long run, we could clean out nlm_host even more - there's
a lot of cruft in there.

h_name just h_nsmhandle->sm_name
h_gracewait could be shared as well
h_state should move to nsmhandle as well
h_nsmstate currently not used, could move to nsmhandle as well
h_pidcount currently allocated per nlm_host, which leads
to aliasing if we mix NFSv2 and v3 mounts

On a side note, we may want to always allocate an RPC client for each
nlm_host. Then we can ditch the following variables as well, which are
in the rpc_client's portmap info anyway:

h_proto pm_prot
h_version pm_vers
h_sema useless
h_nextrebind we can stop rebinding every 60s, the sunrpc
doesn't need that anymore. During recovery,
we can just call rpc_force_rebind
directly.

Or going even further, one could make the nlm_host agnostic of transports
and protocol versions. Just stick a (short) array of RPC clients in the
nlm_host - any code that places NLM calls will need some extra logic
to select the right client, but it would save on memory and reduce
complexity.

Olaf
--
Olaf Kirch | --- o --- Nous sommes du soleil we love when we play
[email protected] | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax

--
VGER BF report: H 2.68873e-09