2011-05-31 22:49:16

by Weston Andros Adamson

[permalink] [raw]
Subject: NFS: pnfs IPv6 support, prelim multipath support

Patches 1 and 2 are reposts that have been rebased to Trond's nfs-for-next.
Patch 3 is new, but short.



2011-05-31 22:49:17

by Weston Andros Adamson

[permalink] [raw]
Subject: [PATCH 2/3] NFS: Parse and store all multipath DS addresses

This parses and stores all addresses associated with each data server,
laying the groundwork for supporting multipath to data servers.

- Skips over addresses that cannot be parsed (e.g. IPv6 addrs if v6 is not
enabled). Only fails if none of the addresses are recognizable.
- Currently only uses the first address that parsed cleanly
- Tested against pynfs server (modified to support multipath)

Signed-off-by: Weston Andros Adamson <[email protected]>
---
fs/nfs/nfs4filelayout.h | 12 +-
fs/nfs/nfs4filelayoutdev.c | 363 ++++++++++++++++++++++++++++----------------
2 files changed, 243 insertions(+), 132 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 6c6a817..68cce73 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,11 +47,17 @@ enum stripetype4 {
};

/* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+ struct sockaddr_storage da_addr;
+ size_t da_addrlen;
+ struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
+ char *da_remotestr; /* human readable addr+port */
+};
+
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
- struct sockaddr_storage ds_addr;
- size_t ds_addrlen;
- char *ds_remotestr; /* human readable addr+port */
+ char *ds_remotestr; /* comma sep list of addrs */
+ struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
};
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 0adb9be..f26c1cf 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -65,53 +65,104 @@ print_ds(struct nfs4_pnfs_ds *ds)
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}

-/* nfs4_ds_cache_lock is held */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(struct sockaddr *addr, size_t addrlen)
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
{
- struct nfs4_pnfs_ds *ds;
struct sockaddr_in *a, *b;
struct sockaddr_in6 *a6, *b6;

- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
- if (addr->sa_family != ds->ds_addr.ss_family)
- continue;
-
- switch (addr->sa_family) {
- case AF_INET:
- a = (struct sockaddr_in *)addr;
- b = (struct sockaddr_in *)&ds->ds_addr;
-
- if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
- a->sin_port == b->sin_port)
- return ds;
- break;
-
- case AF_INET6:
- a6 = (struct sockaddr_in6 *)addr;
- b6 = (struct sockaddr_in6 *)&ds->ds_addr;
-
- /* LINKLOCAL addresses must have matching scope_id */
- if (ipv6_addr_scope(&a6->sin6_addr) ==
- IPV6_ADDR_SCOPE_LINKLOCAL &&
- a6->sin6_scope_id != b6->sin6_scope_id)
- continue;
-
- if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
- a6->sin6_port == b6->sin6_port)
- return ds;
- break;
-
- default:
- dprintk("%s: unhandled address family: %u\n",
- __func__, addr->sa_family);
- return NULL;
+ if (addr1->sa_family != addr2->sa_family)
+ return false;
+
+ switch (addr1->sa_family) {
+ case AF_INET:
+ a = (struct sockaddr_in *)addr1;
+ b = (struct sockaddr_in *)addr2;
+
+ if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+ a->sin_port == b->sin_port)
+ return true;
+ break;
+
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)addr1;
+ b6 = (struct sockaddr_in6 *)addr2;
+
+ /* LINKLOCAL addresses must have matching scope_id */
+ if (ipv6_addr_scope(&a6->sin6_addr) ==
+ IPV6_ADDR_SCOPE_LINKLOCAL &&
+ a6->sin6_scope_id != b6->sin6_scope_id)
+ return false;
+
+ if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+ a6->sin6_port == b6->sin6_port)
+ return true;
+ break;
+
+ default:
+ dprintk("%s: unhandled address family: %u\n",
+ __func__, addr1->sa_family);
+ return false;
+ }
+
+ return false;
+}
+
+/*
+ * Lookup DS by addresses. Returns the first DS with a matching address.
+ * nfs4_ds_cache_lock is held
+ */
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(struct list_head *dsaddrs)
+{
+ struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+
+ list_for_each_entry(da1, dsaddrs, da_node) {
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+ list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+ if (same_sockaddr(
+ (struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr))
+ return ds;
+ }
}
}
return NULL;
}

/*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+ struct list_head *dsaddrs2)
+{
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+ size_t count1 = 0,
+ count2 = 0;
+
+ list_for_each_entry(da1, dsaddrs1, da_node)
+ count1++;
+
+ list_for_each_entry(da2, dsaddrs2, da_node) {
+ bool found = false;
+ count2++;
+ list_for_each_entry(da1, dsaddrs1, da_node) {
+ if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return false;
+ }
+
+ return (count1 == count2);
+}
+
+/*
* Create an rpc connection to the nfs4_pnfs_ds data server
* Currently only support IPv4
*/
@@ -119,14 +170,21 @@ static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
struct nfs_client *clp;
+ struct nfs4_pnfs_ds_addr *da;
int status = 0;

- dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr,
+ dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);

+ BUG_ON(list_empty(&ds->ds_addrs));
+
+ da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
+ dprintk("%s: using the first address for DS %s: %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);
+
clp = nfs4_set_ds_client(mds_srv->nfs_client,
- (struct sockaddr *)&ds->ds_addr,
- ds->ds_addrlen, IPPROTO_TCP);
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP);
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
@@ -169,12 +227,24 @@ out_put:
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
+ struct nfs4_pnfs_ds_addr *da;
+
dprintk("--> %s\n", __func__);
ifdebug(FACILITY)
print_ds(ds);

if (ds->ds_clp)
nfs_put_client(ds->ds_clp);
+
+ while (!list_empty(&ds->ds_addrs)) {
+ da = list_first_entry(&ds->ds_addrs,
+ struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
+ }
+
kfree(ds->ds_remotestr);
kfree(ds);
}
@@ -207,67 +277,73 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
* complicated setup around many dprinks.
*/
static char *
-nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags)
+nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
{
- char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN];
+ struct nfs4_pnfs_ds_addr *da;
char *remotestr;
- char *startsep = "";
- char *endsep = "";
size_t len;
- uint16_t port;
+ char *p;

- switch (ds_addr->sa_family) {
- case AF_INET:
- port = ((struct sockaddr_in *)ds_addr)->sin_port;
- break;
- case AF_INET6:
- startsep = "[";
- endsep = "]";
- port = ((struct sockaddr_in6 *)ds_addr)->sin6_port;
- break;
- default:
- dprintk("%s: Unknown address family %u\n",
- __func__, ds_addr->sa_family);
- return NULL;
+ len = 3; /* '{', '}' and eol */
+ list_for_each_entry(da, dsaddrs, da_node) {
+ len += strlen(da->da_remotestr) + 1; /* string plus comma */
}

- if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) {
- dprintk("%s: error printing addr\n", __func__);
+ remotestr = kzalloc(len, gfp_flags);
+ if (!remotestr)
return NULL;
- }

- len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1;
- remotestr = kzalloc(len, gfp_flags);
+ p = remotestr;
+ *(p++) = '{';
+ len--;
+ list_for_each_entry(da, dsaddrs, da_node) {
+ size_t ll = strlen(da->da_remotestr);

- if (unlikely(!remotestr)) {
- dprintk("%s: couldn't alloc remotestr\n", __func__);
- return NULL;
- }
+ if (ll > len)
+ goto out_err;

- snprintf(remotestr, len, "%s%s%s:%u",
- startsep, buf, endsep, ntohs(port));
+ memcpy(p, da->da_remotestr, ll);
+ p += ll;
+ len -= ll;

+ if (len < 1)
+ goto out_err;
+ (*p++) = ',';
+ len--;
+ }
+ if (len < 2)
+ goto out_err;
+ *(p++) = '}';
+ *p = '\0';
return remotestr;
+out_err:
+ kfree(remotestr);
+ return NULL;
}

static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;

- ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
+ if (list_empty(dsaddrs)) {
+ dprintk("%s: no addresses defined\n", __func__);
+ goto out;
+ }
+
+ ds = kzalloc(sizeof(*ds), gfp_flags);
if (!ds)
goto out;

/* this is only used for debugging, so it's ok if its NULL */
- remotestr = nfs4_pnfs_remotestr(addr, gfp_flags);
+ remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);

spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(addr, addrlen);
+ tmp_ds = _data_server_lookup_locked(dsaddrs);
if (tmp_ds == NULL) {
- memcpy(&ds->ds_addr, addr, addrlen);
- ds->ds_addrlen = addrlen;
+ INIT_LIST_HEAD(&ds->ds_addrs);
+ list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
@@ -276,6 +352,11 @@ nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags)
dprintk("%s add new data server %s\n", __func__,
ds->ds_remotestr);
} else {
+ if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
+ dsaddrs)) {
+ dprintk("%s: multipath address mismatch: %s != %s",
+ __func__, tmp_ds->ds_remotestr, remotestr);
+ }
kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
@@ -292,19 +373,20 @@ out:
/*
* Currently only supports ipv4, ipv6 and one multi-path address.
*/
-static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
+static struct nfs4_pnfs_ds_addr *
+decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *ds = NULL;
+ struct nfs4_pnfs_ds_addr *da = NULL;
char *buf, *portstr;
- struct sockaddr_storage ss;
- size_t sslen;
u32 port;
int nlen, rlen;
int tmp[2];
__be32 *p;
char *netid, *match_netid;
- size_t match_netid_len;
+ size_t len, match_netid_len;
+ char *startsep = "";
+ char *endsep = "";
+

/* r_netid */
p = xdr_inline_decode(streamp, 4);
@@ -365,50 +447,74 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
}
*portstr = '\0';

- if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) {
- dprintk("%s: Error parsing address %s\n", __func__, buf);
+ da = kzalloc(sizeof(*da), gfp_flags);
+ if (unlikely(!da))
goto out_free_buf;
+
+ INIT_LIST_HEAD(&da->da_node);
+
+ if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+ sizeof(da->da_addr))) {
+ dprintk("%s: error parsing address %s\n", __func__, buf);
+ goto out_free_da;
}

portstr++;
sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1]));

- switch (ss.ss_family) {
+ switch (da->da_addr.ss_family) {
case AF_INET:
- ((struct sockaddr_in *)&ss)->sin_port = port;
- sslen = sizeof(struct sockaddr_in);
+ ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
+ da->da_addrlen = sizeof(struct sockaddr_in);
match_netid = "tcp";
match_netid_len = 3;
break;

case AF_INET6:
- ((struct sockaddr_in6 *)&ss)->sin6_port = port;
- sslen = sizeof(struct sockaddr_in6);
+ ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
+ da->da_addrlen = sizeof(struct sockaddr_in6);
match_netid = "tcp6";
match_netid_len = 4;
+ startsep = "[";
+ endsep = "]";
break;

default:
dprintk("%s: unsupported address family: %u\n",
- __func__, ss.ss_family);
- goto out_free_buf;
+ __func__, da->da_addr.ss_family);
+ goto out_free_da;
}

if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
__func__, netid, match_netid);
- goto out_free_buf;
+ goto out_free_da;
}

- ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags);
- dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr);
+ /* save human readable address */
+ len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
+ da->da_remotestr = kzalloc(len, gfp_flags);
+
+ /* NULL is ok, only used for dprintk */
+ if (da->da_remotestr)
+ snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
+ buf, endsep, ntohs(port));
+
+ dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
+ kfree(buf);
+ kfree(netid);
+ return da;
+
+out_free_da:
+ kfree(da);
out_free_buf:
+ dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
kfree(buf);
out_free_netid:
kfree(netid);
out_err:
- return ds;
+ return NULL;
}

/* Decode opaque device data and return the result */
@@ -425,6 +531,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
struct xdr_stream stream;
struct xdr_buf buf;
struct page *scratch;
+ struct list_head dsaddrs;
+ struct nfs4_pnfs_ds_addr *da;

/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@@ -501,6 +609,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
NFS_SERVER(ino)->nfs_client,
&pdev->dev_id);

+ INIT_LIST_HEAD(&dsaddrs);
+
for (i = 0; i < dsaddr->ds_num; i++) {
int j;
u32 mp_count;
@@ -510,48 +620,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
goto out_err_free_deviceid;

mp_count = be32_to_cpup(p); /* multipath count */
- if (mp_count > 1) {
- printk(KERN_WARNING
- "%s: Multipath count %d not supported, "
- "skipping all greater than 1\n", __func__,
- mp_count);
- }
for (j = 0; j < mp_count; j++) {
- if (j == 0) {
- dsaddr->ds_list[i] = decode_and_add_ds(&stream,
- ino, gfp_flags);
- if (dsaddr->ds_list[i] == NULL)
- goto out_err_free_deviceid;
- } else {
- u32 len;
- /* skip extra multipath */
-
- /* read len, skip */
- p = xdr_inline_decode(&stream, 4);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- len = be32_to_cpup(p);
-
- p = xdr_inline_decode(&stream, len);
- if (unlikely(!p))
- goto out_err_free_deviceid;
-
- /* read len, skip */
- p = xdr_inline_decode(&stream, 4);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- len = be32_to_cpup(p);
-
- p = xdr_inline_decode(&stream, len);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- }
+ da = decode_ds_addr(&stream, gfp_flags);
+ if (da)
+ list_add_tail(&da->da_node, &dsaddrs);
+ }
+ if (list_empty(&dsaddrs)) {
+ dprintk("%s: no suitable DS addresses found\n",
+ __func__);
+ goto out_err_free_deviceid;
+ }
+
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ if (!dsaddr->ds_list[i])
+ goto out_err_drain_dsaddrs;
+
+ /* If DS was already in cache, free ds addrs */
+ while (!list_empty(&dsaddrs)) {
+ da = list_first_entry(&dsaddrs,
+ struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
}
}

__free_page(scratch);
return dsaddr;

+out_err_drain_dsaddrs:
+ while (!list_empty(&dsaddrs)) {
+ da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
+ }
out_err_free_deviceid:
nfs4_fl_free_deviceid(dsaddr);
/* stripe_indicies was part of dsaddr */
--
1.7.5.2


2011-05-31 22:49:16

by Weston Andros Adamson

[permalink] [raw]
Subject: [PATCH 1/3] NFS: pnfs IPv6 support

Handle ipv6 remote addresses from GETDEVICEINFO

- supports netid "tcp" for ipv4 and "tcp6" for ipv6 as rfc 5665 specifies
- added ds_remotestr to avoid having to handle different AFs in every dprintk
- tested against pynfs 4.1 server, submitting ipv6 support patch to pynfs
- tested with IPv6 disabled, it compiles cleanly and relies on rpc_pton to
refuse to accept IPv6 addresses

Signed-off-by: Weston Andros Adamson <[email protected]>
---
fs/nfs/nfs4filelayout.c | 7 +-
fs/nfs/nfs4filelayout.h | 5 +-
fs/nfs/nfs4filelayoutdev.c | 255 ++++++++++++++++++++++++++++++++------------
3 files changed, 191 insertions(+), 76 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4269088..b410a9b 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -343,8 +343,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s USE DS:ip %x %hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);

/* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp;
@@ -383,9 +382,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ ds->ds_remotestr);

data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e..6c6a817 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -49,8 +49,9 @@ enum stripetype4 {
/* Individual ip address */
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
- u32 ds_ip_addr;
- u32 ds_port;
+ struct sockaddr_storage ds_addr;
+ size_t ds_addrlen;
+ char *ds_remotestr; /* human readable addr+port */
struct nfs_client *ds_clp;
atomic_t ds_count;
};
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf13..0adb9be 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,28 +56,56 @@ print_ds(struct nfs4_pnfs_ds *ds)
printk("%s NULL device\n", __func__);
return;
}
- printk(" ip_addr %x port %hu\n"
+ printk(" ds %s\n"
" ref count %d\n"
" client %p\n"
" cl_exchange_flags %x\n",
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}

/* nfs4_ds_cache_lock is held */
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+_data_server_lookup_locked(struct sockaddr *addr, size_t addrlen)
{
struct nfs4_pnfs_ds *ds;
-
- dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
- ntohl(ip_addr), ntohs(port));
+ struct sockaddr_in *a, *b;
+ struct sockaddr_in6 *a6, *b6;

list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
- if (ds->ds_ip_addr == ip_addr &&
- ds->ds_port == port) {
- return ds;
+ if (addr->sa_family != ds->ds_addr.ss_family)
+ continue;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ a = (struct sockaddr_in *)addr;
+ b = (struct sockaddr_in *)&ds->ds_addr;
+
+ if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+ a->sin_port == b->sin_port)
+ return ds;
+ break;
+
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)addr;
+ b6 = (struct sockaddr_in6 *)&ds->ds_addr;
+
+ /* LINKLOCAL addresses must have matching scope_id */
+ if (ipv6_addr_scope(&a6->sin6_addr) ==
+ IPV6_ADDR_SCOPE_LINKLOCAL &&
+ a6->sin6_scope_id != b6->sin6_scope_id)
+ continue;
+
+ if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+ a6->sin6_port == b6->sin6_port)
+ return ds;
+ break;
+
+ default:
+ dprintk("%s: unhandled address family: %u\n",
+ __func__, addr->sa_family);
+ return NULL;
}
}
return NULL;
@@ -91,19 +119,14 @@ static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
struct nfs_client *clp;
- struct sockaddr_in sin;
int status = 0;

- dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);

- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = ds->ds_ip_addr;
- sin.sin_port = ds->ds_port;
-
- clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
- sizeof(sin), IPPROTO_TCP);
+ clp = nfs4_set_ds_client(mds_srv->nfs_client,
+ (struct sockaddr *)&ds->ds_addr,
+ ds->ds_addrlen, IPPROTO_TCP);
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
@@ -115,8 +138,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
}
ds->ds_clp = clp;
- dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s [existing] server=%s\n", __func__,
+ ds->ds_remotestr);
goto out;
}

@@ -135,8 +158,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;

ds->ds_clp = clp;
- dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
- ntohs(ds->ds_port));
+ dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
out_put:
@@ -153,6 +175,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds)

if (ds->ds_clp)
nfs_put_client(ds->ds_clp);
+ kfree(ds->ds_remotestr);
kfree(ds);
}

@@ -179,31 +202,85 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
kfree(dsaddr);
}

+/*
+ * Create a string with a human readable address and port to avoid
+ * complicated setup around many dprinks.
+ */
+static char *
+nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags)
+{
+ char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN];
+ char *remotestr;
+ char *startsep = "";
+ char *endsep = "";
+ size_t len;
+ uint16_t port;
+
+ switch (ds_addr->sa_family) {
+ case AF_INET:
+ port = ((struct sockaddr_in *)ds_addr)->sin_port;
+ break;
+ case AF_INET6:
+ startsep = "[";
+ endsep = "]";
+ port = ((struct sockaddr_in6 *)ds_addr)->sin6_port;
+ break;
+ default:
+ dprintk("%s: Unknown address family %u\n",
+ __func__, ds_addr->sa_family);
+ return NULL;
+ }
+
+ if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) {
+ dprintk("%s: error printing addr\n", __func__);
+ return NULL;
+ }
+
+ len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1;
+ remotestr = kzalloc(len, gfp_flags);
+
+ if (unlikely(!remotestr)) {
+ dprintk("%s: couldn't alloc remotestr\n", __func__);
+ return NULL;
+ }
+
+ snprintf(remotestr, len, "%s%s%s:%u",
+ startsep, buf, endsep, ntohs(port));
+
+ return remotestr;
+}
+
static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *tmp_ds, *ds;
+ struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
+ char *remotestr;

ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
if (!ds)
goto out;

+ /* this is only used for debugging, so it's ok if its NULL */
+ remotestr = nfs4_pnfs_remotestr(addr, gfp_flags);
+
spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(ip_addr, port);
+ tmp_ds = _data_server_lookup_locked(addr, addrlen);
if (tmp_ds == NULL) {
- ds->ds_ip_addr = ip_addr;
- ds->ds_port = port;
+ memcpy(&ds->ds_addr, addr, addrlen);
+ ds->ds_addrlen = addrlen;
+ ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
- dprintk("%s add new data server ip 0x%x\n", __func__,
- ds->ds_ip_addr);
+ dprintk("%s add new data server %s\n", __func__,
+ ds->ds_remotestr);
} else {
+ kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
- dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
- __func__, tmp_ds->ds_ip_addr,
+ dprintk("%s data server %s found, inc'ed ds_count to %d\n",
+ __func__, ds->ds_remotestr,
atomic_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
@@ -213,18 +290,21 @@ out:
}

/*
- * Currently only support ipv4, and one multi-path address.
+ * Currently only supports ipv4, ipv6 and one multi-path address.
*/
static struct nfs4_pnfs_ds *
decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *ds = NULL;
- char *buf;
- const char *ipend, *pstr;
- u32 ip_addr, port;
- int nlen, rlen, i;
+ char *buf, *portstr;
+ struct sockaddr_storage ss;
+ size_t sslen;
+ u32 port;
+ int nlen, rlen;
int tmp[2];
__be32 *p;
+ char *netid, *match_netid;
+ size_t match_netid_len;

/* r_netid */
p = xdr_inline_decode(streamp, 4);
@@ -236,62 +316,97 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
if (unlikely(!p))
goto out_err;

- /* Check that netid is "tcp" */
- if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
- dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+ netid = kmalloc(nlen+1, gfp_flags);
+ if (unlikely(!netid))
goto out_err;
- }

- /* r_addr */
+ netid[nlen] = '\0';
+ memcpy(netid, p, nlen);
+
+ /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(streamp, 4);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;
rlen = be32_to_cpup(p);

p = xdr_inline_decode(streamp, rlen);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;

- /* ipv6 length plus port is legal */
- if (rlen > INET6_ADDRSTRLEN + 8) {
+ /* port is ".ABC.DEF", 8 chars max */
+ if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
- goto out_err;
+ goto out_free_netid;
}
buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
- goto out_err;
+ goto out_free_netid;
}
buf[rlen] = '\0';
memcpy(buf, p, rlen);

- /* replace the port dots with dashes for the in4_pton() delimiter*/
- for (i = 0; i < 2; i++) {
- char *res = strrchr(buf, '.');
- if (!res) {
- dprintk("%s: Failed finding expected dots in port\n",
- __func__);
- goto out_free;
- }
- *res = '-';
+ /* replace port '.' with '-' */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot in port\n",
+ __func__);
+ goto out_free_buf;
+ }
+ *portstr = '-';
+
+ /* find '.' between address and port */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot between address and "
+ "port\n", __func__);
+ goto out_free_buf;
}
+ *portstr = '\0';

- /* Currently only support ipv4 address */
- if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
- dprintk("%s: Only ipv4 addresses supported\n", __func__);
- goto out_free;
+ if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) {
+ dprintk("%s: Error parsing address %s\n", __func__, buf);
+ goto out_free_buf;
}

- /* port */
- pstr = ipend;
- sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
+ portstr++;
+ sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1]));

- ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
- dprintk("%s: Decoded address and port %s\n", __func__, buf);
-out_free:
+ switch (ss.ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)&ss)->sin_port = port;
+ sslen = sizeof(struct sockaddr_in);
+ match_netid = "tcp";
+ match_netid_len = 3;
+ break;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *)&ss)->sin6_port = port;
+ sslen = sizeof(struct sockaddr_in6);
+ match_netid = "tcp6";
+ match_netid_len = 4;
+ break;
+
+ default:
+ dprintk("%s: unsupported address family: %u\n",
+ __func__, ss.ss_family);
+ goto out_free_buf;
+ }
+
+ if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
+ dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
+ __func__, netid, match_netid);
+ goto out_free_buf;
+ }
+
+ ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags);
+ dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr);
+out_free_buf:
kfree(buf);
+out_free_netid:
+ kfree(netid);
out_err:
return ds;
}
@@ -591,13 +706,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)

static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
- int err, u32 ds_addr)
+ int err, const char *ds_remotestr)
{
u32 *p = (u32 *)&dsaddr->id_node.deviceid;

- printk(KERN_ERR "NFS: data server %x connection error %d."
+ printk(KERN_ERR "NFS: data server %s connection error %d."
" Deviceid [%x%x%x%x] marked out of use.\n",
- ds_addr, err, p[0], p[1], p[2], p[3]);
+ ds_remotestr, err, p[0], p[1], p[2], p[3]);

spin_lock(&nfs4_ds_cache_lock);
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +743,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
err = nfs4_ds_connect(s, ds);
if (err) {
filelayout_mark_devid_negative(dsaddr, err,
- ntohl(ds->ds_ip_addr));
+ ds->ds_remotestr);
return NULL;
}
}
--
1.7.5.2


2011-05-31 22:49:18

by Weston Andros Adamson

[permalink] [raw]
Subject: [PATCH 3/3] NFS: pnfs: loop over multipath addrs on connect

Don't just use the first addr in the multipath list - instead, loop
over addresses when calling nfs4_set_ds_client() (which calls connect)
until it is successful.

Although this is not real multipath support, it's a quick fix for the case where
an MDS sends a list of addresses for a DS and some of the addr families are
unsupported or misconfigured (like no routable ipv6 addr assigned).
This will attempt all paths to the DS before giving up, instead of immediately
falling back to the MDS.

As before, an error encountered after a successful connect() will cause all
i/o to fall back to the MDS.

Signed-off-by: Weston Andros Adamson <[email protected]>
---
fs/nfs/nfs4filelayoutdev.c | 14 +++++++++-----
1 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f26c1cf..aff4d9a 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -169,7 +169,7 @@ _data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
- struct nfs_client *clp;
+ struct nfs_client *clp = ERR_PTR(-EIO);
struct nfs4_pnfs_ds_addr *da;
int status = 0;

@@ -178,13 +178,17 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)

BUG_ON(list_empty(&ds->ds_addrs));

- da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
- dprintk("%s: using the first address for DS %s: %s\n",
- __func__, ds->ds_remotestr, da->da_remotestr);
+ list_for_each_entry(da, &ds->ds_addrs, da_node) {
+ dprintk("%s: DS %s: trying address %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);

- clp = nfs4_set_ds_client(mds_srv->nfs_client,
+ clp = nfs4_set_ds_client(mds_srv->nfs_client,
(struct sockaddr *)&da->da_addr,
da->da_addrlen, IPPROTO_TCP);
+ if (!IS_ERR(clp))
+ break;
+ }
+
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
--
1.7.5.2


2011-06-01 06:38:47

by Mkrtchyan, Tigran

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Parse and store all multipath DS addresses



On Tue, 31 May 2011, Weston Andros Adamson wrote:

> Date: Tue, 31 May 2011 18:48:57 -0400
> From: Weston Andros Adamson <[email protected]>
> To: [email protected]
> Cc: [email protected], Weston Andros Adamson <[email protected]>
> Subject: [PATCH 2/3] NFS: Parse and store all multipath DS addresses
>
> This parses and stores all addresses associated with each data server,
> laying the groundwork for supporting multipath to data servers.
>
> - Skips over addresses that cannot be parsed (ie IPv6 addrs if v6 is not
> enabled). Only fails if none of the addresses are recognizable
> - Currently only uses the first address that parsed cleanly
> - Tested against pynfs server (modified to support multipath)
>
> Signed-off-by: Weston Andros Adamson <[email protected]>
> ---
> fs/nfs/nfs4filelayout.h | 12 +-
> fs/nfs/nfs4filelayoutdev.c | 363 ++++++++++++++++++++++++++++----------------
> 2 files changed, 243 insertions(+), 132 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
> index 6c6a817..68cce73 100644
> --- a/fs/nfs/nfs4filelayout.h
> +++ b/fs/nfs/nfs4filelayout.h
> @@ -47,11 +47,17 @@ enum stripetype4 {
> };
>
> /* Individual ip address */
> +struct nfs4_pnfs_ds_addr {
> + struct sockaddr_storage da_addr;
> + size_t da_addrlen;
> + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
> + char *da_remotestr; /* human readable addr+port */
> +};
> +
> struct nfs4_pnfs_ds {
> struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
> - struct sockaddr_storage ds_addr;
> - size_t ds_addrlen;
> - char *ds_remotestr; /* human readable addr+port */
> + char *ds_remotestr; /* comma sep list of addrs */
> + struct list_head ds_addrs;
> struct nfs_client *ds_clp;
> atomic_t ds_count;
> };
> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
> index 0adb9be..f26c1cf 100644
> --- a/fs/nfs/nfs4filelayoutdev.c
> +++ b/fs/nfs/nfs4filelayoutdev.c
> @@ -65,53 +65,104 @@ print_ds(struct nfs4_pnfs_ds *ds)
> ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
> }
>
> -/* nfs4_ds_cache_lock is held */
> -static struct nfs4_pnfs_ds *
> -_data_server_lookup_locked(struct sockaddr *addr, size_t addrlen)
> +static bool
> +same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
> {
> - struct nfs4_pnfs_ds *ds;
> struct sockaddr_in *a, *b;
> struct sockaddr_in6 *a6, *b6;
>
> - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
> - if (addr->sa_family != ds->ds_addr.ss_family)
> - continue;
> -
> - switch (addr->sa_family) {
> - case AF_INET:
> - a = (struct sockaddr_in *)addr;
> - b = (struct sockaddr_in *)&ds->ds_addr;
> -
> - if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
> - a->sin_port == b->sin_port)
> - return ds;
> - break;
> -
> - case AF_INET6:
> - a6 = (struct sockaddr_in6 *)addr;
> - b6 = (struct sockaddr_in6 *)&ds->ds_addr;
> -
> - /* LINKLOCAL addresses must have matching scope_id */
> - if (ipv6_addr_scope(&a6->sin6_addr) ==
> - IPV6_ADDR_SCOPE_LINKLOCAL &&
> - a6->sin6_scope_id != b6->sin6_scope_id)
> - continue;
> -
> - if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
> - a6->sin6_port == b6->sin6_port)
> - return ds;
> - break;
> -
> - default:
> - dprintk("%s: unhandled address family: %u\n",
> - __func__, addr->sa_family);
> - return NULL;
> + if (addr1->sa_family != addr2->sa_family)
> + return false;
> +
> + switch (addr1->sa_family) {
> + case AF_INET:
> + a = (struct sockaddr_in *)addr1;
> + b = (struct sockaddr_in *)addr2;
> +
> + if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
> + a->sin_port == b->sin_port)
> + return true;
> + break;
> +
> + case AF_INET6:
> + a6 = (struct sockaddr_in6 *)addr1;
> + b6 = (struct sockaddr_in6 *)addr2;
> +
> + /* LINKLOCAL addresses must have matching scope_id */
> + if (ipv6_addr_scope(&a6->sin6_addr) ==
> + IPV6_ADDR_SCOPE_LINKLOCAL &&
> + a6->sin6_scope_id != b6->sin6_scope_id)
> + return false;
> +
> + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
> + a6->sin6_port == b6->sin6_port)
> + return true;
> + break;
> +
> + default:
> + dprintk("%s: unhandled address family: %u\n",
> + __func__, addr1->sa_family);
> + return false;
> + }
> +
> + return false;
> +}
> +
> +/*
> + * Lookup DS by addresses. The first matching address returns true.
> + * nfs4_ds_cache_lock is held
> + */
> +static struct nfs4_pnfs_ds *
> +_data_server_lookup_locked(struct list_head *dsaddrs)
> +{
> + struct nfs4_pnfs_ds *ds;
> + struct nfs4_pnfs_ds_addr *da1, *da2;
> +
> + list_for_each_entry(da1, dsaddrs, da_node) {
> + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
> + list_for_each_entry(da2, &ds->ds_addrs, da_node) {
> + if (same_sockaddr(
> + (struct sockaddr *)&da1->da_addr,
> + (struct sockaddr *)&da2->da_addr))
> + return ds;
> + }
> }
> }
> return NULL;
> }
>
> /*
> + * Compare two lists of addresses.
> + */
> +static bool
> +_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
> + struct list_head *dsaddrs2)
> +{
> + struct nfs4_pnfs_ds_addr *da1, *da2;
> + size_t count1 = 0,
> + count2 = 0;
> +
> + list_for_each_entry(da1, dsaddrs1, da_node)
> + count1++;
> +
> + list_for_each_entry(da2, dsaddrs2, da_node) {
> + bool found = false;
> + count2++;
> + list_for_each_entry(da1, dsaddrs1, da_node) {
> + if (same_sockaddr((struct sockaddr *)&da1->da_addr,
> + (struct sockaddr *)&da2->da_addr)) {
> + found = true;
> + break;
> + }
> + }
> + if (!found)
> + return false;
> + }
> +
> + return (count1 == count2);
> +}
> +
> +/*
> * Create an rpc connection to the nfs4_pnfs_ds data server
> * Currently only support IPv4

Is this comment ("Currently only support IPv4") still true?

Tigran.
> */
> @@ -119,14 +170,21 @@ static int
> nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
> {
> struct nfs_client *clp;
> + struct nfs4_pnfs_ds_addr *da;
> int status = 0;
>
> - dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr,
> + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
> mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
>
> + BUG_ON(list_empty(&ds->ds_addrs));
> +
> + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
> + dprintk("%s: using the first address for DS %s: %s\n",
> + __func__, ds->ds_remotestr, da->da_remotestr);
> +
> clp = nfs4_set_ds_client(mds_srv->nfs_client,
> - (struct sockaddr *)&ds->ds_addr,
> - ds->ds_addrlen, IPPROTO_TCP);
> + (struct sockaddr *)&da->da_addr,
> + da->da_addrlen, IPPROTO_TCP);
> if (IS_ERR(clp)) {
> status = PTR_ERR(clp);
> goto out;
> @@ -169,12 +227,24 @@ out_put:
> static void
> destroy_ds(struct nfs4_pnfs_ds *ds)
> {
> + struct nfs4_pnfs_ds_addr *da;
> +
> dprintk("--> %s\n", __func__);
> ifdebug(FACILITY)
> print_ds(ds);
>
> if (ds->ds_clp)
> nfs_put_client(ds->ds_clp);
> +
> + while (!list_empty(&ds->ds_addrs)) {
> + da = list_first_entry(&ds->ds_addrs,
> + struct nfs4_pnfs_ds_addr,
> + da_node);
> + list_del_init(&da->da_node);
> + kfree(da->da_remotestr);
> + kfree(da);
> + }
> +
> kfree(ds->ds_remotestr);
> kfree(ds);
> }
> @@ -207,67 +277,73 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
> * complicated setup around many dprinks.
> */
> static char *
> -nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags)
> +nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
> {
> - char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN];
> + struct nfs4_pnfs_ds_addr *da;
> char *remotestr;
> - char *startsep = "";
> - char *endsep = "";
> size_t len;
> - uint16_t port;
> + char *p;
>
> - switch (ds_addr->sa_family) {
> - case AF_INET:
> - port = ((struct sockaddr_in *)ds_addr)->sin_port;
> - break;
> - case AF_INET6:
> - startsep = "[";
> - endsep = "]";
> - port = ((struct sockaddr_in6 *)ds_addr)->sin6_port;
> - break;
> - default:
> - dprintk("%s: Unknown address family %u\n",
> - __func__, ds_addr->sa_family);
> - return NULL;
> + len = 3; /* '{', '}' and eol */
> + list_for_each_entry(da, dsaddrs, da_node) {
> + len += strlen(da->da_remotestr) + 1; /* string plus comma */
> }
>
> - if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) {
> - dprintk("%s: error printing addr\n", __func__);
> + remotestr = kzalloc(len, gfp_flags);
> + if (!remotestr)
> return NULL;
> - }
>
> - len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1;
> - remotestr = kzalloc(len, gfp_flags);
> + p = remotestr;
> + *(p++) = '{';
> + len--;
> + list_for_each_entry(da, dsaddrs, da_node) {
> + size_t ll = strlen(da->da_remotestr);
>
> - if (unlikely(!remotestr)) {
> - dprintk("%s: couldn't alloc remotestr\n", __func__);
> - return NULL;
> - }
> + if (ll > len)
> + goto out_err;
>
> - snprintf(remotestr, len, "%s%s%s:%u",
> - startsep, buf, endsep, ntohs(port));
> + memcpy(p, da->da_remotestr, ll);
> + p += ll;
> + len -= ll;
>
> + if (len < 1)
> + goto out_err;
> + (*p++) = ',';
> + len--;
> + }
> + if (len < 2)
> + goto out_err;
> + *(p++) = '}';
> + *p = '\0';
> return remotestr;
> +out_err:
> + kfree(remotestr);
> + return NULL;
> }
>
> static struct nfs4_pnfs_ds *
> -nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags)
> +nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
> {
> struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
> char *remotestr;
>
> - ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
> + if (list_empty(dsaddrs)) {
> + dprintk("%s: no addresses defined\n", __func__);
> + goto out;
> + }
> +
> + ds = kzalloc(sizeof(*ds), gfp_flags);
> if (!ds)
> goto out;
>
> /* this is only used for debugging, so it's ok if its NULL */
> - remotestr = nfs4_pnfs_remotestr(addr, gfp_flags);
> + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
>
> spin_lock(&nfs4_ds_cache_lock);
> - tmp_ds = _data_server_lookup_locked(addr, addrlen);
> + tmp_ds = _data_server_lookup_locked(dsaddrs);
> if (tmp_ds == NULL) {
> - memcpy(&ds->ds_addr, addr, addrlen);
> - ds->ds_addrlen = addrlen;
> + INIT_LIST_HEAD(&ds->ds_addrs);
> + list_splice_init(dsaddrs, &ds->ds_addrs);
> ds->ds_remotestr = remotestr;
> atomic_set(&ds->ds_count, 1);
> INIT_LIST_HEAD(&ds->ds_node);
> @@ -276,6 +352,11 @@ nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags)
> dprintk("%s add new data server %s\n", __func__,
> ds->ds_remotestr);
> } else {
> + if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
> + dsaddrs)) {
> + dprintk("%s: multipath address mismatch: %s != %s",
> + __func__, tmp_ds->ds_remotestr, remotestr);
> + }
> kfree(remotestr);
> kfree(ds);
> atomic_inc(&tmp_ds->ds_count);
> @@ -292,19 +373,20 @@ out:
> /*
> * Currently only supports ipv4, ipv6 and one multi-path address.
> */
> -static struct nfs4_pnfs_ds *
> -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
> +static struct nfs4_pnfs_ds_addr *
> +decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
> {
> - struct nfs4_pnfs_ds *ds = NULL;
> + struct nfs4_pnfs_ds_addr *da = NULL;
> char *buf, *portstr;
> - struct sockaddr_storage ss;
> - size_t sslen;
> u32 port;
> int nlen, rlen;
> int tmp[2];
> __be32 *p;
> char *netid, *match_netid;
> - size_t match_netid_len;
> + size_t len, match_netid_len;
> + char *startsep = "";
> + char *endsep = "";
> +
>
> /* r_netid */
> p = xdr_inline_decode(streamp, 4);
> @@ -365,50 +447,74 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
> }
> *portstr = '\0';
>
> - if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) {
> - dprintk("%s: Error parsing address %s\n", __func__, buf);
> + da = kzalloc(sizeof(*da), gfp_flags);
> + if (unlikely(!da))
> goto out_free_buf;
> +
> + INIT_LIST_HEAD(&da->da_node);
> +
> + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
> + sizeof(da->da_addr))) {
> + dprintk("%s: error parsing address %s\n", __func__, buf);
> + goto out_free_da;
> }
>
> portstr++;
> sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
> port = htons((tmp[0] << 8) | (tmp[1]));
>
> - switch (ss.ss_family) {
> + switch (da->da_addr.ss_family) {
> case AF_INET:
> - ((struct sockaddr_in *)&ss)->sin_port = port;
> - sslen = sizeof(struct sockaddr_in);
> + ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
> + da->da_addrlen = sizeof(struct sockaddr_in);
> match_netid = "tcp";
> match_netid_len = 3;
> break;
>
> case AF_INET6:
> - ((struct sockaddr_in6 *)&ss)->sin6_port = port;
> - sslen = sizeof(struct sockaddr_in6);
> + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
> + da->da_addrlen = sizeof(struct sockaddr_in6);
> match_netid = "tcp6";
> match_netid_len = 4;
> + startsep = "[";
> + endsep = "]";
> break;
>
> default:
> dprintk("%s: unsupported address family: %u\n",
> - __func__, ss.ss_family);
> - goto out_free_buf;
> + __func__, da->da_addr.ss_family);
> + goto out_free_da;
> }
>
> if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
> dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
> __func__, netid, match_netid);
> - goto out_free_buf;
> + goto out_free_da;
> }
>
> - ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags);
> - dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr);
> + /* save human readable address */
> + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
> + da->da_remotestr = kzalloc(len, gfp_flags);
> +
> + /* NULL is ok, only used for dprintk */
> + if (da->da_remotestr)
> + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
> + buf, endsep, ntohs(port));
> +
> + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
> + kfree(buf);
> + kfree(netid);
> + return da;
> +
> +out_free_da:
> + kfree(da);
> out_free_buf:
> + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
> kfree(buf);
> out_free_netid:
> kfree(netid);
> out_err:
> - return ds;
> + return NULL;
> }
>
> /* Decode opaque device data and return the result */
> @@ -425,6 +531,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
> struct xdr_stream stream;
> struct xdr_buf buf;
> struct page *scratch;
> + struct list_head dsaddrs;
> + struct nfs4_pnfs_ds_addr *da;
>
> /* set up xdr stream */
> scratch = alloc_page(gfp_flags);
> @@ -501,6 +609,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
> NFS_SERVER(ino)->nfs_client,
> &pdev->dev_id);
>
> + INIT_LIST_HEAD(&dsaddrs);
> +
> for (i = 0; i < dsaddr->ds_num; i++) {
> int j;
> u32 mp_count;
> @@ -510,48 +620,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
> goto out_err_free_deviceid;
>
> mp_count = be32_to_cpup(p); /* multipath count */
> - if (mp_count > 1) {
> - printk(KERN_WARNING
> - "%s: Multipath count %d not supported, "
> - "skipping all greater than 1\n", __func__,
> - mp_count);
> - }
> for (j = 0; j < mp_count; j++) {
> - if (j == 0) {
> - dsaddr->ds_list[i] = decode_and_add_ds(&stream,
> - ino, gfp_flags);
> - if (dsaddr->ds_list[i] == NULL)
> - goto out_err_free_deviceid;
> - } else {
> - u32 len;
> - /* skip extra multipath */
> -
> - /* read len, skip */
> - p = xdr_inline_decode(&stream, 4);
> - if (unlikely(!p))
> - goto out_err_free_deviceid;
> - len = be32_to_cpup(p);
> -
> - p = xdr_inline_decode(&stream, len);
> - if (unlikely(!p))
> - goto out_err_free_deviceid;
> -
> - /* read len, skip */
> - p = xdr_inline_decode(&stream, 4);
> - if (unlikely(!p))
> - goto out_err_free_deviceid;
> - len = be32_to_cpup(p);
> -
> - p = xdr_inline_decode(&stream, len);
> - if (unlikely(!p))
> - goto out_err_free_deviceid;
> - }
> + da = decode_ds_addr(&stream, gfp_flags);
> + if (da)
> + list_add_tail(&da->da_node, &dsaddrs);
> + }
> + if (list_empty(&dsaddrs)) {
> + dprintk("%s: no suitable DS addresses found\n",
> + __func__);
> + goto out_err_free_deviceid;
> + }
> +
> + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
> + if (!dsaddr->ds_list[i])
> + goto out_err_drain_dsaddrs;
> +
> + /* If DS was already in cache, free ds addrs */
> + while (!list_empty(&dsaddrs)) {
> + da = list_first_entry(&dsaddrs,
> + struct nfs4_pnfs_ds_addr,
> + da_node);
> + list_del_init(&da->da_node);
> + kfree(da->da_remotestr);
> + kfree(da);
> }
> }
>
> __free_page(scratch);
> return dsaddr;
>
> +out_err_drain_dsaddrs:
> + while (!list_empty(&dsaddrs)) {
> + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
> + da_node);
> + list_del_init(&da->da_node);
> + kfree(da->da_remotestr);
> + kfree(da);
> + }
> out_err_free_deviceid:
> nfs4_fl_free_deviceid(dsaddr);
> /* stripe_indicies was part of dsaddr */
> --
> 1.7.5.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>