2014-09-03 04:25:45

by Christoph Hellwig

[permalink] [raw]
Subject: pnfs: factor GETDEVICEINFO implementations V2

This series adds a common implementation of the GETDEVICELIST operation,
and switches the block layout driver to use the generic device id cache.

This is mostly done by moving well tested code from the file and block
layout drivers to common code. I've only tested it using the block
layout driver, so testing with the other layouts would be greatly
appreciated. I'm mostly concerned about the object layout driver as
it had significant differences and missing bug fixes compared to the
others.

Note that the last patch requires my "pnfs block layout driver fixes V2"
series from August 21, but the others should work fine without it.

A git tree is also available at

git://git.infradead.org/users/hch/pnfs.git getdeviceinfo

This work was sponsored by NetApp, Inc.

Changes since V1:
- GETDEVICELIST leak fix
- add knob to limit GETDEVICEINFO payload size for the object layout
driver



2014-09-03 04:26:00

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 3/4] pnfs: add a nfs4_get_deviceid helper

This will be used by the block layout driver when splitting extents.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/nfs/pnfs.h | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index bf4e87d..4746ede 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -279,6 +279,13 @@ void nfs4_deviceid_purge_client(const struct nfs_client *);
int nfs4_deviceid_getdevicelist(struct nfs_server *server,
const struct nfs_fh *fh);

+static inline struct nfs4_deviceid_node *
+nfs4_get_deviceid(struct nfs4_deviceid_node *d)
+{
+ atomic_inc(&d->ref);
+ return d;
+}
+
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
--
1.9.1


2014-09-03 04:25:52

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 1/4] pnfs: factor GETDEVICEINFO implementations

Add support to the common pNFS core to issue GETDEVICEINFO calls on
a device ID cache miss. The code is taken from the well debugged
file layout implementation and calls out to the layoutdriver through
a new alloc_deviceid_node method. The calling conventions for
nfs4_find_get_deviceid are changed so that all information needed to
send a GETDEVICEINFO request is passed to the common code.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/nfs/filelayout/filelayout.c | 29 +++++---
fs/nfs/filelayout/filelayout.h | 7 +-
fs/nfs/filelayout/filelayoutdev.c | 108 ++-------------------------
fs/nfs/objlayout/objio_osd.c | 115 +++++++++++------------------
fs/nfs/objlayout/objlayout.c | 70 ------------------
fs/nfs/objlayout/objlayout.h | 5 --
fs/nfs/pnfs.h | 13 +++-
fs/nfs/pnfs_dev.c | 149 ++++++++++++++++++++++++++------------
8 files changed, 183 insertions(+), 313 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1359c4a..3136fc7 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -646,18 +646,15 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
}

/* find and reference the deviceid */
- d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
- NFS_SERVER(lo->plh_inode)->nfs_client, id);
- if (d == NULL) {
- dsaddr = filelayout_get_device_info(lo->plh_inode, id,
- lo->plh_lc_cred, gfp_flags);
- if (dsaddr == NULL)
- goto out;
- } else
- dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+ d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id,
+ lo->plh_lc_cred, gfp_flags);
+ if (d == NULL)
+ goto out;
+
+ dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
/* Found deviceid is unavailable */
if (filelayout_test_devid_unavailable(&dsaddr->id_node))
- goto out_put;
+ goto out_put;

fl->dsaddr = dsaddr;

@@ -1367,6 +1364,17 @@ out:
cinfo->ds->ncommitting = 0;
return PNFS_ATTEMPTED;
}
+static struct nfs4_deviceid_node *
+filelayout_alloc_deviceid_node(struct nfs_server *server,
+ struct pnfs_device *pdev, gfp_t gfp_flags)
+{
+ struct nfs4_file_layout_dsaddr *dsaddr;
+
+ dsaddr = nfs4_fl_alloc_deviceid_node(server, pdev, gfp_flags);
+ if (!dsaddr)
+ return NULL;
+ return &dsaddr->id_node;
+}

static void
filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
@@ -1419,6 +1427,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
+ .alloc_deviceid_node = filelayout_alloc_deviceid_node,
.free_deviceid_node = filelayout_free_deveiceid_node,
};

diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index ffbddf2..7c9f800 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -147,10 +147,11 @@ u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
u32 ds_idx);
+
+extern struct nfs4_file_layout_dsaddr *
+nfs4_fl_alloc_deviceid_node(struct nfs_server *server,
+ struct pnfs_device *pdev, gfp_t gfp_flags);
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
-struct nfs4_file_layout_dsaddr *
-filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id,
- struct rpc_cred *cred, gfp_t gfp_flags);

#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 8540516..9bb806a 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -484,8 +484,9 @@ out_err:
}

/* Decode opaque device data and return the result */
-static struct nfs4_file_layout_dsaddr*
-decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
+struct nfs4_file_layout_dsaddr *
+nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
+ gfp_t gfp_flags)
{
int i;
u32 cnt, num;
@@ -570,10 +571,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
dsaddr->stripe_indices = stripe_indices;
stripe_indices = NULL;
dsaddr->ds_num = num;
- nfs4_init_deviceid_node(&dsaddr->id_node,
- NFS_SERVER(ino)->pnfs_curr_ld,
- NFS_SERVER(ino)->nfs_client,
- &pdev->dev_id);
+ nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id);

INIT_LIST_HEAD(&dsaddrs);

@@ -587,7 +585,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)

mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
+ da = decode_ds_addr(server->nfs_client->cl_net,
&stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
@@ -637,102 +635,6 @@ out_err:
return NULL;
}

-/*
- * Decode the opaque device specified in 'dev' and add it to the cache of
- * available devices.
- */
-static struct nfs4_file_layout_dsaddr *
-decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
-{
- struct nfs4_deviceid_node *d;
- struct nfs4_file_layout_dsaddr *n, *new;
-
- new = decode_device(inode, dev, gfp_flags);
- if (!new) {
- printk(KERN_WARNING "NFS: %s: Could not decode or add device\n",
- __func__);
- return NULL;
- }
-
- d = nfs4_insert_deviceid_node(&new->id_node);
- n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
- if (n != new) {
- nfs4_fl_free_deviceid(new);
- return n;
- }
-
- return new;
-}
-
-/*
- * Retrieve the information for dev_id, add it to the list
- * of available devices, and return it.
- */
-struct nfs4_file_layout_dsaddr *
-filelayout_get_device_info(struct inode *inode,
- struct nfs4_deviceid *dev_id,
- struct rpc_cred *cred,
- gfp_t gfp_flags)
-{
- struct pnfs_device *pdev = NULL;
- u32 max_resp_sz;
- int max_pages;
- struct page **pages = NULL;
- struct nfs4_file_layout_dsaddr *dsaddr = NULL;
- int rc, i;
- struct nfs_server *server = NFS_SERVER(inode);
-
- /*
- * Use the session max response size as the basis for setting
- * GETDEVICEINFO's maxcount
- */
- max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = nfs_page_array_len(0, max_resp_sz);
- dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
- __func__, inode, max_resp_sz, max_pages);
-
- pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
- if (pdev == NULL)
- return NULL;
-
- pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
- if (pages == NULL) {
- kfree(pdev);
- return NULL;
- }
- for (i = 0; i < max_pages; i++) {
- pages[i] = alloc_page(gfp_flags);
- if (!pages[i])
- goto out_free;
- }
-
- memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
- pdev->layout_type = LAYOUT_NFSV4_1_FILES;
- pdev->pages = pages;
- pdev->pgbase = 0;
- pdev->pglen = max_resp_sz;
- pdev->mincount = 0;
- pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;
-
- rc = nfs4_proc_getdeviceinfo(server, pdev, cred);
- dprintk("%s getdevice info returns %d\n", __func__, rc);
- if (rc)
- goto out_free;
-
- /*
- * Found new device, need to decode it and then add it to the
- * list of known devices for this mountpoint.
- */
- dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
-out_free:
- for (i = 0; i < max_pages; i++)
- __free_page(pages[i]);
- kfree(pages);
- kfree(pdev);
- dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
- return dsaddr;
-}
-
void
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index ae05278..2e045b2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -60,52 +60,6 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d)
kfree(de);
}

-static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss,
- const struct nfs4_deviceid *d_id)
-{
- struct nfs4_deviceid_node *d;
- struct objio_dev_ent *de;
-
- d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id);
- if (!d)
- return NULL;
-
- de = container_of(d, struct objio_dev_ent, id_node);
- return de;
-}
-
-static struct objio_dev_ent *
-_dev_list_add(const struct nfs_server *nfss,
- const struct nfs4_deviceid *d_id, struct osd_dev *od,
- gfp_t gfp_flags)
-{
- struct nfs4_deviceid_node *d;
- struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags);
- struct objio_dev_ent *n;
-
- if (!de) {
- dprintk("%s: -ENOMEM od=%p\n", __func__, od);
- return NULL;
- }
-
- dprintk("%s: Adding od=%p\n", __func__, od);
- nfs4_init_deviceid_node(&de->id_node,
- nfss->pnfs_curr_ld,
- nfss->nfs_client,
- d_id);
- de->od.od = od;
-
- d = nfs4_insert_deviceid_node(&de->id_node);
- n = container_of(d, struct objio_dev_ent, id_node);
- if (n != de) {
- dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od);
- objio_free_deviceid_node(&de->id_node);
- de = n;
- }
-
- return de;
-}
-
struct objio_segment {
struct pnfs_layout_segment lseg;

@@ -130,29 +84,24 @@ struct objio_state {

/* Send and wait for a get_device_info of devices in the layout,
then look them up with the osd_initiator library */
-static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
- struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id,
- gfp_t gfp_flags)
+struct nfs4_deviceid_node *
+objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
+ gfp_t gfp_flags)
{
struct pnfs_osd_deviceaddr *deviceaddr;
- struct objio_dev_ent *ode;
+ struct objio_dev_ent *ode = NULL;
struct osd_dev *od;
struct osd_dev_info odi;
bool retry_flag = true;
+ u32 *p;
int err;

- ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
- if (ode) {
- objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
- return 0;
- }
-
- err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags);
- if (unlikely(err)) {
- dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
- __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err);
- return err;
- }
+ deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
+ if (!deviceaddr)
+ return NULL;
+
+ p = page_address(pdev->pages[0]);
+ pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);

odi.systemid_len = deviceaddr->oda_systemid.len;
if (odi.systemid_len > sizeof(odi.systemid)) {
@@ -188,14 +137,24 @@ retry_lookup:
goto out;
}

- ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od,
- gfp_flags);
- objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
dprintk("Adding new dev_id(%llx:%llx)\n",
- _DEVID_LO(d_id), _DEVID_HI(d_id));
+ _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
+
+ ode = kzalloc(sizeof(*ode), gfp_flags);
+ if (!ode) {
+ dprintk("%s: -ENOMEM od=%p\n", __func__, od);
+ goto out;
+ }
+
+ nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
+ kfree(deviceaddr);
+
+ ode->od.od = od;
+ return &ode->id_node;
+
out:
- objlayout_put_deviceinfo(deviceaddr);
- return err;
+ kfree(deviceaddr);
+ return NULL;
}

static void copy_single_comp(struct ore_components *oc, unsigned c,
@@ -254,6 +213,7 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp,
struct xdr_stream *xdr,
gfp_t gfp_flags)
{
+ struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
struct objio_segment *objio_seg;
struct pnfs_osd_xdr_decode_layout_iter iter;
struct pnfs_osd_layout layout;
@@ -283,13 +243,21 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp,
objio_seg->oc.first_dev = layout.olo_comps_index;
cur_comp = 0;
while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
+ struct nfs4_deviceid_node *d;
+ struct objio_dev_ent *ode;
+
copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
- err = objio_devices_lookup(pnfslay, objio_seg, cur_comp,
- &src_comp.oc_object_id.oid_device_id,
- gfp_flags);
- if (err)
+
+ d = nfs4_find_get_deviceid(server,
+ &src_comp.oc_object_id.oid_device_id,
+ pnfslay->plh_lc_cred, gfp_flags);
+ if (!d) {
+ err = -ENXIO;
goto err;
- ++cur_comp;
+ }
+
+ ode = container_of(d, struct objio_dev_ent, id_node);
+ objio_seg->oc.ods[cur_comp++] = &ode->od;
}
/* pnfs_osd_xdr_decode_layout_comp returns false on error */
if (unlikely(err))
@@ -653,6 +621,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
.flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_LAYOUTRET_ON_ERROR,

+ .max_deviceinfo_size = PAGE_SIZE,
.owner = THIS_MODULE,
.alloc_layout_hdr = objlayout_alloc_layout_hdr,
.free_layout_hdr = objlayout_free_layout_hdr,
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 697a16d..c89357c 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -574,76 +574,6 @@ loop_done:
dprintk("%s: Return\n", __func__);
}

-
-/*
- * Get Device Info API for io engines
- */
-struct objlayout_deviceinfo {
- struct page *page;
- struct pnfs_osd_deviceaddr da; /* This must be last */
-};
-
-/* Initialize and call nfs_getdeviceinfo, then decode and return a
- * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
- * should be called.
- */
-int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
- struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
- gfp_t gfp_flags)
-{
- struct objlayout_deviceinfo *odi;
- struct pnfs_device pd;
- struct page *page, **pages;
- u32 *p;
- int err;
-
- page = alloc_page(gfp_flags);
- if (!page)
- return -ENOMEM;
-
- pages = &page;
- pd.pages = pages;
-
- memcpy(&pd.dev_id, d_id, sizeof(*d_id));
- pd.layout_type = LAYOUT_OSD2_OBJECTS;
- pd.pages = &page;
- pd.pgbase = 0;
- pd.pglen = PAGE_SIZE;
- pd.mincount = 0;
- pd.maxcount = PAGE_SIZE;
-
- err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd,
- pnfslay->plh_lc_cred);
- dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
- if (err)
- goto err_out;
-
- p = page_address(page);
- odi = kzalloc(sizeof(*odi), gfp_flags);
- if (!odi) {
- err = -ENOMEM;
- goto err_out;
- }
- pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
- odi->page = page;
- *deviceaddr = &odi->da;
- return 0;
-
-err_out:
- __free_page(page);
- return err;
-}
-
-void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
-{
- struct objlayout_deviceinfo *odi = container_of(deviceaddr,
- struct objlayout_deviceinfo,
- da);
-
- __free_page(odi->page);
- kfree(odi);
-}
-
enum {
OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index fd13f1d..3a0828d 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -149,11 +149,6 @@ extern void objlayout_read_done(struct objlayout_io_res *oir,
extern void objlayout_write_done(struct objlayout_io_res *oir,
ssize_t status, bool sync);

-extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
- struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
- gfp_t gfp_flags);
-extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
-
/*
* exported generic objects function vectors
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e11a161..c5f90e7 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -84,6 +84,7 @@ struct pnfs_layoutdriver_type {
const char *name;
struct module *owner;
unsigned flags;
+ unsigned max_deviceinfo_size;

int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
int (*clear_layoutdriver) (struct nfs_server *);
@@ -126,6 +127,9 @@ struct pnfs_layoutdriver_type {
enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);

void (*free_deviceid_node) (struct nfs4_deviceid_node *);
+ struct nfs4_deviceid_node * (*alloc_deviceid_node)
+ (struct nfs_server *server, struct pnfs_device *pdev,
+ gfp_t gfp_flags);

void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid,
struct xdr_stream *xdr,
@@ -260,11 +264,12 @@ struct nfs4_deviceid_node {
atomic_t ref;
};

-struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
+struct nfs4_deviceid_node *
+nfs4_find_get_deviceid(struct nfs_server *server,
+ const struct nfs4_deviceid *id, struct rpc_cred *cred,
+ gfp_t gfp_mask);
void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
-void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
- const struct pnfs_layoutdriver_type *,
- const struct nfs_client *,
+void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *,
const struct nfs4_deviceid *);
struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 6da209b..791f8b3 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -29,6 +29,9 @@
*/

#include <linux/export.h>
+#include <linux/nfs_fs.h>
+#include "nfs4session.h"
+#include "internal.h"
#include "pnfs.h"

#define NFSDBG_FACILITY NFSDBG_PNFS
@@ -89,6 +92,74 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
return NULL;
}

+static struct nfs4_deviceid_node *
+nfs4_get_device_info(struct nfs_server *server,
+ const struct nfs4_deviceid *dev_id,
+ struct rpc_cred *cred, gfp_t gfp_flags)
+{
+ struct nfs4_deviceid_node *d = NULL;
+ struct pnfs_device *pdev = NULL;
+ struct page **pages = NULL;
+ u32 max_resp_sz;
+ int max_pages;
+ int rc, i;
+
+ /*
+ * Use the session max response size as the basis for setting
+ * GETDEVICEINFO's maxcount
+ */
+ max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
+ if (server->pnfs_curr_ld->max_deviceinfo_size &&
+ server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz)
+ max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size;
+ max_pages = nfs_page_array_len(0, max_resp_sz);
+ dprintk("%s: server %p max_resp_sz %u max_pages %d\n",
+ __func__, server, max_resp_sz, max_pages);
+
+ pdev = kzalloc(sizeof(*pdev), gfp_flags);
+ if (!pdev)
+ return NULL;
+
+ pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
+ if (!pages)
+ goto out_free_pdev;
+
+ for (i = 0; i < max_pages; i++) {
+ pages[i] = alloc_page(gfp_flags);
+ if (!pages[i])
+ goto out_free_pages;
+ }
+
+ memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
+ pdev->layout_type = server->pnfs_curr_ld->id;
+ pdev->pages = pages;
+ pdev->pgbase = 0;
+ pdev->pglen = max_resp_sz;
+ pdev->mincount = 0;
+ pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;
+
+ rc = nfs4_proc_getdeviceinfo(server, pdev, cred);
+ dprintk("%s getdevice info returns %d\n", __func__, rc);
+ if (rc)
+ goto out_free_pages;
+
+ /*
+ * Found new device, need to decode it and then add it to the
+ * list of known devices for this mountpoint.
+ */
+ d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev,
+ gfp_flags);
+
+out_free_pages:
+ for (i = 0; i < max_pages; i++)
+ __free_page(pages[i]);
+ kfree(pages);
+out_free_pdev:
+ kfree(pdev);
+ dprintk("<-- %s d %p\n", __func__, d);
+ return d;
+}
+
/*
* Lookup a deviceid in cache and get a reference count on it if found
*
@@ -96,14 +167,14 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
* @id deviceid to look up
*/
static struct nfs4_deviceid_node *
-_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
- const struct nfs_client *clp, const struct nfs4_deviceid *id,
- long hash)
+__nfs4_find_get_deviceid(struct nfs_server *server,
+ const struct nfs4_deviceid *id, long hash)
{
struct nfs4_deviceid_node *d;

rcu_read_lock();
- d = _lookup_deviceid(ld, clp, id, hash);
+ d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id,
+ hash);
if (d != NULL)
atomic_inc(&d->ref);
rcu_read_unlock();
@@ -111,10 +182,33 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
}

struct nfs4_deviceid_node *
-nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
- const struct nfs_client *clp, const struct nfs4_deviceid *id)
+nfs4_find_get_deviceid(struct nfs_server *server,
+ const struct nfs4_deviceid *id, struct rpc_cred *cred,
+ gfp_t gfp_mask)
{
- return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id));
+ long hash = nfs4_deviceid_hash(id);
+ struct nfs4_deviceid_node *d, *new;
+
+ d = __nfs4_find_get_deviceid(server, id, hash);
+ if (d)
+ return d;
+
+ new = nfs4_get_device_info(server, id, cred, gfp_mask);
+ if (!new)
+ return new;
+
+ spin_lock(&nfs4_deviceid_lock);
+ d = __nfs4_find_get_deviceid(server, id, hash);
+ if (d) {
+ spin_unlock(&nfs4_deviceid_lock);
+ server->pnfs_curr_ld->free_deviceid_node(new);
+ return d;
+ }
+ hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
+ atomic_inc(&new->ref);
+ spin_unlock(&nfs4_deviceid_lock);
+
+ return new;
}
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);

@@ -151,15 +245,13 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);

void
-nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
- const struct pnfs_layoutdriver_type *ld,
- const struct nfs_client *nfs_client,
+nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, struct nfs_server *server,
const struct nfs4_deviceid *id)
{
INIT_HLIST_NODE(&d->node);
INIT_HLIST_NODE(&d->tmpnode);
- d->ld = ld;
- d->nfs_client = nfs_client;
+ d->ld = server->pnfs_curr_ld;
+ d->nfs_client = server->nfs_client;
d->flags = 0;
d->deviceid = *id;
atomic_set(&d->ref, 1);
@@ -167,39 +259,6 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node);

/*
- * Uniquely initialize and insert a deviceid node into cache
- *
- * @new new deviceid node
- * Note that the caller must set up the following members:
- * new->ld
- * new->nfs_client
- * new->deviceid
- *
- * @ret the inserted node, if none found, otherwise, the found entry.
- */
-struct nfs4_deviceid_node *
-nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new)
-{
- struct nfs4_deviceid_node *d;
- long hash;
-
- spin_lock(&nfs4_deviceid_lock);
- hash = nfs4_deviceid_hash(&new->deviceid);
- d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash);
- if (d) {
- spin_unlock(&nfs4_deviceid_lock);
- return d;
- }
-
- hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
- spin_unlock(&nfs4_deviceid_lock);
- atomic_inc(&new->ref);
-
- return new;
-}
-EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
-
-/*
* Dereference a deviceid node and delete it when its reference count drops
* to zero.
*
--
1.9.1


2014-09-03 23:34:48

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 2/4] pnfs: add a common GETDEVICELIST implementation

On Wed, Sep 03, 2014 at 02:57:41PM -0400, Anna Schumaker wrote:
> On 09/03/2014 12:27 AM, Christoph Hellwig wrote:
> > At a simple helper to issue a GETDEVICELIST operation and pre-load
> > the device id cache based on the result.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
> > ---
> > fs/nfs/pnfs.h | 2 ++
> > fs/nfs/pnfs_dev.c | 29 +++++++++++++++++++++++++++++
> > 2 files changed, 31 insertions(+)
> >
> > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> > index c5f90e7..bf4e87d 100644
> > --- a/fs/nfs/pnfs.h
> > +++ b/fs/nfs/pnfs.h
> > @@ -276,6 +276,8 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
> > void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
> > bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
> > void nfs4_deviceid_purge_client(const struct nfs_client *);
> > +int nfs4_deviceid_getdevicelist(struct nfs_server *server,
> > + const struct nfs_fh *fh);
> >
> > static inline struct pnfs_layout_segment *
> > pnfs_get_lseg(struct pnfs_layout_segment *lseg)
> > diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
> > index 791f8b3..82c2836 100644
> > --- a/fs/nfs/pnfs_dev.c
> > +++ b/fs/nfs/pnfs_dev.c
> > @@ -359,3 +359,32 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
> > rcu_read_unlock();
> > }
> >
> > +int
> > +nfs4_deviceid_getdevicelist(struct nfs_server *server,
> > + const struct nfs_fh *fh)
> > +{
> > + struct pnfs_devicelist *dlist;
> > + struct nfs4_deviceid_node *d;
> > + int error = 0, i;
> > +
> > + dlist = kzalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
> > + if (!dlist)
> > + return -ENOMEM;
> > +
> > + while (!dlist->eof) {
> > + error = nfs4_proc_getdevicelist(server, fh, dlist);
> > + if (error)
> > + break;
> > +
> > + for (i = 0; i < dlist->num_devs; i++) {
> > + d = nfs4_find_get_deviceid(server, &dlist->dev_id[i],
> > + NULL, GFP_NOFS);
> > + if (d)
> > + nfs4_put_deviceid_node(d);
>
> I think you need to change nfs4xdr.c:encode_getdevicelist() to use a stored cookie value for the GETDEVICELIST operation, otherwise we will always be reading from the beginning of the device list and the while loop might never exit.

Just like the existing GETDEVICELIST implementation in the block layout code
did. I don't really know what people were smoking when they came up with that
cookie mess, but before adding more complications to the GETDEVICELIST
implementation I'd rather just drop it entirely.


2014-09-03 18:58:11

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH 2/4] pnfs: add a common GETDEVICELIST implementation

On 09/03/2014 12:27 AM, Christoph Hellwig wrote:
> At a simple helper to issue a GETDEVICELIST operation and pre-load
> the device id cache based on the result.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> fs/nfs/pnfs.h | 2 ++
> fs/nfs/pnfs_dev.c | 29 +++++++++++++++++++++++++++++
> 2 files changed, 31 insertions(+)
>
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index c5f90e7..bf4e87d 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -276,6 +276,8 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
> void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
> bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
> void nfs4_deviceid_purge_client(const struct nfs_client *);
> +int nfs4_deviceid_getdevicelist(struct nfs_server *server,
> + const struct nfs_fh *fh);
>
> static inline struct pnfs_layout_segment *
> pnfs_get_lseg(struct pnfs_layout_segment *lseg)
> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
> index 791f8b3..82c2836 100644
> --- a/fs/nfs/pnfs_dev.c
> +++ b/fs/nfs/pnfs_dev.c
> @@ -359,3 +359,32 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
> rcu_read_unlock();
> }
>
> +int
> +nfs4_deviceid_getdevicelist(struct nfs_server *server,
> + const struct nfs_fh *fh)
> +{
> + struct pnfs_devicelist *dlist;
> + struct nfs4_deviceid_node *d;
> + int error = 0, i;
> +
> + dlist = kzalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
> + if (!dlist)
> + return -ENOMEM;
> +
> + while (!dlist->eof) {
> + error = nfs4_proc_getdevicelist(server, fh, dlist);
> + if (error)
> + break;
> +
> + for (i = 0; i < dlist->num_devs; i++) {
> + d = nfs4_find_get_deviceid(server, &dlist->dev_id[i],
> + NULL, GFP_NOFS);
> + if (d)
> + nfs4_put_deviceid_node(d);

I think you need to change nfs4xdr.c:encode_getdevicelist() to use a stored cookie value for the GETDEVICELIST operation, otherwise we will always be reading from the beginning of the device list and the while loop might never exit.

Anna
> + }
> + }
> +
> + kfree(dlist);
> + return error;
> +}
> +EXPORT_SYMBOL_GPL(nfs4_deviceid_getdevicelist);


2014-09-03 04:25:56

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 2/4] pnfs: add a common GETDEVICELIST implementation

At a simple helper to issue a GETDEVICELIST operation and pre-load
the device id cache based on the result.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/nfs/pnfs.h | 2 ++
fs/nfs/pnfs_dev.c | 29 +++++++++++++++++++++++++++++
2 files changed, 31 insertions(+)

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c5f90e7..bf4e87d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -276,6 +276,8 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
void nfs4_deviceid_purge_client(const struct nfs_client *);
+int nfs4_deviceid_getdevicelist(struct nfs_server *server,
+ const struct nfs_fh *fh);

static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 791f8b3..82c2836 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -359,3 +359,32 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
rcu_read_unlock();
}

+int
+nfs4_deviceid_getdevicelist(struct nfs_server *server,
+ const struct nfs_fh *fh)
+{
+ struct pnfs_devicelist *dlist;
+ struct nfs4_deviceid_node *d;
+ int error = 0, i;
+
+ dlist = kzalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
+ if (!dlist)
+ return -ENOMEM;
+
+ while (!dlist->eof) {
+ error = nfs4_proc_getdevicelist(server, fh, dlist);
+ if (error)
+ break;
+
+ for (i = 0; i < dlist->num_devs; i++) {
+ d = nfs4_find_get_deviceid(server, &dlist->dev_id[i],
+ NULL, GFP_NOFS);
+ if (d)
+ nfs4_put_deviceid_node(d);
+ }
+ }
+
+ kfree(dlist);
+ return error;
+}
+EXPORT_SYMBOL_GPL(nfs4_deviceid_getdevicelist);
--
1.9.1


2014-09-03 04:26:05

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 4/4] pnfs/blocklayout: use the device id cache

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 149 ++----------------------------------
fs/nfs/blocklayout/blocklayout.h | 25 ++----
fs/nfs/blocklayout/blocklayoutdev.c | 88 +++++++++------------
fs/nfs/blocklayout/blocklayoutdm.c | 26 +------
fs/nfs/blocklayout/extent_tree.c | 27 ++++---
5 files changed, 65 insertions(+), 250 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index fdc065c..ff8b43b 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -119,6 +119,8 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
void (*end_io)(struct bio *, int err),
struct parallel_io *par)
{
+ struct pnfs_block_dev *dev =
+ container_of(be->be_device, struct pnfs_block_dev, d_node);
struct bio *bio;

npg = min(npg, BIO_MAX_PAGES);
@@ -131,7 +133,7 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
if (bio) {
bio->bi_iter.bi_sector = isect - be->be_f_offset +
be->be_v_offset;
- bio->bi_bdev = be->be_mdev;
+ bio->bi_bdev = dev->d_bdev;
bio->bi_end_io = end_io;
bio->bi_private = par;
}
@@ -510,96 +512,9 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
ext_tree_mark_committed(&lcdata->args, lcdata->res.status);
}

-static void free_blk_mountid(struct block_mount_id *mid)
-{
- if (mid) {
- struct pnfs_block_dev *dev, *tmp;
-
- /* No need to take bm_lock as we are last user freeing bm_devlist */
- list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
- list_del(&dev->bm_node);
- bl_free_block_dev(dev);
- }
- kfree(mid);
- }
-}
-
-/* This is mostly copied from the filelayout_get_device_info function.
- * It seems much of this should be at the generic pnfs level.
- */
-static struct pnfs_block_dev *
-nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
- struct nfs4_deviceid *d_id)
-{
- struct pnfs_device *dev;
- struct pnfs_block_dev *rv;
- u32 max_resp_sz;
- int max_pages;
- struct page **pages = NULL;
- int i, rc;
-
- /*
- * Use the session max response size as the basis for setting
- * GETDEVICEINFO's maxcount
- */
- max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = nfs_page_array_len(0, max_resp_sz);
- dprintk("%s max_resp_sz %u max_pages %d\n",
- __func__, max_resp_sz, max_pages);
-
- dev = kmalloc(sizeof(*dev), GFP_NOFS);
- if (!dev) {
- dprintk("%s kmalloc failed\n", __func__);
- return ERR_PTR(-ENOMEM);
- }
-
- pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
- if (pages == NULL) {
- kfree(dev);
- return ERR_PTR(-ENOMEM);
- }
- for (i = 0; i < max_pages; i++) {
- pages[i] = alloc_page(GFP_NOFS);
- if (!pages[i]) {
- rv = ERR_PTR(-ENOMEM);
- goto out_free;
- }
- }
-
- memcpy(&dev->dev_id, d_id, sizeof(*d_id));
- dev->layout_type = LAYOUT_BLOCK_VOLUME;
- dev->pages = pages;
- dev->pgbase = 0;
- dev->pglen = PAGE_SIZE * max_pages;
- dev->mincount = 0;
- dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;
-
- dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
- rc = nfs4_proc_getdeviceinfo(server, dev, NULL);
- dprintk("%s getdevice info returns %d\n", __func__, rc);
- if (rc) {
- rv = ERR_PTR(rc);
- goto out_free;
- }
-
- rv = nfs4_blk_decode_device(server, dev);
- out_free:
- for (i = 0; i < max_pages; i++)
- __free_page(pages[i]);
- kfree(pages);
- kfree(dev);
- return rv;
-}
-
static int
bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
{
- struct block_mount_id *b_mt_id = NULL;
- struct pnfs_devicelist *dlist = NULL;
- struct pnfs_block_dev *bdev;
- LIST_HEAD(block_disklist);
- int status, i;
-
dprintk("%s enter\n", __func__);

if (server->pnfs_blksize == 0) {
@@ -612,60 +527,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
return -EINVAL;
}

- b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS);
- if (!b_mt_id) {
- status = -ENOMEM;
- goto out_error;
- }
- /* Initialize nfs4 block layout mount id */
- spin_lock_init(&b_mt_id->bm_lock);
- INIT_LIST_HEAD(&b_mt_id->bm_devlist);
-
- dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
- if (!dlist) {
- status = -ENOMEM;
- goto out_error;
- }
- dlist->eof = 0;
- while (!dlist->eof) {
- status = nfs4_proc_getdevicelist(server, fh, dlist);
- if (status)
- goto out_error;
- dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
- __func__, dlist->num_devs, dlist->eof);
- for (i = 0; i < dlist->num_devs; i++) {
- bdev = nfs4_blk_get_deviceinfo(server, fh,
- &dlist->dev_id[i]);
- if (IS_ERR(bdev)) {
- status = PTR_ERR(bdev);
- goto out_error;
- }
- spin_lock(&b_mt_id->bm_lock);
- list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
- spin_unlock(&b_mt_id->bm_lock);
- }
- }
- dprintk("%s SUCCESS\n", __func__);
- server->pnfs_ld_data = b_mt_id;
-
- out_return:
- kfree(dlist);
- return status;
-
- out_error:
- free_blk_mountid(b_mt_id);
- goto out_return;
-}
-
-static int
-bl_clear_layoutdriver(struct nfs_server *server)
-{
- struct block_mount_id *b_mt_id = server->pnfs_ld_data;
-
- dprintk("%s enter\n", __func__);
- free_blk_mountid(b_mt_id);
- dprintk("%s RETURNS\n", __func__);
- return 0;
+ return nfs4_deviceid_getdevicelist(server, fh);
}

static bool
@@ -806,7 +668,8 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
.prepare_layoutcommit = bl_prepare_layoutcommit,
.cleanup_layoutcommit = bl_cleanup_layoutcommit,
.set_layoutdriver = bl_set_layoutdriver,
- .clear_layoutdriver = bl_clear_layoutdriver,
+ .alloc_deviceid_node = bl_alloc_deviceid_node,
+ .free_deviceid_node = bl_free_deviceid_node,
.pg_read_ops = &bl_pg_read_ops,
.pg_write_ops = &bl_pg_write_ops,
};
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 6f3a550..9757f3e 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -44,16 +44,9 @@
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
#define SECTOR_SIZE (1 << SECTOR_SHIFT)

-struct block_mount_id {
- spinlock_t bm_lock; /* protects list */
- struct list_head bm_devlist; /* holds pnfs_block_dev */
-};
-
struct pnfs_block_dev {
- struct list_head bm_node;
- struct nfs4_deviceid bm_mdevid; /* associated devid */
- struct block_device *bm_mdev; /* meta device itself */
- struct net *net;
+ struct nfs4_deviceid_node d_node;
+ struct block_device *d_bdev;
};

enum exstate4 {
@@ -69,8 +62,7 @@ struct pnfs_block_extent {
struct rb_node be_node;
struct list_head be_list;
};
- struct nfs4_deviceid be_devid; /* FIXME: could use device cache instead */
- struct block_device *be_mdev;
+ struct nfs4_deviceid_node *be_device;
sector_t be_f_offset; /* the starting offset in the file */
sector_t be_length; /* the size of the extent */
sector_t be_v_offset; /* the starting offset in the volume */
@@ -90,8 +82,6 @@ struct pnfs_block_layout {
spinlock_t bl_ext_lock; /* Protects list manipulation */
};

-#define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data))
-
static inline struct pnfs_block_layout *
BLK_LO2EXT(struct pnfs_layout_hdr *lo)
{
@@ -123,14 +113,15 @@ struct bl_msg_hdr {
/* blocklayoutdev.c */
ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
-void nfs4_blkdev_put(struct block_device *bdev);
-struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
- struct pnfs_device *dev);
int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);

+struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
+ struct pnfs_device *pdev, gfp_t gfp_mask);
+void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
+
/* blocklayoutdm.c */
-void bl_free_block_dev(struct pnfs_block_dev *bdev);
+void bl_dm_remove(struct net *net, dev_t dev);

/* extent_tree.c */
int ext_tree_insert(struct pnfs_block_layout *bl,
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index cd71b5e..d6527d2 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -53,16 +53,6 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
return 0;
}

-/*
- * Release the block device
- */
-void nfs4_blkdev_put(struct block_device *bdev)
-{
- dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
- MINOR(bdev->bd_dev));
- blkdev_put(bdev, FMODE_READ);
-}
-
ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
size_t mlen)
{
@@ -92,12 +82,12 @@ void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
/*
* Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf.
*/
-struct pnfs_block_dev *
-nfs4_blk_decode_device(struct nfs_server *server,
- struct pnfs_device *dev)
+struct nfs4_deviceid_node *
+bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev,
+ gfp_t gfp_mask)
{
struct pnfs_block_dev *rv;
- struct block_device *bd = NULL;
+ struct block_device *bd;
struct bl_pipe_msg bl_pipe_msg;
struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
struct bl_msg_hdr bl_msg = {
@@ -117,11 +107,9 @@ nfs4_blk_decode_device(struct nfs_server *server,

bl_pipe_msg.bl_wq = &nn->bl_wq;
memset(msg, 0, sizeof(*msg));
- msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
- if (!msg->data) {
- rv = ERR_PTR(-ENOMEM);
+ msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, gfp_mask);
+ if (!msg->data)
goto out;
- }

memcpy(msg->data, &bl_msg, sizeof(bl_msg));
dataptr = (uint8_t *) msg->data;
@@ -140,7 +128,6 @@ nfs4_blk_decode_device(struct nfs_server *server,
rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
if (rc < 0) {
remove_wait_queue(&nn->bl_wq, &wq);
- rv = ERR_PTR(rc);
goto out;
}

@@ -152,7 +139,6 @@ nfs4_blk_decode_device(struct nfs_server *server,
if (reply->status != BL_DEVICE_REQUEST_PROC) {
printk(KERN_WARNING "%s failed to decode device: %d\n",
__func__, reply->status);
- rv = ERR_PTR(-EINVAL);
goto out;
}

@@ -162,51 +148,40 @@ nfs4_blk_decode_device(struct nfs_server *server,
printk(KERN_WARNING "%s failed to open device %d:%d (%ld)\n",
__func__, reply->major, reply->minor,
PTR_ERR(bd));
- rv = ERR_CAST(bd);
goto out;
}

- rv = kzalloc(sizeof(*rv), GFP_NOFS);
- if (!rv) {
- rv = ERR_PTR(-ENOMEM);
+ rv = kzalloc(sizeof(*rv), gfp_mask);
+ if (!rv)
goto out;
- }

- rv->bm_mdev = bd;
- memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid));
- rv->net = net;
+ nfs4_init_deviceid_node(&rv->d_node, server, &dev->dev_id);
+ rv->d_bdev = bd;
+
dprintk("%s Created device %s with bd_block_size %u\n",
__func__,
bd->bd_disk->disk_name,
bd->bd_block_size);

+ kfree(msg->data);
+ return &rv->d_node;
+
out:
kfree(msg->data);
- return rv;
+ return NULL;
}

-/* Map deviceid returned by the server to constructed block_device */
-static struct block_device *translate_devid(struct pnfs_layout_hdr *lo,
- struct nfs4_deviceid *id)
+void
+bl_free_deviceid_node(struct nfs4_deviceid_node *d)
{
- struct block_device *rv = NULL;
- struct block_mount_id *mid;
- struct pnfs_block_dev *dev;
-
- dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
- mid = BLK_ID(lo);
- spin_lock(&mid->bm_lock);
- list_for_each_entry(dev, &mid->bm_devlist, bm_node) {
- if (memcmp(id->data, dev->bm_mdevid.data,
- NFS4_DEVICEID4_SIZE) == 0) {
- rv = dev->bm_mdev;
- goto out;
- }
- }
- out:
- spin_unlock(&mid->bm_lock);
- dprintk("%s returning %p\n", __func__, rv);
- return rv;
+ struct pnfs_block_dev *dev =
+ container_of(d, struct pnfs_block_dev, d_node);
+ struct net *net = d->nfs_client->cl_net;
+
+ blkdev_put(dev->d_bdev, FMODE_READ);
+ bl_dm_remove(net, dev->d_bdev->bd_dev);
+
+ kfree(dev);
}

/* Tracks info needed to ensure extents in layout obey constraints of spec */
@@ -309,15 +284,20 @@ nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
* recovery easier.
*/
for (i = 0; i < count; i++) {
+ struct nfs4_deviceid id;
+
be = kzalloc(sizeof(struct pnfs_block_extent), GFP_NOFS);
if (!be) {
status = -ENOMEM;
goto out_err;
}
- memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE);
+ memcpy(&id, p, NFS4_DEVICEID4_SIZE);
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
- be->be_mdev = translate_devid(lo, &be->be_devid);
- if (!be->be_mdev)
+
+ be->be_device =
+ nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
+ lo->plh_lc_cred, gfp_flags);
+ if (!be->be_device)
goto out_err;

/* The next three values are read in as bytes,
@@ -364,12 +344,14 @@ nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
return status;

out_err:
+ nfs4_put_deviceid_node(be->be_device);
kfree(be);
out_free_list:
while (!list_empty(&extents)) {
be = list_first_entry(&extents, struct pnfs_block_extent,
be_list);
list_del(&be->be_list);
+ nfs4_put_deviceid_node(be->be_device);
kfree(be);
}
goto out;
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 8999cfd..abc2e9e 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -38,7 +38,7 @@

#define NFSDBG_FACILITY NFSDBG_PNFS_LD

-static void dev_remove(struct net *net, dev_t dev)
+void bl_dm_remove(struct net *net, dev_t dev)
{
struct bl_pipe_msg bl_pipe_msg;
struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
@@ -82,27 +82,3 @@ static void dev_remove(struct net *net, dev_t dev)
out:
kfree(msg->data);
}
-
-/*
- * Release meta device
- */
-static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
-{
- dprintk("%s Releasing\n", __func__);
- nfs4_blkdev_put(bdev->bm_mdev);
- dev_remove(bdev->net, bdev->bm_mdev->bd_dev);
-}
-
-void bl_free_block_dev(struct pnfs_block_dev *bdev)
-{
- if (bdev) {
- if (bdev->bm_mdev) {
- dprintk("%s Removing DM device: %d:%d\n",
- __func__,
- MAJOR(bdev->bm_mdev->bd_dev),
- MINOR(bdev->bm_mdev->bd_dev));
- nfs4_blk_metadev_release(bdev);
- }
- kfree(bdev);
- }
-}
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index acdfcb9..59fa1d4 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -69,7 +69,7 @@ ext_can_merge(struct pnfs_block_extent *be1, struct pnfs_block_extent *be2)
{
if (be1->be_state != be2->be_state)
return false;
- if (be1->be_mdev != be2->be_mdev)
+ if (be1->be_device != be2->be_device)
return false;

if (be1->be_f_offset + be1->be_length != be2->be_f_offset)
@@ -94,6 +94,7 @@ ext_try_to_merge_left(struct rb_root *root, struct pnfs_block_extent *be)
if (left && ext_can_merge(left, be)) {
left->be_length += be->be_length;
rb_erase(&be->be_node, root);
+ nfs4_put_deviceid_node(be->be_device);
kfree(be);
return left;
}
@@ -109,6 +110,7 @@ ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)
if (right && ext_can_merge(be, right)) {
be->be_length += right->be_length;
rb_erase(&right->be_node, root);
+ nfs4_put_deviceid_node(right->be_device);
kfree(right);
}

@@ -133,16 +135,14 @@ __ext_tree_insert(struct rb_root *root,
be->be_v_offset = new->be_v_offset;
be->be_length += new->be_length;
be = ext_try_to_merge_left(root, be);
- kfree(new);
- return;
+ goto free_new;
}
p = &(*p)->rb_left;
} else if (new->be_f_offset >= ext_f_end(be)) {
if (merge_ok && ext_can_merge(be, new)) {
be->be_length += new->be_length;
be = ext_try_to_merge_right(root, be);
- kfree(new);
- return;
+ goto free_new;
}
p = &(*p)->rb_right;
} else {
@@ -152,6 +152,10 @@ __ext_tree_insert(struct rb_root *root,

rb_link_node(&new->be_node, parent, p);
rb_insert_color(&new->be_node, root);
+ return;
+free_new:
+ nfs4_put_deviceid_node(new->be_device);
+ kfree(new);
}

static int
@@ -196,9 +200,7 @@ __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
new->be_length = len2;
new->be_state = be->be_state;
new->be_tag = be->be_tag;
- new->be_mdev = be->be_mdev;
- memcpy(&new->be_devid, &be->be_devid,
- sizeof(struct nfs4_deviceid));
+ new->be_device = nfs4_get_deviceid(be->be_device);

__ext_tree_insert(root, new, true);
} else {
@@ -219,6 +221,7 @@ __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
struct pnfs_block_extent *next = ext_tree_next(be);

rb_erase(&be->be_node, root);
+ nfs4_put_deviceid_node(be->be_device);
kfree(be);
be = next;
}
@@ -263,6 +266,7 @@ retry:
__ext_tree_insert(root, new, true);
} else if (new->be_f_offset >= be->be_f_offset) {
if (ext_f_end(new) <= ext_f_end(be)) {
+ nfs4_put_deviceid_node(new->be_device);
kfree(new);
} else {
sector_t new_len = ext_f_end(new) - ext_f_end(be);
@@ -288,6 +292,7 @@ retry:
}

split->be_length = be->be_f_offset - split->be_f_offset;
+ split->be_device = nfs4_get_deviceid(new->be_device);
__ext_tree_insert(root, split, true);

new->be_f_offset += diff;
@@ -378,9 +383,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
new->be_length = orig_len - be->be_length;
new->be_state = be->be_state;
new->be_tag = be->be_tag;
-
- new->be_mdev = be->be_mdev;
- memcpy(&new->be_devid, &be->be_devid, sizeof(struct nfs4_deviceid));
+ new->be_device = nfs4_get_deviceid(be->be_device);

dprintk("%s: got 0x%lx:0x%lx!\n",
__func__, be->be_f_offset, ext_f_end(be));
@@ -498,7 +501,7 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
continue;
}

- p = xdr_encode_opaque_fixed(p, be->be_devid.data,
+ p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
NFS4_DEVICEID4_SIZE);
p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
--
1.9.1