2010-06-04 18:16:02

by Sorin Faibish

[permalink] [raw]
Subject: [pnfs][PATCH 2/3] pnfs-blocklayout client: move device creation and removal out of the kernel


This patch changes how the client decodes pnfs_block_deviceaddr4 (draft-8),
which is XDR encoded in dev->dev_addr_buf. It maps the deviceid returned by
the server to a constructed block_device list of complex volumes. It also
removes device creation from the kernel; creation will instead be done in
user space by the discovery daemon.

Signed-off-by: Sorin Faibish <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 19 +-
fs/nfs/blocklayout/blocklayout.h | 24 ++-
fs/nfs/blocklayout/blocklayoutdev.c | 508 ++++-------------------------------
fs/nfs/blocklayout/blocklayoutdm.c | 296 +++------------------
4 files changed, 110 insertions(+), 737 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index db008e6..123fa2f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -687,8 +687,7 @@ static void free_blk_mountid(struct block_mount_id
*mid)
*/
static struct pnfs_block_dev *
nfs4_blk_get_deviceinfo(struct super_block *sb, struct nfs_fh *fh,
- struct pnfs_deviceid *d_id,
- struct list_head *sdlist)
+ struct pnfs_deviceid *d_id)
{
struct pnfs_device *dev;
struct pnfs_block_dev *rv = NULL;
@@ -739,7 +738,7 @@ nfs4_blk_get_deviceinfo(struct super_block *sb, struct
nfs_fh *fh,
if (rc)
goto out_free;

- rv = nfs4_blk_decode_device(sb, dev, sdlist);
+ rv = nfs4_blk_decode_device(sb, dev);
out_free:
if (used_pages > 1 && dev->area != NULL)
vunmap(dev->area);
@@ -760,7 +759,6 @@ bl_initialize_mountpoint(struct super_block *sb,
struct nfs_fh *fh)
struct pnfs_mount_type *mtype = NULL;
struct pnfs_devicelist *dlist = NULL;
struct pnfs_block_dev *bdev;
- LIST_HEAD(scsi_disklist);
int status, i;

dprintk("%s enter\n", __func__);
@@ -781,13 +779,6 @@ bl_initialize_mountpoint(struct super_block *sb,
struct nfs_fh *fh)
goto out_error;
mtype->mountid = (void *)b_mt_id;

- /* Construct a list of all visible scsi disks that have not been
- * claimed.
- */
- status = nfs4_blk_create_scsi_disk_list(&scsi_disklist);
- if (status < 0)
- goto out_error;
-
dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
if (!dlist)
goto out_error;
@@ -806,8 +797,7 @@ bl_initialize_mountpoint(struct super_block *sb,
struct nfs_fh *fh)
*/
for (i = 0; i < dlist->num_devs; i++) {
bdev = nfs4_blk_get_deviceinfo(sb, fh,
- &dlist->dev_id[i],
- &scsi_disklist);
+ &dlist->dev_id[i]);
if (!bdev)
goto out_error;
spin_lock(&b_mt_id->bm_lock);
@@ -819,7 +809,6 @@ bl_initialize_mountpoint(struct super_block *sb,
struct nfs_fh *fh)

out_return:
kfree(dlist);
- nfs4_blk_destroy_disk_list(&scsi_disklist);
return mtype;

out_error:
@@ -1181,6 +1170,7 @@ static int __init nfs4blocklayout_init(void)
dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);

pnfs_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
+ bl_pipe_init();
return 0;
}

@@ -1190,6 +1180,7 @@ static void __exit nfs4blocklayout_exit(void)
__func__);

pnfs_unregister_layoutdriver(&blocklayout_type);
+ bl_pipe_exit();
}

module_init(nfs4blocklayout_init);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index ca36e61..3cfa149 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -58,7 +58,6 @@ struct block_mount_id {

struct pnfs_block_dev {
struct list_head bm_node;
- char *bm_mdevname; /* meta device name */
struct pnfs_deviceid bm_mdevid; /* associated devid */
struct block_device *bm_mdev; /* meta device itself */
};
@@ -241,16 +240,10 @@ uint32_t *blk_overflow(uint32_t *p, uint32_t *end,
size_t nbytes);
struct block_device *nfs4_blkdev_get(dev_t dev);
int nfs4_blkdev_put(struct block_device *bdev);
struct pnfs_block_dev *nfs4_blk_decode_device(struct super_block *sb,
- struct pnfs_device *dev,
- struct list_head *sdlist);
+ struct pnfs_device *dev);
int nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layoutget_res *lgr);
-int nfs4_blk_create_scsi_disk_list(struct list_head *);
-void nfs4_blk_destroy_disk_list(struct list_head *);
/* blocklayoutdm.c */
-struct pnfs_block_dev *nfs4_blk_init_metadev(struct super_block *sb,
- struct pnfs_device *dev);
-int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev
*);
void free_block_dev(struct pnfs_block_dev *bdev);
/* extents.c */
struct pnfs_block_extent *
@@ -273,4 +266,19 @@ int add_and_merge_extent(struct pnfs_block_layout *bl,
struct pnfs_block_extent *new);
int mark_for_commit(struct pnfs_block_extent *be,
sector_t offset, sector_t length);
+
+#include <linux/sunrpc/simple_rpc_pipefs.h>
+
+extern pipefs_list_t bl_device_list;
+extern struct dentry *bl_device_pipe;
+
+int bl_pipe_init(void);
+void bl_pipe_exit(void);
+
+#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */
+#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/
+#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */
+#define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds
*/
+#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
+
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index ac5c117..c42108a 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -40,8 +40,6 @@

#define NFSDBG_FACILITY NFSDBG_PNFS_LD

-#define MAX_VOLS 256 /* Maximum number of SCSI disks. Totally
arbitrary */
-
uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
{
uint32_t *q = p + XDR_QUADLEN(nbytes);
@@ -78,480 +76,74 @@ int nfs4_blkdev_put(struct block_device *bdev)
return blkdev_put(bdev, FMODE_READ);
}

-/* Add a visible, claimed (by us!) scsi disk to the device list */
-static int alloc_add_disk(struct block_device *blk_dev, struct list_head
*dlist)
-{
- struct visible_block_device *vis_dev;
-
- dprintk("%s enter\n", __func__);
- vis_dev = kmalloc(sizeof(struct visible_block_device), GFP_KERNEL);
- if (!vis_dev) {
- dprintk("%s nfs4_get_sig failed\n", __func__);
- return -ENOMEM;
- }
- vis_dev->vi_bdev = blk_dev;
- vis_dev->vi_mapped = 0;
- vis_dev->vi_put_done = 0;
- list_add(&vis_dev->vi_node, dlist);
- return 0;
-}
-
-/* Walk the list of scsi_devices. Add disks that can be opened and claimed
- * to the device list
- */
-static int
-nfs4_blk_add_scsi_disk(struct Scsi_Host *shost,
- int index, struct list_head *dlist)
-{
- static char *claim_ptr = "I belong to pnfs block driver";
- struct block_device *bdev;
- struct gendisk *gd;
- struct scsi_device *sdev;
- unsigned int major, minor, ret = 0;
- dev_t dev;
-
- dprintk("%s enter \n", __func__);
- if (index >= MAX_VOLS) {
- dprintk("%s MAX_VOLS hit\n", __func__);
- return -ENOSPC;
- }
- dprintk("%s 1 \n", __func__);
- index--;
- shost_for_each_device(sdev, shost) {
- dprintk("%s 2\n", __func__);
- /* Need to do this check before bumping index */
- if (sdev->type != TYPE_DISK)
- continue;
- dprintk("%s 3 index %d \n", __func__, index);
- if (++index >= MAX_VOLS) {
- scsi_device_put(sdev);
- break;
- }
- major = (!(index >> 4) ? SCSI_DISK0_MAJOR :
- SCSI_DISK1_MAJOR-1 + (index >> 4));
- minor = ((index << 4) & 255);
-
- dprintk("%s SCSI device %d:%d \n", __func__, major, minor);
-
- dev = MKDEV(major, minor);
- bdev = nfs4_blkdev_get(dev);
- if (!bdev) {
- dprintk("%s: failed to open device %d:%d\n",
- __func__, major, minor);
- continue;
- }
- gd = bdev->bd_disk;
-
- dprintk("%s 4\n", __func__);
-
- if (bd_claim(bdev, claim_ptr)) {
- dprintk("%s: failed to claim device %d:%d\n",
- __func__, gd->major, gd->first_minor);
- blkdev_put(bdev, FMODE_READ);
- continue;
- }
-
- ret = alloc_add_disk(bdev, dlist);
- if (ret < 0)
- goto out_err;
- dprintk("%s ADDED DEVICE capacity %ld, bd_block_size %d\n",
- __func__,
- (unsigned long)get_capacity(gd),
- bdev->bd_block_size);
-
- }
- index++;
- dprintk("%s returns index %d \n", __func__, index);
- return index;
-
-out_err:
- dprintk("%s Can't add disk to list. ERROR: %d\n", __func__, ret);
- nfs4_blkdev_put(bdev);
- return ret;
-}
-
-/* Destroy the temporary scsi disk list */
-void nfs4_blk_destroy_disk_list(struct list_head *dlist)
-{
- struct visible_block_device *vis_dev;
-
- dprintk("%s enter\n", __func__);
- while (!list_empty(dlist)) {
- vis_dev = list_first_entry(dlist, struct visible_block_device,
- vi_node);
- dprintk("%s removing device %d:%d\n", __func__,
- MAJOR(vis_dev->vi_bdev->bd_dev),
- MINOR(vis_dev->vi_bdev->bd_dev));
- list_del(&vis_dev->vi_node);
- if (!vis_dev->vi_put_done)
- nfs4_blkdev_put(vis_dev->vi_bdev);
- kfree(vis_dev);
- }
-}
-
-struct nfs4_blk_scsi_disk_list_ctl {
- struct list_head *dlist;
- int index;
-};
-
-static int nfs4_blk_iter_scsi_disk_list(struct device *cdev, void *data)
-{
- struct Scsi_Host *shost;
- struct nfs4_blk_scsi_disk_list_ctl *lc = data;
- int ret;
-
- dprintk("%s enter\n", __func__);
- shost = class_to_shost(cdev);
- ret = nfs4_blk_add_scsi_disk(shost, lc->index, lc->dlist);
- dprintk("%s 1 ret %d\n", __func__, ret);
- if (ret >= 0) {
- lc->index = ret;
- ret = 0;
- }
- return ret;
-}
-
-/*
- * Create a temporary list of all SCSI disks host can see, and that have
not
- * yet been claimed.
- * shost_class: list of all registered scsi_hosts
- * returns -errno on error, and #of devices found on success.
- * XXX Loosely emulate scsi_host_lookup from scsi/host.c
-*/
-int nfs4_blk_create_scsi_disk_list(struct list_head *dlist)
-{
- struct nfs4_blk_scsi_disk_list_ctl lc = {
- .dlist = dlist,
- .index = 0,
- };
-
- dprintk("%s enter\n", __func__);
- return class_for_each_device(&shost_class, NULL,
- &lc, nfs4_blk_iter_scsi_disk_list);
-}
-/* We are given an array of XDR encoded array indices, each of which
should
- * refer to a previously decoded device. Translate into a list of
pointers
- * to the appropriate pnfs_blk_volume's.
- */
-static int set_vol_array(uint32_t **pp, uint32_t *end,
- struct pnfs_blk_volume *vols, int working)
-{
- int i, index;
- uint32_t *p = *pp;
- struct pnfs_blk_volume **array = vols[working].bv_vols;
- for (i = 0; i < vols[working].bv_vol_n; i++) {
- BLK_READBUF(p, end, 4);
- READ32(index);
- if ((index < 0) || (index >= working)) {
- dprintk("%s Index %i out of expected range\n",
- __func__, index);
- goto out_err;
- }
- array[i] = &vols[index];
- }
- *pp = p;
- return 0;
- out_err:
- return -EIO;
-}
-
-static uint64_t sum_subvolume_sizes(struct pnfs_blk_volume *vol)
-{
- int i;
- uint64_t sum = 0;
- for (i = 0; i < vol->bv_vol_n; i++)
- sum += vol->bv_vols[i]->bv_size;
- return sum;
-}
-
-static int decode_blk_signature(uint32_t **pp, uint32_t *end,
- struct pnfs_blk_sig *sig)
-{
- int i, tmp;
- uint32_t *p = *pp;
-
- BLK_READBUF(p, end, 4);
- READ32(sig->si_num_comps);
- if (sig->si_num_comps == 0) {
- dprintk("%s 0 components in sig\n", __func__);
- goto out_err;
- }
- if (sig->si_num_comps >= PNFS_BLOCK_MAX_SIG_COMP) {
- dprintk("number of sig comps %i >= PNFS_BLOCK_MAX_SIG_COMP\n",
- sig->si_num_comps);
- goto out_err;
- }
- for (i = 0; i < sig->si_num_comps; i++) {
- BLK_READBUF(p, end, 12);
- READ64(sig->si_comps[i].bs_offset);
- READ32(tmp);
- sig->si_comps[i].bs_length = tmp;
- BLK_READBUF(p, end, tmp);
- /* Note we rely here on fact that sig is used immediately
- * for mapping, then thrown away.
- */
- sig->si_comps[i].bs_string = (char *)p;
- p += XDR_QUADLEN(tmp);
- }
- *pp = p;
- return 0;
- out_err:
- return -EIO;
-}
-
-/* Translate a signature component into a block and offset. */
-static void get_sector(struct block_device *bdev,
- struct pnfs_blk_sig_comp *comp,
- sector_t *block,
- uint32_t *offset_in_block)
-{
- int64_t use_offset = comp->bs_offset;
- unsigned int blkshift = blksize_bits(block_size(bdev));
-
- dprintk("%s enter\n", __func__);
- if (use_offset < 0)
- use_offset += (get_capacity(bdev->bd_disk) << 9);
- *block = use_offset >> blkshift;
- *offset_in_block = use_offset - (*block << blkshift);
-
- dprintk("%s block %llu offset_in_block %u\n",
- __func__, (u64)*block, *offset_in_block);
- return;
-}
-
-/*
- * All signatures in sig must be found on bdev for verification.
- * Returns True if sig matches, False otherwise.
- *
- * STUB - signature crossing a block boundary will cause problems.
- */
-static int verify_sig(struct block_device *bdev, struct pnfs_blk_sig *sig)
-{
- sector_t block = 0;
- struct pnfs_blk_sig_comp *comp;
- struct buffer_head *bh = NULL;
- uint32_t offset_in_block = 0;
- char *ptr;
- int i;
-
- dprintk("%s enter. bd_disk->capacity %ld, bd_block_size %d\n",
- __func__, (unsigned long)get_capacity(bdev->bd_disk),
- bdev->bd_block_size);
- for (i = 0; i < sig->si_num_comps; i++) {
- comp = &sig->si_comps[i];
- dprintk("%s comp->bs_offset %lld, length=%d\n", __func__,
- comp->bs_offset, comp->bs_length);
- get_sector(bdev, comp, &block, &offset_in_block);
- bh = __bread(bdev, block, bdev->bd_block_size);
- if (!bh)
- goto out_err;
- ptr = (char *)bh->b_data + offset_in_block;
- if (memcmp(ptr, comp->bs_string, comp->bs_length))
- goto out_err;
- brelse(bh);
- }
- dprintk("%s Complete Match Found\n", __func__);
- return 1;
-
-out_err:
- brelse(bh);
- dprintk("%s No Match\n", __func__);
- return 0;
-}
-
-/*
- * map_sig_to_device()
- * Given a signature, walk the list of visible scsi disks searching for
- * a match. Returns True if mapping was done, False otherwise.
- *
- * While we're at it, fill in the vol->bv_size.
- */
-/* XXX FRED - use normal 0=success status */
-static int map_sig_to_device(struct pnfs_blk_sig *sig,
- struct pnfs_blk_volume *vol,
- struct list_head *sdlist)
-{
- int mapped = 0;
- struct visible_block_device *vis_dev;
-
- list_for_each_entry(vis_dev, sdlist, vi_node) {
- if (vis_dev->vi_mapped)
- continue;
- mapped = verify_sig(vis_dev->vi_bdev, sig);
- if (mapped) {
- vol->bv_dev = vis_dev->vi_bdev->bd_dev;
- vol->bv_size = get_capacity(vis_dev->vi_bdev->bd_disk);
- vis_dev->vi_mapped = 1;
- /* XXX FRED check this */
- /* We no longer need to scan this device, and
- * we need to "put" it before creating metadevice.
- */
- if (!vis_dev->vi_put_done) {
- vis_dev->vi_put_done = 1;
- nfs4_blkdev_put(vis_dev->vi_bdev);
- }
- break;
- }
- }
- return mapped;
-}
-
-/* XDR decodes pnfs_block_volume4 structure */
-static int decode_blk_volume(uint32_t **pp, uint32_t *end,
- struct pnfs_blk_volume *vols, int i,
- struct list_head *sdlist, int *array_cnt)
-{
- int status = 0;
- struct pnfs_blk_sig sig;
- uint32_t *p = *pp;
- uint64_t tmp; /* Used by READ_SECTOR */
- struct pnfs_blk_volume *vol = &vols[i];
- int j;
- u64 tmp_size;
-
- BLK_READBUF(p, end, 4);
- READ32(vol->bv_type);
- dprintk("%s vol->bv_type = %i\n", __func__, vol->bv_type);
- switch (vol->bv_type) {
- case PNFS_BLOCK_VOLUME_SIMPLE:
- *array_cnt = 0;
- status = decode_blk_signature(&p, end, &sig);
- if (status)
- return status;
- status = map_sig_to_device(&sig, vol, sdlist);
- if (!status) {
- dprintk("Could not find disk for device\n");
- return -EIO;
- }
- status = 0;
- dprintk("%s Set Simple vol to dev %d:%d, size %llu\n",
- __func__,
- MAJOR(vol->bv_dev),
- MINOR(vol->bv_dev),
- (u64)vol->bv_size);
- break;
- case PNFS_BLOCK_VOLUME_SLICE:
- BLK_READBUF(p, end, 16);
- READ_SECTOR(vol->bv_offset);
- READ_SECTOR(vol->bv_size);
- *array_cnt = vol->bv_vol_n = 1;
- status = set_vol_array(&p, end, vols, i);
- break;
- case PNFS_BLOCK_VOLUME_STRIPE:
- BLK_READBUF(p, end, 8);
- READ_SECTOR(vol->bv_stripe_unit);
- BLK_READBUF(p, end, 4);
- READ32(vol->bv_vol_n);
- if (!vol->bv_vol_n)
- return -EIO;
- *array_cnt = vol->bv_vol_n;
- status = set_vol_array(&p, end, vols, i);
- if (status)
- return status;
- /* Ensure all subvolumes are the same size */
- for (j = 1; j < vol->bv_vol_n; j++) {
- if (vol->bv_vols[j]->bv_size !=
- vol->bv_vols[0]->bv_size) {
- dprintk("%s varying subvol size\n", __func__);
- return -EIO;
- }
- }
- /* Make sure total size only includes addressable areas */
- tmp_size = vol->bv_vols[0]->bv_size;
- do_div(tmp_size, (u32)vol->bv_stripe_unit);
- vol->bv_size = vol->bv_vol_n * tmp_size * vol->bv_stripe_unit;
- dprintk("%s Set Stripe vol to size %llu\n",
- __func__, (u64)vol->bv_size);
- break;
- case PNFS_BLOCK_VOLUME_CONCAT:
- BLK_READBUF(p, end, 4);
- READ32(vol->bv_vol_n);
- if (!vol->bv_vol_n)
- return -EIO;
- *array_cnt = vol->bv_vol_n;
- status = set_vol_array(&p, end, vols, i);
- if (status)
- return status;
- vol->bv_size = sum_subvolume_sizes(vol);
- dprintk("%s Set Concat vol to size %llu\n",
- __func__, (u64)vol->bv_size);
- break;
- default:
- dprintk("Unknown volume type %i\n", vol->bv_type);
- out_err:
- return -EIO;
- }
- *pp = p;
- return status;
-}
-
/* Decodes pnfs_block_deviceaddr4 (draft-8) which is XDR encoded
* in dev->dev_addr_buf.
*/
struct pnfs_block_dev *
nfs4_blk_decode_device(struct super_block *sb,
- struct pnfs_device *dev,
- struct list_head *sdlist)
+ struct pnfs_device *dev)
{
- int num_vols, i, status, count;
- struct pnfs_blk_volume *vols, **arrays, **arrays_ptr;
- uint32_t *p = dev->area;
- uint32_t *end = (uint32_t *) ((char *) p + dev->mincount);
struct pnfs_block_dev *rv = NULL;
- struct visible_block_device *vis_dev;
+ struct block_device *bd = NULL;
+ pipefs_hdr_t *msg = NULL, *reply = NULL;
+ uint32_t major, minor;

dprintk("%s enter\n", __func__);

- READ32(num_vols);
- dprintk("%s num_vols = %i\n", __func__, num_vols);
-
- vols = kmalloc(sizeof(struct pnfs_blk_volume) * num_vols, GFP_KERNEL);
- if (!vols)
+ if (IS_ERR(bl_device_pipe))
return NULL;
- /* Each volume in vols array needs its own array. Save time by
- * allocating them all in one large hunk. Because each volume
- * array can only reference previous volumes, and because once
- * a concat or stripe references a volume, it may never be
- * referenced again, the volume arrays are guaranteed to fit
- * in the suprisingly small space allocated.
- */
- arrays = kmalloc(sizeof(struct pnfs_blk_volume *) * num_vols * 2,
- GFP_KERNEL);
- if (!arrays)
- goto out;
- arrays_ptr = arrays;

- list_for_each_entry(vis_dev, sdlist, vi_node) {
- /* Wipe crud left from parsing previous device */
- vis_dev->vi_mapped = 0;
- }
- for (i = 0; i < num_vols; i++) {
- vols[i].bv_vols = arrays_ptr;
- status = decode_blk_volume(&p, end, vols, i, sdlist, &count);
- if (status)
- goto out;
- arrays_ptr += count;
+ msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
+ dev->mincount);
+ if (IS_ERR(msg)) {
+ dprintk("ERROR: couldn't make pipefs message.\n");
+ goto out_err;
}
+ msg->msgid = (u32)(&msg);
+ msg->status = BL_DEVICE_REQUEST_INIT;

- /* Check that we have used up opaque */
- if (p != end) {
- dprintk("Undecoded cruft at end of opaque\n");
- goto out;
+ reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
+ &bl_device_list, 0, 0);
+
+ if (IS_ERR(reply)) {
+ dprintk("ERROR: upcall_waitreply failed\n");
+ goto out_err;
+ }
+ if (reply->status != BL_DEVICE_REQUEST_PROC) {
+ dprintk("%s : reply status is %d\n", __func__, reply->status);
+ goto out_err;
+ }
+ memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
+ memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
+ sizeof(uint32_t));
+ bd = nfs4_blkdev_get(MKDEV(major, minor));
+ if (IS_ERR(bd)) {
+ dprintk("%s failed to open device : %ld\n",
+ __func__, PTR_ERR(bd));
+ goto out_err;
}

- /* Now use info in vols to create the meta device */
- rv = nfs4_blk_init_metadev(sb, dev);
+ rv = kzalloc(sizeof(*rv), GFP_KERNEL);
if (!rv)
- goto out;
- status = nfs4_blk_flatten(vols, num_vols, rv);
- if (status) {
- free_block_dev(rv);
- rv = NULL;
- }
- out:
- kfree(arrays);
- kfree(vols);
+ goto out_err;
+
+ rv->bm_mdev = bd;
+ memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
+ dprintk("%s Created device %s with bd_block_size %u\n",
+ __func__,
+ bd->bd_disk->disk_name,
+ bd->bd_block_size);
+ kfree(reply);
+ kfree(msg);
return rv;
+
+out_err:
+ kfree(rv);
+ if (!IS_ERR(reply))
+ kfree(reply);
+ if (!IS_ERR(msg))
+ kfree(msg);
+ return NULL;
}

/* Map deviceid returned by the server to constructed block_device */
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 4bff748..f094bc1 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -31,6 +31,7 @@
*/

#include <linux/genhd.h> /* gendisk - used in a dprintk*/
+#include <linux/sched.h>

#include "blocklayout.h"

@@ -45,52 +46,44 @@
#define roundup8(x) (((x)+7) & ~7)
#define sizeof8(x) roundup8(sizeof(x))

-/* Given x>=1, return smallest n such that 2**n >= x */
-static unsigned long find_order(int x)
+static int dev_remove(dev_t dev)
{
- unsigned long rv = 0;
- for (x--; x; x >>= 1)
- rv++;
- return rv;
-}
-
-/* Debugging aid */
-static void print_extent(u64 meta_offset, dev_t disk,
- u64 disk_offset, u64 length)
-{
- dprintk("%lli:, %d:%d %lli, %lli\n", meta_offset, MAJOR(disk),
- MINOR(disk), disk_offset, length);
-}
-static int dev_create(const char *name, dev_t *dev)
-{
- struct dm_ioctl ctrl;
- int rv;
-
- memset(&ctrl, 0, sizeof(ctrl));
- strncpy(ctrl.name, name, DM_NAME_LEN-1);
- rv = dm_dev_create(&ctrl); /* XXX - need to pull data out of ctrl */
- dprintk("Tried to create %s, got %i\n", name, rv);
- if (!rv) {
- *dev = huge_decode_dev(ctrl.dev);
- dprintk("dev = (%i, %i)\n", MAJOR(*dev), MINOR(*dev));
+ int ret = 1;
+ pipefs_hdr_t *msg = NULL, *reply = NULL;
+ uint64_t bl_dev;
+ uint32_t major = MAJOR(dev), minor = MINOR(dev);
+
+ dprintk("Entering %s \n", __func__);
+
+ if (IS_ERR(bl_device_pipe))
+ return ret;
+
+ memcpy((void *)&bl_dev, &major, sizeof(uint32_t));
+ memcpy((void *)&bl_dev + sizeof(uint32_t), &minor, sizeof(uint32_t));
+ msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)&bl_dev,
+ sizeof(uint64_t));
+ if (IS_ERR(msg)) {
+ dprintk("ERROR: couldn't make pipefs message.\n");
+ goto out;
+ }
+ msg->msgid = (uint32_t)(&msg);
+ msg->status = BL_DEVICE_REQUEST_INIT;
+
+ reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
+ &bl_device_list, 0, 0);
+ if (IS_ERR(reply)) {
+ dprintk("ERROR: upcall_waitreply failed\n");
+ goto out;
}
- return rv;
-}
-
-static int dev_remove(const char *name)
-{
- struct dm_ioctl ctrl;
- memset(&ctrl, 0, sizeof(ctrl));
- strncpy(ctrl.name, name, DM_NAME_LEN-1);
- return dm_dev_remove(&ctrl);
-}

-static int dev_resume(const char *name)
-{
- struct dm_ioctl ctrl;
- memset(&ctrl, 0, sizeof(ctrl));
- strncpy(ctrl.name, name, DM_NAME_LEN-1);
- return dm_do_resume(&ctrl);
+ if (reply->status == BL_DEVICE_REQUEST_PROC)
+ ret = 0; /*TODO: what to return*/
+out:
+ if (!IS_ERR(reply))
+ kfree(reply);
+ if (!IS_ERR(msg))
+ kfree(msg);
+ return ret;
}

/*
@@ -100,12 +93,12 @@ static int nfs4_blk_metadev_release(struct
pnfs_block_dev *bdev)
{
int rv;

- dprintk("%s Releasing %s\n", __func__, bdev->bm_mdevname);
+ dprintk("%s Releasing \n", __func__);
/* XXX Check return? */
rv = nfs4_blkdev_put(bdev->bm_mdev);
dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);

- rv = dev_remove(bdev->bm_mdevname);
+ rv = dev_remove(bdev->bm_mdev->bd_dev);
dprintk("%s Returns %d\n", __func__, rv);
return rv;
}
@@ -114,9 +107,8 @@ void free_block_dev(struct pnfs_block_dev *bdev)
{
if (bdev) {
if (bdev->bm_mdev) {
- dprintk("%s Removing DM device: %s %d:%d\n",
+ dprintk("%s Removing DM device: %d:%d\n",
__func__,
- bdev->bm_mdevname,
MAJOR(bdev->bm_mdev->bd_dev),
MINOR(bdev->bm_mdev->bd_dev));
/* XXX Check status ?? */
@@ -125,213 +117,3 @@ void free_block_dev(struct pnfs_block_dev *bdev)
kfree(bdev);
}
}
-
-/*
- * Create meta device. Keep it open to use for I/O.
- */
-struct pnfs_block_dev *nfs4_blk_init_metadev(struct super_block *sb,
- struct pnfs_device *dev)
-{
- static uint64_t dev_count; /* STUB used for device names */
- struct block_device *bd;
- dev_t meta_dev;
- struct pnfs_block_dev *rv;
- int status;
-
- dprintk("%s enter\n", __func__);
-
- rv = kmalloc(sizeof(*rv) + 32, GFP_KERNEL);
- if (!rv)
- return NULL;
- rv->bm_mdevname = (char *)rv + sizeof(*rv);
- sprintf(rv->bm_mdevname, "FRED_%llu", dev_count++);
- status = dev_create(rv->bm_mdevname, &meta_dev);
- if (status)
- goto out_err;
- bd = nfs4_blkdev_get(meta_dev);
- if (!bd)
- goto out_err;
- if (bd_claim(bd, sb)) {
- dprintk("%s: failed to claim device %d:%d\n",
- __func__,
- MAJOR(meta_dev),
- MINOR(meta_dev));
- blkdev_put(bd, FMODE_READ);
- goto out_err;
- }
-
- rv->bm_mdev = bd;
- memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
- dprintk("%s Created device %s named %s with bd_block_size %u\n",
- __func__,
- bd->bd_disk->disk_name,
- rv->bm_mdevname,
- bd->bd_block_size);
- return rv;
-
- out_err:
- kfree(rv);
- return NULL;
-}
-
-/*
- * Given a vol_offset into root, returns the disk and disk_offset it
- * corresponds to, as well as the length of the contiguous segment
thereafter.
- * All offsets/lengths are in 512-byte sectors.
- */
-static int nfs4_blk_resolve(int root, struct pnfs_blk_volume *vols,
- u64 vol_offset, dev_t *disk, u64 *disk_offset,
- u64 *length)
-{
- struct pnfs_blk_volume *node;
- u64 node_offset;
-
- /* Walk down device tree until we hit a leaf node (VOLUME_SIMPLE) */
- node = &vols[root];
- node_offset = vol_offset;
- *length = node->bv_size;
- while (1) {
- dprintk("offset=%lli, length=%lli\n",
- node_offset, *length);
- if (node_offset > node->bv_size)
- return -EIO;
- switch (node->bv_type) {
- case PNFS_BLOCK_VOLUME_SIMPLE:
- *disk = node->bv_dev;
- dprintk("%s VOLUME_SIMPLE: node->bv_dev %d:%d\n",
- __func__,
- MAJOR(node->bv_dev),
- MINOR(node->bv_dev));
- *disk_offset = node_offset;
- *length = min(*length, node->bv_size - node_offset);
- return 0;
- case PNFS_BLOCK_VOLUME_SLICE:
- dprintk("%s VOLUME_SLICE:\n", __func__);
- *length = min(*length, node->bv_size - node_offset);
- node_offset += node->bv_offset;
- node = node->bv_vols[0];
- break;
- case PNFS_BLOCK_VOLUME_CONCAT: {
- u64 next = 0, sum = 0;
- int i;
- dprintk("%s VOLUME_CONCAT:\n", __func__);
- for (i = 0; i < node->bv_vol_n; i++) {
- next = sum + node->bv_vols[i]->bv_size;
- if (node_offset < next)
- break;
- sum = next;
- }
- *length = min(*length, next - node_offset);
- node_offset -= sum;
- node = node->bv_vols[i];
- }
- break;
- case PNFS_BLOCK_VOLUME_STRIPE: {
- u64 global_s_no;
- u64 stripe_pos;
- u64 local_s_no;
- u64 disk_number;
-
- dprintk("%s VOLUME_STRIPE:\n", __func__);
- global_s_no = node_offset;
- /* BUG - note this assumes stripe_unit <= 2**32 */
- stripe_pos = (u64) do_div(global_s_no,
- (u32)node->bv_stripe_unit);
- local_s_no = global_s_no;
- disk_number = (u64) do_div(local_s_no,
- (u32) node->bv_vol_n);
- *length = min(*length,
- node->bv_stripe_unit - stripe_pos);
- node_offset = local_s_no * node->bv_stripe_unit +
- stripe_pos;
- node = node->bv_vols[disk_number];
- }
- break;
- default:
- return -EIO;
- }
- }
-}
-
-/*
- * Create an LVM dm device table that represents the volume topology
returned
- * by GETDEVICELIST or GETDEVICEINFO.
- *
- * vols: topology with VOLUME_SIMPLEs mapped to visable scsi disks.
- * size: number of volumes in vols.
- */
-int nfs4_blk_flatten(struct pnfs_blk_volume *vols, int size,
- struct pnfs_block_dev *bdev)
-{
- u64 meta_offset = 0;
- u64 meta_size = vols[size-1].bv_size;
- dev_t disk;
- u64 disk_offset, len;
- int status = 0, count = 0, pages_needed;
- struct dm_ioctl *ctl;
- struct dm_target_spec *spec;
- char *args = NULL;
- unsigned long p;
-
- dprintk("%s enter. mdevname %s number of volumes %d\n", __func__,
- bdev->bm_mdevname, size);
-
- /* We need to reserve memory to store segments, so need to count
- * segments. This means we resolve twice, basically throwing away
- * all info from first run apart from the count. Seems like
- * there should be a better way.
- */
- for (meta_offset = 0; meta_offset < meta_size; meta_offset += len) {
- status = nfs4_blk_resolve(size-1, vols, meta_offset, &disk,
- &disk_offset, &len);
- /* TODO Check status */
- count += 1;
- }
-
- dprintk("%s: Have %i segments\n", __func__, count);
- pages_needed = ((count + SPEC_HEADER_ADJUST) / SPECS_PER_PAGE) + 1;
- dprintk("%s: Need %i pages\n", __func__, pages_needed);
- p = __get_free_pages(GFP_KERNEL, find_order(pages_needed));
- if (!p)
- return -ENOMEM;
- /* A dm_ioctl is placed at the beginning, followed by a series of
- * (dm_target_spec, argument string) pairs.
- */
- ctl = (struct dm_ioctl *) p;
- spec = (struct dm_target_spec *) (p + sizeof8(*ctl));
- memset(ctl, 0, sizeof(*ctl));
- ctl->data_start = (char *) spec - (char *) ctl;
- ctl->target_count = count;
- strncpy(ctl->name, bdev->bm_mdevname, DM_NAME_LEN);
-
- dprintk("%s ctl->name %s\n", __func__, ctl->name);
- for (meta_offset = 0; meta_offset < meta_size; meta_offset += len) {
- status = nfs4_blk_resolve(size-1, vols, meta_offset, &disk,
- &disk_offset, &len);
- if (!len)
- break;
- /* TODO Check status */
- print_extent(meta_offset, disk, disk_offset, len);
- spec->sector_start = meta_offset;
- spec->length = len;
- spec->status = 0;
- strcpy(spec->target_type, "linear");
- args = (char *) (spec + 1);
- sprintf(args, "%i:%i %lli",
- MAJOR(disk), MINOR(disk), disk_offset);
- dprintk("%s args %s\n", __func__, args);
- spec->next = roundup8(sizeof(*spec) + strlen(args) + 1);
- spec = (struct dm_target_spec *) (((char *) spec) + spec->next);
- }
- ctl->data_size = (char *) spec - (char *) ctl;
-
- status = dm_table_load(ctl, ctl->data_size);
- dprintk("%s dm_table_load returns %d\n", __func__, status);
-
- dev_resume(bdev->bm_mdevname);
-
- free_pages(p, find_order(pages_needed));
- dprintk("%s returns %d\n", __func__, status);
- return status;
-}
-
--
1.6.6.1