We were silently accepting dense layouts but using sparse fh selection
and flawed offset calculations.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4filelayout.c | 72 ++++++++++++++++++++-----------------------
fs/nfs/nfs4filelayout.h | 11 +-----
fs/nfs/nfs4filelayoutdev.c | 38 ++++++++++++++++-------
3 files changed, 62 insertions(+), 59 deletions(-)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 8a83c0d..57a0010 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -100,36 +100,26 @@ filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
* offset of the file on the dserver based on whether the
* layout type is STRIPE_DENSE or STRIPE_SPARSE
*/
-loff_t
-filelayout_get_dserver_offset(loff_t offset,
- struct nfs4_filelayout_segment *layout)
+static loff_t
+filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
{
- if (!layout)
- return offset;
+ struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
- switch (layout->stripe_type) {
+ switch (flseg->stripe_type) {
case STRIPE_SPARSE:
return offset;
case STRIPE_DENSE:
{
- u32 stripe_size;
- u32 stripe_unit;
- loff_t off;
- loff_t tmp;
- u32 stripe_unit_idx;
-
- stripe_size = layout->stripe_unit * layout->num_fh;
- /* XXX I do this because do_div seems to take a 32 bit dividend */
- stripe_unit = layout->stripe_unit;
- tmp = off = offset;
-
- do_div(off, stripe_size);
- stripe_unit_idx = do_div(tmp, stripe_unit);
-
- return off * stripe_unit + stripe_unit_idx;
+ u32 stripe_width;
+ u64 tmp, off;
+ u32 unit = flseg->stripe_unit;
+
+ stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
+ tmp = off = offset - flseg->pattern_offset;
+ do_div(tmp, stripe_width);
+ return tmp * unit + do_div(off, unit);
}
-
default:
BUG();
}
@@ -218,18 +208,17 @@ filelayout_read_pagelist(struct pnfs_layout_type *layoutid,
struct nfs_read_data *data)
{
struct inode *inode = PNFS_INODE(layoutid);
- struct nfs4_filelayout_segment *flseg;
+ struct pnfs_layout_segment *lseg = data->pdata.lseg;
struct nfs4_pnfs_ds *ds;
u32 idx;
+ struct nfs_fh *fh;
dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
__func__, inode->i_ino, nr_pages, pgbase, count, offset);
- flseg = LSEG_LD_DATA(data->pdata.lseg);
-
/* Retrieve the correct rpc_client for the byte range */
- idx = nfs4_fl_calc_ds_index(data->pdata.lseg, offset);
- ds = nfs4_fl_prepare_ds(data->pdata.lseg, idx);
+ idx = nfs4_fl_calc_ds_index(lseg, offset);
+ ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
return PNFS_NOT_ATTEMPTED;
@@ -239,7 +228,9 @@ filelayout_read_pagelist(struct pnfs_layout_type *layoutid,
/* just try the first data server for the index..*/
data->fldata.ds_nfs_client = ds->ds_clp;
- data->args.fh = nfs4_fl_select_ds_fh(flseg, idx);
+ fh = nfs4_fl_select_ds_fh(lseg, offset);
+ if (fh)
+ data->args.fh = fh;
/* Now get the file offset on the dserver
* Set the read offset to this offset, and
@@ -247,8 +238,7 @@ filelayout_read_pagelist(struct pnfs_layout_type *layoutid,
* In the case of aync reads, the offset will be reset in the
* call_ops->rpc_call_done() routine.
*/
- data->args.offset = filelayout_get_dserver_offset(offset,
- flseg);
+ data->args.offset = filelayout_get_dserver_offset(lseg, offset);
data->fldata.orig_offset = offset;
/* Perform an asynchronous read */
@@ -269,16 +259,17 @@ filelayout_write_pagelist(struct pnfs_layout_type *layoutid,
struct nfs_write_data *data)
{
struct inode *inode = PNFS_INODE(layoutid);
- struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(data->pdata.lseg);
+ struct pnfs_layout_segment *lseg = data->pdata.lseg;
struct nfs4_pnfs_ds *ds;
u32 idx;
+ struct nfs_fh *fh;
dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu sync %d\n",
__func__, inode->i_ino, nr_pages, pgbase, count, offset, sync);
/* Retrieve the correct rpc_client for the byte range */
- idx = nfs4_fl_calc_ds_index(data->pdata.lseg, offset);
- ds = nfs4_fl_prepare_ds(data->pdata.lseg, idx);
+ idx = nfs4_fl_calc_ds_index(lseg, offset);
+ ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
return PNFS_NOT_ATTEMPTED;
@@ -288,12 +279,14 @@ filelayout_write_pagelist(struct pnfs_layout_type *layoutid,
htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
data->fldata.ds_nfs_client = ds->ds_clp;
- data->args.fh = nfs4_fl_select_ds_fh(flseg, idx);
+ fh = nfs4_fl_select_ds_fh(lseg, offset);
+ if (fh)
+ data->args.fh = fh;
/* Get the file offset on the dserver. Set the write offset to
* this offset and save the original offset.
*/
- data->args.offset = filelayout_get_dserver_offset(offset, flseg);
+ data->args.offset = filelayout_get_dserver_offset(lseg, offset);
data->fldata.orig_offset = offset;
/* Perform an asynchronous write The offset will be reset in the
@@ -649,6 +642,8 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
clnt = NFS_CLIENT(dsdata->inode);
ds = NULL;
} else {
+ struct nfs_fh *fh;
+
call_ops = &filelayout_commit_call_ops;
req = nfs_list_entry(dsdata->pages.next);
ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
@@ -660,9 +655,10 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
}
clnt = ds->ds_clp->cl_rpcclient;
dsdata->fldata.ds_nfs_client = ds->ds_clp;
- dsdata->args.fh = \
- nfs4_fl_select_ds_fh(LSEG_LD_DATA(req->wb_lseg),
- idx);
+ file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
+ fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
+ if (fh)
+ dsdata->args.fh = fh;
}
dprintk("%s: Initiating commit: %llu USE DS:\n",
__func__, file_offset);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 3697926..29e481d 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -80,15 +80,8 @@ struct filelayout_mount_type {
struct super_block *fl_sb;
};
-static inline struct nfs_fh *
-nfs4_fl_select_ds_fh(struct nfs4_filelayout_segment *flseg, u32 idx)
-{
- /* FRED - what about case == 0??? */
- if (flseg->num_fh == 1)
- return &flseg->fh_array[0];
- else
- return &flseg->fh_array[idx];
-}
+extern struct nfs_fh *
+nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
extern struct pnfs_client_operations *pnfs_callback_ops;
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 404dd5f..5842c09 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -558,28 +558,42 @@ nfs4_pnfs_device_item_find(struct nfs_client *clp, struct pnfs_deviceid *id)
* Then: ((res + fsi) % dsaddr->stripe_count)
*/
static inline u32
-_nfs4_fl_calc_j_index(loff_t offset,
- struct nfs4_file_layout_dsaddr *dsaddr,
- struct nfs4_filelayout_segment *layout)
+_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
{
+ struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
u64 tmp;
- tmp = offset - layout->pattern_offset;
- do_div(tmp, layout->stripe_unit);
- tmp += layout->first_stripe_index;
- return do_div(tmp, dsaddr->stripe_count);
+ tmp = offset - flseg->pattern_offset;
+ do_div(tmp, flseg->stripe_unit);
+ tmp += flseg->first_stripe_index;
+ return do_div(tmp, FILE_DSADDR(lseg)->stripe_count);
}
u32
nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
{
- struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
- struct nfs4_file_layout_dsaddr *dsaddr;
u32 j;
- dsaddr = FILE_DSADDR(lseg);
- j = _nfs4_fl_calc_j_index(offset, dsaddr, flseg);
- return dsaddr->stripe_indices[j];
+ j = _nfs4_fl_calc_j_index(lseg, offset);
+ return FILE_DSADDR(lseg)->stripe_indices[j];
+}
+
+struct nfs_fh *
+nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
+{
+ struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
+ u32 i;
+
+ if (flseg->stripe_type == STRIPE_SPARSE) {
+ if (flseg->num_fh == 1)
+ i = 0;
+ else if (flseg->num_fh == 0)
+ return NULL;
+ else
+ i = nfs4_fl_calc_ds_index(lseg, offset);
+ } else
+ i = _nfs4_fl_calc_j_index(lseg, offset);
+ return &flseg->fh_array[i];
}
struct nfs4_pnfs_ds *
--
1.6.6.1
Merged at pnfs-all-2.6.35-rc3-2010-06-17
Thanks!
Benny
On Jun. 16, 2010, 9:33 -0400, Fred Isaman <[email protected]> wrote:
> We were silently accepting dense layouts but using sparse fh selection
> and flawed offset calculations.
>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/nfs4filelayout.c | 72 ++++++++++++++++++++-----------------------
> fs/nfs/nfs4filelayout.h | 11 +-----
> fs/nfs/nfs4filelayoutdev.c | 38 ++++++++++++++++-------
> 3 files changed, 62 insertions(+), 59 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 8a83c0d..57a0010 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -100,36 +100,26 @@ filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
> * offset of the file on the dserver based on whether the
> * layout type is STRIPE_DENSE or STRIPE_SPARSE
> */
> -loff_t
> -filelayout_get_dserver_offset(loff_t offset,
> - struct nfs4_filelayout_segment *layout)
> +static loff_t
> +filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
> {
> - if (!layout)
> - return offset;
> + struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
>
> - switch (layout->stripe_type) {
> + switch (flseg->stripe_type) {
> case STRIPE_SPARSE:
> return offset;
>
> case STRIPE_DENSE:
> {
> - u32 stripe_size;
> - u32 stripe_unit;
> - loff_t off;
> - loff_t tmp;
> - u32 stripe_unit_idx;
> -
> - stripe_size = layout->stripe_unit * layout->num_fh;
> - /* XXX I do this because do_div seems to take a 32 bit dividend */
> - stripe_unit = layout->stripe_unit;
> - tmp = off = offset;
> -
> - do_div(off, stripe_size);
> - stripe_unit_idx = do_div(tmp, stripe_unit);
> -
> - return off * stripe_unit + stripe_unit_idx;
> + u32 stripe_width;
> + u64 tmp, off;
> + u32 unit = flseg->stripe_unit;
> +
> + stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
> + tmp = off = offset - flseg->pattern_offset;
> + do_div(tmp, stripe_width);
> + return tmp * unit + do_div(off, unit);
> }
> -
> default:
> BUG();
> }
> @@ -218,18 +208,17 @@ filelayout_read_pagelist(struct pnfs_layout_type *layoutid,
> struct nfs_read_data *data)
> {
> struct inode *inode = PNFS_INODE(layoutid);
> - struct nfs4_filelayout_segment *flseg;
> + struct pnfs_layout_segment *lseg = data->pdata.lseg;
> struct nfs4_pnfs_ds *ds;
> u32 idx;
> + struct nfs_fh *fh;
>
> dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
> __func__, inode->i_ino, nr_pages, pgbase, count, offset);
>
> - flseg = LSEG_LD_DATA(data->pdata.lseg);
> -
> /* Retrieve the correct rpc_client for the byte range */
> - idx = nfs4_fl_calc_ds_index(data->pdata.lseg, offset);
> - ds = nfs4_fl_prepare_ds(data->pdata.lseg, idx);
> + idx = nfs4_fl_calc_ds_index(lseg, offset);
> + ds = nfs4_fl_prepare_ds(lseg, idx);
> if (!ds) {
> printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
> return PNFS_NOT_ATTEMPTED;
> @@ -239,7 +228,9 @@ filelayout_read_pagelist(struct pnfs_layout_type *layoutid,
>
> /* just try the first data server for the index..*/
> data->fldata.ds_nfs_client = ds->ds_clp;
> - data->args.fh = nfs4_fl_select_ds_fh(flseg, idx);
> + fh = nfs4_fl_select_ds_fh(lseg, offset);
> + if (fh)
> + data->args.fh = fh;
>
> /* Now get the file offset on the dserver
> * Set the read offset to this offset, and
> @@ -247,8 +238,7 @@ filelayout_read_pagelist(struct pnfs_layout_type *layoutid,
> * In the case of aync reads, the offset will be reset in the
> * call_ops->rpc_call_done() routine.
> */
> - data->args.offset = filelayout_get_dserver_offset(offset,
> - flseg);
> + data->args.offset = filelayout_get_dserver_offset(lseg, offset);
> data->fldata.orig_offset = offset;
>
> /* Perform an asynchronous read */
> @@ -269,16 +259,17 @@ filelayout_write_pagelist(struct pnfs_layout_type *layoutid,
> struct nfs_write_data *data)
> {
> struct inode *inode = PNFS_INODE(layoutid);
> - struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(data->pdata.lseg);
> + struct pnfs_layout_segment *lseg = data->pdata.lseg;
> struct nfs4_pnfs_ds *ds;
> u32 idx;
> + struct nfs_fh *fh;
>
> dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu sync %d\n",
> __func__, inode->i_ino, nr_pages, pgbase, count, offset, sync);
>
> /* Retrieve the correct rpc_client for the byte range */
> - idx = nfs4_fl_calc_ds_index(data->pdata.lseg, offset);
> - ds = nfs4_fl_prepare_ds(data->pdata.lseg, idx);
> + idx = nfs4_fl_calc_ds_index(lseg, offset);
> + ds = nfs4_fl_prepare_ds(lseg, idx);
> if (!ds) {
> printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
> return PNFS_NOT_ATTEMPTED;
> @@ -288,12 +279,14 @@ filelayout_write_pagelist(struct pnfs_layout_type *layoutid,
> htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
>
> data->fldata.ds_nfs_client = ds->ds_clp;
> - data->args.fh = nfs4_fl_select_ds_fh(flseg, idx);
> + fh = nfs4_fl_select_ds_fh(lseg, offset);
> + if (fh)
> + data->args.fh = fh;
>
> /* Get the file offset on the dserver. Set the write offset to
> * this offset and save the original offset.
> */
> - data->args.offset = filelayout_get_dserver_offset(offset, flseg);
> + data->args.offset = filelayout_get_dserver_offset(lseg, offset);
> data->fldata.orig_offset = offset;
>
> /* Perform an asynchronous write The offset will be reset in the
> @@ -649,6 +642,8 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
> clnt = NFS_CLIENT(dsdata->inode);
> ds = NULL;
> } else {
> + struct nfs_fh *fh;
> +
> call_ops = &filelayout_commit_call_ops;
> req = nfs_list_entry(dsdata->pages.next);
> ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
> @@ -660,9 +655,10 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
> }
> clnt = ds->ds_clp->cl_rpcclient;
> dsdata->fldata.ds_nfs_client = ds->ds_clp;
> - dsdata->args.fh = \
> - nfs4_fl_select_ds_fh(LSEG_LD_DATA(req->wb_lseg),
> - idx);
> + file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
> + fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
> + if (fh)
> + dsdata->args.fh = fh;
> }
> dprintk("%s: Initiating commit: %llu USE DS:\n",
> __func__, file_offset);
> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
> index 3697926..29e481d 100644
> --- a/fs/nfs/nfs4filelayout.h
> +++ b/fs/nfs/nfs4filelayout.h
> @@ -80,15 +80,8 @@ struct filelayout_mount_type {
> struct super_block *fl_sb;
> };
>
> -static inline struct nfs_fh *
> -nfs4_fl_select_ds_fh(struct nfs4_filelayout_segment *flseg, u32 idx)
> -{
> - /* FRED - what about case == 0??? */
> - if (flseg->num_fh == 1)
> - return &flseg->fh_array[0];
> - else
> - return &flseg->fh_array[idx];
> -}
> +extern struct nfs_fh *
> +nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
>
> extern struct pnfs_client_operations *pnfs_callback_ops;
>
> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
> index 404dd5f..5842c09 100644
> --- a/fs/nfs/nfs4filelayoutdev.c
> +++ b/fs/nfs/nfs4filelayoutdev.c
> @@ -558,28 +558,42 @@ nfs4_pnfs_device_item_find(struct nfs_client *clp, struct pnfs_deviceid *id)
> * Then: ((res + fsi) % dsaddr->stripe_count)
> */
> static inline u32
> -_nfs4_fl_calc_j_index(loff_t offset,
> - struct nfs4_file_layout_dsaddr *dsaddr,
> - struct nfs4_filelayout_segment *layout)
> +_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
> {
> + struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
> u64 tmp;
>
> - tmp = offset - layout->pattern_offset;
> - do_div(tmp, layout->stripe_unit);
> - tmp += layout->first_stripe_index;
> - return do_div(tmp, dsaddr->stripe_count);
> + tmp = offset - flseg->pattern_offset;
> + do_div(tmp, flseg->stripe_unit);
> + tmp += flseg->first_stripe_index;
> + return do_div(tmp, FILE_DSADDR(lseg)->stripe_count);
> }
>
> u32
> nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
> {
> - struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
> - struct nfs4_file_layout_dsaddr *dsaddr;
> u32 j;
>
> - dsaddr = FILE_DSADDR(lseg);
> - j = _nfs4_fl_calc_j_index(offset, dsaddr, flseg);
> - return dsaddr->stripe_indices[j];
> + j = _nfs4_fl_calc_j_index(lseg, offset);
> + return FILE_DSADDR(lseg)->stripe_indices[j];
> +}
> +
> +struct nfs_fh *
> +nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
> +{
> + struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
> + u32 i;
> +
> + if (flseg->stripe_type == STRIPE_SPARSE) {
> + if (flseg->num_fh == 1)
> + i = 0;
> + else if (flseg->num_fh == 0)
> + return NULL;
> + else
> + i = nfs4_fl_calc_ds_index(lseg, offset);
> + } else
> + i = _nfs4_fl_calc_j_index(lseg, offset);
> + return &flseg->fh_array[i];
> }
>
> struct nfs4_pnfs_ds *