2015-08-25 01:43:09

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 1/4] NFSv4.1/flexfiles: Add refcounting to struct nfs4_ff_layout_mirror

We do want to share mirrors between layout segments, so add a refcount
to enable that.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 36 +++++++++++++++++++++++++---------
fs/nfs/flexfilelayout/flexfilelayout.h | 1 +
2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index cc2c5f7f2bc1..62de0b8038c8 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -135,6 +135,31 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}

+static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
+{
+ struct nfs4_ff_layout_mirror *mirror;
+
+ mirror = kzalloc(sizeof(*mirror), gfp_flags);
+ if (mirror != NULL) {
+ spin_lock_init(&mirror->lock);
+ atomic_set(&mirror->ref, 1);
+ }
+ return mirror;
+}
+
+static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
+{
+ kfree(mirror->fh_versions);
+ nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
+ kfree(mirror);
+}
+
+static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror)
+{
+ if (mirror != NULL && atomic_dec_and_test(&mirror->ref))
+ ff_layout_free_mirror(mirror);
+}
+
static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
{
int i;
@@ -144,11 +169,7 @@ static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
/* normally mirror_ds is freed in
* .free_deviceid_node but we still do it here
* for .alloc_lseg error path */
- if (fls->mirror_array[i]) {
- kfree(fls->mirror_array[i]->fh_versions);
- nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds);
- kfree(fls->mirror_array[i]);
- }
+ ff_layout_put_mirror(fls->mirror_array[i]);
}
kfree(fls->mirror_array);
fls->mirror_array = NULL;
@@ -262,15 +283,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
if (ds_count != 1)
goto out_err_free;

- fls->mirror_array[i] =
- kzalloc(sizeof(struct nfs4_ff_layout_mirror),
- gfp_flags);
+ fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
if (fls->mirror_array[i] == NULL) {
rc = -ENOMEM;
goto out_err_free;
}

- spin_lock_init(&fls->mirror_array[i]->lock);
fls->mirror_array[i]->ds_count = ds_count;
fls->mirror_array[i]->lseg = &fls->generic_hdr;

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f92f9a0a856b..26b8258e256f 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -77,6 +77,7 @@ struct nfs4_ff_layout_mirror {
u32 uid;
u32 gid;
struct rpc_cred *cred;
+ atomic_t ref;
spinlock_t lock;
struct nfs4_ff_layoutstat read_stat;
struct nfs4_ff_layoutstat write_stat;
--
2.4.3



2015-08-25 01:43:11

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 2/4] NFSv4.1/flexfiles: Remove mirror backpointer to lseg.

When we start sharing mirrors between several lsegs, we won't be able to
keep it.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 25 ++++++++++++-------------
fs/nfs/flexfilelayout/flexfilelayout.h | 1 -
2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 62de0b8038c8..f3efff640989 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -290,7 +290,6 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
}

fls->mirror_array[i]->ds_count = ds_count;
- fls->mirror_array[i]->lseg = &fls->generic_hdr;

/* deviceid */
rc = decode_deviceid(&stream, &devid);
@@ -513,7 +512,8 @@ nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
}

static void
-nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
+nfs4_ff_layout_stat_io_start_read(struct inode *inode,
+ struct nfs4_ff_layout_mirror *mirror,
__u64 requested, ktime_t now)
{
bool report;
@@ -524,8 +524,7 @@ nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
spin_unlock(&mirror->lock);

if (report)
- pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode,
- GFP_KERNEL);
+ pnfs_report_layoutstat(inode, GFP_KERNEL);
}

static void
@@ -542,7 +541,8 @@ nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
}

static void
-nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
+nfs4_ff_layout_stat_io_start_write(struct inode *inode,
+ struct nfs4_ff_layout_mirror *mirror,
__u64 requested, ktime_t now)
{
bool report;
@@ -553,8 +553,7 @@ nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
spin_unlock(&mirror->lock);

if (report)
- pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode,
- GFP_NOIO);
+ pnfs_report_layoutstat(inode, GFP_NOIO);
}

static void
@@ -1098,7 +1097,7 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
static int ff_layout_read_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
- nfs4_ff_layout_stat_io_start_read(
+ nfs4_ff_layout_stat_io_start_read(hdr->inode,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count,
task->tk_start);
@@ -1285,7 +1284,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
static int ff_layout_write_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
- nfs4_ff_layout_stat_io_start_write(
+ nfs4_ff_layout_stat_io_start_write(hdr->inode,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count,
task->tk_start);
@@ -1367,7 +1366,7 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
static void ff_layout_commit_prepare_common(struct rpc_task *task,
struct nfs_commit_data *cdata)
{
- nfs4_ff_layout_stat_io_start_write(
+ nfs4_ff_layout_stat_io_start_write(cdata->inode,
FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
0, task->tk_start);
}
@@ -1912,8 +1911,8 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
devinfo->layout_type = LAYOUT_FLEX_FILES;
devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
devinfo->layout_private = mirror;
- /* lseg refcount put in cleanup_layoutstats */
- pnfs_get_lseg(pls);
+ /* mirror refcount put in cleanup_layoutstats */
+ atomic_inc(&mirror->ref);

++(*dev_count);
}
@@ -1965,7 +1964,7 @@ ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
for (i = 0; i < data->args.num_dev; i++) {
mirror = data->args.devinfo[i].layout_private;
data->args.devinfo[i].layout_private = NULL;
- pnfs_put_lseg(mirror->lseg);
+ ff_layout_put_mirror(mirror);
}
}

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 26b8258e256f..fe9d3ff7cf85 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -67,7 +67,6 @@ struct nfs4_ff_layoutstat {
};

struct nfs4_ff_layout_mirror {
- struct pnfs_layout_segment *lseg; /* back pointer */
u32 ds_count;
u32 efficiency;
struct nfs4_ff_layout_ds *mirror_ds;
--
2.4.3


2015-08-25 01:43:12

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 4/4] NFSv4.2/pnfs: Make the layoutstats timer configurable

Allow advanced users to set the layoutstats timer in order to lengthen
or shorten the period between layoutstat transmissions to the server.

Signed-off-by: Trond Myklebust <[email protected]>
---
Documentation/kernel-parameters.txt | 9 +++++++++
fs/nfs/flexfilelayout/flexfilelayout.c | 5 ++++-
fs/nfs/pnfs.c | 4 ++++
fs/nfs/pnfs.h | 3 +++
4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1d6f0459cd7b..30d78b561574 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2279,6 +2279,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
The default parameter value of '0' causes the kernel
not to attempt recovery of lost locks.

+ nfs4.layoutstats_timer =
+ [NFSv4.2] Change the rate at which the kernel sends
+ layoutstats to the pNFS metadata server.
+
+ Setting this to value to 0 causes the kernel to use
+ whatever value is the default set by the layout
+ driver. A non-zero value sets the minimum interval
+ in seconds between layoutstats transmissions.
+
nfsd.nfs4_disable_idmapping=
[NFSv4] When set to the default of '1', the NFSv4
server will return only numeric uids and gids to
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 210352741177..738065674484 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -533,14 +533,17 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
ktime_t now)
{
static const ktime_t notime = {0};
+ s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;

nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
if (ktime_equal(mirror->start_time, notime))
mirror->start_time = now;
if (ktime_equal(mirror->last_report_time, notime))
mirror->last_report_time = now;
+ if (layoutstats_timer != 0)
+ report_interval = (s64)layoutstats_timer * 1000LL;
if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
- FF_LAYOUTSTATS_REPORT_INTERVAL) {
+ report_interval) {
mirror->last_report_time = now;
return true;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 247c5a5d2d6b..3530bb703214 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2285,3 +2285,7 @@ out_put:
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif
+
+unsigned int layoutstats_timer;
+module_param(layoutstats_timer, uint, 0644);
+EXPORT_SYMBOL_GPL(layoutstats_timer);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 02c27f93caf1..d3979dd1037a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -528,12 +528,15 @@ pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
nfss->pnfs_curr_ld->id == src->l_type);
}

+extern unsigned int layoutstats_timer;
+
#ifdef NFS_DEBUG
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
#else
static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id)
{
}
+
#endif /* NFS_DEBUG */
#else /* CONFIG_NFS_V4_1 */

--
2.4.3


2015-08-25 01:43:11

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 3/4] NFSv4.1/flexfile: Ensure uniqueness of mirrors across layout segments

Keep the full list of mirrors in the struct nfs4_ff_layout_mirror so that
they can be shared among the layout segments that use them.
Also ensure that we send out only one copy of the layoutstats per mirror.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 116 ++++++++++++++++++++++++++-------
fs/nfs/flexfilelayout/flexfilelayout.h | 3 +
2 files changed, 95 insertions(+), 24 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f3efff640989..210352741177 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -34,6 +34,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
ffl = kzalloc(sizeof(*ffl), gfp_flags);
if (ffl) {
INIT_LIST_HEAD(&ffl->error_list);
+ INIT_LIST_HEAD(&ffl->mirrors);
return &ffl->generic_hdr;
} else
return NULL;
@@ -135,6 +136,66 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}

+static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
+ const struct nfs4_ff_layout_mirror *m2)
+{
+ int i, j;
+
+ if (m1->fh_versions_cnt != m2->fh_versions_cnt)
+ return false;
+ for (i = 0; i < m1->fh_versions_cnt; i++) {
+ bool found_fh = false;
+ for (j = 0; j < m2->fh_versions_cnt; i++) {
+ if (nfs_compare_fh(&m1->fh_versions[i],
+ &m2->fh_versions[j])) {
+ found_fh = true;
+ break;
+ }
+ }
+ if (!found_fh)
+ return false;
+ }
+ return true;
+}
+
+static struct nfs4_ff_layout_mirror *
+ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
+ struct nfs4_ff_layout_mirror *mirror)
+{
+ struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
+ struct nfs4_ff_layout_mirror *pos;
+ struct inode *inode = lo->plh_inode;
+
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
+ if (mirror->mirror_ds != pos->mirror_ds)
+ continue;
+ if (!ff_mirror_match_fh(mirror, pos))
+ continue;
+ if (atomic_inc_not_zero(&pos->ref)) {
+ spin_unlock(&inode->i_lock);
+ return pos;
+ }
+ }
+ list_add(&mirror->mirrors, &ff_layout->mirrors);
+ mirror->layout = lo;
+ spin_unlock(&inode->i_lock);
+ return mirror;
+}
+
+void
+ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
+{
+ struct inode *inode;
+ if (mirror->layout == NULL)
+ return;
+ inode = mirror->layout->plh_inode;
+ spin_lock(&inode->i_lock);
+ list_del(&mirror->mirrors);
+ spin_unlock(&inode->i_lock);
+ mirror->layout = NULL;
+}
+
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
{
struct nfs4_ff_layout_mirror *mirror;
@@ -143,12 +204,14 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
if (mirror != NULL) {
spin_lock_init(&mirror->lock);
atomic_set(&mirror->ref, 1);
+ INIT_LIST_HEAD(&mirror->mirrors);
}
return mirror;
}

static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
+ ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
kfree(mirror);
@@ -267,6 +330,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
goto out_err_free;

for (i = 0; i < fls->mirror_array_cnt; i++) {
+ struct nfs4_ff_layout_mirror *mirror;
struct nfs4_deviceid devid;
struct nfs4_deviceid_node *idnode;
u32 ds_count;
@@ -355,6 +419,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
if (rc)
goto out_err_free;

+ mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
+ if (mirror != fls->mirror_array[i]) {
+ ff_layout_free_mirror(fls->mirror_array[i]);
+ fls->mirror_array[i] = mirror;
+ }
+
dprintk("%s: uid %d gid %d\n", __func__,
fls->mirror_array[i]->uid,
fls->mirror_array[i]->gid);
@@ -1883,24 +1953,27 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr,
*start = cpu_to_be32((xdr->p - start - 1) * 4);
}

-static bool
+static int
ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
struct pnfs_layout_segment *pls,
- int *dev_count, int dev_limit)
+ int dev_limit)
{
+ struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(pls->pls_layout);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_deviceid_node *dev;
struct nfs42_layoutstat_devinfo *devinfo;
- int i;
+ int i = 0;

- for (i = 0; i < FF_LAYOUT_MIRROR_COUNT(pls); i++) {
- if (*dev_count >= dev_limit)
+ list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {
+ if (i >= dev_limit)
break;
- mirror = FF_LAYOUT_COMP(pls, i);
- if (!mirror || !mirror->mirror_ds)
+ if (!mirror->mirror_ds)
continue;
- dev = FF_LAYOUT_DEVID_NODE(pls, i);
- devinfo = &args->devinfo[*dev_count];
+ /* mirror refcount put in cleanup_layoutstats */
+ if (!atomic_inc_not_zero(&mirror->ref))
+ continue;
+ dev = &mirror->mirror_ds->id_node;
+ devinfo = &args->devinfo[i];
memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
devinfo->offset = pls->pls_range.offset;
devinfo->length = pls->pls_range.length;
@@ -1911,24 +1984,25 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
devinfo->layout_type = LAYOUT_FLEX_FILES;
devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
devinfo->layout_private = mirror;
- /* mirror refcount put in cleanup_layoutstats */
- atomic_inc(&mirror->ref);

- ++(*dev_count);
+ i++;
}
-
- return *dev_count < dev_limit;
+ return i;
}

static int
ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
{
+ struct nfs4_flexfile_layout *ff_layout;
+ struct nfs4_ff_layout_mirror *mirror;
struct pnfs_layout_segment *pls;
int dev_count = 0;

spin_lock(&args->inode->i_lock);
- list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
- dev_count += FF_LAYOUT_MIRROR_COUNT(pls);
+ ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout);
+ list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {
+ if (atomic_read(&mirror->ref) != 0)
+ dev_count ++;
}
spin_unlock(&args->inode->i_lock);
/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
@@ -1937,20 +2011,14 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
__func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV);
dev_count = PNFS_LAYOUTSTATS_MAXDEV;
}
- args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL);
+ args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO);
if (!args->devinfo)
return -ENOMEM;

dev_count = 0;
spin_lock(&args->inode->i_lock);
- list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
- if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count,
- PNFS_LAYOUTSTATS_MAXDEV)) {
- break;
- }
- }
+ args->num_dev = ff_layout_mirror_prepare_stats(args, pls, dev_count);
spin_unlock(&args->inode->i_lock);
- args->num_dev = dev_count;

return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index fe9d3ff7cf85..68cc0d9828f9 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -67,6 +67,8 @@ struct nfs4_ff_layoutstat {
};

struct nfs4_ff_layout_mirror {
+ struct pnfs_layout_hdr *layout;
+ struct list_head mirrors;
u32 ds_count;
u32 efficiency;
struct nfs4_ff_layout_ds *mirror_ds;
@@ -95,6 +97,7 @@ struct nfs4_ff_layout_segment {
struct nfs4_flexfile_layout {
struct pnfs_layout_hdr generic_hdr;
struct pnfs_ds_commit_info commit_info;
+ struct list_head mirrors;
struct list_head error_list; /* nfs4_ff_layout_ds_err */
};

--
2.4.3