2016-04-22 00:52:09

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 0/7] nfs/sunrpc: fix flexfiles credential handling

v2:
- drop generic_match patch and add a shared group_info that we can
use in all acreds
- fix error checking in ff_layout_get_ds_cred callers
- use rcu_access_pointer instead of rcu_dereference in
ff_layout_free_mirror

The way that credentials are handled in the flexfiles client is
currently wrong. It only keeps one set of credentials per mirror object,
but those objects can be shared between read and rw layout segments
If the server hands out different credentials for read and rw layouts,
then you can end up in a situation where the client will end up issuing
writes with the wrong uid.

This is visible by simply mounting a server that does hand out different
creds for ro vs. rw layouts, opening the file for read, then opening it
for write, and then issuing a write. At that point, the DS write calls
will fail with EACCES. The client will then re-fetch the layout, but the
creds are not updated, so the writes still fail. The client is then
stuck at that point. This problem also means that the client is unable
to recover after a layout is fenced, since the creds are never updated.

This patchset cleans up some ugliness in how flexfiles rpc_creds are
handled, adds a place to keep ro and rw creds per mirror, and allows
those creds to be updated when a LAYOUTGET response contains different
ones.

We start with a few sunrpc patches that lay the groundwork to allow the
creds to be accessible via RCU and to fix up a current wart where we can
end up doing a GFP_KERNEL allocation while in a possible memory reclaim
scenario. The flexfiles code is then changed to allow the creds to be
swapped out as credential updates come in. The users of those creds then
ensure that they take references to them in case they are swapped out,
and put those references after the calls are dispatched.

I'd like to see this be considered for the 4.7 merge window.

Jeff Layton (6):
sunrpc: plumb gfp_t parm into crcreate operation
sunrpc: add a get_rpccred_rcu inline
nfs: don't call nfs4_ff_layout_prepare_ds from ff_layout_get_ds_cred
nfs: have ff_layout_get_ds_cred take a reference to the cred
nfs: get a reference to the credential in ff_layout_alloc_lseg
nfs: have flexfiles mirror keep creds for both ro and rw layouts

Weston Andros Adamson (1):
sunrpc: add rpc_lookup_generic_cred

fs/nfs/flexfilelayout/flexfilelayout.c | 76 ++++++++++++++++++++++++------
fs/nfs/flexfilelayout/flexfilelayout.h | 5 +-
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 78 +++++++++++--------------------
include/linux/sunrpc/auth.h | 23 ++++++++-
net/sunrpc/auth.c | 4 +-
net/sunrpc/auth_generic.c | 13 ++++--
net/sunrpc/auth_gss/auth_gss.c | 6 +--
net/sunrpc/auth_unix.c | 6 +--
8 files changed, 130 insertions(+), 81 deletions(-)

--
2.5.5



2016-04-22 00:52:10

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 1/7] sunrpc: plumb gfp_t parm into crcreate operation

We need to be able to call the generic_cred creator from different
contexts. Add a gfp_t parm to the crcreate operation and to
rpcauth_lookup_credcache. For now, we just push the gfp_t parms up
one level to the *_lookup_cred functions.

Signed-off-by: Jeff Layton <[email protected]>
---
include/linux/sunrpc/auth.h | 4 ++--
net/sunrpc/auth.c | 4 ++--
net/sunrpc/auth_generic.c | 6 +++---
net/sunrpc/auth_gss/auth_gss.c | 6 +++---
net/sunrpc/auth_unix.c | 6 +++---
5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 6a241a277249..3b616aa7e4d2 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -127,7 +127,7 @@ struct rpc_authops {
void (*destroy)(struct rpc_auth *);

struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
- struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int);
+ struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t);
int (*list_pseudoflavors)(rpc_authflavor_t *, int);
rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *);
int (*flavor2info)(rpc_authflavor_t,
@@ -178,7 +178,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t,
int rpcauth_get_gssinfo(rpc_authflavor_t,
struct rpcsec_gss_info *);
int rpcauth_list_flavors(rpc_authflavor_t *, int);
-struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
+struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t);
void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 02f53674dc39..e0bb30fd2ed3 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -543,7 +543,7 @@ rpcauth_cache_enforce_limit(void)
*/
struct rpc_cred *
rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
- int flags)
+ int flags, gfp_t gfp)
{
LIST_HEAD(free);
struct rpc_cred_cache *cache = auth->au_credcache;
@@ -580,7 +580,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
if (flags & RPCAUTH_LOOKUP_RCU)
return ERR_PTR(-ECHILD);

- new = auth->au_ops->crcreate(auth, acred, flags);
+ new = auth->au_ops->crcreate(auth, acred, flags, gfp);
if (IS_ERR(new)) {
cred = new;
goto out;
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 41248b1820c7..6ed3e3df43e9 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -77,15 +77,15 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
static struct rpc_cred *
generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
- return rpcauth_lookup_credcache(&generic_auth, acred, flags);
+ return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
}

static struct rpc_cred *
-generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
{
struct generic_cred *gcred;

- gcred = kmalloc(sizeof(*gcred), GFP_KERNEL);
+ gcred = kmalloc(sizeof(*gcred), gfp);
if (gcred == NULL)
return ERR_PTR(-ENOMEM);

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 15612ffa8d57..e64ae93d5b4f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1299,11 +1299,11 @@ gss_destroy_cred(struct rpc_cred *cred)
static struct rpc_cred *
gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
- return rpcauth_lookup_credcache(auth, acred, flags);
+ return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
}

static struct rpc_cred *
-gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
{
struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
struct gss_cred *cred = NULL;
@@ -1313,7 +1313,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
__func__, from_kuid(&init_user_ns, acred->uid),
auth->au_flavor);

- if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
+ if (!(cred = kzalloc(sizeof(*cred), gfp)))
goto out_err;

rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 0d3dd364c22f..9f65452b7cbc 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -52,11 +52,11 @@ unx_destroy(struct rpc_auth *auth)
static struct rpc_cred *
unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
- return rpcauth_lookup_credcache(auth, acred, flags);
+ return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
}

static struct rpc_cred *
-unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
{
struct unx_cred *cred;
unsigned int groups = 0;
@@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
from_kuid(&init_user_ns, acred->uid),
from_kgid(&init_user_ns, acred->gid));

- if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))
+ if (!(cred = kmalloc(sizeof(*cred), gfp)))
return ERR_PTR(-ENOMEM);

rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
--
2.5.5


2016-04-22 00:52:11

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 2/7] sunrpc: add rpc_lookup_generic_cred

From: Weston Andros Adamson <[email protected]>

Add function rpc_lookup_generic_cred, which allows lookups of a generic
credential that's not current_cred().

[jlayton: add gfp_t parm]

Signed-off-by: Jeff Layton <[email protected]>
---
include/linux/sunrpc/auth.h | 1 +
net/sunrpc/auth_generic.c | 7 +++++++
2 files changed, 8 insertions(+)

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 3b616aa7e4d2..16bd8f8fef8c 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -167,6 +167,7 @@ void rpc_destroy_authunix(void);

struct rpc_cred * rpc_lookup_cred(void);
struct rpc_cred * rpc_lookup_cred_nonblock(void);
+struct rpc_cred * rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t);
struct rpc_cred * rpc_lookup_machine_cred(const char *service_name);
int rpcauth_register(const struct rpc_authops *);
int rpcauth_unregister(const struct rpc_authops *);
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 6ed3e3df43e9..54dd3fdead54 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -38,6 +38,13 @@ struct rpc_cred *rpc_lookup_cred(void)
}
EXPORT_SYMBOL_GPL(rpc_lookup_cred);

+struct rpc_cred *
+rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp)
+{
+ return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp);
+}
+EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred);
+
struct rpc_cred *rpc_lookup_cred_nonblock(void)
{
return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
--
2.5.5


2016-04-22 00:52:13

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 4/7] nfs: don't call nfs4_ff_layout_prepare_ds from ff_layout_get_ds_cred

All the callers already call that function before calling into here,
so it ends up being a no-op anyway.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index add0e5a70bd6..a0dbf94d15ae 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -433,16 +433,12 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
struct rpc_cred *mdscred)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
- struct rpc_cred *cred = ERR_PTR(-EINVAL);
-
- if (!nfs4_ff_layout_prepare_ds(lseg, ds_idx, true))
- goto out;
+ struct rpc_cred *cred;

if (mirror && mirror->cred)
cred = mirror->cred;
else
cred = mdscred;
-out:
return cred;
}

--
2.5.5


2016-04-22 00:52:12

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 3/7] sunrpc: add a get_rpccred_rcu inline

Sometimes we might have a RCU managed credential pointer and don't want
to use locking to handle it. Add a function that will take a reference
to the cred iff the refcount is not already zero. Callers can dereference
the pointer under the rcu_read_lock and use that function to take a
reference only if the cred is not on its way to destruction.

Signed-off-by: Jeff Layton <[email protected]>
---
include/linux/sunrpc/auth.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 16bd8f8fef8c..6f36b2bf3e05 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -206,5 +206,23 @@ struct rpc_cred * get_rpccred(struct rpc_cred *cred)
return cred;
}

+/**
+ * get_rpccred_rcu - get a reference to a cred using rcu-protected pointer
+ * @cred: cred of which to take a reference
+ *
+ * In some cases, we may have a pointer to a credential to which we
+ * want to take a reference, but don't already have one. Because these
+ * objects are freed using RCU, we can access the cr_count while its
+ * on its way to destruction and only take a reference if it's not already
+ * zero.
+ */
+static inline struct rpc_cred *
+get_rpccred_rcu(struct rpc_cred *cred)
+{
+ if (atomic_inc_not_zero(&cred->cr_count))
+ return cred;
+ return NULL;
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_AUTH_H */
--
2.5.5


2016-04-22 00:52:15

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 5/7] nfs: have ff_layout_get_ds_cred take a reference to the cred

In later patches, we're going to want to allow the creds to be updated
when we get a new layout with updated creds. Have this function take
a reference to the cred that is later put once the call has been
dispatched.

Also, prepare for this change by ensuring we follow RCU rules when
getting a reference to the cred as well.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 15 +++++++++------
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 30 ++++++++++++++++++++++++++----
2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 0cb1abd535e3..432d5b0009c7 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1712,7 +1712,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
goto out_failed;

ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
- if (IS_ERR(ds_cred))
+ if (!ds_cred)
goto out_failed;

vers = nfs4_ff_layout_ds_version(lseg, idx);
@@ -1737,7 +1737,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
0, RPC_TASK_SOFTCONN);
-
+ put_rpccred(ds_cred);
return PNFS_ATTEMPTED;

out_failed:
@@ -1769,7 +1769,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
return PNFS_NOT_ATTEMPTED;

ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
- if (IS_ERR(ds_cred))
+ if (!ds_cred)
return PNFS_NOT_ATTEMPTED;

vers = nfs4_ff_layout_ds_version(lseg, idx);
@@ -1798,6 +1798,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
sync, RPC_TASK_SOFTCONN);
+ put_rpccred(ds_cred);
return PNFS_ATTEMPTED;
}

@@ -1824,7 +1825,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct rpc_clnt *ds_clnt;
struct rpc_cred *ds_cred;
u32 idx;
- int vers;
+ int vers, ret;
struct nfs_fh *fh;

idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
@@ -1838,7 +1839,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
goto out_err;

ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
- if (IS_ERR(ds_cred))
+ if (!ds_cred)
goto out_err;

vers = nfs4_ff_layout_ds_version(lseg, idx);
@@ -1854,10 +1855,12 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
if (fh)
data->args.fh = fh;

- return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
+ ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
how, RPC_TASK_SOFTCONN);
+ put_rpccred(ds_cred);
+ return ret;
out_err:
pnfs_generic_prepare_to_resend_writes(data);
pnfs_generic_commit_release(data);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index a0dbf94d15ae..baee22929174 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -338,6 +338,25 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
return 0;
}

+static struct rpc_cred *
+ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
+{
+ struct rpc_cred *cred, **pcred;
+
+ pcred = &mirror->cred;
+
+ rcu_read_lock();
+ do {
+ cred = rcu_dereference(*pcred);
+ if (!cred)
+ break;
+
+ cred = get_rpccred_rcu(cred);
+ } while(!cred);
+ rcu_read_unlock();
+ return cred;
+}
+
struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
{
@@ -435,10 +454,13 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
struct rpc_cred *cred;

- if (mirror && mirror->cred)
- cred = mirror->cred;
- else
- cred = mdscred;
+ if (mirror) {
+ cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
+ if (!cred)
+ cred = get_rpccred(mdscred);
+ } else {
+ cred = get_rpccred(mdscred);
+ }
return cred;
}

--
2.5.5


2016-04-22 00:52:16

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 6/7] nfs: get a reference to the credential in ff_layout_alloc_lseg

We're just as likely to have allocation problems here as we would if we
delay looking up the credential like we currently do. Fix the code to
get a rpc_cred reference early, as soon as the mirror is set up.

This allows us to eliminate the mirror early if there is a problem
getting an rpc credential. This also allows us to drop the uid/gid
from the layout_mirror struct as well.

In the event that we find an existing mirror where this one would go, we
swap in the new creds unconditionally, and drop the reference to the old
one.

Note that the old ff_layout_update_mirror_cred function wouldn't set
this pointer unless the DS version was 3, but we don't know what the DS
version is at this point. I'm a little unclear on why it did that as you
still need creds to talk to v4 servers as well. I have the code set
it regardless of the DS version here.

Also note the change to using generic creds instead of calling
lookup_cred directly. With that change, we also need to populate the
group_info pointer in the acred as some functions expect that to never
be NULL. Instead of allocating one every time however, we can allocate
one when the module is loaded and share it since the group_info is
refcounted.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 41 ++++++++++++++++++++++-----
fs/nfs/flexfilelayout/flexfilelayout.h | 2 --
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 47 ++-----------------------------
3 files changed, 36 insertions(+), 54 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 432d5b0009c7..19806caa8ff2 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -26,6 +26,8 @@

#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)

+static struct group_info *ff_zero_group;
+
static struct pnfs_layout_hdr *
ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
@@ -407,8 +409,9 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_deviceid devid;
struct nfs4_deviceid_node *idnode;
- u32 ds_count;
- u32 fh_count;
+ struct auth_cred acred = { .group_info = ff_zero_group };
+ struct rpc_cred *cred;
+ u32 ds_count, fh_count, id;
int j;

rc = -EIO;
@@ -484,24 +487,39 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
fls->mirror_array[i]->fh_versions_cnt = fh_count;

/* user */
- rc = decode_name(&stream, &fls->mirror_array[i]->uid);
+ rc = decode_name(&stream, &id);
if (rc)
goto out_err_free;

+ acred.uid = make_kuid(&init_user_ns, id);
+
/* group */
- rc = decode_name(&stream, &fls->mirror_array[i]->gid);
+ rc = decode_name(&stream, &id);
if (rc)
goto out_err_free;

+ acred.gid = make_kgid(&init_user_ns, id);
+
+ /* find the cred for it */
+ cred = rpc_lookup_generic_cred(&acred, 0, gfp_flags);
+ if (IS_ERR(cred)) {
+ rc = PTR_ERR(cred);
+ goto out_err_free;
+ }
+
+ rcu_assign_pointer(fls->mirror_array[i]->cred, cred);
+
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
if (mirror != fls->mirror_array[i]) {
+ /* swap cred ptrs so free_mirror will clean up old */
+ fls->mirror_array[i]->cred = xchg(&mirror->cred, cred);
ff_layout_free_mirror(fls->mirror_array[i]);
fls->mirror_array[i] = mirror;
}

- dprintk("%s: uid %d gid %d\n", __func__,
- fls->mirror_array[i]->uid,
- fls->mirror_array[i]->gid);
+ dprintk("%s: uid %u gid %u\n", __func__,
+ from_kuid(&init_user_ns, acred.uid),
+ from_kgid(&init_user_ns, acred.gid));
}

p = xdr_inline_decode(&stream, 4);
@@ -2226,6 +2244,11 @@ static int __init nfs4flexfilelayout_init(void)
{
printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
__func__);
+ if (!ff_zero_group) {
+ ff_zero_group = groups_alloc(0);
+ if (!ff_zero_group)
+ return -ENOMEM;
+ }
return pnfs_register_layoutdriver(&flexfilelayout_type);
}

@@ -2234,6 +2257,10 @@ static void __exit nfs4flexfilelayout_exit(void)
printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
__func__);
pnfs_unregister_layoutdriver(&flexfilelayout_type);
+ if (ff_zero_group) {
+ put_group_info(ff_zero_group);
+ ff_zero_group = NULL;
+ }
}

MODULE_ALIAS("nfs-layouttype4-4");
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index dd353bb7dc0a..c29fc853ce74 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -76,8 +76,6 @@ struct nfs4_ff_layout_mirror {
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
- u32 uid;
- u32 gid;
struct rpc_cred *cred;
atomic_t ref;
spinlock_t lock;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index baee22929174..6ddd8a5c5ae0 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -302,42 +302,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
return 0;
}

-/* currently we only support AUTH_NONE and AUTH_SYS */
-static rpc_authflavor_t
-nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror)
-{
- if (mirror->uid == (u32)-1)
- return RPC_AUTH_NULL;
- return RPC_AUTH_UNIX;
-}
-
-/* fetch cred for NFSv3 DS */
-static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
- struct nfs4_pnfs_ds *ds)
-{
- if (ds->ds_clp && !mirror->cred &&
- mirror->mirror_ds->ds_versions[0].version == 3) {
- struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth;
- struct rpc_cred *cred;
- struct auth_cred acred = {
- .uid = make_kuid(&init_user_ns, mirror->uid),
- .gid = make_kgid(&init_user_ns, mirror->gid),
- };
-
- /* AUTH_NULL ignores acred */
- cred = auth->au_ops->lookup_cred(auth, &acred, 0);
- if (IS_ERR(cred)) {
- dprintk("%s: lookup_cred failed with %ld\n",
- __func__, PTR_ERR(cred));
- return PTR_ERR(cred);
- } else {
- if (cmpxchg(&mirror->cred, NULL, cred))
- put_rpccred(cred);
- }
- }
- return 0;
-}
-
static struct rpc_cred *
ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
{
@@ -386,7 +350,6 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
- rpc_authflavor_t flavor;

if (!ff_layout_mirror_valid(lseg, mirror)) {
pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -402,9 +365,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
smp_rmb();
if (ds->ds_clp)
- goto out_update_creds;
-
- flavor = nfs4_ff_layout_choose_authflavor(mirror);
+ goto out;

/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
@@ -413,7 +374,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
dataserver_retrans,
mirror->mirror_ds->ds_versions[0].version,
mirror->mirror_ds->ds_versions[0].minor_version,
- flavor);
+ RPC_AUTH_UNIX);

/* connect success, check rsize/wsize limit */
if (ds->ds_clp) {
@@ -438,11 +399,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
} else
pnfs_error_mark_layout_for_return(ino, lseg);
ds = NULL;
- goto out;
}
-out_update_creds:
- if (ff_layout_update_mirror_cred(mirror, ds))
- ds = NULL;
out:
return ds;
}
--
2.5.5


2016-04-22 00:52:17

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v2 7/7] nfs: have flexfiles mirror keep creds for both ro and rw layouts

A mirror can be shared between multiple layouts, even with different
iomodes. That makes stats gathering simpler, but it causes a problem
when we get different creds in READ vs. RW layouts.

The current code drops the newer credentials onto the floor when this
occurs. That's problematic when you fetch a READ layout first, and then
a RW. If the READ layout doesn't have the correct creds to do a write,
then writes will fail.

We could just overwrite the READ credentials with the RW ones, but that
would break the ability for the server to fence the layout for reads if
things go awry. We need to be able to revert to the earlier READ creds
if the RW layout is returned afterward.

The simplest fix is to just keep two sets of creds per mirror. One for
READ layouts and one for RW, and then use the appropriate set depending
on the iomode of the layout segment.

Also fix up some RCU nits that sparse found.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 30 +++++++++++++++++++++++-------
fs/nfs/flexfilelayout/flexfilelayout.h | 3 ++-
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 7 +++++--
3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 19806caa8ff2..61b27d99df18 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -213,10 +213,16 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)

static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
+ struct rpc_cred *cred;
+
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
- if (mirror->cred)
- put_rpccred(mirror->cred);
+ cred = rcu_access_pointer(mirror->ro_cred);
+ if (cred)
+ put_rpccred(cred);
+ cred = rcu_access_pointer(mirror->rw_cred);
+ if (cred)
+ put_rpccred(cred);
nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
kfree(mirror);
}
@@ -410,7 +416,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
struct nfs4_deviceid devid;
struct nfs4_deviceid_node *idnode;
struct auth_cred acred = { .group_info = ff_zero_group };
- struct rpc_cred *cred;
+ struct rpc_cred __rcu *cred;
u32 ds_count, fh_count, id;
int j;

@@ -501,23 +507,33 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
acred.gid = make_kgid(&init_user_ns, id);

/* find the cred for it */
- cred = rpc_lookup_generic_cred(&acred, 0, gfp_flags);
+ rcu_assign_pointer(cred, rpc_lookup_generic_cred(&acred, 0, gfp_flags));
if (IS_ERR(cred)) {
rc = PTR_ERR(cred);
goto out_err_free;
}

- rcu_assign_pointer(fls->mirror_array[i]->cred, cred);
+ if (lgr->range.iomode == IOMODE_READ)
+ rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
+ else
+ rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);

mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
if (mirror != fls->mirror_array[i]) {
/* swap cred ptrs so free_mirror will clean up old */
- fls->mirror_array[i]->cred = xchg(&mirror->cred, cred);
+ if (lgr->range.iomode == IOMODE_READ) {
+ cred = xchg(&mirror->ro_cred, cred);
+ rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
+ } else {
+ cred = xchg(&mirror->rw_cred, cred);
+ rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
+ }
ff_layout_free_mirror(fls->mirror_array[i]);
fls->mirror_array[i] = mirror;
}

- dprintk("%s: uid %u gid %u\n", __func__,
+ dprintk("%s: iomode %s uid %u gid %u\n", __func__,
+ lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
from_kuid(&init_user_ns, acred.uid),
from_kgid(&init_user_ns, acred.gid));
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index c29fc853ce74..1318c77aeb35 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -76,7 +76,8 @@ struct nfs4_ff_layout_mirror {
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
- struct rpc_cred *cred;
+ struct rpc_cred __rcu *ro_cred;
+ struct rpc_cred __rcu *rw_cred;
atomic_t ref;
spinlock_t lock;
struct nfs4_ff_layoutstat read_stat;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 6ddd8a5c5ae0..56296f3df19c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -305,9 +305,12 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
static struct rpc_cred *
ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
{
- struct rpc_cred *cred, **pcred;
+ struct rpc_cred *cred, __rcu **pcred;

- pcred = &mirror->cred;
+ if (iomode == IOMODE_READ)
+ pcred = &mirror->ro_cred;
+ else
+ pcred = &mirror->rw_cred;

rcu_read_lock();
do {
--
2.5.5


2016-04-27 18:00:40

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] sunrpc: plumb gfp_t parm into crcreate operation

Hi Jeff,

On 04/21/2016 08:51 PM, Jeff Layton wrote:
> We need to be able to call the generic_cred creator from different
> contexts. Add a gfp_t parm to the crcreate operation and to
> rpcauth_lookup_credcache. For now, we just push the gfp_t parms up
> one level to the *_lookup_cred functions.
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> include/linux/sunrpc/auth.h | 4 ++--
> net/sunrpc/auth.c | 4 ++--
> net/sunrpc/auth_generic.c | 6 +++---
> net/sunrpc/auth_gss/auth_gss.c | 6 +++---
> net/sunrpc/auth_unix.c | 6 +++---
> 5 files changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
> index 6a241a277249..3b616aa7e4d2 100644
> --- a/include/linux/sunrpc/auth.h
> +++ b/include/linux/sunrpc/auth.h
> @@ -127,7 +127,7 @@ struct rpc_authops {
> void (*destroy)(struct rpc_auth *);
>
> struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
> - struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int);
> + struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t);
> int (*list_pseudoflavors)(rpc_authflavor_t *, int);
> rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *);
> int (*flavor2info)(rpc_authflavor_t,
> @@ -178,7 +178,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t,
> int rpcauth_get_gssinfo(rpc_authflavor_t,
> struct rpcsec_gss_info *);
> int rpcauth_list_flavors(rpc_authflavor_t *, int);
> -struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
> +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t);
> void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
> struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
> struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
> index 02f53674dc39..e0bb30fd2ed3 100644
> --- a/net/sunrpc/auth.c
> +++ b/net/sunrpc/auth.c
> @@ -543,7 +543,7 @@ rpcauth_cache_enforce_limit(void)
> */
> struct rpc_cred *
> rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
> - int flags)
> + int flags, gfp_t gfp)
> {
> LIST_HEAD(free);
> struct rpc_cred_cache *cache = auth->au_credcache;
> @@ -580,7 +580,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
> if (flags & RPCAUTH_LOOKUP_RCU)
> return ERR_PTR(-ECHILD);
>
> - new = auth->au_ops->crcreate(auth, acred, flags);
> + new = auth->au_ops->crcreate(auth, acred, flags, gfp);
> if (IS_ERR(new)) {
> cred = new;
> goto out;
> diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
> index 41248b1820c7..6ed3e3df43e9 100644
> --- a/net/sunrpc/auth_generic.c
> +++ b/net/sunrpc/auth_generic.c
> @@ -77,15 +77,15 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
> static struct rpc_cred *
> generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> {
> - return rpcauth_lookup_credcache(&generic_auth, acred, flags);
> + return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
> }
>
> static struct rpc_cred *
> -generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> +generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
> {
> struct generic_cred *gcred;
>
> - gcred = kmalloc(sizeof(*gcred), GFP_KERNEL);
> + gcred = kmalloc(sizeof(*gcred), gfp);
> if (gcred == NULL)
> return ERR_PTR(-ENOMEM);
>
> diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
> index 15612ffa8d57..e64ae93d5b4f 100644
> --- a/net/sunrpc/auth_gss/auth_gss.c
> +++ b/net/sunrpc/auth_gss/auth_gss.c
> @@ -1299,11 +1299,11 @@ gss_destroy_cred(struct rpc_cred *cred)
> static struct rpc_cred *
> gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> {
> - return rpcauth_lookup_credcache(auth, acred, flags);
> + return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
> }

I know you're just preserving the original flags here, but with all the GFP_NOFS talk recently I was wondering just how important it is to keep using GFP_NOFS here?

Thanks,
Anna

>
> static struct rpc_cred *
> -gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> +gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
> {
> struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
> struct gss_cred *cred = NULL;
> @@ -1313,7 +1313,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> __func__, from_kuid(&init_user_ns, acred->uid),
> auth->au_flavor);
>
> - if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
> + if (!(cred = kzalloc(sizeof(*cred), gfp)))
> goto out_err;
>
> rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
> diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
> index 0d3dd364c22f..9f65452b7cbc 100644
> --- a/net/sunrpc/auth_unix.c
> +++ b/net/sunrpc/auth_unix.c
> @@ -52,11 +52,11 @@ unx_destroy(struct rpc_auth *auth)
> static struct rpc_cred *
> unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> {
> - return rpcauth_lookup_credcache(auth, acred, flags);
> + return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
> }
>
> static struct rpc_cred *
> -unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> +unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
> {
> struct unx_cred *cred;
> unsigned int groups = 0;
> @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> from_kuid(&init_user_ns, acred->uid),
> from_kgid(&init_user_ns, acred->gid));
>
> - if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))
> + if (!(cred = kmalloc(sizeof(*cred), gfp)))
> return ERR_PTR(-ENOMEM);
>
> rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
>


2016-04-27 18:36:24

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] sunrpc: plumb gfp_t parm into crcreate operation

On Wed, 2016-04-27 at 14:00 -0400, Anna Schumaker wrote:
> Hi Jeff,
>
> On 04/21/2016 08:51 PM, Jeff Layton wrote:
> >
> > We need to be able to call the generic_cred creator from different
> > contexts. Add a gfp_t parm to the crcreate operation and to
> > rpcauth_lookup_credcache. For now, we just push the gfp_t parms up
> > one level to the *_lookup_cred functions.
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> >  include/linux/sunrpc/auth.h    | 4 ++--
> >  net/sunrpc/auth.c              | 4 ++--
> >  net/sunrpc/auth_generic.c      | 6 +++---
> >  net/sunrpc/auth_gss/auth_gss.c | 6 +++---
> >  net/sunrpc/auth_unix.c         | 6 +++---
> >  5 files changed, 13 insertions(+), 13 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
> > index 6a241a277249..3b616aa7e4d2 100644
> > --- a/include/linux/sunrpc/auth.h
> > +++ b/include/linux/sunrpc/auth.h
> > @@ -127,7 +127,7 @@ struct rpc_authops {
> >   void (*destroy)(struct rpc_auth *);
> >  
> >   struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
> > - struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int);
> > + struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t);
> >   int (*list_pseudoflavors)(rpc_authflavor_t *, int);
> >   rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *);
> >   int (*flavor2info)(rpc_authflavor_t,
> > @@ -178,7 +178,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t,
> >  int rpcauth_get_gssinfo(rpc_authflavor_t,
> >   struct rpcsec_gss_info *);
> >  int rpcauth_list_flavors(rpc_authflavor_t *, int);
> > -struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
> > +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t);
> >  void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
> >  struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
> >  struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
> > diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
> > index 02f53674dc39..e0bb30fd2ed3 100644
> > --- a/net/sunrpc/auth.c
> > +++ b/net/sunrpc/auth.c
> > @@ -543,7 +543,7 @@ rpcauth_cache_enforce_limit(void)
> >   */
> >  struct rpc_cred *
> >  rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
> > - int flags)
> > + int flags, gfp_t gfp)
> >  {
> >   LIST_HEAD(free);
> >   struct rpc_cred_cache *cache = auth->au_credcache;
> > @@ -580,7 +580,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
> >   if (flags & RPCAUTH_LOOKUP_RCU)
> >   return ERR_PTR(-ECHILD);
> >  
> > - new = auth->au_ops->crcreate(auth, acred, flags);
> > + new = auth->au_ops->crcreate(auth, acred, flags, gfp);
> >   if (IS_ERR(new)) {
> >   cred = new;
> >   goto out;
> > diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
> > index 41248b1820c7..6ed3e3df43e9 100644
> > --- a/net/sunrpc/auth_generic.c
> > +++ b/net/sunrpc/auth_generic.c
> > @@ -77,15 +77,15 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
> >  static struct rpc_cred *
> >  generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> >  {
> > - return rpcauth_lookup_credcache(&generic_auth, acred, flags);
> > + return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
> >  }
> >  
> >  static struct rpc_cred *
> > -generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> > +generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
> >  {
> >   struct generic_cred *gcred;
> >  
> > - gcred = kmalloc(sizeof(*gcred), GFP_KERNEL);
> > + gcred = kmalloc(sizeof(*gcred), gfp);
> >   if (gcred == NULL)
> >   return ERR_PTR(-ENOMEM);
> >  
> > diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
> > index 15612ffa8d57..e64ae93d5b4f 100644
> > --- a/net/sunrpc/auth_gss/auth_gss.c
> > +++ b/net/sunrpc/auth_gss/auth_gss.c
> > @@ -1299,11 +1299,11 @@ gss_destroy_cred(struct rpc_cred *cred)
> >  static struct rpc_cred *
> >  gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> >  {
> > - return rpcauth_lookup_credcache(auth, acred, flags);
> > + return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
> >  }
> I know you're just preserving the original flags here, but with all the GFP_NOFS talk recently I was wondering just how important it is to keep using GFP_NOFS here?
>
> Thanks,
> Anna
>

(cc'ing Michal in case he has thoughts...)

TBH, I rolled this patch before the summit, so I hadn't considered it. 

AFAIK, it's using GFP_NOFS there to ensure that the cred allocations
don't recurse back into NFS writeback. I don't see anything that would
prevent that if we switched these to GFP_KERNEL without making other
changes.

Note too that I think the NFS case will be quite a bit more complex
than something like XFS. We offload most of the processing to the RPC
state machine, which runs in workqueue context. So even if you set
something like PF_MEMALLOC in at the time you call writepages, you
likely won't have it set when the state machine is actually running.

I think if we want to stop using GFP_NOFS, then we'll need a more
comprehensive plan for NFS/RPC. I don't think we should embark down
that path until the direction is a bit more clear.

> >
> >  
> >  static struct rpc_cred *
> > -gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> > +gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
> >  {
> >   struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
> >   struct gss_cred *cred = NULL;
> > @@ -1313,7 +1313,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> >   __func__, from_kuid(&init_user_ns, acred->uid),
> >   auth->au_flavor);
> >  
> > - if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
> > + if (!(cred = kzalloc(sizeof(*cred), gfp)))
> >   goto out_err;
> >  
> >   rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
> > diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
> > index 0d3dd364c22f..9f65452b7cbc 100644
> > --- a/net/sunrpc/auth_unix.c
> > +++ b/net/sunrpc/auth_unix.c
> > @@ -52,11 +52,11 @@ unx_destroy(struct rpc_auth *auth)
> >  static struct rpc_cred *
> >  unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> >  {
> > - return rpcauth_lookup_credcache(auth, acred, flags);
> > + return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
> >  }
> >  
> >  static struct rpc_cred *
> > -unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> > +unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
> >  {
> >   struct unx_cred *cred;
> >   unsigned int groups = 0;
> > @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
> >   from_kuid(&init_user_ns, acred->uid),
> >   from_kgid(&init_user_ns, acred->gid));
> >  
> > - if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))
> > + if (!(cred = kmalloc(sizeof(*cred), gfp)))
> >   return ERR_PTR(-ENOMEM);
> >  
> >   rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Jeff Layton <[email protected]>

2016-04-28 20:37:26

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v2 6/7] nfs: get a reference to the credential in ff_layout_alloc_lseg

Hi Jeff,

It looks like this patch removes the mirror->uid and mirror->gid parameters that your earlier patch (nfs: don't match flexfiles mirrors that have different creds) makes use of. Should that patch be updated, or does this series make that patch obsolete?

Thanks,
Anna

On 04/21/2016 08:51 PM, Jeff Layton wrote:
> We're just as likely to have allocation problems here as we would if we
> delay looking up the credential like we currently do. Fix the code to
> get a rpc_cred reference early, as soon as the mirror is set up.
>
> This allows us to eliminate the mirror early if there is a problem
> getting an rpc credential. This also allows us to drop the uid/gid
> from the layout_mirror struct as well.
>
> In the event that we find an existing mirror where this one would go, we
> swap in the new creds unconditionally, and drop the reference to the old
> one.
>
> Note that the old ff_layout_update_mirror_cred function wouldn't set
> this pointer unless the DS version was 3, but we don't know what the DS
> version is at this point. I'm a little unclear on why it did that as you
> still need creds to talk to v4 servers as well. I have the code set
> it regardless of the DS version here.
>
> Also note the change to using generic creds instead of calling
> lookup_cred directly. With that change, we also need to populate the
> group_info pointer in the acred as some functions expect that to never
> be NULL. Instead of allocating one every time however, we can allocate
> one when the module is loaded and share it since the group_info is
> refcounted.
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> fs/nfs/flexfilelayout/flexfilelayout.c | 41 ++++++++++++++++++++++-----
> fs/nfs/flexfilelayout/flexfilelayout.h | 2 --
> fs/nfs/flexfilelayout/flexfilelayoutdev.c | 47 ++-----------------------------
> 3 files changed, 36 insertions(+), 54 deletions(-)
>
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
> index 432d5b0009c7..19806caa8ff2 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -26,6 +26,8 @@
>
> #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
>
> +static struct group_info *ff_zero_group;
> +
> static struct pnfs_layout_hdr *
> ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
> {
> @@ -407,8 +409,9 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
> struct nfs4_ff_layout_mirror *mirror;
> struct nfs4_deviceid devid;
> struct nfs4_deviceid_node *idnode;
> - u32 ds_count;
> - u32 fh_count;
> + struct auth_cred acred = { .group_info = ff_zero_group };
> + struct rpc_cred *cred;
> + u32 ds_count, fh_count, id;
> int j;
>
> rc = -EIO;
> @@ -484,24 +487,39 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
> fls->mirror_array[i]->fh_versions_cnt = fh_count;
>
> /* user */
> - rc = decode_name(&stream, &fls->mirror_array[i]->uid);
> + rc = decode_name(&stream, &id);
> if (rc)
> goto out_err_free;
>
> + acred.uid = make_kuid(&init_user_ns, id);
> +
> /* group */
> - rc = decode_name(&stream, &fls->mirror_array[i]->gid);
> + rc = decode_name(&stream, &id);
> if (rc)
> goto out_err_free;
>
> + acred.gid = make_kgid(&init_user_ns, id);
> +
> + /* find the cred for it */
> + cred = rpc_lookup_generic_cred(&acred, 0, gfp_flags);
> + if (IS_ERR(cred)) {
> + rc = PTR_ERR(cred);
> + goto out_err_free;
> + }
> +
> + rcu_assign_pointer(fls->mirror_array[i]->cred, cred);
> +
> mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
> if (mirror != fls->mirror_array[i]) {
> + /* swap cred ptrs so free_mirror will clean up old */
> + fls->mirror_array[i]->cred = xchg(&mirror->cred, cred);
> ff_layout_free_mirror(fls->mirror_array[i]);
> fls->mirror_array[i] = mirror;
> }
>
> - dprintk("%s: uid %d gid %d\n", __func__,
> - fls->mirror_array[i]->uid,
> - fls->mirror_array[i]->gid);
> + dprintk("%s: uid %u gid %u\n", __func__,
> + from_kuid(&init_user_ns, acred.uid),
> + from_kgid(&init_user_ns, acred.gid));
> }
>
> p = xdr_inline_decode(&stream, 4);
> @@ -2226,6 +2244,11 @@ static int __init nfs4flexfilelayout_init(void)
> {
> printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
> __func__);
> + if (!ff_zero_group) {
> + ff_zero_group = groups_alloc(0);
> + if (!ff_zero_group)
> + return -ENOMEM;
> + }
> return pnfs_register_layoutdriver(&flexfilelayout_type);
> }
>
> @@ -2234,6 +2257,10 @@ static void __exit nfs4flexfilelayout_exit(void)
> printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
> __func__);
> pnfs_unregister_layoutdriver(&flexfilelayout_type);
> + if (ff_zero_group) {
> + put_group_info(ff_zero_group);
> + ff_zero_group = NULL;
> + }
> }
>
> MODULE_ALIAS("nfs-layouttype4-4");
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
> index dd353bb7dc0a..c29fc853ce74 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.h
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.h
> @@ -76,8 +76,6 @@ struct nfs4_ff_layout_mirror {
> u32 fh_versions_cnt;
> struct nfs_fh *fh_versions;
> nfs4_stateid stateid;
> - u32 uid;
> - u32 gid;
> struct rpc_cred *cred;
> atomic_t ref;
> spinlock_t lock;
> diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> index baee22929174..6ddd8a5c5ae0 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> @@ -302,42 +302,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
> return 0;
> }
>
> -/* currently we only support AUTH_NONE and AUTH_SYS */
> -static rpc_authflavor_t
> -nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror)
> -{
> - if (mirror->uid == (u32)-1)
> - return RPC_AUTH_NULL;
> - return RPC_AUTH_UNIX;
> -}
> -
> -/* fetch cred for NFSv3 DS */
> -static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
> - struct nfs4_pnfs_ds *ds)
> -{
> - if (ds->ds_clp && !mirror->cred &&
> - mirror->mirror_ds->ds_versions[0].version == 3) {
> - struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth;
> - struct rpc_cred *cred;
> - struct auth_cred acred = {
> - .uid = make_kuid(&init_user_ns, mirror->uid),
> - .gid = make_kgid(&init_user_ns, mirror->gid),
> - };
> -
> - /* AUTH_NULL ignores acred */
> - cred = auth->au_ops->lookup_cred(auth, &acred, 0);
> - if (IS_ERR(cred)) {
> - dprintk("%s: lookup_cred failed with %ld\n",
> - __func__, PTR_ERR(cred));
> - return PTR_ERR(cred);
> - } else {
> - if (cmpxchg(&mirror->cred, NULL, cred))
> - put_rpccred(cred);
> - }
> - }
> - return 0;
> -}
> -
> static struct rpc_cred *
> ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
> {
> @@ -386,7 +350,6 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
> struct inode *ino = lseg->pls_layout->plh_inode;
> struct nfs_server *s = NFS_SERVER(ino);
> unsigned int max_payload;
> - rpc_authflavor_t flavor;
>
> if (!ff_layout_mirror_valid(lseg, mirror)) {
> pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
> @@ -402,9 +365,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
> /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
> smp_rmb();
> if (ds->ds_clp)
> - goto out_update_creds;
> -
> - flavor = nfs4_ff_layout_choose_authflavor(mirror);
> + goto out;
>
> /* FIXME: For now we assume the server sent only one version of NFS
> * to use for the DS.
> @@ -413,7 +374,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
> dataserver_retrans,
> mirror->mirror_ds->ds_versions[0].version,
> mirror->mirror_ds->ds_versions[0].minor_version,
> - flavor);
> + RPC_AUTH_UNIX);
>
> /* connect success, check rsize/wsize limit */
> if (ds->ds_clp) {
> @@ -438,11 +399,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
> } else
> pnfs_error_mark_layout_for_return(ino, lseg);
> ds = NULL;
> - goto out;
> }
> -out_update_creds:
> - if (ff_layout_update_mirror_cred(mirror, ds))
> - ds = NULL;
> out:
> return ds;
> }
>


2016-04-29 10:49:53

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v2 6/7] nfs: get a reference to the credential in ff_layout_alloc_lseg

Oh! The original patch should be dropped. I should have sent a self-NAK
on it before. I'll do that now...

Thanks,
Jeff

On Thu, 2016-04-28 at 16:37 -0400, Anna Schumaker wrote:
> Hi Jeff,
>
> It looks like this patch removes the mirror->uid and mirror->gid
> parameters that your earlier patch (nfs: don't match flexfiles
> mirrors that have different creds) makes use of.  Should that patch
> be updated, or does this series make that patch obsolete?
>
> Thanks,
> Anna
>
> On 04/21/2016 08:51 PM, Jeff Layton wrote:
> > We're just as likely to have allocation problems here as we would
> > if we
> > delay looking up the credential like we currently do. Fix the code
> > to
> > get a rpc_cred reference early, as soon as the mirror is set up.
> >
> > This allows us to eliminate the mirror early if there is a problem
> > getting an rpc credential. This also allows us to drop the uid/gid
> > from the layout_mirror struct as well.
> >
> > In the event that we find an existing mirror where this one would
> > go, we
> > swap in the new creds unconditionally, and drop the reference to
> > the old
> > one.
> >
> > Note that the old ff_layout_update_mirror_cred function wouldn't
> > set
> > this pointer unless the DS version was 3, but we don't know what
> > the DS
> > version is at this point. I'm a little unclear on why it did that
> > as you
> > still need creds to talk to v4 servers as well. I have the code set
> > it regardless of the DS version here.
> >
> > Also note the change to using generic creds instead of calling
> > lookup_cred directly. With that change, we also need to populate
> > the
> > group_info pointer in the acred as some functions expect that to
> > never
> > be NULL. Instead of allocating one every time however, we can
> > allocate
> > one when the module is loaded and share it since the group_info is
> > refcounted.
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> >  fs/nfs/flexfilelayout/flexfilelayout.c    | 41
> > ++++++++++++++++++++++-----
> >  fs/nfs/flexfilelayout/flexfilelayout.h    |  2 --
> >  fs/nfs/flexfilelayout/flexfilelayoutdev.c | 47 ++-----------------
> > ------------
> >  3 files changed, 36 insertions(+), 54 deletions(-)
> >
> > diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c
> > b/fs/nfs/flexfilelayout/flexfilelayout.c
> > index 432d5b0009c7..19806caa8ff2 100644
> > --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> > +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> > @@ -26,6 +26,8 @@
> >  
> >  #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ)
> >  
> > +static struct group_info *ff_zero_group;
> > +
> >  static struct pnfs_layout_hdr *
> >  ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
> >  {
> > @@ -407,8 +409,9 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr
> > *lh,
> >   struct nfs4_ff_layout_mirror *mirror;
> >   struct nfs4_deviceid devid;
> >   struct nfs4_deviceid_node *idnode;
> > - u32 ds_count;
> > - u32 fh_count;
> > + struct auth_cred acred = { .group_info =
> > ff_zero_group };
> > + struct rpc_cred *cred;
> > + u32 ds_count, fh_count, id;
> >   int j;
> >  
> >   rc = -EIO;
> > @@ -484,24 +487,39 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr
> > *lh,
> >   fls->mirror_array[i]->fh_versions_cnt = fh_count;
> >  
> >   /* user */
> > - rc = decode_name(&stream, &fls->mirror_array[i]-
> > >uid);
> > + rc = decode_name(&stream, &id);
> >   if (rc)
> >   goto out_err_free;
> >  
> > + acred.uid = make_kuid(&init_user_ns, id);
> > +
> >   /* group */
> > - rc = decode_name(&stream, &fls->mirror_array[i]-
> > >gid);
> > + rc = decode_name(&stream, &id);
> >   if (rc)
> >   goto out_err_free;
> >  
> > + acred.gid = make_kgid(&init_user_ns, id);
> > +
> > + /* find the cred for it */
> > + cred = rpc_lookup_generic_cred(&acred, 0,
> > gfp_flags);
> > + if (IS_ERR(cred)) {
> > + rc = PTR_ERR(cred);
> > + goto out_err_free;
> > + }
> > +
> > + rcu_assign_pointer(fls->mirror_array[i]->cred,
> > cred);
> > +
> >   mirror = ff_layout_add_mirror(lh, fls-
> > >mirror_array[i]);
> >   if (mirror != fls->mirror_array[i]) {
> > + /* swap cred ptrs so free_mirror will
> > clean up old */
> > + fls->mirror_array[i]->cred = xchg(&mirror-
> > >cred, cred);
> >   ff_layout_free_mirror(fls-
> > >mirror_array[i]);
> >   fls->mirror_array[i] = mirror;
> >   }
> >  
> > - dprintk("%s: uid %d gid %d\n", __func__,
> > - fls->mirror_array[i]->uid,
> > - fls->mirror_array[i]->gid);
> > + dprintk("%s: uid %u gid %u\n", __func__,
> > + from_kuid(&init_user_ns, acred.uid),
> > + from_kgid(&init_user_ns, acred.gid));
> >   }
> >  
> >   p = xdr_inline_decode(&stream, 4);
> > @@ -2226,6 +2244,11 @@ static int __init
> > nfs4flexfilelayout_init(void)
> >  {
> >   printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver
> > Registering...\n",
> >          __func__);
> > + if (!ff_zero_group) {
> > + ff_zero_group = groups_alloc(0);
> > + if (!ff_zero_group)
> > + return -ENOMEM;
> > + }
> >   return pnfs_register_layoutdriver(&flexfilelayout_type);
> >  }
> >  
> > @@ -2234,6 +2257,10 @@ static void __exit
> > nfs4flexfilelayout_exit(void)
> >   printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver
> > Unregistering...\n",
> >          __func__);
> >   pnfs_unregister_layoutdriver(&flexfilelayout_type);
> > + if (ff_zero_group) {
> > + put_group_info(ff_zero_group);
> > + ff_zero_group = NULL;
> > + }
> >  }
> >  
> >  MODULE_ALIAS("nfs-layouttype4-4");
> > diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h
> > b/fs/nfs/flexfilelayout/flexfilelayout.h
> > index dd353bb7dc0a..c29fc853ce74 100644
> > --- a/fs/nfs/flexfilelayout/flexfilelayout.h
> > +++ b/fs/nfs/flexfilelayout/flexfilelayout.h
> > @@ -76,8 +76,6 @@ struct nfs4_ff_layout_mirror {
> >   u32 fh_versions_cnt;
> >   struct nfs_fh *fh_versions;
> >   nfs4_stateid stateid;
> > - u32 uid;
> > - u32 gid;
> >   struct rpc_cred *cred;
> >   atomic_t ref;
> >   spinlock_t lock;
> > diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> > b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> > index baee22929174..6ddd8a5c5ae0 100644
> > --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> > +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
> > @@ -302,42 +302,6 @@ int ff_layout_track_ds_error(struct
> > nfs4_flexfile_layout *flo,
> >   return 0;
> >  }
> >  
> > -/* currently we only support AUTH_NONE and AUTH_SYS */
> > -static rpc_authflavor_t
> > -nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror
> > *mirror)
> > -{
> > - if (mirror->uid == (u32)-1)
> > - return RPC_AUTH_NULL;
> > - return RPC_AUTH_UNIX;
> > -}
> > -
> > -/* fetch cred for NFSv3 DS */
> > -static int ff_layout_update_mirror_cred(struct
> > nfs4_ff_layout_mirror *mirror,
> > -       struct nfs4_pnfs_ds *ds)
> > -{
> > - if (ds->ds_clp && !mirror->cred &&
> > -     mirror->mirror_ds->ds_versions[0].version == 3) {
> > - struct rpc_auth *auth = ds->ds_clp->cl_rpcclient-
> > >cl_auth;
> > - struct rpc_cred *cred;
> > - struct auth_cred acred = {
> > - .uid = make_kuid(&init_user_ns, mirror-
> > >uid),
> > - .gid = make_kgid(&init_user_ns, mirror-
> > >gid),
> > - };
> > -
> > - /* AUTH_NULL ignores acred */
> > - cred = auth->au_ops->lookup_cred(auth, &acred, 0);
> > - if (IS_ERR(cred)) {
> > - dprintk("%s: lookup_cred failed with
> > %ld\n",
> > - __func__, PTR_ERR(cred));
> > - return PTR_ERR(cred);
> > - } else {
> > - if (cmpxchg(&mirror->cred, NULL, cred))
> > - put_rpccred(cred);
> > - }
> > - }
> > - return 0;
> > -}
> > -
> >  static struct rpc_cred *
> >  ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
> > u32 iomode)
> >  {
> > @@ -386,7 +350,6 @@ nfs4_ff_layout_prepare_ds(struct
> > pnfs_layout_segment *lseg, u32 ds_idx,
> >   struct inode *ino = lseg->pls_layout->plh_inode;
> >   struct nfs_server *s = NFS_SERVER(ino);
> >   unsigned int max_payload;
> > - rpc_authflavor_t flavor;
> >  
> >   if (!ff_layout_mirror_valid(lseg, mirror)) {
> >   pr_err_ratelimited("NFS: %s: No data server for
> > offset index %d\n",
> > @@ -402,9 +365,7 @@ nfs4_ff_layout_prepare_ds(struct
> > pnfs_layout_segment *lseg, u32 ds_idx,
> >   /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
> >   smp_rmb();
> >   if (ds->ds_clp)
> > - goto out_update_creds;
> > -
> > - flavor = nfs4_ff_layout_choose_authflavor(mirror);
> > + goto out;
> >  
> >   /* FIXME: For now we assume the server sent only one
> > version of NFS
> >    * to use for the DS.
> > @@ -413,7 +374,7 @@ nfs4_ff_layout_prepare_ds(struct
> > pnfs_layout_segment *lseg, u32 ds_idx,
> >        dataserver_retrans,
> >        mirror->mirror_ds-
> > >ds_versions[0].version,
> >        mirror->mirror_ds-
> > >ds_versions[0].minor_version,
> > -      flavor);
> > +      RPC_AUTH_UNIX);
> >  
> >   /* connect success, check rsize/wsize limit */
> >   if (ds->ds_clp) {
> > @@ -438,11 +399,7 @@ nfs4_ff_layout_prepare_ds(struct
> > pnfs_layout_segment *lseg, u32 ds_idx,
> >   } else
> >   pnfs_error_mark_layout_for_return(ino,
> > lseg);
> >   ds = NULL;
> > - goto out;
> >   }
> > -out_update_creds:
> > - if (ff_layout_update_mirror_cred(mirror, ds))
> > - ds = NULL;
> >  out:
> >   return ds;
> >  }
> >
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
> in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

2016-04-29 12:58:35

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v2 6/7] nfs: get a reference to the credential in ff_layout_alloc_lseg

On 04/29/2016 06:49 AM, Jeff Layton wrote:
> Oh! The original patch should be dropped. I should have sent a self-NAK
> on it before. I'll do that now...

Great, thanks!

Anna

>
> Thanks,
> Jeff
>
> On Thu, 2016-04-28 at 16:37 -0400, Anna Schumaker wrote:
>> Hi Jeff,
>>
>> It looks like this patch removes the mirror->uid and mirror->gid
>> parameters that your earlier patch (nfs: don't match flexfiles
>> mirrors that have different creds) makes use of. Should that patch
>> be updated, or does this series make that patch obsolete?
>>
>> Thanks,
>> Anna
>>
>> On 04/21/2016 08:51 PM, Jeff Layton wrote:
>>> We're just as likely to have allocation problems here as we would
>>> if we
>>> delay looking up the credential like we currently do. Fix the code
>>> to
>>> get a rpc_cred reference early, as soon as the mirror is set up.
>>>
>>> This allows us to eliminate the mirror early if there is a problem
>>> getting an rpc credential. This also allows us to drop the uid/gid
>>> from the layout_mirror struct as well.
>>>
>>> In the event that we find an existing mirror where this one would
>>> go, we
>>> swap in the new creds unconditionally, and drop the reference to
>>> the old
>>> one.
>>>
>>> Note that the old ff_layout_update_mirror_cred function wouldn't
>>> set
>>> this pointer unless the DS version was 3, but we don't know what
>>> the DS
>>> version is at this point. I'm a little unclear on why it did that
>>> as you
>>> still need creds to talk to v4 servers as well. I have the code set
>>> it regardless of the DS version here.
>>>
>>> Also note the change to using generic creds instead of calling
>>> lookup_cred directly. With that change, we also need to populate
>>> the
>>> group_info pointer in the acred as some functions expect that to
>>> never
>>> be NULL. Instead of allocating one every time however, we can
>>> allocate
>>> one when the module is loaded and share it since the group_info is
>>> refcounted.
>>>
>>> Signed-off-by: Jeff Layton <[email protected]>
>>> ---
>>> fs/nfs/flexfilelayout/flexfilelayout.c | 41
>>> ++++++++++++++++++++++-----
>>> fs/nfs/flexfilelayout/flexfilelayout.h | 2 --
>>> fs/nfs/flexfilelayout/flexfilelayoutdev.c | 47 ++-----------------
>>> ------------
>>> 3 files changed, 36 insertions(+), 54 deletions(-)
>>>
>>> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c
>>> b/fs/nfs/flexfilelayout/flexfilelayout.c
>>> index 432d5b0009c7..19806caa8ff2 100644
>>> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
>>> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
>>> @@ -26,6 +26,8 @@
>>>
>>> #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
>>>
>>> +static struct group_info *ff_zero_group;
>>> +
>>> static struct pnfs_layout_hdr *
>>> ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
>>> {
>>> @@ -407,8 +409,9 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr
>>> *lh,
>>> struct nfs4_ff_layout_mirror *mirror;
>>> struct nfs4_deviceid devid;
>>> struct nfs4_deviceid_node *idnode;
>>> - u32 ds_count;
>>> - u32 fh_count;
>>> + struct auth_cred acred = { .group_info =
>>> ff_zero_group };
>>> + struct rpc_cred *cred;
>>> + u32 ds_count, fh_count, id;
>>> int j;
>>>
>>> rc = -EIO;
>>> @@ -484,24 +487,39 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr
>>> *lh,
>>> fls->mirror_array[i]->fh_versions_cnt = fh_count;
>>>
>>> /* user */
>>> - rc = decode_name(&stream, &fls->mirror_array[i]-
>>>> uid);
>>> + rc = decode_name(&stream, &id);
>>> if (rc)
>>> goto out_err_free;
>>>
>>> + acred.uid = make_kuid(&init_user_ns, id);
>>> +
>>> /* group */
>>> - rc = decode_name(&stream, &fls->mirror_array[i]-
>>>> gid);
>>> + rc = decode_name(&stream, &id);
>>> if (rc)
>>> goto out_err_free;
>>>
>>> + acred.gid = make_kgid(&init_user_ns, id);
>>> +
>>> + /* find the cred for it */
>>> + cred = rpc_lookup_generic_cred(&acred, 0,
>>> gfp_flags);
>>> + if (IS_ERR(cred)) {
>>> + rc = PTR_ERR(cred);
>>> + goto out_err_free;
>>> + }
>>> +
>>> + rcu_assign_pointer(fls->mirror_array[i]->cred,
>>> cred);
>>> +
>>> mirror = ff_layout_add_mirror(lh, fls-
>>>> mirror_array[i]);
>>> if (mirror != fls->mirror_array[i]) {
>>> + /* swap cred ptrs so free_mirror will
>>> clean up old */
>>> + fls->mirror_array[i]->cred = xchg(&mirror-
>>>> cred, cred);
>>> ff_layout_free_mirror(fls-
>>>> mirror_array[i]);
>>> fls->mirror_array[i] = mirror;
>>> }
>>>
>>> - dprintk("%s: uid %d gid %d\n", __func__,
>>> - fls->mirror_array[i]->uid,
>>> - fls->mirror_array[i]->gid);
>>> + dprintk("%s: uid %u gid %u\n", __func__,
>>> + from_kuid(&init_user_ns, acred.uid),
>>> + from_kgid(&init_user_ns, acred.gid));
>>> }
>>>
>>> p = xdr_inline_decode(&stream, 4);
>>> @@ -2226,6 +2244,11 @@ static int __init
>>> nfs4flexfilelayout_init(void)
>>> {
>>> printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver
>>> Registering...\n",
>>> __func__);
>>> + if (!ff_zero_group) {
>>> + ff_zero_group = groups_alloc(0);
>>> + if (!ff_zero_group)
>>> + return -ENOMEM;
>>> + }
>>> return pnfs_register_layoutdriver(&flexfilelayout_type);
>>> }
>>>
>>> @@ -2234,6 +2257,10 @@ static void __exit
>>> nfs4flexfilelayout_exit(void)
>>> printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver
>>> Unregistering...\n",
>>> __func__);
>>> pnfs_unregister_layoutdriver(&flexfilelayout_type);
>>> + if (ff_zero_group) {
>>> + put_group_info(ff_zero_group);
>>> + ff_zero_group = NULL;
>>> + }
>>> }
>>>
>>> MODULE_ALIAS("nfs-layouttype4-4");
>>> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h
>>> b/fs/nfs/flexfilelayout/flexfilelayout.h
>>> index dd353bb7dc0a..c29fc853ce74 100644
>>> --- a/fs/nfs/flexfilelayout/flexfilelayout.h
>>> +++ b/fs/nfs/flexfilelayout/flexfilelayout.h
>>> @@ -76,8 +76,6 @@ struct nfs4_ff_layout_mirror {
>>> u32 fh_versions_cnt;
>>> struct nfs_fh *fh_versions;
>>> nfs4_stateid stateid;
>>> - u32 uid;
>>> - u32 gid;
>>> struct rpc_cred *cred;
>>> atomic_t ref;
>>> spinlock_t lock;
>>> diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
>>> b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
>>> index baee22929174..6ddd8a5c5ae0 100644
>>> --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
>>> +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
>>> @@ -302,42 +302,6 @@ int ff_layout_track_ds_error(struct
>>> nfs4_flexfile_layout *flo,
>>> return 0;
>>> }
>>>
>>> -/* currently we only support AUTH_NONE and AUTH_SYS */
>>> -static rpc_authflavor_t
>>> -nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror
>>> *mirror)
>>> -{
>>> - if (mirror->uid == (u32)-1)
>>> - return RPC_AUTH_NULL;
>>> - return RPC_AUTH_UNIX;
>>> -}
>>> -
>>> -/* fetch cred for NFSv3 DS */
>>> -static int ff_layout_update_mirror_cred(struct
>>> nfs4_ff_layout_mirror *mirror,
>>> - struct nfs4_pnfs_ds *ds)
>>> -{
>>> - if (ds->ds_clp && !mirror->cred &&
>>> - mirror->mirror_ds->ds_versions[0].version == 3) {
>>> - struct rpc_auth *auth = ds->ds_clp->cl_rpcclient-
>>>> cl_auth;
>>> - struct rpc_cred *cred;
>>> - struct auth_cred acred = {
>>> - .uid = make_kuid(&init_user_ns, mirror-
>>>> uid),
>>> - .gid = make_kgid(&init_user_ns, mirror-
>>>> gid),
>>> - };
>>> -
>>> - /* AUTH_NULL ignores acred */
>>> - cred = auth->au_ops->lookup_cred(auth, &acred, 0);
>>> - if (IS_ERR(cred)) {
>>> - dprintk("%s: lookup_cred failed with
>>> %ld\n",
>>> - __func__, PTR_ERR(cred));
>>> - return PTR_ERR(cred);
>>> - } else {
>>> - if (cmpxchg(&mirror->cred, NULL, cred))
>>> - put_rpccred(cred);
>>> - }
>>> - }
>>> - return 0;
>>> -}
>>> -
>>> static struct rpc_cred *
>>> ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
>>> u32 iomode)
>>> {
>>> @@ -386,7 +350,6 @@ nfs4_ff_layout_prepare_ds(struct
>>> pnfs_layout_segment *lseg, u32 ds_idx,
>>> struct inode *ino = lseg->pls_layout->plh_inode;
>>> struct nfs_server *s = NFS_SERVER(ino);
>>> unsigned int max_payload;
>>> - rpc_authflavor_t flavor;
>>>
>>> if (!ff_layout_mirror_valid(lseg, mirror)) {
>>> pr_err_ratelimited("NFS: %s: No data server for
>>> offset index %d\n",
>>> @@ -402,9 +365,7 @@ nfs4_ff_layout_prepare_ds(struct
>>> pnfs_layout_segment *lseg, u32 ds_idx,
>>> /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
>>> smp_rmb();
>>> if (ds->ds_clp)
>>> - goto out_update_creds;
>>> -
>>> - flavor = nfs4_ff_layout_choose_authflavor(mirror);
>>> + goto out;
>>>
>>> /* FIXME: For now we assume the server sent only one
>>> version of NFS
>>> * to use for the DS.
>>> @@ -413,7 +374,7 @@ nfs4_ff_layout_prepare_ds(struct
>>> pnfs_layout_segment *lseg, u32 ds_idx,
>>> dataserver_retrans,
>>> mirror->mirror_ds-
>>>> ds_versions[0].version,
>>> mirror->mirror_ds-
>>>> ds_versions[0].minor_version,
>>> - flavor);
>>> + RPC_AUTH_UNIX);
>>>
>>> /* connect success, check rsize/wsize limit */
>>> if (ds->ds_clp) {
>>> @@ -438,11 +399,7 @@ nfs4_ff_layout_prepare_ds(struct
>>> pnfs_layout_segment *lseg, u32 ds_idx,
>>> } else
>>> pnfs_error_mark_layout_for_return(ino,
>>> lseg);
>>> ds = NULL;
>>> - goto out;
>>> }
>>> -out_update_creds:
>>> - if (ff_layout_update_mirror_cred(mirror, ds))
>>> - ds = NULL;
>>> out:
>>> return ds;
>>> }
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
>> in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>