In preparation for the next patch, which adds a refcount to p9_req_t,
rename the p9_free_req() function to p9_tag_remove().
In the next patch the actual kfree will be moved to another function.
Signed-off-by: Tomas Bortoli <[email protected]>
Signed-off-by: Dominique Martinet <[email protected]>
---
net/9p/client.c | 100 ++++++++++++++++++++++++++++----------------------------
1 file changed, 50 insertions(+), 50 deletions(-)
diff --git a/net/9p/client.c b/net/9p/client.c
index 6c57ab1294d7..7942c0bfcc5b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -344,13 +344,13 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
EXPORT_SYMBOL(p9_tag_lookup);
/**
- * p9_free_req - Free a request.
+ * p9_tag_remove - Remove a tag.
* @c: Client session.
- * @r: Request to free.
+ * @r: Request of reference.
*
* Context: Any context.
*/
-static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
+static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
{
unsigned long flags;
u16 tag = r->tc.tag;
@@ -379,7 +379,7 @@ static void p9_tag_cleanup(struct p9_client *c)
rcu_read_lock();
idr_for_each_entry(&c->reqs, req, id) {
pr_info("Tag %d still in use\n", id);
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
}
rcu_read_unlock();
}
@@ -647,7 +647,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
if (c->trans_mod->cancelled)
c->trans_mod->cancelled(c, oldreq);
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return 0;
}
@@ -681,7 +681,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
trace_9p_client_req(c, type, req->tc.tag);
return req;
reterr:
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return ERR_PTR(err);
}
@@ -691,7 +691,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
* @type: type of request
* @fmt: protocol format string (see protocol.c)
*
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
*/
static struct p9_req_t *
@@ -767,7 +767,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
if (!err)
return req;
reterr:
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return ERR_PTR(safe_errno(err));
}
@@ -782,7 +782,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
* @hdrlen: reader header size, This is the size of response protocol data
* @fmt: protocol format string (see protocol.c)
*
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
*/
static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
struct iov_iter *uidata,
@@ -849,7 +849,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
if (!err)
return req;
reterr:
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return ERR_PTR(safe_errno(err));
}
@@ -952,7 +952,7 @@ static int p9_client_version(struct p9_client *c)
error:
kfree(version);
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return err;
}
@@ -1094,7 +1094,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1103,7 +1103,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return fid;
error:
@@ -1151,10 +1151,10 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
goto clunk_fid;
}
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
@@ -1229,7 +1229,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
fid->iounit = iounit;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1274,7 +1274,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
ofid->iounit = iounit;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1319,7 +1319,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
fid->iounit = iounit;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1353,7 +1353,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
qid->type, (unsigned long long)qid->path, qid->version);
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1373,7 +1373,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna
return PTR_ERR(req);
p9_debug(P9_DEBUG_9P, "<<< RLINK\n");
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return 0;
}
EXPORT_SYMBOL(p9_client_link);
@@ -1397,7 +1397,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
@@ -1432,7 +1432,7 @@ int p9_client_clunk(struct p9_fid *fid)
p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
/*
* Fid is not valid even after a failed clunk
@@ -1466,7 +1466,7 @@ int p9_client_remove(struct p9_fid *fid)
p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
if (err == -ERESTARTSYS)
p9_client_clunk(fid);
@@ -1493,7 +1493,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
}
p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1545,7 +1545,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
"D", &count, &dataptr);
if (*err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
break;
}
if (rsize < count) {
@@ -1555,7 +1555,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
if (!count) {
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
break;
}
@@ -1565,7 +1565,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
offset += n;
if (n != count) {
*err = -EFAULT;
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
break;
}
} else {
@@ -1573,7 +1573,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
total += count;
offset += count;
}
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
}
return total;
}
@@ -1617,7 +1617,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
*err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
if (*err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
break;
}
if (rsize < count) {
@@ -1627,7 +1627,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
iov_iter_advance(from, count);
total += count;
offset += count;
@@ -1661,7 +1661,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1678,7 +1678,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
from_kgid(&init_user_ns, ret->n_gid),
from_kuid(&init_user_ns, ret->n_muid));
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return ret;
error:
@@ -1714,7 +1714,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1739,7 +1739,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
ret->st_gen, ret->st_data_version);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return ret;
error:
@@ -1808,7 +1808,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1840,7 +1840,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1868,7 +1868,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1879,7 +1879,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree,
sb->fsid, (long int)sb->namelen);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1907,7 +1907,7 @@ int p9_client_rename(struct p9_fid *fid,
p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1937,7 +1937,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
newdirfid->fid, new_name);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1974,10 +1974,10 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
goto clunk_fid;
}
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n",
attr_fid->fid, *attr_size);
return attr_fid;
@@ -2011,7 +2011,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -2074,11 +2074,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
if (non_zc)
memmove(data, dataptr, count);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return count;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -2109,7 +2109,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
(unsigned long long)qid->path, qid->version);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
@@ -2140,7 +2140,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
(unsigned long long)qid->path, qid->version);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
@@ -2173,7 +2173,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
}
p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
@@ -2208,7 +2208,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
"proc_id %d client_id %s\n", glock->type, glock->start,
glock->length, glock->proc_id, glock->client_id);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_getlock_dotl);
@@ -2234,7 +2234,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
}
p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_readlink);
--
2.11.0
To avoid use-after-free(s), use a refcount to keep track of the
usable references to any instantiated struct p9_req_t.
This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
wrappers to kref_put(), kref_get() and kref_get_unless_zero().
These are used by the client and the transports to keep track of
valid requests' references.
p9_req_free() is added and used as the release callback by kref_put().
Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
kmem_cache_free() will not be reused for another type until the rcu
synchronisation period is over, so an address gotten under rcu read
lock is safe to inc_ref() without corrupting random memory while
the lock is held.
Co-developed-by: Dominique Martinet <[email protected]>
Signed-off-by: Tomas Bortoli <[email protected]>
Reported-by: [email protected]
Signed-off-by: Dominique Martinet <[email protected]>
---
include/net/9p/client.h | 14 +++++++++++++
net/9p/client.c | 54 +++++++++++++++++++++++++++++++++++++++++++------
net/9p/trans_fd.c | 11 +++++++++-
net/9p/trans_rdma.c | 1 +
4 files changed, 73 insertions(+), 7 deletions(-)
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 735f3979d559..947a570307a6 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -94,6 +94,7 @@ enum p9_req_status_t {
struct p9_req_t {
int status;
int t_err;
+ struct kref refcount;
wait_queue_head_t wq;
struct p9_fcall tc;
struct p9_fcall rc;
@@ -233,6 +234,19 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
void p9_fcall_fini(struct p9_fcall *fc);
struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+
+static inline void p9_req_get(struct p9_req_t *r)
+{
+ kref_get(&r->refcount);
+}
+
+static inline int p9_req_try_get(struct p9_req_t *r)
+{
+ return kref_get_unless_zero(&r->refcount);
+}
+
+int p9_req_put(struct p9_req_t *r);
+
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
diff --git a/net/9p/client.c b/net/9p/client.c
index 7942c0bfcc5b..c9bb5d41afa4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -310,6 +310,18 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
if (tag < 0)
goto free;
+ /* Init ref to two because in the general case there is one ref
+ * that is put asynchronously by a writer thread, one ref
+ * temporarily given by p9_tag_lookup and put by p9_client_cb
+ * in the recv thread, and one ref put by p9_tag_remove in the
+ * main thread. The only exception is virtio that does not use
+ * p9_tag_lookup but does not have a writer thread either
+ * (the write happens synchronously in the request/zc_request
+ * callback), so p9_client_cb eats the second ref there
+ * as the pointer is duplicated directly by virtqueue_add_sgs()
+ */
+ refcount_set(&req->refcount.refcount, 2);
+
return req;
free:
@@ -333,10 +345,21 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
struct p9_req_t *req;
rcu_read_lock();
+again:
req = idr_find(&c->reqs, tag);
- /* There's no refcount on the req; a malicious server could cause
- * us to dereference a NULL pointer
- */
+ if (req) {
+ /* We have to be careful with the req found under rcu_read_lock
+ * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
+ * ref again without corrupting other data, then check again
+ * that the tag matches once we have the ref
+ */
+ if (!p9_req_try_get(req))
+ goto again;
+ if (req->tc.tag != tag) {
+ p9_req_put(req);
+ goto again;
+ }
+ }
rcu_read_unlock();
return req;
@@ -350,7 +373,7 @@ EXPORT_SYMBOL(p9_tag_lookup);
*
* Context: Any context.
*/
-static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
+static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
{
unsigned long flags;
u16 tag = r->tc.tag;
@@ -359,11 +382,23 @@ static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
spin_lock_irqsave(&c->lock, flags);
idr_remove(&c->reqs, tag);
spin_unlock_irqrestore(&c->lock, flags);
+ return p9_req_put(r);
+}
+
+static void p9_req_free(struct kref *ref)
+{
+ struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
p9_fcall_fini(&r->tc);
p9_fcall_fini(&r->rc);
kmem_cache_free(p9_req_cache, r);
}
+int p9_req_put(struct p9_req_t *r)
+{
+ return kref_put(&r->refcount, p9_req_free);
+}
+EXPORT_SYMBOL(p9_req_put);
+
/**
* p9_tag_cleanup - cleans up tags structure and reclaims resources
* @c: v9fs client struct
@@ -379,7 +414,9 @@ static void p9_tag_cleanup(struct p9_client *c)
rcu_read_lock();
idr_for_each_entry(&c->reqs, req, id) {
pr_info("Tag %d still in use\n", id);
- p9_tag_remove(c, req);
+ if (p9_tag_remove(c, req) == 0)
+ pr_warn("Packet with tag %d has still references",
+ req->tc.tag);
}
rcu_read_unlock();
}
@@ -403,6 +440,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
wake_up(&req->wq);
p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+ p9_req_put(req);
}
EXPORT_SYMBOL(p9_client_cb);
@@ -682,6 +720,8 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
return req;
reterr:
p9_tag_remove(c, req);
+ /* We have to put also the 2nd reference as it won't be used */
+ p9_req_put(req);
return ERR_PTR(err);
}
@@ -716,6 +756,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
err = c->trans_mod->request(c, req);
if (err < 0) {
+ /* write won't happen */
+ p9_req_put(req);
if (err != -ERESTARTSYS && err != -EFAULT)
c->status = Disconnected;
goto recalc_sigpending;
@@ -2241,7 +2283,7 @@ EXPORT_SYMBOL(p9_client_readlink);
int __init p9_client_init(void)
{
- p9_req_cache = KMEM_CACHE(p9_req_t, 0);
+ p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
return p9_req_cache ? 0 : -ENOMEM;
}
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 20f46f13fe83..686e24e355d0 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -132,6 +132,7 @@ struct p9_conn {
struct list_head req_list;
struct list_head unsent_req_list;
struct p9_req_t *req;
+ struct p9_req_t *wreq;
char tmp_buf[7];
struct p9_fcall rc;
int wpos;
@@ -383,6 +384,7 @@ static void p9_read_work(struct work_struct *work)
m->rc.sdata = NULL;
m->rc.offset = 0;
m->rc.capacity = 0;
+ p9_req_put(m->req);
m->req = NULL;
}
@@ -472,6 +474,8 @@ static void p9_write_work(struct work_struct *work)
m->wbuf = req->tc.sdata;
m->wsize = req->tc.size;
m->wpos = 0;
+ p9_req_get(req);
+ m->wreq = req;
spin_unlock(&m->client->lock);
}
@@ -492,8 +496,11 @@ static void p9_write_work(struct work_struct *work)
}
m->wpos += err;
- if (m->wpos == m->wsize)
+ if (m->wpos == m->wsize) {
m->wpos = m->wsize = 0;
+ p9_req_put(m->wreq);
+ m->wreq = NULL;
+ }
end_clear:
clear_bit(Wworksched, &m->wsched);
@@ -694,6 +701,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
if (req->status == REQ_STATUS_UNSENT) {
list_del(&req->req_list);
req->status = REQ_STATUS_FLSHD;
+ p9_req_put(req);
ret = 0;
}
spin_unlock(&client->lock);
@@ -711,6 +719,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
spin_lock(&client->lock);
list_del(&req->req_list);
spin_unlock(&client->lock);
+ p9_req_put(req);
return 0;
}
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index c60655c90c9e..8cff368a11e3 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -365,6 +365,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
c->busa, c->req->tc.size,
DMA_TO_DEVICE);
up(&rdma->sq_sem);
+ p9_req_put(c->req);
kfree(c);
}
--
2.11.0
Tomas Bortoli wrote on Tue, Aug 14, 2018:
> To avoid use-after-free(s), use a refcount to keep track of the
> usable references to any instantiated struct p9_req_t.
>
> This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
> wrappers to kref_put(), kref_get() and kref_get_unless_zero().
> These are used by the client and the transports to keep track of
> valid requests' references.
>
> p9_free_req() is added back and used as callback by kref_put().
>
> Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
> kmem_cache_free() will not be reused for another type until the rcu
> synchronisation period is over, so an address gotten under rcu read
> lock is safe to inc_ref() without corrupting random memory while
> the lock is held.
FWIW, since 4.19-rc1 has been tagged I was going to push this and all
the prerequisites to linux-next, but I've managed to leak some requests
by interrupting them in trans_virtio.
I think I've found why (see below), so I'll push a fixed version after
some more testing and another thorough read -- at some point today, but
this hasn't been 'approved' explicitly so please review! :)
(Jun, I think you'll need to ask again to rename 'req' to 'rreq' if you
think it's important -- I think such a rename should go in a separate
patch anyway, there's plenty of time until the 4.20 merge window)
By "all the prerequisites" I mean this patch series:
* 9p: Use a slab for allocating requests
* 9p: Remove p9_idpool
* net/9p: embed fcall in req to round down buffer allocs
* net/9p: add a per-client fcall kmem_cache
* 9p: rename p9_free_req() function
* 9p: Add refcount to p9_req_t
All the other patches have had some review though, I was just waiting for
the start of this cycle, but if someone has any issue with the above
patches now is a good time to say.
> diff --git a/net/9p/client.c b/net/9p/client.c
> index 7942c0bfcc5b..c9bb5d41afa4 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -716,6 +756,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
>
> err = c->trans_mod->request(c, req);
> if (err < 0) {
> + /* write won't happen */
> + p9_req_put(req);
> if (err != -ERESTARTSYS && err != -EFAULT)
> c->status = Disconnected;
> goto recalc_sigpending;
p9_client_zc_rpc needs the same put if zc_request failed, I'm not sure
why it wasn't here in my draft
--
Dominique
Hi Dominique,
On 2018/8/28 7:09, Dominique Martinet wrote:
> Tomas Bortoli wrote on Tue, Aug 14, 2018:
>> To avoid use-after-free(s), use a refcount to keep track of the
>> usable references to any instantiated struct p9_req_t.
>>
>> This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
>> wrappers to kref_put(), kref_get() and kref_get_unless_zero().
>> These are used by the client and the transports to keep track of
>> valid requests' references.
>>
>> p9_free_req() is added back and used as callback by kref_put().
>>
>> Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
>> kmem_cache_free() will not be reused for another type until the rcu
>> synchronisation period is over, so an address gotten under rcu read
>> lock is safe to inc_ref() without corrupting random memory while
>> the lock is held.
>
>
> FWIW, since 4.19-rc1 has been tagged I was going to push this and all
> the perrequesites to linux-next, but I've managed to leak some requests
> by interrupting them in trans_virtio.
> I think I've found why (see below), so I'll push a fixed version after
> some more testing and another thorough read -- at some point today, but
> this hasn't been 'approved' explicitely so please review! :)
>
> (Jun, I think you'll need to ask again to rename 'req' to 'rreq' if you
> think it's important -- I think such a rename should go in a separate
> patch anyway, there's plenty of time until the 4.20 merge window)
>
I still think such a rename is necessary and, as you said, it would be
better for it to go in a separate patch.
Thanks,
Jun
>
> By "all the prerequesites" I mean this patch "serie":
> * 9p: Use a slab for allocating requests
> * 9p: Remove p9_idpool
> * net/9p: embed fcall in req to round down buffer allocs
> * net/9p: add a per-client fcall kmem_cache
> * 9p: rename p9_free_req() function
> * 9p: Add refcount to p9_req_t
>
> All the other patchs have had some review though, I was just waiting for
> the start of this cycle, but if someone has any issue with the above
> patches now is a good time to say.
>
>
>> diff --git a/net/9p/client.c b/net/9p/client.c
>> index 7942c0bfcc5b..c9bb5d41afa4 100644
>> --- a/net/9p/client.c
>> +++ b/net/9p/client.c
>> @@ -716,6 +756,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
>>
>> err = c->trans_mod->request(c, req);
>> if (err < 0) {
>> + /* write won't happen */
>> + p9_req_put(req);
>> if (err != -ERESTARTSYS && err != -EFAULT)
>> c->status = Disconnected;
>> goto recalc_sigpending;
>
> p9_client_zc_rpc needs the same put if zc_request failed, I'm not sure
> why it wasn't here in my draft
>
piaojun wrote on Tue, Aug 28, 2018:
> > (Jun, I think you'll need to ask again to rename 'req' to 'rreq' if you
> > think it's important -- I think such a rename should go in a separate
> > patch anyway, there's plenty of time until the 4.20 merge window)
> >
>
> I still think such a rename is necessary, and as you said, it will be
> better go in another patch.
Tomas can you send a patch for that please?
It's not very interesting, but might as well finish this properly :)
> >> diff --git a/net/9p/client.c b/net/9p/client.c
> >> index 7942c0bfcc5b..c9bb5d41afa4 100644
> >> --- a/net/9p/client.c
> >> +++ b/net/9p/client.c
> >> @@ -716,6 +756,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
> >>
> >> err = c->trans_mod->request(c, req);
> >> if (err < 0) {
> >> + /* write won't happen */
> >> + p9_req_put(req);
> >> if (err != -ERESTARTSYS && err != -EFAULT)
> >> c->status = Disconnected;
> >> goto recalc_sigpending;
> >
> > p9_client_zc_rpc needs the same put if zc_request failed, I'm not sure
> > why it wasn't here in my draft
Ah, I remember a bit better now, this is not as simple as adding the
same check after zc_request because the zc_request embeds the wait
itself to do its own cleanup after the reply came (unpin pages that were
sent to the server).
This brings in an interesting race condition: if the
wait_event_killable() in the zc_request is interrupted, the user data
pages that were pinned get unpinned and could potentially be moved
before the server replies... Even if they're not moved the user would be
told the read/write failed and could reuse the memory that would be
read/written later.
I'm not sure how this part works but it's probably not great.
Greg, do you have an opinion on this?
This is tricky, we cannot even rely on the refcounting for this as the
zc pages are likely user pages, so it'll be bad if we return from the
syscall and the memory gets accessed later.
On the other hand making that wait non-killable isn't a good solution
either, and we cannot use flush for virtio, so I don't have any idea for
this... Any magic virtio "take-back"?
Well, this would be for another patch anyway - for now I'll just do the
p9_req_put if the request hasn't been kicked, which means something like
the following diff. My test bed is currently down, though, so I'll wait
until I can run tests before pushing:
-------8<----------------
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7728b0acde09..36a1401c0722 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -404,6 +404,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
struct scatterlist *sgs[4];
size_t offs;
int need_drop = 0;
+ int kicked = 0;
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
@@ -498,6 +499,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
}
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
+ kicked = 1;
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
/*
@@ -518,6 +520,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
}
kvfree(in_pages);
kvfree(out_pages);
+ if (!kicked) {
+ /* reply won't come */
+ p9_req_put(req);
+ }
return err;
}
-------8<----------------
--
Dominique
Dominique Martinet wrote on Tue, Aug 28, 2018:
> I think I've found why (see below), so I'll push a fixed version after
> some more testing and another thorough read -- at some point today, but
> this hasn't been 'approved' explicitely so please review! :)
While the issue I pointed at was real, it wasn't what was causing the
refcount leak I was observing -- the problem is that we didn't drop a
ref when the request was successfully cancelled (e.g. the reply to the
flush came and the original request didn't get replied to).
The reason for this was that there were multiple versions of the patch
which alternated between doing the put in client.c after the cancelled
callback unconditionally, and doing the put in each transport's
cancelled() function, but virtio does not have this callback so that
didn't get added in the final version (codeveloping is hard); so I've
added an else clause to just issue a put if there is no callback.
(In the end, it felt better to have the req_put in the transport because
trans_fd is making refcounting difficult with its list handling, and
separating the put from the list removal would be more confusing than is
gained by sharing code)
Anyway, that's starting to be quite different from the v2 so I'll send a
v3 keeping Tomas as the author -- please check my edits are alright with
you, Tomas.
Meanwhile I'll keep running tests, I'm now confident about virtio but
want to spend more time on other transports again, so delaying the push
to linux-next for a few more days...
--
Dominique