Hi,
I have added all all review findings and rediffed the patches
- patch #1-#6: Fix syzcall findings & further race cleanups
patch #1 has an updated subject/comment
patch #2 contains the squashed result of Dmitrys change and my
own updates.
patch #6 is replaced by the proposal from Davidlohr
- patch #7-#10: rhashtable improvement from Davidlohr
- patch #11: A variable rename patch: id/lid/idx/uid were a mess
- patch #12: change a return code from int to bool, side effect of the
refcount_t introduction.
@Andrew:
Can you merge the patches into -mm/next?
I have not seen any issues in my tests.
--
Manfred
ipc_getref has still a return value of type "int", matching the atomic_t
interface of atomic_inc_not_zero()/atomic_add_unless().
ipc_getref now uses refcount_inc_not_zero, which has a return value of
type "bool".
Therefore: Update the return code to avoid implicit conversions.
Signed-off-by: Manfred Spraul <[email protected]>
---
ipc/util.c | 2 +-
ipc/util.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/ipc/util.c b/ipc/util.c
index fb69c911655a..6306eb25180b 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -461,7 +461,7 @@ void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
ipcp->key = IPC_PRIVATE;
}
-int ipc_rcu_getref(struct kern_ipc_perm *ptr)
+bool ipc_rcu_getref(struct kern_ipc_perm *ptr)
{
return refcount_inc_not_zero(&ptr->refcount);
}
diff --git a/ipc/util.h b/ipc/util.h
index e74564fe3375..0a159f69b3bb 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -138,7 +138,7 @@ static inline int ipc_get_maxidx(struct ipc_ids *ids)
* refcount is initialized by ipc_addid(), before that point call_rcu()
* must be used.
*/
-int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+bool ipc_rcu_getref(struct kern_ipc_perm *ptr);
void ipc_rcu_putref(struct kern_ipc_perm *ptr,
void (*func)(struct rcu_head *head));
--
2.17.1
From: Davidlohr Bueso <[email protected]>
In sysvipc we have an ids->tables_initialized regarding the
rhashtable, introduced in:
commit 0cfb6aee70bd ("ipc: optimize semget/shmget/msgget for lots of keys")
It's there, specifically, to prevent nil pointer dereferences,
from using an uninitialized api. Considering how rhashtable_init()
can fail (probably due to ENOMEM, if anything), this made the
overall ipc initialization capable of failure as well. That alone
is ugly, but fine, however I've spotted a few issues regarding the
semantics of tables_initialized (however unlikely they may be):
- There is inconsistency in what we return to userspace: ipc_addid()
returns ENOSPC which is certainly _wrong_, while ipc_obtain_object_idr()
returns EINVAL.
- After we started using rhashtables, ipc_findkey() can return nil upon
!tables_initialized, but the caller expects nil for when the ipc structure
isn't found, and can therefore call into ipcget() callbacks.
Now that rhashtable initialization cannot fail, we can properly
get rid of the hack altogether.
Signed-off-by: Davidlohr Bueso <[email protected]>
(commit id extended to 12 digits)
Signed-off-by: Manfred Spraul <[email protected]>
---
include/linux/ipc_namespace.h | 1 -
ipc/util.c | 23 ++++++++---------------
2 files changed, 8 insertions(+), 16 deletions(-)
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index b5630c8eb2f3..37f3a4b7c637 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -16,7 +16,6 @@ struct user_namespace;
struct ipc_ids {
int in_use;
unsigned short seq;
- bool tables_initialized;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
int max_id;
diff --git a/ipc/util.c b/ipc/util.c
index 234f6d781df3..f620778b11d2 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -125,7 +125,6 @@ int ipc_init_ids(struct ipc_ids *ids)
if (err)
return err;
idr_init(&ids->ipcs_idr);
- ids->tables_initialized = true;
ids->max_id = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
@@ -178,19 +177,16 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
*/
static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
{
- struct kern_ipc_perm *ipcp = NULL;
+ struct kern_ipc_perm *ipcp;
- if (likely(ids->tables_initialized))
- ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+ ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
ipc_kht_params);
+ if (!ipcp)
+ return NULL;
- if (ipcp) {
- rcu_read_lock();
- ipc_lock_object(ipcp);
- return ipcp;
- }
-
- return NULL;
+ rcu_read_lock();
+ ipc_lock_object(ipcp);
+ return ipcp;
}
/*
@@ -268,7 +264,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
if (limit > IPCMNI)
limit = IPCMNI;
- if (!ids->tables_initialized || ids->in_use >= limit)
+ if (ids->in_use >= limit)
return -ENOSPC;
idr_preload(GFP_KERNEL);
@@ -577,9 +573,6 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
struct kern_ipc_perm *out;
int lid = ipcid_to_idx(id);
- if (unlikely(!ids->tables_initialized))
- return ERR_PTR(-EINVAL);
-
out = idr_find(&ids->ipcs_idr, lid);
if (!out)
return ERR_PTR(-EINVAL);
--
2.17.1
From: Davidlohr Bueso <[email protected]>
Now that we know that rhashtable_init() will not fail, we
can get rid of a lot of the unnecessary cleanup paths when
the call errored out.
Signed-off-by: Davidlohr Bueso <[email protected]>
(variable name added to util.h to resolve checkpatch warning)
Signed-off-by: Manfred Spraul <[email protected]>
---
ipc/msg.c | 9 ++++-----
ipc/namespace.c | 20 ++++----------------
ipc/sem.c | 10 ++++------
ipc/shm.c | 9 ++++-----
ipc/util.c | 18 +++++-------------
ipc/util.h | 18 +++++++++---------
6 files changed, 30 insertions(+), 54 deletions(-)
diff --git a/ipc/msg.c b/ipc/msg.c
index 4aca0ce363b5..130e12e6a8c6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1237,7 +1237,7 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif
-int msg_init_ns(struct ipc_namespace *ns)
+void msg_init_ns(struct ipc_namespace *ns)
{
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
@@ -1245,7 +1245,7 @@ int msg_init_ns(struct ipc_namespace *ns)
atomic_set(&ns->msg_bytes, 0);
atomic_set(&ns->msg_hdrs, 0);
- return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}
#ifdef CONFIG_IPC_NS
@@ -1286,12 +1286,11 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
}
#endif
-int __init msg_init(void)
+void __init msg_init(void)
{
- const int err = msg_init_ns(&init_ipc_ns);
+ msg_init_ns(&init_ipc_ns);
ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
IPC_MSG_IDS, sysvipc_msg_proc_show);
- return err;
}
diff --git a/ipc/namespace.c b/ipc/namespace.c
index f59a89966f92..21607791d62c 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -55,28 +55,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
- err = sem_init_ns(ns);
+ err = mq_init_ns(ns);
if (err)
goto fail_put;
- err = msg_init_ns(ns);
- if (err)
- goto fail_destroy_sem;
- err = shm_init_ns(ns);
- if (err)
- goto fail_destroy_msg;
- err = mq_init_ns(ns);
- if (err)
- goto fail_destroy_shm;
+ sem_init_ns(ns);
+ msg_init_ns(ns);
+ shm_init_ns(ns);
return ns;
-fail_destroy_shm:
- shm_exit_ns(ns);
-fail_destroy_msg:
- msg_exit_ns(ns);
-fail_destroy_sem:
- sem_exit_ns(ns);
fail_put:
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
diff --git a/ipc/sem.c b/ipc/sem.c
index da1626984083..671d8703b130 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -220,14 +220,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#define sc_semopm sem_ctls[2]
#define sc_semmni sem_ctls[3]
-int sem_init_ns(struct ipc_namespace *ns)
+void sem_init_ns(struct ipc_namespace *ns)
{
ns->sc_semmsl = SEMMSL;
ns->sc_semmns = SEMMNS;
ns->sc_semopm = SEMOPM;
ns->sc_semmni = SEMMNI;
ns->used_sems = 0;
- return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
+ ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}
#ifdef CONFIG_IPC_NS
@@ -239,14 +239,12 @@ void sem_exit_ns(struct ipc_namespace *ns)
}
#endif
-int __init sem_init(void)
+void __init sem_init(void)
{
- const int err = sem_init_ns(&init_ipc_ns);
-
+ sem_init_ns(&init_ipc_ns);
ipc_init_proc_interface("sysvipc/sem",
" key semid perms nsems uid gid cuid cgid otime ctime\n",
IPC_SEM_IDS, sysvipc_sem_proc_show);
- return err;
}
/**
diff --git a/ipc/shm.c b/ipc/shm.c
index 22afb98363ff..d388d6e744c0 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -95,14 +95,14 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
-int shm_init_ns(struct ipc_namespace *ns)
+void shm_init_ns(struct ipc_namespace *ns)
{
ns->shm_ctlmax = SHMMAX;
ns->shm_ctlall = SHMALL;
ns->shm_ctlmni = SHMMNI;
ns->shm_rmid_forced = 0;
ns->shm_tot = 0;
- return ipc_init_ids(&shm_ids(ns));
+ ipc_init_ids(&shm_ids(ns));
}
/*
@@ -135,9 +135,8 @@ void shm_exit_ns(struct ipc_namespace *ns)
static int __init ipc_ns_init(void)
{
- const int err = shm_init_ns(&init_ipc_ns);
- WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
- return err;
+ shm_init_ns(&init_ipc_ns);
+ return 0;
}
pure_initcall(ipc_ns_init);
diff --git a/ipc/util.c b/ipc/util.c
index f620778b11d2..35621be0d945 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -87,16 +87,12 @@ struct ipc_proc_iface {
*/
static int __init ipc_init(void)
{
- int err_sem, err_msg;
-
proc_mkdir("sysvipc", NULL);
- err_sem = sem_init();
- WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
- err_msg = msg_init();
- WARN(err_msg, "ipc: sysv msg_init failed: %d\n", err_msg);
+ sem_init();
+ msg_init();
shm_init();
- return err_msg ? err_msg : err_sem;
+ return 0;
}
device_initcall(ipc_init);
@@ -115,21 +111,17 @@ static const struct rhashtable_params ipc_kht_params = {
* Set up the sequence range to use for the ipc identifier range (limited
* below IPCMNI) then initialise the keys hashtable and ids idr.
*/
-int ipc_init_ids(struct ipc_ids *ids)
+void ipc_init_ids(struct ipc_ids *ids)
{
- int err;
ids->in_use = 0;
ids->seq = 0;
init_rwsem(&ids->rwsem);
- err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
- if (err)
- return err;
+ rhashtable_init(&ids->key_ht, &ipc_kht_params);
idr_init(&ids->ipcs_idr);
ids->max_id = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
#endif
- return 0;
}
#ifdef CONFIG_PROC_FS
diff --git a/ipc/util.h b/ipc/util.h
index e3c47b21db93..6c5c77c61f85 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -18,8 +18,8 @@
#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
#define SEQ_MULTIPLIER (IPCMNI)
-int sem_init(void);
-int msg_init(void);
+void sem_init(void);
+void msg_init(void);
void shm_init(void);
struct ipc_namespace;
@@ -34,17 +34,17 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
#endif
#ifdef CONFIG_SYSVIPC
-int sem_init_ns(struct ipc_namespace *ns);
-int msg_init_ns(struct ipc_namespace *ns);
-int shm_init_ns(struct ipc_namespace *ns);
+void sem_init_ns(struct ipc_namespace *ns);
+void msg_init_ns(struct ipc_namespace *ns);
+void shm_init_ns(struct ipc_namespace *ns);
void sem_exit_ns(struct ipc_namespace *ns);
void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
-static inline int sem_init_ns(struct ipc_namespace *ns) { return 0; }
-static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; }
-static inline int shm_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline void sem_init_ns(struct ipc_namespace *ns) { }
+static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline void shm_init_ns(struct ipc_namespace *ns) { }
static inline void sem_exit_ns(struct ipc_namespace *ns) { }
static inline void msg_exit_ns(struct ipc_namespace *ns) { }
@@ -83,7 +83,7 @@ struct ipc_ops {
struct seq_file;
struct ipc_ids;
-int ipc_init_ids(struct ipc_ids *);
+void ipc_init_ids(struct ipc_ids *ids);
#ifdef CONFIG_PROC_FS
void __init ipc_init_proc_interface(const char *path, const char *header,
int ids, int (*show)(struct seq_file *, void *));
--
2.17.1
The varable names got a mess, thus standardize them again:
id: user space id. Called semid, shmid, msgid if the type is known.
Most functions use "id" already.
idx: "index" for the idr lookup
Right now, some functions use lid, ipc_addid() already uses idx as
the variable name.
seq: sequence number, to avoid quick collisions of the user space id
key: user space key, used for the rhash tree
Signed-off-by: Manfred Spraul <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
---
include/linux/ipc_namespace.h | 2 +-
ipc/msg.c | 6 +++---
ipc/sem.c | 6 +++---
ipc/shm.c | 4 ++--
ipc/util.c | 26 +++++++++++++-------------
ipc/util.h | 10 +++++-----
6 files changed, 27 insertions(+), 27 deletions(-)
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 37f3a4b7c637..3098d275a29d 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -18,7 +18,7 @@ struct ipc_ids {
unsigned short seq;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
- int max_id;
+ int max_idx;
#ifdef CONFIG_CHECKPOINT_RESTORE
int next_id;
#endif
diff --git a/ipc/msg.c b/ipc/msg.c
index 130e12e6a8c6..1892bec0f1c8 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -455,7 +455,7 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
int cmd, struct msginfo *msginfo)
{
int err;
- int max_id;
+ int max_idx;
/*
* We must not return kernel stack data.
@@ -482,9 +482,9 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_id = ipc_get_maxid(&msg_ids(ns));
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
up_read(&msg_ids(ns).rwsem);
- return (max_id < 0) ? 0 : max_id;
+ return (max_idx < 0) ? 0 : max_idx;
}
static int msgctl_stat(struct ipc_namespace *ns, int msqid,
diff --git a/ipc/sem.c b/ipc/sem.c
index 671d8703b130..f98962b06024 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1293,7 +1293,7 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
int cmd, void __user *p)
{
struct seminfo seminfo;
- int max_id;
+ int max_idx;
int err;
err = security_sem_semctl(NULL, cmd);
@@ -1317,11 +1317,11 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
seminfo.semusz = SEMUSZ;
seminfo.semaem = SEMAEM;
}
- max_id = ipc_get_maxid(&sem_ids(ns));
+ max_idx = ipc_get_maxidx(&sem_ids(ns));
up_read(&sem_ids(ns).rwsem);
if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
return -EFAULT;
- return (max_id < 0) ? 0 : max_id;
+ return (max_idx < 0) ? 0 : max_idx;
}
static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
diff --git a/ipc/shm.c b/ipc/shm.c
index d388d6e744c0..a4e9a1b34595 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -935,7 +935,7 @@ static int shmctl_ipc_info(struct ipc_namespace *ns,
shminfo->shmall = ns->shm_ctlall;
shminfo->shmmin = SHMMIN;
down_read(&shm_ids(ns).rwsem);
- err = ipc_get_maxid(&shm_ids(ns));
+ err = ipc_get_maxidx(&shm_ids(ns));
up_read(&shm_ids(ns).rwsem);
if (err < 0)
err = 0;
@@ -955,7 +955,7 @@ static int shmctl_shm_info(struct ipc_namespace *ns,
shm_info->shm_tot = ns->shm_tot;
shm_info->swap_attempts = 0;
shm_info->swap_successes = 0;
- err = ipc_get_maxid(&shm_ids(ns));
+ err = ipc_get_maxidx(&shm_ids(ns));
up_read(&shm_ids(ns).rwsem);
if (err < 0)
err = 0;
diff --git a/ipc/util.c b/ipc/util.c
index 35621be0d945..fb69c911655a 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -118,7 +118,7 @@ void ipc_init_ids(struct ipc_ids *ids)
init_rwsem(&ids->rwsem);
rhashtable_init(&ids->key_ht, &ipc_kht_params);
idr_init(&ids->ipcs_idr);
- ids->max_id = -1;
+ ids->max_idx = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
#endif
@@ -236,7 +236,7 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
* @limit: limit for the number of used ids
*
* Add an entry 'new' to the ipc ids idr. The permissions object is
- * initialised and the first free entry is set up and the id assigned
+ * initialised and the first free entry is set up and the index assigned
* is returned. The 'new' entry is returned in a locked state on success.
*
* On failure the entry is not locked and a negative err-code is returned.
@@ -290,8 +290,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
}
ids->in_use++;
- if (idx > ids->max_id)
- ids->max_id = idx;
+ if (idx > ids->max_idx)
+ ids->max_idx = idx;
return idx;
}
@@ -430,20 +430,20 @@ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
*/
void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
{
- int lid = ipcid_to_idx(ipcp->id);
+ int idx = ipcid_to_idx(ipcp->id);
- idr_remove(&ids->ipcs_idr, lid);
+ idr_remove(&ids->ipcs_idr, idx);
ipc_kht_remove(ids, ipcp);
ids->in_use--;
ipcp->deleted = true;
- if (unlikely(lid == ids->max_id)) {
+ if (unlikely(idx == ids->max_idx)) {
do {
- lid--;
- if (lid == -1)
+ idx--;
+ if (idx == -1)
break;
- } while (!idr_find(&ids->ipcs_idr, lid));
- ids->max_id = lid;
+ } while (!idr_find(&ids->ipcs_idr, idx));
+ ids->max_idx = idx;
}
}
@@ -563,9 +563,9 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
- int lid = ipcid_to_idx(id);
+ int idx = ipcid_to_idx(id);
- out = idr_find(&ids->ipcs_idr, lid);
+ out = idr_find(&ids->ipcs_idr, idx);
if (!out)
return ERR_PTR(-EINVAL);
diff --git a/ipc/util.h b/ipc/util.h
index 6c5c77c61f85..e74564fe3375 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -113,12 +113,12 @@ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *);
int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
/**
- * ipc_get_maxid - get the last assigned id
+ * ipc_get_maxidx - get the highest assigned index
* @ids: ipc identifier set
*
* Called with ipc_ids.rwsem held for reading.
*/
-static inline int ipc_get_maxid(struct ipc_ids *ids)
+static inline int ipc_get_maxidx(struct ipc_ids *ids)
{
if (ids->in_use == 0)
return -1;
@@ -126,7 +126,7 @@ static inline int ipc_get_maxid(struct ipc_ids *ids)
if (ids->in_use == IPCMNI)
return IPCMNI - 1;
- return ids->max_id;
+ return ids->max_idx;
}
/*
@@ -172,9 +172,9 @@ extern struct msg_msg *load_msg(const void __user *src, size_t len);
extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
-static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id)
{
- return uid / SEQ_MULTIPLIER != ipcp->seq;
+ return ipcid_to_seqx(id) != ipcp->seq;
}
static inline void ipc_lock_object(struct kern_ipc_perm *perm)
--
2.17.1
From: Davidlohr Bueso <[email protected]>
rhashtable_init() may fail due to -ENOMEM, thus making the
entire api unusable. This patch removes this scenario,
however unlikely. In order to guarantee memory allocation,
this patch always ends up doing GFP_KERNEL|__GFP_NOFAIL
for both the tbl as well as alloc_bucket_spinlocks().
Upon the first table allocation failure, we shrink the
size to the smallest value that makes sense and retry with
__GFP_NOFAIL semantics. With the defaults, this means that
from 64 buckets, we retry with only 4. Any later issues
regarding performance due to collisions or larger table
resizing (when more memory becomes available) is the least
of our problems.
Signed-off-by: Davidlohr Bueso <[email protected]>
Acked-by: Herbert Xu <[email protected]>
Signed-off-by: Manfred Spraul <[email protected]>
---
lib/rhashtable.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 083f871491a1..0026cf3e3f27 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -179,10 +179,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
size = nbuckets;
- if (tbl == NULL && gfp != GFP_KERNEL) {
+ if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
nbuckets = 0;
}
+
if (tbl == NULL)
return NULL;
@@ -1065,9 +1066,16 @@ int rhashtable_init(struct rhashtable *ht,
}
}
+ /*
+ * This is api initialization and thus we need to guarantee the
+ * initial rhashtable allocation. Upon failure, retry with the
+ * smallest possible size with __GFP_NOFAIL semantics.
+ */
tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
- if (tbl == NULL)
- return -ENOMEM;
+ if (unlikely(tbl == NULL)) {
+ size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
+ tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
+ }
atomic_set(&ht->nelems, 0);
--
2.17.1
From: Davidlohr Bueso <[email protected]>
ipc/util.c contains multiple functions to get the ipc object
pointer given an id number.
There are two sets of function: One set verifies the sequence
counter part of the id number, other functions do not check
the sequence counter.
The standard for function names in ipc/util.c is
- ..._check() functions verify the sequence counter
- ..._idr() functions do not verify the sequence counter
ipc_lock() is an exception: It does not verify the sequence
counter value, but this is not obvious from the function name.
Furthermore, shm.c is the only user of this helper. Thus, we
can simply move the logic into shm_lock() and get rid of the
function altogether.
[changelog mostly by manfred]
Signed-off-by: Davidlohr Bueso <[email protected]>
Signed-off-by: Manfred Spraul <[email protected]>
---
ipc/shm.c | 29 +++++++++++++++++++++++------
ipc/util.c | 36 ------------------------------------
ipc/util.h | 1 -
3 files changed, 23 insertions(+), 43 deletions(-)
diff --git a/ipc/shm.c b/ipc/shm.c
index 0a509befb558..22afb98363ff 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -179,16 +179,33 @@ static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace
*/
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+ struct kern_ipc_perm *ipcp;
+
+ rcu_read_lock();
+ ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
+ if (IS_ERR(ipcp))
+ goto err;
+ ipc_lock_object(ipcp);
+ /*
+ * ipc_rmid() may have already freed the ID while ipc_lock_object()
+ * was spinning: here verify that the structure is still valid.
+ * Upon races with RMID, return -EIDRM, thus indicating that
+ * the ID points to a removed identifier.
+ */
+ if (ipc_valid_object(ipcp)) {
+ /* return a locked ipc object upon success */
+ return container_of(ipcp, struct shmid_kernel, shm_perm);
+ }
+
+ ipc_unlock_object(ipcp);
+err:
+ rcu_read_unlock();
/*
* Callers of shm_lock() must validate the status of the returned ipc
- * object pointer (as returned by ipc_lock()), and error out as
- * appropriate.
+ * object pointer and error out as appropriate.
*/
- if (IS_ERR(ipcp))
- return (void *)ipcp;
- return container_of(ipcp, struct shmid_kernel, shm_perm);
+ return (void *)ipcp;
}
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
diff --git a/ipc/util.c b/ipc/util.c
index 5cc37066e659..234f6d781df3 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -587,42 +587,6 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
return out;
}
-/**
- * ipc_lock - lock an ipc structure without rwsem held
- * @ids: ipc identifier set
- * @id: ipc id to look for
- *
- * Look for an id in the ipc ids idr and lock the associated ipc object.
- *
- * The ipc object is locked on successful exit.
- */
-struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
-{
- struct kern_ipc_perm *out;
-
- rcu_read_lock();
- out = ipc_obtain_object_idr(ids, id);
- if (IS_ERR(out))
- goto err;
-
- spin_lock(&out->lock);
-
- /*
- * ipc_rmid() may have already freed the ID while ipc_lock()
- * was spinning: here verify that the structure is still valid.
- * Upon races with RMID, return -EIDRM, thus indicating that
- * the ID points to a removed identifier.
- */
- if (ipc_valid_object(out))
- return out;
-
- spin_unlock(&out->lock);
- out = ERR_PTR(-EIDRM);
-err:
- rcu_read_unlock();
- return out;
-}
-
/**
* ipc_obtain_object_check
* @ids: ipc identifier set
diff --git a/ipc/util.h b/ipc/util.h
index fcf81425ae98..e3c47b21db93 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -142,7 +142,6 @@ int ipc_rcu_getref(struct kern_ipc_perm *ptr);
void ipc_rcu_putref(struct kern_ipc_perm *ptr,
void (*func)(struct rcu_head *head));
-struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
--
2.17.1
From: Davidlohr Bueso <[email protected]>
As of commit ce91f6ee5b3b ("mm: kvmalloc does not fallback to vmalloc for
incompatible gfp flags") we can simplify the caller and trust kvzalloc() to
just do the right thing. For the case of the GFP_ATOMIC context, we can
drop the __GFP_NORETRY flag for obvious reasons, and for the __GFP_NOWARN
case, however, it is changed such that the caller passes the flag instead
of making bucket_table_alloc() handle it.
This slightly changes the gfp flags passed on to nested_table_alloc() as
it will now also use GFP_ATOMIC | __GFP_NOWARN. However, I consider this a
positive consequence as for the same reasons we want nowarn semantics in
bucket_table_alloc().
Signed-off-by: Davidlohr Bueso <[email protected]>
Acked-by: Michal Hocko <[email protected]>
(commit id extended to 12 digits, line wraps updated)
Signed-off-by: Manfred Spraul <[email protected]>
---
lib/rhashtable.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9427b5766134..083f871491a1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -175,10 +175,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
int i;
size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- if (gfp != GFP_KERNEL)
- tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
- else
- tbl = kvzalloc(size, gfp);
+ tbl = kvzalloc(size, gfp);
size = nbuckets;
@@ -459,7 +456,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
err = -ENOMEM;
- new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+ new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
if (new_tbl == NULL)
goto fail;
--
2.17.1
The comment that explains ipc_obtain_object_check is wrong:
The function checks the sequence number, not the reference
counter.
Note that checking the reference counter would be meaningless:
The reference counter is decreased without holding any locks,
thus an object with kern_ipc_perm.deleted=true may disappear at
the end of the next rcu grace period.
Signed-off-by: Manfred Spraul <[email protected]>
Reviewed-by: Davidlohr Bueso <[email protected]>
---
ipc/util.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ipc/util.c b/ipc/util.c
index cffd12240f67..5cc37066e659 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -628,8 +628,8 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
* @ids: ipc identifier set
* @id: ipc id to look for
*
- * Similar to ipc_obtain_object_idr() but also checks
- * the ipc object reference counter.
+ * Similar to ipc_obtain_object_idr() but also checks the ipc object
+ * sequence number.
*
* Call inside the RCU critical section.
* The ipc object is *not* locked on exit.
--
2.17.1
ipc_addid() is impossible to use:
- for certain failures, the caller must not use ipc_rcu_putref(),
because the reference counter is not yet initialized.
- for other failures, the caller must use ipc_rcu_putref(),
because parallel operations could be ongoing already.
The patch cleans that up, by initializing the refcount early,
and by modifying all callers.
The issues is related to the finding of
[email protected]:
syzbot found an issue with reading kern_ipc_perm.seq,
here both read and write to already released memory could happen.
Signed-off-by: Manfred Spraul <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
---
ipc/msg.c | 2 +-
ipc/sem.c | 2 +-
ipc/shm.c | 2 ++
ipc/util.c | 10 ++++++++--
4 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/ipc/msg.c b/ipc/msg.c
index 49358f474fc9..38119c1f0da3 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -162,7 +162,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
/* ipc_addid() locks msq upon success. */
retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
if (retval < 0) {
- call_rcu(&msq->q_perm.rcu, msg_rcu_free);
+ ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
return retval;
}
diff --git a/ipc/sem.c b/ipc/sem.c
index d89ce69b2613..8a0a1eb05765 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -556,7 +556,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
/* ipc_addid() locks sma upon success. */
retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
if (retval < 0) {
- call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
return retval;
}
ns->used_sems += nsems;
diff --git a/ipc/shm.c b/ipc/shm.c
index f3bae59bed08..92d71abe9e8f 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -671,6 +671,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if (is_file_hugepages(file) && shp->mlock_user)
user_shm_unlock(size, shp->mlock_user);
fput(file);
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+ return error;
no_file:
call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
return error;
diff --git a/ipc/util.c b/ipc/util.c
index 4998f8fa8ce0..f3447911c81e 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -250,7 +250,9 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
* Add an entry 'new' to the ipc ids idr. The permissions object is
* initialised and the first free entry is set up and the id assigned
* is returned. The 'new' entry is returned in a locked state on success.
+ *
* On failure the entry is not locked and a negative err-code is returned.
+ * The caller must use ipc_rcu_putref() to free the identifier.
*
* Called with writer ipc_ids.rwsem held.
*/
@@ -260,6 +262,9 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
kgid_t egid;
int idx, err;
+ /* 1) Initialize the refcount so that ipc_rcu_putref works */
+ refcount_set(&new->refcount, 1);
+
if (limit > IPCMNI)
limit = IPCMNI;
@@ -268,9 +273,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
idr_preload(GFP_KERNEL);
- refcount_set(&new->refcount, 1);
spin_lock_init(&new->lock);
- new->deleted = false;
rcu_read_lock();
spin_lock(&new->lock);
@@ -278,6 +281,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
new->cuid = new->uid = euid;
new->gid = new->cgid = egid;
+ new->deleted = false;
+
idx = ipc_idr_alloc(ids, new);
idr_preload_end();
@@ -290,6 +295,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
}
}
if (idx < 0) {
+ new->deleted = true;
spin_unlock(&new->lock);
rcu_read_unlock();
return idx;
--
2.17.1
Both the comment and the name of ipcctl_pre_down_nolock()
are misleading: The function must be called while holdling
the rw semaphore.
Therefore the patch renames the function to ipcctl_obtain_check():
This name matches the other names used in util.c:
- "obtain" function look up a pointer in the idr, without
acquiring the object lock.
- The caller is responsible for locking.
- _check means that the sequence number is checked.
Signed-off-by: Manfred Spraul <[email protected]>
Reviewed-by: Davidlohr Bueso <[email protected]>
---
ipc/msg.c | 2 +-
ipc/sem.c | 2 +-
ipc/shm.c | 2 +-
ipc/util.c | 8 ++++----
ipc/util.h | 2 +-
5 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/ipc/msg.c b/ipc/msg.c
index 38119c1f0da3..4aca0ce363b5 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -385,7 +385,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
down_write(&msg_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd,
&msqid64->msg_perm, msqid64->msg_qbytes);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
diff --git a/ipc/sem.c b/ipc/sem.c
index 8a0a1eb05765..da1626984083 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1595,7 +1595,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
down_write(&sem_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,
&semid64->sem_perm, 0);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
diff --git a/ipc/shm.c b/ipc/shm.c
index 92d71abe9e8f..0a509befb558 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -868,7 +868,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
down_write(&shm_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
&shmid64->shm_perm, 0);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
diff --git a/ipc/util.c b/ipc/util.c
index f3447911c81e..cffd12240f67 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -687,7 +687,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
}
/**
- * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
+ * ipcctl_obtain_check - retrieve an ipc object and check permissions
* @ns: ipc namespace
* @ids: the table of ids where to look for the ipc
* @id: the id of the ipc to retrieve
@@ -697,16 +697,16 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
*
* This function does some common audit and permissions check for some IPC_XXX
* cmd and is called from semctl_down, shmctl_down and msgctl_down.
- * It must be called without any lock held and:
*
- * - retrieves the ipc with the given id in the given table.
+ * It:
+ * - retrieves the ipc object with the given id in the given table.
* - performs some audit and permission check, depending on the given cmd
* - returns a pointer to the ipc object or otherwise, the corresponding
* error.
*
* Call holding the both the rwsem and the rcu read lock.
*/
-struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm)
{
diff --git a/ipc/util.h b/ipc/util.h
index 0aba3230d007..fcf81425ae98 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -148,7 +148,7 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
-struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
--
2.17.1
ipc_addid() initializes kern_ipc_perm.seq after having called
idr_alloc() (within ipc_idr_alloc()).
Thus a parallel semop() or msgrcv() that uses ipc_obtain_object_check()
may see an uninitialized value.
The patch moves the initialization of kern_ipc_perm.seq before the
calls of idr_alloc().
Notes:
1) This patch has a user space visible side effect:
If /proc/sys/kernel/*_next_id is used (i.e.: checkpoint/restore) and
if semget()/msgget()/shmget() fails in the final step of adding the id
to the rhash tree, then .._next_id is cleared. Before the patch, is
remained unmodified.
There is no change of the behavior after a successful ..get() call:
It always clears .._next_id, there is no impact to non checkpoint/restore
code as that code does not use .._next_id.
2) The patch correctly documents that after a call to ipc_idr_alloc(),
the full tear-down sequence must be used. The callers of ipc_addid()
do not fullfill that, i.e. more bugfixes are required.
The patch is a squash of a patch from Dmitry and my own changes.
Reported-by: [email protected]
Signed-off-by: Manfred Spraul <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Michael Kerrisk <[email protected]>
---
Documentation/sysctl/kernel.txt | 3 +-
ipc/util.c | 91 +++++++++++++++++----------------
2 files changed, 50 insertions(+), 44 deletions(-)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index eded671d55eb..b2d4a8f8fe97 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -440,7 +440,8 @@ Notes:
1) kernel doesn't guarantee, that new object will have desired id. So,
it's up to userspace, how to handle an object with "wrong" id.
2) Toggle with non-default value will be set back to -1 by kernel after
-successful IPC object allocation.
+successful IPC object allocation. If an IPC object allocation syscall
+fails, it is undefined if the value remains unmodified or is reset to -1.
==============================================================
diff --git a/ipc/util.c b/ipc/util.c
index 4e81182fa0ac..4998f8fa8ce0 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -193,46 +193,54 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
return NULL;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
/*
- * Specify desired id for next allocated IPC object.
+ * Insert new IPC object into idr tree, and set sequence number and id
+ * in the correct order.
+ * Especially:
+ * - the sequence number must be set before inserting the object into the idr,
+ * because the sequence number is accessed without a lock.
+ * - the id can/must be set after inserting the object into the idr.
+ * All accesses must be done after getting kern_ipc_perm.lock.
+ *
+ * The caller must own kern_ipc_perm.lock.of the new object.
+ * On error, the function returns a (negative) error code.
*/
-#define ipc_idr_alloc(ids, new) \
- idr_alloc(&(ids)->ipcs_idr, (new), \
- (ids)->next_id < 0 ? 0 : ipcid_to_idx((ids)->next_id),\
- 0, GFP_NOWAIT)
-
-static inline int ipc_buildid(int id, struct ipc_ids *ids,
- struct kern_ipc_perm *new)
+static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
{
- if (ids->next_id < 0) { /* default, behave as !CHECKPOINT_RESTORE */
+ int idx, next_id = -1;
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ next_id = ids->next_id;
+ ids->next_id = -1;
+#endif
+
+ /*
+ * As soon as a new object is inserted into the idr,
+ * ipc_obtain_object_idr() or ipc_obtain_object_check() can find it,
+ * and the lockless preparations for ipc operations can start.
+ * This means especially: permission checks, audit calls, allocation
+ * of undo structures, ...
+ *
+ * Thus the object must be fully initialized, and if something fails,
+ * then the full tear-down sequence must be followed.
+ * (i.e.: set new->deleted, reduce refcount, call_rcu())
+ */
+
+ if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
new->seq = ids->seq++;
if (ids->seq > IPCID_SEQ_MAX)
ids->seq = 0;
+ idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT);
} else {
- new->seq = ipcid_to_seqx(ids->next_id);
- ids->next_id = -1;
+ new->seq = ipcid_to_seqx(next_id);
+ idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
+ 0, GFP_NOWAIT);
}
-
- return SEQ_MULTIPLIER * new->seq + id;
+ if (idx >= 0)
+ new->id = SEQ_MULTIPLIER * new->seq + idx;
+ return idx;
}
-#else
-#define ipc_idr_alloc(ids, new) \
- idr_alloc(&(ids)->ipcs_idr, (new), 0, 0, GFP_NOWAIT)
-
-static inline int ipc_buildid(int id, struct ipc_ids *ids,
- struct kern_ipc_perm *new)
-{
- new->seq = ids->seq++;
- if (ids->seq > IPCID_SEQ_MAX)
- ids->seq = 0;
-
- return SEQ_MULTIPLIER * new->seq + id;
-}
-
-#endif /* CONFIG_CHECKPOINT_RESTORE */
-
/**
* ipc_addid - add an ipc identifier
* @ids: ipc identifier set
@@ -250,7 +258,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
{
kuid_t euid;
kgid_t egid;
- int id, err;
+ int idx, err;
if (limit > IPCMNI)
limit = IPCMNI;
@@ -270,30 +278,27 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
new->cuid = new->uid = euid;
new->gid = new->cgid = egid;
- id = ipc_idr_alloc(ids, new);
+ idx = ipc_idr_alloc(ids, new);
idr_preload_end();
- if (id >= 0 && new->key != IPC_PRIVATE) {
+ if (idx >= 0 && new->key != IPC_PRIVATE) {
err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
ipc_kht_params);
if (err < 0) {
- idr_remove(&ids->ipcs_idr, id);
- id = err;
+ idr_remove(&ids->ipcs_idr, idx);
+ idx = err;
}
}
- if (id < 0) {
+ if (idx < 0) {
spin_unlock(&new->lock);
rcu_read_unlock();
- return id;
+ return idx;
}
ids->in_use++;
- if (id > ids->max_id)
- ids->max_id = id;
-
- new->id = ipc_buildid(id, ids, new);
-
- return id;
+ if (idx > ids->max_id)
+ ids->max_id = idx;
+ return idx;
}
/**
--
2.17.1
ipc_addid() initializes kern_ipc_perm.id after having called
ipc_idr_alloc().
Thus a parallel semctl() or msgctl() that uses e.g. MSG_STAT may use
this unitialized value as the return code.
The patch moves all accesses to kern_ipc_perm.id under the spin_lock().
The issues is related to the finding of
[email protected]:
syzbot found an issue with kern_ipc_perm.seq
Signed-off-by: Manfred Spraul <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Kees Cook <[email protected]>
Reviewed-by: Davidlohr Bueso <[email protected]>
---
ipc/msg.c | 19 ++++++++++++++-----
ipc/sem.c | 18 +++++++++++++-----
ipc/shm.c | 19 ++++++++++++++-----
3 files changed, 41 insertions(+), 15 deletions(-)
diff --git a/ipc/msg.c b/ipc/msg.c
index 3b6545302598..49358f474fc9 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -491,7 +491,6 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
int cmd, struct msqid64_ds *p)
{
struct msg_queue *msq;
- int id = 0;
int err;
memset(p, 0, sizeof(*p));
@@ -503,7 +502,6 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
err = PTR_ERR(msq);
goto out_unlock;
}
- id = msq->q_perm.id;
} else { /* IPC_STAT */
msq = msq_obtain_object_check(ns, msqid);
if (IS_ERR(msq)) {
@@ -548,10 +546,21 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
p->msg_lspid = pid_vnr(msq->q_lspid);
p->msg_lrpid = pid_vnr(msq->q_lrpid);
- ipc_unlock_object(&msq->q_perm);
- rcu_read_unlock();
- return id;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * MSG_STAT and MSG_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = msq->q_perm.id;
+ }
+ ipc_unlock_object(&msq->q_perm);
out_unlock:
rcu_read_unlock();
return err;
diff --git a/ipc/sem.c b/ipc/sem.c
index 5af1943ad782..d89ce69b2613 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1222,7 +1222,6 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
{
struct sem_array *sma;
time64_t semotime;
- int id = 0;
int err;
memset(semid64, 0, sizeof(*semid64));
@@ -1234,7 +1233,6 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
err = PTR_ERR(sma);
goto out_unlock;
}
- id = sma->sem_perm.id;
} else { /* IPC_STAT */
sma = sem_obtain_object_check(ns, semid);
if (IS_ERR(sma)) {
@@ -1274,10 +1272,20 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
#endif
semid64->sem_nsems = sma->sem_nsems;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * SEM_STAT and SEM_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = sma->sem_perm.id;
+ }
ipc_unlock_object(&sma->sem_perm);
- rcu_read_unlock();
- return id;
-
out_unlock:
rcu_read_unlock();
return err;
diff --git a/ipc/shm.c b/ipc/shm.c
index 051a3e1fb8df..f3bae59bed08 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -949,7 +949,6 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
int cmd, struct shmid64_ds *tbuf)
{
struct shmid_kernel *shp;
- int id = 0;
int err;
memset(tbuf, 0, sizeof(*tbuf));
@@ -961,7 +960,6 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
err = PTR_ERR(shp);
goto out_unlock;
}
- id = shp->shm_perm.id;
} else { /* IPC_STAT */
shp = shm_obtain_object_check(ns, shmid);
if (IS_ERR(shp)) {
@@ -1011,10 +1009,21 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
tbuf->shm_lpid = pid_vnr(shp->shm_lprid);
tbuf->shm_nattch = shp->shm_nattch;
- ipc_unlock_object(&shp->shm_perm);
- rcu_read_unlock();
- return id;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * SHM_STAT and SHM_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = shp->shm_perm.id;
+ }
+ ipc_unlock_object(&shp->shm_perm);
out_unlock:
rcu_read_unlock();
return err;
--
2.17.1