Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751726AbdGaImo (ORCPT ); Mon, 31 Jul 2017 04:42:44 -0400 Received: from mail-wm0-f47.google.com ([74.125.82.47]:34996 "EHLO mail-wm0-f47.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750807AbdGaImm (ORCPT ); Mon, 31 Jul 2017 04:42:42 -0400 Date: Mon, 31 Jul 2017 10:42:37 +0200 From: Guillaume Knispel To: Andrew Morton , Manfred Spraul , Kees Cook , Davidlohr Bueso Cc: Alexey Dobriyan , "Eric W. Biederman" , "Peter Zijlstra (Intel)" , Ingo Molnar , Sebastian Andrzej Siewior , Serge Hallyn , Andrey Vagin , Guillaume Knispel , Marc Pardo , linux-kernel@vger.kernel.org Subject: [PATCH] ipc: optimize semget/shmget/msgget for lots of keys Message-ID: <20170731084237.GA123231@ubuntu> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Content-Transfer-Encoding: 8bit User-Agent: Mutt/1.5.24 (2015-08-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 14462 Lines: 502 ipc_findkey() scanned all objects to look for the wanted key. This is slow when using a high number of keys, for example on an i5 laptop the following loop took 17 s, with last semget calls taking ~1 ms each. for (int i = 0, k=0x424242; i < 31900; ++i) semget(k++, 1, IPC_CREAT | 0600); This change adds an rhashtable of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n). The above loop now takes ~10 ms. Each lookup-only semget() call can take between ~120 ns and a few µs. Rarely, some creations can take a few dozen of µs. I also ran 'syscalls-ipc' and 'containers' of ltp 20170516, including with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP, and CONFIG_DEBUG_OBJECTS_RCU_HEAD, and did not detect any regression (compared to 4.13-rc2, running Ubuntu 16.04.2) [I did not manage to enable CONFIG_PROVE_RCU] Signed-off-by: Guillaume Knispel Reviewed-by: Marc Pardo --- include/linux/ipc.h | 3 ++ include/linux/ipc_namespace.h | 3 ++ ipc/msg.c | 10 +++-- ipc/namespace.c | 20 +++++++-- ipc/sem.c | 11 +++-- ipc/shm.c | 14 +++--- ipc/util.c | 99 ++++++++++++++++++++++++++++++++----------- ipc/util.h | 21 +++++---- 8 files changed, 130 insertions(+), 51 deletions(-) diff --git a/include/linux/ipc.h b/include/linux/ipc.h index fadd579..f480f22 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -3,6 +3,7 @@ #include #include +#include #include #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ @@ -21,6 +22,8 @@ struct kern_ipc_perm { unsigned long seq; void *security; + struct rhash_head khtnode; + struct rcu_head rcu; atomic_t refcount; } ____cacheline_aligned_in_smp __randomize_layout; diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 65327ee..aea3b81 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -7,15 +7,18 @@ #include #include #include +#include struct user_namespace; struct ipc_ids { int in_use; unsigned short seq; + bool tables_initialized; struct rw_semaphore rwsem; struct idr ipcs_idr; int next_id; + struct rhashtable key_ht; }; struct ipc_namespace { diff --git a/ipc/msg.c b/ipc/msg.c index 5b25e07..f123042 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -1011,7 +1011,7 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, } -void msg_init_ns(struct ipc_namespace *ns) +int msg_init_ns(struct ipc_namespace *ns) { ns->msg_ctlmax = MSGMAX; ns->msg_ctlmnb = MSGMNB; @@ -1019,7 +1019,7 @@ void msg_init_ns(struct ipc_namespace *ns) atomic_set(&ns->msg_bytes, 0); atomic_set(&ns->msg_hdrs, 0); - ipc_init_ids(&ns->ids[IPC_MSG_IDS]); + return ipc_init_ids(&ns->ids[IPC_MSG_IDS]); } #ifdef CONFIG_IPC_NS @@ -1027,6 +1027,7 @@ void msg_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &msg_ids(ns), freeque); idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); + rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht); } #endif @@ -1057,11 +1058,12 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it) } #endif -void __init msg_init(void) +int __init msg_init(void) { - msg_init_ns(&init_ipc_ns); + const int err = msg_init_ns(&init_ipc_ns); ipc_init_proc_interface("sysvipc/msg", " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", IPC_MSG_IDS, sysvipc_msg_proc_show); + return err; } diff --git a/ipc/namespace.c b/ipc/namespace.c index b4d80f9..4130f48 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -54,16 +54,28 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; - err = mq_init_ns(ns); + err = sem_init_ns(ns); if (err) goto fail_put; + err = msg_init_ns(ns); + if (err) + goto fail_destroy_sem; + err = shm_init_ns(ns); + if (err) + goto fail_destroy_msg; - sem_init_ns(ns); - msg_init_ns(ns); - shm_init_ns(ns); + err = mq_init_ns(ns); + if (err) + goto fail_destroy_shm; return ns; +fail_destroy_shm: + shm_exit_ns(ns); +fail_destroy_msg: + msg_exit_ns(ns); +fail_destroy_sem: + sem_exit_ns(ns); fail_put: put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); diff --git a/ipc/sem.c b/ipc/sem.c index 9e70cd7..b13ecfd 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -185,14 +185,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define sc_semopm sem_ctls[2] #define sc_semmni sem_ctls[3] -void sem_init_ns(struct ipc_namespace *ns) +int sem_init_ns(struct ipc_namespace *ns) { ns->sc_semmsl = SEMMSL; ns->sc_semmns = SEMMNS; ns->sc_semopm = SEMOPM; ns->sc_semmni = SEMMNI; ns->used_sems = 0; - ipc_init_ids(&ns->ids[IPC_SEM_IDS]); + return ipc_init_ids(&ns->ids[IPC_SEM_IDS]); } #ifdef CONFIG_IPC_NS @@ -200,15 +200,18 @@ void sem_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &sem_ids(ns), freeary); idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); + rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht); } #endif -void __init sem_init(void) +int __init sem_init(void) { - sem_init_ns(&init_ipc_ns); + const int err = sem_init_ns(&init_ipc_ns); + ipc_init_proc_interface("sysvipc/sem", " key semid perms nsems uid gid cuid cgid otime ctime\n", IPC_SEM_IDS, sysvipc_sem_proc_show); + return err; } /** diff --git a/ipc/shm.c b/ipc/shm.c index 28a4448..5fa7576 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -72,14 +72,14 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif -void shm_init_ns(struct ipc_namespace *ns) +int shm_init_ns(struct ipc_namespace *ns) { ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; ns->shm_rmid_forced = 0; ns->shm_tot = 0; - ipc_init_ids(&shm_ids(ns)); + return ipc_init_ids(&shm_ids(ns)); } /* @@ -95,7 +95,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) if (shp->shm_nattch) { shp->shm_perm.mode |= SHM_DEST; /* Do not find it any more */ - shp->shm_perm.key = IPC_PRIVATE; + ipc_set_key_private(&shm_ids(ns), &shp->shm_perm); shm_unlock(shp); } else shm_destroy(ns, shp); @@ -106,16 +106,18 @@ void shm_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &shm_ids(ns), do_shm_rmid); idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); + rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); } #endif static int __init ipc_ns_init(void) { - shm_init_ns(&init_ipc_ns); - return 0; + const int err = shm_init_ns(&init_ipc_ns); + WARN(err, "ipc: sysV shm_init_ns failed: %d\n", err); + return err; } -pure_initcall(ipc_ns_init); +core_initcall(ipc_ns_init); void __init shm_init(void) { diff --git a/ipc/util.c b/ipc/util.c index 1a2cb02..621e9fc 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -83,27 +83,45 @@ struct ipc_proc_iface { */ static int __init ipc_init(void) { - sem_init(); - msg_init(); + int err_sem, err_msg; + + err_sem = sem_init(); + WARN(err_sem, "ipc: sysV sem_init failed: %d\n", err_sem); + err_msg = msg_init(); + WARN(err_msg, "ipc: sysV msg_init failed: %d\n", err_msg); shm_init(); - return 0; + + return err_msg ? err_msg : err_sem; } device_initcall(ipc_init); +static const struct rhashtable_params ipc_kht_params = { + .head_offset = offsetof(struct kern_ipc_perm, khtnode), + .key_offset = offsetof(struct kern_ipc_perm, key), + .key_len = FIELD_SIZEOF(struct kern_ipc_perm, key), + .automatic_shrinking = true, +}; + /** * ipc_init_ids - initialise ipc identifiers * @ids: ipc identifier set * * Set up the sequence range to use for the ipc identifier range (limited - * below IPCMNI) then initialise the ids idr. + * below IPCMNI) then initialise the keys hashtable and ids idr. */ -void ipc_init_ids(struct ipc_ids *ids) +int ipc_init_ids(struct ipc_ids *ids) { + int err; ids->in_use = 0; ids->seq = 0; ids->next_id = -1; init_rwsem(&ids->rwsem); + err = rhashtable_init(&ids->key_ht, &ipc_kht_params); + if (err) + return err; idr_init(&ids->ipcs_idr); + ids->tables_initialized = true; + return 0; } #ifdef CONFIG_PROC_FS @@ -147,28 +165,20 @@ void __init ipc_init_proc_interface(const char *path, const char *header, * Returns the locked pointer to the ipc structure if found or NULL * otherwise. If key is found ipc points to the owning ipc structure * - * Called with ipc_ids.rwsem held. + * Called with writer ipc_ids.rwsem held. */ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) { - struct kern_ipc_perm *ipc; - int next_id; - int total; - - for (total = 0, next_id = 0; total < ids->in_use; next_id++) { - ipc = idr_find(&ids->ipcs_idr, next_id); - - if (ipc == NULL) - continue; + struct kern_ipc_perm *ipcp = NULL; - if (ipc->key != key) { - total++; - continue; - } + if (likely(ids->tables_initialized)) + ipcp = rhashtable_lookup_fast(&ids->key_ht, &key, + ipc_kht_params); + if (ipcp) { rcu_read_lock(); - ipc_lock_object(ipc); - return ipc; + ipc_lock_object(ipcp); + return ipcp; } return NULL; @@ -221,13 +231,13 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) { kuid_t euid; kgid_t egid; - int id; + int id, err; int next_id = ids->next_id; if (size > IPCMNI) size = IPCMNI; - if (ids->in_use >= size) + if (!ids->tables_initialized || ids->in_use >= size) return -ENOSPC; idr_preload(GFP_KERNEL); @@ -246,6 +256,15 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); idr_preload_end(); + + if (id >= 0 && new->key != IPC_PRIVATE) { + err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode, + ipc_kht_params); + if (err < 0) { + idr_remove(&ids->ipcs_idr, id); + id = err; + } + } if (id < 0) { spin_unlock(&new->lock); rcu_read_unlock(); @@ -377,6 +396,20 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, return err; } +/** + * ipc_kht_remove - remove an ipc from the key hashtable + * @ids: ipc identifier set + * @ipcp: ipc perm structure containing the key to remove + * + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held + * before this function is called, and remain locked on the exit. + */ +static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) +{ + if (ipcp->key != IPC_PRIVATE) + rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, + ipc_kht_params); +} /** * ipc_rmid - remove an ipc identifier @@ -391,10 +424,25 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) int lid = ipcid_to_idx(ipcp->id); idr_remove(&ids->ipcs_idr, lid); + ipc_kht_remove(ids, ipcp); ids->in_use--; ipcp->deleted = true; } +/** + * ipc_set_key_private - switch the key of an existing ipc to IPC_PRIVATE + * @ids: ipc identifier set + * @ipcp: ipc perm structure containing the key to modify + * + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held + * before this function is called, and remain locked on the exit. + */ +void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) +{ + ipc_kht_remove(ids, ipcp); + ipcp->key = IPC_PRIVATE; +} + int ipc_rcu_getref(struct kern_ipc_perm *ptr) { return atomic_inc_not_zero(&ptr->refcount); @@ -485,7 +533,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out) } /** - * ipc_obtain_object + * ipc_obtain_object_idr * @ids: ipc identifier set * @id: ipc id to look for * @@ -499,6 +547,9 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id) struct kern_ipc_perm *out; int lid = ipcid_to_idx(id); + if (unlikely(!ids->tables_initialized)) + return ERR_PTR(-EINVAL); + out = idr_find(&ids->ipcs_idr, lid); if (!out) return ERR_PTR(-EINVAL); diff --git a/ipc/util.h b/ipc/util.h index c692010..80c9f51 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -15,8 +15,8 @@ #define SEQ_MULTIPLIER (IPCMNI) -void sem_init(void); -void msg_init(void); +int sem_init(void); +int msg_init(void); void shm_init(void); struct ipc_namespace; @@ -30,17 +30,17 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { } #endif #ifdef CONFIG_SYSVIPC -void sem_init_ns(struct ipc_namespace *ns); -void msg_init_ns(struct ipc_namespace *ns); -void shm_init_ns(struct ipc_namespace *ns); +int sem_init_ns(struct ipc_namespace *ns); +int msg_init_ns(struct ipc_namespace *ns); +int shm_init_ns(struct ipc_namespace *ns); void sem_exit_ns(struct ipc_namespace *ns); void msg_exit_ns(struct ipc_namespace *ns); void shm_exit_ns(struct ipc_namespace *ns); #else -static inline void sem_init_ns(struct ipc_namespace *ns) { } -static inline void msg_init_ns(struct ipc_namespace *ns) { } -static inline void shm_init_ns(struct ipc_namespace *ns) { } +static inline int sem_init_ns(struct ipc_namespace *ns) { return 0; } +static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; } +static inline int shm_init_ns(struct ipc_namespace *ns) { return 0; } static inline void sem_exit_ns(struct ipc_namespace *ns) { } static inline void msg_exit_ns(struct ipc_namespace *ns) { } @@ -79,7 +79,7 @@ struct ipc_ops { struct seq_file; struct ipc_ids; -void ipc_init_ids(struct ipc_ids *); +int ipc_init_ids(struct ipc_ids *); #ifdef CONFIG_PROC_FS void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)); @@ -104,6 +104,9 @@ int ipc_get_maxid(struct ipc_ids *); /* must be called with both locks acquired. */ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); +/* must be called with both locks acquired. */ +void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *); + /* must be called with ipcp locked */ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg); -- 2.7.4