2017-08-31 17:21:14

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH -next 0/4] sysvipc: ipc-key management improvements

Hi,

Here are a few improvements I spotted while eyeballing Guillaume's
rhashtable implementation for ipc keys. The first and fourth patches
are the interesting ones, the middle two are trivial.

Applies against today's -next.
Passes ltp ipc related testcases.

Thanks!

Davidlohr Bueso (4):
sysvipc: unteach ids->next_id for !CHECKPOINT_RESTORE
sysvipc: duplicate lock comments wrt ipc_addid()
sysvipc: properly name ipc_addid() limit parameter
sysvipc: make get_maxid O(1) again

arch/x86/Kconfig | 2 +-
include/linux/ipc_namespace.h | 3 ++
ipc/sem.c | 1 +
ipc/shm.c | 1 +
ipc/util.c | 97 ++++++++++++++++++++++++-------------------
ipc/util.h | 26 ++++++++----
6 files changed, 79 insertions(+), 51 deletions(-)

--
2.12.0


2017-08-31 17:21:21

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 3/4] sysvipc: properly name ipc_addid() limit parameter

The parameter is better understood as a limit rather than a size,
exactly as the function comment indicates. Rename it.

Signed-off-by: Davidlohr Bueso <[email protected]>
---
ipc/util.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ipc/util.c b/ipc/util.c
index fb69152705e6..d4091665f439 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -261,7 +261,7 @@ static inline int ipc_buildid(int id, struct ipc_ids *ids,
* ipc_addid - add an ipc identifier
* @ids: ipc identifier set
* @new: new ipc permission set
- * @size: limit for the number of used ids
+ * @limit: limit for the number of used ids
*
* Add an entry 'new' to the ipc ids idr. The permissions object is
* initialised and the first free entry is set up and the id assigned
@@ -270,16 +270,16 @@ static inline int ipc_buildid(int id, struct ipc_ids *ids,
*
* Called with writer ipc_ids.rwsem held.
*/
-int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
+int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
{
kuid_t euid;
kgid_t egid;
int id, err;

- if (size > IPCMNI)
- size = IPCMNI;
+ if (limit > IPCMNI)
+ limit = IPCMNI;

- if (!ids->tables_initialized || ids->in_use >= size)
+ if (!ids->tables_initialized || ids->in_use >= limit)
return -ENOSPC;

idr_preload(GFP_KERNEL);
--
2.12.0

2017-08-31 17:21:28

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 4/4] sysvipc: make get_maxid O(1) again

For a custom microbenchmark on a 3.30GHz Xeon SandyBridge,
which calls IPC_STAT over and over, it was calculated that,
on average, the cost of ipc_get_maxid() for an increasing number
of keys was:

10 keys: ~900 cycles
100 keys: ~15000 cycles
1000 keys: ~150000 cycles
10000 keys: ~2100000 cycles

This is unsurprising as maxid is currently O(n).

By keeping max_id available in O(1), we save all those cycles
for each semctl(_STAT) command -- which some real (customer)
workloads actually poll on -- since the repeated idr_find calls
can be expensive. Note that maxid used to be O(1), until
7ca7e564e04 (ipc: store ipcs into IDRs). The cost is the extra
idr_find calls when doing RMIDs, but we simply walk backwards from
the removed id, and it should not take too many iterations to find
the new value.

Signed-off-by: Davidlohr Bueso <[email protected]>
---
arch/x86/Kconfig | 2 +-
include/linux/ipc_namespace.h | 1 +
ipc/util.c | 43 +++++++++++++------------------------------
ipc/util.h | 21 ++++++++++++++++++---
4 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 44d139b88c32..301fe235367b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,7 +55,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API if X86_64
- select ARCH_HAS_REFCOUNT
+ select ARCH_HAS_REFCOUNT if BROKEN
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 058051566ced..c59b8ddee191 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -18,6 +18,7 @@ struct ipc_ids {
bool tables_initialized;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
+ int max_id;
#ifdef CONFIG_CHECKPOINT_RESTORE
int next_id;
#endif
diff --git a/ipc/util.c b/ipc/util.c
index d4091665f439..45e84203c3fe 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -121,6 +121,7 @@ int ipc_init_ids(struct ipc_ids *ids)
return err;
idr_init(&ids->ipcs_idr);
ids->tables_initialized = true;
+ ids->max_id = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
#endif
@@ -187,36 +188,6 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
return NULL;
}

-/**
- * ipc_get_maxid - get the last assigned id
- * @ids: ipc identifier set
- *
- * Called with ipc_ids.rwsem held.
- */
-int ipc_get_maxid(struct ipc_ids *ids)
-{
- struct kern_ipc_perm *ipc;
- int max_id = -1;
- int total, id;
-
- if (ids->in_use == 0)
- return -1;
-
- if (ids->in_use == IPCMNI)
- return IPCMNI - 1;
-
- /* Look for the last assigned id */
- total = 0;
- for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
- ipc = idr_find(&ids->ipcs_idr, id);
- if (ipc != NULL) {
- max_id = id;
- total++;
- }
- }
- return max_id;
-}
-
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
* Specify desired id for next allocated IPC object.
@@ -312,6 +283,9 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
}

ids->in_use++;
+ if (id > ids->max_id)
+ ids->max_id = id;
+
new->id = ipc_buildid(id, ids, new);

return id;
@@ -458,6 +432,15 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
ipc_kht_remove(ids, ipcp);
ids->in_use--;
ipcp->deleted = true;
+
+ if (unlikely(lid == ids->max_id)) {
+ do {
+ lid--;
+ if(lid == -1)
+ break;
+ } while (!idr_find(&ids->ipcs_idr, lid));
+ ids->max_id = lid;
+ }
}

/**
diff --git a/ipc/util.h b/ipc/util.h
index 17c8d2f14990..eabca0e37ce1 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -12,6 +12,7 @@

#include <linux/unistd.h>
#include <linux/err.h>
+#include <linux/ipc_namespace.h>

#define SEQ_MULTIPLIER (IPCMNI)

@@ -98,9 +99,6 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
/* must be called with ids->rwsem acquired for writing */
int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);

-/* must be called with ids->rwsem acquired for reading */
-int ipc_get_maxid(struct ipc_ids *);
-
/* must be called with both locks acquired. */
void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);

@@ -110,6 +108,23 @@ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *);
/* must be called with ipcp locked */
int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);

+/**
+ * ipc_get_maxid - get the last assigned id
+ * @ids: ipc identifier set
+ *
+ * Called with ipc_ids.rwsem held for reading.
+ */
+static inline int ipc_get_maxid(struct ipc_ids *ids)
+{
+ if (ids->in_use == 0)
+ return -1;
+
+ if (ids->in_use == IPCMNI)
+ return IPCMNI - 1;
+
+ return ids->max_id;
+}
+
/*
* For allocation that need to be freed by RCU.
* Objects are reference counted, they start with reference count 1.
--
2.12.0

2017-08-31 17:21:18

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 1/4] sysvipc: unteach ids->next_id for !CHECKPOINT_RESTORE

The next_id object-allocation functionality was introduced in
03f595668017f (ipc: add sysctl to specify desired next object id).
Given that these new entries are _only_ exported under the
CONFIG_CHECKPOINT_RESTORE option, there is no point for the
common case to even know about ->next_id. As such, rewrite
ipc_buildid() so that it can do away with the field, as well as
with unnecessary branches, when adding a new identifier. The end
result also better differentiates the two cases, so the code ends
up being cleaner, albeit with some small duplication for the
default case.

Signed-off-by: Davidlohr Bueso <[email protected]>
---
include/linux/ipc_namespace.h | 2 ++
ipc/util.c | 60 ++++++++++++++++++++++++++++++++-----------
ipc/util.h | 5 ----
3 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 83f0bf7a587d..058051566ced 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -18,7 +18,9 @@ struct ipc_ids {
bool tables_initialized;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
+#ifdef CONFIG_CHECKPOINT_RESTORE
int next_id;
+#endif
struct rhashtable key_ht;
};

diff --git a/ipc/util.c b/ipc/util.c
index 78755873cc5b..fb69152705e6 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -115,13 +115,15 @@ int ipc_init_ids(struct ipc_ids *ids)
int err;
ids->in_use = 0;
ids->seq = 0;
- ids->next_id = -1;
init_rwsem(&ids->rwsem);
err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
if (err)
return err;
idr_init(&ids->ipcs_idr);
ids->tables_initialized = true;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ ids->next_id = -1;
+#endif
return 0;
}

@@ -215,6 +217,46 @@ int ipc_get_maxid(struct ipc_ids *ids)
return max_id;
}

+#ifdef CONFIG_CHECKPOINT_RESTORE
+/*
+ * Specify desired id for next allocated IPC object.
+ */
+#define ipc_idr_alloc(ids, new) \
+ idr_alloc(&(ids)->ipcs_idr, (new), \
+ (ids)->next_id < 0 ? 0: ipcid_to_idx((ids)->next_id), \
+ 0, GFP_NOWAIT);
+
+static inline int ipc_buildid(int id, struct ipc_ids *ids,
+ struct kern_ipc_perm *new)
+{
+ if (ids->next_id < 0) { /* default, behave as !CHECKPOINT_RESTORE */
+ new->seq = ids->seq++;
+ if (ids->seq > IPCID_SEQ_MAX)
+ ids->seq = 0;
+ } else {
+ new->seq = ipcid_to_seqx(ids->next_id);
+ ids->next_id = -1;
+ }
+
+ return SEQ_MULTIPLIER * new->seq + id;
+}
+
+#else
+#define ipc_idr_alloc(ids, new) \
+ idr_alloc(&(ids)->ipcs_idr, (new), 0, 0, GFP_NOWAIT);
+
+static inline int ipc_buildid(int id, struct ipc_ids *ids,
+ struct kern_ipc_perm *new)
+{
+ new->seq = ids->seq++;
+ if (ids->seq > IPCID_SEQ_MAX)
+ ids->seq = 0;
+
+ return SEQ_MULTIPLIER * new->seq + id;
+}
+
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
/**
* ipc_addid - add an ipc identifier
* @ids: ipc identifier set
@@ -233,7 +275,6 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
kuid_t euid;
kgid_t egid;
int id, err;
- int next_id = ids->next_id;

if (size > IPCMNI)
size = IPCMNI;
@@ -253,9 +294,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
new->cuid = new->uid = euid;
new->gid = new->cgid = egid;

- id = idr_alloc(&ids->ipcs_idr, new,
- (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0,
- GFP_NOWAIT);
+ id = ipc_idr_alloc(ids, new);
idr_preload_end();

if (id >= 0 && new->key != IPC_PRIVATE) {
@@ -273,17 +312,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
}

ids->in_use++;
+ new->id = ipc_buildid(id, ids, new);

- if (next_id < 0) {
- new->seq = ids->seq++;
- if (ids->seq > IPCID_SEQ_MAX)
- ids->seq = 0;
- } else {
- new->seq = ipcid_to_seqx(next_id);
- ids->next_id = -1;
- }
-
- new->id = ipc_buildid(id, new->seq);
return id;
}

diff --git a/ipc/util.h b/ipc/util.h
index 80c9f51c3f07..17c8d2f14990 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -145,11 +145,6 @@ extern struct msg_msg *load_msg(const void __user *src, size_t len);
extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);

-static inline int ipc_buildid(int id, int seq)
-{
- return SEQ_MULTIPLIER * seq + id;
-}
-
static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
{
return uid / SEQ_MULTIPLIER != ipcp->seq;
--
2.12.0

2017-08-31 17:21:53

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 2/4] sysvipc: duplicate lock comments wrt ipc_addid()

The comment in the msgqueue code at the ipc_addid() call site is
quite useful, imo. Duplicate it for shm and semaphores.

Signed-off-by: Davidlohr Bueso <[email protected]>
---
ipc/sem.c | 1 +
ipc/shm.c | 1 +
2 files changed, 2 insertions(+)

diff --git a/ipc/sem.c b/ipc/sem.c
index 013c7981f3c7..29c6ab6badc2 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -514,6 +514,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
sma->sem_nsems = nsems;
sma->sem_ctime = get_seconds();

+ /* ipc_addid() locks sma upon success. */
retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
if (retval < 0) {
call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
diff --git a/ipc/shm.c b/ipc/shm.c
index 8fc97beb5234..ea64ed8782b3 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -600,6 +600,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_file = file;
shp->shm_creator = current;

+ /* ipc_addid() locks shp upon success. */
error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
if (error < 0)
goto no_id;
--
2.12.0

2017-08-31 21:16:44

by Davidlohr Bueso

[permalink] [raw]
Subject: Re: [PATCH 4/4] sysvipc: make get_maxid O(1) again

On Thu, 31 Aug 2017, Davidlohr Bueso wrote:
>diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>index 44d139b88c32..301fe235367b 100644
>--- a/arch/x86/Kconfig
>+++ b/arch/x86/Kconfig
>@@ -55,7 +55,7 @@ config X86
> select ARCH_HAS_KCOV if X86_64
> select ARCH_HAS_MMIO_FLUSH
> select ARCH_HAS_PMEM_API if X86_64
>- select ARCH_HAS_REFCOUNT
>+ select ARCH_HAS_REFCOUNT if BROKEN

*sigh* sorry, I forgot this was there, otherwise nothing works :/

Here's v2. Thanks.

-----8<------------------------------------------------------
>From 4f31964d4ae75d752c913a03e9c751b13381b561 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <[email protected]>
Date: Thu, 31 Aug 2017 10:34:35 -0700
Subject: [PATCH v2 4/4] sysvipc: make get_maxid O(1) again

For a custom microbenchmark on a 3.30GHz Xeon SandyBridge,
which calls IPC_STAT over and over, it was calculated that,
on average, the cost of ipc_get_maxid() for an increasing number
of keys was:

10 keys: ~900 cycles
100 keys: ~15000 cycles
1000 keys: ~150000 cycles
10000 keys: ~2100000 cycles

This is unsurprising as maxid is currently O(n).

By keeping max_id available in O(1), we save all those cycles
for each semctl(_STAT) command -- which some real (customer)
workloads actually poll on -- since the repeated idr_find calls
can be expensive. Note that maxid used to be O(1), until
7ca7e564e04 (ipc: store ipcs into IDRs). The cost is the extra
idr_find calls when doing RMIDs, but we simply walk backwards from
the removed id, and it should not take too many iterations to find
the new value.

Signed-off-by: Davidlohr Bueso <[email protected]>
---
include/linux/ipc_namespace.h | 1 +
ipc/util.c | 43 +++++++++++++------------------------------
ipc/util.h | 21 ++++++++++++++++++---
3 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 058051566ced..c59b8ddee191 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -18,6 +18,7 @@ struct ipc_ids {
bool tables_initialized;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
+ int max_id;
#ifdef CONFIG_CHECKPOINT_RESTORE
int next_id;
#endif
diff --git a/ipc/util.c b/ipc/util.c
index d4091665f439..45e84203c3fe 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -121,6 +121,7 @@ int ipc_init_ids(struct ipc_ids *ids)
return err;
idr_init(&ids->ipcs_idr);
ids->tables_initialized = true;
+ ids->max_id = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
#endif
@@ -187,36 +188,6 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
return NULL;
}

-/**
- * ipc_get_maxid - get the last assigned id
- * @ids: ipc identifier set
- *
- * Called with ipc_ids.rwsem held.
- */
-int ipc_get_maxid(struct ipc_ids *ids)
-{
- struct kern_ipc_perm *ipc;
- int max_id = -1;
- int total, id;
-
- if (ids->in_use == 0)
- return -1;
-
- if (ids->in_use == IPCMNI)
- return IPCMNI - 1;
-
- /* Look for the last assigned id */
- total = 0;
- for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
- ipc = idr_find(&ids->ipcs_idr, id);
- if (ipc != NULL) {
- max_id = id;
- total++;
- }
- }
- return max_id;
-}
-
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
* Specify desired id for next allocated IPC object.
@@ -312,6 +283,9 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
}

ids->in_use++;
+ if (id > ids->max_id)
+ ids->max_id = id;
+
new->id = ipc_buildid(id, ids, new);

return id;
@@ -458,6 +432,15 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
ipc_kht_remove(ids, ipcp);
ids->in_use--;
ipcp->deleted = true;
+
+ if (unlikely(lid == ids->max_id)) {
+ do {
+ lid--;
+ if(lid == -1)
+ break;
+ } while (!idr_find(&ids->ipcs_idr, lid));
+ ids->max_id = lid;
+ }
}

/**
diff --git a/ipc/util.h b/ipc/util.h
index 17c8d2f14990..eabca0e37ce1 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -12,6 +12,7 @@

#include <linux/unistd.h>
#include <linux/err.h>
+#include <linux/ipc_namespace.h>

#define SEQ_MULTIPLIER (IPCMNI)

@@ -98,9 +99,6 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
/* must be called with ids->rwsem acquired for writing */
int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);

-/* must be called with ids->rwsem acquired for reading */
-int ipc_get_maxid(struct ipc_ids *);
-
/* must be called with both locks acquired. */
void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);

@@ -110,6 +108,23 @@ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *);
/* must be called with ipcp locked */
int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);

+/**
+ * ipc_get_maxid - get the last assigned id
+ * @ids: ipc identifier set
+ *
+ * Called with ipc_ids.rwsem held for reading.
+ */
+static inline int ipc_get_maxid(struct ipc_ids *ids)
+{
+ if (ids->in_use == 0)
+ return -1;
+
+ if (ids->in_use == IPCMNI)
+ return IPCMNI - 1;
+
+ return ids->max_id;
+}
+
/*
* For allocation that need to be freed by RCU.
* Objects are reference counted, they start with reference count 1.
--
2.12.0