2022-09-02 07:18:28

by Sun, Jiebin

Subject: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counters
greatly improves the performance. Since there is one unique
ipc namespace, the additional memory cost is minimal. Reading
of the counts is done in the msgctl call, which is infrequent,
so the need to sum up the per-CPU counts is also infrequent.

Apply the patch and test with pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <[email protected]>
---
include/linux/ipc_namespace.h | 5 +++--
include/linux/percpu_counter.h | 9 +++++++++
ipc/msg.c | 30 +++++++++++++++++-------------
lib/percpu_counter.c | 6 ++++++
4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>

struct user_namespace;

@@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
- atomic_t msg_bytes;
- atomic_t msg_hdrs;
+ struct percpu_counter percpu_msg_bytes;
+ struct percpu_counter percpu_msg_hdrs;

size_t shm_ctlmax;
size_t shm_ctlall;
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..6eec30122cc3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -40,6 +40,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,

void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
@@ -138,6 +139,14 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
preempt_enable();
}

+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ preempt_disable();
+ fbc->count += amount;
+ preempt_enable();
+}
+
static inline void
percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..1b498537f05e 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>

#include <asm/current.h>
#include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();

list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, -(msq->q_cbytes));
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+ msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}

@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}

err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, -(msg->m_ts));
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
ss_wakeup(msq, &wake_q, false);

goto out_unlock0;
@@ -1303,14 +1305,16 @@ void msg_init_ns(struct ipc_namespace *ns)
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;

- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ed610b75dc32..d33cb750962a 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,6 +72,12 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
}
EXPORT_SYMBOL(percpu_counter_set);

+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ this_cpu_add(*fbc->counters, amount);
+}
+EXPORT_SYMBOL(percpu_counter_add_local);
+
/*
* This function is both preempt and irq safe. The former is due to explicit
* preemption disable. The latter is guaranteed by the fact that the slow path
--
2.31.1


2022-09-02 16:14:59

by Andrew Morton

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter

On Fri, 2 Sep 2022 23:22:43 +0800 Jiebin Sun <[email protected]> wrote:

> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counters
> greatly improve the performance. Since there is one unique
> ipc namespace, additional memory cost is minimal. Reading
> of the count done in msgctl call, which is infrequent. So
> the need to sum up the counts in each CPU is infrequent.
>
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
>
> Score gain: 3.38x

So this test became 3x faster?

> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)
>
> ...
>
> @@ -138,6 +139,14 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
> preempt_enable();
> }
>
> +static inline void
> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> + preempt_disable();
> + fbc->count += amount;
> + preempt_enable();
> +}

What's this and why is it added?

It would be best to propose this as a separate preparatory patch.
Fully changelogged and perhaps even with a code comment explaining why
and when it should be used.

Thanks.

2022-09-02 16:45:21

by Shakeel Butt

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter

On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <[email protected]> wrote:
>
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counters
> greatly improve the performance. Since there is one unique
> ipc namespace, additional memory cost is minimal. Reading
> of the count done in msgctl call, which is infrequent. So
> the need to sum up the counts in each CPU is infrequent.
>
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
>
> Score gain: 3.38x
>
> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)
>
> Signed-off-by: Jiebin Sun <[email protected]>
[...]
>
> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> + this_cpu_add(*fbc->counters, amount);
> +}
> +EXPORT_SYMBOL(percpu_counter_add_local);

Why not percpu_counter_add()? This may drift the fbc->count more than
batch*nr_cpus. I am assuming that is not the issue for you as you
always do an expensive sum in the slow path. As Andrew asked, this
should be a separate patch.
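
(For illustration only: a simplified userspace model of the batching
behaviour -- not the real lib/percpu_counter.c code -- showing how the
global count can lag the true total by up to about batch * nr_cpus
until someone does a full sum:)

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS	4
#define BATCH	32

static long long global_count;		/* models fbc->count (s64) */
static int local_count[NR_CPUS];	/* models the per-cpu s32 counters */

/* roughly what percpu_counter_add_batch() does, minus the locking */
static void counter_add(int cpu, long long amount)
{
	long long count = local_count[cpu] + amount;

	if (llabs(count) >= BATCH) {
		global_count += count;	/* fold into the global count */
		local_count[cpu] = 0;
	} else {
		local_count[cpu] = count;
	}
}

/* models percpu_counter_sum(): accurate, but walks every cpu */
static long long counter_sum(void)
{
	long long sum = global_count;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		sum += local_count[cpu];
	return sum;
}

int main(void)
{
	int cpu;

	/* leave BATCH - 1 pending on every cpu: worst case for a fast read */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		counter_add(cpu, BATCH - 1);

	printf("global count (fast read): %lld\n", global_count);	/* 0 */
	printf("summed count (accurate) : %lld\n", counter_sum());	/* 124 */
	return 0;
}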

2022-09-03 20:11:35

by Manfred Spraul

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter

Hi Jiebin,

On 9/2/22 17:22, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counters
> greatly improve the performance. Since there is one unique
> ipc namespace, additional memory cost is minimal.

With ipc namespaces, there is one struct per namespace, correct?

The cost is probably still ok, but the change log should be correct.


> @@ -1303,14 +1305,16 @@ void msg_init_ns(struct ipc_namespace *ns)
> ns->msg_ctlmnb = MSGMNB;
> ns->msg_ctlmni = MSGMNI;
>
> - atomic_set(&ns->msg_bytes, 0);
> - atomic_set(&ns->msg_hdrs, 0);
> + percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
> + percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
> ipc_init_ids(&ns->ids[IPC_MSG_IDS]);

These calls can fail. You must add error handling.
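
For example, something along these lines (untested sketch; the actual
fix may look different):

	ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
	if (ret)
		return ret;
	ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
	if (ret) {
		percpu_counter_destroy(&ns->percpu_msg_bytes);
		return ret;
	}

with msg_init_ns() returning int and the caller in ipc/namespace.c
checking the return value.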

--

    Manfred

2022-09-05 12:00:30

by Sun, Jiebin

Subject: [PATCH v2 0/2] ipc/msg: mitigate the lock contention in ipc/msg


Hi,

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch adds the new function percpu_counter_add_local, which
only updates the local counter without aggregating to the global counter.

The 2nd patch uses percpu_counter_add_local instead of an atomic
update in do_msgsnd and do_msgrcv. The msgctl_info syscall always
does a full sum, so there is no need for the global update done by
percpu_counter_add_batch; percpu_counter_add_local avoids it. The
sum operation in msgctl_info is infrequent, and its additional cost
is much smaller than the performance gained in do_msgsnd and
do_msgrcv.

Changes in v2:
1. Separate the original patch into two patches.
2. Add error handling for percpu_counter_init.

The performance gain increases as the number of workload threads grows.
Performance gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)


Regards
Jiebin

2022-09-05 12:06:04

by Sun, Jiebin

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter


On 9/3/2022 12:06 AM, Andrew Morton wrote:
> On Fri, 2 Sep 2022 23:22:43 +0800 Jiebin Sun <[email protected]> wrote:
>
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counters
>> greatly improve the performance. Since there is one unique
>> ipc namespace, additional memory cost is minimal. Reading
>> of the count done in msgctl call, which is infrequent. So
>> the need to sum up the counts in each CPU is infrequent.
>>
>> Apply the patch and test the pts/stress-ng-1.4.0
>> -- system v message passing (160 threads).
>>
>> Score gain: 3.38x
> So this test became 3x faster?

Yes. It is from the Phoronix Test Suite stress-ng-1.4.0 -- system v message
passing, run on a dual-socket ICX server. In this benchmark there are 160
pairs of threads doing msgsnd and msgrcv. The patch benefits more as the
number of workload threads increases.
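
(For reference, a minimal sketch of the kind of sender/receiver round
trip the benchmark exercises -- a single in-process queue here, whereas
stress-ng runs many sender/receiver thread pairs in parallel:)

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct my_msgbuf {
	long mtype;		/* must be > 0 */
	char mtext[64];		/* payload charged to ns->msg_bytes */
};

int main(void)
{
	struct my_msgbuf snd = { .mtype = 1 }, rcv;
	/* private queue; every msgsnd()/msgrcv() updates msg_bytes/msg_hdrs */
	int msqid = msgget(IPC_PRIVATE, IPC_CREAT | 0600);

	if (msqid < 0) {
		perror("msgget");
		return 1;
	}
	strcpy(snd.mtext, "ping");

	if (msgsnd(msqid, &snd, sizeof(snd.mtext), 0) < 0)
		perror("msgsnd");
	if (msgrcv(msqid, &rcv, sizeof(rcv.mtext), 1, 0) < 0)
		perror("msgrcv");
	else
		printf("received: %s\n", rcv.mtext);

	msgctl(msqid, IPC_RMID, NULL);	/* remove the queue */
	return 0;
}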

>
>> CPU: ICX 8380 x 2 sockets
>> Core number: 40 x 2 physical cores
>> Benchmark: pts/stress-ng-1.4.0
>> -- system v message passing (160 threads)
>>
>> ...
>>
>> @@ -138,6 +139,14 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
>> preempt_enable();
>> }
>>
>> +static inline void
>> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>> +{
>> + preempt_disable();
>> + fbc->count += amount;
>> + preempt_enable();
>> +}
> What's this and why is it added?
>
> It would be best to propose this as a separate preparatory patch.
> Fully changelogged and perhaps even with a code comment explaining why
> and when it should be used.
>
> Thanks.

As msgctl_info always does a full sum, there is no need to use
percpu_counter_add_batch, which does a global update whenever the counter
reaches the batch size. So we add percpu_counter_add_local for both the
SMP and non-SMP cases, which only does a local add to the percpu counter.
I have separated the original patch into two patches.

Thanks.

2022-09-05 12:15:34

by Sun, Jiebin

Subject: [PATCH v2 1/2] percpu: Add percpu_counter_add_local

Add percpu_counter_add_local, which only updates the local counter
without aggregating to the global counter.

Signed-off-by: Jiebin Sun <[email protected]>
---
include/linux/percpu_counter.h | 7 +++++++
lib/percpu_counter.c | 6 ++++++
2 files changed, 13 insertions(+)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..344d69ae0fb1 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -40,6 +40,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,

void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
@@ -138,6 +139,12 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
preempt_enable();
}

+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add(fbc, amount);
+}
+
static inline void
percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ed610b75dc32..d33cb750962a 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,6 +72,12 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
}
EXPORT_SYMBOL(percpu_counter_set);

+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ this_cpu_add(*fbc->counters, amount);
+}
+EXPORT_SYMBOL(percpu_counter_add_local);
+
/*
* This function is both preempt and irq safe. The former is due to explicit
* preemption disable. The latter is guaranteed by the fact that the slow path
--
2.31.1

2022-09-05 12:26:44

by Sun, Jiebin

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter


On 9/3/2022 12:27 AM, Shakeel Butt wrote:
> On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <[email protected]> wrote:
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counters
>> greatly improve the performance. Since there is one unique
>> ipc namespace, additional memory cost is minimal. Reading
>> of the count done in msgctl call, which is infrequent. So
>> the need to sum up the counts in each CPU is infrequent.
>>
>> Apply the patch and test the pts/stress-ng-1.4.0
>> -- system v message passing (160 threads).
>>
>> Score gain: 3.38x
>>
>> CPU: ICX 8380 x 2 sockets
>> Core number: 40 x 2 physical cores
>> Benchmark: pts/stress-ng-1.4.0
>> -- system v message passing (160 threads)
>>
>> Signed-off-by: Jiebin Sun <[email protected]>
> [...]
>> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>> +{
>> + this_cpu_add(*fbc->counters, amount);
>> +}
>> +EXPORT_SYMBOL(percpu_counter_add_local);
> Why not percpu_counter_add()? This may drift the fbc->count more than
> batch*nr_cpus. I am assuming that is not the issue for you as you
> always do an expensive sum in the slow path. As Andrew asked, this
> should be a separate patch.

Yes. msgctl_info always does a full sum, so there is no need for the
global update that percpu_counter_add does when the percpu counter
reaches the batch size; we add percpu_counter_add_local for this
case. The sum in the slow path is infrequent, so its additional cost
is much smaller than the atomic update done in do_msgsnd and
do_msgrcv every time. I have separated the original patch into two
patches.

Thanks.

2022-09-05 12:28:18

by Sun, Jiebin

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter


On 9/4/2022 3:35 AM, Manfred Spraul wrote:
> Hi Jiebin,
>
> On 9/2/22 17:22, Jiebin Sun wrote:
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counters
>> greatly improve the performance. Since there is one unique
>> ipc namespace, additional memory cost is minimal.
>
> With ipc namespaces, there is one struct per namespace, correct?
>
> The cost is probably still ok, but the change log should be correct.
>
Yes, that's what I wanted to summarize: there is only one percpu
counter struct per IPC msg namespace.

Thanks.

>
>> @@ -1303,14 +1305,16 @@ void msg_init_ns(struct ipc_namespace *ns)
>>       ns->msg_ctlmnb = MSGMNB;
>>       ns->msg_ctlmni = MSGMNI;
>>   -    atomic_set(&ns->msg_bytes, 0);
>> -    atomic_set(&ns->msg_hdrs, 0);
>> +    percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
>> +    percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
>>       ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
>
> These calls can fail. You must add error handling.

I have added error handling for percpu_counter_init.

Thanks.

>
> --
>
>     Manfred
>
>

2022-09-05 19:40:54

by Shakeel Butt

Subject: Re: [PATCH v2 1/2] percpu: Add percpu_counter_add_local

On Tue, Sep 06, 2022 at 03:35:16AM +0800, Jiebin Sun wrote:
> Add percpu_counter_add_local for only updating local counter
> without aggregating to global counter.

Please add why we need this. Who should use this and who shouldn't.

>
> Signed-off-by: Jiebin Sun <[email protected]>

[...]

> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
> index ed610b75dc32..d33cb750962a 100644
> --- a/lib/percpu_counter.c
> +++ b/lib/percpu_counter.c
> @@ -72,6 +72,12 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
> }
> EXPORT_SYMBOL(percpu_counter_set);
>

Add a doc comment here on why someone would want to use this?

> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> + this_cpu_add(*fbc->counters, amount);
> +}
> +EXPORT_SYMBOL(percpu_counter_add_local);
> +
> /*
> * This function is both preempt and irq safe. The former is due to explicit
> * preemption disable. The latter is guaranteed by the fact that the slow path
> --
> 2.31.1
>

2022-09-06 08:46:49

by Sun, Jiebin

Subject: [PATCH v3 0/2] ipc/msg: mitigate the lock contention in ipc/msg


Hi,

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch adds the new function percpu_counter_add_local, which only
updates the local counter without aggregating to the global counter. This
function can be combined with percpu_counter_sum if you need a highly
accurate counter. The combination can bring an obvious performance
improvement over percpu_counter_add_batch if the add is called frequently
and percpu_counter_sum is not in the critical path.

The 2nd patch uses percpu_counter instead of atomic updates in ipc/msg.
The msg_bytes and msg_hdrs atomic counters are frequently updated when the
IPC msg queue is in heavy use, causing heavy cache bouncing and overhead.
Changing them to percpu_counter greatly improves the performance. Since
there is one percpu struct per namespace, the additional memory cost is
minimal. Reading of the counts is done in the msgctl call, which is
infrequent, so the need to sum up the per-CPU counts is also infrequent.
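
(A minimal sketch of the intended usage pattern -- the counter name
below is made up, only the API calls are the real ones:)

	static struct percpu_counter foo_events;

	/* hot path: only touches this cpu's counter, no shared cacheline */
	static void foo_account_event(void)
	{
		percpu_counter_add_local(&foo_events, 1);
	}

	/* rare slow path: accurate, sums the global count plus every cpu */
	static s64 foo_count_events(void)
	{
		return percpu_counter_sum(&foo_events);
	}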

Changes in v3:
1. Add comment and change log for the new function percpu_counter_add_local.
Who should use it and who shouldn't.

Changes in v2:
1. Separate the original patch into two patches.
2. Add error handling for percpu_counter_init.

The performance gain increases as the number of workload threads grows.
Performance gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)


Regards
Jiebin

2022-09-06 09:17:40

by Sun, Jiebin

Subject: Re: [PATCH v2 1/2] percpu: Add percpu_counter_add_local


On 9/6/2022 3:31 AM, Shakeel Butt wrote:
> On Tue, Sep 06, 2022 at 03:35:16AM +0800, Jiebin Sun wrote:
>> Add percpu_counter_add_local for only updating local counter
>> without aggregating to global counter.
> Please add why do we need this. Who should use this and who shouldn't.

Thanks. I have added the code comment and change log in patch v3 and
provided the info on who should use it and who shouldn't.

>
>> Signed-off-by: Jiebin Sun <[email protected]>
> [...]
>
>> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
>> index ed610b75dc32..d33cb750962a 100644
>> --- a/lib/percpu_counter.c
>> +++ b/lib/percpu_counter.c
>> @@ -72,6 +72,12 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
>> }
>> EXPORT_SYMBOL(percpu_counter_set);
>>
> Add a doc comment here on why someone want to use this?
>
>> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>> +{
>> + this_cpu_add(*fbc->counters, amount);
>> +}
>> +EXPORT_SYMBOL(percpu_counter_add_local);
>> +
>> /*
>> * This function is both preempt and irq safe. The former is due to explicit
>> * preemption disable. The latter is guaranteed by the fact that the slow path
>> --
>> 2.31.1
>>

2022-09-06 09:25:55

by Sun, Jiebin

Subject: [PATCH v3 1/2] percpu: Add percpu_counter_add_local

Add percpu_counter_add_local, which only updates the local counter
without aggregating to the global counter.

This function can be combined with percpu_counter_sum if you need a
highly accurate counter. It can bring an obvious performance
improvement if the add is called frequently and percpu_counter_sum
is not in the critical path.

Please use percpu_counter_add_batch instead if you need the global
count to be updated in a timely manner, do not need it to be exact,
and the add path is not heavily used.

Signed-off-by: Jiebin Sun <[email protected]>
---
include/linux/percpu_counter.h | 7 +++++++
lib/percpu_counter.c | 14 ++++++++++++++
2 files changed, 21 insertions(+)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..344d69ae0fb1 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -40,6 +40,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,

void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
@@ -138,6 +139,12 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
preempt_enable();
}

+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add(fbc, amount);
+}
+
static inline void
percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ed610b75dc32..36907eb573a8 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,6 +72,20 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
}
EXPORT_SYMBOL(percpu_counter_set);

+/*
+ * Use this function together with percpu_counter_sum when a highly
+ * accurate counter is needed. Since percpu_counter_sum adds up all the
+ * percpu counters, there is no need to check the batch size and fold
+ * into the global count on every add. If percpu_counter_sum is used
+ * infrequently and the add is in a critical path, this combination can
+ * perform significantly better than percpu_counter_add_batch.
+ */
+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ this_cpu_add(*fbc->counters, amount);
+}
+EXPORT_SYMBOL(percpu_counter_add_local);
+
/*
* This function is both preempt and irq safe. The former is due to explicit
* preemption disable. The latter is guaranteed by the fact that the slow path
--
2.31.1

2022-09-06 09:29:09

by Sun, Jiebin

Subject: [PATCH v3 2/2] ipc/msg: mitigate the lock contention with percpu counter

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the counts is done in the msgctl call, which is
infrequent, so the need to sum up the per-CPU counts is also
infrequent.

Apply the patch and test with pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <[email protected]>
---
include/linux/ipc_namespace.h | 5 ++--
ipc/msg.c | 44 ++++++++++++++++++++++++-----------
ipc/namespace.c | 5 +++-
ipc/util.h | 4 ++--
4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>

struct user_namespace;

@@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
- atomic_t msg_bytes;
- atomic_t msg_hdrs;
+ struct percpu_counter percpu_msg_bytes;
+ struct percpu_counter percpu_msg_hdrs;

size_t shm_ctlmax;
size_t shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..87c30decb23f 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>

#include <asm/current.h>
#include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();

list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, -(msq->q_cbytes));
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+ msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}

@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}

err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, -(msg->m_ts));
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
ss_wakeup(msq, &wake_q, false);

goto out_unlock0;
@@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif

-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
{
+ int ret;
+
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;

- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_bytes;
+ ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ return 0;
+
+ fail_msg_hdrs:
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ fail_msg_bytes:
+ return ret;
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_ipc_sysctls(ns))
goto fail_mq;

+ err = msg_init_ns(ns);
+ if (err)
+ goto fail_put;
+
sem_init_ns(ns);
- msg_init_ns(ns);
shm_init_ns(ns);

return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }

#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);

void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
static inline void shm_init_ns(struct ipc_namespace *ns) { }

static inline void sem_exit_ns(struct ipc_namespace *ns) { }
--
2.31.1

2022-09-06 18:58:40

by Tim Chen

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter

On Fri, 2022-09-02 at 09:27 -0700, Shakeel Butt wrote:
> On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <[email protected]> wrote:
> > The msg_bytes and msg_hdrs atomic counters are frequently
> > updated when IPC msg queue is in heavy use, causing heavy
> > cache bounce and overhead. Change them to percpu_counters
> > greatly improve the performance. Since there is one unique
> > ipc namespace, additional memory cost is minimal. Reading
> > of the count done in msgctl call, which is infrequent. So
> > the need to sum up the counts in each CPU is infrequent.
> >
> > Apply the patch and test the pts/stress-ng-1.4.0
> > -- system v message passing (160 threads).
> >
> > Score gain: 3.38x
> >
> > CPU: ICX 8380 x 2 sockets
> > Core number: 40 x 2 physical cores
> > Benchmark: pts/stress-ng-1.4.0
> > -- system v message passing (160 threads)
> >
> > Signed-off-by: Jiebin Sun <[email protected]>
> [...]
> > +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> > +{
> > + this_cpu_add(*fbc->counters, amount);
> > +}
> > +EXPORT_SYMBOL(percpu_counter_add_local);
>
> Why not percpu_counter_add()? This may drift the fbc->count more than
> batch*nr_cpus. I am assuming that is not the issue for you as you
> always do an expensive sum in the slow path. As Andrew asked, this
> should be a separate patch.

In the IPC case, the read is always the accurate one, using
percpu_counter_sum() to gather all the counts, and never
percpu_counter_read(), which only reads the global count.
So Jiebin was not worried about accuracy.

However, the counter is s64 and the local per-cpu counter is s32.
So the counter size has shrunk if we only keep the count in the local
per-cpu counter, which can overflow a lot sooner and is not okay.

Jiebin, can you try percpu_counter_add_batch with a large batch
size? That should achieve what you want without needing to create
a percpu_counter_add_local() function, and it also avoids the
overflow problem.
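
(For scale, assuming the default msgmax of 8192 bytes per message: an
s32 per-cpu counter that is never folded into the global s64 would wrap
after roughly 2^31 / 8192, i.e. about 262,000 messages' worth of bytes
charged on a single CPU.)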

Tim


2022-09-07 09:23:13

by Sun, Jiebin

Subject: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the counts is done in the msgctl call, which is
infrequent, so the need to sum up the per-CPU counts is also
infrequent.


Apply the patch and test with pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.17x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <[email protected]>
---
include/linux/ipc_namespace.h | 5 ++--
ipc/msg.c | 47 ++++++++++++++++++++++++-----------
ipc/namespace.c | 5 +++-
ipc/util.h | 4 +--
4 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>

struct user_namespace;

@@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
- atomic_t msg_bytes;
- atomic_t msg_hdrs;
+ struct percpu_counter percpu_msg_bytes;
+ struct percpu_counter percpu_msg_hdrs;

size_t shm_ctlmax;
size_t shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..040cfc93d7ef 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,11 +39,15 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>

#include <asm/current.h>
#include <linux/uaccess.h>
#include "util.h"

+/* a large batch size reduces how often the global counter is updated */
+#define MSG_PERCPU_COUNTER_BATCH 1024
+
/* one msq_queue structure for each present queue on the system */
struct msg_queue {
struct kern_ipc_perm q_perm;
@@ -285,10 +289,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();

list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_add_batch(&ns->percpu_msg_hdrs, -1, MSG_PERCPU_COUNTER_BATCH);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_add_batch(&ns->percpu_msg_bytes, -(msq->q_cbytes), MSG_PERCPU_COUNTER_BATCH);
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +499,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+ msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}

@@ -935,8 +940,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_batch(&ns->percpu_msg_bytes, msgsz, MSG_PERCPU_COUNTER_BATCH);
+ percpu_counter_add_batch(&ns->percpu_msg_hdrs, 1, MSG_PERCPU_COUNTER_BATCH);
}

err = 0;
@@ -1159,8 +1164,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_add_batch(&ns->percpu_msg_bytes, -(msg->m_ts), MSG_PERCPU_COUNTER_BATCH);
+ percpu_counter_add_batch(&ns->percpu_msg_hdrs, -1, MSG_PERCPU_COUNTER_BATCH);
ss_wakeup(msq, &wake_q, false);

goto out_unlock0;
@@ -1297,20 +1302,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif

-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
{
+ int ret;
+
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;

- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_bytes;
+ ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ return 0;
+
+ fail_msg_hdrs:
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ fail_msg_bytes:
+ return ret;
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_ipc_sysctls(ns))
goto fail_mq;

+ err = msg_init_ns(ns);
+ if (err)
+ goto fail_put;
+
sem_init_ns(ns);
- msg_init_ns(ns);
shm_init_ns(ns);

return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }

#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);

void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
static inline void shm_init_ns(struct ipc_namespace *ns) { }

static inline void sem_exit_ns(struct ipc_namespace *ns) { }
--
2.31.1

2022-09-07 10:49:17

by Sun, Jiebin

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter


On 9/7/2022 2:44 AM, Tim Chen wrote:
> On Fri, 2022-09-02 at 09:27 -0700, Shakeel Butt wrote:
>> On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <[email protected]> wrote:
>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>> updated when IPC msg queue is in heavy use, causing heavy
>>> cache bounce and overhead. Change them to percpu_counters
>>> greatly improve the performance. Since there is one unique
>>> ipc namespace, additional memory cost is minimal. Reading
>>> of the count done in msgctl call, which is infrequent. So
>>> the need to sum up the counts in each CPU is infrequent.
>>>
>>> Apply the patch and test the pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads).
>>>
>>> Score gain: 3.38x
>>>
>>> CPU: ICX 8380 x 2 sockets
>>> Core number: 40 x 2 physical cores
>>> Benchmark: pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads)
>>>
>>> Signed-off-by: Jiebin Sun <[email protected]>
>> [...]
>>> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>>> +{
>>> + this_cpu_add(*fbc->counters, amount);
>>> +}
>>> +EXPORT_SYMBOL(percpu_counter_add_local);
>> Why not percpu_counter_add()? This may drift the fbc->count more than
>> batch*nr_cpus. I am assuming that is not the issue for you as you
>> always do an expensive sum in the slow path. As Andrew asked, this
>> should be a separate patch.
> In the IPC case, the read is always done with the accurate read using
> percpu_counter_sum() gathering all the counts and
> never with percpu_counter_read() that only read global count.
> So Jiebin was not worry about accuracy.
>
> However, the counter is s64 and the local per cpu counter is S32.
> So the counter size has shrunk if we only keep the count in local per
> cpu counter, which can overflow a lot sooner and is not okay.
>
> Jiebin, can you try to use percpu_counter_add_batch, but using a large
> batch size. That should achieve what you want without needing
> to create a percpu_counter_add_local() function, and also the overflow
> problem.
>
> Tim
>
I have sent out patch v4, which uses percpu_counter_add_batch. With a
tuned large batch size (1024), the performance gain is 3.17x (patch v4)
vs 3.38x (patch v3) previously in stress-ng -- system v message passing.
It is still a significant performance improvement and a good balance
between performance gain and the overflow issue.

Jiebin

>

2022-09-07 16:22:35

by Tim Chen

Subject: Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter

On Thu, 2022-09-08 at 01:25 +0800, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counter
> greatly improve the performance. Since there is one percpu
> struct per namespace, additional memory cost is minimal.
> Reading of the count done in msgctl call, which is infrequent.
> So the need to sum up the counts in each CPU is infrequent.
>
>
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
>
> Score gain: 3.17x
>
>
...
>
> +/* large batch size could reduce the times to sum up percpu counter */
> +#define MSG_PERCPU_COUNTER_BATCH 1024
> +

Jiebin,

1024 is a small size (1/4 page).
The local per-cpu counter could overflow into the global count quickly
if it is limited to this size, since our count tracks msg size.

I'd suggest something larger, say 8*1024*1024, about
8MB, to accommodate about two large pages' worth of data. Maybe that
will further improve throughput on stress-ng by reducing contention
on adding to the global count.

Tim


2022-09-07 21:00:56

by Andrew Morton

Subject: Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter

On Wed, 7 Sep 2022 17:39:47 +0800 "Sun, Jiebin" <[email protected]> wrote:

> I have sent out the patch v4 which use percpu_counter_add_batch. If we use
> a tuned large batch size (1024),

Oh. Why not simply use a batch size of INT_MAX?

> the performance gain is 3.17x (patch v4)
> vs 3.38x (patch v3) previously in stress-ng -- message. It still has
> significant performance improvement and also good balance between
> performance gain and overflow issue.

2022-09-07 22:18:03

by Andrew Morton

Subject: Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter

On Wed, 07 Sep 2022 09:01:53 -0700 Tim Chen <[email protected]> wrote:

> On Thu, 2022-09-08 at 01:25 +0800, Jiebin Sun wrote:
> > The msg_bytes and msg_hdrs atomic counters are frequently
> > updated when IPC msg queue is in heavy use, causing heavy
> > cache bounce and overhead. Change them to percpu_counter
> > greatly improve the performance. Since there is one percpu
> > struct per namespace, additional memory cost is minimal.
> > Reading of the count done in msgctl call, which is infrequent.
> > So the need to sum up the counts in each CPU is infrequent.
> >
> >
> > Apply the patch and test the pts/stress-ng-1.4.0
> > -- system v message passing (160 threads).
> >
> > Score gain: 3.17x
> >
> >
> ...
> >
> > +/* large batch size could reduce the times to sum up percpu counter */
> > +#define MSG_PERCPU_COUNTER_BATCH 1024
> > +
>
> Jiebin,
>
> 1024 is a small size (1/4 page).
> The local per cpu counter could overflow to the gloabal count quickly
> if it is limited to this size, since our count tracks msg size.
>
> I'll suggest something larger, say 8*1024*1024, about
> 8MB to accommodate about 2 large page worth of data. Maybe that
> will further improve throughput on stress-ng by reducing contention
> on adding to the global count.
>

I think this concept of a percpu_counter_add() which is massively
biased to the write side and with very rare reading is a legitimate
use-case. Perhaps it should become an addition to the formal interface.
Something like

/*
* comment goes here
*/
static inline void percpu_counter_add_local(struct percpu_counter *fbc,
s64 amount)
{
percpu_counter_add_batch(fbc, amount, INT_MAX);
}

and percpu_counter_sub_local(), I guess.

The only instance I can see is
block/blk-cgroup-rwstat.h:blkg_rwstat_add() which is using INT_MAX/2
because it always uses percpu_counter_sum_positive() on the read side.

But that makes two!

2022-09-07 22:41:56

by Tim Chen

Subject: Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter

On Wed, 2022-09-07 at 14:34 -0700, Andrew Morton wrote:
>
> I think this concept of a percpu_counter_add() which is massively
> biased to the write side and with very rare reading is a legitimate
> use-case. Perhaps it should become an addition to the formal interface.
> Something like
>
> /*
> * comment goes here
> */
> static inline void percpu_counter_add_local(struct percpu_counter *fbc,
> s64 amount)
> {
> percpu_counter_add_batch(fbc, amount, INT_MAX);
> }
>
> and percpu_counter_sub_local(), I guess.
>
> The only instance I can see is
> block/blk-cgroup-rwstat.h:blkg_rwstat_add() which is using INT_MAX/2
> because it always uses percpu_counter_sum_positive() on the read side.
>
> But that makes two!

Sure. We can create this function and use it for both cases. No objections.

Tim

2022-09-08 09:22:13

by Sun, Jiebin

Subject: Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter


On 9/8/2022 5:34 AM, Andrew Morton wrote:
> On Wed, 07 Sep 2022 09:01:53 -0700 Tim Chen <[email protected]> wrote:
>
>> On Thu, 2022-09-08 at 01:25 +0800, Jiebin Sun wrote:
>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>> updated when IPC msg queue is in heavy use, causing heavy
>>> cache bounce and overhead. Change them to percpu_counter
>>> greatly improve the performance. Since there is one percpu
>>> struct per namespace, additional memory cost is minimal.
>>> Reading of the count done in msgctl call, which is infrequent.
>>> So the need to sum up the counts in each CPU is infrequent.
>>>
>>>
>>> Apply the patch and test the pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads).
>>>
>>> Score gain: 3.17x
>>>
>>>
>> ...
>>>
>>> +/* large batch size could reduce the times to sum up percpu counter */
>>> +#define MSG_PERCPU_COUNTER_BATCH 1024
>>> +
>> Jiebin,
>>
>> 1024 is a small size (1/4 page).
>> The local per cpu counter could overflow to the gloabal count quickly
>> if it is limited to this size, since our count tracks msg size.
>>
>> I'll suggest something larger, say 8*1024*1024, about
>> 8MB to accommodate about 2 large page worth of data. Maybe that
>> will further improve throughput on stress-ng by reducing contention
>> on adding to the global count.
>>
> I think this concept of a percpu_counter_add() which is massively
> biased to the write side and with very rare reading is a legitimate
> use-case. Perhaps it should become an addition to the formal interface.
> Something like
>
> /*
> * comment goes here
> */
> static inline void percpu_counter_add_local(struct percpu_counter *fbc,
> s64 amount)
> {
> percpu_counter_add_batch(fbc, amount, INT_MAX);
> }
>
> and percpu_counter_sub_local(), I guess.
>
> The only instance I can see is
> block/blk-cgroup-rwstat.h:blkg_rwstat_add() which is using INT_MAX/2
> because it always uses percpu_counter_sum_positive() on the read side.
>
> But that makes two!


Yes. Using INT_MAX or INT_MAX/2 could bring a big performance
improvement in the heavy-writing but rare-reading case. In our case,
if the local percpu counter is close to INT_MAX and a big msgsz comes
in, an overflow could happen. So I think INT_MAX/2, which is used in
blkg_rwstat_add(), might be a better choice:

    percpu_counter_add_batch(&ns->percpu_msg_bytes, msgsz, batch);

I will send the performance data and a draft patch out for discussion.

Jiebin

2022-09-08 16:28:23

by Andrew Morton

Subject: Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter

On Thu, 8 Sep 2022 16:25:47 +0800 "Sun, Jiebin" <[email protected]> wrote:

> In our case, if the local
> percpu counter is near to INT_MAX and there comes a big msgsz, the
> overflow issue could happen.

percpu_counter_add_batch() handles this - your big message
won't overflow an s64.


Looking at percpu_counter_add_batch(), is this tweak right?

- don't need to update *fbc->counters inside the lock
- that __this_cpu_sub() is an obscure way of zeroing the thing

--- a/lib/percpu_counter.c~a
+++ a/lib/percpu_counter.c
@@ -89,8 +89,8 @@ void percpu_counter_add_batch(struct per
unsigned long flags;
raw_spin_lock_irqsave(&fbc->lock, flags);
fbc->count += count;
- __this_cpu_sub(*fbc->counters, count - amount);
raw_spin_unlock_irqrestore(&fbc->lock, flags);
+ __this_cpu_write(*fbc->counters, 0);
} else {
this_cpu_add(*fbc->counters, amount);
}
_

2022-09-08 16:45:05

by Dennis Zhou

Subject: Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter

Hello,

On Thu, Sep 08, 2022 at 08:38:59AM -0700, Andrew Morton wrote:
> On Thu, 8 Sep 2022 16:25:47 +0800 "Sun, Jiebin" <[email protected]> wrote:
>
> > In our case, if the local
> > percpu counter is near to INT_MAX and there comes a big msgsz, the
> > overflow issue could happen.
>
> percpu_counter_add_batch() handles this - your big message
> won't overflow an s64.
>
>
> Lookng at percpu_counter_add_batch(), is this tweak right?
>
> - don't need to update *fbc->counters inside the lock
> - that __this_cpu_sub() is an obscure way of zeroing the thing
>
> --- a/lib/percpu_counter.c~a
> +++ a/lib/percpu_counter.c
> @@ -89,8 +89,8 @@ void percpu_counter_add_batch(struct per
> unsigned long flags;
> raw_spin_lock_irqsave(&fbc->lock, flags);
> fbc->count += count;
> - __this_cpu_sub(*fbc->counters, count - amount);
> raw_spin_unlock_irqrestore(&fbc->lock, flags);
> + __this_cpu_write(*fbc->counters, 0);

I don't think this is irq safe. It'd be best to leave it inside the
spinlock; then we can use __this_cpu_write() to zero it in there.
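
Something like this, keeping the store inside the irq-protected
section (untested sketch of the same hunk):

	if (abs(count) >= batch) {
		unsigned long flags;

		raw_spin_lock_irqsave(&fbc->lock, flags);
		fbc->count += count;
		__this_cpu_write(*fbc->counters, 0);
		raw_spin_unlock_irqrestore(&fbc->lock, flags);
	}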

> } else {
> this_cpu_add(*fbc->counters, amount);
> }
> _
>

Thanks,
Dennis

2022-09-09 12:50:31

by Sun, Jiebin

Subject: [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg


Hi,

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch adds the new interfaces percpu_counter_add_local and
percpu_counter_sub_local. The batch size in percpu_counter_add_batch would
have to be very large for the heavy-writing and rare-reading case, so add
"_local" versions instead: they mostly do local adds, reducing global
updates and mitigating lock contention on the write side.

The 2nd patch uses percpu_counter instead of atomic updates in ipc/msg.
The msg_bytes and msg_hdrs atomic counters are frequently updated when the
IPC msg queue is in heavy use, causing heavy cache bouncing and overhead.
Changing them to percpu_counter greatly improves the performance. Since
there is one percpu struct per namespace, the additional memory cost is
minimal. Reading of the counts is done in the msgctl call, which is
infrequent, so the need to sum up the per-CPU counts is also infrequent.

Changes in v5:
1. Use INT_MAX as the large batch size in percpu_counter_add_local and
percpu_counter_sub_local.
2. Use the latest kernel 6.0-rc4 as the baseline for the performance test.
3. Move percpu_counter_add_local and percpu_counter_sub_local from
percpu_counter.c to percpu_counter.h.

Changes in v3:
1. Add comment and change log for the new function percpu_counter_add_local.
Who should use it and who shouldn't.

Changes in v2:
1. Separate the original patch into two patches.
2. Add error handling for percpu_counter_init.

The performance gain increases as the number of workload threads grows.
Performance gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)


Regards
Jiebin

2022-09-09 12:50:53

by Sun, Jiebin

Subject: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local

The batch size in percpu_counter_add_batch would have to be very
large for the heavy-writing and rare-reading case. Add "_local"
versions instead: they mostly do local adds, reducing global updates
and mitigating lock contention on the write side.

Signed-off-by: Jiebin Sun <[email protected]>
---
include/linux/percpu_counter.h | 38 ++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..6dd7eaba8527 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -15,6 +15,9 @@
#include <linux/types.h>
#include <linux/gfp.h>

+/* percpu_counter batch for local add or sub */
+#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX
+
#ifdef CONFIG_SMP

struct percpu_counter {
@@ -56,6 +59,27 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
}

+/*
+ * Use this function in the heavy-writing but rare-reading case.
+ * The large batch size reduces how often the global count is updated.
+ */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
+}
+
+/*
+ * Similar to percpu_counter_add_local: use it in the heavy-writing
+ * but rare-reading case. The large batch size reduces how often the
+ * global count is updated.
+ */
+static inline void
+percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add_batch(fbc, -amount, PERCPU_COUNTER_LOCAL_BATCH);
+}
+
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
{
s64 ret = __percpu_counter_sum(fbc);
@@ -138,6 +162,20 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
preempt_enable();
}

+/* with !SMP, percpu_counter_add_local is the same as percpu_counter_add */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_add(fbc, amount);
+}
+
+/* with !SMP, percpu_counter_sub_local is the same as percpu_counter_sub */
+static inline void
+percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
+{
+ percpu_counter_sub(fbc, amount);
+}
+
static inline void
percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
--
2.31.1

2022-09-09 13:04:37

by Sun, Jiebin

[permalink] [raw]
Subject: [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter

The msg_bytes and msg_hdrs atomic counters are frequently
updated when IPC msg queue is in heavy use, causing heavy
cache bounce and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the count is done in the msgctl call, which is
infrequent. So the need to sum up the counts in each CPU is
infrequent.

Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <[email protected]>
---
include/linux/ipc_namespace.h | 5 ++--
ipc/msg.c | 44 ++++++++++++++++++++++++-----------
ipc/namespace.c | 5 +++-
ipc/util.h | 4 ++--
4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>

struct user_namespace;

@@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
- atomic_t msg_bytes;
- atomic_t msg_hdrs;
+ struct percpu_counter percpu_msg_bytes;
+ struct percpu_counter percpu_msg_hdrs;

size_t shm_ctlmax;
size_t shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..f2bb4c193ecf 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>

#include <asm/current.h>
#include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();

list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+ msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}

@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}

err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
ss_wakeup(msq, &wake_q, false);

goto out_unlock0;
@@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif

-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
{
+ int ret;
+
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;

- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_bytes;
+ ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ return 0;
+
+ fail_msg_hdrs:
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ fail_msg_bytes:
+ return ret;
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_ipc_sysctls(ns))
goto fail_mq;

+ err = msg_init_ns(ns);
+ if (err)
+ goto fail_put;
+
sem_init_ns(ns);
- msg_init_ns(ns);
shm_init_ns(ns);

return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }

#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);

void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
static inline void shm_init_ns(struct ipc_namespace *ns) { }

static inline void sem_exit_ns(struct ipc_namespace *ns) { }
--
2.31.1

2022-09-09 16:46:51

by Tim Chen

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter

On Sat, 2022-09-10 at 04:36 +0800, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counter
> greatly improve the performance. Since there is one percpu
> struct per namespace, additional memory cost is minimal.
> Reading of the count done in msgctl call, which is infrequent.
> So the need to sum up the counts in each CPU is infrequent.
>
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
>
> Score gain: 3.99x
>
> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)

Reviewed-by: Tim Chen <[email protected]>

>
> Signed-off-by: Jiebin Sun <[email protected]>
> ---
>

2022-09-09 16:50:57

by Tim Chen

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local

On Sat, 2022-09-10 at 04:36 +0800, Jiebin Sun wrote:
> The batch size in percpu_counter_add_batch should be very large
> in heavy writing and rare reading case. Add the "_local" version,
> and mostly it will do local adding, reduce the global updating
> and mitigate lock contention in writing.
>
> Signed-off-by: Jiebin Sun <[email protected]>
> ---
> include/linux/percpu_counter.h | 38 ++++++++++++++++++++++++++++++++++
> 1 file changed, 38 insertions(+)
>
> diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
> index 01861eebed79..6dd7eaba8527 100644
> --- a/include/linux/percpu_counter.h
> +++ b/include/linux/percpu_counter.h
> @@ -15,6 +15,9 @@
> #include <linux/types.h>
> #include <linux/gfp.h>
>
> +/* percpu_counter batch for local add or sub */
> +#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX
> +
> #ifdef CONFIG_SMP
>
> struct percpu_counter {
> @@ -56,6 +59,27 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
> percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
> }
>
> +/*
> + * Use this function in heavy writing but rare reading case. The large
> + * batch size will reduce the global updating.

Suggest revising the comment, so it is clear we need to use
percpu_counter_sum() to access the counter:

With percpu_counter_add_local() and percpu_counter_sub_local(),
counts are accumulated in the local per-CPU counter and not in
fbc->count until the local count overflows PERCPU_COUNTER_LOCAL_BATCH.
This makes counter writes efficient.

But percpu_counter_sum(), instead of percpu_counter_read(),
needs to be used to add up the counts
from each CPU to account for all the local counts.
So percpu_counter_add_local() and percpu_counter_sub_local()
should be used when a counter is updated frequently and read
rarely.
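
As a small illustration of that contrast (sketch only, reusing the
percpu_msg_hdrs counter from patch 2/2):

	s64 approx, exact;

	percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);   /* stays in the per-CPU slot */
	approx = percpu_counter_read(&ns->percpu_msg_hdrs);  /* reads only fbc->count; may be far off */
	exact  = percpu_counter_sum(&ns->percpu_msg_hdrs);   /* folds in every CPU's local delta */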


> + */
> +static inline void
> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> + percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
> +}
> +
> +/*
> + * Similar with percpu_counter_add_local, use it in heavy writing but
> + * rare reading case. The large batch size will reduce the global
> + * updating.
> + */
> +static inline void
> +percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
> +{
> + percpu_counter_add_batch(fbc, -amount, PERCPU_COUNTER_LOCAL_BATCH);
> +}
> +
> static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
> {
> s64 ret = __percpu_counter_sum(fbc);
> @@ -138,6 +162,20 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
> preempt_enable();
> }
>
> +/* no smp percpu_counter_add_local is the same with percpu_counter_add */
> +static inline void
> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> + percpu_counter_add(fbc, amount);
> +}
> +
> +/* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
> +static inline void
> +percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
> +{
> + percpu_counter_sub(fbc, amount);
> +}
> +
> static inline void
> percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
> {

2022-09-10 01:57:51

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local

Hi Jiebin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc4 next-20220909]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: x86_64-randconfig-a013 (https://download.01.org/0day-ci/archive/20220910/[email protected]/config)
compiler: gcc-11 (Debian 11.3.0-5) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/44e7288c01b9b125c7a5f97591ca26ffd90e3385
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
git checkout 44e7288c01b9b125c7a5f97591ca26ffd90e3385
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 prepare

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All error/warnings (new ones prefixed by >>):

In file included from include/linux/sched/user.h:7,
from include/linux/cred.h:17,
from include/linux/sched/signal.h:10,
from include/linux/rcuwait.h:6,
from include/linux/percpu-rwsem.h:7,
from include/linux/fs.h:33,
from include/linux/cgroup.h:17,
from include/linux/memcontrol.h:13,
from include/linux/swap.h:9,
from include/linux/suspend.h:5,
from arch/x86/kernel/asm-offsets.c:13:
include/linux/percpu_counter.h: In function 'percpu_counter_sub_local':
>> include/linux/percpu_counter.h:176:9: error: implicit declaration of function 'percpu_counter_sub'; did you mean 'percpu_counter_set'? [-Werror=implicit-function-declaration]
176 | percpu_counter_sub(fbc, amount);
| ^~~~~~~~~~~~~~~~~~
| percpu_counter_set
include/linux/percpu_counter.h: At top level:
>> include/linux/percpu_counter.h:229:20: warning: conflicting types for 'percpu_counter_sub'; have 'void(struct percpu_counter *, s64)' {aka 'void(struct percpu_counter *, long long int)'}
229 | static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
| ^~~~~~~~~~~~~~~~~~
>> include/linux/percpu_counter.h:229:20: error: static declaration of 'percpu_counter_sub' follows non-static declaration
include/linux/percpu_counter.h:176:9: note: previous implicit declaration of 'percpu_counter_sub' with type 'void(struct percpu_counter *, s64)' {aka 'void(struct percpu_counter *, long long int)'}
176 | percpu_counter_sub(fbc, amount);
| ^~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
make[2]: *** [scripts/Makefile.build:117: arch/x86/kernel/asm-offsets.s] Error 1
make[2]: Target '__build' not remade because of errors.
make[1]: *** [Makefile:1206: prepare0] Error 2
make[1]: Target 'prepare' not remade because of errors.
make: *** [Makefile:222: __sub-make] Error 2
make: Target 'prepare' not remade because of errors.


vim +176 include/linux/percpu_counter.h

171
172 /* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
173 static inline void
174 percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
175 {
> 176 percpu_counter_sub(fbc, amount);
177 }
178
179 static inline void
180 percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
181 {
182 percpu_counter_add(fbc, amount);
183 }
184
185 static inline s64 percpu_counter_read(struct percpu_counter *fbc)
186 {
187 return fbc->count;
188 }
189
190 /*
191 * percpu_counter is intended to track positive numbers. In the UP case the
192 * number should never be negative.
193 */
194 static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
195 {
196 return fbc->count;
197 }
198
199 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
200 {
201 return percpu_counter_read_positive(fbc);
202 }
203
204 static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
205 {
206 return percpu_counter_read(fbc);
207 }
208
209 static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
210 {
211 return true;
212 }
213
214 static inline void percpu_counter_sync(struct percpu_counter *fbc)
215 {
216 }
217 #endif /* CONFIG_SMP */
218
219 static inline void percpu_counter_inc(struct percpu_counter *fbc)
220 {
221 percpu_counter_add(fbc, 1);
222 }
223
224 static inline void percpu_counter_dec(struct percpu_counter *fbc)
225 {
226 percpu_counter_add(fbc, -1);
227 }
228
> 229 static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
230 {
231 percpu_counter_add(fbc, -amount);
232 }
233

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-09-10 08:39:56

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local

Hi Jiebin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc4 next-20220909]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: x86_64-randconfig-a012 (https://download.01.org/0day-ci/archive/20220910/[email protected]/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/44e7288c01b9b125c7a5f97591ca26ffd90e3385
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
git checkout 44e7288c01b9b125c7a5f97591ca26ffd90e3385
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 prepare

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

In file included from arch/x86/kernel/asm-offsets.c:13:
In file included from include/linux/suspend.h:5:
In file included from include/linux/swap.h:9:
In file included from include/linux/memcontrol.h:13:
In file included from include/linux/cgroup.h:17:
In file included from include/linux/fs.h:33:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:10:
In file included from include/linux/cred.h:17:
In file included from include/linux/sched/user.h:7:
>> include/linux/percpu_counter.h:176:2: error: implicit declaration of function 'percpu_counter_sub' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
percpu_counter_sub(fbc, amount);
^
include/linux/percpu_counter.h:176:2: note: did you mean 'percpu_counter_set'?
include/linux/percpu_counter.h:136:20: note: 'percpu_counter_set' declared here
static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
^
include/linux/percpu_counter.h:229:20: error: static declaration of 'percpu_counter_sub' follows non-static declaration
static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
^
include/linux/percpu_counter.h:176:2: note: previous implicit declaration is here
percpu_counter_sub(fbc, amount);
^
2 errors generated.
make[2]: *** [scripts/Makefile.build:117: arch/x86/kernel/asm-offsets.s] Error 1
make[2]: Target '__build' not remade because of errors.
make[1]: *** [Makefile:1206: prepare0] Error 2
make[1]: Target 'prepare' not remade because of errors.
make: *** [Makefile:222: __sub-make] Error 2
make: Target 'prepare' not remade because of errors.


vim +/percpu_counter_sub +176 include/linux/percpu_counter.h

171
172 /* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
173 static inline void
174 percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
175 {
> 176 percpu_counter_sub(fbc, amount);
177 }
178

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-09-10 08:54:01

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local

Hi Jiebin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc4 next-20220909]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: hexagon-randconfig-r041-20220909 (https://download.01.org/0day-ci/archive/20220910/[email protected]/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 1546df49f5a6d09df78f569e4137ddb365a3e827)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/44e7288c01b9b125c7a5f97591ca26ffd90e3385
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
git checkout 44e7288c01b9b125c7a5f97591ca26ffd90e3385
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon prepare

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

In file included from arch/hexagon/kernel/asm-offsets.c:12:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:33:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:10:
In file included from include/linux/cred.h:17:
In file included from include/linux/sched/user.h:7:
>> include/linux/percpu_counter.h:176:2: error: call to undeclared function 'percpu_counter_sub'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
percpu_counter_sub(fbc, amount);
^
include/linux/percpu_counter.h:176:2: note: did you mean 'percpu_counter_set'?
include/linux/percpu_counter.h:136:20: note: 'percpu_counter_set' declared here
static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
^
include/linux/percpu_counter.h:229:20: error: static declaration of 'percpu_counter_sub' follows non-static declaration
static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
^
include/linux/percpu_counter.h:176:2: note: previous implicit declaration is here
percpu_counter_sub(fbc, amount);
^
2 errors generated.
make[2]: *** [scripts/Makefile.build:117: arch/hexagon/kernel/asm-offsets.s] Error 1
make[2]: Target '__build' not remade because of errors.
make[1]: *** [Makefile:1206: prepare0] Error 2
make[1]: Target 'prepare' not remade because of errors.
make: *** [Makefile:222: __sub-make] Error 2
make: Target 'prepare' not remade because of errors.


vim +/percpu_counter_sub +176 include/linux/percpu_counter.h

171
172 /* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
173 static inline void
174 percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
175 {
> 176 percpu_counter_sub(fbc, amount);
177 }
178

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-09-13 12:14:59

by Sun, Jiebin

[permalink] [raw]
Subject: [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg

Hi,

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch is to add the new interface percpu_counter_add_local and
percpu_counter_sub_local. The batch size in percpu_counter_add_batch
should be very large in the heavy-write, rare-read case. Add the
"_local" versions, which mostly do local adds, reducing global
updates and mitigating lock contention in writing.

The 2nd patch is to use percpu_counter instead of atomic update in
ipc/msg. The msg_bytes and msg_hdrs atomic counters are frequently
updated when IPC msg queue is in heavy use, causing heavy cache bounce
and overhead. Change them to percpu_counter greatly improve the
performance. Since there is one percpu struct per namespace, additional
memory cost is minimal. Reading of the count done in msgctl call, which
is infrequent. So the need to sum up the counts in each CPU is
infrequent.

Changes in v6:
1. Revise the code comment of percpu_counter_add_local in patch 1/2.
2. Derive percpu_counter_sub_local from percpu_counter_add_local, rather
than from percpu_counter_add_batch for SMP and percpu_counter_sub for
non-SMP, to reduce code modification (see the sketch after this changelog).

Changes in v5:
1. Use INT_MAX as the large batch size in percpu_counter_add_local and
percpu_counter_sub_local.
2. Use the latest kernel 6.0-rc4 as the baseline for performance test.
3. Move the percpu_counter_add_local and percpu_counter_sub_local from
percpu_counter.c to percpu_counter.h.

Changes in v3:
1. Add a comment and changelog entry for the new function
percpu_counter_add_local: who should use it and who shouldn't.

Changes in v2:
1. Separate the original patch into two patches.
2. Add error handling for percpu_counter_init.
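
A minimal sketch of the v6 approach mentioned in change 2 above
(illustrative, not the exact hunk): defining the sub helper on top of the
add helper gives one definition that works with both the SMP and !SMP
variants of percpu_counter_add_local(), and avoids referencing
percpu_counter_sub() before it is declared, which was the build error the
kernel test robot reported against v5.

	static inline void
	percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
	{
		percpu_counter_add_local(fbc, -amount);
	}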

The performance gain increases as the number of workload threads grows.
Performance gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)


Regards
Jiebin

2022-09-13 12:54:26

by Sun, Jiebin

[permalink] [raw]
Subject: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter

The msg_bytes and msg_hdrs atomic counters are frequently
updated when IPC msg queue is in heavy use, causing heavy
cache bounce and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the count is done in the msgctl call, which is
infrequent. So the need to sum up the counts in each CPU is
infrequent.

Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <[email protected]>
Reviewed-by: Tim Chen <[email protected]>
---
include/linux/ipc_namespace.h | 5 ++--
ipc/msg.c | 44 ++++++++++++++++++++++++-----------
ipc/namespace.c | 5 +++-
ipc/util.h | 4 ++--
4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>

struct user_namespace;

@@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
- atomic_t msg_bytes;
- atomic_t msg_hdrs;
+ struct percpu_counter percpu_msg_bytes;
+ struct percpu_counter percpu_msg_hdrs;

size_t shm_ctlmax;
size_t shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..f2bb4c193ecf 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>

#include <asm/current.h>
#include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();

list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+ msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}

@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}

err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
ss_wakeup(msq, &wake_q, false);

goto out_unlock0;
@@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif

-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
{
+ int ret;
+
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;

- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_bytes;
+ ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ return 0;
+
+ fail_msg_hdrs:
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ fail_msg_bytes:
+ return ret;
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_ipc_sysctls(ns))
goto fail_mq;

+ err = msg_init_ns(ns);
+ if (err)
+ goto fail_put;
+
sem_init_ns(ns);
- msg_init_ns(ns);
shm_init_ns(ns);

return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }

#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);

void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
static inline void shm_init_ns(struct ipc_namespace *ns) { }

static inline void sem_exit_ns(struct ipc_namespace *ns) { }
--
2.31.1

2022-09-18 13:43:31

by Manfred Spraul

[permalink] [raw]
Subject: Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter

Hi Jiebin,

On 9/13/22 21:25, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counter
> greatly improve the performance. Since there is one percpu
> struct per namespace, additional memory cost is minimal.
> Reading of the count done in msgctl call, which is infrequent.
> So the need to sum up the counts in each CPU is infrequent.
>
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
>
> Score gain: 3.99x
>
> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)
>
> Signed-off-by: Jiebin Sun <[email protected]>
> Reviewed-by: Tim Chen <[email protected]>
Reviewed-by: Manfred Spraul <[email protected]>
> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
> msginfo->msgssz = MSGSSZ;
> msginfo->msgseg = MSGSEG;
> down_read(&msg_ids(ns).rwsem);
> - if (cmd == MSG_INFO) {
> + if (cmd == MSG_INFO)
> msginfo->msgpool = msg_ids(ns).in_use;
> - msginfo->msgmap = atomic_read(&ns->msg_hdrs);
> - msginfo->msgtql = atomic_read(&ns->msg_bytes);
> + max_idx = ipc_get_maxidx(&msg_ids(ns));
> + up_read(&msg_ids(ns).rwsem);
> + if (cmd == MSG_INFO) {
> + msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
> + msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);

Not caused by your change, it just now becomes obvious:

msginfo->msgmap and ->msgtql are of type int, i.e. signed 32-bit, and the
actual counters are 64-bit.
This can overflow, and I think the code should handle this. Just clamp
the values to INT_MAX.
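
A minimal sketch of that clamping (illustrative only; the msginfo fields
are int while percpu_counter_sum() returns s64, so INT_MAX is cast to s64
to keep min()'s type check happy):

	msginfo->msgmap = min(percpu_counter_sum(&ns->percpu_msg_hdrs), (s64)INT_MAX);
	msginfo->msgtql = min(percpu_counter_sum(&ns->percpu_msg_bytes), (s64)INT_MAX);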

--

    Manfred


2022-09-20 02:53:21

by Sun, Jiebin

[permalink] [raw]
Subject: Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter


On 9/18/2022 8:53 PM, Manfred Spraul wrote:
> Hi Jiebin,
>
> On 9/13/22 21:25, Jiebin Sun wrote:
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counter
>> greatly improve the performance. Since there is one percpu
>> struct per namespace, additional memory cost is minimal.
>> Reading of the count done in msgctl call, which is infrequent.
>> So the need to sum up the counts in each CPU is infrequent.
>>
>> Apply the patch and test the pts/stress-ng-1.4.0
>> -- system v message passing (160 threads).
>>
>> Score gain: 3.99x
>>
>> CPU: ICX 8380 x 2 sockets
>> Core number: 40 x 2 physical cores
>> Benchmark: pts/stress-ng-1.4.0
>> -- system v message passing (160 threads)
>>
>> Signed-off-by: Jiebin Sun <[email protected]>
>> Reviewed-by: Tim Chen <[email protected]>
> Reviewed-by: Manfred Spraul <[email protected]>
>> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace
>> *ns, int msqid,
>>       msginfo->msgssz = MSGSSZ;
>>       msginfo->msgseg = MSGSEG;
>>       down_read(&msg_ids(ns).rwsem);
>> -    if (cmd == MSG_INFO) {
>> +    if (cmd == MSG_INFO)
>>           msginfo->msgpool = msg_ids(ns).in_use;
>> -        msginfo->msgmap = atomic_read(&ns->msg_hdrs);
>> -        msginfo->msgtql = atomic_read(&ns->msg_bytes);
>> +    max_idx = ipc_get_maxidx(&msg_ids(ns));
>> +    up_read(&msg_ids(ns).rwsem);
>> +    if (cmd == MSG_INFO) {
>> +        msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
>> +        msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
>
> Not caused by your change, it just now becomes obvious:
>
> msginfo->msgmap and ->msgtql are type int, i.e. signed 32-bit, and the
> actual counters are 64-bit.
> This can overflow - and I think the code should handle this. Just
> clamp the values to INT_MAX.
>
Hi Manfred,

Thanks for your advice. But I'm not sure we can completely fix the
overflow issue in ipc/msg by clamp(val, low, INT_MAX). If the value
exceeds the s32 range, we avoid the sign reversal, but we still cannot
report the accurate value.

2022-09-20 05:21:23

by Manfred Spraul

[permalink] [raw]
Subject: Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter

On 9/20/22 04:36, Sun, Jiebin wrote:
>
> On 9/18/2022 8:53 PM, Manfred Spraul wrote:
>> Hi Jiebin,
>>
>> On 9/13/22 21:25, Jiebin Sun wrote:
>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>> updated when IPC msg queue is in heavy use, causing heavy
>>> cache bounce and overhead. Change them to percpu_counter
>>> greatly improve the performance. Since there is one percpu
>>> struct per namespace, additional memory cost is minimal.
>>> Reading of the count done in msgctl call, which is infrequent.
>>> So the need to sum up the counts in each CPU is infrequent.
>>>
>>> Apply the patch and test the pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads).
>>>
>>> Score gain: 3.99x
>>>
>>> CPU: ICX 8380 x 2 sockets
>>> Core number: 40 x 2 physical cores
>>> Benchmark: pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads)
>>>
>>> Signed-off-by: Jiebin Sun <[email protected]>
>>> Reviewed-by: Tim Chen <[email protected]>
>> Reviewed-by: Manfred Spraul <[email protected]>
>>> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace
>>> *ns, int msqid,
>>>       msginfo->msgssz = MSGSSZ;
>>>       msginfo->msgseg = MSGSEG;
>>>       down_read(&msg_ids(ns).rwsem);
>>> -    if (cmd == MSG_INFO) {
>>> +    if (cmd == MSG_INFO)
>>>           msginfo->msgpool = msg_ids(ns).in_use;
>>> -        msginfo->msgmap = atomic_read(&ns->msg_hdrs);
>>> -        msginfo->msgtql = atomic_read(&ns->msg_bytes);
>>> +    max_idx = ipc_get_maxidx(&msg_ids(ns));
>>> +    up_read(&msg_ids(ns).rwsem);
>>> +    if (cmd == MSG_INFO) {
>>> +        msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
>>> +        msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
>>
>> Not caused by your change, it just now becomes obvious:
>>
>> msginfo->msgmap and ->msgtql are type int, i.e. signed 32-bit, and
>> the actual counters are 64-bit.
>> This can overflow - and I think the code should handle this. Just
>> clamp the values to INT_MAX.
>>
> Hi Manfred,
>
> Thanks for your advice. But I'm not sure if we could fix the overflow
> issue in ipc/msg totally by
>
> clamp(val, low, INT_MAX). If the value is over s32, we might avoid the
> reversal sign, but still could
>
> not get the accurate value.

I think just clamping it to INT_MAX is the best approach.
Reporting negative values is worse than clamping. If (and only if) there
are real users that need to know the total amount of memory allocated
for message queues in one namespace, then we could add a MSG_INFO64
with long values. But I would not add that right now; I do not see a
real use case where the value would be needed.

Any other opinions?

--

    Manfred

2022-09-20 06:16:10

by Sun, Jiebin

[permalink] [raw]
Subject: Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter


On 9/20/2022 12:53 PM, Manfred Spraul wrote:
> On 9/20/22 04:36, Sun, Jiebin wrote:
>>
>> On 9/18/2022 8:53 PM, Manfred Spraul wrote:
>>> Hi Jiebin,
>>>
>>> On 9/13/22 21:25, Jiebin Sun wrote:
>>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>>> updated when IPC msg queue is in heavy use, causing heavy
>>>> cache bounce and overhead. Change them to percpu_counter
>>>> greatly improve the performance. Since there is one percpu
>>>> struct per namespace, additional memory cost is minimal.
>>>> Reading of the count done in msgctl call, which is infrequent.
>>>> So the need to sum up the counts in each CPU is infrequent.
>>>>
>>>> Apply the patch and test the pts/stress-ng-1.4.0
>>>> -- system v message passing (160 threads).
>>>>
>>>> Score gain: 3.99x
>>>>
>>>> CPU: ICX 8380 x 2 sockets
>>>> Core number: 40 x 2 physical cores
>>>> Benchmark: pts/stress-ng-1.4.0
>>>> -- system v message passing (160 threads)
>>>>
>>>> Signed-off-by: Jiebin Sun <[email protected]>
>>>> Reviewed-by: Tim Chen <[email protected]>
>>> Reviewed-by: Manfred Spraul <[email protected]>
>>>> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace
>>>> *ns, int msqid,
>>>>       msginfo->msgssz = MSGSSZ;
>>>>       msginfo->msgseg = MSGSEG;
>>>>       down_read(&msg_ids(ns).rwsem);
>>>> -    if (cmd == MSG_INFO) {
>>>> +    if (cmd == MSG_INFO)
>>>>           msginfo->msgpool = msg_ids(ns).in_use;
>>>> -        msginfo->msgmap = atomic_read(&ns->msg_hdrs);
>>>> -        msginfo->msgtql = atomic_read(&ns->msg_bytes);
>>>> +    max_idx = ipc_get_maxidx(&msg_ids(ns));
>>>> +    up_read(&msg_ids(ns).rwsem);
>>>> +    if (cmd == MSG_INFO) {
>>>> +        msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
>>>> +        msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
>>>
>>> Not caused by your change, it just now becomes obvious:
>>>
>>> msginfo->msgmap and ->msgtql are type int, i.e. signed 32-bit, and
>>> the actual counters are 64-bit.
>>> This can overflow - and I think the code should handle this. Just
>>> clamp the values to INT_MAX.
>>>
>> Hi Manfred,
>>
>> Thanks for your advice. But I'm not sure if we could fix the overflow
>> issue in ipc/msg totally by
>>
>> clamp(val, low, INT_MAX). If the value is over s32, we might avoid
>> the reversal sign, but still could
>>
>> not get the accurate value.
>
> I think just clamping it to INT_MAX is the best approach.
> Reporting negative values is worse than clamping. If (and only if)
> there are real users that need to know the total amount of memory
> allocated for messages queues in one namespace, then we could add a
> MSG_INFO64 with long values. But I would not add that right now, I do
> not see a real use case where the value would be needed.
>
> Any other opinions?
>
> --
>
>     Manfred
>
>
OK. I will work on it and send it out for review.

2022-09-20 07:26:28

by Sun, Jiebin

[permalink] [raw]
Subject: [PATCH] ipc/msg: avoid negative value by overflow in msginfo

The 32-bit values in the msginfo struct could become negative when
filled from the signed 64-bit percpu counter sums. Clamping them to
INT_MAX avoids the overflow into negative values.

Signed-off-by: Jiebin Sun <[email protected]>
Reviewed-by: Manfred Spraul <[email protected]>
Reviewed-by: Tim Chen <[email protected]>
---
ipc/msg.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index f2bb4c193ecf..65f437e28c9b 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -501,8 +501,8 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
max_idx = ipc_get_maxidx(&msg_ids(ns));
up_read(&msg_ids(ns).rwsem);
if (cmd == MSG_INFO) {
- msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
- msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
+ msginfo->msgmap = min(percpu_counter_sum(&ns->percpu_msg_hdrs), (s64)INT_MAX);
+ msginfo->msgtql = min(percpu_counter_sum(&ns->percpu_msg_bytes), (s64)INT_MAX);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
--
2.31.1