2020-10-22 09:31:41

by Zhang, Qiang

[permalink] [raw]
Subject: Question on io-wq


Hi Jens Axboe

There is a problem with the 'io_wqe_worker' threads: when an
'io_wqe_worker' is created, its affinity is set to the CPUs of a NUMA
node. Due to CPU hotplug, when the last CPU in the node goes down, the
'io_wqe_worker' thread will run anywhere. When a CPU in the node comes
online again, should we restore the CPU bindings?

Thanks
Qiang


2020-10-22 17:34:44

by Jens Axboe

[permalink] [raw]
Subject: Re: Question on io-wq

On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>
> Hi Jens Axboe
>
> There is a problem with the 'io_wqe_worker' threads: when an
> 'io_wqe_worker' is created, its affinity is set to the CPUs of a NUMA
> node. Due to CPU hotplug, when the last CPU in the node goes down, the
> 'io_wqe_worker' thread will run anywhere. When a CPU in the node comes
> online again, should we restore the CPU bindings?

Something like the below should help in ensuring affinities are
always correct - trigger an affinity set for an online CPU event. We
should not need to do it for offlining. Can you test it?


diff --git a/fs/io-wq.c b/fs/io-wq.c
index 4012ff541b7b..3bf029d1170e 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -19,6 +19,7 @@
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>
+#include <linux/cpu.h>

#include "io-wq.h"

@@ -123,9 +124,13 @@ struct io_wq {
refcount_t refs;
struct completion done;

+ struct hlist_node cpuhp_node;
+
refcount_t use_refs;
};

+static enum cpuhp_state io_wq_online;
+
static bool io_worker_get(struct io_worker *worker)
{
return refcount_inc_not_zero(&worker->ref);
@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM);
}

+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+ if (ret) {
+ kfree(wq->wqes);
+ kfree(wq);
+ return ERR_PTR(ret);
+ }
+
wq->free_work = data->free_work;
wq->do_work = data->do_work;

@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
ret = PTR_ERR(wq->manager);
complete(&wq->done);
err:
+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node)
kfree(wq->wqes[node]);
kfree(wq->wqes);
@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
{
int node;

+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+
set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager)
kthread_stop(wq->manager);
@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
{
return wq->manager;
}
+
+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
+{
+ struct task_struct *task = worker->task;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
+ task->flags |= PF_NO_SETAFFINITY;
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return false;
+}
+
+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+ int i;
+
+ rcu_read_lock();
+ for_each_node(i)
+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+ rcu_read_unlock();
+ return 0;
+}
+
+static __init int io_wq_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
+ io_wq_cpu_online, NULL);
+ if (ret < 0)
+ return ret;
+ io_wq_online = ret;
+ return 0;
+}
+subsys_initcall(io_wq_init);

--
Jens Axboe

2020-10-23 07:57:07

by Zhang, Qiang

[permalink] [raw]
Subject: 回复: Question on io-wq



________________________________________
发件人: Jens Axboe <[email protected]>
发送时间: 2020年10月22日 22:08
收件人: Zhang, Qiang
抄送: [email protected]; [email protected]; [email protected]; [email protected]
主题: Re: Question on io-wq

On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>
> Hi Jens Axboe
>
> There is a problem with the 'io_wqe_worker' threads: when an
> 'io_wqe_worker' is created, its affinity is set to the CPUs of a NUMA
> node. Due to CPU hotplug, when the last CPU in the node goes down, the
> 'io_wqe_worker' thread will run anywhere. When a CPU in the node comes
> online again, should we restore the CPU bindings?

>Something like the below should help in ensuring affinities are
>always correct - trigger an affinity set for an online CPU event. We
>should not need to do it for offlining. Can you test it?


>diff --git a/fs/io-wq.c b/fs/io-wq.c
>index 4012ff541b7b..3bf029d1170e 100644
>--- a/fs/io-wq.c
>+++ b/fs/io-wq.c
>@@ -19,6 +19,7 @@
>#include <linux/task_work.h>
>#include <linux/blk-cgroup.h>
>#include <linux/audit.h>
>+#include <linux/cpu.h>

>#include "io-wq.h"
>
>@@ -123,9 +124,13 @@ struct io_wq {
> refcount_t refs;
> struct completion done;
>
>+ struct hlist_node cpuhp_node;
>+
> refcount_t use_refs;
>};
>
>+static enum cpuhp_state io_wq_online;
>+
>static bool io_worker_get(struct io_worker *worker)
>{
> return refcount_inc_not_zero(&worker->ref);
>@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
> return ERR_PTR(-ENOMEM);
> }
>
>+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+ if (ret) {
>+ kfree(wq->wqes);
>+ kfree(wq);
>+ return ERR_PTR(ret);
>+ }
>+
> wq->free_work = data->free_work;
> wq->do_work = data->do_work;
>
>@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
> ret = PTR_ERR(wq->manager);
> complete(&wq->done);
>err:
>+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
> for_each_node(node)
> kfree(wq->wqes[node]);
> kfree(wq->wqes);
>@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
>{
> int node;
>
>+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+
> set_bit(IO_WQ_BIT_EXIT, &wq->state);
> if (wq->manager)
> kthread_stop(wq->manager);
>@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
>{
> return wq->manager;
>}
>+
>+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
>+{
>+ struct task_struct *task = worker->task;
>+ unsigned long flags;
>+
struct rq_flags rf;


>+ raw_spin_lock_irqsave(&task->pi_lock, flags);
>+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
>+ task->flags |= PF_NO_SETAFFINITY;
>+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);


>+ return false;
>+}
>+
>+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
>+{
>+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
>+ int i;
>+
>+ rcu_read_lock();
>+ for_each_node(i)
>+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
>+ rcu_read_unlock();
>+ return 0;
>+}
>+
>+static __init int io_wq_init(void)
>+{
>+ int ret;
>+
>+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
>+ io_wq_cpu_online, NULL);
>+ if (ret < 0)
>+ return ret;
>+ io_wq_online = ret;
>+ return 0;
>+}
>+subsys_initcall(io_wq_init);
>
>--
>Jens Axboe

2020-10-23 13:34:19

by Zhang, Qiang

[permalink] [raw]
Subject: 回复: Question on io-wq



________________________________________
发件人: Zhang, Qiang <[email protected]>
发送时间: 2020年10月23日 11:55
收件人: Jens Axboe
抄送: [email protected]; [email protected]; [email protected]; [email protected]
主题: 回复: Question on io-wq



________________________________________
发件人: Jens Axboe <[email protected]>
发送时间: 2020年10月22日 22:08
收件人: Zhang, Qiang
抄送: [email protected]; [email protected]; [email protected]; [email protected]
主题: Re: Question on io-wq

On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>
> Hi Jens Axboe
>
> There is a problem with the 'io_wqe_worker' threads: when an
> 'io_wqe_worker' is created, its affinity is set to the CPUs of a NUMA
> node. Due to CPU hotplug, when the last CPU in the node goes down, the
> 'io_wqe_worker' thread will run anywhere. When a CPU in the node comes
> online again, should we restore the CPU bindings?

>Something like the below should help in ensuring affinities are
>always correct - trigger an affinity set for an online CPU event. We
>should not need to do it for offlining. Can you test it?


>diff --git a/fs/io-wq.c b/fs/io-wq.c
>index 4012ff541b7b..3bf029d1170e 100644
>--- a/fs/io-wq.c
>+++ b/fs/io-wq.c
>@@ -19,6 +19,7 @@
>#include <linux/task_work.h>
>#include <linux/blk-cgroup.h>
>#include <linux/audit.h>
>+#include <linux/cpu.h>

>#include "io-wq.h"
>
>@@ -123,9 +124,13 @@ struct io_wq {
> refcount_t refs;
> struct completion done;
>
>+ struct hlist_node cpuhp_node;
>+
> refcount_t use_refs;
>};
>
>+static enum cpuhp_state io_wq_online;
>+
>static bool io_worker_get(struct io_worker *worker)
>{
> return refcount_inc_not_zero(&worker->ref);
>@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
> return ERR_PTR(-ENOMEM);
> }
>
>+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+ if (ret) {
>+ kfree(wq->wqes);
>+ kfree(wq);
>+ return ERR_PTR(ret);
>+ }
>+
> wq->free_work = data->free_work;
> wq->do_work = data->do_work;
>
>@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
> ret = PTR_ERR(wq->manager);
> complete(&wq->done);
>err:
>+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
> for_each_node(node)
> kfree(wq->wqes[node]);
> kfree(wq->wqes);
>@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
>{
> int node;
>
>+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+
> set_bit(IO_WQ_BIT_EXIT, &wq->state);
> if (wq->manager)
> kthread_stop(wq->manager);
>@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
>{
> return wq->manager;
>}
>+
>+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
>+{
>+ struct task_struct *task = worker->task;
>+ unsigned long flags;
>+
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(task, &rf);

--- raw_spin_lock_irqsave(&task->pi_lock, flags);
>+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
>+ task->flags |= PF_NO_SETAFFINITY;
--- raw_spin_unlock_irqrestore(&task->pi_lock, flags);

task_rq_unlock(rq, task, &rf);

>+ return false;
>+}
>+
>+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
>+{
>+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
>+ int i;
>+
>+ rcu_read_lock();
>+ for_each_node(i)
>+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
>+ rcu_read_unlock();
>+ return 0;
>+}
>+
>+static __init int io_wq_init(void)
>+{
>+ int ret;
>+
>+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
>+ io_wq_cpu_online, NULL);
>+ if (ret < 0)
>+ return ret;
>+ io_wq_online = ret;
>+ return 0;
>+}
>+subsys_initcall(io_wq_init);
>
>--
>Jens Axboe