From: Zhaoyang Huang <[email protected]>
A wakeup can be missed if the stall state remains unchanged for the rest
of the window, since polling_total is updated every polling_min_period.
Introduce a threshold_breach flag to record the trigger's status and update
the logic accordingly.
Suggested-by: Suren Baghdasaryan <[email protected]>
Signed-off-by: Zhaoyang Huang <[email protected]>
---
v2: modify the logic according to Suren's suggestion
---
---
include/linux/psi_types.h | 2 ++
kernel/sched/psi.c | 38 +++++++++++++++++++++++---------------
2 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 0a23300..87b694a 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -132,6 +132,8 @@ struct psi_trigger {
/* Refcounting to prevent premature destruction */
struct kref refcount;
+
+ bool threshold_breach;
};
struct psi_group {
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 1652f2b..5c67ab9 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -524,24 +524,29 @@ static u64 update_triggers(struct psi_group *group, u64 now)
*/
list_for_each_entry(t, &group->triggers, node) {
u64 growth;
+ bool trigger_stalled =
+ group->polling_total[t->state] != total[t->state];
- /* Check for stall activity */
- if (group->polling_total[t->state] == total[t->state])
- continue;
-
- /*
- * Multiple triggers might be looking at the same state,
- * remember to update group->polling_total[] once we've
- * been through all of them. Also remember to extend the
- * polling time if we see new stall activity.
- */
- new_stall = true;
-
- /* Calculate growth since last update */
- growth = window_update(&t->win, now, total[t->state]);
- if (growth < t->threshold)
+ /* Check for stall activity or a previous threshold breach */
+ if (!trigger_stalled && !t->threshold_breach)
continue;
+ if (trigger_stalled) {
+ /*
+ * Multiple triggers might be looking at the same state,
+ * remember to update group->polling_total[] once we've
+ * been through all of them. Also remember to extend the
+ * polling time if we see new stall activity.
+ */
+ new_stall = true;
+
+ /* Calculate growth since last update */
+ growth = window_update(&t->win, now, total[t->state]);
+ if (growth < t->threshold)
+ continue;
+
+ t->threshold_breach = true;
+ }
/* Limit event signaling to once per window */
if (now < t->last_event_time + t->win.size)
continue;
@@ -550,6 +555,8 @@ static u64 update_triggers(struct psi_group *group, u64 now)
if (cmpxchg(&t->event, 0, 1) == 0)
wake_up_interruptible(&t->event_wait);
t->last_event_time = now;
+ /* Reset threshold breach flag once event got generated */
+ t->threshold_breach = false;
}
if (new_stall)
@@ -1152,6 +1159,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
kref_init(&t->refcount);
+ t->threshold_breach = false;
mutex_lock(&group->trigger_lock);
--
1.9.1
On Tue, Dec 21, 2021 at 12:34 AM Huangzhaoyang <[email protected]> wrote:
>
> From: Zhaoyang Huang <[email protected]>
CC'ing PeterZ since I think this change will need to be accepted into
his tree. Please include Peter in the future versions of this patch.
>
> There could be missing wake up if the rest of the window remain the
> same stall states as the polling_total updates for every polling_min_period.
> Introducing threshold_breach flag to record the trigger's status and update
> the logic.
At least for me, it's hard to understand the problem from this
description. Suggest something like this:
When a new threshold-breaching stall happens after a psi event was
generated and within the window duration, the new event is not
generated because events are rate-limited to one per window. If
no new stall is recorded after that, the event will not be
generated even after the rate-limiting duration has passed. This
happens because, with no new stall, window_update will not be called
even though the threshold was previously breached. To fix this, record
the threshold-breaching occurrence and generate the event once the
window duration has passed.
The code looks good to me. Thanks!
>
> Suggested-by: Suren Baghdasaryan <[email protected]>
> Signed-off-by: Zhaoyang Huang <[email protected]>
> ---
> v2: modify the logic according to Suren's suggestion
> ---
> ---
> include/linux/psi_types.h | 2 ++
> kernel/sched/psi.c | 38 +++++++++++++++++++++++---------------
> 2 files changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
> index 0a23300..87b694a 100644
> --- a/include/linux/psi_types.h
> +++ b/include/linux/psi_types.h
> @@ -132,6 +132,8 @@ struct psi_trigger {
>
> /* Refcounting to prevent premature destruction */
> struct kref refcount;
> +
> + bool threshold_breach;
> };
>
> struct psi_group {
> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index 1652f2b..5c67ab9 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -524,24 +524,29 @@ static u64 update_triggers(struct psi_group *group, u64 now)
> */
> list_for_each_entry(t, &group->triggers, node) {
> u64 growth;
> + bool trigger_stalled =
> + group->polling_total[t->state] != total[t->state];
>
> - /* Check for stall activity */
> - if (group->polling_total[t->state] == total[t->state])
> - continue;
> -
> - /*
> - * Multiple triggers might be looking at the same state,
> - * remember to update group->polling_total[] once we've
> - * been through all of them. Also remember to extend the
> - * polling time if we see new stall activity.
> - */
> - new_stall = true;
> -
> - /* Calculate growth since last update */
> - growth = window_update(&t->win, now, total[t->state]);
> - if (growth < t->threshold)
> + /* Check for stall activity or a previous threshold breach */
> + if (!trigger_stalled && !t->threshold_breach)
> continue;
>
> + if (trigger_stalled) {
> + /*
> + * Multiple triggers might be looking at the same state,
> + * remember to update group->polling_total[] once we've
> + * been through all of them. Also remember to extend the
> + * polling time if we see new stall activity.
> + */
> + new_stall = true;
> +
> + /* Calculate growth since last update */
> + growth = window_update(&t->win, now, total[t->state]);
> + if (growth < t->threshold)
> + continue;
> +
> + t->threshold_breach = true;
> + }
> /* Limit event signaling to once per window */
> if (now < t->last_event_time + t->win.size)
> continue;
> @@ -550,6 +555,8 @@ static u64 update_triggers(struct psi_group *group, u64 now)
> if (cmpxchg(&t->event, 0, 1) == 0)
> wake_up_interruptible(&t->event_wait);
> t->last_event_time = now;
> + /* Reset threshold breach flag once event got generated */
> + t->threshold_breach = false;
> }
>
> if (new_stall)
> @@ -1152,6 +1159,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> t->last_event_time = 0;
> init_waitqueue_head(&t->event_wait);
> kref_init(&t->refcount);
> + t->threshold_breach = false;
>
> mutex_lock(&group->trigger_lock);
>
> --
> 1.9.1
>