2022-05-16 11:31:12

by Alex Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] psi: add support for multi level pressure stall trigger



On 5/16/22 11:35, Chen Wandun wrote:
> Nowadays, psi events are triggered when stall time exceed
> stall threshold, but no any different between these events.
>
> Actually, events can be divide into multi level, each level
> represent a different stall pressure, that is help to identify
> pressure information more accurately.
>
> echo "some 150000 350000 1000000" > /proc/pressure/memory would

This breaks the old ABI. And why you need this new function?

Thanks

> add [150ms, 350ms) threshold for partial memory stall measured
> within 1sec time window.
>
> Signed-off-by: Chen Wandun <[email protected]>
> ---
> include/linux/psi_types.h | 3 ++-
> kernel/sched/psi.c | 19 +++++++++++++------
> 2 files changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
> index c7fe7c089718..2b1393c8bf90 100644
> --- a/include/linux/psi_types.h
> +++ b/include/linux/psi_types.h
> @@ -119,7 +119,8 @@ struct psi_trigger {
> enum psi_states state;
>
> /* User-spacified threshold in ns */
> - u64 threshold;
> + u64 min_threshold;
> + u64 max_threshold;
>
> /* List node inside triggers list */
> struct list_head node;
> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index 6f9533c95b0a..17dd233b533a 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -541,7 +541,7 @@ static u64 update_triggers(struct psi_group *group, u64 now)
>
> /* Calculate growth since last update */
> growth = window_update(&t->win, now, total[t->state]);
> - if (growth < t->threshold)
> + if (growth < t->min_threshold || growth >= t->max_threshold)
> continue;
>
> t->pending_event = true;
> @@ -1087,15 +1087,18 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> {
> struct psi_trigger *t;
> enum psi_states state;
> - u32 threshold_us;
> + u32 min_threshold_us;
> + u32 max_threshold_us;
> u32 window_us;
>
> if (static_branch_likely(&psi_disabled))
> return ERR_PTR(-EOPNOTSUPP);
>
> - if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
> + if (sscanf(buf, "some %u %u %u", &min_threshold_us,
> + &max_threshold_us, &window_us) == 3)
> state = PSI_IO_SOME + res * 2;
> - else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
> + else if (sscanf(buf, "full %u %u %u", &min_threshold_us,
> + &max_threshold_us, &window_us) == 3)
> state = PSI_IO_FULL + res * 2;
> else
> return ERR_PTR(-EINVAL);
> @@ -1107,8 +1110,11 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> window_us > WINDOW_MAX_US)
> return ERR_PTR(-EINVAL);
>
> + if (min_threshold_us >= max_threshold_us)
> + return ERR_PTR(-EINVAL);
> +
> /* Check threshold */
> - if (threshold_us == 0 || threshold_us > window_us)
> + if (max_threshold_us > window_us)
> return ERR_PTR(-EINVAL);
>
> t = kmalloc(sizeof(*t), GFP_KERNEL);
> @@ -1117,7 +1123,8 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
>
> t->group = group;
> t->state = state;
> - t->threshold = threshold_us * NSEC_PER_USEC;
> + t->min_threshold = min_threshold_us * NSEC_PER_USEC;
> + t->max_threshold = max_threshold_us * NSEC_PER_USEC;
> t->win.size = window_us * NSEC_PER_USEC;
> window_reset(&t->win, 0, 0, 0);
>


2022-05-16 23:22:05

by Suren Baghdasaryan

[permalink] [raw]
Subject: Re: [PATCH 1/2] psi: add support for multi level pressure stall trigger

On Sun, May 15, 2022 at 11:20 PM Alex Shi <[email protected]> wrote:
>
>
>
> On 5/16/22 11:35, Chen Wandun wrote:
> > Nowadays, psi events are triggered when stall time exceed
> > stall threshold, but no any different between these events.
> >
> > Actually, events can be divide into multi level, each level
> > represent a different stall pressure, that is help to identify
> > pressure information more accurately.

IIUC by defining min and max, you want the trigger to activate when
the stall is between min and max thresholds. But I don't see why you
would need that. If you want to have several levels, you can create
multiple triggers and monitor them separately. For your example, that
would be:

echo "some 150000 1000000" > /proc/pressure/memory
echo "some 350000 1000000" > /proc/pressure/memory

Your first trigger will fire whenever the stall exceeds 150ms within
each 1sec and the second one will trigger when it exceeds 350ms. It is
true that if the stall jumps sharply above 350ms, you would get both
triggers firing. I'm guessing that's why you want this functionality
so that 150ms trigger does not fire when 350ms one is firing but why
is that a problem? Can't userspace pick the highest level one and
ignore all the lower ones when this happens? Or are you addressing
some other requirement?

> >
> > echo "some 150000 350000 1000000" > /proc/pressure/memory would
>
> This breaks the old ABI. And why you need this new function?

Both great points.

>
> Thanks
>
> > add [150ms, 350ms) threshold for partial memory stall measured
> > within 1sec time window.
> >
> > Signed-off-by: Chen Wandun <[email protected]>
> > ---
> > include/linux/psi_types.h | 3 ++-
> > kernel/sched/psi.c | 19 +++++++++++++------
> > 2 files changed, 15 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
> > index c7fe7c089718..2b1393c8bf90 100644
> > --- a/include/linux/psi_types.h
> > +++ b/include/linux/psi_types.h
> > @@ -119,7 +119,8 @@ struct psi_trigger {
> > enum psi_states state;
> >
> > /* User-spacified threshold in ns */
> > - u64 threshold;
> > + u64 min_threshold;
> > + u64 max_threshold;
> >
> > /* List node inside triggers list */
> > struct list_head node;
> > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > index 6f9533c95b0a..17dd233b533a 100644
> > --- a/kernel/sched/psi.c
> > +++ b/kernel/sched/psi.c
> > @@ -541,7 +541,7 @@ static u64 update_triggers(struct psi_group *group, u64 now)
> >
> > /* Calculate growth since last update */
> > growth = window_update(&t->win, now, total[t->state]);
> > - if (growth < t->threshold)
> > + if (growth < t->min_threshold || growth >= t->max_threshold)
> > continue;
> >
> > t->pending_event = true;
> > @@ -1087,15 +1087,18 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> > {
> > struct psi_trigger *t;
> > enum psi_states state;
> > - u32 threshold_us;
> > + u32 min_threshold_us;
> > + u32 max_threshold_us;
> > u32 window_us;
> >
> > if (static_branch_likely(&psi_disabled))
> > return ERR_PTR(-EOPNOTSUPP);
> >
> > - if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
> > + if (sscanf(buf, "some %u %u %u", &min_threshold_us,
> > + &max_threshold_us, &window_us) == 3)
> > state = PSI_IO_SOME + res * 2;
> > - else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
> > + else if (sscanf(buf, "full %u %u %u", &min_threshold_us,
> > + &max_threshold_us, &window_us) == 3)
> > state = PSI_IO_FULL + res * 2;
> > else
> > return ERR_PTR(-EINVAL);
> > @@ -1107,8 +1110,11 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> > window_us > WINDOW_MAX_US)
> > return ERR_PTR(-EINVAL);
> >
> > + if (min_threshold_us >= max_threshold_us)
> > + return ERR_PTR(-EINVAL);
> > +
> > /* Check threshold */
> > - if (threshold_us == 0 || threshold_us > window_us)
> > + if (max_threshold_us > window_us)
> > return ERR_PTR(-EINVAL);
> >
> > t = kmalloc(sizeof(*t), GFP_KERNEL);
> > @@ -1117,7 +1123,8 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> >
> > t->group = group;
> > t->state = state;
> > - t->threshold = threshold_us * NSEC_PER_USEC;
> > + t->min_threshold = min_threshold_us * NSEC_PER_USEC;
> > + t->max_threshold = max_threshold_us * NSEC_PER_USEC;
> > t->win.size = window_us * NSEC_PER_USEC;
> > window_reset(&t->win, 0, 0, 0);
> >

2022-05-17 01:31:10

by Suren Baghdasaryan

[permalink] [raw]
Subject: Re: [PATCH 1/2] psi: add support for multi level pressure stall trigger

On Mon, May 16, 2022 at 1:21 AM Suren Baghdasaryan <[email protected]> wrote:
>
> On Sun, May 15, 2022 at 11:20 PM Alex Shi <[email protected]> wrote:
> >
> >
> >
> > On 5/16/22 11:35, Chen Wandun wrote:
> > > Nowadays, psi events are triggered when stall time exceed
> > > stall threshold, but no any different between these events.
> > >
> > > Actually, events can be divide into multi level, each level
> > > represent a different stall pressure, that is help to identify
> > > pressure information more accurately.
>
> IIUC by defining min and max, you want the trigger to activate when
> the stall is between min and max thresholds. But I don't see why you
> would need that. If you want to have several levels, you can create
> multiple triggers and monitor them separately. For your example, that
> would be:
>
> echo "some 150000 1000000" > /proc/pressure/memory
> echo "some 350000 1000000" > /proc/pressure/memory
>
> Your first trigger will fire whenever the stall exceeds 150ms within
> each 1sec and the second one will trigger when it exceeds 350ms. It is
> true that if the stall jumps sharply above 350ms, you would get both
> triggers firing. I'm guessing that's why you want this functionality
> so that 150ms trigger does not fire when 350ms one is firing but why
> is that a problem? Can't userspace pick the highest level one and
> ignore all the lower ones when this happens? Or are you addressing
> some other requirement?
>
> > >
> > > echo "some 150000 350000 1000000" > /proc/pressure/memory would
> >
> > This breaks the old ABI. And why you need this new function?
>
> Both great points.

BTW, I think the additional max_threshold parameter could be
implemented in a backward compatible way so that the old API is not
broken:

arg_count = sscanf(buf, "some %u %u %u", &min_threshold_us, &arg2, &arg3);
if (arg_count < 2) return ERR_PTR(-EINVAL);
if (arg_count < 3) {
max_threshold_us = INT_MAX;
window_us = arg2;
} else {
max_threshold_us = arg2;
window_us = arg3;
}

But again, the motivation still needs to be explained.

>
> >
> > Thanks
> >
> > > add [150ms, 350ms) threshold for partial memory stall measured
> > > within 1sec time window.
> > >
> > > Signed-off-by: Chen Wandun <[email protected]>
> > > ---
> > > include/linux/psi_types.h | 3 ++-
> > > kernel/sched/psi.c | 19 +++++++++++++------
> > > 2 files changed, 15 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
> > > index c7fe7c089718..2b1393c8bf90 100644
> > > --- a/include/linux/psi_types.h
> > > +++ b/include/linux/psi_types.h
> > > @@ -119,7 +119,8 @@ struct psi_trigger {
> > > enum psi_states state;
> > >
> > > /* User-spacified threshold in ns */
> > > - u64 threshold;
> > > + u64 min_threshold;
> > > + u64 max_threshold;
> > >
> > > /* List node inside triggers list */
> > > struct list_head node;
> > > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > > index 6f9533c95b0a..17dd233b533a 100644
> > > --- a/kernel/sched/psi.c
> > > +++ b/kernel/sched/psi.c
> > > @@ -541,7 +541,7 @@ static u64 update_triggers(struct psi_group *group, u64 now)
> > >
> > > /* Calculate growth since last update */
> > > growth = window_update(&t->win, now, total[t->state]);
> > > - if (growth < t->threshold)
> > > + if (growth < t->min_threshold || growth >= t->max_threshold)
> > > continue;
> > >
> > > t->pending_event = true;
> > > @@ -1087,15 +1087,18 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> > > {
> > > struct psi_trigger *t;
> > > enum psi_states state;
> > > - u32 threshold_us;
> > > + u32 min_threshold_us;
> > > + u32 max_threshold_us;
> > > u32 window_us;
> > >
> > > if (static_branch_likely(&psi_disabled))
> > > return ERR_PTR(-EOPNOTSUPP);
> > >
> > > - if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
> > > + if (sscanf(buf, "some %u %u %u", &min_threshold_us,
> > > + &max_threshold_us, &window_us) == 3)
> > > state = PSI_IO_SOME + res * 2;
> > > - else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
> > > + else if (sscanf(buf, "full %u %u %u", &min_threshold_us,
> > > + &max_threshold_us, &window_us) == 3)
> > > state = PSI_IO_FULL + res * 2;
> > > else
> > > return ERR_PTR(-EINVAL);
> > > @@ -1107,8 +1110,11 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> > > window_us > WINDOW_MAX_US)
> > > return ERR_PTR(-EINVAL);
> > >
> > > + if (min_threshold_us >= max_threshold_us)
> > > + return ERR_PTR(-EINVAL);
> > > +
> > > /* Check threshold */
> > > - if (threshold_us == 0 || threshold_us > window_us)
> > > + if (max_threshold_us > window_us)
> > > return ERR_PTR(-EINVAL);
> > >
> > > t = kmalloc(sizeof(*t), GFP_KERNEL);
> > > @@ -1117,7 +1123,8 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
> > >
> > > t->group = group;
> > > t->state = state;
> > > - t->threshold = threshold_us * NSEC_PER_USEC;
> > > + t->min_threshold = min_threshold_us * NSEC_PER_USEC;
> > > + t->max_threshold = max_threshold_us * NSEC_PER_USEC;
> > > t->win.size = window_us * NSEC_PER_USEC;
> > > window_reset(&t->win, 0, 0, 0);
> > >