Add the delayed timer to devfreq framework in order to support
the periodical polling mode without stop caused by CPU idle state.
Some Non-CPU device must need to monitor the device status like
utilization regardless of CPU state.
- patch1 explains the detailed reason why the delayed timer is required.
- patch2 initializes that exynos5422-dmc device use delayed timer as default
instead of deferrable timer.
Chanwoo Choi (2):
PM / devfreq: Add support delayed timer for polling mode
memory: samsung: exynos5422-dmc: Use delayed timer as default
Documentation/ABI/testing/sysfs-class-devfreq | 12 +++
drivers/devfreq/devfreq.c | 83 ++++++++++++++++++-
drivers/memory/samsung/exynos5422-dmc.c | 1 +
include/linux/devfreq.h | 9 ++
4 files changed, 104 insertions(+), 1 deletion(-)
--
2.17.1
Until now, the devfreq driver using polling mode like simple_ondemand
governor have used only deferrable timer for reduing the redundant
power consumption. It reduces the CPU wake-up from idle due to polling mode
which check the status of Non-CPU device.
But, it has a problem for Non-CPU device like DMC device with DMA operation.
Some Non-CPU device need to do monitor continously regardless of CPU state
in order to decide the proper next status of Non-CPU device.
So, add support the dealyed timer for polling mode to support
the periodical monitoring. The devfreq driver and user can select
the kind of timer on either deferrable and delayed timer.
For example, change the timer type of DMC device
based on Exynos5422-based Odroid-XU3 as following:
- If want to use deferrable timer as following:
echo deferrable > /sys/class/devfreq/10c20000.memory-controller/timer
- If want to use delayed timer as following:
echo delayed > /sys/class/devfreq/10c20000.memory-controller/timer
Suggested-by: Lukasz Luba <[email protected]>
Signed-off-by: Chanwoo Choi <[email protected]>
---
Documentation/ABI/testing/sysfs-class-devfreq | 12 +++
drivers/devfreq/devfreq.c | 83 ++++++++++++++++++-
include/linux/devfreq.h | 9 ++
3 files changed, 103 insertions(+), 1 deletion(-)
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 9758eb85ade3..b10aa3aa230d 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -65,6 +65,18 @@ Description:
as following:
echo 0 > /sys/class/devfreq/.../trans_stat
+What: /sys/class/devfreq/.../timer
+Date: July 2020
+Contact: Chanwoo Choi <[email protected]>
+Description:
+ This ABI shows and stores the kind of work timer by users.
+ This work timer is used by devfreq workqueue in order to
+ monitor the device status such as utilization. The user
+ can change the work timer on runtime according to their demand
+ as following:
+ echo deferrable > /sys/class/devfreq/.../timer
+ echo delayed > /sys/class/devfreq/.../timer
+
What: /sys/class/devfreq/.../userspace/set_freq
Date: September 2011
Contact: MyungJoo Ham <[email protected]>
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 52b9c3e141f3..2a4aa8742520 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -49,6 +49,11 @@ static LIST_HEAD(devfreq_governor_list);
static LIST_HEAD(devfreq_list);
static DEFINE_MUTEX(devfreq_list_lock);
+static const char timer_name[][DEVFREQ_NAME_LEN] = {
+ [DEVFREQ_TIMER_DEFERRABLE] = { "deferrable" },
+ [DEVFREQ_TIMER_DELAYED] = { "delayed" },
+};
+
/**
* find_device_devfreq() - find devfreq struct using device pointer
* @dev: device pointer used to lookup device devfreq.
@@ -454,7 +459,17 @@ void devfreq_monitor_start(struct devfreq *devfreq)
if (devfreq->governor->interrupt_driven)
return;
- INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
+ switch (devfreq->profile->timer) {
+ case DEVFREQ_TIMER_DEFERRABLE:
+ INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
+ break;
+ case DEVFREQ_TIMER_DELAYED:
+ INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
+ break;
+ default:
+ return;
+ }
+
if (devfreq->profile->polling_ms)
queue_delayed_work(devfreq_wq, &devfreq->work,
msecs_to_jiffies(devfreq->profile->polling_ms));
@@ -771,6 +786,11 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq->data = data;
devfreq->nb.notifier_call = devfreq_notifier_call;
+ if (devfreq->profile->timer < 0
+ || devfreq->profile->timer >= DEVFREQ_TIMER_NUM) {
+ goto err_out;
+ }
+
if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
mutex_unlock(&devfreq->lock);
err = set_freq_table(devfreq);
@@ -1625,6 +1645,66 @@ static ssize_t trans_stat_store(struct device *dev,
}
static DEVICE_ATTR_RW(trans_stat);
+static ssize_t timer_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct devfreq *df = to_devfreq(dev);
+
+ if (!df->governor)
+ return -EINVAL;
+
+ return sprintf(buf, "%s\n", timer_name[df->profile->timer]);
+}
+
+static ssize_t timer_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct devfreq *df = to_devfreq(dev);
+ char str_timer[DEVFREQ_NAME_LEN + 1];
+ int timer = -1;
+ int ret = 0, i;
+
+ ret = sscanf(buf, "%16s", str_timer);
+ if (ret != 1)
+ return -EINVAL;
+
+ for (i = 0; i < DEVFREQ_TIMER_NUM; i++) {
+ if (!strncmp(timer_name[i], str_timer, DEVFREQ_NAME_LEN)) {
+ timer = i;
+ break;
+ }
+ }
+
+ if (timer < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (df->profile->timer == timer) {
+ ret = 0;
+ goto out;
+ }
+
+ mutex_lock(&df->lock);
+ df->profile->timer = timer;
+ mutex_unlock(&df->lock);
+
+ ret = df->governor->event_handler(df, DEVFREQ_GOV_STOP, NULL);
+ if (ret) {
+ dev_warn(dev, "%s: Governor %s not stopped(%d)\n",
+ __func__, df->governor->name, ret);
+ goto out;
+ }
+
+ ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL);
+ if (ret)
+ dev_warn(dev, "%s: Governor %s not started(%d)\n",
+ __func__, df->governor->name, ret);
+out:
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_RW(timer);
+
static struct attribute *devfreq_attrs[] = {
&dev_attr_name.attr,
&dev_attr_governor.attr,
@@ -1636,6 +1716,7 @@ static struct attribute *devfreq_attrs[] = {
&dev_attr_min_freq.attr,
&dev_attr_max_freq.attr,
&dev_attr_trans_stat.attr,
+ &dev_attr_timer.attr,
NULL,
};
ATTRIBUTE_GROUPS(devfreq);
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 57e871a559a9..12782fbb4c25 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -31,6 +31,13 @@
#define DEVFREQ_PRECHANGE (0)
#define DEVFREQ_POSTCHANGE (1)
+/* DEVFREQ work timers */
+enum devfreq_timer {
+ DEVFREQ_TIMER_DEFERRABLE = 0,
+ DEVFREQ_TIMER_DELAYED,
+ DEVFREQ_TIMER_NUM,
+};
+
struct devfreq;
struct devfreq_governor;
@@ -70,6 +77,7 @@ struct devfreq_dev_status {
* @initial_freq: The operating frequency when devfreq_add_device() is
* called.
* @polling_ms: The polling interval in ms. 0 disables polling.
+ * @timer: Timer type is either deferrable or delayed timer.
* @target: The device should set its operating frequency at
* freq or lowest-upper-than-freq value. If freq is
* higher than any operable frequency, set maximum.
@@ -96,6 +104,7 @@ struct devfreq_dev_status {
struct devfreq_dev_profile {
unsigned long initial_freq;
unsigned int polling_ms;
+ enum devfreq_timer timer;
int (*target)(struct device *dev, unsigned long *freq, u32 flags);
int (*get_dev_status)(struct device *dev,
--
2.17.1
Hi Chanwoo,
I think it doesn't help on the benchmark I suggested that is doing only memory
accesses. With both timer, I have the same timing.
To test the benchmark with these new patches about timer:
git clone https://github.com/wwilly/benchmark.git \
&& cd benchmark \
&& source env.sh \
&& ./bench_build.sh \
&& bash source/scripts/test_dvfs_mem_patched.sh
The benchmark is set by default to run for 1s, but you can increase this by
tweaking the script as:
taskset 8 ./bench_install/bin/microbe_cache 33554431 0 9722222 <TIME in sec> ${little_freq}
Also, as I reported the issue, would it be possible to add a
Reported-by: Willy Wolff <[email protected]> ?
Many thanks in advance.
Best Regards,
Willy
Hi Chanwoo,
On 7/3/20 8:26 AM, Chanwoo Choi wrote:
> Add the delayed timer to devfreq framework in order to support
> the periodical polling mode without stop caused by CPU idle state.
Thank you, this patchset looks fine to me and is a step in the right
direction:
Reviewed-by: Bartlomiej Zolnierkiewicz <[email protected]>
> Some Non-CPU device must need to monitor the device status like
> utilization regardless of CPU state.
This is probably true for all devfreq devices using simple_ondemand
governor by default:
drivers/devfreq/exynos-bus.c
drivers/devfreq/rk3399_dmc.c
drivers/devfreq/tegra20-devfreq.c
drivers/gpu/drm/lima/lima_devfreq.c
drivers/gpu/drm/msm/msm_gpu.c
drivers/gpu/drm/panfrost/panfrost_devfreq.c
drivers/memory/samsung/exynos5422-dmc.c
drivers/scsi/ufs/ufshcd.c
With devfreq device polling being "coupled" to CPU idle state
the devfreq subsystem behavior is completely unpredictable and
unreliable.
It affects both performance (device opp change up happening too
late) and power consumption (device opp change down happening too
late).
It also causes hardware usage counters support to report too high
values (because of CPU idle "coupling" the real polling period
becomes larger than maximum period supported by the counter and
the counter becomes fully "saturated") which negatively affects
power consumption (as has been observed when using Odroid XU3/4).
[ The only upside of using such "coupling" is lowered CPU power
usage (in some situations) but at the (unacceptable IMHO) cost
of the correctness of operations of devfreq subsystem. ]
Unfortunately this patchset currently fixes only exynos5422-dmc
devfreq driver. To fix problems for Exynos platforms we need to
also fix exynos-bus devfreq driver.
Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics
> - patch1 explains the detailed reason why the delayed timer is required.
> - patch2 initializes that exynos5422-dmc device use delayed timer as default
> instead of deferrable timer.
>
> Chanwoo Choi (2):
> PM / devfreq: Add support delayed timer for polling mode
> memory: samsung: exynos5422-dmc: Use delayed timer as default
>
> Documentation/ABI/testing/sysfs-class-devfreq | 12 +++
> drivers/devfreq/devfreq.c | 83 ++++++++++++++++++-
> drivers/memory/samsung/exynos5422-dmc.c | 1 +
> include/linux/devfreq.h | 9 ++
> 4 files changed, 104 insertions(+), 1 deletion(-)
Hi all,
On 7/3/20 7:26 AM, Chanwoo Choi wrote:
> Add the delayed timer to devfreq framework in order to support
> the periodical polling mode without stop caused by CPU idle state.
> Some Non-CPU device must need to monitor the device status like
> utilization regardless of CPU state.
>
> - patch1 explains the detailed reason why the delayed timer is required.
> - patch2 initializes that exynos5422-dmc device use delayed timer as default
> instead of deferrable timer.
>
> Chanwoo Choi (2):
> PM / devfreq: Add support delayed timer for polling mode
> memory: samsung: exynos5422-dmc: Use delayed timer as default
>
> Documentation/ABI/testing/sysfs-class-devfreq | 12 +++
> drivers/devfreq/devfreq.c | 83 ++++++++++++++++++-
> drivers/memory/samsung/exynos5422-dmc.c | 1 +
> include/linux/devfreq.h | 9 ++
> 4 files changed, 104 insertions(+), 1 deletion(-)
>
My apologizes for being late for the party. I wasn't able to run tests
and I had to fix my setup after I messed up some scripts.
The patch set looks good to me, so you can add my (to both patches):
Reviewed-by: Lukasz Luba <[email protected]>
I have run these Willy's benchmark tests and I will send some
follow up patches in a few minutes.
Regards,
Lukasz
Hi Willy,
On 7/3/20 1:33 PM, Willy Wolff wrote:
> Hi Chanwoo,
>
> I think it doesn't help on the benchmark I suggested that is doing only memory
> accesses. With both timer, I have the same timing.
>
> To test the benchmark with these new patches about timer:
>
> git clone https://github.com/wwilly/benchmark.git \
> && cd benchmark \
> && source env.sh \
> && ./bench_build.sh \
> && bash source/scripts/test_dvfs_mem_patched.sh
>
> The benchmark is set by default to run for 1s, but you can increase this by
> tweaking the script as:
>
> taskset 8 ./bench_install/bin/microbe_cache 33554431 0 9722222 <TIME in sec> ${little_freq}
>
>
> Also, as I reported the issue, would it be possible to add a
> Reported-by: Willy Wolff <[email protected]> ?
> Many thanks in advance.
Thank you for your good work and the benchmark. I hope you will continue
to use it and report some issues. I am going to send a follow up patches
for the DMC and I will add your 'Reported-by'. In the tests I can see
the improvements, but it's worth to consult with you if I understand
the new results correctly.
I think there is still some area for improvements in the devfreq and you
could find the interesting bits to contribute.
Regards,
Lukasz
>
>
> Best Regards,
> Willy
>
Hi Lukasz,
On 2020-07-08-15-25-03, Lukasz Luba wrote:
> Hi Willy,
>
> On 7/3/20 1:33 PM, Willy Wolff wrote:
> > Hi Chanwoo,
> >
> > I think it doesn't help on the benchmark I suggested that is doing only memory
> > accesses. With both timer, I have the same timing.
> >
> > To test the benchmark with these new patches about timer:
> >
> > git clone https://github.com/wwilly/benchmark.git \
> > && cd benchmark \
> > && source env.sh \
> > && ./bench_build.sh \
> > && bash source/scripts/test_dvfs_mem_patched.sh
> >
> > The benchmark is set by default to run for 1s, but you can increase this by
> > tweaking the script as:
> >
> > taskset 8 ./bench_install/bin/microbe_cache 33554431 0 9722222 <TIME in sec> ${little_freq}
> >
> >
> > Also, as I reported the issue, would it be possible to add a
> > Reported-by: Willy Wolff <[email protected]> ?
> > Many thanks in advance.
>
> Thank you for your good work and the benchmark. I hope you will continue
> to use it and report some issues. I am going to send a follow up patches
> for the DMC and I will add your 'Reported-by'. In the tests I can see
> the improvements, but it's worth to consult with you if I understand
> the new results correctly.
>
Thanks for that. I will follow on the other patch thread discussion.
> I think there is still some area for improvements in the devfreq and you
> could find the interesting bits to contribute.
In fact, this benchmark is motivated about part of my PhD research that has just
been accepted at LCTES2020: "Performance Optimization on big.LITTLE Architectures:
A Memory-latency Aware Approach" at https://dl.acm.org/doi/10.1145/3372799.3394370
Basically, it's about snooping latency with "bad" CPU DVFS choice on big.LITTLE
systems or more generally SMP/AMP architecture. I'm cleaning up my code and will
propose patches as an RFC later. It introduces a new CPU DVFS governor to limit
snooping latency.
Cheers,
Willy
>
> Regards,
> Lukasz
>
> >
> >
> > Best Regards,
> > Willy
> >
Hi Willy
On 7/10/20 4:12 PM, Willy Wolff wrote:
> Hi Lukasz,
>
> On 2020-07-08-15-25-03, Lukasz Luba wrote:
>> Hi Willy,
>>
>> On 7/3/20 1:33 PM, Willy Wolff wrote:
>>> Hi Chanwoo,
>>>
>>> I think it doesn't help on the benchmark I suggested that is doing only memory
>>> accesses. With both timer, I have the same timing.
>>>
>>> To test the benchmark with these new patches about timer:
>>>
>>> git clone https://github.com/wwilly/benchmark.git \
>>> && cd benchmark \
>>> && source env.sh \
>>> && ./bench_build.sh \
>>> && bash source/scripts/test_dvfs_mem_patched.sh
>>>
>>> The benchmark is set by default to run for 1s, but you can increase this by
>>> tweaking the script as:
>>>
>>> taskset 8 ./bench_install/bin/microbe_cache 33554431 0 9722222 <TIME in sec> ${little_freq}
>>>
>>>
>>> Also, as I reported the issue, would it be possible to add a
>>> Reported-by: Willy Wolff <[email protected]> ?
>>> Many thanks in advance.
>>
>> Thank you for your good work and the benchmark. I hope you will continue
>> to use it and report some issues. I am going to send a follow up patches
>> for the DMC and I will add your 'Reported-by'. In the tests I can see
>> the improvements, but it's worth to consult with you if I understand
>> the new results correctly.
>>
>
> Thanks for that. I will follow on the other patch thread discussion.
>
>> I think there is still some area for improvements in the devfreq and you
>> could find the interesting bits to contribute.
>
> In fact, this benchmark is motivated about part of my PhD research that has just
> been accepted at LCTES2020: "Performance Optimization on big.LITTLE Architectures:
> A Memory-latency Aware Approach" at https://dl.acm.org/doi/10.1145/3372799.3394370
>
Congrats and thank you for the link (I will read it).
> Basically, it's about snooping latency with "bad" CPU DVFS choice on big.LITTLE
> systems or more generally SMP/AMP architecture. I'm cleaning up my code and will
> propose patches as an RFC later. It introduces a new CPU DVFS governor to limit
> snooping latency.
This is interesting, please add me on CC in the patch set.
Regards,
Lukasz