In order to allow drivers to use pm_qos_update_request from interrupt
context we call the notifiers via schedule_work().
Signed-off-by: Florian Mickler <[email protected]>
---
Well, this would be the schedule_work() alternative.
kernel/pm_qos_params.c | 47 ++++++++++++++++++++++++++++++-----------------
1 files changed, 30 insertions(+), 17 deletions(-)
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index f42d3f7..296343a 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -40,6 +40,7 @@
#include <linux/string.h>
#include <linux/platform_device.h>
#include <linux/init.h>
+#include <linux/workqueue.h>
#include <linux/uaccess.h>
@@ -60,11 +61,13 @@ struct pm_qos_request_list {
static s32 max_compare(s32 v1, s32 v2);
static s32 min_compare(s32 v1, s32 v2);
+static void update_notify(struct work_struct *work);
struct pm_qos_object {
struct pm_qos_request_list requests;
struct blocking_notifier_head *notifiers;
struct miscdevice pm_qos_power_miscdev;
+ struct work_struct notify;
char *name;
s32 default_value;
atomic_t target_value;
@@ -72,10 +75,12 @@ struct pm_qos_object {
};
static struct pm_qos_object null_pm_qos;
+
static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
static struct pm_qos_object cpu_dma_pm_qos = {
.requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
.notifiers = &cpu_dma_lat_notifier,
+ .notify = __WORK_INITIALIZER(cpu_dma_pm_qos.notify, update_notify),
.name = "cpu_dma_latency",
.default_value = 2000 * USEC_PER_SEC,
.target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
@@ -86,6 +91,7 @@ static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
static struct pm_qos_object network_lat_pm_qos = {
.requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
.notifiers = &network_lat_notifier,
+ .notify = __WORK_INITIALIZER(network_lat_pm_qos.notify, update_notify),
.name = "network_latency",
.default_value = 2000 * USEC_PER_SEC,
.target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
@@ -97,13 +103,14 @@ static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
static struct pm_qos_object network_throughput_pm_qos = {
.requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
.notifiers = &network_throughput_notifier,
+ .notify = __WORK_INITIALIZER(network_throughput_pm_qos.notify,
+ update_notify),
.name = "network_throughput",
.default_value = 0,
.target_value = ATOMIC_INIT(0),
.comparitor = max_compare
};
-
static struct pm_qos_object *pm_qos_array[] = {
&null_pm_qos,
&cpu_dma_pm_qos,
@@ -135,35 +142,40 @@ static s32 min_compare(s32 v1, s32 v2)
return min(v1, v2);
}
+static void update_notify(struct work_struct *work)
+{
+ struct pm_qos_object *obj =
+ container_of(work, struct pm_qos_object, notify);
+
+ s32 extreme_value = atomic_read(&obj->target_value);
+ blocking_notifier_call_chain(
+ obj->notifiers,
+ (unsigned long) extreme_value, NULL);
+}
static void update_target(int pm_qos_class)
{
s32 extreme_value;
struct pm_qos_request_list *node;
+ struct pm_qos_object *obj = pm_qos_array[pm_qos_class];
unsigned long flags;
int call_notifier = 0;
spin_lock_irqsave(&pm_qos_lock, flags);
- extreme_value = pm_qos_array[pm_qos_class]->default_value;
- list_for_each_entry(node,
- &pm_qos_array[pm_qos_class]->requests.list, list) {
- extreme_value = pm_qos_array[pm_qos_class]->comparitor(
- extreme_value, node->value);
- }
- if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
- extreme_value) {
+ extreme_value = obj->default_value;
+ list_for_each_entry(node, &obj->requests.list, list)
+ extreme_value = obj->comparitor(extreme_value, node->value);
+
+ if (atomic_read(&obj->target_value) != extreme_value) {
call_notifier = 1;
- atomic_set(&pm_qos_array[pm_qos_class]->target_value,
- extreme_value);
+ atomic_set(&obj->target_value, extreme_value);
pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
- atomic_read(&pm_qos_array[pm_qos_class]->target_value));
+ atomic_read(&obj->target_value));
}
spin_unlock_irqrestore(&pm_qos_lock, flags);
if (call_notifier)
- blocking_notifier_call_chain(
- pm_qos_array[pm_qos_class]->notifiers,
- (unsigned long) extreme_value, NULL);
+ schedule_work(&obj->notify);
}
static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -301,7 +313,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_request);
* @pm_qos_class: identifies which qos target changes should be notified.
* @notifier: notifier block managed by caller.
*
- * will register the notifier into a notification chain that gets called
+ * Will register the notifier into a notification chain that gets called
* upon changes to the pm_qos_class target value.
*/
int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
@@ -320,7 +332,7 @@ EXPORT_SYMBOL_GPL(pm_qos_add_notifier);
* @pm_qos_class: identifies which qos target changes are notified.
* @notifier: notifier block to be removed.
*
- * will remove the notifier from the notification chain that gets called
+ * Will remove the notifier from the notification chain that gets called
* upon changes to the pm_qos_class target value.
*/
int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
@@ -392,6 +404,7 @@ static int __init pm_qos_power_init(void)
{
int ret = 0;
+
ret = register_pm_qos_misc(&cpu_dma_pm_qos);
if (ret < 0) {
printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n");
--
1.7.1
On Wed, 2010-06-09 at 17:29 +0200, [email protected] wrote:
> In order to allow drivers to use pm_qos_update_request from interrupt
> context we call the notifiers via schedule_work().
>
> Signed-off-by: Florian Mickler <[email protected]>
> ---
>
> Well, this would be the schedule_work() alternative.
>
> kernel/pm_qos_params.c | 47 ++++++++++++++++++++++++++++++-----------------
> 1 files changed, 30 insertions(+), 17 deletions(-)
>
> diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
> index f42d3f7..296343a 100644
> --- a/kernel/pm_qos_params.c
> +++ b/kernel/pm_qos_params.c
> @@ -40,6 +40,7 @@
> #include <linux/string.h>
> #include <linux/platform_device.h>
> #include <linux/init.h>
> +#include <linux/workqueue.h>
>
> #include <linux/uaccess.h>
>
> @@ -60,11 +61,13 @@ struct pm_qos_request_list {
>
> static s32 max_compare(s32 v1, s32 v2);
> static s32 min_compare(s32 v1, s32 v2);
> +static void update_notify(struct work_struct *work);
>
> struct pm_qos_object {
> struct pm_qos_request_list requests;
> struct blocking_notifier_head *notifiers;
> struct miscdevice pm_qos_power_miscdev;
> + struct work_struct notify;
> char *name;
> s32 default_value;
> atomic_t target_value;
> @@ -72,10 +75,12 @@ struct pm_qos_object {
> };
>
> static struct pm_qos_object null_pm_qos;
> +
> static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
> static struct pm_qos_object cpu_dma_pm_qos = {
> .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
> .notifiers = &cpu_dma_lat_notifier,
> + .notify = __WORK_INITIALIZER(cpu_dma_pm_qos.notify, update_notify),
> .name = "cpu_dma_latency",
> .default_value = 2000 * USEC_PER_SEC,
> .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
> @@ -86,6 +91,7 @@ static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
> static struct pm_qos_object network_lat_pm_qos = {
> .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
> .notifiers = &network_lat_notifier,
> + .notify = __WORK_INITIALIZER(network_lat_pm_qos.notify, update_notify),
> .name = "network_latency",
> .default_value = 2000 * USEC_PER_SEC,
> .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
> @@ -97,13 +103,14 @@ static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
> static struct pm_qos_object network_throughput_pm_qos = {
> .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
> .notifiers = &network_throughput_notifier,
> + .notify = __WORK_INITIALIZER(network_throughput_pm_qos.notify,
> + update_notify),
> .name = "network_throughput",
> .default_value = 0,
> .target_value = ATOMIC_INIT(0),
> .comparitor = max_compare
> };
>
> -
> static struct pm_qos_object *pm_qos_array[] = {
> &null_pm_qos,
> &cpu_dma_pm_qos,
> @@ -135,35 +142,40 @@ static s32 min_compare(s32 v1, s32 v2)
> return min(v1, v2);
> }
>
> +static void update_notify(struct work_struct *work)
> +{
> + struct pm_qos_object *obj =
> + container_of(work, struct pm_qos_object, notify);
> +
> + s32 extreme_value = atomic_read(&obj->target_value);
> + blocking_notifier_call_chain(
> + obj->notifiers,
> + (unsigned long) extreme_value, NULL);
> +}
>
> static void update_target(int pm_qos_class)
> {
> s32 extreme_value;
> struct pm_qos_request_list *node;
> + struct pm_qos_object *obj = pm_qos_array[pm_qos_class];
> unsigned long flags;
> int call_notifier = 0;
>
> spin_lock_irqsave(&pm_qos_lock, flags);
> - extreme_value = pm_qos_array[pm_qos_class]->default_value;
> - list_for_each_entry(node,
> - &pm_qos_array[pm_qos_class]->requests.list, list) {
> - extreme_value = pm_qos_array[pm_qos_class]->comparitor(
> - extreme_value, node->value);
> - }
> - if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
> - extreme_value) {
> + extreme_value = obj->default_value;
> + list_for_each_entry(node, &obj->requests.list, list)
> + extreme_value = obj->comparitor(extreme_value, node->value);
> +
> + if (atomic_read(&obj->target_value) != extreme_value) {
> call_notifier = 1;
> - atomic_set(&pm_qos_array[pm_qos_class]->target_value,
> - extreme_value);
> + atomic_set(&obj->target_value, extreme_value);
> pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
> - atomic_read(&pm_qos_array[pm_qos_class]->target_value));
> + atomic_read(&obj->target_value));
> }
> spin_unlock_irqrestore(&pm_qos_lock, flags);
>
> if (call_notifier)
> - blocking_notifier_call_chain(
> - pm_qos_array[pm_qos_class]->notifiers,
> - (unsigned long) extreme_value, NULL);
> + schedule_work(&obj->notify);
> }
This still isn't resilient against two successive calls to update. If
the second one gets to schedule_work() before the work of the first one
has finished, you'll corrupt the workqueue.
The mechanisms to solve this race (refcounting a pool of structures) are
so heavyweight that I concluded it was simpler just to have atomically
updated pm_qos objects and enforce it ... rather than trying to allow
blocking chains to be added to atomic sites via a workqueue.
James
On Wed, 09 Jun 2010 11:37:12 -0400
James Bottomley <[email protected]> wrote:
> This still isn't resilient against two successive calls to update. If
> the second one gets to schedule_work() before the work of the first one
> has finished, you'll corrupt the workqueue.
Sorry, I don't see it. Can you elaborate?
In "run_workqueue()" we do a list_del_init() which resets the
list-pointers of the workitem and only after that resets the
WORK_STRUCT_PENDING member of said structure.
schedule_work does a queue_work_on which does a test_and_set_bit on
the WORK_STRUCT_PENDING member of the work and only queues the work
via list_add_tail in insert_work afterwards.
Where is my think'o? Or was this fixed while you didn't look?
So what _can_ happen, is that we miss a new notification while the old
notification is still in the queue. But I don't think this is a problem.
Cheers,
Flo
On Wed, 2010-06-09 at 18:00 +0200, Florian Mickler wrote:
> On Wed, 09 Jun 2010 11:37:12 -0400
> James Bottomley <[email protected]> wrote:
>
>
> > This still isn't resilient against two successive calls to update. If
> > the second one gets to schedule_work() before the work of the first one
> > has finished, you'll corrupt the workqueue.
>
> Sorry, I don't see it. Can you elaborate?
>
> In "run_workqueue(" we do a list_del_init() which resets the
> list-pointers of the workitem and only after that reset the
> WORK_STRUCT_PENDING member of said structure.
>
>
> schedule_work does a queue_work_on which does a test_and_set_bit on
> the WORK_STRUCT_PENDING member of the work and only queues the work
> via list_add_tail in insert_work afterwards.
>
> Where is my think'o? Or was this fixed while you didn't look?
>
> So what _can_ happen, is that we miss a new notfication while the old
> notification is still in the queue. But I don't think this is a problem.
OK, so the expression of the race is that the latest notification gets
lost. If something is tracking values, you'd really like to lose the
previous one (which is now irrelevant) not the latest one. The point is
there's still a race.
James
On Wed, 09 Jun 2010 12:07:25 -0400
James Bottomley <[email protected]> wrote:
> On Wed, 2010-06-09 at 18:00 +0200, Florian Mickler wrote:
> > On Wed, 09 Jun 2010 11:37:12 -0400
> > James Bottomley <[email protected]> wrote:
> >
> >
> > > This still isn't resilient against two successive calls to update. If
> > > the second one gets to schedule_work() before the work of the first one
> > > has finished, you'll corrupt the workqueue.
> >
> > Sorry, I don't see it. Can you elaborate?
> >
> > In "run_workqueue(" we do a list_del_init() which resets the
> > list-pointers of the workitem and only after that reset the
> > WORK_STRUCT_PENDING member of said structure.
> >
> >
> > schedule_work does a queue_work_on which does a test_and_set_bit on
> > the WORK_STRUCT_PENDING member of the work and only queues the work
> > via list_add_tail in insert_work afterwards.
> >
> > Where is my think'o? Or was this fixed while you didn't look?
> >
> > So what _can_ happen, is that we miss a new notfication while the old
> > notification is still in the queue. But I don't think this is a problem.
>
> OK, so the expression of the race is that the latest notification gets
> lost. If something is tracking values, you'd really like to lose the
> previous one (which is now irrelevant) not the latest one. The point is
> there's still a race.
>
> James
>
Yeah, but for blocking notification it is not that bad.
Doesn't using blocking notifiers mean that you need to always check
via pm_qos_request() to get the latest value anyways? I.e. the
notification is just a hint, that something changed so you can act on
it. But if you act (may it because of notification or because of
something other) then you have to get the current value anyways.
I think there are 2 schemes of operation. The one which needs
atomic notifiers and where it would be bad if we lost any notification
and the other where it is just a way of doing some work in a timely
fashion but it isn't critical that it is done right away.
pm_qos was the second kind of operation up until now, because the
notifiers may have got scheduled away while blocked.
I think we should allow for both kinds of operations simultaneous. But
as I got that pushback to the change from John Linville as I touched his
code, I got a bit reluctant and just have done the simple variant. :)
(for example if we want to implement some sort of debugging stats, we
could use blocking notifications, but if we for example block some
C-states due to a latency requirement we probably need some atomic
notifications.)
Cheers,
Flo
On Wed, 2010-06-09 at 18:32 +0200, Florian Mickler wrote:
> On Wed, 09 Jun 2010 12:07:25 -0400
> James Bottomley <[email protected]> wrote:
>
> > On Wed, 2010-06-09 at 18:00 +0200, Florian Mickler wrote:
> > > On Wed, 09 Jun 2010 11:37:12 -0400
> > > James Bottomley <[email protected]> wrote:
> > >
> > >
> > > > This still isn't resilient against two successive calls to update. If
> > > > the second one gets to schedule_work() before the work of the first one
> > > > has finished, you'll corrupt the workqueue.
> > >
> > > Sorry, I don't see it. Can you elaborate?
> > >
> > > In "run_workqueue(" we do a list_del_init() which resets the
> > > list-pointers of the workitem and only after that reset the
> > > WORK_STRUCT_PENDING member of said structure.
> > >
> > >
> > > schedule_work does a queue_work_on which does a test_and_set_bit on
> > > the WORK_STRUCT_PENDING member of the work and only queues the work
> > > via list_add_tail in insert_work afterwards.
> > >
> > > Where is my think'o? Or was this fixed while you didn't look?
> > >
> > > So what _can_ happen, is that we miss a new notfication while the old
> > > notification is still in the queue. But I don't think this is a problem.
> >
> > OK, so the expression of the race is that the latest notification gets
> > lost. If something is tracking values, you'd really like to lose the
> > previous one (which is now irrelevant) not the latest one. The point is
> > there's still a race.
> >
> > James
> >
>
> Yeah, but for blocking notification it is not that bad.
The network latency notifier uses the value to recalculate something.
Losing the last value will mean it's using stale data.
> Doesn't using blocking notifiers mean that you need to always check
> via pm_qos_request() to get the latest value anyways? I.e. the
> notification is just a hint, that something changed so you can act on
> it. But if you act (may it because of notification or because of
> something other) then you have to get the current value anyways.
Well, the network notifier assumes the notifier value is the latest ...
> I think there are 2 schemes of operation. The one which needs
> atomic notifiers and where it would be bad if we lost any notification
> and the other where it is just a way of doing some work in a timely
> fashion but it isn't critical that it is done right away.
>
> pm_qos was the second kind of operation up until now, because the
> notifiers may have got scheduled away while blocked.
Actually, no ... the current API preserves ordering semantics. If a
notifier sleeps and another change comes along, the update callsite will
sleep on the notifier's mutex ... so ordering is always preserved.
> I think we should allow for both kinds of operations simultaneous. But
> as I got that pushback to the change from John Linville as I touched his
> code, I got a bit reluctant and just have done the simple variant. :)
The code I proposed does ... but it relies on the callsite supplying the
necessary context.
James
On Wed, 09 Jun 2010 13:05:49 -0400
James Bottomley <[email protected]> wrote:
> On Wed, 2010-06-09 at 18:32 +0200, Florian Mickler wrote:
> > Yeah, but for blocking notification it is not that bad.
>
> The network latency notifier uses the value to recalculate something.
> Losing the last value will mean it's using stale data.
Ah. Indeed. I didn't do my homework there. That sucks. (Btw, I know why
you said "recalculate _something_" :) )
It even fetches via pm_qos_get_request() in the middle. But obviously
if we do not report the last value but the value before that, then this
is bad.
We could fix it though for all current (two) in tree users by passing
always a negative value in to the notification. Then the network
notifier fetches the value via pm_qos_get_request();
And cpuidle ignores the value anyways.
> >
> > pm_qos was the second kind of operation up until now, because the
> > notifiers may have got scheduled away while blocked.
>
> Actually, no ... the current API preserves ordering semantics. If a
> notifier sleeps and another change comes along, the update callsite will
> sleep on the notifier's mutex ... so ordering is always preserved.
Ah! Ordering. Something I didn't think of. But still no luck for me.
>
> > I think we should allow for both kinds of operations simultaneous. But
> > as I got that pushback to the change from John Linville as I touched his
> > code, I got a bit reluctant and just have done the simple variant. :)
>
> The code I proposed does ... but it relies on the callsite supplying the
> necessary context.
>
Yes, I know. It's just that if we want to have both kinds of notifiers
we need to use an atomic_notifier chain for that constraint. And then we
have the issue that John objected to, namely pushing all the
schedule_work code to each of the API-Users.
Anyway, I think your patch is better in that it is correct.
It just doesn't solve the issue for constraints where drivers want to
update from interrupt but notifiers want to be able to work from
process context.
> James
>
Cheers,
Flo
On Wed, 09 Jun 2010 13:05:49 -0400
James Bottomley <[email protected]> wrote:
> On Wed, 2010-06-09 at 18:32 +0200, Florian Mickler wrote:
> > On Wed, 09 Jun 2010 12:07:25 -0400
> > James Bottomley <[email protected]> wrote:
> > > OK, so the expression of the race is that the latest notification gets
> > > lost. If something is tracking values, you'd really like to lose the
> > > previous one (which is now irrelevant) not the latest one. The point is
> > > there's still a race.
> > >
> > > James
> > >
> >
> > Yeah, but for blocking notification it is not that bad.
>
> The network latency notifier uses the value to recalculate something.
> Losing the last value will mean it's using stale data.
Actually after pondering a bit, it is not stale data that gets
delivered: (Correct me if I'm wrong)
The update_notify() function determines the extreme value and then
calls the blocking_notifier_chain.
But just before the update_notify() function gets called, the
work-structure is reset and re-queue-able. So it is possible to queue it
already even before the extreme_value in update_notify gets
determined.
So the notified value is always the latest or there is another
notification underway.
>
> > Doesn't using blocking notifiers mean that you need to always check
> > via pm_qos_request() to get the latest value anyways? I.e. the
> > notification is just a hint, that something changed so you can act on
> > it. But if you act (may it because of notification or because of
> > something other) then you have to get the current value anyways.
>
> Well, the network notifier assumes the notifier value is the latest ...
>
> > I think there are 2 schemes of operation. The one which needs
> > atomic notifiers and where it would be bad if we lost any notification
> > and the other where it is just a way of doing some work in a timely
> > fashion but it isn't critical that it is done right away.
> >
> > pm_qos was the second kind of operation up until now, because the
> > notifiers may have got scheduled away while blocked.
>
> Actually, no ... the current API preserves ordering semantics. If a
> notifier sleeps and another change comes along, the update callsite will
> sleep on the notifier's mutex ... so ordering is always preserved.
If my above thinking is right, then the change in semantics is only,
that now you can not determine by the number of notifications the
number of update_request-calls.
>
> James
>
Cheers,
Flo
On Thu, 2010-06-10 at 09:45 +0200, Florian Mickler wrote:
> On Wed, 09 Jun 2010 13:05:49 -0400
> James Bottomley <[email protected]> wrote:
> > On Wed, 2010-06-09 at 18:32 +0200, Florian Mickler wrote:
> > > On Wed, 09 Jun 2010 12:07:25 -0400
> > > James Bottomley <[email protected]> wrote:
> > > > OK, so the expression of the race is that the latest notification gets
> > > > lost. If something is tracking values, you'd really like to lose the
> > > > previous one (which is now irrelevant) not the latest one. The point is
> > > > there's still a race.
> > > >
> > > > James
> > > >
> > >
> > > Yeah, but for blocking notification it is not that bad.
> >
> > The network latency notifier uses the value to recalculate something.
> > Losing the last value will mean it's using stale data.
>
> Actually after pondering a bit, it is not stale data that gets
> delivered: (Correct me if I'm wrong)
>
> The update_notify() function determines the extreme value and then
> calls the blocking_notifier_chain.
>
> But just before the update_notify() function get's called, the
> work-structure is reset and re-queue-able. So it is possible to queue it
> already even before the extreme_value in update_notify get's
> determined.
>
> So the notified value is always the latest or there is another
> notification underway.
Well, no ... it's a race, and like all good races the winner is non
deterministic.
If the update comes in before the work queue is run, then everyone
eventually sees the new value. If it comes in as the chain is running,
some notifiers see the old value and some the new. If it comes in
during back end processing, no-one sees the new value.
James
On Thu, 10 Jun 2010 08:39:04 -0500
James Bottomley <[email protected]> wrote:
> On Thu, 2010-06-10 at 09:45 +0200, Florian Mickler wrote:
> > On Wed, 09 Jun 2010 13:05:49 -0400
> > James Bottomley <[email protected]> wrote:
> > > On Wed, 2010-06-09 at 18:32 +0200, Florian Mickler wrote:
> > > > On Wed, 09 Jun 2010 12:07:25 -0400
> > > > James Bottomley <[email protected]> wrote:
> > > > > OK, so the expression of the race is that the latest notification gets
> > > > > lost. If something is tracking values, you'd really like to lose the
> > > > > previous one (which is now irrelevant) not the latest one. The point is
> > > > > there's still a race.
> > > > >
> > > > > James
> > > > >
> > > >
> > > > Yeah, but for blocking notification it is not that bad.
> > >
> > > The network latency notifier uses the value to recalculate something.
> > > Losing the last value will mean it's using stale data.
> >
> > Actually after pondering a bit, it is not stale data that gets
> > delivered: (Correct me if I'm wrong)
> >
> > The update_notify() function determines the extreme value and then
> > calls the blocking_notifier_chain.
> >
> > But just before the update_notify() function get's called, the
> > work-structure is reset and re-queue-able. So it is possible to queue it
> > already even before the extreme_value in update_notify get's
> > determined.
> >
> > So the notified value is always the latest or there is another
> > notification underway.
>
> Well, no ... it's a race, and like all good races the winner is non
> deterministic.
Can you point out where I'm wrong?
U1. update_request gets called
U2. new extreme value gets calculated under spinlock
U3. notify gets queued if its WORK_PENDING_BIT is not set.
run_workqueue() does the following:
R1. clears the WORK_PENDING_BIT
R2. calls update_notify()
R3. reads the current extreme value
R4. notification gets called with that value
If another update_request comes to schedule_work before
run_workqueue() has cleared the WORK_PENDING_BIT, the work will not be
requeued, but R3 isn't yet executed. So the notifiers will get the last
value.
If another update_request comes to schedule_work() after
run_workqueue() has cleared the WORK_PENDING_BIT, the work will be
requeued and another update_notify will be executed later.
>
> If the update comes in before the work queue is run, then everyone
> eventually sees the new value. If it comes in as the chain is running,
> some notifiers see the old value and some the new. If it comes in
> during back end processing, no-one sees the new value.
>
> James
>
No, I think that is not correct. There will always be another
update_notify()-call after the last update_request-recalculation due to
the fact that run_workqueue() calls work_clear_pending() before calling
the callback function of that work.
(We could even requeue the work from within that callback function if we
wanted to keep the number of notifications the same. But then we would
need to queue the extreme_values in the framework. I think this
isn't needed.)
Cheers,
Flo
On Thu, 2010-06-10 at 16:41 +0200, Florian Mickler wrote:
> > > So the notified value is always the latest or there is another
> > > notification underway.
> >
> > Well, no ... it's a race, and like all good races the winner is non
> > deterministic.
>
> Can you point out where I'm wrong?
>
> U1. update_request gets called
> U2. new extreme value gets calculated under spinlock
> U3. notify gets queued if its WORK_PENDING_BIT is not set.
>
> run_workqueue() does the following:
> R1. clears the WORK_PENDING_BIT
> R2. calls update_notify()
> R3. reads the current extreme value
> R4. notification gets called with that value
>
>
> If another update_request comes to schedule_work before
> run_workqueue() has cleared the WORK_PENDING_BIT, the work will not be
> requeued, but R3 isn't yet executed. So the notifiers will get the last
> value.
So the race now only causes lost older notifications ... as long as the
consumers are OK with that (it is an API change) then this should work.
You're still not taking advantage of the user context passed in, though,
so this does needlessly delay notifications for that case.
Actually, pm_qos_remove now needs a flush_scheduled work since you don't
want to return until the list is clear (since the next action may be to
free the object).
James
On Fri, 11 Jun 2010 09:25:52 -0500
James Bottomley <[email protected]> wrote:
> On Thu, 2010-06-10 at 16:41 +0200, Florian Mickler wrote:
> > > > So the notified value is always the latest or there is another
> > > > notification underway.
> > >
> > > Well, no ... it's a race, and like all good races the winner is non
> > > deterministic.
> >
> > Can you point out where I'm wrong?
> >
> > U1. update_request gets called
> > U2. new extreme value gets calculated under spinlock
> > U3. notify gets queued if its WORK_PENDING_BIT is not set.
> >
> > run_workqueue() does the following:
> > R1. clears the WORK_PENDING_BIT
> > R2. calls update_notify()
> > R3. reads the current extreme value
> > R4. notification gets called with that value
> >
> >
> > If another update_request comes to schedule_work before
> > run_workqueue() has cleared the WORK_PENDING_BIT, the work will not be
> > requeued, but R3 isn't yet executed. So the notifiers will get the last
> > value.
>
> So the race now only causes lost older notifications ... as long as the
> consumers are OK with that (it is an API change) then this should work.
> You're still not taking advantage of the user context passed in, though,
> so this does needlessly delay notifications for that case.
Right. We can use execute_in_process_context.
> Actually, pm_qos_remove now needs a flush_scheduled work since you don't
> want to return until the list is clear (since the next action may be to
> free the object).
Yes. Good point, will fix.
> James
>
Cheers,
Flo
On Fri, 11 Jun 2010 09:25:52 -0500
James Bottomley <[email protected]> wrote:
>
> Actually, pm_qos_remove now needs a flush_scheduled work since you don't
> want to return until the list is clear (since the next action may be to
> free the object).
The work-items are allocated in the pm_qos objects (which get never
freed), so we should be fine there.
>
> James
>
On Mon, 2010-06-14 at 16:33 +0200, Florian Mickler wrote:
> On Fri, 11 Jun 2010 09:25:52 -0500
> James Bottomley <[email protected]> wrote:
>
> >
> > Actually, pm_qos_remove now needs a flush_scheduled work since you don't
> > want to return until the list is clear (since the next action may be to
> > free the object).
>
> The work-items are allocated in the pm_qos objects (which get never
> freed), so we should be fine there.
That's not a safe assumption. Once we get into drivers, timers and cpu
idle states, I can see these things being in modules.
Regardless, it's bad programming practice to be using something after
the final remove is called, it certainly violates the principle of least
surprise and would usually eventually cause problems.
James
On Mon, 14 Jun 2010 09:44:06 -0500
James Bottomley <[email protected]> wrote:
> On Mon, 2010-06-14 at 16:33 +0200, Florian Mickler wrote:
> > On Fri, 11 Jun 2010 09:25:52 -0500
> > James Bottomley <[email protected]> wrote:
> >
> > >
> > > Actually, pm_qos_remove now needs a flush_scheduled work since you don't
> > > want to return until the list is clear (since the next action may be to
> > > free the object).
> >
> > The work-items are allocated in the pm_qos objects (which get never
> > freed), so we should be fine there.
>
> That's not a safe assumption. Once we get into drivers, timers and cpu
> ilde states, I can see these things being in modules.
>
> Regardless, it's bad programming practise to be using something after
> the final remove is called, it certainly violates the principle of least
> surprise and would usually eventually cause problems.
>
> James
>
I absolutely defer to you in this question. But there is no
pm_qos_remove at the moment, as far as I see? Should I add one? When
and how would it be called?
Maybe I'm not understanding you right at the moment.
Cheers,
Flo
On Mon, 2010-06-14 at 16:49 +0200, Florian Mickler wrote:
> On Mon, 14 Jun 2010 09:44:06 -0500
> James Bottomley <[email protected]> wrote:
>
> > On Mon, 2010-06-14 at 16:33 +0200, Florian Mickler wrote:
> > > On Fri, 11 Jun 2010 09:25:52 -0500
> > > James Bottomley <[email protected]> wrote:
> > >
> > > >
> > > > Actually, pm_qos_remove now needs a flush_scheduled work since you don't
> > > > want to return until the list is clear (since the next action may be to
> > > > free the object).
> > >
> > > The work-items are allocated in the pm_qos objects (which get never
> > > freed), so we should be fine there.
> >
> > That's not a safe assumption. Once we get into drivers, timers and cpu
> > ilde states, I can see these things being in modules.
> >
> > Regardless, it's bad programming practise to be using something after
> > the final remove is called, it certainly violates the principle of least
> > surprise and would usually eventually cause problems.
> >
> > James
> >
>
> I absolutely defer to you in this question. But there is no
> pm_qos_remove at the moment, as far as I see? Should I add one? When
> and how would it be called?
>
> Maybe I'm not understanding you right at the moment.
You need to flush in pm_qos_remove_notifier() to make sure you aren't
racing deferred work in the removed notifier with a return from remove,
so that the outcome is always consistent (i.e. the notifier sees all
pending events before removal). On closer inspection, it looks like the
notifier mutexes are sufficient to make sure it doesn't return running
the notifier to be removed ... but that does mean that add and remove
require user context.
James
On Mon, 14 Jun 2010 10:10:06 -0500
James Bottomley <[email protected]> wrote:
> On closer inspection, it looks like the
> notifier mutexes are sufficient to make sure it doesn't return running
> the notifier to be removed ... but that does mean that add and remove
> require user context.
>
> James
>
Yes. I don't think we need to make the add and remove callable from
interrupt context?
Cheers,
Flo