LinuxLists.cc - [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211

2007-11-26 21:30:10

Subject: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

This changes rc80211_simple failed frame percentage estimation to use an
exponential moving average method. Failed frames percentage is sampled over
time and a smoothed average is computed. This average is examined periodically
and the rate adjusted eventually.

Compared to the old method this new approach is much more responsive. The old
method used plain tx success/failure counters. This made rate control very
unresponsive after a short time of using the interface, since old data wouldn't
age properly.

Signed-off-by: Mattias Nissler <[email protected]>
---
net/mac80211/ieee80211_rate.h | 3 -
net/mac80211/rc80211_simple.c | 155 ++++++++++++++++++++++++++++++----------
2 files changed, 116 insertions(+), 42 deletions(-)

diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h
index 2368813..9948d0f 100644
--- a/net/mac80211/ieee80211_rate.h
+++ b/net/mac80211/ieee80211_rate.h
@@ -18,9 +18,6 @@
#include "ieee80211_i.h"
#include "sta_info.h"

-#define RATE_CONTROL_NUM_DOWN 20
-#define RATE_CONTROL_NUM_UP 15
-

struct rate_control_extra {
/* values from rate_control_get_rate() to the caller: */
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
index da72737..60de964 100644
--- a/net/mac80211/rc80211_simple.c
+++ b/net/mac80211/rc80211_simple.c
@@ -25,8 +25,67 @@

#define RATE_CONTROL_EMERG_DEC 2
-#define RATE_CONTROL_INTERVAL (HZ / 20)
-#define RATE_CONTROL_MIN_TX 10
+#define RATE_CONTROL_SAMPLE_INTERVAL (HZ / 10)
+#define RATE_CONTROL_ADJUST_INTERVAL (HZ)
+#define RATE_CONTROL_MIN_TX 4
+#define RATE_CONTROL_MIN_SAMPLES 4
+
+#define RATE_CONTROL_SMOOTHING_SHIFT 4
+#define RATE_CONTROL_SMOOTHING (1 << RATE_CONTROL_SMOOTHING_SHIFT)
+
+#define RATE_CONTROL_DOWN_THRES 20
+#define RATE_CONTROL_UP_THRES 15
+
+struct sta_rate_control {
+ unsigned long last_change;
+ unsigned long last_sample;
+
+ u32 tx_num_failed;
+ u32 tx_num_xmit;
+
+ u32 num_samples;
+
+ /*
+ * Average number of failed frames, scaled by RATE_CONTROL_SMOOTHING.
+ * This value is a smoothed by using a exponentail weighted average
+ * technique:
+ *
+ * (SMOOTHING - 1) * per_failed_old + failed
+ * per_failed = -----------------------------------------
+ * SMOOTHING
+ *
+ * where the per_failed is the new approximation, per_failed_old the
+ * previous one and failed is the percentage of failed transmissions in
+ * the last interval. Note that the bigger SMOOTHING the more weight is
+ * given to the previous estimate, resulting in more smoothing but less
+ * responsiveness.
+ *
+ * For computation, we actually don't use the above formula, but this
+ * one:
+ *
+ * per_failed_scaled = per_failed_old_scaled - per_failed_old + failed
+ *
+ * where:
+ * per_failed_scaled = per_failed * SMOOTHING
+ * per_failed_old_scaled = per_faield_old * SMOOTHING
+ *
+ * This avoids floating point numbers and the per_failed_old value can
+ * easily be obtained by shifting per_failed_old_scaled right by
+ * RATE_CONTROL_SMOOTHING_SHIFT.
+ */
+ u32 per_failed_scaled;
+
+ unsigned long avg_rate_update;
+ u32 tx_avg_rate_sum;
+ u32 tx_avg_rate_num;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ struct dentry *tx_avg_rate_sum_dentry;
+ struct dentry *tx_avg_rate_num_dentry;
+ struct dentry *per_failed_avg_dentry;
+#endif
+};
+

static void rate_control_rate_inc(struct ieee80211_local *local,
struct sta_info *sta)
@@ -111,22 +170,6 @@ struct global_rate_control {
int dummy;
};

-struct sta_rate_control {
- unsigned long last_rate_change;
- u32 tx_num_failures;
- u32 tx_num_xmit;
-
- unsigned long avg_rate_update;
- u32 tx_avg_rate_sum;
- u32 tx_avg_rate_num;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- struct dentry *tx_avg_rate_sum_dentry;
- struct dentry *tx_avg_rate_num_dentry;
-#endif
-};
-
-
static void rate_control_simple_tx_status(void *priv, struct net_device *dev,
struct sk_buff *skb,
struct ieee80211_tx_status *status)
@@ -142,9 +185,13 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev,
return;

srctrl = sta->rate_ctrl_priv;
- srctrl->tx_num_xmit++;
+ if (status->retry_count) {
+ srctrl->tx_num_xmit += status->retry_count;
+ srctrl->tx_num_failed += status->retry_count;
+ } else
+ srctrl->tx_num_xmit++;
+
if (status->excessive_retries) {
- srctrl->tx_num_failures++;
sta->tx_retry_failed++;
sta->tx_num_consecutive_failures++;
sta->tx_num_mpdu_fail++;
@@ -159,36 +206,45 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev,
sta->tx_num_mpdu_fail += status->retry_count;

if (time_after(jiffies,
- srctrl->last_rate_change + RATE_CONTROL_INTERVAL) &&
- srctrl->tx_num_xmit > RATE_CONTROL_MIN_TX) {
+ srctrl->last_sample + RATE_CONTROL_SAMPLE_INTERVAL) &&
+ srctrl->tx_num_xmit > RATE_CONTROL_MIN_TX) {
+ u32 avg;
u32 per_failed;
- srctrl->last_rate_change = jiffies;
-
- per_failed = (100 * sta->tx_num_mpdu_fail) /
- (sta->tx_num_mpdu_fail + sta->tx_num_mpdu_ok);
- /* TODO: calculate average per_failed to make adjusting
- * parameters easier */
-#if 0
- if (net_ratelimit()) {
- printk(KERN_DEBUG "MPDU fail=%d ok=%d per_failed=%d\n",
- sta->tx_num_mpdu_fail, sta->tx_num_mpdu_ok,
- per_failed);
- }
-#endif
+
+ srctrl->last_sample = jiffies;
+
+ per_failed = srctrl->tx_num_failed * 100 / srctrl->tx_num_xmit;
+ avg = srctrl->per_failed_scaled >> RATE_CONTROL_SMOOTHING_SHIFT;
+ srctrl->per_failed_scaled =
+ srctrl->per_failed_scaled - avg + per_failed;
+
+ srctrl->tx_num_xmit = 0;
+ srctrl->tx_num_failed = 0;
+ srctrl->num_samples++;
+ }
+
+ if (time_after(jiffies,
+ srctrl->last_change + RATE_CONTROL_ADJUST_INTERVAL) &&
+ srctrl->num_samples > RATE_CONTROL_MIN_SAMPLES) {
+ u32 avg;
+
+ srctrl->last_change = jiffies;

/*
* XXX: Make these configurable once we have an
* interface to the rate control algorithms
*/
- if (per_failed > RATE_CONTROL_NUM_DOWN) {
+ avg = srctrl->per_failed_scaled >> RATE_CONTROL_SMOOTHING_SHIFT;
+ if (avg > RATE_CONTROL_DOWN_THRES) {
rate_control_rate_dec(local, sta);
- } else if (per_failed < RATE_CONTROL_NUM_UP) {
+ srctrl->num_samples = 0;
+ } else if (avg < RATE_CONTROL_UP_THRES) {
rate_control_rate_inc(local, sta);
+ srctrl->num_samples = 0;
}
+
srctrl->tx_avg_rate_sum += status->control.rate->rate;
srctrl->tx_avg_rate_num++;
- srctrl->tx_num_failures = 0;
- srctrl->tx_num_xmit = 0;
} else if (sta->tx_num_consecutive_failures >=
RATE_CONTROL_EMERG_DEC) {
rate_control_rate_dec(local, sta);
@@ -369,6 +425,23 @@ static const struct file_operations sta_tx_avg_rate_num_ops = {
.open = open_file_generic,
};

+static ssize_t sta_per_failed_avg(struct file *file,
+ char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct sta_rate_control *srctrl = file->private_data;
+ char buf[20];
+
+ sprintf(buf, "%d\n",
+ srctrl->per_failed_scaled >> RATE_CONTROL_SMOOTHING_SHIFT);
+ return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
+}
+
+static const struct file_operations sta_per_failed_avg_ops = {
+ .read = sta_per_failed_avg,
+ .open = open_file_generic,
+};
+
static void rate_control_simple_add_sta_debugfs(void *priv, void *priv_sta,
struct dentry *dir)
{
@@ -380,6 +453,9 @@ static void rate_control_simple_add_sta_debugfs(void *priv, void *priv_sta,
srctrl->tx_avg_rate_sum_dentry =
debugfs_create_file("rc_simple_sta_tx_avg_rate_sum", 0400,
dir, srctrl, &sta_tx_avg_rate_sum_ops);
+ srctrl->per_failed_avg_dentry =
+ debugfs_create_file("rc_simple_sta_per_failed_avg", 0400,
+ dir, srctrl, &sta_per_failed_avg_ops);
}

static void rate_control_simple_remove_sta_debugfs(void *priv, void *priv_sta)
@@ -388,6 +464,7 @@ static void rate_control_simple_remove_sta_debugfs(void *priv, void *priv_sta)

debugfs_remove(srctrl->tx_avg_rate_sum_dentry);
debugfs_remove(srctrl->tx_avg_rate_num_dentry);
+ debugfs_remove(srctrl->per_failed_avg_dentry);
}
#endif

--
1.5.3.4

2007-11-27 21:38:07

by Mattias Nissler

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Tue, 2007-11-27 at 16:35 +0100, Stefano Brivio wrote:
> On Mon, 26 Nov 2007 22:30:05 +0100
> Mattias Nissler <[email protected]> wrote:
>
> > This changes rc80211_simple failed frame percentage estimation to use an
> > exponential moving average method. Failed frames percentage is sampled
> > over time and a smoothed average is computed. This average is examined
> > periodically and the rate adjusted eventually.
>
> This can be seen as a particular example of a PID controller [1]. It's
> actually a PI controller, with no derivative terms in it. It could be
> interesting to implement a regular PID controller.
>
> This is clearly a MIMO model, with the setpoints being a reasonable value
> of TX failures and the highest achievable rate, the process input being the
> bitrate and the process output being TX failures and successes; and
> obviously, the big issue being the implementation without floating-point.
> Thus, with some tuning, you could probably get a very good rate control
> algorithm.

Good point, I'm not too familiar with control theory, but I'll do some
homework :-)

However, maybe you can answer some questions:

Is it sensible to preprocess (i.e. calculate a smoothed average) the
input data for a PID controller? If so, should we compute a time average
or just average over the last N frames?

Note that the rates (or actually modulations) are not a continuous
entity, but we only have a limited set of choices. Normally, the PID
would output a value from some continuous range. What are good ways to
map the PID controller output to a rate?

Mattias

2007-11-27 22:01:27

by Larry Finger

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

Mattias Nissler wrote:
>
> Good to hear. I've tested it with rt61pci and rt73usb. I guess you
> tested b43[legacy]. Any other test results, especially in low signal
> quality situations would be of great help.

Sorry I wasn't clearer. I have tested V3 of the patch on a BCM4311/2 using b43. So far, it has only
been tested with strong signal.

Larry

2007-11-27 15:07:55

by Larry Finger

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

Johannes Berg wrote:
> On Mon, 2007-11-26 at 22:30 +0100, Mattias Nissler wrote:
>> This changes rc80211_simple failed frame percentage estimation to use an
>> exponential moving average method. Failed frames percentage is sampled over
>> time and a smoothed average is computed. This average is examined periodically
>> and the rate adjusted eventually.
>>
>> Compared to the old method this new approach is much more responsive. The old
>> method used plain tx success/failure counters. This made rate control very
>> unresponsive after a short time of using the interface, since old data wouldn't
>> age properly.
>
> Sounds sane and better than before. If it works we can apply this I
> guess.

I have not done any rigorous testing of this patch; however, it has not caused any harm on my system.

Larry

2007-11-27 21:29:12

by Mattias Nissler

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Tue, 2007-11-27 at 15:13 +0100, Johannes Berg wrote:
> On Mon, 2007-11-26 at 22:30 +0100, Mattias Nissler wrote:
> > This changes rc80211_simple failed frame percentage estimation to use an
> > exponential moving average method. Failed frames percentage is sampled over
> > time and a smoothed average is computed. This average is examined periodically
> > and the rate adjusted eventually.
> >
> > Compared to the old method this new approach is much more responsive. The old
> > method used plain tx success/failure counters. This made rate control very
> > unresponsive after a short time of using the interface, since old data wouldn't
> > age properly.
>
> Sounds sane and better than before. If it works we can apply this I
> guess.

Before this goes in, I have at least one issue to be discussed: As it
is, the patch calculates the failed frames percentage exclusively from
the retry_count. I'm not sure whether this is what we want, because some
drivers cannot provide this parameter (or they only know there have been
retries, but not how many). This is the case e.g. for the rt2x00 usb
devices. I'm not sure what to do about this. My suggestion would be
something like:

srctrl->tx_num_xmit++;

if (status->excessive_retries) {
srctrl->tx_num_failed += 2;
srctrl->tx_num_xmit++;
} else if (status->retry_count) {
srctrl->tx_num_failed++;
srctrl->tx_num_xmit++;
}

This accounts correctly transmitted frames as one good, failed frames 2
bad and retry-succeeded frames 1 good + 1 bad point in the failed
percentage calculation. Opinions?

Mattias

2007-11-28 16:34:25

by Mattias Nissler

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

Hi Stefano,

first of all, thanks for your input.

On Wed, 2007-11-28 at 00:29 +0100, Stefano Brivio wrote:

> By computing an average, you won't be able to implement differentiation,
> even if you could implement integration.
>
> In fact, the integration here means considering error values occurred over
> some time (and I'd think of some 'windowing' approach, as explained below),
> and so if we make an average, that doesn't really matter because all the
> input numbers would weigh the same in the term to be integrated, as in an
> average.
>
> But in order to differentiate, you need to have some array of values for
> the slope calculation (if you get a value only, you can't find the slope of
> the line which is passing by - or near to, if we consider more than two
> values - two points).
>
> Maybe it doesn't even make sense to have more than two values, and it would
> be better to just compute a regular slope between two points, so that we
> would just need to keep an average value and two values. Plus, the
> differentiation here would just mean to make a difference between the
> latest two values and divide it by the time difference between the two
> frames, so we would get just two input values.
>
> I would take this approach for computing the ID terms:
>
> [number of bad frames]
> [2][3][5][6][9][7]
> | | |
> 0 S <-n-> E <-- S is where the sliding window starts, E where it
> ends, n is the length of the window (here it's 4 as an example, that'll be
> subject to tuning).
>
> - in order to integrate, we make the average of the bad frame values
> ranging from S to E; if that average is going to be over a reasonable
> threshold for bad frame values we preset, the I factor will be negative,
> positive otherwise;
> - in order to differentiate, we'll just subtract the preset threshold
> >from the values stored in E, E-1 from the preset threshold, then subtract
> the resulting E value from the resulting E-1 value and divide by the time
> which passed between the two bad frames computation; if this value is
> positive, the D factor will be negative, positive otherwise;
> - after we did these computation, we wait for some time and make the
> sliding window shift right, and then repeat.
>
> What we would avoid here - by properly tuning the n value - is the
> so-called integral wind-up [1], which could be really bad if the quality of
> the link varies rapidly, because we would take too much into account the
> past events, which could be meaningless to some degree if considering a not
> recent past.

That's all good, I completely agree.

I should have been a little clearer with my question, but when I wrote
it, I probably still had some concepts mashed up in my brain. Anyway,
let me try again: What about the problem of getting the failed frame
number samples? The input data received by the rate scaling algorithm is
basically if a frame failed to be transmitted or not, and if it was
transmitted, how many retries were needed. What my original patch does
(and what you also seem to assume in your explanations) is to count
failed frames and total number of frames over fix-sized time intervals
and compute a failed frames percentage sample value. The question is now
whether this approach is a good one. There are situations where the
device might only transmitted very few frames (as low as 1 frame every
ten seconds). Clearly, there is not much information available to the
rate scaling algorithm (not much need for rate scaling either, but
nevertheless we have to consider the effect of such a situation for
frames following an idle period). And then there are of course periods
with many transmitted frames. So what I thought of is maybe using
discrete time to calculate samples is perhaps not a good idea. Another
option would be to calculate failed frames percentage samples from each
M frames that were to be transmitted. What's your opinion on this?

>
> > Note that the rates (or actually modulations) are not a continuous
> > entity, but we only have a limited set of choices. Normally, the PID
> > would output a value from some continuous range. What are good ways to
> > map the PID controller output to a rate?
>
> The quick approach would be to round it to the nearest rate. A better one
> could be to keep a map of (1):[rate] <-> (2):[k1*rate + k2*recent errors at
> this rate], so that if we do have to decide whether to switch between two
> rates, we could actually evaluate the device performance - mainly
> sensitivity - at different rates(1), and accordingly think of the real
> difference between two rates(2). Then we round the output to the nearest
> rate(2) and choose the corresponding rate(1).

Ok, I understand. Question is whether it's worth the added overhead both
in computation and storage.

Mattias

2007-11-27 23:34:09

by Stefano Brivio

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Tue, 27 Nov 2007 22:38:04 +0100
Mattias Nissler <[email protected]> wrote:

> Is it sensible to preprocess (i.e. calculate a smoothed average) the
> input data for a PID controller? If so, should we compute a time average
> or just average over the last N frames?

By computing an average, you won't be able to implement differentiation,
even if you could implement integration.

In fact, the integration here means considering error values occurred over
some time (and I'd think of some 'windowing' approach, as explained below),
and so if we make an average, that doesn't really matter because all the
input numbers would weigh the same in the term to be integrated, as in an
average.

But in order to differentiate, you need to have some array of values for
the slope calculation (if you get a value only, you can't find the slope of
the line which is passing by - or near to, if we consider more than two
values - two points).

Maybe it doesn't even make sense to have more than two values, and it would
be better to just compute a regular slope between two points, so that we
would just need to keep an average value and two values. Plus, the
differentiation here would just mean to make a difference between the
latest two values and divide it by the time difference between the two
frames, so we would get just two input values.

I would take this approach for computing the ID terms:

[number of bad frames]
[2][3][5][6][9][7]
| | |
0 S <-n-> E <-- S is where the sliding window starts, E where it
ends, n is the length of the window (here it's 4 as an example, that'll be
subject to tuning).

- in order to integrate, we make the average of the bad frame values
ranging from S to E; if that average is going to be over a reasonable
threshold for bad frame values we preset, the I factor will be negative,
positive otherwise;
- in order to differentiate, we'll just subtract the preset threshold
from the values stored in E, E-1 from the preset threshold, then subtract
the resulting E value from the resulting E-1 value and divide by the time
which passed between the two bad frames computation; if this value is
positive, the D factor will be negative, positive otherwise;
- after we did these computation, we wait for some time and make the
sliding window shift right, and then repeat.

What we would avoid here - by properly tuning the n value - is the
so-called integral wind-up [1], which could be really bad if the quality of
the link varies rapidly, because we would take too much into account the
past events, which could be meaningless to some degree if considering a not
recent past.

> Note that the rates (or actually modulations) are not a continuous
> entity, but we only have a limited set of choices. Normally, the PID
> would output a value from some continuous range. What are good ways to
> map the PID controller output to a rate?

The quick approach would be to round it to the nearest rate. A better one
could be to keep a map of (1):[rate] <-> (2):[k1*rate + k2*recent errors at
this rate], so that if we do have to decide whether to switch between two
rates, we could actually evaluate the device performance - mainly
sensitivity - at different rates(1), and accordingly think of the real
difference between two rates(2). Then we round the output to the nearest
rate(2) and choose the corresponding rate(1).

I understand this could look a bit obscure, if you are still interested
however feel free to ask questions. :)

[1] http://en.wikipedia.org/wiki/Integral_windup

--
Ciao
Stefano

2007-11-27 21:30:48

by Mattias Nissler

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Tue, 2007-11-27 at 09:07 -0600, Larry Finger wrote:
> Johannes Berg wrote:
> > On Mon, 2007-11-26 at 22:30 +0100, Mattias Nissler wrote:
> >> This changes rc80211_simple failed frame percentage estimation to use an
> >> exponential moving average method. Failed frames percentage is sampled over
> >> time and a smoothed average is computed. This average is examined periodically
> >> and the rate adjusted eventually.
> >>
> >> Compared to the old method this new approach is much more responsive. The old
> >> method used plain tx success/failure counters. This made rate control very
> >> unresponsive after a short time of using the interface, since old data wouldn't
> >> age properly.
> >
> > Sounds sane and better than before. If it works we can apply this I
> > guess.
>
> I have not done any rigorous testing of this patch; however, it has not caused any harm on my system.

Good to hear. I've tested it with rt61pci and rt73usb. I guess you
tested b43[legacy]. Any other test results, especially in low signal
quality situations would be of great help.

Mattias

2007-11-27 15:40:18

by Stefano Brivio

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Mon, 26 Nov 2007 22:30:05 +0100
Mattias Nissler <[email protected]> wrote:

> This changes rc80211_simple failed frame percentage estimation to use an
> exponential moving average method. Failed frames percentage is sampled
> over time and a smoothed average is computed. This average is examined
> periodically and the rate adjusted eventually.

This can be seen as a particular example of a PID controller [1]. It's
actually a PI controller, with no derivative terms in it. It could be
interesting to implement a regular PID controller.

This is clearly a MIMO model, with the setpoints being a reasonable value
of TX failures and the highest achievable rate, the process input being the
bitrate and the process output being TX failures and successes; and
obviously, the big issue being the implementation without floating-point.
Thus, with some tuning, you could probably get a very good rate control
algorithm.

I just wanted to share this thought ATM, but I'll try to elaborate more if I
happen to have some spare time.

[1] http://en.wikipedia.org/wiki/PID_controller

--
Ciao
Stefano

2007-11-29 04:08:24

by Stefano Brivio

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Thu, 29 Nov 2007 02:59:43 +0100
Mattias Nissler <[email protected]> wrote:

> Ok, I your case study sounds reasonable :-) So I guess I'll stick with
> averaging over fix sized time intervals. The interpolation approach you
> suggest seems good enough. How I'd expect the rate control algorithm to
> behave in situations with not much input data is:
>
> a) Stay at the current rate, just assume conditions didn't change.
> b) Be optimistic: Slowly ramp up tx rate, so if more data to be
> transmitted is available, it'll get good rates from the beginning, if
> possible.
>
> I think the approach you suggest is basically a) if we aren't adjusting
> rate heavily at the moment.

Yes, because:
1) if the number of frame errors is over some threshold, by interpolating
the way I suggested we will probably switch to a lower rate (mostly because
of the I term - as the slope there would be 1); this should be just fine
because anyway we didn't need the extra bitrate, and once we need it, we'll
have enough data to switch to an higher rate, in case. A caveat here: what
if we don't want an high throughput but we want low latency? We'll have to
see if your b) approach is reasonable - i.e. if the increased latency
provided by a lower bitrate is significant;
2) if the frame errors are below the threshold, no problem because we won't
switch rate (if we are near to the threshold) or we'll switch to an higher
one. We don't aim to 0 frame errors, but we aim to the threshold (and this
threshold could be meant as a userspace parameter, maybe, and may be seen as
a 'reliability over throughput' parameter).

> Ok, this whole thing sounds very promising to me. Now that we've
> discussed some important points, I'll go ahead and write some code,
> probably over the weekend.

If I'm not too busy, I'm going to help you.

--
Ciao
Stefano

2007-11-28 17:49:47

by Stefano Brivio

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Wed, 28 Nov 2007 17:34:21 +0100
Mattias Nissler <[email protected]> wrote:

> I should have been a little clearer with my question, but when I wrote
> it, I probably still had some concepts mashed up in my brain. Anyway,
> let me try again: What about the problem of getting the failed frame
> number samples? The input data received by the rate scaling algorithm is
> basically if a frame failed to be transmitted or not, and if it was
> transmitted, how many retries were needed. What my original patch does
> (and what you also seem to assume in your explanations) is to count
> failed frames and total number of frames over fix-sized time intervals
> and compute a failed frames percentage sample value.

Exactly.

> The question is now whether this approach is a good one. There are
> situations where the device might only transmitted very few frames (as
> low as 1 frame every ten seconds). Clearly, there is not much information
> available to the rate scaling algorithm (not much need for rate scaling
> either, but nevertheless we have to consider the effect of such a
> situation for frames following an idle period). And then there are of
> course periods with many transmitted frames. So what I thought of is
> maybe using discrete time to calculate samples is perhaps not a good
> idea. Another option would be to calculate failed frames percentage
> samples from each M frames that were to be transmitted. What's your
> opinion on this?

The size of the time interval (not to be confused with the sliding
window) could vary depending on number of frames we tried to sent. But I
don't know if this is worth the effort. I'll list a few examples:
1) we are downloading a big file through our NFS server at home; we dance
around with our laptop in our hands and suddenly we fall behind a short
wall - SNR drops by 10dB and we need to suddenly react at this, the D term
does this and we would need the time interval to be short enough so that we
notice in time about the fast drop in SNR;
2) what if we consider 1), except that we are just on IRC, sending a few
frames every some seconds? The time interval needs to be short anyway,
because we would notice about the drop in SNR too late otherwise;
3) we are stealing connectivity from the neighborhood, rain falls and
humidity slowly increases, thus producing a slow decrease in SNR; the I
term should deal with this, by integrating the error over time and thus
force a lower rate after, maybe, some minutes; both if we make a lot of
traffic or just send few frames, the time interval here should be short
enough - again - so that we can actually see a consistent decrease in SNR
between different time intervals.

So I'd say that for maximum granularity and good precision, we should try
to keep this time interval as short as possible (my rough guess is about
1s). We then need to solve the issue you mentioned, but I'd come up with
another approach here. Instead of taking a long time interval, let's do
interpolation. In other words, we can reasonably assume that, if at a given
time t we don't transmit any frame so we miss data, the frame errors rate
is similar to the one at t-1, and if we missed data from t-1 as well, we
grab the value from the t-2 interval, and so on. This is rough, but
still it seems to me a precise enough method for dealing with the issue.

> > The quick approach would be to round it to the nearest rate. A better
> > one could be to keep a map of (1):[rate] <-> (2):[k1*rate + k2*recent
> > errors at this rate], so that if we do have to decide whether to switch
> > between two rates, we could actually evaluate the device performance -
> > mainly sensitivity - at different rates(1), and accordingly think of
> > the real difference between two rates(2). Then we round the output to
> > the nearest rate(2) and choose the corresponding rate(1).
>
> Ok, I understand. Question is whether it's worth the added overhead both
> in computation and storage.

Probably not, but so far I've seen very few examples of PID controllers for
data rates by googling around, and my guess here is that you would need to
try the simplest approach and then go further adding complexity until you
are satisfied.

--
Ciao
Stefano

2007-11-29 01:59:48

by Mattias Nissler

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Wed, 2007-11-28 at 18:43 +0100, Stefano Brivio wrote:

> The size of the time interval (not to be confused with the sliding
> window) could vary depending on number of frames we tried to sent. But I
> don't know if this is worth the effort. I'll list a few examples:
> 1) we are downloading a big file through our NFS server at home; we dance
> around with our laptop in our hands and suddenly we fall behind a short
> wall - SNR drops by 10dB and we need to suddenly react at this, the D term
> does this and we would need the time interval to be short enough so that we
> notice in time about the fast drop in SNR;
> 2) what if we consider 1), except that we are just on IRC, sending a few
> frames every some seconds? The time interval needs to be short anyway,
> because we would notice about the drop in SNR too late otherwise;
> 3) we are stealing connectivity from the neighborhood, rain falls and
> humidity slowly increases, thus producing a slow decrease in SNR; the I
> term should deal with this, by integrating the error over time and thus
> force a lower rate after, maybe, some minutes; both if we make a lot of
> traffic or just send few frames, the time interval here should be short
> enough - again - so that we can actually see a consistent decrease in SNR
> between different time intervals.
>
> So I'd say that for maximum granularity and good precision, we should try
> to keep this time interval as short as possible (my rough guess is about
> 1s). We then need to solve the issue you mentioned, but I'd come up with
> another approach here. Instead of taking a long time interval, let's do
> interpolation. In other words, we can reasonably assume that, if at a given
> time t we don't transmit any frame so we miss data, the frame errors rate
> is similar to the one at t-1, and if we missed data from t-1 as well, we
> grab the value from the t-2 interval, and so on. This is rough, but
> still it seems to me a precise enough method for dealing with the issue.

Ok, I your case study sounds reasonable :-) So I guess I'll stick with
averaging over fix sized time intervals. The interpolation approach you
suggest seems good enough. How I'd expect the rate control algorithm to
behave in situations with not much input data is:

a) Stay at the current rate, just assume conditions didn't change.
b) Be optimistic: Slowly ramp up tx rate, so if more data to be
transmitted is available, it'll get good rates from the beginning, if
possible.

I think the approach you suggest is basically a) if we aren't adjusting
rate heavily at the moment.

>
>
> > > The quick approach would be to round it to the nearest rate. A better
> > > one could be to keep a map of (1):[rate] <-> (2):[k1*rate + k2*recent
> > > errors at this rate], so that if we do have to decide whether to switch
> > > between two rates, we could actually evaluate the device performance -
> > > mainly sensitivity - at different rates(1), and accordingly think of
> > > the real difference between two rates(2). Then we round the output to
> > > the nearest rate(2) and choose the corresponding rate(1).
> >
> > Ok, I understand. Question is whether it's worth the added overhead both
> > in computation and storage.
>
> Probably not, but so far I've seen very few examples of PID controllers for
> data rates by googling around, and my guess here is that you would need to
> try the simplest approach and then go further adding complexity until you
> are satisfied.

Ok, this whole thing sounds very promising to me. Now that we've
discussed some important points, I'll go ahead and write some code,
probably over the weekend.

Mattias

2007-11-27 14:13:44

by Johannes Berg

[permalink] [raw]

Subject: Re: [RFC/T][PATCH][V3] mac80211: Exponential moving average estimate for rc80211_simple

On Mon, 2007-11-26 at 22:30 +0100, Mattias Nissler wrote:
> This changes rc80211_simple failed frame percentage estimation to use an
> exponential moving average method. Failed frames percentage is sampled over
> time and a smoothed average is computed. This average is examined periodically
> and the rate adjusted eventually.
>
> Compared to the old method this new approach is much more responsive. The old
> method used plain tx success/failure counters. This made rate control very
> unresponsive after a short time of using the interface, since old data wouldn't
> age properly.

Sounds sane and better than before. If it works we can apply this I
guess.

johannes

Attachments:

signature.asc (828.00 B)
This is a digitally signed message part