From: Jesse Jones <[email protected]>
Changes since v1: Only flush tx queue if interface is mesh mode.
This prevents kernel panics due to uninitialized spin_lock.
When more than one station hears a broadcast request, it is possible that
multiple devices will reply at the same time, potentially causing
collision. This patch helps reduce this issue.
Signed-off-by: Alexis Green <[email protected]>
Signed-off-by: Benjamin Morgan <[email protected]>
---
net/mac80211/ieee80211_i.h | 11 ++++
net/mac80211/iface.c | 61 ++++++++++++++++++++++
net/mac80211/mesh.c | 2 +
net/mac80211/mesh_hwmp.c | 124 +++++++++++++++++++++++++++++++++++----------
4 files changed, 171 insertions(+), 27 deletions(-)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 159a1a7..f422897 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -330,6 +330,11 @@ struct mesh_preq_queue {
u8 flags;
};
+struct mesh_tx_queue {
+ struct list_head list;
+ struct sk_buff *skb;
+};
+
struct ieee80211_roc_work {
struct list_head list;
@@ -670,6 +675,11 @@ struct ieee80211_if_mesh {
spinlock_t mesh_preq_queue_lock;
struct mesh_preq_queue preq_queue;
int preq_queue_len;
+ /* Spinlock for trasmitted MPATH frames */
+ spinlock_t mesh_tx_queue_lock;
+ struct mesh_tx_queue tx_queue;
+ int tx_queue_len;
+
struct mesh_stats mshstats;
struct mesh_config mshcfg;
atomic_t estab_plinks;
@@ -919,6 +929,7 @@ struct ieee80211_sub_if_data {
struct work_struct work;
struct sk_buff_head skb_queue;
+ struct delayed_work tx_work;
u8 needed_rx_chains;
enum ieee80211_smps_mode smps_mode;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 40813dd..d5b4bf4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -778,6 +778,59 @@ static int ieee80211_open(struct net_device *dev)
return ieee80211_do_open(&sdata->wdev, true);
}
+static void flush_tx_skbs(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct mesh_tx_queue *tx_node;
+
+ spin_lock_bh(&ifmsh->mesh_tx_queue_lock);
+
+ /* Note that this check is important because of the two-stage
+ * way that ieee80211_if_mesh is initialized.
+ */
+ if (ifmsh->tx_queue_len > 0) {
+ mhwmp_dbg(sdata, "flushing %d skbs", ifmsh->tx_queue_len);
+
+ while (!list_empty(&ifmsh->tx_queue.list)) {
+ tx_node = list_last_entry(&ifmsh->tx_queue.list,
+ struct mesh_tx_queue, list);
+ kfree_skb(tx_node->skb);
+ list_del(&tx_node->list);
+ kfree(tx_node);
+ }
+ ifmsh->tx_queue_len = 0;
+ }
+
+ spin_unlock_bh(&ifmsh->mesh_tx_queue_lock);
+}
+
+static void mesh_jittered_tx(struct work_struct *wk)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(
+ wk, struct ieee80211_sub_if_data,
+ tx_work.work);
+
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct mesh_tx_queue *tx_node;
+
+ spin_lock_bh(&ifmsh->mesh_tx_queue_lock);
+
+ list_for_each_entry(tx_node, &ifmsh->tx_queue.list, list) {
+ ieee80211_tx_skb(sdata, tx_node->skb);
+ }
+
+ while (!list_empty(&ifmsh->tx_queue.list)) {
+ tx_node = list_last_entry(&ifmsh->tx_queue.list,
+ struct mesh_tx_queue, list);
+ list_del(&tx_node->list);
+ kfree(tx_node);
+ }
+ ifmsh->tx_queue_len = 0;
+
+ spin_unlock_bh(&ifmsh->mesh_tx_queue_lock);
+}
+
static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
bool going_down)
{
@@ -881,6 +934,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
+ /* Nothing to flush if the interface is not in mesh mode */
+ if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
+ flush_tx_skbs(sdata);
+
+ cancel_delayed_work_sync(&sdata->tx_work);
+
if (sdata->wdev.cac_started) {
chandef = sdata->vif.bss_conf.chandef;
WARN_ON(local->suspended);
@@ -1846,6 +1905,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_dfs_cac_timer_work);
INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk,
ieee80211_delayed_tailroom_dec);
+ INIT_DELAYED_WORK(&sdata->tx_work,
+ mesh_jittered_tx);
for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index c28b0af..f0d3cd9 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1381,6 +1381,8 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
ieee80211_mesh_path_root_timer,
(unsigned long) sdata);
INIT_LIST_HEAD(&ifmsh->preq_queue.list);
+ INIT_LIST_HEAD(&ifmsh->tx_queue.list);
+ spin_lock_init(&ifmsh->mesh_tx_queue_lock);
skb_queue_head_init(&ifmsh->ps.bc_buf);
spin_lock_init(&ifmsh->mesh_preq_queue_lock);
spin_lock_init(&ifmsh->sync_offset_lock);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d07ee3c..5c22daf 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -18,6 +18,7 @@
#define ARITH_SHIFT 8
#define MAX_PREQ_QUEUE_LEN 64
+#define MAX_TX_QUEUE_LEN 8
static void mesh_queue_preq(struct mesh_path *, u8);
@@ -98,13 +99,15 @@ enum mpath_frame_type {
static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
- const u8 *orig_addr, u32 orig_sn,
- u8 target_flags, const u8 *target,
- u32 target_sn, const u8 *da,
- u8 hop_count, u8 ttl,
- u32 lifetime, u32 metric, u32 preq_id,
- struct ieee80211_sub_if_data *sdata)
+static struct sk_buff *alloc_mesh_path_sel_frame(enum mpath_frame_type action,
+ u8 flags, const u8 *orig_addr,
+ u32 orig_sn, u8 target_flags,
+ const u8 *target,
+ u32 target_sn, const u8 *da,
+ u8 hop_count, u8 ttl,
+ u32 lifetime, u32 metric,
+ u32 preq_id,
+ struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
@@ -117,7 +120,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
hdr_len +
2 + 37); /* max HWMP IE */
if (!skb)
- return -1;
+ return NULL;
skb_reserve(skb, local->tx_headroom);
mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len);
memset(mgmt, 0, hdr_len);
@@ -153,7 +156,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
break;
default:
kfree_skb(skb);
- return -ENOTSUPP;
+ return NULL;
}
*pos++ = ie_len;
*pos++ = flags;
@@ -192,10 +195,72 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
pos += 4;
}
- ieee80211_tx_skb(sdata, skb);
- return 0;
+ return skb;
+}
+
+static void mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
+ const u8 *orig_addr, u32 orig_sn,
+ u8 target_flags, const u8 *target,
+ u32 target_sn, const u8 *da,
+ u8 hop_count, u8 ttl,
+ u32 lifetime, u32 metric, u32 preq_id,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct sk_buff *skb = alloc_mesh_path_sel_frame(action, flags,
+ orig_addr, orig_sn, target_flags, target, target_sn,
+ da, hop_count, ttl, lifetime, metric, preq_id, sdata);
+ if (skb)
+ ieee80211_tx_skb(sdata, skb);
}
+static void mesh_path_sel_frame_tx_jittered(enum mpath_frame_type action,
+ u8 flags, const u8 *orig_addr,
+ u32 orig_sn, u8 target_flags,
+ const u8 *target, u32 target_sn,
+ const u8 *da, u8 hop_count, u8 ttl,
+ u32 lifetime, u32 metric,
+ u32 preq_id,
+ struct ieee80211_sub_if_data *sdata)
+{
+ u32 jitter;
+ struct sk_buff *skb = alloc_mesh_path_sel_frame(action, flags,
+ orig_addr, orig_sn,
+ target_flags, target,
+ target_sn, da,
+ hop_count, ttl,
+ lifetime, metric,
+ preq_id, sdata);
+ if (skb) {
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct mesh_tx_queue *tx_node = kmalloc(
+ sizeof(struct mesh_tx_queue), GFP_ATOMIC);
+ if (!tx_node) {
+ mhwmp_dbg(sdata, "could not allocate mesh_hwmp tx node");
+ return;
+ }
+
+ spin_lock_bh(&ifmsh->mesh_tx_queue_lock);
+ if (ifmsh->tx_queue_len == MAX_TX_QUEUE_LEN) {
+ spin_unlock_bh(&ifmsh->mesh_tx_queue_lock);
+ kfree(tx_node);
+ kfree_skb(skb);
+ if (printk_ratelimit())
+ mhwmp_dbg(sdata, "mesh_hwmp tx node queue full");
+ return;
+ }
+
+ tx_node->skb = skb;
+ list_add_tail(&tx_node->list, &ifmsh->tx_queue.list);
+ ++ifmsh->tx_queue_len;
+ spin_unlock_bh(&ifmsh->mesh_tx_queue_lock);
+
+ jitter = prandom_u32() % 25;
+
+ ieee80211_queue_delayed_work(
+ &sdata->local->hw,
+ &sdata->tx_work, msecs_to_jiffies(jitter));
+ }
+}
/* Headroom is not adjusted. Caller should ensure that skb has sufficient
* headroom in case the frame is encrypted. */
@@ -620,11 +685,13 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
ttl = ifmsh->mshcfg.element_ttl;
if (ttl != 0) {
mhwmp_dbg(sdata, "replying to the PREQ\n");
- mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr,
- orig_sn, 0, target_addr,
- target_sn, mgmt->sa, 0, ttl,
- lifetime, target_metric, 0,
- sdata);
+ mesh_path_sel_frame_tx_jittered(MPATH_PREP, 0,
+ orig_addr, orig_sn,
+ 0, target_addr,
+ target_sn, mgmt->sa,
+ 0, ttl, lifetime,
+ target_metric, 0,
+ sdata);
} else {
ifmsh->mshstats.dropped_frames_ttl++;
}
@@ -652,10 +719,11 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
target_sn = PREQ_IE_TARGET_SN(preq_elem);
}
- mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr,
- orig_sn, target_flags, target_addr,
- target_sn, da, hopcount, ttl, lifetime,
- orig_metric, preq_id, sdata);
+ mesh_path_sel_frame_tx_jittered(MPATH_PREQ, flags, orig_addr,
+ orig_sn, target_flags,
+ target_addr, target_sn, da,
+ hopcount, ttl, lifetime,
+ orig_metric, preq_id, sdata);
if (!is_multicast_ether_addr(da))
ifmsh->mshstats.fwded_unicast++;
else
@@ -721,9 +789,10 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
target_sn = PREP_IE_TARGET_SN(prep_elem);
orig_sn = PREP_IE_ORIG_SN(prep_elem);
- mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, orig_sn, 0,
- target_addr, target_sn, next_hop, hopcount,
- ttl, lifetime, metric, 0, sdata);
+ mesh_path_sel_frame_tx_jittered(MPATH_PREP, flags, orig_addr, orig_sn,
+ 0, target_addr, target_sn, next_hop,
+ hopcount, ttl, lifetime, metric, 0,
+ sdata);
rcu_read_unlock();
sdata->u.mesh.mshstats.fwded_unicast++;
@@ -873,10 +942,11 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
ttl--;
if (ifmsh->mshcfg.dot11MeshForwarding) {
- mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
- orig_sn, 0, NULL, 0, broadcast_addr,
- hopcount, ttl, interval,
- metric + metric_txsta, 0, sdata);
+ mesh_path_sel_frame_tx_jittered(MPATH_RANN, flags, orig_addr,
+ orig_sn, 0, NULL, 0,
+ broadcast_addr, hopcount, ttl,
+ interval, metric + metric_txsta,
+ 0, sdata);
}
rcu_read_unlock();
On Fri, 2017-02-24 at 11:58 -0800, Alexis Green wrote:
> From: Jesse Jones <[email protected]>
>
> Changes since v1: Only flush tx queue if interface is mesh mode.
> This prevents kernel panics due to uninitialized spin_lock.
>
> When more than one station hears a broadcast request, it is possible
> that multiple devices will reply at the same time, potentially
> causing collision. This patch helps reduce this issue.
It's not clear to me what you mean by "collision"? Over the air the NAV
should handle the avoidance thereof, so I don't really see what this
does wrt. collisions?
Are these frames somehow duplicates? But I don't see any suppression if
you've already put a frame on the "jittered" list then it will never be
deleted from it again, so it doesn't suppress anything in that sense?
johannes
Hi Johannes,
This is loosely based on RFC5148, specifically event-triggered message generation as described in section 5.2.
The frames are not duplicated, but, hopefully offset enough so they don't collide at the receiver (and, since, these are management frames, there is no retransmission and we may lose the information contained in them). If the two (or more) devices that reply are synchronized well enough, carrier sense will tell them that air is clear and messages will go out at the same time. It doesn't happen too often, but we found it noticeable enough in our testing.
Best regards,
Alexis Green
On 2/27/2017 5:30 AM, Johannes Berg wrote:
> On Fri, 2017-02-24 at 11:58 -0800, Alexis Green wrote:
>> From: Jesse Jones <[email protected]>
>>
>> When more than one station hears a broadcast request, it is possible
>> that multiple devices will reply at the same time, potentially
>> causing collision. This patch helps reduce this issue.
>
> It's not clear to me what you mean by "collision"? Over the air the NAV
> should handle the avoidance thereof, so I don't really see what this
> does wrt. collisions?
>
> Are these frames somehow duplicates? But I don't see any suppression if
> you've already put a frame on the "jittered" list then it will never be
> deleted from it again, so it doesn't suppress anything in that sense?
>
> johannes
>
> > Well it certainly attempts to via stuff like carrier sense. But that
> > is not fool proof and any time two routers hear a frame and both
> > decide to forward it immediately there is a chance that they will both
> > sense the air at the same time, decide that it is clear, and lose both
> > their forwarded frames due to a collision. How often that happens is
> > hard to say but we have observed that exact behavior a few years ago
> > with an 802.11 multicast routing protocol and adding jitter
> > significantly improved reliability.
>
> I'm really surprised by this since they both should jitter their
> transmissions
> already between CWmin and CWmax. Is that window somehow really super
> small for what you're doing?
I don't think so.
-- Jesse
> Isn't CWmin and CWmax only used for retries?
No.
> We recently had the problem that on 5MHz channels probe-responses of
> APs which can't hear each other (hidden node problem) always collide.
I think this is what I neglected - if the APs won't hear each other,
all their own sensing and NAV won't help since it'll never be updated.
CWmin/max are short enough to not actually matter in this case since
they're not long enough to avoid colliding entirely even if one picks
the lowest and the other the largest possible value.
johannes
Sorry - this is the other half of my reply that I accidentally deleted
before sending...
> +static void flush_tx_skbs(struct ieee80211_sub_if_data *sdata)
> +{
> + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
> + struct mesh_tx_queue *tx_node;
> +
> + spin_lock_bh(&ifmsh->mesh_tx_queue_lock);
> +
> + /* Note that this check is important because of the two-
> stage
> + * way that ieee80211_if_mesh is initialized.
> + */
I think you should fix that rather than work around it. If this is
called with iftype != mesh then this is super problematic anyway, since
ifmsh->tx_queue_len would alias some other variable (there's a union).
> + if (ifmsh->tx_queue_len > 0) {
> + mhwmp_dbg(sdata, "flushing %d skbs", ifmsh-
> >tx_queue_len);
> +
> + while (!list_empty(&ifmsh->tx_queue.list)) {
> + tx_node = list_last_entry(&ifmsh-
> >tx_queue.list,
> + struct
> mesh_tx_queue, list);
> + kfree_skb(tx_node->skb);
> + list_del(&tx_node->list);
> + kfree(tx_node);
> + }
> + ifmsh->tx_queue_len = 0;
> + }
> +
> + spin_unlock_bh(&ifmsh->mesh_tx_queue_lock);
> +}
All of this also gets *vastly* simpler if it's just skb_queue_purge()
:)
> + spin_lock_bh(&ifmsh->mesh_tx_queue_lock);
> +
> + list_for_each_entry(tx_node, &ifmsh->tx_queue.list, list) {
> + ieee80211_tx_skb(sdata, tx_node->skb);
> + }
I don't think you should hold the lock across _tx_skb(), ISTR problems
with that - particularly with the STA lock, so this might be OK, but it
might also cause lock ordering issues. It's easy to avoid anyway, so
better not to do it.
johannes
On 06/03/17 13:38, Johannes Berg wrote:
>
>> Well it certainly attempts to via stuff like carrier sense. But that
>> is not fool proof and any time two routers hear a frame and both
>> decide to forward it immediately there is a chance that they will
>> both sense the air at the same time, decide that it is clear, and
>> lose both their forwarded frames due to a collision. How often that
>> happens is hard to say but we have observed that exact behavior a few
>> years ago with an 802.11 multicast routing protocol and adding jitter
>> significantly improved reliability.
>
> I'm really surprised by this since they both should jitter their
> transmissions already between CWmin and CWmax. Is that window somehow really super small for what you're doing?
>
> johannes
>
Isn't CWmin and CWmax only used for retries?
We recently had the problem that on 5MHz channels probe-responses of APs
which can't hear each other (hidden node problem) always collide.
See [1] for a trace showing the problem.
Yellow is the probe-request (and ack on success), the other colours are
3 APs.
Putting probe-responses on their own queue with it's own timing results
in [2] and seems to make the problem less worse.
However the first frame still always collides, and only subsequent
retries have the randomness of cwmin/cwmax added.
5MHz channels make the problem worse since frames are 4 times longer.
I'm currently trying to find a way to add some randomness to the initial
response, which it seems this patchset attempts to solve as well
(different context though).
[1] http://may.nu/images/problem.png
[2] http://may.nu/images/jittered.png
Hi Johannes,
Thank you for the comments, I'll see if I can apply all of your
suggestions and resubmit.
Alexis
On Thu, Mar 16, 2017 at 3:18 AM, Johannes Berg
<[email protected]> wrote:
> Sorry - this is the other half of my reply that I accidentally deleted
> before sending...
>
>> +static void flush_tx_skbs(struct ieee80211_sub_if_data *sdata)
>> +{
>> + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
>> + struct mesh_tx_queue *tx_node;
>> +
>> + spin_lock_bh(&ifmsh->mesh_tx_queue_lock);
>> +
>> + /* Note that this check is important because of the two-
>> stage
>> + * way that ieee80211_if_mesh is initialized.
>> + */
>
> I think you should fix that rather than work around it. If this is
> called with iftype != mesh then this is super problematic anyway, since
> ifmsh->tx_queue_len would alias some other variable (there's a union).
>
>> + if (ifmsh->tx_queue_len > 0) {
>> + mhwmp_dbg(sdata, "flushing %d skbs", ifmsh-
>> >tx_queue_len);
>> +
>> + while (!list_empty(&ifmsh->tx_queue.list)) {
>> + tx_node = list_last_entry(&ifmsh-
>> >tx_queue.list,
>> + struct
>> mesh_tx_queue, list);
>> + kfree_skb(tx_node->skb);
>> + list_del(&tx_node->list);
>> + kfree(tx_node);
>> + }
>> + ifmsh->tx_queue_len = 0;
>> + }
>> +
>> + spin_unlock_bh(&ifmsh->mesh_tx_queue_lock);
>> +}
>
> All of this also gets *vastly* simpler if it's just skb_queue_purge()
> :)
>
>> + spin_lock_bh(&ifmsh->mesh_tx_queue_lock);
>> +
>> + list_for_each_entry(tx_node, &ifmsh->tx_queue.list, list) {
>> + ieee80211_tx_skb(sdata, tx_node->skb);
>> + }
>
> I don't think you should hold the lock across _tx_skb(), ISTR problems
> with that - particularly with the STA lock, so this might be OK, but it
> might also cause lock ordering issues. It's easy to avoid anyway, so
> better not to do it.
>
> johannes
On Thu, Mar 2, 2017 at 9:41 AM, Jesse Jones <[email protected]> wrote:
>> Hi Alexis,
>>
>> > This is loosely based on RFC5148, specifically event-triggered message
>> > generation as described in section 5.2.
>>
>> I'm confused. I see how that generally seems to apply to any mobile
>> network, but it *does* state up-front that
>> In some instances, these problems can be solved in these lower
>> layers, but in other instances, some help at the network and higher
>> layers is necessary.
>>
>> I believe 802.11 *does* in fact solve these issues at lower layers. Can
>> you explain how you observed any problem in this area?
>
> Well it certainly attempts to via stuff like carrier sense. But that is not
> fool proof and any time two routers hear a frame and both decide to forward
> it immediately there is a chance that they will both sense the air at the
> same time, decide that it is clear, and lose both their forwarded frames due
> to a collision. How often that happens is hard to say but we have observed
> that exact behavior a few years ago with an 802.11 multicast routing
> protocol and adding jitter significantly improved reliability.
Have you tried performing RTS/CTS before sending path selection frames
to manage the hidden node problem?
--
thomas
> Hi Alexis,
>
> > This is loosely based on RFC5148, specifically event-triggered message
> > generation as described in section 5.2.
>
> I'm confused. I see how that generally seems to apply to any mobile
> network, but it *does* state up-front that
> In some instances, these problems can be solved in these lower
> layers, but in other instances, some help at the network and higher
> layers is necessary.
>
> I believe 802.11 *does* in fact solve these issues at lower layers. Can
> you explain how you observed any problem in this area?
Well it certainly attempts to via stuff like carrier sense. But that is not
fool proof and any time two routers hear a frame and both decide to forward
it immediately there is a chance that they will both sense the air at the
same time, decide that it is clear, and lose both their forwarded frames due
to a collision. How often that happens is hard to say but we have observed
that exact behavior a few years ago with an 802.11 multicast routing
protocol and adding jitter significantly improved reliability.
An example of where this would be solved at a lower layer would be something
like a TDMA network that has better mechanisms to ensure that multiple
devices do not send at the same time.
> > The frames are not duplicated, but, hopefully offset enough so they
> > don't collide at the receiver
>
> That wasn't my question - my question regarding duplicating was if this
> was
> intended to *suppress* duplicates, but it sounds like not.
No, it's not intended to suppress duplicates.
-- Jesse
These images work intermittently for me. Here they are rehosted:
[1] http://i.imgur.com/zM0eLNk.png
[2] http://i.imgur.com/E7GL7U3.png
On Fri, Mar 10, 2017 at 12:40 AM, Kalle Valo <[email protected]> wrote:
> Matthias May <[email protected]> writes:
>
>> [1] http://may.nu/images/problem.png
>> [2] http://may.nu/images/jittered.png
>
> I get 404 "Not Found" for these.
>
> --
> Kalle Valo
> Well it certainly attempts to via stuff like carrier sense. But that
> is not fool proof and any time two routers hear a frame and both
> decide to forward it immediately there is a chance that they will
> both sense the air at the same time, decide that it is clear, and
> lose both their forwarded frames due to a collision. How often that
> happens is hard to say but we have observed that exact behavior a few
> years ago with an 802.11 multicast routing protocol and adding jitter
> significantly improved reliability.
I'm really surprised by this since they both should jitter their
transmissions already between CWmin and CWmax. Is that window somehow really super small for what you're doing?
johannes
Hi,
So I guess after all the discussion, you should amend the commit log a
bit, certainly at least to mention the hidden-node issue.
Regarding the patch itself, I'm not super happy with how big it is,
some additional comments below:
> +struct mesh_tx_queue {
> + struct list_head list;
> + struct sk_buff *skb;
> +};
This seems awkward, what's wrong with using an SKB list (struct
sk_buff_head, skb_queue_* etc)?
> + /* Spinlock for trasmitted MPATH frames */
> + spinlock_t mesh_tx_queue_lock;
That would also contain the extra spinlock.
> + struct mesh_tx_queue tx_queue;
This was always a bad idea, since you never need the skb pointer here -
should've just used struct list_head.
> + int tx_queue_len;
Also contained in the skb queue.
> + struct delayed_work tx_work;
I don't really see any value here for a delayed work - a pure timer
would work just as well?
Also, these fields should be in ifmsh, I think?
johannes
Matthias May <[email protected]> writes:
> [1] http://may.nu/images/problem.png
> [2] http://may.nu/images/jittered.png
I get 404 "Not Found" for these.
--
Kalle Valo
Hi Alexis,
> This is loosely based on RFC5148, specifically event-triggered
> message generation as described in section 5.2.
I'm confused. I see how that generally seems to apply to any mobile
network, but it *does* state up-front that
In some instances, these problems can be solved in these lower
layers, but in other instances, some help at the network and higher
layers is necessary.
I believe 802.11 *does* in fact solve these issues at lower layers.
Can you explain how you observed any problem in this area?
> The frames are not duplicated, but, hopefully offset enough so they
> don't collide at the receiver
That wasn't my question - my question regarding duplicating was if this
was intended to *suppress* duplicates, but it sounds like not.
> (and, since, these are management frames, there is no retransmission
> and we may lose the information contained in them).
That statement isn't true in general, though it does seem that some of
the frames you're touching are actually *multicast* frames and as such
don't have any retries.
> If the two (or more) devices that reply are synchronized well enough,
> carrier sense will tell them that air is clear and messages will go
> out at the same time. It doesn't happen too often, but we found it
> noticeable enough in our testing.
I'm still curious how it happens at all, since NAV synchronisation
should prevent it at a much lower level?
johannes
Am 09.03.2017 um 16:49 schrieb Matthias May:
> On 06/03/17 13:38, Johannes Berg wrote:
>>> Well it certainly attempts to via stuff like carrier sense. But that
>>> is not fool proof and any time two routers hear a frame and both
>>> decide to forward it immediately there is a chance that they will
>>> both sense the air at the same time, decide that it is clear, and
>>> lose both their forwarded frames due to a collision. How often that
>>> happens is hard to say but we have observed that exact behavior a few
>>> years ago with an 802.11 multicast routing protocol and adding jitter
>>> significantly improved reliability.
>> I'm really surprised by this since they both should jitter their
>> transmissions already between CWmin and CWmax. Is that window somehow really super small for what you're doing?
>>
>> johannes
>>
> Isn't CWmin and CWmax only used for retries?
> We recently had the problem that on 5MHz channels probe-responses of APs
> which can't hear each other (hidden node problem) always collide.
> See [1] for a trace showing the problem.
> Yellow is the probe-request (and ack on success), the other colours are
> 3 APs.
> Putting probe-responses on their own queue with it's own timing results
> in [2] and seems to make the problem less worse.
> However the first frame still always collides, and only subsequent
> retries have the randomness of cwmin/cwmax added.
> 5MHz channels make the problem worse since frames are 4 times longer.
>
> I'm currently trying to find a way to add some randomness to the initial
> response, which it seems this patchset attempts to solve as well
> (different context though).
>
> [1] http://may.nu/images/problem.png
> [2] http://may.nu/images/jittered.png
How do you measure this signals ? Maybe the first choose of in the
contention window is the same for all nodes caused by simulation, or so
... In general the chance for choosing all nodes the same waiting value
is 1/16 * n (for best effort). So for 3 nodes it seems unlikely.
But from first look on your pictures, CW seems to work, since the cyan
AP is slightly earlier then magenta and green one, which may the
distance of some slot times. But of course for hidden stations this (at
best) 16 slot times (which may be only 5 to 8 slot times in average) are
much shorter than the actual probe response air time. Would they have
proper carrier sense they would not collide. The jitter of path request
are also only a bit of more gambling, which may help ... at this point
the client should probe a smaller set of APs a time, but the APs cannot
change their behavior ...
But the actual patch fro PREPs have another goal. Since they could only
collide, if multiple routes to destination exists, it might help to
spread the a bit. But from what I see, every PREP is jittered. Maybe the
first PREP should not be artificially delayed, but all following PREPs
of this specific PREQ (which mostly only contain redundant routes,
without a better ALM).
kind regards
--
M.Sc. Benjamin Beichler
Universität Rostock, Fakultät für Informatik und Elektrotechnik
Institut für Angewandte Mikroelektronik und Datentechnik
University of Rostock, Department of CS and EE
Institute of Applied Microelectronics and CE
Richard-Wagner-Straße 31
18119 Rostock
Deutschland/Germany
phone: +49 (0) 381 498 - 7278
email: [email protected]
www: http://www.imd.uni-rostock.de/