2016-03-03 21:59:13

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH v3 1/2] mac80211: add A-MSDU tx support

Requires software tx queueing and fast-xmit support. For good
performance, drivers need frag_list support as well. This avoids the
need for copying data of aggregated frames. Running without it is only
supported for debugging purposes.

To avoid performance and packet size issues, the rate control module or
driver needs to limit the maximum A-MSDU size by setting
max_rc_amsdu_len in struct ieee80211_sta.

Signed-off-by: Felix Fietkau <[email protected]>
---
include/linux/ieee80211.h | 3 +
include/net/mac80211.h | 19 ++++++
net/mac80211/agg-tx.c | 5 ++
net/mac80211/debugfs.c | 2 +
net/mac80211/ieee80211_i.h | 1 +
net/mac80211/sta_info.c | 2 +
net/mac80211/tx.c | 156 +++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 188 insertions(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 3b1f6ce..7ac7594 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -163,6 +163,9 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
/* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */
#define IEEE80211_MAX_FRAME_LEN 2352

+/* Maximal size of an A-MSDU that can be transported in a HT BA session */
+#define IEEE80211_MAX_MPDU_LEN_HT_BA 4095
+
/* Maximal size of an A-MSDU */
#define IEEE80211_MAX_MPDU_LEN_HT_3839 3839
#define IEEE80211_MAX_MPDU_LEN_HT_7935 7935
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a53333c..c3cfdf5 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -709,6 +709,7 @@ enum mac80211_tx_info_flags {
* @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll
* frame (PS-Poll or uAPSD).
* @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information
+ * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
*
* These flags are used in tx_info->control.flags.
*/
@@ -716,6 +717,7 @@ enum mac80211_tx_control_flags {
IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0),
IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1),
IEEE80211_TX_CTRL_RATE_INJECT = BIT(2),
+ IEEE80211_TX_CTRL_AMSDU = BIT(3),
};

/*
@@ -1741,6 +1743,7 @@ struct ieee80211_sta_rates {
* size is min(max_amsdu_len, 7935) bytes.
* Both additional HT limits must be enforced by the low level driver.
* This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2).
+ * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control.
* @txq: per-TID data TX queues (if driver uses the TXQ abstraction)
*/
struct ieee80211_sta {
@@ -1761,6 +1764,7 @@ struct ieee80211_sta {
bool mfp;
u8 max_amsdu_subframes;
u16 max_amsdu_len;
+ u16 max_rc_amsdu_len;

struct ieee80211_txq *txq[IEEE80211_NUM_TIDS];

@@ -1974,6 +1978,15 @@ struct ieee80211_txq {
* order and does not need to manage its own reorder buffer or BA session
* timeout.
*
+ * @IEEE80211_HW_TX_AMSDU: Hardware (or driver) supports software aggregated
+ * A-MSDU frames. Requires software tx queueing and fast-xmit support.
+ * When not using minstrel/minstrel_ht rate control, the driver should
+ * limit the maximum A-MSDU size based on the current tx rate by setting
+ * max_rc_amsdu_len in struct ieee80211_sta.
+ *
+ * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list
+ * skbs, needed for zero-copy software A-MSDU.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2011,6 +2024,8 @@ enum ieee80211_hw_flags {
IEEE80211_HW_BEACON_TX_STATUS,
IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR,
IEEE80211_HW_SUPPORTS_REORDERING_BUFFER,
+ IEEE80211_HW_TX_AMSDU,
+ IEEE80211_HW_TX_FRAG_LIST,

/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -2083,6 +2098,9 @@ enum ieee80211_hw_flags {
* size is smaller (an example is LinkSys WRT120N with FW v1.0.07
* build 002 Jun 18 2012).
*
+ * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum
+ * of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list.
+ *
* @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
* (if %IEEE80211_HW_QUEUE_CONTROL is set)
*
@@ -2137,6 +2155,7 @@ struct ieee80211_hw {
u8 max_rate_tries;
u8 max_rx_aggregation_subframes;
u8 max_tx_aggregation_subframes;
+ u8 max_tx_fragments;
u8 offchannel_tx_hw_queue;
u8 radiotap_mcs_details;
u16 radiotap_vht_details;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 4932e9f..42fa810 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
size_t len)
{
struct tid_ampdu_tx *tid_tx;
+ struct ieee80211_txq *txq;
u16 capab, tid;
u8 buf_size;
bool amsdu;
@@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);

+ txq = sta->sta.txq[tid];
+ if (!amsdu && txq)
+ set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags);
+
mutex_lock(&sta->ampdu_mlme.mtx);

tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 4ab5c52..3a32501 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -127,6 +127,8 @@ static const char *hw_flag_names[] = {
FLAG(BEACON_TX_STATUS),
FLAG(NEEDS_UNIQUE_STA_ADDR),
FLAG(SUPPORTS_REORDERING_BUFFER),
+ FLAG(TX_AMSDU),
+ FLAG(TX_FRAG_LIST),
#undef FLAG
};

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c6830fb..f1104d5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -802,6 +802,7 @@ struct mac80211_qos_map {
enum txq_info_flags {
IEEE80211_TXQ_STOP,
IEEE80211_TXQ_AMPDU,
+ IEEE80211_TXQ_NO_AMSDU,
};

struct txq_info {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 00c82fb..e63fe70 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -406,6 +406,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
}
}

+ sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA;
+
sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);

return sta;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 485e30a..6b4e2ce 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1325,6 +1325,10 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
out:
spin_unlock_bh(&txqi->queue.lock);

+ if (skb && skb_has_frag_list(skb) &&
+ !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
+ skb_linearize(skb);
+
return skb;
}
EXPORT_SYMBOL(ieee80211_tx_dequeue);
@@ -2803,6 +2807,154 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta)
kfree_rcu(fast_tx, rcu_head);
}

+static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local,
+ struct sk_buff *skb, int headroom,
+ int *subframe_len)
+{
+ int amsdu_len = *subframe_len + sizeof(struct ethhdr);
+ int padding = (4 - amsdu_len) & 3;
+
+ if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) {
+ I802_DEBUG_INC(local->tx_expand_skb_head);
+
+ if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) {
+ wiphy_debug(local->hw.wiphy,
+ "failed to reallocate TX buffer\n");
+ return false;
+ }
+ }
+
+ if (padding) {
+ *subframe_len += padding;
+ memset(skb_put(skb, padding), 0, padding);
+ }
+
+ return true;
+}
+
+static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_fast_tx *fast_tx,
+ struct sk_buff *skb)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_hdr *hdr;
+ struct ethhdr amsdu_hdr;
+ int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header);
+ int subframe_len = skb->len - hdr_len;
+ void *data;
+ u8 *qc;
+
+ if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
+ return false;
+
+ if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
+ return true;
+
+ if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr),
+ &subframe_len))
+ return false;
+
+ amsdu_hdr.h_proto = cpu_to_be16(subframe_len);
+ memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN);
+ memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN);
+
+ data = skb_push(skb, sizeof(amsdu_hdr));
+ memmove(data, data + sizeof(amsdu_hdr), hdr_len);
+ memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr));
+
+ hdr = data;
+ qc = ieee80211_get_qos_ctl(hdr);
+ *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT;
+
+ info->control.flags |= IEEE80211_TX_CTRL_AMSDU;
+
+ return true;
+}
+
+static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct ieee80211_fast_tx *fast_tx,
+ struct sk_buff *skb)
+{
+ struct ieee80211_local *local = sdata->local;
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct txq_info *txqi;
+ struct sk_buff **frag_tail, *head;
+ int subframe_len = skb->len - ETH_ALEN;
+ u8 max_subframes = sta->sta.max_amsdu_subframes;
+ int max_frags = local->hw.max_tx_fragments;
+ int max_amsdu_len = sta->sta.max_amsdu_len;
+ __be16 len;
+ void *data;
+ bool ret = false;
+ int n = 1, nfrags;
+
+ if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
+ return false;
+
+ if (!txq)
+ return false;
+
+ txqi = to_txq_info(txq);
+ if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags))
+ return false;
+
+ if (sta->sta.max_rc_amsdu_len)
+ max_amsdu_len = min_t(int, max_amsdu_len,
+ sta->sta.max_rc_amsdu_len);
+
+ spin_lock_bh(&txqi->queue.lock);
+
+ head = skb_peek_tail(&txqi->queue);
+ if (!head)
+ goto out;
+
+ if (skb->len + head->len > max_amsdu_len)
+ goto out;
+
+ if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
+ goto out;
+
+ nfrags = 1 + skb_shinfo(skb)->nr_frags;
+ nfrags += 1 + skb_shinfo(head)->nr_frags;
+ frag_tail = &skb_shinfo(head)->frag_list;
+ while (*frag_tail) {
+ nfrags += 1 + skb_shinfo(*frag_tail)->nr_frags;
+ frag_tail = &(*frag_tail)->next;
+ n++;
+ }
+
+ if (max_subframes && n > max_subframes)
+ goto out;
+
+ if (max_frags && nfrags > max_frags)
+ goto out;
+
+ if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2,
+ &subframe_len))
+ return false;
+
+ ret = true;
+ data = skb_push(skb, ETH_ALEN + 2);
+ memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN);
+
+ data += 2 * ETH_ALEN;
+ len = cpu_to_be16(subframe_len);
+ memcpy(data, &len, 2);
+ memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header));
+
+ head->len += skb->len;
+ head->data_len += skb->len;
+ *frag_tail = skb;
+
+out:
+ spin_unlock_bh(&txqi->queue.lock);
+
+ return ret;
+}
+
static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
struct net_device *dev, struct sta_info *sta,
struct ieee80211_fast_tx *fast_tx,
@@ -2857,6 +3009,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,

ieee80211_tx_stats(dev, skb->len + extra_head);

+ if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
+ ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
+ return true;
+
/* will not be crypto-handled beyond what we do here, so use false
* as the may-encrypt argument for the resize to not account for
* more room than we already have in 'extra_head'
--
2.2.2



2016-03-03 21:59:07

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH v3 2/2] mac80211: minstrel_ht: set A-MSDU tx limits based on selected max_prob_rate

Prevents excessive A-MSDU aggregation at low data rates or bad
conditions.

Signed-off-by: Felix Fietkau <[email protected]>
---
net/mac80211/rc80211_minstrel_ht.c | 54 ++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3928dbd..a9d5b28 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -882,6 +882,59 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
ratetbl->rate[offset].flags = flags;
}

+static inline int
+minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate)
+{
+ int group = rate / MCS_GROUP_RATES;
+ rate %= MCS_GROUP_RATES;
+ return mi->groups[group].rates[rate].prob_ewma;
+}
+
+static int
+minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
+{
+ int group = mi->max_prob_rate / MCS_GROUP_RATES;
+ const struct mcs_group *g = &minstrel_mcs_groups[group];
+ int rate = mi->max_prob_rate % MCS_GROUP_RATES;
+
+ /* Disable A-MSDU if max_prob_rate is bad */
+ if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100))
+ return 1;
+
+ /* If the rate is slower than single-stream MCS1, make A-MSDU limit small */
+ if (g->duration[rate] > MCS_DURATION(1, 0, 52))
+ return 500;
+
+ /*
+ * If the rate is slower than single-stream MCS4, limit A-MSDU to usual
+ * data packet size
+ */
+ if (g->duration[rate] > MCS_DURATION(1, 0, 104))
+ return 1600;
+
+ /*
+ * If the rate is slower than single-stream MCS7, or if the max throughput
+ * rate success probability is less than 75%, limit A-MSDU to twice the usual
+ * data packet size
+ */
+ if (g->duration[rate] > MCS_DURATION(1, 0, 260) ||
+ (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) <
+ MINSTREL_FRAC(75, 100)))
+ return 3200;
+
+ /*
+ * HT A-MPDU limits maximum MPDU size under BA agreement to 4095 bytes.
+ * Since aggregation sessions are started/stopped without txq flush, use
+ * the limit here to avoid the complexity of having to de-aggregate
+ * packets in the queue.
+ */
+ if (!mi->sta->vht_cap.vht_supported)
+ return IEEE80211_MAX_MPDU_LEN_HT_BA;
+
+ /* unlimited */
+ return 0;
+}
+
static void
minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
@@ -906,6 +959,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
}

+ mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi);
rates->rate[i].idx = -1;
rate_control_set_rates(mp->hw, mi->sta, rates);
}
--
2.2.2


2016-04-05 12:36:04

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] mac80211: minstrel_ht: set A-MSDU tx limits based on selected max_prob_rate

Applied both now, I've had to fix a locking issue in the first patch.

I also worded the requirement for drivers more strongly - drivers
really *must* update the max length if they set the flag and don't use
minstrel.

johannes