From: Po-Hao Huang <[email protected]>
Allocate a per-skb completion to track those skbs we are interested in
and wait for them to complete transmission with TX status. To avoid
race condition between process and softirq without addtional locking,
we use a work to free the tx_wait struct later when waiter is finished
referencing it. This must be called in process context and with a
timeout value greater than zero since it might sleep.
We use another workqueue so works can be processed concurrently and
when the PCI device is removed unexpectedly, all pending works can be
flushed. This prevents some works that were scheduled but never processed
leads to memory leak.
Signed-off-by: Po-Hao Huang <[email protected]>
Signed-off-by: Ping-Ke Shih <[email protected]>
---
drivers/net/wireless/realtek/rtw89/core.c | 53 +++++++++++++++++++++++
drivers/net/wireless/realtek/rtw89/core.h | 33 ++++++++++++++
drivers/net/wireless/realtek/rtw89/pci.c | 6 +++
drivers/net/wireless/realtek/rtw89/pci.h | 4 +-
4 files changed, 94 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c
index 43080d50db171..369f7fb4e0779 100644
--- a/drivers/net/wireless/realtek/rtw89/core.c
+++ b/drivers/net/wireless/realtek/rtw89/core.c
@@ -849,6 +849,55 @@ void rtw89_core_tx_kick_off(struct rtw89_dev *rtwdev, u8 qsel)
rtw89_hci_tx_kick_off(rtwdev, ch_dma);
}
+static void rtw89_core_free_tx_wait_work(struct work_struct *work)
+{
+ struct rtw89_tx_wait_info *wait =
+ container_of(work, struct rtw89_tx_wait_info, work);
+ struct rtw89_dev *rtwdev = wait->rtwdev;
+ int done, ret;
+
+ ret = read_poll_timeout(atomic_read, done, done, 1000, 100000, false,
+ &wait->wait_done);
+
+ if (ret)
+ rtw89_err(rtwdev, "tx wait timed out, stop polling\n");
+ else
+ kfree(wait);
+}
+
+int rtw89_core_tx_kick_off_and_wait(struct rtw89_dev *rtwdev, struct sk_buff *skb,
+ int qsel, unsigned int timeout)
+{
+ struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb);
+ struct rtw89_tx_wait_info *wait;
+ unsigned long time_left;
+ int ret = 0;
+
+ skb_data->wait = kzalloc(sizeof(*wait), GFP_KERNEL);
+ wait = skb_data->wait;
+ if (!wait) {
+ rtw89_warn(rtwdev, "alloc tx wait info failed\n");
+ rtw89_core_tx_kick_off(rtwdev, qsel);
+ return 0;
+ }
+
+ init_completion(&wait->completion);
+ INIT_WORK(&wait->work, rtw89_core_free_tx_wait_work);
+ wait->rtwdev = rtwdev;
+
+ rtw89_core_tx_kick_off(rtwdev, qsel);
+ time_left = wait_for_completion_timeout(&wait->completion,
+ msecs_to_jiffies(timeout));
+ if (time_left == 0)
+ ret = -ETIMEDOUT;
+ else if (!wait->tx_done)
+ ret = -EAGAIN;
+
+ atomic_inc(&wait->wait_done);
+
+ return ret;
+}
+
int rtw89_h2c_tx(struct rtw89_dev *rtwdev,
struct sk_buff *skb, bool fwdl)
{
@@ -3193,6 +3242,9 @@ int rtw89_core_init(struct rtw89_dev *rtwdev)
rtwdev->txq_wq = alloc_workqueue("rtw89_tx_wq", WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!rtwdev->txq_wq)
return -ENOMEM;
+ rtwdev->gc_wq = alloc_workqueue("rtw89_gc_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ if (!rtwdev->gc_wq)
+ return -ENOMEM;
spin_lock_init(&rtwdev->ba_lock);
spin_lock_init(&rtwdev->rpwm_lock);
mutex_init(&rtwdev->mutex);
@@ -3234,6 +3286,7 @@ void rtw89_core_deinit(struct rtw89_dev *rtwdev)
rtw89_fw_free_all_early_h2c(rtwdev);
destroy_workqueue(rtwdev->txq_wq);
+ destroy_workqueue(rtwdev->gc_wq);
mutex_destroy(&rtwdev->rf_mutex);
mutex_destroy(&rtwdev->mutex);
}
diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h
index aefc95f83d277..dd4165394c8de 100644
--- a/drivers/net/wireless/realtek/rtw89/core.h
+++ b/drivers/net/wireless/realtek/rtw89/core.h
@@ -2421,6 +2421,19 @@ struct rtw89_phy_rate_pattern {
bool enable;
};
+struct rtw89_tx_wait_info {
+ struct rtw89_dev *rtwdev;
+ struct completion completion;
+ struct work_struct work;
+ atomic_t wait_done;
+ bool tx_done;
+};
+
+struct rtw89_tx_skb_data {
+ struct rtw89_tx_wait_info *wait;
+ u8 hci_priv[];
+};
+
#define RTW89_P2P_MAX_NOA_NUM 2
struct rtw89_vif {
@@ -3758,6 +3771,7 @@ struct rtw89_dev {
/* used to protect rf read write */
struct mutex rf_mutex;
struct workqueue_struct *txq_wq;
+ struct workqueue_struct *gc_wq;
struct work_struct txq_work;
struct delayed_work txq_reinvoke_work;
/* used to protect ba_list and forbid_ba_list */
@@ -3969,6 +3983,14 @@ static inline void rtw89_hci_clear(struct rtw89_dev *rtwdev, struct pci_dev *pde
rtwdev->hci.ops->clear(rtwdev, pdev);
}
+static inline
+struct rtw89_tx_skb_data *RTW89_TX_SKB_CB(struct sk_buff *skb)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+ return (struct rtw89_tx_skb_data *)info->status.status_driver_data;
+}
+
static inline u8 rtw89_read8(struct rtw89_dev *rtwdev, u32 addr)
{
return rtwdev->hci.ops->read8(rtwdev, addr);
@@ -4612,11 +4634,22 @@ static inline struct sk_buff *rtw89_alloc_skb_for_rx(struct rtw89_dev *rtwdev,
return dev_alloc_skb(length);
}
+static inline void rtw89_core_tx_wait_complete(struct rtw89_dev *rtwdev,
+ struct rtw89_tx_wait_info *wait,
+ bool tx_done)
+{
+ wait->tx_done = tx_done;
+ complete(&wait->completion);
+ queue_work(rtwdev->gc_wq, &wait->work);
+}
+
int rtw89_core_tx_write(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel);
int rtw89_h2c_tx(struct rtw89_dev *rtwdev,
struct sk_buff *skb, bool fwdl);
void rtw89_core_tx_kick_off(struct rtw89_dev *rtwdev, u8 qsel);
+int rtw89_core_tx_kick_off_and_wait(struct rtw89_dev *rtwdev, struct sk_buff *skb,
+ int qsel, unsigned int timeout);
void rtw89_core_fill_txdesc(struct rtw89_dev *rtwdev,
struct rtw89_tx_desc_info *desc_info,
void *txdesc);
diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c
index ec8bb5f10482e..ca6b63944ee99 100644
--- a/drivers/net/wireless/realtek/rtw89/pci.c
+++ b/drivers/net/wireless/realtek/rtw89/pci.c
@@ -364,6 +364,8 @@ static void rtw89_pci_tx_status(struct rtw89_dev *rtwdev,
struct rtw89_pci_tx_ring *tx_ring,
struct sk_buff *skb, u8 tx_status)
{
+ struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb);
+ struct rtw89_tx_wait_info *wait = skb_data->wait;
struct ieee80211_tx_info *info;
info = IEEE80211_SKB_CB(skb);
@@ -394,6 +396,8 @@ static void rtw89_pci_tx_status(struct rtw89_dev *rtwdev,
}
}
+ if (wait)
+ rtw89_core_tx_wait_complete(rtwdev, wait, tx_status == RTW89_TX_DONE);
ieee80211_tx_status_ni(rtwdev->hw, skb);
}
@@ -1203,6 +1207,7 @@ static int rtw89_pci_txwd_submit(struct rtw89_dev *rtwdev,
struct pci_dev *pdev = rtwpci->pdev;
struct sk_buff *skb = tx_req->skb;
struct rtw89_pci_tx_data *tx_data = RTW89_PCI_TX_SKB_CB(skb);
+ struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb);
bool en_wd_info = desc_info->en_wd_info;
u32 txwd_len;
u32 txwp_len;
@@ -1218,6 +1223,7 @@ static int rtw89_pci_txwd_submit(struct rtw89_dev *rtwdev,
}
tx_data->dma = dma;
+ skb_data->wait = NULL;
txwp_len = sizeof(*txwp_info);
txwd_len = chip->txwd_body_size;
diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h
index 1e19740db8c54..0e4bd210b100f 100644
--- a/drivers/net/wireless/realtek/rtw89/pci.h
+++ b/drivers/net/wireless/realtek/rtw89/pci.h
@@ -1004,9 +1004,9 @@ rtw89_pci_rxbd_increase(struct rtw89_pci_rx_ring *rx_ring, u32 cnt)
static inline struct rtw89_pci_tx_data *RTW89_PCI_TX_SKB_CB(struct sk_buff *skb)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct rtw89_tx_skb_data *data = RTW89_TX_SKB_CB(skb);
- return (struct rtw89_pci_tx_data *)info->status.status_driver_data;
+ return (struct rtw89_pci_tx_data *)data->hci_priv;
}
static inline struct rtw89_pci_tx_bd_32 *
--
2.25.1
Ping-Ke Shih <[email protected]> writes:
> From: Po-Hao Huang <[email protected]>
>
> Allocate a per-skb completion to track those skbs we are interested in
> and wait for them to complete transmission with TX status. To avoid
> race condition between process and softirq without addtional locking,
> we use a work to free the tx_wait struct later when waiter is finished
> referencing it. This must be called in process context and with a
> timeout value greater than zero since it might sleep.
>
> We use another workqueue so works can be processed concurrently and
> when the PCI device is removed unexpectedly, all pending works can be
> flushed. This prevents some works that were scheduled but never processed
> leads to memory leak.
>
> Signed-off-by: Po-Hao Huang <[email protected]>
> Signed-off-by: Ping-Ke Shih <[email protected]>
[...]
> +static void rtw89_core_free_tx_wait_work(struct work_struct *work)
> +{
> + struct rtw89_tx_wait_info *wait =
> + container_of(work, struct rtw89_tx_wait_info, work);
> + struct rtw89_dev *rtwdev = wait->rtwdev;
> + int done, ret;
> +
> + ret = read_poll_timeout(atomic_read, done, done, 1000, 100000, false,
> + &wait->wait_done);
> +
> + if (ret)
> + rtw89_err(rtwdev, "tx wait timed out, stop polling\n");
> + else
> + kfree(wait);
> +}
I admit I didn't try to understand this patch in detail but this
function just looks odd to me. Why there's polling able to free
something?
--
https://patchwork.kernel.org/project/linux-wireless/list/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
> -----Original Message-----
> From: Kalle Valo <[email protected]>
> Sent: Wednesday, March 15, 2023 4:40 PM
> To: Ping-Ke Shih <[email protected]>
> Cc: Bernie Huang <[email protected]>; [email protected]
> Subject: Re: [PATCH 2/5] wifi: rtw89: add function to wait for completion of TX skbs
>
> Ping-Ke Shih <[email protected]> writes:
>
> > From: Po-Hao Huang <[email protected]>
> >
> > Allocate a per-skb completion to track those skbs we are interested in
> > and wait for them to complete transmission with TX status. To avoid
> > race condition between process and softirq without addtional locking,
> > we use a work to free the tx_wait struct later when waiter is finished
> > referencing it. This must be called in process context and with a
> > timeout value greater than zero since it might sleep.
> >
> > We use another workqueue so works can be processed concurrently and
> > when the PCI device is removed unexpectedly, all pending works can be
> > flushed. This prevents some works that were scheduled but never processed
> > leads to memory leak.
> >
> > Signed-off-by: Po-Hao Huang <[email protected]>
> > Signed-off-by: Ping-Ke Shih <[email protected]>
>
> [...]
>
> > +static void rtw89_core_free_tx_wait_work(struct work_struct *work)
> > +{
> > + struct rtw89_tx_wait_info *wait =
> > + container_of(work, struct rtw89_tx_wait_info, work);
> > + struct rtw89_dev *rtwdev = wait->rtwdev;
> > + int done, ret;
> > +
> > + ret = read_poll_timeout(atomic_read, done, done, 1000, 100000, false,
> > + &wait->wait_done);
> > +
> > + if (ret)
> > + rtw89_err(rtwdev, "tx wait timed out, stop polling\n");
> > + else
> > + kfree(wait);
> > +}
>
> I admit I didn't try to understand this patch in detail but this
> function just looks odd to me. Why there's polling able to free
> something?
>
Three works are involved in the "wait/completion".
work 1. remain-on-channel work
It trigger TX null data and wait (kmalloc 'wait' object, and wait for completion)
work 2. TX completion by napi_poll
It returns TX status (failed or succeed to TX), and complete the 'wait' object,
and queue rtw89_core_free_tx_wait_work() to free 'wait' object.
We queue this by work 2, because time of work 1 is predictable, but
it is hard to estimate time of work 2. The read_poll_timeout() is for
the work 1 predictable time.
work 3. This work is to do garbage collection of 'wait' object
It polls if work 1 is done before doing free 'wait' object.
Things are complex because work 1 and 2 are done asynchronously, so one
of them can't free 'wait' object, or it will causes use-after-free in other
side.
Use case 1: (work 1 could free 'wait' object)
work 1 work 2 work 3
wait
completion
wait ok
free 'wait'
Use case 2: (work 2 could free 'wait' object)
work 1 work 2 work 3
wait
wait timeout
completion
free 'wait'
I can add a comment as a hint why we can use a read_poll_timeout() to assist
in freeing something.
Ping-Ke
Ping-Ke Shih <[email protected]> writes:
>> > +static void rtw89_core_free_tx_wait_work(struct work_struct *work)
>> > +{
>> > + struct rtw89_tx_wait_info *wait =
>> > + container_of(work, struct rtw89_tx_wait_info, work);
>> > + struct rtw89_dev *rtwdev = wait->rtwdev;
>> > + int done, ret;
>> > +
>> > + ret = read_poll_timeout(atomic_read, done, done, 1000, 100000, false,
>> > + &wait->wait_done);
>> > +
>> > + if (ret)
>> > + rtw89_err(rtwdev, "tx wait timed out, stop polling\n");
>> > + else
>> > + kfree(wait);
>> > +}
>>
>> I admit I didn't try to understand this patch in detail but this
>> function just looks odd to me. Why there's polling able to free
>> something?
>>
>
> Three works are involved in the "wait/completion".
>
> work 1. remain-on-channel work
> It trigger TX null data and wait (kmalloc 'wait' object, and wait for completion)
>
> work 2. TX completion by napi_poll
> It returns TX status (failed or succeed to TX), and complete the 'wait' object,
> and queue rtw89_core_free_tx_wait_work() to free 'wait' object.
>
> We queue this by work 2, because time of work 1 is predictable, but
> it is hard to estimate time of work 2. The read_poll_timeout() is for
> the work 1 predictable time.
>
> work 3. This work is to do garbage collection of 'wait' object
> It polls if work 1 is done before doing free 'wait' object.
>
>
> Things are complex because work 1 and 2 are done asynchronously, so one
> of them can't free 'wait' object, or it will causes use-after-free in other
> side.
>
> Use case 1: (work 1 could free 'wait' object)
> work 1 work 2 work 3
> wait
> completion
> wait ok
> free 'wait'
>
> Use case 2: (work 2 could free 'wait' object)
> work 1 work 2 work 3
> wait
> wait timeout
> completion
> free 'wait'
>
>
> I can add a comment as a hint why we can use a read_poll_timeout() to assist
> in freeing something.
I would expect that there's polling if you are waiting something from
hardware, or maybe when implementing a spin lock, but not when waiting
for another kernel thread. This just doesn't feel right but I don't have
time to propose a good alternative either, sorry.
--
https://patchwork.kernel.org/project/linux-wireless/list/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
On Mon, 2023-04-03 at 13:32 +0300, Kalle Valo wrote:
>
> I would expect that there's polling if you are waiting something from
> hardware, or maybe when implementing a spin lock, but not when waiting
> for another kernel thread. This just doesn't feel right but I don't have
> time to propose a good alternative either, sorry.
>
I have found a solution that uses an owner variable with a spin lock
to determine which side to free completion object. Simply show two use
cases below:
Use case 1: (normal case; free completion object by work 1)
work 1 work 2
wait
(spin_lock)
complete
check & set owner --> owner = work1
(spin_unlock)
wait ok
(spin_lock)
check & check owner --> free by work 1
(spin_unlock)
free completion
Use case 2: (timeout case; free completion object by work 2)
work 1 work 2
wait
wait timeout
(spin_lock)
check & set owner --> owner = work 2
(spin_unlock)
(spin_lock)
completion
check & set owner --> free by work 2
(spin_unlock)
free completion
I will apply this by v5.
Ping-Ke
On Tue, 2023-04-04 at 10:37 +0800, Ping-Ke Shih wrote:
> On Mon, 2023-04-03 at 13:32 +0300, Kalle Valo wrote:
> > I would expect that there's polling if you are waiting something from
> > hardware, or maybe when implementing a spin lock, but not when waiting
> > for another kernel thread. This just doesn't feel right but I don't have
> > time to propose a good alternative either, sorry.
> >
>
> I have found a solution that uses an owner variable with a spin lock
> to determine which side to free completion object. Simply show two use
> cases below:
>
> Use case 1: (normal case; free completion object by work 1)
> work 1 work 2
> wait
> (spin_lock)
> complete
> check & set owner --> owner = work1
> (spin_unlock)
> wait ok
> (spin_lock)
> check & check owner --> free by work 1
> (spin_unlock)
> free completion
>
>
> Use case 2: (timeout case; free completion object by work 2)
> work 1 work 2
> wait
> wait timeout
> (spin_lock)
> check & set owner --> owner = work 2
> (spin_unlock)
> (spin_lock)
> completion
> check & set owner --> free by work 2
> (spin_unlock)
> free completion
>
> I will apply this by v5.
>
We have a better idea that use kfree_rcu() to free completion, so no
need spin_lock(). Then, the use cases become below, and I have sent
this change by v6.
Use case 1: (normal case)
work 1 work 2
(rcu_assign_pointer(wait))
wait
(rcu_read_lock)
wait = rcu_dereference();
if (wait) --> wait != NULL
complete
(rcu_read_unlock)
wait ok
(rcu_assign_pointer(NULL))
kfree_rcu(completion)
Use case 2: (timeout case)
work 1 work 2
(rcu_assign_pointer(wait))
wait
wait timeout
(rcu_assign_pointer(NULL))
kfree_rcu(completion)
(rcu_read_lock)
wait = rcu_dereference();
if (wait) --> wait == NULL
complete
(rcu_read_unlock)
Ping-Ke
Ping-Ke Shih <[email protected]> writes:
> On Tue, 2023-04-04 at 10:37 +0800, Ping-Ke Shih wrote:
>
>> On Mon, 2023-04-03 at 13:32 +0300, Kalle Valo wrote:
>> > I would expect that there's polling if you are waiting something from
>> > hardware, or maybe when implementing a spin lock, but not when waiting
>> > for another kernel thread. This just doesn't feel right but I don't have
>> > time to propose a good alternative either, sorry.
>> >
>>
>> I have found a solution that uses an owner variable with a spin lock
>> to determine which side to free completion object. Simply show two use
>> cases below:
>>
>> Use case 1: (normal case; free completion object by work 1)
>> work 1 work 2
>> wait
>> (spin_lock)
>> complete
>> check & set owner --> owner = work1
>> (spin_unlock)
>> wait ok
>> (spin_lock)
>> check & check owner --> free by work 1
>> (spin_unlock)
>> free completion
>>
>>
>> Use case 2: (timeout case; free completion object by work 2)
>> work 1 work 2
>> wait
>> wait timeout
>> (spin_lock)
>> check & set owner --> owner = work 2
>> (spin_unlock)
>> (spin_lock)
>> completion
>> check & set owner --> free by work 2
>> (spin_unlock)
>> free completion
>>
>> I will apply this by v5.
>>
>
> We have a better idea that use kfree_rcu() to free completion, so no
> need spin_lock(). Then, the use cases become below, and I have sent
> this change by v6.
>
> Use case 1: (normal case)
> work 1 work 2
> (rcu_assign_pointer(wait))
> wait
> (rcu_read_lock)
> wait = rcu_dereference();
> if (wait) --> wait != NULL
> complete
> (rcu_read_unlock)
> wait ok
> (rcu_assign_pointer(NULL))
> kfree_rcu(completion)
>
>
> Use case 2: (timeout case)
> work 1 work 2
> (rcu_assign_pointer(wait))
> wait
> wait timeout
> (rcu_assign_pointer(NULL))
> kfree_rcu(completion)
> (rcu_read_lock)
> wait = rcu_dereference();
> if (wait) --> wait == NULL
> complete
> (rcu_read_unlock)
Thanks, I haven't had a chance to look at v6 yet (nor v5 for that
matter) but at least based on this mail your idea looks much better than
polling.
--
https://patchwork.kernel.org/project/linux-wireless/list/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches