2024-01-12 19:51:36

by Bitterblue Smith

[permalink] [raw]
Subject: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB

Currently it takes almost 6 seconds to upload the firmware for RTL8192CU
(and 11 seconds for RTL8192DU). That's because the firmware is uploaded
one byte at a time.

Also, after plugging the device, the firmware gets uploaded three times
before a connection to the AP is established.

Maybe this is fine for most users, but when testing changes to the
driver it's really annoying to wait so long.

Speed up the firmware upload by writing chunks of 64 bytes at a time.
This way it takes about 110 ms for RTL8192CU (and about 210 ms for
RTL8192DU).

PCI devices could upload it in chunks of 4 bytes, but I don't have any
to test and commit 89d32c9071aa ("rtlwifi: Download firmware as bytes
rather than as dwords") decided otherwise anyway.

Signed-off-by: Bitterblue Smith <[email protected]>
---
drivers/net/wireless/realtek/rtlwifi/efuse.c | 65 +++++++++++++++++--
drivers/net/wireless/realtek/rtlwifi/efuse.h | 4 +-
.../wireless/realtek/rtlwifi/rtl8192cu/sw.c | 6 +-
drivers/net/wireless/realtek/rtlwifi/usb.c | 9 +++
drivers/net/wireless/realtek/rtlwifi/wifi.h | 8 +++
5 files changed, 82 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c b/drivers/net/wireless/realtek/rtlwifi/efuse.c
index 2e945554ed6d..870a276299f5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.c
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c
@@ -1287,18 +1287,73 @@ int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
}
EXPORT_SYMBOL_GPL(rtl_get_hwinfo);

-void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size)
+static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size)
+{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ u32 blockcount, blockcount8, blockcount4;
+ u32 remain8 = 0, remain4 = 0, remain = 0;
+ const u32 blocksize = 64;
+ const u32 blocksize8 = 8;
+ const u32 blocksize4 = 4;
+ u32 i, offset;
+
+ blockcount = size / blocksize;
+ remain8 = size % blocksize;
+ for (i = 0; i < blockcount; i++) {
+ offset = i * blocksize;
+ rtl_write_chunk(rtlpriv,
+ START_ADDRESS + offset,
+ blocksize, buffer + offset);
+ }
+
+ if (remain8) {
+ offset = blockcount * blocksize;
+ blockcount8 = remain8 / blocksize8;
+ remain4 = remain8 % blocksize8;
+
+ for (i = 0; i < blockcount8; i++)
+ rtl_write_chunk(rtlpriv,
+ START_ADDRESS + offset + i * blocksize8,
+ blocksize8,
+ buffer + offset + i * blocksize8);
+ }
+
+ if (remain4) {
+ offset += blockcount8 * blocksize8;
+ blockcount4 = remain4 / blocksize4;
+ remain = remain8 % blocksize4;
+
+ for (i = 0; i < blockcount4; i++)
+ rtl_write_dword(rtlpriv,
+ START_ADDRESS + offset + i * blocksize4,
+ cpu_to_le32(*(u32 *)(buffer + offset + i)));
+ }
+
+ if (remain) {
+ offset += blockcount4 * blocksize4;
+
+ for (i = 0; i < remain; i++)
+ rtl_write_byte(rtlpriv, START_ADDRESS + offset + i,
+ *(buffer + offset + i));
+ }
+}
+
+void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
- u8 *pu4byteptr = (u8 *)buffer;
u32 i;

- for (i = 0; i < size; i++)
- rtl_write_byte(rtlpriv, (START_ADDRESS + i), *(pu4byteptr + i));
+ if (rtlpriv->rtlhal.interface == INTF_PCI) {
+ for (i = 0; i < size; i++)
+ rtl_write_byte(rtlpriv, (START_ADDRESS + i),
+ *(buffer + i));
+ } else if (rtlpriv->rtlhal.interface == INTF_USB) {
+ _rtl_fw_block_write_usb(hw, buffer, size);
+ }
}
EXPORT_SYMBOL_GPL(rtl_fw_block_write);

-void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
+void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
u32 size)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h b/drivers/net/wireless/realtek/rtlwifi/efuse.h
index 1ec59f439382..4821625ad1e5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.h
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h
@@ -91,8 +91,8 @@ void efuse_power_switch(struct ieee80211_hw *hw, u8 write, u8 pwrstate);
int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
int max_size, u8 *hwinfo, int *params);
void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen);
-void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
+void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
u32 size);
-void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size);
+void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size);
void rtl_efuse_ops_init(struct ieee80211_hw *hw);
#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
index 20b4aac69642..9f4cf09090d6 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
@@ -40,7 +40,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
rtlpriv->dm.thermalvalue = 0;

/* for firmware buf */
- rtlpriv->rtlhal.pfirmware = vzalloc(0x4000);
+ rtlpriv->rtlhal.pfirmware = kmalloc(0x4000, GFP_KERNEL);
if (!rtlpriv->rtlhal.pfirmware) {
pr_err("Can't alloc buffer for fw\n");
return 1;
@@ -61,7 +61,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
fw_name, rtlpriv->io.dev,
GFP_KERNEL, hw, rtl_fw_cb);
if (err) {
- vfree(rtlpriv->rtlhal.pfirmware);
+ kfree(rtlpriv->rtlhal.pfirmware);
rtlpriv->rtlhal.pfirmware = NULL;
}
return err;
@@ -72,7 +72,7 @@ static void rtl92cu_deinit_sw_vars(struct ieee80211_hw *hw)
struct rtl_priv *rtlpriv = rtl_priv(hw);

if (rtlpriv->rtlhal.pfirmware) {
- vfree(rtlpriv->rtlhal.pfirmware);
+ kfree(rtlpriv->rtlhal.pfirmware);
rtlpriv->rtlhal.pfirmware = NULL;
}
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 07a7e6fa46af..1fc480fe18ad 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -125,6 +125,14 @@ static void _usb_write32_sync(struct rtl_priv *rtlpriv, u32 addr, u32 val)
_usb_write_sync(rtlpriv, addr, val, 4);
}

+static void _usb_write_chunk_sync(struct rtl_priv *rtlpriv, u32 addr,
+ u32 length, u8 *data)
+{
+ struct usb_device *udev = to_usb_device(rtlpriv->io.dev);
+
+ _usbctrl_vendorreq_sync(udev, REALTEK_USB_VENQT_WRITE, addr, data, length);
+}
+
static void _rtl_usb_io_handler_init(struct device *dev,
struct ieee80211_hw *hw)
{
@@ -135,6 +143,7 @@ static void _rtl_usb_io_handler_init(struct device *dev,
rtlpriv->io.write8 = _usb_write8_sync;
rtlpriv->io.write16 = _usb_write16_sync;
rtlpriv->io.write32 = _usb_write32_sync;
+ rtlpriv->io.write_chunk = _usb_write_chunk_sync;
rtlpriv->io.read8 = _usb_read8_sync;
rtlpriv->io.read16 = _usb_read16_sync;
rtlpriv->io.read32 = _usb_read32_sync;
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 53af324f3807..3821f6e31447 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -1450,6 +1450,8 @@ struct rtl_io {
void (*write8)(struct rtl_priv *rtlpriv, u32 addr, u8 val);
void (*write16)(struct rtl_priv *rtlpriv, u32 addr, u16 val);
void (*write32)(struct rtl_priv *rtlpriv, u32 addr, u32 val);
+ void (*write_chunk)(struct rtl_priv *rtlpriv, u32 addr, u32 length,
+ u8 *data);

u8 (*read8)(struct rtl_priv *rtlpriv, u32 addr);
u16 (*read16)(struct rtl_priv *rtlpriv, u32 addr);
@@ -2962,6 +2964,12 @@ static inline void rtl_write_dword(struct rtl_priv *rtlpriv,
rtlpriv->io.read32(rtlpriv, addr);
}

+static inline void rtl_write_chunk(struct rtl_priv *rtlpriv,
+ u32 addr, u32 length, u8 *data)
+{
+ rtlpriv->io.write_chunk(rtlpriv, addr, length, data);
+}
+
static inline u32 rtl_get_bbreg(struct ieee80211_hw *hw,
u32 regaddr, u32 bitmask)
{
--
2.43.0


2024-01-14 00:27:02

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB

Hi Bitterblue,

kernel test robot noticed the following build warnings:

[auto build test WARNING on wireless-next/main]
[cannot apply to wireless/main linus/master v6.7 next-20240112]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Bitterblue-Smith/wifi-rtlwifi-Speed-up-firmware-loading-for-USB/20240113-035326
base: https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git main
patch link: https://lore.kernel.org/r/0d262acd-4f94-41c2-8d15-83486aeb976b%40gmail.com
patch subject: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB
config: csky-randconfig-r113-20240114 (https://download.01.org/0day-ci/archive/20240114/[email protected]/config)
compiler: csky-linux-gcc (GCC) 13.2.0
reproduce: (https://download.01.org/0day-ci/archive/20240114/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

sparse warnings: (new ones prefixed by >>)
>> drivers/net/wireless/realtek/rtlwifi/efuse.c:1329:41: sparse: sparse: incorrect type in argument 3 (different base types) @@ expected unsigned int [usertype] val32 @@ got restricted __le32 [usertype] @@
drivers/net/wireless/realtek/rtlwifi/efuse.c:1329:41: sparse: expected unsigned int [usertype] val32
drivers/net/wireless/realtek/rtlwifi/efuse.c:1329:41: sparse: got restricted __le32 [usertype]

vim +1329 drivers/net/wireless/realtek/rtlwifi/efuse.c

1289
1290 static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size)
1291 {
1292 struct rtl_priv *rtlpriv = rtl_priv(hw);
1293 u32 blockcount, blockcount8, blockcount4;
1294 u32 remain8 = 0, remain4 = 0, remain = 0;
1295 const u32 blocksize = 64;
1296 const u32 blocksize8 = 8;
1297 const u32 blocksize4 = 4;
1298 u32 i, offset;
1299
1300 blockcount = size / blocksize;
1301 remain8 = size % blocksize;
1302 for (i = 0; i < blockcount; i++) {
1303 offset = i * blocksize;
1304 rtl_write_chunk(rtlpriv,
1305 START_ADDRESS + offset,
1306 blocksize, buffer + offset);
1307 }
1308
1309 if (remain8) {
1310 offset = blockcount * blocksize;
1311 blockcount8 = remain8 / blocksize8;
1312 remain4 = remain8 % blocksize8;
1313
1314 for (i = 0; i < blockcount8; i++)
1315 rtl_write_chunk(rtlpriv,
1316 START_ADDRESS + offset + i * blocksize8,
1317 blocksize8,
1318 buffer + offset + i * blocksize8);
1319 }
1320
1321 if (remain4) {
1322 offset += blockcount8 * blocksize8;
1323 blockcount4 = remain4 / blocksize4;
1324 remain = remain8 % blocksize4;
1325
1326 for (i = 0; i < blockcount4; i++)
1327 rtl_write_dword(rtlpriv,
1328 START_ADDRESS + offset + i * blocksize4,
> 1329 cpu_to_le32(*(u32 *)(buffer + offset + i)));
1330 }
1331
1332 if (remain) {
1333 offset += blockcount4 * blocksize4;
1334
1335 for (i = 0; i < remain; i++)
1336 rtl_write_byte(rtlpriv, START_ADDRESS + offset + i,
1337 *(buffer + offset + i));
1338 }
1339 }
1340

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-01-15 00:52:24

by Ping-Ke Shih

[permalink] [raw]
Subject: RE: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB



> -----Original Message-----
> From: Bitterblue Smith <[email protected]>
> Sent: Saturday, January 13, 2024 3:51 AM
> To: [email protected]
> Cc: Ping-Ke Shih <[email protected]>; Larry Finger <[email protected]>
> Subject: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB
>
> Currently it takes almost 6 seconds to upload the firmware for RTL8192CU
> (and 11 seconds for RTL8192DU). That's because the firmware is uploaded
> one byte at a time.
>
> Also, after plugging the device, the firmware gets uploaded three times
> before a connection to the AP is established.
>
> Maybe this is fine for most users, but when testing changes to the
> driver it's really annoying to wait so long.
>
> Speed up the firmware upload by writing chunks of 64 bytes at a time.
> This way it takes about 110 ms for RTL8192CU (and about 210 ms for
> RTL8192DU).
>
> PCI devices could upload it in chunks of 4 bytes, but I don't have any
> to test and commit 89d32c9071aa ("rtlwifi: Download firmware as bytes
> rather than as dwords") decided otherwise anyway.
>
> Signed-off-by: Bitterblue Smith <[email protected]>
> ---
> drivers/net/wireless/realtek/rtlwifi/efuse.c | 65 +++++++++++++++++--
> drivers/net/wireless/realtek/rtlwifi/efuse.h | 4 +-
> .../wireless/realtek/rtlwifi/rtl8192cu/sw.c | 6 +-
> drivers/net/wireless/realtek/rtlwifi/usb.c | 9 +++
> drivers/net/wireless/realtek/rtlwifi/wifi.h | 8 +++
> 5 files changed, 82 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c
> b/drivers/net/wireless/realtek/rtlwifi/efuse.c
> index 2e945554ed6d..870a276299f5 100644
> --- a/drivers/net/wireless/realtek/rtlwifi/efuse.c
> +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c
> @@ -1287,18 +1287,73 @@ int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
> }
> EXPORT_SYMBOL_GPL(rtl_get_hwinfo);
>
> -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size)
> +static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size)
> +{
> + struct rtl_priv *rtlpriv = rtl_priv(hw);
> + u32 blockcount, blockcount8, blockcount4;
> + u32 remain8 = 0, remain4 = 0, remain = 0;
> + const u32 blocksize = 64;
> + const u32 blocksize8 = 8;
> + const u32 blocksize4 = 4;
> + u32 i, offset;
> +
> + blockcount = size / blocksize;
> + remain8 = size % blocksize;
> + for (i = 0; i < blockcount; i++) {
> + offset = i * blocksize;
> + rtl_write_chunk(rtlpriv,
> + START_ADDRESS + offset,
> + blocksize, buffer + offset);
> + }
> +
> + if (remain8) {
> + offset = blockcount * blocksize;
> + blockcount8 = remain8 / blocksize8;
> + remain4 = remain8 % blocksize8;
> +
> + for (i = 0; i < blockcount8; i++)
> + rtl_write_chunk(rtlpriv,
> + START_ADDRESS + offset + i * blocksize8,
> + blocksize8,
> + buffer + offset + i * blocksize8);
> + }
> +
> + if (remain4) {
> + offset += blockcount8 * blocksize8;
> + blockcount4 = remain4 / blocksize4;
> + remain = remain8 % blocksize4;
> +
> + for (i = 0; i < blockcount4; i++)
> + rtl_write_dword(rtlpriv,
> + START_ADDRESS + offset + i * blocksize4,
> + cpu_to_le32(*(u32 *)(buffer + offset + i)));

This should be le32_to_cpu().

> + }
> +
> + if (remain) {
> + offset += blockcount4 * blocksize4;
> +
> + for (i = 0; i < remain; i++)
> + rtl_write_byte(rtlpriv, START_ADDRESS + offset + i,
> + *(buffer + offset + i));
> + }
> +}

I think we can advance the 'start' and 'buffer' addresses after each write, so
the arithmetic becomes simpler. We can also combine the 64/8/4/1-byte block
writes into a single loop. Pseudo code like this:

static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size)
{
u32 start = START_ADDRESS;
u32 n;

while (size > 0) {
if (size >= 64) {
n = 64;
rtl_write_chunk(rtlpriv, start, 64, buffer);
} else if (size >= 8) {
n = 8;
rtl_write_chunk(rtlpriv, start, 8, buffer);
} else if (size >= 4) {
n = 4;
rtl_write_dword(rtlpriv, start, le32_to_cpu(*(u32 *)buffer));
} else {
n = 1;
rtl_write_byte(rtlpriv, start, *buffer);
}

start += n;
buffer += n;
size -= n;
}
}

> +
> +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size)
> {
> struct rtl_priv *rtlpriv = rtl_priv(hw);
> - u8 *pu4byteptr = (u8 *)buffer;
> u32 i;
>
> - for (i = 0; i < size; i++)
> - rtl_write_byte(rtlpriv, (START_ADDRESS + i), *(pu4byteptr + i));
> + if (rtlpriv->rtlhal.interface == INTF_PCI) {
> + for (i = 0; i < size; i++)
> + rtl_write_byte(rtlpriv, (START_ADDRESS + i),
> + *(buffer + i));
> + } else if (rtlpriv->rtlhal.interface == INTF_USB) {
> + _rtl_fw_block_write_usb(hw, buffer, size);
> + }
> }
> EXPORT_SYMBOL_GPL(rtl_fw_block_write);
>
> -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
> +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
> u32 size)
> {
> struct rtl_priv *rtlpriv = rtl_priv(hw);
> diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h
> b/drivers/net/wireless/realtek/rtlwifi/efuse.h
> index 1ec59f439382..4821625ad1e5 100644
> --- a/drivers/net/wireless/realtek/rtlwifi/efuse.h
> +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h
> @@ -91,8 +91,8 @@ void efuse_power_switch(struct ieee80211_hw *hw, u8 write, u8 pwrstate);
> int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
> int max_size, u8 *hwinfo, int *params);
> void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen);
> -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
> +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
> u32 size);
> -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size);
> +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size);
> void rtl_efuse_ops_init(struct ieee80211_hw *hw);
> #endif
> diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
> b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
> index 20b4aac69642..9f4cf09090d6 100644
> --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
> +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
> @@ -40,7 +40,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
> rtlpriv->dm.thermalvalue = 0;
>
> /* for firmware buf */
> - rtlpriv->rtlhal.pfirmware = vzalloc(0x4000);
> + rtlpriv->rtlhal.pfirmware = kmalloc(0x4000, GFP_KERNEL);

Why should we use kmalloc instead? I don't see any description of this in the
commit message.

> if (!rtlpriv->rtlhal.pfirmware) {
> pr_err("Can't alloc buffer for fw\n");
> return 1;
> @@ -61,7 +61,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
> fw_name, rtlpriv->io.dev,
> GFP_KERNEL, hw, rtl_fw_cb);
> if (err) {
> - vfree(rtlpriv->rtlhal.pfirmware);
> + kfree(rtlpriv->rtlhal.pfirmware);
> rtlpriv->rtlhal.pfirmware = NULL;
> }
> return err;
> @@ -72,7 +72,7 @@ static void rtl92cu_deinit_sw_vars(struct ieee80211_hw *hw)
> struct rtl_priv *rtlpriv = rtl_priv(hw);
>
> if (rtlpriv->rtlhal.pfirmware) {
> - vfree(rtlpriv->rtlhal.pfirmware);
> + kfree(rtlpriv->rtlhal.pfirmware);
> rtlpriv->rtlhal.pfirmware = NULL;
> }
> }

[...]


2024-01-15 03:11:09

by Larry Finger

[permalink] [raw]
Subject: Re: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB

On 1/12/24 13:51, Bitterblue Smith wrote:
> Currently it takes almost 6 seconds to upload the firmware for RTL8192CU
> (and 11 seconds for RTL8192DU). That's because the firmware is uploaded
> one byte at a time.
>
> Also, after plugging the device, the firmware gets uploaded three times
> before a connection to the AP is established.
>
> Maybe this is fine for most users, but when testing changes to the
> driver it's really annoying to wait so long.
>
> Speed up the firmware upload by writing chunks of 64 bytes at a time.
> This way it takes about 110 ms for RTL8192CU (and about 210 ms for
> RTL8192DU).
>
> PCI devices could upload it in chunks of 4 bytes, but I don't have any
> to test and commit 89d32c9071aa ("rtlwifi: Download firmware as bytes
> rather than as dwords") decided otherwise anyway.
>
> Signed-off-by: Bitterblue Smith <[email protected]>
> ---

Tested OK on a BE ppc32 installation.

Tested-by: Larry Finger <[email protected]>


2024-01-15 15:29:16

by Bitterblue Smith

[permalink] [raw]
Subject: Re: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB

On 15/01/2024 02:50, Ping-Ke Shih wrote:
>
>
>> -----Original Message-----
>> From: Bitterblue Smith <[email protected]>
>> Sent: Saturday, January 13, 2024 3:51 AM
>> To: [email protected]
>> Cc: Ping-Ke Shih <[email protected]>; Larry Finger <[email protected]>
>> Subject: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB
>>
>> Currently it takes almost 6 seconds to upload the firmware for RTL8192CU
>> (and 11 seconds for RTL8192DU). That's because the firmware is uploaded
>> one byte at a time.
>>
>> Also, after plugging the device, the firmware gets uploaded three times
>> before a connection to the AP is established.
>>
>> Maybe this is fine for most users, but when testing changes to the
>> driver it's really annoying to wait so long.
>>
>> Speed up the firmware upload by writing chunks of 64 bytes at a time.
>> This way it takes about 110 ms for RTL8192CU (and about 210 ms for
>> RTL8192DU).
>>
>> PCI devices could upload it in chunks of 4 bytes, but I don't have any
>> to test and commit 89d32c9071aa ("rtlwifi: Download firmware as bytes
>> rather than as dwords") decided otherwise anyway.
>>
>> Signed-off-by: Bitterblue Smith <[email protected]>
>> ---
>> drivers/net/wireless/realtek/rtlwifi/efuse.c | 65 +++++++++++++++++--
>> drivers/net/wireless/realtek/rtlwifi/efuse.h | 4 +-
>> .../wireless/realtek/rtlwifi/rtl8192cu/sw.c | 6 +-
>> drivers/net/wireless/realtek/rtlwifi/usb.c | 9 +++
>> drivers/net/wireless/realtek/rtlwifi/wifi.h | 8 +++
>> 5 files changed, 82 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c
>> b/drivers/net/wireless/realtek/rtlwifi/efuse.c
>> index 2e945554ed6d..870a276299f5 100644
>> --- a/drivers/net/wireless/realtek/rtlwifi/efuse.c
>> +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c
>> @@ -1287,18 +1287,73 @@ int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
>> }
>> EXPORT_SYMBOL_GPL(rtl_get_hwinfo);
>>
>> -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size)
>> +static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size)
>> +{
>> + struct rtl_priv *rtlpriv = rtl_priv(hw);
>> + u32 blockcount, blockcount8, blockcount4;
>> + u32 remain8 = 0, remain4 = 0, remain = 0;
>> + const u32 blocksize = 64;
>> + const u32 blocksize8 = 8;
>> + const u32 blocksize4 = 4;
>> + u32 i, offset;
>> +
>> + blockcount = size / blocksize;
>> + remain8 = size % blocksize;
>> + for (i = 0; i < blockcount; i++) {
>> + offset = i * blocksize;
>> + rtl_write_chunk(rtlpriv,
>> + START_ADDRESS + offset,
>> + blocksize, buffer + offset);
>> + }
>> +
>> + if (remain8) {
>> + offset = blockcount * blocksize;
>> + blockcount8 = remain8 / blocksize8;
>> + remain4 = remain8 % blocksize8;
>> +
>> + for (i = 0; i < blockcount8; i++)
>> + rtl_write_chunk(rtlpriv,
>> + START_ADDRESS + offset + i * blocksize8,
>> + blocksize8,
>> + buffer + offset + i * blocksize8);
>> + }
>> +
>> + if (remain4) {
>> + offset += blockcount8 * blocksize8;
>> + blockcount4 = remain4 / blocksize4;
>> + remain = remain8 % blocksize4;
>> +
>> + for (i = 0; i < blockcount4; i++)
>> + rtl_write_dword(rtlpriv,
>> + START_ADDRESS + offset + i * blocksize4,
>> + cpu_to_le32(*(u32 *)(buffer + offset + i)));
>
> Here should be le32_to_cpu().
>

Right. But now I realise that rtl_write_dword is called at most once here,
so there is not much point in using it.

>> + }
>> +
>> + if (remain) {
>> + offset += blockcount4 * blocksize4;
>> +
>> + for (i = 0; i < remain; i++)
>> + rtl_write_byte(rtlpriv, START_ADDRESS + offset + i,
>> + *(buffer + offset + i));
>> + }
>> +}
>
> I think we can increase 'start' and 'buffer' addresses after writing, so
> arithmetic can be simple. And, combine 64/8/4/1 block writing into single
> loop. Pseudo code like
>
> static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size)
> {
> u32 start = START_ADDRESS;
> u32 n;
>
> while (size > 0) {
> if (size >= 64) {
> n = 64;
> rtl_write_chunk(rtlpriv, start, 64, buffer);
> } else if (size >= 8) {
> n = 8;
> rtl_write_chunk(rtlpriv, start, 8, buffer);
> } else if (size >= 4) {
> n = 4;
> rtl_write_dword(rtlpriv, start, le32_to_cpu(*(u32 *)buffer));
> } else {
> n = 1;
> rtl_write_byte(rtlpriv, start, *buffer);
> }
>
> start += n;
> buffer += n;
> size -= n;
> }
> }
>

Why didn't I think of that? (Because I didn't think much, just copied
the code from the RTL8192DU driver and cleaned it a bit.)

>> +
>> +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size)
>> {
>> struct rtl_priv *rtlpriv = rtl_priv(hw);
>> - u8 *pu4byteptr = (u8 *)buffer;
>> u32 i;
>>
>> - for (i = 0; i < size; i++)
>> - rtl_write_byte(rtlpriv, (START_ADDRESS + i), *(pu4byteptr + i));
>> + if (rtlpriv->rtlhal.interface == INTF_PCI) {
>> + for (i = 0; i < size; i++)
>> + rtl_write_byte(rtlpriv, (START_ADDRESS + i),
>> + *(buffer + i));
>> + } else if (rtlpriv->rtlhal.interface == INTF_USB) {
>> + _rtl_fw_block_write_usb(hw, buffer, size);
>> + }
>> }
>> EXPORT_SYMBOL_GPL(rtl_fw_block_write);
>>
>> -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
>> +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
>> u32 size)
>> {
>> struct rtl_priv *rtlpriv = rtl_priv(hw);
>> diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h
>> b/drivers/net/wireless/realtek/rtlwifi/efuse.h
>> index 1ec59f439382..4821625ad1e5 100644
>> --- a/drivers/net/wireless/realtek/rtlwifi/efuse.h
>> +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h
>> @@ -91,8 +91,8 @@ void efuse_power_switch(struct ieee80211_hw *hw, u8 write, u8 pwrstate);
>> int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv,
>> int max_size, u8 *hwinfo, int *params);
>> void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen);
>> -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
>> +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer,
>> u32 size);
>> -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size);
>> +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size);
>> void rtl_efuse_ops_init(struct ieee80211_hw *hw);
>> #endif
>> diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
>> b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
>> index 20b4aac69642..9f4cf09090d6 100644
>> --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
>> +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
>> @@ -40,7 +40,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
>> rtlpriv->dm.thermalvalue = 0;
>>
>> /* for firmware buf */
>> - rtlpriv->rtlhal.pfirmware = vzalloc(0x4000);
>> + rtlpriv->rtlhal.pfirmware = kmalloc(0x4000, GFP_KERNEL);
>
> Why should we use kmalloc instead? I don't see any description about this in
> commit message.
>

That's because usb_control_msg() can't use memory allocated by vmalloc:

Jan 09 19:39:29 ideapad2 kernel: xhci_hcd 0000:03:00.3: rejecting DMA map of vmalloc memory

I will mention this in the commit message.

>> if (!rtlpriv->rtlhal.pfirmware) {
>> pr_err("Can't alloc buffer for fw\n");
>> return 1;
>> @@ -61,7 +61,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
>> fw_name, rtlpriv->io.dev,
>> GFP_KERNEL, hw, rtl_fw_cb);
>> if (err) {
>> - vfree(rtlpriv->rtlhal.pfirmware);
>> + kfree(rtlpriv->rtlhal.pfirmware);
>> rtlpriv->rtlhal.pfirmware = NULL;
>> }
>> return err;
>> @@ -72,7 +72,7 @@ static void rtl92cu_deinit_sw_vars(struct ieee80211_hw *hw)
>> struct rtl_priv *rtlpriv = rtl_priv(hw);
>>
>> if (rtlpriv->rtlhal.pfirmware) {
>> - vfree(rtlpriv->rtlhal.pfirmware);
>> + kfree(rtlpriv->rtlhal.pfirmware);
>> rtlpriv->rtlhal.pfirmware = NULL;
>> }
>> }
>
> [...]
>
>