2019-04-30 13:37:51

by Rasmus Villemoes

[permalink] [raw]
Subject: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K

The current array of struct qe_snum uses 256*4 bytes just for keeping
track of the free/used state of each index, and the struct layout
means there's another 768 bytes of padding. If we just unzip that
structure, the array of snum values uses only 256 bytes, while the
free/inuse state can be tracked in a 32 byte bitmap.

So this reduces the .data footprint by 1760 bytes. It also serves as
preparation for introducing another DT binding for specifying the snum
values.

Signed-off-by: Rasmus Villemoes <[email protected]>
---
drivers/soc/fsl/qe/qe.c | 37 ++++++++++++-------------------------
1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 855373deb746..d0393f83145c 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -14,6 +14,7 @@
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
+#include <linux/bitmap.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock);
DEFINE_SPINLOCK(cmxgcr_lock);
EXPORT_SYMBOL(cmxgcr_lock);

-/* QE snum state */
-enum qe_snum_state {
- QE_SNUM_STATE_USED,
- QE_SNUM_STATE_FREE
-};
-
-/* QE snum */
-struct qe_snum {
- u8 num;
- enum qe_snum_state state;
-};
-
/* We allocate this here because it is used almost exclusively for
* the communication processor devices.
*/
struct qe_immap __iomem *qe_immr;
EXPORT_SYMBOL(qe_immr);

-static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
+static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
+static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
static unsigned int qe_num_of_snum;

static phys_addr_t qebase = -1;
@@ -308,6 +298,7 @@ static void qe_snums_init(void)
};
const u8 *snum_init;

+ bitmap_zero(snum_state, QE_NUM_OF_SNUM);
qe_num_of_snum = qe_get_num_of_snums();

if (qe_num_of_snum == 76)
@@ -315,10 +306,8 @@ static void qe_snums_init(void)
else
snum_init = snum_init_46;

- for (i = 0; i < qe_num_of_snum; i++) {
- snums[i].num = snum_init[i];
- snums[i].state = QE_SNUM_STATE_FREE;
- }
+ for (i = 0; i < qe_num_of_snum; i++)
+ snums[i] = snum_init[i];
}

int qe_get_snum(void)
@@ -328,12 +317,10 @@ int qe_get_snum(void)
int i;

spin_lock_irqsave(&qe_lock, flags);
- for (i = 0; i < qe_num_of_snum; i++) {
- if (snums[i].state == QE_SNUM_STATE_FREE) {
- snums[i].state = QE_SNUM_STATE_USED;
- snum = snums[i].num;
- break;
- }
+ i = find_first_zero_bit(snum_state, qe_num_of_snum);
+ if (i < qe_num_of_snum) {
+ set_bit(i, snum_state);
+ snum = snums[i];
}
spin_unlock_irqrestore(&qe_lock, flags);

@@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
int i;

for (i = 0; i < qe_num_of_snum; i++) {
- if (snums[i].num == snum) {
- snums[i].state = QE_SNUM_STATE_FREE;
+ if (snums[i] == snum) {
+ clear_bit(i, snum_state);
break;
}
}
--
2.20.1


2019-04-30 17:13:49

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K



Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :
> The current array of struct qe_snum use 256*4 bytes for just keeping
> track of the free/used state of each index, and the struct layout
> means there's another 768 bytes of padding. If we just unzip that
> structure, the array of snum values just use 256 bytes, while the
> free/inuse state can be tracked in a 32 byte bitmap.
>
> So this reduces the .data footprint by 1760 bytes. It also serves as
> preparation for introducing another DT binding for specifying the snum
> values.
>
> Signed-off-by: Rasmus Villemoes <[email protected]>
> ---
> drivers/soc/fsl/qe/qe.c | 37 ++++++++++++-------------------------
> 1 file changed, 12 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
> index 855373deb746..d0393f83145c 100644
> --- a/drivers/soc/fsl/qe/qe.c
> +++ b/drivers/soc/fsl/qe/qe.c
> @@ -14,6 +14,7 @@
> * Free Software Foundation; either version 2 of the License, or (at your
> * option) any later version.
> */
> +#include <linux/bitmap.h>
> #include <linux/errno.h>
> #include <linux/sched.h>
> #include <linux/kernel.h>
> @@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock);
> DEFINE_SPINLOCK(cmxgcr_lock);
> EXPORT_SYMBOL(cmxgcr_lock);
>
> -/* QE snum state */
> -enum qe_snum_state {
> - QE_SNUM_STATE_USED,
> - QE_SNUM_STATE_FREE
> -};
> -
> -/* QE snum */
> -struct qe_snum {
> - u8 num;
> - enum qe_snum_state state;
> -};
> -
> /* We allocate this here because it is used almost exclusively for
> * the communication processor devices.
> */
> struct qe_immap __iomem *qe_immr;
> EXPORT_SYMBOL(qe_immr);
>
> -static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
> +static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
> +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
> static unsigned int qe_num_of_snum;
>
> static phys_addr_t qebase = -1;
> @@ -308,6 +298,7 @@ static void qe_snums_init(void)
> };
> const u8 *snum_init;
>
> + bitmap_zero(snum_state, QE_NUM_OF_SNUM);

It doesn't matter much, but wouldn't it be more logical to add
this line where .state = QE_SNUM_STATE_FREE was previously set,
i.e. around the for() loop below?

> qe_num_of_snum = qe_get_num_of_snums();
>
> if (qe_num_of_snum == 76)
> @@ -315,10 +306,8 @@ static void qe_snums_init(void)
> else
> snum_init = snum_init_46;
>
> - for (i = 0; i < qe_num_of_snum; i++) {
> - snums[i].num = snum_init[i];
> - snums[i].state = QE_SNUM_STATE_FREE;
> - }
> + for (i = 0; i < qe_num_of_snum; i++)
> + snums[i] = snum_init[i];

Could we use memcpy() instead?

> }
>
> int qe_get_snum(void)
> @@ -328,12 +317,10 @@ int qe_get_snum(void)
> int i;
>
> spin_lock_irqsave(&qe_lock, flags);
> - for (i = 0; i < qe_num_of_snum; i++) {
> - if (snums[i].state == QE_SNUM_STATE_FREE) {
> - snums[i].state = QE_SNUM_STATE_USED;
> - snum = snums[i].num;
> - break;
> - }
> + i = find_first_zero_bit(snum_state, qe_num_of_snum);
> + if (i < qe_num_of_snum) {
> + set_bit(i, snum_state);
> + snum = snums[i];
> }
> spin_unlock_irqrestore(&qe_lock, flags);
>
> @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
> int i;
>
> for (i = 0; i < qe_num_of_snum; i++) {
> - if (snums[i].num == snum) {
> - snums[i].state = QE_SNUM_STATE_FREE;
> + if (snums[i] == snum) {
> + clear_bit(i, snum_state);
> break;
> }
> }

Can we replace this loop with memchr()?

Christophe

2019-05-01 05:52:36

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K

On 30/04/2019 19.12, Christophe Leroy wrote:
>
> Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :
>> The current array of struct qe_snum use 256*4 bytes for just keeping
>> track of the free/used state of each index, and the struct layout
>> means there's another 768 bytes of padding. If we just unzip that
>> structure, the array of snum values just use 256 bytes, while the
>> free/inuse state can be tracked in a 32 byte bitmap.
>>
>> So this reduces the .data footprint by 1760 bytes. It also serves as
>> preparation for introducing another DT binding for specifying the snum
>> values.
>>
>> Signed-off-by: Rasmus Villemoes <[email protected]>
>> ---
>> -
>>   /* We allocate this here because it is used almost exclusively for
>>    * the communication processor devices.
>>    */
>>   struct qe_immap __iomem *qe_immr;
>>   EXPORT_SYMBOL(qe_immr);
>>   -static struct qe_snum snums[QE_NUM_OF_SNUM];    /* Dynamically
>> allocated SNUMs */
>> +static u8 snums[QE_NUM_OF_SNUM];    /* Dynamically allocated SNUMs */
>> +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
>>   static unsigned int qe_num_of_snum;
>>     static phys_addr_t qebase = -1;
>> @@ -308,6 +298,7 @@ static void qe_snums_init(void)
>>       };
>>       const u8 *snum_init;
>>   +    bitmap_zero(snum_state, QE_NUM_OF_SNUM);
>
> Doesn't make much importance, but wouldn't it be more logical to add
> this line where the setting of .state = QE_SNUM_STATE_FREE was done
> previously, ie around the for() loop below ?

This was on purpose, to avoid having to move it up in patch 4, where we
don't necessarily reach the for loop.

>>       qe_num_of_snum = qe_get_num_of_snums();
>>         if (qe_num_of_snum == 76)
>> @@ -315,10 +306,8 @@ static void qe_snums_init(void)
>>       else
>>           snum_init = snum_init_46;
>>   -    for (i = 0; i < qe_num_of_snum; i++) {
>> -        snums[i].num = snum_init[i];
>> -        snums[i].state = QE_SNUM_STATE_FREE;
>> -    }
>> +    for (i = 0; i < qe_num_of_snum; i++)
>> +        snums[i] = snum_init[i];
>
> Could use memcpy() instead ?

Yes, I switch to that in 5/5. Sure, I could do it here already, but I
did it this way to keep close to the current style. I don't care either
way, so if you prefer introducing memcpy here, fine by me.


>>       spin_unlock_irqrestore(&qe_lock, flags);
>>   @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
>>       int i;
>>         for (i = 0; i < qe_num_of_snum; i++) {
>> -        if (snums[i].num == snum) {
>> -            snums[i].state = QE_SNUM_STATE_FREE;
>> +        if (snums[i] == snum) {
>> +            clear_bit(i, snum_state);
>>               break;
>>           }
>>       }
>
> Can we replace this loop by memchr() ?

Hm, yes. So that would be

const u8 *p = memchr(snums, snum, qe_num_of_snum);
if (p)
	clear_bit(p - snums, snum_state);

I guess. Let me fold that in and see how it looks.

Thanks,
Rasmus