The new challenge is to remove VLAs from the kernel
(see https://lkml.org/lkml/2018/3/7/621) to eventually
turn on -Wvla.
Using a kmalloc array is the easy way to fix this but kmalloc is still
more expensive than stack allocation. Introduce a fast path with a
fixed size stack array to cover most chip with gpios below some fixed
amount. The slow path dynamically allocates an array to cover those
chips with a large number of gpios.
Reviewed-and-tested-by: Lukas Wunner <[email protected]>
Signed-off-by: Lukas Wunner <[email protected]>
Signed-off-by: Laura Abbott <[email protected]>
---
v4: Changed some local variables to avoid coccinelle warnings. Added a
warning if the number of GPIOs exceeds the current fast path define.
Lukas, I kept your Tested-by because the changes were pretty minimal.
Let me know if you want to run the tests again.
---
drivers/gpio/gpiolib.c | 89 ++++++++++++++++++++++++++++++++++---------
drivers/gpio/gpiolib.h | 2 +-
include/linux/gpio/consumer.h | 10 +++--
3 files changed, 79 insertions(+), 22 deletions(-)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index d66de67ef307..b6605255662e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -61,6 +61,11 @@ static struct bus_type gpio_bus_type = {
.name = "gpio",
};
+/*
+ * Number of GPIOs to use for the fast path in set array
+ */
+#define FASTPATH_NGPIO 256
+
/* gpio_lock prevents conflicts during gpio_desc[] table updates.
* While any GPIO is requested, its gpio_chip is not removable;
* each GPIO's "requested" flag serves as a lock and refcount.
@@ -399,12 +404,11 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
vals[i] = !!ghd.values[i];
/* Reuse the array setting function */
- gpiod_set_array_value_complex(false,
+ return gpiod_set_array_value_complex(false,
true,
lh->numdescs,
lh->descs,
vals);
- return 0;
}
return -EINVAL;
}
@@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
goto err_free_descs;
}
+ if (chip->ngpio > FASTPATH_NGPIO)
+ chip_warn(chip, "line cnt %d is greater than fast path cnt %d\n",
+ chip->ngpio, FASTPATH_NGPIO);
+
gdev->label = kstrdup_const(chip->label ?: "unknown", GFP_KERNEL);
if (!gdev->label) {
status = -ENOMEM;
@@ -2653,6 +2661,7 @@ static int gpio_chip_get_multiple(struct gpio_chip *chip,
return -EIO;
}
+
int gpiod_get_array_value_complex(bool raw, bool can_sleep,
unsigned int array_size,
struct gpio_desc **desc_array,
@@ -2662,16 +2671,28 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep,
while (i < array_size) {
struct gpio_chip *chip = desc_array[i]->gdev->chip;
- unsigned long mask[BITS_TO_LONGS(chip->ngpio)];
- unsigned long bits[BITS_TO_LONGS(chip->ngpio)];
+ unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)];
+ unsigned long *mask, *bits;
int first, j, ret;
+ if (likely(chip->ngpio <= FASTPATH_NGPIO)) {
+ memset(fastpath, 0, sizeof(fastpath));
+ mask = fastpath;
+ bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO);
+ } else {
+ mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio),
+ sizeof(*mask),
+ can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+ if (!mask)
+ return -ENOMEM;
+ bits = mask + BITS_TO_LONGS(chip->ngpio);
+ }
+
if (!can_sleep)
WARN_ON(chip->can_sleep);
/* collect all inputs belonging to the same chip */
first = i;
- memset(mask, 0, sizeof(mask));
do {
const struct gpio_desc *desc = desc_array[i];
int hwgpio = gpio_chip_hwgpio(desc);
@@ -2682,8 +2703,15 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep,
(desc_array[i]->gdev->chip == chip));
ret = gpio_chip_get_multiple(chip, mask, bits);
- if (ret)
+ if (ret) {
+ /*
+ * Go against style and intentionally do the check for
+ * performance reasons
+ */
+ if (mask != fastpath)
+ kfree(mask);
return ret;
+ }
for (j = first; j < i; j++) {
const struct gpio_desc *desc = desc_array[j];
@@ -2695,6 +2723,13 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep,
value_array[j] = value;
trace_gpio_value(desc_to_gpio(desc), 1, value);
}
+
+ /*
+ * Go against style and intentionally do the check for
+ * performance reasons
+ */
+ if (mask != fastpath)
+ kfree(mask);
}
return 0;
}
@@ -2878,7 +2913,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip,
}
}
-void gpiod_set_array_value_complex(bool raw, bool can_sleep,
+int gpiod_set_array_value_complex(bool raw, bool can_sleep,
unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array)
@@ -2887,14 +2922,26 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep,
while (i < array_size) {
struct gpio_chip *chip = desc_array[i]->gdev->chip;
- unsigned long mask[BITS_TO_LONGS(chip->ngpio)];
- unsigned long bits[BITS_TO_LONGS(chip->ngpio)];
+ unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)];
+ unsigned long *mask, *bits;
int count = 0;
+ if (likely(chip->ngpio <= FASTPATH_NGPIO)) {
+ memset(fastpath, 0, sizeof(fastpath));
+ mask = fastpath;
+ bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO);
+ } else {
+ mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio),
+ sizeof(*mask),
+ can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+ if (!mask)
+ return -ENOMEM;
+ bits = mask + BITS_TO_LONGS(chip->ngpio);
+ }
+
if (!can_sleep)
WARN_ON(chip->can_sleep);
- memset(mask, 0, sizeof(mask));
do {
struct gpio_desc *desc = desc_array[i];
int hwgpio = gpio_chip_hwgpio(desc);
@@ -2925,7 +2972,15 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep,
/* push collected bits to outputs */
if (count != 0)
gpio_chip_set_multiple(chip, mask, bits);
+
+ /*
+ * Go against style and intentionally do the check for
+ * performance reasons
+ */
+ if (mask != fastpath)
+ kfree(mask);
}
+ return 0;
}
/**
@@ -3000,13 +3055,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value);
* This function should be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
*/
-void gpiod_set_raw_array_value(unsigned int array_size,
+int gpiod_set_raw_array_value(unsigned int array_size,
struct gpio_desc **desc_array, int *value_array)
{
if (!desc_array)
- return;
- gpiod_set_array_value_complex(true, false, array_size, desc_array,
- value_array);
+ return -EINVAL;
+ return gpiod_set_array_value_complex(true, false, array_size,
+ desc_array, value_array);
}
EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
@@ -3326,14 +3381,14 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
*
* This function is to be called from contexts that can sleep.
*/
-void gpiod_set_raw_array_value_cansleep(unsigned int array_size,
+int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array)
{
might_sleep_if(extra_checks);
if (!desc_array)
- return;
- gpiod_set_array_value_complex(true, true, array_size, desc_array,
+ return -EINVAL;
+ return gpiod_set_array_value_complex(true, true, array_size, desc_array,
value_array);
}
EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep);
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index b17ec6795c81..b64813e3876e 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -188,7 +188,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep,
unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array);
-void gpiod_set_array_value_complex(bool raw, bool can_sleep,
+int gpiod_set_array_value_complex(bool raw, bool can_sleep,
unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array);
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index dbd065963296..243112c7fa7d 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -116,7 +116,7 @@ int gpiod_get_raw_array_value(unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array);
void gpiod_set_raw_value(struct gpio_desc *desc, int value);
-void gpiod_set_raw_array_value(unsigned int array_size,
+int gpiod_set_raw_array_value(unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array);
@@ -134,7 +134,7 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array);
void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value);
-void gpiod_set_raw_array_value_cansleep(unsigned int array_size,
+int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array);
@@ -369,12 +369,13 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
/* GPIO can never have been requested */
WARN_ON(1);
}
-static inline void gpiod_set_raw_array_value(unsigned int array_size,
+static inline int gpiod_set_raw_array_value(unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array)
{
/* GPIO can never have been requested */
WARN_ON(1);
+ return 0;
}
static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc)
@@ -423,12 +424,13 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc,
/* GPIO can never have been requested */
WARN_ON(1);
}
-static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size,
+static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
struct gpio_desc **desc_array,
int *value_array)
{
/* GPIO can never have been requested */
WARN_ON(1);
+ return 0;
}
static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
--
2.14.3
On Tue, Apr 10, 2018 at 06:03:52PM -0700, Laura Abbott wrote:
> --- a/drivers/gpio/gpiolib.c
> +++ b/drivers/gpio/gpiolib.c
> @@ -61,6 +61,11 @@ static struct bus_type gpio_bus_type = {
> .name = "gpio",
> };
>
> +/*
> + * Number of GPIOs to use for the fast path in set array
> + */
> +#define FASTPATH_NGPIO 256
Hm, this has regressed from 384 back to 256 since v3.
> @@ -2653,6 +2661,7 @@ static int gpio_chip_get_multiple(struct gpio_chip *chip,
> return -EIO;
> }
>
> +
> int gpiod_get_array_value_complex(bool raw, bool can_sleep,
Spurious newline. (In v3 this was the place where FASTPATH_NGPIO was
defined, this is a leftover from when you moved it further up.)
I've given this another quick test with gpio-hammer and it worked fine,
so this is still
Reviewed-and-tested-by: Lukas Wunner <[email protected]>
Thanks a lot!
Lukas
On Wed, Apr 11, 2018 at 3:03 AM, Laura Abbott <[email protected]> wrote:
> The new challenge is to remove VLAs from the kernel
> (see https://lkml.org/lkml/2018/3/7/621) to eventually
> turn on -Wvla.
>
> Using a kmalloc array is the easy way to fix this but kmalloc is still
> more expensive than stack allocation. Introduce a fast path with a
> fixed size stack array to cover most chip with gpios below some fixed
> amount. The slow path dynamically allocates an array to cover those
> chips with a large number of gpios.
>
> Reviewed-and-tested-by: Lukas Wunner <[email protected]>
> Signed-off-by: Lukas Wunner <[email protected]>
> Signed-off-by: Laura Abbott <[email protected]>
> ---
> v4: Changed some local variables to avoid coccinelle warnings. Added a
> warning if the number of GPIOs exceeds the current fast path define.
>
> Lukas, I kept your Tested-by because the changes were pretty minimal.
> Let me know if you want to run the tests again.
This patch is starting to look really good.
> +/*
> + * Number of GPIOs to use for the fast path in set array
> + */
> +#define FASTPATH_NGPIO 256
There is still some comment about this.
And now that I am also tryint to think I wonder about it, we
have a global ARCH_NR_GPIOS that is typically 512.
Some archs set it up.
This define is something of an abomination, in the ARM
case it comes from arch/arm/include/asm/gpio.h
where #define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO
where the latter is a Kconfig option that is mostly 512 for
most ARM systems.
Well, ARM looks like this:
config ARCH_NR_GPIO
int
default 2048 if ARCH_SOCFPGA
default 1024 if ARCH_BRCMSTB || ARCH_SHMOBILE || ARCH_TEGRA || \
ARCH_ZYNQ
default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \
SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210
default 416 if ARCH_SUNXI
default 392 if ARCH_U8500
default 352 if ARCH_VT8500
default 288 if ARCH_ROCKCHIP
default 264 if MACH_H4700
default 0
help
Maximum number of GPIOs in the system.
If unsure, leave the default value.
So if FASTPATH_NGPIO should be anything else than
ARCH_NR_GPIO this has to be established somewhere
as a floor or half or something, but I would just set it as
the same as ARCH_NR_GPIOS...
The main reason this define exist is for this function
from <linux/gpio/consumer.h>:
/* Convert between the old gpio_ and new gpiod_ interfaces */
struct gpio_desc *gpio_to_desc(unsigned gpio);
Nowadays that fact is a bit obscured since the variable is
only used when assigning the base (in the global GPIO
number space, which is what we want to get rid of but
sigh) in gpiochip_find_base() where it attempts to place
a newly allocated gpiochip in the higher region of this
numberspace since the embedded SoC GPIO base tends
to be 0, on old platforms.
So I don't know about this.
Can't we just use ARCH_NR_GPIOS?
Very few systems have more than 512 assigned global
GPIO numbers and those are FPGA experimental machines.
In the long run obviously I want to get rid of these defines
altogether and only allocate GPIO descriptos dynamically
so as you see I am reluctant to add new numberspace weirdness
around here.
Yours,
Linus Walleij
On 12/04/2018 16:38, Linus Walleij wrote:
> On Wed, Apr 11, 2018 at 3:03 AM, Laura Abbott <[email protected]> wrote:
>
>> The new challenge is to remove VLAs from the kernel
>> (see https://lkml.org/lkml/2018/3/7/621) to eventually
>> turn on -Wvla.
>>
>> Using a kmalloc array is the easy way to fix this but kmalloc is still
>> more expensive than stack allocation. Introduce a fast path with a
>> fixed size stack array to cover most chip with gpios below some fixed
>> amount. The slow path dynamically allocates an array to cover those
>> chips with a large number of gpios.
>>
>> Reviewed-and-tested-by: Lukas Wunner <[email protected]>
>> Signed-off-by: Lukas Wunner <[email protected]>
>> Signed-off-by: Laura Abbott <[email protected]>
>> ---
>> v4: Changed some local variables to avoid coccinelle warnings. Added a
>> warning if the number of GPIOs exceeds the current fast path define.
>>
>> Lukas, I kept your Tested-by because the changes were pretty minimal.
>> Let me know if you want to run the tests again.
>
> This patch is starting to look really good.
>
>> +/*
>> + * Number of GPIOs to use for the fast path in set array
>> + */
>> +#define FASTPATH_NGPIO 256
>
> There is still some comment about this.
>
> And now that I am also tryint to think I wonder about it, we
> have a global ARCH_NR_GPIOS that is typically 512.
> Some archs set it up.
>
> This define is something of an abomination, in the ARM
> case it comes from arch/arm/include/asm/gpio.h
> where #define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO
> where the latter is a Kconfig option that is mostly 512 for
> most ARM systems.
>
> Well, ARM looks like this:
>
> config ARCH_NR_GPIO
> int
> default 2048 if ARCH_SOCFPGA
> default 1024 if ARCH_BRCMSTB || ARCH_SHMOBILE || ARCH_TEGRA || \
> ARCH_ZYNQ
> default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \
> SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210
> default 416 if ARCH_SUNXI
> default 392 if ARCH_U8500
> default 352 if ARCH_VT8500
> default 288 if ARCH_ROCKCHIP
> default 264 if MACH_H4700
> default 0
> help
> Maximum number of GPIOs in the system.
>
> If unsure, leave the default value.
>
> So if FASTPATH_NGPIO should be anything else than
> ARCH_NR_GPIO this has to be established somewhere
> as a floor or half or something, but I would just set it as
> the same as ARCH_NR_GPIOS...
>
> The main reason this define exist is for this function
> from <linux/gpio/consumer.h>:
>
> /* Convert between the old gpio_ and new gpiod_ interfaces */
> struct gpio_desc *gpio_to_desc(unsigned gpio);
>
> Nowadays that fact is a bit obscured since the variable is
> only used when assigning the base (in the global GPIO
> number space, which is what we want to get rid of but
> sigh) in gpiochip_find_base() where it attempts to place
> a newly allocated gpiochip in the higher region of this
> numberspace since the embedded SoC GPIO base tends
> to be 0, on old platforms.
>
> So I don't know about this.
>
> Can't we just use ARCH_NR_GPIOS?
>
> Very few systems have more than 512 assigned global
> GPIO numbers and those are FPGA experimental machines.
>
> In the long run obviously I want to get rid of these defines
> altogether and only allocate GPIO descriptos dynamically
> so as you see I am reluctant to add new numberspace weirdness
> around here.
Isn't that for total GPIO's in the system?
And the arrays just need to cater for max per chip?
From what I can understand of the code which is admittedly limited.
--
Regards
Phil Reid
On 04/12/2018 05:39 PM, Phil Reid wrote:
> On 12/04/2018 16:38, Linus Walleij wrote:
>> On Wed, Apr 11, 2018 at 3:03 AM, Laura Abbott <[email protected]> wrote:
>>
>>> The new challenge is to remove VLAs from the kernel
>>> (see https://lkml.org/lkml/2018/3/7/621) to eventually
>>> turn on -Wvla.
>>>
>>> Using a kmalloc array is the easy way to fix this but kmalloc is still
>>> more expensive than stack allocation. Introduce a fast path with a
>>> fixed size stack array to cover most chip with gpios below some fixed
>>> amount. The slow path dynamically allocates an array to cover those
>>> chips with a large number of gpios.
>>>
>>> Reviewed-and-tested-by: Lukas Wunner <[email protected]>
>>> Signed-off-by: Lukas Wunner <[email protected]>
>>> Signed-off-by: Laura Abbott <[email protected]>
>>> ---
>>> v4: Changed some local variables to avoid coccinelle warnings. Added a
>>> warning if the number of GPIOs exceeds the current fast path define.
>>>
>>> Lukas, I kept your Tested-by because the changes were pretty minimal.
>>> Let me know if you want to run the tests again.
>>
>> This patch is starting to look really good.
>>
>>> +/*
>>> + * Number of GPIOs to use for the fast path in set array
>>> + */
>>> +#define FASTPATH_NGPIO 256
>>
>> There is still some comment about this.
>>
>> And now that I am also tryint to think I wonder about it, we
>> have a global ARCH_NR_GPIOS that is typically 512.
>> Some archs set it up.
>>
>> This define is something of an abomination, in the ARM
>> case it comes from arch/arm/include/asm/gpio.h
>> where #define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO
>> where the latter is a Kconfig option that is mostly 512 for
>> most ARM systems.
>>
>> Well, ARM looks like this:
>>
>> config ARCH_NR_GPIO
>> int
>> default 2048 if ARCH_SOCFPGA
>> default 1024 if ARCH_BRCMSTB || ARCH_SHMOBILE || ARCH_TEGRA || \
>> ARCH_ZYNQ
>> default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \
>> SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210
>> default 416 if ARCH_SUNXI
>> default 392 if ARCH_U8500
>> default 352 if ARCH_VT8500
>> default 288 if ARCH_ROCKCHIP
>> default 264 if MACH_H4700
>> default 0
>> help
>> Maximum number of GPIOs in the system.
>>
>> If unsure, leave the default value.
>>
>> So if FASTPATH_NGPIO should be anything else than
>> ARCH_NR_GPIO this has to be established somewhere
>> as a floor or half or something, but I would just set it as
>> the same as ARCH_NR_GPIOS...
>>
>> The main reason this define exist is for this function
>> from <linux/gpio/consumer.h>:
>>
>> /* Convert between the old gpio_ and new gpiod_ interfaces */
>> struct gpio_desc *gpio_to_desc(unsigned gpio);
>>
>> Nowadays that fact is a bit obscured since the variable is
>> only used when assigning the base (in the global GPIO
>> number space, which is what we want to get rid of but
>> sigh) in gpiochip_find_base() where it attempts to place
>> a newly allocated gpiochip in the higher region of this
>> numberspace since the embedded SoC GPIO base tends
>> to be 0, on old platforms.
>>
>> So I don't know about this.
>>
>> Can't we just use ARCH_NR_GPIOS?
>>
>> Very few systems have more than 512 assigned global
>> GPIO numbers and those are FPGA experimental machines.
>>
>> In the long run obviously I want to get rid of these defines
>> altogether and only allocate GPIO descriptos dynamically
>> so as you see I am reluctant to add new numberspace weirdness
>> around here.
> Isn't that for total GPIO's in the system?
> And the arrays just need to cater for max per chip?
> From what I can understand of the code which is admittedly limited.
>
>
Yeah the switch back to 256 was a mistake on my end (I think I
grabbed an incorrect version for my base). ARCH_NR_GPIOs
is the total number in the system which may be multiple
chips so yes we would be possibly allocating more space
than necessary.
unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]
unsigned long fastpath[2 * BITS_TO_LONGS(512)]
unsigned long fastpath[2 * DIV_ROUND_UP(512, 8 * sizeof(long))]
so we end up with 128 bytes on the stack total assuming I
can do math correctly. I think this a fairly reasonable
amount though, even if we are over-estimating if there are
multiple chips.
Thanks,
Laura
On 14/04/2018 05:10, Laura Abbott wrote:
> On 04/12/2018 05:39 PM, Phil Reid wrote:
>> On 12/04/2018 16:38, Linus Walleij wrote:
>>> On Wed, Apr 11, 2018 at 3:03 AM, Laura Abbott <[email protected]> wrote:
>>>
>>>> The new challenge is to remove VLAs from the kernel
>>>> (see https://lkml.org/lkml/2018/3/7/621) to eventually
>>>> turn on -Wvla.
>>>>
>>>> Using a kmalloc array is the easy way to fix this but kmalloc is still
>>>> more expensive than stack allocation. Introduce a fast path with a
>>>> fixed size stack array to cover most chip with gpios below some fixed
>>>> amount. The slow path dynamically allocates an array to cover those
>>>> chips with a large number of gpios.
>>>>
>>>> Reviewed-and-tested-by: Lukas Wunner <[email protected]>
>>>> Signed-off-by: Lukas Wunner <[email protected]>
>>>> Signed-off-by: Laura Abbott <[email protected]>
>>>> ---
>>>> v4: Changed some local variables to avoid coccinelle warnings. Added a
>>>> warning if the number of GPIOs exceeds the current fast path define.
>>>>
>>>> Lukas, I kept your Tested-by because the changes were pretty minimal.
>>>> Let me know if you want to run the tests again.
>>>
>>> This patch is starting to look really good.
>>>
>>>> +/*
>>>> + * Number of GPIOs to use for the fast path in set array
>>>> + */
>>>> +#define FASTPATH_NGPIO 256
>>>
>>> There is still some comment about this.
>>>
>>> And now that I am also tryint to think I wonder about it, we
>>> have a global ARCH_NR_GPIOS that is typically 512.
>>> Some archs set it up.
>>>
>>> This define is something of an abomination, in the ARM
>>> case it comes from arch/arm/include/asm/gpio.h
>>> where #define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO
>>> where the latter is a Kconfig option that is mostly 512 for
>>> most ARM systems.
>>>
>>> Well, ARM looks like this:
>>>
>>> config ARCH_NR_GPIO
>>> int
>>> default 2048 if ARCH_SOCFPGA
>>> default 1024 if ARCH_BRCMSTB || ARCH_SHMOBILE || ARCH_TEGRA || \
>>> ARCH_ZYNQ
>>> default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \
>>> SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210
>>> default 416 if ARCH_SUNXI
>>> default 392 if ARCH_U8500
>>> default 352 if ARCH_VT8500
>>> default 288 if ARCH_ROCKCHIP
>>> default 264 if MACH_H4700
>>> default 0
>>> help
>>> Maximum number of GPIOs in the system.
>>>
>>> If unsure, leave the default value.
>>>
>>> So if FASTPATH_NGPIO should be anything else than
>>> ARCH_NR_GPIO this has to be established somewhere
>>> as a floor or half or something, but I would just set it as
>>> the same as ARCH_NR_GPIOS...
>>>
>>> The main reason this define exist is for this function
>>> from <linux/gpio/consumer.h>:
>>>
>>> /* Convert between the old gpio_ and new gpiod_ interfaces */
>>> struct gpio_desc *gpio_to_desc(unsigned gpio);
>>>
>>> Nowadays that fact is a bit obscured since the variable is
>>> only used when assigning the base (in the global GPIO
>>> number space, which is what we want to get rid of but
>>> sigh) in gpiochip_find_base() where it attempts to place
>>> a newly allocated gpiochip in the higher region of this
>>> numberspace since the embedded SoC GPIO base tends
>>> to be 0, on old platforms.
>>>
>>> So I don't know about this.
>>>
>>> Can't we just use ARCH_NR_GPIOS?
>>>
>>> Very few systems have more than 512 assigned global
>>> GPIO numbers and those are FPGA experimental machines.
>>>
>>> In the long run obviously I want to get rid of these defines
>>> altogether and only allocate GPIO descriptos dynamically
>>> so as you see I am reluctant to add new numberspace weirdness
>>> around here.
>> Isn't that for total GPIO's in the system?
>> And the arrays just need to cater for max per chip?
>> From what I can understand of the code which is admittedly limited.
>>
>>
>
> Yeah the switch back to 256 was a mistake on my end (I think I
> grabbed an incorrect version for my base). ARCH_NR_GPIOs
> is the total number in the system which may be multiple
> chips so yes we would be possibly allocating more space
> than necessary.
>
> unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]
>
> unsigned long fastpath[2 * BITS_TO_LONGS(512)]
> unsigned long fastpath[2 * DIV_ROUND_UP(512, 8 * sizeof(long))]
>
> so we end up with 128 bytes on the stack total assuming I
> can do math correctly. I think this a fairly reasonable
> amount though, even if we are over-estimating if there are
> multiple chips.
>
Yeah that's not too bad.
My system is a SOCFPGA so it'd be 2048 / 8 = 512.
Still not unreasonable.
But the system doesn't have a single gpio close to that.
The largest chip is 32.
--
Regards
Phil Reid