Beloved friends,
I would like to request your feedback on the following idea. I hope I have
made sure to CC all the right people and the right lists! If not, PLEASE
let me know! I couldn't find a MAINTAINERS entry for gpiolib so I just
used what I saw in the git log and have also added people and lists that
I think may be interested.
This is just an RFC. If you all feel it is looking like the right approach
then I'll clean it up and make it a patch.
Thanks,
jaya
am300epd was doing 800*600*16*gpio_set_value for each framebuffer transfer
(it uses 16-pins of gpio as its data bus). I found this caused a wee
performance limitation. This patch adds an API for gpio_set_value_bus
which allows users to set batches of consecutive gpio together in a single
call. I have done a test implementation on gumstix (pxa255) with am300epd
and it provides a nice improvement in performance.
Signed-off-by: Jaya Kumar <[email protected]>
Cc: David Brownell <[email protected]>
Cc: David Brownell <[email protected]>
Cc: Sam Ravnborg <[email protected]>
Cc: Jean Delvare <[email protected]>
Cc: Eric Miao <[email protected]>
Cc: Haavard Skinnemoen <[email protected]>
Cc: Philipp Zabel <[email protected]>
Cc: Russell King <[email protected]>
Cc: Ben Gardner <[email protected]>
CC: Greg KH <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
arch/arm/mach-pxa/am300epd.c | 9 ++++++
arch/arm/mach-pxa/gpio.c | 28 +++++++++++++++++++++
arch/arm/mach-pxa/include/mach/gpio.h | 24 ++++++++++++++++++
drivers/gpio/gpiolib.c | 44 +++++++++++++++++++++++++++++++++
include/asm-generic/gpio.h | 6 ++++
5 files changed, 111 insertions(+), 0 deletions(-)
diff --git a/arch/arm/mach-pxa/am300epd.c b/arch/arm/mach-pxa/am300epd.c
index bb81564..f741a98 100644
--- a/arch/arm/mach-pxa/am300epd.c
+++ b/arch/arm/mach-pxa/am300epd.c
@@ -222,8 +222,17 @@ static void am300_set_hdb(struct broadsheetfb_par *par, u16 data)
{
int i;
+#if 1
+ gpio_set_value_bus(DB0_GPIO_PIN, data, 16);
+#endif
+#if 0
+ gpio_set_value_bus(DB0_GPIO_PIN, data, 6);
+ gpio_set_value_bus(DB0_GPIO_PIN + 6, data >> 6, 10);
+#endif
+#if 0 /* simple set */
for (i = 0; i <= (DB15_GPIO_PIN - DB0_GPIO_PIN) ; i++)
gpio_set_value(DB0_GPIO_PIN + i, (data >> i) & 0x01);
+#endif
}
diff --git a/arch/arm/mach-pxa/gpio.c b/arch/arm/mach-pxa/gpio.c
index 14930cf..80b0aa1 100644
--- a/arch/arm/mach-pxa/gpio.c
+++ b/arch/arm/mach-pxa/gpio.c
@@ -132,6 +132,33 @@ static void pxa_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
__raw_writel(mask, pxa->regbase + GPCR_OFFSET);
}
+/*
+ * Set output GPIO level in batches
+ */
+static void pxa_gpio_set_bus(struct gpio_chip *chip, unsigned offset,
+ int values, int bitwidth)
+{
+ struct pxa_gpio_chip *pxa;
+ int mask;
+
+ /* we're guaranteed by the caller that offset + bitwidth remains
+ * in this chip.
+ */
+ pxa = container_of(chip, struct pxa_gpio_chip, chip);
+
+ mask = ((1 << bitwidth) - 1) << offset;
+ values <<= offset;
+
+ values &= mask;
+ if (values)
+ __raw_writel(values, pxa->regbase + GPSR_OFFSET);
+
+ values = ~values;
+ values &= mask;
+ if (values)
+ __raw_writel(values, pxa->regbase + GPCR_OFFSET);
+}
+
#define GPIO_CHIP(_n) \
[_n] = { \
.regbase = GPIO##_n##_BASE, \
@@ -141,6 +168,7 @@ static void pxa_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
.direction_output = pxa_gpio_direction_output, \
.get = pxa_gpio_get, \
.set = pxa_gpio_set, \
+ .set_bus = pxa_gpio_set_bus, \
.base = (_n) * 32, \
.ngpio = 32, \
}, \
diff --git a/arch/arm/mach-pxa/include/mach/gpio.h b/arch/arm/mach-pxa/include/mach/gpio.h
index 2c538d8..6ee92d3 100644
--- a/arch/arm/mach-pxa/include/mach/gpio.h
+++ b/arch/arm/mach-pxa/include/mach/gpio.h
@@ -56,6 +56,30 @@ static inline void gpio_set_value(unsigned gpio, int value)
}
}
+static inline void gpio_set_value_bus(unsigned gpio, int values, int bitwidth)
+{
+ if (__builtin_constant_p(gpio) &&
+ (gpio + bitwidth < NR_BUILTIN_GPIO) &&
+ ((gpio + bitwidth) <= roundup(gpio+1, 32))) {
+ int shift, mask;
+
+ shift = gpio % 32;
+ mask = ((1 << bitwidth) - 1) << shift;
+ values <<= shift;
+
+ values &= mask;
+ if (values)
+ GPSR(gpio) = values;
+
+ values = ~values;
+ values &= mask;
+ if (values)
+ GPCR(gpio) = values;
+ } else {
+ __gpio_set_value_bus(gpio, values, bitwidth);
+ }
+}
+
#define gpio_cansleep __gpio_cansleep
#define gpio_to_irq(gpio) IRQ_GPIO(gpio)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9112830..fddf3af 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1057,6 +1057,50 @@ void __gpio_set_value(unsigned gpio, int value)
EXPORT_SYMBOL_GPL(__gpio_set_value);
/**
+ * __gpio_set_value_bus() - assign a batch of consecutive gpio pins together
+ * @gpio: starting gpio pin
+ * @values: values to assign, sequential in host ordering
+ * @bitwidth: total width of bits to be assigned
+ * Context: any
+ *
+ * This is used directly or indirectly to implement gpio_set_value().
+ * It invokes the associated gpio_chip.set() method.
+ */
+void __gpio_set_value_bus(unsigned gpio, int values, int bitwidth)
+{
+ struct gpio_chip *chip;
+ int i = 0;
+ int value, width, remwidth;
+
+ do {
+ chip = gpio_to_chip(gpio + i);
+ WARN_ON(extra_checks && chip->can_sleep);
+
+ if (!chip->set_bus) {
+ while (((gpio + i) < (chip->base + chip->ngpio))
+ && bitwidth) {
+ value = values & (1 << i);
+ chip->set(chip, gpio + i - chip->base, value);
+ i++;
+ bitwidth--;
+ }
+ } else {
+ value = values >> i; /* shift off the used stuff */
+ remwidth = ((chip->base + (int) chip->ngpio) -
+ ((int) gpio + i));
+ width = min(bitwidth, remwidth);
+
+ chip->set_bus(chip, gpio + i - chip->base, value,
+ width);
+ i += width;
+ bitwidth -= width;
+ }
+ } while (bitwidth);
+}
+EXPORT_SYMBOL_GPL(__gpio_set_value_bus);
+
+
+/**
* __gpio_cansleep() - report whether gpio value access will sleep
* @gpio: gpio in question
* Context: any
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 81797ec..9747517 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -44,6 +44,8 @@ struct module;
* returns either the value actually sensed, or zero
* @direction_output: configures signal "offset" as output, or returns error
* @set: assigns output value for signal "offset"
+ * @set_bus: batch assigns output values for consecutive signals starting at
+ * "offset" with width in bits "bitwidth"
* @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
* implementation may not sleep
* @dbg_show: optional routine to show contents in debugfs; default code
@@ -84,6 +86,9 @@ struct gpio_chip {
unsigned offset, int value);
void (*set)(struct gpio_chip *chip,
unsigned offset, int value);
+ void (*set_bus)(struct gpio_chip *chip,
+ unsigned offset, int values,
+ int bitwidth);
int (*to_irq)(struct gpio_chip *chip,
unsigned offset);
@@ -124,6 +129,7 @@ extern void gpio_set_value_cansleep(unsigned gpio, int value);
*/
extern int __gpio_get_value(unsigned gpio);
extern void __gpio_set_value(unsigned gpio, int value);
+extern void __gpio_set_value_bus(unsigned gpio, int values, int bitwidth);
extern int __gpio_cansleep(unsigned gpio);
--
1.5.2.3
On Wed, Nov 26, 2008 at 6:52 AM, Jaya Kumar <[email protected]> wrote:
> Beloved friends,
>
> I would like to request your feedback on the following idea. I hope I have
> made sure to CC all the right people and the right lists! If not, PLEASE
> let me know! I couldn't find a MAINTAINERS entry for gpiolib so I just
> used what I saw in the git log and have also added people and lists that
> I think may be interested.
>
> This is just an RFC. If you all feel it is looking like the right approach
> then I'll clean it up and make it a patch.
>
> Thanks,
> jaya
>
> am300epd was doing 800*600*16*gpio_set_value for each framebuffer transfer
> (it uses 16-pins of gpio as its data bus). I found this caused a wee
> performance limitation. This patch adds an API for gpio_set_value_bus
> which allows users to set batches of consecutive gpio together in a single
> call. I have done a test implementation on gumstix (pxa255) with am300epd
> and it provides a nice improvement in performance.
Using a bit mask will be more generic if the GPIOs are not contiguous.
Yet I still doubt this will be generic enough to be added to gpiolib.
The user of this gpio_set_value_bus() may assume too much about
the internal, e.g. how many GPIOs on the chip and whether these GPIOs
are contiguous or not, and whether this GPIO chip support bitwise
operations.
Let's have a concrete example: what if the user gives a bunch of GPIOs
that crosses the chip boundary, say, GPIO29 - GPIO35 (with each chip
covering 32 GPIOs).
On Tue, Nov 25, 2008 at 8:20 PM, Eric Miao <[email protected]> wrote:
> On Wed, Nov 26, 2008 at 6:52 AM, Jaya Kumar <[email protected]> wrote:
>> Beloved friends,
>>
>> I would like to request your feedback on the following idea. I hope I have
>> made sure to CC all the right people and the right lists! If not, PLEASE
>> let me know! I couldn't find a MAINTAINERS entry for gpiolib so I just
>> used what I saw in the git log and have also added people and lists that
>> I think may be interested.
>>
>> This is just an RFC. If you all feel it is looking like the right approach
>> then I'll clean it up and make it a patch.
>>
>> Thanks,
>> jaya
>>
>> am300epd was doing 800*600*16*gpio_set_value for each framebuffer transfer
>> (it uses 16-pins of gpio as its data bus). I found this caused a wee
>> performance limitation. This patch adds an API for gpio_set_value_bus
>> which allows users to set batches of consecutive gpio together in a single
>> call. I have done a test implementation on gumstix (pxa255) with am300epd
>> and it provides a nice improvement in performance.
>
> Using a bit mask will be more generic if the GPIOs are not contiguous.
I agree that a bitmask would be more generic.
However, the primary use case for this type of functionality that I
could think of was for devices that are treating gpio as a bus. That's
the place where I think this type of functionality would be
beneficial. In the non-contiguous case, where that device is using
separate bits of gpio (ie: via a bitmask), that would be likely for
control rather than for pushing data in bulk.
> Yet I still doubt this will be generic enough to be added to gpiolib.
> The user of this gpio_set_value_bus() may assume too much about
> the internal, e.g. how many GPIOs on the chip and whether these GPIOs
> are contiguous or not, and whether this GPIO chip support bitwise
> operations.
In this patch, I tried to avoid having the user need to know the
details of the underlying gpio support. For example, in this patch,
the user, am300epd.c does:
gpio_set_value_bus(DB0_GPIO_PIN, data, 16);
and the generic code just does a fallback to software if that's not viable:
+ if (!chip->set_bus) {
+ while (((gpio + i) < (chip->base + chip->ngpio))
+ && bitwidth) {
+ value = values & (1 << i);
+ chip->set(chip, gpio + i - chip->base, value);
+ i++;
+ bitwidth--;
+ }
>
> Let's have a concrete example: what if the user gives a bunch of GPIOs
> that crosses the chip boundary, say, GPIO29 - GPIO35 (with each chip
> covering 32 GPIOs).
>
Okay. So paraphrasing, we have GPIO29-GPIO35 which is 7 bits wide. I
assume the crossing is at GPIO32 being 2nd chip, ie: chip->ngpio = 32.
In the above code, that would hit the following case:
+ do {
+ chip = gpio_to_chip(gpio + i);
+ WARN_ON(extra_checks && chip->can_sleep);
+
+ if (!chip->set_bus) {
+ while (((gpio + i) < (chip->base + chip->ngpio))
+ && bitwidth) {
+ value = values & (1 << i);
+ chip->set(chip, gpio + i - chip->base, value);
+ i++;
+ bitwidth--;
+ }
+ } else {
+ value = values >> i; /* shift off the used stuff */
+ remwidth = ((chip->base + (int) chip->ngpio) -
+ ((int) gpio + i));
+ width = min(bitwidth, remwidth);
+
+ chip->set_bus(chip, gpio + i - chip->base, value,
+ width);
+ i += width;
+ bitwidth -= width;
+ }
+ } while (bitwidth);
so on the first GPIO29-31, it would have value = values >> 0,
chip->base = 0 , ngpio = 32, gpio = 29, i = 0 so width = 3, so it
would do set_bus(chip, 29, value, 3)
and then i += 3, bitwidth -= 3 and so now we do chip = gpio_to_chip(29
+ 3 = 32) so we get the next chip and handle the remaing bits. So in
this scenario that works fine.
In fact, in the case of am300epd.c, it is an identical scenario
because DB0 is pin 58 and it is 16 bits wide so we hit 2 chips, ie :
32 and 64.
Thanks,
jaya
On Tuesday 25 November 2008, Eric Miao wrote:
> Using a bit mask will be more generic if the GPIOs are not contiguous.
> Yet I still doubt this will be generic enough to be added to gpiolib.
My expectation for this kind of mechanism was that systems who need
to craft another parallel bus out of GPIO pins would be doing this
with some system-specific utility functions.
So my "is it generic enough" question is more at the level of "Are
there enough Linux systems that need this sort of thing to justify
generic support?". I happen not to have come across the need for
such ganged access from Linux (yet). Whereas I've yet to use non-x86
Linux systems that don't need to manipulate individual GPIO pins...
> The user of this gpio_set_value_bus() may assume too much about
> the internal, e.g. how many GPIOs on the chip and whether these GPIOs
> are contiguous or not, and whether this GPIO chip support bitwise
> operations.
Actually I would expect that to be addressed by the hardware designer.
As in, if you're bitbanging a 16-bit parallel bus (plus several
control signals -- chip select lines, address latch, r/w, etc) the
board would be designed for efficient bitbanging, by taking care
that the software bus ops aren't stupidly complex. So I guess I'm
agreeing with Eric there: wanting this kind of stuff at all seems
to imply being fairly low-down'n'dirty.
Example, assuming a 32 bit GPIO bank, the data lines would probably
be all adjacent and politely ordered by the board designer so that
/* write a 16 bit value on the specfied data lines,
* assuming the intermediate state doesn't matter...
*/
writew(0xffff << N, &bank->clear_bits);
writew(value << N, &bank->set_bits);
instead of needing to compute some complex permutation of those
bits ... and similarly
/* read a 16 bit value from the specified data lines */
value = 0xffff & (readw(&bank->read_bits) >> N);
possibly after handshaking with the device on the other side
about changing signal direction, again without permutation.
But heck, maybe there just aren't that many adjacent GPIOs free,
because of alternate functions that are used... ugh.
Note also that this proposal only includes
> > +???????void????????????????????(*set_bus)(struct gpio_chip *chip,
> > +??????????????????????????????????????????unsigned offset, int values,
> > +??????????????????????????????????????????int bitwidth);
not its sibling read operation.
> Let's have a concrete example: what if the user gives a bunch of GPIOs
> that crosses the chip boundary, say, GPIO29 - GPIO35 (with each chip
> covering 32 GPIOs).
I'd care more about the upper level operation being performed ... like the
control protocol for passing the address of a word being read or written
and then switching the bus from address to data read (or write) mode to
get the word, then yielding the bus access.
- Dave
On Tue, Nov 25, 2008 at 11:15 PM, David Brownell <[email protected]> wrote:
> On Tuesday 25 November 2008, Eric Miao wrote:
>> Using a bit mask will be more generic if the GPIOs are not contiguous.
>> Yet I still doubt this will be generic enough to be added to gpiolib.
>
> My expectation for this kind of mechanism was that systems who need
> to craft another parallel bus out of GPIO pins would be doing this
> with some system-specific utility functions.
>
> So my "is it generic enough" question is more at the level of "Are
> there enough Linux systems that need this sort of thing to justify
> generic support?". I happen not to have come across the need for
> such ganged access from Linux (yet). Whereas I've yet to use non-x86
> Linux systems that don't need to manipulate individual GPIO pins...
I have come across the following scenarios where a bus set of gpio is useful:
- Broadsheet E-Ink controller (uses 16-bit data bus over GPIO)
framebuffer device (this patch is for this)
- Apollo/Hecuba E-Ink controller (uses 8-bit data bus over GPIO)
framebuffer device
- 8-bit parallel IO matrix LCD controllers, such as the Samsung KS108,
also Hitachi, etc
In the area of framebuffers for lower end display devices, I find this
to be quite common.
I have also seen this in systems such as 8-bit A2D devices, also with
various coprocessor solutions where a smaller CPU like a msp430 or
HC05 would clock data to the host using 8-bit gpio data.
>
>
>> The user of this gpio_set_value_bus() may assume too much about
>> the internal, e.g. how many GPIOs on the chip and whether these GPIOs
>> are contiguous or not, and whether this GPIO chip support bitwise
>> operations.
>
> Actually I would expect that to be addressed by the hardware designer.
I agree that the gpio's are always contiguous. It would be very
unusual (I've never seen it yet) where a hardware designer picked
non-consecutive pins to be used for a bus. In the case of AM300, the
board designer picked the xscale's 58 - 73 gpio pins.
>
> As in, if you're bitbanging a 16-bit parallel bus (plus several
> control signals -- chip select lines, address latch, r/w, etc) the
> board would be designed for efficient bitbanging, by taking care
> that the software bus ops aren't stupidly complex. So I guess I'm
> agreeing with Eric there: wanting this kind of stuff at all seems
> to imply being fairly low-down'n'dirty.
Yes, agreed, handling the contiguous bus case turned out to be quite
straightforward and the core is just about 30 lines of code.
+ do {
+ chip = gpio_to_chip(gpio + i);
+ WARN_ON(extra_checks && chip->can_sleep);
+
+ if (!chip->set_bus) {
+ while (((gpio + i) < (chip->base + chip->ngpio))
+ && bitwidth) {
+ value = values & (1 << i);
+ chip->set(chip, gpio + i - chip->base, value);
+ i++;
+ bitwidth--;
+ }
+ } else {
+ value = values >> i; /* shift off the used stuff */
+ remwidth = ((chip->base + (int) chip->ngpio) -
+ ((int) gpio + i));
+ width = min(bitwidth, remwidth);
+
+ chip->set_bus(chip, gpio + i - chip->base, value,
+ width);
+ i += width;
+ bitwidth -= width;
+ }
+ } while (bitwidth);
>
>
> Example, assuming a 32 bit GPIO bank, the data lines would probably
> be all adjacent and politely ordered by the board designer so that
A typical board designer will ensure that the selected pins are
consecutive. But I think given today's rapid development time, I'd be
hard pressed to ensure that they're also register consecutive. In the
case of AM300, the designer picked a pin sequence that spans 2 32-bit
registers since it starts at 58 and ends at 73. So it spans the 32-63
and 64-95 registers. The code handles that case fine.
>
> /* write a 16 bit value on the specfied data lines,
> * assuming the intermediate state doesn't matter...
> */
> writew(0xffff << N, &bank->clear_bits);
> writew(value << N, &bank->set_bits);
>
> instead of needing to compute some complex permutation of those
> bits ... and similarly
>
> /* read a 16 bit value from the specified data lines */
> value = 0xffff & (readw(&bank->read_bits) >> N);
>
> possibly after handshaking with the device on the other side
> about changing signal direction, again without permutation.
>
> But heck, maybe there just aren't that many adjacent GPIOs free,
> because of alternate functions that are used... ugh.
>
>
> Note also that this proposal only includes
>
>> > + void (*set_bus)(struct gpio_chip *chip,
>> > + unsigned offset, int values,
>> > + int bitwidth);
>
> not its sibling read operation.
>
Yes, I figured I'd start with the most basic approach. Also, the
costliest operation in am300epd is the the actual framebuffer transfer
to the device which is just a lot of writes. If people want me to do
it, I can also implement get_bus.
>
>> Let's have a concrete example: what if the user gives a bunch of GPIOs
>> that crosses the chip boundary, say, GPIO29 - GPIO35 (with each chip
>> covering 32 GPIOs).
>
> I'd care more about the upper level operation being performed ... like the
> control protocol for passing the address of a word being read or written
> and then switching the bus from address to data read (or write) mode to
> get the word, then yielding the bus access.
The upper level protocol in this case is from broadsheetfb (also
posted). Here's the relevant code:
+static void broadsheet_issue_data(struct broadsheetfb_par *par, u16 data)
+{
+ par->board->set_ctl(par, BS_WR, 0);
+ par->board->set_hdb(par, data);
+ par->board->set_ctl(par, BS_WR, 1);
+}
...
which is called to transfer the fb via:
+static void broadsheet_burst_write(struct broadsheetfb_par *par, int size,
+ u16 *data)
+{
...
+ for (i = 0; i < size; i++) {
...
+ par->board->set_hdb(par, tmp);
Thanks,
jaya
Jaya Kumar wrote:
> Beloved friends,
>
> I would like to request your feedback on the following idea. I hope I have
> made sure to CC all the right people and the right lists! If not, PLEASE
> let me know! I couldn't find a MAINTAINERS entry for gpiolib so I just
> used what I saw in the git log and have also added people and lists that
> I think may be interested.
>
> This is just an RFC. If you all feel it is looking like the right approach
> then I'll clean it up and make it a patch.
>
> Thanks,
> jaya
>
> am300epd was doing 800*600*16*gpio_set_value for each framebuffer transfer
> (it uses 16-pins of gpio as its data bus). I found this caused a wee
> performance limitation. This patch adds an API for gpio_set_value_bus
> which allows users to set batches of consecutive gpio together in a single
> call. I have done a test implementation on gumstix (pxa255) with am300epd
> and it provides a nice improvement in performance.
>
> Signed-off-by: Jaya Kumar <[email protected]>
> Cc: David Brownell <[email protected]>
> Cc: David Brownell <[email protected]>
> Cc: Sam Ravnborg <[email protected]>
> Cc: Jean Delvare <[email protected]>
> Cc: Eric Miao <[email protected]>
> Cc: Haavard Skinnemoen <[email protected]>
> Cc: Philipp Zabel <[email protected]>
> Cc: Russell King <[email protected]>
> Cc: Ben Gardner <[email protected]>
> CC: Greg KH <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
>
> ---
> arch/arm/mach-pxa/am300epd.c | 9 ++++++
> arch/arm/mach-pxa/gpio.c | 28 +++++++++++++++++++++
> arch/arm/mach-pxa/include/mach/gpio.h | 24 ++++++++++++++++++
> drivers/gpio/gpiolib.c | 44 +++++++++++++++++++++++++++++++++
> include/asm-generic/gpio.h | 6 ++++
> 5 files changed, 111 insertions(+), 0 deletions(-)
>
[...]
> diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
> index 81797ec..9747517 100644
> --- a/include/asm-generic/gpio.h
> +++ b/include/asm-generic/gpio.h
> @@ -44,6 +44,8 @@ struct module;
> * returns either the value actually sensed, or zero
> * @direction_output: configures signal "offset" as output, or returns error
> * @set: assigns output value for signal "offset"
> + * @set_bus: batch assigns output values for consecutive signals starting at
> + * "offset" with width in bits "bitwidth"
> * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
> * implementation may not sleep
> * @dbg_show: optional routine to show contents in debugfs; default code
> @@ -84,6 +86,9 @@ struct gpio_chip {
> unsigned offset, int value);
> void (*set)(struct gpio_chip *chip,
> unsigned offset, int value);
> + void (*set_bus)(struct gpio_chip *chip,
> + unsigned offset, int values,
I think values should be unsigned
> + int bitwidth);
>
> int (*to_irq)(struct gpio_chip *chip,
> unsigned offset);
> @@ -124,6 +129,7 @@ extern void gpio_set_value_cansleep(unsigned gpio, int value);
> */
> extern int __gpio_get_value(unsigned gpio);
> extern void __gpio_set_value(unsigned gpio, int value);
> +extern void __gpio_set_value_bus(unsigned gpio, int values, int bitwidth);
>
> extern int __gpio_cansleep(unsigned gpio);
>
On Wed, Nov 26, 2008 at 4:09 AM, Paulius Zaleckas
<[email protected]> wrote:
> Jaya Kumar wrote:
>> * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
>> * implementation may not sleep
>> * @dbg_show: optional routine to show contents in debugfs; default code
>> @@ -84,6 +86,9 @@ struct gpio_chip {
>> unsigned offset, int value);
>> void (*set)(struct gpio_chip *chip,
>> unsigned offset, int value);
>> + void (*set_bus)(struct gpio_chip *chip,
>> + unsigned offset, int values,
>
> I think values should be unsigned
>
Okay, can do but it is unusual no? since set uses int value, i figured
set_bus should be similar right?
Thanks,
jaya
On Wed, 26 Nov 2008, Jaya Kumar wrote:
> On Wed, Nov 26, 2008 at 4:09 AM, Paulius Zaleckas
> <[email protected]> wrote:
> > Jaya Kumar wrote:
> >> * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
> >> * implementation may not sleep
> >> * @dbg_show: optional routine to show contents in debugfs; default code
> >> @@ -84,6 +86,9 @@ struct gpio_chip {
> >> unsigned offset, int value);
> >> void (*set)(struct gpio_chip *chip,
> >> unsigned offset, int value);
> >> + void (*set_bus)(struct gpio_chip *chip,
> >> + unsigned offset, int values,
> >
> > I think values should be unsigned
> >
>
> Okay, can do but it is unusual no? since set uses int value, i figured
> set_bus should be similar right?
->set() sets one pin, right? So it's either 0 or 1.
->set_bus() sets multiple pins. With `int', it will fail for bit 31, as that's
the sign bit.
Perhaps you even want u32, to make it clear what's the maximum number of pins
you can set in one shot?
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On Wed, Nov 26, 2008 at 5:08 AM, Geert Uytterhoeven
<[email protected]> wrote:
> On Wed, 26 Nov 2008, Jaya Kumar wrote:
>> On Wed, Nov 26, 2008 at 4:09 AM, Paulius Zaleckas
>> <[email protected]> wrote:
>> > Jaya Kumar wrote:
>> >> * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
>> >> * implementation may not sleep
>> >> * @dbg_show: optional routine to show contents in debugfs; default code
>> >> @@ -84,6 +86,9 @@ struct gpio_chip {
>> >> unsigned offset, int value);
>> >> void (*set)(struct gpio_chip *chip,
>> >> unsigned offset, int value);
>> >> + void (*set_bus)(struct gpio_chip *chip,
>> >> + unsigned offset, int values,
>> >
>> > I think values should be unsigned
>> >
>>
>> Okay, can do but it is unusual no? since set uses int value, i figured
>> set_bus should be similar right?
>
> ->set() sets one pin, right? So it's either 0 or 1.
>
> ->set_bus() sets multiple pins. With `int', it will fail for bit 31, as that's
> the sign bit.
I'm not sure I understood this. I imagine the usage will be like
int values = 0x8000_0000;
set_bus(..., start_pin, values);
write_32(register, values); // will still work to set bit 31
(values >> 31) == 1;
>
> Perhaps you even want u32, to make it clear what's the maximum number of pins
> you can set in one shot?
Yes, I agree, I will change to use u32 as it is then explicit.
Thanks,
jaya
On Wed, 26 Nov 2008, Jaya Kumar wrote:
> On Wed, Nov 26, 2008 at 5:08 AM, Geert Uytterhoeven
> <[email protected]> wrote:
> > On Wed, 26 Nov 2008, Jaya Kumar wrote:
> >> On Wed, Nov 26, 2008 at 4:09 AM, Paulius Zaleckas
> >> <[email protected]> wrote:
> >> > Jaya Kumar wrote:
> >> >> * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
> >> >> * implementation may not sleep
> >> >> * @dbg_show: optional routine to show contents in debugfs; default code
> >> >> @@ -84,6 +86,9 @@ struct gpio_chip {
> >> >> unsigned offset, int value);
> >> >> void (*set)(struct gpio_chip *chip,
> >> >> unsigned offset, int value);
> >> >> + void (*set_bus)(struct gpio_chip *chip,
> >> >> + unsigned offset, int values,
> >> >
> >> > I think values should be unsigned
> >>
> >> Okay, can do but it is unusual no? since set uses int value, i figured
> >> set_bus should be similar right?
> >
> > ->set() sets one pin, right? So it's either 0 or 1.
> >
> > ->set_bus() sets multiple pins. With `int', it will fail for bit 31, as that's
> > the sign bit.
>
> I'm not sure I understood this. I imagine the usage will be like
>
> int values = 0x8000_0000;
> set_bus(..., start_pin, values);
>
> write_32(register, values); // will still work to set bit 31
Tehcnically, it will work. But it's more confusing if it's int.
> (values >> 31) == 1;
However, be careful with shifts, as right shifts on signed values will
duplicate te sign bit.
> > Perhaps you even want u32, to make it clear what's the maximum number of pins
> > you can set in one shot?
>
> Yes, I agree, I will change to use u32 as it is then explicit.
OK. Thx!
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On Wed, Nov 26, 2008 at 12:51:27AM -0500, Jaya Kumar wrote:
> On Tue, Nov 25, 2008 at 11:15 PM, David Brownell <[email protected]> wrote:
> > On Tuesday 25 November 2008, Eric Miao wrote:
> >> Using a bit mask will be more generic if the GPIOs are not contiguous.
> >> Yet I still doubt this will be generic enough to be added to gpiolib.
> >
> > My expectation for this kind of mechanism was that systems who need
> > to craft another parallel bus out of GPIO pins would be doing this
> > with some system-specific utility functions.
> >
> > So my "is it generic enough" question is more at the level of "Are
> > there enough Linux systems that need this sort of thing to justify
> > generic support?". I happen not to have come across the need for
> > such ganged access from Linux (yet). Whereas I've yet to use non-x86
> > Linux systems that don't need to manipulate individual GPIO pins...
>
> I have come across the following scenarios where a bus set of gpio is useful:
> - Broadsheet E-Ink controller (uses 16-bit data bus over GPIO)
> framebuffer device (this patch is for this)
> - Apollo/Hecuba E-Ink controller (uses 8-bit data bus over GPIO)
> framebuffer device
> - 8-bit parallel IO matrix LCD controllers, such as the Samsung KS108,
> also Hitachi, etc
We have such a system at work. And we need fast acces to the gpio pins
when updating the LCD.
I have not written/looked to deep at the code I just recall it was
a bit messy and not something I would be proud of submitting to any ML.
Sam
On Fri, Nov 28, 2008 at 4:01 AM, Sam Ravnborg <[email protected]> wrote:
> On Wed, Nov 26, 2008 at 12:51:27AM -0500, Jaya Kumar wrote:
>> On Tue, Nov 25, 2008 at 11:15 PM, David Brownell <[email protected]> wrote:
>> > On Tuesday 25 November 2008, Eric Miao wrote:
>> >> Using a bit mask will be more generic if the GPIOs are not contiguous.
>> >> Yet I still doubt this will be generic enough to be added to gpiolib.
>> >
>> > My expectation for this kind of mechanism was that systems who need
>> > to craft another parallel bus out of GPIO pins would be doing this
>> > with some system-specific utility functions.
>> >
>> > So my "is it generic enough" question is more at the level of "Are
>> > there enough Linux systems that need this sort of thing to justify
>> > generic support?". I happen not to have come across the need for
>> > such ganged access from Linux (yet). Whereas I've yet to use non-x86
>> > Linux systems that don't need to manipulate individual GPIO pins...
>>
>> I have come across the following scenarios where a bus set of gpio is useful:
>> - Broadsheet E-Ink controller (uses 16-bit data bus over GPIO)
>> framebuffer device (this patch is for this)
>> - Apollo/Hecuba E-Ink controller (uses 8-bit data bus over GPIO)
>> framebuffer device
>> - 8-bit parallel IO matrix LCD controllers, such as the Samsung KS108,
>> also Hitachi, etc
>
> We have such a system at work. And we need fast acces to the gpio pins
> when updating the LCD.
> I have not written/looked to deep at the code I just recall it was
> a bit messy and not something I would be proud of submitting to any ML.
>
> Sam
>
Okay. Please help me understand in case I misunderstood. Are you
saying the code that I posted is too messy? To me, actually I am proud
of it. :-) But if some parts look messy, I'm happy to work on
improving it. I will need some advice though and please advise me on
which parts look messy.
Thanks,
jaya
On Fri, Nov 28, 2008 at 07:43:31AM +0800, Jaya Kumar wrote:
> On Fri, Nov 28, 2008 at 4:01 AM, Sam Ravnborg <[email protected]> wrote:
> > On Wed, Nov 26, 2008 at 12:51:27AM -0500, Jaya Kumar wrote:
> >> On Tue, Nov 25, 2008 at 11:15 PM, David Brownell <[email protected]> wrote:
> >> > On Tuesday 25 November 2008, Eric Miao wrote:
> >> >> Using a bit mask will be more generic if the GPIOs are not contiguous.
> >> >> Yet I still doubt this will be generic enough to be added to gpiolib.
> >> >
> >> > My expectation for this kind of mechanism was that systems who need
> >> > to craft another parallel bus out of GPIO pins would be doing this
> >> > with some system-specific utility functions.
> >> >
> >> > So my "is it generic enough" question is more at the level of "Are
> >> > there enough Linux systems that need this sort of thing to justify
> >> > generic support?". I happen not to have come across the need for
> >> > such ganged access from Linux (yet). Whereas I've yet to use non-x86
> >> > Linux systems that don't need to manipulate individual GPIO pins...
> >>
> >> I have come across the following scenarios where a bus set of gpio is useful:
> >> - Broadsheet E-Ink controller (uses 16-bit data bus over GPIO)
> >> framebuffer device (this patch is for this)
> >> - Apollo/Hecuba E-Ink controller (uses 8-bit data bus over GPIO)
> >> framebuffer device
> >> - 8-bit parallel IO matrix LCD controllers, such as the Samsung KS108,
> >> also Hitachi, etc
> >
> > We have such a system at work. And we need fast acces to the gpio pins
> > when updating the LCD.
> > I have not written/looked to deep at the code I just recall it was
> > a bit messy and not something I would be proud of submitting to any ML.
> >
> > Sam
> >
>
> Okay. Please help me understand in case I misunderstood. Are you
> saying the code that I posted is too messy? To me, actually I am proud
> of it. :-) But if some parts look messy, I'm happy to work on
> improving it. I will need some advice though and please advise me on
> which parts look messy.
Nope - the code we use at work is too messy. What you posted looks
much better.
Sam
On Wednesday 26 November 2008, Jaya Kumar wrote:
> On Wed, Nov 26, 2008 at 4:09 AM, Paulius Zaleckas wrote:
> > Jaya Kumar wrote:
> >> void (*set)(struct gpio_chip *chip,
> >> unsigned offset, int value);
> >> + void (*set_bus)(struct gpio_chip *chip,
> >> + unsigned offset, int values,
> >
> > I think values should be unsigned
>
> Okay, can do but it is unusual no?
For bitmasks, anything except "unsigned long" is unusual.
In fact, <linux/bitmask.h> uses them in arrays...
If this goes through, I suspect it would be fair to expect
a gpio_chip to work with only one word at at time. SOC based
GPIO controllers stick to their natural word sizes (32 or 16
bits in most cases), in arrays, and I've yet to come across
external controllers that are any different. So passing in
arrays of "unsigned long" would seem to be overkill.
However, "set_bus" is seems misleading to me: "set" because
it makes me think it's writing to the set_bits register,
which won't clear things; "bus" because this is doing ganged
operations, which aren't only for busses ... and in fact a
bus operation probably needs multiple ganged operations,
e.g. first write the address, handshake, then write data and
handshake again.
Maybe words like "assign" and "bitmask" would get past those
particular issues... though I don't hugely like "assign".
In terms of low level primitives, I've already commented
that the "read" side is missing. An additional issue came
to mind: the policy of using contiguous bits should not
be mandated here. It doesn't need to be, either ... just
pass a mask of valid bits, along with a mask of values.
> since set uses int value, i figured set_bus should be similar right?
It doesn't take a bitmask; that's a single zero/nonzero value,
for which the sign (bit) is irrelevant. It's signed mostly to
distinguish the two parameters by type, so it's less easy to
confuse them; sometimes the compiler will point out goofage.
(Plus, the offsets can ever be negative.)
- Dave
On Thursday 27 November 2008, Sam Ravnborg wrote:
> > > So my "is it generic enough" question is more at the level of "Are
> > > there enough Linux systems that need this sort of thing to justify
> > > generic support?". ?I happen not to have come across the need for
> > > such ganged access from Linux (yet). ?Whereas I've yet to use non-x86
> > > Linux systems that don't need to manipulate individual GPIO pins...
> >
> > I have come across the following scenarios where a bus set of gpio is useful:
> > - Broadsheet E-Ink controller (uses 16-bit data bus over GPIO)
> > framebuffer device (this patch is for this)
> > - Apollo/Hecuba E-Ink controller (uses 8-bit data bus over GPIO)
> > framebuffer device
> > - 8-bit parallel IO matrix LCD controllers, such as the Samsung KS108,
> > also Hitachi, etc
All of those can also be handled with traditional parallel data
busses too, of course. Are you saying you've seen these used
with Linux systems? Enough to justify generic support?
I've certainly seen how some of those LCD controllers, graphical
or character based, work with microcontrollers. They're very
widely available too ... so I can imagine various uClinux systems
have one hooked up. Most of the Linux system hookups I've happened
across use USB or serial links (e.g. the crystalfontz.com stuff)
since modern PCs are severely GPIO-deprived.
Another example where ganged access might help is keypad matrices.
> We have such a system at work. And we need fast acces to the gpio pins
> when updating the LCD.
> I have not written/looked to deep at the code I just recall it was
> a bit messy and not something I would be proud of submitting to any ML.
Often times such messiness is more because the code never got
cleaned up, because it's kind of a one-off to support particular
boards.
You could say that the gpiolib implementation framework got its
start, in part, as a way to unclutter some kernel trees that
had way too much one-off stuff like that. ;)
- Dave
On Tuesday 25 November 2008, Jaya Kumar wrote:
> In the area of framebuffers for lower end display devices, I find this
> to be quite common.
I'd have said "display modules" myself, although I don't
currently have any Linux systems using them. Some of
those modules use SPI, since it tends support DMA for
downloading the image data ... although admittedly those
are not the very lowest end devices.
> I have also seen this in systems such as 8-bit A2D devices, also with
> various coprocessor solutions where a smaller CPU like a msp430 or
> HC05 would clock data to the host using 8-bit gpio data.
When I hear "coprocessor" I think more like Video DSPs. ;)
And for microcontrollers (msp430, avr8, etc) hooking up,
I've usually seen them use a serial bus (I2C, SPI, etc).
But it's also true that when data transfer rates matter,
it's faster to talk to microcontrollers using a parallel
bus than such a serial bus. The Linux SOC it's talking
to can probably clock SPI an order of magnitude faster
than the microcontroller tolerates. And the reason you
want a dedicated microcontroller is for predictable
(a.k.a. "realtime") monitoring tasks, which they can't
do if they're spending all their time talking to Linux.
Sounds like you're working with a bunch of semicustom
monitoring/control designs?
If you want to pursue this, I'd like to get the gpio_chip
proposal in place first: bitmask read and write, without
forcing an "all bits are contiguous" policy. Lightweight.
Maybe it should suffice to have a gpio_chip support the
bitmask ops, instead of just bit-at-a-time ones... so it'd
be practical to incorporate this by itself, given patches
to convert a couple gpio_chip implementations.
Then separately two more things:
- A gpio_* interface proposal for higher-level bitmask calls.
This is worth some discussion; the calls should clearly
be optional (not everything will implement them), and I
can't help but suspect <linux/bitmap.h> interfaces should
interoperate smoothly.
- A gpiolib based implementation, using those new gpio_chip
methods as accelerators if they're present. This should
probably also be optional, even at the Kconfig level; many
systems won't need to spend memory on these calls.
Right now you seem to have smooshed together those three
things, which probably helped get your sample driver going
faster but isn't a very good way to move forward. Don't
assume gpiolib when defining the interface.
- Dave
On Sun, Nov 30, 2008 at 6:47 AM, David Brownell <[email protected]> wrote:
>
> In terms of low level primitives, I've already commented
> that the "read" side is missing. An additional issue came
> to mind: the policy of using contiguous bits should not
> be mandated here. It doesn't need to be, either ... just
> pass a mask of valid bits, along with a mask of values.
>
Agreed. Will implement read side. I think I agree with Eric and you on
the bitmask now. It should not add much complexity, I think. I'll work
on adding it. Given that _bus() isn't appropriate anymore, how does
"gpio_set_values" and .sets sound and correspondingly gpio_get_values
and .gets?
Thanks,
jaya
Jean emailed me saying he didn't want to be CCed.
On Sun, Nov 30, 2008 at 6:48 AM, David Brownell <[email protected]> wrote:
> On Thursday 27 November 2008, Sam Ravnborg wrote:
>> > > So my "is it generic enough" question is more at the level of "Are
>> > > there enough Linux systems that need this sort of thing to justify
>> > > generic support?". I happen not to have come across the need for
>> > > such ganged access from Linux (yet). Whereas I've yet to use non-x86
>> > > Linux systems that don't need to manipulate individual GPIO pins...
>> >
>> > I have come across the following scenarios where a bus set of gpio is useful:
>> > - Broadsheet E-Ink controller (uses 16-bit data bus over GPIO)
>> > framebuffer device (this patch is for this)
>> > - Apollo/Hecuba E-Ink controller (uses 8-bit data bus over GPIO)
>> > framebuffer device
>> > - 8-bit parallel IO matrix LCD controllers, such as the Samsung KS108,
>> > also Hitachi, etc
>
> All of those can also be handled with traditional parallel data
> busses too, of course. Are you saying you've seen these used
> with Linux systems? Enough to justify generic support?
Assuming I've understood you correctly about generic support, yes. I
used the 3 above with Linux arm systems that were interfaced to them
via gpio. The matrix displays had just 128x64 so the 8x gpio_set_value
penalty wasn't a biggie. The E-Ink displays range from 800x600 to
1200x826 so the 16x gpio_set_value became a bottleneck. In terms of
generic support, the Broadsheet display controller is also used on
i.MX31 and SC2410/2440 boards so those platform drivers (assuming
those good folks submit them, (i've only written and submitted
am300epd.c)) should also benefit.
Thanks,
jaya
On Sun, Nov 30, 2008 at 6:54 AM, David Brownell <[email protected]> wrote:
> If you want to pursue this, I'd like to get the gpio_chip
> proposal in place first: bitmask read and write, without
> forcing an "all bits are contiguous" policy. Lightweight.
Yup, yup, I definitely want to pursue it. I want Linux e-paper
displays to be zippy. :-) Agreed, I will do bitmask read and write.
Hey, wait a sec, bitmask write definitely. but bitmask read is
peculiar to me. I can understand the caller would want to do something
like foo = gpio_get_values(gpio, bitwidth). But would they really want
to do foo = gpio_get_values(gpio, bitwidth, bitmask) rather than deal
with it appropriately themselves?
>
> Maybe it should suffice to have a gpio_chip support the
> bitmask ops, instead of just bit-at-a-time ones... so it'd
> be practical to incorporate this by itself, given patches
> to convert a couple gpio_chip implementations.
Ok, you've scared me there. I only have a Gumstix board so I can do it
for the pxa255 but will need help if more platforms are needed.
Exploiting this opportunity for hardware whoring, if anyone wants to
send me free hardware, I'll be ecstatic and will eagerly do support
duty on said platforms. :-)
>
> Then separately two more things:
>
> - A gpio_* interface proposal for higher-level bitmask calls.
> This is worth some discussion; the calls should clearly
> be optional (not everything will implement them), and I
> can't help but suspect <linux/bitmap.h> interfaces should
> interoperate smoothly.
>
> - A gpiolib based implementation, using those new gpio_chip
> methods as accelerators if they're present. This should
> probably also be optional, even at the Kconfig level; many
> systems won't need to spend memory on these calls.
Understood. I will make it optional. Does
CONFIG_GPIOLIB_MULTIBIT_ACCESS sound okay?
>
> Right now you seem to have smooshed together those three
> things, which probably helped get your sample driver going
> faster but isn't a very good way to move forward. Don't
Yes, my goal with this was to get started and get feedback early. :-)
> assume gpiolib when defining the interface.
Ok, I didn't understand this part. I think you mentioned a higher
level interface before but I didn't fully understand, if not gpiolib,
then at what level do you recommend?
Thanks,
jaya
On Saturday 29 November 2008, Jaya Kumar wrote:
> Given that _bus() isn't appropriate anymore, how does
> "gpio_set_values" and .sets sound and correspondingly gpio_get_values
> and .gets?
Confusingly similar to the single value version: a single letter.
Far better to use "bitmask" or something similar ...
... also, better to split out discussions of gpio_chip methods
(NOT used by all implementors of the GPIO interfaces) from the
discussion of those interfaces (implemented uniformly on all
platforms that support any of the calls).
On Saturday 29 November 2008, Jaya Kumar wrote:
> On Sun, Nov 30, 2008 at 6:54 AM, David Brownell <[email protected]> wrote:
> > If you want to pursue this, I'd like to get the gpio_chip
> > proposal in place first: bitmask read and write, without
> > forcing an "all bits are contiguous" policy. Lightweight.
>
> Yup, yup, I definitely want to pursue it. I want Linux e-paper
> displays to be zippy. :-) Agreed, I will do bitmask read and write.
> Hey, wait a sec, bitmask write definitely. but bitmask read is
> peculiar to me. I can understand the caller would want to do something
> like foo = gpio_get_values(gpio, bitwidth). But would they really want
> to do foo = gpio_get_values(gpio, bitwidth, bitmask) rather than deal
> with it appropriately themselves?
They wouldn't want names so easily confused with the current set
of GPIO calls, no.
But if they're using GPIOs as a low-bandwidth parallel bus they'd
most *certainly* need to be able to read, not just write. That's
what the "I" part of "GPIO" represents: "I"nput (vs "O"utput).
> > Maybe it should suffice to have a gpio_chip support the
> > bitmask ops, instead of just bit-at-a-time ones... so it'd
> > be practical to incorporate this by itself, given patches
> > to convert a couple gpio_chip implementations.
>
> Ok, you've scared me there. I only have a Gumstix board so I can do it
> for the pxa255 but will need help if more platforms are needed.
> Exploiting this opportunity for hardware whoring, if anyone wants to
> send me free hardware, I'll be ecstatic and will eagerly do support
> duty on said platforms. :-)
Doesn't necessarily need to be *you* doing that, but if it only
works on older gumstix boards it'd not be general enough. Which
is part of why I say to get the lowest level proposal out there
first. If done right, it'll be easy to support on other chips.
> >
> > Then separately two more things:
> >
> > - A gpio_* interface proposal for higher-level bitmask calls.
> > This is worth some discussion; the calls should clearly
> > be optional (not everything will implement them), and I
> > can't help but suspect <linux/bitmap.h> interfaces should
> > interoperate smoothly.
... including probably ganged request/free, and direction updates.
When bitbanging a multiplexed parallel databus, you'll often need
to change directions.
> >
> > - A gpiolib based implementation, using those new gpio_chip
> > methods as accelerators if they're present. This should
> > probably also be optional, even at the Kconfig level; many
> > systems won't need to spend memory on these calls.
>
> Understood. I will make it optional. Does
> CONFIG_GPIOLIB_MULTIBIT_ACCESS sound okay?
Kind of verbose for my taste, and it's already "multibit" (one
at a time, but still multiple) ... let's see some more mergeable
proposals and code first. Different C file too, I suspect.
> > Don't assume gpiolib when defining the interface.
>
> Ok, I didn't understand this part. I think you mentioned a higher
> level interface before but I didn't fully understand, if not gpiolib,
> then at what level do you recommend?
The gpio_*() interfaces are how system glue code and drivers
access GPIOs, unless they roll their own chip-specific calls.
Whereas gpiolib (and gpio_chip) are an *optional* framework
for implementing those calls. Platforms can (and do!) use
other frameworks ... maybe they don't want to pay its costs,
and don't need the various GPIO expander drivers.
So to repeat: don't assume the interface is implemented in
one particular way (using gpiolib). It has to make sense
with other implementation strategies too. Standard split
between interface and implementation, that's all. (Some folk
have been heard to talk about "gpiolib" as the interface to
drivers ... it's not, it's a provider-only interface library.)
- Dave
On Mon, Dec 1, 2008 at 1:55 AM, David Brownell <[email protected]> wrote:
>
> They wouldn't want names so easily confused with the current set
> of GPIO calls, no.
>
Okay, how does gpio_set/get_values_batch() sound?
>
>> >
>> > Then separately two more things:
>> >
>> > - A gpio_* interface proposal for higher-level bitmask calls.
>> > This is worth some discussion; the calls should clearly
>> > be optional (not everything will implement them), and I
>> > can't help but suspect <linux/bitmap.h> interfaces should
>> > interoperate smoothly.
>
> ... including probably ganged request/free, and direction updates.
> When bitbanging a multiplexed parallel databus, you'll often need
> to change directions.
Okay, so I'll also add gpio_direction_output/input_batch and request/free_batch.
>
>
>> >
>> > - A gpiolib based implementation, using those new gpio_chip
>> > methods as accelerators if they're present. This should
>> > probably also be optional, even at the Kconfig level; many
>> > systems won't need to spend memory on these calls.
>>
>> Understood. I will make it optional. Does
>> CONFIG_GPIOLIB_MULTIBIT_ACCESS sound okay?
>
> Kind of verbose for my taste, and it's already "multibit" (one
> at a time, but still multiple) ... let's see some more mergeable
> proposals and code first. Different C file too, I suspect.
Ok, CONFIG_GPIOLIB_BATCH and I'll add this code in
drivers/gpio/gpiolib_batch.c. I hope I have understood that suggestion
correctly.
>
>
>> > Don't assume gpiolib when defining the interface.
>>
>> Ok, I didn't understand this part. I think you mentioned a higher
>> level interface before but I didn't fully understand, if not gpiolib,
>> then at what level do you recommend?
>
> The gpio_*() interfaces are how system glue code and drivers
> access GPIOs, unless they roll their own chip-specific calls.
>
> Whereas gpiolib (and gpio_chip) are an *optional* framework
> for implementing those calls. Platforms can (and do!) use
> other frameworks ... maybe they don't want to pay its costs,
> and don't need the various GPIO expander drivers.
>
> So to repeat: don't assume the interface is implemented in
> one particular way (using gpiolib). It has to make sense
> with other implementation strategies too. Standard split
> between interface and implementation, that's all. (Some folk
> have been heard to talk about "gpiolib" as the interface to
> drivers ... it's not, it's a provider-only interface library.)
>
Okay, I read above several times and I read Documentation/gpio.txt.
But... I'm not confident I've understood your meanings above in terms
of how it should change the code. What I know so far is:
a)
arch/arm/mach-pxa/include/mach/gpio.h is where the pxa GPIO api
interface is defined. So that's where I will put the
gpio_get/set_values_batch functions. This will match the existing
gpio_set/get_value code there.
b)
arch/arm/mach-pxa/gpio.c is where the implementation is.
So I'll put the pxa_gpio_direction_input/output_batch and
pxa_gpio_set/get_batch there.
c)
drivers/gpio/gpiolib_batch.c
This is where I'd put the generic platform independent implementations
of __gpio_set/get_values_batch
Does this sound consistent with your recommendation? If not, I need
more help to understand what changes you are recommending.
> Doesn't necessarily need to be *you* doing that, but if it only
> works on older gumstix boards it'd not be general enough. Which
> is part of why I say to get the lowest level proposal out there
> first. If done right, it'll be easy to support on other chips.
Yes, I completely agree that it must work on a wide range of
architectures. But I don't understand what you mean when you say get
the lowest level proposal out there first. I see the RFC code that I
posted as being the lowest level proposal (albeit needing better name
and bitmask support) and one that is sufficiently general that it can
be implemented on other architectures. If it can't, can you help me
understand by pointing to which portion would break on other archs?
Thanks,
jaya
On Mon, Dec 1, 2008 at 9:10 AM, Jaya Kumar <[email protected]> wrote:
> On Mon, Dec 1, 2008 at 1:55 AM, David Brownell <[email protected]> wrote:
>>
>> They wouldn't want names so easily confused with the current set
>> of GPIO calls, no.
>>
>
> Okay, how does gpio_set/get_values_batch() sound?
>
>>
>>> >
>>> > Then separately two more things:
>>> >
>>> > - A gpio_* interface proposal for higher-level bitmask calls.
>>> > This is worth some discussion; the calls should clearly
>>> > be optional (not everything will implement them), and I
>>> > can't help but suspect <linux/bitmap.h> interfaces should
>>> > interoperate smoothly.
>>
>> ... including probably ganged request/free, and direction updates.
>> When bitbanging a multiplexed parallel databus, you'll often need
>> to change directions.
>
> Okay, so I'll also add gpio_direction_output/input_batch and request/free_batch.
>
>>
>>
>>> >
>>> > - A gpiolib based implementation, using those new gpio_chip
>>> > methods as accelerators if they're present. This should
>>> > probably also be optional, even at the Kconfig level; many
>>> > systems won't need to spend memory on these calls.
>>>
>>> Understood. I will make it optional. Does
>>> CONFIG_GPIOLIB_MULTIBIT_ACCESS sound okay?
>>
>> Kind of verbose for my taste, and it's already "multibit" (one
>> at a time, but still multiple) ... let's see some more mergeable
>> proposals and code first. Different C file too, I suspect.
>
> Ok, CONFIG_GPIOLIB_BATCH and I'll add this code in
> drivers/gpio/gpiolib_batch.c. I hope I have understood that suggestion
> correctly.
>
>>
>>
>>> > Don't assume gpiolib when defining the interface.
>>>
>>> Ok, I didn't understand this part. I think you mentioned a higher
>>> level interface before but I didn't fully understand, if not gpiolib,
>>> then at what level do you recommend?
>>
>> The gpio_*() interfaces are how system glue code and drivers
>> access GPIOs, unless they roll their own chip-specific calls.
>>
>> Whereas gpiolib (and gpio_chip) are an *optional* framework
>> for implementing those calls. Platforms can (and do!) use
>> other frameworks ... maybe they don't want to pay its costs,
>> and don't need the various GPIO expander drivers.
>>
>> So to repeat: don't assume the interface is implemented in
>> one particular way (using gpiolib). It has to make sense
>> with other implementation strategies too. Standard split
>> between interface and implementation, that's all. (Some folk
>> have been heard to talk about "gpiolib" as the interface to
>> drivers ... it's not, it's a provider-only interface library.)
>>
>
> Okay, I read above several times and I read Documentation/gpio.txt.
> But... I'm not confident I've understood your meanings above in terms
> of how it should change the code. What I know so far is:
>
> a)
> arch/arm/mach-pxa/include/mach/gpio.h is where the pxa GPIO api
> interface is defined. So that's where I will put the
> gpio_get/set_values_batch functions. This will match the existing
> gpio_set/get_value code there.
>
> b)
> arch/arm/mach-pxa/gpio.c is where the implementation is.
> So I'll put the pxa_gpio_direction_input/output_batch and
> pxa_gpio_set/get_batch there.
>
> c)
> drivers/gpio/gpiolib_batch.c
>
> This is where I'd put the generic platform independent implementations
> of __gpio_set/get_values_batch
>
> Does this sound consistent with your recommendation? If not, I need
> more help to understand what changes you are recommending.
>
>
>> Doesn't necessarily need to be *you* doing that, but if it only
>> works on older gumstix boards it'd not be general enough. Which
>> is part of why I say to get the lowest level proposal out there
>> first. If done right, it'll be easy to support on other chips.
>
> Yes, I completely agree that it must work on a wide range of
> architectures. But I don't understand what you mean when you say get
> the lowest level proposal out there first. I see the RFC code that I
> posted as being the lowest level proposal (albeit needing better name
> and bitmask support) and one that is sufficiently general that it can
> be implemented on other architectures. If it can't, can you help me
> understand by pointing to which portion would break on other archs?
>
Oh, gosh darn it, how time has flown. My email above was to make sure
I have understood the feedback. I assume I should just get started on
implementing. Just to double check, the plan is:
- add bitmask support.
- add get_batch support
- improve naming. I think gpio_get_batch/gpio_set_batch sounds good. I
plan to have something like:
void gpio_set_batch(unsigned gpio, u32 values, u32 bitmask, int bitwidth);
u32 gpio_get_batch(unsigned gpio, u32 bitmask, int bitwidth);
I think I should explain the bitmask and bitwidth choice. The intended
use case is:
for (i=0; i < 800*600; i++) {
gpio_set_batch(...)
}
bitwidth (needed to iterate and map to chip ngpios) could be
calculated from bitmask, but that involves iteratively counting bits
from the mask, so we would have to do 800*600 bit counts. Unless, we
do ugly things like cache the previous bitwidth/mask and compare
against the current caller arguments. Given that the bitwidth would
typically be a fixed value, I believe it is more intuitive for the
caller to provide it, eg:
gpio_set_batch(DB0, value, 0xFFFF, 16)
which has the nice performance benefit of skipping all the bit
counting in the most common use case scenario.
While we are here, I was thinking about it, and its better if I give
gpio_request/free/direction_batch a miss for now. Nothing prevents
those features being added at a later point. That way I can focus on
getting gpio_set/get_batch implemented correctly and merged as the
first step.
Thanks,
jaya
On Sat 27 Dec 2008 09:55, Jaya Kumar pondered:
> Oh, gosh darn it, how time has flown. My email above was to make sure
> I have understood the feedback. I assume I should just get started on
> implementing. Just to double check, the plan is:
> - add bitmask support.
> - add get_batch support
> - improve naming. I think gpio_get_batch/gpio_set_batch sounds good.
for what it is worth - I don't like 'batch'.
According to wikipedia[1] - in computer science, batch typically refers to:
* Batch (Unix), a command to queue jobs for later execution
* Batch file, in DOS, OS/2, and Microsoft Windows, a text file containing
a series of commands intended to be executed
* Batch processing, execution of a series of programs on a computer
without human interaction
* Batch renaming, the process of renaming multiple computer files
and folders in an automated fashion
None of these are really what you are talking about. Batch normally only
refers to a group when taking about baking - A batch of cookies - at least
where I grew up....
I think that I would prefer 'group' or 'collection' to use some of the words
that David described things as 'ganged' or 'bus' or 'bank' or 'adjacent'.
(but I think 'adjacent' or 'bank' really is an implementation convention).
I think I would also prefer to name things like gpio_bus_* as a rather than
gpio_*_bus - so the operation always is on the end...
> I
> plan to have something like:
>
> void gpio_set_batch(unsigned gpio, u32 values, u32 bitmask, int bitwidth);
Shouldn't the write return an error code (if the data was not written)?
> u32 gpio_get_batch(unsigned gpio, u32 bitmask, int bitwidth);
Both assume specific SoC and board level impmentation details (which I ask
below).
> I think I should explain the bitmask and bitwidth choice. The intended
> use case is:
> for (i=0; i < 800*600; i++) {
> gpio_set_batch(...)
> }
>
> bitwidth (needed to iterate and map to chip ngpios) could be
> calculated from bitmask, but that involves iteratively counting bits
> from the mask, so we would have to do 800*600 bit counts. Unless, we
> do ugly things like cache the previous bitwidth/mask and compare
> against the current caller arguments. Given that the bitwidth would
> typically be a fixed value, I believe it is more intuitive for the
> caller to provide it, eg:
>
> gpio_set_batch(DB0, value, 0xFFFF, 16)
>
> which has the nice performance benefit of skipping all the bit
> counting in the most common use case scenario.
but has the requirement that the driver know exactly the board level
impmentation details (something that doesn't sound generic).
> While we are here, I was thinking about it, and its better if I give
> gpio_request/free/direction_batch a miss for now. Nothing prevents
> those features being added at a later point.
I don't think that request/free are optional.
For example - in most SoC implementations - gpios are implemented as banks of
16 or 32. (a 16 or 32 bit register).
Are there facilities to span these registers?
- can you request 64 gpios as a 'bank'?
- can you request gpio_8 -> gpio_40 as a 'bank' on a 32-bit system?
Are non-adjacent/non-contiguous gpios avaliable to be put into
a 'bank/batch/bus'? can you use gpio_8 -> 11 & 28 -> 31 as a 8-bit 'bus'?
How do you know what is avaliable to be talked to as a bank/bus/batch without
the request/free operation?
I have seen various hardware designs (both at the PCB and SoC level) require
all of these options, and would like to see common infrastructure which
handles this.
The issue is that on many SoC implementations - dedicated peripherals can also
be GPIOs - so it someone wants to use SPI (for example) GPIO's 3->7 might be
removed from the avaliable 'gpio' resources. This is determined by the
silicon designer - and even the PCB designer has little to no flexibility on
this. It gets worse as multiple SPI or I2C are used on the PCB (which can
have lots of small (less than 5) dedicated pins in the middle of the larger
gpio resources)....
I would think that a 'bank' / 'bus' (whatever) would be a collection of
random/multiple GPIOs (a struct of gpio_port_t) rather than a start/length
(as you described) - or better yet - the request function takes a list (of
individual GPIO's - defined in the platform data), and creates the struct
itself.
Something like the way gpio_keys are defined...
static struct gpio_bus bfin_gpio_bus_table[] = {
{BIT_0, GPIO_PB8, 1, "gpio-bus: BIT0"},
{BIT_1, GPIO_PB9, 1, "gpio-bus: BIT1"},
{BIT_2, GPIO_PB10, 1, "gpio-bus: BIT2"},
{BIT_3, GPIO_PB11, 1, "gpio-bus: BIT3"},
};
static struct gpio_bus_data bfin_gpio_bus_data = {
.bits = bfin_gpio_bus_table,
.width = ARRAY_SIZE(bfin_gpio_keys_table),
};
static struct platform_device bfin_device_gpiobus = {
.name = "gpio-bus",
.dev = {
.platform_data = &bfin_gpio_bus_data,
},
};
The request function builds up the bus/masks/shifts from that...
-Robin
[1] http://en.wikipedia.org/wiki/Batch
On Sun, 2008-12-28 at 13:46 -0500, Robin Getz wrote:
> > gpio_set_batch(DB0, value, 0xFFFF, 16)
> >
> > which has the nice performance benefit of skipping all the bit
> > counting in the most common use case scenario.
>
> but has the requirement that the driver know exactly the board level
> impmentation details (something that doesn't sound generic).
The original use case for these batch operations was in a fastpath -
setting data lines on a framebuffer. Sure it's arguably not as generic
as may be, but it optimises for speed and current usage patterns - I'm
OK with that. Other usage patterns which don't have a speed requirement
can be done using the individual pin operations and a loop.
>
> > While we are here, I was thinking about it, and its better if I give
> > gpio_request/free/direction_batch a miss for now. Nothing prevents
> > those features being added at a later point.
>
> I don't think that request/free are optional.
>
> For example - in most SoC implementations - gpios are implemented as banks of
> 16 or 32. (a 16 or 32 bit register).
>
> Are there facilities to span these registers?
> - can you request 64 gpios as a 'bank'?
> - can you request gpio_8 -> gpio_40 as a 'bank' on a 32-bit system?
>
> Are non-adjacent/non-contiguous gpios avaliable to be put into
> a 'bank/batch/bus'? can you use gpio_8 -> 11 & 28 -> 31 as a 8-bit 'bus'?
>
> How do you know what is avaliable to be talked to as a bank/bus/batch without
> the request/free operation?
I think the read/write operations should be able to fail if you give
them invalid chunks of gpio, sure. Request/free are not really designed
for that operation - they just ensure exclusive access to a gpio if
that's what the driver wants. In the batch case the
request/free/direction operations can once again be performed by single
pin operations and iteration.
>
>
> I have seen various hardware designs (both at the PCB and SoC level) require
> all of these options, and would like to see common infrastructure which
> handles this.
>
> The issue is that on many SoC implementations - dedicated peripherals can also
> be GPIOs - so it someone wants to use SPI (for example) GPIO's 3->7 might be
> removed from the avaliable 'gpio' resources. This is determined by the
> silicon designer - and even the PCB designer has little to no flexibility on
> this. It gets worse as multiple SPI or I2C are used on the PCB (which can
> have lots of small (less than 5) dedicated pins in the middle of the larger
> gpio resources)....
Yeah the request/free operation doesn't deal with muxing or any other
platform-specific kinda gumph, that was an original design decision.
They're really just a usage counter.
An example which comes to mind is the avr32-specific userspace gpio
interface. This takes a bitmask, loops over the set bits and fails if
any of the gpio are previously requested or have been assigned to
non-gpio peripherals. I don't really see a need to streamline this.
>
> I would think that a 'bank' / 'bus' (whatever) would be a collection of
> random/multiple GPIOs (a struct of gpio_port_t) rather than a start/length
> (as you described) - or better yet - the request function takes a list (of
> individual GPIO's - defined in the platform data), and creates the struct
> itself.
Hmm, this seems a little overengineered for the basic use-cases I can
think of. If this can be cranked up to the same speed as the current
proposition then OK maybe someone will like it but otherwise, once
again, I think most people will be happy with individual operations and
iteration.
--Ben.
On Mon, Dec 29, 2008 at 6:00 AM, Ben Nizette <[email protected]> wrote:
> On Sun, 2008-12-28 at 13:46 -0500, Robin Getz wrote:
>> > gpio_set_batch(DB0, value, 0xFFFF, 16)
>> >
>> > which has the nice performance benefit of skipping all the bit
>> > counting in the most common use case scenario.
>>
>> but has the requirement that the driver know exactly the board level
>> impmentation details (something that doesn't sound generic).
>
> The original use case for these batch operations was in a fastpath -
> setting data lines on a framebuffer. Sure it's arguably not as generic
> as may be, but it optimises for speed and current usage patterns - I'm
> OK with that. Other usage patterns which don't have a speed requirement
> can be done using the individual pin operations and a loop.
Hi Ben, Yes, exactly! Thanks!
Hi Robin, I think the randomly or less ordered gpio case that you
raised should be solved using the existing gpio_set/get single bit
value API. I intended this startpin, mask and length based API is to
help drivers that would currently hit a performance limitation while
keeping it just sufficiently abstract that it can be accelerated on an
implementation specific basis without too much of a headache.
btw, about the term batch, I had looked it up originally and seen:
google define:batch
A collection of products or data which is treated as one entity with
respect to certain operations eg processing and production.
http://www.shipping.francoudi.com/main/main.asp
so I felt it was a sufficiently meaningful and matching term to use.
Also, I had used the term gpio_set_value_bus in the first patch to try
to be close to the existing gpio_set/get_value API. I switched to
gpio_set_batch because it was pointed out that _bus could be
misleading. All that said, I'm happy to change it to the term around
which consensus forms. :-)
>
>>
>> > While we are here, I was thinking about it, and its better if I give
>> > gpio_request/free/direction_batch a miss for now. Nothing prevents
>> > those features being added at a later point.
>>
>> I don't think that request/free are optional.
>>
>> For example - in most SoC implementations - gpios are implemented as banks of
>> 16 or 32. (a 16 or 32 bit register).
>>
>> Are there facilities to span these registers?
>> - can you request 64 gpios as a 'bank'?
>> - can you request gpio_8 -> gpio_40 as a 'bank' on a 32-bit system?
I think as discussed above, these cases would be better solved using
the single bit set/get API.
>>
>> Are non-adjacent/non-contiguous gpios avaliable to be put into
>> a 'bank/batch/bus'? can you use gpio_8 -> 11 & 28 -> 31 as a 8-bit 'bus'?
>>
>> How do you know what is avaliable to be talked to as a bank/bus/batch without
>> the request/free operation?
>
> I think the read/write operations should be able to fail if you give
> them invalid chunks of gpio, sure. Request/free are not really designed
I think we can do that for read/write but we'd need to break
consistency with the existing api. The existing gpio_set_value and
gpio_get_value don't offer an error return.
> for that operation - they just ensure exclusive access to a gpio if
> that's what the driver wants. In the batch case the
> request/free/direction operations can once again be performed by single
> pin operations and iteration.
Agreed.
>
>>
>>
>> I have seen various hardware designs (both at the PCB and SoC level) require
>> all of these options, and would like to see common infrastructure which
>> handles this.
>>
>> The issue is that on many SoC implementations - dedicated peripherals can also
>> be GPIOs - so it someone wants to use SPI (for example) GPIO's 3->7 might be
>> removed from the avaliable 'gpio' resources. This is determined by the
>> silicon designer - and even the PCB designer has little to no flexibility on
>> this. It gets worse as multiple SPI or I2C are used on the PCB (which can
>> have lots of small (less than 5) dedicated pins in the middle of the larger
>> gpio resources)....
>
> Yeah the request/free operation doesn't deal with muxing or any other
> platform-specific kinda gumph, that was an original design decision.
> They're really just a usage counter.
>
> An example which comes to mind is the avr32-specific userspace gpio
> interface. This takes a bitmask, loops over the set bits and fails if
> any of the gpio are previously requested or have been assigned to
> non-gpio peripherals. I don't really see a need to streamline this.
>
>>
>> I would think that a 'bank' / 'bus' (whatever) would be a collection of
>> random/multiple GPIOs (a struct of gpio_port_t) rather than a start/length
>> (as you described) - or better yet - the request function takes a list (of
>> individual GPIO's - defined in the platform data), and creates the struct
>> itself.
Robin, I agree with you that trying to achieve unstructured multiple
GPIO support is elegant. The start/length approach I've taken here is
definitely not as generic. But by giving up some pure abstraction,
we've gained in implementation simplicity for each platform. Also,
there's nothing in my patch that would in future prevent us from
adding the random capability via an API akin to what you've suggested.
I'd like to get the start/length approach out there and in-play to
find out if other people want to use it and how they end up using it.
Thanks,
jaya
On Sunday 30 November 2008, Jaya Kumar wrote:
> > The gpio_*() interfaces are how system glue code and drivers
> > access GPIOs, unless they roll their own chip-specific calls.
> >
> > Whereas gpiolib (and gpio_chip) are an *optional* framework
> > for implementing those calls. ?Platforms can (and do!) use
> > other frameworks ... maybe they don't want to pay its costs,
> > and don't need the various GPIO expander drivers.
> >
> > So to repeat: ?don't assume the interface is implemented in
> > one particular way (using gpiolib). ?It has to make sense
> > with other implementation strategies too. ?Standard split
> > between interface and implementation, that's all. ?(Some folk
> > have been heard to talk about "gpiolib" as the interface to
> > drivers ... it's not, it's a provider-only interface library.)
> >
>
> Okay, I read above several times and I read Documentation/gpio.txt.
> But... I'm not confident I've understood your meanings above in terms
> of how it should change the code. What I know so far is:
>
> a)
> arch/arm/mach-pxa/include/mach/gpio.h is where the pxa GPIO api
> interface is defined.
It's actually where the *implementation* is *declared*. The
API is defined primarily in Documentation/gpio.txt ... though
in this case the implementation mostly punts to gpiolib. And
as you note below, the implementation is *defined* elsewhere.
> So that's where I will put the
> gpio_get/set_values_batch functions. This will match the existing
> gpio_set/get_value code there.
Those functions just punt to gpiolib, unlesss they happen
to fit in the "constant parameters" case where they can
expend to two or three instructions inline, rather than a
more expensive function call.
In this case, I don't see any benefit to supporting that
"inline constant parameters" case.
> b)
> arch/arm/mach-pxa/gpio.c is where the implementation is.
> So I'll put the pxa_gpio_direction_input/output_batch and
> pxa_gpio_set/get_batch there.
This is where the PXA implementation is *defined*, yes.
> c)
> drivers/gpio/gpiolib_batch.c
>
> This is where I'd put the generic platform independent implementations
> of __gpio_set/get_values_batch
>
> Does this sound consistent with your recommendation? If not, I need
> more help to understand what changes you are recommending.
More or less, yes. I'd have to see actual code. :)
> > Doesn't necessarily need to be *you* doing that, but if it only
> > works on older gumstix boards it'd not be general enough. ?Which
> > is part of why I say to get the lowest level proposal out there
> > first. ?If done right, it'll be easy to support on other chips.
>
> Yes, I completely agree that it must work on a wide range of
> architectures. But I don't understand what you mean when you say get
> the lowest level proposal out there first.
Provide a patch/RFD with
(0) methods you want to add to "struct gpio_chip".
Nothing else. That should be the easiest part of this change.
Other patches should probably include:
(1) one with the proposed driver level interface ... needs
discussion, so beginning with a patch may not be best;
(2) one *implementing* that interface using current calls;
(3) another *optimizing* that interface to use the new low
level methods (0), on chips where they're available;
(4) gpio_chip implementations for those low level ops.
Plus at least one example of how to use the interfaces in (1),
by the time everything starts to become solid, and implementation
patches are worth while.
(I'm not keen on starting with code to implement interfaces, except
when the interfaces are trivial or obvious. Interfaces are hard to
change. It's best to spend some time up-front improving them, while
they're easy to change/fix.)
> I see the RFC code that I
> posted as being the lowest level proposal (albeit needing better name
> and bitmask support) and one that is sufficiently general that it can
> be implemented on other architectures.
To repeat: what you've sent so far is mixing layers. It doesn't split
out the programming interface from its implementation at all.
Yet the whole *point* of having gpiolib is to make that split easy.
Which is why I want to see patches that reflect that architectural
split, instead of a hard-to-review megapatch that crosses all the
existing boundaries and doesn't show how non-PXA hardware could
implement these patches (or benefit from them).
- Dave
On Saturday 27 December 2008, Jaya Kumar wrote:
> Oh, gosh darn it, how time has flown. My email above was to make sure
> I have understood the feedback. I assume I should just get started on
> implementing. Just to double check, the plan is:
> - add bitmask support.
> - add get_batch support
> - improve naming. I think gpio_get_batch/gpio_set_batch sounds good.
I was expecting to get some agreement on interfaces first.
> I plan to have something like:
>
> void gpio_set_batch(unsigned gpio, u32 values, u32 bitmask, int bitwidth);
> u32 gpio_get_batch(unsigned gpio, u32 bitmask, int bitwidth);
I still don't like the word "batch" here. Did you look
at the <linux/bitmask.h> interfaces as I suggested? They
would suggest something rather different:
- passing bitmasks as {unsigned long *bits, int count} tuples
- separate calls to:
* zero a set of bits (loop: gpio_set_value to 0)
* fill a set of bits (loop: gpio_set_value to 1)
* copy a set of bits (loop: gpio_set_value to src[i])
* read a set of bits (loop: dst[i] = gpio_get_value)
* ... maybe more
Any restriction to 32 bit value seems shortsighted. It would
make sense to wrap the GPIO bitmask descriptions in a struct,
letting drivers work with smaller sets -- probably most would
fit into a single "unsigned long".
> I think I should explain the bitmask and bitwidth choice. The intended
> use case is:
> for (i=0; i < 800*600; i++) {
> ?gpio_set_batch(...)
> }
>
> bitwidth (needed to iterate and map to chip ngpios) could be
> calculated from bitmask, but that involves iteratively counting bits
> from the mask, so we would have to do 800*600 bit counts. Unless, we
> do ugly things like cache the previous bitwidth/mask and compare
> against the current caller arguments. Given that the bitwidth would
> typically be a fixed value, I believe it is more intuitive for the
> caller to provide it, eg:
>
> gpio_set_batch(DB0, value, 0xFFFF, 16)
>
> which has the nice performance benefit of skipping all the bit
> counting in the most common use case scenario.
That doesn't explain the bit mask and bitwidth parameters at all.
> While we are here, I was thinking about it, and its better if I give
> gpio_request/free/direction_batch a miss for now.
Let's not and say we didn't. Providing incomplete interfaces
isn't really much help.
> Nothing prevents
> those features being added at a later point. That way I can focus on
> getting gpio_set/get_batch implemented correctly and merged as the
> first step.
First step needs to be defining the interface extensions needed.
- Dave
On Sunday 28 December 2008, Robin Getz wrote:
> On Sat 27 Dec 2008 09:55, Jaya Kumar pondered:
>
> I think that I would prefer 'group' or 'collection' to use some of the words
> that David described things as 'ganged' or 'bus' or 'bank' or 'adjacent'.
> (but I think 'adjacent' or 'bank' really is an implementation convention).
>
> I think I would also prefer to name things like gpio_bus_* as a rather than
> gpio_*_bus - so the operation always is on the end...
I'd avoid "bus"; the term has specific meanings, which aren't necessarily
relevant in these contexts. Agreed about "adjacent" and "bank".
If it weren't for its verb usage, I'd suggest "set". :)
> Shouldn't the write return an error code (if the data was not written)?
For these operations, I think reads/writes should have that possibility.
The reason single-bit operations don't provide error paths is twofold.
First, they started as wrappers for can't-fail register accessors.
Second, it's extremely unrealisitic to expect much code to handle any
kind of faults in the middle of bitbanging loops ... or even just in
classic "set this bit and continue" configuration code.
Those reasons don't apply well to these multi-bit cases.
> > While we are here, I was thinking about it, and its better if I give
> > gpio_request/free/direction_batch a miss for now. Nothing prevents
> > those features being added at a later point.
>
> I don't think that request/free are optional.
Agreed. If there's any difficulty implementing them, that would
suggest there's a significant conceptual problem to address ...
> For example - in most SoC implementations - gpios are implemented as banks of
> 16 or 32. (a 16 or 32 bit register).
>
> Are there facilities to span these registers?
> - can you request 64 gpios as a 'bank'?
> - can you request gpio_8 -> gpio_40 as a 'bank' on a 32-bit system?
>
> Are non-adjacent/non-contiguous gpios avaliable to be put into
> a 'bank/batch/bus'? can you use gpio_8 -> 11 & 28 -> 31 as a 8-bit 'bus'?
>
> How do you know what is avaliable to be talked to as a bank/bus/batch without
> the request/free operation?
>
>
> I have seen various hardware designs (both at the PCB and SoC level) require
> all of these options, and would like to see common infrastructure which
> handles this.
Right. Get the interface right -- it should allow all those
complexities! -- and maybe take shortcuts in the first versions
of the implementation, by only talking to one gpio_chip at a
time. I'd expect any shortcuts would be resolved quickly, to
the extent they cause problems.
> The issue is that on many SoC implementations - dedicated peripherals can also
> be GPIOs - so it someone wants to use SPI (for example) GPIO's 3->7 might be
> removed from the avaliable 'gpio' resources. This is determined by the
> silicon designer - and even the PCB designer has little to no flexibility on
> this. It gets worse as multiple SPI or I2C are used on the PCB (which can
> have lots of small (less than 5) dedicated pins in the middle of the larger
> gpio resources)....
Such pin-muxing is a different issue though. Don't expect GPIO
calls to resolve those problems.
> I would think that a 'bank' / 'bus' (whatever) would be a collection of
> random/multiple GPIOs (a struct of gpio_port_t) rather than a start/length
> (as you described) - or better yet - the request function takes a list (of
> individual GPIO's - defined in the platform data), and creates the struct
> itself.
That's why I pointed to <linux/bitmask.h> as one model: it allows
an arbitrary set of bits (GPIOs) to be specified.
I'll NAK any "gpio_port_t" name based entirely on kernel coding
conventions though. ;)
> Something like the way gpio_keys are defined...
>
> static struct gpio_bus bfin_gpio_bus_table[] = {
> {BIT_0, GPIO_PB8, 1, "gpio-bus: BIT0"},
> {BIT_1, GPIO_PB9, 1, "gpio-bus: BIT1"},
> {BIT_2, GPIO_PB10, 1, "gpio-bus: BIT2"},
> {BIT_3, GPIO_PB11, 1, "gpio-bus: BIT3"},
> };
>
> static struct gpio_bus_data bfin_gpio_bus_data = {
> .bits = bfin_gpio_bus_table,
> .width = ARRAY_SIZE(bfin_gpio_keys_table),
> };
>
> static struct platform_device bfin_device_gpiobus = {
> .name = "gpio-bus",
> .dev = {
> .platform_data = &bfin_gpio_bus_data,
> },
> };
>
> The request function builds up the bus/masks/shifts from that...
That sort of thing might be made to work. Whatever this
multiple-GPIO-set abstraction is, I suspect you're right
about wanting the request function to be able to build
up some data structures behind it. And that it would be
simpler to require all the GPIOs to be listed up front,
rather than support "add these to the set" requests.
- Dave
On Sunday 28 December 2008, Jaya Kumar wrote:
> I'd like to get the start/length approach out there and in-play to
> find out if other people want to use it and how they end up using it.
As an *implementation* constraint, I might agree ... so
long as it's easily changed later.
As an *interface* constraint, I don't ... interfaces are
rarely easy to change.
However, in terms of implementation, most gpio chips have
primitives that work in terms of bitmasks rather than any
kind of start/length primitive. Example:
- To set bits in "u32 mask":
iowrite32(mask, bank_base + SET_REG)
- To clear bits in "u32 mask"
iowrite32(mask, bank_base + CLR_REG)
- To read bits in "u32 mask",
return mask & ioread32(bank_base + VALUE_REG)
In short, start/length looks most like a policy, of the
"keep them out of interfaces!" flavor, than something
appropriate for an interface. As noted above, gpio_chip
interfaces would more naturally use masks.
- Dave
On Sunday 28 December 2008, Ben Nizette wrote:
> > > gpio_set_batch(DB0, value, 0xFFFF, 16)
> > >
> > > which has the nice performance benefit of skipping all the bit
> > > counting in the most common use case scenario.
> >
> > but has the requirement that the driver know exactly the board level
> > impmentation details (something that doesn't sound generic).
>
> The original use case for these batch operations was in a fastpath -
> setting data lines on a framebuffer.
And the rationale for generalizing them was to let such fastpaths
stop being board-specific hacks. :)
David Brownell wrote:
> The reason single-bit operations don't provide error paths is twofold.
> First, they started as wrappers for can't-fail register accessors.
> Second, it's extremely unrealisitic to expect much code to handle any
> kind of faults in the middle of bitbanging loops ... or even just in
> classic "set this bit and continue" configuration code.
That's interesting. I'm not sure it's a good idea not to return an
error code. The caller can just ignore it if they don't care, and
it's extremely cheap to "return 0" in GPIO drivers which can't error.
If I were bit-banging on GPIOs reached via some peripheral chip (such
a GPIO-fanout chip over I2C/SPI, where that chip is itself feeding a
secondary I2C or similar bit-banging bus), I probably would like to
check for errors and take emergency action if the peripheral chip
isn't responding, or just report to userspace.
This has actually happened on a board I worked with, where the primary
I2C failed due to a plugged in peripheral loading it too much, and a
secondary bit-banging bus was not then reachable.
-- Jamie
On Monday 29 December 2008, Jamie Lokier wrote:
> David Brownell wrote:
> > The reason single-bit operations don't provide error paths is twofold.
> > First, they started as wrappers for can't-fail register accessors.
> > Second, it's extremely unrealisitic to expect much code to handle any
> > kind of faults in the middle of bitbanging loops ... or even just in
> > classic "set this bit and continue" configuration code.
>
> That's interesting. I'm not sure it's a good idea not to return an
> error code. The caller can just ignore it if they don't care, and
> it's extremely cheap to "return 0" in GPIO drivers which can't error.
I'm not sure either; at this point I *might* consider doing
it differently -- but primarily for the case of external GPIO
chips, e.g. over I2C or SPI -- where errors are realistic. But
it's been this way for a few years now, and changing stuff
that hasn't been observed to be a problem isn't on my list.
But as I noted: patches for $SUBJECT don't seem to have any
reason not to report whatever faults they encounter.
Also worth remembering: when reading a GPIO value, it's not
so easy to "ignore" a tristate (0, 1, error) return value.
> If I were bit-banging on GPIOs reached via some peripheral chip (such
> a GPIO-fanout chip over I2C/SPI, where that chip is itself feeding a
> secondary I2C or similar bit-banging bus), I probably would like to
> check for errors and take emergency action if the peripheral chip
> isn't responding, or just report to userspace.
If I had to do that, I'd *certainly* want to bang the hardware
designer over the head with some sort of cluebat or cluebrick. :(
> This has actually happened on a board I worked with, where the primary
> I2C failed due to a plugged in peripheral loading it too much, and a
> secondary bit-banging bus was not then reachable.
It should now be realistic for I2C device drivers to have
fault recovery logic...
But for a long time, I2c only returned -EPERM so it was
completely hopeless trying to figure out how to "handle"
any problem beyond logging the problem and hoping someone
is watching syslog output. That's a big part of why most
current I2C drivers have such unfriendly fault handling.
- Dave
>
> -- Jamie
>
>
Hi David,
On Mon, Dec 29, 2008 at 2:32 PM, David Brownell <[email protected]> wrote:
>
> - passing bitmasks as {unsigned long *bits, int count} tuples
> - separate calls to:
> * zero a set of bits (loop: gpio_set_value to 0)
> * fill a set of bits (loop: gpio_set_value to 1)
> * copy a set of bits (loop: gpio_set_value to src[i])
> * read a set of bits (loop: dst[i] = gpio_get_value)
> * ... maybe more
I had read bitmasks.h as per your recommendation and my thought on
that was: How do I reconcile such a substantial API when the use case
is just setting/getting a bunch of gpio pins?
>
> Any restriction to 32 bit value seems shortsighted. It would
> make sense to wrap the GPIO bitmask descriptions in a struct,
> letting drivers work with smaller sets -- probably most would
> fit into a single "unsigned long".
>
Hmm. Well. Currently, there's only been 1 bit support and its been
that way for years, hasn't it? It was not shortsighted at that time to
have only 1 bit access. No one raised a need for more. There weren't
any complaints about the 1-bit gpio API until am300epd.c came along.
You had correctly pointed out "I happen not to have come across the
need for such ganged access from Linux" in the start of this thread. I
think that you raised a strong point there. If I understood you
correctly, it implies it is NOT likely that there'll be many users of
this ganged access API.
Which is precisely why I took your suggestion to heart and stuck to
the middle path. For better or worse, we currently have a grand total
of just one use case for this ganged API: am300epd.c which does a
16-bit write. No one has spoken up with another concrete use case. So
I say we stick to the simple 32-bit startpin/mask/length API. Lets see
if people start using it. If more than a few do and issues are raised
with it being limited to "just 32 bits", then we can rearchitecht as
needed. After all, as you requested, it's an optional in-kernel only
API so we're in no danger of shooting ourselves in the foot here.
Further more, we've gone from years of 1-bit access with no complaints
to 32-bit access. A factor of 32 improvement seems a reasonably good
bet for the next few years to me. :-)
If any of my assertions are mistaken, I'll need your help to
understand which specific part of the conversation I've misunderstood.
Thanks,
jaya
On Mon 29 Dec 2008 14:56, David Brownell pondered:
> On Sunday 28 December 2008, Robin Getz wrote:
> > On Sat 27 Dec 2008 09:55, Jaya Kumar pondered:
> >
> > I think that I would prefer 'group' or 'collection' to use some of
> > the words that David described things as 'ganged' or 'bus' or 'bank'
> > or 'adjacent'. (but I think 'adjacent' or 'bank' really is an
> > implementation convention).
> >
> > I think I would also prefer to name things like gpio_bus_* as a
> > rather than gpio_*_bus - so the operation always is on the end...
>
> I'd avoid "bus"; the term has specific meanings, which aren't
> necessarily relevant in these contexts. Agreed about "adjacent" and
> "bank".
>
> If it weren't for its verb usage, I'd suggest "set". :)
So, any suggestions? how about "gang" or "group"? or as you said
below "multiple".
> > Shouldn't the write return an error code (if the data was not
> > written)?
>
> For these operations, I think reads/writes should have that possibility.
>
> The reason single-bit operations don't provide error paths is twofold.
> First, they started as wrappers for can't-fail register accessors.
> Second, it's extremely unrealisitic to expect much code to handle any
> kind of faults in the middle of bitbanging loops ... or even just in
> classic "set this bit and continue" configuration code.
>
> Those reasons don't apply well to these multi-bit cases.
I'm just trying to think of what the error case might be. The only think I can
really think of - is the case where you try to do a set/get on something that
has been freed.
Otherwise - it should be pretty much the same. (basic bitbanging loops, which
call multiple/basic can't-fail register accessors).
> > > While we are here, I was thinking about it, and its better if I give
> > > gpio_request/free/direction_batch a miss for now. Nothing prevents
> > > those features being added at a later point.
> >
> > I don't think that request/free are optional.
>
> Agreed. If there's any difficulty implementing them, that would
> suggest there's a significant conceptual problem to address ...
>
> > For example - in most SoC implementations - gpios are implemented
> > as banks of 16 or 32. (a 16 or 32 bit register).
> >
> > Are there facilities to span these registers?
> > - can you request 64 gpios as a 'bank'?
> > - can you request gpio_8 -> gpio_40 as a 'bank' on a 32-bit system?
> >
> > Are non-adjacent/non-contiguous gpios avaliable to be put into
> > a 'bank/batch/bus'? can you use gpio_8 -> 11 & 28 -> 31 as a
> > 8-bit 'bus'?
> >
> > How do you know what is avaliable to be talked to as a bank/bus/batch
> > without the request/free operation?
> >
> >
> > I have seen various hardware designs (both at the PCB and SoC level)
> > require all of these options, and would like to see common
> > infrastructure which handles this.
>
> Right. Get the interface right -- it should allow all those
> complexities! -- and maybe take shortcuts in the first versions
> of the implementation, by only talking to one gpio_chip at a
> time. I'd expect any shortcuts would be resolved quickly, to
> the extent they cause problems.
Yeah, I hadn't thought about spanning more than one gpio_chip. That's a good
point.
> > The issue is that on many SoC implementations - dedicated peripherals
> > can also be GPIOs - so it someone wants to use SPI (for example) GPIO's
> > 3->7 might be removed from the avaliable 'gpio' resources. This is
> > determined by the silicon designer - and even the PCB designer has
> > little to no flexibility on this. It gets worse as multiple SPI or I2C
> > are used on the PCB (which can have lots of small (less than 5)
> > dedicated pins in the middle of the larger gpio resources)....
>
> Such pin-muxing is a different issue though. Don't expect GPIO
> calls to resolve those problems.
I wasn't trying to make the point about pin muxing (I agree - different
problem) - just that it is unlikely that the gang of gpios will be
adjacent/contiguous in most cases.
> > I would think that a 'bank' / 'bus' (whatever) would be a collection
> > of random/multiple GPIOs (a struct of gpio_port_t) rather than a
> > start/length (as you described) - or better yet - the request function
> > takes a list (of individual GPIO's - defined in the platform data),
> > and creates the struct itself.
>
> That's why I pointed to <linux/bitmask.h> as one model: it allows
> an arbitrary set of bits (GPIOs) to be specified.
I can see that being useful.
> I'll NAK any "gpio_port_t" name based entirely on kernel coding
> conventions though. ;)
>
>
> > Something like the way gpio_keys are defined...
> >
> > static struct gpio_bus bfin_gpio_bus_table[] = {
> > {BIT_0, GPIO_PB8, 1, "gpio-bus: BIT0"},
> > {BIT_1, GPIO_PB9, 1, "gpio-bus: BIT1"},
> > {BIT_2, GPIO_PB10, 1, "gpio-bus: BIT2"},
> > {BIT_3, GPIO_PB11, 1, "gpio-bus: BIT3"},
> > };
> >
> > static struct gpio_bus_data bfin_gpio_bus_data = {
> > .bits = bfin_gpio_bus_table,
> > .width = ARRAY_SIZE(bfin_gpio_keys_table),
> > };
> >
> > static struct platform_device bfin_device_gpiobus = {
> > .name = "gpio-bus",
> > .dev = {
> > .platform_data = &bfin_gpio_bus_data,
> > },
> > };
> >
> > The request function builds up the bus/masks/shifts from that...
>
> That sort of thing might be made to work. Whatever this
> multiple-GPIO-set abstraction is, I suspect you're right
> about wanting the request function to be able to build
> up some data structures behind it. And that it would be
> simpler to require all the GPIOs to be listed up front,
> rather than support "add these to the set" requests.
Yeah, I would prefer the data structures to 16 calls to a request function to
build up a 16-bit wide group...
On Tue, Dec 30, 2008 at 11:55 PM, Robin Getz <[email protected]> wrote:
> Yeah, I hadn't thought about spanning more than one gpio_chip. That's a good
> point.
The currently posted code already supports spanning more than one gpio_chip.
+ do {
+ chip = gpio_to_chip(gpio + i);
+ WARN_ON(extra_checks && chip->can_sleep);
+
+ if (!chip->set_bus) {
+ while (((gpio + i) < (chip->base + chip->ngpio))
+ && bitwidth) {
+ value = values & (1 << i);
+ chip->set(chip, gpio + i - chip->base, value);
+ i++;
+ bitwidth--;
+ }
+ } else {
+ value = values >> i; /* shift off the used stuff */
+ remwidth = ((chip->base + (int) chip->ngpio) -
+ ((int) gpio + i));
+ width = min(bitwidth, remwidth);
+
+ chip->set_bus(chip, gpio + i - chip->base, value,
+ width);
+ i += width;
+ bitwidth -= width;
+ }
+ } while (bitwidth);
On Tue, Dec 30, 2008 at 11:58 PM, Jaya Kumar <[email protected]> wrote:
> On Tue, Dec 30, 2008 at 11:55 PM, Robin Getz <[email protected]> wrote:
>> Yeah, I hadn't thought about spanning more than one gpio_chip. That's a good
>> point.
>
> The currently posted code already supports spanning more than one gpio_chip.
and btw, that was because the use case needed to span 2, since it
started at 58 and was 16 bits long on a platform where ngpio == 32.
this was explained at the start of the thread.
>
> + do {
> + chip = gpio_to_chip(gpio + i);
> + WARN_ON(extra_checks && chip->can_sleep);
> +
> + if (!chip->set_bus) {
> + while (((gpio + i) < (chip->base + chip->ngpio))
> + && bitwidth) {
> + value = values & (1 << i);
> + chip->set(chip, gpio + i - chip->base, value);
> + i++;
> + bitwidth--;
> + }
> + } else {
> + value = values >> i; /* shift off the used stuff */
> + remwidth = ((chip->base + (int) chip->ngpio) -
> + ((int) gpio + i));
> + width = min(bitwidth, remwidth);
> +
> + chip->set_bus(chip, gpio + i - chip->base, value,
> + width);
> + i += width;
> + bitwidth -= width;
> + }
> + } while (bitwidth);
>
On Tue 30 Dec 2008 23:58, Jaya Kumar pondered:
> On Tue, Dec 30, 2008 at 11:55 PM, Robin Getz <[email protected]> wrote:
> > Yeah, I hadn't thought about spanning more than one gpio_chip. That's a good
> > point.
>
> The currently posted code already supports spanning more than one gpio_chip.
>
But doesn't do all the other things that David suggested/requested.
On Thu, Jan 1, 2009 at 1:38 AM, Robin Getz <[email protected]> wrote:
> On Tue 30 Dec 2008 23:58, Jaya Kumar pondered:
>> On Tue, Dec 30, 2008 at 11:55 PM, Robin Getz <[email protected]> wrote:
>> > Yeah, I hadn't thought about spanning more than one gpio_chip. That's a good
>> > point.
>>
>> The currently posted code already supports spanning more than one gpio_chip.
>>
>
> But doesn't do all the other things that David suggested/requested.
>
Hi Robin,
Yes, you are right. My implementation does not support a driver that
needs to set/get more than 32-bits of gpio in a single call. I'm okay
with that restriction as I don't see a concrete use case for that. I
understand that you're saying that's not satisfactory. I guess we'll
have to agree to differ.
Thanks,
jaya
On Wed 31 Dec 2008 13:05, Jaya Kumar pondered:
> On Thu, Jan 1, 2009 at 1:38 AM, Robin Getz <[email protected]> wrote:
> > On Tue 30 Dec 2008 23:58, Jaya Kumar pondered:
> >> On Tue, Dec 30, 2008 at 11:55 PM, Robin Getz <[email protected]> wrote:
> >> > Yeah, I hadn't thought about spanning more than one gpio_chip. That's a good
> >> > point.
> >>
> >> The currently posted code already supports spanning more than one gpio_chip.
> >>
> >
> > But doesn't do all the other things that David suggested/requested.
>
> Hi Robin,
>
> Yes, you are right. My implementation does not support a driver that
> needs to set/get more than 32-bits of gpio in a single call. I'm okay
> with that restriction as I don't see a concrete use case for that.
It's not the more than 32-bits that I'm concerned about - it is spanning
more than one register. (if all the GPIOs that are left on the board are
2, 64, and 128, where 2, and 64 are part of the SOC's GPIO, and 128 is on
a GPIO expander - which is a common use case - is this handled?)
On Sun 28 Dec 2008 17:00, Ben Nizette pondered:
> On Sun, 2008-12-28 at 13:46 -0500, Robin Getz wrote:
> > > gpio_set_batch(DB0, value, 0xFFFF, 16)
> > >
> > > which has the nice performance benefit of skipping all the bit
> > > counting in the most common use case scenario.
> >
> > but has the requirement that the driver know exactly the board level
> > impmentation details (something that doesn't sound generic).
>
> The original use case for these batch operations was in a fastpath -
> setting data lines on a framebuffer. Sure it's arguably not as generic
> as may be, but it optimises for speed and current usage patterns - I'm
> OK with that. Other usage patterns which don't have a speed requirement
> can be done using the individual pin operations and a loop.
The tradeoff for speed is always made with extensibility. If you want best
speed - you shouldn't be using the GPIO framework at all - just bang directly
on the registers yourself...
If you want something extensible/generic that serves multiple use cases -
it will normally come with a performance tradeoff...
> > > While we are here, I was thinking about it, and its better if I give
> > > gpio_request/free/direction_batch a miss for now. Nothing prevents
> > > those features being added at a later point.
> >
> > I don't think that request/free are optional.
> >
> > For example - in most SoC implementations - gpios are implemented as
> > banks of 16 or 32. (a 16 or 32 bit register).
> >
> > Are there facilities to span these registers?
> > - can you request 64 gpios as a 'bank'?
> > - can you request gpio_8 -> gpio_40 as a 'bank' on a 32-bit system?
> >
> > Are non-adjacent/non-contiguous gpios avaliable to be put into
> > a 'bank/batch/bus'? can you use gpio_8 -> 11 & 28 -> 31 as a 8-bit
> > 'bus'?
> >
> > How do you know what is avaliable to be talked to as a bank/bus/batch
> > without the request/free operation?
>
> I think the read/write operations should be able to fail if you give
> them invalid chunks of gpio, sure.
Can you define "invalid"? what are the limitations?
Can I use gpio_8 -> 11 & 28 -> 31 as a chunk?
> Request/free are not really designed
> for that operation - they just ensure exclusive access to a gpio if
> that's what the driver wants. In the batch case the
> request/free/direction operations can once again be performed by single
> pin operations and iteration.
That depends on the semantics of "request".
If it is "request & build up a monolithic chunk from xxx GPIO's" - then
my definition works.
> > I have seen various hardware designs (both at the PCB and SoC level)
> > require all of these options, and would like to see common infrastructure
> > which handles this.
>
> Yeah the request/free operation doesn't deal with muxing or any other
> platform-specific kinda gumph, that was an original design decision.
> They're really just a usage counter.
Sorry for bringing up the muxing - that wasn't the point.
It was really the issue of being non-contiguous, spanning various implementations.
> An example which comes to mind is the avr32-specific userspace gpio
> interface. This takes a bitmask, loops over the set bits and fails if
> any of the gpio are previously requested or have been assigned to
> non-gpio peripherals.
I'll have a look. I don't understand why everyone decided to make their own
userspace GPIO interface - can't we all just get along? :)
> I don't really see a need to streamline this.
> > I would think that a 'bank' / 'bus' (whatever) would be a collection
> > of random/multiple GPIOs (a struct of gpio_port_t) rather than a
> > start/length (as you described) - or better yet - the request
> > function takes a list (of individual GPIO's - defined in the
> > platform data), and creates the struct itself.
>
> Hmm, this seems a little overengineered for the basic use-cases I can
> think of.
Not the ones I run into all the time...
More complex pin multiplexing results in less contiguous free GPIO.
(which again - has nothing to do with multiplexing - it is the result
that is the important thing).
> If this can be cranked up to the same speed as the current
> proposition then OK maybe someone will like it but otherwise, once
> again, I think most people will be happy with individual operations and
> iteration.
It will be easier to maintain (from a end user perceptive - if someone
wants a "chunk" of gpio's - they just define it in their platform data).
It does put a bigger burden on the person writing things.
I would think that the overhead would only be at init - runtime shouldn't
be much different in the simple case, but allowing the complex usecases
with the same interface is better (since we only have to teach people one
thing) :)
-Robin
On Tue, 2009-01-06 at 18:02 -0500, Robin Getz wrote:
> On Sun 28 Dec 2008 17:00, Ben Nizette pondered:
> > On Sun, 2008-12-28 at 13:46 -0500, Robin Getz wrote:
> > > > gpio_set_batch(DB0, value, 0xFFFF, 16)
> > > >
> > > > which has the nice performance benefit of skipping all the bit
> > > > counting in the most common use case scenario.
> > >
> > > but has the requirement that the driver know exactly the board level
> > > impmentation details (something that doesn't sound generic).
> >
> > The original use case for these batch operations was in a fastpath -
> > setting data lines on a framebuffer. Sure it's arguably not as generic
> > as may be, but it optimises for speed and current usage patterns - I'm
> > OK with that. Other usage patterns which don't have a speed requirement
> > can be done using the individual pin operations and a loop.
>
> The tradeoff for speed is always made with extensibility. If you want best
> speed - you shouldn't be using the GPIO framework at all - just bang directly
> on the registers yourself...
>
> If you want something extensible/generic that serves multiple use cases -
> it will normally come with a performance tradeoff...
Yeah there's certainly a sliding scale here. At the speedy end you've
got register banging, $SUBJECT is one level higher, your concepts are
higher again.
If your concept can perform at a speed which solves the problem which
inspired $SUBJECT then IMO it's a better way to go. The thing which
worries me is that if we did end up sliding right up the Generic end of
the Generic-Performance slider then we'd end up with a sexy interface
which isn't actually usable for the case which inspired this activity in
the first place!
And yes the speed is primarily the concern of the implementation and
your comments really concern the interface. On paper the 2 are largely
orthogonal but until I see an alternate/competing patch I'm not prepared
to bet it'll all play nice.
>
> > > > While we are here, I was thinking about it, and its better if I give
> > > > gpio_request/free/direction_batch a miss for now. Nothing prevents
> > > > those features being added at a later point.
> > >
> > > I don't think that request/free are optional.
> > >
> > > For example - in most SoC implementations - gpios are implemented as
> > > banks of 16 or 32. (a 16 or 32 bit register).
> > >
> > > Are there facilities to span these registers?
> > > - can you request 64 gpios as a 'bank'?
> > > - can you request gpio_8 -> gpio_40 as a 'bank' on a 32-bit system?
> > >
> > > Are non-adjacent/non-contiguous gpios avaliable to be put into
> > > a 'bank/batch/bus'? can you use gpio_8 -> 11 & 28 -> 31 as a 8-bit
> > > 'bus'?
> > >
> > > How do you know what is avaliable to be talked to as a bank/bus/batch
> > > without the request/free operation?
> >
> > I think the read/write operations should be able to fail if you give
> > them invalid chunks of gpio, sure.
>
> Can you define "invalid"? what are the limitations?
>
> Can I use gpio_8 -> 11 & 28 -> 31 as a chunk?
With the $SUBJECT patch, I understand you can (they both fit in a u32
bitmask). The limitations aren't really the point though - no matter
what the final interface/implementation there will be some limitations.
Either there must be a bulk request interface which will fail if you try
and break the limitations or else the read/write will fail under these
circumstances.
A limitation which springs to mind for any implementation would be if
you specify that the chunk must be accessible in irq context but you
specify some gpios which must sleep. Yes this is a silly case and
totally the platform's responsibility but the fact is that if someone
makes this mistake an error should be returned *somewhere* rather than
just letting things explode.
In my own personal library of standalone code I've got a GPIO driver
which uses request() to build a cookie representing the gpios to be
driven; read/write then use this cookie. If the requested pins can't be
batched then it's the request which fails. I liked this approach. For
$SUBJECT's purposes though I think it's more symmetric with the single
bit operations to keep request as a refcount and have read/write able to
fail. Not too fussed though.
>
> > Request/free are not really designed
> > for that operation - they just ensure exclusive access to a gpio if
> > that's what the driver wants. In the batch case the
> > request/free/direction operations can once again be performed by single
> > pin operations and iteration.
>
> That depends on the semantics of "request".
>
> If it is "request & build up a monolithic chunk from xxx GPIO's" - then
> my definition works.
Indeed, see above.
>
>
> > > I have seen various hardware designs (both at the PCB and SoC level)
> > > require all of these options, and would like to see common infrastructure
> > > which handles this.
> >
> > Yeah the request/free operation doesn't deal with muxing or any other
> > platform-specific kinda gumph, that was an original design decision.
> > They're really just a usage counter.
>
> Sorry for bringing up the muxing - that wasn't the point.
>
> It was really the issue of being non-contiguous, spanning various implementations.
>
> > An example which comes to mind is the avr32-specific userspace gpio
> > interface. This takes a bitmask, loops over the set bits and fails if
> > any of the gpio are previously requested or have been assigned to
> > non-gpio peripherals.
>
> I'll have a look. I don't understand why everyone decided to make their own
> userspace GPIO interface - can't we all just get along? :)
Yeah for sure. Of course since .27 there's been a nice consistant
userspace gpio interface (/sys/class/gpio) so theoretically all the
others are deprecated. I know that at least on avrfreaks, the main
avr32 support forum, users have to have a good reason to be using the
avr32-specific interface if they expect to be helped.
>
> > I don't really see a need to streamline this.
>
>
>
> > > I would think that a 'bank' / 'bus' (whatever) would be a collection
> > > of random/multiple GPIOs (a struct of gpio_port_t) rather than a
> > > start/length (as you described) - or better yet - the request
> > > function takes a list (of individual GPIO's - defined in the
> > > platform data), and creates the struct itself.
> >
> > Hmm, this seems a little overengineered for the basic use-cases I can
> > think of.
>
> Not the ones I run into all the time...
>
> More complex pin multiplexing results in less contiguous free GPIO.
> (which again - has nothing to do with multiplexing - it is the result
> that is the important thing).
Which is why we're having this bit of a discussion! Jaya was the first
person to have this problem and has a patch which, in his case, solves
it. If you have other concrete cases which need to be solved at the
same time then now's the time to come forward and share with the class.
OK so I've got a board which collects the status of a number of gpio all
over a bunch of chips at a few 10s of Hz, encodes it and fires it across
a network. This works with single gpio ops but would be streamlined
with batch access. If the final solution to this problem can be used
for my pins then great, I'll move to it. However, if supporting my
use-case breaks Jaya's then it's not worth it.
>
> > If this can be cranked up to the same speed as the current
> > proposition then OK maybe someone will like it but otherwise, once
> > again, I think most people will be happy with individual operations and
> > iteration.
>
> It will be easier to maintain (from a end user perceptive - if someone
> wants a "chunk" of gpio's - they just define it in their platform data).
> It does put a bigger burden on the person writing things.
>
> I would think that the overhead would only be at init - runtime shouldn't
> be much different in the simple case, but allowing the complex usecases
> with the same interface is better (since we only have to teach people one
> thing) :)
Yeah I'll say again that if we can find an interface as generic as you
suggest and an implementation which performs as well as Jaya needs then
hells yeah, let's go for it. If however a sexy interface means Jaya's
original use case breaks then I for one won't support that interface.
--Ben.
>
> -Robin
On Wed, Jan 7, 2009 at 6:41 AM, Robin Getz <[email protected]> wrote:
> On Wed 31 Dec 2008 13:05, Jaya Kumar pondered:
>> On Thu, Jan 1, 2009 at 1:38 AM, Robin Getz <[email protected]> wrote:
>> > On Tue 30 Dec 2008 23:58, Jaya Kumar pondered:
>> >> On Tue, Dec 30, 2008 at 11:55 PM, Robin Getz <[email protected]> wrote:
>> >> > Yeah, I hadn't thought about spanning more than one gpio_chip. That's a good
>> >> > point.
>> >>
>> >> The currently posted code already supports spanning more than one gpio_chip.
>> >>
>> >
>> > But doesn't do all the other things that David suggested/requested.
>>
>> Hi Robin,
>>
>> Yes, you are right. My implementation does not support a driver that
>> needs to set/get more than 32-bits of gpio in a single call. I'm okay
>> with that restriction as I don't see a concrete use case for that.
>
> It's not the more than 32-bits that I'm concerned about - it is spanning
> more than one register. (if all the GPIOs that are left on the board are
> 2, 64, and 128, where 2, and 64 are part of the SOC's GPIO, and 128 is on
> a GPIO expander - which is a common use case - is this handled?)
>
Hi Robin,
You are correct in that the 2, 64, 128 case would not be accelarated
by my current implementation. The reason is that those 3 numbers can't
fit into a 32 bit bitmask (the mask is for consecutive bits). The user
would need to use the pre-existing single bit API for that scenario,
ie: gpio_set_value(2, val); (64, val), (128, val);
In my current patch, I'm trying to address the performance issue seen
by am300epd.c because of the use of gpio as a 16-bit data bus to
transfer framebuffer data using the current single bit gpio API. I
want to make my implementation more generic than just that which is
why I've added the bitmask and support up to 32-bits. I want to expand
on that to the extent that that can be done without diluting the
performance and without losing the simplicity of the current gpiolib
API.
After reading your points, I think I understand that you would prefer
a higher level API. I am thinking along the lines of:
array_of_gpio_numbers[] = [ 2 , 64, 128 ];
cookie = gpio_register_bundle(array_of_gpio_numbers, sizeof(array));
error = gpio_set_bundle(cookie, values[]);
error = gpio_get_bundle(cookie, values[]);
gpio_unregister_bundle(cookie);
I think that's elegant and I agree that it is feasible but it is a
level higher than what I'm trying to achieve. Would you agree with me
that the above API could be implemented on top of the lower level API
that I have proposed? To be specific, I imagine something along the
lines of:
int gpio_set_bundle(int cookie, u32 *values)
{
offsets = retrieve_offsets_from_cookie(cookie);
err = generate_chips_from_offsets(offsets, &chips);
err = merge_consecutive_chips(chips, &merged_chips)
foreach chip (merged_chips) {
values = generate_values(chip, offset);
mask = generate_mask(chip, offset);
masklength = calculate_length(mask);
gpio_set_batch(chip, offset, values, mask, masklength);
}
}
forgive the naming, I'm just hurrying to illustrate what I mean.
So my goal is to get the lower level batch API working and solid. It
would solve the am300epd.c problem and put into place a framework for
others[1]. Once we've gotten to that level of support, I believe we
could start implementing the higher level API that you've described on
top of that. Does this sound like a reasonable plan that would address
your concerns?
Thanks,
jaya
[1] The same underlying display controller that I seek to support,
broadsheet, will be used across more than just am300epd.c's pxa255. I
know with certainty that it has been used on iMX31L. Several other
SoCs including the S3C6410 are likely. So if we've got this lower
level batch API done, I hope to see it commonly implemented across
those other SoCs too in order to ensure all of those use cases are
accelerated. I am happy to work on that, especially if I can get
hardware. :-)
ps: I noticed you said the above 2, 64, 128 is a common use case. I
don't dispute that, but is that a case where acceleration is
essential? It would also help if I could look through an example of
this use case in the tree or elsewhere so that I can improve my
understanding.