2019-10-19 08:41:58

by Sudip Mukherjee

[permalink] [raw]
Subject: [PATCH] omapfb: reduce stack usage

The build of xtensa allmodconfig is giving a warning of:
In function 'dsi_dump_dsidev_irqs':
warning: the frame size of 1120 bytes is larger than 1024 bytes

Allocate the memory for 'struct dsi_irq_stats' dynamically instead
of assigning it in stack.

Signed-off-by: Sudip Mukherjee <[email protected]>
---
drivers/video/fbdev/omap2/omapfb/dss/dsi.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
index d620376216e1..43402467bf40 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
@@ -1536,22 +1536,25 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
{
struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
unsigned long flags;
- struct dsi_irq_stats stats;
+ struct dsi_irq_stats *stats;

+ stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats)
+ return;
spin_lock_irqsave(&dsi->irq_stats_lock, flags);

- stats = dsi->irq_stats;
+ memcpy(stats, &dsi->irq_stats, sizeof(*stats));
memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats));
dsi->irq_stats.last_reset = jiffies;

spin_unlock_irqrestore(&dsi->irq_stats_lock, flags);

seq_printf(s, "period %u ms\n",
- jiffies_to_msecs(jiffies - stats.last_reset));
+ jiffies_to_msecs(jiffies - stats->last_reset));

- seq_printf(s, "irqs %d\n", stats.irq_count);
+ seq_printf(s, "irqs %d\n", stats->irq_count);
#define PIS(x) \
- seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]);
+ seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1]);

seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1);
PIS(VC0);
@@ -1575,10 +1578,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,

#define PIS(x) \
seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \
- stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
- stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
- stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
- stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
+ stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
+ stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
+ stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
+ stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);

seq_printf(s, "-- VC interrupts --\n");
PIS(CS);
@@ -1594,7 +1597,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,

#define PIS(x) \
seq_printf(s, "%-20s %10d\n", #x, \
- stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
+ stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);

seq_printf(s, "-- CIO interrupts --\n");
PIS(ERRSYNCESC1);
@@ -1618,6 +1621,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
PIS(ULPSACTIVENOT_ALL0);
PIS(ULPSACTIVENOT_ALL1);
#undef PIS
+ kfree(stats);
}

static void dsi1_dump_irqs(struct seq_file *s)
--
2.11.0


2019-10-19 08:55:42

by Ladislav Michl

[permalink] [raw]
Subject: Re: [PATCH] omapfb: reduce stack usage

On Fri, Oct 18, 2019 at 05:30:04PM +0100, Sudip Mukherjee wrote:
> The build of xtensa allmodconfig is giving a warning of:
> In function 'dsi_dump_dsidev_irqs':
> warning: the frame size of 1120 bytes is larger than 1024 bytes
>
> Allocate the memory for 'struct dsi_irq_stats' dynamically instead
> of assigning it in stack.

So now function can fail silently, executes longer, code is sligthly
bigger... And all that to silent warning about exceeding frame size.
Is it really worth "fixing"?

> Signed-off-by: Sudip Mukherjee <[email protected]>
> ---
> drivers/video/fbdev/omap2/omapfb/dss/dsi.c | 24 ++++++++++++++----------
> 1 file changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
> index d620376216e1..43402467bf40 100644
> --- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
> +++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
> @@ -1536,22 +1536,25 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
> {
> struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
> unsigned long flags;
> - struct dsi_irq_stats stats;
> + struct dsi_irq_stats *stats;
>
> + stats = kmalloc(sizeof(*stats), GFP_KERNEL);
> + if (!stats)
> + return;
> spin_lock_irqsave(&dsi->irq_stats_lock, flags);
>
> - stats = dsi->irq_stats;
> + memcpy(stats, &dsi->irq_stats, sizeof(*stats));
> memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats));
> dsi->irq_stats.last_reset = jiffies;
>
> spin_unlock_irqrestore(&dsi->irq_stats_lock, flags);
>
> seq_printf(s, "period %u ms\n",
> - jiffies_to_msecs(jiffies - stats.last_reset));
> + jiffies_to_msecs(jiffies - stats->last_reset));
>
> - seq_printf(s, "irqs %d\n", stats.irq_count);
> + seq_printf(s, "irqs %d\n", stats->irq_count);
> #define PIS(x) \
> - seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]);
> + seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1]);
>
> seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1);
> PIS(VC0);
> @@ -1575,10 +1578,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
>
> #define PIS(x) \
> seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \
> - stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
> - stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
> - stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
> - stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
> + stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
> + stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
> + stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
> + stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
>
> seq_printf(s, "-- VC interrupts --\n");
> PIS(CS);
> @@ -1594,7 +1597,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
>
> #define PIS(x) \
> seq_printf(s, "%-20s %10d\n", #x, \
> - stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
> + stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
>
> seq_printf(s, "-- CIO interrupts --\n");
> PIS(ERRSYNCESC1);
> @@ -1618,6 +1621,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
> PIS(ULPSACTIVENOT_ALL0);
> PIS(ULPSACTIVENOT_ALL1);
> #undef PIS
> + kfree(stats);
> }
>
> static void dsi1_dump_irqs(struct seq_file *s)
> --
> 2.11.0

2019-10-19 09:49:06

by Sudip Mukherjee

[permalink] [raw]
Subject: Re: [PATCH] omapfb: reduce stack usage

On Fri, Oct 18, 2019 at 07:27:28PM +0200, Ladislav Michl wrote:
> On Fri, Oct 18, 2019 at 05:30:04PM +0100, Sudip Mukherjee wrote:
> > The build of xtensa allmodconfig is giving a warning of:
> > In function 'dsi_dump_dsidev_irqs':
> > warning: the frame size of 1120 bytes is larger than 1024 bytes
> >
> > Allocate the memory for 'struct dsi_irq_stats' dynamically instead
> > of assigning it in stack.
>
> So now function can fail silently, executes longer, code is sligthly
> bigger... And all that to silent warning about exceeding frame size.
> Is it really worth "fixing"?

The only point of failure is if kmalloc() fails and if kmalloc() fails then
there will be error prints in dmesg to tell the user that there is no
memory left in the system. About the size bigger, it seems
the drivers/video/fbdev/omap2/omapfb/dss/dsi.o file is smaller with the
patch.
This is without the patch:
-rw-r--r-- 1 sudip sudip 316856 Oct 18 22:27 drivers/video/fbdev/omap2/omapfb/dss/dsi.o
And this is with the patch:
-rw-r--r-- 1 sudip sudip 316436 Oct 18 20:09 drivers/video/fbdev/omap2/omapfb/dss/dsi.o

And also, objdump shows me that <dsi_dump_dsidev_irqs> was taking up 0xD7D
bytes, and now with the patch it is taking up 0xBED bytes, thats a saving
of 400 bytes. If it has 400 bytes of less code to execute will it not be
faster now?

But, I may be totally wrong in my thinking, and in that case, please feel
free to reject the patch.

--
Regards
Sudip

2019-10-19 09:52:03

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] omapfb: reduce stack usage

On Fri, 2019-10-18 at 23:30 +0100, Sudip Mukherjee wrote:
> On Fri, Oct 18, 2019 at 07:27:28PM +0200, Ladislav Michl wrote:
> > On Fri, Oct 18, 2019 at 05:30:04PM +0100, Sudip Mukherjee wrote:
> > > The build of xtensa allmodconfig is giving a warning of:
> > > In function 'dsi_dump_dsidev_irqs':
> > > warning: the frame size of 1120 bytes is larger than 1024 bytes
> > >
> > > Allocate the memory for 'struct dsi_irq_stats' dynamically instead
> > > of assigning it in stack.
> >
> > So now function can fail silently, executes longer, code is sligthly
> > bigger... And all that to silent warning about exceeding frame size.
> > Is it really worth "fixing"?

Depends if it could fail in practice due to a stack overrun.

> The only point of failure is if kmalloc() fails and if kmalloc() fails then
> there will be error prints in dmesg to tell the user that there is no
> memory left in the system. About the size bigger, it seems
> the drivers/video/fbdev/omap2/omapfb/dss/dsi.o file is smaller with the
> patch.
> This is without the patch:
> -rw-r--r-- 1 sudip sudip 316856 Oct 18 22:27 drivers/video/fbdev/omap2/omapfb/dss/dsi.o
> And this is with the patch:
> -rw-r--r-- 1 sudip sudip 316436 Oct 18 20:09 drivers/video/fbdev/omap2/omapfb/dss/dsi.o
>
> And also, objdump shows me that <dsi_dump_dsidev_irqs> was taking up 0xD7D
> bytes, and now with the patch it is taking up 0xBED bytes, thats a saving
> of 400 bytes. If it has 400 bytes of less code to execute will it not be
> faster now?

You should try compiling without all the debugging symbols (defconfig)

> But, I may be totally wrong in my thinking, and in that case, please feel
> free to reject the patch.

Without your patch:

$ objdump -x drivers/video/fbdev/omap2/omapfb/dss/dsi.o | grep dsi_dump_dsidev_irqs
00000d20 l F .text 0000061c dsi_dump_dsidev_irqs

With your patch:

$ objdump -x drivers/video/fbdev/omap2/omapfb/dss/dsi.o | grep dsi_dump_dsidev_irqs
00000d20 l F .text 00000638 dsi_dump_dsidev_irqs


2019-10-19 09:57:51

by Sudip Mukherjee

[permalink] [raw]
Subject: Re: [PATCH] omapfb: reduce stack usage

On Fri, Oct 18, 2019 at 06:19:15PM -0700, Joe Perches wrote:
> On Fri, 2019-10-18 at 23:30 +0100, Sudip Mukherjee wrote:
> > On Fri, Oct 18, 2019 at 07:27:28PM +0200, Ladislav Michl wrote:
> > > On Fri, Oct 18, 2019 at 05:30:04PM +0100, Sudip Mukherjee wrote:
> > > > The build of xtensa allmodconfig is giving a warning of:
> > > > In function 'dsi_dump_dsidev_irqs':
> > > > warning: the frame size of 1120 bytes is larger than 1024 bytes
<snip>
>
> Without your patch:
>
> $ objdump -x drivers/video/fbdev/omap2/omapfb/dss/dsi.o | grep dsi_dump_dsidev_irqs
> 00000d20 l F .text 0000061c dsi_dump_dsidev_irqs
>
> With your patch:
>
> $ objdump -x drivers/video/fbdev/omap2/omapfb/dss/dsi.o | grep dsi_dump_dsidev_irqs
> 00000d20 l F .text 00000638 dsi_dump_dsidev_irqs

I did objdump -d and then compared where it started and where it ended.

But, in anycase, this driver is framebuffer driver for omap2 and in
reality, can only be used on arm platform and when I build the driver
with arm compiler I am not getting this warning. This is not a valid
concern, please reject this patch.

--
Regards
Sudip

Subject: Re: [PATCH] omapfb: reduce stack usage


On 10/18/19 6:30 PM, Sudip Mukherjee wrote:
> The build of xtensa allmodconfig is giving a warning of:
> In function 'dsi_dump_dsidev_irqs':
> warning: the frame size of 1120 bytes is larger than 1024 bytes
>
> Allocate the memory for 'struct dsi_irq_stats' dynamically instead
> of assigning it in stack.
>
> Signed-off-by: Sudip Mukherjee <[email protected]>
> ---
> drivers/video/fbdev/omap2/omapfb/dss/dsi.c | 24 ++++++++++++++----------
> 1 file changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
> index d620376216e1..43402467bf40 100644
> --- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
> +++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
> @@ -1536,22 +1536,25 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
> {
> struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
> unsigned long flags;
> - struct dsi_irq_stats stats;
> + struct dsi_irq_stats *stats;
>
> + stats = kmalloc(sizeof(*stats), GFP_KERNEL);
> + if (!stats)
> + return;
> spin_lock_irqsave(&dsi->irq_stats_lock, flags);
>
> - stats = dsi->irq_stats;
> + memcpy(stats, &dsi->irq_stats, sizeof(*stats));

"stats" copy is only needed for generating debugfs information.

We can probably reduce the stack usage and also simplify the driver
by just accessing dsi->irq_stats directly before cleaning it
(we would also need to extend coverage of spinlock but the code is
debug only so this should not be a problem).

Care to try this approach?

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

> memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats));
> dsi->irq_stats.last_reset = jiffies;
>
> spin_unlock_irqrestore(&dsi->irq_stats_lock, flags);
>
> seq_printf(s, "period %u ms\n",
> - jiffies_to_msecs(jiffies - stats.last_reset));
> + jiffies_to_msecs(jiffies - stats->last_reset));
>
> - seq_printf(s, "irqs %d\n", stats.irq_count);
> + seq_printf(s, "irqs %d\n", stats->irq_count);
> #define PIS(x) \
> - seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]);
> + seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1]);
>
> seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1);
> PIS(VC0);
> @@ -1575,10 +1578,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
>
> #define PIS(x) \
> seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \
> - stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
> - stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
> - stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
> - stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
> + stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
> + stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
> + stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
> + stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
>
> seq_printf(s, "-- VC interrupts --\n");
> PIS(CS);
> @@ -1594,7 +1597,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
>
> #define PIS(x) \
> seq_printf(s, "%-20s %10d\n", #x, \
> - stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
> + stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
>
> seq_printf(s, "-- CIO interrupts --\n");
> PIS(ERRSYNCESC1);
> @@ -1618,6 +1621,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
> PIS(ULPSACTIVENOT_ALL0);
> PIS(ULPSACTIVENOT_ALL1);
> #undef PIS
> + kfree(stats);
> }
>
> static void dsi1_dump_irqs(struct seq_file *s)