2017-12-11 12:47:00

by Arnd Bergmann

[permalink] [raw]
Subject: [PATCH] [v2] drm/i915: use static const array for PICK macro

The varargs macro trick in _PIPE3/_PHY3/_PORT3 was meant as an optimization
to shrink the i915 kernel module by around 1000 bytes. However, the
downside is a size regression with CONFIG_KASAN, as I found from stack size
warnings with gcc-7.0.1:

before:
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 176 bytes is larger than 100 bytes [-Werror=frame-larger-than=]
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 224 bytes is larger than 100 bytes [-Werror=frame-larger-than=]

after:
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 1016 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]
drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 1960 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]

I also checked the module sizes with gcc-7.0.1 and got:

original:
text data bss dec hex filename
2380830 1155436 4448 3540714 3606ea drivers/gpu/drm/i915/i915-kasan.o
1298054 543692 2884 1844630 1c2596 drivers/gpu/drm/i915/i915-nokasan.o

after ce64645d86ac:
text data bss dec hex filename
2389515 1154476 4448 3548439 362517 drivers/gpu/drm/i915/i915-kasan.o
1299639 543692 2884 1846215 1c2bc7 drivers/gpu/drm/i915/i915-nokasan.o

with this patch:
text data bss dec hex filename
2381275 1163884 4448 3549607 3629a7 drivers/gpu/drm/i915/i915-kasan.o
1296038 543692 2884 1842614 1c1db6 drivers/gpu/drm/i915/i915-nokasan.o

So ce64645d86ac actually caused a code size growth in .text both with and
without kasan, and my version gets most of it back at the expense of larger
.data when kasan is enabled.

Fixes: ce64645d86ac ("drm/i915: use variadic macros and arrays to choose port/pipe based registers")
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80114
Link: https://lkml.org/lkml/2017/3/20/1022
Cc: Jani Nikula <[email protected]>
Signed-off-by: Arnd Bergmann <[email protected]>
---
v2: rebased after a1986f4174a4 ("drm/i915: Remove unnecessary PORT3 definition.")
---
drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 09bf043c1c2e..36f4408503e1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -139,7 +139,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
}

-#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
+#define _PICK(__index, ...) ({static const u32 __arr[] = { __VA_ARGS__ }; __arr[__index];})

#define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a)))
#define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b))
@@ -3097,10 +3097,10 @@ enum i915_power_well_id {
/*
* Clock control & power management
*/
-#define _DPLL_A (dev_priv->info.display_mmio_offset + 0x6014)
-#define _DPLL_B (dev_priv->info.display_mmio_offset + 0x6018)
-#define _CHV_DPLL_C (dev_priv->info.display_mmio_offset + 0x6030)
-#define DPLL(pipe) _MMIO_PIPE3((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C)
+#define _DPLL_A 0x6014
+#define _DPLL_B 0x6018
+#define _CHV_DPLL_C 0x6030
+#define DPLL(pipe) _MMIO(dev_priv->info.display_mmio_offset + _PICK((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C))

#define VGA0 _MMIO(0x6000)
#define VGA1 _MMIO(0x6004)
@@ -3196,10 +3196,10 @@ enum i915_power_well_id {
#define SDVO_MULTIPLIER_SHIFT_HIRES 4
#define SDVO_MULTIPLIER_SHIFT_VGA 0

-#define _DPLL_A_MD (dev_priv->info.display_mmio_offset + 0x601c)
-#define _DPLL_B_MD (dev_priv->info.display_mmio_offset + 0x6020)
-#define _CHV_DPLL_C_MD (dev_priv->info.display_mmio_offset + 0x603c)
-#define DPLL_MD(pipe) _MMIO_PIPE3((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD)
+#define _DPLL_A_MD 0x601c
+#define _DPLL_B_MD 0x6020
+#define _CHV_DPLL_C_MD 0x603c
+#define DPLL_MD(pipe) _MMIO(dev_priv->info.display_mmio_offset + _PICK((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD))

/*
* UDI pixel divider, controlling how many pixels are stuffed into a packet.
--
2.9.0


2017-12-11 18:41:01

by Chris Wilson

[permalink] [raw]
Subject: Re: [PATCH] [v2] drm/i915: use static const array for PICK macro

Quoting Chris Wilson (2017-12-11 12:51:42)
> Quoting Arnd Bergmann (2017-12-11 12:46:22)
> > The varargs macro trick in _PIPE3/_PHY3/_PORT3 was meant as an optimization
> > to shrink the i915 kernel module by around 1000 bytes. However, the
> > downside is a size regression with CONFIG_KASAN, as I found from stack size
> > warnings with gcc-7.0.1:
> >
> > before:
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 176 bytes is larger than 100 bytes [-Werror=frame-larger-than=]
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 224 bytes is larger than 100 bytes [-Werror=frame-larger-than=]
> >
> > after:
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_get_hw_state':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1644:1: error: the frame size of 1016 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]
> > drivers/gpu/drm/i915/intel_dpll_mgr.c: In function 'bxt_ddi_pll_enable':
> > drivers/gpu/drm/i915/intel_dpll_mgr.c:1548:1: error: the frame size of 1960 bytes is larger than 1000 bytes [-Werror=frame-larger-than=]
> >
> > I also checked the module sizes and got with gcc-7.0.1
> >
> > original:
> > text data bss dec hex filename
> > 2380830 1155436 4448 3540714 3606ea drivers/gpu/drm/i915/i915-kasan.o
> > 1298054 543692 2884 1844630 1c2596 drivers/gpu/drm/i915/i915-nokasan.o
> >
> > after ce64645d86ac:
> > text data bss dec hex filename
> > 2389515 1154476 4448 3548439 362517 drivers/gpu/drm/i915/i915-kasan.o
> > 1299639 543692 2884 1846215 1c2bc7 drivers/gpu/drm/i915/i915-nokasan.o
> >
> > with this patch:
> > text data bss dec hex filename
> > 2381275 1163884 4448 3549607 3629a7 drivers/gpu/drm/i915/i915-kasan.o
> > 1296038 543692 2884 1842614 1c1db6 drivers/gpu/drm/i915/i915-nokasan.o
> >
> > Actually showing a code size growth in .text both with and without kasan,
> > and my version gets most of it back at the expense of larger .data when
> > kasan is enabled.
> >
> > Fixes: ce64645d86ac ("drm/i915: use variadic macros and arrays to choose port/pipe based registers")
> > Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80114
> > Link: https://lkml.org/lkml/2017/3/20/1022
> > Cc: Jani Nikula <[email protected]>
> > Signed-off-by: Arnd Bergmann <[email protected]>
> > ---
> > v2: rebased after a1986f4174a4 ("drm/i915: Remove unnecessary PORT3 definition.")
> > ---
> > drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++---------
> > 1 file changed, 9 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index 09bf043c1c2e..36f4408503e1 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -139,7 +139,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
> > return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
> > }
> >
> > -#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
> > +#define _PICK(__index, ...) ({static const u32 __arr[] = { __VA_ARGS__ }; __arr[__index];})
>
> Is gcc smart enough for
> if (__builtin_constant_p(__index)) {
> ((const u32 []){ __VA_ARGS__ })[__index];
> } else {
> static const u32 __arr[] = { __VA_ARGS__ };
> __arr[__index];
> }
> ?

Not really, we don't have enough constants for it to make a substantial
difference:

add/remove: 1/0 grow/shrink: 3/5 up/down: 617/-604 (13)
Function old new delta
cnl_ddi_vswing_program.isra - 574 +574
bxt_ddi_phy_is_enabled 220 241 +21
bxt_ddi_phy_set_signal_level 537 556 +19
i9xx_get_pipe_config 1474 1477 +3
bxt_ddi_phy_verify_state 411 408 -3
_bxt_ddi_phy_init 956 950 -6
vlv_display_power_well_init 470 461 -9
bxt_ddi_pll_get_hw_state 774 762 -12
cnl_ddi_vswing_sequence 1166 592 -574
Total: Before=13461532, After=13461545, chg +0.00%

Of particular note, the size of __arr[] is not reduced, so either gcc is
already eliminating the static[] for a constant index, or it is not
eliminating the redundant branch here.
-Chris

2018-01-16 16:42:30

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] [v2] drm/i915: use static const array for PICK macro

On Mon, Dec 11, 2017 at 7:40 PM, Chris Wilson <[email protected]> wrote:
> Quoting Chris Wilson (2017-12-11 12:51:42)
>> Quoting Arnd Bergmann (2017-12-11 12:46:22)
>> > v2: rebased after a1986f4174a4 ("drm/i915: Remove unnecessary PORT3 definition.")
>> > ---
>> > drivers/gpu/drm/i915/i915_reg.h | 18 +++++++++---------
>> > 1 file changed, 9 insertions(+), 9 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> > index 09bf043c1c2e..36f4408503e1 100644
>> > --- a/drivers/gpu/drm/i915/i915_reg.h
>> > +++ b/drivers/gpu/drm/i915/i915_reg.h
>> > @@ -139,7 +139,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>> > return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
>> > }
>> >
>> > -#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
>> > +#define _PICK(__index, ...) ({static const u32 __arr[] = { __VA_ARGS__ }; __arr[__index];})
>>
>> Is gcc smart enough for
>> if (__builtin_constant_p(__index)) {
>> ((const u32 []){ __VA_ARGS__ })[__index];
>> } else {
>> static const u32 __arr[] = { __VA_ARGS__ };
>> __arr[__index];
>> }
>> ?
>
> Not really, we don't have enough constants for it to make a substantial
> difference:
>
> add/remove: 1/0 grow/shrink: 3/5 up/down: 617/-604 (13)
> Function old new delta
> cnl_ddi_vswing_program.isra - 574 +574
> bxt_ddi_phy_is_enabled 220 241 +21
> bxt_ddi_phy_set_signal_level 537 556 +19
> i9xx_get_pipe_config 1474 1477 +3
> bxt_ddi_phy_verify_state 411 408 -3
> _bxt_ddi_phy_init 956 950 -6
> vlv_display_power_well_init 470 461 -9
> bxt_ddi_pll_get_hw_state 774 762 -12
> cnl_ddi_vswing_sequence 1166 592 -574
> Total: Before=13461532, After=13461545, chg +0.00%
>
> Of particular note the size of __arr[] is not reduced, so gcc is already
> eliminating the static[] for constant index, or not eliminating the
> redundant branch here.

I noticed we never concluded here. Did you see anything wrong with my
workaround in the end or could we just apply it to avoid the stack
size regression?

Arnd