The __xenbus_map_ring() function has two large arrays, 'map' and
'unmap' on its stack. When clang decides to inline it into its caller,
xenbus_map_ring_valloc_hvm(), the total stack usage exceeds the warning
limit for stack size on 32-bit architectures.
drivers/xen/xenbus/xenbus_client.c:592:12: error: stack frame size of 1104 bytes in function 'xenbus_map_ring_valloc_hvm' [-Werror,-Wframe-larger-than=]
As far as I can tell, other compilers don't inline it here, so we get
no warning, but the stack usage is actually the same. It is possible
for both arrays to use the same location on the stack, but the compiler
cannot prove that this is safe because they get passed to external
functions that may end up using them until they go out of scope.
Move the two arrays into separate block scopes to limit their lifetimes
and force them to occupy less stack in total, regardless of the
inlining decision.
Signed-off-by: Arnd Bergmann <[email protected]>
---
drivers/xen/xenbus/xenbus_client.c | 74 +++++++++++++++++-------------
1 file changed, 41 insertions(+), 33 deletions(-)
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 040d2a43e8e3..23ca70378e36 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -470,54 +470,62 @@ static int __xenbus_map_ring(struct xenbus_device *dev,
unsigned int flags,
bool *leaked)
{
- struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
- struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
int i, j;
int err = GNTST_okay;
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
+ {
+ struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
- for (i = 0; i < nr_grefs; i++) {
- memset(&map[i], 0, sizeof(map[i]));
- gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
- dev->otherend_id);
- handles[i] = INVALID_GRANT_HANDLE;
- }
+ if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+ return -EINVAL;
- gnttab_batch_map(map, i);
+ for (i = 0; i < nr_grefs; i++) {
+ memset(&map[i], 0, sizeof(map[i]));
+ gnttab_set_map_op(&map[i], addrs[i], flags,
+ gnt_refs[i], dev->otherend_id);
+ handles[i] = INVALID_GRANT_HANDLE;
+ }
+
+ gnttab_batch_map(map, i);
- for (i = 0; i < nr_grefs; i++) {
- if (map[i].status != GNTST_okay) {
- err = map[i].status;
- xenbus_dev_fatal(dev, map[i].status,
+ for (i = 0; i < nr_grefs; i++) {
+ if (map[i].status != GNTST_okay) {
+ err = map[i].status;
+ xenbus_dev_fatal(dev, map[i].status,
"mapping in shared page %d from domain %d",
gnt_refs[i], dev->otherend_id);
- goto fail;
- } else
- handles[i] = map[i].handle;
+ goto fail;
+ } else
+ handles[i] = map[i].handle;
+ }
}
-
return GNTST_okay;
fail:
- for (i = j = 0; i < nr_grefs; i++) {
- if (handles[i] != INVALID_GRANT_HANDLE) {
- memset(&unmap[j], 0, sizeof(unmap[j]));
- gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
- GNTMAP_host_map, handles[i]);
- j++;
+ {
+ struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+
+ for (i = j = 0; i < nr_grefs; i++) {
+ if (handles[i] != INVALID_GRANT_HANDLE) {
+ memset(&unmap[j], 0, sizeof(unmap[j]));
+ gnttab_set_unmap_op(&unmap[j],
+ (phys_addr_t)addrs[i],
+ GNTMAP_host_map,
+ handles[i]);
+ j++;
+ }
}
- }
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
- BUG();
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ unmap, j))
+ BUG();
- *leaked = false;
- for (i = 0; i < j; i++) {
- if (unmap[i].status != GNTST_okay) {
- *leaked = true;
- break;
+ *leaked = false;
+ for (i = 0; i < j; i++) {
+ if (unmap[i].status != GNTST_okay) {
+ *leaked = true;
+ break;
+ }
}
}
--
2.26.0
On 05.05.20 16:15, Arnd Bergmann wrote:
> The __xenbus_map_ring() function has two large arrays, 'map' and
> 'unmap' on its stack. When clang decides to inline it into its caller,
> xenbus_map_ring_valloc_hvm(), the total stack usage exceeds the warning
> limit for stack size on 32-bit architectures.
>
> drivers/xen/xenbus/xenbus_client.c:592:12: error: stack frame size of 1104 bytes in function 'xenbus_map_ring_valloc_hvm' [-Werror,-Wframe-larger-than=]
>
> As far as I can tell, other compilers don't inline it here, so we get
> no warning, but the stack usage is actually the same. It is possible
> for both arrays to use the same location on the stack, but the compiler
> cannot prove that this is safe because they get passed to external
> functions that may end up using them until they go out of scope.
>
> Move the two arrays into separate basic blocks to limit the scope
> and force them to occupy less stack in total, regardless of the
> inlining decision.
Why don't you put both arrays into a union?
Juergen
>
> Signed-off-by: Arnd Bergmann <[email protected]>
> ---
> drivers/xen/xenbus/xenbus_client.c | 74 +++++++++++++++++-------------
> 1 file changed, 41 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
> index 040d2a43e8e3..23ca70378e36 100644
> --- a/drivers/xen/xenbus/xenbus_client.c
> +++ b/drivers/xen/xenbus/xenbus_client.c
> @@ -470,54 +470,62 @@ static int __xenbus_map_ring(struct xenbus_device *dev,
> unsigned int flags,
> bool *leaked)
> {
> - struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
> - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
> int i, j;
> int err = GNTST_okay;
>
> - if (nr_grefs > XENBUS_MAX_RING_GRANTS)
> - return -EINVAL;
> + {
> + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
>
> - for (i = 0; i < nr_grefs; i++) {
> - memset(&map[i], 0, sizeof(map[i]));
> - gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
> - dev->otherend_id);
> - handles[i] = INVALID_GRANT_HANDLE;
> - }
> + if (nr_grefs > XENBUS_MAX_RING_GRANTS)
> + return -EINVAL;
>
> - gnttab_batch_map(map, i);
> + for (i = 0; i < nr_grefs; i++) {
> + memset(&map[i], 0, sizeof(map[i]));
> + gnttab_set_map_op(&map[i], addrs[i], flags,
> + gnt_refs[i], dev->otherend_id);
> + handles[i] = INVALID_GRANT_HANDLE;
> + }
> +
> + gnttab_batch_map(map, i);
>
> - for (i = 0; i < nr_grefs; i++) {
> - if (map[i].status != GNTST_okay) {
> - err = map[i].status;
> - xenbus_dev_fatal(dev, map[i].status,
> + for (i = 0; i < nr_grefs; i++) {
> + if (map[i].status != GNTST_okay) {
> + err = map[i].status;
> + xenbus_dev_fatal(dev, map[i].status,
> "mapping in shared page %d from domain %d",
> gnt_refs[i], dev->otherend_id);
> - goto fail;
> - } else
> - handles[i] = map[i].handle;
> + goto fail;
> + } else
> + handles[i] = map[i].handle;
> + }
> }
> -
> return GNTST_okay;
>
> fail:
> - for (i = j = 0; i < nr_grefs; i++) {
> - if (handles[i] != INVALID_GRANT_HANDLE) {
> - memset(&unmap[j], 0, sizeof(unmap[j]));
> - gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
> - GNTMAP_host_map, handles[i]);
> - j++;
> + {
> + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
> +
> + for (i = j = 0; i < nr_grefs; i++) {
> + if (handles[i] != INVALID_GRANT_HANDLE) {
> + memset(&unmap[j], 0, sizeof(unmap[j]));
> + gnttab_set_unmap_op(&unmap[j],
> + (phys_addr_t)addrs[i],
> + GNTMAP_host_map,
> + handles[i]);
> + j++;
> + }
> }
> - }
>
> - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
> - BUG();
> + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
> + unmap, j))
> + BUG();
>
> - *leaked = false;
> - for (i = 0; i < j; i++) {
> - if (unmap[i].status != GNTST_okay) {
> - *leaked = true;
> - break;
> + *leaked = false;
> + for (i = 0; i < j; i++) {
> + if (unmap[i].status != GNTST_okay) {
> + *leaked = true;
> + break;
> + }
> }
> }
>
>
On Tue, May 5, 2020 at 4:34 PM Jürgen Groß <[email protected]> wrote:
> On 05.05.20 16:15, Arnd Bergmann wrote:
> > The __xenbus_map_ring() function has two large arrays, 'map' and
> > 'unmap' on its stack. When clang decides to inline it into its caller,
> > xenbus_map_ring_valloc_hvm(), the total stack usage exceeds the warning
> > limit for stack size on 32-bit architectures.
> >
> > drivers/xen/xenbus/xenbus_client.c:592:12: error: stack frame size of 1104 bytes in function 'xenbus_map_ring_valloc_hvm' [-Werror,-Wframe-larger-than=]
> >
> > As far as I can tell, other compilers don't inline it here, so we get
> > no warning, but the stack usage is actually the same. It is possible
> > for both arrays to use the same location on the stack, but the compiler
> > cannot prove that this is safe because they get passed to external
> > functions that may end up using them until they go out of scope.
> >
> > Move the two arrays into separate basic blocks to limit the scope
> > and force them to occupy less stack in total, regardless of the
> > inlining decision.
>
> Why don't you put both arrays into a union?
I considered that as well, and don't really mind either way. I think it does
get a bit ugly whatever we do. If you prefer the union, I can respin the
patch that way.
Arnd
On 05.05.20 17:01, Arnd Bergmann wrote:
> On Tue, May 5, 2020 at 4:34 PM Jürgen Groß <[email protected]> wrote:
>> On 05.05.20 16:15, Arnd Bergmann wrote:
>>> The __xenbus_map_ring() function has two large arrays, 'map' and
>>> 'unmap' on its stack. When clang decides to inline it into its caller,
>>> xenbus_map_ring_valloc_hvm(), the total stack usage exceeds the warning
>>> limit for stack size on 32-bit architectures.
>>>
>>> drivers/xen/xenbus/xenbus_client.c:592:12: error: stack frame size of 1104 bytes in function 'xenbus_map_ring_valloc_hvm' [-Werror,-Wframe-larger-than=]
>>>
>>> As far as I can tell, other compilers don't inline it here, so we get
>>> no warning, but the stack usage is actually the same. It is possible
>>> for both arrays to use the same location on the stack, but the compiler
>>> cannot prove that this is safe because they get passed to external
>>> functions that may end up using them until they go out of scope.
>>>
>>> Move the two arrays into separate basic blocks to limit the scope
>>> and force them to occupy less stack in total, regardless of the
>>> inlining decision.
>>
>> Why don't you put both arrays into a union?
>
> I considered that as well, and don't really mind either way. I think it does
> get a bit ugly whatever we do. If you prefer the union, I can respin the
> patch that way.
Hmm, thinking more about it, I think the really clean solution would be to
extend struct map_ring_valloc_hvm to cover the pv case, too, add the
map and unmap arrays (possibly as a union) to it, and allocate it
dynamically instead of having it on the stack.
Would you be fine doing this?
Juergen
On 5/5/20 12:02 PM, Jürgen Groß wrote:
> On 05.05.20 17:01, Arnd Bergmann wrote:
>> On Tue, May 5, 2020 at 4:34 PM Jürgen Groß <[email protected]> wrote:
>>> On 05.05.20 16:15, Arnd Bergmann wrote:
>>>> The __xenbus_map_ring() function has two large arrays, 'map' and
>>>> 'unmap' on its stack. When clang decides to inline it into its caller,
>>>> xenbus_map_ring_valloc_hvm(), the total stack usage exceeds the
>>>> warning
>>>> limit for stack size on 32-bit architectures.
>>>>
>>>> drivers/xen/xenbus/xenbus_client.c:592:12: error: stack frame size
>>>> of 1104 bytes in function 'xenbus_map_ring_valloc_hvm'
>>>> [-Werror,-Wframe-larger-than=]
>>>>
>>>> As far as I can tell, other compilers don't inline it here, so we get
>>>> no warning, but the stack usage is actually the same. It is possible
>>>> for both arrays to use the same location on the stack, but the
>>>> compiler
>>>> cannot prove that this is safe because they get passed to external
>>>> functions that may end up using them until they go out of scope.
>>>>
>>>> Move the two arrays into separate basic blocks to limit the scope
>>>> and force them to occupy less stack in total, regardless of the
>>>> inlining decision.
>>>
>>> Why don't you put both arrays into a union?
>>
>> I considered that as well, and don't really mind either way. I think
>> it does
>> get a bit ugly whatever we do. If you prefer the union, I can respin the
>> patch that way.
>
> Hmm, thinking more about it I think the real clean solution would be to
> extend struct map_ring_valloc_hvm to cover the pv case, too, to add the
> map and unmap arrays (possibly as a union) to it and to allocate it
> dynamically instead of having it on the stack.
>
> Would you be fine doing this?
Another option might be to factor out/modify code from
xenbus_unmap_ring() and call the resulting code from
__xenbus_map_ring()'s fail path.
-boris
On 05.05.20 18:12, Boris Ostrovsky wrote:
>
> On 5/5/20 12:02 PM, Jürgen Groß wrote:
>> On 05.05.20 17:01, Arnd Bergmann wrote:
>>> On Tue, May 5, 2020 at 4:34 PM Jürgen Groß <[email protected]> wrote:
>>>> On 05.05.20 16:15, Arnd Bergmann wrote:
>>>>> The __xenbus_map_ring() function has two large arrays, 'map' and
>>>>> 'unmap' on its stack. When clang decides to inline it into its caller,
>>>>> xenbus_map_ring_valloc_hvm(), the total stack usage exceeds the
>>>>> warning
>>>>> limit for stack size on 32-bit architectures.
>>>>>
>>>>> drivers/xen/xenbus/xenbus_client.c:592:12: error: stack frame size
>>>>> of 1104 bytes in function 'xenbus_map_ring_valloc_hvm'
>>>>> [-Werror,-Wframe-larger-than=]
>>>>>
>>>>> As far as I can tell, other compilers don't inline it here, so we get
>>>>> no warning, but the stack usage is actually the same. It is possible
>>>>> for both arrays to use the same location on the stack, but the
>>>>> compiler
>>>>> cannot prove that this is safe because they get passed to external
>>>>> functions that may end up using them until they go out of scope.
>>>>>
>>>>> Move the two arrays into separate basic blocks to limit the scope
>>>>> and force them to occupy less stack in total, regardless of the
>>>>> inlining decision.
>>>>
>>>> Why don't you put both arrays into a union?
>>>
>>> I considered that as well, and don't really mind either way. I think
>>> it does
>>> get a bit ugly whatever we do. If you prefer the union, I can respin the
>>> patch that way.
>>
>> Hmm, thinking more about it I think the real clean solution would be to
>> extend struct map_ring_valloc_hvm to cover the pv case, too, to add the
>> map and unmap arrays (possibly as a union) to it and to allocate it
>> dynamically instead of having it on the stack.
>>
>> Would you be fine doing this?
>
>
>
> Another option might be to factor out/modify code from
> xenbus_unmap_ring() and call the resulting code from
> __xenbus_map_ring()'s fail path.
This will still allocate large arrays on the stack. If we ever increase
the max ring page order, the stack usage will explode.
Juergen
On Tue, May 5, 2020 at 6:02 PM Jürgen Groß <[email protected]> wrote:
> On 05.05.20 17:01, Arnd Bergmann wrote:
> > On Tue, May 5, 2020 at 4:34 PM Jürgen Groß <[email protected]> wrote:
> >> On 05.05.20 16:15, Arnd Bergmann wrote:
> >
> > I considered that as well, and don't really mind either way. I think it does
> > get a bit ugly whatever we do. If you prefer the union, I can respin the
> > patch that way.
>
> Hmm, thinking more about it I think the real clean solution would be to
> extend struct map_ring_valloc_hvm to cover the pv case, too, to add the
> map and unmap arrays (possibly as a union) to it and to allocate it
> dynamically instead of having it on the stack.
>
> Would you be fine doing this?
This is a little more complex than I'd want to do without doing any testing
(and no, I don't want to do the testing either) ;-)
It does sound like a better approach though.
Arnd
On 05.05.20 22:57, Arnd Bergmann wrote:
> On Tue, May 5, 2020 at 6:02 PM Jürgen Groß <[email protected]> wrote:
>> On 05.05.20 17:01, Arnd Bergmann wrote:
>>> On Tue, May 5, 2020 at 4:34 PM Jürgen Groß <[email protected]> wrote:
>>>> On 05.05.20 16:15, Arnd Bergmann wrote:
>>>
>>> I considered that as well, and don't really mind either way. I think it does
>>> get a bit ugly whatever we do. If you prefer the union, I can respin the
>>> patch that way.
>>
>> Hmm, thinking more about it I think the real clean solution would be to
>> extend struct map_ring_valloc_hvm to cover the pv case, too, to add the
>> map and unmap arrays (possibly as a union) to it and to allocate it
>> dynamically instead of having it on the stack.
>>
>> Would you be fine doing this?
>
> This is a little more complex than I'd want to do without doing any testing
> (and no, I don't want to do the testing either) ;-)
>
> It does sound like a better approach though.
I take this to mean you are fine with me writing the patch and adding
you as "Reported-by:"?
Juergen
On Wed, May 6, 2020 at 7:12 AM Jürgen Groß <[email protected]> wrote:
>
> On 05.05.20 22:57, Arnd Bergmann wrote:
> > On Tue, May 5, 2020 at 6:02 PM Jürgen Groß <[email protected]> wrote:
> >> On 05.05.20 17:01, Arnd Bergmann wrote:
> >>> On Tue, May 5, 2020 at 4:34 PM Jürgen Groß <[email protected]> wrote:
> >>>> On 05.05.20 16:15, Arnd Bergmann wrote:
> >>>
> >>> I considered that as well, and don't really mind either way. I think it does
> >>> get a bit ugly whatever we do. If you prefer the union, I can respin the
> >>> patch that way.
> >>
> >> Hmm, thinking more about it I think the real clean solution would be to
> >> extend struct map_ring_valloc_hvm to cover the pv case, too, to add the
> >> map and unmap arrays (possibly as a union) to it and to allocate it
> >> dynamically instead of having it on the stack.
> >>
> >> Would you be fine doing this?
> >
> > This is a little more complex than I'd want to do without doing any testing
> > (and no, I don't want to do the testing either) ;-)
> >
> > It does sound like a better approach though.
>
> I take this as you are fine with me writing the patch and adding you as
> "Reported-by:"?
Yes, definitely. Thanks!
Arnd