group_share and group_runnable are tracked as 'unsigned long',
but some functions compute them as 'long', and the result is
ultimately assigned back to 'unsigned long' variables in
reweight_entity().

Since there is no reason to use a different, signed type here,
this change improves code consistency and avoids further type
conversions. More importantly, it prevents the undefined behavior
caused by signed overflow.
Computing them as 'long' resulted in the following UBSAN reports
(on top of v4.19.34):
==============================================================================
UBSAN: Undefined behaviour in kernel/sched/fair.c:3055:9
signed integer overflow:
1048576 * 9144968455305 cannot be represented in type 'long int'
dump_backtrace+0x0/0x338
show_stack+0x28/0x38
dump_stack+0xc8/0x100
ubsan_epilogue+0x18/0x6c
handle_overflow+0x170/0x1c0
__ubsan_handle_mul_overflow+0x34/0x44
update_cfs_group+0x244/0x248
dequeue_entity+0x478/0x12c0
dequeue_task_fair+0x6c/0xd98
__sched_setscheduler+0x320/0xdf0
_sched_setscheduler+0xf4/0x158
do_sched_setscheduler+0x118/0x1a0
__arm64_sys_sched_setscheduler+0x50/0x70
el0_svc_common+0xf4/0x258
el0_svc_handler+0x50/0xa8
==============================================================================
UBSAN: Undefined behaviour in kernel/sched/fair.c:3111:11
signed integer overflow:
97833896519391 * 952504 cannot be represented in type 'long int'
Call trace:
dump_backtrace+0x0/0x338
show_stack+0x28/0x38
dump_stack+0xc8/0x100
ubsan_epilogue+0x18/0x6c
handle_overflow+0x170/0x1c0
__ubsan_handle_mul_overflow+0x34/0x44
update_cfs_group+0x210/0x248
enqueue_entity+0x7b4/0x1868
enqueue_task_fair+0x12c/0xe70
__sched_setscheduler+0x4cc/0xdf0
_sched_setscheduler+0xf4/0x158
do_sched_setscheduler+0x118/0x1a0
__arm64_sys_sched_setscheduler+0x50/0x70
el0_svc_common+0xf4/0x258
el0_svc_handler+0x50/0xa8
el0_svc+0x8/0xc
==============================================================================
Cc: [email protected]
Signed-off-by: Cheng Jian <[email protected]>
---
kernel/sched/fair.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fdab7eb6f351..cf003a31c220 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2920,9 +2920,9 @@ void reweight_task(struct task_struct *p, int prio)
*
* hence icky!
*/
-static long calc_group_shares(struct cfs_rq *cfs_rq)
+static unsigned long calc_group_shares(struct cfs_rq *cfs_rq)
{
- long tg_weight, tg_shares, load, shares;
+ unsigned long tg_weight, tg_shares, load, shares;
struct task_group *tg = cfs_rq->tg;
tg_shares = READ_ONCE(tg->shares);
@@ -2951,7 +2951,7 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
* case no task is runnable on a CPU MIN_SHARES=2 should be returned
* instead of 0.
*/
- return clamp_t(long, shares, MIN_SHARES, tg_shares);
+ return clamp_t(unsigned long, shares, MIN_SHARES, tg_shares);
}
/*
@@ -2981,9 +2981,9 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
* Where these max() serve both to use the 'instant' values to fix the slow
* from-idle and avoid the /0 on to-idle, similar to (6).
*/
-static long calc_group_runnable(struct cfs_rq *cfs_rq, long shares)
+static unsigned long calc_group_runnable(struct cfs_rq *cfs_rq, long shares)
{
- long runnable, load_avg;
+ unsigned long runnable, load_avg;
load_avg = max(cfs_rq->avg.load_avg,
scale_load_down(cfs_rq->load.weight));
@@ -2995,7 +2995,7 @@ static long calc_group_runnable(struct cfs_rq *cfs_rq, long shares)
if (load_avg)
runnable /= load_avg;
- return clamp_t(long, runnable, MIN_SHARES, shares);
+ return clamp_t(unsigned long, runnable, MIN_SHARES, shares);
}
#endif /* CONFIG_SMP */
@@ -3008,7 +3008,7 @@ static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
static void update_cfs_group(struct sched_entity *se)
{
struct cfs_rq *gcfs_rq = group_cfs_rq(se);
- long shares, runnable;
+ unsigned long shares, runnable;
if (!gcfs_rq)
return;
--
2.17.1
On Sat, Apr 13, 2019 at 03:32:34AM +0000, Cheng Jian wrote:
> group_share and group_runnable are tracked as 'unsigned long',
> but some functions compute them as 'long', and the result is
> ultimately assigned back to 'unsigned long' variables in
> reweight_entity().
>
> Since there is no reason to use a different, signed type here,
> this change improves code consistency and avoids further type
> conversions. More importantly, it prevents the undefined behavior
> caused by signed overflow.
There is no undefined behaviour due to overflow. UBSAN is broken,
upgrade to GCC8 or later.
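(For context: the kernel builds with -fno-strict-overflow, which gives signed
arithmetic wraparound semantics, and GCC 8's -fsanitize=signed-integer-overflow
presumably honours that while older releases still instrument it. Below is a
minimal stand-alone sketch of those wraparound semantics; it is illustrative
userspace code only, nothing kernel-specific, and assumes a 64-bit build.)

```cpp
/* wrap_demo.c - illustrative only, not kernel code.
 *
 *   gcc -O2 -fwrapv wrap_demo.c && ./a.out
 *
 * With -fwrapv (which is what the kernel's -fno-strict-overflow amounts to
 * for this purpose), the multiplication below wraps instead of being UB;
 * the wrapped value is still numerically wrong, which is the real issue.
 */
#include <stdio.h>

int main(void)
{
	long tg_shares = 1048576;		/* values from the UBSAN report */
	long load = 9144968455305L;
	long shares = tg_shares * load;		/* wraps to a negative value */

	printf("shares = %ld (0x%lx)\n", shares, (unsigned long)shares);
	return 0;
}
```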
Hi, Peter
On 2019/4/15 20:46, Peter Zijlstra wrote:
I wrote a demo for this, which I described as an overflow.
#cat test.c
//test.c
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <assert.h>
int main(void)
{
	long a = 1048576 * 9144968455305;	/* shares = tg_shares * load */
	unsigned long b = a;
	//unsigned long e = 1048576 * 9144968455305;

	printf("LONG_MAX = %ld, 0x%lx\n", LONG_MAX, LONG_MAX);
	printf("ULONG_MAX = %lu, 0x%lx\n", ULONG_MAX, ULONG_MAX);

	if (b > LONG_MAX)
		printf("==overflow!!!==\n");			// OVERFLOW

	printf("a = %20ld,0x%016lx\n", a, a);
	printf("b = %20lu,0x%016lx\n", b, b);

	/* shares /= tg_weight */
	printf("a/3 = 0x%016lx\n", (a / 3));			// WRONG
	printf("a/3 = 0x%016lx\n", (((unsigned long)a) / 3));	// unsigned
	printf("a/3 = 0x%016lx\n", (a / (unsigned long)3));	// unsigned
	printf("b/3 = 0x%016lx\n", b / 3);			// unsigned

	return EXIT_SUCCESS;
}
#./test
LONG_MAX = 9223372036854775807, 0x7fffffffffffffff
ULONG_MAX = 18446744073709551615, 0xffffffffffffffff
==overflow!!!==
a = -8857549630719655936,0x8513a98a48900000
b = 9589194442989895680,0x8513a98a48900000
a/3 = 0xd7068dd8c2daaaab // WRONG
a/3 = 0x2c5be32e18300000
a/3 = 0x2c5be32e18300000
b/3 = 0x2c5be32e18300000
> On Sat, Apr 13, 2019 at 03:32:34AM +0000, Cheng Jian wrote:
>> group_share and group_runnable are tracked as 'unsigned long',
>> but some functions compute them as 'long', and the result is
>> ultimately assigned back to 'unsigned long' variables in
>> reweight_entity().
>>
>> Since there is no reason to use a different, signed type here,
>> this change improves code consistency and avoids further type
>> conversions. More importantly, it prevents the undefined behavior
>> caused by signed overflow.
> There is no undefined behaviour due to overflow. UBSAN is broken,
> upgrade to GCC8 or later.
>
> .
So the function calc_group_shares() will return a wrong value.
```cpp
static long calc_group_shares(struct cfs_rq *cfs_rq)
{
	// ......
	shares = (tg_shares * load);
	if (tg_weight)
		shares /= tg_weight;
}
```
The same applies to calc_group_runnable().
Thanks.
CHENG Jian.
On Mon, Apr 15, 2019 at 11:20:31PM +0800, chengjian (D) wrote:
> Hi, Peter
>
>
> On 2019/4/15 20:46, Peter Zijlstra wrote:
>
> I wrote a demo for this, which I described as an overflow.
I'm not saying there's no overflow, I'm saying there's nothing UB about
it.
On 2019/4/15 23:25, Peter Zijlstra wrote:
> On Mon, Apr 15, 2019 at 11:20:31PM +0800, chengjian (D) wrote:
>> Hi, Peter
>>
>>
>> On 2019/4/15 20:46, Peter Zijlstra wrote:
>>
>> I wrote a demo for this, which I described as an overflow.
> I'm not saying there's no overflow, I'm saying there's nothing UB about
> it.
>
Yeah, I got it.
This may not be undefined behavior, but it is still a bug: a logic
error caused by the overflow.
```cpp
static long calc_group_shares(struct cfs_rq *cfs_rq)
{
	// ......
	// 1048576 * 9144968455305 overflows to -8857549630719655936
	shares = (tg_shares * load);
	if (tg_weight)			// assume tg_weight is 3
		shares /= tg_weight;	// 0xd7068dd8c2daaaab, should be 0x2c5be32e18300000
}
```
It will cause `se->runnable_weight` to have an incorrect value in
reweight_entity().
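To make the whole path concrete, here is a stand-alone sketch of that
conversion chain; the helper names are made up (this is not fair.c), and it
needs -fwrapv so the signed overflow at least stays well defined:

```cpp
/* path_demo.c - hypothetical helpers, not kernel code.
 *
 *   gcc -O2 -fwrapv path_demo.c && ./a.out
 */
#include <stdio.h>

/* Mirrors the old code: everything 'long', so the wrapped product is
 * divided as a signed value before being handed back as unsigned. */
static long calc_shares_signed(long tg_shares, long load, long tg_weight)
{
	long shares = tg_shares * load;		/* wraps negative */

	if (tg_weight)
		shares /= tg_weight;		/* signed division of the wrapped value */
	return shares;
}

/* Mirrors the patched code: the same arithmetic done in 'unsigned long'. */
static unsigned long calc_shares_unsigned(unsigned long tg_shares,
					  unsigned long load,
					  unsigned long tg_weight)
{
	unsigned long shares = tg_shares * load;

	if (tg_weight)
		shares /= tg_weight;		/* unsigned division */
	return shares;
}

int main(void)
{
	/* what would end up in the unsigned weight fields of the sched entity */
	unsigned long w_old = calc_shares_signed(1048576, 9144968455305L, 3);
	unsigned long w_new = calc_shares_unsigned(1048576, 9144968455305UL, 3);

	printf("old (long) path:         0x%016lx\n", w_old);	/* 0xd7068dd8c2daaaab */
	printf("patched (unsigned) path: 0x%016lx\n", w_new);	/* 0x2c5be32e18300000 */
	return 0;
}
```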
Thanks.
CHENG Jian