LinuxLists.cc - [bug report] memory corruption panic caused by SG

2018-08-03 08:49:16

Subject: [bug report] memory corruption panic caused by SG_IO ioctl()

2018-08-03 15:08:00

Subject: Re: [bug report] memory corruption panic caused by SG_IO ioctl()

On 2018-08-03 04:46 AM, Wanlong Gao wrote:
> Hi Martin and all folks,
>
>
> Recently we found a kernel panic with memory corruption caused by the SG_IO ioctl().
> It can be easily reproduced by running the following reproducer for a few
> minutes. Any idea?

Which kernel?

And what are the underlying devices (e.g. does /dev/sg0 refer to a SATA disk,
a real SCSI disk (SAS for example), USB mass storage, etc)?

Also can you get a copy of the kernel panic?

Doug Gilbert

>
> C reproducer:
>
> // autogenerated by syzkaller (http://github.com/google/syzkaller)
>
> #define _GNU_SOURCE
>
> #include <endian.h>
>
> #include <sys/syscall.h>
>
> #include <unistd.h>
>
> #include <fcntl.h>
>
> #include <stdio.h>
>
> #include <string.h>
>
> #include <sys/stat.h>
>
> #include <stdint.h>
>
> #include <string.h>
>
> static uintptr_t syz_open_dev(uintptr_t a0, uintptr_t a1, uintptr_t a2)
>
> {
>
> if (a0 == 0xc || a0 == 0xb) {
>
> char buf[128];
>
> sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8_t)a1,
> (uint8_t)a2);
>
> return open(buf, O_RDWR, 0);
>
> } else {
>
> char buf[1024];
>
> char* hash;
>
> strncpy(buf, (char*)a0, sizeof(buf) - 1);
>
> buf[sizeof(buf) - 1] = 0;
>
> while ((hash = strchr(buf, '#'))) {
>
> *hash = '0' + (char)(a1 % 10);
>
> a1 /= 10;
>
> }
>
> return open(buf, a2, 0);
>
> }
>
> }
>
> static void execute_one();
>
> extern unsigned long long procid;
>
> void loop()
>
> {
>
> while (1) {
>
> execute_one();
>
> }
>
> }
>
> uint64_t r[1] = {0xffffffffffffffff};
>
> void execute_one()
>
> {
>
> long res = 0;
>
> memcpy((void*)0x20000040, "/dev/sg#", 9);
>
> res = syz_open_dev(0x20000040, 0, 0);
>
> if (res != -1)
>
> r[0] = res;
>
> *(uint32_t*)0x200002c0 = 0x53;
>
> *(uint32_t*)0x200002c4 = 0;
>
> *(uint8_t*)0x200002c8 = 0xd;
>
> *(uint8_t*)0x200002c9 = 0;
>
> *(uint16_t*)0x200002ca = 0;
>
> *(uint32_t*)0x200002cc = 0x95;
>
> *(uint64_t*)0x200002d0 = 0x20000080;
>
> *(uint64_t*)0x200002d8 = 0x20000000;
>
> memcpy((void*)0x20000000,
> "\x08\xf0\xa8\x77\xd3\xbe\x87\x5d\xda\x65\x79\x3f\xc7", 13);
>
> *(uint64_t*)0x200002e0 = 0x20000180;
>
> *(uint32_t*)0x200002e8 = 0x8001;
>
> *(uint32_t*)0x200002ec = 0x10024;
>
> *(uint32_t*)0x200002f0 = -1;
>
> *(uint64_t*)0x200002f4 = 0x20000280;
>
> *(uint8_t*)0x200002fc = 0;
>
> *(uint8_t*)0x200002fd = 0;
>
> *(uint8_t*)0x200002fe = 0;
>
> *(uint8_t*)0x200002ff = 0;
>
> *(uint16_t*)0x20000300 = 0;
>
> *(uint16_t*)0x20000302 = 0;
>
> *(uint32_t*)0x20000304 = 0;
>
> *(uint32_t*)0x20000308 = 0;
>
> *(uint32_t*)0x2000030c = 0;
>
> syscall(__NR_ioctl, r[0], 0x2285, 0x200002c0);
>
> }
>
> int main()
>
> {
>
> syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
>
> for (;;) {
>
> loop();
>
> }
>
> }
>

2018-08-03 15:48:42

by gaowanlong

[permalink] [raw]

Subject: RE: [bug report] memory corruption panic caused by SG_IO ioctl()

Doug,

On 2018-08-03 04:46 AM, Wanlong Gao wrote:
> Hi Martinand all folks,
>
>
>> Recently we find a kernel panic with memory corruption caused by SG_IO ioctl(),
>> and it can be easily reproduced by running following reproducer about
>> minutes,any idea?

> Which kernel?

We've tested with 4.17.11 and 4.18-rc7, and the problem reproduced on both.

> And what are the underlying devices (e.g. does /dev/sg0 refer to a SATA disk,
> a real SCSI disk (SAS for example), USB mass storage, etc)?

We tested in a qemu-kvm guest, and sg0 refers to a virtual SATA disk.

> Also can you get a copy of the kernel panic?

Since the call traces are different every time the panic is reproduced, I didn't paste a
call trace or the vmcore. However, this reproducer is very useful, and I believe you can
reproduce the panic easily using the following code.

Thanks,
Wanlong

> Doug Gilbert

>
> C reproducer:
>
> // autogenerated by syzkaller (http://github.com/google/syzkaller)
>
> #define _GNU_SOURCE
>
> #include <endian.h>
>
> #include <sys/syscall.h>
>
> #include <unistd.h>
>
> #include <fcntl.h>
>
> #include <stdio.h>
>
> #include <string.h>
>
> #include <sys/stat.h>
>
> #include <stdint.h>
>
> #include <string.h>
>
> static uintptr_t syz_open_dev(uintptr_t a0, uintptr_t a1, uintptr_t a2)
>
> {
>
> if (a0 == 0xc || a0 == 0xb) {
>
> char buf[128];
>
> sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8_t)a1,
> (uint8_t)a2);
>
> return open(buf, O_RDWR, 0);
>
> } else {
>
> char buf[1024];
>
> char* hash;
>
> strncpy(buf, (char*)a0, sizeof(buf) - 1);
>
> buf[sizeof(buf) - 1] = 0;
>
> while ((hash = strchr(buf, '#'))) {
>
> *hash = '0' + (char)(a1 % 10);
>
> a1 /= 10;
>
> }
>
> return open(buf, a2, 0);
>
> }
>
> }
>
> static void execute_one();
>
> extern unsigned long long procid;
>
> void loop()
>
> {
>
> while (1) {
>
> execute_one();
>
> }
>
> }
>
> uint64_t r[1] = {0xffffffffffffffff};
>
> void execute_one()
>
> {
>
> long res = 0;
>
> memcpy((void*)0x20000040, "/dev/sg#", 9);
>
> res = syz_open_dev(0x20000040, 0, 0);
>
> if (res != -1)
>
> r[0] = res;
>
> *(uint32_t*)0x200002c0 = 0x53;
>
> *(uint32_t*)0x200002c4 = 0;
>
> *(uint8_t*)0x200002c8 = 0xd;
>
> *(uint8_t*)0x200002c9 = 0;
>
> *(uint16_t*)0x200002ca = 0;
>
> *(uint32_t*)0x200002cc = 0x95;
>
> *(uint64_t*)0x200002d0 = 0x20000080;
>
> *(uint64_t*)0x200002d8 = 0x20000000;
>
> memcpy((void*)0x20000000,
> "\x08\xf0\xa8\x77\xd3\xbe\x87\x5d\xda\x65\x79\x3f\xc7", 13);
>
> *(uint64_t*)0x200002e0 = 0x20000180;
>
> *(uint32_t*)0x200002e8 = 0x8001;
>
> *(uint32_t*)0x200002ec = 0x10024;
>
> *(uint32_t*)0x200002f0 = -1;
>
> *(uint64_t*)0x200002f4 = 0x20000280;
>
> *(uint8_t*)0x200002fc = 0;
>
> *(uint8_t*)0x200002fd = 0;
>
> *(uint8_t*)0x200002fe = 0;
>
> *(uint8_t*)0x200002ff = 0;
>
> *(uint16_t*)0x20000300 = 0;
>
> *(uint16_t*)0x20000302 = 0;
>
> *(uint32_t*)0x20000304 = 0;
>
> *(uint32_t*)0x20000308 = 0;
>
> *(uint32_t*)0x2000030c = 0;
>
> syscall(__NR_ioctl, r[0], 0x2285, 0x200002c0);
>
> }
>
> int main()
>
> {
>
> syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
>
> for (;;) {
>
> loop();
>
> }
>
> }
>

2018-08-03 16:18:26

by Douglas Gilbert

[permalink] [raw]

Subject: Re: [bug report] memory corruption panic caused by SG_IO ioctl()

On 2018-08-03 11:47 AM, gaowanlong wrote:
> Doug,
>
> On 2018-08-03 04:46 AM, Wanlong Gao wrote:
>> Hi Martinand all folks,
>>
>>
>>> Recently we find a kernel panic with memory corruption caused by SG_IO ioctl(),
>>> and it can be easily reproduced by running following reproducer about
>>> minutes,any idea?
>
>> Which kernel?
>
> We've tested with 4.17.11 and 4.18.rc7 and both reproduced.
>
>> And what are the underlying devices (e.g. does /dev/sg0 refer to a SATA disk,
>> a real SCSI disk (SAS for example), USB mass storage, etc)?
>
> We tested in a qemu-kvm guest and the sg0 refer to a virtual SATA disk.

Thanks for the prompt reply.

The first test I am doing, and you can also do, is to replace the virtual
SATA disk with one or more scsi_debug pseudo SCSI disks. This will tell us
whether libata has a hand in this (as that was the case in a previous
syzkaller report on the SG_IO ioctl()).

>> Also can you get a copy of the kernel panic?
>
> Since the call traces are different every time it reproduced, that I didn't paste the
> call trace or the vmcore, but this reproducer is very useful and I believe you can reproduce
> it easily using the following code.

Okay.

As I write I'm running your reproducer with lk 4.18.0-rc6 against pseudo
scsi_debug "disks". So far no problems (5 minutes) with no noise in syslog.

Doug Gilbert

>> C reproducer:
>>
>> // autogenerated by syzkaller (http://github.com/google/syzkaller)
>>
>> #define _GNU_SOURCE
>>
>> #include <endian.h>
>>
>> #include <sys/syscall.h>
>>
>> #include <unistd.h>
>>
>> #include <fcntl.h>
>>
>> #include <stdio.h>
>>
>> #include <string.h>
>>
>> #include <sys/stat.h>
>>
>> #include <stdint.h>
>>
>> #include <string.h>
>>
>> static uintptr_t syz_open_dev(uintptr_t a0, uintptr_t a1, uintptr_t a2)
>>
>> {
>>
>> if (a0 == 0xc || a0 == 0xb) {
>>
>> char buf[128];
>>
>> sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8_t)a1,
>> (uint8_t)a2);
>>
>> return open(buf, O_RDWR, 0);
>>
>> } else {
>>
>> char buf[1024];
>>
>> char* hash;
>>
>> strncpy(buf, (char*)a0, sizeof(buf) - 1);
>>
>> buf[sizeof(buf) - 1] = 0;
>>
>> while ((hash = strchr(buf, '#'))) {
>>
>> *hash = '0' + (char)(a1 % 10);
>>
>> a1 /= 10;
>>
>> }
>>
>> return open(buf, a2, 0);
>>
>> }
>>
>> }
>>
>> static void execute_one();
>>
>> extern unsigned long long procid;
>>
>> void loop()
>>
>> {
>>
>> while (1) {
>>
>> execute_one();
>>
>> }
>>
>> }
>>
>> uint64_t r[1] = {0xffffffffffffffff};
>>
>> void execute_one()
>>
>> {
>>
>> long res = 0;
>>
>> memcpy((void*)0x20000040, "/dev/sg#", 9);
>>
>> res = syz_open_dev(0x20000040, 0, 0);
>>
>> if (res != -1)
>>
>> r[0] = res;
>>
>> *(uint32_t*)0x200002c0 = 0x53;
>>
>> *(uint32_t*)0x200002c4 = 0;
>>
>> *(uint8_t*)0x200002c8 = 0xd;
>>
>> *(uint8_t*)0x200002c9 = 0;
>>
>> *(uint16_t*)0x200002ca = 0;
>>
>> *(uint32_t*)0x200002cc = 0x95;
>>
>> *(uint64_t*)0x200002d0 = 0x20000080;
>>
>> *(uint64_t*)0x200002d8 = 0x20000000;
>>
>> memcpy((void*)0x20000000,
>> "\x08\xf0\xa8\x77\xd3\xbe\x87\x5d\xda\x65\x79\x3f\xc7", 13);
>>
>> *(uint64_t*)0x200002e0 = 0x20000180;
>>
>> *(uint32_t*)0x200002e8 = 0x8001;
>>
>> *(uint32_t*)0x200002ec = 0x10024;
>>
>> *(uint32_t*)0x200002f0 = -1;
>>
>> *(uint64_t*)0x200002f4 = 0x20000280;
>>
>> *(uint8_t*)0x200002fc = 0;
>>
>> *(uint8_t*)0x200002fd = 0;
>>
>> *(uint8_t*)0x200002fe = 0;
>>
>> *(uint8_t*)0x200002ff = 0;
>>
>> *(uint16_t*)0x20000300 = 0;
>>
>> *(uint16_t*)0x20000302 = 0;
>>
>> *(uint32_t*)0x20000304 = 0;
>>
>> *(uint32_t*)0x20000308 = 0;
>>
>> *(uint32_t*)0x2000030c = 0;
>>
>> syscall(__NR_ioctl, r[0], 0x2285, 0x200002c0);
>>
>> }
>>
>> int main()
>>
>> {
>>
>> syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
>>
>> for (;;) {
>>
>> loop();
>>
>> }
>>
>> }
>>
>
>

2018-08-03 17:45:37

by Douglas Gilbert

[permalink] [raw]

Subject: Re: [bug report] memory corruption panic caused by SG_IO ioctl()

On 2018-08-03 12:17 PM, Douglas Gilbert wrote:
> On 2018-08-03 11:47 AM, gaowanlong wrote:
>> Doug,
>>
>> On 2018-08-03 04:46 AM, Wanlong Gao wrote:
>>> Hi Martinand all folks,
>>>
>>>
>>>> Recently we find a kernel panic with memory corruption caused by SG_IO ioctl(),
>>>> and it can be easily reproduced by running following reproducer about
>>>> minutes,any idea?
>>
>>> Which kernel?
>>
>> We've tested with 4.17.11 and 4.18.rc7 and both reproduced.
>>
>>> And what are the underlying devices (e.g. does /dev/sg0 refer to a SATA disk,
>>> a real SCSI disk (SAS for example), USB mass storage, etc)?
>>
>> We tested in a qemu-kvm guest and the sg0 refer to a virtual SATA disk.
>
> Thanks for the prompt reply.
>
> The first test I am doing, and you can also do, is to replace the virtual
> SATA disk with a scsi_debug pseudo SCSI disk(s). This will tell us
> whether libata has a hand in this (as that was the case in a previous
> syzkaller report on the SG_IO ioctl()).
>
>>> Also can you get a copy of the kernel panic?
>>
>> Since the call traces are different every time it reproduced, that I didn't
>> paste the
>> call trace or the vmcore, but this reproducer is very useful and I believe you
>> can reproduce
>> it easily using the following code.
>
> Okay.
>
> As I write I'm running your reproducer with lk 4.18.0-rc6 against pseudo
> scsi_debug "disks". So far no problems (5 minutes) with no noise in syslog.

It ran for an hour before I stopped it. Before that I did:
echo 1 > /sys/bus/pseudo/drivers/scsi_debug/opts

which causes a lot of noise in syslog. Then I could see every command was
being rejected with "LBA out of range". So I restarted scsi_debug with this:

modprobe scsi_debug max_luns=8 sector_size=4096 virtual_gb=2000 ndelay=5000

That gives 8 pseudo SCSI disks of 2 TB each. Then it worked; this is from syslog:
sd 0:0:0:0: scsi_debug: tag=0x7e, cmd 08 f0 a8 77 d3 be 87 5d da 65 79 3f c7

That is certainly strange: a READ(6) [deprecated] with 13 bytes in the command
(a READ(6) CDB is only 6 bytes long)! But it doesn't seem to hurt scsi_debug.
Still running 15 minutes later ...

Doug Gilbert