2020-03-02 03:58:16

by He Zhe

[permalink] [raw]
Subject: disk revalidation updates and OOM

Hi,

Since the following commit
https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
until now(v5.6-rc4),

If we start udisksd service of systemd(v244), systemd-udevd will scan /dev/hdc
(the cdrom device created by default in qemu(v4.2.0)). systemd-udevd will
endlessly run and cause OOM.



It works well by reverting the following series of commits.

979c690d block: move clearing bd_invalidated into check_disk_size_change
f0b870d block: remove (__)blkdev_reread_part as an exported API
142fe8f block: fix bdev_disk_changed for non-partitioned devices
a1548b6 block: move rescan_partitions to fs/block_dev.c
6917d06 block: merge invalidate_partitions into rescan_partitions



I found the number of some block events increase thousands per second.

root@qemux86:~# perf top -e block:*
9 block:block_touch_buffer
2 block:block_dirty_buffer
0 block:block_rq_requeue
307K block:block_rq_complete
174K block:block_rq_insert
174K block:block_rq_issue
0 block:block_bio_bounce
0 block:block_bio_complete
2 block:block_bio_backmerge
0 block:block_bio_frontmerge
9 block:block_bio_queue
7 block:block_getrq
0 block:block_sleeprq
4 block:block_plug
4 block:block_unplug
0 block:block_split
0 block:block_bio_remap
0 block:block_rq_remap



Here is the strace log from systemd-udevd. It repeats the following actions
endlessly.

epoll_wait(3, [{EPOLLIN, {u32=6274288, u64=6274288}}], 2, -1) = 1                                                                           
clock_gettime64(CLOCK_REALTIME, {tv_sec=1582858384, tv_nsec=264944457}) = 0                                                                 
clock_gettime64(CLOCK_MONOTONIC, {tv_sec=146, tv_nsec=493809949}) = 0                                                                       
clock_gettime64(CLOCK_BOOTTIME, {tv_sec=146, tv_nsec=502760142}) = 0                                                                        
recvmsg(14, {msg_name={sa_family=AF_NETLINK, nl_pid=-206275536, nl_groups=00000000}, msg_namelen=128->12, msg_iov=[{iov_base={{len=1969383786
getrandom("\x05\xca\xeb\xf4\x3f\x01\xb8\x0f\x7c\x89\xf9\x4b\x46\x73\x9b\xd5", 16, GRND_NONBLOCK) = 16                                       
clock_gettime64(CLOCK_MONOTONIC, {tv_sec=146, tv_nsec=613235420}) = 0                                                                       
openat(AT_FDCWD, "/dev/hdc", O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC) = 6                                                      
flock(6, LOCK_SH|LOCK_NB)               = 0                                                                                                 
openat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/ide1/1.0/block/hdc/uevent", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 15                     
fstat64(15, {st_mode=S_IFREG|0644, st_size=4096, ...}) = 0                                                                                  
fstat64(15, {st_mode=S_IFREG|0644, st_size=4096, ...}) = 0                                                                                  
read(15, "MAJOR=22\nMINOR=0\nDEVNAME=hdc\nDEV"..., 4096) = 42
read(15, "", 4096)                      = 0
close(15)                               = 0
openat(AT_FDCWD, "/run/udev/data/b22:0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 15
fstat64(15, {st_mode=S_IFREG|0644, st_size=34, ...}) = 0
fstat64(15, {st_mode=S_IFREG|0644, st_size=34, ...}) = 0
read(15, "I:4680519\nE:ID_FS_TYPE=\nG:system"..., 4096) = 34
read(15, "", 4096)                      = 0
close(15)                               = 0
getrandom("\x22\xda\x6d\x9d\x97\x44\xcc\x2d\x82\x52\x00\xb4\x7b\x75\x8d\x6a", 16, GRND_NONBLOCK) = 16
openat(AT_FDCWD, "/proc/cmdline", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 15
fstat64(15, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(15, "root=/dev/vda rw  console=ttyS0 "..., 1024) = 118
ioctl(15, TCGETS, 0xbfb8b7ec)           = -1 ENOTTY (Inappropriate ioctl for device)
read(15, "", 1024)                      = 0
close(15)                               = 0
openat(AT_FDCWD, "/proc/cmdline", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 15
fstat64(15, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(15, "root=/dev/vda rw  console=ttyS0 "..., 1024) = 118
ioctl(15, TCGETS, 0xbfb8b7ec)           = -1 ENOTTY (Inappropriate ioctl for device)
read(15, "", 1024)                      = 0
close(15)                               = 0
openat(AT_FDCWD, "/", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
openat(15, "sys", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "devices", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "pci0000:00", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "0000:00:01.1", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "ide1", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "1.0", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "block", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
close(16)                               = 0
access("/sys/devices/pci0000:00/0000:00:01.1/ide1/1.0/block/uevent", F_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
openat(15, "sys", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "devices", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "pci0000:00", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "0000:00:01.1", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "ide1", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "1.0", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
close(15)                               = 0
access("/sys/devices/pci0000:00/0000:00:01.1/ide1/1.0/uevent", F_OK) = 0
openat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/ide1/1.0/uevent", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 15
fstat64(15, {st_mode=S_IFREG|0644, st_size=4096, ...}) = 0
fstat64(15, {st_mode=S_IFREG|0644, st_size=4096, ...}) = 0
read(15, "DRIVER=ide-cdrom\nMEDIA=cdrom\nDRI"..., 4096) = 64
read(15, "", 4096)                      = 0
close(15)                               = 0
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/ide1/1.0/subsystem", "../../../../../bus/ide", 4096) = 22
openat(AT_FDCWD, "/run/udev/data/+ide:1.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/run/udev/data/+ide:1.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/run/udev/data/+ide:1.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/run/udev/data/+ide:1.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/run/udev/data/+ide:1.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/run/udev/data/+ide:1.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/run/udev/data/+ide:1.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
openat(15, "sys", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "devices", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "pci0000:00", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "0000:00:01.1", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "ide1", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
close(16)                               = 0
access("/sys/devices/pci0000:00/0000:00:01.1/ide1/uevent", F_OK) = 0
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/ide1/subsystem", 0x5f5b30, 4096) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
openat(15, "sys", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "devices", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "pci0000:00", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "0000:00:01.1", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
close(15)                               = 0
access("/sys/devices/pci0000:00/0000:00:01.1/uevent", F_OK) = 0
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/subsystem", "../../../bus/pci", 4096) = 16
openat(AT_FDCWD, "/", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
openat(15, "sys", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "devices", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
openat(15, "pci0000:00", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(15)                               = 0
close(16)                               = 0
access("/sys/devices/pci0000:00/uevent", F_OK) = 0
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/subsystem", 0x5f5b30, 4096) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15                                                                
openat(15, "sys", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 16
fstat64(16, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
close(15)                               = 0
openat(16, "devices", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
close(16)                               = 0
close(15)                               = 0
access("/sys/devices/uevent", F_OK)     = -1 ENOENT (No such file or directory)
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/ide1/1.0/block/hdc/driver", 0x5f5b30, 4096) = -1 ENOENT (No such file or director)
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/ide1/1.0/driver", "../../../../../bus/ide/drivers/i"..., 4096) = 40
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/ide1/driver", 0x5f5b30, 4096) = -1 ENOENT (No such file or directory)
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/0000:00:01.1/driver", "../../../bus/pci/drivers/PIIX_ID"..., 4096) = 33
readlinkat(AT_FDCWD, "/sys/devices/pci0000:00/driver", 0x5f5b30, 4096) = -1 ENOENT (No such file or directory)
lstat64("/dev/hdc", {st_mode=S_IFBLK|0660, st_rdev=makedev(0x16, 0), ...}) = 0
utimensat_time64(AT_FDCWD, "/dev/hdc", NULL, 0) = 0
lstat64("/dev/block/22:0", {st_mode=S_IFLNK|0777, st_size=6, ...}) = 0
readlinkat(AT_FDCWD, "/dev/block/22:0", "../hdc", 4096) = 6
utimensat_time64(AT_FDCWD, "/dev/block/22:0", NULL, AT_SYMLINK_NOFOLLOW) = 0
stat64("/run/udev/tags/systemd", {st_mode=S_IFDIR|0755, st_size=720, ...}) = 0
openat(AT_FDCWD, "/run/udev/tags/systemd/b22:0", O_RDONLY|O_LARGEFILE|O_NOFOLLOW|O_CLOEXEC|O_PATH) = 15
fstat64(15, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
utimensat_time64(AT_FDCWD, "/proc/self/fd/15", NULL, 0) = 0
close(15)                               = 0
stat64("/run/udev/data", {st_mode=S_IFDIR|0755, st_size=4180, ...}) = 0
umask(077)                              = 022
getpid()                                = 404
clock_gettime64(CLOCK_MONOTONIC, {tv_sec=147, tv_nsec=105469823}) = 0
openat(AT_FDCWD, "/run/udev/data/.#b22:03qtXsM", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE|O_CLOEXEC, 0600) = 15
umask(022)                              = 077
fcntl64(15, F_GETFL)                    = 0x8002 (flags O_RDWR|O_LARGEFILE)
fchmod(15, 0644)                        = 0
fstat64(15, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
write(15, "I:4680519\nE:ID_FS_TYPE=\nG:system"..., 34) = 34
rename("/run/udev/data/.#b22:03qtXsM", "/run/udev/data/b22:0") = 0
close(15)                               = 0
close(6)                                = 0
sendmsg(14, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=0x000002}, msg_namelen=12, msg_iov=[{iov_base={{len=1969383788, type=0x65648
write(7, "", 0)                         = 0
epoll_wait(3, [{EPOLLIN, {u32=6274288, u64=6274288}}], 2, -1) = 1



Thanks,
Zhe


2020-03-04 13:38:17

by Jan Kara

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

Hi!

On Mon 02-03-20 11:55:44, He Zhe wrote:
> Since the following commit
> https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
> until now(v5.6-rc4),
>
> If we start udisksd service of systemd(v244), systemd-udevd will scan
> /dev/hdc (the cdrom device created by default in qemu(v4.2.0)).
> systemd-udevd will endlessly run and cause OOM.

Thanks for report! The commit you mention has this:

There is a small behavior change in that we now send the kevent change
notice also if we were not invalidating but no partitions were found, which
seems like the right thing to do.

And apparently this confuses systemd-udevd because it tries to open
/dev/hdc in response to KOBJ_CHANGE event on that device and the open calls
rescan_partitions() which generates another KOBJ_CHANGE event. So I'm
afraid we'll have to revert to the old behavior of not sending KOBJ_CHANGE
event when there are no partitions found. Christoph?

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2020-03-04 16:26:57

by Christoph Hellwig

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Wed, Mar 04, 2020 at 02:37:38PM +0100, Jan Kara wrote:
> Hi!
>
> On Mon 02-03-20 11:55:44, He Zhe wrote:
> > Since the following commit
> > https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
> > until now(v5.6-rc4),
> >
> > If we start udisksd service of systemd(v244), systemd-udevd will scan
> > /dev/hdc (the cdrom device created by default in qemu(v4.2.0)).
> > systemd-udevd will endlessly run and cause OOM.
>
> Thanks for report! The commit you mention has this:
>
> There is a small behavior change in that we now send the kevent change
> notice also if we were not invalidating but no partitions were found, which
> seems like the right thing to do.
>
> And apparently this confuses systemd-udevd because it tries to open
> /dev/hdc in response to KOBJ_CHANGE event on that device and the open calls
> rescan_partitions() which generates another KOBJ_CHANGE event. So I'm
> afraid we'll have to revert to the old behavior of not sending KOBJ_CHANGE
> event when there are no partitions found. Christoph?

Looks like it. Let me figure out how to best do that.

2020-03-07 14:30:28

by Christoph Hellwig

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

So I looked into this, and if it was just the uevent this
should have been fixed in:

"block: don't send uevent for empty disk when not invalidating"

from Eric Biggers in December. Does the problem still occur with that
patch applied?

2020-03-08 11:01:05

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/7/20 10:29 PM, Christoph Hellwig wrote:
> So I looked into this, and if it was just the uevent this
> should have been fixed in:
>
> "block: don't send uevent for empty disk when not invalidating"
>
> from Eric Biggers in December. Does the problem still occur with that
> patch applied?

Yes, it occurs with that patch. The patch that introduces the issue and the one
you mentioned above both went in in v5.5-rc1. I found this issue in v5.5
release.

Thanks,
Zhe


2020-03-10 07:42:11

by Christoph Hellwig

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Mon, Mar 02, 2020 at 11:55:44AM +0800, He Zhe wrote:
> Hi,
>
> Since the following commit
> https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
> until now(v5.6-rc4),
>
> If we start udisksd service of systemd(v244), systemd-udevd will scan /dev/hdc
> (the cdrom device created by default in qemu(v4.2.0)). systemd-udevd will
> endlessly run and cause OOM.
>
>
>
> It works well by reverting the following series of commits.
>
> 979c690d block: move clearing bd_invalidated into check_disk_size_change
> f0b870d block: remove (__)blkdev_reread_part as an exported API
> 142fe8f block: fix bdev_disk_changed for non-partitioned devices
> a1548b6 block: move rescan_partitions to fs/block_dev.c
> 6917d06 block: merge invalidate_partitions into rescan_partitions

So this is the exact requirement of commits to be reverted from a bisect
or just a first guess?

2020-03-10 15:31:20

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/10/20 3:40 PM, Christoph Hellwig wrote:
> On Mon, Mar 02, 2020 at 11:55:44AM +0800, He Zhe wrote:
>> Hi,
>>
>> Since the following commit
>> https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
>> until now(v5.6-rc4),
>>
>> If we start udisksd service of systemd(v244), systemd-udevd will scan /dev/hdc
>> (the cdrom device created by default in qemu(v4.2.0)). systemd-udevd will
>> endlessly run and cause OOM.
>>
>>
>>
>> It works well by reverting the following series of commits.
>>
>> 979c690d block: move clearing bd_invalidated into check_disk_size_change
>> f0b870d block: remove (__)blkdev_reread_part as an exported API
>> 142fe8f block: fix bdev_disk_changed for non-partitioned devices
>> a1548b6 block: move rescan_partitions to fs/block_dev.c
>> 6917d06 block: merge invalidate_partitions into rescan_partitions
> So this is the exact requirement of commits to be reverted from a bisect
> or just a first guess?

Many commits failed to build or boot during bisection.

At least the following four have to be reverted to make it work.

979c690d block: move clearing bd_invalidated into check_disk_size_change
f0b870d block: remove (__)blkdev_reread_part as an exported API
142fe8f block: fix bdev_disk_changed for non-partitioned devices
a1548b6 block: move rescan_partitions to fs/block_dev.c

Regards,
Zhe


2020-03-10 16:27:51

by Christoph Hellwig

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Tue, Mar 10, 2020 at 11:30:27PM +0800, He Zhe wrote:
> > So this is the exact requirement of commits to be reverted from a bisect
> > or just a first guess?
>
> Many commits failed to build or boot during bisection.
>
> At least the following four have to be reverted to make it work.
>
> 979c690d block: move clearing bd_invalidated into check_disk_size_change
> f0b870d block: remove (__)blkdev_reread_part as an exported API
> 142fe8f block: fix bdev_disk_changed for non-partitioned devices
> a1548b6 block: move rescan_partitions to fs/block_dev.c

Just to make sure we are on the same page: if you revert all four it
works, if you rever all but

a1548b6 block: move rescan_partitions to fs/block_dev.c

it doesn't?

2020-03-11 04:04:56

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/11/20 12:26 AM, Christoph Hellwig wrote:
> On Tue, Mar 10, 2020 at 11:30:27PM +0800, He Zhe wrote:
>>> So this is the exact requirement of commits to be reverted from a bisect
>>> or just a first guess?
>> Many commits failed to build or boot during bisection.
>>
>> At least the following four have to be reverted to make it work.
>>
>> 979c690d block: move clearing bd_invalidated into check_disk_size_change
>> f0b870d block: remove (__)blkdev_reread_part as an exported API
>> 142fe8f block: fix bdev_disk_changed for non-partitioned devices
>> a1548b6 block: move rescan_partitions to fs/block_dev.c
> Just to make sure we are on the same page: if you revert all four it
> works, if you rever all but
>
> a1548b6 block: move rescan_partitions to fs/block_dev.c
>
> it doesn't?

After reverting 142fe8f, rescan_partitions would be called in block/ioctl.c
and cause a build failure. So I need to also revert a1548b6 to provide
rescan_partitions.

OR if I manually add the following diff instead of reverting a1548b6, then yes,
it works too.

diff --git a/block/ioctl.c b/block/ioctl.c
index 8d724d11c8f5..bac562604cd0 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -192,6 +192,7 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
  * acquire bd_mutex. This API should be used in case that
  * caller has held bd_mutex already.
  */
+extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev, bool invalidate);
 int __blkdev_reread_part(struct block_device *bdev)
 {
        struct gendisk *disk = bdev->bd_disk;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ec10dacd18d0..30da0bc85c31 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1508,7 +1508,7 @@ EXPORT_SYMBOL(bd_set_size);

 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

-static int rescan_partitions(struct gendisk *disk, struct block_device *bdev,
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev,
                bool invalidate)
 {
        int ret;


Zhe


2020-03-11 10:30:01

by Martin Wilck

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Mon, 2020-03-02 at 11:55 +0800, He Zhe wrote:
> Hi,
>
> Since the following commit
> https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
> until now(v5.6-rc4),
>
> If we start udisksd service of systemd(v244), systemd-udevd will scan
> /dev/hdc
> (the cdrom device created by default in qemu(v4.2.0)). systemd-udevd
> will
> endlessly run and cause OOM.

I've tried to reproduce this, but so far I haven't been able to.
Perhaps because the distro 5.5.7 kernel I've tried (which contains the
offending commit 142fe8f) has no IDE support - the qemu IDE CD shows up
as sr0, with the ata_piix driver. I have systemd-udevd 244. Enabling
udisksd makes no difference, the system runs stably. ISO images can
be "ejected" and loaded, single uevents are received and processed.

Does this happen for you if you use ata_piix?

Regards
Martin


2020-03-11 15:13:12

by Martin Wilck

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Wed, 2020-03-11 at 11:29 +0100, Martin Wilck wrote:
> On Mon, 2020-03-02 at 11:55 +0800, He Zhe wrote:
> > Hi,
> >
> > Since the following commit
> > https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
> > until now(v5.6-rc4),
> >
> > If we start udisksd service of systemd(v244), systemd-udevd will
> > scan
> > /dev/hdc
> > (the cdrom device created by default in qemu(v4.2.0)). systemd-
> > udevd
> > will
> > endlessly run and cause OOM.
>
> I've tried to reproduce this, but so far I haven't been able to.
> Perhaps because the distro 5.5.7 kernel I've tried (which contains
> the
> offending commit 142fe8f) has no IDE support - the qemu IDE CD shows
> up
> as sr0, with the ata_piix driver. I have systemd-udevd 244. Enabling
> udisksd makes no difference, the system runs stably. ISO images can
> be "ejected" and loaded, single uevents are received and processed.
>
> Does this happen for you if you use ata_piix?

I have enabled the ATA drivers on my test system now, and I still don't
see the issue. "hd*" for CDROM devices has been marked deprecated in
udev since 2009 (!).

Is it possible that you have the legacy udisksd running, and didn't
disable CD-ROM polling?

Martin


2020-03-11 15:55:57

by Christoph Hellwig

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Wed, Mar 11, 2020 at 12:03:43PM +0800, He Zhe wrote:
> >> 979c690d block: move clearing bd_invalidated into check_disk_size_change
> >> f0b870d block: remove (__)blkdev_reread_part as an exported API
> >> 142fe8f block: fix bdev_disk_changed for non-partitioned devices
> >> a1548b6 block: move rescan_partitions to fs/block_dev.c
> > Just to make sure we are on the same page: if you revert all four it
> > works, if you rever all but
> >
> > a1548b6 block: move rescan_partitions to fs/block_dev.c
> >
> > it doesn't?
>
> After reverting 142fe8f, rescan_partitions would be called in block/ioctl.c
> and cause a build failure. So I need to also revert a1548b6 to provide
> rescan_partitions.
>
> OR if I manually add the following diff instead of reverting a1548b6, then yes,
> it works too.

Ok, so 142fe8f is good except for the build failure.

Do 142fe8f and 979c690d work with the build fix applied? (f0b870d
shouldn't be interesting for this case).

2020-03-16 11:02:54

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/11/20 11:11 PM, Martin Wilck wrote:
> On Wed, 2020-03-11 at 11:29 +0100, Martin Wilck wrote:
>> On Mon, 2020-03-02 at 11:55 +0800, He Zhe wrote:
>>> Hi,
>>>
>>> Since the following commit
>>> https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.5/disk-revalidate&id=6917d0689993f46d97d40dd66c601d0fd5b1dbdd
>>> until now(v5.6-rc4),
>>>
>>> If we start udisksd service of systemd(v244), systemd-udevd will
>>> scan
>>> /dev/hdc
>>> (the cdrom device created by default in qemu(v4.2.0)). systemd-
>>> udevd
>>> will
>>> endlessly run and cause OOM.
>> I've tried to reproduce this, but so far I haven't been able to.
>> Perhaps because the distro 5.5.7 kernel I've tried (which contains
>> the
>> offending commit 142fe8f) has no IDE support - the qemu IDE CD shows
>> up
>> as sr0, with the ata_piix driver. I have systemd-udevd 244. Enabling
>> udisksd makes no difference, the system runs stably. ISO images can
>> be "ejected" and loaded, single uevents are received and processed.
>>
>> Does this happen for you if you use ata_piix?
> I have enabled the ATA drivers on my test system now, and I still don't
> see the issue. "hd*" for CDROM devices has been marked deprecated in
> udev since 2009 (!).
>
> Is it possible that you have the legacy udisksd running, and didn't
> disable CD-ROM polling?

Thanks for the suggestion, I'll try this ASAP.

Zhe

>
> Martin
>
>

2020-03-16 11:02:58

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/11/20 11:54 PM, Christoph Hellwig wrote:
> On Wed, Mar 11, 2020 at 12:03:43PM +0800, He Zhe wrote:
>>>> 979c690d block: move clearing bd_invalidated into check_disk_size_change
>>>> f0b870d block: remove (__)blkdev_reread_part as an exported API
>>>> 142fe8f block: fix bdev_disk_changed for non-partitioned devices
>>>> a1548b6 block: move rescan_partitions to fs/block_dev.c
>>> Just to make sure we are on the same page: if you revert all four it
>>> works, if you rever all but
>>>
>>> a1548b6 block: move rescan_partitions to fs/block_dev.c
>>>
>>> it doesn't?
>> After reverting 142fe8f, rescan_partitions would be called in block/ioctl.c
>> and cause a build failure. So I need to also revert a1548b6 to provide
>> rescan_partitions.
>>
>> OR if I manually add the following diff instead of reverting a1548b6, then yes,
>> it works too.
> Ok, so 142fe8f is good except for the build failure.
>
> Do 142fe8f and 979c690d work with the build fix applied? (f0b870d
> shouldn't be interesting for this case).

Sorry for slow reply.

With my build fix applied, the issue is triggered since 142fe8f.
And I can see the endless loop of invalidate and revalidate...

Zhe

2020-03-16 11:17:50

by Martin Wilck

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

Hello Zhe,

On Mon, 2020-03-16 at 19:02 +0800, He Zhe wrote:
>
> > Is it possible that you have the legacy udisksd running, and didn't
> > disable CD-ROM polling?
>
> Thanks for the suggestion, I'll try this ASAP.

Since this is difficult to reproduce and you said this happens in a VM,
would you mind uploading the image and qemu settings somewhere, so that
others could have a look?

Regards,
Martin


2020-03-16 11:38:29

by Christoph Hellwig

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Mon, Mar 16, 2020 at 07:01:09PM +0800, He Zhe wrote:
> > Do 142fe8f and 979c690d work with the build fix applied? (f0b870d
> > shouldn't be interesting for this case).
>
> Sorry for slow reply.
>
> With my build fix applied, the issue is triggered since 142fe8f.
> And I can see the endless loop of invalidate and revalidate...

Thanks. Can you test the patch below that restores the previous
rather odd behavior of not clearing the capacity to 0 if partition
scanning is not enabled?


diff --git a/fs/block_dev.c b/fs/block_dev.c
index 69bf2fb6f7cd..daac27f4b821 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1520,10 +1520,13 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
if (ret)
return ret;

- if (invalidate)
- set_capacity(disk, 0);
- else if (disk->fops->revalidate_disk)
- disk->fops->revalidate_disk(disk);
+ if (invalidate) {
+ if (disk_part_scan_enabled(disk))
+ set_capacity(disk, 0);
+ } else {
+ if (disk->fops->revalidate_disk)
+ disk->fops->revalidate_disk(disk);
+ }

check_disk_size_change(disk, bdev, !invalidate);

2020-03-17 08:51:59

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/16/20 7:37 PM, Christoph Hellwig wrote:
> On Mon, Mar 16, 2020 at 07:01:09PM +0800, He Zhe wrote:
>>> Do 142fe8f and 979c690d work with the build fix applied? (f0b870d
>>> shouldn't be interesting for this case).
>> Sorry for slow reply.
>>
>> With my build fix applied, the issue is triggered since 142fe8f.
>> And I can see the endless loop of invalidate and revalidate...
> Thanks. Can you test the patch below that restores the previous
> rather odd behavior of not clearing the capacity to 0 if partition
> scanning is not enabled?

This fixes the issue. I also validated it on v5.6-rc6.

Thank you very much.

Zhe

>
>
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 69bf2fb6f7cd..daac27f4b821 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1520,10 +1520,13 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
> if (ret)
> return ret;
>
> - if (invalidate)
> - set_capacity(disk, 0);
> - else if (disk->fops->revalidate_disk)
> - disk->fops->revalidate_disk(disk);
> + if (invalidate) {
> + if (disk_part_scan_enabled(disk))
> + set_capacity(disk, 0);
> + } else {
> + if (disk->fops->revalidate_disk)
> + disk->fops->revalidate_disk(disk);
> + }
>
> check_disk_size_change(disk, bdev, !invalidate);
>

2020-03-17 08:52:45

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/16/20 7:17 PM, Martin Wilck wrote:
> Hello Zhe,
>
> On Mon, 2020-03-16 at 19:02 +0800, He Zhe wrote:
>>> Is it possible that you have the legacy udisksd running, and didn't
>>> disable CD-ROM polling?
>> Thanks for the suggestion, I'll try this ASAP.
> Since this is difficult to reproduce and you said this happens in a VM,
> would you mind uploading the image and qemu settings somewhere, so that
> others could have a look?

Christoph's diff on the other thread has fixed this issue.

Thank you very much.

Zhe

>
> Regards,
> Martin
>
>

2020-03-17 12:44:10

by Christoph Hellwig

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM

On Tue, Mar 17, 2020 at 04:50:11PM +0800, He Zhe wrote:
> >> With my build fix applied, the issue is triggered since 142fe8f.
> >> And I can see the endless loop of invalidate and revalidate...
> > Thanks. Can you test the patch below that restores the previous
> > rather odd behavior of not clearing the capacity to 0 if partition
> > scanning is not enabled?
>
> This fixes the issue. I also validated it on v5.6-rc6.

Can you check this slight variant that only skips the capacity
change for removable devices given that IIRC you reported the problem
with a legacy ide-cd device?


diff --git a/fs/block_dev.c b/fs/block_dev.c
index 69bf2fb6f7cd..3212ac85d493 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1520,10 +1520,14 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
if (ret)
return ret;

- if (invalidate)
- set_capacity(disk, 0);
- else if (disk->fops->revalidate_disk)
- disk->fops->revalidate_disk(disk);
+ if (invalidate) {
+ if (!(disk->flags & GENHD_FL_REMOVABLE) ||
+ disk_part_scan_enabled(disk))
+ set_capacity(disk, 0);
+ } else {
+ if (disk->fops->revalidate_disk)
+ disk->fops->revalidate_disk(disk);
+ }

check_disk_size_change(disk, bdev, !invalidate);

2020-03-18 06:34:10

by He Zhe

[permalink] [raw]
Subject: Re: disk revalidation updates and OOM



On 3/17/20 8:42 PM, Christoph Hellwig wrote:
> On Tue, Mar 17, 2020 at 04:50:11PM +0800, He Zhe wrote:
>>>> With my build fix applied, the issue is triggered since 142fe8f.
>>>> And I can see the endless loop of invalidate and revalidate...
>>> Thanks. Can you test the patch below that restores the previous
>>> rather odd behavior of not clearing the capacity to 0 if partition
>>> scanning is not enabled?
>> This fixes the issue. I also validated it on v5.6-rc6.
> Can you check this slight variant that only skips the capacity
> change for removable devices given that IIRC you reported the problem
> with a legacy ide-cd device?

Tested. This also works.

Zhe

>
>
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 69bf2fb6f7cd..3212ac85d493 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1520,10 +1520,14 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
> if (ret)
> return ret;
>
> - if (invalidate)
> - set_capacity(disk, 0);
> - else if (disk->fops->revalidate_disk)
> - disk->fops->revalidate_disk(disk);
> + if (invalidate) {
> + if (!(disk->flags & GENHD_FL_REMOVABLE) ||
> + disk_part_scan_enabled(disk))
> + set_capacity(disk, 0);
> + } else {
> + if (disk->fops->revalidate_disk)
> + disk->fops->revalidate_disk(disk);
> + }
>
> check_disk_size_change(disk, bdev, !invalidate);
>