2023-05-23 07:48:19

by Christoph Hellwig

[permalink] [raw]
Subject: fix the name_to_dev_t mess

Hi all,

this series tries to sort out accumulated mess around the name_to_dev_t
function. This function is intended to allow looking up the dev_t of a
block device based on a name string before the root file systems is
mounted and thus the normal path based lookup is available.

Unfortunately a few years ago it managed to get exported and used in
non-init contexts, leading to the something looking like a path name
also beeing lookuped up by a different and potential dangerous
algorithm.

This series does a fair amount of refactoring and finally ends up with
the renamed and improved name_to_dev_t only beeing available for the
early init code again.

The series is against Jens' for-6.5/block tree but probably applies
against current mainline just fine as well.

A git tree is also available here:

git://git.infradead.org/users/hch/block.git blk-init-cleanup

Gitweb:

http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/blk-init-cleanup

Diffstat:
Documentation/admin-guide/kernel-parameters.txt | 2
arch/alpha/kernel/setup.c | 2
arch/ia64/kernel/setup.c | 2
arch/powerpc/platforms/powermac/setup.c | 3
block/Makefile | 2
block/early-lookup.c | 315 ++++++++++++++++++
block/genhd.c | 92 -----
drivers/base/dd.c | 6
drivers/md/dm-init.c | 4
drivers/md/dm-snap.c | 14
drivers/md/dm-table.c | 26 -
drivers/md/md-autodetect.c | 3
drivers/mtd/devices/block2mtd.c | 62 ++-
fs/pstore/blk.c | 4
include/linux/blkdev.h | 6
include/linux/device-mapper.h | 2
include/linux/device/driver.h | 2
include/linux/mount.h | 1
include/linux/root_dev.h | 9
init/do_mounts.c | 416 ++++++------------------
init/do_mounts.h | 14
init/do_mounts_initrd.c | 11
kernel/power/hibernate.c | 167 +++++----
kernel/power/power.h | 3
kernel/power/swap.c | 2
25 files changed, 603 insertions(+), 567 deletions(-)


2023-05-23 07:48:43

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 08/24] init: pass root_device_name explicitly

Instead of declaring root_device_name as a global variable pass it as an
argument to the funtions using it.

Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 29 ++++++++++++++++-------------
init/do_mounts.h | 14 +++++++-------
init/do_mounts_initrd.c | 11 ++++++-----
3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index e708b02d9d6566..1405ee7218bf00 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -28,7 +28,6 @@
#include "do_mounts.h"

int root_mountflags = MS_RDONLY | MS_SILENT;
-static char * __initdata root_device_name;
static char __initdata saved_root_name[64];
static int root_wait;

@@ -391,7 +390,7 @@ static int __init do_mount_root(const char *name, const char *fs,
return ret;
}

-void __init mount_root_generic(char *name, int flags)
+void __init mount_root_generic(char *name, char *pretty_name, int flags)
{
struct page *page = alloc_page(GFP_KERNEL);
char *fs_names = page_address(page);
@@ -425,7 +424,7 @@ void __init mount_root_generic(char *name, int flags)
* and give them a list of the available devices
*/
printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
- root_device_name, b, err);
+ pretty_name, b, err);
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");

printk_all_partitions();
@@ -541,7 +540,7 @@ static bool __init fs_is_nodev(char *fstype)
return ret;
}

-static int __init mount_nodev_root(void)
+static int __init mount_nodev_root(char *root_device_name)
{
char *fs_names, *fstype;
int err = -EINVAL;
@@ -569,21 +568,21 @@ static int __init mount_nodev_root(void)
}

#ifdef CONFIG_BLOCK
-static void __init mount_block_root(void)
+static void __init mount_block_root(char *root_device_name)
{
int err = create_dev("/dev/root", ROOT_DEV);

if (err < 0)
pr_emerg("Failed to create /dev/root: %d\n", err);
- mount_root_generic("/dev/root", root_mountflags);
+ mount_root_generic("/dev/root", root_device_name, root_mountflags);
}
#else
-static inline void mount_block_root(void)
+static inline void mount_block_root(char *root_device_name)
{
}
#endif /* CONFIG_BLOCK */

-void __init mount_root(void)
+void __init mount_root(char *root_device_name)
{
switch (ROOT_DEV) {
case Root_NFS:
@@ -593,11 +592,12 @@ void __init mount_root(void)
mount_cifs_root();
break;
case 0:
- if (root_device_name && root_fs_names && mount_nodev_root() == 0)
+ if (root_device_name && root_fs_names &&
+ mount_nodev_root(root_device_name) == 0)
break;
fallthrough;
default:
- mount_block_root();
+ mount_block_root(root_device_name);
break;
}
}
@@ -607,6 +607,8 @@ void __init mount_root(void)
*/
void __init prepare_namespace(void)
{
+ char *root_device_name;
+
if (root_delay) {
printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
root_delay);
@@ -628,7 +630,8 @@ void __init prepare_namespace(void)
root_device_name = saved_root_name;
if (!strncmp(root_device_name, "mtd", 3) ||
!strncmp(root_device_name, "ubi", 3)) {
- mount_root_generic(root_device_name, root_mountflags);
+ mount_root_generic(root_device_name, root_device_name,
+ root_mountflags);
goto out;
}
ROOT_DEV = name_to_dev_t(root_device_name);
@@ -636,7 +639,7 @@ void __init prepare_namespace(void)
root_device_name += 5;
}

- if (initrd_load())
+ if (initrd_load(root_device_name))
goto out;

/* wait for any asynchronous scanning to complete */
@@ -649,7 +652,7 @@ void __init prepare_namespace(void)
async_synchronize_full();
}

- mount_root();
+ mount_root(root_device_name);
out:
devtmpfs_mount();
init_mount(".", "/", NULL, MS_MOVE, NULL);
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 33623025f6951a..15e372b00ce704 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -10,8 +10,8 @@
#include <linux/root_dev.h>
#include <linux/init_syscalls.h>

-void mount_root_generic(char *name, int flags);
-void mount_root(void);
+void mount_root_generic(char *name, char *pretty_name, int flags);
+void mount_root(char *root_device_name);
extern int root_mountflags;

static inline __init int create_dev(char *name, dev_t dev)
@@ -33,11 +33,11 @@ static inline int rd_load_image(char *from) { return 0; }
#endif

#ifdef CONFIG_BLK_DEV_INITRD
-
-bool __init initrd_load(void);
-
+bool __init initrd_load(char *root_device_name);
#else
-
-static inline bool initrd_load(void) { return false; }
+static inline bool initrd_load(char *root_device_name)
+{
+ return false;
+ }

#endif
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 686d1ff3af4bb1..425f4bcf4b77e0 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -83,7 +83,7 @@ static int __init init_linuxrc(struct subprocess_info *info, struct cred *new)
return 0;
}

-static void __init handle_initrd(void)
+static void __init handle_initrd(char *root_device_name)
{
struct subprocess_info *info;
static char *argv[] = { "linuxrc", NULL, };
@@ -95,7 +95,8 @@ static void __init handle_initrd(void)
real_root_dev = new_encode_dev(ROOT_DEV);
create_dev("/dev/root.old", Root_RAM0);
/* mount initrd on rootfs' /root */
- mount_root_generic("/dev/root.old", root_mountflags & ~MS_RDONLY);
+ mount_root_generic("/dev/root.old", root_device_name,
+ root_mountflags & ~MS_RDONLY);
init_mkdir("/old", 0700);
init_chdir("/old");

@@ -117,7 +118,7 @@ static void __init handle_initrd(void)

init_chdir("/");
ROOT_DEV = new_decode_dev(real_root_dev);
- mount_root();
+ mount_root(root_device_name);

printk(KERN_NOTICE "Trying to move old root to /initrd ... ");
error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
@@ -133,7 +134,7 @@ static void __init handle_initrd(void)
}
}

-bool __init initrd_load(void)
+bool __init initrd_load(char *root_device_name)
{
if (mount_initrd) {
create_dev("/dev/ram", Root_RAM0);
@@ -145,7 +146,7 @@ bool __init initrd_load(void)
*/
if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
init_unlink("/initrd.image");
- handle_initrd();
+ handle_initrd(root_device_name);
return true;
}
}
--
2.39.2


2023-05-23 07:48:52

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 17/24] dm-snap: simplify the origin_dev == cow_dev check in snapshot_ctr

Use the block_device acquired in dm_get_device for the check instead
of doing an extra lookup.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/md/dm-snap.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9c49f53760d066..7832974b73eb03 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1241,7 +1241,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
int i;
int r = -EINVAL;
char *origin_path, *cow_path;
- dev_t origin_dev, cow_dev;
unsigned int args_used, num_flush_bios = 1;
fmode_t origin_mode = FMODE_READ;

@@ -1279,24 +1278,21 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Cannot get origin device";
goto bad_origin;
}
- origin_dev = s->origin->bdev->bd_dev;

cow_path = argv[0];
argv++;
argc--;

- cow_dev = dm_get_dev_t(cow_path);
- if (cow_dev && cow_dev == origin_dev) {
- ti->error = "COW device cannot be the same as origin device";
- r = -EINVAL;
- goto bad_cow;
- }
-
r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
if (r) {
ti->error = "Cannot get COW device";
goto bad_cow;
}
+ if (s->cow->bdev && s->cow->bdev == s->origin->bdev) {
+ ti->error = "COW device cannot be the same as origin device";
+ r = -EINVAL;
+ goto bad_store;
+ }

r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
if (r) {
--
2.39.2


2023-05-23 07:49:01

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 14/24] init: clear root_wait on all invalid root= strings

Instead of only clearing root_wait in devt_from_partuuid when the UUID
format was invalid, do that in parse_root_device for all strings that
failed to parse.

Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index f1953aeb321978..0b36a5f39ee8e2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -112,14 +112,14 @@ static int devt_from_partuuid(const char *uuid_str, dev_t *devt)

/* Explicitly fail on poor PARTUUID syntax. */
if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1)
- goto clear_root_wait;
+ goto out_invalid;
cmp.len = slash - uuid_str;
} else {
cmp.len = strlen(uuid_str);
}

if (!cmp.len)
- goto clear_root_wait;
+ goto out_invalid;

dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid);
if (!dev)
@@ -139,12 +139,9 @@ static int devt_from_partuuid(const char *uuid_str, dev_t *devt)
put_device(dev);
return 0;

-clear_root_wait:
+out_invalid:
pr_err("VFS: PARTUUID= is invalid.\n"
"Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
- if (root_wait)
- pr_err("Disabling rootwait; root= is invalid.\n");
- root_wait = 0;
return -EINVAL;
}

@@ -611,6 +608,7 @@ static void __init wait_for_root(char *root_device_name)

static dev_t __init parse_root_device(char *root_device_name)
{
+ int error;
dev_t dev;

if (!strncmp(root_device_name, "mtd", 3) ||
@@ -623,8 +621,14 @@ static dev_t __init parse_root_device(char *root_device_name)
if (strcmp(root_device_name, "/dev/ram") == 0)
return Root_RAM0;

- if (early_lookup_bdev(root_device_name, &dev))
+ error = early_lookup_bdev(root_device_name, &dev);
+ if (error) {
+ if (error == -EINVAL && root_wait) {
+ pr_err("Disabling rootwait; root= is invalid.\n");
+ root_wait = 0;
+ }
return 0;
+ }
return dev;
}

--
2.39.2


2023-05-23 07:49:16

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 21/24] PM: hibernate: don't use early_lookup_bdev in resume_store

resume_store is a sysfs attribute written during normal kernel runtime,
and it should not use the early_lookup_bdev API that bypasses all normal
path based permission checking, and might cause problems with certain
container environments renaming devices.

Switch to lookup_bdev, which does a normal path lookup instead, and fall
back to trying to parse a numeric dev_t just like early_lookup_bdev did.

Note that this strictly speaking changes the kernel ABI as the PARTUUID=
and PARTLABEL= style syntax is now not available during a running
systems. They never were intended for that, but this breaks things
we'll have to figure out a way to make them available again. But if
avoidable in any way I'd rather avoid that.

Signed-off-by: Christoph Hellwig <[email protected]>
Fixes: 421a5fa1a6cf ("PM / hibernate: use name_to_dev_t to parse resume")
---
kernel/power/hibernate.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c52dedb9f7c8e8..7ae95ec72f9902 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -1178,7 +1178,23 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
if (!name)
return -ENOMEM;

- error = early_lookup_bdev(name, &dev);
+ error = lookup_bdev(name, &dev);
+ if (error) {
+ unsigned maj, min, offset;
+ char *p, dummy;
+
+ if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
+ sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset,
+ &dummy) == 3) {
+ dev = MKDEV(maj, min);
+ if (maj != MAJOR(dev) || min != MINOR(dev))
+ error = -EINVAL;
+ } else {
+ dev = new_decode_dev(simple_strtoul(name, &p, 16));
+ if (*p)
+ error = -EINVAL;
+ }
+ }
kfree(name);
if (error)
return error;
--
2.39.2


2023-05-23 07:49:41

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 19/24] dm: remove dm_get_dev_t

Open code dm_get_dev_t in the only remaining caller, and propagate the
exact error code from lookup_bdev and early_lookup_bdev.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/md/dm-table.c | 20 ++++----------------
include/linux/device-mapper.h | 2 --
2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 05aa16da43b0d5..e997f4322a9967 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -323,20 +323,6 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
return 0;
}

-/*
- * Convert the path to a device
- */
-dev_t dm_get_dev_t(const char *path)
-{
- dev_t dev;
-
- if (lookup_bdev(path, &dev) &&
- early_lookup_bdev(path, &dev))
- return 0;
- return dev;
-}
-EXPORT_SYMBOL_GPL(dm_get_dev_t);
-
/*
* Add a device to the list, or just increment the usage count if
* it's already present.
@@ -359,8 +345,10 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
if (MAJOR(dev) != major || MINOR(dev) != minor)
return -EOVERFLOW;
} else {
- dev = dm_get_dev_t(path);
- if (!dev)
+ r = lookup_bdev(path, &dev);
+ if (r)
+ r = early_lookup_bdev(path, &dev);
+ if (r)
return -ENODEV;
}
if (dev == disk_devt(t->md->disk))
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index a52d2b9a68460a..c27b84002d8382 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -170,8 +170,6 @@ struct dm_dev {
char name[16];
};

-dev_t dm_get_dev_t(const char *path);
-
/*
* Constructors should call these functions to ensure destination devices
* are opened/closed correctly.
--
2.39.2


2023-05-23 07:50:56

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 22/24] mtd: block2mtd: factor the early block device open logic into a helper

Simply add_device a bit by splitting out the cumbersome early boot logic
into a separate helper.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mtd/devices/block2mtd.c | 53 +++++++++++++++++++--------------
1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 4c21e9f13bead5..182eed68c75634 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -215,34 +215,18 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
kfree(dev);
}

-
-static struct block2mtd_dev *add_device(char *devname, int erase_size,
- char *label, int timeout)
+static struct block_device *mdtblock_early_get_bdev(const char *devname,
+ fmode_t mode, int timeout, struct block2mtd_dev *dev)
{
+ struct block_device *bdev = ERR_PTR(-ENODEV);
#ifndef MODULE
int i;
-#endif
- const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
- struct block_device *bdev;
- struct block2mtd_dev *dev;
- char *name;
-
- if (!devname)
- return NULL;
-
- dev = kzalloc(sizeof(struct block2mtd_dev), GFP_KERNEL);
- if (!dev)
- return NULL;

- /* Get a handle on the device */
- bdev = blkdev_get_by_path(devname, mode, dev);
-
-#ifndef MODULE
/*
* We might not have the root device mounted at this point.
* Try to resolve the device name by other means.
*/
- for (i = 0; IS_ERR(bdev) && i <= timeout; i++) {
+ for (i = 0; i <= timeout; i++) {
dev_t devt;

if (i)
@@ -254,12 +238,35 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size,
msleep(1000);
wait_for_device_probe();

- if (early_lookup_bdev(devname, &devt))
- continue;
- bdev = blkdev_get_by_dev(devt, mode, dev);
+ if (!early_lookup_bdev(devname, &devt)) {
+ bdev = blkdev_get_by_dev(devt, mode, dev);
+ if (!IS_ERR(bdev))
+ break;
+ }
}
#endif
+ return bdev;
+}
+
+static struct block2mtd_dev *add_device(char *devname, int erase_size,
+ char *label, int timeout)
+{
+ const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
+ struct block_device *bdev;
+ struct block2mtd_dev *dev;
+ char *name;

+ if (!devname)
+ return NULL;
+
+ dev = kzalloc(sizeof(struct block2mtd_dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ /* Get a handle on the device */
+ bdev = blkdev_get_by_path(devname, mode, dev);
+ if (IS_ERR(bdev))
+ bdev = mdtblock_early_get_bdev(devname, mode, timeout, dev);
if (IS_ERR(bdev)) {
pr_err("error: cannot open device %s\n", devname);
goto err_free_block2mtd;
--
2.39.2


2023-05-23 07:54:33

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 16/24] block: move more code to early-lookup.c

blk_lookup_devt is only used by code in early-lookup.c, so move it
there.

printk_all_partitions and it's helper bdevt_str are only used by the
early init code in init/do_mounts.c, so they should go there as well.

Signed-off-by: Christoph Hellwig <[email protected]>
---
block/early-lookup.c | 92 ++++++++++++++++++++++++++++++++++++++++++
block/genhd.c | 92 ------------------------------------------
include/linux/blkdev.h | 1 -
3 files changed, 92 insertions(+), 93 deletions(-)

diff --git a/block/early-lookup.c b/block/early-lookup.c
index 9fc30d039508af..6016e781b6a0e2 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -120,6 +120,35 @@ static int devt_from_partlabel(const char *label, dev_t *devt)
return 0;
}

+static dev_t blk_lookup_devt(const char *name, int partno)
+{
+ dev_t devt = MKDEV(0, 0);
+ struct class_dev_iter iter;
+ struct device *dev;
+
+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+ while ((dev = class_dev_iter_next(&iter))) {
+ struct gendisk *disk = dev_to_disk(dev);
+
+ if (strcmp(dev_name(dev), name))
+ continue;
+
+ if (partno < disk->minors) {
+ /* We need to return the right devno, even
+ * if the partition doesn't exist yet.
+ */
+ devt = MKDEV(MAJOR(dev->devt),
+ MINOR(dev->devt) + partno);
+ } else {
+ devt = part_devt(disk, partno);
+ if (devt)
+ break;
+ }
+ }
+ class_dev_iter_exit(&iter);
+ return devt;
+}
+
static int devt_from_devname(const char *name, dev_t *devt)
{
int part;
@@ -222,3 +251,66 @@ int early_lookup_bdev(const char *name, dev_t *devt)
return devt_from_devnum(name, devt);
}
EXPORT_SYMBOL_GPL(early_lookup_bdev);
+
+static char __init *bdevt_str(dev_t devt, char *buf)
+{
+ if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
+ char tbuf[BDEVT_SIZE];
+ snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
+ snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
+ } else
+ snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
+
+ return buf;
+}
+
+/*
+ * print a full list of all partitions - intended for places where the root
+ * filesystem can't be mounted and thus to give the victim some idea of what
+ * went wrong
+ */
+void __init printk_all_partitions(void)
+{
+ struct class_dev_iter iter;
+ struct device *dev;
+
+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+ while ((dev = class_dev_iter_next(&iter))) {
+ struct gendisk *disk = dev_to_disk(dev);
+ struct block_device *part;
+ char devt_buf[BDEVT_SIZE];
+ unsigned long idx;
+
+ /*
+ * Don't show empty devices or things that have been
+ * suppressed
+ */
+ if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
+ continue;
+
+ /*
+ * Note, unlike /proc/partitions, I am showing the numbers in
+ * hex - the same format as the root= option takes.
+ */
+ rcu_read_lock();
+ xa_for_each(&disk->part_tbl, idx, part) {
+ if (!bdev_nr_sectors(part))
+ continue;
+ printk("%s%s %10llu %pg %s",
+ bdev_is_partition(part) ? " " : "",
+ bdevt_str(part->bd_dev, devt_buf),
+ bdev_nr_sectors(part) >> 1, part,
+ part->bd_meta_info ?
+ part->bd_meta_info->uuid : "");
+ if (bdev_is_partition(part))
+ printk("\n");
+ else if (dev->parent && dev->parent->driver)
+ printk(" driver: %s\n",
+ dev->parent->driver->name);
+ else
+ printk(" (driver?)\n");
+ }
+ rcu_read_unlock();
+ }
+ class_dev_iter_exit(&iter);
+}
diff --git a/block/genhd.c b/block/genhd.c
index 1cb489b927d50a..aa28f296fe391b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -318,18 +318,6 @@ void blk_free_ext_minor(unsigned int minor)
ida_free(&ext_devt_ida, minor);
}

-static char *bdevt_str(dev_t devt, char *buf)
-{
- if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
- char tbuf[BDEVT_SIZE];
- snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
- snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
- } else
- snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
-
- return buf;
-}
-
void disk_uevent(struct gendisk *disk, enum kobject_action action)
{
struct block_device *part;
@@ -755,57 +743,6 @@ void blk_request_module(dev_t devt)
}
#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */

-/*
- * print a full list of all partitions - intended for places where the root
- * filesystem can't be mounted and thus to give the victim some idea of what
- * went wrong
- */
-void __init printk_all_partitions(void)
-{
- struct class_dev_iter iter;
- struct device *dev;
-
- class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
- while ((dev = class_dev_iter_next(&iter))) {
- struct gendisk *disk = dev_to_disk(dev);
- struct block_device *part;
- char devt_buf[BDEVT_SIZE];
- unsigned long idx;
-
- /*
- * Don't show empty devices or things that have been
- * suppressed
- */
- if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
- continue;
-
- /*
- * Note, unlike /proc/partitions, I am showing the numbers in
- * hex - the same format as the root= option takes.
- */
- rcu_read_lock();
- xa_for_each(&disk->part_tbl, idx, part) {
- if (!bdev_nr_sectors(part))
- continue;
- printk("%s%s %10llu %pg %s",
- bdev_is_partition(part) ? " " : "",
- bdevt_str(part->bd_dev, devt_buf),
- bdev_nr_sectors(part) >> 1, part,
- part->bd_meta_info ?
- part->bd_meta_info->uuid : "");
- if (bdev_is_partition(part))
- printk("\n");
- else if (dev->parent && dev->parent->driver)
- printk(" driver: %s\n",
- dev->parent->driver->name);
- else
- printk(" (driver?)\n");
- }
- rcu_read_unlock();
- }
- class_dev_iter_exit(&iter);
-}
-
#ifdef CONFIG_PROC_FS
/* iterator */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
@@ -1339,35 +1276,6 @@ dev_t part_devt(struct gendisk *disk, u8 partno)
return devt;
}

-dev_t blk_lookup_devt(const char *name, int partno)
-{
- dev_t devt = MKDEV(0, 0);
- struct class_dev_iter iter;
- struct device *dev;
-
- class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
- while ((dev = class_dev_iter_next(&iter))) {
- struct gendisk *disk = dev_to_disk(dev);
-
- if (strcmp(dev_name(dev), name))
- continue;
-
- if (partno < disk->minors) {
- /* We need to return the right devno, even
- * if the partition doesn't exist yet.
- */
- devt = MKDEV(MAJOR(dev->devt),
- MINOR(dev->devt) + partno);
- } else {
- devt = part_devt(disk, partno);
- if (devt)
- break;
- }
- }
- class_dev_iter_exit(&iter);
- return devt;
-}
-
struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
struct lock_class_key *lkclass)
{
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dd00e9cf840da5..361341aea82ce5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -837,7 +837,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,

dev_t part_devt(struct gendisk *disk, u8 partno);
void inc_diskseq(struct gendisk *disk);
-dev_t blk_lookup_devt(const char *name, int partno);
void blk_request_module(dev_t devt);

extern int blk_register_queue(struct gendisk *disk);
--
2.39.2


2023-05-23 07:54:42

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 09/24] init: don't remove the /dev/ prefix from error messages

Remove the code that drops the /dev/ prefix from root_device_name, which
is only used for error messages when mounting the root device fails.

Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index 1405ee7218bf00..74cc96bffbdd71 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -607,8 +607,6 @@ void __init mount_root(char *root_device_name)
*/
void __init prepare_namespace(void)
{
- char *root_device_name;
-
if (root_delay) {
printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
root_delay);
@@ -627,19 +625,16 @@ void __init prepare_namespace(void)
md_run_setup();

if (saved_root_name[0]) {
- root_device_name = saved_root_name;
- if (!strncmp(root_device_name, "mtd", 3) ||
- !strncmp(root_device_name, "ubi", 3)) {
- mount_root_generic(root_device_name, root_device_name,
+ if (!strncmp(saved_root_name, "mtd", 3) ||
+ !strncmp(saved_root_name, "ubi", 3)) {
+ mount_root_generic(saved_root_name, saved_root_name,
root_mountflags);
goto out;
}
- ROOT_DEV = name_to_dev_t(root_device_name);
- if (strncmp(root_device_name, "/dev/", 5) == 0)
- root_device_name += 5;
+ ROOT_DEV = name_to_dev_t(saved_root_name);
}

- if (initrd_load(root_device_name))
+ if (initrd_load(saved_root_name))
goto out;

/* wait for any asynchronous scanning to complete */
@@ -652,7 +647,7 @@ void __init prepare_namespace(void)
async_synchronize_full();
}

- mount_root(root_device_name);
+ mount_root(saved_root_name);
out:
devtmpfs_mount();
init_mount(".", "/", NULL, MS_MOVE, NULL);
--
2.39.2


2023-05-23 07:54:51

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 20/24] dm: only call early_lookup_bdev from early boot context

early_lookup_bdev is supposed to only be called from the early boot
code, but dm_get_device calls it as a general fallback when lookup_bdev
fails, which is problematic because early_lookup_bdev bypasses all normal
path based permission checking, and might cause problems with certain
container environments renaming devices.

Switch to only call early_lookup_bdev when dm is built-in and the system
state in not running yet. This means it is still available when tables
are constructed by dm-init.c from the kernel command line, but not
otherwise.

Note that this strictly speaking changes the kernel ABI as the PARTUUID=
and PARTLABEL= style syntax is now not available during a running
systems. They never were intended for that, but this breaks things
we'll have to figure out a way to make them available again. But if
avoidable in any way I'd rather avoid that.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/md/dm-table.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e997f4322a9967..c230241a76b374 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -326,8 +326,11 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
/*
* Add a device to the list, or just increment the usage count if
* it's already present.
+ *
+ * Note: the __ref annotation is because this function can call the __init
+ * marked early_lookup_bdev when called during early boot code from dm-init.c.
*/
-int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+int __ref dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
struct dm_dev **result)
{
int r;
@@ -346,8 +349,10 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
return -EOVERFLOW;
} else {
r = lookup_bdev(path, &dev);
- if (r)
+#ifndef MODULE
+ if (r && system_state < SYSTEM_RUNNING)
r = early_lookup_bdev(path, &dev);
+#endif
if (r)
return -ENODEV;
}
--
2.39.2


2023-05-23 07:55:49

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 12/24] init: move the nfs/cifs/ram special cases out of name_to_dev_t

The nfs/cifs/ram special cass only need to be parsed once, and only in
the boot code. Move them out of name_to_dev_t and into
prepare_namespace.

Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index d5c06c1546e82c..86599faf2bf8a1 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -248,7 +248,6 @@ static dev_t devt_from_devnum(const char *name)
*
* 1) <hex_major><hex_minor> device number in hexadecimal represents itself
* no leading 0x, for example b302.
- * 2) /dev/nfs represents Root_NFS (0xff)
* 3) /dev/<disk_name> represents the device number of disk
* 4) /dev/<disk_name><decimal> represents the device number
* of partition - device number of disk plus the partition number
@@ -266,7 +265,6 @@ static dev_t devt_from_devnum(const char *name)
* a colon.
* 9) PARTLABEL=<name> with name being the GPT partition label.
* MSDOS partitions do not support labels!
- * 10) /dev/cifs represents Root_CIFS (0xfe)
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -275,12 +273,6 @@ static dev_t devt_from_devnum(const char *name)
*/
dev_t name_to_dev_t(const char *name)
{
- if (strcmp(name, "/dev/nfs") == 0)
- return Root_NFS;
- if (strcmp(name, "/dev/cifs") == 0)
- return Root_CIFS;
- if (strcmp(name, "/dev/ram") == 0)
- return Root_RAM0;
#ifdef CONFIG_BLOCK
if (strncmp(name, "PARTUUID=", 9) == 0)
return devt_from_partuuid(name + 9);
@@ -631,6 +623,12 @@ static dev_t __init parse_root_device(char *root_device_name)
if (!strncmp(root_device_name, "mtd", 3) ||
!strncmp(root_device_name, "ubi", 3))
return Root_Generic;
+ if (strcmp(root_device_name, "/dev/nfs") == 0)
+ return Root_NFS;
+ if (strcmp(root_device_name, "/dev/cifs") == 0)
+ return Root_CIFS;
+ if (strcmp(root_device_name, "/dev/ram") == 0)
+ return Root_RAM0;
return name_to_dev_t(root_device_name);
}

--
2.39.2


2023-05-23 07:55:58

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 23/24] mtd: block2mtd: don't call early_lookup_bdev after the system is running

early_lookup_bdev is supposed to only be called from the early boot
code, but mdtblock_early_get_bdev is called as a general fallback when
lookup_bdev fails, which is problematic because early_lookup_bdev
bypasses all normal path based permission checking, and might cause
problems with certain container environments renaming devices.

Switch to only call early_lookup_bdev when dm is built-in and the system
state in not running yet.

Note that this strictly speaking changes the kernel ABI as the PARTUUID=
and PARTLABEL= style syntax is now not available during a running
systems. They never were intended for that, but this breaks things
we'll have to figure out a way to make them available again. But if
avoidable in any way I'd rather avoid that.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/mtd/devices/block2mtd.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 182eed68c75634..59e4c71cfc6f53 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -215,13 +215,23 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
kfree(dev);
}

-static struct block_device *mdtblock_early_get_bdev(const char *devname,
+/*
+ * This function is marked __ref because it calls the __init marked
+ * early_lookup_bdev when called from the early boot code.
+ */
+static struct block_device __ref *mdtblock_early_get_bdev(const char *devname,
fmode_t mode, int timeout, struct block2mtd_dev *dev)
{
struct block_device *bdev = ERR_PTR(-ENODEV);
#ifndef MODULE
int i;

+ /*
+ * We can't use early_lookup_bdev from a running system.
+ */
+ if (system_state >= SYSTEM_RUNNING)
+ return bdev;
+
/*
* We might not have the root device mounted at this point.
* Try to resolve the device name by other means.
--
2.39.2


2023-05-23 07:56:01

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 02/24] PM: hibernate: factor out a helper to find the resume device

Split the logic to find the resume device out software_resume and into
a separate helper to start unwindig the convoluted goto logic.

Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/power/hibernate.c | 72 +++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 35 deletions(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 30d1274f03f625..07279506366255 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -910,6 +910,41 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
}
EXPORT_SYMBOL_GPL(hibernate_quiet_exec);

+static int find_resume_device(void)
+{
+ if (!strlen(resume_file))
+ return -ENOENT;
+
+ pm_pr_dbg("Checking hibernation image partition %s\n", resume_file);
+
+ if (resume_delay) {
+ pr_info("Waiting %dsec before reading resume device ...\n",
+ resume_delay);
+ ssleep(resume_delay);
+ }
+
+ /* Check if the device is there */
+ swsusp_resume_device = name_to_dev_t(resume_file);
+ if (swsusp_resume_device)
+ return 0;
+
+ /*
+ * Some device discovery might still be in progress; we need to wait for
+ * this to finish.
+ */
+ wait_for_device_probe();
+ if (resume_wait) {
+ while (!(swsusp_resume_device = name_to_dev_t(resume_file)))
+ msleep(10);
+ async_synchronize_full();
+ }
+
+ swsusp_resume_device = name_to_dev_t(resume_file);
+ if (!swsusp_resume_device)
+ return -ENODEV;
+ return 0;
+}
+
/**
* software_resume - Resume from a saved hibernation image.
*
@@ -949,45 +984,12 @@ static int software_resume(void)

snapshot_test = false;

- if (swsusp_resume_device)
- goto Check_image;
-
- if (!strlen(resume_file)) {
- error = -ENOENT;
- goto Unlock;
- }
-
- pm_pr_dbg("Checking hibernation image partition %s\n", resume_file);
-
- if (resume_delay) {
- pr_info("Waiting %dsec before reading resume device ...\n",
- resume_delay);
- ssleep(resume_delay);
- }
-
- /* Check if the device is there */
- swsusp_resume_device = name_to_dev_t(resume_file);
if (!swsusp_resume_device) {
- /*
- * Some device discovery might still be in progress; we need
- * to wait for this to finish.
- */
- wait_for_device_probe();
-
- if (resume_wait) {
- while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0)
- msleep(10);
- async_synchronize_full();
- }
-
- swsusp_resume_device = name_to_dev_t(resume_file);
- if (!swsusp_resume_device) {
- error = -ENODEV;
+ error = find_resume_device();
+ if (error)
goto Unlock;
- }
}

- Check_image:
pm_pr_dbg("Hibernation image partition %d:%d present\n",
MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));

--
2.39.2


2023-05-23 07:56:09

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 04/24] PM: hibernate: move finding the resume device out of software_resume

software_resume can be called either from an init call in the boot code,
or from sysfs once the system has finished booting, and the two
invocation methods this can't race with each other.

For the latter case we did just parse the suspend device manually, while
the former might not have one. Split software_resume so that the search
only happens for the boot case, which also means the special lockdep
nesting annotation can go away as the system transition mutex can be
taken a little later and doesn't have the sysfs locking nest inside it.

Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/power/hibernate.c | 80 ++++++++++++++++++++--------------------
1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 78696aa04f5ca3..45e24b02cd50b6 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -907,7 +907,7 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
}
EXPORT_SYMBOL_GPL(hibernate_quiet_exec);

-static int find_resume_device(void)
+static int __init find_resume_device(void)
{
if (!strlen(resume_file))
return -ENOENT;
@@ -942,53 +942,16 @@ static int find_resume_device(void)
return 0;
}

-/**
- * software_resume - Resume from a saved hibernation image.
- *
- * This routine is called as a late initcall, when all devices have been
- * discovered and initialized already.
- *
- * The image reading code is called to see if there is a hibernation image
- * available for reading. If that is the case, devices are quiesced and the
- * contents of memory is restored from the saved image.
- *
- * If this is successful, control reappears in the restored target kernel in
- * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine
- * attempts to recover gracefully and make the kernel return to the normal mode
- * of operation.
- */
static int software_resume(void)
{
int error;

- /*
- * If the user said "noresume".. bail out early.
- */
- if (noresume || !hibernation_available())
- return 0;
-
- /*
- * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
- * is configured into the kernel. Since the regular hibernate
- * trigger path is via sysfs which takes a buffer mutex before
- * calling hibernate functions (which take system_transition_mutex)
- * this can cause lockdep to complain about a possible ABBA deadlock
- * which cannot happen since we're in the boot code here and
- * sysfs can't be invoked yet. Therefore, we use a subclass
- * here to avoid lockdep complaining.
- */
- mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);
-
- if (!swsusp_resume_device) {
- error = find_resume_device();
- if (error)
- goto Unlock;
- }
-
pm_pr_dbg("Hibernation image partition %d:%d present\n",
MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));

pm_pr_dbg("Looking for hibernation image.\n");
+
+ mutex_lock(&system_transition_mutex);
error = swsusp_check(false);
if (error)
goto Unlock;
@@ -1035,7 +998,39 @@ static int software_resume(void)
goto Finish;
}

-late_initcall_sync(software_resume);
+/**
+ * software_resume_initcall - Resume from a saved hibernation image.
+ *
+ * This routine is called as a late initcall, when all devices have been
+ * discovered and initialized already.
+ *
+ * The image reading code is called to see if there is a hibernation image
+ * available for reading. If that is the case, devices are quiesced and the
+ * contents of memory is restored from the saved image.
+ *
+ * If this is successful, control reappears in the restored target kernel in
+ * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine
+ * attempts to recover gracefully and make the kernel return to the normal mode
+ * of operation.
+ */
+static int __init software_resume_initcall(void)
+{
+ /*
+ * If the user said "noresume".. bail out early.
+ */
+ if (noresume || !hibernation_available())
+ return 0;
+
+ if (!swsusp_resume_device) {
+ int error = find_resume_device();
+
+ if (error)
+ return error;
+ }
+
+ return software_resume();
+}
+late_initcall_sync(software_resume_initcall);


static const char * const hibernation_modes[] = {
@@ -1176,6 +1171,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
char *name;
dev_t res;

+ if (!hibernation_available())
+ return 0;
+
if (len && buf[len-1] == '\n')
len--;
name = kstrndup(buf, len, GFP_KERNEL);
--
2.39.2


2023-05-23 07:56:17

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 05/24] init: remove pointless Root_* values

Remove all unused defines, and just use the expanded versions for
the scsi disk majors.

I've decided to keep Root_RAM0 even if it could be expanded as there
is a lot of special casing for it in the init code.

Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/alpha/kernel/setup.c | 2 +-
arch/ia64/kernel/setup.c | 2 +-
arch/powerpc/platforms/powermac/setup.c | 3 ++-
include/linux/root_dev.h | 8 --------
4 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 33bf3a62700270..b650ff1cb022ee 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -658,7 +658,7 @@ setup_arch(char **cmdline_p)
#endif

/* Default root filesystem to sda2. */
- ROOT_DEV = Root_SDA2;
+ ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2);

#ifdef CONFIG_EISA
/* FIXME: only set this when we actually have EISA in this box? */
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c0572804427275..becdb4f33c2195 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -627,7 +627,7 @@ setup_arch (char **cmdline_p)
* is physical disk 1 partition 1 and the Linux root disk is
* physical disk 1 partition 2.
*/
- ROOT_DEV = Root_SDA2; /* default to second partition on first drive */
+ ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2);

if (is_uv_system())
uv_setup(cmdline_p);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 193cc9c394221b..0c41f4b005bcf3 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -76,7 +76,8 @@ int pmac_newworld;

static int current_root_goodness = -1;

-#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */
+/* sda1 - slightly silly choice */
+#define DEFAULT_ROOT_DEVICE MKDEV(SCSI_DISK0_MAJOR, 1)

sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN;
EXPORT_SYMBOL(sys_ctrler);
diff --git a/include/linux/root_dev.h b/include/linux/root_dev.h
index 4e78651371ba92..ed3ea8da642972 100644
--- a/include/linux/root_dev.h
+++ b/include/linux/root_dev.h
@@ -10,14 +10,6 @@ enum {
Root_NFS = MKDEV(UNNAMED_MAJOR, 255),
Root_CIFS = MKDEV(UNNAMED_MAJOR, 254),
Root_RAM0 = MKDEV(RAMDISK_MAJOR, 0),
- Root_RAM1 = MKDEV(RAMDISK_MAJOR, 1),
- Root_FD0 = MKDEV(FLOPPY_MAJOR, 0),
- Root_HDA1 = MKDEV(IDE0_MAJOR, 1),
- Root_HDA2 = MKDEV(IDE0_MAJOR, 2),
- Root_SDA1 = MKDEV(SCSI_DISK0_MAJOR, 1),
- Root_SDA2 = MKDEV(SCSI_DISK0_MAJOR, 2),
- Root_HDC1 = MKDEV(IDE1_MAJOR, 1),
- Root_SR0 = MKDEV(SCSI_CDROM_MAJOR, 0),
};

extern dev_t ROOT_DEV;
--
2.39.2


2023-05-23 07:56:26

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 15/24] block: move the code to do early boot lookup of block devices to block/

Create a new block/early-lookup.c to keep the early block device lookup
code instead of having this code sit with the early mount code.

Signed-off-by: Christoph Hellwig <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 2 +-
block/Makefile | 2 +-
block/early-lookup.c | 224 ++++++++++++++++++
init/do_mounts.c | 219 -----------------
4 files changed, 226 insertions(+), 221 deletions(-)
create mode 100644 block/early-lookup.c

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f08b83e62c6222..3f8cf6dc7de887 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5452,7 +5452,7 @@
port and the regular usb controller gets disabled.

root= [KNL] Root filesystem
- See early_lookup_bdev comment in init/do_mounts.c.
+ See early_lookup_bdev comment in block/early-lookup.c

rootdelay= [KNL] Delay (in seconds) to pause before attempting to
mount the root filesystem
diff --git a/block/Makefile b/block/Makefile
index b31b05390749a1..46ada9dc8bbfe2 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
- disk-events.o blk-ia-ranges.o
+ disk-events.o blk-ia-ranges.o early-lookup.o

obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
diff --git a/block/early-lookup.c b/block/early-lookup.c
new file mode 100644
index 00000000000000..9fc30d039508af
--- /dev/null
+++ b/block/early-lookup.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code for looking up block devices in the early boot code before mounting the
+ * root file system. Unfortunately currently also abused in a few other places.
+ */
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+
+struct uuidcmp {
+ const char *uuid;
+ int len;
+};
+
+/**
+ * match_dev_by_uuid - callback for finding a partition using its uuid
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the desired struct uuidcmp to match
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_uuid(struct device *dev, const void *data)
+{
+ struct block_device *bdev = dev_to_bdev(dev);
+ const struct uuidcmp *cmp = data;
+
+ if (!bdev->bd_meta_info ||
+ strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len))
+ return 0;
+ return 1;
+}
+
+/**
+ * devt_from_partuuid - looks up the dev_t of a partition by its UUID
+ * @uuid_str: char array containing ascii UUID
+ *
+ * The function will return the first partition which contains a matching
+ * UUID value in its partition_meta_info struct. This does not search
+ * by filesystem UUIDs.
+ *
+ * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be
+ * extracted and used as an offset from the partition identified by the UUID.
+ *
+ * Returns the matching dev_t on success or 0 on failure.
+ */
+static int devt_from_partuuid(const char *uuid_str, dev_t *devt)
+{
+ struct uuidcmp cmp;
+ struct device *dev = NULL;
+ int offset = 0;
+ char *slash;
+
+ cmp.uuid = uuid_str;
+
+ slash = strchr(uuid_str, '/');
+ /* Check for optional partition number offset attributes. */
+ if (slash) {
+ char c = 0;
+
+ /* Explicitly fail on poor PARTUUID syntax. */
+ if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1)
+ goto out_invalid;
+ cmp.len = slash - uuid_str;
+ } else {
+ cmp.len = strlen(uuid_str);
+ }
+
+ if (!cmp.len)
+ goto out_invalid;
+
+ dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid);
+ if (!dev)
+ return -ENODEV;
+
+ if (offset) {
+ /*
+ * Attempt to find the requested partition by adding an offset
+ * to the partition number found by UUID.
+ */
+ *devt = part_devt(dev_to_disk(dev),
+ dev_to_bdev(dev)->bd_partno + offset);
+ } else {
+ *devt = dev->devt;
+ }
+
+ put_device(dev);
+ return 0;
+
+out_invalid:
+ pr_err("VFS: PARTUUID= is invalid.\n"
+ "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+ return -EINVAL;
+}
+
+/**
+ * match_dev_by_label - callback for finding a partition using its label
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the label to match
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_label(struct device *dev, const void *data)
+{
+ struct block_device *bdev = dev_to_bdev(dev);
+ const char *label = data;
+
+ if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname))
+ return 0;
+ return 1;
+}
+
+static int devt_from_partlabel(const char *label, dev_t *devt)
+{
+ struct device *dev;
+
+ dev = class_find_device(&block_class, NULL, label, &match_dev_by_label);
+ if (!dev)
+ return -ENODEV;
+ *devt = dev->devt;
+ put_device(dev);
+ return 0;
+}
+
+static int devt_from_devname(const char *name, dev_t *devt)
+{
+ int part;
+ char s[32];
+ char *p;
+
+ if (strlen(name) > 31)
+ return -EINVAL;
+ strcpy(s, name);
+ for (p = s; *p; p++) {
+ if (*p == '/')
+ *p = '!';
+ }
+
+ *devt = blk_lookup_devt(s, 0);
+ if (*devt)
+ return 0;
+
+ /*
+ * Try non-existent, but valid partition, which may only exist after
+ * opening the device, like partitioned md devices.
+ */
+ while (p > s && isdigit(p[-1]))
+ p--;
+ if (p == s || !*p || *p == '0')
+ return -EINVAL;
+
+ /* try disk name without <part number> */
+ part = simple_strtoul(p, NULL, 10);
+ *p = '\0';
+ *devt = blk_lookup_devt(s, part);
+ if (*devt)
+ return 0;
+
+ /* try disk name without p<part number> */
+ if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
+ return -EINVAL;
+ p[-1] = '\0';
+ *devt = blk_lookup_devt(s, part);
+ if (*devt)
+ return 0;
+ return -EINVAL;
+}
+
+static int devt_from_devnum(const char *name, dev_t *devt)
+{
+ unsigned maj, min, offset;
+ char *p, dummy;
+
+ if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
+ sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) {
+ *devt = MKDEV(maj, min);
+ if (maj != MAJOR(*devt) || min != MINOR(*devt))
+ return -EINVAL;
+ } else {
+ *devt = new_decode_dev(simple_strtoul(name, &p, 16));
+ if (*p)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Convert a name into device number. We accept the following variants:
+ *
+ * 1) <hex_major><hex_minor> device number in hexadecimal represents itself
+ * no leading 0x, for example b302.
+ * 3) /dev/<disk_name> represents the device number of disk
+ * 4) /dev/<disk_name><decimal> represents the device number
+ * of partition - device number of disk plus the partition number
+ * 5) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * used when disk name of partitioned disk ends on a digit.
+ * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ * filled hex representation of the 32-bit "NT disk signature", and PP
+ * is a zero-filled hex representation of the 1-based partition number.
+ * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
+ * 8) <major>:<minor> major and minor number of the device separated by
+ * a colon.
+ * 9) PARTLABEL=<name> with name being the GPT partition label.
+ * MSDOS partitions do not support labels!
+ *
+ * If name doesn't have fall into the categories above, we return (0,0).
+ * block_class is used to check if something is a disk name. If the disk
+ * name contains slashes, the device name has them replaced with
+ * bangs.
+ */
+int early_lookup_bdev(const char *name, dev_t *devt)
+{
+ if (strncmp(name, "PARTUUID=", 9) == 0)
+ return devt_from_partuuid(name + 9, devt);
+ if (strncmp(name, "PARTLABEL=", 10) == 0)
+ return devt_from_partlabel(name + 10, devt);
+ if (strncmp(name, "/dev/", 5) == 0)
+ return devt_from_devname(name + 5, devt);
+ return devt_from_devnum(name, devt);
+}
+EXPORT_SYMBOL_GPL(early_lookup_bdev);
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 0b36a5f39ee8e2..780546a6cbfb6f 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -59,225 +59,6 @@ static int __init readwrite(char *str)
__setup("ro", readonly);
__setup("rw", readwrite);

-#ifdef CONFIG_BLOCK
-struct uuidcmp {
- const char *uuid;
- int len;
-};
-
-/**
- * match_dev_by_uuid - callback for finding a partition using its uuid
- * @dev: device passed in by the caller
- * @data: opaque pointer to the desired struct uuidcmp to match
- *
- * Returns 1 if the device matches, and 0 otherwise.
- */
-static int match_dev_by_uuid(struct device *dev, const void *data)
-{
- struct block_device *bdev = dev_to_bdev(dev);
- const struct uuidcmp *cmp = data;
-
- if (!bdev->bd_meta_info ||
- strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len))
- return 0;
- return 1;
-}
-
-/**
- * devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid_str: char array containing ascii UUID
- *
- * The function will return the first partition which contains a matching
- * UUID value in its partition_meta_info struct. This does not search
- * by filesystem UUIDs.
- *
- * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be
- * extracted and used as an offset from the partition identified by the UUID.
- *
- * Returns the matching dev_t on success or 0 on failure.
- */
-static int devt_from_partuuid(const char *uuid_str, dev_t *devt)
-{
- struct uuidcmp cmp;
- struct device *dev = NULL;
- int offset = 0;
- char *slash;
-
- cmp.uuid = uuid_str;
-
- slash = strchr(uuid_str, '/');
- /* Check for optional partition number offset attributes. */
- if (slash) {
- char c = 0;
-
- /* Explicitly fail on poor PARTUUID syntax. */
- if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1)
- goto out_invalid;
- cmp.len = slash - uuid_str;
- } else {
- cmp.len = strlen(uuid_str);
- }
-
- if (!cmp.len)
- goto out_invalid;
-
- dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid);
- if (!dev)
- return -ENODEV;
-
- if (offset) {
- /*
- * Attempt to find the requested partition by adding an offset
- * to the partition number found by UUID.
- */
- *devt = part_devt(dev_to_disk(dev),
- dev_to_bdev(dev)->bd_partno + offset);
- } else {
- *devt = dev->devt;
- }
-
- put_device(dev);
- return 0;
-
-out_invalid:
- pr_err("VFS: PARTUUID= is invalid.\n"
- "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
- return -EINVAL;
-}
-
-/**
- * match_dev_by_label - callback for finding a partition using its label
- * @dev: device passed in by the caller
- * @data: opaque pointer to the label to match
- *
- * Returns 1 if the device matches, and 0 otherwise.
- */
-static int match_dev_by_label(struct device *dev, const void *data)
-{
- struct block_device *bdev = dev_to_bdev(dev);
- const char *label = data;
-
- if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname))
- return 0;
- return 1;
-}
-
-static int devt_from_partlabel(const char *label, dev_t *devt)
-{
- struct device *dev;
-
- dev = class_find_device(&block_class, NULL, label, &match_dev_by_label);
- if (!dev)
- return -ENODEV;
- *devt = dev->devt;
- put_device(dev);
- return 0;
-}
-
-static int devt_from_devname(const char *name, dev_t *devt)
-{
- int part;
- char s[32];
- char *p;
-
- if (strlen(name) > 31)
- return -EINVAL;
- strcpy(s, name);
- for (p = s; *p; p++) {
- if (*p == '/')
- *p = '!';
- }
-
- *devt = blk_lookup_devt(s, 0);
- if (*devt)
- return 0;
-
- /*
- * Try non-existent, but valid partition, which may only exist after
- * opening the device, like partitioned md devices.
- */
- while (p > s && isdigit(p[-1]))
- p--;
- if (p == s || !*p || *p == '0')
- return -EINVAL;
-
- /* try disk name without <part number> */
- part = simple_strtoul(p, NULL, 10);
- *p = '\0';
- *devt = blk_lookup_devt(s, part);
- if (*devt)
- return 0;
-
- /* try disk name without p<part number> */
- if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
- return -EINVAL;
- p[-1] = '\0';
- *devt = blk_lookup_devt(s, part);
- if (*devt)
- return 0;
- return -EINVAL;
-}
-
-static int devt_from_devnum(const char *name, dev_t *devt)
-{
- unsigned maj, min, offset;
- char *p, dummy;
-
- if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
- sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) {
- *devt = MKDEV(maj, min);
- if (maj != MAJOR(*devt) || min != MINOR(*devt))
- return -EINVAL;
- } else {
- *devt = new_decode_dev(simple_strtoul(name, &p, 16));
- if (*p)
- return -EINVAL;
- }
-
- return 0;
-}
-
-/*
- * Convert a name into device number. We accept the following variants:
- *
- * 1) <hex_major><hex_minor> device number in hexadecimal represents itself
- * no leading 0x, for example b302.
- * 3) /dev/<disk_name> represents the device number of disk
- * 4) /dev/<disk_name><decimal> represents the device number
- * of partition - device number of disk plus the partition number
- * 5) /dev/<disk_name>p<decimal> - same as the above, that form is
- * used when disk name of partitioned disk ends on a digit.
- * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
- * unique id of a partition if the partition table provides it.
- * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
- * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
- * filled hex representation of the 32-bit "NT disk signature", and PP
- * is a zero-filled hex representation of the 1-based partition number.
- * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
- * a partition with a known unique id.
- * 8) <major>:<minor> major and minor number of the device separated by
- * a colon.
- * 9) PARTLABEL=<name> with name being the GPT partition label.
- * MSDOS partitions do not support labels!
- *
- * If name doesn't have fall into the categories above, we return (0,0).
- * block_class is used to check if something is a disk name. If the disk
- * name contains slashes, the device name has them replaced with
- * bangs.
- */
-int early_lookup_bdev(const char *name, dev_t *devt)
-{
- if (strncmp(name, "PARTUUID=", 9) == 0)
- return devt_from_partuuid(name + 9, devt);
- if (strncmp(name, "PARTLABEL=", 10) == 0)
- return devt_from_partlabel(name + 10, devt);
- if (strncmp(name, "/dev/", 5) == 0)
- return devt_from_devname(name + 5, devt);
- return devt_from_devnum(name, devt);
-}
-EXPORT_SYMBOL_GPL(early_lookup_bdev);
-#endif
-
static int __init root_dev_setup(char *line)
{
strscpy(saved_root_name, line, sizeof(saved_root_name));
--
2.39.2


2023-05-23 07:56:30

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 13/24] init: improve the name_to_dev_t interface

name_to_dev_t has a very misleading name, that doesn't make clear
it should only be used by the early init code, and also has a bad
calling convention that doesn't allow returning different kinds of
errors. Rename it to early_lookup_bdev to make the use case clear,
and return an errno, where -EINVAL means the string could not be
parsed, and -ENODEV means it the string was valid, but there was
no device found for it.

Also stub out the whole call for !CONFIG_BLOCK as all the non-block
root cases are always covered in the caller.

Signed-off-by: Christoph Hellwig <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 2 +-
drivers/md/dm-table.c | 5 +-
drivers/md/md-autodetect.c | 3 +-
drivers/mtd/devices/block2mtd.c | 3 +-
fs/pstore/blk.c | 4 +-
include/linux/blkdev.h | 5 +
include/linux/mount.h | 1 -
init/do_mounts.c | 102 +++++++++---------
kernel/power/hibernate.c | 22 ++--
9 files changed, 73 insertions(+), 74 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9e5bab29685ff0..f08b83e62c6222 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5452,7 +5452,7 @@
port and the regular usb controller gets disabled.

root= [KNL] Root filesystem
- See name_to_dev_t comment in init/do_mounts.c.
+ See early_lookup_bdev comment in init/do_mounts.c.

rootdelay= [KNL] Delay (in seconds) to pause before attempting to
mount the root filesystem
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 1398f1d6e83e7f..05aa16da43b0d5 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -330,8 +330,9 @@ dev_t dm_get_dev_t(const char *path)
{
dev_t dev;

- if (lookup_bdev(path, &dev))
- dev = name_to_dev_t(path);
+ if (lookup_bdev(path, &dev) &&
+ early_lookup_bdev(path, &dev))
+ return 0;
return dev;
}
EXPORT_SYMBOL_GPL(dm_get_dev_t);
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index 91836e6de3260f..6eaa0eab40f962 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -147,7 +147,8 @@ static void __init md_setup_drive(struct md_setup_args *args)
if (p)
*p++ = 0;

- dev = name_to_dev_t(devname);
+ if (early_lookup_bdev(devname, &dev))
+ dev = 0;
if (strncmp(devname, "/dev/", 5) == 0)
devname += 5;
snprintf(comp_name, 63, "/dev/%s", devname);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 4cd37ec45762b6..4c21e9f13bead5 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -254,8 +254,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size,
msleep(1000);
wait_for_device_probe();

- devt = name_to_dev_t(devname);
- if (!devt)
+ if (early_lookup_bdev(devname, &devt))
continue;
bdev = blkdev_get_by_dev(devt, mode, dev);
}
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index 4ae0cfcd15f20b..de8cf5d75f34d5 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -263,9 +263,9 @@ static __init const char *early_boot_devpath(const char *initial_devname)
* same scheme to find the device that we use for mounting
* the root file system.
*/
- dev_t dev = name_to_dev_t(initial_devname);
+ dev_t dev;

- if (!dev) {
+ if (early_lookup_bdev(initial_devname, &dev)) {
pr_err("failed to resolve '%s'!\n", initial_devname);
return initial_devname;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fe99948688dfda..dd00e9cf840da5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1494,6 +1494,7 @@ int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
void printk_all_partitions(void);
+int early_lookup_bdev(const char *pathname, dev_t *dev);
#else
static inline void invalidate_bdev(struct block_device *bdev)
{
@@ -1515,6 +1516,10 @@ static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
static inline void printk_all_partitions(void)
{
}
+static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_BLOCK */

int fsync_bdev(struct block_device *bdev);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1ea326c368f726..4b81ea90440e45 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -107,7 +107,6 @@ extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);

-extern dev_t name_to_dev_t(const char *name);
extern bool path_is_mountpoint(const struct path *path);

extern bool our_mnt(struct vfsmount *mnt);
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 86599faf2bf8a1..f1953aeb321978 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -96,11 +96,10 @@ static int match_dev_by_uuid(struct device *dev, const void *data)
*
* Returns the matching dev_t on success or 0 on failure.
*/
-static dev_t devt_from_partuuid(const char *uuid_str)
+static int devt_from_partuuid(const char *uuid_str, dev_t *devt)
{
struct uuidcmp cmp;
struct device *dev = NULL;
- dev_t devt = 0;
int offset = 0;
char *slash;

@@ -124,21 +123,21 @@ static dev_t devt_from_partuuid(const char *uuid_str)

dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid);
if (!dev)
- return 0;
+ return -ENODEV;

if (offset) {
/*
* Attempt to find the requested partition by adding an offset
* to the partition number found by UUID.
*/
- devt = part_devt(dev_to_disk(dev),
- dev_to_bdev(dev)->bd_partno + offset);
+ *devt = part_devt(dev_to_disk(dev),
+ dev_to_bdev(dev)->bd_partno + offset);
} else {
- devt = dev->devt;
+ *devt = dev->devt;
}

put_device(dev);
- return devt;
+ return 0;

clear_root_wait:
pr_err("VFS: PARTUUID= is invalid.\n"
@@ -146,7 +145,7 @@ static dev_t devt_from_partuuid(const char *uuid_str)
if (root_wait)
pr_err("Disabling rootwait; root= is invalid.\n");
root_wait = 0;
- return 0;
+ return -EINVAL;
}

/**
@@ -166,38 +165,35 @@ static int match_dev_by_label(struct device *dev, const void *data)
return 1;
}

-static dev_t devt_from_partlabel(const char *label)
+static int devt_from_partlabel(const char *label, dev_t *devt)
{
struct device *dev;
- dev_t devt = 0;

dev = class_find_device(&block_class, NULL, label, &match_dev_by_label);
- if (dev) {
- devt = dev->devt;
- put_device(dev);
- }
-
- return devt;
+ if (!dev)
+ return -ENODEV;
+ *devt = dev->devt;
+ put_device(dev);
+ return 0;
}

-static dev_t devt_from_devname(const char *name)
+static int devt_from_devname(const char *name, dev_t *devt)
{
- dev_t devt = 0;
int part;
char s[32];
char *p;

if (strlen(name) > 31)
- return 0;
+ return -EINVAL;
strcpy(s, name);
for (p = s; *p; p++) {
if (*p == '/')
*p = '!';
}

- devt = blk_lookup_devt(s, 0);
- if (devt)
- return devt;
+ *devt = blk_lookup_devt(s, 0);
+ if (*devt)
+ return 0;

/*
* Try non-existent, but valid partition, which may only exist after
@@ -206,41 +202,42 @@ static dev_t devt_from_devname(const char *name)
while (p > s && isdigit(p[-1]))
p--;
if (p == s || !*p || *p == '0')
- return 0;
+ return -EINVAL;

/* try disk name without <part number> */
part = simple_strtoul(p, NULL, 10);
*p = '\0';
- devt = blk_lookup_devt(s, part);
- if (devt)
- return devt;
+ *devt = blk_lookup_devt(s, part);
+ if (*devt)
+ return 0;

/* try disk name without p<part number> */
if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
- return 0;
+ return -EINVAL;
p[-1] = '\0';
- return blk_lookup_devt(s, part);
+ *devt = blk_lookup_devt(s, part);
+ if (*devt)
+ return 0;
+ return -EINVAL;
}
-#endif /* CONFIG_BLOCK */

-static dev_t devt_from_devnum(const char *name)
+static int devt_from_devnum(const char *name, dev_t *devt)
{
unsigned maj, min, offset;
- dev_t devt = 0;
char *p, dummy;

if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) {
- devt = MKDEV(maj, min);
- if (maj != MAJOR(devt) || min != MINOR(devt))
- return 0;
+ *devt = MKDEV(maj, min);
+ if (maj != MAJOR(*devt) || min != MINOR(*devt))
+ return -EINVAL;
} else {
- devt = new_decode_dev(simple_strtoul(name, &p, 16));
+ *devt = new_decode_dev(simple_strtoul(name, &p, 16));
if (*p)
- return 0;
+ return -EINVAL;
}

- return devt;
+ return 0;
}

/*
@@ -271,19 +268,18 @@ static dev_t devt_from_devnum(const char *name)
* name contains slashes, the device name has them replaced with
* bangs.
*/
-dev_t name_to_dev_t(const char *name)
+int early_lookup_bdev(const char *name, dev_t *devt)
{
-#ifdef CONFIG_BLOCK
if (strncmp(name, "PARTUUID=", 9) == 0)
- return devt_from_partuuid(name + 9);
+ return devt_from_partuuid(name + 9, devt);
if (strncmp(name, "PARTLABEL=", 10) == 0)
- return devt_from_partlabel(name + 10);
+ return devt_from_partlabel(name + 10, devt);
if (strncmp(name, "/dev/", 5) == 0)
- return devt_from_devname(name + 5);
-#endif
- return devt_from_devnum(name);
+ return devt_from_devname(name + 5, devt);
+ return devt_from_devnum(name, devt);
}
-EXPORT_SYMBOL_GPL(name_to_dev_t);
+EXPORT_SYMBOL_GPL(early_lookup_bdev);
+#endif

static int __init root_dev_setup(char *line)
{
@@ -606,20 +602,17 @@ static void __init wait_for_root(char *root_device_name)

pr_info("Waiting for root device %s...\n", root_device_name);

- for (;;) {
- if (driver_probe_done()) {
- ROOT_DEV = name_to_dev_t(root_device_name);
- if (ROOT_DEV)
- break;
- }
+ while (!driver_probe_done() ||
+ early_lookup_bdev(root_device_name, &ROOT_DEV) < 0)
msleep(5);
- }
async_synchronize_full();

}

static dev_t __init parse_root_device(char *root_device_name)
{
+ dev_t dev;
+
if (!strncmp(root_device_name, "mtd", 3) ||
!strncmp(root_device_name, "ubi", 3))
return Root_Generic;
@@ -629,7 +622,10 @@ static dev_t __init parse_root_device(char *root_device_name)
return Root_CIFS;
if (strcmp(root_device_name, "/dev/ram") == 0)
return Root_RAM0;
- return name_to_dev_t(root_device_name);
+
+ if (early_lookup_bdev(root_device_name, &dev))
+ return 0;
+ return dev;
}

/*
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 45e24b02cd50b6..c52dedb9f7c8e8 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -11,6 +11,7 @@

#define pr_fmt(fmt) "PM: hibernation: " fmt

+#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/suspend.h>
#include <linux/reboot.h>
@@ -921,8 +922,7 @@ static int __init find_resume_device(void)
}

/* Check if the device is there */
- swsusp_resume_device = name_to_dev_t(resume_file);
- if (swsusp_resume_device)
+ if (!early_lookup_bdev(resume_file, &swsusp_resume_device))
return 0;

/*
@@ -931,15 +931,12 @@ static int __init find_resume_device(void)
*/
wait_for_device_probe();
if (resume_wait) {
- while (!(swsusp_resume_device = name_to_dev_t(resume_file)))
+ while (early_lookup_bdev(resume_file, &swsusp_resume_device))
msleep(10);
async_synchronize_full();
}

- swsusp_resume_device = name_to_dev_t(resume_file);
- if (!swsusp_resume_device)
- return -ENODEV;
- return 0;
+ return early_lookup_bdev(resume_file, &swsusp_resume_device);
}

static int software_resume(void)
@@ -1169,7 +1166,8 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
unsigned int sleep_flags;
int len = n;
char *name;
- dev_t res;
+ dev_t dev;
+ int error;

if (!hibernation_available())
return 0;
@@ -1180,13 +1178,13 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
if (!name)
return -ENOMEM;

- res = name_to_dev_t(name);
+ error = early_lookup_bdev(name, &dev);
kfree(name);
- if (!res)
- return -EINVAL;
+ if (error)
+ return error;

sleep_flags = lock_system_sleep();
- swsusp_resume_device = res;
+ swsusp_resume_device = dev;
unlock_system_sleep(sleep_flags);

pm_pr_dbg("Configured hibernation resume from disk to %u\n",
--
2.39.2


2023-05-23 07:56:33

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 07/24] init: refactor mount_root

Provide stubs for all the lower level mount helpers, and just switch
on ROOT_DEV in the main function.

Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 104 +++++++++++++++++++++++++----------------------
1 file changed, 56 insertions(+), 48 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index a2c0baace0992c..e708b02d9d6566 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -453,15 +453,14 @@ void __init mount_root_generic(char *name, int flags)
#define NFSROOT_TIMEOUT_MAX 30
#define NFSROOT_RETRY_MAX 5

-static int __init mount_nfs_root(void)
+static void __init mount_nfs_root(void)
{
char *root_dev, *root_data;
unsigned int timeout;
- int try, err;
+ int try;

- err = nfs_root_data(&root_dev, &root_data);
- if (err != 0)
- return 0;
+ if (nfs_root_data(&root_dev, &root_data))
+ goto fail;

/*
* The server or network may not be ready, so try several
@@ -470,10 +469,8 @@ static int __init mount_nfs_root(void)
*/
timeout = NFSROOT_TIMEOUT_MIN;
for (try = 1; ; try++) {
- err = do_mount_root(root_dev, "nfs",
- root_mountflags, root_data);
- if (err == 0)
- return 1;
+ if (!do_mount_root(root_dev, "nfs", root_mountflags, root_data))
+ return;
if (try > NFSROOT_RETRY_MAX)
break;

@@ -483,9 +480,14 @@ static int __init mount_nfs_root(void)
if (timeout > NFSROOT_TIMEOUT_MAX)
timeout = NFSROOT_TIMEOUT_MAX;
}
- return 0;
+fail:
+ pr_err("VFS: Unable to mount root fs via NFS.\n");
}
-#endif
+#else
+static inline void mount_nfs_root(void)
+{
+}
+#endif /* CONFIG_ROOT_NFS */

#ifdef CONFIG_CIFS_ROOT

@@ -495,22 +497,20 @@ extern int cifs_root_data(char **dev, char **opts);
#define CIFSROOT_TIMEOUT_MAX 30
#define CIFSROOT_RETRY_MAX 5

-static int __init mount_cifs_root(void)
+static void __init mount_cifs_root(void)
{
char *root_dev, *root_data;
unsigned int timeout;
- int try, err;
+ int try;

- err = cifs_root_data(&root_dev, &root_data);
- if (err != 0)
- return 0;
+ if (cifs_root_data(&root_dev, &root_data))
+ goto fail;

timeout = CIFSROOT_TIMEOUT_MIN;
for (try = 1; ; try++) {
- err = do_mount_root(root_dev, "cifs", root_mountflags,
- root_data);
- if (err == 0)
- return 1;
+ if (!do_mount_root(root_dev, "cifs", root_mountflags,
+ root_data))
+ return;
if (try > CIFSROOT_RETRY_MAX)
break;

@@ -519,9 +519,14 @@ static int __init mount_cifs_root(void)
if (timeout > CIFSROOT_TIMEOUT_MAX)
timeout = CIFSROOT_TIMEOUT_MAX;
}
- return 0;
+fail:
+ pr_err("VFS: Unable to mount root fs via SMB.\n");
}
-#endif
+#else
+static inline void mount_cifs_root(void)
+{
+}
+#endif /* CONFIG_CIFS_ROOT */

static bool __init fs_is_nodev(char *fstype)
{
@@ -563,35 +568,38 @@ static int __init mount_nodev_root(void)
return err;
}

-void __init mount_root(void)
-{
-#ifdef CONFIG_ROOT_NFS
- if (ROOT_DEV == Root_NFS) {
- if (!mount_nfs_root())
- printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n");
- return;
- }
-#endif
-#ifdef CONFIG_CIFS_ROOT
- if (ROOT_DEV == Root_CIFS) {
- if (!mount_cifs_root())
- printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n");
- return;
- }
-#endif
- if (ROOT_DEV == 0 && root_device_name && root_fs_names) {
- if (mount_nodev_root() == 0)
- return;
- }
#ifdef CONFIG_BLOCK
- {
- int err = create_dev("/dev/root", ROOT_DEV);
+static void __init mount_block_root(void)
+{
+ int err = create_dev("/dev/root", ROOT_DEV);
+
+ if (err < 0)
+ pr_emerg("Failed to create /dev/root: %d\n", err);
+ mount_root_generic("/dev/root", root_mountflags);
+}
+#else
+static inline void mount_block_root(void)
+{
+}
+#endif /* CONFIG_BLOCK */

- if (err < 0)
- pr_emerg("Failed to create /dev/root: %d\n", err);
- mount_root_generic("/dev/root", root_mountflags);
+void __init mount_root(void)
+{
+ switch (ROOT_DEV) {
+ case Root_NFS:
+ mount_nfs_root();
+ break;
+ case Root_CIFS:
+ mount_cifs_root();
+ break;
+ case 0:
+ if (root_device_name && root_fs_names && mount_nodev_root() == 0)
+ break;
+ fallthrough;
+ default:
+ mount_block_root();
+ break;
}
-#endif
}

/*
--
2.39.2


2023-05-23 07:56:34

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 24/24] block: mark early_lookup_bdev as __init

early_lookup_bdev is now only used during the early boot code as it
should, so mark it __init to not waste run time memory on it.

Signed-off-by: Christoph Hellwig <[email protected]>
---
block/early-lookup.c | 19 +++++++++----------
include/linux/blkdev.h | 2 +-
2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/block/early-lookup.c b/block/early-lookup.c
index 6016e781b6a0e2..3ff0d2e4dcbfb8 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Code for looking up block devices in the early boot code before mounting the
- * root file system. Unfortunately currently also abused in a few other places.
+ * root file system.
*/
#include <linux/blkdev.h>
#include <linux/ctype.h>
@@ -18,7 +18,7 @@ struct uuidcmp {
*
* Returns 1 if the device matches, and 0 otherwise.
*/
-static int match_dev_by_uuid(struct device *dev, const void *data)
+static int __init match_dev_by_uuid(struct device *dev, const void *data)
{
struct block_device *bdev = dev_to_bdev(dev);
const struct uuidcmp *cmp = data;
@@ -42,7 +42,7 @@ static int match_dev_by_uuid(struct device *dev, const void *data)
*
* Returns the matching dev_t on success or 0 on failure.
*/
-static int devt_from_partuuid(const char *uuid_str, dev_t *devt)
+static int __init devt_from_partuuid(const char *uuid_str, dev_t *devt)
{
struct uuidcmp cmp;
struct device *dev = NULL;
@@ -98,7 +98,7 @@ static int devt_from_partuuid(const char *uuid_str, dev_t *devt)
*
* Returns 1 if the device matches, and 0 otherwise.
*/
-static int match_dev_by_label(struct device *dev, const void *data)
+static int __init match_dev_by_label(struct device *dev, const void *data)
{
struct block_device *bdev = dev_to_bdev(dev);
const char *label = data;
@@ -108,7 +108,7 @@ static int match_dev_by_label(struct device *dev, const void *data)
return 1;
}

-static int devt_from_partlabel(const char *label, dev_t *devt)
+static int __init devt_from_partlabel(const char *label, dev_t *devt)
{
struct device *dev;

@@ -120,7 +120,7 @@ static int devt_from_partlabel(const char *label, dev_t *devt)
return 0;
}

-static dev_t blk_lookup_devt(const char *name, int partno)
+static dev_t __init blk_lookup_devt(const char *name, int partno)
{
dev_t devt = MKDEV(0, 0);
struct class_dev_iter iter;
@@ -149,7 +149,7 @@ static dev_t blk_lookup_devt(const char *name, int partno)
return devt;
}

-static int devt_from_devname(const char *name, dev_t *devt)
+static int __init devt_from_devname(const char *name, dev_t *devt)
{
int part;
char s[32];
@@ -193,7 +193,7 @@ static int devt_from_devname(const char *name, dev_t *devt)
return -EINVAL;
}

-static int devt_from_devnum(const char *name, dev_t *devt)
+static int __init devt_from_devnum(const char *name, dev_t *devt)
{
unsigned maj, min, offset;
char *p, dummy;
@@ -240,7 +240,7 @@ static int devt_from_devnum(const char *name, dev_t *devt)
* name contains slashes, the device name has them replaced with
* bangs.
*/
-int early_lookup_bdev(const char *name, dev_t *devt)
+int __init early_lookup_bdev(const char *name, dev_t *devt)
{
if (strncmp(name, "PARTUUID=", 9) == 0)
return devt_from_partuuid(name + 9, devt);
@@ -250,7 +250,6 @@ int early_lookup_bdev(const char *name, dev_t *devt)
return devt_from_devname(name + 5, devt);
return devt_from_devnum(name, devt);
}
-EXPORT_SYMBOL_GPL(early_lookup_bdev);

static char __init *bdevt_str(dev_t devt, char *buf)
{
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 361341aea82ce5..a07776e5b27940 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1493,7 +1493,7 @@ int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
void printk_all_partitions(void);
-int early_lookup_bdev(const char *pathname, dev_t *dev);
+int __init early_lookup_bdev(const char *pathname, dev_t *dev);
#else
static inline void invalidate_bdev(struct block_device *bdev)
{
--
2.39.2


2023-05-23 07:56:50

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 18/24] dm: open code dm_get_dev_t in dm_init_init

dm_init_init is called from early boot code, and thus lookup_bdev
will never succeed. Just open code that call to early_lookup_bdev
instead.

Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/md/dm-init.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index d369457dbed0ed..2a71bcdba92d14 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -293,8 +293,10 @@ static int __init dm_init_init(void)

for (i = 0; i < ARRAY_SIZE(waitfor); i++) {
if (waitfor[i]) {
+ dev_t dev;
+
DMINFO("waiting for device %s ...", waitfor[i]);
- while (!dm_get_dev_t(waitfor[i]))
+ while (early_lookup_bdev(waitfor[i], &dev))
fsleep(5000);
}
}
--
2.39.2


2023-05-23 07:57:14

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 06/24] init: rename mount_block_root to mount_root_generic

mount_block_root is also used to mount non-block file systems, so give
it a better name.

Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 6 +++---
init/do_mounts.h | 2 +-
init/do_mounts_initrd.c | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index 2fe7901b5bcfaf..a2c0baace0992c 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -391,7 +391,7 @@ static int __init do_mount_root(const char *name, const char *fs,
return ret;
}

-void __init mount_block_root(char *name, int flags)
+void __init mount_root_generic(char *name, int flags)
{
struct page *page = alloc_page(GFP_KERNEL);
char *fs_names = page_address(page);
@@ -589,7 +589,7 @@ void __init mount_root(void)

if (err < 0)
pr_emerg("Failed to create /dev/root: %d\n", err);
- mount_block_root("/dev/root", root_mountflags);
+ mount_root_generic("/dev/root", root_mountflags);
}
#endif
}
@@ -620,7 +620,7 @@ void __init prepare_namespace(void)
root_device_name = saved_root_name;
if (!strncmp(root_device_name, "mtd", 3) ||
!strncmp(root_device_name, "ubi", 3)) {
- mount_block_root(root_device_name, root_mountflags);
+ mount_root_generic(root_device_name, root_mountflags);
goto out;
}
ROOT_DEV = name_to_dev_t(root_device_name);
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 7a29ac3e427bab..33623025f6951a 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -10,7 +10,7 @@
#include <linux/root_dev.h>
#include <linux/init_syscalls.h>

-void mount_block_root(char *name, int flags);
+void mount_root_generic(char *name, int flags);
void mount_root(void);
extern int root_mountflags;

diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 34731241377d30..686d1ff3af4bb1 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -95,7 +95,7 @@ static void __init handle_initrd(void)
real_root_dev = new_encode_dev(ROOT_DEV);
create_dev("/dev/root.old", Root_RAM0);
/* mount initrd on rootfs' /root */
- mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY);
+ mount_root_generic("/dev/root.old", root_mountflags & ~MS_RDONLY);
init_mkdir("/old", 0700);
init_chdir("/old");

--
2.39.2


2023-05-23 07:57:26

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 03/24] PM: hibernate: remove the global snapshot_test variable

Passing call dependent variable in global variables is a huge
antipattern. Fix it up.

Signed-off-by: Christoph Hellwig <[email protected]>
---
kernel/power/hibernate.c | 17 ++++++-----------
kernel/power/power.h | 3 +--
kernel/power/swap.c | 2 +-
3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 07279506366255..78696aa04f5ca3 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -64,7 +64,6 @@ enum {
static int hibernation_mode = HIBERNATION_SHUTDOWN;

bool freezer_test_done;
-bool snapshot_test;

static const struct platform_hibernation_ops *hibernation_ops;

@@ -684,7 +683,7 @@ static void power_down(void)
cpu_relax();
}

-static int load_image_and_restore(void)
+static int load_image_and_restore(bool snapshot_test)
{
int error;
unsigned int flags;
@@ -721,6 +720,7 @@ static int load_image_and_restore(void)
*/
int hibernate(void)
{
+ bool snapshot_test = false;
unsigned int sleep_flags;
int error;

@@ -748,9 +748,6 @@ int hibernate(void)
if (error)
goto Exit;

- /* protected by system_transition_mutex */
- snapshot_test = false;
-
lock_device_hotplug();
/* Allocate memory management structures */
error = create_basic_memory_bitmaps();
@@ -792,9 +789,9 @@ int hibernate(void)
unlock_device_hotplug();
if (snapshot_test) {
pm_pr_dbg("Checking hibernation image\n");
- error = swsusp_check();
+ error = swsusp_check(snapshot_test);
if (!error)
- error = load_image_and_restore();
+ error = load_image_and_restore(snapshot_test);
}
thaw_processes();

@@ -982,8 +979,6 @@ static int software_resume(void)
*/
mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);

- snapshot_test = false;
-
if (!swsusp_resume_device) {
error = find_resume_device();
if (error)
@@ -994,7 +989,7 @@ static int software_resume(void)
MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));

pm_pr_dbg("Looking for hibernation image.\n");
- error = swsusp_check();
+ error = swsusp_check(false);
if (error)
goto Unlock;

@@ -1022,7 +1017,7 @@ static int software_resume(void)
goto Close_Finish;
}

- error = load_image_and_restore();
+ error = load_image_and_restore(false);
thaw_processes();
Finish:
pm_notifier_call_chain(PM_POST_RESTORE);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index b83c8d5e188dec..978189fcafd124 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -59,7 +59,6 @@ asmlinkage int swsusp_save(void);

/* kernel/power/hibernate.c */
extern bool freezer_test_done;
-extern bool snapshot_test;

extern int hibernation_snapshot(int platform_mode);
extern int hibernation_restore(int platform_mode);
@@ -174,7 +173,7 @@ extern int swsusp_swap_in_use(void);
#define SF_HW_SIG 8

/* kernel/power/hibernate.c */
-extern int swsusp_check(void);
+int swsusp_check(bool snapshot_test);
extern void swsusp_free(void);
extern int swsusp_read(unsigned int *flags_p);
extern int swsusp_write(unsigned int flags);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 92e41ed292ada8..efed11568bfc72 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -1514,7 +1514,7 @@ int swsusp_read(unsigned int *flags_p)
* swsusp_check - Check for swsusp signature in the resume device
*/

-int swsusp_check(void)
+int swsusp_check(bool snapshot_test)
{
int error;
void *holder;
--
2.39.2


2023-05-23 09:43:34

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 23/24] mtd: block2mtd: don't call early_lookup_bdev after the system is running

Hi Christoph,

[email protected] wrote on Tue, 23 May 2023 09:45:34 +0200:

> early_lookup_bdev is supposed to only be called from the early boot
> code, but mdtblock_early_get_bdev is called as a general fallback when
> lookup_bdev fails, which is problematic because early_lookup_bdev
> bypasses all normal path based permission checking, and might cause
> problems with certain container environments renaming devices.
>
> Switch to only call early_lookup_bdev when dm is built-in and the system
> state in not running yet.
>
> Note that this strictly speaking changes the kernel ABI as the PARTUUID=
> and PARTLABEL= style syntax is now not available during a running
> systems. They never were intended for that, but this breaks things
> we'll have to figure out a way to make them available again. But if
> avoidable in any way I'd rather avoid that.

Sounds reasonable to me. Richard?

Reviewed-by: Miquel Raynal <[email protected]>

Thanks,
Miquèl

2023-05-23 09:45:44

by Miquel Raynal

[permalink] [raw]
Subject: Re: [PATCH 22/24] mtd: block2mtd: factor the early block device open logic into a helper

Hi Christoph,

[email protected] wrote on Tue, 23 May 2023 09:45:33 +0200:

> Simply add_device a bit by splitting out the cumbersome early boot logic

I guess you meant "Simplify..."

Otherwise lgtm so,

Reviewed-by: Miquel Raynal <[email protected]>

> into a separate helper.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> drivers/mtd/devices/block2mtd.c | 53 +++++++++++++++++++--------------
> 1 file changed, 30 insertions(+), 23 deletions(-)
>

Thanks,
Miquèl

2023-05-23 16:57:23

by Mike Snitzer

[permalink] [raw]
Subject: Re: [PATCH 19/24] dm: remove dm_get_dev_t

On Tue, May 23 2023 at 3:45P -0400,
Christoph Hellwig <[email protected]> wrote:

> Open code dm_get_dev_t in the only remaining caller, and propagate the
> exact error code from lookup_bdev and early_lookup_bdev.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> drivers/md/dm-table.c | 20 ++++----------------
> include/linux/device-mapper.h | 2 --
> 2 files changed, 4 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index 05aa16da43b0d5..e997f4322a9967 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -323,20 +323,6 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
> return 0;
> }
>
> -/*
> - * Convert the path to a device
> - */
> -dev_t dm_get_dev_t(const char *path)
> -{
> - dev_t dev;
> -
> - if (lookup_bdev(path, &dev) &&
> - early_lookup_bdev(path, &dev))
> - return 0;
> - return dev;
> -}
> -EXPORT_SYMBOL_GPL(dm_get_dev_t);
> -
> /*
> * Add a device to the list, or just increment the usage count if
> * it's already present.
> @@ -359,8 +345,10 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
> if (MAJOR(dev) != major || MINOR(dev) != minor)
> return -EOVERFLOW;
> } else {
> - dev = dm_get_dev_t(path);
> - if (!dev)
> + r = lookup_bdev(path, &dev);
> + if (r)
> + r = early_lookup_bdev(path, &dev);
> + if (r)
> return -ENODEV;
> }
> if (dev == disk_devt(t->md->disk))

OK, but you aren't actually propagating the exact error code. Did
you intend to change the return from -ENODEV to r?

Mike

2023-05-23 17:14:09

by Mike Snitzer

[permalink] [raw]
Subject: Re: [PATCH 17/24] dm-snap: simplify the origin_dev == cow_dev check in snapshot_ctr

On Tue, May 23 2023 at 3:45P -0400,
Christoph Hellwig <[email protected]> wrote:

> Use the block_device acquired in dm_get_device for the check instead
> of doing an extra lookup.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

Reviewed-by: Mike Snitzer <[email protected]>

2023-05-23 17:19:05

by Mike Snitzer

[permalink] [raw]
Subject: Re: [PATCH 20/24] dm: only call early_lookup_bdev from early boot context

On Tue, May 23 2023 at 3:45P -0400,
Christoph Hellwig <[email protected]> wrote:

> early_lookup_bdev is supposed to only be called from the early boot
> code, but dm_get_device calls it as a general fallback when lookup_bdev
> fails, which is problematic because early_lookup_bdev bypasses all normal
> path based permission checking, and might cause problems with certain
> container environments renaming devices.
>
> Switch to only call early_lookup_bdev when dm is built-in and the system
> state in not running yet. This means it is still available when tables
> are constructed by dm-init.c from the kernel command line, but not
> otherwise.
>
> Note that this strictly speaking changes the kernel ABI as the PARTUUID=
> and PARTLABEL= style syntax is now not available during a running
> systems. They never were intended for that, but this breaks things
> we'll have to figure out a way to make them available again. But if
> avoidable in any way I'd rather avoid that.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> drivers/md/dm-table.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index e997f4322a9967..c230241a76b374 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -326,8 +326,11 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
> /*
> * Add a device to the list, or just increment the usage count if
> * it's already present.
> + *
> + * Note: the __ref annotation is because this function can call the __init
> + * marked early_lookup_bdev when called during early boot code from dm-init.c.
> */
> -int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
> +int __ref dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
> struct dm_dev **result)
> {
> int r;
> @@ -346,8 +349,10 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
> return -EOVERFLOW;
> } else {
> r = lookup_bdev(path, &dev);
> - if (r)
> +#ifndef MODULE
> + if (r && system_state < SYSTEM_RUNNING)
> r = early_lookup_bdev(path, &dev);
> +#endif
> if (r)
> return -ENODEV;
> }
> --
> 2.39.2
>

I think/hope this will be fine:

Reviewed-by: Mike Snitzer <[email protected]>

2023-05-23 17:20:40

by Mike Snitzer

[permalink] [raw]
Subject: Re: [PATCH 18/24] dm: open code dm_get_dev_t in dm_init_init

On Tue, May 23 2023 at 3:45P -0400,
Christoph Hellwig <[email protected]> wrote:

> dm_init_init is called from early boot code, and thus lookup_bdev
> will never succeed. Just open code that call to early_lookup_bdev
> instead.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

Reviewed-by: Mike Snitzer <[email protected]>

2023-05-23 18:37:36

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 03/24] PM: hibernate: remove the global snapshot_test variable

On Tue, May 23, 2023 at 9:45 AM Christoph Hellwig <[email protected]> wrote:
>
> Passing call dependent variable in global variables is a huge
> antipattern. Fix it up.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

Acked-by: Rafael J. Wysocki <[email protected]>

> ---
> kernel/power/hibernate.c | 17 ++++++-----------
> kernel/power/power.h | 3 +--
> kernel/power/swap.c | 2 +-
> 3 files changed, 8 insertions(+), 14 deletions(-)
>
> diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
> index 07279506366255..78696aa04f5ca3 100644
> --- a/kernel/power/hibernate.c
> +++ b/kernel/power/hibernate.c
> @@ -64,7 +64,6 @@ enum {
> static int hibernation_mode = HIBERNATION_SHUTDOWN;
>
> bool freezer_test_done;
> -bool snapshot_test;
>
> static const struct platform_hibernation_ops *hibernation_ops;
>
> @@ -684,7 +683,7 @@ static void power_down(void)
> cpu_relax();
> }
>
> -static int load_image_and_restore(void)
> +static int load_image_and_restore(bool snapshot_test)
> {
> int error;
> unsigned int flags;
> @@ -721,6 +720,7 @@ static int load_image_and_restore(void)
> */
> int hibernate(void)
> {
> + bool snapshot_test = false;
> unsigned int sleep_flags;
> int error;
>
> @@ -748,9 +748,6 @@ int hibernate(void)
> if (error)
> goto Exit;
>
> - /* protected by system_transition_mutex */
> - snapshot_test = false;
> -
> lock_device_hotplug();
> /* Allocate memory management structures */
> error = create_basic_memory_bitmaps();
> @@ -792,9 +789,9 @@ int hibernate(void)
> unlock_device_hotplug();
> if (snapshot_test) {
> pm_pr_dbg("Checking hibernation image\n");
> - error = swsusp_check();
> + error = swsusp_check(snapshot_test);
> if (!error)
> - error = load_image_and_restore();
> + error = load_image_and_restore(snapshot_test);
> }
> thaw_processes();
>
> @@ -982,8 +979,6 @@ static int software_resume(void)
> */
> mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);
>
> - snapshot_test = false;
> -
> if (!swsusp_resume_device) {
> error = find_resume_device();
> if (error)
> @@ -994,7 +989,7 @@ static int software_resume(void)
> MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
>
> pm_pr_dbg("Looking for hibernation image.\n");
> - error = swsusp_check();
> + error = swsusp_check(false);
> if (error)
> goto Unlock;
>
> @@ -1022,7 +1017,7 @@ static int software_resume(void)
> goto Close_Finish;
> }
>
> - error = load_image_and_restore();
> + error = load_image_and_restore(false);
> thaw_processes();
> Finish:
> pm_notifier_call_chain(PM_POST_RESTORE);
> diff --git a/kernel/power/power.h b/kernel/power/power.h
> index b83c8d5e188dec..978189fcafd124 100644
> --- a/kernel/power/power.h
> +++ b/kernel/power/power.h
> @@ -59,7 +59,6 @@ asmlinkage int swsusp_save(void);
>
> /* kernel/power/hibernate.c */
> extern bool freezer_test_done;
> -extern bool snapshot_test;
>
> extern int hibernation_snapshot(int platform_mode);
> extern int hibernation_restore(int platform_mode);
> @@ -174,7 +173,7 @@ extern int swsusp_swap_in_use(void);
> #define SF_HW_SIG 8
>
> /* kernel/power/hibernate.c */
> -extern int swsusp_check(void);
> +int swsusp_check(bool snapshot_test);
> extern void swsusp_free(void);
> extern int swsusp_read(unsigned int *flags_p);
> extern int swsusp_write(unsigned int flags);
> diff --git a/kernel/power/swap.c b/kernel/power/swap.c
> index 92e41ed292ada8..efed11568bfc72 100644
> --- a/kernel/power/swap.c
> +++ b/kernel/power/swap.c
> @@ -1514,7 +1514,7 @@ int swsusp_read(unsigned int *flags_p)
> * swsusp_check - Check for swsusp signature in the resume device
> */
>
> -int swsusp_check(void)
> +int swsusp_check(bool snapshot_test)
> {
> int error;
> void *holder;
> --
> 2.39.2
>

2023-05-23 18:38:21

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 02/24] PM: hibernate: factor out a helper to find the resume device

On Tue, May 23, 2023 at 9:45 AM Christoph Hellwig <[email protected]> wrote:
>
> Split the logic to find the resume device out software_resume and into
> a separate helper to start unwindig the convoluted goto logic.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

Acked-by: Rafael J. Wysocki <[email protected]>

> ---
> kernel/power/hibernate.c | 72 +++++++++++++++++++++-------------------
> 1 file changed, 37 insertions(+), 35 deletions(-)
>
> diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
> index 30d1274f03f625..07279506366255 100644
> --- a/kernel/power/hibernate.c
> +++ b/kernel/power/hibernate.c
> @@ -910,6 +910,41 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
> }
> EXPORT_SYMBOL_GPL(hibernate_quiet_exec);
>
> +static int find_resume_device(void)
> +{
> + if (!strlen(resume_file))
> + return -ENOENT;
> +
> + pm_pr_dbg("Checking hibernation image partition %s\n", resume_file);
> +
> + if (resume_delay) {
> + pr_info("Waiting %dsec before reading resume device ...\n",
> + resume_delay);
> + ssleep(resume_delay);
> + }
> +
> + /* Check if the device is there */
> + swsusp_resume_device = name_to_dev_t(resume_file);
> + if (swsusp_resume_device)
> + return 0;
> +
> + /*
> + * Some device discovery might still be in progress; we need to wait for
> + * this to finish.
> + */
> + wait_for_device_probe();
> + if (resume_wait) {
> + while (!(swsusp_resume_device = name_to_dev_t(resume_file)))
> + msleep(10);
> + async_synchronize_full();
> + }
> +
> + swsusp_resume_device = name_to_dev_t(resume_file);
> + if (!swsusp_resume_device)
> + return -ENODEV;
> + return 0;
> +}
> +
> /**
> * software_resume - Resume from a saved hibernation image.
> *
> @@ -949,45 +984,12 @@ static int software_resume(void)
>
> snapshot_test = false;
>
> - if (swsusp_resume_device)
> - goto Check_image;
> -
> - if (!strlen(resume_file)) {
> - error = -ENOENT;
> - goto Unlock;
> - }
> -
> - pm_pr_dbg("Checking hibernation image partition %s\n", resume_file);
> -
> - if (resume_delay) {
> - pr_info("Waiting %dsec before reading resume device ...\n",
> - resume_delay);
> - ssleep(resume_delay);
> - }
> -
> - /* Check if the device is there */
> - swsusp_resume_device = name_to_dev_t(resume_file);
> if (!swsusp_resume_device) {
> - /*
> - * Some device discovery might still be in progress; we need
> - * to wait for this to finish.
> - */
> - wait_for_device_probe();
> -
> - if (resume_wait) {
> - while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0)
> - msleep(10);
> - async_synchronize_full();
> - }
> -
> - swsusp_resume_device = name_to_dev_t(resume_file);
> - if (!swsusp_resume_device) {
> - error = -ENODEV;
> + error = find_resume_device();
> + if (error)
> goto Unlock;
> - }
> }
>
> - Check_image:
> pm_pr_dbg("Hibernation image partition %d:%d present\n",
> MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
>
> --
> 2.39.2
>

2023-05-23 18:42:34

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 21/24] PM: hibernate: don't use early_lookup_bdev in resume_store

On Tue, May 23, 2023 at 9:46 AM Christoph Hellwig <[email protected]> wrote:
>
> resume_store is a sysfs attribute written during normal kernel runtime,
> and it should not use the early_lookup_bdev API that bypasses all normal
> path based permission checking, and might cause problems with certain
> container environments renaming devices.
>
> Switch to lookup_bdev, which does a normal path lookup instead, and fall
> back to trying to parse a numeric dev_t just like early_lookup_bdev did.
>
> Note that this strictly speaking changes the kernel ABI as the PARTUUID=
> and PARTLABEL= style syntax is now not available during a running
> systems. They never were intended for that, but this breaks things
> we'll have to figure out a way to make them available again. But if
> avoidable in any way I'd rather avoid that.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Fixes: 421a5fa1a6cf ("PM / hibernate: use name_to_dev_t to parse resume")

Acked-by: Rafael J. Wysocki <[email protected]>

> ---
> kernel/power/hibernate.c | 18 +++++++++++++++++-
> 1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
> index c52dedb9f7c8e8..7ae95ec72f9902 100644
> --- a/kernel/power/hibernate.c
> +++ b/kernel/power/hibernate.c
> @@ -1178,7 +1178,23 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
> if (!name)
> return -ENOMEM;
>
> - error = early_lookup_bdev(name, &dev);
> + error = lookup_bdev(name, &dev);
> + if (error) {
> + unsigned maj, min, offset;
> + char *p, dummy;
> +
> + if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
> + sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset,
> + &dummy) == 3) {
> + dev = MKDEV(maj, min);
> + if (maj != MAJOR(dev) || min != MINOR(dev))
> + error = -EINVAL;
> + } else {
> + dev = new_decode_dev(simple_strtoul(name, &p, 16));
> + if (*p)
> + error = -EINVAL;
> + }
> + }
> kfree(name);
> if (error)
> return error;
> --
> 2.39.2
>

2023-05-23 18:56:35

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH 04/24] PM: hibernate: move finding the resume device out of software_resume

On Tue, May 23, 2023 at 9:45 AM Christoph Hellwig <[email protected]> wrote:
>
> software_resume can be called either from an init call in the boot code,
> or from sysfs once the system has finished booting, and the two
> invocation methods this can't race with each other.
>
> For the latter case we did just parse the suspend device manually, while
> the former might not have one. Split software_resume so that the search
> only happens for the boot case, which also means the special lockdep
> nesting annotation can go away as the system transition mutex can be
> taken a little later and doesn't have the sysfs locking nest inside it.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

Acked-by: Rafael J. Wysocki <[email protected]>

> ---
> kernel/power/hibernate.c | 80 ++++++++++++++++++++--------------------
> 1 file changed, 39 insertions(+), 41 deletions(-)
>
> diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
> index 78696aa04f5ca3..45e24b02cd50b6 100644
> --- a/kernel/power/hibernate.c
> +++ b/kernel/power/hibernate.c
> @@ -907,7 +907,7 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
> }
> EXPORT_SYMBOL_GPL(hibernate_quiet_exec);
>
> -static int find_resume_device(void)
> +static int __init find_resume_device(void)
> {
> if (!strlen(resume_file))
> return -ENOENT;
> @@ -942,53 +942,16 @@ static int find_resume_device(void)
> return 0;
> }
>
> -/**
> - * software_resume - Resume from a saved hibernation image.
> - *
> - * This routine is called as a late initcall, when all devices have been
> - * discovered and initialized already.
> - *
> - * The image reading code is called to see if there is a hibernation image
> - * available for reading. If that is the case, devices are quiesced and the
> - * contents of memory is restored from the saved image.
> - *
> - * If this is successful, control reappears in the restored target kernel in
> - * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine
> - * attempts to recover gracefully and make the kernel return to the normal mode
> - * of operation.
> - */
> static int software_resume(void)
> {
> int error;
>
> - /*
> - * If the user said "noresume".. bail out early.
> - */
> - if (noresume || !hibernation_available())
> - return 0;
> -
> - /*
> - * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
> - * is configured into the kernel. Since the regular hibernate
> - * trigger path is via sysfs which takes a buffer mutex before
> - * calling hibernate functions (which take system_transition_mutex)
> - * this can cause lockdep to complain about a possible ABBA deadlock
> - * which cannot happen since we're in the boot code here and
> - * sysfs can't be invoked yet. Therefore, we use a subclass
> - * here to avoid lockdep complaining.
> - */
> - mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);
> -
> - if (!swsusp_resume_device) {
> - error = find_resume_device();
> - if (error)
> - goto Unlock;
> - }
> -
> pm_pr_dbg("Hibernation image partition %d:%d present\n",
> MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
>
> pm_pr_dbg("Looking for hibernation image.\n");
> +
> + mutex_lock(&system_transition_mutex);
> error = swsusp_check(false);
> if (error)
> goto Unlock;
> @@ -1035,7 +998,39 @@ static int software_resume(void)
> goto Finish;
> }
>
> -late_initcall_sync(software_resume);
> +/**
> + * software_resume_initcall - Resume from a saved hibernation image.
> + *
> + * This routine is called as a late initcall, when all devices have been
> + * discovered and initialized already.
> + *
> + * The image reading code is called to see if there is a hibernation image
> + * available for reading. If that is the case, devices are quiesced and the
> + * contents of memory is restored from the saved image.
> + *
> + * If this is successful, control reappears in the restored target kernel in
> + * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine
> + * attempts to recover gracefully and make the kernel return to the normal mode
> + * of operation.
> + */
> +static int __init software_resume_initcall(void)
> +{
> + /*
> + * If the user said "noresume".. bail out early.
> + */
> + if (noresume || !hibernation_available())
> + return 0;
> +
> + if (!swsusp_resume_device) {
> + int error = find_resume_device();
> +
> + if (error)
> + return error;
> + }
> +
> + return software_resume();
> +}
> +late_initcall_sync(software_resume_initcall);
>
>
> static const char * const hibernation_modes[] = {
> @@ -1176,6 +1171,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
> char *name;
> dev_t res;
>
> + if (!hibernation_available())
> + return 0;
> +
> if (len && buf[len-1] == '\n')
> len--;
> name = kstrndup(buf, len, GFP_KERNEL);
> --
> 2.39.2
>

2023-05-24 05:04:42

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH 15/24] block: move the code to do early boot lookup of block devices to block/



On 5/23/23 00:45, Christoph Hellwig wrote:
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index f08b83e62c6222..3f8cf6dc7de887 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -5452,7 +5452,7 @@
> port and the regular usb controller gets disabled.
>
> root= [KNL] Root filesystem
> - See early_lookup_bdev comment in init/do_mounts.c.
> + See early_lookup_bdev comment in block/early-lookup.c

Patch 13 does this:

root= [KNL] Root filesystem
- See name_to_dev_t comment in init/do_mounts.c.
+ See early_lookup_bdev comment in init/do_mounts.c.

Should this latter chunk be dropped?

--
~Randy

2023-05-24 05:40:25

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH 15/24] block: move the code to do early boot lookup of block devices to block/



On 5/23/23 21:58, Randy Dunlap wrote:
>
>
> On 5/23/23 00:45, Christoph Hellwig wrote:
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>> index f08b83e62c6222..3f8cf6dc7de887 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -5452,7 +5452,7 @@
>> port and the regular usb controller gets disabled.
>>
>> root= [KNL] Root filesystem
>> - See early_lookup_bdev comment in init/do_mounts.c.
>> + See early_lookup_bdev comment in block/early-lookup.c
>
> Patch 13 does this:
>
> root= [KNL] Root filesystem
> - See name_to_dev_t comment in init/do_mounts.c.
> + See early_lookup_bdev comment in init/do_mounts.c.
>
> Should this latter chunk be dropped?
>

Oh, oops, reverse order of patches?

--
~Randy

2023-05-24 06:39:51

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 19/24] dm: remove dm_get_dev_t

On Tue, May 23, 2023 at 12:49:16PM -0400, Mike Snitzer wrote:
> > - dev = dm_get_dev_t(path);
> > - if (!dev)
> > + r = lookup_bdev(path, &dev);
> > + if (r)
> > + r = early_lookup_bdev(path, &dev);
> > + if (r)
> > return -ENODEV;
> > }
> > if (dev == disk_devt(t->md->disk))
>
> OK, but you aren't actually propagating the exact error code. Did
> you intend to change the return from -ENODEV to r?

Yes, thanks.

2023-05-24 06:40:00

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 15/24] block: move the code to do early boot lookup of block devices to block/

On Tue, May 23, 2023 at 09:59:50PM -0700, Randy Dunlap wrote:
> >> root= [KNL] Root filesystem
> >> - See early_lookup_bdev comment in init/do_mounts.c.
> >> + See early_lookup_bdev comment in block/early-lookup.c
> >
> > Patch 13 does this:
> >
> > root= [KNL] Root filesystem
> > - See name_to_dev_t comment in init/do_mounts.c.
> > + See early_lookup_bdev comment in init/do_mounts.c.
> >
> > Should this latter chunk be dropped?
> >
>
> Oh, oops, reverse order of patches?

Patch 13 renames the function, this patch moves it. So I think this
correct, but feel free to double check my foggy brain :)


2023-06-21 21:30:42

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 14/24] init: clear root_wait on all invalid root= strings

Hi,

On Tue, May 23, 2023 at 09:45:25AM +0200, Christoph Hellwig wrote:
> Instead of only clearing root_wait in devt_from_partuuid when the UUID
> format was invalid, do that in parse_root_device for all strings that
> failed to parse.
>
> Signed-off-by: Christoph Hellwig <[email protected]>

In linux-next, almost all of my boot tests from usb drives as well
as a few others fail with "Disabling rootwait; root= is invalid."
in the log. Bisect points to this patch.

It can not easily be reverted, but the following change fixes the problem.

--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -413,10 +413,12 @@ static dev_t __init parse_root_device(char *root_device_name)

error = early_lookup_bdev(root_device_name, &dev);
if (error) {
+#if 0
if (error == -EINVAL && root_wait) {
pr_err("Disabling rootwait; root= is invalid.\n");
root_wait = 0;
}
+#endif
return 0;
}
return dev;

Debugging shows that early_lookup_bdev() indeed returns -EINVAL.
Looking into it further, it turns out that devt_from_devname() returns
-EINVAL for root devices such as
root=/dev/sda
if the device is not found, making it impossible to rootwait for such
a device (this might for example be a raw USB drive without partitions,
or any qemu drive with format=raw).

Guenter

---
# bad: [15e71592dbae49a674429c618a10401d7f992ac3] Add linux-next specific files for 20230621
# good: [45a3e24f65e90a047bef86f927ebdc4c710edaa1] Linux 6.4-rc7
git bisect start 'HEAD' 'v6.4-rc7'
# good: [e867e67cd55ae460c860ffd896c7fc96add2821c] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
git bisect good e867e67cd55ae460c860ffd896c7fc96add2821c
# bad: [0ab4015a11182e2a19c3dd52db85418f370cef39] Merge branch 'for-next' of git://git.kernel.dk/linux-block.git
git bisect bad 0ab4015a11182e2a19c3dd52db85418f370cef39
# good: [901bdf5ea1a836400ee69aa32b04e9c209271ec7] Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
git bisect good 901bdf5ea1a836400ee69aa32b04e9c209271ec7
# good: [07164956fbc26eff280f3a044a489460ae36413c] Merge branch 'for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git
git bisect good 07164956fbc26eff280f3a044a489460ae36413c
# good: [3067e020d361ed346957eb5e253911f7a3e18f59] add snd_soc_{of_}get_dlc()
git bisect good 3067e020d361ed346957eb5e253911f7a3e18f59
# bad: [0dbbd269fb6a8799f312dfc9b1ae1244a144cfc6] Merge branch 'for-6.5/block' into for-next
git bisect bad 0dbbd269fb6a8799f312dfc9b1ae1244a144cfc6
# good: [6c500000af037f74b66dd01b565c8ee1b501cc1b] block: mark bio_add_folio as __must_check
git bisect good 6c500000af037f74b66dd01b565c8ee1b501cc1b
# bad: [1a0ddd56e545b743af510b5a1b8dbdfe7d35cd3b] pktcdvd: replace sscanf() by kstrtoul()
git bisect bad 1a0ddd56e545b743af510b5a1b8dbdfe7d35cd3b
# good: [e3102722ffe77094ba9e7e46380792b3dd8a7abd] init: rename mount_block_root to mount_root_generic
git bisect good e3102722ffe77094ba9e7e46380792b3dd8a7abd
# bad: [d4a28d7defe79006e59293a4b43d518ba8483fb0] dm: remove dm_get_dev_t
git bisect bad d4a28d7defe79006e59293a4b43d518ba8483fb0
# good: [c0c1a7dcb6f5db4500e6574294674213bc24940c] init: move the nfs/cifs/ram special cases out of name_to_dev_t
git bisect good c0c1a7dcb6f5db4500e6574294674213bc24940c
# bad: [702f3189e454b3c3c2f3c99dbf30acf41aab707c] block: move the code to do early boot lookup of block devices to block/
git bisect bad 702f3189e454b3c3c2f3c99dbf30acf41aab707c
# bad: [079caa35f7863cd9958b4555ae873ea4d352a502] init: clear root_wait on all invalid root= strings
git bisect bad 079caa35f7863cd9958b4555ae873ea4d352a502
# good: [cf056a43121559d3642419917d405c3237ded90a] init: improve the name_to_dev_t interface
git bisect good cf056a43121559d3642419917d405c3237ded90a
# first bad commit: [079caa35f7863cd9958b4555ae873ea4d352a502] init: clear root_wait on all invalid root= strings

2023-06-22 03:59:27

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 14/24] init: clear root_wait on all invalid root= strings

On Wed, Jun 21, 2023 at 02:07:13PM -0700, Guenter Roeck wrote:
> On Tue, May 23, 2023 at 09:45:25AM +0200, Christoph Hellwig wrote:
> > Instead of only clearing root_wait in devt_from_partuuid when the UUID
> > format was invalid, do that in parse_root_device for all strings that
> > failed to parse.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
>
> In linux-next, almost all of my boot tests from usb drives as well
> as a few others fail with "Disabling rootwait; root= is invalid."
> in the log. Bisect points to this patch.

Can you send such a log, and the command line you've used?


2023-06-22 05:08:42

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 14/24] init: clear root_wait on all invalid root= strings

On 6/21/23 20:51, Christoph Hellwig wrote:
> On Wed, Jun 21, 2023 at 02:07:13PM -0700, Guenter Roeck wrote:
>> On Tue, May 23, 2023 at 09:45:25AM +0200, Christoph Hellwig wrote:
>>> Instead of only clearing root_wait in devt_from_partuuid when the UUID
>>> format was invalid, do that in parse_root_device for all strings that
>>> failed to parse.
>>>
>>> Signed-off-by: Christoph Hellwig <[email protected]>
>>
>> In linux-next, almost all of my boot tests from usb drives as well
>> as a few others fail with "Disabling rootwait; root= is invalid."
>> in the log. Bisect points to this patch.
>
> Can you send such a log, and the command line you've used?
>


There are lots of logs at https://kerneltests.org/builders, in the 'next'
column of qemu tests. One example is
https://kerneltests.org/builders/qemu-arm-v7-next/builds/511/steps/qemubuildcommand/logs/stdio

Sample command line:

qemu-system-arm -M mcimx7d-sabre \
-kernel arch/arm/boot/zImage -no-reboot -snapshot \
-usb -device usb-storage,drive=d0,bus=usb-bus.1 \
-drive file=rootfs-armv7a.ext2,if=none,id=d0,format=raw \
-m 256 -nic user -display none \
--append "root=/dev/sda rootwait earlycon=ec_imx6q,mmio,0x30860000,115200n8 console=ttymxc0,115200" \
-dtb arch/arm/boot/dts/imx7d-sdb.dtb \
-nographic -monitor null -serial stdio

This is with a modified imx_v6_v7_defconfig and root file system from
https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv7a.ext2.gz

The -EINVAL return value is from

if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')

in devt_from_devname().

Guenter


2023-06-22 06:46:27

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 14/24] init: clear root_wait on all invalid root= strings

Hi Guenter,

can you try this patch?

diff --git a/block/early-lookup.c b/block/early-lookup.c
index a5be3c68ed079c..66e4514d671179 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)
while (p > s && isdigit(p[-1]))
p--;
if (p == s || !*p || *p == '0')
- return -EINVAL;
+ return -ENODEV;

/* try disk name without <part number> */
part = simple_strtoul(p, NULL, 10);

2023-06-22 14:01:14

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 14/24] init: clear root_wait on all invalid root= strings

On 6/21/23 23:00, Christoph Hellwig wrote:
> Hi Guenter,
>
> can you try this patch?
>
> diff --git a/block/early-lookup.c b/block/early-lookup.c
> index a5be3c68ed079c..66e4514d671179 100644
> --- a/block/early-lookup.c
> +++ b/block/early-lookup.c
> @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)
> while (p > s && isdigit(p[-1]))
> p--;
> if (p == s || !*p || *p == '0')
> - return -EINVAL;
> + return -ENODEV;
>
> /* try disk name without <part number> */
> part = simple_strtoul(p, NULL, 10);

Not completely. Tests with root=/dev/sda still fail.

"name" passed to devt_from_devname() is "sda".

for (p = s; *p; p++) {
if (*p == '/')
*p = '!';
}

advances 'p' to the end of the string.

while (p > s && isdigit(p[-1]))
p--;

moves it back to point to the first digit (if there is one).

if (p == s || !*p || *p == '0')
return -EINVAL;

then fails because *p is 0. In other words, the function only accepts
drive names with digits at the end (and the first digit must not be '0').

I don't recall how I hit the other condition earlier. I have various
"/dev/mmcblkX" in my tests, where X can be any number including 0.
Maybe those fail randomly as well.

Overall I am not sure though what an "invalid" devicename is supposed
to be in this context. I have "sda", "sr0", "vda", "mtdblkX",
"nvme0n1", "mmcblkX", and "hda". Why would any of those not be eligible
for "rootwait" ?

In practice, everything not ending with a digit, or ending with
'0', fails the first test. Everything ending with a digit > 0
fails the second test. But "humptydump3p4" passes all those tests.

Guenter

---
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

#define EINVAL1 1
#define EINVAL2 2
#define EINVAL3 3
#define ENODEV 4

static int devt_from_devname(const char *name)
{
int part;
char s[32];
char *p;

if (strlen(name) > 31)
return EINVAL1;

strcpy(s, name);
for (p = s; *p; p++) {
if (*p == '/')
*p = '!';
}

/*
* Try non-existent, but valid partition, which may only exist after
* opening the device, like partitioned md devices.
*/
while (p > s && isdigit(p[-1]))
p--;
if (p == s || !*p || *p == '0') {
return EINVAL2;
}

/* try disk name without <part number> */
part = strtoul(p, NULL, 10);
*p = '\0';

/* try disk name without p<part number> */
if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') {
return EINVAL3;
}
return ENODEV;
}

char *devnames[] = {
"sda",
"sda1",
"mmcblk0",
"mmcblk1",
"mtdblk0",
"mtdblk1",
"vda",
"hda",
"nvme0n1",
"sr0",
"sr1",
"humptydump3p4",
NULL
};

int main(int argc, char **argv)
{
char *str;
int i;

for (i = 0, str = devnames[0]; str; str = devnames[++i]) {
printf("%s: %d\n", str, devt_from_devname(str));
}
}


2023-06-22 14:59:10

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 14/24] init: clear root_wait on all invalid root= strings

On Thu, Jun 22, 2023 at 06:54:41AM -0700, Guenter Roeck wrote:
> On 6/21/23 23:00, Christoph Hellwig wrote:
>> Hi Guenter,
>>
>> can you try this patch?
>>
>> diff --git a/block/early-lookup.c b/block/early-lookup.c
>> index a5be3c68ed079c..66e4514d671179 100644
>> --- a/block/early-lookup.c
>> +++ b/block/early-lookup.c
>> @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)
>> while (p > s && isdigit(p[-1]))
>> p--;
>> if (p == s || !*p || *p == '0')
>> - return -EINVAL;
>> + return -ENODEV;
>> /* try disk name without <part number> */
>> part = simple_strtoul(p, NULL, 10);
>
> Not completely. Tests with root=/dev/sda still fail.
>
> "name" passed to devt_from_devname() is "sda".
>
> for (p = s; *p; p++) {
> if (*p == '/')
> *p = '!';
> }
>
> advances 'p' to the end of the string.
>
> while (p > s && isdigit(p[-1]))
> p--;
>
> moves it back to point to the first digit (if there is one).
>
> if (p == s || !*p || *p == '0')
> return -EINVAL;
>
> then fails because *p is 0. In other words, the function only accepts
> drive names with digits at the end (and the first digit must not be '0').
>
> I don't recall how I hit the other condition earlier. I have various
> "/dev/mmcblkX" in my tests, where X can be any number including 0.
> Maybe those fail randomly as well.
>
> Overall I am not sure though what an "invalid" devicename is supposed
> to be in this context. I have "sda", "sr0", "vda", "mtdblkX",
> "nvme0n1", "mmcblkX", and "hda". Why would any of those not be eligible
> for "rootwait" ?
>
> In practice, everything not ending with a digit, or ending with
> '0', fails the first test. Everything ending with a digit > 0
> fails the second test. But "humptydump3p4" passes all those tests.


Yeah. I guess I should give up on the idea of error out in this
particular parser. The idea sounded good, but I guess it doesn't
work. So we'll probably want his fix:


diff --git a/block/early-lookup.c b/block/early-lookup.c
index a5be3c68ed079c..9e2d5a19de1b3b 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)
while (p > s && isdigit(p[-1]))
p--;
if (p == s || !*p || *p == '0')
- return -EINVAL;
+ return -ENODEV;

/* try disk name without <part number> */
part = simple_strtoul(p, NULL, 10);
@@ -185,7 +185,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)

/* try disk name without p<part number> */
if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
- return -EINVAL;
+ return -ENODEV;
p[-1] = '\0';
*devt = blk_lookup_devt(s, part);
if (*devt)

2023-06-22 15:01:07

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 14/24] init: clear root_wait on all invalid root= strings

On 6/22/23 07:40, Christoph Hellwig wrote:
> On Thu, Jun 22, 2023 at 06:54:41AM -0700, Guenter Roeck wrote:
>> On 6/21/23 23:00, Christoph Hellwig wrote:
>>> Hi Guenter,
>>>
>>> can you try this patch?
>>>
>>> diff --git a/block/early-lookup.c b/block/early-lookup.c
>>> index a5be3c68ed079c..66e4514d671179 100644
>>> --- a/block/early-lookup.c
>>> +++ b/block/early-lookup.c
>>> @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)
>>> while (p > s && isdigit(p[-1]))
>>> p--;
>>> if (p == s || !*p || *p == '0')
>>> - return -EINVAL;
>>> + return -ENODEV;
>>> /* try disk name without <part number> */
>>> part = simple_strtoul(p, NULL, 10);
>>
>> Not completely. Tests with root=/dev/sda still fail.
>>
>> "name" passed to devt_from_devname() is "sda".
>>
>> for (p = s; *p; p++) {
>> if (*p == '/')
>> *p = '!';
>> }
>>
>> advances 'p' to the end of the string.
>>
>> while (p > s && isdigit(p[-1]))
>> p--;
>>
>> moves it back to point to the first digit (if there is one).
>>
>> if (p == s || !*p || *p == '0')
>> return -EINVAL;
>>
>> then fails because *p is 0. In other words, the function only accepts
>> drive names with digits at the end (and the first digit must not be '0').
>>
>> I don't recall how I hit the other condition earlier. I have various
>> "/dev/mmcblkX" in my tests, where X can be any number including 0.
>> Maybe those fail randomly as well.
>>
>> Overall I am not sure though what an "invalid" devicename is supposed
>> to be in this context. I have "sda", "sr0", "vda", "mtdblkX",
>> "nvme0n1", "mmcblkX", and "hda". Why would any of those not be eligible
>> for "rootwait" ?
>>
>> In practice, everything not ending with a digit, or ending with
>> '0', fails the first test. Everything ending with a digit > 0
>> fails the second test. But "humptydump3p4" passes all those tests.
>
>
> Yeah. I guess I should give up on the idea of error out in this
> particular parser. The idea sounded good, but I guess it doesn't
> work. So we'll probably want his fix:
>

Yes, that fixes the problem for me.

Guenter

>
> diff --git a/block/early-lookup.c b/block/early-lookup.c
> index a5be3c68ed079c..9e2d5a19de1b3b 100644
> --- a/block/early-lookup.c
> +++ b/block/early-lookup.c
> @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)
> while (p > s && isdigit(p[-1]))
> p--;
> if (p == s || !*p || *p == '0')
> - return -EINVAL;
> + return -ENODEV;
>
> /* try disk name without <part number> */
> part = simple_strtoul(p, NULL, 10);
> @@ -185,7 +185,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt)
>
> /* try disk name without p<part number> */
> if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
> - return -EINVAL;
> + return -ENODEV;
> p[-1] = '\0';
> *devt = blk_lookup_devt(s, part);
> if (*devt)