Hi all,
this series starts to move the early init code away from requiring
KERNEL_DS to be implicitly set during early startup. It does so by
first removing legacy unused cruft, and then switches the code away
from struct file based APIs to our more usual in-kernel APIs.
There is no really good tree for this, so if there are no objections
I'd like to set up a new one for linux-next.
Git tree:
git://git.infradead.org/users/hch/misc.git init-user-pointers
Gitweb:
http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/init-user-pointers
Changes since v2:
- add vfs_fchown and vfs_fchmod helpers and use them for initramfs
unpacking
- split patches up a little more
- fix a commit log typo
Changes since v1:
- add a patch to deprecate "classic" initrd support
Diffstat:
b/arch/arm/kernel/atags_parse.c | 2
b/arch/sh/kernel/setup.c | 2
b/arch/sparc/kernel/setup_32.c | 2
b/arch/sparc/kernel/setup_64.c | 2
b/arch/x86/kernel/setup.c | 2
b/drivers/md/Makefile | 3
b/drivers/md/md-autodetect.c | 239 ++++++++++++++++++----------------------
b/drivers/md/md.c | 34 +----
b/drivers/md/md.h | 10 +
b/fs/file.c | 7 -
b/fs/ioctl.c | 7 -
b/fs/open.c | 56 +++++----
b/fs/read_write.c | 2
b/fs/readdir.c | 11 -
b/include/linux/fs.h | 3
b/include/linux/initrd.h | 6 -
b/include/linux/raid/detect.h | 8 +
b/include/linux/syscalls.h | 17 --
b/init/Makefile | 1
b/init/do_mounts.c | 70 +----------
b/init/do_mounts.h | 21 ---
b/init/do_mounts_initrd.c | 13 --
b/init/do_mounts_rd.c | 102 +++++++----------
b/init/initramfs.c | 103 +++++------------
b/init/main.c | 16 +-
include/linux/raid/md_u.h | 13 --
26 files changed, 279 insertions(+), 473 deletions(-)
Add a helper for struct file based chown operations. To be used by
the initramfs code soon.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/open.c | 29 +++++++++++++++++------------
include/linux/fs.h | 2 ++
2 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/fs/open.c b/fs/open.c
index 6cd48a61cda3b9..103c66309bee67 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -740,23 +740,28 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
AT_SYMLINK_NOFOLLOW);
}
+int vfs_fchown(struct file *file, uid_t user, gid_t group)
+{
+ int error;
+
+ error = mnt_want_write_file(file);
+ if (error)
+ return error;
+ audit_file(file);
+ error = chown_common(&file->f_path, user, group);
+ mnt_drop_write_file(file);
+ return error;
+}
+
int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
{
struct fd f = fdget(fd);
int error = -EBADF;
- if (!f.file)
- goto out;
-
- error = mnt_want_write_file(f.file);
- if (error)
- goto out_fput;
- audit_file(f.file);
- error = chown_common(&f.file->f_path, user, group);
- mnt_drop_write_file(f.file);
-out_fput:
- fdput(f);
-out:
+ if (f.file) {
+ error = vfs_fchown(f.file, user, group);
+ fdput(f);
+ }
return error;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5abba86107d86..0ddd64ca0b45c0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1744,6 +1744,8 @@ int vfs_mkobj(struct dentry *, umode_t,
int (*f)(struct dentry *, umode_t, void *),
void *);
+int vfs_fchown(struct file *file, uid_t user, gid_t group);
+
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
--
2.27.0
mdp_major can just move to drivers/md/md.h.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Song Liu <[email protected]>
---
drivers/md/md.h | 1 +
include/linux/raid/md_u.h | 13 -------------
2 files changed, 1 insertion(+), 13 deletions(-)
delete mode 100644 include/linux/raid/md_u.h
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 37315a3f28e97d..6f8fff77ce10a5 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -801,6 +801,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
mddev->queue->limits.max_write_zeroes_sectors = 0;
}
+extern int mdp_major;
void md_autostart_arrays(int part);
#endif /* _MD_MD_H */
diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h
deleted file mode 100644
index 8dfec085a20ee1..00000000000000
--- a/include/linux/raid/md_u.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- md_u.h : user <=> kernel API between Linux raidtools and RAID drivers
- Copyright (C) 1998 Ingo Molnar
-
-*/
-#ifndef _MD_U_H
-#define _MD_U_H
-
-#include <uapi/linux/raid/md_u.h>
-
-extern int mdp_major;
-#endif
--
2.27.0
Fold it into the only remaining caller.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/open.c | 7 +------
include/linux/syscalls.h | 1 -
2 files changed, 1 insertion(+), 7 deletions(-)
diff --git a/fs/open.c b/fs/open.c
index ab3671af8a9705..b316dd6a86a8b9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -608,7 +608,7 @@ int vfs_fchmod(struct file *file, umode_t mode)
return chmod_common(&file->f_path, mode);
}
-int ksys_fchmod(unsigned int fd, umode_t mode)
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
{
struct fd f = fdget(fd);
int err = -EBADF;
@@ -620,11 +620,6 @@ int ksys_fchmod(unsigned int fd, umode_t mode)
return err;
}
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
-{
- return ksys_fchmod(fd, mode);
-}
-
int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
{
struct path path;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b6d90057476260..39ff738997a172 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1240,7 +1240,6 @@ int ksys_umount(char __user *name, int flags);
int ksys_chroot(const char __user *filename);
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
int ksys_chdir(const char __user *filename);
-int ksys_fchmod(unsigned int fd, umode_t mode);
int ksys_fchown(unsigned int fd, uid_t user, gid_t group);
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count);
--
2.27.0
Just open code it in the two callers.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/open.c | 11 ++++++++---
include/linux/syscalls.h | 11 -----------
2 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/fs/open.c b/fs/open.c
index 75166f071d280a..ab3671af8a9705 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1208,7 +1208,9 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
- return ksys_open(filename, flags, mode);
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
+ return do_sys_open(AT_FDCWD, filename, flags, mode);
}
SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
@@ -1270,9 +1272,12 @@ COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, fla
*/
SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
{
- return ksys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
-}
+ int flags = O_CREAT | O_WRONLY | O_TRUNC;
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
+ return do_sys_open(AT_FDCWD, pathname, flags, mode);
+}
#endif
/*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a998651629c71b..363baaadf8e19a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1374,17 +1374,6 @@ static inline int ksys_close(unsigned int fd)
return __close_fd(current->files, fd);
}
-extern long do_sys_open(int dfd, const char __user *filename, int flags,
- umode_t mode);
-
-static inline long ksys_open(const char __user *filename, int flags,
- umode_t mode)
-{
- if (force_o_largefile())
- flags |= O_LARGEFILE;
- return do_sys_open(AT_FDCWD, filename, flags, mode);
-}
-
extern long do_sys_truncate(const char __user *pathname, loff_t length);
static inline long ksys_truncate(const char __user *pathname, loff_t length)
--
2.27.0
Fold it into the only remaining caller.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/ioctl.c | 7 +------
include/linux/syscalls.h | 1 -
2 files changed, 1 insertion(+), 7 deletions(-)
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d69786d1dd9115..4e6cc0a7d69c9f 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -736,7 +736,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
return -ENOIOCTLCMD;
}
-int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
struct fd f = fdget(fd);
int error;
@@ -757,11 +757,6 @@ int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
return error;
}
-SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
-{
- return ksys_ioctl(fd, cmd, arg);
-}
-
#ifdef CONFIG_COMPAT
/**
* compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 39ff738997a172..5b0f1fca4cfb9d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1241,7 +1241,6 @@ int ksys_chroot(const char __user *filename);
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
int ksys_chdir(const char __user *filename);
int ksys_fchown(unsigned int fd, uid_t user, gid_t group);
-int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count);
void ksys_sync(void);
int ksys_unshare(unsigned long unshare_flags);
--
2.27.0
Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
instead open a struct file for /dev/console and then install it as FD
0/1/2 manually.
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/main.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/init/main.c b/init/main.c
index 0ead83e86b5aa2..db0621dfbb0468 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1457,15 +1457,19 @@ static int __ref kernel_init(void *unused)
"See Linux Documentation/admin-guide/init.rst for guidance.");
}
+/* Open /dev/console, for stdin/stdout/stderr, this should never fail */
void console_on_rootfs(void)
{
- /* Open the /dev/console as stdin, this should never fail */
- if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- pr_err("Warning: unable to open an initial console.\n");
+ struct file *file = filp_open("/dev/console", O_RDWR, 0);
- /* create stdout/stderr */
- (void) ksys_dup(0);
- (void) ksys_dup(0);
+ if (IS_ERR(file)) {
+ pr_err("Warning: unable to open an initial console.\n");
+ return;
+ }
+ get_file_rcu_many(file, 2);
+ fd_install(get_unused_fd_flags(0), file);
+ fd_install(get_unused_fd_flags(0), file);
+ fd_install(get_unused_fd_flags(0), file);
}
static noinline void __init kernel_init_freeable(void)
--
2.27.0
Just open code it in the only caller.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/readdir.c | 11 ++---------
include/linux/syscalls.h | 2 --
2 files changed, 2 insertions(+), 11 deletions(-)
diff --git a/fs/readdir.c b/fs/readdir.c
index a49f07c11cfbd0..19434b3c982cd3 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -348,8 +348,8 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
return -EFAULT;
}
-int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
- unsigned int count)
+SYSCALL_DEFINE3(getdents64, unsigned int, fd,
+ struct linux_dirent64 __user *, dirent, unsigned int, count)
{
struct fd f;
struct getdents_callback64 buf = {
@@ -380,13 +380,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
return error;
}
-
-SYSCALL_DEFINE3(getdents64, unsigned int, fd,
- struct linux_dirent64 __user *, dirent, unsigned int, count)
-{
- return ksys_getdents64(fd, dirent, count);
-}
-
#ifdef CONFIG_COMPAT
struct compat_old_linux_dirent {
compat_ulong_t d_ino;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 10843a6adb770d..a998651629c71b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1243,8 +1243,6 @@ ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
int ksys_chdir(const char __user *filename);
int ksys_fchmod(unsigned int fd, umode_t mode);
int ksys_fchown(unsigned int fd, uid_t user, gid_t group);
-int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
- unsigned int count);
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count);
void ksys_sync(void);
--
2.27.0
Instead of using a special RAID_AUTORUN ioctl that only exists for
non-modular builds and is only called from the early init code, just
call the actual function directly.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Song Liu <[email protected]>
---
drivers/md/md-autodetect.c | 10 ++--------
drivers/md/md.c | 14 +-------------
drivers/md/md.h | 3 +++
3 files changed, 6 insertions(+), 21 deletions(-)
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index fe806f7b9759a1..0eb746211ed53c 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -9,6 +9,7 @@
#include <linux/raid/detect.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>
+#include "md.h"
/*
* When md (and any require personalities) are compiled into the kernel
@@ -285,8 +286,6 @@ __setup("md=", md_setup);
static void __init autodetect_raid(void)
{
- int fd;
-
/*
* Since we don't want to detect and use half a raid array, we need to
* wait for the known devices to complete their probing
@@ -295,12 +294,7 @@ static void __init autodetect_raid(void)
printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n");
wait_for_device_probe();
-
- fd = ksys_open("/dev/md0", 0, 0);
- if (fd >= 0) {
- ksys_ioctl(fd, RAID_AUTORUN, raid_autopart);
- ksys_close(fd);
- }
+ md_autostart_arrays(raid_autopart);
}
void __init md_run_setup(void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f567f536b529bd..6e9a48da474848 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -68,10 +68,6 @@
#include "md-bitmap.h"
#include "md-cluster.h"
-#ifndef MODULE
-static void autostart_arrays(int part);
-#endif
-
/* pers_list is a list of registered personalities protected
* by pers_lock.
* pers_lock does extra service to protect accesses to
@@ -7421,7 +7417,6 @@ static inline bool md_ioctl_valid(unsigned int cmd)
case GET_DISK_INFO:
case HOT_ADD_DISK:
case HOT_REMOVE_DISK:
- case RAID_AUTORUN:
case RAID_VERSION:
case RESTART_ARRAY_RW:
case RUN_ARRAY:
@@ -7467,13 +7462,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
case RAID_VERSION:
err = get_version(argp);
goto out;
-
-#ifndef MODULE
- case RAID_AUTORUN:
- err = 0;
- autostart_arrays(arg);
- goto out;
-#endif
default:;
}
@@ -9721,7 +9709,7 @@ void md_autodetect_dev(dev_t dev)
}
}
-static void autostart_arrays(int part)
+void md_autostart_arrays(int part)
{
struct md_rdev *rdev;
struct detected_devices_node *node_detected_dev;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 612814d07d35ab..37315a3f28e97d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -800,4 +800,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
!bio->bi_disk->queue->limits.max_write_zeroes_sectors)
mddev->queue->limits.max_write_zeroes_sectors = 0;
}
+
+void md_autostart_arrays(int part);
+
#endif /* _MD_MD_H */
--
2.27.0
Remove the special handling for multiple floppies in the initrd code.
No one should be using floppies for booting these days. (famous last
words..)
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/arm/kernel/atags_parse.c | 2 -
arch/sh/kernel/setup.c | 2 -
arch/sparc/kernel/setup_32.c | 2 -
arch/sparc/kernel/setup_64.c | 2 -
arch/x86/kernel/setup.c | 2 -
include/linux/initrd.h | 6 ---
init/do_mounts.c | 69 ++++-------------------------------
init/do_mounts.h | 1 -
init/do_mounts_rd.c | 20 +++-------
9 files changed, 12 insertions(+), 94 deletions(-)
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index ce02f92f4ab262..6c12d9fe694e3e 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -91,8 +91,6 @@ __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext);
static int __init parse_tag_ramdisk(const struct tag *tag)
{
rd_image_start = tag->u.ramdisk.start;
- rd_doload = (tag->u.ramdisk.flags & 1) == 0;
- rd_prompt = (tag->u.ramdisk.flags & 2) == 0;
if (tag->u.ramdisk.size)
rd_size = tag->u.ramdisk.size;
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 67f5a3b44c2eff..4144be650d4106 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -290,8 +290,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
if (!MOUNT_ROOT_RDONLY)
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 6d07b85b9e2470..eea43a1aef1b9a 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -353,8 +353,6 @@ void __init setup_arch(char **cmdline_p)
ROOT_DEV = old_decode_dev(root_dev);
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
#endif
prom_setsync(prom_sync_me);
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index f765fda871eb61..d87244197d5cbb 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -659,8 +659,6 @@ void __init setup_arch(char **cmdline_p)
ROOT_DEV = old_decode_dev(root_dev);
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
#endif
task_thread_info(&init_task)->kregs = &fake_swapper_regs;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a3767e74c758c0..b9a68d8e06d8d1 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -870,8 +870,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
#endif
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index aa591435572868..8db6f8c8030b68 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -2,12 +2,6 @@
#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
-/* 1 = load ramdisk, 0 = don't load */
-extern int rd_doload;
-
-/* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_prompt;
-
/* starting block # of image */
extern int rd_image_start;
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 1a4dfa17fb2899..4f4ceb35805503 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -28,8 +28,6 @@
#include "do_mounts.h"
-int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
-
int root_mountflags = MS_RDONLY | MS_SILENT;
static char * __initdata root_device_name;
static char __initdata saved_root_name[64];
@@ -39,7 +37,7 @@ dev_t ROOT_DEV;
static int __init load_ramdisk(char *str)
{
- rd_doload = simple_strtol(str,NULL,0) & 3;
+ pr_warn("ignoring the depreated load_ramdisk= option\n");
return 1;
}
__setup("load_ramdisk=", load_ramdisk);
@@ -553,66 +551,20 @@ static int __init mount_cifs_root(void)
}
#endif
-#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
-void __init change_floppy(char *fmt, ...)
-{
- struct termios termios;
- char buf[80];
- char c;
- int fd;
- va_list args;
- va_start(args, fmt);
- vsprintf(buf, fmt, args);
- va_end(args);
- fd = ksys_open("/dev/root", O_RDWR | O_NDELAY, 0);
- if (fd >= 0) {
- ksys_ioctl(fd, FDEJECT, 0);
- ksys_close(fd);
- }
- printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf);
- fd = ksys_open("/dev/console", O_RDWR, 0);
- if (fd >= 0) {
- ksys_ioctl(fd, TCGETS, (long)&termios);
- termios.c_lflag &= ~ICANON;
- ksys_ioctl(fd, TCSETSF, (long)&termios);
- ksys_read(fd, &c, 1);
- termios.c_lflag |= ICANON;
- ksys_ioctl(fd, TCSETSF, (long)&termios);
- ksys_close(fd);
- }
-}
-#endif
-
void __init mount_root(void)
{
#ifdef CONFIG_ROOT_NFS
if (ROOT_DEV == Root_NFS) {
- if (mount_nfs_root())
- return;
-
- printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
- ROOT_DEV = Root_FD0;
+ if (!mount_nfs_root())
+ printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n");
+ return;
}
#endif
#ifdef CONFIG_CIFS_ROOT
if (ROOT_DEV == Root_CIFS) {
- if (mount_cifs_root())
- return;
-
- printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying floppy.\n");
- ROOT_DEV = Root_FD0;
- }
-#endif
-#ifdef CONFIG_BLK_DEV_FD
- if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
- /* rd_doload is 2 for a dual initrd/ramload setup */
- if (rd_doload==2) {
- if (rd_load_disk(1)) {
- ROOT_DEV = Root_RAM1;
- root_device_name = NULL;
- }
- } else
- change_floppy("root floppy");
+ if (!mount_cifs_root())
+ printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n");
+ return;
}
#endif
#ifdef CONFIG_BLOCK
@@ -631,8 +583,6 @@ void __init mount_root(void)
*/
void __init prepare_namespace(void)
{
- int is_floppy;
-
if (root_delay) {
printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
root_delay);
@@ -675,11 +625,6 @@ void __init prepare_namespace(void)
async_synchronize_full();
}
- is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
-
- if (is_floppy && rd_doload && rd_load_disk(0))
- ROOT_DEV = Root_RAM0;
-
mount_root();
out:
devtmpfs_mount();
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 50d6c8941e15a1..c855b3f0e06d19 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -9,7 +9,6 @@
#include <linux/major.h>
#include <linux/root_dev.h>
-void change_floppy(char *fmt, ...);
void mount_block_root(char *name, int flags);
void mount_root(void);
extern int root_mountflags;
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 32fb049d18f9b4..27b1bccf6f12a8 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -15,11 +15,9 @@
#include <linux/decompress/generic.h>
-int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
-
static int __init prompt_ramdisk(char *str)
{
- rd_prompt = simple_strtol(str,NULL,0) & 1;
+ pr_warn("ignoring the depreated prompt_ramdisk= option\n");
return 1;
}
__setup("prompt_ramdisk=", prompt_ramdisk);
@@ -178,7 +176,7 @@ int __init rd_load_image(char *from)
int res = 0;
int in_fd, out_fd;
unsigned long rd_blocks, devblocks;
- int nblocks, i, disk;
+ int nblocks, i;
char *buf = NULL;
unsigned short rotate = 0;
decompress_fn decompressor = NULL;
@@ -243,21 +241,15 @@ int __init rd_load_image(char *from)
printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ",
nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
- for (i = 0, disk = 1; i < nblocks; i++) {
+ for (i = 0; i < nblocks; i++) {
if (i && (i % devblocks == 0)) {
- pr_cont("done disk #%d.\n", disk++);
+ pr_cont("done disk #1.\n");
rotate = 0;
if (ksys_close(in_fd)) {
printk("Error closing the disk.\n");
goto noclose_input;
}
- change_floppy("disk #%d", disk);
- in_fd = ksys_open(from, O_RDONLY, 0);
- if (in_fd < 0) {
- printk("Error opening disk.\n");
- goto noclose_input;
- }
- printk("Loading disk #%d... ", disk);
+ break;
}
ksys_read(in_fd, buf, BLOCK_SIZE);
ksys_write(out_fd, buf, BLOCK_SIZE);
@@ -284,8 +276,6 @@ int __init rd_load_image(char *from)
int __init rd_load_disk(int n)
{
- if (rd_prompt)
- change_floppy("root floppy disk to be loaded into RAM disk");
create_dev("/dev/root", ROOT_DEV);
create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n));
return rd_load_image("/dev/root");
--
2.27.0
The classic initial ramdisk has been replaced by the much more
flexible and efficient initramfs a long time ago. Warn about it being
removed soon.
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts_initrd.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 57ad5b2da8f5f5..e08669187d63be 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -75,6 +75,8 @@ static void __init handle_initrd(void)
extern char *envp_init[];
int error;
+ pr_warn("using deprecated initrd support, will be removed in 2021.\n");
+
real_root_dev = new_encode_dev(ROOT_DEV);
create_dev("/dev/root.old", Root_RAM0);
/* mount initrd on rootfs' /root */
--
2.27.0
Move the loop over the possible arrays into the caller to remove a level
of indentation for the whole function.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Song Liu <[email protected]>
---
drivers/md/md-autodetect.c | 203 ++++++++++++++++++-------------------
1 file changed, 101 insertions(+), 102 deletions(-)
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index 6bc9b734eee6ff..a43a8f1580584c 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -27,7 +27,7 @@ static int __initdata raid_noautodetect=1;
#endif
static int __initdata raid_autopart;
-static struct {
+static struct md_setup_args {
int minor;
int partitioned;
int level;
@@ -126,122 +126,117 @@ static inline int create_dev(char *name, dev_t dev)
return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
}
-static void __init md_setup_drive(void)
+static void __init md_setup_drive(struct md_setup_args *args)
{
- int minor, i, ent, partitioned;
+ int minor, i, partitioned;
dev_t dev;
dev_t devices[MD_SB_DISKS+1];
+ int fd;
+ int err = 0;
+ char *devname;
+ mdu_disk_info_t dinfo;
+ char name[16];
- for (ent = 0; ent < md_setup_ents ; ent++) {
- int fd;
- int err = 0;
- char *devname;
- mdu_disk_info_t dinfo;
- char name[16];
+ minor = args->minor;
+ partitioned = args->partitioned;
+ devname = args->device_names;
- minor = md_setup_args[ent].minor;
- partitioned = md_setup_args[ent].partitioned;
- devname = md_setup_args[ent].device_names;
+ sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
+ if (partitioned)
+ dev = MKDEV(mdp_major, minor << MdpMinorShift);
+ else
+ dev = MKDEV(MD_MAJOR, minor);
+ create_dev(name, dev);
+ for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
+ struct kstat stat;
+ char *p;
+ char comp_name[64];
- sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
- if (partitioned)
- dev = MKDEV(mdp_major, minor << MdpMinorShift);
- else
- dev = MKDEV(MD_MAJOR, minor);
- create_dev(name, dev);
- for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
- struct kstat stat;
- char *p;
- char comp_name[64];
+ p = strchr(devname, ',');
+ if (p)
+ *p++ = 0;
- p = strchr(devname, ',');
- if (p)
- *p++ = 0;
+ dev = name_to_dev_t(devname);
+ if (strncmp(devname, "/dev/", 5) == 0)
+ devname += 5;
+ snprintf(comp_name, 63, "/dev/%s", devname);
+ if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode))
+ dev = new_decode_dev(stat.rdev);
+ if (!dev) {
+ printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
+ break;
+ }
- dev = name_to_dev_t(devname);
- if (strncmp(devname, "/dev/", 5) == 0)
- devname += 5;
- snprintf(comp_name, 63, "/dev/%s", devname);
- if (vfs_stat(comp_name, &stat) == 0 &&
- S_ISBLK(stat.mode))
- dev = new_decode_dev(stat.rdev);
- if (!dev) {
- printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
- break;
- }
+ devices[i] = dev;
+ devname = p;
+ }
+ devices[i] = 0;
- devices[i] = dev;
+ if (!i)
+ return;
- devname = p;
- }
- devices[i] = 0;
+ printk(KERN_INFO "md: Loading md%s%d: %s\n",
+ partitioned ? "_d" : "", minor,
+ args->device_names);
- if (!i)
- continue;
+ fd = ksys_open(name, 0, 0);
+ if (fd < 0) {
+ printk(KERN_ERR "md: open failed - cannot start "
+ "array %s\n", name);
+ return;
+ }
+ if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
+ printk(KERN_WARNING
+ "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
+ minor);
+ ksys_close(fd);
+ return;
+ }
- printk(KERN_INFO "md: Loading md%s%d: %s\n",
- partitioned ? "_d" : "", minor,
- md_setup_args[ent].device_names);
+ if (args->level != LEVEL_NONE) {
+ /* non-persistent */
+ mdu_array_info_t ainfo;
+ ainfo.level = args->level;
+ ainfo.size = 0;
+ ainfo.nr_disks =0;
+ ainfo.raid_disks =0;
+ while (devices[ainfo.raid_disks])
+ ainfo.raid_disks++;
+ ainfo.md_minor =minor;
+ ainfo.not_persistent = 1;
- fd = ksys_open(name, 0, 0);
- if (fd < 0) {
- printk(KERN_ERR "md: open failed - cannot start "
- "array %s\n", name);
- continue;
- }
- if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
- printk(KERN_WARNING
- "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
- minor);
- ksys_close(fd);
- continue;
+ ainfo.state = (1 << MD_SB_CLEAN);
+ ainfo.layout = 0;
+ ainfo.chunk_size = args->chunk;
+ err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
+ for (i = 0; !err && i <= MD_SB_DISKS; i++) {
+ dev = devices[i];
+ if (!dev)
+ break;
+ dinfo.number = i;
+ dinfo.raid_disk = i;
+ dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
+ dinfo.major = MAJOR(dev);
+ dinfo.minor = MINOR(dev);
+ err = ksys_ioctl(fd, ADD_NEW_DISK,
+ (long)&dinfo);
}
-
- if (md_setup_args[ent].level != LEVEL_NONE) {
- /* non-persistent */
- mdu_array_info_t ainfo;
- ainfo.level = md_setup_args[ent].level;
- ainfo.size = 0;
- ainfo.nr_disks =0;
- ainfo.raid_disks =0;
- while (devices[ainfo.raid_disks])
- ainfo.raid_disks++;
- ainfo.md_minor =minor;
- ainfo.not_persistent = 1;
-
- ainfo.state = (1 << MD_SB_CLEAN);
- ainfo.layout = 0;
- ainfo.chunk_size = md_setup_args[ent].chunk;
- err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
- for (i = 0; !err && i <= MD_SB_DISKS; i++) {
- dev = devices[i];
- if (!dev)
- break;
- dinfo.number = i;
- dinfo.raid_disk = i;
- dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
- dinfo.major = MAJOR(dev);
- dinfo.minor = MINOR(dev);
- err = ksys_ioctl(fd, ADD_NEW_DISK,
- (long)&dinfo);
- }
- } else {
- /* persistent */
- for (i = 0; i <= MD_SB_DISKS; i++) {
- dev = devices[i];
- if (!dev)
- break;
- dinfo.major = MAJOR(dev);
- dinfo.minor = MINOR(dev);
- ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
- }
+ } else {
+ /* persistent */
+ for (i = 0; i <= MD_SB_DISKS; i++) {
+ dev = devices[i];
+ if (!dev)
+ break;
+ dinfo.major = MAJOR(dev);
+ dinfo.minor = MINOR(dev);
+ ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
}
- if (!err)
- err = ksys_ioctl(fd, RUN_ARRAY, 0);
- if (err)
- printk(KERN_WARNING "md: starting md%d failed\n", minor);
- ksys_close(fd);
}
+ if (!err)
+ err = ksys_ioctl(fd, RUN_ARRAY, 0);
+ if (err)
+ printk(KERN_WARNING "md: starting md%d failed\n", minor);
+ ksys_close(fd);
}
static int __init raid_setup(char *str)
@@ -289,11 +284,15 @@ static void __init autodetect_raid(void)
void __init md_run_setup(void)
{
+ int ent;
+
create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
if (raid_noautodetect)
printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
else
autodetect_raid();
- md_setup_drive();
+
+ for (ent = 0; ent < md_setup_ents; ent++)
+ md_setup_drive(&md_setup_args[ent]);
}
--
2.27.0
md_setup_drive knows it works with md devices, so it is rather pointless
to open a file descriptor and issue ioctls. Just call directly into the
relevant low-level md routines after getting a handle to the device using
blkdev_get_by_dev instead.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Song Liu <[email protected]>
---
drivers/md/md-autodetect.c | 127 ++++++++++++++++---------------------
drivers/md/md.c | 20 +++---
drivers/md/md.h | 6 ++
3 files changed, 71 insertions(+), 82 deletions(-)
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index a43a8f1580584c..5b24b5616d3acc 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -2,7 +2,6 @@
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/init.h>
-#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/major.h>
#include <linux/delay.h>
@@ -120,37 +119,29 @@ static int __init md_setup(char *str)
return 1;
}
-static inline int create_dev(char *name, dev_t dev)
-{
- ksys_unlink(name);
- return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
-}
-
static void __init md_setup_drive(struct md_setup_args *args)
{
- int minor, i, partitioned;
- dev_t dev;
- dev_t devices[MD_SB_DISKS+1];
- int fd;
- int err = 0;
- char *devname;
- mdu_disk_info_t dinfo;
+ char *devname = args->device_names;
+ dev_t devices[MD_SB_DISKS + 1], mdev;
+ struct mdu_array_info_s ainfo = { };
+ struct block_device *bdev;
+ struct mddev *mddev;
+ int err = 0, i;
char name[16];
- minor = args->minor;
- partitioned = args->partitioned;
- devname = args->device_names;
+ if (args->partitioned) {
+ mdev = MKDEV(mdp_major, args->minor << MdpMinorShift);
+ sprintf(name, "md_d%d", args->minor);
+ } else {
+ mdev = MKDEV(MD_MAJOR, args->minor);
+ sprintf(name, "md%d", args->minor);
+ }
- sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
- if (partitioned)
- dev = MKDEV(mdp_major, minor << MdpMinorShift);
- else
- dev = MKDEV(MD_MAJOR, minor);
- create_dev(name, dev);
for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
struct kstat stat;
char *p;
char comp_name[64];
+ dev_t dev;
p = strchr(devname, ',');
if (p)
@@ -163,7 +154,7 @@ static void __init md_setup_drive(struct md_setup_args *args)
if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode))
dev = new_decode_dev(stat.rdev);
if (!dev) {
- printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
+ pr_warn("md: Unknown device name: %s\n", devname);
break;
}
@@ -175,68 +166,64 @@ static void __init md_setup_drive(struct md_setup_args *args)
if (!i)
return;
- printk(KERN_INFO "md: Loading md%s%d: %s\n",
- partitioned ? "_d" : "", minor,
- args->device_names);
+ pr_info("md: Loading %s: %s\n", name, args->device_names);
- fd = ksys_open(name, 0, 0);
- if (fd < 0) {
- printk(KERN_ERR "md: open failed - cannot start "
- "array %s\n", name);
+ bdev = blkdev_get_by_dev(mdev, FMODE_READ, NULL);
+ if (IS_ERR(bdev)) {
+ pr_err("md: open failed - cannot start array %s\n", name);
return;
}
- if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
- printk(KERN_WARNING
- "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
- minor);
- ksys_close(fd);
- return;
+ mddev = bdev->bd_disk->private_data;
+
+ err = mddev_lock(mddev);
+ if (err) {
+ pr_err("md: failed to lock array %s\n", name);
+ goto out_blkdev_put;
+ }
+
+ if (!list_empty(&mddev->disks) || mddev->raid_disks) {
+ pr_warn("md: Ignoring %s, already autodetected. (Use raid=noautodetect)\n",
+ name);
+ goto out_unlock;
}
if (args->level != LEVEL_NONE) {
/* non-persistent */
- mdu_array_info_t ainfo;
ainfo.level = args->level;
- ainfo.size = 0;
- ainfo.nr_disks =0;
- ainfo.raid_disks =0;
- while (devices[ainfo.raid_disks])
- ainfo.raid_disks++;
- ainfo.md_minor =minor;
+ ainfo.md_minor = args->minor;
ainfo.not_persistent = 1;
-
ainfo.state = (1 << MD_SB_CLEAN);
- ainfo.layout = 0;
ainfo.chunk_size = args->chunk;
- err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
- for (i = 0; !err && i <= MD_SB_DISKS; i++) {
- dev = devices[i];
- if (!dev)
- break;
+ while (devices[ainfo.raid_disks])
+ ainfo.raid_disks++;
+ }
+
+ err = md_set_array_info(mddev, &ainfo);
+
+ for (i = 0; i <= MD_SB_DISKS && devices[i]; i++) {
+ struct mdu_disk_info_s dinfo = {
+ .major = MAJOR(devices[i]),
+ .minor = MINOR(devices[i]),
+ };
+
+ if (args->level != LEVEL_NONE) {
dinfo.number = i;
dinfo.raid_disk = i;
- dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
- dinfo.major = MAJOR(dev);
- dinfo.minor = MINOR(dev);
- err = ksys_ioctl(fd, ADD_NEW_DISK,
- (long)&dinfo);
- }
- } else {
- /* persistent */
- for (i = 0; i <= MD_SB_DISKS; i++) {
- dev = devices[i];
- if (!dev)
- break;
- dinfo.major = MAJOR(dev);
- dinfo.minor = MINOR(dev);
- ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
+ dinfo.state =
+ (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC);
}
+
+ md_add_new_disk(mddev, &dinfo);
}
+
if (!err)
- err = ksys_ioctl(fd, RUN_ARRAY, 0);
+ err = do_md_run(mddev);
if (err)
- printk(KERN_WARNING "md: starting md%d failed\n", minor);
- ksys_close(fd);
+ pr_warn("md: starting %s failed\n", name);
+out_unlock:
+ mddev_unlock(mddev);
+out_blkdev_put:
+ blkdev_put(bdev, FMODE_READ);
}
static int __init raid_setup(char *str)
@@ -286,8 +273,6 @@ void __init md_run_setup(void)
{
int ent;
- create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
-
if (raid_noautodetect)
printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
else
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6e9a48da474848..9960cfeb59a50c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4368,7 +4368,6 @@ array_state_show(struct mddev *mddev, char *page)
static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
-static int do_md_run(struct mddev *mddev);
static int restart_array(struct mddev *mddev);
static ssize_t
@@ -6015,7 +6014,7 @@ int md_run(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_run);
-static int do_md_run(struct mddev *mddev)
+int do_md_run(struct mddev *mddev)
{
int err;
@@ -6651,7 +6650,7 @@ static int get_disk_info(struct mddev *mddev, void __user * arg)
return 0;
}
-static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
+int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
{
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
struct md_rdev *rdev;
@@ -6697,7 +6696,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
}
/*
- * add_new_disk can be used once the array is assembled
+ * md_add_new_disk can be used once the array is assembled
* to add "hot spares". They must already have a superblock
* written
*/
@@ -6810,7 +6809,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
return err;
}
- /* otherwise, add_new_disk is only allowed
+ /* otherwise, md_add_new_disk is only allowed
* for major_version==0 superblocks
*/
if (mddev->major_version != 0) {
@@ -7055,7 +7054,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
}
/*
- * set_array_info is used two different ways
+ * md_set_array_info is used two different ways
* The original usage is when creating a new array.
* In this usage, raid_disks is > 0 and it together with
* level, size, not_persistent,layout,chunksize determine the
@@ -7067,9 +7066,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
* The minor and patch _version numbers are also kept incase the
* super_block handler wishes to interpret them.
*/
-static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
+int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
{
-
if (info->raid_disks == 0) {
/* just setting version number for superblock loading */
if (info->major_version < 0 ||
@@ -7560,7 +7558,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
err = -EBUSY;
goto unlock;
}
- err = set_array_info(mddev, &info);
+ err = md_set_array_info(mddev, &info);
if (err) {
pr_warn("md: couldn't set array info. %d\n", err);
goto unlock;
@@ -7614,7 +7612,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
/* Need to clear read-only for this */
break;
else
- err = add_new_disk(mddev, &info);
+ err = md_add_new_disk(mddev, &info);
goto unlock;
}
break;
@@ -7682,7 +7680,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
if (copy_from_user(&info, argp, sizeof(info)))
err = -EFAULT;
else
- err = add_new_disk(mddev, &info);
+ err = md_add_new_disk(mddev, &info);
goto unlock;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 6f8fff77ce10a5..7ee81aa2eac862 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -801,7 +801,13 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
mddev->queue->limits.max_write_zeroes_sectors = 0;
}
+struct mdu_array_info_s;
+struct mdu_disk_info_s;
+
extern int mdp_major;
void md_autostart_arrays(int part);
+int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
+int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
+int do_md_run(struct mddev *mddev);
#endif /* _MD_MD_H */
--
2.27.0
devfs is long gone, and autoscan works just fine without this these days.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Song Liu <[email protected]>
---
drivers/md/md-autodetect.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index 0eb746211ed53c..6bc9b734eee6ff 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -240,16 +240,6 @@ static void __init md_setup_drive(void)
err = ksys_ioctl(fd, RUN_ARRAY, 0);
if (err)
printk(KERN_WARNING "md: starting md%d failed\n", minor);
- else {
- /* reread the partition table.
- * I (neilb) and not sure why this is needed, but I cannot
- * boot a kernel with devfs compiled in from partitioned md
- * array without it
- */
- ksys_close(fd);
- fd = ksys_open(name, 0, 0);
- ksys_ioctl(fd, BLKRRPART, 0);
- }
ksys_close(fd);
}
}
--
2.27.0
The only caller of the bstat function becomes cleaner and simpler when
open coding the function.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Song Liu <[email protected]>
---
init/do_mounts.h | 10 ----------
init/do_mounts_md.c | 8 ++++----
2 files changed, 4 insertions(+), 14 deletions(-)
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 0bb0806de4ce2c..7513d1c14d13fe 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -20,16 +20,6 @@ static inline int create_dev(char *name, dev_t dev)
return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
}
-static inline u32 bstat(char *name)
-{
- struct kstat stat;
- if (vfs_stat(name, &stat) != 0)
- return 0;
- if (!S_ISBLK(stat.mode))
- return 0;
- return stat.rdev;
-}
-
#ifdef CONFIG_BLK_DEV_RAM
int __init rd_load_disk(int n);
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index b84031528dd446..359363e85ccd0b 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -138,9 +138,9 @@ static void __init md_setup_drive(void)
dev = MKDEV(MD_MAJOR, minor);
create_dev(name, dev);
for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
+ struct kstat stat;
char *p;
char comp_name[64];
- u32 rdev;
p = strchr(devname, ',');
if (p)
@@ -150,9 +150,9 @@ static void __init md_setup_drive(void)
if (strncmp(devname, "/dev/", 5) == 0)
devname += 5;
snprintf(comp_name, 63, "/dev/%s", devname);
- rdev = bstat(comp_name);
- if (rdev)
- dev = new_decode_dev(rdev);
+ if (vfs_stat(comp_name, &stat) == 0 &&
+ S_ISBLK(stat.mode))
+ dev = new_decode_dev(stat.rdev);
if (!dev) {
printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
break;
--
2.27.0
Add a helper for struct file based chmod operations. To be used by
the initramfs code soon.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/open.c | 9 +++++++--
include/linux/fs.h | 1 +
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/open.c b/fs/open.c
index 103c66309bee67..75166f071d280a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -602,14 +602,19 @@ static int chmod_common(const struct path *path, umode_t mode)
return error;
}
+int vfs_fchmod(struct file *file, umode_t mode)
+{
+ audit_file(file);
+ return chmod_common(&file->f_path, mode);
+}
+
int ksys_fchmod(unsigned int fd, umode_t mode)
{
struct fd f = fdget(fd);
int err = -EBADF;
if (f.file) {
- audit_file(f.file);
- err = chmod_common(&f.file->f_path, mode);
+ err = vfs_fchmod(f.file, mode);
fdput(f);
}
return err;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0ddd64ca0b45c0..635086726f2053 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1745,6 +1745,7 @@ int vfs_mkobj(struct dentry *, umode_t,
void *);
int vfs_fchown(struct file *file, uid_t user, gid_t group);
+int vfs_fchmod(struct file *file, umode_t mode);
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
--
2.27.0
Just like the NFS and CIFS root code this better lives with the
driver it is tightly integrated with.
Signed-off-by: Christoph Hellwig <[email protected]>
Acked-by: Song Liu <[email protected]>
---
drivers/md/Makefile | 3 +++
init/do_mounts_md.c => drivers/md/md-autodetect.c | 15 +++++++++++++--
include/linux/raid/detect.h | 8 ++++++++
init/Makefile | 1 -
init/do_mounts.c | 1 +
init/do_mounts.h | 10 ----------
6 files changed, 25 insertions(+), 13 deletions(-)
rename init/do_mounts_md.c => drivers/md/md-autodetect.c (96%)
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 31840f95cd408b..6d3e234dc46a5d 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -43,6 +43,9 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o
obj-$(CONFIG_MD_CLUSTER) += md-cluster.o
obj-$(CONFIG_BCACHE) += bcache/
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
+ifeq ($(CONFIG_BLK_DEV_MD),y)
+obj-y += md-autodetect.o
+endif
obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
obj-$(CONFIG_DM_UNSTRIPED) += dm-unstripe.o
diff --git a/init/do_mounts_md.c b/drivers/md/md-autodetect.c
similarity index 96%
rename from init/do_mounts_md.c
rename to drivers/md/md-autodetect.c
index 359363e85ccd0b..fe806f7b9759a1 100644
--- a/init/do_mounts_md.c
+++ b/drivers/md/md-autodetect.c
@@ -1,10 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/syscalls.h>
+#include <linux/mount.h>
+#include <linux/major.h>
#include <linux/delay.h>
+#include <linux/raid/detect.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>
-#include "do_mounts.h"
-
/*
* When md (and any require personalities) are compiled into the kernel
* (not a module), arrays can be assembles are boot time using with AUTODETECT
@@ -114,6 +119,12 @@ static int __init md_setup(char *str)
return 1;
}
+static inline int create_dev(char *name, dev_t dev)
+{
+ ksys_unlink(name);
+ return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
+}
+
static void __init md_setup_drive(void)
{
int minor, i, ent, partitioned;
diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h
index 37dd3f40cd316e..1f029a71c3ef05 100644
--- a/include/linux/raid/detect.h
+++ b/include/linux/raid/detect.h
@@ -1,3 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
void md_autodetect_dev(dev_t dev);
+
+#ifdef CONFIG_BLK_DEV_MD
+void md_run_setup(void);
+#else
+static inline void md_run_setup(void)
+{
+}
+#endif
diff --git a/init/Makefile b/init/Makefile
index 57499b1ff4714d..6bc37f64b3617c 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -18,7 +18,6 @@ obj-y += init_task.o
mounts-y := do_mounts.o
mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o
mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o
-mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o
# dependencies on generated files need to be listed explicitly
$(obj)/version.o: include/generated/compile.h
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 29d326b6c29d2d..1a4dfa17fb2899 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -23,6 +23,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_mount.h>
+#include <linux/raid/detect.h>
#include <uapi/linux/mount.h>
#include "do_mounts.h"
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 7513d1c14d13fe..50d6c8941e15a1 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -41,13 +41,3 @@ bool __init initrd_load(void);
static inline bool initrd_load(void) { return false; }
#endif
-
-#ifdef CONFIG_BLK_DEV_MD
-
-void md_run_setup(void);
-
-#else
-
-static inline void md_run_setup(void) {}
-
-#endif
--
2.27.0
There is no good reason to mess with file descriptors from in-kernel
code, switch the initramfs unpacking to struct file based write
instead.
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/initramfs.c | 42 +++++++++++++++++++++---------------------
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/init/initramfs.c b/init/initramfs.c
index d42ec8329cd840..c335920e5ecc2d 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -13,13 +13,13 @@
#include <linux/memblock.h>
#include <linux/namei.h>
-static ssize_t __init xwrite(int fd, const char *p, size_t count)
+static ssize_t __init xwrite(struct file *file, const char *p, size_t count)
{
ssize_t out = 0;
/* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
while (count) {
- ssize_t rv = ksys_write(fd, p, count);
+ ssize_t rv = kernel_write(file, p, count, &file->f_pos);
if (rv < 0) {
if (rv == -EINTR || rv == -EAGAIN)
@@ -317,7 +317,7 @@ static int __init maybe_link(void)
return 0;
}
-static __initdata int wfd;
+static __initdata struct file *wfile;
static int __init do_name(void)
{
@@ -334,16 +334,16 @@ static int __init do_name(void)
int openflags = O_WRONLY|O_CREAT;
if (ml != 1)
openflags |= O_TRUNC;
- wfd = ksys_open(collected, openflags, mode);
-
- if (wfd >= 0) {
- ksys_fchown(wfd, uid, gid);
- ksys_fchmod(wfd, mode);
- if (body_len)
- ksys_ftruncate(wfd, body_len);
- vcollected = kstrdup(collected, GFP_KERNEL);
- state = CopyFile;
- }
+ wfile = filp_open(collected, openflags, mode);
+ if (IS_ERR(wfile))
+ return 0;
+
+ vfs_fchown(wfile, uid, gid);
+ vfs_fchmod(wfile, mode);
+ if (body_len)
+ vfs_truncate(&wfile->f_path, body_len);
+ vcollected = kstrdup(collected, GFP_KERNEL);
+ state = CopyFile;
}
} else if (S_ISDIR(mode)) {
ksys_mkdir(collected, mode);
@@ -365,16 +365,16 @@ static int __init do_name(void)
static int __init do_copy(void)
{
if (byte_count >= body_len) {
- if (xwrite(wfd, victim, body_len) != body_len)
+ if (xwrite(wfile, victim, body_len) != body_len)
error("write error");
- ksys_close(wfd);
+ fput(wfile);
do_utime(vcollected, mtime);
kfree(vcollected);
eat(body_len);
state = SkipIt;
return 0;
} else {
- if (xwrite(wfd, victim, byte_count) != byte_count)
+ if (xwrite(wfile, victim, byte_count) != byte_count)
error("write error");
body_len -= byte_count;
eat(byte_count);
@@ -586,21 +586,21 @@ static void __init clean_rootfs(void)
static void __init populate_initrd_image(char *err)
{
ssize_t written;
- int fd;
+ struct file *file;
unpack_to_rootfs(__initramfs_start, __initramfs_size);
printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
err);
- fd = ksys_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
- if (fd < 0)
+ file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ if (IS_ERR(file))
return;
- written = xwrite(fd, (char *)initrd_start, initrd_end - initrd_start);
+ written = xwrite(file, (char *)initrd_start, initrd_end - initrd_start);
if (written != initrd_end - initrd_start)
pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
written, initrd_end - initrd_start);
- ksys_close(fd);
+ fput(file);
}
#endif /* CONFIG_BLK_DEV_RAM */
--
2.27.0
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts_initrd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index e4f88e9e1c0839..57ad5b2da8f5f5 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -56,7 +56,7 @@ static int __init early_initrd(char *p)
}
early_param("initrd", early_initrd);
-static int init_linuxrc(struct subprocess_info *info, struct cred *new)
+static int __init init_linuxrc(struct subprocess_info *info, struct cred *new)
{
ksys_unshare(CLONE_FS | CLONE_FILES);
console_on_rootfs();
--
2.27.0
Just use d_genocide instead of iterating through the root directory with
cumbersome userspace-like APIs. This also ensures we actually remove files
that are not direct children of the root entry, which the old code failed
to do.
Fixes: df52092f3c97 ("fastboot: remove duplicate unpack_to_rootfs()")
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/initramfs.c | 46 +++++-----------------------------------------
1 file changed, 5 insertions(+), 41 deletions(-)
diff --git a/init/initramfs.c b/init/initramfs.c
index d10404625c31f0..d42ec8329cd840 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -11,6 +11,7 @@
#include <linux/utime.h>
#include <linux/file.h>
#include <linux/memblock.h>
+#include <linux/namei.h>
static ssize_t __init xwrite(int fd, const char *p, size_t count)
{
@@ -572,51 +573,14 @@ static inline bool kexec_free_initrd(void)
#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_BLK_DEV_RAM
-#define BUF_SIZE 1024
static void __init clean_rootfs(void)
{
- int fd;
- void *buf;
- struct linux_dirent64 *dirp;
- int num;
+ struct path path;
- fd = ksys_open("/", O_RDONLY, 0);
- WARN_ON(fd < 0);
- if (fd < 0)
- return;
- buf = kzalloc(BUF_SIZE, GFP_KERNEL);
- WARN_ON(!buf);
- if (!buf) {
- ksys_close(fd);
+ if (kern_path("/", 0, &path))
return;
- }
-
- dirp = buf;
- num = ksys_getdents64(fd, dirp, BUF_SIZE);
- while (num > 0) {
- while (num > 0) {
- struct kstat st;
- int ret;
-
- ret = vfs_lstat(dirp->d_name, &st);
- WARN_ON_ONCE(ret);
- if (!ret) {
- if (S_ISDIR(st.mode))
- ksys_rmdir(dirp->d_name);
- else
- ksys_unlink(dirp->d_name);
- }
-
- num -= dirp->d_reclen;
- dirp = (void *)dirp + dirp->d_reclen;
- }
- dirp = buf;
- memset(buf, 0, BUF_SIZE);
- num = ksys_getdents64(fd, dirp, BUF_SIZE);
- }
-
- ksys_close(fd);
- kfree(buf);
+ d_genocide(path.dentry);
+ path_put(&path);
}
static void __init populate_initrd_image(char *err)
--
2.27.0
Fold it into the only remaining caller.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/file.c | 7 +------
include/linux/syscalls.h | 1 -
2 files changed, 1 insertion(+), 7 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index abb8b7081d7a44..85b7993165dd2f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -985,7 +985,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
return ksys_dup3(oldfd, newfd, 0);
}
-int ksys_dup(unsigned int fildes)
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
int ret = -EBADF;
struct file *file = fget_raw(fildes);
@@ -1000,11 +1000,6 @@ int ksys_dup(unsigned int fildes)
return ret;
}
-SYSCALL_DEFINE1(dup, unsigned int, fildes)
-{
- return ksys_dup(fildes);
-}
-
int f_dupfd(unsigned int from, struct file *file, unsigned flags)
{
int err;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 363baaadf8e19a..b6d90057476260 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1237,7 +1237,6 @@ asmlinkage long sys_ni_syscall(void);
*/
int ksys_umount(char __user *name, int flags);
-int ksys_dup(unsigned int fildes);
int ksys_chroot(const char __user *filename);
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
int ksys_chdir(const char __user *filename);
--
2.27.0
If initrd support is not enabled, just print the warning directly instead
of hiding the fact that we just failed behind two stub functions.
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/initramfs.c | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/init/initramfs.c b/init/initramfs.c
index 7a38012e1af742..d10404625c31f0 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -618,13 +618,7 @@ static void __init clean_rootfs(void)
ksys_close(fd);
kfree(buf);
}
-#else
-static inline void clean_rootfs(void)
-{
-}
-#endif /* CONFIG_BLK_DEV_RAM */
-#ifdef CONFIG_BLK_DEV_RAM
static void __init populate_initrd_image(char *err)
{
ssize_t written;
@@ -644,11 +638,6 @@ static void __init populate_initrd_image(char *err)
written, initrd_end - initrd_start);
ksys_close(fd);
}
-#else
-static void __init populate_initrd_image(char *err)
-{
- printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
-}
#endif /* CONFIG_BLK_DEV_RAM */
static int __init populate_rootfs(void)
@@ -668,8 +657,12 @@ static int __init populate_rootfs(void)
err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);
if (err) {
+#ifdef CONFIG_BLK_DEV_RAM
clean_rootfs();
populate_initrd_image(err);
+#else
+ printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
+#endif
}
done:
--
2.27.0
BLKFLSBUF used to be overloaded for the ramdisk driver to free the whole
ramdisk, which was completely different behavior compared to all other
drivers. But this magic overload got removed in commit ff26956875c2
("brd: remove support for BLKFLSBUF"), so this call is entirely
pointless now.
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts_initrd.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index d72beda824aa79..e4f88e9e1c0839 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -115,21 +115,12 @@ static void __init handle_initrd(void)
if (!error)
printk("okay\n");
else {
- int fd = ksys_open("/dev/root.old", O_RDWR, 0);
if (error == -ENOENT)
printk("/initrd does not exist. Ignored.\n");
else
printk("failed\n");
printk(KERN_NOTICE "Unmounting old root\n");
ksys_umount("/old", MNT_DETACH);
- printk(KERN_NOTICE "Trying to free ramdisk memory ... ");
- if (fd < 0) {
- error = fd;
- } else {
- error = ksys_ioctl(fd, BLKFLSBUF, 0);
- ksys_close(fd);
- }
- printk(!error ? "okay\n" : "failed\n");
}
}
--
2.27.0
There is no good reason to mess with file descriptors from in-kernel
code; switch the initrd loading to struct file based reads and writes
instead.
Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/read_write.c | 2 +-
include/linux/syscalls.h | 1 -
init/do_mounts_rd.c | 82 ++++++++++++++++++++--------------------
3 files changed, 43 insertions(+), 42 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 4fb797822567a6..5db58b8c78d0dd 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -301,7 +301,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
}
EXPORT_SYMBOL(vfs_llseek);
-off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
+static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
{
off_t retval;
struct fd f = fdget_pos(fd);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b951a87da9877c..10843a6adb770d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1246,7 +1246,6 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group);
int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
unsigned int count);
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
-off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence);
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count);
void ksys_sync(void);
int ksys_unshare(unsigned long unshare_flags);
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 27b1bccf6f12a8..7b64390c075043 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -31,7 +31,8 @@ static int __init ramdisk_start_setup(char *str)
}
__setup("ramdisk_start=", ramdisk_start_setup);
-static int __init crd_load(int in_fd, int out_fd, decompress_fn deco);
+static int __init crd_load(struct file *in_file, struct file *out_file,
+ decompress_fn deco);
/*
* This routine tries to find a RAM disk image to load, and returns the
@@ -53,7 +54,8 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco);
* lz4
*/
static int __init
-identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
+identify_ramdisk_image(struct file *file, int start_block,
+ decompress_fn *decompressor)
{
const int size = 512;
struct minix_super_block *minixsb;
@@ -64,6 +66,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
unsigned char *buf;
const char *compress_name;
unsigned long n;
+ loff_t pos;
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
@@ -78,8 +81,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
/*
* Read block 0 to test for compressed kernel
*/
- ksys_lseek(fd, start_block * BLOCK_SIZE, 0);
- ksys_read(fd, buf, size);
+ pos = start_block * BLOCK_SIZE;
+ kernel_read(file, buf, size, &pos);
*decompressor = decompress_method(buf, size, &compress_name);
if (compress_name) {
@@ -124,8 +127,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
/*
* Read 512 bytes further to check if cramfs is padded
*/
- ksys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0);
- ksys_read(fd, buf, size);
+ pos = start_block * BLOCK_SIZE + 0x200;
+ kernel_read(file, buf, size, &pos);
if (cramfsb->magic == CRAMFS_MAGIC) {
printk(KERN_NOTICE
@@ -138,8 +141,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
/*
* Read block 1 to test for minix and ext2 superblock
*/
- ksys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0);
- ksys_read(fd, buf, size);
+ pos = (start_block + 1) * BLOCK_SIZE;
+ kernel_read(file, buf, size, &pos);
/* Try minix */
if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
@@ -166,15 +169,23 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
start_block);
done:
- ksys_lseek(fd, start_block * BLOCK_SIZE, 0);
kfree(buf);
return nblocks;
}
+static unsigned long nr_blocks(struct file *file)
+{
+ struct inode *inode = file->f_mapping->host;
+
+ if (!S_ISBLK(inode->i_mode))
+ return 0;
+ return i_size_read(inode) >> 10;
+}
+
int __init rd_load_image(char *from)
{
int res = 0;
- int in_fd, out_fd;
+ struct file *in_file, *out_file;
unsigned long rd_blocks, devblocks;
int nblocks, i;
char *buf = NULL;
@@ -184,20 +195,20 @@ int __init rd_load_image(char *from)
char rotator[4] = { '|' , '/' , '-' , '\\' };
#endif
- out_fd = ksys_open("/dev/ram", O_RDWR, 0);
- if (out_fd < 0)
+ out_file = filp_open("/dev/ram", O_RDWR, 0);
+ if (IS_ERR(out_file))
goto out;
- in_fd = ksys_open(from, O_RDONLY, 0);
- if (in_fd < 0)
+ in_file = filp_open(from, O_RDONLY, 0);
+ if (IS_ERR(in_file))
goto noclose_input;
- nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor);
+ nblocks = identify_ramdisk_image(in_file, rd_image_start, &decompressor);
if (nblocks < 0)
goto done;
if (nblocks == 0) {
- if (crd_load(in_fd, out_fd, decompressor) == 0)
+ if (crd_load(in_file, out_file, decompressor) == 0)
goto successful_load;
goto done;
}
@@ -206,11 +217,7 @@ int __init rd_load_image(char *from)
* NOTE NOTE: nblocks is not actually blocks but
* the number of kibibytes of data to load into a ramdisk.
*/
- if (ksys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0)
- rd_blocks = 0;
- else
- rd_blocks >>= 1;
-
+ rd_blocks = nr_blocks(out_file);
if (nblocks > rd_blocks) {
printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n",
nblocks, rd_blocks);
@@ -220,13 +227,10 @@ int __init rd_load_image(char *from)
/*
* OK, time to copy in the data
*/
- if (ksys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0)
- devblocks = 0;
- else
- devblocks >>= 1;
-
if (strcmp(from, "/initrd.image") == 0)
devblocks = nblocks;
+ else
+ devblocks = nr_blocks(in_file);
if (devblocks == 0) {
printk(KERN_ERR "RAMDISK: could not determine device size\n");
@@ -245,14 +249,11 @@ int __init rd_load_image(char *from)
if (i && (i % devblocks == 0)) {
pr_cont("done disk #1.\n");
rotate = 0;
- if (ksys_close(in_fd)) {
- printk("Error closing the disk.\n");
- goto noclose_input;
- }
+ fput(in_file);
break;
}
- ksys_read(in_fd, buf, BLOCK_SIZE);
- ksys_write(out_fd, buf, BLOCK_SIZE);
+ kernel_read(in_file, buf, BLOCK_SIZE, &in_file->f_pos);
+ kernel_write(out_file, buf, BLOCK_SIZE, &out_file->f_pos);
#if !defined(CONFIG_S390)
if (!(i % 16)) {
pr_cont("%c\b", rotator[rotate & 0x3]);
@@ -265,9 +266,9 @@ int __init rd_load_image(char *from)
successful_load:
res = 1;
done:
- ksys_close(in_fd);
+ fput(in_file);
noclose_input:
- ksys_close(out_fd);
+ fput(out_file);
out:
kfree(buf);
ksys_unlink("/dev/ram");
@@ -283,11 +284,11 @@ int __init rd_load_disk(int n)
static int exit_code;
static int decompress_error;
-static int crd_infd, crd_outfd;
+static struct file *crd_infile, *crd_outfile;
static long __init compr_fill(void *buf, unsigned long len)
{
- long r = ksys_read(crd_infd, buf, len);
+ long r = kernel_read(crd_infile, buf, len, &crd_infile->f_pos);
if (r < 0)
printk(KERN_ERR "RAMDISK: error while reading compressed data");
else if (r == 0)
@@ -297,7 +298,7 @@ static long __init compr_fill(void *buf, unsigned long len)
static long __init compr_flush(void *window, unsigned long outcnt)
{
- long written = ksys_write(crd_outfd, window, outcnt);
+ long written = kernel_write(crd_outfile, window, outcnt, &crd_outfile->f_pos);
if (written != outcnt) {
if (decompress_error == 0)
printk(KERN_ERR
@@ -316,11 +317,12 @@ static void __init error(char *x)
decompress_error = 1;
}
-static int __init crd_load(int in_fd, int out_fd, decompress_fn deco)
+static int __init crd_load(struct file *in_file, struct file *out_file,
+ decompress_fn deco)
{
int result;
- crd_infd = in_fd;
- crd_outfd = out_fd;
+ crd_infile = in_file;
+ crd_outfile = out_file;
if (!deco) {
pr_emerg("Invalid ramdisk decompression routine. "
--
2.27.0
On Tue, Jul 14, 2020 at 12:09 PM Christoph Hellwig <[email protected]> wrote:
>
> There is no good reason to mess with file descriptors from in-kernel
> code, switch the initramfs unpacking to struct file based write
> instead.
Looking at this diff, I realized this really should be cleaned up more.
> + wfile = filp_open(collected, openflags, mode);
> + if (IS_ERR(wfile))
> + return 0;
> +
> + vfs_fchown(wfile, uid, gid);
> + vfs_fchmod(wfile, mode);
> + if (body_len)
> + vfs_truncate(&wfile->f_path, body_len);
> + vcollected = kstrdup(collected, GFP_KERNEL);
That "vcollected" is ugly and broken, and seems oh-so-wrong.
Because its only use is:
> - ksys_close(wfd);
> + fput(wfile);
> do_utime(vcollected, mtime);
> kfree(vcollected);
which should just have done the exact same thing that you did with
vfs_chown() and friends: we already have a "utimes_common()" that
takes a path, and it could have been made into "vfs_utimes()", and
then this whole vcollected confusion would go away and be replaced by
vfs_utimes(&wfile->f_path, mtime);
(ok, with all the "timespec64 t[2]" things going on that do_utime()
does now, but you get the idea).
Talk about de-crufting that initramfs unpacking..
But I don't hate this patch, I'm just pointing out that there's room
for improvement.
Linus
On Tue, Jul 14, 2020 at 12:06 PM Christoph Hellwig <[email protected]> wrote:
>
> this series starts to move the early init code away from requiring
> KERNEL_DS to be implicitly set during early startup. It does so by
> first removing legacy unused cruft, and then switches the code away
> from struct file based APIs to our more usual in-kernel APIs.
Looks good to me, with the added note on the utimes cruft too as a
further cleanup (separate patch).
So you can add my acked-by.
I _would_ like the md parts to get a few more acks. I see the one from
Song Liu, anybody else in md land willing to go through those patches?
They were the bulk of it, and the least obvious to me because I don't
know that code at all?
Linus
On Tue, Jul 14, 2020 at 12:31:01PM -0700, Linus Torvalds wrote:
> That "vcollected" is ugly and broken, and seems oh-so-wrong.
>
> Because its only use is:
>
>
> > - ksys_close(wfd);
> > + fput(wfile);
> > do_utime(vcollected, mtime);
> > kfree(vcollected);
>
> which should just have done the exact same thing that you did with
> vfs_chown() and friends: we already have a "utimes_common()" that
> takes a path, and it could have been made into "vfs_utimes()", and
> then this whole vcollected confusion would go away and be replaced by
>
> vfs_utimes(&wfile->f_path, mtime);
>
> (ok, with all the "timespec64 t[2]" things going on that do_utime()
> does now, but you get the idea).
>
> Talk about de-crufting that initramfs unpacking..
>
> But I don't hate this patch, I'm just pointing out that there's room
> for improvement.
I'll send another series to clean this up. I had a few utimes related
patch in a later series and this fits in pretty well with those.
On Tue, Jul 14, 2020 at 12:34:45PM -0700, Linus Torvalds wrote:
> On Tue, Jul 14, 2020 at 12:06 PM Christoph Hellwig <[email protected]> wrote:
> >
> > this series starts to move the early init code away from requiring
> > KERNEL_DS to be implicitly set during early startup. It does so by
> > first removing legacy unused cruft, and then switches the code away
> > from struct file based APIs to our more usual in-kernel APIs.
>
> Looks good to me, with the added note on the utimes cruft too as a
> further cleanup (separate patch).
>
> So you can add my acked-by.
>
> I _would_ like the md parts to get a few more acks. I see the one from
> Song Liu, anybody else in md land willing to go through those patches?
> They were the bulk of it, and the least obvious to me because I don't
> know that code at all?
Song is the maintainer. Neil is the only person I could think of
that also knows the old md code pretty well. Guoqing has contributed
a lot lately, but the code touched here is rather historic (and not
used very much at all these days as people use modular md and initramfs
based detection).
On Tue, Jul 14 2020, Christoph Hellwig wrote:
> The only caller of the bstat function becomes cleaner and simpler when
> open coding the function.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Acked-by: Song Liu <[email protected]>
Reviewed-by: NeilBrown <[email protected]>
Nice!
NeilBrown
> ---
> init/do_mounts.h | 10 ----------
> init/do_mounts_md.c | 8 ++++----
> 2 files changed, 4 insertions(+), 14 deletions(-)
>
> diff --git a/init/do_mounts.h b/init/do_mounts.h
> index 0bb0806de4ce2c..7513d1c14d13fe 100644
> --- a/init/do_mounts.h
> +++ b/init/do_mounts.h
> @@ -20,16 +20,6 @@ static inline int create_dev(char *name, dev_t dev)
> return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
> }
>
> -static inline u32 bstat(char *name)
> -{
> - struct kstat stat;
> - if (vfs_stat(name, &stat) != 0)
> - return 0;
> - if (!S_ISBLK(stat.mode))
> - return 0;
> - return stat.rdev;
> -}
> -
> #ifdef CONFIG_BLK_DEV_RAM
>
> int __init rd_load_disk(int n);
> diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
> index b84031528dd446..359363e85ccd0b 100644
> --- a/init/do_mounts_md.c
> +++ b/init/do_mounts_md.c
> @@ -138,9 +138,9 @@ static void __init md_setup_drive(void)
> dev = MKDEV(MD_MAJOR, minor);
> create_dev(name, dev);
> for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
> + struct kstat stat;
> char *p;
> char comp_name[64];
> - u32 rdev;
>
> p = strchr(devname, ',');
> if (p)
> @@ -150,9 +150,9 @@ static void __init md_setup_drive(void)
> if (strncmp(devname, "/dev/", 5) == 0)
> devname += 5;
> snprintf(comp_name, 63, "/dev/%s", devname);
> - rdev = bstat(comp_name);
> - if (rdev)
> - dev = new_decode_dev(rdev);
> + if (vfs_stat(comp_name, &stat) == 0 &&
> + S_ISBLK(stat.mode))
> + dev = new_decode_dev(stat.rdev);
> if (!dev) {
> printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
> break;
> --
> 2.27.0
On Tue, Jul 14 2020, Christoph Hellwig wrote:
> Instead of using a special RAID_AUTORUN ioctl that only exists for
> non-modular builds and is only called from the early init code, just
> call the actual function directly.
I think there was a tool in the old 'mdutils' that would call this ioctl
from user-space, but I cannot find a copy of that online, so I cannot be
sure.... not that it really matters.
Reviewed-by: NeilBrown <[email protected]>
Thanks,
NeilBrown
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Acked-by: Song Liu <[email protected]>
> ---
> drivers/md/md-autodetect.c | 10 ++--------
> drivers/md/md.c | 14 +-------------
> drivers/md/md.h | 3 +++
> 3 files changed, 6 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
> index fe806f7b9759a1..0eb746211ed53c 100644
> --- a/drivers/md/md-autodetect.c
> +++ b/drivers/md/md-autodetect.c
> @@ -9,6 +9,7 @@
> #include <linux/raid/detect.h>
> #include <linux/raid/md_u.h>
> #include <linux/raid/md_p.h>
> +#include "md.h"
>
> /*
> * When md (and any require personalities) are compiled into the kernel
> @@ -285,8 +286,6 @@ __setup("md=", md_setup);
>
> static void __init autodetect_raid(void)
> {
> - int fd;
> -
> /*
> * Since we don't want to detect and use half a raid array, we need to
> * wait for the known devices to complete their probing
> @@ -295,12 +294,7 @@ static void __init autodetect_raid(void)
> printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n");
>
> wait_for_device_probe();
> -
> - fd = ksys_open("/dev/md0", 0, 0);
> - if (fd >= 0) {
> - ksys_ioctl(fd, RAID_AUTORUN, raid_autopart);
> - ksys_close(fd);
> - }
> + md_autostart_arrays(raid_autopart);
> }
>
> void __init md_run_setup(void)
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f567f536b529bd..6e9a48da474848 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -68,10 +68,6 @@
> #include "md-bitmap.h"
> #include "md-cluster.h"
>
> -#ifndef MODULE
> -static void autostart_arrays(int part);
> -#endif
> -
> /* pers_list is a list of registered personalities protected
> * by pers_lock.
> * pers_lock does extra service to protect accesses to
> @@ -7421,7 +7417,6 @@ static inline bool md_ioctl_valid(unsigned int cmd)
> case GET_DISK_INFO:
> case HOT_ADD_DISK:
> case HOT_REMOVE_DISK:
> - case RAID_AUTORUN:
> case RAID_VERSION:
> case RESTART_ARRAY_RW:
> case RUN_ARRAY:
> @@ -7467,13 +7462,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
> case RAID_VERSION:
> err = get_version(argp);
> goto out;
> -
> -#ifndef MODULE
> - case RAID_AUTORUN:
> - err = 0;
> - autostart_arrays(arg);
> - goto out;
> -#endif
> default:;
> }
>
> @@ -9721,7 +9709,7 @@ void md_autodetect_dev(dev_t dev)
> }
> }
>
> -static void autostart_arrays(int part)
> +void md_autostart_arrays(int part)
> {
> struct md_rdev *rdev;
> struct detected_devices_node *node_detected_dev;
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 612814d07d35ab..37315a3f28e97d 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -800,4 +800,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
> !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
> mddev->queue->limits.max_write_zeroes_sectors = 0;
> }
> +
> +void md_autostart_arrays(int part);
> +
> #endif /* _MD_MD_H */
> --
> 2.27.0
On Tue, Jul 14 2020, Christoph Hellwig wrote:
> devfs is long gone, and autoscan works just fine without this these days.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Acked-by: Song Liu <[email protected]>
Happy to see this go!
Reviewed-by: NeilBrown <[email protected]>
Thanks,
NeilBrown
> ---
> drivers/md/md-autodetect.c | 10 ----------
> 1 file changed, 10 deletions(-)
>
> diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
> index 0eb746211ed53c..6bc9b734eee6ff 100644
> --- a/drivers/md/md-autodetect.c
> +++ b/drivers/md/md-autodetect.c
> @@ -240,16 +240,6 @@ static void __init md_setup_drive(void)
> err = ksys_ioctl(fd, RUN_ARRAY, 0);
> if (err)
> printk(KERN_WARNING "md: starting md%d failed\n", minor);
> - else {
> - /* reread the partition table.
> - * I (neilb) and not sure why this is needed, but I cannot
> - * boot a kernel with devfs compiled in from partitioned md
> - * array without it
> - */
> - ksys_close(fd);
> - fd = ksys_open(name, 0, 0);
> - ksys_ioctl(fd, BLKRRPART, 0);
> - }
> ksys_close(fd);
> }
> }
> --
> 2.27.0
On Tue, Jul 14 2020, Christoph Hellwig wrote:
> Move the loop over the possible arrays into the caller to remove a level
> of indentation for the whole function.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Acked-by: Song Liu <[email protected]>
Nice
Reviewed-by: NeilBrown <[email protected]>
Thanks,
NeilBrown
> ---
> drivers/md/md-autodetect.c | 203 ++++++++++++++++++-------------------
> 1 file changed, 101 insertions(+), 102 deletions(-)
>
> diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
> index 6bc9b734eee6ff..a43a8f1580584c 100644
> --- a/drivers/md/md-autodetect.c
> +++ b/drivers/md/md-autodetect.c
> @@ -27,7 +27,7 @@ static int __initdata raid_noautodetect=1;
> #endif
> static int __initdata raid_autopart;
>
> -static struct {
> +static struct md_setup_args {
> int minor;
> int partitioned;
> int level;
> @@ -126,122 +126,117 @@ static inline int create_dev(char *name, dev_t dev)
> return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
> }
>
> -static void __init md_setup_drive(void)
> +static void __init md_setup_drive(struct md_setup_args *args)
> {
> - int minor, i, ent, partitioned;
> + int minor, i, partitioned;
> dev_t dev;
> dev_t devices[MD_SB_DISKS+1];
> + int fd;
> + int err = 0;
> + char *devname;
> + mdu_disk_info_t dinfo;
> + char name[16];
>
> - for (ent = 0; ent < md_setup_ents ; ent++) {
> - int fd;
> - int err = 0;
> - char *devname;
> - mdu_disk_info_t dinfo;
> - char name[16];
> + minor = args->minor;
> + partitioned = args->partitioned;
> + devname = args->device_names;
>
> - minor = md_setup_args[ent].minor;
> - partitioned = md_setup_args[ent].partitioned;
> - devname = md_setup_args[ent].device_names;
> + sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
> + if (partitioned)
> + dev = MKDEV(mdp_major, minor << MdpMinorShift);
> + else
> + dev = MKDEV(MD_MAJOR, minor);
> + create_dev(name, dev);
> + for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
> + struct kstat stat;
> + char *p;
> + char comp_name[64];
>
> - sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
> - if (partitioned)
> - dev = MKDEV(mdp_major, minor << MdpMinorShift);
> - else
> - dev = MKDEV(MD_MAJOR, minor);
> - create_dev(name, dev);
> - for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
> - struct kstat stat;
> - char *p;
> - char comp_name[64];
> + p = strchr(devname, ',');
> + if (p)
> + *p++ = 0;
>
> - p = strchr(devname, ',');
> - if (p)
> - *p++ = 0;
> + dev = name_to_dev_t(devname);
> + if (strncmp(devname, "/dev/", 5) == 0)
> + devname += 5;
> + snprintf(comp_name, 63, "/dev/%s", devname);
> + if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode))
> + dev = new_decode_dev(stat.rdev);
> + if (!dev) {
> + printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
> + break;
> + }
>
> - dev = name_to_dev_t(devname);
> - if (strncmp(devname, "/dev/", 5) == 0)
> - devname += 5;
> - snprintf(comp_name, 63, "/dev/%s", devname);
> - if (vfs_stat(comp_name, &stat) == 0 &&
> - S_ISBLK(stat.mode))
> - dev = new_decode_dev(stat.rdev);
> - if (!dev) {
> - printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
> - break;
> - }
> + devices[i] = dev;
> + devname = p;
> + }
> + devices[i] = 0;
>
> - devices[i] = dev;
> + if (!i)
> + return;
>
> - devname = p;
> - }
> - devices[i] = 0;
> + printk(KERN_INFO "md: Loading md%s%d: %s\n",
> + partitioned ? "_d" : "", minor,
> + args->device_names);
>
> - if (!i)
> - continue;
> + fd = ksys_open(name, 0, 0);
> + if (fd < 0) {
> + printk(KERN_ERR "md: open failed - cannot start "
> + "array %s\n", name);
> + return;
> + }
> + if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
> + printk(KERN_WARNING
> + "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
> + minor);
> + ksys_close(fd);
> + return;
> + }
>
> - printk(KERN_INFO "md: Loading md%s%d: %s\n",
> - partitioned ? "_d" : "", minor,
> - md_setup_args[ent].device_names);
> + if (args->level != LEVEL_NONE) {
> + /* non-persistent */
> + mdu_array_info_t ainfo;
> + ainfo.level = args->level;
> + ainfo.size = 0;
> + ainfo.nr_disks =0;
> + ainfo.raid_disks =0;
> + while (devices[ainfo.raid_disks])
> + ainfo.raid_disks++;
> + ainfo.md_minor =minor;
> + ainfo.not_persistent = 1;
>
> - fd = ksys_open(name, 0, 0);
> - if (fd < 0) {
> - printk(KERN_ERR "md: open failed - cannot start "
> - "array %s\n", name);
> - continue;
> - }
> - if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
> - printk(KERN_WARNING
> - "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
> - minor);
> - ksys_close(fd);
> - continue;
> + ainfo.state = (1 << MD_SB_CLEAN);
> + ainfo.layout = 0;
> + ainfo.chunk_size = args->chunk;
> + err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
> + for (i = 0; !err && i <= MD_SB_DISKS; i++) {
> + dev = devices[i];
> + if (!dev)
> + break;
> + dinfo.number = i;
> + dinfo.raid_disk = i;
> + dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
> + dinfo.major = MAJOR(dev);
> + dinfo.minor = MINOR(dev);
> + err = ksys_ioctl(fd, ADD_NEW_DISK,
> + (long)&dinfo);
> }
> -
> - if (md_setup_args[ent].level != LEVEL_NONE) {
> - /* non-persistent */
> - mdu_array_info_t ainfo;
> - ainfo.level = md_setup_args[ent].level;
> - ainfo.size = 0;
> - ainfo.nr_disks =0;
> - ainfo.raid_disks =0;
> - while (devices[ainfo.raid_disks])
> - ainfo.raid_disks++;
> - ainfo.md_minor =minor;
> - ainfo.not_persistent = 1;
> -
> - ainfo.state = (1 << MD_SB_CLEAN);
> - ainfo.layout = 0;
> - ainfo.chunk_size = md_setup_args[ent].chunk;
> - err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
> - for (i = 0; !err && i <= MD_SB_DISKS; i++) {
> - dev = devices[i];
> - if (!dev)
> - break;
> - dinfo.number = i;
> - dinfo.raid_disk = i;
> - dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
> - dinfo.major = MAJOR(dev);
> - dinfo.minor = MINOR(dev);
> - err = ksys_ioctl(fd, ADD_NEW_DISK,
> - (long)&dinfo);
> - }
> - } else {
> - /* persistent */
> - for (i = 0; i <= MD_SB_DISKS; i++) {
> - dev = devices[i];
> - if (!dev)
> - break;
> - dinfo.major = MAJOR(dev);
> - dinfo.minor = MINOR(dev);
> - ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
> - }
> + } else {
> + /* persistent */
> + for (i = 0; i <= MD_SB_DISKS; i++) {
> + dev = devices[i];
> + if (!dev)
> + break;
> + dinfo.major = MAJOR(dev);
> + dinfo.minor = MINOR(dev);
> + ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
> }
> - if (!err)
> - err = ksys_ioctl(fd, RUN_ARRAY, 0);
> - if (err)
> - printk(KERN_WARNING "md: starting md%d failed\n", minor);
> - ksys_close(fd);
> }
> + if (!err)
> + err = ksys_ioctl(fd, RUN_ARRAY, 0);
> + if (err)
> + printk(KERN_WARNING "md: starting md%d failed\n", minor);
> + ksys_close(fd);
> }
>
> static int __init raid_setup(char *str)
> @@ -289,11 +284,15 @@ static void __init autodetect_raid(void)
>
> void __init md_run_setup(void)
> {
> + int ent;
> +
> create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
>
> if (raid_noautodetect)
> printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
> else
> autodetect_raid();
> - md_setup_drive();
> +
> + for (ent = 0; ent < md_setup_ents; ent++)
> + md_setup_drive(&md_setup_args[ent]);
> }
> --
> 2.27.0
On Tue, Jul 14 2020, Christoph Hellwig wrote:
> md_setup_drive knows it works with md devices, so it is rather pointless
> to open a file descriptor and issue ioctls. Just call directly into the
> relevant low-level md routines after getting a handle to the device using
> blkdev_get_by_dev instead.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Acked-by: Song Liu <[email protected]>
> ---
> drivers/md/md-autodetect.c | 127 ++++++++++++++++---------------------
> drivers/md/md.c | 20 +++---
> drivers/md/md.h | 6 ++
> 3 files changed, 71 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
> index a43a8f1580584c..5b24b5616d3acc 100644
> --- a/drivers/md/md-autodetect.c
> +++ b/drivers/md/md-autodetect.c
> @@ -2,7 +2,6 @@
> #include <linux/kernel.h>
> #include <linux/blkdev.h>
> #include <linux/init.h>
> -#include <linux/syscalls.h>
> #include <linux/mount.h>
> #include <linux/major.h>
> #include <linux/delay.h>
> @@ -120,37 +119,29 @@ static int __init md_setup(char *str)
> return 1;
> }
>
> -static inline int create_dev(char *name, dev_t dev)
> -{
> - ksys_unlink(name);
> - return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
> -}
> -
> static void __init md_setup_drive(struct md_setup_args *args)
> {
> - int minor, i, partitioned;
> - dev_t dev;
> - dev_t devices[MD_SB_DISKS+1];
> - int fd;
> - int err = 0;
> - char *devname;
> - mdu_disk_info_t dinfo;
> + char *devname = args->device_names;
> + dev_t devices[MD_SB_DISKS + 1], mdev;
> + struct mdu_array_info_s ainfo = { };
> + struct block_device *bdev;
> + struct mddev *mddev;
> + int err = 0, i;
> char name[16];
>
> - minor = args->minor;
> - partitioned = args->partitioned;
> - devname = args->device_names;
> + if (args->partitioned) {
> + mdev = MKDEV(mdp_major, args->minor << MdpMinorShift);
> + sprintf(name, "md_d%d", args->minor);
> + } else {
> + mdev = MKDEV(MD_MAJOR, args->minor);
> + sprintf(name, "md%d", args->minor);
> + }
>
> - sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
> - if (partitioned)
> - dev = MKDEV(mdp_major, minor << MdpMinorShift);
> - else
> - dev = MKDEV(MD_MAJOR, minor);
> - create_dev(name, dev);
> for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
> struct kstat stat;
> char *p;
> char comp_name[64];
> + dev_t dev;
>
> p = strchr(devname, ',');
> if (p)
> @@ -163,7 +154,7 @@ static void __init md_setup_drive(struct md_setup_args *args)
> if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode))
> dev = new_decode_dev(stat.rdev);
> if (!dev) {
> - printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
> + pr_warn("md: Unknown device name: %s\n", devname);
> break;
> }
>
> @@ -175,68 +166,64 @@ static void __init md_setup_drive(struct md_setup_args *args)
> if (!i)
> return;
>
> - printk(KERN_INFO "md: Loading md%s%d: %s\n",
> - partitioned ? "_d" : "", minor,
> - args->device_names);
> + pr_info("md: Loading %s: %s\n", name, args->device_names);
>
> - fd = ksys_open(name, 0, 0);
> - if (fd < 0) {
> - printk(KERN_ERR "md: open failed - cannot start "
> - "array %s\n", name);
> + bdev = blkdev_get_by_dev(mdev, FMODE_READ, NULL);
> + if (IS_ERR(bdev)) {
> + pr_err("md: open failed - cannot start array %s\n", name);
> return;
> }
> - if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
> - printk(KERN_WARNING
> - "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
> - minor);
> - ksys_close(fd);
> - return;
I'd be more comfortable if you added something like
if (WARN(bdev->bd_disk->fops != md_fops,
"Opening block device %x resulted in non-md device\n"))
return;
here. However even without that
Reviewed-by: NeilBrown <[email protected]>
Thanks,
NeilBrown
> + mddev = bdev->bd_disk->private_data;
> +
> + err = mddev_lock(mddev);
> + if (err) {
> + pr_err("md: failed to lock array %s\n", name);
> + goto out_blkdev_put;
> + }
> +
> + if (!list_empty(&mddev->disks) || mddev->raid_disks) {
> + pr_warn("md: Ignoring %s, already autodetected. (Use raid=noautodetect)\n",
> + name);
> + goto out_unlock;
> }
>
> if (args->level != LEVEL_NONE) {
> /* non-persistent */
> - mdu_array_info_t ainfo;
> ainfo.level = args->level;
> - ainfo.size = 0;
> - ainfo.nr_disks =0;
> - ainfo.raid_disks =0;
> - while (devices[ainfo.raid_disks])
> - ainfo.raid_disks++;
> - ainfo.md_minor =minor;
> + ainfo.md_minor = args->minor;
> ainfo.not_persistent = 1;
> -
> ainfo.state = (1 << MD_SB_CLEAN);
> - ainfo.layout = 0;
> ainfo.chunk_size = args->chunk;
> - err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
> - for (i = 0; !err && i <= MD_SB_DISKS; i++) {
> - dev = devices[i];
> - if (!dev)
> - break;
> + while (devices[ainfo.raid_disks])
> + ainfo.raid_disks++;
> + }
> +
> + err = md_set_array_info(mddev, &ainfo);
> +
> + for (i = 0; i <= MD_SB_DISKS && devices[i]; i++) {
> + struct mdu_disk_info_s dinfo = {
> + .major = MAJOR(devices[i]),
> + .minor = MINOR(devices[i]),
> + };
> +
> + if (args->level != LEVEL_NONE) {
> dinfo.number = i;
> dinfo.raid_disk = i;
> - dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
> - dinfo.major = MAJOR(dev);
> - dinfo.minor = MINOR(dev);
> - err = ksys_ioctl(fd, ADD_NEW_DISK,
> - (long)&dinfo);
> - }
> - } else {
> - /* persistent */
> - for (i = 0; i <= MD_SB_DISKS; i++) {
> - dev = devices[i];
> - if (!dev)
> - break;
> - dinfo.major = MAJOR(dev);
> - dinfo.minor = MINOR(dev);
> - ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
> + dinfo.state =
> + (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC);
> }
> +
> + md_add_new_disk(mddev, &dinfo);
> }
> +
> if (!err)
> - err = ksys_ioctl(fd, RUN_ARRAY, 0);
> + err = do_md_run(mddev);
> if (err)
> - printk(KERN_WARNING "md: starting md%d failed\n", minor);
> - ksys_close(fd);
> + pr_warn("md: starting %s failed\n", name);
> +out_unlock:
> + mddev_unlock(mddev);
> +out_blkdev_put:
> + blkdev_put(bdev, FMODE_READ);
> }
>
> static int __init raid_setup(char *str)
> @@ -286,8 +273,6 @@ void __init md_run_setup(void)
> {
> int ent;
>
> - create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
> -
> if (raid_noautodetect)
> printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
> else
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 6e9a48da474848..9960cfeb59a50c 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4368,7 +4368,6 @@ array_state_show(struct mddev *mddev, char *page)
>
> static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
> static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
> -static int do_md_run(struct mddev *mddev);
> static int restart_array(struct mddev *mddev);
>
> static ssize_t
> @@ -6015,7 +6014,7 @@ int md_run(struct mddev *mddev)
> }
> EXPORT_SYMBOL_GPL(md_run);
>
> -static int do_md_run(struct mddev *mddev)
> +int do_md_run(struct mddev *mddev)
> {
> int err;
>
> @@ -6651,7 +6650,7 @@ static int get_disk_info(struct mddev *mddev, void __user * arg)
> return 0;
> }
>
> -static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
> +int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
> {
> char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
> struct md_rdev *rdev;
> @@ -6697,7 +6696,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
> }
>
> /*
> - * add_new_disk can be used once the array is assembled
> + * md_add_new_disk can be used once the array is assembled
> * to add "hot spares". They must already have a superblock
> * written
> */
> @@ -6810,7 +6809,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
> return err;
> }
>
> - /* otherwise, add_new_disk is only allowed
> + /* otherwise, md_add_new_disk is only allowed
> * for major_version==0 superblocks
> */
> if (mddev->major_version != 0) {
> @@ -7055,7 +7054,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
> }
>
> /*
> - * set_array_info is used two different ways
> + * md_set_array_info is used two different ways
> * The original usage is when creating a new array.
> * In this usage, raid_disks is > 0 and it together with
> * level, size, not_persistent,layout,chunksize determine the
> @@ -7067,9 +7066,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
> * The minor and patch _version numbers are also kept incase the
> * super_block handler wishes to interpret them.
> */
> -static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
> +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
> {
> -
> if (info->raid_disks == 0) {
> /* just setting version number for superblock loading */
> if (info->major_version < 0 ||
> @@ -7560,7 +7558,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
> err = -EBUSY;
> goto unlock;
> }
> - err = set_array_info(mddev, &info);
> + err = md_set_array_info(mddev, &info);
> if (err) {
> pr_warn("md: couldn't set array info. %d\n", err);
> goto unlock;
> @@ -7614,7 +7612,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
> /* Need to clear read-only for this */
> break;
> else
> - err = add_new_disk(mddev, &info);
> + err = md_add_new_disk(mddev, &info);
> goto unlock;
> }
> break;
> @@ -7682,7 +7680,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
> if (copy_from_user(&info, argp, sizeof(info)))
> err = -EFAULT;
> else
> - err = add_new_disk(mddev, &info);
> + err = md_add_new_disk(mddev, &info);
> goto unlock;
> }
>
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 6f8fff77ce10a5..7ee81aa2eac862 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -801,7 +801,13 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
> mddev->queue->limits.max_write_zeroes_sectors = 0;
> }
>
> +struct mdu_array_info_s;
> +struct mdu_disk_info_s;
> +
> extern int mdp_major;
> void md_autostart_arrays(int part);
> +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
> +int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
> +int do_md_run(struct mddev *mddev);
>
> #endif /* _MD_MD_H */
> --
> 2.27.0
On Thu, Jul 16, 2020 at 09:50:49AM +1000, NeilBrown wrote:
> I'd be more comfortable if you added something like
> if (WARN(bdev->bd_disk->fops != md_fops,
> "Opening block device %x resulted in non-md device\n"))
> return;
> here. However even without that
>
> Reviewed-by: NeilBrown <[email protected]>
Ok, I've added that.
On 7/15/20 8:51 AM, Christoph Hellwig wrote:
> On Tue, Jul 14, 2020 at 12:34:45PM -0700, Linus Torvalds wrote:
>> On Tue, Jul 14, 2020 at 12:06 PM Christoph Hellwig <[email protected]> wrote:
>>> this series starts to move the early init code away from requiring
>>> KERNEL_DS to be implicitly set during early startup. It does so by
>>> first removing legacy unused cruft, and then switches the code away
>>> from struct file based APIs to our more usual in-kernel APIs.
>> Looks good to me, with the added note on the utimes cruft too as a
>> further cleanup (separate patch).
>>
>> So you can add my acked-by.
>>
>> I _would_ like the md parts to get a few more acks. I see the one from
>> Song Liu, anybody else in md land willing to go through those patches?
>> They were the bulk of it, and the least obvious to me because I don't
>> know that code at all?
> Song is the maintainer. Neil is the only person I could think of
> that also knows the old md code pretty well. Guoqing has contributed
> a lot lately, but the code touched here is rather historic (and not
> used very much at all these days as people use modular md and initramfs
> based detection).
Hi Christoph,
I just cloned the tree, seems there is compile issue that you need to
resolve.
hch-misc$ make -j8
DESCEND objtool
CALL scripts/atomic/check-atomics.sh
CALL scripts/checksyscalls.sh
CHK include/generated/compile.h
CC drivers/md/md.o
CC drivers/md/md-bitmap.o
CC drivers/md/md-autodetect.o
AR drivers/perf/built-in.a
CC drivers/md/dm.o
AR drivers/hwtracing/intel_th/built-in.a
CC drivers/nvmem/core.o
drivers/md/md.c:7809:45: error: static declaration of ‘md_fops’ follows
non-static declaration
static const struct block_device_operations md_fops =
^~~~~~~
drivers/md/md.c:329:38: note: previous declaration of ‘md_fops’ was here
const struct block_device_operations md_fops;
^~~~~~~
scripts/Makefile.build:280: recipe for target 'drivers/md/md.o' failed
make[2]: *** [drivers/md/md.o] Error 1
make[2]: *** Waiting for unfinished jobs....
And for the changes of md, feel free to add my Acked-by if it could help.
Thanks,
Guoqing
On Thu, Jul 16, 2020 at 05:57:12PM +0200, Guoqing Jiang wrote:
> On 7/15/20 8:51 AM, Christoph Hellwig wrote:
>> On Tue, Jul 14, 2020 at 12:34:45PM -0700, Linus Torvalds wrote:
> I just cloned the tree, seems there is compile issue that you need to
> resolve.
Fixed and force pushed.
Hi Christoph,
On 14.07.2020 21:04, Christoph Hellwig wrote:
> Just use d_genocide instead of iterating through the root directory with
> cumbersome userspace-like APIs. This also ensures we actually remove files
> that are not direct children of the root entry, which the old code failed
> to do.
>
> Fixes: df52092f3c97 ("fastboot: remove duplicate unpack_to_rootfs()")
> Signed-off-by: Christoph Hellwig <[email protected]>
This patch breaks initrd support ;-(
I use initrd to deploy kernel modules on my test machines. It was
automatically mounted on /initrd. /lib/modules is just a symlink to
/initrd. I know that initrd support is marked as deprecated, but it
would be really nice to give people some time to update their machines
before breaking the stuff.
Here is the log:
Kernel image @ 0x40007fc0 [ 0x000000 - 0x6dd9c8 ]
## Flattened Device Tree blob at 41000000
Booting using the fdt blob at 0x41000000
Loading Ramdisk to 4de3c000, end 50000000 ... OK
Loading Device Tree to 4de2d000, end 4de3b206 ... OK
Starting kernel ...
[ 0.000000] Booting Linux on physical CPU 0x900
...
[ 0.000000] Kernel command line: root=PARTLABEL=rootfs rootwait
console=tty1 console=ttySAC2,115200n8 earlycon rootdelay=2
...
[ 1.853631] Trying to unpack rootfs image as initramfs...
[ 1.858661] rootfs image is not initramfs (invalid magic at start of
compressed archive); looks like an initrd
...
[ 2.204776] Freeing initrd memory: 34576K
...
[ 4.635360] Warning: unable to open an initial console.
[ 4.640706] Waiting 2 sec before mounting root device...
...
[ 6.776007] Failed to create /dev/root: -2
[ 6.778989] VFS: Cannot open root device "PARTLABEL=rootfs" or
unknown-block(179,6): error -2
[ 6.787200] Please append a correct "root=" boot option; here are the
available partitions:
[ 6.795693] 0100 65536 ram0
[ 6.795697] (driver?)
[ 6.801459] 0101 65536 ram1
[ 6.801462] (driver?)
[ 6.807532] 0102 65536 ram2
[ 6.807535] (driver?)
[ 6.813674] 0103 65536 ram3
[ 6.813677] (driver?)
[ 6.819760] 0104 65536 ram4
[ 6.819763] (driver?)
[ 6.832610] 0105 65536 ram5
[ 6.832613] (driver?)
[ 6.848685] 0106 65536 ram6
[ 6.848688] (driver?)
[ 6.864590] 0107 65536 ram7
[ 6.864593] (driver?)
[ 6.880504] 0108 65536 ram8
[ 6.880507] (driver?)
[ 6.896248] 0109 65536 ram9
[ 6.896251] (driver?)
[ 6.911828] 010a 65536 ram10
[ 6.911831] (driver?)
[ 6.927447] 010b 65536 ram11
[ 6.927450] (driver?)
[ 6.942976] 010c 65536 ram12
[ 6.942979] (driver?)
[ 6.958190] 010d 65536 ram13
[ 6.958193] (driver?)
[ 6.973205] 010e 65536 ram14
[ 6.973208] (driver?)
[ 6.988105] 010f 65536 ram15
[ 6.988108] (driver?)
[ 7.002897] b300 15388672 mmcblk0
[ 7.002901] driver: mmcblk
[ 7.018061] b301 8192 mmcblk0p1
654b73ea-7c04-c24d-9642-2a186649605c
[ 7.018064]
[ 7.035359] b302 61440 mmcblk0p2
7ef6fb83-0d6c-8c44-826b-ad11df290e0c
[ 7.035362]
[ 7.052589] b303 102400 mmcblk0p3
34883856-7d52-d548-a196-718efbd06876
[ 7.052592]
[ 7.069744] b304 153600 mmcblk0p4
8d4410d0-a4ff-c447-abb9-73350dcdd2d6
[ 7.069747]
[ 7.086888] b305 1572864 mmcblk0p5
485c2c17-a9e8-9c45-bb68-e0748a2bb1f1
[ 7.086890]
[ 7.103991] b306 3072000 mmcblk0p6
7fb2bbf3-e064-2343-b169-e69c18dbb43e
[ 7.103993]
[ 7.121290] b307 10413039 mmcblk0p7
b0ee9150-6b6a-274b-9ec3-703d29072555
[ 7.121292]
[ 7.138722] Kernel panic - not syncing: VFS: Unable to mount root fs
on unknown-block(179,6)
[ 7.151482] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.8.0-rc5-00064-g38d014f6d446 #8823
[ 7.164026] Hardware name: Samsung Exynos (Flattened Device Tree)
[ 7.174556] [<c011188c>] (unwind_backtrace) from [<c010d27c>]
(show_stack+0x10/0x14)
[ 7.186799] [<c010d27c>] (show_stack) from [<c05182e4>]
(dump_stack+0xbc/0xe8)
[ 7.198533] [<c05182e4>] (dump_stack) from [<c01272e0>]
(panic+0x128/0x354)
[ 7.210002] [<c01272e0>] (panic) from [<c1001580>]
(mount_block_root+0x1a8/0x240)
[ 7.221961] [<c1001580>] (mount_block_root) from [<c1001738>]
(mount_root+0x120/0x13c)
[ 7.234325] [<c1001738>] (mount_root) from [<c10018ac>]
(prepare_namespace+0x158/0x194)
[ 7.246751] [<c10018ac>] (prepare_namespace) from [<c0ab7684>]
(kernel_init+0x8/0x118)
[ 7.259086] [<c0ab7684>] (kernel_init) from [<c0100114>]
(ret_from_fork+0x14/0x20)
static void __init populate_initrd_image(char *err)
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On Fri, Jul 17, 2020 at 10:55:48PM +0200, Marek Szyprowski wrote:
> Hi Christoph,
>
> On 14.07.2020 21:04, Christoph Hellwig wrote:
> > Just use d_genocide instead of iterating through the root directory with
> > cumbersome userspace-like APIs. This also ensures we actually remove files
> > that are not direct children of the root entry, which the old code failed
> > to do.
> >
> > Fixes: df52092f3c97 ("fastboot: remove duplicate unpack_to_rootfs()")
> > Signed-off-by: Christoph Hellwig <[email protected]>
>
> This patch breaks initrd support ;-(
>
> I use initrd to deploy kernel modules on my test machines. It was
> automatically mounted on /initrd. /lib/modules is just a symlink to
> /initrd. I know that initrd support is marked as deprecated, but it
> would be really nice to give people some time to update their machines
> before breaking the stuff.
Looks like your setup did rely on the /dev/ nodes from the built-in
initramfs to be preserved.
Can you comment out the call to d_genocide? It seems like for you
the fact that clean_rootfs didn't actually clean up was a feature and
not a bug.
I guess the old, pre-2008 code also wouldn't have worked for you in
that case.
>
> Here is the log:
>
> Kernel image @ 0x40007fc0 [ 0x000000 - 0x6dd9c8 ]
> ## Flattened Device Tree blob at 41000000
>    Booting using the fdt blob at 0x41000000
>    Loading Ramdisk to 4de3c000, end 50000000 ... OK
>    Loading Device Tree to 4de2d000, end 4de3b206 ... OK
>
> Starting kernel ...
>
> [    0.000000] Booting Linux on physical CPU 0x900
> ...
>
> [    0.000000] Kernel command line: root=PARTLABEL=rootfs rootwait
> console=tty1 console=ttySAC2,115200n8 earlycon rootdelay=2
> ...
>
> [    1.853631] Trying to unpack rootfs image as initramfs...
> [    1.858661] rootfs image is not initramfs (invalid magic at start of
> compressed archive); looks like an initrd
> ...
> [    2.204776] Freeing initrd memory: 34576K
>
> ...
>
> [    4.635360] Warning: unable to open an initial console.
> [    4.640706] Waiting 2 sec before mounting root device...
> ...
> [    6.776007] Failed to create /dev/root: -2
> [    6.778989] VFS: Cannot open root device "PARTLABEL=rootfs" or
> unknown-block(179,6): error -2
> [    6.787200] Please append a correct "root=" boot option; here are the
> available partitions:
> [    6.795693] 0100           65536 ram0
> [    6.795697]  (driver?)
> [    6.801459] 0101           65536 ram1
> [    6.801462]  (driver?)
> [    6.807532] 0102           65536 ram2
> [    6.807535]  (driver?)
> [    6.813674] 0103           65536 ram3
> [    6.813677]  (driver?)
> [    6.819760] 0104           65536 ram4
> [    6.819763]  (driver?)
> [    6.832610] 0105           65536 ram5
> [    6.832613]  (driver?)
> [    6.848685] 0106           65536 ram6
> [    6.848688]  (driver?)
> [    6.864590] 0107           65536 ram7
> [    6.864593]  (driver?)
> [    6.880504] 0108           65536 ram8
> [    6.880507]  (driver?)
> [    6.896248] 0109           65536 ram9
> [    6.896251]  (driver?)
> [    6.911828] 010a           65536 ram10
> [    6.911831]  (driver?)
> [    6.927447] 010b           65536 ram11
> [    6.927450]  (driver?)
> [    6.942976] 010c           65536 ram12
> [    6.942979]  (driver?)
> [    6.958190] 010d           65536 ram13
> [    6.958193]  (driver?)
> [    6.973205] 010e           65536 ram14
> [    6.973208]  (driver?)
> [    6.988105] 010f           65536 ram15
> [    6.988108]  (driver?)
> [    7.002897] b300        15388672 mmcblk0
> [    7.002901]  driver: mmcblk
> [    7.018061]   b301            8192 mmcblk0p1
> 654b73ea-7c04-c24d-9642-2a186649605c
> [    7.018064]
> [    7.035359]   b302           61440 mmcblk0p2
> 7ef6fb83-0d6c-8c44-826b-ad11df290e0c
> [    7.035362]
> [    7.052589]   b303          102400 mmcblk0p3
> 34883856-7d52-d548-a196-718efbd06876
> [    7.052592]
> [    7.069744]   b304          153600 mmcblk0p4
> 8d4410d0-a4ff-c447-abb9-73350dcdd2d6
> [    7.069747]
> [    7.086888]   b305         1572864 mmcblk0p5
> 485c2c17-a9e8-9c45-bb68-e0748a2bb1f1
> [    7.086890]
> [    7.103991]   b306         3072000 mmcblk0p6
> 7fb2bbf3-e064-2343-b169-e69c18dbb43e
> [    7.103993]
> [    7.121290]   b307        10413039 mmcblk0p7
> b0ee9150-6b6a-274b-9ec3-703d29072555
> [    7.121292]
> [    7.138722] Kernel panic - not syncing: VFS: Unable to mount root fs
> on unknown-block(179,6)
> [    7.151482] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> 5.8.0-rc5-00064-g38d014f6d446 #8823
> [    7.164026] Hardware name: Samsung Exynos (Flattened Device Tree)
> [    7.174556] [<c011188c>] (unwind_backtrace) from [<c010d27c>]
> (show_stack+0x10/0x14)
> [    7.186799] [<c010d27c>] (show_stack) from [<c05182e4>]
> (dump_stack+0xbc/0xe8)
> [    7.198533] [<c05182e4>] (dump_stack) from [<c01272e0>]
> (panic+0x128/0x354)
> [    7.210002] [<c01272e0>] (panic) from [<c1001580>]
> (mount_block_root+0x1a8/0x240)
> [    7.221961] [<c1001580>] (mount_block_root) from [<c1001738>]
> (mount_root+0x120/0x13c)
> [    7.234325] [<c1001738>] (mount_root) from [<c10018ac>]
> (prepare_namespace+0x158/0x194)
> [    7.246751] [<c10018ac>] (prepare_namespace) from [<c0ab7684>]
> (kernel_init+0x8/0x118)
> [    7.259086] [<c0ab7684>] (kernel_init) from [<c0100114>]
> (ret_from_fork+0x14/0x20)
> static void __init populate_initrd_image(char *err)
>
> Best regards
> --
> Marek Szyprowski, PhD
> Samsung R&D Institute Poland
---end quoted text---
On Sat, Jul 18, 2020 at 12:00:35PM +0200, Christoph Hellwig wrote:
> On Fri, Jul 17, 2020 at 10:55:48PM +0200, Marek Szyprowski wrote:
> > Hi Christoph,
> >
> > On 14.07.2020 21:04, Christoph Hellwig wrote:
> > > Just use d_genocide instead of iterating through the root directory with
> > > cumbersome userspace-like APIs. This also ensures we actually remove files
> > > that are not direct children of the root entry, which the old code failed
> > > to do.
> > >
> > > Fixes: df52092f3c97 ("fastboot: remove duplicate unpack_to_rootfs()")
> > > Signed-off-by: Christoph Hellwig <[email protected]>
> >
> > This patch breaks initrd support ;-(
> >
> > I use initrd to deploy kernel modules on my test machines. It was
> > automatically mounted on /initrd. /lib/modules is just a symlink to
> > /initrd. I know that initrd support is marked as deprecated, but it
> > would be really nice to give people some time to update their machines
> > before breaking the stuff.
>
> Looks like your setup did rely on the /dev/ notes from the built-in
> initramfs to be preserved.
>
> Can you comment out the call to d_genocide? It seems like for your
> the fact that clean_rootfs didn't actually clean up was a feature and
> not a bug.
>
> I guess the old, pre-2008 code also wouldn't have worked for you in
> that case.
Did you get a chance to try this?
It was <2020-07-23 czw 11:22>, when Christoph Hellwig wrote:
> On Sat, Jul 18, 2020 at 12:00:35PM +0200, Christoph Hellwig wrote:
>> On Fri, Jul 17, 2020 at 10:55:48PM +0200, Marek Szyprowski wrote:
>>> On 14.07.2020 21:04, Christoph Hellwig wrote:
>>>> Just use d_genocide instead of iterating through the root directory
>>>> with cumbersome userspace-like APIs. This also ensures we actually
>>>> remove files that are not direct children of the root entry, which
>>>> the old code failed to do.
>>>>
>>>> Fixes: df52092f3c97 ("fastboot: remove duplicate unpack_to_rootfs()")
>>>> Signed-off-by: Christoph Hellwig <[email protected]>
>>>>
>>> This patch breaks initrd support ;-(
>>>
>>> I use initrd to deploy kernel modules on my test machines. It was
>>> automatically mounted on /initrd. /lib/modules is just a symlink to
>>> /initrd. I know that initrd support is marked as deprecated, but it
>>> would be really nice to give people some time to update their
>>> machines before breaking the stuff.
>>
>> Looks like your setup did rely on the /dev/ notes from the built-in
>> initramfs to be preserved.
Our initrd image contains only the modules directory and 5.8.0-rc5-next-20200717
in it.
>> Can you comment out the call to d_genocide? It seems like for your
>> the fact that clean_rootfs didn't actually clean up was a feature and
>> not a bug.
>>
>> I guess the old, pre-2008 code also wouldn't have worked for you in
>> that case.
>
> Did you get a chance to try this?
Indeed, commenting out d_genocide() helps.
Kind regards,
ŁS
PS. Marek is currently out of office.
--
Łukasz Stelmach
Samsung R&D Institute Poland
Samsung Electronics
On Thu, Jul 23, 2020 at 04:25:34PM +0200, Lukasz Stelmach wrote:
> >> Can you comment out the call to d_genocide? It seems like for your
> >> the fact that clean_rootfs didn't actually clean up was a feature and
> >> not a bug.
> >>
> >> I guess the old, pre-2008 code also wouldn't have worked for you in
> >> that case.
> >
> > Did you get a chance to try this?
>
> Indeed, commenting out d_genocide() helps.
So given that people have relied on at least the basic device nodes
like /dev/console to not go away since 2008, I wonder if we should just
remove clean_rootfs entirely
Linus, Al?
On Tue, Jul 14, 2020 at 09:04:16PM +0200, Christoph Hellwig wrote:
> static int __init
> -identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
> +identify_ramdisk_image(struct file *file, int start_block,
> + decompress_fn *decompressor)
> {
....
> - ksys_lseek(fd, start_block * BLOCK_SIZE, 0);
> kfree(buf);
> return nblocks;
> }
You do realize that you've changed behaviour of that thing if start_block != 0?
Old one used to leave the things for subsequent reads to start at start_block * 512;
new one will ignore that. So after
> - nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor);
> + nblocks = identify_ramdisk_image(in_file, rd_image_start, &decompressor);
you'll have in_file->f_pos left at 0 instead of rd_image_start * 512.
... affecting this
> - if (crd_load(in_fd, out_fd, decompressor) == 0)
> + if (crd_load(in_file, out_file, decompressor) == 0)
... and this
> - ksys_read(in_fd, buf, BLOCK_SIZE);
> - ksys_write(out_fd, buf, BLOCK_SIZE);
> + kernel_read(in_file, buf, BLOCK_SIZE, &in_file->f_pos);
> + kernel_write(out_file, buf, BLOCK_SIZE, &out_file->f_pos);
FWIW, I would suggest *not* bothering with ->f_pos and using two global
(well, file-static, obviously) variables instead. And kill 'pos' in
identify_ramdisk_image() as well - use the in_pos instead.
On Thu, Jul 23, 2020 at 04:27:34PM +0200, Christoph Hellwig wrote:
> On Thu, Jul 23, 2020 at 04:25:34PM +0200, Lukasz Stelmach wrote:
> > >> Can you comment out the call to d_genocide? It seems like for your
> > >> the fact that clean_rootfs didn't actually clean up was a feature and
> > >> not a bug.
> > >>
> > >> I guess the old, pre-2008 code also wouldn't have worked for you in
> > >> that case.
> > >
> > > Did you get a chance to try this?
> >
> > Indeed, commenting out d_genocide() helps.
>
> So given that people have relied on at least the basic device nodes
> like /dev/console to not go away since 2008, I wonder if we should just
> remove clean_rootfs entirely
>
> Linus, Al?
First of all, d_genocide() is simply wrong here from VFS point of view. _IF_
you want recursive removal, you need simple_recursive_remove(path.dentry, NULL).
And it's a userland-visible change of behaviour.
As for removal of clean_rootfs()... FWIW, the odds of an image that would
eventually fail accidentally getting past the signature mismatch check are
fairly low. I've no idea what scenario the author of that thing used to have;
that would be Shaohua Li <[email protected]>. Cc'd...
On Mon, Jul 27, 2020 at 03:41:49AM +0100, Al Viro wrote:
> On Thu, Jul 23, 2020 at 04:27:34PM +0200, Christoph Hellwig wrote:
> > On Thu, Jul 23, 2020 at 04:25:34PM +0200, Lukasz Stelmach wrote:
> > > >> Can you comment out the call to d_genocide? It seems like for your
> > > >> the fact that clean_rootfs didn't actually clean up was a feature and
> > > >> not a bug.
> > > >>
> > > >> I guess the old, pre-2008 code also wouldn't have worked for you in
> > > >> that case.
> > > >
> > > > Did you get a chance to try this?
> > >
> > > Indeed, commenting out d_genocide() helps.
> >
> > So given that people have relied on at least the basic device nodes
> > like /dev/console to not go away since 2008, I wonder if we should just
> > remove clean_rootfs entirely
> >
> > Linus, Al?
>
> First of all, d_genocide() is simply wrong here from VFS point of view. _IF_
> you want recursive removal, you need simple_recursive_remove(path.dentry, NULL).
> And it's a userland-visible change of behaviour.
>
> As for removal of clean_rootfs()... FWIW, the odds of an image that would
> eventually fail accidentally getting past the signature mismatch check are
> fairly low. I've no idea what scenario the author of that thing used to have;
> that would be Shaohua Li <[email protected]>. Cc'd...
Shaohua is now at Facebook.
On Tue, Jul 14, 2020 at 09:04:21PM +0200, Christoph Hellwig wrote:
> - ssize_t rv = ksys_write(fd, p, count);
> + ssize_t rv = kernel_write(file, p, count, &file->f_pos);
No. Sure, that'll work for ramfs with nobody else playing with those.
However, this is the wrong way to do such things; do *NOT* pass the
address of file->f_pos to anything. The few places that still do that
are wrong.
As a general rule, ->read() and ->write() instances should never be
given &file->f_pos. Address of a local variable - sure, no problem.
Copy it back into ->f_pos when they are done? Also fine. But not
this.
Keep that offset in a variable (static in file, argument of xwrite(),
whatever).
On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
> Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
> instead open a struct file for /dev/console and then install it as FD
> 0/1/2 manually.
I really hate that one. Every time we exposed the internal details to
the fucking early init code, we paid for that afterwards. And this
goes over the top wrt the level of details being exposed.
_IF_ you want to keep that thing, move it to fs/file.c, with dire comment
re that being very special shite for init and likely cause of subsequent
trouble whenever anything gets changed, a gnat farts somewhere, etc.
Do not leave that kind of crap sitting around init/*.c; KERNEL_DS
may be a source of occasional PITA, but here you are trading it for a lot
worse one in the future.
On Mon, Jul 27, 2020 at 04:05:34AM +0100, Al Viro wrote:
> On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
> > Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
> > instead open a struct file for /dev/console and then install it as FD
> > 0/1/2 manually.
>
> I really hate that one. Every time we exposed the internal details to
> the fucking early init code, we paid for that afterwards. And this
> goes over the top wrt the level of details being exposed.
>
> _IF_ you want to keep that thing, move it to fs/file.c, with dire comment
> re that being very special shite for init and likely cause of subsequent
> trouble whenever anything gets changed, a gnat farts somewhere, etc.
Err, while I'm all for keeping internals internal, fd_install and
get_unused_fd_flags are exported routines with tons of users of this
pattern all over.
On Mon, Jul 27, 2020 at 07:46:25AM +0200, Christoph Hellwig wrote:
> On Mon, Jul 27, 2020 at 04:05:34AM +0100, Al Viro wrote:
> > On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
> > > Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
> > > instead open a struct file for /dev/console and then install it as FD
> > > 0/1/2 manually.
> >
> > I really hate that one. Every time we exposed the internal details to
> > the fucking early init code, we paid for that afterwards. And this
> > goes over the top wrt the level of details being exposed.
> >
> > _IF_ you want to keep that thing, move it to fs/file.c, with dire comment
> > re that being very special shite for init and likely cause of subsequent
> > trouble whenever anything gets changed, a gnat farts somewhere, etc.
>
> Err, while I'm all for keeping internals internal, fd_install and
> get_unused_fd_flags are exported routines with tons of users of this
> pattern all over.
get_file_rcu_many()? All over the place? Besides, that's _not_ the normal
pattern for get_unused_fd() - there's a very special reason we don't expect
an error from it here.
On July 26, 2020 8:05:34 PM PDT, Al Viro <[email protected]> wrote:
>On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
>> Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work,
>but
>> instead open a struct file for /dev/console and then install it as FD
>> 0/1/2 manually.
>
>I really hate that one. Every time we exposed the internal details to
>the fucking early init code, we paid for that afterwards. And this
>goes over the top wrt the level of details being exposed.
>
>_IF_ you want to keep that thing, move it to fs/file.c, with dire
>comment
>re that being very special shite for init and likely cause of
>subsequent
>trouble whenever anything gets changed, a gnat farts somewhere, etc.
>
> Do not leave that kind of crap sitting around init/*.c; KERNEL_DS
>may be a source of occasional PITA, but here you are trading it for a
>lot
>worse one in the future.
Okay... here is a perhaps idiotic idea... even if we don't want to run stuff in actual user space, could we map initramfs into user space memory before running init (execing init will tear down those mappings anyway) so that we don't need KERNEL_DS at least?
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.
On Sun, Jul 26, 2020 at 11:20:41PM -0700, [email protected] wrote:
> On July 26, 2020 8:05:34 PM PDT, Al Viro <[email protected]> wrote:
> >On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
> >> Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work,
> >but
> >> instead open a struct file for /dev/console and then install it as FD
> >> 0/1/2 manually.
> >
> >I really hate that one. Every time we exposed the internal details to
> >the fucking early init code, we paid for that afterwards. And this
> >goes over the top wrt the level of details being exposed.
> >
> >_IF_ you want to keep that thing, move it to fs/file.c, with dire
> >comment
> >re that being very special shite for init and likely cause of
> >subsequent
> >trouble whenever anything gets changed, a gnat farts somewhere, etc.
> >
> > Do not leave that kind of crap sitting around init/*.c; KERNEL_DS
> >may be a source of occasional PITA, but here you are trading it for a
> >lot
> >worse one in the future.
>
> Okay... here is a perhaps idiotic idea... even if we don't want to run stuff in actual user space, could we map initramfs into user space memory before running init (execing init will tear down those mappings anyway) so that we don't need KERNEL_DS at least?
Err, why? The changes have been pretty simple, and I'd rather not come
up with new crazy ways just to make things complicated.
On July 26, 2020 11:24:25 PM PDT, Christoph Hellwig <[email protected]> wrote:
>On Sun, Jul 26, 2020 at 11:20:41PM -0700, [email protected] wrote:
>> On July 26, 2020 8:05:34 PM PDT, Al Viro <[email protected]>
>wrote:
>> >On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
>> >> Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to
>work,
>> >but
>> >> instead open a struct file for /dev/console and then install it as
>FD
>> >> 0/1/2 manually.
>> >
>> >I really hate that one. Every time we exposed the internal details
>to
>> >the fucking early init code, we paid for that afterwards. And this
>> >goes over the top wrt the level of details being exposed.
>> >
>> >_IF_ you want to keep that thing, move it to fs/file.c, with dire
>> >comment
>> >re that being very special shite for init and likely cause of
>> >subsequent
>> >trouble whenever anything gets changed, a gnat farts somewhere, etc.
>> >
>> > Do not leave that kind of crap sitting around init/*.c; KERNEL_DS
>> >may be a source of occasional PITA, but here you are trading it for
>a
>> >lot
>> >worse one in the future.
>>
>> Okay... here is a perhaps idiotic idea... even if we don't want to
>run stuff in actual user space, could we map initramfs into user space
>memory before running init (execing init will tear down those mappings
>anyway) so that we don't need KERNEL_DS at least?
>
>Err, why? The changes have been pretty simple, and I'd rather not come
>up with new crazy ways just to make things complicated.
Why? To avoid this neverending avalanche of special interfaces and layering violations. Neatly deals with non-contiguous contents and initramfs in device memory, etc. etc. etc.
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.
On Mon, Jul 27, 2020 at 07:03:22AM +0100, Al Viro wrote:
> On Mon, Jul 27, 2020 at 07:46:25AM +0200, Christoph Hellwig wrote:
> > On Mon, Jul 27, 2020 at 04:05:34AM +0100, Al Viro wrote:
> > > On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
> > > > Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
> > > > instead open a struct file for /dev/console and then install it as FD
> > > > 0/1/2 manually.
> > >
> > > I really hate that one. Every time we exposed the internal details to
> > > the fucking early init code, we paid for that afterwards. And this
> > > goes over the top wrt the level of details being exposed.
> > >
> > > _IF_ you want to keep that thing, move it to fs/file.c, with dire comment
> > > re that being very special shite for init and likely cause of subsequent
> > > trouble whenever anything gets changed, a gnat farts somewhere, etc.
> >
> > Err, while I'm all for keeping internals internal, fd_install and
> > get_unused_fd_flags are exported routines with tons of users of this
> > pattern all over.
>
> get_file_rcu_many()? All over the place? Besides, that's _not_ the normal
> pattern for get_unused_fd() - there's a very special reason we don't expect
> an error from it here.
Oh well. I can add an init_dup2, but that should probably go after
the series adding fs/for-init.c or fs/init.c. I'll skip it for the
current set of fixups and will send it once we have a stable branch for
that.
On Sun, Jul 26, 2020 at 11:36:15PM -0700, [email protected] wrote:
> >Err, why? The changes have been pretty simple, and I'd rather not come
> >up with new crazy ways just to make things complicated.
>
> Why? To avoid this neverending avalanche of special interfaces and layering violations. Neatly deals with non-contiguous contents and initramfs in device memory, etc. etc. etc.
I don't think it will be all that simple. But given that linux-next
is just missing one series Al was already ok with to kill off set_fs
entirely for about half of our architectures I'd rather go ahead with
this series. If you can send a series mapping user memory that actually
cleans things up on top of it I'm not going to complain, but I'm not
sure it really is going to be all that much cleaner.
On Mon, Jul 27, 2020 at 04:54:53PM +0100, Al Viro wrote:
> On Mon, Jul 27, 2020 at 08:48:28AM +0200, Christoph Hellwig wrote:
> > On Mon, Jul 27, 2020 at 07:03:22AM +0100, Al Viro wrote:
> > > On Mon, Jul 27, 2020 at 07:46:25AM +0200, Christoph Hellwig wrote:
> > > > On Mon, Jul 27, 2020 at 04:05:34AM +0100, Al Viro wrote:
> > > > > On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
> > > > > > Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
> > > > > > instead open a struct file for /dev/console and then install it as FD
> > > > > > 0/1/2 manually.
> > > > >
> > > > > I really hate that one. Every time we exposed the internal details to
> > > > > the fucking early init code, we paid for that afterwards. And this
> > > > > goes over the top wrt the level of details being exposed.
> > > > >
> > > > > _IF_ you want to keep that thing, move it to fs/file.c, with dire comment
> > > > > re that being very special shite for init and likely cause of subsequent
> > > > > trouble whenever anything gets changed, a gnat farts somewhere, etc.
> > > >
> > > > Err, while I'm all for keeping internals internal, fd_install and
> > > > get_unused_fd_flags are exported routines with tons of users of this
> > > > pattern all over.
> > >
> > > get_file_rcu_many()? All over the place? Besides, that's _not_ the normal
> > > pattern for get_unused_fd() - there's a very special reason we don't expect
> > > an error from it here.
> >
> > Oh well. I can add an init_dup2, but that should probably go after
> > the series adding fs/for-init.c or fs/init.c. I'll skip it for the
> > current set of fixups and will send it once we have a stable branch for
> > that.
>
> OK. The really serious ones are around f_pos uses and d_genocide() one.
> FWIW, cleanup_rootfs() should probably be removed - it looks rather
> pointless.
I've got all that in a series I've tested this morning, and which
I really should post now..
On Mon, Jul 27, 2020 at 08:48:28AM +0200, Christoph Hellwig wrote:
> On Mon, Jul 27, 2020 at 07:03:22AM +0100, Al Viro wrote:
> > On Mon, Jul 27, 2020 at 07:46:25AM +0200, Christoph Hellwig wrote:
> > > On Mon, Jul 27, 2020 at 04:05:34AM +0100, Al Viro wrote:
> > > > On Tue, Jul 14, 2020 at 09:04:22PM +0200, Christoph Hellwig wrote:
> > > > > Don't rely on the implicit set_fs(KERNEL_DS) for ksys_open to work, but
> > > > > instead open a struct file for /dev/console and then install it as FD
> > > > > 0/1/2 manually.
> > > >
> > > > I really hate that one. Every time we exposed the internal details to
> > > > the fucking early init code, we paid for that afterwards. And this
> > > > goes over the top wrt the level of details being exposed.
> > > >
> > > > _IF_ you want to keep that thing, move it to fs/file.c, with dire comment
> > > > re that being very special shite for init and likely cause of subsequent
> > > > trouble whenever anything gets changed, a gnat farts somewhere, etc.
> > >
> > > Err, while I'm all for keeping internals internal, fd_install and
> > > get_unused_fd_flags are exported routines with tons of users of this
> > > pattern all over.
> >
> > get_file_rcu_many()? All over the place? Besides, that's _not_ the normal
> > pattern for get_unused_fd() - there's a very special reason we don't expect
> > an error from it here.
>
> Oh well. I can add an init_dup2, but that should probably go after
> the series adding fs/for-init.c or fs/init.c. I'll skip it for the
> current set of fixups and will send it once we have a stable branch for
> that.
OK. The really serious ones are around f_pos uses and d_genocide() one.
FWIW, clean_rootfs() should probably be removed - it looks rather
pointless.