Hi,
This is V3 of patches. Christoph had posted V2 here.
https://lore.kernel.org/linux-fsdevel/[email protected]/
There was a small issue in last patch series that list_bdev_fs_names()
did not put an extra '\0' at the end as current callers were expecting.
To fix this, I have modified list_bdev_fs_names() and split_fs_names()
to return number of null terminated strings they have parsed. And
modified callers to use that to loop through strings (instead of
relying on an extra null at the end).
Christoph was finding it hard to find time so I took his patches,
added my changes in patch3 and reposting the patch series.
I have tested it with 9p, virtiofs and ext4 filesystems as rootfs
and it works for me.
Thanks
Vivek
Christoph Hellwig (3):
init: split get_fs_names
init: allow mounting arbitrary non-blockdevice filesystems as root
fs: simplify get_filesystem_list / get_all_fs_names
fs/filesystems.c | 27 ++++++++------
include/linux/fs.h | 2 +-
init/do_mounts.c | 90 +++++++++++++++++++++++++++++++++-------------
3 files changed, 83 insertions(+), 36 deletions(-)
--
2.31.1
From: Christoph Hellwig <[email protected]>
Just output the '\0' separate list of supported file systems for block
devices directly rather than going through a pointless round of string
manipulation.
Based on an earlier patch from Al Viro <[email protected]>.
Vivek:
Modified list_bdev_fs_names() and split_fs_names() to return number of
null terminted strings to caller. Callers now use that information to
loop through all the strings instead of relying on one extra null char
being present at the end.
Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Vivek Goyal <[email protected]>
---
fs/filesystems.c | 27 +++++++++++++++----------
include/linux/fs.h | 2 +-
init/do_mounts.c | 49 ++++++++++++++++++++--------------------------
3 files changed, 39 insertions(+), 39 deletions(-)
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 90b8d879fbaf..58b9067b2391 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -209,21 +209,28 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
}
#endif
-int __init get_filesystem_list(char *buf)
+int __init list_bdev_fs_names(char *buf, size_t size)
{
- int len = 0;
- struct file_system_type * tmp;
+ struct file_system_type *p;
+ size_t len;
+ int count = 0;
read_lock(&file_systems_lock);
- tmp = file_systems;
- while (tmp && len < PAGE_SIZE - 80) {
- len += sprintf(buf+len, "%s\t%s\n",
- (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
- tmp->name);
- tmp = tmp->next;
+ for (p = file_systems; p; p = p->next) {
+ if (!(p->fs_flags & FS_REQUIRES_DEV))
+ continue;
+ len = strlen(p->name) + 1;
+ if (len > size) {
+ pr_warn("%s: truncating file system list\n", __func__);
+ break;
+ }
+ memcpy(buf, p->name, len);
+ buf += len;
+ size -= len;
+ count++;
}
read_unlock(&file_systems_lock);
- return len;
+ return count;
}
#ifdef CONFIG_PROC_FS
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 640574294216..c76dfc01cf9d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3622,7 +3622,7 @@ int proc_nr_dentry(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_inodes(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-int __init get_filesystem_list(char *buf);
+int __init list_bdev_fs_names(char *buf, size_t size);
#define __FMODE_EXEC ((__force int) FMODE_EXEC)
#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
diff --git a/init/do_mounts.c b/init/do_mounts.c
index bdeb90b8d669..9b4a1f877e47 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -338,32 +338,22 @@ __setup("rootflags=", root_data_setup);
__setup("rootfstype=", fs_names_setup);
__setup("rootdelay=", root_delay_setup);
-static void __init split_fs_names(char *page, char *names)
+static int __init split_fs_names(char *page, char *names)
{
- strcpy(page, root_fs_names);
- while (*page++) {
- if (page[-1] == ',')
- page[-1] = '\0';
- }
- *page = '\0';
-}
-
-static void __init get_all_fs_names(char *page)
-{
- int len = get_filesystem_list(page);
- char *s = page, *p, *next;
+ int count = 0;
+ char *p = page;
- page[len] = '\0';
- for (p = page - 1; p; p = next) {
- next = strchr(++p, '\n');
- if (*p++ != '\t')
- continue;
- while ((*s++ = *p++) != '\n')
- ;
- s[-1] = '\0';
+ strcpy(p, root_fs_names);
+ while (*p++) {
+ if (p[-1] == ',')
+ p[-1] = '\0';
}
+ *p = '\0';
+
+ for (p = page; *p; p += strlen(p)+1)
+ count++;
- *s = '\0';
+ return count;
}
static int __init do_mount_root(const char *name, const char *fs,
@@ -409,15 +399,16 @@ void __init mount_block_root(char *name, int flags)
char *fs_names = page_address(page);
char *p;
char b[BDEVNAME_SIZE];
+ int num_fs, i;
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
if (root_fs_names)
- split_fs_names(fs_names, root_fs_names);
+ num_fs = split_fs_names(fs_names, root_fs_names);
else
- get_all_fs_names(fs_names);
+ num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
retry:
- for (p = fs_names; *p; p += strlen(p)+1) {
+ for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) {
int err = do_mount_root(name, p, flags, root_mount_data);
switch (err) {
case 0:
@@ -450,7 +441,7 @@ void __init mount_block_root(char *name, int flags)
printk("List of all partitions:\n");
printk_all_partitions();
printk("No filesystem could mount root, tried: ");
- for (p = fs_names; *p; p += strlen(p)+1)
+ for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1)
printk(" %s", p);
printk("\n");
panic("VFS: Unable to mount root fs on %s", b);
@@ -551,13 +542,15 @@ static int __init mount_nodev_root(void)
{
char *fs_names, *fstype;
int err = -EINVAL;
+ int num_fs, i;
fs_names = (void *)__get_free_page(GFP_KERNEL);
if (!fs_names)
return -EINVAL;
- split_fs_names(fs_names, root_fs_names);
+ num_fs = split_fs_names(fs_names, root_fs_names);
- for (fstype = fs_names; *fstype; fstype += strlen(fstype) + 1) {
+ for (i = 0, fstype = fs_names; i < num_fs;
+ i++, fstype += strlen(fstype) + 1) {
if (!fs_is_nodev(fstype))
continue;
err = do_mount_root(root_device_name, fstype, root_mountflags,
--
2.31.1
From: Christoph Hellwig <[email protected]>
Currently the only non-blockdevice filesystems that can be used as the
initial root filesystem are NFS and CIFS, which use the magic
"root=/dev/nfs" and "root=/dev/cifs" syntax that requires the root
device file system details to come from filesystem specific kernel
command line options.
Add a little bit of new code that allows to just pass arbitrary
string mount options to any non-blockdevice filesystems so that it can
be mounted as the root file system.
For example a virtiofs root file system can be mounted using the
following syntax:
"root=myfs rootfstype=virtiofs rw"
Based on an earlier patch from Vivek Goyal <[email protected]>.
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 43 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/init/do_mounts.c b/init/do_mounts.c
index ec32de3ad52b..bdeb90b8d669 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -534,6 +534,45 @@ static int __init mount_cifs_root(void)
}
#endif
+static bool __init fs_is_nodev(char *fstype)
+{
+ struct file_system_type *fs = get_fs_type(fstype);
+ bool ret = false;
+
+ if (fs) {
+ ret = !(fs->fs_flags & FS_REQUIRES_DEV);
+ put_filesystem(fs);
+ }
+
+ return ret;
+}
+
+static int __init mount_nodev_root(void)
+{
+ char *fs_names, *fstype;
+ int err = -EINVAL;
+
+ fs_names = (void *)__get_free_page(GFP_KERNEL);
+ if (!fs_names)
+ return -EINVAL;
+ split_fs_names(fs_names, root_fs_names);
+
+ for (fstype = fs_names; *fstype; fstype += strlen(fstype) + 1) {
+ if (!fs_is_nodev(fstype))
+ continue;
+ err = do_mount_root(root_device_name, fstype, root_mountflags,
+ root_mount_data);
+ if (!err)
+ break;
+ if (err != -EACCES && err != -EINVAL)
+ panic("VFS: Unable to mount root \"%s\" (%s), err=%d\n",
+ root_device_name, fstype, err);
+ }
+
+ free_page((unsigned long)fs_names);
+ return err;
+}
+
void __init mount_root(void)
{
#ifdef CONFIG_ROOT_NFS
@@ -550,6 +589,10 @@ void __init mount_root(void)
return;
}
#endif
+ if (ROOT_DEV == 0 && root_device_name && root_fs_names) {
+ if (mount_nodev_root() == 0)
+ return;
+ }
#ifdef CONFIG_BLOCK
{
int err = create_dev("/dev/root", ROOT_DEV);
--
2.31.1
From: Christoph Hellwig <[email protected]>
Split get_fs_names into one function that splits up the command line
argument, and one that gets the list of all registered file systems.
Signed-off-by: Christoph Hellwig <[email protected]>
---
init/do_mounts.c | 48 ++++++++++++++++++++++++++----------------------
1 file changed, 26 insertions(+), 22 deletions(-)
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 74aede860de7..ec32de3ad52b 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -338,30 +338,31 @@ __setup("rootflags=", root_data_setup);
__setup("rootfstype=", fs_names_setup);
__setup("rootdelay=", root_delay_setup);
-static void __init get_fs_names(char *page)
+static void __init split_fs_names(char *page, char *names)
{
- char *s = page;
-
- if (root_fs_names) {
- strcpy(page, root_fs_names);
- while (*s++) {
- if (s[-1] == ',')
- s[-1] = '\0';
- }
- } else {
- int len = get_filesystem_list(page);
- char *p, *next;
+ strcpy(page, root_fs_names);
+ while (*page++) {
+ if (page[-1] == ',')
+ page[-1] = '\0';
+ }
+ *page = '\0';
+}
- page[len] = '\0';
- for (p = page-1; p; p = next) {
- next = strchr(++p, '\n');
- if (*p++ != '\t')
- continue;
- while ((*s++ = *p++) != '\n')
- ;
- s[-1] = '\0';
- }
+static void __init get_all_fs_names(char *page)
+{
+ int len = get_filesystem_list(page);
+ char *s = page, *p, *next;
+
+ page[len] = '\0';
+ for (p = page - 1; p; p = next) {
+ next = strchr(++p, '\n');
+ if (*p++ != '\t')
+ continue;
+ while ((*s++ = *p++) != '\n')
+ ;
+ s[-1] = '\0';
}
+
*s = '\0';
}
@@ -411,7 +412,10 @@ void __init mount_block_root(char *name, int flags)
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
- get_fs_names(fs_names);
+ if (root_fs_names)
+ split_fs_names(fs_names, root_fs_names);
+ else
+ get_all_fs_names(fs_names);
retry:
for (p = fs_names; *p; p += strlen(p)+1) {
int err = do_mount_root(name, p, flags, root_mount_data);
--
2.31.1
On Wed, Jul 14, 2021 at 04:23:19PM -0400, Vivek Goyal wrote:
> From: Christoph Hellwig <[email protected]>
>
> Split get_fs_names into one function that splits up the command line
> argument, and one that gets the list of all registered file systems.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> init/do_mounts.c | 48 ++++++++++++++++++++++++++----------------------
> 1 file changed, 26 insertions(+), 22 deletions(-)
Reviewed-by: Stefan Hajnoczi <[email protected]>
On Wed, Jul 14, 2021 at 04:23:20PM -0400, Vivek Goyal wrote:
> From: Christoph Hellwig <[email protected]>
>
> Currently the only non-blockdevice filesystems that can be used as the
> initial root filesystem are NFS and CIFS, which use the magic
> "root=/dev/nfs" and "root=/dev/cifs" syntax that requires the root
> device file system details to come from filesystem specific kernel
> command line options.
>
> Add a little bit of new code that allows to just pass arbitrary
> string mount options to any non-blockdevice filesystems so that it can
> be mounted as the root file system.
>
> For example a virtiofs root file system can be mounted using the
> following syntax:
>
> "root=myfs rootfstype=virtiofs rw"
>
> Based on an earlier patch from Vivek Goyal <[email protected]>.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> init/do_mounts.c | 43 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 43 insertions(+)
Reviewed-by: Stefan Hajnoczi <[email protected]>
On Wed, Jul 14, 2021 at 04:23:21PM -0400, Vivek Goyal wrote:
> From: Christoph Hellwig <[email protected]>
>
> Just output the '\0' separate list of supported file systems for block
> devices directly rather than going through a pointless round of string
> manipulation.
>
> Based on an earlier patch from Al Viro <[email protected]>.
>
> Vivek:
> Modified list_bdev_fs_names() and split_fs_names() to return number of
> null terminted strings to caller. Callers now use that information to
> loop through all the strings instead of relying on one extra null char
> being present at the end.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Signed-off-by: Vivek Goyal <[email protected]>
> ---
> fs/filesystems.c | 27 +++++++++++++++----------
> include/linux/fs.h | 2 +-
> init/do_mounts.c | 49 ++++++++++++++++++++--------------------------
> 3 files changed, 39 insertions(+), 39 deletions(-)
Reviewed-by: Stefan Hajnoczi <[email protected]>
On Wed, Jul 14, 2021 at 04:23:18PM -0400, Vivek Goyal wrote:
> Hi,
>
> This is V3 of patches. Christoph had posted V2 here.
Hi,
Ping?
Vivek
>
> https://lore.kernel.org/linux-fsdevel/[email protected]/
>
> There was a small issue in last patch series that list_bdev_fs_names()
> did not put an extra '\0' at the end as current callers were expecting.
>
> To fix this, I have modified list_bdev_fs_names() and split_fs_names()
> to return number of null terminated strings they have parsed. And
> modified callers to use that to loop through strings (instead of
> relying on an extra null at the end).
>
> Christoph was finding it hard to find time so I took his patches,
> added my changes in patch3 and reposting the patch series.
>
> I have tested it with 9p, virtiofs and ext4 filesystems as rootfs
> and it works for me.
>
> Thanks
> Vivek
>
> Christoph Hellwig (3):
> init: split get_fs_names
> init: allow mounting arbitrary non-blockdevice filesystems as root
> fs: simplify get_filesystem_list / get_all_fs_names
>
> fs/filesystems.c | 27 ++++++++------
> include/linux/fs.h | 2 +-
> init/do_mounts.c | 90 +++++++++++++++++++++++++++++++++-------------
> 3 files changed, 83 insertions(+), 36 deletions(-)
>
> --
> 2.31.1
>
On Wed, Jul 14, 2021 at 04:23:18PM -0400, Vivek Goyal wrote:
> Hi,
>
> This is V3 of patches. Christoph had posted V2 here.
>
> https://lore.kernel.org/linux-fsdevel/[email protected]/
>
> There was a small issue in last patch series that list_bdev_fs_names()
> did not put an extra '\0' at the end as current callers were expecting.
>
> To fix this, I have modified list_bdev_fs_names() and split_fs_names()
> to return number of null terminated strings they have parsed. And
> modified callers to use that to loop through strings (instead of
> relying on an extra null at the end).
>
> Christoph was finding it hard to find time so I took his patches,
> added my changes in patch3 and reposting the patch series.
>
> I have tested it with 9p, virtiofs and ext4 filesystems as rootfs
> and it works for me.
lgtm,
Acked-by: Christian Brauner <[email protected]>
On Wed, Jul 14, 2021 at 04:23:21PM -0400, Vivek Goyal wrote:
> +static int __init split_fs_names(char *page, char *names)
> {
> + int count = 0;
> + char *p = page;
>
> + strcpy(p, root_fs_names);
> + while (*p++) {
> + if (p[-1] == ',')
> + p[-1] = '\0';
> }
> + *p = '\0';
> +
> + for (p = page; *p; p += strlen(p)+1)
> + count++;
>
> + return count;
> }
Ummm.... The last part makes no sense - it counts '\0' in the array
pointed to be page, until the first double '\0' in there. All of
which had been put there by the loop immediately prior to that one...
Incidentally, it treats stray ,, in root_fs_names as termination;
is that intentional?
On Fri, Jul 30, 2021 at 12:57:07AM +0000, Al Viro wrote:
> On Wed, Jul 14, 2021 at 04:23:21PM -0400, Vivek Goyal wrote:
>
> > +static int __init split_fs_names(char *page, char *names)
> > {
> > + int count = 0;
> > + char *p = page;
> >
> > + strcpy(p, root_fs_names);
> > + while (*p++) {
> > + if (p[-1] == ',')
> > + p[-1] = '\0';
> > }
> > + *p = '\0';
> > +
> > + for (p = page; *p; p += strlen(p)+1)
> > + count++;
> >
> > + return count;
> > }
>
> Ummm.... The last part makes no sense - it counts '\0' in the array
> pointed to be page, until the first double '\0' in there. All of
> which had been put there by the loop immediately prior to that one...
I want split_fs_names() to replace ',' with space as well as return
number of null terminated strings found. So first loop just replaces
',' with '\0' and second loop counts number of strings.
Previously split_fs_names() was only replacing ',' with '\0'. Now
we are changing the semantics and returning number of strings
left in the buffer after the replacement.
I initilaly thought that if I can manage it with single loop but
there were quite a few corner cases. So I decided to use two
loops instead. One for replacement and one for counting.
>
> Incidentally, it treats stray ,, in root_fs_names as termination;
> is that intentional?
Just trying to keep the existing behavior. Existing get_fs_names(), also
replaces all instances of ',' with '\0'. So if there are two consecutive,
',', that will result in two consecutive '\0' and caller will view
it as end of buffer.
IOW, rootfsnames=foo,,bar will effectively be treated as "rootfsname=foo".
That's the current behavior and I did not try to improve on it just
keeps on increasing the size of patches. That's probably an improvement
for some other day if somebody cares.
Thanks
Vivek