From: Mark Doffman <[email protected]>
Hi All,
The following patch series adds the ability to use a ceph distributed
file system as the root device. The functionality is similar to
NFS root but for the ceph filesystem.
The patch series adds a new root device option '/dev/ceph'.
Configuration is passed either via a new kernel parameter 'cephroot'
or DHCP option 17.
The patch series applies to v3.12-10714-gd085eb6
Thanks
Mark
Mark Doffman (3):
init: Add a new root device option, the Ceph file system
cephroot: Add DHCP option 17 configuration to ceph root fs.
Reuse root_nfs_parse_addr() for NFS and CEPH
Rob Taylor (1):
Documentation: Document the cephroot functionality
Documentation/filesystems/{ => ceph}/ceph.txt | 0
Documentation/filesystems/ceph/cephroot.txt | 85 +++++++++++++
fs/ceph/Kconfig | 10 ++
fs/ceph/Makefile | 1 +
fs/ceph/root.c | 165 ++++++++++++++++++++++++++
fs/nfs/nfsroot.c | 4 +-
include/linux/ceph/ceph_root.h | 10 ++
include/linux/nfs_fs.h | 2 +-
include/linux/root_dev.h | 1 +
init/do_mounts.c | 32 ++++-
net/ipv4/ipconfig.c | 16 ++-
11 files changed, 317 insertions(+), 9 deletions(-)
rename Documentation/filesystems/{ => ceph}/ceph.txt (100%)
create mode 100644 Documentation/filesystems/ceph/cephroot.txt
create mode 100644 fs/ceph/root.c
create mode 100644 include/linux/ceph/ceph_root.h
--
1.8.4
On 11/21/2013 02:19 PM, David Dillow wrote:
> On Wed, 2013-11-20 at 20:13 -0600, [email protected] wrote:
>> The following patch series adds the ability to use a ceph distributed
>> file system as the root device. The functionality is similar to
>> NFS root but for the ceph filesystem.
>
> Why do this in the kernel vs in the initramfs? Dracut would be a more
> appropriate place, I think, as it has support for other network root
> devices already.
I don't know if it's a good idea to require an initramfs for mounting a
rootfs over the network. NFS root is currently in the kernel, and
frequently used without an initramfs.
Booting from ceph using a root device is relatively simple compared to
generating an initramfs and it has the possibility of producing smaller
images for memory constrained devices.
From: Mark Doffman <[email protected]>
Rename function root_nfs_parse_addr to root_parse_server_addr
to reflect its use in parsing an address for both
NFS and CEPH root.
Signed-off-by: Mark Doffman <[email protected]>
Reviewed-by: Ian Molton <[email protected]>
---
fs/ceph/root.c | 6 +++---
fs/nfs/nfsroot.c | 4 ++--
include/linux/nfs_fs.h | 2 +-
net/ipv4/ipconfig.c | 6 +++---
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/ceph/root.c b/fs/ceph/root.c
index 24b8dcf..17a942f 100644
--- a/fs/ceph/root.c
+++ b/fs/ceph/root.c
@@ -18,7 +18,7 @@
#include <linux/ceph/ceph_root.h>
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
-extern __be32 root_nfs_parse_addr(char *name); /*__init*/
+extern __be32 root_parse_server_addr(char *name); /*__init*/
#define MAXPATHLEN 1024
@@ -91,10 +91,10 @@ static int __init ceph_root_setup(char *line)
strlcpy(ceph_root_params, line, sizeof(ceph_root_params));
/*
- * Note: root_nfs_parse_addr() removes the server-ip from
+ * root_parse_server_addr() removes the server-ip from
* ceph_root_params, if it exists.
*/
- root_server_addr = root_nfs_parse_addr(ceph_root_params);
+ root_server_addr = root_parse_server_addr(ceph_root_params);
return 1;
}
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index cd3c910..4e21716 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -144,10 +144,10 @@ static int __init nfs_root_setup(char *line)
* Extract the IP address of the NFS server containing our
* root file system, if one was specified.
*
- * Note: root_nfs_parse_addr() removes the server-ip from
+ * Note: root_parse_server_addr() removes the server-ip from
* nfs_root_parms, if it exists.
*/
- root_server_addr = root_nfs_parse_addr(nfs_root_parms);
+ root_server_addr = root_parse_server_addr(nfs_root_parms);
return 1;
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 14a4820..0d1fb6e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -414,7 +414,7 @@ static inline void nfs_display_fhandle(const struct nfs_fh *fh,
*/
extern int nfs_root_data(char **root_device, char **root_data); /*__init*/
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
-extern __be32 root_nfs_parse_addr(char *name); /*__init*/
+extern __be32 root_parse_server_addr(char *name); /*__init*/
/*
* linux/fs/nfs/file.c
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 765eea4..55a2864 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -140,7 +140,7 @@ __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */
__be32 ic_servaddr = NONE; /* Boot server IP address */
-__be32 root_server_addr = NONE; /* Address of NFS server */
+__be32 root_server_addr = NONE; /* Address of NFS or CEPH server */
u8 root_server_path[256] = { 0, }; /* Path to mount as root */
__be32 ic_dev_xid; /* Device under configuration */
@@ -1337,7 +1337,7 @@ static const struct file_operations pnp_seq_fops = {
* need to have root_server_addr set _before_ IPConfig gets called as it
* can override it.
*/
-__be32 __init root_nfs_parse_addr(char *name)
+__be32 __init root_parse_server_addr(char *name)
{
__be32 addr;
int octets = 0;
@@ -1491,7 +1491,7 @@ static int __init ip_auto_config(void)
ic_dev = ic_first_dev->dev;
}
- addr = root_nfs_parse_addr(root_server_path);
+ addr = root_parse_server_addr(root_server_path);
if (root_server_addr == NONE)
root_server_addr = addr;
--
1.8.4
From: Mark Doffman <[email protected]>
Analogous to NFS add a new root device option, the ability
to boot using the Ceph networked file system as the root fs.
This patch adds a new root device option '/dev/ceph' that
uses a ceph networked file system. File system parameters
are passed using a new kernel parameter: 'cephroot'.
The 'cephroot' parameters are very similar to 'nfsroot'.
Signed-off-by: Mark Doffman <[email protected]>
Reviewed-by: Ian Molton <[email protected]>
---
fs/ceph/Kconfig | 10 +++
fs/ceph/Makefile | 1 +
fs/ceph/root.c | 163 +++++++++++++++++++++++++++++++++++++++++
include/linux/ceph/ceph_root.h | 10 +++
include/linux/root_dev.h | 1 +
init/do_mounts.c | 32 +++++++-
6 files changed, 216 insertions(+), 1 deletion(-)
create mode 100644 fs/ceph/root.c
create mode 100644 include/linux/ceph/ceph_root.h
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index ac9a2ef..325e83d 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -25,3 +25,13 @@ config CEPH_FSCACHE
caching support for Ceph clients using FS-Cache
endif
+
+config ROOT_CEPH
+ bool "Root file system on Ceph FS"
+ depends on CEPH_FS=y && IP_PNP
+ help
+ If you want your system to mount its root file system via CEPH,
+ choose Y here. For details, read
+ <file:Documentation/filesystems/ceph/cephroot.txt>.
+
+ If unsure say N.
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 32e3010..af2dcbf 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -10,3 +10,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
debugfs.o
ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
+ceph-$(CONFIG_ROOT_CEPH) += root.o
diff --git a/fs/ceph/root.c b/fs/ceph/root.c
new file mode 100644
index 0000000..bff67fb
--- /dev/null
+++ b/fs/ceph/root.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2012 Codethink Ltd. <[email protected]>
+ *
+ * This file is released under the GPL v2
+ *
+ * Allow a CephFS filesystem to be mounted as root.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/root_dev.h>
+#include <linux/in.h>
+#include <net/ipconfig.h>
+#include <linux/ceph/ceph_root.h>
+
+/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
+extern __be32 root_nfs_parse_addr(char *name); /*__init*/
+
+#define MAXPATHLEN 1024
+
+/* Working copy of the "cephroot=" parameter, made by ceph_root_setup() */
+static char ceph_root_params[256] __initdata;
+
+/* Server address formatted into ceph_root_device by ceph_root_data() */
+static __be32 servaddr __initdata = htonl(INADDR_NONE);
+
+/* Name of directory to mount, after "%s" nodename substitution */
+static char ceph_export_path[MAXPATHLEN + 1] __initdata;
+
+/* Text-based mount options, accumulated by root_ceph_parse_options() */
+static char ceph_root_options[256] __initdata;
+
+/* server:path string passed to mount */
+static char ceph_root_device[MAXPATHLEN + 1] __initdata;
+
+/* Address of CEPH server */
+static __be32 root_ceph_server_addr = htonl(INADDR_NONE);
+
+/*
+ * Parse out root export path and mount options from
+ * passed-in string @incoming.
+ *
+ * Copy the export path into @exppath, unless it is empty or "default".
+ *
+ * Returns 0 on success, -E2BIG if the options string had to be truncated.
+ */
+static int __init root_ceph_parse_options(char *incoming, char *exppath,
+ const size_t exppathlen)
+{
+ char *p;
+ int res = 0;
+
+ /*
+ * Everything before the first ',' is the remote path
+ */
+ p = strsep(&incoming, ",");
+ if (*p != '\0' && strcmp(p, "default") != 0)
+ strlcpy(exppath, p, exppathlen);
+
+ /*
+ * @incoming now points at the options, if any: append them to the
+ * root options buffer (strlcat() truncated if it returns >= destlen)
+ */
+ if (incoming != NULL && *incoming != '\0') {
+ size_t len = strlen(ceph_root_options);
+ size_t destlen = sizeof(ceph_root_options);
+
+ if (len && ceph_root_options[len - 1] != ',') {
+ if (strlcat(ceph_root_options, ",", destlen) >= destlen)
+ res = -E2BIG;
+ }
+
+ if (strlcat(ceph_root_options, incoming, destlen) >= destlen)
+ res = -E2BIG;
+
+ }
+ return res;
+}
+
+/*
+ * Parse the "cephroot=" kernel command line parameter and make
+ * Root_CEPH the root device. Always returns 1. Parameter format:
+ *
+ * cephroot=[<server-ip>:]<root-dir>[,<cephfs-options>]
+ */
+static int __init ceph_root_setup(char *line)
+{
+ ROOT_DEV = Root_CEPH;
+
+ strlcpy(ceph_root_params, line, sizeof(ceph_root_params));
+
+ /*
+ * Note: root_nfs_parse_addr() removes the server-ip from
+ * ceph_root_params, if it exists.
+ */
+ root_ceph_server_addr = root_nfs_parse_addr(ceph_root_params);
+
+ return 1;
+}
+
+__setup("cephroot=", ceph_root_setup);
+
+/*
+ * ceph_root_data - Return mount device and data for CEPHROOT mount.
+ *
+ * @root_device: OUT: Address of string containing CEPHROOT device.
+ * @root_data: OUT: Address of string containing CEPHROOT mount options.
+ *
+ * Returns: 0 and sets @root_device and @root_data if successful.
+ * -ENOENT without a server address, -ENOMEM, or -E2BIG on overflow.
+ */
+int __init ceph_root_data(char **root_device, char **root_data)
+{
+ char *tmp = NULL;
+ const size_t tmplen = sizeof(ceph_export_path);
+ int len;
+ int ret = -E2BIG;
+
+ servaddr = root_ceph_server_addr;
+ if (servaddr == htonl(INADDR_NONE))
+ return -ENOENT;
+
+ tmp = kzalloc(tmplen, GFP_KERNEL);
+ if (tmp == NULL)
+ return -ENOMEM;
+
+ if (ceph_root_params[0] != '\0') {
+ if (root_ceph_parse_options(ceph_root_params, tmp, tmplen))
+ goto out;
+ }
+
+ /*
+ * Set up ceph_root_device. This looks like: server:/path
+ *
+ * utsname()->nodename (our local IP address or hostname, set by
+ * ipconfig) replaces a "%s" in tmp. snprintf() returning >= the
+ * buffer size means the result was truncated. NOTE(review): tmp
+ * is command-line text used as a format string - confirm intent.
+ */
+ len = snprintf(ceph_export_path, sizeof(ceph_export_path),
+ tmp, utsname()->nodename);
+ if (len >= (int)sizeof(ceph_export_path))
+ goto out;
+ len = snprintf(ceph_root_device, sizeof(ceph_root_device),
+ "%pI4:%s", &servaddr, ceph_export_path);
+ if (len >= (int)sizeof(ceph_root_device))
+ goto out;
+
+ pr_debug("Root-CEPH: Root device: %s\n", ceph_root_device);
+ pr_debug("Root-CEPH: Root options: %s\n", ceph_root_options);
+ *root_device = ceph_root_device;
+ *root_data = ceph_root_options;
+
+ ret = 0;
+
+out:
+ kfree(tmp);
+ return ret;
+}
diff --git a/include/linux/ceph/ceph_root.h b/include/linux/ceph/ceph_root.h
new file mode 100644
index 0000000..e6bae63
--- /dev/null
+++ b/include/linux/ceph/ceph_root.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (C) 2012 Codethink Ltd. <[email protected]>
+ *
+ * This file is released under the GPL v2
+ *
+ * ceph_root.h
+ */
+
+/* linux/fs/ceph/root.c */
+extern int ceph_root_data(char **root_device, char **root_data); /*__init*/
diff --git a/include/linux/root_dev.h b/include/linux/root_dev.h
index ed241aa..af6b182 100644
--- a/include/linux/root_dev.h
+++ b/include/linux/root_dev.h
@@ -16,6 +16,7 @@ enum {
Root_SDA2 = MKDEV(SCSI_DISK0_MAJOR, 2),
Root_HDC1 = MKDEV(IDE1_MAJOR, 1),
Root_SR0 = MKDEV(SCSI_CDROM_MAJOR, 0),
+ Root_CEPH = MKDEV(UNNAMED_MAJOR, 254),
};
extern dev_t ROOT_DEV;
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 8e5addc..d075020 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -33,6 +33,8 @@
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_mount.h>
+#include <linux/ceph/ceph_root.h>
+
#include "do_mounts.h"
int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
@@ -199,6 +201,7 @@ done:
* a partition with a known unique id.
* 8) <major>:<minor> major and minor number of the device separated by
* a colon.
+ * 9) /dev/ceph represents Root_CEPH
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -245,7 +248,9 @@ dev_t name_to_dev_t(char *name)
res = Root_RAM0;
if (strcmp(name, "ram") == 0)
goto done;
-
+ res = Root_CEPH;
+ if (strcmp(name, "ceph") == 0)
+ goto done;
if (strlen(name) > 31)
goto fail;
strcpy(s, name);
@@ -473,6 +478,22 @@ static int __init mount_nfs_root(void)
}
#endif
+#ifdef CONFIG_ROOT_CEPH
+/*
+ * Mount the Ceph root described by ceph_root_data().
+ * Returns 1 when the root was mounted, 0 otherwise.
+ */
+static int __init mount_ceph_root(void)
+{
+ char *root_dev, *root_data;
+
+ if (ceph_root_data(&root_dev, &root_data) != 0)
+ return 0;
+
+ return !do_mount_root(root_dev, "ceph", root_mountflags, root_data);
+}
+#endif
+
#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
void __init change_floppy(char *fmt, ...)
{
@@ -514,6 +535,15 @@ void __init mount_root(void)
ROOT_DEV = Root_FD0;
}
#endif
+#ifdef CONFIG_ROOT_CEPH
+ if (ROOT_DEV == Root_CEPH) {
+ if (mount_ceph_root())
+ return;
+
+ printk(KERN_ERR "VFS: Unable to mount root fs via CephFS, trying floppy.\n");
+ ROOT_DEV = Root_FD0;
+ }
+#endif
#ifdef CONFIG_BLK_DEV_FD
if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
/* rd_doload is 2 for a dual initrd/ramload setup */
--
1.8.4
From: Rob Taylor <[email protected]>
Document using the cephfs as a root device, its purpose,
functionality and use.
Signed-off-by: Mark Doffman <[email protected]>
Signed-off-by: Rob Taylor <[email protected]>
Reviewed-by: Ian Molton <[email protected]>
---
Documentation/filesystems/{ => ceph}/ceph.txt | 0
Documentation/filesystems/ceph/cephroot.txt | 81 +++++++++++++++++++++++++++
2 files changed, 81 insertions(+)
rename Documentation/filesystems/{ => ceph}/ceph.txt (100%)
create mode 100644 Documentation/filesystems/ceph/cephroot.txt
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph/ceph.txt
similarity index 100%
rename from Documentation/filesystems/ceph.txt
rename to Documentation/filesystems/ceph/ceph.txt
diff --git a/Documentation/filesystems/ceph/cephroot.txt b/Documentation/filesystems/ceph/cephroot.txt
new file mode 100644
index 0000000..ae0f5bb
--- /dev/null
+++ b/Documentation/filesystems/ceph/cephroot.txt
@@ -0,0 +1,81 @@
+Mounting the root filesystem via Ceph (cephroot)
+===============================================
+
+Written 2013 by Rob Taylor <[email protected]>
+
+derived from nfsroot.txt:
+
+Written 1996 by Gero Kuhlmann <[email protected]>
+Updated 1997 by Martin Mares <[email protected]>
+Updated 2006 by Nico Schottelius <[email protected]>
+Updated 2006 by Horms <[email protected]>
+
+
+
+In order to use a diskless system, such as an X-terminal or printer server
+for example, it is necessary for the root filesystem to be present on a
+non-disk device. This may be an initramfs (see Documentation/filesystems/
+ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt), a
+filesystem mounted via NFS or a filesystem mounted via Ceph. The following
+text describes how to use Ceph for the root filesystem.
+
+For the rest of this text 'client' means the diskless system, and 'server'
+means the Ceph server.
+
+
+1.) Enabling cephroot capabilities
+ -----------------------------
+
+In order to use cephroot, CEPH_FS needs to be selected as
+built-in during configuration. Once this has been selected, the cephroot
+option will become available, which should also be selected.
+
+In the networking options, kernel level autoconfiguration can be selected,
+along with the types of autoconfiguration to support. Selecting all of
+DHCP, BOOTP and RARP is safe.
+
+
+2.) Kernel command line
+ -------------------
+
+When the kernel has been loaded by a boot loader (see below) it needs to be
+told what root fs device to use. And in the case of cephroot, where to find
+both the server and the name of the directory on the server to mount as root.
+This can be established using the following kernel command line parameters:
+
+root=/dev/ceph
+
+This is necessary to enable the pseudo-Ceph-device. Note that it's not a
+real device but just a synonym to tell the kernel to use Ceph instead of
+a real device.
+
+cephroot=<monaddr>:/[<subdir>],<ceph-opts>
+
+ <monaddr> Monitor address. Each takes the form host[:port]. If the port
+ is not specified, the Ceph default of 6789 is assumed.
+
+ <subdir> A subdirectory subdir may be specified if a subset of the file
+ system is to be mounted
+
+ <ceph-opts> Standard Ceph options. All options are separated by commas.
+ See Documentation/filesystems/ceph/ceph.txt for options and
+ their defaults.
+
+4.) References
+ ----------
+
+
+5.) Credits
+ -------
+
+ cephroot was derived from nfsroot by Rob Taylor <[email protected]>
+ and Mark Doffman <[email protected]>
+
+ The nfsroot code in the kernel and the RARP support have been written
+ by Gero Kuhlmann <[email protected]>.
+
+ The rest of the IP layer autoconfiguration code has been written
+ by Martin Mares <[email protected]>.
+
+ In order to write the initial version of nfsroot I would like to thank
+ Jens-Uwe Mager <[email protected]> for his help.
--
1.8.4
From: Mark Doffman <[email protected]>
When not configured via kernel parameters add to cephroot
the ability to configure server address, path and options
from DHCP option 17.
Signed-off-by: Mark Doffman <[email protected]>
Reviewed-by: Ian Molton <[email protected]>
---
Documentation/filesystems/ceph/cephroot.txt | 4 ++++
fs/ceph/root.c | 12 +++++++-----
net/ipv4/ipconfig.c | 10 ++++++++--
3 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/Documentation/filesystems/ceph/cephroot.txt b/Documentation/filesystems/ceph/cephroot.txt
index ae0f5bb..4f12573 100644
--- a/Documentation/filesystems/ceph/cephroot.txt
+++ b/Documentation/filesystems/ceph/cephroot.txt
@@ -49,6 +49,9 @@ This is necessary to enable the pseudo-Ceph-device. Note that it's not a
real device but just a synonym to tell the kernel to use Ceph instead of
a real device.
+If cephroot is not specified, it is expected that a valid mount will be
+found via DHCP option 17, Root Path [1].
+
cephroot=<monaddr>:/[<subdir>],<ceph-opts>
<monaddr> Monitor address. Each takes the form host[:port]. If the port
@@ -64,6 +67,7 @@ cephroot=<monaddr>:/[<subdir>],<ceph-opts>
4.) References
----------
+[1] http://tools.ietf.org/html/rfc2132
5.) Credits
-------
diff --git a/fs/ceph/root.c b/fs/ceph/root.c
index bff67fb..24b8dcf 100644
--- a/fs/ceph/root.c
+++ b/fs/ceph/root.c
@@ -37,9 +37,6 @@ static char ceph_root_options[256] __initdata;
/* server:path string passed to mount */
static char ceph_root_device[MAXPATHLEN + 1] __initdata;
-/* Address of CEPH server */
-static __be32 root_ceph_server_addr = htonl(INADDR_NONE);
-
/*
* Parse out root export path and mount options from
* passed-in string @incoming.
@@ -97,7 +94,7 @@ static int __init ceph_root_setup(char *line)
* Note: root_nfs_parse_addr() removes the server-ip from
* ceph_root_params, if it exists.
*/
- root_ceph_server_addr = root_nfs_parse_addr(ceph_root_params);
+ root_server_addr = root_nfs_parse_addr(ceph_root_params);
return 1;
}
@@ -120,7 +117,7 @@ int __init ceph_root_data(char **root_device, char **root_data)
int len;
int ret = -E2BIG;
- servaddr = root_ceph_server_addr;
+ servaddr = root_server_addr;
if (servaddr == htonl(INADDR_NONE))
return -ENOENT;
@@ -128,6 +125,11 @@ int __init ceph_root_data(char **root_device, char **root_data)
if (tmp == NULL)
return -ENOMEM;
+ if (root_server_path[0] != '\0') {
+ if (root_ceph_parse_options(root_server_path, tmp, tmplen))
+ goto out;
+ }
+
if (ceph_root_params[0] != '\0') {
if (root_ceph_parse_options(ceph_root_params, tmp, tmplen))
goto out;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138..765eea4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1435,10 +1435,10 @@ static int __init ip_auto_config(void)
* missing values.
*/
if (ic_myaddr == NONE ||
-#ifdef CONFIG_ROOT_NFS
+#if defined(CONFIG_ROOT_NFS) || defined(CONFIG_ROOT_CEPH)
(root_server_addr == NONE &&
ic_servaddr == NONE &&
- ROOT_DEV == Root_NFS) ||
+ (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CEPH)) ||
#endif
ic_first_dev->next) {
#ifdef IPCONFIG_DYNAMIC
@@ -1465,6 +1465,12 @@ static int __init ip_auto_config(void)
goto try_try_again;
}
#endif
+#ifdef CONFIG_ROOT_CEPH
+ if (ROOT_DEV == Root_CEPH) {
+ pr_err("IP-Config: Retrying forever (CEPH root)...\n");
+ goto try_try_again;
+ }
+#endif
if (--retries) {
pr_err("IP-Config: Reopening network devices...\n");
--
1.8.4
On Wed, 2013-11-20 at 20:13 -0600, [email protected] wrote:
> The following patch series adds the ability to use a ceph distributed
> file system as the root device. The functionality is similar to
> NFS root but for the ceph filesystem.
Why do this in the kernel vs in the initramfs? Dracut would be a more
appropriate place, I think, as it has support for other network root
devices already.
[adding linux-fsdevel]
Hi Mark!
There was a question on this thread earlier about whether it makes sense
to support this in-kernel or make users build an initrd. This looks
pretty simple to me and is certainly easier for users, so (with some
adjustments) I'm happy with it, but I think the folks on
linux-fsdevel may have a more informed opinion than I do.
See below for a few comments...
On Wed, 20 Nov 2013, [email protected] wrote:
> From: Mark Doffman <[email protected]>
>
> Analogous to NFS add a new root device option, the ability
> to boot using the Ceph networked file system as the root fs.
>
> This patch adds a new root device option '/dev/ceph' that
> uses a ceph networked file system. File system parameters
> are passed using a new kernel parameter: 'cephroot'.
>
> The 'cephroot' parameters are very similar to 'nfsroot'.
>
> Signed-off-by: Mark Doffman <[email protected]>
> Reviewed-by: Ian Molton <[email protected]>
> ---
> fs/ceph/Kconfig | 10 +++
> fs/ceph/Makefile | 1 +
> fs/ceph/root.c | 163 +++++++++++++++++++++++++++++++++++++++++
> include/linux/ceph/ceph_root.h | 10 +++
> include/linux/root_dev.h | 1 +
> init/do_mounts.c | 32 +++++++-
> 6 files changed, 216 insertions(+), 1 deletion(-)
> create mode 100644 fs/ceph/root.c
> create mode 100644 include/linux/ceph/ceph_root.h
>
> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
> index ac9a2ef..325e83d 100644
> --- a/fs/ceph/Kconfig
> +++ b/fs/ceph/Kconfig
> @@ -25,3 +25,13 @@ config CEPH_FSCACHE
> caching support for Ceph clients using FS-Cache
>
> endif
> +
> +config ROOT_CEPH
> + bool "Root file system on Ceph FS"
> + depends on CEPH_FS=y && IP_PNP
> + help
> + If you want your system to mount its root file system via CEPH,
> + choose Y here. For details, read
> + <file:Documentation/filesystems/ceph/cephroot.txt>.
> +
> + If unsure say N.
> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
> index 32e3010..af2dcbf 100644
> --- a/fs/ceph/Makefile
> +++ b/fs/ceph/Makefile
> @@ -10,3 +10,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
> debugfs.o
>
> ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
> +ceph-$(CONFIG_ROOT_CEPH) += root.o
> diff --git a/fs/ceph/root.c b/fs/ceph/root.c
> new file mode 100644
> index 0000000..bff67fb
> --- /dev/null
> +++ b/fs/ceph/root.c
> @@ -0,0 +1,163 @@
> +/*
> + * Copyright (C) 2012 Codethink Ltd. <[email protected]>
> + *
> + * This file is released under the GPL v2
> + *
> + * Allow a CephFS filesystem to be mounted as root.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/types.h>
> +#include <linux/string.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/utsname.h>
> +#include <linux/root_dev.h>
> +#include <linux/in.h>
> +#include <net/ipconfig.h>
> +#include <linux/ceph/ceph_root.h>
> +
> +/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
> +extern __be32 root_nfs_parse_addr(char *name); /*__init*/
> +
> +#define MAXPATHLEN 1024
> +
> +/* Parameters passed from the kernel command line */
> +static char ceph_root_params[256] __initdata;
> +
> +/* Address of CEPH server */
> +static __be32 servaddr __initdata = htonl(INADDR_NONE);
IPv4 only?
> +
> +/* Name of directory to mount */
> +static char ceph_export_path[MAXPATHLEN + 1] __initdata;
> +
> +/* Text-based mount options */
> +static char ceph_root_options[256] __initdata;
> +
> +/* server:path string passed to mount */
> +static char ceph_root_device[MAXPATHLEN + 1] __initdata;
> +
> +/* Address of CEPH server */
> +static __be32 root_ceph_server_addr = htonl(INADDR_NONE);
> +
> +/*
> + * Parse out root export path and mount options from
> + * passed-in string @incoming.
> + *
> + * Copy the export path into @exppath.
> + *
> + * Returns 0 on success -E2BIG if the resulting options string is too long.
> + */
> +static int __init root_ceph_parse_options(char *incoming, char *exppath,
> + const size_t exppathlen)
> +{
> + char *p;
> + int res = 0;
> +
> + /*
> + * Set the remote path
> + */
> + p = strsep(&incoming, ",");
> + if (*p != '\0' && strcmp(p, "default") != 0)
> + strlcpy(exppath, p, exppathlen);
> +
> + /*
> + * @incoming now points to the rest of the string; if it
> + * contains something, append it to our root options buffer
> + */
> + if (incoming != NULL && *incoming != '\0') {
> + size_t len = strlen(ceph_root_options);
> + size_t destlen = sizeof(ceph_root_options);
> +
> + if (len && ceph_root_options[len - 1] != ',') {
> + if (strlcat(ceph_root_options, ",", destlen) > destlen)
> + res = -E2BIG;
> + }
> +
> + if (strlcat(ceph_root_options, incoming, destlen) > destlen)
> + res = -E2BIG;
> +
> + }
> + return res;
> +}
> +
> +/*
> + * Parse CephFS server and directory information passed on the kernel
> + * command line.
> + *
> + * cephroot=[<server-ip>:]<root-dir>[,<cephfs-options>]
> + */
I think we would be better off using the parsing code in fs/ceph/super.c,
which handles both IPv4 and IPv6, and more importantly lets you provide a
list of monitors. Providing only a single server IP makes it a single
point of failure during mount (though of course if/when we connect we will
discover the current set of mons).
Attaching the options at the end doesn't appeal to me cosmetically, but I
can see how it's useful to have it all in a single string that DHCP can
provide.
sage
> +static int __init ceph_root_setup(char *line)
> +{
> + ROOT_DEV = Root_CEPH;
> +
> + strlcpy(ceph_root_params, line, sizeof(ceph_root_params));
> +
> + /*
> + * Note: root_nfs_parse_addr() removes the server-ip from
> + * ceph_root_params, if it exists.
> + */
> + root_ceph_server_addr = root_nfs_parse_addr(ceph_root_params);
> +
> + return 1;
> +}
> +
> +__setup("cephroot=", ceph_root_setup);
> +
> +/*
> + * ceph_root_data - Return mount device and data for CEPHROOT mount.
> + *
> + * @root_device: OUT: Address of string containing CEPHROOT device.
> + * @root_data: OUT: Address of string containing CEPHROOT mount options.
> + *
> + * Returns: 0 and sets @root_device and @root_data if successful.
> + * error code if unsuccessful.
> + */
> +int __init ceph_root_data(char **root_device, char **root_data)
> +{
> + char *tmp = NULL;
> + const size_t tmplen = sizeof(ceph_export_path);
> + int len;
> + int ret = -E2BIG;
> +
> + servaddr = root_ceph_server_addr;
> + if (servaddr == htonl(INADDR_NONE))
> + return -ENOENT;
> +
> + tmp = kzalloc(tmplen, GFP_KERNEL);
> + if (tmp == NULL)
> + return -ENOMEM;
> +
> + if (ceph_root_params[0] != '\0') {
> + if (root_ceph_parse_options(ceph_root_params, tmp, tmplen))
> + goto out;
> + }
> +
> + /*
> + * Set up ceph_root_device. This looks like: server:/path
> + *
> + * At this point, utsname()->nodename contains our local
> + * IP address or hostname, set by ipconfig. If "%s" exists
> + * in tmp, substitute the nodename, then shovel the whole
> + * mess into ceph_root_device.
> + */
> + len = snprintf(ceph_export_path, sizeof(ceph_export_path),
> + tmp, utsname()->nodename);
> + if (len > (int)sizeof(ceph_export_path))
> + goto out;
> + len = snprintf(ceph_root_device, sizeof(ceph_root_device),
> + "%pI4:%s", &servaddr, ceph_export_path);
> + if (len > (int)sizeof(ceph_root_device))
> + goto out;
> +
> + pr_debug("Root-CEPH: Root device: %s\n", ceph_root_device);
> + pr_debug("Root-CEPH: Root options: %s\n", ceph_root_options);
> + *root_device = ceph_root_device;
> + *root_data = ceph_root_options;
> +
> + ret = 0;
> +
> +out:
> + kfree(tmp);
> + return ret;
> +}
> diff --git a/include/linux/ceph/ceph_root.h b/include/linux/ceph/ceph_root.h
> new file mode 100644
> index 0000000..e6bae63
> --- /dev/null
> +++ b/include/linux/ceph/ceph_root.h
> @@ -0,0 +1,10 @@
> +/*
> + * Copyright (C) 2012 Codethink Ltd. <[email protected]>
> + *
> + * This file is released under the GPL v2
> + *
> + * ceph_root.h
> + */
> +
> +/* linux/fs/ceph/root.c */
> +extern int ceph_root_data(char **root_device, char **root_data); /*__init*/
> diff --git a/include/linux/root_dev.h b/include/linux/root_dev.h
> index ed241aa..af6b182 100644
> --- a/include/linux/root_dev.h
> +++ b/include/linux/root_dev.h
> @@ -16,6 +16,7 @@ enum {
> Root_SDA2 = MKDEV(SCSI_DISK0_MAJOR, 2),
> Root_HDC1 = MKDEV(IDE1_MAJOR, 1),
> Root_SR0 = MKDEV(SCSI_CDROM_MAJOR, 0),
> + Root_CEPH = MKDEV(UNNAMED_MAJOR, 254),
> };
>
> extern dev_t ROOT_DEV;
> diff --git a/init/do_mounts.c b/init/do_mounts.c
> index 8e5addc..d075020 100644
> --- a/init/do_mounts.c
> +++ b/init/do_mounts.c
> @@ -33,6 +33,8 @@
> #include <linux/nfs_fs_sb.h>
> #include <linux/nfs_mount.h>
>
> +#include <linux/ceph/ceph_root.h>
> +
> #include "do_mounts.h"
>
> int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
> @@ -199,6 +201,7 @@ done:
> * a partition with a known unique id.
> * 8) <major>:<minor> major and minor number of the device separated by
> * a colon.
> + * 9) /dev/ceph represents Root_CEPH
> *
> * If name doesn't have fall into the categories above, we return (0,0).
> * block_class is used to check if something is a disk name. If the disk
> @@ -245,7 +248,9 @@ dev_t name_to_dev_t(char *name)
> res = Root_RAM0;
> if (strcmp(name, "ram") == 0)
> goto done;
> -
> + res = Root_CEPH;
> + if (strcmp(name, "ceph") == 0)
> + goto done;
> if (strlen(name) > 31)
> goto fail;
> strcpy(s, name);
> @@ -473,6 +478,22 @@ static int __init mount_nfs_root(void)
> }
> #endif
>
> +#ifdef CONFIG_ROOT_CEPH
> +static int __init mount_ceph_root(void)
> +{
> + char *root_dev, *root_data;
> +
> + if (ceph_root_data(&root_dev, &root_data))
> + return 0;
> +
> + if (do_mount_root(root_dev, "ceph",
> + root_mountflags, root_data))
> + return 0;
> +
> + return 1;
> +}
> +#endif
> +
> #if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
> void __init change_floppy(char *fmt, ...)
> {
> @@ -514,6 +535,15 @@ void __init mount_root(void)
> ROOT_DEV = Root_FD0;
> }
> #endif
> +#ifdef CONFIG_ROOT_CEPH
> + if (ROOT_DEV == Root_CEPH) {
> + if (mount_ceph_root())
> + return;
> +
> + printk(KERN_ERR "VFS: Unable to mount root fs via CephFS, trying floppy.\n");
> + ROOT_DEV = Root_FD0;
> + }
> +#endif
> #ifdef CONFIG_BLK_DEV_FD
> if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
> /* rd_doload is 2 for a dual initrd/ramload setup */
> --
> 1.8.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
On Wed, 20 Nov 2013, [email protected] wrote:
> From: Rob Taylor <[email protected]>
>
> Document using the cephfs as a root device, its purpose,
> functionality and use.
>
> Signed-off-by: Mark Doffman <[email protected]>
> Signed-off-by: Rob Taylor <[email protected]>
> Reviewed-by: Ian Molton <[email protected]>
> ---
> Documentation/filesystems/{ => ceph}/ceph.txt | 0
> Documentation/filesystems/ceph/cephroot.txt | 81 +++++++++++++++++++++++++++
> 2 files changed, 81 insertions(+)
> rename Documentation/filesystems/{ => ceph}/ceph.txt (100%)
> create mode 100644 Documentation/filesystems/ceph/cephroot.txt
>
> diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph/ceph.txt
> similarity index 100%
> rename from Documentation/filesystems/ceph.txt
> rename to Documentation/filesystems/ceph/ceph.txt
> diff --git a/Documentation/filesystems/ceph/cephroot.txt b/Documentation/filesystems/ceph/cephroot.txt
> new file mode 100644
> index 0000000..ae0f5bb
> --- /dev/null
> +++ b/Documentation/filesystems/ceph/cephroot.txt
> @@ -0,0 +1,81 @@
> +Mounting the root filesystem via Ceph (cephroot)
> +===============================================
> +
> +Written 2013 by Rob Taylor <[email protected]>
> +
> +derived from nfsroot.txt:
> +
> +Written 1996 by Gero Kuhlmann <[email protected]>
> +Updated 1997 by Martin Mares <[email protected]>
> +Updated 2006 by Nico Schottelius <[email protected]>
> +Updated 2006 by Horms <[email protected]>
> +
> +
> +
> +In order to use a diskless system, such as an X-terminal or printer server
> +for example, it is necessary for the root filesystem to be present on a
> +non-disk device. This may be an initramfs (see Documentation/filesystems/
> +ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt), a
> +filesystem mounted via NFS or a filesystem mounted via Ceph. The following
> +text describes on how to use Ceph for the root filesystem.
> +
> +For the rest of this text 'client' means the diskless system, and 'server'
> +means the Ceph server.
> +
> +
> +1.) Enabling cephroot capabilities
> + -----------------------------
> +
> +In order to use cephroot, CEPH_FS needs to be selected as
> +built-in during configuration. Once this has been selected, the cephroot
> +option will become available, which should also be selected.
> +
> +In the networking options, kernel level autoconfiguration can be selected,
> +along with the types of autoconfiguration to support. Selecting all of
> +DHCP, BOOTP and RARP is safe.
> +
> +
> +2.) Kernel command line
> + -------------------
> +
> +When the kernel has been loaded by a boot loader (see below) it needs to be
> +told what root fs device to use. And in the case of cephroot, where to find
> +both the server and the name of the directory on the server to mount as root.
> +This can be established using the following kernel command line parameters:
> +
> +root=/dev/ceph
> +
> +This is necessary to enable the pseudo-Ceph-device. Note that it's not a
> +real device but just a synonym to tell the kernel to use Ceph instead of
> +a real device.
> +
> +cephroot=<monaddr>:/[<subdir>],<ceph-opts>
> +
> + <monaddr> Monitor address. Each takes the form host[:port]. If the port
> + is not specified, the Ceph default of 6789 is assumed.
> +
> + <subdir> A subdirectory subdir may be specified if a subset of the file
> + system is to be mounted
> +
> + <ceph-opts> Standard Ceph options. All options are separated by commas.
> + See Documentation/filesystems/ceph/ceph.txt for options and
> + their defaults.
Maybe there is an existing convention here, but: it seems like it would be
simpler to do something like
cephroot=<ip[:<port>][,...]>:/[<subdir>]
i.e., the existing syntax used by mount, that (among other things) can
also include a port, or be a list of mon ips, so that the parsing code
can be re-used. Then,
cephopts=<ceph-opts>
Hopefully this would avoid the parsing in root.c and make things behave
more consistently with respect to how mount(8) is used?
sage
> +
> +4.) References
> + ----------
> +
> +
> +5.) Credits
> + -------
> +
> + cephroot was derived from nfsroot by Rob Taylor <[email protected]>
> + and Mark Doffman <[email protected]>
> +
> + The nfsroot code in the kernel and the RARP support have been written
> + by Gero Kuhlmann <[email protected]>.
> +
> + The rest of the IP layer autoconfiguration code has been written
> + by Martin Mares <[email protected]>.
> +
> + In order to write the initial version of nfsroot I would like to thank
> + Jens-Uwe Mager <[email protected]> for his help.
> --
> 1.8.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
From: Rob Taylor <[email protected]>
Document using the cephfs as a root device, its purpose,
functionality and use.
Signed-off-by: Mark Doffman <[email protected]>
Signed-off-by: Rob Taylor <[email protected]>
Reviewed-by: Ian Molton <[email protected]>
---
Documentation/filesystems/{ => ceph}/ceph.txt | 0
Documentation/filesystems/ceph/cephroot.txt | 86 +++++++++++++++++++++++++++
2 files changed, 86 insertions(+)
rename Documentation/filesystems/{ => ceph}/ceph.txt (100%)
create mode 100644 Documentation/filesystems/ceph/cephroot.txt
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph/ceph.txt
similarity index 100%
rename from Documentation/filesystems/ceph.txt
rename to Documentation/filesystems/ceph/ceph.txt
diff --git a/Documentation/filesystems/ceph/cephroot.txt b/Documentation/filesystems/ceph/cephroot.txt
new file mode 100644
index 0000000..deda4f0
--- /dev/null
+++ b/Documentation/filesystems/ceph/cephroot.txt
@@ -0,0 +1,86 @@
+Mounting the root filesystem via Ceph (cephroot)
+===============================================
+
+Written 2013 by Rob Taylor <[email protected]>
+
+derived from nfsroot.txt:
+
+Written 1996 by Gero Kuhlmann <[email protected]>
+Updated 1997 by Martin Mares <[email protected]>
+Updated 2006 by Nico Schottelius <[email protected]>
+Updated 2006 by Horms <[email protected]>
+
+
+
+In order to use a diskless system, such as an X-terminal or printer server
+for example, it is necessary for the root filesystem to be present on a
+non-disk device. This may be an initramfs (see Documentation/filesystems/
+ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt), a
+filesystem mounted via NFS or a filesystem mounted via Ceph. The following
+text describes how to use Ceph for the root filesystem.
+
+For the rest of this text 'client' means the diskless system, and 'server'
+means the Ceph server.
+
+
+1.) Enabling cephroot capabilities
+ -----------------------------
+
+In order to use cephroot, CEPH_FS needs to be selected as
+built-in during configuration. Once this has been selected, the cephroot
+option will become available, which should also be selected.
+
+In the networking options, kernel level autoconfiguration can be selected,
+along with the types of autoconfiguration to support. Selecting all of
+DHCP, BOOTP and RARP is safe.
+
+
+2.) Kernel command line
+ -------------------
+
+When the kernel has been loaded by a boot loader (see below) it needs to be
+told what root fs device to use and, in the case of cephroot, where to find
+both the server and the name of the directory on the server to mount as root.
+This can be established using the following kernel command line parameters:
+
+root=/dev/ceph
+
+This is necessary to enable the pseudo-Ceph-device. Note that it's not a
+real device but just a synonym to tell the kernel to use Ceph instead of
+a real device.
+
+If cephroot is not specified, it is expected that a valid mount will be
+found via DHCP option 17, Root Path [1].
+
+cephroot=<monaddrs>:/[<subdir>],<ceph-opts>
+
+ <monaddrs> Monitor addresses separated by commas. Each takes the form
+ host[:port]. If the port is not specified, the Ceph default
+ of 6789 is assumed.
+
+ <subdir> A subdirectory subdir may be specified if a subset of the file
+ system is to be mounted.
+
+ <ceph-opts> Standard Ceph options. All options are separated by commas.
+ See Documentation/filesystems/ceph/ceph.txt for options and
+ their defaults.
+
+4.) References
+ ----------
+
+[1] http://tools.ietf.org/html/rfc2132
+
+5.) Credits
+ -------
+
+ cephroot was derived from nfsroot by Rob Taylor <[email protected]>
+ and Mark Doffman <[email protected]>
+
+ The nfsroot code in the kernel and the RARP support have been written
+ by Gero Kuhlmann <[email protected]>.
+
+ The rest of the IP layer autoconfiguration code has been written
+ by Martin Mares <[email protected]>.
+
+ In order to write the initial version of nfsroot I would like to thank
+ Jens-Uwe Mager <[email protected]> for his help.
--
1.8.4
On 01/15/2014 09:26 AM, [email protected] wrote:
> From: Rob Taylor <[email protected]>
>
> Document using the cephfs as a root device, its purpose,
> functionality and use.
>
> Signed-off-by: Mark Doffman <[email protected]>
> Signed-off-by: Rob Taylor <[email protected]>
> Reviewed-by: Ian Molton <[email protected]>
> ---
> Documentation/filesystems/{ => ceph}/ceph.txt | 0
> Documentation/filesystems/ceph/cephroot.txt | 86 +++++++++++++++++++++++++++
> 2 files changed, 86 insertions(+)
> rename Documentation/filesystems/{ => ceph}/ceph.txt (100%)
> create mode 100644 Documentation/filesystems/ceph/cephroot.txt
>
> diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph/ceph.txt
> similarity index 100%
> rename from Documentation/filesystems/ceph.txt
> rename to Documentation/filesystems/ceph/ceph.txt
> diff --git a/Documentation/filesystems/ceph/cephroot.txt b/Documentation/filesystems/ceph/cephroot.txt
> new file mode 100644
> index 0000000..deda4f0
> --- /dev/null
> +++ b/Documentation/filesystems/ceph/cephroot.txt
> @@ -0,0 +1,86 @@
> +Mounting the root filesystem via Ceph (cephroot)
> +===============================================
> +
> +Written 2013 by Rob Taylor <[email protected]>
> +
> +derived from nfsroot.txt:
> +
> +Written 1996 by Gero Kuhlmann <[email protected]>
> +Updated 1997 by Martin Mares <[email protected]>
> +Updated 2006 by Nico Schottelius <[email protected]>
> +Updated 2006 by Horms <[email protected]>
> +
> +
> +
> +In order to use a diskless system, such as an X-terminal or printer server
> +for example, it is necessary for the root filesystem to be present on a
> +non-disk device. This may be an initramfs (see Documentation/filesystems/
> +ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt), a
> +filesystem mounted via NFS or a filesystem mounted via Ceph. The following
> +text describes on how to use Ceph for the root filesystem.
> +
> +For the rest of this text 'client' means the diskless system, and 'server'
> +means the Ceph server.
> +
> +
> +1.) Enabling cephroot capabilities
> + -----------------------------
> +
> +In order to use cephroot, CEPH_FS needs to be selected as
> +built-in during configuration. Once this has been selected, the cephroot
> +option will become available, which should also be selected.
> +
> +In the networking options, kernel level autoconfiguration can be selected,
> +along with the types of autoconfiguration to support. Selecting all of
> +DHCP, BOOTP and RARP is safe.
> +
> +
> +2.) Kernel command line
> + -------------------
> +
> +When the kernel has been loaded by a boot loader (see below) it needs to be
> +told what root fs device to use. And in the case of cephroot, where to find
use, and
> +both the server and the name of the directory on the server to mount as root.
> +This can be established using the following kernel command line parameters:
> +
> +root=/dev/ceph
> +
> +This is necessary to enable the pseudo-Ceph-device. Note that it's not a
> +real device but just a synonym to tell the kernel to use Ceph instead of
> +a real device.
> +
> +If cephroot is not specified, it is expected that that a valid mount will be
drop duplicate: that
> +found via DHCP option 17, Root Path [1]
> +
> +cephroot=<monaddrs>:/[<subdir>],<ceph-opts>
> +
> + <monaddrs> Monitor addresses separated by commas. Each takes the form
> + host[:port]. If the port is not specified, the Ceph default
> + of 6789 is assumed.
> +
> + <subdir> A subdirectory subdir may be specified if a subset of the file
> + system is to be mounted
mounted.
> +
> + <ceph-opts> Standard Ceph options. All options are separated by commas.
> + See Documentation/filesystems/ceph/ceph.txt for options and
> + their defaults.
> +
> +4.) References
> + ----------
> +
> +[1] http://tools.ietf.org/html/rfc2132
> +
> +5.) Credits
> + -------
> +
> + cephroot was derived from nfsroot by Rob Taylor <[email protected]>
> + and Mark Doffman <[email protected]>
> +
> + The nfsroot code in the kernel and the RARP support have been written
> + by Gero Kuhlmann <[email protected]>.
> +
> + The rest of the IP layer autoconfiguration code has been written
> + by Martin Mares <[email protected]>.
> +
> + In order to write the initial version of nfsroot I would like to thank
> + Jens-Uwe Mager <[email protected]> for his help.
>
--
~Randy
From: Mark Doffman <[email protected]>
Hi All,
The following is a second version of a patch series that adds the ability to use
a ceph distributed file system as the root device.
Changes from version 1
fs/ceph/root.c:
The parsing code that takes the DHCP option 17 and kernel command line
parameters has been extensively altered.
The parsing now accepts multiple monitor addresses and ipv6 addresses.
The monitors listed in DHCP option 17 are now concatenated with those
listed on the kernel command line.
The patch series applies to v3.13-rc8-7-g3539717
Thanks
Mark
Mark Doffman (1):
init: Add a new root device option, the Ceph file system
Rob Taylor (1):
Documentation: Document the cephroot functionality
Documentation/filesystems/{ => ceph}/ceph.txt | 0
Documentation/filesystems/ceph/cephroot.txt | 86 +++++++++++++
fs/ceph/Kconfig | 10 ++
fs/ceph/Makefile | 1 +
fs/ceph/root.c | 176 ++++++++++++++++++++++++++
include/linux/ceph/ceph_root.h | 10 ++
include/linux/root_dev.h | 1 +
init/do_mounts.c | 32 ++++-
net/ipv4/ipconfig.c | 10 +-
9 files changed, 323 insertions(+), 3 deletions(-)
rename Documentation/filesystems/{ => ceph}/ceph.txt (100%)
create mode 100644 Documentation/filesystems/ceph/cephroot.txt
create mode 100644 fs/ceph/root.c
create mode 100644 include/linux/ceph/ceph_root.h
--
1.8.4
Hi Sage,
On 12/06/2013 11:57 PM, Sage Weil wrote:
> On Wed, 20 Nov 2013, [email protected] wrote:
>> From: Rob Taylor <[email protected]>
>>
>> Document using the cephfs as a root device, its purpose,
>> functionality and use.
>>
>> Signed-off-by: Mark Doffman <[email protected]>
>> Signed-off-by: Rob Taylor <[email protected]>
>> Reviewed-by: Ian Molton <[email protected]>
>> ---
>> Documentation/filesystems/{ => ceph}/ceph.txt | 0
>> Documentation/filesystems/ceph/cephroot.txt | 81 +++++++++++++++++++++++++++
>> 2 files changed, 81 insertions(+)
>> rename Documentation/filesystems/{ => ceph}/ceph.txt (100%)
>> create mode 100644 Documentation/filesystems/ceph/cephroot.txt
>>
>> diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph/ceph.txt
>> similarity index 100%
>> rename from Documentation/filesystems/ceph.txt
>> rename to Documentation/filesystems/ceph/ceph.txt
>> diff --git a/Documentation/filesystems/ceph/cephroot.txt b/Documentation/filesystems/ceph/cephroot.txt
>> new file mode 100644
>> index 0000000..ae0f5bb
>> --- /dev/null
>> +++ b/Documentation/filesystems/ceph/cephroot.txt
>> @@ -0,0 +1,81 @@
>> +Mounting the root filesystem via Ceph (cephroot)
>> +===============================================
>> +
>> +Written 2013 by Rob Taylor <[email protected]>
>> +
>> +derived from nfsroot.txt:
>> +
>> +Written 1996 by Gero Kuhlmann <[email protected]>
>> +Updated 1997 by Martin Mares <[email protected]>
>> +Updated 2006 by Nico Schottelius <[email protected]>
>> +Updated 2006 by Horms <[email protected]>
>> +
>> +
>> +
>> +In order to use a diskless system, such as an X-terminal or printer server
>> +for example, it is necessary for the root filesystem to be present on a
>> +non-disk device. This may be an initramfs (see Documentation/filesystems/
>> +ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt), a
>> +filesystem mounted via NFS or a filesystem mounted via Ceph. The following
>> +text describes on how to use Ceph for the root filesystem.
>> +
>> +For the rest of this text 'client' means the diskless system, and 'server'
>> +means the Ceph server.
>> +
>> +
>> +1.) Enabling cephroot capabilities
>> + -----------------------------
>> +
>> +In order to use cephroot, CEPH_FS needs to be selected as
>> +built-in during configuration. Once this has been selected, the cephroot
>> +option will become available, which should also be selected.
>> +
>> +In the networking options, kernel level autoconfiguration can be selected,
>> +along with the types of autoconfiguration to support. Selecting all of
>> +DHCP, BOOTP and RARP is safe.
>> +
>> +
>> +2.) Kernel command line
>> + -------------------
>> +
>> +When the kernel has been loaded by a boot loader (see below) it needs to be
>> +told what root fs device to use. And in the case of cephroot, where to find
>> +both the server and the name of the directory on the server to mount as root.
>> +This can be established using the following kernel command line parameters:
>> +
>> +root=/dev/ceph
>> +
>> +This is necessary to enable the pseudo-Ceph-device. Note that it's not a
>> +real device but just a synonym to tell the kernel to use Ceph instead of
>> +a real device.
>> +
>> +cephroot=<monaddr>:/[<subdir>],<ceph-opts>
>> +
>> + <monaddr> Monitor address. Each takes the form host[:port]. If the port
>> + is not specified, the Ceph default of 6789 is assumed.
>> +
>> + <subdir> A subdirectory subdir may be specified if a subset of the file
>> + system is to be mounted
>> +
>> + <ceph-opts> Standard Ceph options. All options are separated by commas.
>> + See Documentation/filesystems/ceph/ceph.txt for options and
>> + their defaults.
>
> Maybe there is an existing convention here, but: it seems like it would be
> simpler to do something like
>
> cephroot=<ip[:<port>][,...]>:/[<subdir>]
>
> i.e., the existing syntax used by mount, that (among other things) can
> also include a port, or be a list of mon ips, so that the parsing code
> can be re-used. Then,
>
> cephopts=<ceph-opts>
>
> Hopefully this would avoid the parsing in root.c and make things behave
> more consistently with respect to how mount(8) is used?
This would make things more consistent with mount, and easier! The
reason to keep it the way it is is for consistency with NFS and DHCP
option 17.
NFS concatenates the options in DHCP root-path (option 17) with the ones
placed on the kernel command line. We could separate out the device and
path strings from the options, but they would still be merged together
in the DHCP string. Some parsing would still be required to split the
DHCP string and merge with command line options. I'd prefer to keep them
together on the command line also, just to have things stay similar to NFS.
Thanks
Mark
>
> sage
>
>> +
>> +4.) References
>> + ----------
>> +
>> +
>> +5.) Credits
>> + -------
>> +
>> + cephroot was derived from nfsroot by Rob Taylor <[email protected]>
>> + and Mark Doffman <[email protected]>
>> +
>> + The nfsroot code in the kernel and the RARP support have been written
>> + by Gero Kuhlmann <[email protected]>.
>> +
>> + The rest of the IP layer autoconfiguration code has been written
>> + by Martin Mares <[email protected]>.
>> +
>> + In order to write the initial version of nfsroot I would like to thank
>> + Jens-Uwe Mager <[email protected]> for his help.
>> --
>> 1.8.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
>>
From: Mark Doffman <[email protected]>
Analogous to NFS, add a new root device option: the ability
to boot using the Ceph networked file system as the root fs.
This patch adds a new root device option '/dev/ceph' that
uses a ceph networked file system. File system parameters
are passed using a new kernel parameter: 'cephroot'.
The 'cephroot' parameters are very similar to 'nfsroot'.
Signed-off-by: Mark Doffman <[email protected]>
Reviewed-by: Ian Molton <[email protected]>
---
fs/ceph/Kconfig | 10 +++
fs/ceph/Makefile | 1 +
fs/ceph/root.c | 176 +++++++++++++++++++++++++++++++++++++++++
include/linux/ceph/ceph_root.h | 10 +++
include/linux/root_dev.h | 1 +
init/do_mounts.c | 32 +++++++-
net/ipv4/ipconfig.c | 10 ++-
7 files changed, 237 insertions(+), 3 deletions(-)
create mode 100644 fs/ceph/root.c
create mode 100644 include/linux/ceph/ceph_root.h
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index ac9a2ef..325e83d 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -25,3 +25,13 @@ config CEPH_FSCACHE
caching support for Ceph clients using FS-Cache
endif
+
+config ROOT_CEPH
+ bool "Root file system on Ceph FS"
+ depends on CEPH_FS=y && IP_PNP
+ help
+ If you want your system to mount its root file system via CEPH,
+ choose Y here. For details, read
+ <file:Documentation/filesystems/ceph/cephroot.txt>.
+
+ If unsure say N.
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 32e3010..af2dcbf 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -10,3 +10,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
debugfs.o
ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
+ceph-$(CONFIG_ROOT_CEPH) += root.o
diff --git a/fs/ceph/root.c b/fs/ceph/root.c
new file mode 100644
index 0000000..1559c19
--- /dev/null
+++ b/fs/ceph/root.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2012 Codethink Ltd. <[email protected]>
+ *
+ * This file is released under the GPL v2
+ *
+ * Allow a CephFS filesystem to be mounted as root.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/root_dev.h>
+#include <linux/in.h>
+#include <net/ipconfig.h>
+#include <linux/ceph/ceph_root.h>
+
+#define MAXPATHLEN 1024
+
+/* Parameters passed from the kernel command line */
+static char ceph_command_line_params[256] __initdata;
+
+/* server:path string passed to mount */
+static char ceph_root_device[MAXPATHLEN + 1] __initdata;
+
+/* Name of directory to mount */
+static char ceph_export_path[MAXPATHLEN + 1] __initdata;
+
+/* Mount options */
+static char ceph_root_options[256] __initdata;
+
+/*
+ * Parse CephFS server and directory information passed on the kernel
+ * command line.
+ *
+ * cephroot=[<server-ip>][,<server-ips>]:<root-dir>[,<cephfs-options>]
+ */
+static int __init ceph_root_setup(char *line)
+{
+ ROOT_DEV = Root_CEPH;
+
+ strlcpy(ceph_command_line_params, line,
+ sizeof(ceph_command_line_params));
+
+ return 1;
+}
+
+__setup("cephroot=", ceph_root_setup);
+
+/*
+ * ceph_root_append - Concatenates an options or address string,
+ * adding a ',' delimiter if necessary.
+ *
+ * Returns 0 on success, -E2BIG if the resulting string is too long.
+ */
+static int __init ceph_root_append(char *incoming,
+ char *dest,
+ const size_t destlen)
+{
+ int res = 0;
+
+ if (incoming != NULL && *incoming != '\0') {
+ size_t len = strlen(dest);
+
+ if (len && dest[len - 1] != ',') {
+ if (strlcat(dest, ",", destlen) > destlen)
+ res = -E2BIG;
+ }
+
+ if (strlcat(dest, incoming, destlen) > destlen)
+ res = -E2BIG;
+
+ }
+ return res;
+}
+
+/*
+ * ceph_root_parse_params - Parse out root export path and mount options from
+ * passed-in string @incoming.
+ *
+ * Copy the path into @outpath.
+ *
+ * Returns 0 on success, -EINVAL if @incoming has no ":/" or "default", or
+ * -E2BIG if the resulting options or device string is too long.
+ */
+static int __init ceph_root_parse_params(char *incoming, char *outpath,
+ const size_t outpathlen)
+{
+ int res = -EINVAL;
+ char *options;
+ char *path;
+
+ options = strstr(incoming, ":/");
+ if (options == NULL)
+ options = strstr(incoming, "default");
+
+ if (options != NULL) {
+ path = strsep(&options, ",");
+ if (*path != '\0' && strcmp(path, "default") != 0)
+ strlcpy(outpath, path, outpathlen);
+ res = ceph_root_append(options, ceph_root_options,
+ sizeof(ceph_root_options));
+
+ if (res == 0) {
+ *path = '\0';
+ res = ceph_root_append(incoming, ceph_root_device,
+ sizeof(ceph_root_device));
+ }
+ }
+
+ return res;
+}
+
+/*
+ * ceph_root_data - Return mount device and data for CEPHROOT mount.
+ *
+ * @root_device: OUT: Address of string containing CEPHROOT device.
+ * @root_data: OUT: Address of string containing CEPHROOT mount options.
+ *
+ * Returns: 0 and sets @root_device and @root_data if successful.
+ * error code if unsuccessful.
+ */
+int __init ceph_root_data(char **root_device, char **root_data)
+{
+ char *tmp_root_path = NULL;
+ const size_t tmplen = sizeof(ceph_export_path);
+ int len;
+ int res = -E2BIG;
+
+ tmp_root_path = kzalloc(tmplen, GFP_KERNEL);
+ if (tmp_root_path == NULL)
+ return -ENOMEM;
+
+ if (root_server_path[0] != '\0') {
+ if (ceph_root_parse_params(root_server_path, tmp_root_path,
+ tmplen))
+ goto out;
+ }
+
+ if (ceph_command_line_params[0] != '\0') {
+ if (ceph_root_parse_params(ceph_command_line_params,
+ tmp_root_path, tmplen))
+ goto out;
+ }
+
+ /*
+ * Set up ceph_root_device. This looks like: server:/path
+ *
+ * At this point, utsname()->nodename contains our local
+ * IP address or hostname, set by ipconfig. If "%s" exists
+ * in tmp_root_path, substitute the nodename, then shovel the whole
+ * mess into ceph_root_device.
+ */
+ len = snprintf(ceph_export_path, sizeof(ceph_export_path),
+ tmp_root_path, utsname()->nodename);
+ if (len > (int)sizeof(ceph_export_path))
+ goto out;
+
+ len = strlcat(ceph_root_device, ceph_export_path,
+ sizeof(ceph_root_device));
+ if (len > (int)sizeof(ceph_root_device))
+ goto out;
+
+ pr_debug("Root-CEPH: Root device: %s\n", ceph_root_device);
+ pr_debug("Root-CEPH: Root options: %s\n", ceph_root_options);
+ *root_device = ceph_root_device;
+ *root_data = ceph_root_options;
+
+ res = 0;
+
+out:
+ kfree(tmp_root_path);
+ return res;
+}
diff --git a/include/linux/ceph/ceph_root.h b/include/linux/ceph/ceph_root.h
new file mode 100644
index 0000000..e6bae63
--- /dev/null
+++ b/include/linux/ceph/ceph_root.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (C) 2012 Codethink Ltd. <[email protected]>
+ *
+ * This file is released under the GPL v2
+ *
+ * ceph_root.h
+ */
+
+/* linux/fs/ceph/root.c */
+extern int ceph_root_data(char **root_device, char **root_data); /*__init*/
diff --git a/include/linux/root_dev.h b/include/linux/root_dev.h
index ed241aa..af6b182 100644
--- a/include/linux/root_dev.h
+++ b/include/linux/root_dev.h
@@ -16,6 +16,7 @@ enum {
Root_SDA2 = MKDEV(SCSI_DISK0_MAJOR, 2),
Root_HDC1 = MKDEV(IDE1_MAJOR, 1),
Root_SR0 = MKDEV(SCSI_CDROM_MAJOR, 0),
+ Root_CEPH = MKDEV(UNNAMED_MAJOR, 254),
};
extern dev_t ROOT_DEV;
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 8e5addc..d075020 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -33,6 +33,8 @@
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_mount.h>
+#include <linux/ceph/ceph_root.h>
+
#include "do_mounts.h"
int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
@@ -199,6 +201,7 @@ done:
* a partition with a known unique id.
* 8) <major>:<minor> major and minor number of the device separated by
* a colon.
+ * 9) /dev/ceph represents Root_CEPH
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -245,7 +248,9 @@ dev_t name_to_dev_t(char *name)
res = Root_RAM0;
if (strcmp(name, "ram") == 0)
goto done;
-
+ res = Root_CEPH;
+ if (strcmp(name, "ceph") == 0)
+ goto done;
if (strlen(name) > 31)
goto fail;
strcpy(s, name);
@@ -473,6 +478,22 @@ static int __init mount_nfs_root(void)
}
#endif
+#ifdef CONFIG_ROOT_CEPH
+static int __init mount_ceph_root(void)
+{
+ char *root_dev, *root_data;
+
+ if (ceph_root_data(&root_dev, &root_data))
+ return 0;
+
+ if (do_mount_root(root_dev, "ceph",
+ root_mountflags, root_data))
+ return 0;
+
+ return 1;
+}
+#endif
+
#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
void __init change_floppy(char *fmt, ...)
{
@@ -514,6 +535,15 @@ void __init mount_root(void)
ROOT_DEV = Root_FD0;
}
#endif
+#ifdef CONFIG_ROOT_CEPH
+ if (ROOT_DEV == Root_CEPH) {
+ if (mount_ceph_root())
+ return;
+
+ printk(KERN_ERR "VFS: Unable to mount root fs via CephFS, trying floppy.\n");
+ ROOT_DEV = Root_FD0;
+ }
+#endif
#ifdef CONFIG_BLK_DEV_FD
if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
/* rd_doload is 2 for a dual initrd/ramload setup */
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138..765eea4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1435,10 +1435,10 @@ static int __init ip_auto_config(void)
* missing values.
*/
if (ic_myaddr == NONE ||
-#ifdef CONFIG_ROOT_NFS
+#if defined(CONFIG_ROOT_NFS) || defined(CONFIG_ROOT_CEPH)
(root_server_addr == NONE &&
ic_servaddr == NONE &&
- ROOT_DEV == Root_NFS) ||
+ (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CEPH)) ||
#endif
ic_first_dev->next) {
#ifdef IPCONFIG_DYNAMIC
@@ -1465,6 +1465,12 @@ static int __init ip_auto_config(void)
goto try_try_again;
}
#endif
+#ifdef CONFIG_ROOT_CEPH
+ if (ROOT_DEV == Root_CEPH) {
+ pr_err("IP-Config: Retrying forever (CEPH root)...\n");
+ goto try_try_again;
+ }
+#endif
if (--retries) {
pr_err("IP-Config: Reopening network devices...\n");
--
1.8.4