2006-01-11 17:17:25

by Mike D. Day

[permalink] [raw]
Subject: [RFC] [PATCH] sysfs support for Xen attributes

The included patch provides some sysfs helper routines so that xen
domain kernel processes can easily attributes to sysfs. The intent is
that any kernel process can add an attribute under /sys/xen just as
easily as adding a file under /proc. In other words, without using the
driver core to create a subsystem, dealing with kobjects and ksets, etc.

One example adds xen version info under /sys/xen/version

The comments desired are (1) do the helper routines in xen_sysfs
duplicate code already present in linux (or under development somewhere
else). (2) Is it appropriate for a process to create sysfs attributes
without implementing a driver subsystem or (3) are such attributes
better off living under /proc. (4) any other feedback is appreciated.


# HG changeset patch
# User [email protected]
# Node ID f6e4c20a786b9322843fb46a45f7796fc6a33b44
# Parent ed7888c838ad5cd213a24d21ae294b31a2500f4d
Export Xen information to sysfs Allow xen domain kernel to add xen
data to /sys/xen without also requiring creation of driver subsystems.

As an example, add xen version by creating /sys/xen/version

signed-off-by Mike Day <[email protected]>

diff -r ed7888c838ad -r f6e4c20a786b
linux-2.6-xen-sparse/arch/xen/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Jan 10
17:53:44 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Jan 10
23:30:37 2006
@@ -16,3 +16,4 @@
obj-$(CONFIG_PROC_FS) += xen_proc.o
obj-$(CONFIG_NET) += skbuff.o
obj-$(CONFIG_SMP) += smpboot.o
+obj-$(CONFIG_SYSFS) += xen_sysfs.o xen_sysfs_version.o
diff -r ed7888c838ad -r f6e4c20a786b
linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs.c
--- /dev/null Tue Jan 10 17:53:44 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs.c Tue Jan 10
23:30:37 2006
@@ -0,0 +1,694 @@
+/*
+ copyright (c) 2006 IBM Corporation
+ Mike Day <[email protected]>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA
+*/
+
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <asm-generic/bug.h>
+
+#ifdef DEBUG
+#define DPRINTK(fmt, args...) printk(KERN_DEBUG "xen_sysfs: ", fmt,
## args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+#ifndef BOOL
+#define BOOL int
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+
+#define __sysfs_ref__
+
+struct xen_sysfs_object;
+
+struct xen_sysfs_attr {
+ struct bin_attribute attr;
+ ssize_t (*show)(void *, char *) ;
+ ssize_t (*store)(void *, const char *, size_t) ;
+ ssize_t (*read)(void *, char *, loff_t, size_t );
+ ssize_t (*write)(void *, char *, loff_t, size_t) ;
+};
+
+
+/* flags bits */
+#define XEN_SYSFS_UNINITIALIZED 0x00
+#define XEN_SYSFS_CHAR_TYPE 0x01
+#define XEN_SYSFS_BIN_TYPE 0x02
+#define XEN_SYSFS_DIR_TYPE 0x04
+#define XEN_SYSFS_LINKED 0x08
+#define XEN_SYSFS_UNLINKED 0x10
+#define XEN_SYSFS_LINK_TYPE 0x11
+
+
+struct xen_sysfs_object {
+ struct list_head list;
+ int flags;
+ struct kobject kobj;
+ struct xen_sysfs_attr attr;
+ char * path;
+ struct list_head children;
+ struct xen_sysfs_object * parent;
+ atomic_t refcount;
+ void * user_data;
+ void (*user_data_release)(void *);
+ void (*destroy)(struct xen_sysfs_object *);
+};
+
+
+static __sysfs_ref__ struct xen_sysfs_object *
+find_object(struct xen_sysfs_object * obj, const char * path);
+
+
+static __sysfs_ref__ struct xen_sysfs_object *
+new_sysfs_object(const char * path,
+ int type,
+ int mode,
+ ssize_t (*show)(void *, char *),
+ ssize_t (*store)(void *, const char *, size_t),
+ ssize_t (*read)(void *, char *, loff_t, size_t),
+ ssize_t (*write)(void *, char *, loff_t, size_t),
+ void * user_data,
+ void (* user_data_release)(void *)) ;
+
+static void destroy_sysfs_object(struct xen_sysfs_object * obj);
+static __sysfs_ref__ struct xen_sysfs_object * __find_parent(const char
* path) ;
+static __sysfs_ref__ int __add_child(struct xen_sysfs_object *parent,
+ struct xen_sysfs_object *child);
+static void remove_child(struct xen_sysfs_object *child);
+static void get_object(struct xen_sysfs_object *);
+static int put_object(struct xen_sysfs_object *,
+ void (*)(struct xen_sysfs_object *));
+
+
+/* Is A == B ? */
+#define streq(a,b) (strcmp((a),(b)) == 0)
+
+/* Does A start with B ? */
+#define strstarts(a,b) (strncmp((a),(b),strlen(b)) == 0)
+
+
+#define __sysfs_ref__
+
+#define XEN_SYSFS_ATTR(_name, _mode, _show, _store) \
+ struct xen_sysfs_attr xen_sysfs_attr_##_name = __ATTR(_name,
_mode, _show, _store)
+
+#define __XEN_KOBJ(_parent, _dentry, _ktype) \
+ { \
+ .k_name = NULL, \
+ .parent = _parent, \
+ .dentry = _dentry, \
+ .ktype = _ktype, \
+ }
+
+static struct semaphore xen_sysfs_mut = __MUTEX_INITIALIZER(xen_sysfs_mut);
+static inline int
+sysfs_down(struct semaphore * mut)
+{
+ int err;
+ do {
+ err = down_interruptible(mut);
+ } while ( err && err == -EINTR );
+ return err;
+}
+
+#define sysfs_up(mut) up(mut)
+#define to_xen_attr(_attr) container_of(_attr, struct xen_sysfs_attr,
attr.attr)
+#define to_xen_obj(_xen_attr) container_of(_xen_attr, struct
xen_sysfs_object, attr)
+
+static ssize_t
+xen_sysfs_show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+ struct xen_sysfs_attr * xen_attr = to_xen_attr(attr);
+ struct xen_sysfs_object * xen_obj = to_xen_obj(xen_attr);
+ if(xen_attr->show)
+ return xen_attr->show(xen_obj->user_data, buf);
+ return 0;
+}
+
+static ssize_t
+xen_sysfs_store(struct kobject * kobj, struct attribute * attr,
+ const char *buf, size_t count)
+{
+ struct xen_sysfs_attr * xen_attr = to_xen_attr(attr);
+ struct xen_sysfs_object * xen_obj = to_xen_obj(xen_attr);
+ if(xen_attr->store)
+ return xen_attr->store(xen_obj->user_data, buf, count) ;
+ return 0;
+}
+
+#define to_xen_obj_bin(_kobj) container_of(_kobj, struct
xen_sysfs_object, kobj)
+
+static ssize_t
+xen_sysfs_read(struct kobject *kobj, char * buf, loff_t offset, size_t
size)
+{
+ struct xen_sysfs_object * xen_obj = to_xen_obj_bin(kobj);
+ if(xen_obj->attr.read)
+ return xen_obj->attr.read(xen_obj->user_data, buf,
offset, size);
+ return 0;
+}
+
+
+static ssize_t
+xen_sysfs_write(struct kobject *kobj, char * buf, loff_t offset, size_t
size)
+{
+ struct xen_sysfs_object * xen_obj = to_xen_obj_bin(kobj);
+ if (xen_obj->attr.write)
+ return xen_obj->attr.write(xen_obj->user_data, buf,
offset, size);
+ if(size == 0 )
+ return PAGE_SIZE;
+
+ return size;
+}
+
+static struct sysfs_ops xen_sysfs_ops = {
+ .show = xen_sysfs_show,
+ .store = xen_sysfs_store,
+};
+
+static struct kobj_type xen_kobj_type = {
+ .release = NULL,
+ .sysfs_ops = &xen_sysfs_ops,
+ .default_attrs = NULL,
+};
+
+
+/* xen sysfs root entry */
+static struct xen_sysfs_object xen_root = {
+ .flags = 0,
+ .kobj = {
+ .k_name = NULL,
+ .parent = NULL,
+ .dentry = NULL,
+ .ktype = &xen_kobj_type,
+ },
+ .attr = {
+ .attr = {
+ .attr = {
+ .name = NULL,
+ .mode = 0775,
+ },
+
+ },
+ .show = NULL,
+ .store = NULL,
+ .read = NULL,
+ .write = NULL,
+ },
+ .path = __stringify(/sys/xen),
+ .list = LIST_HEAD_INIT(xen_root.list),
+ .children = LIST_HEAD_INIT(xen_root.children),
+ .parent = NULL,
+};
+
+/* xen sysfs path functions */
+
+static BOOL
+valid_chars(const char *path)
+{
+ if( ! strstarts(path, "/sys/xen") )
+ return FALSE;
+ if(strstr(path, "//"))
+ return FALSE;
+ return (strspn(path,
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789-/_@~$") == strlen(path));
+}
+
+
+/* return value must be kfree'd */
+static char *
+dup_path(const char *path)
+{
+ char * ret;
+ int len;
+ BUG_ON( ! path );
+
+ if( FALSE == valid_chars(path) ) {
+ return NULL;
+ }
+
+ len = strlen(path) + 1;
+ ret = kcalloc(len - 1, sizeof(char), GFP_KERNEL);
+ memcpy(ret, path, len);
+ return ret;
+}
+
+
+
+static char *
+basename(const char *name)
+{
+ return strrchr(name, '/') + 1;
+}
+
+static char *
+strip_trailing_slash(char * path)
+{
+ int len = strlen(path);
+
+ char * term = path + len - 1;
+ if( *term == '/')
+ *term = 0;
+ return path;
+}
+
+/* return value must be kfree'd */
+static char * dirname(const char * name)
+{
+ char *ret;
+ int len;
+
+ len = strlen(name) - strlen(basename(name)) + 1;
+ ret = kcalloc(len, sizeof(char), GFP_KERNEL);
+ memcpy(ret, name, len - 1);
+ ret = strip_trailing_slash(ret);
+
+ return ret;
+}
+
+
+/* type must be char, bin, or dir */
+static __sysfs_ref__ struct xen_sysfs_object *
+new_sysfs_object(const char * path,
+ int type,
+ int mode,
+ ssize_t (*show)(void *, char *),
+ ssize_t (*store)(void *, const char *, size_t),
+ ssize_t (*read)(void *, char *, loff_t, size_t),
+ ssize_t (*write)(void *, char *, loff_t, size_t),
+ void * user_data,
+ void (* user_data_release)(void *))
+{
+ struct xen_sysfs_object * ret =
+ (struct xen_sysfs_object *)kcalloc(sizeof(struct
xen_sysfs_object),
+ sizeof(char),
+ GFP_KERNEL);
+ BUG_ON(ret == NULL);
+
+ ret->flags = type;
+ BUG_ON( (type & XEN_SYSFS_DIR_TYPE) && (show || store) );
+
+ if( NULL == (ret->path = dup_path(path)) ) {
+ kfree(ret);
+ return NULL;
+ }
+ kobject_set_name(&ret->kobj, basename(path));
+ kobject_init(&ret->kobj);
+ ret->attr.attr.attr.name = kobject_name(&ret->kobj);
+ ret->attr.attr.attr.owner = THIS_MODULE;
+ ret->attr.attr.attr.mode = mode;
+ ret->kobj.ktype = &xen_kobj_type;
+ if( type & XEN_SYSFS_CHAR_TYPE ) {
+ ret->attr.show = show;
+ ret->attr.store = store;
+ }
+ else if ( type & XEN_SYSFS_BIN_TYPE ) {
+ ret->attr.attr.size = PAGE_SIZE;
+ ret->attr.attr.read = xen_sysfs_read;
+ ret->attr.attr.write = xen_sysfs_write;
+ ret->attr.read = read;
+ ret->attr.write = write;
+ }
+ INIT_LIST_HEAD(&ret->list);
+ INIT_LIST_HEAD(&ret->children);
+ atomic_set(&ret->refcount, 1);
+ ret->destroy = destroy_sysfs_object;
+ return ret;
+}
+
+static void
+get_object(struct xen_sysfs_object *obj)
+{
+ BUG_ON( ! atomic_read(&obj->refcount) );
+ kobject_get(&obj->kobj);
+ atomic_inc(&obj->refcount);
+ return;
+}
+
+static int
+put_object(struct xen_sysfs_object *obj,
+ void (*release)(struct xen_sysfs_object *))
+{
+ BUG_ON( ! release );
+ BUG_ON( release == (void (*)(struct xen_sysfs_object *))kfree);
+ kobject_put(&obj->kobj);
+ if(atomic_dec_and_test(&obj->refcount)) {
+ release(obj);
+ return 1;
+ }
+ return 0;
+}
+
+
+static void
+sysfs_release(struct xen_sysfs_object * obj)
+{
+ BUG_ON( ! (obj->flags & XEN_SYSFS_UNLINKED) );
+ BUG_ON( ! list_empty(&obj->children) );
+ BUG_ON( obj->parent ) ;
+
+ kobject_cleanup(&obj->kobj);
+ if(obj->attr.attr.attr.name)
+ kfree(obj->attr.attr.attr.name);
+ if(obj->user_data && obj->user_data_release )
+ obj->user_data_release(obj->user_data);
+ if( obj->path ) {
+ kfree(obj->path);
+ obj->path = NULL;
+ }
+ if (obj->destroy)
+ obj->destroy(obj);
+ return;
+}
+
+static void
+destroy_sysfs_object(struct xen_sysfs_object * obj)
+{
+ if(obj->path)
+ kfree(obj->path);
+ BUG_ON( ! list_empty(&obj->children) ) ;
+ BUG_ON ( obj->parent );
+ kfree(obj);
+ return;
+}
+
+
+/* refcounts object when returned */
+static __sysfs_ref__ struct xen_sysfs_object *
+find_object(struct xen_sysfs_object * obj, const char * path)
+{
+ struct list_head * tmp = NULL;
+ struct xen_sysfs_object *this_obj = NULL, * tmp_obj = NULL;
+
+ if(obj->flags & XEN_SYSFS_UNLINKED) {
+ return NULL;
+ }
+ if(! strcmp(obj->path, path) ) {
+ get_object(obj);
+ return obj;
+ }
+ // if path is longer than obj-path, search children
+ if ( strstarts(path, obj->path) &&
+ strlen(path) > strlen(obj->path) &&
+ ! list_empty(&obj->children) ) {
+ list_for_each(tmp, (&obj->children)) {
+ tmp_obj = list_entry(tmp, struct
xen_sysfs_object, list);
+ if( NULL != (this_obj = find_object(tmp_obj,
path)) ) {
+ return this_obj;
+ }
+ }
+ }
+ return NULL;
+}
+
+/* parent is ref counted when returned */
+static __sysfs_ref__ struct xen_sysfs_object *
+__find_parent(const char * path)
+{
+ char * dir;
+ struct xen_sysfs_object * parent;
+
+ BUG_ON( ! path );
+ if ( ! valid_chars(path))
+ return NULL;
+ dir = dirname(path);
+ BUG_ON ( sysfs_down(&xen_sysfs_mut) );
+ parent = find_object(&xen_root, dir);
+
+ sysfs_up(&xen_sysfs_mut);
+ kfree(dir);
+
+ return parent;
+}
+
+static __sysfs_ref__ int
+__add_child(struct xen_sysfs_object *parent,
+ struct xen_sysfs_object *child)
+{
+ int err = EINVAL;
+
+ BUG_ON ( sysfs_down(&xen_sysfs_mut) );
+ list_add_tail(&child->list, &parent->children);
+ child->kobj.parent = &parent->kobj;
+ child->kobj.dentry = parent->kobj.dentry;
+ if(child->flags & XEN_SYSFS_DIR_TYPE)
+ err = sysfs_create_dir(&child->kobj);
+ else if (child->flags & XEN_SYSFS_CHAR_TYPE)
+ err = sysfs_create_file(&child->kobj,
&child->attr.attr.attr);
+ else if (child->flags & XEN_SYSFS_BIN_TYPE)
+ err = sysfs_create_bin_file(&child->kobj,
&child->attr.attr);
+ child->flags |= XEN_SYSFS_LINKED;
+ child->flags &= ~XEN_SYSFS_UNLINKED;
+ child->parent = parent;
+ sysfs_up(&xen_sysfs_mut);
+ get_object(parent);
+ return err;
+}
+
+static void remove_child(struct xen_sysfs_object *child)
+{
+ struct list_head *children;
+ struct xen_sysfs_object *tmp_obj;
+
+ children = (&child->children)->next;
+ while( children != &child->children ) {
+ tmp_obj = list_entry(children, struct xen_sysfs_object,
list );
+ remove_child(tmp_obj);
+ children = (&child->children)->next;
+ }
+ child->flags |= XEN_SYSFS_UNLINKED;
+ child->flags &= ~XEN_SYSFS_LINKED;
+ if(child->flags & XEN_SYSFS_DIR_TYPE)
+ sysfs_remove_dir(&child->kobj);
+ else if (child->flags & XEN_SYSFS_CHAR_TYPE)
+ sysfs_remove_file(&child->kobj, &child->attr.attr.attr);
+ else if (child->flags & XEN_SYSFS_BIN_TYPE)
+ sysfs_remove_bin_file(&child->kobj, &child->attr.attr);
+ list_del(&child->list);
+ put_object(child->parent, sysfs_release);
+ child->parent = NULL;
+ put_object(child, sysfs_release);
+ return;
+}
+
+
+
+
+int
+xen_sysfs_create_dir(const char * path, int mode)
+{
+ struct xen_sysfs_object * child, * parent;
+ int err;
+
+ if(path == NULL)
+ return -EINVAL;
+ if ( NULL == (parent = __find_parent(path)) )
+ return -EBADF;
+ if( NULL == (child = new_sysfs_object(path, XEN_SYSFS_DIR_TYPE,
+ mode, NULL,NULL, NULL,
+ NULL, NULL,NULL))) {
+ put_object(parent, sysfs_release);
+ return -ENOMEM;
+ }
+ err = __add_child(parent, child);
+ put_object(parent, sysfs_release);
+
+ return -err;
+}
+
+int
+xen_sysfs_remove_dir(const char* path, BOOL recursive)
+{
+ __label__ mut;
+ __label__ ref;
+ int err = 0;
+ struct xen_sysfs_object * dir;
+
+ if(path == NULL)
+ return -EINVAL;
+ BUG_ON(sysfs_down(&xen_sysfs_mut));
+ if(NULL == (dir = find_object(&xen_root, path))) {
+ err = -EBADF;
+ goto mut;
+ }
+ if(FALSE == recursive && ! list_empty(&dir->children) ) {
+ err = -EBUSY;
+ goto ref;
+ }
+ remove_child(dir);
+ref:
+ put_object(dir, sysfs_release);
+mut:
+ sysfs_up(&xen_sysfs_mut);
+ return err;
+}
+
+
+
+int
+xen_sysfs_create_file(const char * path,
+ int mode,
+ ssize_t (*show)(void *, char *),
+ ssize_t (*store)(void *, const char *, size_t),
+ void * private_data,
+ void (*private_data_release)(void *))
+{
+
+ struct xen_sysfs_object *parent, * file;
+ int err;
+
+ if(path == NULL || FALSE == valid_chars(path))
+ return -EINVAL;
+ if(NULL == ( parent = __find_parent(path)) )
+ return -EBADF;
+
+ if( NULL == ( file = new_sysfs_object(path,
+ XEN_SYSFS_CHAR_TYPE,
+ mode,
+ show,
+ store,
+ NULL,
+ NULL,
+ private_data,
+ private_data_release)))
+ return -ENOMEM;
+
+ err = __add_child(parent, file);
+ put_object(parent, sysfs_release);
+ return err;
+}
+
+
+int
+xen_sysfs_update_file(const char * path)
+{
+ __label__ mut;
+ int err;
+ struct xen_sysfs_object * obj;
+
+ if(path == NULL || FALSE == valid_chars(path))
+ return -EINVAL;
+ sysfs_down(&xen_sysfs_mut);
+
+ if(NULL == (obj = find_object(&xen_root, path))) {
+ err = -EBADF;
+ goto mut;
+ }
+
+ err = sysfs_update_file(&obj->kobj, &obj->attr.attr.attr);
+ put_object(obj, sysfs_release);
+mut:
+ sysfs_up(&xen_sysfs_mut);
+ return err;
+}
+
+
+int
+xen_sysfs_remove_file(const char* path)
+{
+ __label__ mut;
+ int err = 0;
+ struct xen_sysfs_object * file;
+
+ if(path == NULL)
+ return -EINVAL;
+ BUG_ON(sysfs_down(&xen_sysfs_mut));
+ if(NULL == (file = find_object(&xen_root, path))) {
+ err = -EBADF;
+ goto mut;
+ }
+ remove_child(file);
+ put_object(file, sysfs_release);
+mut:
+ sysfs_up(&xen_sysfs_mut);
+ return err;
+}
+
+int
+xen_sysfs_create_bin_file(const char * path,
+ int mode,
+ ssize_t (*read) (void *, char *, loff_t, size_t),
+ ssize_t (*write) (void *, char *, loff_t,
size_t),
+ void * private_data,
+ void (*private_data_release)(void *))
+{
+
+ struct xen_sysfs_object *parent, * file;
+ int err;
+
+ if(path == NULL || FALSE == valid_chars(path))
+ return -EINVAL;
+ if(NULL == ( parent = __find_parent(path)) )
+ return -EBADF;
+
+ if( NULL == ( file = new_sysfs_object(path,
+ XEN_SYSFS_BIN_TYPE,
+ mode,
+ NULL,
+ NULL,
+ read,
+ write,
+ private_data,
+ private_data_release)))
+ return -ENOMEM;
+
+ err = __add_child(parent, file);
+ put_object(parent, sysfs_release);
+ return err;
+}
+
+int __init
+xen_sysfs_init(void)
+{
+ kobject_init(&xen_root.kobj);
+ kobject_set_name(&xen_root.kobj, "xen");
+ atomic_set(&xen_root.refcount, 1);
+ return sysfs_create_dir(&xen_root.kobj);
+}
+
+arch_initcall(xen_sysfs_init);
+
+EXPORT_SYMBOL(xen_sysfs_create_dir);
+EXPORT_SYMBOL(xen_sysfs_remove_dir);
+EXPORT_SYMBOL(xen_sysfs_create_file);
+EXPORT_SYMBOL(xen_sysfs_update_file);
+EXPORT_SYMBOL(xen_sysfs_remove_file);
+
+
diff -r ed7888c838ad -r f6e4c20a786b
linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs_version.c
--- /dev/null Tue Jan 10 17:53:44 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs_version.c Tue Jan
10 23:30:37 2006
@@ -0,0 +1,60 @@
+/*
+ copyright (c) 2006 IBM Corporation
+ Mike Day <[email protected]>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA
+*/
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <asm/page.h>
+#include <asm-xen/xen-public/version.h>
+#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/asm/hypercall.h>
+#include <asm-xen/xen_sysfs.h>
+
+extern int HYPERVISOR_xen_version(int, void*);
+
+
+static ssize_t xen_version_show(void *data, char *page)
+{
+ long version;
+ long major, minor;
+ static xen_extraversion_t extra_version;
+
+ version = HYPERVISOR_xen_version(XENVER_version, NULL);
+ major = version >> 16;
+ minor = version & 0xff;
+
+ HYPERVISOR_xen_version(XENVER_extraversion, extra_version);
+ return snprintf(page, PAGE_SIZE, "xen-%ld.%ld%s\n", major,
minor, extra_version);
+}
+
+
+
+int __init
+sysfs_xen_version_init(void)
+{
+ return xen_sysfs_create_file("/sys/xen/version",
+ 0444,
+ xen_version_show,
+ NULL,
+ NULL,
+ NULL);
+}
+
+device_initcall(sysfs_xen_version_init);
diff -r ed7888c838ad -r f6e4c20a786b
linux-2.6-xen-sparse/include/asm-xen/xen_sysfs.h
--- /dev/null Tue Jan 10 17:53:44 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/xen_sysfs.h Tue Jan 10
23:30:37 2006
@@ -0,0 +1,88 @@
+/*
+ copyright (c) 2006 IBM Corporation
+ Mike Day <[email protected]>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA
+*/
+
+
+#include <asm/page.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+
+
+#ifndef _XEN_SYSFS_H_
+#define _XEN_SYSFS_H_
+
+#ifdef __KERNEL__
+
+#ifndef BOOL
+#define BOOL int
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+
+extern int
+xen_sysfs_create_dir(const char * path, int mode);
+
+extern int
+xen_sysfs_remove_dir(const char * path, BOOL recursive);
+
+extern int
+xen_sysfs_create_file(const char * path,
+ int mode,
+ ssize_t (*show)(void * user_data, char * buf),
+ ssize_t (*store)(void * user_data,
+ const char * buf,
+ size_t length),
+ void * private_data,
+ void (*private_data_release)(void *));
+
+extern int
+xen_sysfs_update_file(const char * path);
+
+extern int
+xen_sysfs_remove_file(const char * path);
+
+
+int xen_sysfs_create_bin_file(const char * path,
+ int mode,
+ ssize_t (*read)(void * user_data,
+ char * buf,
+ loff_t offset,
+ size_t length),
+ ssize_t (*write)(void * user_data,
+ char *buf,
+ loff_t offset,
+ size_t length),
+ void * private_data,
+ void (*private_data_release)(void *));
+int xen_sysfs_remove_bin_file(const char * path);
+
+#endif /* __KERNEL__ */
+#endif /* _XEN_SYSFS_H_ */
[mdday@silverwood xen-sysfs.hg]$


--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]


2006-01-11 17:19:56

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Wed, 2006-01-11 at 12:17 -0500, Mike D. Day wrote:
> The included patch provides some sysfs helper routines so that xen
> domain kernel processes can easily attributes to sysfs. The intent is
> that any kernel process can add an attribute under /sys/xen just as
> easily as adding a file under /proc. In other words, without using the
> driver core to create a subsystem, dealing with kobjects and ksets, etc.


eh... WHY ???

so that sys gets just as much of a mess as proc already is, so that
there are 2 messes?????


2006-01-11 17:56:50

by Stephen Hemminger

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

You probably want to submit Xen as a complete patch set. Rather
than asking kernel to accept pieces of infrastructure. But
also expect the the infrastructure changes like this to be more
carefully reviewed and alternatives recommended.


On Wed, 11 Jan 2006 12:17:20 -0500
"Mike D. Day" <[email protected]> wrote:

> The included patch provides some sysfs helper routines so that xen
> domain kernel processes can easily attributes to sysfs. The intent is
> that any kernel process can add an attribute under /sys/xen just as
> easily as adding a file under /proc. In other words, without using the
> driver core to create a subsystem, dealing with kobjects and ksets, etc.
>
> One example adds xen version info under /sys/xen/version
>
> The comments desired are (1) do the helper routines in xen_sysfs
> duplicate code already present in linux (or under development somewhere
> else). (2) Is it appropriate for a process to create sysfs attributes
> without implementing a driver subsystem or (3) are such attributes
> better off living under /proc. (4) any other feedback is appreciated.
>

Debugfs? Configfs?

> # HG changeset patch
> # User [email protected]
> # Node ID f6e4c20a786b9322843fb46a45f7796fc6a33b44
> # Parent ed7888c838ad5cd213a24d21ae294b31a2500f4d
> Export Xen information to sysfs Allow xen domain kernel to add xen
> data to /sys/xen without also requiring creation of driver subsystems.
>
> As an example, add xen version by creating /sys/xen/version
>
> signed-off-by Mike Day <[email protected]>
>
> diff -r ed7888c838ad -r f6e4c20a786b
> linux-2.6-xen-sparse/arch/xen/kernel/Makefile
> --- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Jan 10
> 17:53:44 2006
> +++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Jan 10
> 23:30:37 2006
> @@ -16,3 +16,4 @@
> obj-$(CONFIG_PROC_FS) += xen_proc.o
> obj-$(CONFIG_NET) += skbuff.o
> obj-$(CONFIG_SMP) += smpboot.o
> +obj-$(CONFIG_SYSFS) += xen_sysfs.o xen_sysfs_version.o
> diff -r ed7888c838ad -r f6e4c20a786b
> linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs.c
> --- /dev/null Tue Jan 10 17:53:44 2006
> +++ b/linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs.c Tue Jan 10
> 23:30:37 2006
> @@ -0,0 +1,694 @@
> +/*
> + copyright (c) 2006 IBM Corporation
> + Mike Day <[email protected]>
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 2 of the License, or
> + (at your option) any later version.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program; if not, write to the Free Software
> + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> 02111-1307 USA
> +*/
> +
> +
> +#include <linux/config.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/kobject.h>
> +#include <linux/sysfs.h>
> +#include <linux/module.h>
> +#include <linux/string.h>
> +#include <linux/types.h>
> +#include <asm/atomic.h>
> +#include <asm/semaphore.h>
> +#include <asm-generic/bug.h>
> +
> +#ifdef DEBUG
> +#define DPRINTK(fmt, args...) printk(KERN_DEBUG "xen_sysfs: ", fmt,
> ## args)
> +#else
> +#define DPRINTK(fmt, args...)
> +#endif
> +
> +#ifndef BOOL
> +#define BOOL int
> +#endif
> +
> +#ifndef FALSE
> +#define FALSE 0
> +#endif
> +
> +#ifndef TRUE
> +#define TRUE 1
> +#endif
> +
> +#ifndef NULL
> +#define NULL 0
> +#endif

Yuck, not Linux Coding Style...

> +
> +#define __sysfs_ref__

Why?

> +
> +struct xen_sysfs_object;
> +
> +struct xen_sysfs_attr {
> + struct bin_attribute attr;
> + ssize_t (*show)(void *, char *) ;
> + ssize_t (*store)(void *, const char *, size_t) ;
> + ssize_t (*read)(void *, char *, loff_t, size_t );
> + ssize_t (*write)(void *, char *, loff_t, size_t) ;
> +};
> +
> +
> +/* flags bits */
> +#define XEN_SYSFS_UNINITIALIZED 0x00
> +#define XEN_SYSFS_CHAR_TYPE 0x01
> +#define XEN_SYSFS_BIN_TYPE 0x02
> +#define XEN_SYSFS_DIR_TYPE 0x04
> +#define XEN_SYSFS_LINKED 0x08
> +#define XEN_SYSFS_UNLINKED 0x10
> +#define XEN_SYSFS_LINK_TYPE 0x11
> +
> +
> +struct xen_sysfs_object {
> + struct list_head list;
> + int flags;
> + struct kobject kobj;
> + struct xen_sysfs_attr attr;
> + char * path;
> + struct list_head children;
> + struct xen_sysfs_object * parent;
> + atomic_t refcount;
> + void * user_data;
> + void (*user_data_release)(void *);
> + void (*destroy)(struct xen_sysfs_object *);
> +};
> +
> +
> +static __sysfs_ref__ struct xen_sysfs_object *
> +find_object(struct xen_sysfs_object * obj, const char * path);
> +
> +
> +static __sysfs_ref__ struct xen_sysfs_object *
> +new_sysfs_object(const char * path,
> + int type,
> + int mode,
> + ssize_t (*show)(void *, char *),
> + ssize_t (*store)(void *, const char *, size_t),
> + ssize_t (*read)(void *, char *, loff_t, size_t),
> + ssize_t (*write)(void *, char *, loff_t, size_t),
> + void * user_data,
> + void (* user_data_release)(void *)) ;
> +
> +static void destroy_sysfs_object(struct xen_sysfs_object * obj);
> +static __sysfs_ref__ struct xen_sysfs_object * __find_parent(const char
> * path) ;
> +static __sysfs_ref__ int __add_child(struct xen_sysfs_object *parent,
> + struct xen_sysfs_object *child);
> +static void remove_child(struct xen_sysfs_object *child);
> +static void get_object(struct xen_sysfs_object *);
> +static int put_object(struct xen_sysfs_object *,
> + void (*)(struct xen_sysfs_object *));
> +
> +
> +/* Is A == B ? */
> +#define streq(a,b) (strcmp((a),(b)) == 0)

More non-linux coding style...

> +/* Does A start with B ? */
> +#define strstarts(a,b) (strncmp((a),(b),strlen(b)) == 0)
> +
> +
> +#define __sysfs_ref__
> +
> +#define XEN_SYSFS_ATTR(_name, _mode, _show, _store) \
> + struct xen_sysfs_attr xen_sysfs_attr_##_name = __ATTR(_name,
> _mode, _show, _store)
> +
> +#define __XEN_KOBJ(_parent, _dentry, _ktype) \
> + { \
> + .k_name = NULL, \
> + .parent = _parent, \
> + .dentry = _dentry, \
> + .ktype = _ktype, \
> + }
> +
> +static struct semaphore xen_sysfs_mut = __MUTEX_INITIALIZER(xen_sysfs_mut);
> +static inline int
> +sysfs_down(struct semaphore * mut)
> +{
> + int err;
> + do {
> + err = down_interruptible(mut);
> + } while ( err && err == -EINTR );
> + return err;
> +}

Wrapping mutex is not a bad idea on several levels.



--
Stephen Hemminger <[email protected]>
OSDL http://developer.osdl.org/~shemminger

2006-01-11 18:45:52

by Dave Hansen

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Wed, 2006-01-11 at 12:17 -0500, Mike D. Day wrote:
> +#ifndef BOOL
> +#define BOOL int
> +#endif
> +
> +#ifndef FALSE
> +#define FALSE 0
> +#endif
> +
> +#ifndef TRUE
> +#define TRUE 1
> +#endif
> +
> +#ifndef NULL
> +#define NULL 0
> +#endif

Whatever you do with this driver, these need to go.

Your patch is also whitespace-challenged.

Why not make these Xen attributes part of the "system" devices? Seems a
lot more natural to me.

-- Dave

2006-01-11 23:07:19

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Wed, Jan 11, 2006 at 12:17:20PM -0500, Mike D. Day wrote:
> The included patch provides some sysfs helper routines so that xen
> domain kernel processes can easily attributes to sysfs. The intent is
> that any kernel process can add an attribute under /sys/xen just as
> easily as adding a file under /proc. In other words, without using the
> driver core to create a subsystem, dealing with kobjects and ksets, etc.

Why is xen special from the rest of the kernel in regards to adding
files to sysfs? What does your infrastructure add that is not currently
already present for everyone to use today?

> One example adds xen version info under /sys/xen/version

Why is the xen version any different from any other module or driver
version in the kernel? (hint, use the interface that is availble for
this already...)

> The comments desired are (1) do the helper routines in xen_sysfs
> duplicate code already present in linux (or under development somewhere
> else).

You have access to the current tree as well as we do to be able to
answer this question :)

> (2) Is it appropriate for a process to create sysfs attributes without
> implementing a driver subsystem

You don't have to create a driver subsystem to be able to add stuff to
sysfs, what makes you think that?

> or (3) are such attributes better off living under /proc.

No, they belong in the sysfs tree like everything else. Unless you have
process specific attributes, do NOT add anything new to /proc.

> (4) any other feedback is appreciated.

did you look at debugfs? configfs?

What is wrong with the current kobject/sysfs/driver model interface that
made you want to create this extra code?

Aren't you already going to have a xen virtual bus in sysfs and the
driver model? Why not just put your needed attributes there, where they
belong (on the devices themselves)?


> linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs.c
> --- /dev/null Tue Jan 10 17:53:44 2006
> +++ b/linux-2.6-xen-sparse/arch/xen/kernel/xen_sysfs.c Tue Jan 10
> 23:30:37 2006

Your patch is linewrapped :(

> +#ifdef DEBUG
> +#define DPRINTK(fmt, args...) printk(KERN_DEBUG "xen_sysfs: ", fmt,
> ## args)
> +#else
> +#define DPRINTK(fmt, args...)
> +#endif

Don't create your own, use dev_dbg() and friends instead. pr_debug if
you absolutely don't have access to a struct device.

> +#ifndef BOOL
> +#define BOOL int
> +#endif

Heh, what OS is this code for?

> +#ifndef FALSE
> +#define FALSE 0
> +#endif
> +
> +#ifndef TRUE
> +#define TRUE 1
> +#endif

No, don't add this, it's pointless.

> +#ifndef NULL
> +#define NULL 0
> +#endif

No, that will just break sparse. Why did you add this?

> +#define __sysfs_ref__

Why?

> +struct xen_sysfs_object;
> +
> +struct xen_sysfs_attr {
> + struct bin_attribute attr;
> + ssize_t (*show)(void *, char *) ;
> + ssize_t (*store)(void *, const char *, size_t) ;
> + ssize_t (*read)(void *, char *, loff_t, size_t );
> + ssize_t (*write)(void *, char *, loff_t, size_t) ;
> +};

Why a binary attribute? Do you want to have more than one single piece
of info in here? If so, no.

I'll stop here and say that you should use the internal-to-IBM code
review process, it would probably save you a lot of time in the
future...

thanks,

greg k-h

2006-01-12 00:23:58

by Mike D. Day

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Greg KH wrote:

> Why is xen special from the rest of the kernel in regards to adding
> files to sysfs? What does your infrastructure add that is not currently
> already present for everyone to use today?

I think it comes down to simplification for non-driver code, which is
admittedly not the mainstream use model for sysfs.

>
> Why is the xen version any different from any other module or driver
> version in the kernel? (hint, use the interface that is availble for
> this already...)

The module version? Xen is not a module nor a driver, so that interface
doesn't quite serve the purpose. True, one could create a "Xen module"
that talks to Xen the hypervisor, but then the version interface would
provide the version of the xen module, not the version of the xen
hypervisor. /sys/xen/version may not be the best example for this
discussion. What is important is that this attribute is obtained from
Xen using a hypercall. Sysfs works great to prove the xen version and
other similar xen attributes to userspace.

>
> You have access to the current tree as well as we do to be able to
> answer this question :)

Right. Dumb question.

> You don't have to create a driver subsystem to be able to add stuff to
> sysfs, what makes you think that?

Sorry, you are right. But you do need to have s struct dev or use
kobjects. What I want is an interface to create sysfs files using a path
as a parameter, rather than a struct kobject.

> did you look at debugfs?

yes

> configfs?

no. configfs may be a better choice. I would still want a higher-level
kernel interface similar to what is in the patch, as explained below.
But I think sysfs may be more appropriate because attributes show up
automatically without a user-space action being taken.

> What is wrong with the current kobject/sysfs/driver model interface that
> made you want to create this extra code?

Nothing is wrong, but I want a higher-level interface, to be able to
create files and directories using a path, and to allow a code that is
not associated with a device to create sysfs files by specifying a path.
e.g., create(path, mode, ...).

Currently in xeno-linux there are several files under /proc/xen. These
are created by different areas of the xeno-linux kernel. In xeno-linux
today there is a single higher-level routine that each of these
different areas uses to create its own file under /proc/xen. In other
words, I think there should be a unifying element to the interface
because the callers are not organized within a single module.


> Aren't you already going to have a xen virtual bus in sysfs and the
> driver model? Why not just put your needed attributes there, where they
> belong (on the devices themselves)?

the xenbus, which is now in xen 3.0, allows kernels running in xen
domains to get access to virtual devices hosted in a driver
domain/domain0. But the attributes I am creating in /sys/xen are xen
attributes, not device attributes. The difference is important to
consumers of the attributes. I could create a device just to export
hypervisor attributes, but I think the what I've done is simpler.


>>+#define __sysfs_ref__
>
>
> Why?

A simple way to denote functions that get a reference to a reference
counted object. e.g., int __sysfs_ref__ foo(void); gone.

>
>
>>+struct xen_sysfs_object;
>>+
>>+struct xen_sysfs_attr {
>>+ struct bin_attribute attr;
>>+ ssize_t (*show)(void *, char *) ;
>>+ ssize_t (*store)(void *, const char *, size_t) ;
>>+ ssize_t (*read)(void *, char *, loff_t, size_t );
>>+ ssize_t (*write)(void *, char *, loff_t, size_t) ;
>>+};
>
>
> Why a binary attribute? Do you want to have more than one single piece
> of info in here? If so, no.

To facilitate creation of binary files. struct bin_attribute contains a
struct attribute, so it is an alternative to using a union.

Mike (hoping he doesn't end up on linux kernel monkey log)

--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]

2006-01-12 00:57:28

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Wed, Jan 11, 2006 at 07:23:53PM -0500, Mike D. Day wrote:
> Greg KH wrote:
>
> >Why is xen special from the rest of the kernel in regards to adding
> >files to sysfs? What does your infrastructure add that is not currently
> >already present for everyone to use today?
>
> I think it comes down to simplification for non-driver code, which is
> admittedly not the mainstream use model for sysfs.

/sys/module/ is a pretty "mainstream use model for sysfs", right?

> >Why is the xen version any different from any other module or driver
> >version in the kernel? (hint, use the interface that is availble for
> >this already...)
>
> The module version? Xen is not a module nor a driver, so that interface
> doesn't quite serve the purpose.

Then it doesn't need a separate version, as it is the same as the main
kernel version, right? Just because your code is out-of-the-tree right
now, doesn't mean it will be that way forever (although based on the
lack of patches to alieve this, it might be...)

> True, one could create a "Xen module" that talks to Xen the
> hypervisor, but then the version interface would provide the version
> of the xen module, not the version of the xen hypervisor.

Huh? You can't just throw a "MODULE_VERSION()", and a module_init()
somewhere into the xen code to get this to happen? Then all of your
configurable paramaters show up automagically.

> /sys/xen/version may not be the best example for this discussion. What
> is important is that this attribute is obtained from Xen using a
> hypercall. Sysfs works great to prove the xen version and other
> similar xen attributes to userspace.

Like what? Specifics please.

> >You have access to the current tree as well as we do to be able to
> >answer this question :)
>
> Right. Dumb question.
>
> >You don't have to create a driver subsystem to be able to add stuff to
> >sysfs, what makes you think that?
>
> Sorry, you are right. But you do need to have s struct dev or use
> kobjects. What I want is an interface to create sysfs files using a path
> as a parameter, rather than a struct kobject.

So you want to divorce the relationship in sysfs between directories and
kobjects? That's a valid proposal, but just don't do it as a xen
specific thing please, that's being selfish.

> >did you look at debugfs?
>
> yes

And what is wrong with it?

> >configfs?
>
> no. configfs may be a better choice. I would still want a higher-level
> kernel interface similar to what is in the patch, as explained below.
> But I think sysfs may be more appropriate because attributes show up
> automatically without a user-space action being taken.

Fair enough.

> >What is wrong with the current kobject/sysfs/driver model interface that
> >made you want to create this extra code?
>
> Nothing is wrong, but I want a higher-level interface, to be able to
> create files and directories using a path, and to allow a code that is
> not associated with a device to create sysfs files by specifying a path.
> e.g., create(path, mode, ...).

See my comment above about this.

But I think you will fail in this, as we want to keep a very strict
heirachy in sysfs, as userspace relies on this. See the previous
proposal from Pat Mochel to try to do this (in the lkml archives) for
the problems when he tried to do so.

> Currently in xeno-linux there are several files under /proc/xen. These
> are created by different areas of the xeno-linux kernel. In xeno-linux
> today there is a single higher-level routine that each of these
> different areas uses to create its own file under /proc/xen. In other
> words, I think there should be a unifying element to the interface
> because the callers are not organized within a single module.

Ok, but again, that's no different than anything else in the kernel,
right?

> Mike (hoping he doesn't end up on linux kernel monkey log)

Well, you posted a patch in the wrong coding style, in a format that can
not be applied due to a broken email client setup, tried to do all of
the work in your own subsection of an external kernel tree that seems to
strongly avoid getting merged into mainline, ignored the existing kernel
interfaces, and didn't cc: the subsystem maintainer. Sounds like it
might be worth mentioning, don't you think? :)

thanks,

greg k-h

2006-01-12 01:32:57

by Dave Hansen

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Wed, 2006-01-11 at 19:23 -0500, Mike D. Day wrote:
> Greg KH wrote:
>
> > Why is xen special from the rest of the kernel in regards to adding
> > files to sysfs? What does your infrastructure add that is not currently
> > already present for everyone to use today?
>
> I think it comes down to simplification for non-driver code, which is
> admittedly not the mainstream use model for sysfs.

You might also want to take a good look at how things like ACPI do
exports in sysfs: in /sys/firmware. Not that ACPI is a good example of
_anything_ :), but that is probably more compliant with the current
model than your own /sys/xen.

Do you have a definitive list of things that you want to export? Are
they things that come and go, or are they static? Do you want hotplug
events for them? Some of those things may be better fit platform
devices. Notice that ACPI has entries in /sys/firmware/acpi
and /sys/devices/system/acpi.

-- Dave

2006-01-12 01:49:20

by Mike D. Day

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Greg KH wrote:

>>I think it comes down to simplification for non-driver code, which is
>>admittedly not the mainstream use model for sysfs.
>
> /sys/module/ is a pretty "mainstream use model for sysfs", right?

Yes, but xen is not a module. I believe /sys/xen/ is different than
/sys/module/, and provide some further reasoning below.

>>The module version? Xen is not a module nor a driver, so that interface
>>doesn't quite serve the purpose.
>
> Then it doesn't need a separate version, as it is the same as the main
> kernel version, right? Just because your code is out-of-the-tree right

No. For example, I could run linux-2.6.x in a domain under xen 3.0.0. In
this case the xen version is 3.0.0, the linux version is 2.6.x. I could
run the very same kernel on xen 3.0.1, xen 3.1, and eventually xen
4.x.x. The xen version exists outside of the linux kernel version, but
userspace will have good reasons to want to know the xen version (think
management tools).
>
> Huh? You can't just throw a "MODULE_VERSION()", and a module_init()
> somewhere into the xen code to get this to happen? Then all of your
> configurable paramaters show up automagically.

No, I can't. Xen does not have modules. Xen loads and runs linux. I am
trying to make it simple for linux to represent xen attributes under
/sys/xen. This is analogous to a kernel module representing the kernel.
I know it is weird.

>>/sys/xen/version may not be the best example for this discussion. What
>>is important is that this attribute is obtained from Xen using a
>>hypercall. Sysfs works great to prove the xen version and other
>>similar xen attributes to userspace.
>
>
> Like what? Specifics please.

What privileges are granted to the kernel by xen - can the kernel
control real devices or just virtual ones. How many other domains
(virtual machines) are being hosted by xen? How much memory is available
for ballooning (increasing the memory used by kernels through the
remapping of pages inside the hypervisor). Can the domain be migrated to
another physical host? What scheduler is Xen using (xen has plug-in
schedulers)? All the actual information resides within the xen
hypervisor, not the linux kernel.

> So you want to divorce the relationship in sysfs between directories and
> kobjects?

Not quite, just hide the relationship for users of sysfs that have no
reason to know about it.

That's a valid proposal, but just don't do it as a xen
specific thing please, that's being selfish.

ok

> But I think you will fail in this, as we want to keep a very strict
> heirachy in sysfs, as userspace relies on this. See the previous
> proposal from Pat Mochel to try to do this (in the lkml archives) for
> the problems when he tried to do so.

Hence why I created this as a xeno-linux patch. I can control where the
sysfs files get created. For example, I check to make sure the path
starts with "/sys/xen." I don't want to interfere with keeping a strict
heirarchy.

>
>>Currently in xeno-linux there are several files under /proc/xen. These
>>are created by different areas of the xeno-linux kernel. In xeno-linux
>>today there is a single higher-level routine that each of these
>>different areas uses to create its own file under /proc/xen. In other
>>words, I think there should be a unifying element to the interface
>>because the callers are not organized within a single module.
>
> Ok, but again, that's no different than anything else in the kernel,
> right?

I think that it is different. The sysfs attributes are being created by
the kernel, not a driver or module. The attribute values themselves are
located in the xen hypervisor, which is totally outside of the kernel
and everything it controls.

> not be applied due to a broken email client setup, tried to do all of
> the work in your own subsection of an external kernel tree that seems to

I worked within the xen project hoping that the code might get applied
there and later merged.

> strongly avoid getting merged into mainline, ignored the existing kernel
> interfaces,

No, I didn't ignore them. I may be mistaken, but I believe this is a
different use model.

and didn't cc: the subsystem maintainer.

Sorry, will make certain to cc: the maintainer in the future.

Mike
--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]

2006-01-12 02:30:33

by Mark Williamson

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Guys,

I think the waters are getting a bit muddied here regarding Xen (the
hypervisor, a separate project which boots natively on the hardware, not a
module or patch to Linux) vs. the Xen Patch to Linux (allowing i386 Linux to
run on top of that hypervisor's APIs).

> >>The module version? Xen is not a module nor a driver, so that interface
> >>doesn't quite serve the purpose.
> >
> > Then it doesn't need a separate version, as it is the same as the main
> > kernel version, right? Just because your code is out-of-the-tree right

> > Huh? You can't just throw a "MODULE_VERSION()", and a module_init()
> > somewhere into the xen code to get this to happen? Then all of your
> > configurable paramaters show up automagically.

To put it another way, when Mike referred to "Xen", he meant the hypervisor
itself, not part of the patch to Linux. The version attribute under /sys/xen
is therefore describing the version of the "virtual hardware" that's provided
by the Xen<->guest OS interface, not for describing / configuring the
Xen-aware portion of Linux itself.

(side note: Xen's quite like a CPU arch / extended hardware platform in some
ways, although it's kinda orthogonal to the particular hardware platform in
use. Mike - had you looked at how CPU entries are registered
in /sys/devices/system, for instance? anything there you could leverage?)

Cheers,
Mark


--
Dave: Just a question. What use is a unicyle with no seat? And no pedals!
Mark: To answer a question with a question: What use is a skateboard?
Dave: Skateboards have wheels.
Mark: My wheel has a wheel!

2006-01-12 07:11:07

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Wed, Jan 11, 2006 at 08:49:16PM -0500, Mike D. Day wrote:
> Greg KH wrote:
>
> >>I think it comes down to simplification for non-driver code, which is
> >>admittedly not the mainstream use model for sysfs.
> >
> >/sys/module/ is a pretty "mainstream use model for sysfs", right?
>
> Yes, but xen is not a module. I believe /sys/xen/ is different than
> /sys/module/, and provide some further reasoning below.

My point was that the module core code itself is a portion of the kernel
that uses sysfs that is a "non-driver" bit of code. Thus proving that
you do not have to be a driver, or device, or bus, to use sysfs easily.

> >>The module version? Xen is not a module nor a driver, so that interface
> >>doesn't quite serve the purpose.
> >
> >Then it doesn't need a separate version, as it is the same as the main
> >kernel version, right? Just because your code is out-of-the-tree right
>
> No. For example, I could run linux-2.6.x in a domain under xen 3.0.0. In
> this case the xen version is 3.0.0, the linux version is 2.6.x. I could
> run the very same kernel on xen 3.0.1, xen 3.1, and eventually xen
> 4.x.x. The xen version exists outside of the linux kernel version, but
> userspace will have good reasons to want to know the xen version (think
> management tools).

Ok, thanks for explaining this better. That makes more sense.

What other, specific sysfs files are you going to want to create?
What is the hierarchy going to look like?
What is the contents of the file going to look like?

> >So you want to divorce the relationship in sysfs between directories and
> >kobjects?
>
> Not quite, just hide the relationship for users of sysfs that have no
> reason to know about it.

I think this is happening as you are trying to port your code that
currently uses /proc (and file names there) to use sysfs instead, right?
To do this correctly, you need to stop thinking about file names and
paths, and start thinking about the hierarchy and relationship between
the files, which will allow you to create a tree of kobjects easier.

If you answer the questions above, I think we can work to figure this
out.

> >>Currently in xeno-linux there are several files under /proc/xen. These
> >>are created by different areas of the xeno-linux kernel. In xeno-linux
> >>today there is a single higher-level routine that each of these
> >>different areas uses to create its own file under /proc/xen. In other
> >>words, I think there should be a unifying element to the interface
> >>because the callers are not organized within a single module.
> >
> >Ok, but again, that's no different than anything else in the kernel,
> >right?
>
> I think that it is different. The sysfs attributes are being created by
> the kernel, not a driver or module. The attribute values themselves are
> located in the xen hypervisor, which is totally outside of the kernel
> and everything it controls.

Just because you have to make a hypervisor call to get the value of the
attribute behind a sysfs file, does not make it any different from
anything else we currently have in sysfs. It just will make the race
conditions easier to hit in your code as I imagine you will be sleeping
in the show functions more often than other parts of the kernel :)

> >not be applied due to a broken email client setup, tried to do all of
> >the work in your own subsection of an external kernel tree that seems to
>
> I worked within the xen project hoping that the code might get applied
> there and later merged.

Well, for things like this, that interact with the rest of the kernel,
it's good to work with the kernel community too, as you are doing. I'm
happy to see this start to happen, hopefully other portions of Xen will
start tricking out this way also (same thing happened a few weeks ago
with some USB stuff.)

> >strongly avoid getting merged into mainline, ignored the existing kernel
> >interfaces,
>
> No, I didn't ignore them. I may be mistaken, but I believe this is a
> different use model.

I don't. You are creating a generalized wrapper around a globally
available kernel subsystem. Don't you think that others could want to
use it, or that it hasn't been created yet for some reason?

> Sorry, will make certain to cc: the maintainer in the future.

And also please fix your email client to post patches properly. I guess
I should be happy you didn't try to post them using Notes :)

thanks,

greg k-h

2006-01-12 09:10:27

by Dave Hansen

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Wed, 2006-01-11 at 20:49 -0500, Mike D. Day wrote:
> Greg KH wrote:
> >>/sys/xen/version may not be the best example for this discussion. What
> >>is important is that this attribute is obtained from Xen using a
> >>hypercall. Sysfs works great to prove the xen version and other
> >>similar xen attributes to userspace.
> >
> >
> > Like what? Specifics please.
>
> What privileges are granted to the kernel by xen - can the kernel
> control real devices or just virtual ones.

Why wouldn't this simply be transparent from what devices Linux detects?
If Linux doesn't detect any raw PCI devices, then it obviously doesn't
have access to any.

Why don't any other hypervisors need this?

> How many other domains
> (virtual machines) are being hosted by xen? How much memory is available
> for ballooning (increasing the memory used by kernels through the
> remapping of pages inside the hypervisor).

There are definitely things that are exceedingly helpful. However,
there are at least two other hypervisor-ish things that I can think of
which do the exact same kinds of things. Perhaps it would be helpful to
collaborate with them and produce a common interface. (uml, s390, maybe
some of the powerpc hypervisors)

> Can the domain be migrated to another physical host?
> What scheduler is Xen using (xen has plug-in
> schedulers)? All the actual information resides within the xen
> hypervisor, not the linux kernel.

Other than debugging and curiosity, why are these things needed?

-- Dave

2006-01-12 09:47:50

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes


> The comments desired are (1) do the helper routines in xen_sysfs
> duplicate code already present in linux (or under development somewhere
> else). (2) Is it appropriate for a process to create sysfs attributes
> without implementing a driver subsystem

Not sure, maybe proc is really better.

> or (3) are such attributes
> better off living under /proc. (4) any other feedback is appreciated.

> --- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Jan 10
> 17:53:44 2006
> +++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Tue Jan 10
> 23:30:37 2006

Your mailer is evil and word-wraps patches.

> +#ifndef BOOL
> +#define BOOL int
> +#endif
> +
> +#ifndef FALSE
> +#define FALSE 0
> +#endif
> +
> +#ifndef TRUE
> +#define TRUE 1
> +#endif
> +
> +#ifndef NULL
> +#define NULL 0
> +#endif

Evil!
Pavel

> +{
> + struct xen_sysfs_object * xen_obj = to_xen_obj_bin(kobj);
> + if (xen_obj->attr.write)
> + return xen_obj->attr.write(xen_obj->user_data, buf,
> offset, size);
> + if(size == 0 )

CodingStyle...
> + .path = __stringify(/sys/xen),

Eh?

> + .list = LIST_HEAD_INIT(xen_root.list),
> + .children = LIST_HEAD_INIT(xen_root.children),
> + .parent = NULL,
> +};
> +
> +/* xen sysfs path functions */
> +
> +static BOOL
> +valid_chars(const char *path)
> +{
> + if( ! strstarts(path, "/sys/xen") )
> + return FALSE;
> + if(strstr(path, "//"))
> + return FALSE;
> + return (strspn(path,
> + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> + "abcdefghijklmnopqrstuvwxyz"
> + "0123456789-/_@~$") == strlen(path));
> +}
> +
> +
> +/* return value must be kfree'd */
> +static char *
> +dup_path(const char *path)
> +{
> + char * ret;
> + int len;
> + BUG_ON( ! path );
> +
> + if( FALSE == valid_chars(path) ) {
> + return NULL;
> + }
> +
> + len = strlen(path) + 1;
> + ret = kcalloc(len - 1, sizeof(char), GFP_KERNEL);
> + memcpy(ret, path, len);
> + return ret;
> +}
> +
> +
> +
> +static char *
> +basename(const char *name)
> +{
> + return strrchr(name, '/') + 1;
> +}
> +
> +static char *
> +strip_trailing_slash(char * path)
> +{
> + int len = strlen(path);
> +
> + char * term = path + len - 1;
> + if( *term == '/')
> + *term = 0;
> + return path;
> +}
> +
> +/* return value must be kfree'd */
> +static char * dirname(const char * name)
> +{
> + char *ret;
> + int len;
> +
> + len = strlen(name) - strlen(basename(name)) + 1;
> + ret = kcalloc(len, sizeof(char), GFP_KERNEL);
> + memcpy(ret, name, len - 1);
> + ret = strip_trailing_slash(ret);
> +
> + return ret;
> +}
> +
> +
> +/* type must be char, bin, or dir */
> +static __sysfs_ref__ struct xen_sysfs_object *
> +new_sysfs_object(const char * path,
> + int type,
> + int mode,
> + ssize_t (*show)(void *, char *),
> + ssize_t (*store)(void *, const char *, size_t),
> + ssize_t (*read)(void *, char *, loff_t, size_t),
> + ssize_t (*write)(void *, char *, loff_t, size_t),
> + void * user_data,
> + void (* user_data_release)(void *))
> +{
> + struct xen_sysfs_object * ret =
> + (struct xen_sysfs_object *)kcalloc(sizeof(struct
> xen_sysfs_object),
> + sizeof(char),
> + GFP_KERNEL);

Unneeded cast AFAICT.

> + // if path is longer than obj-path, search children
> + if ( strstarts(path, obj->path) &&

Mo C++ comments please.
Pavel
--
Thanks, Sharp!

2006-01-12 09:58:54

by Keir Fraser

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes


On 12 Jan 2006, at 01:32, Dave Hansen wrote:

> Do you have a definitive list of things that you want to export? Are
> they things that come and go, or are they static? Do you want hotplug
> events for them? Some of those things may be better fit platform
> devices. Notice that ACPI has entries in /sys/firmware/acpi
> and /sys/devices/system/acpi.

This is a good set of questions. We have about half dozen files in
/proc/xen right now. One is an obvious canididate to stick in /dev, as
it has primarily an ioctl() interface. The remainder are static, are
read-only or read-write with ascii text, and we don't want hotplug
events and other baggage.

Maybe making these attributes of a Xen system device makes sense. Are
there examples in the kernel of other subsystems/modules with a similar
miscellaneous set of files?

-- Keir

2006-01-12 12:54:51

by Gerd Hoffmann

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Hi,

>> Huh? You can't just throw a "MODULE_VERSION()", and a module_init()
>> somewhere into the xen code to get this to happen? Then all of your
>> configurable paramaters show up automagically.
>
> No, I can't. Xen does not have modules. Xen loads and runs linux.

You can. Just look at a recent drivers/xen/blkback/blkback.c, the
module parameters specified there show up in
/sys/module/blkback/parameters, no matter whenever the code was built
statically into the kernel or as module (which curently doesn't work for
blkback anyway ...).

Any read-only attributes can trivially be implemented that way. Simple
writable stuff (balloon driver?) probably too, I don't know whenever a
notify callback on parameter changes is possible though.

The current /proc files which are not simple attributes such as
/proc/xen/{privcmd,xenbus} are a bit more tricky, not sure what the best
approach for these is. privcmd returns a filehandle which is then used
for ioctls (misc char dev maybe?). xenbus can be opened and (I think)
read(2) on to listen for any xenbus activity, much like /proc/kmsg.
Suggestions what to use here instead of procfs? Or just leave it there?

cheers,

Gerd

--
Gerd 'just married' Hoffmann <[email protected]>
I'm the hacker formerly known as Gerd Knorr.

2006-01-12 13:21:41

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

> privcmd returns a filehandle which is then used
> for ioctls (misc char dev maybe?).


EWWWWWWWWWWWWWW

what is wrong with open() ?????
things that return fd's that aren't open() (or dup and socket) are just
evil. Esp if it's in proc or sysfs.


2006-01-12 14:42:34

by Gerd Hoffmann

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Arjan van de Ven wrote:
>> privcmd returns a filehandle which is then used
>> for ioctls (misc char dev maybe?).
>
>
> EWWWWWWWWWWWWWW
>
> what is wrong with open() ?????
> things that return fd's that aren't open() (or dup and socket) are just
> evil. Esp if it's in proc or sysfs.

Nothing is wrong with open, but probably the sentense above is a bit too
short. If you call fd = open("/proc/xen/privcmd", ...) you'll get a
filehandle returned for the thingy (as usual) and then you'll use that
filehandle to call ioctl(fd, ...), so it's the usual unix way ...

cheers,

Gerd

--
Gerd 'just married' Hoffmann <[email protected]>
I'm the hacker formerly known as Gerd Knorr.

2006-01-12 14:44:45

by Mike D. Day

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Greg KH wrote:
> What other, specific sysfs files are you going to want to create?
> What is the hierarchy going to look like?
> What is the contents of the file going to look like?

You make a very good point. We have not agreed on the heirarchy and file
contents, and we need to do so before continuing.
Some _very rough_ ideas include

/sys/xen/version/{major minor extra version build}
/sys/xen/domain/{dom0 dom1 ... domn} (each domain could be a dir. with
attributes)
/sys/xen/hypervisor/{scheduler cpu memory}
/sys/xen/migrate/{hosts_to, hosts_from}

These will be text files with simple attrributes. Most will be
read-only. It is kind of fun to think about creating a domain by doing
something like

cat $domain_config > /sys/xen/domain/new

but there are some ugly aspects of doing so. Likewise it would be good
to add a potential migration host by writing an ip address to
/sys/xen/migrate/hosts_to

Again, we need to get this solidified before going further.

>
> I think this is happening as you are trying to port your code that
> currently uses /proc (and file names there) to use sysfs instead, right?
> To do this correctly, you need to stop thinking about file names and
> paths, and start thinking about the hierarchy and relationship between
> the files, which will allow you to create a tree of kobjects easier.

yes

> If you answer the questions above, I think we can work to figure this
> out.

Excellent, we will work on doing so.

> I should be happy you didn't try to post them using Notes :)

Make that two of us :)
--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]

2006-01-12 14:52:20

by Mike D. Day

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Dave Hansen wrote:
> On Wed, 2006-01-11 at 20:49 -0500, Mike D. Day wrote:
>
> There are definitely things that are exceedingly helpful. However,
> there are at least two other hypervisor-ish things that I can think of
> which do the exact same kinds of things. Perhaps it would be helpful to
> collaborate with them and produce a common interface. (uml, s390, maybe
> some of the powerpc hypervisors)

yes, that is a good idea.

>>Can the domain be migrated to another physical host?
>>What scheduler is Xen using (xen has plug-in
>>schedulers)? All the actual information resides within the xen
>>hypervisor, not the linux kernel.
>
> Other than debugging and curiosity, why are these things needed?

Debugging is always a good reason :) but I'm specifically thinking of
systems management tools, deployment of virtual machines, and migration.
All of these attributes are important for tools that manage, deploy, or
migrate.

thanks,

Mike


--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]

2006-01-12 15:06:58

by Mark Williamson

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

> You make a very good point. We have not agreed on the heirarchy and file
> contents, and we need to do so before continuing.
> Some _very rough_ ideas include
>
> /sys/xen/version/{major minor extra version build}
> /sys/xen/domain/{dom0 dom1 ... domn} (each domain could be a dir. with
> attributes)
> /sys/xen/hypervisor/{scheduler cpu memory}
> /sys/xen/migrate/{hosts_to, hosts_from}

It seems to me there are a number of distinct categories these attributes come
into:

* Xen virtual hardware info (more or less corresponds to what's in /proc now,
although proc also has the special xenbus and privcmd interface files)
- hypervisor version, etc
- capabilities of this domain (admin rights, physical hardware permissions,
etc)
- other stuff relating to the Xen in use, or the domain this virtual
machine is running in

* Xen management
- info relating to the underlying hardware
- global scheduler params
- activate / deactive Xen trace buffers, etc

* Domain management
- status regarding the domains in the system
- migration controls

I'd suggest that earlier items in the above list are more important to get
into sysfs, with the lower stuff being able to follow later. Hows about we
start with defining a structure for the stuff in the first (and maybe second)
bullet points above, and work from there?

> These will be text files with simple attrributes. Most will be
> read-only. It is kind of fun to think about creating a domain by doing
> something like
>
> cat $domain_config > /sys/xen/domain/new
>
> but there are some ugly aspects of doing so. Likewise it would be good
> to add a potential migration host by writing an ip address to
> /sys/xen/migrate/hosts_to
>
> Again, we need to get this solidified before going further.

Anthony (cc-ed) did a little work on implementing something like this using
FuSE to call the existing management interfaces we have for this
functionality. IIRC, it was mostly targetted at reading information about
running domains, but it seemed like a good level to implement these
higher-level controls in a virtual FS.

Cheers,
Mark
--
Dave: Just a question. What use is a unicyle with no seat? And no pedals!
Mark: To answer a question with a question: What use is a skateboard?
Dave: Skateboards have wheels.
Mark: My wheel has a wheel!

2006-01-12 15:14:35

by Dave Hansen

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, 2006-01-12 at 10:04 +0000, Keir Fraser wrote:
> On 12 Jan 2006, at 01:32, Dave Hansen wrote:
>
> > Do you have a definitive list of things that you want to export? Are
> > they things that come and go, or are they static? Do you want hotplug
> > events for them? Some of those things may be better fit platform
> > devices. Notice that ACPI has entries in /sys/firmware/acpi
> > and /sys/devices/system/acpi.
>
> This is a good set of questions. We have about half dozen files in
> /proc/xen right now. One is an obvious canididate to stick in /dev, as
> it has primarily an ioctl() interface.

Actually, anything with an ioctl interface is probably a good cantidate
for a writable sysfs file. The basic idea is that we prefer something
in sysfs with a discrete, unique name. It also makes it a lot easier to
develop with because you can look at the values from scripts, and you
don't have to worry about synchronizing any headers.

So, what kind of ioctls are we talking about?

> The remainder are static, are
> read-only or read-write with ascii text, and we don't want hotplug
> events and other baggage.

There really isn't much "baggage" that a static file carries around.
All you have to do is omit filling in a function pointer. Hotplug
events are one of those "for free" things that you get.

> Maybe making these attributes of a Xen system device makes sense. Are
> there examples in the kernel of other subsystems/modules with a similar
> miscellaneous set of files?

drivers/base/memory.c has a couple of little things. A writable probe
file, and a somewhat miscellaneous file for representing the units that
are being dealt with. Might be a place to start. I can certainly
answer questions about it. I would also suggest just doing a "find" or
"tree" on a few of your systems and looking for people that do similar
things to what you want.

I know I struggled to get the memory hotplug sysfs code to work at
first, but the code has been very, very low maintenance.

-- Dave

2006-01-12 15:19:19

by Mark Williamson

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

> > This is a good set of questions. We have about half dozen files in
> > /proc/xen right now. One is an obvious canididate to stick in /dev, as
> > it has primarily an ioctl() interface.
>
> Actually, anything with an ioctl interface is probably a good cantidate
> for a writable sysfs file. The basic idea is that we prefer something
> in sysfs with a discrete, unique name. It also makes it a lot easier to
> develop with because you can look at the values from scripts, and you
> don't have to worry about synchronizing any headers.
>
> So, what kind of ioctls are we talking about?

The privcmd ioctl()s are used for userspace to invoke the Xen hypercalls for
management operations, they generally pass through an mlock'ed struct
describing the operation, for inspection by the hypervisor's management code.
I'd describe it as more like a "hypercall device" than anything else.

Cheers,
Mark

> > The remainder are static, are
> > read-only or read-write with ascii text, and we don't want hotplug
> > events and other baggage.
>
> There really isn't much "baggage" that a static file carries around.
> All you have to do is omit filling in a function pointer. Hotplug
> events are one of those "for free" things that you get.
>
> > Maybe making these attributes of a Xen system device makes sense. Are
> > there examples in the kernel of other subsystems/modules with a similar
> > miscellaneous set of files?
>
> drivers/base/memory.c has a couple of little things. A writable probe
> file, and a somewhat miscellaneous file for representing the units that
> are being dealt with. Might be a place to start. I can certainly
> answer questions about it. I would also suggest just doing a "find" or
> "tree" on a few of your systems and looking for people that do similar
> things to what you want.
>
> I know I struggled to get the memory hotplug sysfs code to work at
> first, but the code has been very, very low maintenance.
>
> -- Dave
>
>
> _______________________________________________
> Xen-devel mailing list
> [email protected]
> http://lists.xensource.com/xen-devel

--
Dave: Just a question. What use is a unicyle with no seat? And no pedals!
Mark: To answer a question with a question: What use is a skateboard?
Dave: Skateboards have wheels.
Mark: My wheel has a wheel!

2006-01-12 15:20:21

by Keir Fraser

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes


On 12 Jan 2006, at 15:14, Dave Hansen wrote:

>> This is a good set of questions. We have about half dozen files in
>> /proc/xen right now. One is an obvious canididate to stick in /dev, as
>> it has primarily an ioctl() interface.
>
> Actually, anything with an ioctl interface is probably a good cantidate
> for a writable sysfs file. The basic idea is that we prefer something
> in sysfs with a discrete, unique name. It also makes it a lot easier
> to
> develop with because you can look at the values from scripts, and you
> don't have to worry about synchronizing any headers.
>
> So, what kind of ioctls are we talking about?

They are pretty low level. e.g., pass-thru a raw hypercall direct to
xen, map another VM's pages into my address space (given a list of page
frames), etc. very strongly binary, and unlikely to be useful for
scripting.

-- Keir

2006-01-12 15:28:54

by Dave Hansen

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, 2006-01-12 at 09:52 -0500, Mike D. Day wrote:
> Dave Hansen wrote:
> > On Wed, 2006-01-11 at 20:49 -0500, Mike D. Day wrote:
> >>Can the domain be migrated to another physical host?
> >>What scheduler is Xen using (xen has plug-in
> >>schedulers)? All the actual information resides within the xen
> >>hypervisor, not the linux kernel.
> >
> > Other than debugging and curiosity, why are these things needed?
>
> Debugging is always a good reason :) but I'm specifically thinking of
> systems management tools, deployment of virtual machines, and migration.
> All of these attributes are important for tools that manage, deploy, or
> migrate.

-ETOOMANYBUZZWORDS :)

One concern I have with this approach is that it is for things for which
a need is _anticipated_, instead of things that are actually needed. It
is awesome that this is being done in advance, but you have to be
careful not to throw the kitchen sink at the problem from the beginning.

Would a potential workload manager contact the individual Xen partitions
in order to get an overview of the entire machine? Why would it not
simply contact the controlling partition?

-- Dave

2006-01-12 15:37:41

by Dave Hansen

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, 2006-01-12 at 15:26 +0000, Keir Fraser wrote:
> On 12 Jan 2006, at 15:14, Dave Hansen wrote:
>
> >> This is a good set of questions. We have about half dozen files in
> >> /proc/xen right now. One is an obvious canididate to stick in /dev, as
> >> it has primarily an ioctl() interface.
> >
> > Actually, anything with an ioctl interface is probably a good cantidate
> > for a writable sysfs file. The basic idea is that we prefer something
> > in sysfs with a discrete, unique name. It also makes it a lot easier
> > to
> > develop with because you can look at the values from scripts, and you
> > don't have to worry about synchronizing any headers.
> >
> > So, what kind of ioctls are we talking about?
>
> They are pretty low level. e.g., pass-thru a raw hypercall direct to
> xen, map another VM's pages into my address space (given a list of page
> frames), etc. very strongly binary, and unlikely to be useful for
> scripting.

The ppc64 hypervisor does something like this today in a couple of
places. It is kinda a mess. I think that putting a generic, binary
firmware interface leads to having a bit of a crutch. It basically lets
the userspace software stack bypass Linux and talk directly to the
hypervisor. It also means that you have to have a very specialized
software stack for each hypervisor or virtualization type, which is very
bad.

This pushes things out to userspace, which is generally good. But, it
is pushing behavior and "hardware" knowledge out there, too. The
hardware knowledge, especially, is something that we usually try to
encapsulate.

Also things like inter-partition page sharing, and partition migration
are used in other hypervisors. I think it is essential to get common
interfaces to those things.

One last thing... When you say "very strongly binary" do you mean, "are
implemented now as very strongly binary", or "absolutely 100% have to be
horribly strongly binary"? They are two quite different things. :)

-- Dave

2006-01-12 15:42:57

by Anthony Liguori

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Mark Williamson wrote:

>>These will be text files with simple attrributes. Most will be
>>read-only. It is kind of fun to think about creating a domain by doing
>>something like
>>
>>cat $domain_config > /sys/xen/domain/new
>>
>>but there are some ugly aspects of doing so. Likewise it would be good
>>to add a potential migration host by writing an ip address to
>>/sys/xen/migrate/hosts_to
>>
>>Again, we need to get this solidified before going further.
>>
>>
>
>Anthony (cc-ed) did a little work on implementing something like this using
>FuSE to call the existing management interfaces we have for this
>functionality. IIRC, it was mostly targetted at reading information about
>running domains, but it seemed like a good level to implement these
>higher-level controls in a virtual FS.
>
>
Yeah, I like this idea but I agree that sysfs is not the right place for
it (it would requiring maintaining a kobject representation of domains
in the kernel which is going to be painful).

A custom Xen filesystem is definitely the right approach (and even
already exists :-)).

Regards,

Anthony Liguori

>Cheers,
>Mark
>
>

2006-01-12 15:49:10

by Anthony Liguori

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Dave Hansen wrote:

>The ppc64 hypervisor does something like this today in a couple of
>places. It is kinda a mess. I think that putting a generic, binary
>firmware interface leads to having a bit of a crutch. It basically lets
>the userspace software stack bypass Linux and talk directly to the
>hypervisor. It also means that you have to have a very specialized
>software stack for each hypervisor or virtualization type, which is very
>bad.
>
>This pushes things out to userspace, which is generally good. But, it
>is pushing behavior and "hardware" knowledge out there, too. The
>hardware knowledge, especially, is something that we usually try to
>encapsulate.
>
>
The Xen virtual hardware is exposed in the normal way (there is a Xen
bus so Xen devices show up under that).

>Also things like inter-partition page sharing, and partition migration
>are used in other hypervisors. I think it is essential to get common
>interfaces to those things.
>
>
In very, very different ways though.

>One last thing... When you say "very strongly binary" do you mean, "are
>implemented now as very strongly binary", or "absolutely 100% have to be
>horribly strongly binary"? They are two quite different things. :)
>
>
To expose the hypercalls to userspace via sysfs (or another high level
interface) would require a whole bunch of complex code to encode the
hypercalls and decode there results. I'm not sure having a common
interface is a compelling argument to justify this kernel-level
complexity since one can just standardize on a userspace library
(something like http://www.libvir.org).

I do agree we need a common interface though...

Regards,

Anthony Liguori

>-- Dave
>
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to [email protected]
>More majordomo info at http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at http://www.tux.org/lkml/
>
>
>
>

2006-01-12 15:50:48

by Mike D. Day

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Dave Hansen wrote:

>>Debugging is always a good reason :) but I'm specifically thinking of
>>systems management tools, deployment of virtual machines, and migration.
>>All of these attributes are important for tools that manage, deploy, or
>>migrate.
>
>
> -ETOOMANYBUZZWORDS :)

How is this helping to reach consensus?

> One concern I have with this approach is that it is for things for which
> a need is _anticipated_, instead of things that are actually needed. It
> is awesome that this is being done in advance, but you have to be
> careful not to throw the kitchen sink at the problem from the beginning.

I've got 2 problems with this comment. First, these things are actually
needed. VMWare has tools that deploy, manage, and migrate virtual
machines, PHYP does as well (it can't migrate partitions). Xen really
needs tools that do the same. It would be best if these tools are
open-source themselves and use GPL'd code to get the necessary
information from xen. If you try to use xen I think you will agree.

My second problem with this comment the implication that anticipating
needs is bad. I understand your argument but disagree on the kitchen
sink comment.

> Would a potential workload manager contact the individual Xen partitions
> in order to get an overview of the entire machine? Why would it not
> simply contact the controlling partition?

Not sure what you mean by controlling partition. Xen doesn't have a
hardware management console like PHYP does. Xen management tools need to
run on a regular linux kernel that is running within a domain. Usually
this is the first domain created, aka domain 0.

So to answer your question, of course a "workload manager" would contact
domain 0. But how does domain 0 obtain an overview of the entire
machine so it can relay that info back to the "workload manager"? Domain
0 has to get the attributes from the hypervisor. How does it do that?
The best way is to read attributes from sysfs.


--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]

2006-01-12 15:57:56

by Anthony Liguori

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Mike D. Day wrote:

> Greg KH wrote:
>
>> What other, specific sysfs files are you going to want to create?
>> What is the hierarchy going to look like?
>> What is the contents of the file going to look like?
>
>
> You make a very good point. We have not agreed on the heirarchy and
> file contents, and we need to do so before continuing.
> Some _very rough_ ideas include
>
> /sys/xen/version/{major minor extra version build}
> /sys/xen/domain/{dom0 dom1 ... domn} (each domain could be a dir. with
> attributes)

I don't think we want to expose domain specific information in sysfs.
Right now, domain lifecycle events are managed in userspace so
maintaining the kobject hierarchy here would be awkward at best.

> /sys/xen/hypervisor/{scheduler cpu memory}
> /sys/xen/migrate/{hosts_to, hosts_from}

Same thing as above with migration. Let's try to focus on the minimum
set of things we need to expose that we cannot expose else where. There
are other options (like FUSE) for providing a general filesystem
management interface that we can do entirely in userspace.

I think Gerd mentioned earlier that ballooning can be exposed via the
module interface--that's probably a good idea. The ioctl() interfaces
should probably be misc char drivers (especially since /dev/evtchn is
already).

Here's a list of the remaining things we current expose in /proc/xen
that have no obvious place:

1) capabilities (is the domain a management domain)
2) xsd_mfn (a frame number for our bus so that userspace can connect to it)
3) xsd_evtchn (a virtual IRQ for xen bus for userspace)

I would think these would most obviously go under something like:

/sys/hypervisor/xen/

That would introduce a hypervisor subsystem. There are at least a few
hypervisors out there already so this isn't that bad of an idea
(although perhaps it may belong somewhere else in the hierarchy). Greg?

Regards,

Anthony Liguori

> These will be text files with simple attrributes. Most will be
> read-only. It is kind of fun to think about creating a domain by doing
> something like
>
> cat $domain_config > /sys/xen/domain/new
>
> but there are some ugly aspects of doing so. Likewise it would be good
> to add a potential migration host by writing an ip address to
> /sys/xen/migrate/hosts_to
>
> Again, we need to get this solidified before going further.
>
>>
>> I think this is happening as you are trying to port your code that
>> currently uses /proc (and file names there) to use sysfs instead, right?
>> To do this correctly, you need to stop thinking about file names and
>> paths, and start thinking about the hierarchy and relationship between
>> the files, which will allow you to create a tree of kobjects easier.
>
>
> yes
>
>> If you answer the questions above, I think we can work to figure this
>> out.
>
>
> Excellent, we will work on doing so.
>
>> I should be happy you didn't try to post them using Notes :)
>
>
> Make that two of us :)


2006-01-12 17:39:42

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 01:54:46PM +0100, Gerd Hoffmann wrote:
> Hi,
>
> >>Huh? You can't just throw a "MODULE_VERSION()", and a module_init()
> >>somewhere into the xen code to get this to happen? Then all of your
> >>configurable paramaters show up automagically.
> >
> >No, I can't. Xen does not have modules. Xen loads and runs linux.
>
> You can. Just look at a recent drivers/xen/blkback/blkback.c, the
> module parameters specified there show up in
> /sys/module/blkback/parameters, no matter whenever the code was built
> statically into the kernel or as module (which curently doesn't work for
> blkback anyway ...).
>
> Any read-only attributes can trivially be implemented that way. Simple
> writable stuff (balloon driver?) probably too, I don't know whenever a
> notify callback on parameter changes is possible though.

Yes it is.

> The current /proc files which are not simple attributes such as
> /proc/xen/{privcmd,xenbus} are a bit more tricky, not sure what the best
> approach for these is. privcmd returns a filehandle which is then used
> for ioctls (misc char dev maybe?). xenbus can be opened and (I think)
> read(2) on to listen for any xenbus activity, much like /proc/kmsg.
> Suggestions what to use here instead of procfs? Or just leave it there?

Your own filesystem? You can do that in about 200 lines of code these
days :)

And no, it does not belong in procfs.

thanks,

greg k-h

2006-01-12 17:39:50

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 03:42:20PM +0100, Gerd Hoffmann wrote:
> Arjan van de Ven wrote:
> >> privcmd returns a filehandle which is then used
> >>for ioctls (misc char dev maybe?).
> >
> >
> >EWWWWWWWWWWWWWW
> >
> >what is wrong with open() ?????
> >things that return fd's that aren't open() (or dup and socket) are just
> >evil. Esp if it's in proc or sysfs.
>
> Nothing is wrong with open, but probably the sentense above is a bit too
> short. If you call fd = open("/proc/xen/privcmd", ...) you'll get a
> filehandle returned for the thingy (as usual) and then you'll use that
> filehandle to call ioctl(fd, ...), so it's the usual unix way ...

Sounds like a normal filesystem, please don't abuse proc for this.

What exactly do the different ioctls do? Do they have to be ioctls?
Can you use configfs or sysfs for most of the stuff there?

thanks,

greg k-h

2006-01-12 17:40:15

by Greg KH

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 09:57:50AM -0600, Anthony Liguori wrote:
> Here's a list of the remaining things we current expose in /proc/xen
> that have no obvious place:
>
> 1) capabilities (is the domain a management domain)

Is this just a single value or a bitfield?

> 2) xsd_mfn (a frame number for our bus so that userspace can connect to it)

Single number, right?

> 3) xsd_evtchn (a virtual IRQ for xen bus for userspace)

Again, single number?

> I would think these would most obviously go under something like:
>
> /sys/hypervisor/xen/
>
> That would introduce a hypervisor subsystem. There are at least a few
> hypervisors out there already so this isn't that bad of an idea
> (although perhaps it may belong somewhere else in the hierarchy). Greg?

I would have no problem with /sys/hypervisor/xen/ as long as you play by
the rest of the rules for sysfs (one value per file, no binary blobs
being intrepreted by the kernel, etc.)

thanks,

greg k-h

2006-01-12 17:44:14

by Greg KH

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 09:44:38AM -0500, Mike D. Day wrote:
> Greg KH wrote:
> >What other, specific sysfs files are you going to want to create?
> >What is the hierarchy going to look like?
> >What is the contents of the file going to look like?
>
> You make a very good point. We have not agreed on the heirarchy and file
> contents, and we need to do so before continuing.
> Some _very rough_ ideas include
>
> /sys/xen/version/{major minor extra version build}
> /sys/xen/domain/{dom0 dom1 ... domn} (each domain could be a dir. with
> attributes)
> /sys/xen/hypervisor/{scheduler cpu memory}
> /sys/xen/migrate/{hosts_to, hosts_from}
>
> These will be text files with simple attrributes. Most will be
> read-only. It is kind of fun to think about creating a domain by doing
> something like
>
> cat $domain_config > /sys/xen/domain/new
>
> but there are some ugly aspects of doing so. Likewise it would be good
> to add a potential migration host by writing an ip address to
> /sys/xen/migrate/hosts_to
>
> Again, we need to get this solidified before going further.

Ok, feel free to come back when you get this information sorted out.

thanks,

greg k-h

2006-01-12 18:44:38

by Anthony Liguori

[permalink] [raw]
Subject: Re: [Xen-devel] Re: [RFC] [PATCH] sysfs support for Xen attributes

Greg KH wrote:

>On Thu, Jan 12, 2006 at 09:57:50AM -0600, Anthony Liguori wrote:
>
>
>>Here's a list of the remaining things we current expose in /proc/xen
>>that have no obvious place:
>>
>>1) capabilities (is the domain a management domain)
>>
>>
>
>Is this just a single value or a bitfield?
>
>
Right now it's a string that identifies the type of partition is it (for
instance, "control_d" for the control domain).

>>2) xsd_mfn (a frame number for our bus so that userspace can connect to it)
>>
>>
>
>Single number, right?
>
>
Yup.

>>3) xsd_evtchn (a virtual IRQ for xen bus for userspace)
>>
>>
>
>Again, single number?
>
>
Yup.

>>I would think these would most obviously go under something like:
>>
>>/sys/hypervisor/xen/
>>
>>That would introduce a hypervisor subsystem. There are at least a few
>>hypervisors out there already so this isn't that bad of an idea
>>(although perhaps it may belong somewhere else in the hierarchy). Greg?
>>
>>
>
>I would have no problem with /sys/hypervisor/xen/ as long as you play by
>the rest of the rules for sysfs (one value per file, no binary blobs
>being intrepreted by the kernel, etc.)
>
>
Great, thanks!

Regards,

Anthony Liguori

>thanks,
>
>greg k-h
>
>
>

2006-01-12 18:53:49

by Anthony Liguori

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Greg KH wrote:

>What exactly do the different ioctls do? Do they have to be ioctls?
>Can you use configfs or sysfs for most of the stuff there?
>
>
The canonical example is /proc/xen/privcmd which is our userspace
hypercall interface. A hypercall is software interrupt with a number of
parameters passed via registers. This has to come from ring 1 for
security reasons (the kernel is running in ring 1).

We wish to make management hypercalls as the root user in userspace
which means we have to go through the kernel. Currently, we do this by
having /proc/xen/privcmd accept an ioctl() that takes a structure that
describe the register arguments. The kernel interface allows us to
control who in userspace can execute hypercalls.

It would perhaps be possible to use a read/write interface for
hypercalls but ioctl() seems a little less awkward. Suggestions are
certainly appreciated though.

Right now, I think a misc char device with an ioctl() interface seems
like the most promising way to do this. This doesn't seem like the sort
of think one would want to expose in sysfs...

Regards,

Anthony Liguori

>thanks,
>
>greg k-h
>-
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to [email protected]
>More majordomo info at http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at http://www.tux.org/lkml/
>
>
>
>

2006-01-12 18:55:25

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, 2006-01-12 at 12:53 -0600, Anthony Liguori wrote:
>
> We wish to make management hypercalls as the root user in userspace
> which means we have to go through the kernel. Currently, we do this
> by
> having /proc/xen/privcmd accept an ioctl() that takes a structure
> that
> describe the register arguments. The kernel interface allows us to
> control who in userspace can execute hypercalls.

ioctls on proc is evil though (so is ioctl-on-sysfs). It's a device not
a proc file!


2006-01-12 18:59:31

by Anthony Liguori

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Arjan van de Ven wrote:

>On Thu, 2006-01-12 at 12:53 -0600, Anthony Liguori wrote:
>
>
>>We wish to make management hypercalls as the root user in userspace
>>which means we have to go through the kernel. Currently, we do this
>>by
>>having /proc/xen/privcmd accept an ioctl() that takes a structure
>>that
>>describe the register arguments. The kernel interface allows us to
>>control who in userspace can execute hypercalls.
>>
>>
>
>ioctls on proc is evil though (so is ioctl-on-sysfs). It's a device not
>a proc file!
>
>
I full heartedly agree with you :-)

Regards,

Anthony Liguori

2006-01-12 19:02:49

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 12:31:18AM +0100, Pavel Machek wrote:
>
> > The comments desired are (1) do the helper routines in xen_sysfs
> > duplicate code already present in linux (or under development somewhere
> > else). (2) Is it appropriate for a process to create sysfs attributes
> > without implementing a driver subsystem
>
> Not sure, maybe proc is really better.

NO!

{sigh}

Please remember, proc is ONLY FOR PROCESS RELATED THINGS. Do not add
non-process related things to proc anymore please...

thanks,

greg k-h

2006-01-12 19:09:13

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 12:53:40PM -0600, Anthony Liguori wrote:
> Greg KH wrote:
>
> >What exactly do the different ioctls do? Do they have to be ioctls?
> >Can you use configfs or sysfs for most of the stuff there?
> >
> >
> The canonical example is /proc/xen/privcmd which is our userspace
> hypercall interface. A hypercall is software interrupt with a number of
> parameters passed via registers. This has to come from ring 1 for
> security reasons (the kernel is running in ring 1).
>
> We wish to make management hypercalls as the root user in userspace
> which means we have to go through the kernel. Currently, we do this by
> having /proc/xen/privcmd accept an ioctl() that takes a structure that
> describe the register arguments. The kernel interface allows us to
> control who in userspace can execute hypercalls.
>
> It would perhaps be possible to use a read/write interface for
> hypercalls but ioctl() seems a little less awkward. Suggestions are
> certainly appreciated though.
>
> Right now, I think a misc char device with an ioctl() interface seems
> like the most promising way to do this. This doesn't seem like the sort
> of think one would want to expose in sysfs...

ick ick ick.

Why not do the same thing that the Cell developers did for their
"special syscalls"? Or at the least, make it a "real" syscall like the
ppc64 developers did. It's not like there isn't a whole bunch of "prior
art" in the kernel today that you should be ignoring.

Please don't abuse /proc with ioctls like that.

And if you tried to do that with sysfs...

thanks,

greg k-h

2006-01-12 19:11:16

by Mike D. Day

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Anthony Liguori wrote:
> Arjan van de Ven wrote:
>
>> On Thu, 2006-01-12 at 12:53 -0600, Anthony Liguori wrote:
>>
>>
>>> We wish to make management hypercalls as the root user in userspace
>>> which means we have to go through the kernel. Currently, we do this
>>> by having /proc/xen/privcmd accept an ioctl() that takes a structure
>>> that describe the register arguments. The kernel interface allows us
>>> to control who in userspace can execute hypercalls.
>>>
>>
>> ioctls on proc is evil though (so is ioctl-on-sysfs). It's a device not
>> a proc file!
>>
>>
> I full heartedly agree with you :-)

What about making hypercalls via with a read/write interface into memory
mapped by a char device? Any problems with that approach?

Mike



--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]

2006-01-12 19:18:24

by Mike D. Day

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

Greg KH wrote:

>
> Why not do the same thing that the Cell developers did for their
> "special syscalls"? Or at the least, make it a "real" syscall like the
> ppc64 developers did. It's not like there isn't a whole bunch of "prior
> art" in the kernel today that you should be ignoring.

A hypercall syscall would be good in a lot of ways. For x86/x86_64 there
are multiple hypervisors so we would need to make the syscall general
enough to support more than one hypervisor.

Mike
--

Mike D. Day
STSM and Architect, Open Virtualization
IBM Linux Technology Center
[email protected]

2006-01-12 19:31:42

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 02:18:16PM -0500, Mike D. Day wrote:
> Greg KH wrote:
>
> >
> >Why not do the same thing that the Cell developers did for their
> >"special syscalls"? Or at the least, make it a "real" syscall like the
> >ppc64 developers did. It's not like there isn't a whole bunch of "prior
> >art" in the kernel today that you should be ignoring.
>
> A hypercall syscall would be good in a lot of ways. For x86/x86_64 there
> are multiple hypervisors so we would need to make the syscall general
> enough to support more than one hypervisor.

Why? What's wrong with one syscall per hypervisor? It's not like we
have a problem with adding 3 syscalls vs. 1. Let the other hypervisors
also ask for a new syscall when they get added to the kernel tree.

And this also will let the kernel community monitor what you do with
that syscall more carefully (i.e. you only better use it for
pass-through hypervisor stuff, and not as a general multiplexor for
other things...)

thanks,

greg k-h

2006-01-12 19:31:42

by Greg KH

[permalink] [raw]
Subject: Re: [RFC] [PATCH] sysfs support for Xen attributes

On Thu, Jan 12, 2006 at 02:11:10PM -0500, Mike D. Day wrote:
> Anthony Liguori wrote:
> >Arjan van de Ven wrote:
> >
> >>On Thu, 2006-01-12 at 12:53 -0600, Anthony Liguori wrote:
> >>
> >>
> >>>We wish to make management hypercalls as the root user in userspace
> >>>which means we have to go through the kernel. Currently, we do this
> >>>by having /proc/xen/privcmd accept an ioctl() that takes a structure
> >>>that describe the register arguments. The kernel interface allows us
> >>>to control who in userspace can execute hypercalls.
> >>>
> >>
> >>ioctls on proc is evil though (so is ioctl-on-sysfs). It's a device not
> >>a proc file!
> >>
> >>
> >I full heartedly agree with you :-)
>
> What about making hypercalls via with a read/write interface into memory
> mapped by a char device? Any problems with that approach?

Yes, just make it a syscall as our other sub-thread details.

thanks,

greg k-h