[Sorry: been wanting to use that phrase here for the longest time]
This proof-of-concept implementation is pretty poor but the
principle (and interface) is simple:
int my_foo;
static int __init initfn(void)
{
return proc("net", "foo", my_foo, int, 0644);
}
static void __exit exitfn(void)
{
unproc("net", "foo");
}
No kernel-formatted tables: use a directory. (eg. kernel symbols
become a directory of symbol names, each containing the symbol value).
For cases when you don't want to take the overhead of creating a new
proc entry (eg. tcp socket creation), you can create directories on
demand when a user reads them using:
proc_dir("net", "subdir", dirfunc, NULL);
unproc_dir("net", "subdir");
Note that with kbuild 2.5, you can do something like:
proc(KBUILD_OBJECT, "foo", my_foo, int, 0644);
And with my previous parameter patch:
PARAM(foo, int, 0444);
declares a boot time parameter "KBUILD_OBJECT.foo=", or a module
parameter "foo=", and places it as readable in
/proc/KBUILD_OBJECT/foo.
I believe that rewriting /proc (and /proc/sys should simply die) is a
better solution than extending the interface, or avoiding it
altogether by using a new filesystem.
Of course, I don't care if it's *NOT* under /proc...
Rusty.
--
Premature optmztion is rt of all evl. --DK
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.13-uml/include/linux/simpleproc.h working-2.4.13-uml-proc/include/linux/simpleproc.h
--- linux-2.4.13-uml/include/linux/simpleproc.h Thu Jan 1 10:00:00 1970
+++ working-2.4.13-uml-proc/include/linux/simpleproc.h Thu Nov 1 20:17:42 2001
@@ -0,0 +1,206 @@
+/* Dynamic proc filesystem that doesn't suck. (C) 2001 Rusty Russell. */
+#ifndef _LINUX_SIMPLE_PROC_H
+#define _LINUX_SIMPLE_PROC_H
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/stat.h>
+
+/* Commit the contents of this (NUL-terminated) buffer if possible.
+ -errno indicates error. */
+typedef int (proc_commitfn_t)(const char *dirname,
+ const char *filename,
+ const char *buffer,
+ unsigned int size,
+ void *arg);
+/* Fetch the contents into buffer: return size used (or needed), or
+ -errno. */
+typedef int (proc_fetchfn_t)(const char *dirname,
+ const char *filename,
+ char *buffer,
+ unsigned int size,
+ void *arg);
+
+/* If we're a dynamic directory, this routine gets dir contents:
+ returns size used (or needed), or -errno. */
+struct proc_dircontents;
+typedef int (proc_dirfn_t)(const char *dirname,
+ const char *filename,
+ struct proc_dircontents *buffer,
+ unsigned int maxlen,
+ void *arg);
+
+/* Register a proc entry of the given type. */
+#define proc(dir, fname, var, type, perms) \
+ __proc(dir, fname, S_IFREG|(perms), \
+ __new_proc(&var, \
+ ((perms)&S_IRUGO) ? proc_fetch_##type : NULL, \
+ ((perms)&S_IWUGO) ? proc_commit_##type : NULL, \
+ NULL))
+
+/* Register a proc entry protected by a spinlock. */
+#define proc_spinlock(dir, fname, var, type, lock, p) \
+ __proc(dir, fname, S_IFREG|(p), \
+ __new_proc_lock(&var, lock, \
+ ((p)&S_IRUGO) ? proc_fetch_##type : NULL, \
+ ((p)&S_IWUGO) ? proc_commit_##type : NULL))
+
+/* Register a proc entry protected by a semaphore. */
+#define proc_sem(dir, fname, var, type, sem, p) \
+ __proc(dir, fname, S_IFREG|(p), \
+ __new_proc_sem(&var, sem, \
+ ((p)&S_IRUGO) ? proc_fetch_##type : NULL, \
+ ((p)&S_IWUGO) ? proc_commit_##type : NULL))
+
+/* These exist, believe me */
+struct semaphore;
+struct proc_data;
+
+#ifdef CONFIG_SIMPLE_PROC_FS
+/* Low level functions */
+int __proc(const char *dirname, const char *fname, int mode,
+ struct proc_data *pdata);
+struct proc_data *__new_proc(void *arg, proc_fetchfn_t *, proc_commitfn_t *,
+ proc_dirfn_t *);
+struct proc_data *__new_proc_lock(void *arg, spinlock_t *lock,
+ proc_fetchfn_t *, proc_commitfn_t *);
+struct proc_data *__new_proc_sem(void *arg, struct semaphore *sem,
+ proc_fetchfn_t *, proc_commitfn_t *);
+
+/* Register a whole dynamic directory */
+static inline int proc_dir(const char *dir, const char *dirname,
+ proc_dirfn_t *dirfunc, void *arg)
+{
+ return __proc(dir, dirname, S_IFDIR|0555,
+ __new_proc(arg, NULL, NULL, dirfunc));
+}
+
+/* Release a dynamic proc directory */
+void unproc_dir(const char *dir, const char *fname);
+
+/* Release a proc entry */
+void unproc(const char *dir, const char *fname);
+
+#else
+static inline int proc_dir(const char *dir, const char *dirname,
+ proc_dirfn_t *dirfunc, void *arg)
+{
+ return 0;
+}
+
+static inline void unproc(const char *dir, const char *fname)
+{
+}
+
+static inline void unproc_dir(const char *dir, const char *fname)
+{
+}
+
+static inline int __proc(const char *dirname, const char *fname, int mode,
+ struct proc_data *pdata)
+{
+ return 0;
+}
+
+struct proc_data *__new_proc(void *arg,
+ proc_fetchfn_t *fetch,
+ proc_commitfn_t *commit,
+ proc_dirfn_t *dir)
+{
+ return (struct proc_data *)-1;
+}
+struct proc_data *__new_proc_lock(void *arg, spinlock_t *lock,
+ proc_fetchfn_t *fetch,
+ proc_commitfn_t *commit)
+{
+ return (struct proc_data *)-1;
+}
+struct proc_data *__new_proc_sem(void *arg, struct semaphore *sem,
+ proc_fetchfn_t *fetch,
+ proc_commitfn_t *commit)
+{
+ return (struct proc_data *)-1;
+}
+#endif /*CONFIG_PROC_FS*/
+
+/* Helper parsing routines. You can write your own, too. */
+proc_fetchfn_t proc_fetch_short;
+proc_fetchfn_t proc_fetch_ushort;
+proc_fetchfn_t proc_fetch_int;
+proc_fetchfn_t proc_fetch_uint;
+proc_fetchfn_t proc_fetch_long;
+proc_fetchfn_t proc_fetch_ulong;
+proc_fetchfn_t proc_fetch_bool;
+
+proc_commitfn_t proc_commit_short;
+proc_commitfn_t proc_commit_ushort;
+proc_commitfn_t proc_commit_int;
+proc_commitfn_t proc_commit_uint;
+proc_commitfn_t proc_commit_long;
+proc_commitfn_t proc_commit_ulong;
+proc_commitfn_t proc_commit_bool;
+
+/* Filled in by dir functions */
+struct proc_dircontents
+{
+ /* Mode of file. 0 terminates list. */
+ int mode;
+
+ /* Fetch, commit and dir functions for entry. */
+ proc_fetchfn_t *fetch;
+ proc_commitfn_t *commit;
+ proc_dirfn_t *dir;
+
+ /* Arg */
+ void *arg;
+
+ /* Name is nul-terminated, and padded to alignof this struct */
+ char name[0];
+};
+
+/* Helper to add another dircontents to the list, return updated "used" */
+static inline unsigned int proc_add_dircontents(struct proc_dircontents *pd,
+ unsigned int used,
+ unsigned int maxlen,
+ int mode,
+ proc_fetchfn_t *fetch,
+ proc_commitfn_t *commit,
+ proc_dirfn_t *dir,
+ void *arg,
+ const char *name)
+{
+ unsigned int thislen;
+
+ thislen = sizeof(*pd) + strlen(name) + 1;
+ thislen = (thislen + __alignof__(*pd) - 1) & ~(__alignof__(*pd) - 1);
+ if (used + thislen <= maxlen) {
+ pd = (void *)pd + used;
+ pd->mode = mode;
+ pd->fetch = fetch;
+ pd->commit = commit;
+ pd->dir = dir;
+ pd->arg = arg;
+ strcpy(pd->name, name);
+ }
+ return used + thislen;
+}
+
+static inline unsigned int proc_end_dircontents(struct proc_dircontents *pd,
+ unsigned int used,
+ unsigned int maxlen)
+{
+ return proc_add_dircontents(pd, used, maxlen, 0,
+ NULL, NULL, NULL, NULL, "");
+}
+
+/* Internal use */
+struct proc_data
+{
+ /* User-defined argument for routines */
+ void *arg;
+
+ proc_dirfn_t *dir;
+ proc_commitfn_t *commit;
+ proc_fetchfn_t *fetch;
+};
+#endif /* _LINUX_SIMPLE_PROC_H */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.13-uml/fs/Config.in working-2.4.13-uml-proc/fs/Config.in
--- linux-2.4.13-uml/fs/Config.in Thu Oct 25 11:29:49 2001
+++ working-2.4.13-uml-proc/fs/Config.in Tue Oct 30 12:47:11 2001
@@ -50,7 +50,10 @@
tristate 'OS/2 HPFS file system support' CONFIG_HPFS_FS
-bool '/proc file system support' CONFIG_PROC_FS
+bool 'Simple /proc file system support (EXPERIMENTAL)' CONFIG_SIMPLE_PROC_FS
+if [ "$CONFIG_SIMPLE_PROC_FS" != y ]; then
+ bool '/proc file system support' CONFIG_PROC_FS
+fi
dep_bool '/dev file system support (EXPERIMENTAL)' CONFIG_DEVFS_FS $CONFIG_EXPERIMENTAL
dep_bool ' Automatically mount at boot' CONFIG_DEVFS_MOUNT $CONFIG_DEVFS_FS
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.13-uml/fs/Makefile working-2.4.13-uml-proc/fs/Makefile
--- linux-2.4.13-uml/fs/Makefile Thu Oct 25 11:29:49 2001
+++ working-2.4.13-uml-proc/fs/Makefile Tue Oct 30 12:47:11 2001
@@ -23,6 +23,7 @@
endif
subdir-$(CONFIG_PROC_FS) += proc
+subdir-$(CONFIG_SIMPLE_PROC_FS) += simpleproc
subdir-y += partitions
# Do not add any filesystems before this line
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.13-uml/fs/proc/simple_proc.c working-2.4.13-uml-proc/fs/proc/simple_proc.c
--- linux-2.4.13-uml/fs/proc/simple_proc.c Thu Jan 1 10:00:00 1970
+++ working-2.4.13-uml-proc/fs/proc/simple_proc.c Tue Oct 30 12:47:11 2001
@@ -0,0 +1,517 @@
+/* Those of you who read this, give quiet thanks that you did not
+ suffer the endless frustration of dealing with the old /proc
+ interface.
+
+ Copyright (C) 2001 Rusty Russell.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+#include <linux/proc.h>
+#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+
+/* Simplistic approach: semaphore protects all proc accesses. */
+static DECLARE_MUTEX(simple_proc_sem);
+
+/* FIXME: Use reference counts and "dead" marker to return -ENOENT if
+ unregistered while open -RR */
+struct proc_data
+{
+ void *arg;
+ int (*get)(void *, char *, int);
+ int (*set)(void *, const char *);
+
+ /* FIXME: Belongs in struct file --RR */
+ int readlen, maxreadlen, writelen, maxwritelen;
+ char *readdata, *writedata;
+};
+
+static int fill_buffer(char **buffer,
+ int *maxlen,
+ struct proc_data *pdata)
+{
+ int len;
+
+ for (;;) {
+ len = pdata->get(pdata->arg, *buffer, *maxlen);
+ /* Need more room? */
+ if (len > *maxlen) {
+ /* We need some restriction here, to avoid
+ DoS. fs/proc/generic.c wants this, but we
+ should make one or two pages eventually. */
+ if (len > PAGE_SIZE - 1024)
+ BUG();
+ kfree(*buffer);
+ *buffer = kmalloc(len, GFP_KERNEL);
+ if (!*buffer) return -ENOMEM;
+ *maxlen = len;
+ } else
+ return len;
+ }
+}
+
+/* FIXME: Get the struct file, and we can use ->private_data to store
+ this per file descriptor, rather than per file --RR */
+static int simple_read(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct proc_data *pdata = data;
+ int ret;
+
+ /* Start of read? Get fresh buffer */
+ if (off == 0) {
+ int readlen, maxreadlen;
+ char *buffer;
+
+ maxreadlen = pdata->maxreadlen;
+ buffer = kmalloc(maxreadlen, GFP_KERNEL);
+ if (!buffer) {
+ *eof = 1;
+ return -ENOMEM;
+ }
+ readlen = fill_buffer(&buffer, &maxreadlen, pdata);
+ if (readlen < 0) {
+ *eof = 1;
+ kfree(buffer);
+ return readlen;
+ }
+
+ /* Substitute buffer */
+ if (down_interruptible(&simple_proc_sem) != 0) {
+ *eof = 1;
+ kfree(buffer);
+ return -EINTR;
+ }
+ kfree(pdata->readdata);
+ pdata->maxreadlen = maxreadlen;
+ pdata->readlen = readlen;
+ pdata->readdata = buffer;
+ up(&simple_proc_sem);
+ }
+
+ /* Serve from buffer */
+ if (down_interruptible(&simple_proc_sem) != 0) {
+ *eof = 1;
+ return -EINTR;
+ }
+
+ if (off <= pdata->readlen) {
+ ret = pdata->readlen - off;
+ memcpy(page + off, pdata->readdata + off, ret);
+ } else {
+ *eof = 1;
+ ret = 0;
+ }
+ up(&simple_proc_sem);
+
+ return ret;
+}
+
+/* FIXME: Don't share the write buffer: use file->private_data */
+static int simple_write(struct file *file,
+ const char *userbuffer,
+ unsigned long count,
+ void *data)
+{
+ struct proc_data *pdata = data;
+
+ /* FIXME: commit the write(s) on close or seek. We don't have
+ that control under the current proc system, so simply
+ terminate on \n. --RR */
+ if (file->f_pos + count > pdata->maxwritelen) {
+ char *newbuffer;
+ int newmax = file->f_pos + count;
+
+ /* As in read, we need some limit, and this is from
+ fs/proc/generic.c */
+ if (newmax > PAGE_SIZE - 1024)
+ return -ENOSPC;
+
+ newbuffer = kmalloc(newmax, GFP_KERNEL);
+ if (!newbuffer)
+ return -ENOMEM;
+
+ /* Substitute buffer */
+ if (down_interruptible(&simple_proc_sem) != 0) {
+ kfree(newbuffer);
+ return -EINTR;
+ }
+ memcpy(newbuffer, pdata->writedata, pdata->writelen);
+ kfree(pdata->writedata);
+ pdata->maxwritelen = newmax;
+ pdata->writedata = newbuffer;
+ up(&simple_proc_sem);
+ }
+
+ /* Copy into buffer */
+ if (down_interruptible(&simple_proc_sem) != 0)
+ return -EINTR;
+
+ if (copy_from_user(pdata->writedata+file->f_pos, userbuffer, count)
+ != 0) {
+ up(&simple_proc_sem);
+ return -EFAULT;
+ }
+
+ file->f_pos += count;
+
+ /* If there is now a '\n' at the end of the buffer, commit */
+ if (file->f_pos > 0 && pdata->writedata[file->f_pos-1] == '\n') {
+ int set;
+ pdata->writedata[file->f_pos-1] = '\0';
+ set = pdata->set(pdata->arg, pdata->writedata);
+
+ if (set < 0) {
+ up(&simple_proc_sem);
+ return set;
+ }
+ }
+ up(&simple_proc_sem);
+ return count;
+}
+
+/* This implementation serves only as a demonstration. --RR */
+static int do_register(const char *dir,
+ const char *fname,
+ int perms,
+ struct proc_data *pdata)
+{
+ struct proc_dir_entry *entry;
+ char fullpath[strlen(dir) + 1 + strlen(fname) + 1];
+
+ sprintf(fullpath, "%s/%s", dir, fname);
+ entry = create_proc_entry(fullpath, perms, NULL);
+ if (!entry) return -EINVAL; /* -ERANDOM */
+
+ /* Populate data */
+ entry->data = pdata;
+
+ /* Set up read and write callbacks */
+ if (pdata->set) entry->read_proc = &simple_read;
+ if (pdata->get) entry->write_proc = &simple_write;
+ return 0;
+}
+
+int __register_proc(const char *dir,
+ const char *fname,
+ void *arg,
+ unsigned int perms,
+ int (*get)(void *arg, char *, int),
+ int (*set)(void *arg, const char *))
+{
+ struct proc_data *pdata;
+ int ret;
+
+ pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ pdata->arg = arg;
+ pdata->get = get;
+ pdata->set = set;
+ pdata->writelen = pdata->readlen = 0;
+ pdata->readdata = pdata->writedata = NULL;
+
+ ret = do_register(dir, fname, perms, pdata);
+ if (ret < 0)
+ kfree(pdata);
+ return ret;
+}
+
+/* Wrapper for user's real proc functions */
+struct pdata_wrapper
+{
+ struct proc_data pdata;
+ int (*get)(void *, char *, int);
+ int (*set)(void *, const char *);
+ void *lock;
+ void *userarg;
+};
+
+static struct pdata_wrapper *
+new_pdata_wrapper(void *arg,
+ int (*userget)(void *, char *, int),
+ int (*userset)(void *, const char *),
+ int (*wrapperget)(void *, char *, int),
+ int (*wrapperset)(void *, const char *),
+ void *lock)
+{
+ struct pdata_wrapper *pwrap;
+
+ pwrap = kmalloc(sizeof(*pwrap), GFP_KERNEL);
+ if (pwrap) {
+ pwrap->pdata.arg = pwrap;
+ pwrap->pdata.writelen = pwrap->pdata.readlen = 0;
+ pwrap->pdata.readdata = pwrap->pdata.writedata = NULL;
+ pwrap->pdata.get = wrapperget;
+ pwrap->pdata.set = wrapperset;
+ pwrap->lock = lock;
+ pwrap->userarg = arg;
+ pwrap->get = userget;
+ pwrap->set = userset;
+ }
+ return pwrap;
+}
+
+static int do_register_wrap(const char *dir,
+ const char *fname,
+ int perms,
+ void *arg,
+ int (*userget)(void *, char *, int),
+ int (*userset)(void *, const char *),
+ int (*wrapperget)(void *, char *, int),
+ int (*wrapperset)(void *, const char *),
+ void *lock)
+{
+ struct pdata_wrapper *pwrap;
+ int ret;
+
+ pwrap = new_pdata_wrapper(arg, userget, userset, wrapperget,
+ wrapperset, lock);
+ if (!pwrap)
+ return -ENOMEM;
+ ret = do_register(dir, fname, perms, &pwrap->pdata);
+ if (ret < 0)
+ kfree(pwrap);
+ return ret;
+}
+
+static int spinlock_get(void *arg, char *buffer, int size)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ spin_lock_irq(pwrap->lock);
+ ret = pwrap->get(pwrap->userarg, buffer, size);
+ spin_unlock_irq(pwrap->lock);
+
+ return ret;
+}
+
+static int spinlock_set(void *arg, const char *buffer)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ spin_lock_irq(pwrap->lock);
+ ret = pwrap->set(pwrap->userarg, buffer);
+ spin_unlock_irq(pwrap->lock);
+
+ return ret;
+}
+
+int __register_proc_spinlock(const char *dir,
+ const char *fname,
+ void *arg,
+ unsigned int perms,
+ spinlock_t *lock,
+ int (*get)(void *arg, char *, int),
+ int (*set)(void *arg, const char *))
+{
+ return do_register_wrap(dir, fname, perms, arg, get, set,
+ spinlock_get, spinlock_set, lock);
+}
+
+static int rwlock_get(void *arg, char *buffer, int size)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ read_lock_irq(pwrap->lock);
+ ret = pwrap->get(pwrap->userarg, buffer, size);
+ read_unlock_irq(pwrap->lock);
+
+ return ret;
+}
+
+static int rwlock_set(void *arg, const char *buffer)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ write_lock_irq(pwrap->lock);
+ ret = pwrap->set(pwrap->userarg, buffer);
+ write_unlock_irq(pwrap->lock);
+
+ return ret;
+}
+
+int __register_proc_rwlock(const char *dir,
+ const char *fname,
+ void *arg,
+ unsigned int perms,
+ rwlock_t *lock,
+ int (*get)(void *arg, char *, int),
+ int (*set)(void *arg, const char *))
+{
+ return do_register_wrap(dir, fname, perms, arg, get, set,
+ rwlock_get, rwlock_set, lock);
+}
+
+static int semaphore_get(void *arg, char *buffer, int size)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ if (down_interruptible(pwrap->lock) != 0)
+ return -EINTR;
+ ret = pwrap->get(pwrap->userarg, buffer, size);
+ up(pwrap->lock);
+
+ return ret;
+}
+
+static int semaphore_set(void *arg, const char *buffer)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ if (down_interruptible(pwrap->lock) != 0)
+ return -EINTR;
+ ret = pwrap->set(pwrap->userarg, buffer);
+ up(pwrap->lock);
+
+ return ret;
+}
+
+int __register_proc_semaphore(const char *dir,
+ const char *fname,
+ void *arg,
+ unsigned int perms,
+ struct semaphore *lock,
+ int (*get)(void *arg, char *, int),
+ int (*set)(void *arg, const char *))
+{
+ return do_register_wrap(dir, fname, perms, arg, get, set,
+ semaphore_get, semaphore_set, lock);
+}
+
+static int rwsemaphore_get(void *arg, char *buffer, int size)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ down_read(pwrap->lock);
+ ret = pwrap->get(pwrap->userarg, buffer, size);
+ up_read(pwrap->lock);
+
+ return ret;
+}
+
+static int rwsemaphore_set(void *arg, const char *buffer)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ down_write(pwrap->lock);
+ ret = pwrap->set(pwrap->userarg, buffer);
+ up_write(pwrap->lock);
+
+ return ret;
+}
+
+int __register_proc_rwsemaphore(const char *dir,
+ const char *fname,
+ void *arg,
+ unsigned int perms,
+ struct rw_semaphore *lock,
+ int (*get)(void *arg, char *, int),
+ int (*set)(void *arg, const char *))
+{
+ return do_register_wrap(dir, fname, perms, arg, get, set,
+ rwsemaphore_get, rwsemaphore_set, lock);
+}
+
+int __proc_read_short(void *shortp, char *outbuf, int len)
+{
+ return snprintf(outbuf, len, "%hi", *(short *)shortp);
+}
+
+int __proc_write_short(void *shortp, const char *inbuf)
+{
+ if (sscanf(inbuf, "%hi", (short *)shortp) != 1) return -EINVAL;
+ return 0;
+}
+
+int __proc_read_ushort(void *ushortp, char *outbuf, int len)
+{
+ return snprintf(outbuf, len, "%hu", *(unsigned short *)ushortp);
+}
+
+int __proc_write_ushort(void *ushortp, const char *inbuf)
+{
+ if (sscanf(inbuf, "%hu", (unsigned short *)ushortp) != 1)
+ return -EINVAL;
+ return 0;
+}
+
+int __proc_read_int(void *intp, char *outbuf, int len)
+{
+ return snprintf(outbuf, len, "%i", *(int *)intp);
+}
+
+int __proc_write_int(void *intp, const char *inbuf)
+{
+ if (sscanf(inbuf, "%i", (int *)intp) != 1) return -EINVAL;
+ return 0;
+}
+
+int __proc_read_uint(void *uintp, char *outbuf, int len)
+{
+ return snprintf(outbuf, len, "%u", *(unsigned int *)uintp);
+}
+
+int __proc_write_uint(void *uintp, const char *inbuf)
+{
+ if (sscanf(inbuf, "%u", (unsigned int *)uintp) != 1) return -EINVAL;
+ return 0;
+}
+
+int __proc_read_long(void *longp, char *outbuf, int len)
+{
+ return snprintf(outbuf, len, "%li", *(long *)longp);
+}
+
+int __proc_write_long(void *longp, const char *inbuf)
+{
+ if (sscanf(inbuf, "%li", (long *)longp) != 1) return -EINVAL;
+ return 0;
+}
+
+int __proc_read_ulong(void *ulongp, char *outbuf, int len)
+{
+ return snprintf(outbuf, len, "%lu", *(long *)ulongp);
+}
+
+int __proc_write_ulong(void *ulongp, const char *inbuf)
+{
+ if (sscanf(inbuf, "%lu", (unsigned long *)ulongp) != 1) return -EINVAL;
+ return 0;
+}
+
+int __proc_read_bool(void *boolp, char *outbuf, int len)
+{
+ if (*(int *)boolp) return snprintf(outbuf, len, "y");
+ else return snprintf(outbuf, len, "n");
+}
+
+int __proc_write_bool(void *boolp, const char *inbuf)
+{
+ if (inbuf[0] == 'y' || inbuf[0] == 'Y')
+ *(int *)boolp = 1;
+ else if (inbuf[0] == 'n' || inbuf[0] == 'N')
+ *(int *)boolp = 0;
+ else return __proc_write_int(boolp, inbuf);
+ return 0;
+}
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.13-uml/fs/simpleproc/Makefile working-2.4.13-uml-proc/fs/simpleproc/Makefile
--- linux-2.4.13-uml/fs/simpleproc/Makefile Thu Jan 1 10:00:00 1970
+++ working-2.4.13-uml-proc/fs/simpleproc/Makefile Tue Oct 30 12:47:11 2001
@@ -0,0 +1,14 @@
+#
+# Makefile for the Linux proc filesystem routines.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile.
+
+O_TARGET := simpleproc.o
+
+obj-y := inode.o helper.o
+
+include $(TOPDIR)/Rules.make
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.13-uml/fs/simpleproc/helper.c working-2.4.13-uml-proc/fs/simpleproc/helper.c
--- linux-2.4.13-uml/fs/simpleproc/helper.c Thu Jan 1 10:00:00 1970
+++ working-2.4.13-uml-proc/fs/simpleproc/helper.c Thu Nov 1 20:58:13 2001
@@ -0,0 +1,277 @@
+/* Those of you who read this, give quiet thanks that you did not
+ suffer the endless frustration of dealing with the old /proc
+ interface.
+
+ Copyright (C) 2001 Rusty Russell.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+#include <linux/simpleproc.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/dcache.h>
+#include <linux/init.h>
+
+/* Wrapper for user's real proc functions */
+struct pdata_wrapper
+{
+ struct proc_data pdata;
+ proc_fetchfn_t *fetch;
+ proc_commitfn_t *commit;
+ void *lock;
+ void *userarg;
+};
+
+static struct proc_data *new_wrapper(proc_fetchfn_t *userfetch,
+ proc_commitfn_t *usercommit,
+ void *userarg,
+ proc_fetchfn_t *wrapfetch,
+ proc_commitfn_t *wrapcommit,
+ void *lock)
+{
+ struct pdata_wrapper *pwrap;
+
+ pwrap = kmalloc(sizeof(*pwrap), GFP_KERNEL);
+ if (pwrap) {
+ pwrap->pdata.arg = pwrap;
+ pwrap->pdata.fetch = wrapfetch;
+ pwrap->pdata.commit = wrapcommit;
+ pwrap->pdata.dir = NULL;
+ pwrap->fetch = userfetch;
+ pwrap->commit = usercommit;
+ pwrap->lock = lock;
+ pwrap->userarg = userarg;
+ }
+ return &pwrap->pdata;
+}
+
+static int lock_fetch(const char *dirname, const char *fname,
+ char *buffer, unsigned int size, void *arg)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ spin_lock_irq(pwrap->lock);
+ ret = pwrap->fetch(dirname, fname, pwrap->userarg, size, buffer);
+ spin_unlock_irq(pwrap->lock);
+
+ return ret;
+}
+
+static int lock_commit(const char *dirname, const char *fname,
+ const char *buffer, unsigned int size, void *arg)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ spin_lock_irq(pwrap->lock);
+ ret = pwrap->commit(dirname, fname, buffer, size, pwrap->userarg);
+ spin_unlock_irq(pwrap->lock);
+
+ return ret;
+}
+
+struct proc_data *__new_proc_lock(void *arg, spinlock_t *lock,
+ proc_fetchfn_t *fetch,
+ proc_commitfn_t *commit)
+{
+ return new_wrapper(fetch, commit, arg, lock_fetch, lock_commit, lock);
+}
+
+static int sem_fetch(const char *dirname, const char *fname,
+ char *buffer, unsigned int size, void *arg)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ if (down_interruptible(pwrap->lock) != 0)
+ return -EINTR;
+ ret = pwrap->fetch(dirname, fname, pwrap->userarg, size, buffer);
+ up(pwrap->lock);
+
+ return ret;
+}
+
+static int sem_commit(const char *dirname, const char *fname,
+ const char *buffer, unsigned int size, void *arg)
+{
+ struct pdata_wrapper *pwrap = arg;
+ int ret;
+
+ if (down_interruptible(pwrap->lock) != 0)
+ return -EINTR;
+ ret = pwrap->commit(dirname, fname, buffer, size, pwrap->userarg);
+ up(pwrap->lock);
+
+ return ret;
+}
+
+struct proc_data *__new_proc_sem(void *arg, struct semaphore *sem,
+ proc_fetchfn_t *fetch,
+ proc_commitfn_t *commit)
+{
+ return new_wrapper(fetch, commit, arg, sem_fetch, sem_commit, sem);
+}
+
+int proc_fetch_short(const char *dir, const char *fname,
+ char *outbuf, unsigned int size, void *shortp)
+{
+ return snprintf(outbuf, size, "%hi\n", *(short *)shortp);
+}
+
+int proc_commit_short(const char *dir, const char *fname,
+ const char *inbuf, unsigned int size, void *shortp)
+{
+ if (sscanf(inbuf, "%hi", (short *)shortp) != 1) return -EINVAL;
+ return 0;
+}
+
+int proc_fetch_ushort(const char *dir, const char *fname,
+ char *outbuf, unsigned int size, void *ushortp)
+{
+ return snprintf(outbuf, size, "%hu\n", *(unsigned short *)ushortp);
+}
+
+int proc_commit_ushort(const char *dir, const char *fname,
+ const char *inbuf, unsigned int size, void *ushortp)
+{
+ if (sscanf(inbuf, "%hu", (unsigned short *)ushortp) != 1)
+ return -EINVAL;
+ return 0;
+}
+
+int proc_fetch_int(const char *dir, const char *fname,
+ char *outbuf, unsigned int size, void *intp)
+{
+ return snprintf(outbuf, size, "%i\n", *(int *)intp);
+}
+
+int proc_commit_int(const char *dir, const char *fname,
+ const char *inbuf, unsigned int size, void *intp)
+{
+ if (sscanf(inbuf, "%i", (int *)intp) != 1) return -EINVAL;
+ return 0;
+}
+
+int proc_fetch_uint(const char *dir, const char *fname,
+ char *outbuf, unsigned int size, void *uintp)
+{
+ return snprintf(outbuf, size, "%u\n", *(unsigned int *)uintp);
+}
+
+int proc_commit_uint(const char *dir, const char *fname,
+ const char *inbuf, unsigned int size, void *uintp)
+{
+ if (sscanf(inbuf, "%u", (unsigned int *)uintp) != 1) return -EINVAL;
+ return 0;
+}
+
+int proc_fetch_long(const char *dir, const char *fname,
+ char *outbuf, unsigned int size, void *longp)
+{
+ return snprintf(outbuf, size, "%li\n", *(long *)longp);
+}
+
+int proc_commit_long(const char *dir, const char *fname,
+ const char *inbuf, unsigned int size, void *longp)
+{
+ if (sscanf(inbuf, "%li", (long *)longp) != 1) return -EINVAL;
+ return 0;
+}
+
+int proc_fetch_ulong(const char *dir, const char *fname,
+ char *outbuf, unsigned int size, void *ulongp)
+{
+ return snprintf(outbuf, size, "%lu\n", *(long *)ulongp);
+}
+
+int proc_commit_ulong(const char *dir, const char *fname,
+ const char *inbuf, unsigned int size, void *ulongp)
+{
+ if (sscanf(inbuf, "%lu", (unsigned long *)ulongp) != 1) return -EINVAL;
+ return 0;
+}
+
+int proc_fetch_bool(const char *dir, const char *fname,
+ char *outbuf, unsigned int size, void *boolp)
+{
+ if (*(int *)boolp) return snprintf(outbuf, size, "y\n");
+ else return snprintf(outbuf, size, "n\n");
+}
+
+int proc_commit_bool(const char *dir, const char *fname,
+ const char *inbuf, unsigned int size, void *boolp)
+{
+ if (inbuf[0] == 'y' || inbuf[0] == 'Y')
+ *(int *)boolp = 1;
+ else if (inbuf[0] == 'n' || inbuf[0] == 'N')
+ *(int *)boolp = 0;
+ else return proc_commit_int(dir, fname, inbuf, size, boolp);
+ return 0;
+}
+
+/* Test code: delete me */
+static int number = 7;
+
+static int testfetch(const char *dirname,
+ const char *filename,
+ char *buffer,
+ unsigned int size,
+ void *arg)
+{
+ /* As an example, each one holds its own name */
+ return snprintf(buffer, size, "%s/%s\n", dirname, filename);
+}
+
+static int dirfunc(const char *dirname,
+ const char *filename,
+ struct proc_dircontents *buffer,
+ unsigned int maxlen,
+ void *arg)
+{
+ unsigned int used = 0;
+ char name[100];
+ unsigned int i;
+
+ for (i = 0; i < 10; i++) {
+ sprintf(name, "file-%u", i);
+ used = proc_add_dircontents(buffer, used, maxlen,
+ S_IFREG|0400, testfetch,
+ NULL, NULL, NULL, name);
+ }
+ /* And one infinite subdirectory example */
+ used = proc_add_dircontents(buffer, used, maxlen,
+ S_IFDIR|0555, NULL, NULL, dirfunc, NULL,
+ "subdir");
+ return proc_end_dircontents(buffer, used, maxlen);
+}
+
+static int __init init_test(void)
+{
+ int ret;
+ ret = proc("testdir", "number", number, int, 0644);
+ if (ret)
+ printk("Proc registration failed: %i\n", ret);
+ proc_dir("testdir", "subdir", dirfunc, NULL);
+ return 0;
+}
+
+static void __exit exit_test(void)
+{
+ unproc("testdir", "number");
+ unproc_dir("testdir", "subdir");
+}
+
+module_init(init_test);
+module_exit(exit_test);
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.13-uml/fs/simpleproc/inode.c working-2.4.13-uml-proc/fs/simpleproc/inode.c
--- linux-2.4.13-uml/fs/simpleproc/inode.c Thu Jan 1 10:00:00 1970
+++ working-2.4.13-uml-proc/fs/simpleproc/inode.c Thu Nov 1 21:29:14 2001
@@ -0,0 +1,790 @@
+/*
+ * Simple /proc filesystem for Linux.
+ *
+ * Conceptually, there are two types of directories here: static
+ * (entries are created and deleted using
+ * register_proc/unregister_proc), and dynamic (contents are created
+ * on demand using a callback).
+ *
+ * Copyright (C) 2001 Rusty Russell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/simpleproc.h>
+#include <asm/semaphore.h>
+
+#include <asm/uaccess.h>
+
+/* Proc mount point */
+struct vfsmount *proc_mnt;
+
+/* Serialize insert/delete and mounts */
+static DECLARE_MUTEX(proc_semaphore);
+
+#define SIMPLE_PROCFS_MAGIC 0x62121174
+
+/* Start with this many bytes allocated for file read */
+#define PROCFS_START_FILE 64
+/* Start with this many bytes allocated for directory read */
+#define PROCFS_START_DIR PAGE_SIZE
+/* Approximate upper ceiling for memory usage per fs */
+#define PROCFS_MAX_SIZE PAGE_SIZE
+
+/* Pre-decls for assigning */
+static struct inode_operations proc_punt_inodeops;
+static struct file_operations proc_helper_fileops;
+static struct file_operations proc_helper_dirops;
+static struct file_operations proc_punt_dirops;
+static struct inode_operations proc_helper_inodeops;
+static struct super_operations proc_ops;
+static struct dentry_operations proc_dentry_ops;
+
+struct proc_buffer
+{
+ unsigned int maxlen;
+ unsigned int len;
+ /* One is for the nul terminator */
+ char buffer[1];
+};
+
+struct proc_data *__new_proc(void *arg,
+ proc_fetchfn_t *fetch,
+ proc_commitfn_t *commit,
+ proc_dirfn_t *dir)
+{
+ struct proc_data *pdata;
+
+ pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+ if (pdata) {
+ pdata->arg = arg;
+ pdata->fetch = fetch;
+ pdata->commit = commit;
+ pdata->dir = dir;
+ }
+ return pdata;
+}
+
+/* FIXME: I have no idea what all this does: stolen from old /proc --RR */
+static int proc_statfs(struct super_block *sb, struct statfs *buf)
+{
+ buf->f_type = SIMPLE_PROCFS_MAGIC;
+ buf->f_bsize = PAGE_SIZE/sizeof(long);
+ buf->f_bfree = 0;
+ buf->f_bavail = 0;
+ buf->f_ffree = 0;
+ buf->f_namelen = NAME_MAX;
+ return 0;
+}
+
+/* Convenience routine to make an inode */
+static struct inode *
+new_proc_inode(struct super_block *sb, int mode, int is_dynamic)
+{
+ struct inode * inode = new_inode(sb);
+
+ if (!inode)
+ return NULL;
+
+ inode->i_mode = mode;
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blocks = 0;
+ inode->i_rdev = NODEV;
+ inode->i_mapping->a_ops = NULL;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ switch (mode & S_IFMT) {
+ case S_IFREG:
+ inode->i_fop = &proc_helper_fileops;
+ break;
+ case S_IFDIR:
+ if (is_dynamic) {
+ inode->i_fop = &proc_punt_dirops;
+ inode->i_op = &proc_punt_inodeops;
+ } else {
+ inode->i_fop = &proc_helper_dirops;
+ inode->i_op = &proc_helper_inodeops;
+ }
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ return inode;
+}
+
+/* Make a new proc entry in this directory: must be holding proc_semapore */
+static int make_proc_entry(struct dentry *dir,
+ const char *fname,
+ int mode,
+ struct proc_data *pdata,
+ int is_dynamic)
+{
+ struct inode *inode;
+ struct dentry *dentry;
+ struct qstr qstr;
+
+ /* Create qstr for this entry */
+ qstr.name = fname;
+ qstr.len = strlen(fname);
+ qstr.hash = full_name_hash(qstr.name, qstr.len);
+
+ /* You can't put a static proc entry in a dynamic dir */
+ if (dir->d_inode->i_op == &proc_punt_inodeops)
+ BUG();
+
+ /* Does it already exist? */
+ dentry = d_lookup(dir, &qstr);
+ if (dentry) {
+ dput(dentry);
+ return -EEXIST;
+ }
+
+ /* Doesn't exist: create inode */
+ inode = new_proc_inode(dir->d_sb, mode, is_dynamic);
+ if (!inode)
+ return -ENOMEM;
+
+ /* Create dentry */
+ dentry = d_alloc(dir, &qstr);
+ if (!dentry) {
+ iput(inode);
+ return -ENOMEM;
+ }
+ dentry->d_op = &proc_dentry_ops;
+ dentry->d_fsdata = pdata;
+ d_add(dentry, inode);
+
+ /* Pin the dentry here, so it doesn't get pruned */
+ dget(dentry);
+ return 0;
+}
+
+/* Create (static) proc directory if neccessary. */
+/* FIXME: Keep refcnt, so we can delete when no more users */
+static struct dentry *get_proc_dir(const char *dirname)
+{
+ struct dentry *dentry;
+ struct qstr qstr;
+ const char *delim;
+
+ /* FIXME: Definitely need a better way --RR */
+ dentry = dget(proc_mnt->mnt_sb->s_root);
+ delim = dirname;
+
+ for (;;) {
+ struct dentry *newdentry;
+
+ /* Ignore multiple slashes */
+ while (*delim == '/') delim++;
+ qstr.name = delim;
+ delim = strchr(qstr.name, '/');
+ if (!delim) delim = qstr.name + strlen(qstr.name);
+ qstr.len = delim-(char *)qstr.name;
+ qstr.hash = full_name_hash(qstr.name, qstr.len);
+
+ if (qstr.len == 0)
+ break;
+
+ /* If entry doesn't exist, create it */
+ while (!(newdentry = d_lookup(dentry, &qstr))) {
+ char fname[qstr.len+1];
+ int ret;
+
+ strncpy(fname, qstr.name, qstr.len);
+ fname[qstr.len] = '\0';
+ down(&proc_semaphore);
+ ret = make_proc_entry(dentry, fname, S_IFDIR|0555,
+ NULL, 0);
+ up(&proc_semaphore);
+
+ if (ret < 0) {
+ dput(dentry);
+ return ERR_PTR(ret);
+ }
+ }
+ dput(dentry);
+ dentry = newdentry;
+ }
+ return dentry;
+}
+
+/* Actually add a proc file or dynamic directory */
+int __proc(const char *dirname, const char *fname, int mode,
+ struct proc_data *pdata)
+{
+ struct dentry *dir;
+ int ret;
+
+ if (!pdata)
+ return -ENOMEM;
+
+ dir = get_proc_dir(dirname);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ ret = make_proc_entry(dir, fname, mode, pdata, S_ISDIR(mode) ? 1 : 0);
+ dput(dir);
+ return ret;
+}
+
+static int proc_nofetch(const char *dirname, const char *fname,
+ char *outbuf, unsigned int len, void *arg)
+{
+ return -ENOENT;
+}
+
+static int proc_nocommit(const char *dirname, const char *fname,
+ const char *inbuf, unsigned int len, void *arg)
+{
+ return -ENOENT;
+}
+
+static int proc_nodir(const char *dirname,
+ const char *filename,
+ struct proc_dircontents *buffer,
+ unsigned int size,
+ void *arg)
+{
+ return -ENOENT;
+}
+
+/* Release a proc entry */
+void unproc(const char *dir, const char *fname)
+{
+ struct dentry *dentry;
+ const char *delim;
+ struct qstr qstr;
+ struct proc_data *pdata;
+
+ /* FIXME: There's a better way, right? --RR */
+ dentry = dget(proc_mnt->mnt_sb->s_root);
+
+ delim = dir;
+ for (;;) {
+ /* Ignore multiple slashes */
+ while (*delim == '/') delim++;
+ qstr.name = delim;
+ delim = strchr(qstr.name, '/');
+ if (!delim) delim = qstr.name + strlen(qstr.name);
+ qstr.len = delim-(char *)qstr.name;
+ qstr.hash = full_name_hash(qstr.name, qstr.len);
+
+ if (qstr.len == 0)
+ break;
+
+ dentry = d_lookup(dentry, &qstr);
+ if (!dentry)
+ BUG();
+ dput(dentry->d_parent);
+ }
+
+ qstr.name = fname;
+ qstr.len = strlen(fname);
+ qstr.hash = full_name_hash(qstr.name, qstr.len);
+ dentry = d_lookup(dentry, &qstr);
+ if (!dentry)
+ BUG();
+ dput(dentry->d_parent);
+
+ /* We have the dentry: change the private area so it doesn't
+ enter the caller any more. */
+ pdata = dentry->d_fsdata;
+ pdata->commit = proc_nocommit;
+ pdata->fetch = proc_nofetch;
+ pdata->dir = proc_nodir;
+
+ /* This will probably free the dentry immediately, but if not,
+ too bad. */
+ dput(dentry);
+ dput(dentry);
+}
+
+void unproc_dir(const char *dir, const char *fname)
+{
+ unproc(dir, fname);
+}
+
+/* See if /proc entry exists (entries registered in directory). */
+static struct dentry *proc_lookup(struct inode *dir,
+ struct dentry *dentry)
+{
+ /* Since we place new staticn entries in the dcache, if we get
+ here, we know the entry does not exist. Create a negative
+ dentry, and return NULL */
+ d_add(dentry, NULL);
+ return NULL;
+}
+
+/* Call callback to get directory contents */
+static struct proc_dircontents *get_dir_contents(const char *dirname,
+ const char *filename,
+ struct proc_data *pdata)
+{
+ struct proc_dircontents *ret;
+ unsigned int size = PROCFS_START_DIR;
+
+ ret = kmalloc(size, GFP_KERNEL);
+ while (ret) {
+ int used;
+ used = pdata->dir(dirname, filename, ret, size, pdata->arg);
+ if (used < 0) {
+ kfree(ret);
+ return ERR_PTR(used);
+ }
+ if (used <= size)
+ return ret;
+
+ /* Realloc larger and loop */
+ kfree(ret);
+ size = used;
+ ret = kmalloc(size, GFP_KERNEL);
+ }
+ return ERR_PTR(-ENOMEM);
+}
+
+/* Incrementing is a little tricky: round up to alignment */
+static struct proc_dircontents *next_dcont(struct proc_dircontents *dcontents)
+{
+ unsigned int len;
+
+ len = ((sizeof(*dcontents) + strlen(dcontents->name) + 1
+ + __alignof__(*dcontents) - 1)
+ & ~(__alignof__(*dcontents) - 1));
+ return (void *)dcontents + len;
+}
+
+/* Search results from callback for this name, and if found create inode */
+static struct proc_dircontents *
+find_dcontents(struct proc_dircontents *dir_contents,
+ struct dentry *dentry)
+{
+ while (dir_contents->mode) {
+ if (strcmp(dentry->d_name.name, dir_contents->name) == 0)
+ return dir_contents;
+ dir_contents = next_dcont(dir_contents);
+ }
+ /* Not found... */
+ return NULL;
+}
+
+/* Since there are no hard links in this filesystem, we can simply map
+ inodes to dentries. This is not possibly in general! */
+static struct dentry *inode_to_dentry(struct inode *inode)
+{
+ if (inode->i_dentry.next->next != &inode->i_dentry)
+ BUG();
+ return list_entry(inode->i_dentry.next, struct dentry, d_alias);
+}
+
+/* See if /proc entry exists (user controls contents of directory). */
+static struct dentry *proc_punt_lookup(struct inode *dir,
+ struct dentry *dentry)
+{
+ /* We do the whole callback on every lookup. */
+ struct proc_data *pdata;
+ struct proc_dircontents *dir_contents, *dc;
+ struct inode *inode;
+ struct dentry *parent;
+
+ /* Since we know the inode is a directory, there is only one
+ inode in the dentry alias list, so mapping inode -> dentry
+ is easy */
+ parent = inode_to_dentry(dir);
+ dir_contents = get_dir_contents(parent->d_name.name,
+ dentry->d_name.name,
+ parent->d_fsdata);
+ if (!dir_contents || IS_ERR(dir_contents))
+ return (struct dentry *)dir_contents;
+
+ /* Looks through callback-supplied list for this dentry */
+ dc = find_dcontents(dir_contents, dentry);
+ if (!dc) {
+ kfree(dir_contents);
+ return NULL;
+ }
+ inode = new_proc_inode(dentry->d_sb, dc->mode, 1);
+ if (!inode) {
+ kfree(dir_contents);
+ return ERR_PTR(-ENOMEM);
+ }
+ pdata = __new_proc(dc->arg, dc->fetch, dc->commit, dc->dir);
+ if (!pdata) {
+ iput(inode);
+ kfree(dir_contents);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ dentry->d_op = &proc_dentry_ops;
+ dentry->d_fsdata = pdata;
+ d_add(dentry, inode);
+ kfree(dir_contents);
+ return NULL;
+}
+
+/* On open, we grab contents if we're readable... */
+static int proc_file_snapshot(struct inode *inode, struct file *filp)
+{
+ unsigned int size;
+ struct proc_buffer *buf;
+ struct proc_data *pdata;
+ char *dirname;
+ unsigned long page;
+
+ pdata = filp->f_dentry->d_fsdata;
+ /* Start at this, and work up */
+ size = PROCFS_START_FILE;
+
+ if (!(filp->f_mode & FMODE_READ)) {
+ /* Allocate write buffer: */
+ buf = kmalloc(sizeof(*buf) + size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ buf->maxlen = size;
+ buf->len = 0;
+ filp->private_data = buf;
+ return 0;
+ }
+
+ /* For the moment, you can't open for read & write. Later,
+ when seek resets snapshot/commit, we should allow this. */
+ if (filp->f_mode & FMODE_WRITE)
+ return -EINVAL;
+
+ page = __get_free_page(GFP_USER);
+ if (!page)
+ return -ENOMEM;
+
+ /* FIXME: This is not right: the callbacks don't care what the
+ process's idea of root is, it only wants path after proc/. */
+ dirname = d_path(filp->f_dentry, filp->f_vfsmnt,
+ (char *)page, PAGE_SIZE);
+ if (dirname < (char *)page)
+ BUG();
+
+ /* buf[0] holds the size */
+ buf = kmalloc(sizeof(*buf)+size, GFP_KERNEL);
+ while (buf) {
+ int used;
+ used = pdata->fetch(dirname, filp->f_dentry->d_name.name,
+ buf->buffer, size, pdata->arg);
+ if (used < 0) {
+ kfree(buf);
+ free_page(page);
+ /* FIXME: if used == -ENOENT, destroy dcache entry */
+ return used;
+ }
+ if (used <= size) {
+ free_page(page);
+ filp->private_data = buf;
+ /* Nul terminate and save size */
+ buf->maxlen = size;
+ buf->len = used;
+ buf->buffer[used] = '\0';
+ return 0;
+ }
+
+ /* Realloc larger and loop */
+ kfree(buf);
+ size = used;
+ if (size > PROCFS_MAX_SIZE)
+ break;
+ buf = kmalloc(sizeof(*buf)+size, GFP_KERNEL);
+ }
+ free_page(page);
+ return -ENOMEM;
+}
+
+/* On close, we commit contents if we've been written to... */
+static int proc_file_commit(struct inode *inode, struct file *filp)
+{
+ int ret;
+ struct proc_data *pdata;
+ struct proc_buffer *buf;
+ char *dirname;
+ unsigned long page;
+
+ pdata = filp->f_dentry->d_fsdata;
+ if (!(filp->f_mode & FMODE_WRITE)) {
+ kfree(filp->private_data);
+ return 0;
+ }
+
+ page = __get_free_page(GFP_USER);
+ if (!page) {
+ kfree(filp->private_data);
+ return -ENOMEM;
+ }
+
+ /* FIXME: This is not right: the callbacks don't care what the
+ process's idea of root is, it only wants path after proc/. */
+ dirname = d_path(filp->f_dentry, filp->f_vfsmnt,
+ (char *)page, PAGE_SIZE);
+ if (dirname < (char *)page)
+ BUG();
+
+ /* nul-terminate buffer */
+ buf = filp->private_data;
+ buf->buffer[buf->len] = '\0';
+ ret = pdata->commit(dirname, filp->f_dentry->d_name.name,
+ buf->buffer, buf->len, pdata->arg);
+
+ kfree(filp->private_data);
+ free_page(page);
+ return ret;
+}
+
+/* Copy from buffer */
+static ssize_t proc_file_read(struct file *filp, char *ubuf, size_t size,
+ loff_t *off)
+{
+ struct proc_buffer *buf;
+ struct inode *inode;
+
+ /* Use inode semaphore to serialize against writes. */
+ inode = filp->f_dentry->d_inode;
+ if (down_interruptible(&inode->i_sem) != 0)
+ return -EINTR;
+
+ buf = filp->private_data;
+ if (size + *off > buf->len)
+ size = buf->len - *off;
+
+ /* Copy from static buffer */
+ if (copy_to_user(ubuf, buf->buffer, size) != 0) {
+ up(&inode->i_sem);
+ return -EFAULT;
+ }
+ up(&inode->i_sem);
+
+ *off += size;
+ return (ssize_t)size;
+}
+
+/* Copy to buffer */
+static ssize_t proc_file_write(struct file *filp,
+ const char *ubuf,
+ size_t size,
+ loff_t *off)
+{
+ struct inode *inode;
+ struct proc_buffer *buf;
+ struct proc_data *pdata;
+
+ pdata = filp->f_dentry->d_fsdata;
+
+ /* Use inode semaphore to serialize writes & reads. */
+ inode = filp->f_dentry->d_inode;
+ if (down_interruptible(&inode->i_sem) != 0)
+ return -EINTR;
+
+ buf = filp->private_data;
+ if (*off + size > buf->maxlen) {
+ struct proc_buffer *newbuffer;
+ /* Prevent them using too much memory */
+ if (*off + size > PROCFS_MAX_SIZE) {
+ up(&inode->i_sem);
+ return -ENOSPC;
+ }
+ /* Room for count at head */
+ newbuffer = kmalloc(sizeof(*newbuffer) + *off + size,
+ GFP_USER);
+ if (!newbuffer) {
+ up(&inode->i_sem);
+ return -ENOMEM;
+ }
+ memcpy(newbuffer, buf, sizeof(*buf) + buf->len);
+ kfree(filp->private_data);
+ filp->private_data = buf = newbuffer;
+ }
+
+ /* Do actual copy */
+ if (copy_from_user(buf->buffer + *off, ubuf, size) != 0) {
+ up(&inode->i_sem);
+ return -EFAULT;
+ }
+ up(&inode->i_sem);
+ buf->len += size;
+ *off += size;
+
+ return size;
+}
+
+/* Call the user's callback to get contents of this directory.
+ Generate . and .. automagically. */
+static int proc_dynamic_readdir(struct file *filp,
+ void *dirent,
+ filldir_t filldir)
+{
+ int i;
+ struct proc_dircontents *dcontents, *dp;
+ char *dirname;
+ unsigned long page;
+ struct proc_data *pdata;
+ struct dentry *dentry = filp->f_dentry;
+
+ pdata = filp->f_dentry->d_fsdata;
+
+ i = filp->f_pos;
+ switch (i) {
+ case 0:
+ if (filldir(dirent, ".", 1, 0, dentry->d_inode->i_ino, DT_DIR)
+ < 0)
+ break;
+ i++;
+ filp->f_pos++;
+ /* fallthrough */
+ case 1:
+ if (filldir(dirent, "..", 2, 0,
+ dentry->d_parent->d_inode->i_ino, DT_DIR) < 0)
+ break;
+ i++;
+ filp->f_pos++;
+ }
+
+ page = __get_free_page(GFP_USER);
+ if (!page) return -ENOMEM;
+
+ /* FIXME: This is not right: the callbacks don't care what the
+ process's idea of root is, it only wants path after proc/. */
+ dirname = d_path(filp->f_dentry, filp->f_vfsmnt,
+ (char *)page, PAGE_SIZE);
+ if (dirname < (char *)page)
+ BUG();
+
+ /* Call user callback to get directory */
+ dcontents = get_dir_contents(dirname,
+ dentry->d_name.name,
+ pdata);
+ if (IS_ERR(dcontents)) {
+ free_page(page);
+ return PTR_ERR(dcontents);
+ }
+
+ /* Skip any already-read entries... */
+ for (dp = dcontents, i -= 2; dp->mode && i; dp = next_dcont(dp), i++);
+
+ for (; dp->mode; dp = next_dcont(dp)) {
+ /* FIXME: Use non-zero inode numbers */
+ if (filldir(dirent, dp->name, strlen(dp->name),
+ filp->f_pos,
+ filp->f_pos,
+ S_ISDIR(dp->mode) ? DT_DIR : DT_REG) < 0)
+ break;
+ filp->f_pos++;
+ }
+ free_page(page);
+ kfree(dcontents);
+ return 0;
+}
+
+/* Free the private area when dentry is freed. */
+static void proc_release(struct dentry *dentry)
+{
+ kfree(dentry->d_fsdata);
+}
+
+static struct super_block *proc_read_super(struct super_block *s,
+ void *data,
+ int silent)
+{
+ struct inode * root_inode;
+
+ s->s_blocksize = 1024;
+ s->s_blocksize_bits = 10;
+ s->s_magic = SIMPLE_PROCFS_MAGIC;
+ s->s_op = &proc_ops;
+
+ root_inode = new_proc_inode(s, S_IFDIR|0555, 0);
+ if (!root_inode) return NULL;
+
+ /* Block concurrent mounts */
+ down(&proc_semaphore);
+
+ s->s_root = d_alloc_root(root_inode);
+ if (!s->s_root) {
+ iput(root_inode);
+ up(&proc_semaphore);
+ return NULL;
+ }
+ up(&proc_semaphore);
+ return s;
+}
+
+/* Proc files use these wrappers */
+static struct file_operations proc_helper_fileops = {
+ open: proc_file_snapshot,
+ release: proc_file_commit,
+ read: proc_file_read,
+ write: proc_file_write,
+};
+
+/* Directories which use normal registration mechanism, which sit in
+ the dcache */
+static struct file_operations proc_helper_dirops = {
+ read: generic_read_dir,
+ readdir: dcache_readdir,
+};
+
+/* Directories which have their own dynamic content */
+static struct file_operations proc_punt_dirops = {
+ read: generic_read_dir,
+ readdir: proc_dynamic_readdir,
+};
+
+/* You can only do lookups through these dirs: dynamic ones do callbacks... */
+static struct inode_operations proc_punt_inodeops = {
+ lookup: proc_punt_lookup,
+};
+
+/* ... static ones look up registrations */
+static struct inode_operations proc_helper_inodeops = {
+ lookup: proc_lookup,
+};
+
+static struct super_operations proc_ops = {
+ statfs: proc_statfs,
+ put_inode: force_delete,
+};
+
+static struct dentry_operations proc_dentry_ops = {
+ d_release: proc_release,
+};
+
+static DECLARE_FSTYPE(proc_fs_type, "proc", proc_read_super, FS_SINGLE);
+
+static int __init init_proc_fs(void)
+{
+ register_filesystem(&proc_fs_type);
+ proc_mnt = kern_mount(&proc_fs_type);
+ return 0;
+}
+
+static void __exit exit_proc_fs(void)
+{
+ unregister_filesystem(&proc_fs_type);
+}
+
+module_init(init_proc_fs);
+module_exit(exit_proc_fs);
+
> No kernel-formatted tables: use a directory. (eg. kernel symbols
> become a directory of symbol names, each containing the symbol value).
>
> For cases when you don't want to take the overhead of creating a new
> proc entry (eg. tcp socket creation), you can create directories on
> demand when a user reads them using:
>
> proc_dir("net", "subdir", dirfunc, NULL);
> unproc_dir("net", "subdir");
>
> Note that with kbuild 2.5, you can do something like:
>
> proc(KBUILD_OBJECT, "foo", my_foo, int, 0644);
>
> And with my previous parameter patch:
> PARAM(foo, int, 0444);
Is this designed to replace sysctl?
In general we want to support using sysctl and similar features WITHOUT
procfs support at all (of any type). Nice for embedded systems
especially.
sysctl may be ugly but it provides for a standard way of manipulating
kernel variables... sysctl(2) or via procfs or via /etc/sysctl.conf.
AFAICS your proposal, while nice and clean :), doesn't offer all the
features that sysctl presently does.
Jeff
--
Jeff Garzik | Only so many songs can be sung
Building 1024 | with two lips, two lungs, and one tongue.
MandrakeSoft | - nomeansno
On Thursday 01 November 2001 11:32, Rusty Russell wrote:
> I believe that rewriting /proc (and /proc/sys should simply die) is a
> better solution than extending the interface, or avoiding it
> altogether by using a new filesystem.
I am currently working on something like this, too. It's using Patrick
Mochel's driverfs patch
(http://www.kernel.org/pub/linux/kernel/people/mochel/device/driverfs.diff-1030)
as a base and adds the functionality of the extensions that I did to proc fs
for my device registry patch
(http://www.tjansen.de/devreg/proc_ov-2.4.7.diff).
You can get an idea of the API in the proc_ov patch: every file in the
filesystem is typed and either a string, integer, unsigned long or an enum.
The intention is that you have a single value per file, like in /proc/sys,
and not more. The API does not even allow you to have more complex files (I
plan to add a blob type though).
Unlike comparable APIs it also supports 'dynamic directories'. So you can,
for example, create a directory for each device without registering a
directory for each device. You only need a single dynamic directory with a
couple of callbacks that specify the number of directories, their names and
their contexts. Contexts are void pointers that are given to the callbacks of
the content files, in this example you would probably use the pointer to the
device's struct device as context.
bye...
Jeff Garzik wrote:
>
> > No kernel-formatted tables: use a directory. (eg. kernel symbols
> > become a directory of symbol names, each containing the symbol value).
> >
> > For cases when you don't want to take the overhead of creating a new
> > proc entry (eg. tcp socket creation), you can create directories on
> > demand when a user reads them using:
> >
> > proc_dir("net", "subdir", dirfunc, NULL);
> > unproc_dir("net", "subdir");
> >
> > Note that with kbuild 2.5, you can do something like:
> >
> > proc(KBUILD_OBJECT, "foo", my_foo, int, 0644);
> >
> > And with my previous parameter patch:
> > PARAM(foo, int, 0444);
>
> Is this designed to replace sysctl?
>
> In general we want to support using sysctl and similar features WITHOUT
> procfs support at all (of any type). Nice for embedded systems
> especially.
>
> sysctl may be ugly but it provides for a standard way of manipulating
> kernel variables... sysctl(2) or via procfs or via /etc/sysctl.conf.
>
> AFAICS your proposal, while nice and clean :), doesn't offer all the
> features that sysctl presently does.
>
> Jeff
sysctl IS NOT UGLY. Not the sysctl I know from Solaris or BSD. Both are
far more pleasant solutions then the proliferation of ad-hoc,
undocumented
ever changing, redunand, slow, overcomplex in implementation,
(insert a list of random invectives here) interfaces shown under /proc.
And yes I don't give a shit about "cool features" like:
echo "bull shit" >
/proc/this/is/some/random/peace/of/crappy/interface/design
BTW.> /proc/sys is indeed silly, since it's a "second order" interface
to something you can gat your gip on far easier already. And redundant
system
intrefaces are not a nice design.
On Thu, Nov 01, 2001 at 05:42:36AM -0500, Jeff Garzik wrote:
> > proc(KBUILD_OBJECT, "foo", my_foo, int, 0644);
> >
> > And with my previous parameter patch:
> > PARAM(foo, int, 0444);
>
> Is this designed to replace sysctl?
>
> In general we want to support using sysctl and similar features WITHOUT
> procfs support at all (of any type). Nice for embedded systems
> especially.
Agreed. It would be nice to have always 1:1 relation between sysctl and
procfs interface, so you can do EVERYTHING with both of sysctl and via
/proc ... Maybe the code should be partly common as much as possible as well.
- Gabor
> I am currently working on something like this, too. It's using Patrick
> Mochel's driverfs patch
> (http://www.kernel.org/pub/linux/kernel/people/mochel/device/driverfs.diff-1030)
> as a base and adds the functionality of the extensions that I did to proc fs
> for my device registry patch
> (http://www.tjansen.de/devreg/proc_ov-2.4.7.diff).
Hm. Sounds like everyone wants the same thing. Some kind of device filesystem.
Perhaps a projects somewhere should be started.
On Thu, 01 Nov 2001 05:42:36 -0500
Jeff Garzik <[email protected]> wrote:
> Is this designed to replace sysctl?
Well, I'd suggest replacing *all* the non-process stuff in /proc. Yes.
> In general we want to support using sysctl and similar features WITHOUT
> procfs support at all (of any type). Nice for embedded systems
> especially.
1) My example was implemented as a filesystem. You could just as easily have
a CONFIG_PROC_SYSCALL which implemented access as a syscall, ie. sysctl2().
2) It's not worth the hassle to save 7k of code (well, the final implementation
will be larger than this, but OTOH, your replacement will be non-zero size).
> AFAICS your proposal, while nice and clean :), doesn't offer all the
> features that sysctl presently does.
You're right! My code:
1) Doesn't have the feature of requiring #ifdef CONFIG_SYSCTL in every file
that uses it properly (ie. checks error returns).
2) Doesn't have the feature that compiling without CONFIG_PROC/CONFIG_SYSCTL
wastes kernel memory unless surrounded by above #ifdefs.
3) Doesn't have the feature that it takes over 90 lines to implement a working
read & write.
4) Doesn't have the feature that it's hard to create dynamic directories.
5) Doesn't have the feature that it's inherently racy against module unload.
What was I thinking????
Rusty.
On Fri Nov 02, 2001 at 12:42:52PM +1100, Rusty Russell wrote:
> On Thu, 01 Nov 2001 05:42:36 -0500
> Jeff Garzik <[email protected]> wrote:
>
> > Is this designed to replace sysctl?
>
> Well, I'd suggest replacing *all* the non-process stuff in /proc. Yes.
As I've thought about this in the past, I realized that /proc
is serving two purposes. It is exporting the list of processes,
and it is also used to export kernel and driver information.
What we really need is for procfs to be just process stuff, and the
creation of a separate kernelfs nodev filesystem though which
the kernel can share all the gory details about the hardware,
drivers, phase of the moon, etc. Since these serve two
fundamentally different tasks, doesn't it make sense to split
them into two separate filesystems?
-Erik
--
Erik B. Andersen http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--
On Thu, 1 Nov 2001 13:06:00 +0100
Tim Jansen <[email protected]> wrote:
> On Thursday 01 November 2001 11:32, Rusty Russell wrote:
> > I believe that rewriting /proc (and /proc/sys should simply die) is a
> > better solution than extending the interface, or avoiding it
> > altogether by using a new filesystem.
>
> I am currently working on something like this, too. It's using Patrick
> Mochel's driverfs patch
> (http://www.kernel.org/pub/linux/kernel/people/mochel/device/driverfs.diff-1030)
> as a base and adds the functionality of the extensions that I did to proc fs
> for my device registry patch
> (http://www.tjansen.de/devreg/proc_ov-2.4.7.diff).
Hi Tim!
Firstly: obviously, I think that work on /proc is a worthy and excellent
thing to be doing: everyone has been complaining about it since its introduction
(for good reason).
I'm not sure about such explicit typing: see my patch (the existing types are
only for convenience: you can trivially supply your own). I agree with the
"one file, one value" idea. I also went for dynamic directories for those who
don't want to continually register/deregister.
I suggest you read my patch 8)
Rusty.
On Fri, 2 Nov 2001, Rusty Russell wrote:
> On Thu, 01 Nov 2001 05:42:36 -0500
> Jeff Garzik <[email protected]> wrote:
>
> > Is this designed to replace sysctl?
>
> Well, I'd suggest replacing *all* the non-process stuff in /proc. Yes.
Aha. Like, say it, /proc/kcore. Or /proc/mounts, yodda, yodda.
Noble idea, but there is a little problem: random massive userland
breakage. E.g. changing /proc/mounts is going to hit getmntent(3), etc.
If you are willing to audit all userland code - you are welcome.
But keep in mind that standard policy is to keep obsolete API for at least
one stable branch with warnings and remove it in the next one. So we are
talking about 2.8 here. BTW, I'm less than sure that your variant is free
of rmmod races, but that's a separate story...
Alexander Viro wrote:
>
> On Fri, 2 Nov 2001, Rusty Russell wrote:
>
> > On Thu, 01 Nov 2001 05:42:36 -0500
> > Jeff Garzik <[email protected]> wrote:
> >
> > > Is this designed to replace sysctl?
> >
> > Well, I'd suggest replacing *all* the non-process stuff in /proc. Yes.
>
> Aha. Like, say it, /proc/kcore. Or /proc/mounts, yodda, yodda.
>
> Noble idea, but there is a little problem: random massive userland
> breakage. E.g. changing /proc/mounts is going to hit getmntent(3), etc.
>
> If you are willing to audit all userland code - you are welcome.
> But keep in mind that standard policy is to keep obsolete API for at least
> one stable branch with warnings and remove it in the next one. So we are
> talking about 2.8 here. BTW, I'm less than sure that your variant is free
> of rmmod races, but that's a separate story...
Bull shit. Standard policy is currently to keep crude old
interfaces until no end of time. Here are some examples:
/proc/meminfo
total: used: free: shared: buffers: cached:
Mem: 196005888 60133376 135872512 0 3280896 31088640
Swap: 410255360 0 410255360
MemTotal: 191412 kB
MemFree: 132688 kB
MemShared: 0 kB
Buffers: 3204 kB
The first lines could have gone 2 years ago.
/proc/ksyms - this is duplicating a system call (and making stuff easier
for intrusors)
/proc/modules - same as /proc/ksysms - entierly unneccessary and
obsolete,
since 3 years!
And so on and so on...
Erik Andersen wrote:
> On Fri Nov 02, 2001 at 12:42:52PM +1100, Rusty Russell wrote:
>
>>On Thu, 01 Nov 2001 05:42:36 -0500
>>Jeff Garzik <[email protected]> wrote:
>>
>>
>>>Is this designed to replace sysctl?
>>>
>>Well, I'd suggest replacing *all* the non-process stuff in /proc. Yes.
>>
>
> As I've thought about this in the past, I realized that /proc
> is serving two purposes. It is exporting the list of processes,
> and it is also used to export kernel and driver information.
>
> What we really need is for procfs to be just process stuff, and the
> creation of a separate kernelfs nodev filesystem though which
> the kernel can share all the gory details about the hardware,
> drivers, phase of the moon, etc. Since these serve two
> fundamentally different tasks, doesn't it make sense to split
> them into two separate filesystems?
>
> -Erik
Well the way I look @ it is that /proc should be the
only interface between kernel and user space, and therefore
a better name would be /kernel. I know this is not going
to happen because of all the userspace dependencies and
also probably too Plan9esque, but it's the right direction IMHO.
The process information you refer to is KERNEL data and
therefore "other" kernel data should not be split from the /proc
hierarchy. However as said above a better name would be
/kernel and it should be organised better.
Padraig.
On Fri, 2 Nov 2001, Martin Dalecki wrote:
> Bull shit. Standard policy is currently to keep crude old
> interfaces until no end of time. Here are some examples:
[snip]
Again, standard procedure for removal of user-visible API:
* next devel and following stable branch - use of that API is
possible but produces a warning
* devel branch after that - API removed.
The fact that nobody had even started that with procfs is a separate story.
But no matter what user-visible API changes we start now, the earliest point
when the old stuff can be removed is 2.7.
In article <[email protected]>,
Alexander Viro <[email protected]> wrote:
>On Fri, 2 Nov 2001, Rusty Russell wrote:
>
>> On Thu, 01 Nov 2001 05:42:36 -0500
>> Jeff Garzik <[email protected]> wrote:
>>
>> > Is this designed to replace sysctl?
>>
>> Well, I'd suggest replacing *all* the non-process stuff in /proc. Yes.
>
>Aha. Like, say it, /proc/kcore. Or /proc/mounts, yodda, yodda.
Well in 2.5 union mounts are going to go in right? Then you could
have a compatibility "proc-compat" filesystem that reads data from
/kernel and supplies it in backwards compatible formats such as
/proc/mounts, that you union-mount over /proc
And in 2.7, rm -rf linux/fs/proc-compat
Mike.
--
"Only two things are infinite, the universe and human stupidity,
and I'm not sure about the former" -- Albert Einstein.
On Friday 02 November 2001 03:20, Rusty Russell wrote:
> I'm not sure about such explicit typing: see my patch (the existing types
> are only for convenience: you can trivially supply your own). I agree with
> the "one file, one value" idea. I also went for dynamic directories for
> those who don't want to continually register/deregister.
Explicit typing has a few advantages for the user. User-space apps could use
a ioctl to get the type (and for enums the possible values, for integers
maybe a value range). Then you can write some program that shows the user the
possible values of each file, so you don't have to keep them in mind. And you
can easily write a GUI administration tool that allows you to modify kernel
and driver parameters.
It would also make it possible to convert the content of the filesystem into
another format, for example you could automatically generate a XML Schema
definition. IMHO persistence is a desirable feature for the editable files.
bye...
On Fri, 02 Nov 2001 13:39:29 +0100,
Martin Dalecki <[email protected]> wrote:
>Bull shit. Standard policy is currently to keep crude old
>interfaces until no end of time. Here are some examples:
>...
>/proc/ksyms - this is duplicating a system call (and making stuff easier
>for intrusors)
Anybody can issue syscall query_module. Removing /proc/ksyms just
forces users to run an executable or Perl syscall(). You have not
improved security and you have made it harder to report and diagnose
problems.
Keith Owens wrote:
>
> On Fri, 02 Nov 2001 13:39:29 +0100,
> Martin Dalecki <[email protected]> wrote:
> >Bull shit. Standard policy is currently to keep crude old
> >interfaces until no end of time. Here are some examples:
> >...
> >/proc/ksyms - this is duplicating a system call (and making stuff easier
> >for intrusors)
>
> Anybody can issue syscall query_module. Removing /proc/ksyms just
> forces users to run an executable or Perl syscall(). You have not
> improved security and you have made it harder to report and diagnose
> problems.
Talking about reality:
Having perl on the box, or having to upload some special purpose
application on the box are both measures not that easy if you are
going to do a real breakin. (Read: write some buffer overflow stub)
But just echo sum stuff or therelike is
*much* easier. And then there is the capability stuff you could use
to prevent everybody from accessing the syscall interface.
You don't have much expierence with real break-ins. Don't you?
On Saturday 03 November 2001 00:31, you wrote:
> Hmm, I'd argue that a GUI tool would be fairly useless without knowing what
> the values meant anwyay, to give help, in which case you might as well know
> the types.
Take, as an example, the compression module parameter of the PWC (Philips
Webcam) driver. Currently you can specify a value between 0 for uncompressed
and 3 for high compression. If a GUI shows me that only values between 0 and
3 are allowed I could guess that I have to enter "3" for high compression
without searching for the documentation. It would be even better if I could
select four strings, "none", "low", "medium" and "high".
I do see the advantages of using strings in proc, and maybe there is another
solution: keep the type information out of the proc filesystem and save it
in a file similar to Configure.help, together with a description for a file.
I just don't know how to ensure that they are in sync.
bye...
In a message dated 11/3/01 6:47:18 AM Eastern Standard Time, [email protected]
writes:
> On Saturday 03 November 2001 00:31, you wrote:
> > Hmm, I'd argue that a GUI tool would be fairly useless without knowing
> what
> > the values meant anwyay, to give help, in which case you might as well
> know
> > the types.
>
> Take, as an example, the compression module parameter of the PWC (Philips
> Webcam) driver. Currently you can specify a value between 0 for
uncompressed
>
> and 3 for high compression. If a GUI shows me that only values between 0
and
>
> 3 are allowed I could guess that I have to enter "3" for high compression
> without searching for the documentation. It would be even better if I
could
> select four strings, "none", "low", "medium" and "high".
>
> I do see the advantages of using strings in proc, and maybe there is
another
>
> solution: keep the type information out of the proc filesystem and save it
> in a file similar to Configure.help, together with a description for a
file.
>
> I just don't know how to ensure that they are in sync.
>
It would always be possible to build a front end to the /proc file
system.
Joachim Martillo
On Sat, 3 Nov 2001 [email protected] wrote:
> > solution: keep the type information out of the proc filesystem and save it
> > in a file similar to Configure.help, together with a description for a
> file.
> It would always be possible to build a front end to the /proc file
> system.
The approach taken by powertweak (http://www..powertweak.org) was to
use an XML description of /proc to define types etc. It's worked out
quite well, and has the added advantage of being buzzword compliant 8)
Dave.
--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs
On Saturday 03 November 2001 14:06, [email protected] wrote:
> > I just don't know how to ensure that they are in sync.
> It would always be possible to build a front end to the /proc file
> system.
Yes, but we are talking about a file system that is used to configure ALL
drivers. So these descriptions should be written by the authors and
distributed with the kernel.
bye...
On Sat, 3 Nov 2001 12:47:08 +0100
Tim Jansen <[email protected]> wrote:
> I do see the advantages of using strings in proc, and maybe there is another
> solution: keep the type information out of the proc filesystem and save it
> in a file similar to Configure.help, together with a description for a file.
> I just don't know how to ensure that they are in sync.
The same argument applies for module parameters when they become boot parameters
(handwave reference to my previous patch). IMHO we should use a source-strainer
like the current Documentation/DocBook/ stuff does to extract these and consolidate
them.
Cheers,
Rusty.
On November 2, 2001 03:20 am, Rusty Russell wrote:
> I agree with the "one file, one value" idea.
So cat /proc/partitions goes from being a nice, easy to read and use human
interface to something other than that. Lets not go overboard.
--
Daniel
On Sun, Nov 04, 2001 at 02:40:51AM +0100, Daniel Phillips wrote:
> On November 2, 2001 03:20 am, Rusty Russell wrote:
> > I agree with the "one file, one value" idea.
>
> So cat /proc/partitions goes from being a nice, easy to read and use human
> interface to something other than that. Lets not go overboard.
/proc is usually a very nice interface that's both human- and machine-readable.
Some changes have gone in though (such as /proc/mdstat) that makes the proc
files implement something more like a pretty-printing user interface with
text-mode progress bars and what not. That's a PITA to parse.
Now, if established files in proc could just be stable, so that they would not
change unless non-backwards-compatible information absolutely must be
presented, that would be a major step in the right direction. Further, if we
could find some acceptable compromise between human- and machine- readability,
as has happened in the past...
Then, someone might just implement the equivalent of kstat (from Solaris) or
pstat (from HP-UX). Under a license so that commercial players could actually
link to the library as well (unlike the gproc library).
So call me a dreamer ;)
(For the record, it's not unlikely that I would be able to dedicate some
time to that effort in a not too distant future - say, 2.5 ?)
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
Martin Dalecki writes:
> Bull shit. Standard policy is currently to keep crude old
> interfaces until no end of time. Here are some examples:
>
> /proc/meminfo
> total: used: free: shared: buffers: cached:
> Mem: 196005888 60133376 135872512 0 3280896 31088640
> Swap: 410255360 0 410255360
> MemTotal: 191412 kB
> MemFree: 132688 kB
> MemShared: 0 kB
> Buffers: 3204 kB
>
> The first lines could have gone 2 years ago.
Kill them in the 2.5.0 kernel.
> /proc/ksyms - this is duplicating a system call (and making stuff
> easier for intrusors)
This is still used by procps.
On Sunday 04 November 2001 03:08, Jakob ?stergaard wrote:
> Now, if established files in proc could just be stable, so that they would
> not change unless non-backwards-compatible information absolutely must be
> presented, that would be a major step in the right direction. Further, if
> we could find some acceptable compromise between human- and machine-
> readability, as has happened in the past...
The problem is that it is almost impossible to offer human-readable
interfaces that will be backward-compatible. As soon as you have a
well-formatted output, like /proc/partitions, you can not add a new field
without breaking user-space applications.
What you could do is to establish rules for files like /proc/partitions ("if
there are more than 4 space-separated alphanumeric strings per line in
/proc/partitions then ignore the additional fields"), but you won't find such
a rule that is useful for every file and still offers a nice human-readable
format. And it will be quite hard to be sure that everybody really sticks to
these rules. Alternatively you could use a semi-human-readable format like
XML, which several people have proposed, but it seemed like almost nobody
liked it.
IMHO there shouldn't be any 'presentation logic' in the kernel. If you need
the things in a human-friendly format, write a 3 line shell script:
for I in `ls -d /proc/partitions/*` ; do
echo `cat $I/major` `cat $I/minor` `cat $I/blocks` `cat $I/name`
done
bye...
On Sun, Nov 04, 2001 at 01:30:06PM +0100, Tim Jansen wrote:
> The problem is that it is almost impossible to offer human-readable
> interfaces that will be backward-compatible. As soon as you have a
> well-formatted output, like /proc/partitions, you can not add a new field
> without breaking user-space applications.
> What you could do is to establish rules for files like /proc/partitions ("if
> there are more than 4 space-separated alphanumeric strings per line in
> /proc/partitions then ignore the additional fields"), but you won't find such
> a rule that is useful for every file and still offers a nice human-readable
> format.
Certainly you can further fields without breaking (well-written) apps. That's
what the first line in /proc/partitions is for. When adding a new column,
you also give it a new tag in the header. Ask RedHat how many apps broke
when they started patching sard into their kernels.
Adding new fields is even easier with /proc/stat-style key:value pairs. Both
styles are human- as well as machine readable. Problems only arise when
someone changes the semantics of a certain field without changing the tag.
But luckily these kinds of changes never happen in a stable kernel series...
Regards,
Daniel.
On Sunday 04 November 2001 14:36, Daniel Kobras wrote:
> Certainly you can further fields without breaking (well-written) apps.
> That's what the first line in /proc/partitions is for. When adding a new
> column, you also give it a new tag in the header. Ask RedHat how many apps
> broke when they started patching sard into their kernels.
The format won't help you when you have strings with whitespace or if you
want to export a list for each partition.
> Adding new fields is even easier with /proc/stat-style key:value pairs.
> Both styles are human- as well as machine readable. Problems only arise
> when someone changes the semantics of a certain field without changing the
> tag. But luckily these kinds of changes never happen in a stable kernel
> series...
I don't think that this format is very user friendly, and it has the same
limitations as /proc/partitions.
The problem is not that it is impossible to invent a new format for every
file. The problem is that you need a different format for each file.
bye...
Here's my stab at the problems - please comment,
We want to avoid these problems:
1) It is hard to parse (some) /proc files from userspace
2) As /proc files change, parsers must be changed in userspace
Still, we want to keep on offering
3) Human readable /proc files with some amount of pretty-printing
4) A /proc fs that can be changed as the kernel needs those changes
Taking care of (3) and (4):
Maintaining the current /proc files is very simple, and it offers the system
administrator a lot of functionality that isn't reasonable to take away now.
* They should stay in a form close to the current one *
Taking care of (1) and (2):
For each file "f" in /proc, there will be a ".f" file which is a
machine-readable version of "f", with the difference that it may contain extra
information that one may not want to present to the user in "f".
The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it is a
list of elements, wherein an element can itself be a list (or a character string,
or a host-native numeric type. Thus, (key,value) pairs and lists thereof are
possible, as well as tree structures etc.
All data types are stored in the architecture-native format, and a simple
library should be sufficient to parse any dot-proc file.
So, we need a small change in procfs that does not in any way break
compatibility - and we need a few lines of C under LGPL to interface with it.
Tell me what you think - It is possible that I could do this (or something
close) in the near future, unless someone shows me the problem with the
approach.
Thank you,
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 04:33:54PM +0100, Jakob ?stergaard wrote:
> For each file "f" in /proc, there will be a ".f" file which is a
> machine-readable version of "f", with the difference that it may contain extra
> information that one may not want to present to the user in "f".
>
> The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it is a
> list of elements, wherein an element can itself be a list (or a character string,
> or a host-native numeric type. Thus, (key,value) pairs and lists thereof are
> possible, as well as tree structures etc.
>
> All data types are stored in the architecture-native format, and a simple
> library should be sufficient to parse any dot-proc file.
Hmmmm. If someone would be able to implement new architecture which can
provide 1:1 sysctl/procfs support, there would be need for user space
programs parse proc filesystem. Then, /proc would be only good to administrators
to echo to/cat entries. So compatibility with old design can remain, and
new programs would be able to use the much more versatile sysctl support.
OK, it's a hard guess only. ;-)
- Gabor
On November 4, 2001 04:33 pm, Jakob ?stergaard wrote:
> Here's my stab at the problems - please comment,
>
> We want to avoid these problems:
> 1) It is hard to parse (some) /proc files from userspace
> 2) As /proc files change, parsers must be changed in userspace
>
> Still, we want to keep on offering
> 3) Human readable /proc files with some amount of pretty-printing
> 4) A /proc fs that can be changed as the kernel needs those changes
>
>
> Taking care of (3) and (4):
>
> Maintaining the current /proc files is very simple, and it offers the system
> administrator a lot of functionality that isn't reasonable to take away
now.
>
> * They should stay in a form close to the current one *
>
>
> Taking care of (1) and (2):
>
> For each file "f" in /proc, there will be a ".f" file which is a
> machine-readable version of "f", with the difference that it may contain
extra
> information that one may not want to present to the user in "f".
>
> The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
is a
> list of elements, wherein an element can itself be a list (or a character
string,
> or a host-native numeric type. Thus, (key,value) pairs and lists thereof
are
> possible, as well as tree structures etc.
>
> All data types are stored in the architecture-native format, and a simple
> library should be sufficient to parse any dot-proc file.
>
>
> So, we need a small change in procfs that does not in any way break
> compatibility - and we need a few lines of C under LGPL to interface with
it.
>
> Tell me what you think - It is possible that I could do this (or something
> close) in the near future, unless someone shows me the problem with the
> approach.
>
> Thank you,
While the basic idea is attractive for a number of reasons, there are more
than a few questions to answer. Take a look at a typical proc function,
meminfo_read_proc for example. Its active ingredient is basically a sprintf
function:
len += sprintf(page+len,
"MemTotal: %8lu kB\n"
"MemFree: %8lu kB\n"
"MemShared: %8lu kB\n"
...,
K(i.totalram),
K(i.freeram),
K(i.sharedram),
...);
What does the equivalent look like under your scheme? Does it remain
localized in one proc routine, or does it get spread out over a few
locations, possibibly with a part of the specification outside the
kernel? Do the titles end up in your dotfile? How do you specify whatever
formatting is necessary to transform a dotfile into normal /proc output? Is
this transformation handled in user space or the kernel? How much library
support is needed?
--
Daniel
On Sunday 04 November 2001 16:33, you wrote:
> Maintaining the current /proc files is very simple, and it offers the
> system administrator a lot of functionality that isn't reasonable to take
> away now.
> * They should stay in a form close to the current one *
I doubt that it is worthwhile to keep them in the current form for any other
reason than compatibility (with existing software and people's habits).
It doesn't make sense to describe things in 200 different formats, you won't
help anybody with that. It also violates the good old principle of keeping
policy out of the kernel. And, for me, layout is clearly policy.
The reason for proc's popularity is clearly that you can use any tool, from
cat over more/less to the text editor of choice, and read the files. There
should be ways to achieve this without putting things into the kernel. Is
there is a way to implement a filesystem in user-space? What you could do is
to export the raw data using single-value-files, XML or whatever and then
provide an emulation of the old /proc files and possibly new ones in user
space. This could be as simple as writing a shell-script for each emulated
file.
> The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
> is a list of elements, wherein an element can itself be a list (or a
Why would anybody want a binary encoding?
It needs special parsers and will be almost impossible to access from shell
scripts.
bye...
On November 4, 2001 05:45 pm, Tim Jansen wrote:
> > The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
> > is a list of elements, wherein an element can itself be a list (or a
>
> Why would anybody want a binary encoding?
Because they have a computer?
> It needs special parsers and will be almost impossible to access from shell
> scripts.
No, look, he's proposing to put the binary encoding in hidden .files. The
good old /proc files will continue to appear and operate as they do now.
--
Daniel
On Sun, Nov 04, 2001 at 05:31:50PM +0100, Daniel Phillips wrote:
...
> While the basic idea is attractive for a number of reasons, there are more
> than a few questions to answer. Take a look at a typical proc function,
> meminfo_read_proc for example. Its active ingredient is basically a sprintf
> function:
>
> len += sprintf(page+len,
> "MemTotal: %8lu kB\n"
> "MemFree: %8lu kB\n"
> "MemShared: %8lu kB\n"
> ...,
> K(i.totalram),
> K(i.freeram),
> K(i.sharedram),
> ...);
>
> What does the equivalent look like under your scheme?
Well, in my previous mail I gave a vague description of the "media" but
the actual semantics of the informaiton.
I *suppose* we would represent the above like:
( ("version", 1),
("totalram", 1123412),
("freeram", 1243),
("sharedram", 234) )
This would be encoded in some simple binary form. Let's not get hung up in
those details for now.
Your example was very simple, and my solution is simple too - a list of
key/value pairs. But my scheme will allow for tree structures, longer
lists, etc. etc. too.
The API would probably be something like (I have 45 seconds of thought behind
this API so don't consider this a final draft)
dp_list * itall = new_dp_list();
add_dp_key(itall, "version", new_dp_int(1));
add_dp_key(itall, "totalram", new_dp_int(i.totalram));
add_dp_key(itall, "freeram", new_dp_int(i.freeram));
add_dp_key(itall, "sharedram", new_dp_int(i.sharedram));
len += dp_commit(page+len, itall);
dp_list * new_dp_list(void)
Creates a new empty list
void add_dp_key(dp_list*, const char*, dp_element*)
Creates a two-element list, filling in the two elements
with a string and a value element of arbitrary type.
This list is appended as the new last element in the
list given as the first argument.
int dp_commit(char*, dp_list*)
Will encode the list argument into the buffer given,
and free the entire list.
---
Now I agree that this API hurts the eyes already now - it should
not require a myriad of small structures with pointers to everywhere
to be allocated and de-allocated. I can do better than this,
trust me, but I can't do that until tomorrow ;)
> Does it remain
> localized in one proc routine, or does it get spread out over a few
> locations, possibibly with a part of the specification outside the
> kernel? Do the titles end up in your dotfile? How do you specify whatever
> formatting is necessary to transform a dotfile into normal /proc output? Is
> this transformation handled in user space or the kernel? How much library
> support is needed?
Wherever you have a routine for filling out a proc file, you'd add a
routine for filling out the dot-proc file.
Simple. Keeping the routines together makes it more likely that the
maintainers and occational hackers will keep the two fairly well in sync.
(remember - this scheme does not *require* anything to be in sync though)
The dot-proc file does not generate the normal proc output. Let the pretty
printing happen as it does today - changing a semicolon anywhere near an
existing proc routine is going to break half a gazillion programs in userland
anyway.
The *only* library support you need is
1) Parsing of the dot-proc files into structures convenient for the
language at hand (C first).
2) Handling of the resulting structures, eg. allocation and de-allocation,
if this is at all necessary - it may not be...
I'm not pushing style-sheets into the kernel :)
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 06:28:47PM +0100, Daniel Phillips wrote:
> On November 4, 2001 05:45 pm, Tim Jansen wrote:
> > > The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
> > > is a list of elements, wherein an element can itself be a list (or a
> >
> > Why would anybody want a binary encoding?
>
> Because they have a computer?
Yes - good reason :)
The "fuzzy parsing" userland has to do today to get useful information
out of many proc files today is not nice at all. It eats CPU, it's
error-prone, and all in all it's just "wrong".
However - having a human-readable /proc that you can use directly with
cat, echo, your scripts, simple programs using read(), etc. is absolutely
a *very* cool feature that I don't want to let go. It is just too damn
practical.
But building a piece of software that needs to reliably read out status
information from a system providing something more and more resembling a GUI in
text-files is becoming unnecessarily time-consuming and error-prone.
>
> > It needs special parsers and will be almost impossible to access from shell
> > scripts.
>
> No, look, he's proposing to put the binary encoding in hidden .files. The
> good old /proc files will continue to appear and operate as they do now.
>
Exactly.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 05:45:45PM +0100, Tim Jansen wrote:
> On Sunday 04 November 2001 16:33, you wrote:
> > Maintaining the current /proc files is very simple, and it offers the
> > system administrator a lot of functionality that isn't reasonable to take
> > away now.
> > * They should stay in a form close to the current one *
>
> I doubt that it is worthwhile to keep them in the current form for any other
> reason than compatibility (with existing software and people's habits).
It's an essential feature for *many* sysadmins. It's just so *easy* to hack
up a script to act on the information in some file - or to take a look with
"cat" to see how you RAID resync is coming along.
> It doesn't make sense to describe things in 200 different formats, you won't
> help anybody with that. It also violates the good old principle of keeping
> policy out of the kernel. And, for me, layout is clearly policy.
User-readable, and machine-readable. I think that covers everything. And
that's two formats.
Where's the policy ? The only policy I see is the text-mode GUI in the
existing proc interface - and that is one place where I actually *like* the
policy as a user (sysadmin), but hate it as an application programmer.
>
> The reason for proc's popularity is clearly that you can use any tool, from
> cat over more/less to the text editor of choice, and read the files.
That's the reason why I want to keep the old proc files.
> There
> should be ways to achieve this without putting things into the kernel. Is
> there is a way to implement a filesystem in user-space? What you could do is
> to export the raw data using single-value-files, XML or whatever and then
> provide an emulation of the old /proc files and possibly new ones in user
> space. This could be as simple as writing a shell-script for each emulated
> file.
You're proposing a replacement of /proc ?
My proposal was at least intended to be very simple and non-intrusive.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
Jakob ?stergaard wrote:
>
> On Sun, Nov 04, 2001 at 06:28:47PM +0100, Daniel Phillips wrote:
> > On November 4, 2001 05:45 pm, Tim Jansen wrote:
> > > > The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
> > > > is a list of elements, wherein an element can itself be a list (or a
> > >
> > > Why would anybody want a binary encoding?
> >
> > Because they have a computer?
>
> Yes - good reason :)
>
> The "fuzzy parsing" userland has to do today to get useful information
> out of many proc files today is not nice at all. It eats CPU, it's
> error-prone, and all in all it's just "wrong".
>
> However - having a human-readable /proc that you can use directly with
> cat, echo, your scripts, simple programs using read(), etc. is absolutely
> a *very* cool feature that I don't want to let go. It is just too damn
> practical.
>
> But building a piece of software that needs to reliably read out status
> information from a system providing something more and more resembling a GUI in
> text-files is becoming unnecessarily time-consuming and error-prone.
>
> >
> > > It needs special parsers and will be almost impossible to access from shell
> > > scripts.
> >
> > No, look, he's proposing to put the binary encoding in hidden .files. The
> > good old /proc files will continue to appear and operate as they do now.
> >
>
> Exactly.
>
> --
> ................................................................
> : [email protected] : And I see the elder races, :
> :.........................: putrid forms of man :
> : Jakob ?stergaard : See him rise and claim the earth, :
> : OZ9ABN : his downfall is at hand. :
> :.........................:............{Konkhra}...............:
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
A good reason could be that a simple ps -aux uses hundreds of system
calls to get the list of all the processes ...
--
SpaceWalker
[email protected]
ICQ 36157579
On Sun, Nov 04, 2001 at 06:41:59PM +0100, Jakob ?stergaard wrote:
> The "fuzzy parsing" userland has to do today to get useful information
> out of many proc files today is not nice at all. It eats CPU, it's
> error-prone, and all in all it's just "wrong".
This is because the files are human-readable, nothing to do with binary vs. plain
text. proc should be made (entirely ?) of value-per-file trees, and a back-compat
compatprocfs union mounted for the files people and programs are expecting.
> However - having a human-readable /proc that you can use directly with
> cat, echo, your scripts, simple programs using read(), etc. is absolutely
> a *very* cool feature that I don't want to let go. It is just too damn
> practical.
I don't see that it's at all useful: it just makes life harder. You yourself
state above that read(2) parsing of human readable files is "not nice at all",
and now you're saying it is "just too damn practical".
Just drop the human-readable stuff from the new /proc, please.
In what way is parsing /proc/meminfo in a script more practical than
cat /proc/meminfo/total ?
> > No, look, he's proposing to put the binary encoding in hidden .files. The
> > good old /proc files will continue to appear and operate as they do now.
> >
>
> Exactly.
This just seems needless duplication, and fragile. Representing things as directory
hierarchies and single-value files in text seems to me to be much nicer, just as
convenient, and much nicer for fs/proc/ source...
IMHO
john
--
"All this just amounts to more grist for the mill of the ill."
- Elizabeth Wurtzel
On Sun, Nov 04, 2001 at 06:48:39PM +0100, Jakob ?stergaard wrote:
> Where's the policy ? The only policy I see is the text-mode GUI in the
> existing proc interface
well exactly
> - and that is one place where I actually *like* the
> policy as a user (sysadmin)
"meminfo" versus "cat /proc/meminfo" is not too great a leap in 2.7, when
the back-compat stuff gets dropped.
regards
john
--
"All this just amounts to more grist for the mill of the ill."
- Elizabeth Wurtzel
On Sunday 04 November 2001 18:28, Daniel Phillips wrote:
> > It needs special parsers and will be almost impossible to access from
> > shell scripts.
> No, look, he's proposing to put the binary encoding in hidden .files. The
> good old /proc files will continue to appear and operate as they do now.
But as he already said:
2) As /proc files change, parsers must be changed in userspace
So if only some programs use the 'dot-files' and the other still use the
crappy text interface we still have the old problem for scripts, only with a
much larger effort.
bye...
On Sunday 04 November 2001 18:41, you wrote:
> The "fuzzy parsing" userland has to do today to get useful information
> out of many proc files today is not nice at all.
I agree, but you dont need a binary format to achieve this. A WELL-DEFINED
format is sufficient. XML is one of them, one-value-files another one. The
"fuzzy parsing" only happens because the files try to be friendly for human
readers.
> It eats CPU, it's error-prone, and all in all it's just "wrong".
How much of your CPU time is spent parsing /proc files?
> However - having a human-readable /proc that you can use directly with
> cat, echo, your scripts, simple programs using read(), etc. is
> absolutely a *very* cool feature that I don't want to let go. It is just
> too damn practical.
You shouldn't use them in scripts because they are likely to break. That's
the whole point. At least not when you want to distribute the scripts to
others. And BTW the one-value-files are much easier to parse for scripts than
any other solution that I have seen so far, including the current /proc
interface.
bye...
On Sun, 4 Nov 2001, Tim Jansen wrote:
> So if only some programs use the 'dot-files' and the other still use the
> crappy text interface we still have the old problem for scripts, only with a
> much larger effort.
Folks, could we please deep-six the "ASCII is tough" mentality? Idea of
native-endian data is so broken that it's not even funny. Exercise:
try to export such thing over the network. Another one: try to use
that in a shell script. One more: try to do it portably in Perl script.
It had been tried. Many times. It had backfired 100 times out 100.
We have the same idiocy to thank for fun trying to move a disk with UFS
volume from Solaris sparc to Solaris x86. We have the same idiocy to
thank for a lot of ugliness in X.
At the very least, use canonical bytesex and field sizes. Anything less
is just begging for trouble. And in case of procfs or its equivalents,
_use_ the_ _damn_ _ASCII_ _representations_. scanf(3) is there for
purpose.
On Sunday 04 November 2001 18:48, you wrote:
> > Is there is a way to implement a filesystem in user-space? What you could
> You're proposing a replacement of /proc ?
I was asking whether there is a way to do compatibility stuff and human
readable interfaces in user space.
bye...
On Sun, Nov 04, 2001 at 05:59:45PM +0000, John Levon wrote:
> On Sun, Nov 04, 2001 at 06:41:59PM +0100, Jakob ?stergaard wrote:
>
> > The "fuzzy parsing" userland has to do today to get useful information
> > out of many proc files today is not nice at all. It eats CPU, it's
> > error-prone, and all in all it's just "wrong".
>
> This is because the files are human-readable, nothing to do with binary vs. plain
> text. proc should be made (entirely ?) of value-per-file trees, and a back-compat
> compatprocfs union mounted for the files people and programs are expecting.
So you want generaiton and parsing of text strings whenever we pass an int from
the kernel ?
>
> > However - having a human-readable /proc that you can use directly with
> > cat, echo, your scripts, simple programs using read(), etc. is absolutely
> > a *very* cool feature that I don't want to let go. It is just too damn
> > practical.
>
> I don't see that it's at all useful: it just makes life harder. You yourself
> state above that read(2) parsing of human readable files is "not nice at all",
> and now you're saying it is "just too damn practical".
cat /proc/mdstat - that's practical !
cat /proc/cpuinfo - equally so
Anyway - I won't involve myself in the argument whether we should keep
the old /proc or not - I wanted to present my idea how we could overcome
some fundamental problems in the existing framework, non-intrusively.
>
> Just drop the human-readable stuff from the new /proc, please.
I don't care enough about it to discuss it now, but I'm sure others do ;)
>
> In what way is parsing /proc/meminfo in a script more practical than
> cat /proc/meminfo/total ?
I see your point.
There's some system overhead when converting text/integer values, but
if you're polling so often I guess you have other problems anyway...
...
>
> This just seems needless duplication, and fragile. Representing things as directory
> hierarchies and single-value files in text seems to me to be much nicer, just as
> convenient, and much nicer for fs/proc/ source...
I like the idea of single-value files.
But then how do we get the nice summary information we have today ?
Hmm... How about:
/proc/meminfo - as it was
/proc/.meminfo/ - as you suggested
That way we keep /proc looking like it was, while offering the very nice
single-value file interface to apps that needs it.
I could even live with text encoding of the values - I just hate not being able
to tell if it's supposed to be i32/u32/i64/u64/float/double/... from looking
at the variable. Type-less interfaces with implicitly typed values are
*evil*.
I'd love to have type information passed along with the value. Of course
you could add a "f"_t file for each "f", and handle eventual discrepancies
at run-time in your application.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, 4 Nov 2001, Tim Jansen wrote:
> On Sunday 04 November 2001 18:41, you wrote:
> > The "fuzzy parsing" userland has to do today to get useful information
> > out of many proc files today is not nice at all.
>
> I agree, but you dont need a binary format to achieve this. A WELL-DEFINED
> format is sufficient. XML is one of them, one-value-files another one. The
And thinking before defining an API is the third. Guess why XML is so
popular alternative...
On Sun, Nov 04, 2001 at 07:27:16PM +0100, Tim Jansen wrote:
> On Sunday 04 November 2001 18:41, you wrote:
> > The "fuzzy parsing" userland has to do today to get useful information
> > out of many proc files today is not nice at all.
>
> I agree, but you dont need a binary format to achieve this. A WELL-DEFINED
> format is sufficient. XML is one of them, one-value-files another one. The
> "fuzzy parsing" only happens because the files try to be friendly for human
> readers.
You need syntax or "transport", and then you need semantics. My approach
is identical to XML except it doesn't give you kilobytes of human-unreadable
text.
You could use text, with binary you save the extra conversions along with
errors from parsers or bad use of sscanf()/sprintf()/... K.I.S.S. :)
I see a good point in using one-value-files though, except I think there's
some type information missing.
>
>
> > It eats CPU, it's error-prone, and all in all it's just "wrong".
>
> How much of your CPU time is spent parsing /proc files?
[albatros:joe] $ time vmstat 1
procs memory swap io system cpu
r b w swpd free buff cache si so bi bo in cs us sy id
0 0 0 113908 3184 1892 130584 1 1 3 3 61 43 9 5 86
1 0 0 113908 3064 1896 130700 0 0 8 0 2301 1148 8 2 90
0 0 0 113908 3064 1896 130700 0 0 0 0 2026 893 7 2 91
0 0 0 113908 3064 1896 130700 0 0 0 0 1877 829 3 4 93
0 0 0 113908 3068 1896 130696 0 0 0 0 1946 942 5 3 92
0 0 0 113908 3072 1896 130696 0 0 0 0 2009 1034 7 5 88
0 0 0 113908 3064 1896 130704 0 0 0 0 3706 2336 4 5 90
0 0 0 113908 3064 1900 130688 0 0 0 0 2341 1671 10 3 87
0 0 0 113908 3064 1900 130736 0 0 0 0 2431 1869 15 5 79
2 0 0 113908 3064 1900 130764 0 0 0 88 2346 1440 12 3 85
^C
real 0m9.486s
user 0m0.070s
sys 0m0.120s
[albatros:joe] $
A *very* simple program (top is probably a lot worse!) uses 1% on my Dual 1.4
GHz Athlon.
Those TEN lines of status output cost me 336 MILLION clock cycles.
>
>
> > However - having a human-readable /proc that you can use directly with
> > cat, echo, your scripts, simple programs using read(), etc. is
> > absolutely a *very* cool feature that I don't want to let go. It is just
> > too damn practical.
>
> You shouldn't use them in scripts because they are likely to break. That's
> the whole point. At least not when you want to distribute the scripts to
> others. And BTW the one-value-files are much easier to parse for scripts than
> any other solution that I have seen so far, including the current /proc
> interface.
I agree that there's some really good points in using single-value files.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, 4 Nov 2001, [iso-8859-1] Jakob ?stergaard wrote:
> So you want generaiton and parsing of text strings whenever we pass an int from
> the kernel ?
"scanf is tough" --- programmer Barbie...
On Sun, Nov 04, 2001 at 07:20:39PM +0100, Tim Jansen wrote:
> On Sunday 04 November 2001 18:28, Daniel Phillips wrote:
> > > It needs special parsers and will be almost impossible to access from
> > > shell scripts.
> > No, look, he's proposing to put the binary encoding in hidden .files. The
> > good old /proc files will continue to appear and operate as they do now.
>
> But as he already said:
> 2) As /proc files change, parsers must be changed in userspace
>
> So if only some programs use the 'dot-files' and the other still use the
> crappy text interface we still have the old problem for scripts, only with a
> much larger effort.
So we have a gradual transition - nothing breaks more than it does already,
and applications can migrate to the stable API over time.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 01:30:38PM -0500, Alexander Viro wrote:
>
>
> On Sun, 4 Nov 2001, Tim Jansen wrote:
>
> > So if only some programs use the 'dot-files' and the other still use the
> > crappy text interface we still have the old problem for scripts, only with a
> > much larger effort.
>
> Folks, could we please deep-six the "ASCII is tough" mentality? Idea of
> native-endian data is so broken that it's not even funny. Exercise:
> try to export such thing over the network. Another one: try to use
> that in a shell script. One more: try to do it portably in Perl script.
So make it network byte order.
How many bugs have you heard of with bad use of sscanf() ?
The counters *are* host specific. Available memory is 32 bits somewhere, 64
other places. That's the world we live in and hiding the difficulties in ASCII
that *can* be parsed so that it only breaks "sometimes" doesn't help the
application developers.
Better to face the facts, and get over it.
> It had been tried. Many times. It had backfired 100 times out 100.
> We have the same idiocy to thank for fun trying to move a disk with UFS
> volume from Solaris sparc to Solaris x86. We have the same idiocy to
> thank for a lot of ugliness in X.
>
> At the very least, use canonical bytesex and field sizes. Anything less
> is just begging for trouble. And in case of procfs or its equivalents,
> _use_ the_ _damn_ _ASCII_ _representations_. scanf(3) is there for
> purpose.
>
scanf can be used wrongly in more ways than the two of us can imagine
together, even if we try.
I disagree with harmonizing field sizes - that doesn't make sense. What's
64 bits today is 128 tomorrow (IPv6 related things, crypto, ...), what
used to fit in 32 is in 64, some places.
Having a library that gives you either compile-time errors if you use it
wrong, or barfs loudly at run-time is one hell of a lot better than having
silent mis-parsing of ASCII values.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 07:34:00PM +0100, Tim Jansen wrote:
> On Sunday 04 November 2001 18:48, you wrote:
> > > Is there is a way to implement a filesystem in user-space? What you could
> > You're proposing a replacement of /proc ?
>
> I was asking whether there is a way to do compatibility stuff and human
> readable interfaces in user space.
Probably.
I'm just trying to:
1) Supplement an unstable "API sort-of-thing" with something
that's stable and can last.
2) Come up with a realistic idea that can be implemented, tested,
and deemed "obviously correct" and "good" in finite time
3) Not break stuff more than it already is, and allow for a gradual
transition to something that won't break mysteriously every ten
kernel releases.
The idea is that if the userland application does it's parsing wrong, it should
either not compile at all, or abort loudly at run-time, instead of getting bad
values "sometimes".
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 01:40:22PM -0500, Alexander Viro wrote:
>
>
> On Sun, 4 Nov 2001, [iso-8859-1] Jakob %stergaard wrote:
> > So you want generaiton and parsing of text strings whenever we pass an int from
> > the kernel ?
>
> "scanf is tough" --- programmer Barbie...
I'm a little scared when our VFS guy claims he never heard of excellent
programmers using scanf in a way that led to parse errors.
/me hopes VFS doesn't use scanf...
Come on Al, if you have real arguments let hear them, if you want to insult
people you gotta do better than that above. :)
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
In article <[email protected]>,
Daniel Phillips <[email protected]> wrote:
>On November 4, 2001 05:45 pm, Tim Jansen wrote:
>> > The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
>> > is a list of elements, wherein an element can itself be a list (or a
>>
>> Why would anybody want a binary encoding?
>
>Because they have a computer?
That's a stupid argument.
The computer can parse anything.
It's us _humans_ that are limited at parsing. We like text interfaces,
because that's how we are brought up. We aren't good at binary, and
we're not good at non-linear, "structured" interfaces.
In contrast, a program can be taught to parse the ascii files quite
well, and does not have the inherent limitations we humans have. Sure,
it has _other_ limitations, but /proc being ASCII is sure as hell not
one of them.
In short: /proc is ASCII, and will so remain while I maintain a kernel.
Anything else is stupid.
Handling spaces and newlines is easy enough - see the patches from Al
Viro, for example.
Linus
On November 4, 2001 07:52 pm, Jakob ?stergaard wrote:
> On Sun, Nov 04, 2001 at 01:30:38PM -0500, Alexander Viro wrote:
> > Folks, could we please deep-six the "ASCII is tough" mentality? Idea of
> > native-endian data is so broken that it's not even funny. Exercise:
> > try to export such thing over the network. Another one: try to use
> > that in a shell script. One more: try to do it portably in Perl script.
>
> So make it network byte order.
>
> How many bugs have you heard of with bad use of sscanf() ?
Yes, and it's easy for those to be buffer overflow bugs. The extra security
risk is even more of a reason to avoid ASCII strings in internal interfaces
than the gross overhead. Do the ASCII conversions in user space, please.
No, ASCII isn't tough, it just sucks as an internal transport.
--
Daniel
On Sunday 04 November 2001 19:59, you wrote:
> The idea is that if the userland application does it's parsing wrong, it
> should either not compile at all, or abort loudly at run-time, instead of
> getting bad values "sometimes".
All the XML parser interfaces that I have seen so far allow you to do things
that will cause the code to fail when you do stupid things or are not
prepared that there may appear unknown elements. Or you use a DTD, and then
your code is guaranteed to fail after a change, which may be even worse.
One-value-files are a noticable exception, you must be VERY stupid if your
code breaks because of an additional file.
bye...
On Sun, Nov 04, 2001 at 07:07:53PM +0000, Linus Torvalds wrote:
> In article <[email protected]>,
> Daniel Phillips <[email protected]> wrote:
> >On November 4, 2001 05:45 pm, Tim Jansen wrote:
> >> > The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
> >> > is a list of elements, wherein an element can itself be a list (or a
> >>
> >> Why would anybody want a binary encoding?
> >
> >Because they have a computer?
>
> That's a stupid argument.
>
> The computer can parse anything.
>
> It's us _humans_ that are limited at parsing. We like text interfaces,
> because that's how we are brought up. We aren't good at binary, and
> we're not good at non-linear, "structured" interfaces.
>
> In contrast, a program can be taught to parse the ascii files quite
> well, and does not have the inherent limitations we humans have. Sure,
> it has _other_ limitations, but /proc being ASCII is sure as hell not
> one of them.
>
> In short: /proc is ASCII, and will so remain while I maintain a kernel.
> Anything else is stupid.
I agree that it would be stupid *not* to have an ASCII proc.
But why not make a machine-readable /proc as well ?
>
> Handling spaces and newlines is easy enough - see the patches from Al
> Viro, for example.
Obviously none of you have parsed something like:
[albatros:joe] $ cat /proc/mdstat
Personalities : [raid0] [raid1]
read_ahead 1024 sectors
md0 : active raid1 hdc1[1] hda1[0]
51264 blocks [2/2] [UU]
md1 : active raid1 hdc5[1] hda5[0]
10240128 blocks [2/2] [UU]
md2 : active raid0 hdc7[1] hda6[0]
6661184 blocks 64k chunks
unused devices: <none>
[albatros:joe] $
Now this isn't even bad - the fun begins when a resync is running, when
mdstat contains *progress meters* like "[====> ] 42%". While being
nicely readable for a human, this is a parsing nightmare. Especially
because stuff like this changes over time.
The worst thing is, that you'll often see that your parser isn't strict
enough, and therefore won't fail loudly, but rather "mis-parse" the "GUI"
that somehow got put into /proc.
I think it's great to put these things in /proc, but not having a machine
readable form too is stupid, especially because this could be done with
*no* harm to the existing interface, and with very little code.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 08:19:39PM +0100, Tim Jansen wrote:
> On Sunday 04 November 2001 19:59, you wrote:
> > The idea is that if the userland application does it's parsing wrong, it
> > should either not compile at all, or abort loudly at run-time, instead of
> > getting bad values "sometimes".
>
> All the XML parser interfaces that I have seen so far allow you to do things
> that will cause the code to fail when you do stupid things or are not
> prepared that there may appear unknown elements. Or you use a DTD, and then
> your code is guaranteed to fail after a change, which may be even worse.
XML is pretty far from light-weight. And it's only human readable in theory.
I like the *idea*, but XML is the wrong implementation of that idea. Other than
that I think we could agree ;)
>
> One-value-files are a noticable exception, you must be VERY stupid if your
> code breaks because of an additional file.
hehe, agreed. The problem then is type information.
Consider:
-------------
int mf = open("/proc/meminfo/totalmem",O_RDONLY);
int32 mem;
read(mf, &mem, sizeof(mem));
-------------
Does this work ? Yes of course. But what if I ported my program to
a 64 bit arch... The program still compiles. It also runs. But the
values are no longer correct. Now *that* is hell.
Same story with ASCII.
I want type information.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
--On Sunday, 04 November, 2001 8:04 PM +0100 Jakob ?stergaard
<[email protected]> wrote:
> I'm a little scared when our VFS guy claims he never heard of excellent
> programmers using scanf in a way that led to parse errors.
I'd be far more scared if Al claimed he'd never heard of excellent
programmers reading binary formats, compatible between multiple
code revisions both forward and backwards, endian-ness etc., which
had never lead to parse errors of the binary structure.
If you feel it's too hard to write use scanf(), use sh, awk, perl
etc. which all have their own implementations that appear to have
served UNIX quite well for a long while.
Constructive suggestions:
1. use a textual format, make minimal
changes from current (duplicate new stuff where necessary),
but ensure each /proc interface has something which spits
out a format line (header line or whatever, perhaps an
interface version number). This at least
means that userspace tools can check this against known
previous formats, and don't have to be clairvoyant to
tell what future kernels have the same /proc interfaces.
2. Flag those entries which are sysctl mirrors as such
(perhaps in each /proc directory /proc/foo/bar/, a
/proc/foo/bar/ctl with them all in). Duplicate for the
time being rather than move. Make reading them (at
least those in the ctl directory) have a comment line
starting with a '#' at the top describing the format
(integer, boolean, string, whatever), what it does.
Ignore comment lines on write.
3. Try and rearrange all the /proc entries this way, which
means sysctl can be implemented by a straight ASCII
write - nice and easy to parse files. Accept that some
/proc reads (especially) are going to be hard.
--
Alex Bligh
On Sun, 4 Nov 2001, [iso-8859-1] Jakob ?stergaard wrote:
> I'm a little scared when our VFS guy claims he never heard of excellent
> programmers using scanf in a way that led to parse errors.
I've seen excellent programmers fscking up use of && and ||.
I've also seen quite a few guys coming (from their experience) to
the conclusions that look an awful lot similar to mine. Like, say it,
dmr and ken. Or Linus. Or Rob Pike. Or Brian Kernighan.
And frankly, when I hear about "typed" interfaces, two things come to
mind - "typed files" and CORBA. Both - architectural failures.
On Sun, 4 Nov 2001, Jakob ?stergaard wrote:
> Now this isn't even bad - the fun begins when a resync is running, when
> mdstat contains *progress meters* like "[====> ] 42%". While being
> nicely readable for a human, this is a parsing nightmare. Especially
> because stuff like this changes over time.
Any program needing to parse this would just ignore the bits between [],
and convert the percentage to an int. Hardly a 'nightmare'.
Dave.
--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs
On Sunday 04 November 2001 20:24, Jakob ?stergaard wrote:
> Does this work ? Yes of course. But what if I ported my program to
> a 64 bit arch... The program still compiles. It also runs. But the
> values are no longer correct. Now *that* is hell.
Actually I worry more about those programs that are already compiled and will
break when the kernel changes. But even if you recompile the code, how can
you be sure that the programmer uses longs instead of ints for those 64 bit
types? The C compiler allows the implicit conversion without warning. If you
change the type the program has to be changed, no matter what you do.
> I want type information.
BTW nobody says to one-value-files can not have types (see my earlier posts
in this thread).
bye...
On Sun, Nov 04, 2001 at 07:24:36PM -0000, Alex Bligh - linux-kernel wrote:
>
>
> --On Sunday, 04 November, 2001 8:04 PM +0100 Jakob ?stergaard
> <[email protected]> wrote:
>
> > I'm a little scared when our VFS guy claims he never heard of excellent
> > programmers using scanf in a way that led to parse errors.
>
> I'd be far more scared if Al claimed he'd never heard of excellent
> programmers reading binary formats, compatible between multiple
> code revisions both forward and backwards, endian-ness etc., which
> had never lead to parse errors of the binary structure.
Sure there is potential for error anywhere. And maybe your compiler's
type-check is broken too. But that's not an argument for not trying
to improve on things.
Please tell me, is "1610612736" a 32-bit integer, a 64-bit integer, is
it signed or unsigned ?
I could even live with parsing ASCII, as long as there'd just be type
information to go with the values. But I see no point in using ASCII
for something intended purely for machine-to-machine communication.
/proc text "GUI" files will stay, don't worry :)
> If you feel it's too hard to write use scanf(), use sh, awk, perl
> etc. which all have their own implementations that appear to have
> served UNIX quite well for a long while.
Witness ten lines of vmstat output taking 300+ millions of clock cycles.
> Constructive suggestions:
>
> 1. use a textual format, make minimal
> changes from current (duplicate new stuff where necessary),
> but ensure each /proc interface has something which spits
> out a format line (header line or whatever, perhaps an
> interface version number). This at least
> means that userspace tools can check this against known
> previous formats, and don't have to be clairvoyant to
> tell what future kernels have the same /proc interfaces.
Then we have text strings as values - some with spaces, some with quotes in
them. Then we escape our way out of that (which isn't done today by the way),
and then we start implementing a parser for that in every /proc using
application out there.
These interfaces need to be "correct", not "mostly correct".
Example: I make a symlink from "cat" to "c)(t" (sick example, but that doesn't
change my point), and do a "./c)(t /proc/self/stat":
[albatros:joe] $ ./c\)\(a /proc/self/stat
22482 (c)(a) R 22444 22482 22444 34816 22482 0 20 0 126 0 0 0 0 0 14 0 0 0 24933425 1654784 129 4294967295 134512640 134525684 3221223504 3221223112 1074798884 0 0 0 0 0 0 0 17 0
Go parse that one ! What's the name of my applications ?
It's good enough for human readers - we have the ability to reason and
make qualified quesses. Now go implement that in every single piece of
/proc reading software out there :)
If you want ASCII, we should at least have some approved parsing library
to parse this into native-machine binary structures that can be used
safely in applications. I see little point in ASCII then, but maybe it's
just me.
>
> 2. Flag those entries which are sysctl mirrors as such
> (perhaps in each /proc directory /proc/foo/bar/, a
> /proc/foo/bar/ctl with them all in). Duplicate for the
> time being rather than move. Make reading them (at
> least those in the ctl directory) have a comment line
> starting with a '#' at the top describing the format
> (integer, boolean, string, whatever), what it does.
> Ignore comment lines on write.
>
> 3. Try and rearrange all the /proc entries this way, which
> means sysctl can be implemented by a straight ASCII
> write - nice and easy to parse files. Accept that some
> /proc reads (especially) are going to be hard.
I just hate to implement a fuzzy parser with an A.I. that makes HAL look like
kid's toys, every d*mn time I need to get information from the system.
I'm not a big fan of huge re-arrangements. I do like the idea of providing
a machine-readable version of /proc.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 02:29:06PM -0500, Alexander Viro wrote:
>
>
> On Sun, 4 Nov 2001, [iso-8859-1] Jakob %stergaard wrote:
>
> > I'm a little scared when our VFS guy claims he never heard of excellent
> > programmers using scanf in a way that led to parse errors.
>
> I've seen excellent programmers fscking up use of && and ||.
>
> I've also seen quite a few guys coming (from their experience) to
> the conclusions that look an awful lot similar to mine. Like, say it,
> dmr and ken. Or Linus. Or Rob Pike. Or Brian Kernighan.
>
> And frankly, when I hear about "typed" interfaces, two things come to
> mind - "typed files" and CORBA. Both - architectural failures.
Architectural failures like "C" or "C++" for example.
Strong type information (in one form or the other) is absolutely fundamental
for achieving correctness in this kind of software.
You can't just have a "data thingy" and hope your "operator thingys" do
what you suppose they do, and that your "storage thingy" has enough room
for what you shovel into it.
C has types for a reason. C++ improved the type system for a reason. Perl
and PHP programs have run-time failures for a reason.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 08:32:17PM +0100, Dave Jones wrote:
> On Sun, 4 Nov 2001, Jakob ?stergaard wrote:
>
> > Now this isn't even bad - the fun begins when a resync is running, when
> > mdstat contains *progress meters* like "[====> ] 42%". While being
> > nicely readable for a human, this is a parsing nightmare. Especially
> > because stuff like this changes over time.
>
> Any program needing to parse this would just ignore the bits between [],
> and convert the percentage to an int. Hardly a 'nightmare'.
You didn't read the output then. The information about which disks are up
and which are failed, is put between square brackets too. You don't want
to ignore that.
So just ignore square brackets that have "=" " " and ">" between them ?
What happens when someone decides "[----> ]" looks cooler ?
Or just parse the brackets with "U" "F" and "_" between them ? What then
when someone decides that disks being resynced are marked with "R" ?
Fuzzy matching.
Nightmare.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On November 4, 2001 08:46 pm, Linus Torvalds wrote:
> On Sun, 4 Nov 2001, Daniel Phillips wrote:
> > >
> > > The computer can parse anything.
> >
> > OK, then lets keep the 'current' variable in ASCII.
>
> Yeah, the old "argument by absurdity".
>
> Did you ever take logics class? It isn't a valid argument at all.
>
> My argument is: humans want the data they want in a readable format. What
> the _hell_ does that have to do with the "current" variable?
> > > Handling spaces and newlines is easy enough - see the patches from Al
> > > Viro, for example.
> >
> > Why are we doing this parsing in the kernel when it can be done in user
> > space?
>
> We're not parsing anything.
>
> We're marshalling the data into a format that is independent of whatever
> internal representation the kernel happens to have for it that particular
> day.
>
> A representation that is valid across architectures, and a representation
> that is unambiguous. A representation that various scripts can trivially
> use, and a representation that is not bound by fixed-sized fields or other
> idiocy.
>
> In short, text strings.
>
> They have advantages even for a computer. Fixed-size binary interfaces are
> BAD for information interchange. They are bad as a word document file
> format, they are bad for email, and they are bad for /proc. Get it?
>
> Would you prefer doc-files to be standard text, marshalled into some
> logical form? Or do you prefer binary blobs of data that is limited by the
> binary format?
>
> Linus
>
>
On Sun, 4 Nov 2001, [iso-8859-1] Jakob ?stergaard wrote:
> > If you feel it's too hard to write use scanf(), use sh, awk, perl
> > etc. which all have their own implementations that appear to have
> > served UNIX quite well for a long while.
>
> Witness ten lines of vmstat output taking 300+ millions of clock cycles.
Would the esteemed sir care to check where these cycles are spent?
How about "traversing page tables of every damn process out there"?
Doesn't sound like a string operation to me...
On Sun, Nov 04, 2001 at 08:41:34PM +0100, Tim Jansen wrote:
> On Sunday 04 November 2001 20:24, Jakob ?stergaard wrote:
> > Does this work ? Yes of course. But what if I ported my program to
> > a 64 bit arch... The program still compiles. It also runs. But the
> > values are no longer correct. Now *that* is hell.
>
> Actually I worry more about those programs that are already compiled and will
> break when the kernel changes. But even if you recompile the code, how can
> you be sure that the programmer uses longs instead of ints for those 64 bit
> types? The C compiler allows the implicit conversion without warning. If you
> change the type the program has to be changed, no matter what you do.
int get(result_t * result);
u32 a;
get(&a);
This will fail at compile time if result_t is 64 bits.
In C++ you could even do overloading where conversion is possible and
still have compile time errors when it's not possible.
>
> > I want type information.
>
> BTW nobody says to one-value-files can not have types (see my earlier posts
> in this thread).
I don't dislike one-value-files - please tell me how you get type information
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, 4 Nov 2001, [iso-8859-1] Jakob ?stergaard wrote:
> Strong type information (in one form or the other) is absolutely fundamental
> for achieving correctness in this kind of software.
Like, say it, all shell programming? Or the whole idea of "file as stream of
characters"? Or pipes, for that matter...
On Sun, Nov 04, 2001 at 02:52:56PM -0500, Alexander Viro wrote:
>
>
> On Sun, 4 Nov 2001, [iso-8859-1] Jakob %stergaard wrote:
>
> > > If you feel it's too hard to write use scanf(), use sh, awk, perl
> > > etc. which all have their own implementations that appear to have
> > > served UNIX quite well for a long while.
> >
> > Witness ten lines of vmstat output taking 300+ millions of clock cycles.
>
> Would the esteemed sir care to check where these cycles are spent?
> How about "traversing page tables of every damn process out there"?
> Doesn't sound like a string operation to me...
>
I'm sure your're right. It's probably not just string operations. And maybe
then don't even dominate.
And I'm sure that vmstat doesn't use sh, awk, and perl either.
Anyway, the efficiency issues was mainly me getting side-tracked from the main
issue as I see it.
The point I wanted to make was, that we need an interface thats possible to
parse "correctly", not "mostly correctly", and we need to be able to parse it
in a way so that we do not have to rely on a myriad of small tools (that change
over time too).
You need something that's simple and correct. If it's ASCII, well let it be
ASCII. But /proc as it is today is not possible to parse reliably. See my "cat
vs. c)(a" example. You can parse it "mostly reliable", but that's just not
good enough.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, 4 Nov 2001, [iso-8859-1] Jakob ?stergaard wrote:
> So just ignore square brackets that have "=" " " and ">" between them ?
>
> What happens when someone decides "[----> ]" looks cooler ?
First of all, whoever had chosen that output did a fairly idiotic thing.
But as for your question - you _do_ know what regular expressions are,
don't you? And you do know how to do this particular regex without
any use of library functions, right?
On Sun, Nov 04, 2001 at 03:01:12PM -0500, Alexander Viro wrote:
>
>
> On Sun, 4 Nov 2001, [iso-8859-1] Jakob %stergaard wrote:
>
> > Strong type information (in one form or the other) is absolutely fundamental
> > for achieving correctness in this kind of software.
>
> Like, say it, all shell programming? Or the whole idea of "file as stream of
> characters"? Or pipes, for that matter...
>
Shell programming is great for small programs. You don't need type information
in the language when you can fit it all in your head.
Now, go write 100K lines of shell, something that does something that is not
just shoveling lines from one app into a grep and into another app. Let's say,
a database. Go implement the next Oracle replacement in bash, and tell me you
don't care about types in your language.
Why do we have "file formats", well that is because files are just streams
of characters, and we need more structure than just that.
This is exactly what I'm proposing - having "just" files in proc is fine,
but not knowing the type of the information they present is catastrophic.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, Nov 04, 2001 at 03:06:27PM -0500, Alexander Viro wrote:
>
>
> On Sun, 4 Nov 2001, [iso-8859-1] Jakob %stergaard wrote:
>
> > So just ignore square brackets that have "=" " " and ">" between them ?
> >
> > What happens when someone decides "[----> ]" looks cooler ?
>
> First of all, whoever had chosen that output did a fairly idiotic thing.
> But as for your question - you _do_ know what regular expressions are,
> don't you? And you do know how to do this particular regex without
> any use of library functions, right?
A regex won't tell me if 345987 is a signed or unsigned 32-bit or 64-bit
integer, or if it's a double.
Sure, implement arbitrary precision arithmetic in every single app out there
using /proc....
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sunday 04 November 2001 20:55, Jakob ?stergaard wrote:
> > BTW nobody says to one-value-files can not have types (see my earlier
> > posts in this thread).
> I don't dislike one-value-files - please tell me how you get type
> information
Using a ioctl that returns the type.
bye...
On Sun, Nov 04, 2001 at 09:13:35PM +0100, Tim Jansen wrote:
> On Sunday 04 November 2001 20:55, Jakob ?stergaard wrote:
> > > BTW nobody says to one-value-files can not have types (see my earlier
> > > posts in this thread).
> > I don't dislike one-value-files - please tell me how you get type
> > information
>
> Using a ioctl that returns the type.
But that's not pretty :)
Can't we think of something else ?
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
SpaceWalker writes:
> A good reason could be that a simple ps -aux uses hundreds of system
> calls to get the list of all the processes ...
First of all, "ps -aux" isn't correct usage. It is accepted only
as long as you don't have a username "x". Try "ps aux" instead.
(good versions of ps will print a warning -- use Debian)
Second of all, if you want a really fast ps, look here:
http://lwn.net/2000/0420/a/atomicps.html
>> Using a ioctl that returns the type.
>
> But that's not pretty :)
>
> Can't we think of something else ?
Well this sure isn't perfect, but to
illustrate it can be done with a text
interface (and the only restriction
is strings can't contain \n):
cat /proc/widget
# Format: '%l'
# Params: Number_of_Widgets
37
echo '38' > /proc/widget
cat /proc/widget
# Format: '%l'
# Params: Number_of_Widgets
38
cat /proc/widget | egrep -v '^#'
38
cat /proc/sprocket
# Format: '%l' '%s'
# Params: Number_of_Sprockets Master_Sprocket_Name
21
Foo Bar Baz
echo '22' > /proc/sprocket
# writes first value if no \n character written before
# close - all writes done simultaneously on close
cat /proc/sprocket | egrep -v '^#'
22
Foo Bar Baz
echo 'Master_Sprocket_Name\nBaz Foo Bar' > /proc/sprocket
cat /proc/sprocket | egrep -v '^#'
22
Baz Foo Bar
echo 'Master_Sprocket_Name\nFoo Foo Foo\nNumber_of_Sprockets\n111' >
/proc/sprocket
# Simultaneous commit if /proc driver needs it
# i.e. it has get_lock() and release_lock()
# entries
cat /proc/sprocket | egrep -v '^#'
111
Foo Foo Foo
& nice user tools look at the '# Params:' line to find
what number param they want to read / alter.
--
Alex Bligh
On Sun, Nov 04, 2001 at 08:47:53PM -0000, Alex Bligh - linux-kernel wrote:
> >> Using a ioctl that returns the type.
> >
> > But that's not pretty :)
> >
> > Can't we think of something else ?
>
> Well this sure isn't perfect, but to
> illustrate it can be done with a text
> interface (and the only restriction
> is strings can't contain \n):
Such limitations are not acceptable.
>
> cat /proc/widget
> # Format: '%l'
> # Params: Number_of_Widgets
> 37
>
> echo '38' > /proc/widget
>
> cat /proc/widget
> # Format: '%l'
> # Params: Number_of_Widgets
> 38
Good point with the parsing :)
>
> cat /proc/widget | egrep -v '^#'
> 38
>
> cat /proc/sprocket
> # Format: '%l' '%s'
> # Params: Number_of_Sprockets Master_Sprocket_Name
> 21
> Foo Bar Baz
Not one value per file ?
>
> echo '22' > /proc/sprocket
> # writes first value if no \n character written before
> # close - all writes done simultaneously on close
>
> cat /proc/sprocket | egrep -v '^#'
> 22
> Foo Bar Baz
>
> echo 'Master_Sprocket_Name\nBaz Foo Bar' > /proc/sprocket
>
> cat /proc/sprocket | egrep -v '^#'
> 22
> Baz Foo Bar
>
> echo 'Master_Sprocket_Name\nFoo Foo Foo\nNumber_of_Sprockets\n111' >
> /proc/sprocket
> # Simultaneous commit if /proc driver needs it
> # i.e. it has get_lock() and release_lock()
> # entries
> cat /proc/sprocket | egrep -v '^#'
> 111
> Foo Foo Foo
>
> & nice user tools look at the '# Params:' line to find
> what number param they want to read / alter.
How about:
We keep old proc files.
For each file, we make a .directory.
For example - for /proc/meminfo, we make a /proc/.meminfo/ directory
that contains the files
MemTotal
MemFree
MemShared
etc.
cat /proc/.meminfo/MemTotal gives you
"u32:KB:513276"
The kernel code for printing this is something like
sprintf(..., "%s:%s:%u", DPI_T_U32, DPI_U_KB, i.memtotal);
The types and the units are necessary. But furthermore we do not
want various developers to be using different ways of writing the
types and units (KB vs. kB, vs. KiB). Defines will ensure that
(if they are used - but they lend themselves to being used), and
once a new define is introduced it is fairly easy to document and
export to userland.
Not only does this format tell us exactly what's in the file (and
therefore how we should parse it), it also defines what we can write
to it (assuming we write the same types as we read - but that's a
reasonable assumption I suppose).
Problem: Could it be made simpler to parse from scripting languages,
without making it less elegant to parse in plain C ?
If the values is a string, the string will begin after the second
semicolon (safe, since no type or unit can contain a colon and won't
have to, ever), and ends at the end of the file. Voila, any character can be
in the string value.
And Al gets his #%^# text files ;)
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
=?iso-8859-1?Q?Jak writes:
> Please tell me, is "1610612736" a 32-bit integer, a 64-bit integer, is
> it signed or unsigned ?
>
> I could even live with parsing ASCII, as long as there'd just be type
> information to go with the values.
You are looking for something called the registry. It's something
that was introduced with Windows 95. It's basically a filesystem
with typed files: char, int, string, string array, etc.
> These interfaces need to be "correct", not "mostly correct".
>
> Example: I make a symlink from "cat" to "c)(t" (sick example,
> but that doesn't change my point), and do a "./c)(t /proc/self/stat":
>
> [albatros:joe] $ ./c\)\(a /proc/self/stat
> 22482 (c)(a) R 22444 22482 22444 34816 22482 0 20 0 126 0 0 0 0 0 14 0 0 0 24933425 1654784 129 4294967295 134512640 134525684 3221223504 3221223112 1074798884 0 0 0 0 0 0 0 17 0
>
> Go parse that one ! What's the name of my applications ?
Funny you should mention that one. I wrote the code used by procps
to read this file. I love that file! The parentheses issue is just
a beauty wart. People rarely feel the urge to screw with raw numbers.
In all the other files, idiots like to: add headers, change the
spelling of field names, change the order, add spaces and random
punctuation, etc. Nothing is as stable and easy to use as the
/proc/self/stat file.
> If you want ASCII, we should at least have some approved parsing
> library to parse this into native-machine binary structures
No.
>> 2. Flag those entries which are sysctl mirrors as such
>> (perhaps in each /proc directory /proc/foo/bar/, a
>> /proc/foo/bar/ctl with them all in). Duplicate for the
>> time being rather than move. Make reading them (at
>> least those in the ctl directory) have a comment line
>> starting with a '#' at the top describing the format
>> (integer, boolean, string, whatever), what it does.
>> Ignore comment lines on write.
Now you are proposing to dink with the format. See above comments.
>> 3. Try and rearrange all the /proc entries this way, which
>> means sysctl can be implemented by a straight ASCII
>> write - nice and easy to parse files.
This is exactly what the sysctl command does.
> I'm not a big fan of huge re-arrangements. I do like the idea of providing
> a machine-readable version of /proc.
Linus clearly doesn't give a fuck about /proc performance.
That's his right, and you are welcome to patch your kernel to
have something better: http://lwn.net/2000/0420/a/atomicps.html
On Sun, Nov 04, 2001 at 04:12:23PM -0500, Albert D. Cahalan wrote:
> =?iso-8859-1?Q?Jak writes:
>
> > Please tell me, is "1610612736" a 32-bit integer, a 64-bit integer, is
> > it signed or unsigned ?
> >
> > I could even live with parsing ASCII, as long as there'd just be type
> > information to go with the values.
>
> You are looking for something called the registry. It's something
> that was introduced with Windows 95. It's basically a filesystem
> with typed files: char, int, string, string array, etc.
Nope :)
It does not have "char, int, string, string array, etc." it has "String, binary
and DWORD".
Having read out 64 bit values, floating point data etc. from the registry, I'm
old enough to know that it is *NOT* what I'm looking for :)
...
> Funny you should mention that one. I wrote the code used by procps
> to read this file. I love that file! The parentheses issue is just
> a beauty wart. People rarely feel the urge to screw with raw numbers.
> In all the other files, idiots like to: add headers, change the
> spelling of field names, change the order, add spaces and random
> punctuation, etc. Nothing is as stable and easy to use as the
> /proc/self/stat file.
Imagine every field in a file by itself, with well-defined type
information and unit informaiton.
...
> Linus clearly doesn't give a fuck about /proc performance.
> That's his right, and you are welcome to patch your kernel to
> have something better: http://lwn.net/2000/0420/a/atomicps.html
Performance is one thing. Not being able to know whether numbers are i32, u32,
u64, or measured in Kilobytes or carrots is another ting.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
--On Sunday, 04 November, 2001 4:12 PM -0500 "Albert D. Cahalan"
<[email protected]> wrote:
>> Now you are proposing to dink with the format. See above comments.
Attribution error: that was me, disagreeing with Jakob - the point was
if you want to dink with the format to achieve the objectives
he seemed to be after (which I thought were to do at least
in part with consistency etc.), it is theoretically possible
to do such dinking with minimal change & certainly retain
text format (and note I said retain original /proc files too). Whether
it's worth it as a practical exercize, with all the inherent
disruption it would no doubt cause, and questionable net benefit
is a completely different question. I was just saying that
binary format wasn't necessary to achieve what I think
Jakob wanted to achieve. The full thought
experiment was in a later email. I suspect you don't disagree
given your previous post.
>>> 3. Try and rearrange all the /proc entries this way, which
>>> means sysctl can be implemented by a straight ASCII
>>> write - nice and easy to parse files.
>
> This is exactly what the sysctl command does.
Sorry, I meant 'this way a consistent interface cf
sysctl could be used for more of what's currently
done through /proc'. Last time I looked there was
stuff you could read/write to through /proc which
couldn't be done through sysctl.
--
Alex Bligh
Alexander Viro writes:
> Folks, could we please deep-six the "ASCII is tough" mentality?
Sure. How about:
It's a PITA to break out the dragon book, bloat sucks,
and I'd just rather not have to write the code.
I also like:
Trusting every little status app to not have exploitable
buffer overruns is worrisome.
The more serious problem:
ASCII formats change in unpredictable ways. (see below)
> Idea of
> native-endian data is so broken that it's not even funny. Exercise:
> try to export such thing over the network.
Ooh! You're making /proc NFS exportable?
> Another one: try to use
> that in a shell script. One more: try to do it portably in Perl script.
FOO=`ps -o foo= -p $$` # Get our FOO value out of binary /proc
> It had been tried. Many times. It had backfired 100 times out 100.
> We have the same idiocy to thank for fun trying to move a disk with UFS
> volume from Solaris sparc to Solaris x86.
Disks are slow, so native endian UFS was indeed a poor choice.
> We have the same idiocy to
> thank for a lot of ugliness in X.
This was a necessary performance hack. Hopefully nobody wrote
an X server or client that would do conditional byte swaps
all over the code.
> At the very least, use canonical bytesex and field sizes. Anything less
> is just begging for trouble. And in case of procfs or its equivalents,
> _use_ the_ _damn_ _ASCII_ _representations_. scanf(3) is there for
> purpose.
SigCgt in /proc/self/status wasn't always spelled that way.
It wasn't always in the same location either. ASCII bites
because people can't resist screwing with it.
On Sunday 04 November 2001 22:20, Jakob ?stergaard wrote:
> > > I could even live with parsing ASCII, as long as there'd just be type
> > > information to go with the values.
> > You are looking for something called the registry. It's something
> > that was introduced with Windows 95. It's basically a filesystem
> > with typed files: char, int, string, string array, etc.
> Having read out 64 bit values, floating point data etc. from the registry,
> I'm old enough to know that it is *NOT* what I'm looking for :)
Why? It's not bad only because it is from MS. IMHO the disadvantages of the
registry are:
- you need special software/syscalls to access it
- because of that making backups etc is hard
- the organization of the data is horrible
Assuming you could mount it as a regular filesystem and use it for kernel
configuration, what else are its disadvantages?
bye...
(Whoops, sorry about the 200K spam the first time, hope that was bounced
from the list, here's the abridged version.)
On November 4, 2001 08:52 pm, Alexander Viro wrote:
> On Sun, 4 Nov 2001, [iso-8859-1] Jakob ?stergaard wrote:
>
> > > If you feel it's too hard to write use scanf(), use sh, awk, perl
> > > etc. which all have their own implementations that appear to have
> > > served UNIX quite well for a long while.
> >
> > Witness ten lines of vmstat output taking 300+ millions of clock cycles.
>
> Would the esteemed sir care to check where these cycles are spent?
> How about "traversing page tables of every damn process out there"?
> Doesn't sound like a string operation to me...
Doing 'top -d .1' eats 18% of a 1GHz cpu, which is abominable. A kernel
profile courtesy of sgi's kernprof shows that scanning pages does not move
the needle, whereas sprintf does. Notice that the biggest chunk of time
is spent in user space, possibly decoding proc values. I didn't profile
user space, and I should but I'm not set up to do that just now. Another
main cause of this embarrassing waste of cpu cycles is the bazillions of
file operations. Enjoy:
Call graph (explanation follows)
granularity: each sample hit covers 4 byte(s) for 0.00% of 38.05 seconds
index % time self children called name
<spontaneous>
[1] 78.0 0.00 29.67 cpu_idle [1]
29.65 0.00 54034/54034 default_idle [2]
0.01 0.00 1805/3791 schedule [81]
0.00 0.00 1804/1806 check_pgt_cache [225]
-----------------------------------------------
29.65 0.00 54034/54034 cpu_idle [1]
[2] 77.9 29.65 0.00 54034 default_idle [2]
-----------------------------------------------
<spontaneous>
[3] 11.2 4.26 0.00 USER [3]
-----------------------------------------------
<spontaneous>
[4] 10.4 0.04 3.94 system_call [4]
0.01 2.85 19776/19776 sys_read [5]
0.01 0.48 17031/17031 sys_open [14]
0.00 0.15 5742/5742 sys_stat64 [24]
0.00 0.12 4554/4554 sys_write [28]
0.00 0.09 642/642 sys_getdents64 [30]
0.01 0.08 1544/1544 sys_select [32]
0.00 0.03 767/767 sys_poll [69]
0.01 0.02 16901/16903 sys_close [75]
0.00 0.02 3969/3969 sys_fcntl64 [82]
0.00 0.01 1046/1046 sys_ioctl [137]
0.00 0.01 134/134 old_mmap [142]
0.00 0.01 5376/5376 sys_alarm [145]
0.00 0.00 3644/3644 sys_gettimeofday [154]
0.00 0.00 1/1 sys_fork [165]
0.00 0.00 128/128 sys_access [176]
0.00 0.00 130/130 sys_munmap [188]
0.00 0.00 1/1 sys_execve [201]
0.00 0.00 385/385 sys_lseek [214]
0.00 0.00 263/263 sys_fstat64 [219]
0.00 0.00 3615/3615 sys_rt_sigaction [224]
0.00 0.00 128/128 sys_llseek [232]
0.00 0.00 17/17 sys_wait4 [237]
0.00 0.00 1/1 sys_exit [239]
0.00 0.00 4/4 sys_brk [297]
0.00 0.00 6/6 sys_writev [304]
0.00 0.00 1/1 sys_mprotect [350]
0.00 0.00 148/148 sys_time [425]
0.00 0.00 34/34 sys_rt_sigprocmask [433]
0.00 0.00 2/2 sys_setpgid [504]
0.00 0.00 1/1 sys_newuname [550]
0.00 0.00 1/1 sys_getpid [549]
0.00 0.00 1/1 sys_sigreturn [551]
-----------------------------------------------
0.01 2.85 19776/19776 system_call [4]
[5] 7.5 0.01 2.85 19776 sys_read [5]
0.01 1.50 16512/16512 proc_info_read [6]
0.00 1.27 524/524 proc_file_read [7]
0.00 0.02 781/781 tty_read [92]
0.00 0.02 1793/1798 generic_file_read [98]
0.01 0.01 19776/80631 fput [45]
0.00 0.01 166/166 sock_read [121]
0.01 0.00 19776/52388 fget [90]
-----------------------------------------------
0.01 1.50 16512/16512 sys_read [5]
[6] 4.0 0.01 1.50 16512 proc_info_read [6]
0.03 1.04 5504/5504 proc_pid_statm [8]
0.12 0.12 5504/5504 proc_pid_stat [23]
0.09 0.00 15360/27661 _generic_copy_to_user [25]
0.01 0.04 5504/5504 proc_pid_cmdline [48]
0.00 0.04 16512/20003 _get_free_pages [50]
0.02 0.00 16512/20160 _free_pages_ok [77]
0.00 0.00 16512/87748 _free_pages [107]
0.00 0.00 16512/80548 free_pages [167]
-----------------------------------------------
0.00 1.27 524/524 sys_read [5]
[7] 3.3 0.00 1.27 524 proc_file_read [7]
0.00 0.92 128/128 meminfo_read_proc [10]
0.01 0.29 128/128 kstat_read_proc [21]
0.00 0.03 12/12 ksyms_read_proc [68]
0.00 0.00 524/27661 _generic_copy_to_user [25]
0.00 0.00 128/128 loadavg_read_proc [230]
0.00 0.00 128/128 uptime_read_proc [231]
0.00 0.00 524/20003 _get_free_pages [50]
0.00 0.00 524/20160 _free_pages_ok [77]
0.00 0.00 524/87748 _free_pages [107]
0.00 0.00 524/80548 free_pages [167]
0.00 0.00 384/512 proc_calc_metrics [417]
-----------------------------------------------
0.03 1.04 5504/5504 proc_info_read [6]
[8] 2.8 0.03 1.04 5504 proc_pid_statm [8]
0.98 0.00 177792/177792 statm_pgd_range [9]
0.00 0.05 5504/44307 sprintf [16]
0.00 0.00 4352/17928 mmput [152]
-----------------------------------------------
0.98 0.00 177792/177792 proc_pid_statm [8]
[9] 2.6 0.98 0.00 177792 statm_pgd_range [9]
-----------------------------------------------
0.00 0.92 128/128 proc_file_read [7]
[10] 2.4 0.00 0.92 128 meminfo_read_proc [10]
0.92 0.00 128/128 si_swapinfo [11]
0.00 0.00 256/44307 sprintf [16]
0.00 0.00 128/128 si_meminfo [271]
0.00 0.00 128/128 nr_inactive_clean_pages [429]
0.00 0.00 128/512 proc_calc_metrics [417]
-----------------------------------------------
0.92 0.00 128/128 meminfo_read_proc [10]
[11] 2.4 0.92 0.00 128 si_swapinfo [11]
-----------------------------------------------
[12] 1.3 0.07 0.44 22903+4 <cycle 1 as a whole> [12]
0.07 0.44 22905 path_walk <cycle 1> [13]
-----------------------------------------------
2 vfs_follow_link <cycle 1> [506]
0.00 0.00 2/22903 open_exec [326]
0.02 0.11 5870/22903 _user_walk [26]
0.05 0.33 17031/22903 open_namei [19]
[13] 1.3 0.07 0.44 22905 path_walk <cycle 1> [13]
0.03 0.24 38529/38529 real_lookup [22]
0.02 0.08 85429/108334 dput [27]
0.01 0.02 63040/63041 cached_lookup [74]
0.01 0.01 63042/80076 vfs_permission [71]
0.01 0.00 63042/80076 permission [120]
0.00 0.00 22389/22389 lookup_mnt [200]
0.00 0.00 244/5999 path_release [136]
0.00 0.00 2/1809 update_atime [402]
0.00 0.00 2/2 ext2_follow_link [492]
2 vfs_follow_link <cycle 1> [506]
-----------------------------------------------
0.01 0.48 17031/17031 system_call [4]
[14] 1.3 0.01 0.48 17031 sys_open [14]
0.00 0.44 17031/17031 filp_open [15]
0.01 0.01 17031/22902 getname [86]
0.01 0.00 17031/17032 get_unused_fd [110]
0.00 0.00 17031/111161 kmem_cache_free [70]
-----------------------------------------------
0.00 0.44 17031/17031 sys_open [14]
[15] 1.2 0.00 0.44 17031 filp_open [15]
0.01 0.40 17031/17031 open_namei [19]
0.01 0.01 16902/16904 dentry_open [84]
-----------------------------------------------
0.00 0.00 128/44307 loadavg_read_proc [230]
0.00 0.00 128/44307 uptime_read_proc [231]
0.00 0.00 256/44307 meminfo_read_proc [10]
0.00 0.03 3347/44307 get_ksyms_list [67]
0.00 0.05 5504/44307 proc_pid_stat [23]
0.00 0.05 5504/44307 proc_pid_statm [8]
0.00 0.29 29440/44307 kstat_read_proc [21]
[16] 1.2 0.00 0.44 44307 sprintf [16]
0.00 0.43 44307/44310 vsprintf [17]
-----------------------------------------------
0.00 0.00 3/44310 printk [327]
0.00 0.43 44307/44310 sprintf [16]
[17] 1.1 0.00 0.43 44310 vsprintf [17]
0.09 0.34 44310/44310 vsnprintf [18]
-----------------------------------------------
0.09 0.34 44310/44310 vsprintf [17]
[18] 1.1 0.09 0.34 44310 vsnprintf [18]
0.34 0.00 281878/281878 number [20]
0.00 0.00 3843/3843 skip_atoi [280]
-----------------------------------------------
0.01 0.40 17031/17031 filp_open [15]
[19] 1.1 0.01 0.40 17031 open_namei [19]
0.05 0.33 17031/22903 path_walk <cycle 1> [13]
0.01 0.00 17031/22903 path_init [109]
0.00 0.00 17030/80076 vfs_permission [71]
0.00 0.00 17030/80076 permission [120]
0.00 0.00 129/5999 path_release [136]
0.00 0.00 1/1 do_truncate [356]
0.00 0.00 1/108334 dput [27]
0.00 0.00 1/1 lookup_hash [380]
0.00 0.00 1/2 get_write_access [494]
-----------------------------------------------
0.34 0.00 281878/281878 vsnprintf [18]
[20] 0.9 0.34 0.00 281878 number [20]
-----------------------------------------------
0.01 0.29 128/128 proc_file_read [7]
[21] 0.8 0.01 0.29 128 kstat_read_proc [21]
0.00 0.29 29440/44307 sprintf [16]
-----------------------------------------------
0.03 0.24 38529/38529 path_walk <cycle 1> [13]
[22] 0.7 0.03 0.24 38529 real_lookup [22]
0.03 0.05 22016/22016 proc_pid_lookup [31]
0.00 0.05 22017/22017 proc_root_lookup [41]
0.01 0.04 16512/16512 proc_base_lookup [47]
0.03 0.01 38529/38529 d_alloc [65]
0.01 0.00 38529/101570 d_lookup [57]
-----------------------------------------------
0.12 0.12 5504/5504 proc_info_read [6]
[23] 0.6 0.12 0.12 5504 proc_pid_stat [23]
0.00 0.05 5504/44307 sprintf [16]
0.05 0.00 5504/5504 collect_sigign_sigcatch [44]
0.01 0.00 5504/5504 get_wchan [130]
0.00 0.00 4352/17928 mmput [152]
-----------------------------------------------
0.00 0.15 5742/5742 system_call [4]
[24] 0.4 0.00 0.15 5742 sys_stat64 [24]
0.00 0.14 5742/5870 _user_walk [26]
0.00 0.01 5626/5999 path_release [136]
0.01 0.00 5626/5889 cp_new_stat64 [157]
-----------------------------------------------
0.00 0.00 166/27661 memcpy_toiovec [263]
0.00 0.00 524/27661 proc_file_read [7]
0.00 0.00 857/27661 read_chan [99]
0.06 0.00 10754/27661 filldir64 [39]
0.09 0.00 15360/27661 proc_info_read [6]
[25] 0.4 0.16 0.00 27661 _generic_copy_to_user [25]
-----------------------------------------------
0.00 0.00 128/5870 sys_access [176]
0.00 0.14 5742/5870 sys_stat64 [24]
[26] 0.4 0.00 0.14 5870 _user_walk [26]
0.02 0.11 5870/22903 path_walk <cycle 1> [13]
0.00 0.00 5870/22902 getname [86]
0.00 0.00 5870/22903 path_init [109]
0.00 0.00 5870/111161 kmem_cache_free [70]
-----------------------------------------------
0.00 0.00 1/108334 open_namei [19]
0.00 0.00 2/108334 do_exit [238]
0.00 0.01 5999/108334 path_release [136]
0.00 0.02 16903/108334 fput [45]
0.02 0.08 85429/108334 path_walk <cycle 1> [13]
[27] 0.3 0.02 0.10 108334 dput [27]
0.02 0.04 38529/38529 iput [42]
0.04 0.00 146863/203321 atomic_dec_and_lock [49]
0.01 0.00 38529/111161 kmem_cache_free [70]
0.00 0.00 38528/38528 pid_delete_dentry [212]
0.00 0.00 1/1 proc_delete_dentry [532]
-----------------------------------------------
0.00 0.12 4554/4554 system_call [4]
[28] 0.3 0.00 0.12 4554 sys_write [28]
0.01 0.08 4237/4237 tty_write [29]
0.00 0.02 317/317 sock_write [87]
0.00 0.00 4554/80631 fput [45]
0.00 0.00 4554/52388 fget [90]
-----------------------------------------------
0.01 0.08 4237/4237 sys_write [28]
[29] 0.2 0.01 0.08 4237 tty_write [29]
0.00 0.08 4237/4237 write_chan [34]
-----------------------------------------------
0.00 0.09 642/642 system_call [4]
[30] 0.2 0.00 0.09 642 sys_getdents64 [30]
0.00 0.08 642/642 vfs_readdir [33]
0.00 0.00 642/80631 fput [45]
0.00 0.00 642/52388 fget [90]
-----------------------------------------------
0.03 0.05 22016/22016 real_lookup [22]
[31] 0.2 0.03 0.05 22016 proc_pid_lookup [31]
0.01 0.03 22016/38528 proc_pid_make_inode [40]
0.01 0.00 22016/38529 d_rehash [131]
0.00 0.00 22016/38529 d_instantiate [140]
0.00 0.00 22016/87748 _free_pages [107]
0.00 0.00 22016/80548 free_pages [167]
-----------------------------------------------
0.01 0.08 1544/1544 system_call [4]
[32] 0.2 0.01 0.08 1544 sys_select [32]
0.01 0.07 1544/1544 do_select [36]
0.00 0.00 1544/1544 select_bits_alloc [205]
0.00 0.00 1545/2963 kfree [192]
0.00 0.00 1545/1545 select_bits_free [405]
-----------------------------------------------
0.00 0.08 642/642 sys_getdents64 [30]
[33] 0.2 0.00 0.08 642 vfs_readdir [33]
0.00 0.05 512/512 proc_pid_readdir [51]
0.00 0.04 640/640 proc_root_readdir [61]
0.00 0.00 2/2 ext2_readdir [338]
-----------------------------------------------
0.00 0.08 4237/4237 tty_write [29]
[34] 0.2 0.00 0.08 4237 write_chan [34]
0.02 0.02 4361/4361 opost_block [56]
0.00 0.03 4225/4225 opost [72]
0.00 0.00 4237/16341 add_wait_queue [129]
0.00 0.00 4237/16343 remove_wait_queue [156]
0.00 0.00 4237/8714 tty_hung_up_p [223]
0.00 0.00 4/12687 pty_write [59]
-----------------------------------------------
0.00 0.00 1/2050 smp_apic_timer_interrupt [329]
0.01 0.01 313/2050 stext_lock [95]
0.04 0.03 1736/2050 do_IRQ [37]
[35] 0.2 0.05 0.03 2050 do_softirq [35]
0.00 0.02 488/488 net_rx_action [83]
0.00 0.01 1428/1428 tasklet_hi_action [132]
0.00 0.00 132/132 net_tx_action [308]
0.00 0.00 2/2 tasklet_action [346]
-----------------------------------------------
0.01 0.07 1544/1544 sys_select [32]
[36] 0.2 0.01 0.07 1544 do_select [36]
0.00 0.01 4473/4473 tty_poll [108]
0.00 0.01 1545/2312 poll_freewait [106]
0.00 0.01 1228/1958 schedule_timeout [104]
0.00 0.01 5626/9901 sock_poll [101]
0.01 0.00 13757/80631 fput [45]
0.01 0.00 13757/52388 fget [90]
0.00 0.00 3658/3658 pipe_poll [164]
0.00 0.00 1544/1544 max_select_fd [193]
-----------------------------------------------
0.00 0.07 1760/1760 ret_from_intr [38]
[37] 0.2 0.00 0.07 1760 do_IRQ [37]
0.04 0.03 1736/2050 do_softirq [35]
0.00 0.00 1760/1760 handle_IRQ_event [216]
0.00 0.00 1428/1428 ack_edge_ioapic_irq [406]
0.00 0.00 1428/1428 end_edge_ioapic_irq [408]
0.00 0.00 332/332 mask_and_ack_level_ioapic_irq [420]
0.00 0.00 332/332 end_level_ioapic_irq [419]
-----------------------------------------------
<spontaneous>
[38] 0.2 0.00 0.07 ret_from_intr [38]
0.00 0.07 1760/1760 do_IRQ [37]
-----------------------------------------------
0.00 0.00 2/10882 ext2_readdir [338]
0.00 0.03 5248/10882 proc_readdir [62]
0.00 0.03 5632/10882 proc_pid_readdir [51]
[39] 0.2 0.01 0.06 10882 filldir64 [39]
0.06 0.00 10754/27661 _generic_copy_to_user [25]
-----------------------------------------------
0.01 0.02 16512/38528 proc_base_lookup [47]
0.01 0.03 22016/38528 proc_pid_lookup [31]
[40] 0.2 0.01 0.06 38528 proc_pid_make_inode [40]
0.01 0.04 38528/38528 get_empty_inode [46]
0.01 0.00 16512/16512 task_dumpable [158]
-----------------------------------------------
0.00 0.05 22017/22017 real_lookup [22]
[41] 0.2 0.00 0.05 22017 proc_root_lookup [41]
0.05 0.00 22017/22017 proc_lookup [43]
-----------------------------------------------
0.02 0.04 38529/38529 dput [27]
[42] 0.1 0.02 0.04 38529 iput [42]
0.00 0.01 38529/38529 destroy_inode [117]
0.01 0.00 38529/203321 atomic_dec_and_lock [49]
0.01 0.00 38528/87748 _free_pages [107]
0.01 0.00 38529/38529 force_delete [148]
0.00 0.00 38528/38528 proc_pid_delete_inode [199]
0.00 0.00 38528/80548 free_pages [167]
0.00 0.00 38529/38529 proc_delete_inode [260]
-----------------------------------------------
0.05 0.00 22017/22017 proc_root_lookup [41]
[43] 0.1 0.05 0.00 22017 proc_lookup [43]
0.00 0.00 1/1 proc_get_inode [349]
0.00 0.00 1/38529 d_rehash [131]
0.00 0.00 1/38529 d_instantiate [140]
-----------------------------------------------
0.05 0.00 5504/5504 proc_pid_stat [23]
[44] 0.1 0.05 0.00 5504 collect_sigign_sigcatch [44]
-----------------------------------------------
0.00 0.00 1/80631 search_binary_handler [217]
0.00 0.00 1/80631 load_elf_binary [218]
0.00 0.00 2/80631 unmap_fixup [272]
0.00 0.00 3/80631 old_mmap [142]
0.00 0.00 6/80631 sys_writev [304]
0.00 0.00 31/80631 exit_mmap [241]
0.00 0.00 128/80631 sys_llseek [232]
0.00 0.00 263/80631 sys_fstat64 [219]
0.00 0.00 384/80631 sys_lseek [214]
0.00 0.00 642/80631 sys_getdents64 [30]
0.00 0.00 1046/80631 sys_ioctl [137]
0.00 0.00 3584/80631 fcntl_setlk [96]
0.00 0.00 3969/80631 sys_fcntl64 [82]
0.00 0.00 4275/80631 do_pollfd [113]
0.00 0.00 4554/80631 sys_write [28]
0.00 0.00 11303/80631 poll_freewait [106]
0.01 0.00 13757/80631 do_select [36]
0.01 0.00 16906/80631 filp_close [100]
0.01 0.01 19776/80631 sys_read [5]
[45] 0.1 0.03 0.02 80631 fput [45]
0.00 0.02 16903/108334 dput [27]
0.00 0.00 16903/16903 locks_remove_flock [221]
0.00 0.00 132/132 ext2_release_file [278]
-----------------------------------------------
0.01 0.04 38528/38528 proc_pid_make_inode [40]
[46] 0.1 0.01 0.04 38528 get_empty_inode [46]
0.03 0.00 38528/38529 clean_inode [79]
0.01 0.00 38528/111164 kmem_cache_alloc [78]
-----------------------------------------------
0.01 0.04 16512/16512 real_lookup [22]
[47] 0.1 0.01 0.04 16512 proc_base_lookup [47]
0.01 0.02 16512/38528 proc_pid_make_inode [40]
0.00 0.00 16512/38529 d_rehash [131]
0.00 0.00 16512/38529 d_instantiate [140]
-----------------------------------------------
0.01 0.04 5504/5504 proc_info_read [6]
[48] 0.1 0.01 0.04 5504 proc_pid_cmdline [48]
0.00 0.04 4736/4736 access_process_vm [55]
0.00 0.00 4352/17928 mmput [152]
-----------------------------------------------
0.00 0.00 1/203321 free_uid [393]
0.00 0.00 17928/203321 mmput [152]
0.01 0.00 38529/203321 iput [42]
0.04 0.00 146863/203321 dput [27]
[49] 0.1 0.05 0.00 203321 atomic_dec_and_lock [49]
-----------------------------------------------
0.00 0.00 1/20003 mm_init [275]
0.00 0.00 1/20003 do_fork [166]
0.00 0.00 6/20003 pte_alloc [240]
0.00 0.00 524/20003 proc_file_read [7]
0.00 0.00 726/20003 sys_poll [69]
0.00 0.00 2233/20003 _pollwait [139]
0.00 0.04 16512/20003 proc_info_read [6]
[50] 0.1 0.00 0.04 20003 _get_free_pages [50]
0.01 0.04 20003/20162 _alloc_pages [54]
0.00 0.00 20003/20162 alloc_pages [302]
-----------------------------------------------
0.00 0.05 512/512 vfs_readdir [33]
[51] 0.1 0.00 0.05 512 proc_pid_readdir [51]
0.00 0.03 5632/10882 filldir64 [39]
0.01 0.00 512/512 get_pid_list [118]
-----------------------------------------------
0.01 0.04 293/293 error_code [53]
[52] 0.1 0.01 0.04 293 do_page_fault [52]
0.00 0.04 293/293 handle_mm_fault [58]
0.00 0.00 293/5429 find_vma [243]
-----------------------------------------------
<spontaneous>
[53] 0.1 0.00 0.05 error_code [53]
0.01 0.04 293/293 do_page_fault [52]
-----------------------------------------------
0.00 0.00 1/20162 copy_strings [265]
0.00 0.00 2/20162 filemap_nopage [342]
0.00 0.00 2/20162 grow_buffers [363]
0.00 0.00 20/20162 do_wp_page [151]
0.00 0.00 134/20162 do_anonymous_page [76]
0.01 0.04 20003/20162 _get_free_pages [50]
[54] 0.1 0.01 0.04 20162 _alloc_pages [54]
0.04 0.00 20162/20162 rmqueue [63]
-----------------------------------------------
0.00 0.04 4736/4736 proc_pid_cmdline [48]
[55] 0.1 0.00 0.04 4736 access_process_vm [55]
0.00 0.04 4736/4736 access_mm [60]
0.00 0.00 4736/17928 mmput [152]
0.00 0.00 4736/4736 find_extend_vma [264]
-----------------------------------------------
0.02 0.02 4361/4361 write_chan [34]
[56] 0.1 0.02 0.02 4361 opost_block [56]
0.00 0.01 4361/12687 pty_write [59]
0.00 0.00 4361/5138 _generic_copy_from_user [173]
0.00 0.00 4361/8586 pty_write_room [203]
-----------------------------------------------
0.01 0.00 38529/101570 real_lookup [22]
0.02 0.00 63041/101570 cached_lookup [74]
[57] 0.1 0.04 0.00 101570 d_lookup [57]
-----------------------------------------------
0.00 0.04 293/293 do_page_fault [52]
[58] 0.1 0.00 0.04 293 handle_mm_fault [58]
0.00 0.03 242/242 do_no_page [73]
0.01 0.00 51/51 do_wp_page [151]
0.00 0.00 293/325 pte_alloc [240]
-----------------------------------------------
0.00 0.00 4/12687 write_chan [34]
0.00 0.01 4361/12687 opost_block [56]
0.00 0.02 8322/12687 tty_default_put_char [80]
[59] 0.1 0.00 0.04 12687 pty_write [59]
0.02 0.02 12687/12687 n_tty_receive_buf [66]
0.00 0.00 12691/33964 n_tty_receive_room [155]
0.00 0.00 4/5138 _generic_copy_from_user [173]
-----------------------------------------------
0.00 0.04 4736/4736 access_process_vm [55]
[60] 0.1 0.00 0.04 4736 access_mm [60]
0.03 0.01 4736/4736 access_one_page [64]
-----------------------------------------------
0.00 0.04 640/640 vfs_readdir [33]
[61] 0.1 0.00 0.04 640 proc_root_readdir [61]
0.00 0.03 256/256 proc_readdir [62]
-----------------------------------------------
0.00 0.03 256/256 proc_root_readdir [61]
[62] 0.1 0.00 0.03 256 proc_readdir [62]
0.00 0.03 5248/10882 filldir64 [39]
-----------------------------------------------
0.04 0.00 20162/20162 _alloc_pages [54]
[63] 0.1 0.04 0.00 20162 rmqueue [63]
-----------------------------------------------
0.03 0.01 4736/4736 access_mm [60]
[64] 0.1 0.03 0.01 4736 access_one_page [64]
0.01 0.00 4736/6752 kunmap_high [135]
0.00 0.00 4736/6752 kmap_high [181]
0.00 0.00 4736/87748 _free_pages [107]
-----------------------------------------------
0.03 0.01 38529/38529 real_lookup [22]
[65] 0.1 0.03 0.01 38529 d_alloc [65]
0.01 0.00 38529/111164 kmem_cache_alloc [78]
-----------------------------------------------
0.02 0.02 12687/12687 pty_write [59]
[66] 0.1 0.02 0.02 12687 n_tty_receive_buf [66]
0.01 0.00 1548/2506 _wake_up [112]
0.01 0.00 12686/12686 kill_fasync [143]
0.00 0.00 12687/33964 n_tty_receive_room [155]
0.00 0.00 6/6 n_tty_receive_char [466]
-----------------------------------------------
0.00 0.03 12/12 ksyms_read_proc [68]
[67] 0.1 0.00 0.03 12 get_ksyms_list [67]
0.00 0.03 3347/44307 sprintf [16]
-----------------------------------------------
0.00 0.03 12/12 proc_file_read [7]
[68] 0.1 0.00 0.03 12 ksyms_read_proc [68]
0.00 0.03 12/12 get_ksyms_list [67]
-----------------------------------------------
0.00 0.03 767/767 system_call [4]
[69] 0.1 0.00 0.03 767 sys_poll [69]
0.00 0.02 767/767 do_poll [93]
0.00 0.00 767/2312 poll_freewait [106]
0.00 0.00 726/20003 _get_free_pages [50]
0.00 0.00 726/2963 kmalloc [161]
0.00 0.00 726/20160 _free_pages_ok [77]
0.00 0.00 726/2963 kfree [192]
0.00 0.00 726/5138 _generic_copy_from_user [173]
0.00 0.00 726/87748 _free_pages [107]
0.00 0.00 726/80548 free_pages [167]
-----------------------------------------------
0.00 0.00 1/111161 sys_execve [201]
0.00 0.00 1/111161 _mmdrop [377]
0.00 0.00 1/111161 put_files_struct [365]
0.00 0.00 1/111161 do_exit [238]
0.00 0.00 1/111161 exit_sighand [277]
0.00 0.00 1/111161 collect_signal [386]
0.00 0.00 40/111161 exit_mmap [241]
0.00 0.00 130/111161 unmap_fixup [272]
0.00 0.00 132/111161 do_munmap [159]
0.00 0.00 142/111161 kfree_skbmem [234]
0.00 0.00 1792/111161 locks_delete_lock [261]
0.00 0.00 3584/111161 fcntl_setlk [96]
0.00 0.00 5376/111161 posix_lock_file [122]
0.00 0.00 5870/111161 _user_walk [26]
0.00 0.00 17031/111161 sys_open [14]
0.01 0.00 38529/111161 dput [27]
0.01 0.00 38529/111161 destroy_inode [117]
[70] 0.1 0.03 0.00 111161 kmem_cache_free [70]
0.00 0.00 2/12 free_block [439]
-----------------------------------------------
0.00 0.00 1/80076 flush_old_exec [242]
0.00 0.00 1/80076 lookup_hash [380]
0.00 0.00 2/80076 open_exec [326]
0.00 0.00 17030/80076 open_namei [19]
0.01 0.01 63042/80076 path_walk <cycle 1> [13]
[71] 0.1 0.02 0.01 80076 vfs_permission [71]
0.00 0.01 72794/72794 in_group_p [114]
-----------------------------------------------
0.00 0.03 4225/4225 write_chan [34]
[72] 0.1 0.00 0.03 4225 opost [72]
0.00 0.03 8322/8322 tty_default_put_char [80]
0.00 0.00 4225/8586 pty_write_room [203]
-----------------------------------------------
0.00 0.03 242/242 handle_mm_fault [58]
[73] 0.1 0.00 0.03 242 do_no_page [73]
0.03 0.00 136/136 do_anonymous_page [76]
0.00 0.00 106/106 filemap_nopage [342]
-----------------------------------------------
0.00 0.00 1/63041 lookup_hash [380]
0.01 0.02 63040/63041 path_walk <cycle 1> [13]
[74] 0.1 0.01 0.02 63041 cached_lookup [74]
0.02 0.00 63041/101570 d_lookup [57]
-----------------------------------------------
0.00 0.00 1/16903 flush_old_exec [242]
0.00 0.00 1/16903 load_elf_binary [218]
0.01 0.02 16901/16903 system_call [4]
[75] 0.1 0.01 0.02 16903 sys_close [75]
0.00 0.02 16903/16906 filp_close [100]
-----------------------------------------------
0.03 0.00 136/136 do_no_page [73]
[76] 0.1 0.03 0.00 136 do_anonymous_page [76]
0.00 0.00 134/20162 _alloc_pages [54]
0.00 0.00 134/6752 kunmap_high [135]
0.00 0.00 134/6752 kmap_high [181]
0.00 0.00 134/20162 alloc_pages [302]
-----------------------------------------------
0.00 0.00 1/20160 _mmdrop [377]
0.00 0.00 1/20160 sys_wait4 [237]
0.00 0.00 1/20160 do_wp_page [151]
0.00 0.00 6/20160 clear_page_tables [341]
0.00 0.00 155/20160 free_page_and_swap_cache [266]
0.00 0.00 524/20160 proc_file_read [7]
0.00 0.00 726/20160 sys_poll [69]
0.00 0.00 2234/20160 poll_freewait [106]
0.02 0.00 16512/20160 proc_info_read [6]
[77] 0.1 0.03 0.00 20160 _free_pages_ok [77]
-----------------------------------------------
0.00 0.00 1/111164 get_new_inode [347]
0.00 0.00 1/111164 copy_files [276]
0.00 0.00 1/111164 send_signal [392]
0.00 0.00 1/111164 mprotect_fixup [352]
0.00 0.00 1/111164 setup_arg_pages [361]
0.00 0.00 2/111164 do_fork [166]
0.00 0.00 2/111164 get_unused_buffer_head [384]
0.00 0.00 21/111164 skb_clone [190]
0.00 0.00 32/111164 copy_mm [209]
0.00 0.00 122/111164 alloc_skb [187]
0.00 0.00 132/111164 do_munmap [159]
0.00 0.00 137/111164 do_mmap_pgoff [150]
0.00 0.00 10752/111164 locks_alloc_lock [171]
0.01 0.00 22902/111164 getname [86]
0.01 0.00 38528/111164 get_empty_inode [46]
0.01 0.00 38529/111164 d_alloc [65]
[78] 0.1 0.03 0.00 111164 kmem_cache_alloc [78]
0.00 0.00 1/11 kmem_cache_alloc_batch [440]
-----------------------------------------------
0.00 0.00 1/38529 get_new_inode [347]
0.03 0.00 38528/38529 get_empty_inode [46]
[79] 0.1 0.03 0.00 38529 clean_inode [79]
-----------------------------------------------
0.00 0.03 8322/8322 opost [72]
[80] 0.1 0.00 0.03 8322 tty_default_put_char [80]
0.00 0.02 8322/12687 pty_write [59]
-----------------------------------------------
0.00 0.00 1/3791 do_exit [238]
0.00 0.00 1/3791 sys_wait4 [237]
0.00 0.00 3/3791 _wait_on_buffer [331]
0.00 0.00 23/3791 reschedule [315]
0.01 0.00 1805/3791 cpu_idle [1]
0.01 0.00 1958/3791 schedule_timeout [104]
[81] 0.1 0.02 0.00 3791 schedule [81]
0.00 0.00 3764/3765 _switch_to [184]
0.00 0.00 1/1 _mmdrop [377]
-----------------------------------------------
0.00 0.02 3969/3969 system_call [4]
[82] 0.1 0.00 0.02 3969 sys_fcntl64 [82]
0.00 0.02 3969/3969 do_fcntl [94]
0.00 0.00 3969/80631 fput [45]
0.00 0.00 3969/52388 fget [90]
-----------------------------------------------
0.00 0.02 488/488 do_softirq [35]
[83] 0.1 0.00 0.02 488 net_rx_action [83]
0.00 0.02 488/488 ip_rcv [91]
-----------------------------------------------
0.00 0.00 2/16904 open_exec [326]
0.01 0.01 16902/16904 filp_open [15]
[84] 0.1 0.01 0.01 16904 dentry_open [84]
0.01 0.00 16904/16904 get_empty_filp [127]
0.00 0.00 16904/16904 file_move [172]
0.00 0.00 133/133 ext2_open_file [427]
0.00 0.00 1/2 get_write_access [494]
0.00 0.00 1/1 chrdev_open [511]
-----------------------------------------------
0.00 0.00 6/323 sock_readv_writev [306]
0.00 0.02 317/323 sock_write [87]
[85] 0.1 0.00 0.02 323 sock_sendmsg [85]
0.00 0.02 323/323 inet_sendmsg [88]
-----------------------------------------------
0.00 0.00 1/22902 sys_execve [201]
0.00 0.00 5870/22902 _user_walk [26]
0.01 0.01 17031/22902 sys_open [14]
[86] 0.1 0.01 0.02 22902 getname [86]
0.01 0.00 22902/22902 strncpy_from_user [119]
0.01 0.00 22902/111164 kmem_cache_alloc [78]
-----------------------------------------------
0.00 0.02 317/317 sys_write [28]
[87] 0.1 0.00 0.02 317 sock_write [87]
0.00 0.02 317/323 sock_sendmsg [85]
-----------------------------------------------
0.00 0.02 323/323 sock_sendmsg [85]
[88] 0.1 0.00 0.02 323 inet_sendmsg [88]
0.01 0.01 323/323 tcp_sendmsg [89]
-----------------------------------------------
0.01 0.01 323/323 inet_sendmsg [88]
[89] 0.1 0.01 0.01 323 tcp_sendmsg [89]
0.00 0.01 318/322 tcp_write_xmit [123]
0.00 0.00 349/689 alloc_skb [187]
0.00 0.00 27/27 tcp_push_one [269]
0.00 0.00 15/144 tcp_mem_schedule [292]
0.00 0.00 1/1 _release_sock [333]
-----------------------------------------------
0.00 0.00 3/52388 old_mmap [142]
0.00 0.00 6/52388 sys_writev [304]
0.00 0.00 128/52388 sys_llseek [232]
0.00 0.00 263/52388 sys_fstat64 [219]
0.00 0.00 385/52388 sys_lseek [214]
0.00 0.00 642/52388 sys_getdents64 [30]
0.00 0.00 1046/52388 sys_ioctl [137]
0.00 0.00 3584/52388 fcntl_setlk [96]
0.00 0.00 3969/52388 sys_fcntl64 [82]
0.00 0.00 4275/52388 do_pollfd [113]
0.00 0.00 4554/52388 sys_write [28]
0.01 0.00 13757/52388 do_select [36]
0.01 0.00 19776/52388 sys_read [5]
[90] 0.1 0.02 0.00 52388 fget [90]
-----------------------------------------------
0.00 0.02 488/488 net_rx_action [83]
[91] 0.1 0.00 0.02 488 ip_rcv [91]
0.00 0.02 488/488 ip_local_deliver [102]
0.00 0.00 176/176 ip_route_input [198]
-----------------------------------------------
0.00 0.02 781/781 sys_read [5]
[92] 0.1 0.00 0.02 781 tty_read [92]
0.01 0.01 781/781 read_chan [99]
-----------------------------------------------
0.00 0.02 767/767 sys_poll [69]
[93] 0.1 0.00 0.02 767 do_poll [93]
0.00 0.01 1435/1435 do_pollfd [113]
0.00 0.01 709/1958 schedule_timeout [104]
-----------------------------------------------
0.00 0.02 3969/3969 sys_fcntl64 [82]
[94] 0.1 0.00 0.02 3969 do_fcntl [94]
0.00 0.02 3584/3584 fcntl_setlk [96]
-----------------------------------------------
<spontaneous>
[95] 0.1 0.01 0.01 stext_lock [95]
0.01 0.01 313/2050 do_softirq [35]
-----------------------------------------------
0.00 0.02 3584/3584 do_fcntl [94]
[96] 0.1 0.00 0.02 3584 fcntl_setlk [96]
0.00 0.01 3584/3584 posix_lock_file [122]
0.00 0.00 3584/80631 fput [45]
0.00 0.00 3584/52388 fget [90]
0.00 0.00 3584/10752 locks_alloc_lock [171]
0.00 0.00 3584/3584 flock_to_posix_lock [245]
0.00 0.00 3584/111161 kmem_cache_free [70]
-----------------------------------------------
0.00 0.02 1798/1798 generic_file_read [98]
[97] 0.1 0.00 0.02 1798 do_generic_file_read [97]
0.01 0.00 1798/1798 file_read_actor [103]
0.00 0.00 1798/1798 generic_file_readahead [247]
0.00 0.00 1798/87748 _free_pages [107]
0.00 0.00 1798/1809 update_atime [402]
-----------------------------------------------
0.00 0.00 5/1798 kernel_read [325]
0.00 0.02 1793/1798 sys_read [5]
[98] 0.1 0.00 0.02 1798 generic_file_read [98]
0.00 0.02 1798/1798 do_generic_file_read [97]
-----------------------------------------------
0.01 0.01 781/781 tty_read [92]
[99] 0.1 0.01 0.01 781 read_chan [99]
0.00 0.00 857/27661 _generic_copy_to_user [25]
0.00 0.00 781/781 check_unthrottle [162]
0.00 0.00 781/16341 add_wait_queue [129]
0.00 0.00 781/5256 n_tty_chars_in_buffer [210]
0.00 0.00 781/16343 remove_wait_queue [156]
0.00 0.00 4/1958 schedule_timeout [104]
0.00 0.00 4/8714 tty_hung_up_p [223]
-----------------------------------------------
--
Daniel
On Sun, 4 Nov 2001, Linus Torvalds wrote:
> In article <[email protected]>,
> Daniel Phillips <[email protected]> wrote:
> >On November 4, 2001 05:45 pm, Tim Jansen wrote:
> >> > The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it
> >> > is a list of elements, wherein an element can itself be a list (or a
> >>
> >> Why would anybody want a binary encoding?
> >
> >Because they have a computer?
>
> That's a stupid argument.
>
> The computer can parse anything.
>
> It's us _humans_ that are limited at parsing. We like text interfaces,
> because that's how we are brought up. We aren't good at binary, and
> we're not good at non-linear, "structured" interfaces.
>
> In contrast, a program can be taught to parse the ascii files quite
> well, and does not have the inherent limitations we humans have. Sure,
> it has _other_ limitations, but /proc being ASCII is sure as hell not
> one of them.
>
> In short: /proc is ASCII, and will so remain while I maintain a kernel.
> Anything else is stupid.
>
OHHH,
good sense at last!!
I was starting to worry
> Handling spaces and newlines is easy enough - see the patches from Al
> Viro, for example.
>
> Linus
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
=?iso-8859-1?Q?Jak writes:
> On Sun, Nov 04, 2001 at 04:12:23PM -0500, Albert D. Cahalan wrote:
>> You are looking for something called the registry. It's something
>> that was introduced with Windows 95. It's basically a filesystem
>> with typed files: char, int, string, string array, etc.
>
> Nope :)
>
> It does not have "char, int, string, string array, etc." it
> has "String, binary and DWORD".
I'm pretty sure that newer implementations have additional types.
BTW, we could call the persistent part of our registry "reiserfs4".
> Imagine every field in a file by itself, with well-defined type
> information and unit informaiton.
I suppose I could print a warning if the type or unit info
isn't what was expected. That's insignificantly useful.
Individual files are nice, until you realize: open, read, close
> Performance is one thing. Not being able to know whether
> numbers are i32, u32, u64, or measured in Kilobytes or
> carrots is another ting.
I don't see what the code is supposed to do if it was expecting
kilobytes and you serve it carrots. Certainly nothing useful can
be done when this happens.
At 09:11 PM 11/4/01 +0100, Jakob ?stergaard wrote:
>On Sun, Nov 04, 2001 at 09:13:35PM +0100, Tim Jansen wrote:
> > On Sunday 04 November 2001 20:55, Jakob ?stergaard wrote:
> > > > BTW nobody says to one-value-files can not have types (see my earlier
> > > > posts in this thread).
> > > I don't dislike one-value-files - please tell me how you get type
> > > information
> >
> > Using a ioctl that returns the type.
>
>But that's not pretty :)
>
>Can't we think of something else ?
I absolutely love how people want to re-invent the wheel. If you want
typed access (both read AND write) in a version-independent manner, then
you really need to take a look at Simple Network Management Protocol, or
SNMP. It has everything you want: named access, types, binary data or
ASCII data or whatever data, and the ability for vendor, distribution, and
version differences to be caught quickly and easily. As new stuff is added
or changed, all you need is a replacement MIB to be able to use the stuff.
Furthermore, SNMP is script friendly in that access to the data can be
automated, with all conversions being done in userspace.
Finally, SNMP works over networks.
There are many, many security issues surrounding SNMP, but at least it
exists, is well-understood, is already implemented in multiple systems, and
it WORKS.
Why invent yet another replacement for sysctl?
My pair-o-pennies(tm) to this discussion...
Satch
> Problem: Could it be made simpler to parse from scripting languages,
> without making it less elegant to parse in plain C ?
Yes. At one point, somebody suggested XML. Now, as much as I hate the fact
that people somehow equate high-tech with tags, I think whomever originally
suggested it might be on to something. :)
Fact is, just about EVERY language out there has some sort of utility to
parse XML. There's expat for C, Perl and Python have libs, etc. We could
even write a proc DTD that could specify the valid data types.
There are two problems:
1. Performance - it's slower to go through a library that outputs XML than
do a printf("%d", pid) or the like.
2. Space - based on a little experience using XML as a transport, the space
used by the tags adds up.
3. Work - writing a good package to do this, and rewriting bits of the
kernel to use it. I'll volunteer my time.
Just a thought,
Craig
On Sun, Nov 04, 2001 at 04:06:25PM -0700, Craig Thrall wrote:
> > Problem: Could it be made simpler to parse from scripting languages,
> > without making it less elegant to parse in plain C ?
>
> Yes. At one point, somebody suggested XML. Now, as much as I hate the fact
> that people somehow equate high-tech with tags, I think whomever originally
> suggested it might be on to something. :)
>
> Fact is, just about EVERY language out there has some sort of utility to
> parse XML. There's expat for C, Perl and Python have libs, etc. We could
> even write a proc DTD that could specify the valid data types.
I would say that it's "less elegant" to have to depend on yet another (big,
complex, still evolving) library just to read out system metrics.
>
> There are two problems:
>
> 1. Performance - it's slower to go through a library that outputs XML than
> do a printf("%d", pid) or the like.
Indeed.
>
> 2. Space - based on a little experience using XML as a transport, the space
> used by the tags adds up.
Yep.
>
> 3. Work - writing a good package to do this, and rewriting bits of the
> kernel to use it. I'll volunteer my time.
4. Stability - A good XML parsing library cannot be "simple" or "small". At
least not when written in C ;)
5. Lack of benefits - we already have structure because of the filesystem in
which the information would live. The actual "tags" could be so incredibly
simple that using XML would just be shooting birds with tactical nukes. E.g.
lots of fun, but a little expensive and not really necessary.
But maybe I'm just a pessimist and should stop bitching and start coding ;)
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, 4 Nov 2001, Daniel Phillips wrote:
> Doing 'top -d .1' eats 18% of a 1GHz cpu, which is abominable. A kernel
> profile courtesy of sgi's kernprof shows that scanning pages does not move
> the needle, whereas sprintf does. Notice that the biggest chunk of time
Huh? Scanning pages is statm_pgd_range(). I'd say that it takes
seriously more than vsnprintf() - look at your own results.
On November 5, 2001 12:42 am, Alexander Viro wrote:
> On Sun, 4 Nov 2001, Daniel Phillips wrote:
>
> > Doing 'top -d .1' eats 18% of a 1GHz cpu, which is abominable. A kernel
> > profile courtesy of sgi's kernprof shows that scanning pages does not move
> > the needle, whereas sprintf does. Notice that the biggest chunk of time
>
> Huh? Scanning pages is statm_pgd_range(). I'd say that it takes
> seriously more than vsnprintf() - look at your own results.
Yes, true, 2.6 seconds for the statm_pgd_range vs 1.2 for sprintf. Still,
sprintf is definitely burning cycles, pretty much the whole 1.2 seconds would
be recovered with a binary interface.
Now look at the total time we spend in the kernel: 10.4 seconds, 4 times the
page scanning overhead. This is really wasteful.
For top does it really matter? (yes, think slow computer) What happens when
proc stabilizes and applications start relying on it heavily as a kernel
interface? If we're still turning in this kind of stunningly poor
performance, it won't be nice.
It's not that it doesn't work, it's just that it isn't the best.
--
Daniel
On Sun, 4 Nov 2001 02:40:51 +0100
Daniel Phillips <[email protected]> wrote:
> On November 2, 2001 03:20 am, Rusty Russell wrote:
> > I agree with the "one file, one value" idea.
>
> So cat /proc/partitions goes from being a nice, easy to read and use human
> interface to something other than that. Lets not go overboard.
Firstly, do not perpetuate the myth of /proc being "human readable". (Hint:
what language do humans speak?) It supposed to be "admin readable" and
"machine readable".
Secondly, it is possible to implement a table formatter which kicks in
when someone does a read() on a directory. This is not a desirable format:
look at /proc/mounts when you have a mount point with a space in it for a
good example.
Thanks!
Rusty.
Jakob ?tergaard wrote:
>Here's my stab at the problems - please comment,
>
>We want to avoid these problems:
> 1) It is hard to parse (some) /proc files from userspace
> 2) As /proc files change, parsers must be changed in userspace
>
>Still, we want to keep on offering
> 3) Human readable /proc files with some amount of pretty-printing
> 4) A /proc fs that can be changed as the kernel needs those changes
>
>
>Taking care of (3) and (4):
>
>Maintaining the current /proc files is very simple, and it offers the system
>administrator a lot of functionality that isn't reasonable to take away now.
>
> * They should stay in a form close to the current one *
>
>
>Taking care of (1) and (2):
>
>For each file "f" in /proc, there will be a ".f" file which is a
>machine-readable version of "f", with the difference that it may contain extra
>information that one may not want to present to the user in "f".
>
>The dot-proc file is basically a binary encoding of Lisp (or XML), e.g. it is a
>list of elements, wherein an element can itself be a list (or a character string,
>or a host-native numeric type. Thus, (key,value) pairs and lists thereof are
>possible, as well as tree structures etc.
>
>All data types are stored in the architecture-native format, and a simple
>library should be sufficient to parse any dot-proc file.
>
>
>So, we need a small change in procfs that does not in any way break
>compatibility - and we need a few lines of C under LGPL to interface with it.
>
>Tell me what you think - It is possible that I could do this (or something
>close) in the near future, unless someone shows me the problem with the
>approach.
>
>Thank you,
>
see http://sourceforge.net/projects/xmlprocfs/
i think this is a good idea that make the kernel output xml format
informations.
best regards.
zmwillow
On November 5, 2001 01:12 am, Rusty Russell wrote:
> On Sun, 4 Nov 2001 02:40:51 +0100
> Daniel Phillips <[email protected]> wrote:
>
> > On November 2, 2001 03:20 am, Rusty Russell wrote:
> > > I agree with the "one file, one value" idea.
> >
> > So cat /proc/partitions goes from being a nice, easy to read and use human
> > interface to something other than that. Lets not go overboard.
>
> Firstly, do not perpetuate the myth of /proc being "human readable". (Hint:
> what language do humans speak?) It supposed to be "admin readable" and
> "machine readable".
You're letting me out as a human, fair enough ;-)
> Secondly, it is possible to implement a table formatter which kicks in
> when someone does a read() on a directory. This is not a desirable format:
> look at /proc/mounts when you have a mount point with a space in it for a
> good example.
Yes, sold, if implementing the formatter is part of the plan.
Caveat: by profiling I've found that file ops on proc functions are already
eating a significant amount of cpu, going to one-value-per-file is going to
make that worse. But maybe this doesn't bother you.
--
Daniel
On Mon, 5 Nov 2001, Stuart Young wrote:
> Any reason we can't move all the process info into something like
> /proc/pid/* instead of in the root /proc tree?
Thanks, but no thanks. If we are starting to move stuff around, we
would be much better off leaving in /proc only what it was supposed
to contain - per-process information.
At 02:52 PM 4/11/01 -0500, Alexander Viro wrote:
>Would the esteemed sir care to check where these cycles are spent?
>How about "traversing page tables of every damn process out there"?
>Doesn't sound like a string operation to me...
Just a quickie....
Any reason we can't move all the process info into something like
/proc/pid/* instead of in the root /proc tree?
Should be pretty easy to do, could still have the pid's in the root /proc
tree, and if they get read, do what /proc/pci does, and log a warning about
"xxx is using old /proc interfaces". Makes it just that little bit easier
to parse processes without fiddling around if you know all the dir's are
always processes. It's also a bit of a visual cleanup when you have lots of
processes and do a 'ls /proc'.
There is probably a few other things in /proc/* that could be moved out and
put in more sensible places (eg: interrupts, irq, devices, mtrr, slabinfo,
mounts, modules, stat, etc), that really define what they belong to (a
/proc/kernel/* mebbe). Having /proc basically full of directories would
clean things up a bit. Some things don't need to change though (eg: uptime,
version).
AMC Enterprises P/L - Stuart Young
First Floor - Network and Systems Admin
3 Chesterville Rd - [email protected]
Cheltenham Vic 3192 - Ph: (03) 9584-2700
http://www.amc.com.au/ - Fax: (03) 9584-2755
At 11:05 PM 4/11/01 -0500, Alexander Viro wrote:
>On Mon, 5 Nov 2001, Stuart Young wrote:
>
> > Any reason we can't move all the process info into something like
> > /proc/pid/* instead of in the root /proc tree?
>
>Thanks, but no thanks. If we are starting to move stuff around, we
>would be much better off leaving in /proc only what it was supposed
>to contain - per-process information.
That's fair.. so (this is all speculation of course) move everything else
but process info out of there? I could handle that, makes sense, long as we
had some backward "transitional" interface, that warned about using old
interfaces. Only question is, where would we put this information in the
file system tree?
AMC Enterprises P/L - Stuart Young
First Floor - Network and Systems Admin
3 Chesterville Rd - [email protected]
Cheltenham Vic 3192 - Ph: (03) 9584-2700
http://www.amc.com.au/ - Fax: (03) 9584-2755
Alexander Viro wrote:
>
> On Sun, 4 Nov 2001, Tim Jansen wrote:
>
> > So if only some programs use the 'dot-files' and the other still use the
> > crappy text interface we still have the old problem for scripts, only with a
> > much larger effort.
>
> Folks, could we please deep-six the "ASCII is tough" mentality? Idea of
> native-endian data is so broken that it's not even funny. Exercise:
> try to export such thing over the network. Another one: try to use
> that in a shell script. One more: try to do it portably in Perl script.
>
> It had been tried. Many times. It had backfired 100 times out 100.
> We have the same idiocy to thank for fun trying to move a disk with UFS
> volume from Solaris sparc to Solaris x86. We have the same idiocy to
> thank for a lot of ugliness in X.
>
> At the very least, use canonical bytesex and field sizes. Anything less
> is just begging for trouble. And in case of procfs or its equivalents,
> _use_ the_ _damn_ _ASCII_ _representations_. scanf(3) is there for
> purpose.
And the purpose of scanf in system level applications is to introduce
nice
opportunities for buffer overruns and string formatting bugs.
On November 5, 2001 12:06 pm, Martin Dalecki wrote:
> Alexander Viro wrote:
> > At the very least, use canonical bytesex and field sizes. Anything less
> > is just begging for trouble. And in case of procfs or its equivalents,
> > _use_ the_ _damn_ _ASCII_ _representations_. scanf(3) is there for
> > purpose.
>
> And the purpose of scanf in system level applications is to introduce
> nice opportunities for buffer overruns and string formatting bugs.
I've done quite a bit more kernel profiling and I've found that overhead for
converting numbers to ascii for transport to proc is significant, and there
are other overheads as well, such as the sprintf and proc file open. These
must be matched by corresponding overhead on the user space side, which I
have not profiled. I'll take some time and present these numbers properly at
some point.
Not that I think we are going to change this way of doing things any time
soon - Linus has spoken - but at least we should know what the overheads are.
Programmers should not labor under the misaprehension that this is an
efficient interface.
--
Daniel
"Albert D. Cahalan" wrote:
Every BASTARD out there telling the world, that parsing ASCII formatted
files
is easy should be punished to providing a BNF definition of it's syntax.
Otherwise I won't trust him. Having a struct {} with a version field,
indicating
possible semantical changes wil always be easier faster more immune
to errors to use in user level programs.
Hi,
> We want to avoid these problems:
> 1) It is hard to parse (some) /proc files from userspace
> 2) As /proc files change, parsers must be changed in userspace
>
> Still, we want to keep on offering
> 3) Human readable /proc files with some amount of pretty-printing
> 4) A /proc fs that can be changed as the kernel needs those changes
I've read the whole thread, but i still don't get it. Your solution doesn't
improve (1) for parsers in scripting languages, where it is frequently far
easier to parse ASCII stuff than messing with binary things, when not almost
impossible. So we don't make any progress here. And for languages like C,
where this will have most use, there actually is solution and it is working.
So, please, can you enlighten me, what's so wrong on sysctl? It actually
provides exactly what do you want, and you even don't need to bother yourself
with open() etc ;). So it would be maybe better improving sysctl interface,
especially mirroring of all /proc stuff there, instead of arguing about scanf()
:-).
So can you please explain me merits of your approach against sysctl?
--
Petr "Pasky" Baudis
UN*X programmer, UN*X administrator, hobbies = IPv6, IRC
Real Users hate Real Programmers.
Public PGP key, geekcode and stuff: http://pasky.ji.cz/~pasky/
On Mon, 5 Nov 2001, Martin Dalecki wrote:
> "Albert D. Cahalan" wrote:
>
> Every BASTARD out there telling the world, that parsing ASCII formatted
> files
What was your username, again?
Stuart Young wrote:
>
> At 11:05 PM 4/11/01 -0500, Alexander Viro wrote:
>
> >On Mon, 5 Nov 2001, Stuart Young wrote:
> >
> > > Any reason we can't move all the process info into something like
> > > /proc/pid/* instead of in the root /proc tree?
> >
> >Thanks, but no thanks. If we are starting to move stuff around, we
> >would be much better off leaving in /proc only what it was supposed
> >to contain - per-process information.
>
We could add a file into /proc like /proc/processes that contains once
all process informations that some programs like top or ps can read only
Once.
It could save a lot of time in kernel mode scanning the process list for
each process.
later, a new version of ps or top could simply stat /proc/processes and
if it exists uses it to give informations to the user.
What do you think of this idea ?
SpaceWalker
[email protected]
ICQ 36157579
At 11:12 AM +1100 11/5/01, Rusty Russell wrote:
>Firstly, do not perpetuate the myth of /proc being "human readable". (Hint:
>what language do humans speak?) It supposed to be "admin readable" and
>"machine readable".
That's the key observation, seems to me. In our development, we've
adopted a standard of tagged values, where a single-value file is
tagged by its name, and multiple-value files have a tag:value per
line (where value might be an n-tuple).
The result is easy to parse for userland code that needs the values
and relatively easy (because ASCII and consistent) for admins to
read. A pretty-printer provides an interface for mere humans.
I suppose one could add typing information as well, but it seems to
me that a reader of /proc/stuff is either completely ignorant of the
content (eg cat), and typing is irrelevant, or it knows what's there
(eg ps) and typing is redundant, as long as there are unambiguous
tags.
I think of the tagged list of n-tuples as a kind of ASCII
representation of a simple struct. One could of course create a
general ASCII representation of a C struct, and no doubt it's been
done innumerable times, but I don't think that helps in this
application.
Of course, one tagged value can be "version"....
--
/Jonathan Lundell.
At 12:23 PM 11/5/01 +0100, Martin Dalecki wrote:
>Every BASTARD out there telling the world, that parsing ASCII formatted
>files
>is easy should be punished to providing a BNF definition of it's syntax.
>Otherwise I won't trust him. Having a struct {} with a version field,
>indicating
>possible semantical changes wil always be easier faster more immune
>to errors to use in user level programs.
I would love for the people who write the code that generates the /proc
info to be required to document the layout of the information. The best
place for that documentation is the source, and in English or other
accepted human language, in a comment block. Not in "header lines" or
other such nonsense. I don't need no stinkin' BNF, just a reasonable
description of what each field is would suffice. I would go so far as to
say there needs to be a standard established in how /proc data is formatted
so that we can create templates for the standard tools.
(I have to ask, have you ever used flex? I used to hand-code scanners, but
I find that flex is so much easier and generates smaller faster code than I
can do by hand. Changes are easy, too)
As for version fields: I HATE THEM. So much of my older code has bloat
because of "version fields" that require that I have multiple blocks of
code for the same damn thing. POSIX code that has to determine which
version of POSIX is implemented, and tailor the code at run-time to the
whims of the OS gods. BLOAT BLOAT BLOAT. Besides, you already have a
"version field", or is the release level of Linux too coarse for you?
As for easier: EASIER FOR WHOM? The sysadmin who is trying to figure out
why his system is behaving in a strange manner? You expect sysadmins to
grow C compilers and header files in order to read /proc? You can bet that
my next point will require sysadmins to look at the hidden proc files
sooner or later. To wit:
The absolute worst part of this proposal is that it provides yet another
for separate mechanism to do the same thing, and there is no clean way to
use the ASCII /proc mechanism to generate the binary. That inflates the
opportunities for error by an unmanageable amount -- you will end up
breaking BOTH methods of extracting information. Which is more
important: getting the right information at the cost of a flex/bison
scanner and some CPU time, or getting the WRONG information in the blink of
the CPU's eye? What happens when the ASCII version is broken and the
binary version is right? Who is going to take up the task of verifying
that the ASCII and the binary match?
That version field thing: you have to be willing to guarantee complete
backward compatibility of your structures, so that you can only extend the
structures, not manipulate already-defined fields. In addition, you would
need to define, for every single field, a value that indicates that no
value is present. This means that fields that are deprecated will still
have a value, but the value that would be returned would be "no
value"...and the applications that use your structures would have to know
and understand and test for this not-a-value value and react appropriately.
One think I like about SNMP is that I can parse a MIB and probe for the
information I need without worrying about versions. It's there, I know its
type, and I know what to expect in the way of values. I'm also told when
there is no value to report, either because the OS chooses not to return
one, or because the state of the system says that returning a value is
meaningless. There is already a BNF definition of a MIB, too, which
satisfies your other requirement. New version? New
MIB. Cross-checking? Yes, I can be sure that the version of the MIB I'm
using matches the version the system is using to generate the data.
Oh, the bloat thing: why do you want to bloat the kernel even more than it
is? /proc is not cheap, and there have been times when I have been tempted
to generate kernels without it. Doubling up on the /proc filesystem may
drive me to do it yet, and explore the wonders of sysctl.
I applaud the proponents of the idea for identifying a problem and
proposing an interesting fix. It's the wrong fix, but interesting anyway.
Stephen Satchell
Alexander Viro wrote:
>
> On Mon, 5 Nov 2001, Martin Dalecki wrote:
>
> > "Albert D. Cahalan" wrote:
> >
> > Every BASTARD out there telling the world, that parsing ASCII formatted
> > files
>
> What was your username, again?
root, with uid != 0 and on a masquaraded host, who cares?
Stephen Satchell wrote:
>
> At 12:23 PM 11/5/01 +0100, Martin Dalecki wrote:
> >Every BASTARD out there telling the world, that parsing ASCII formatted
> >files
> >is easy should be punished to providing a BNF definition of it's syntax.
> >Otherwise I won't trust him. Having a struct {} with a version field,
> >indicating
> >possible semantical changes wil always be easier faster more immune
> >to errors to use in user level programs.
>
> I would love for the people who write the code that generates the /proc
> info to be required to document the layout of the information. The best
> place for that documentation is the source, and in English or other
> accepted human language, in a comment block. Not in "header lines" or
> other such nonsense. I don't need no stinkin' BNF, just a reasonable
I don't agree. BNF is basically the only proper and efficient way for a
nice formal descrition of a LR parsable language. No accident most
programming
languages out there are defined in some sort of BNF.
> description of what each field is would suffice. I would go so far as to
> say there needs to be a standard established in how /proc data is formatted
> so that we can create templates for the standard tools.
>
> (I have to ask, have you ever used flex? I used to hand-code scanners, but
> I find that flex is so much easier and generates smaller faster code than I
> can do by hand. Changes are easy, too)
Short answer: yes I know them, yacc bison pure flex and lex whatever,
and
I used to use them for job projects not just toys. Trust me they are
the only proper practical way to define the syntax of something parsable
and beeing complete about it. Unless you wan't to reach the stability of
the usual perl-web hackkery.
Martin Dalecki wrote:
> Stephen Satchell wrote:
>
>>At 12:23 PM 11/5/01 +0100, Martin Dalecki wrote:
>>
>>>Every BASTARD out there telling the world, that parsing ASCII formatted
>>>files
>>>is easy should be punished to providing a BNF definition of it's syntax.
>>>Otherwise I won't trust him. Having a struct {} with a version field,
>>>indicating
>>>possible semantical changes wil always be easier faster more immune
>>>to errors to use in user level programs.
>>>
>>I would love for the people who write the code that generates the /proc
>>info to be required to document the layout of the information. The best
>>place for that documentation is the source, and in English or other
>>accepted human language, in a comment block. Not in "header lines" or
>>other such nonsense. I don't need no stinkin' BNF, just a reasonable
I would rather have a header block, as well as docs in the source.
If the header cannot easily explain it, then the header can have a URL
or other link to the full explanation. I don't expect to be able to parse
every /proc interface with a single tool, but I would like to be able to
easily parse individual ones with perl, sscanf, etc...
Ben
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
On Mon, 5 Nov 2001, Ben Greear wrote:
> I would rather have a header block, as well as docs in the
> source. If the header cannot easily explain it, then the header
> can have a URL or other link to the full explanation.
I think you've hit the core of the problem. There is no magical
bullet which will stop badly written userland programs from
breaking, but the kernel developers should have the courtesy
of providing documentation for the /proc files so the writers
of userland programs can have an idea what to expect.
The inline docbook stuff in the kernel should make it easy for
kernel developers to keep code and documentation in sync, while
also making it easy to generate documentation in a format which
is nice to read ;)
regards,
Rik
--
DMCA, SSSCA, W3C? Who cares? http://thefreeworld.net/ (volunteers needed)
http://www.surriel.com/ http://distro.conectiva.com/
At 8:38 AM -0800 11/5/01, Stephen Satchell wrote:
>As for version fields: I HATE THEM. So much of my older code has
>bloat because of "version fields" that require that I have multiple
>blocks of code for the same damn thing. POSIX code that has to
>determine which version of POSIX is implemented, and tailor the code
>at run-time to the whims of the OS gods. BLOAT BLOAT BLOAT.
>Besides, you already have a "version field", or is the release level
>of Linux too coarse for you?
Either too coarse or too fine, often enough, when we're talking about
a semi-independent module. Consider, though, a more legitimate
non-bloating use of a version field. Rather than try to support all
versions, use it to determine whether the two ends of the
communication channel are compatible, and fail gracefully because of
the incompatible version. Tell the user to update the app, or
whatever.
--
/Jonathan Lundell.
[email protected] (Jakob ?stergaard) wrote on 04.11.01 in <[email protected]>:
> Here's my stab at the problems - please comment,
>
> We want to avoid these problems:
> 1) It is hard to parse (some) /proc files from userspace
> 2) As /proc files change, parsers must be changed in userspace
>
> Still, we want to keep on offering
> 3) Human readable /proc files with some amount of pretty-printing
> 4) A /proc fs that can be changed as the kernel needs those changes
And here's my proposal:
Backwards compatibility can be solved by keeping procfs as-is and creating
a new kernfs. (Ok, so this could also be done as a sub-tree of proc, or
any number of other ways ...)
The rest can be solved by defining a few generic file formats, and
insisting (via the interfaces exposed to kernel code) that only those file
formats will be used.
User space can further be helped by putting a format tag into the file
name, if that is necessary - a single letter should be enough here.
As general design principles:
1. Most files should be plain text.
2. Text values should use the most obvious formatting. (Such as using
10.1.2.3 for IP addresses, or 123.4 or 0.000234 for times in seconds.) If
units are needed and SI has a base unit for that area, use it.
3. Any files that can be written to for control reasons, should have a
single value and should read and write the same value. Unless it's a
commando-type of interface, but those should be kept rare (and should
probably read back some sort of status message). On read, there should be
no white space or line ends around the value.
4. Only use binary if the subject matter doesn't make sense as text. I
don't know that we actually have need for this - we certainly don't need
another /proc/kcore, and firmware download drivers don't belong here.
5. I think I can see a use for two different table formats.
One has every line be a tag, a colon, optional white space around the
colon and the tag, and a value; tags are unique, value formatting as in 2.
UP /proc/cpuinfo, for example. Values can have embedded white space if
that is necessary.
The other has a header line of white-space-separated tags, followed by
lines of white-space-separated values, one per tag. No value should
contain white space. /proc/net/rt_cache, for example.
6. There's a provision of having a list of similar directories indexed by
either a number or a name, for per-blockdevice, per-channel, per-network-
interface and so on.
Now, obviously, there'll be something I've missed ... but I think these
are fairly sane design principles, and if we insist on everyone keeping to
the defined formats and consider everything elkse a bug to be fixed, the
result is easy to parse, easy to change without breaking sane parsers, and
still human readable.
And we have made do with exactly three file formats, and could easily
write a very small generic parser for these formats.
MfG Kai
On Monday 05 November 2001 17:49, Jonathan Lundell wrote:
> I think of the tagged list of n-tuples as a kind of ASCII
> representation of a simple struct. One could of course create a
> general ASCII representation of a C struct, and no doubt it's been
> done innumerable times, but I don't think that helps in this
> application.
But how can you represent references with those lists? Try to model the
content of /proc/bus/usb/devices with them.
bye...
On Monday 05 November 2001 14:41, Petr Baudis wrote:
> So, please, can you enlighten me, what's so wrong on sysctl?
It doesn't work for complex data, especially lists. How do you want to
configure devices, for example?
bye...
On Monday 05 November 2001 19:40, Rik van Riel wrote:
> I think you've hit the core of the problem. There is no magical
> bullet which will stop badly written userland programs from
> breaking, but the kernel developers should have the courtesy
> of providing documentation for the /proc files so the writers
> of userland programs can have an idea what to expect.
I think the core insight is that if the kernel continues to have dozens of
"human-readable" file formats in /proc, each should to be documented using a
BNF description that can guarantee that the format is still valid in the
future, even if there is the need to add additional fields.
The result of this is, of course, that it may be very hard to write
shell scripts that won't break sooner or later and that accessing the data in
C is much more work than a simple scanf.
bye...
At 11:58 AM 11/5/01 -0800, Jonathan Lundell wrote:
>Either too coarse or too fine, often enough, when we're talking about a
>semi-independent module. Consider, though, a more legitimate non-bloating
>use of a version field. Rather than try to support all versions, use it to
>determine whether the two ends of the communication channel are
>compatible, and fail gracefully because of the incompatible version. Tell
>the user to update the app, or whatever.
I have software out in the field that has been around for more than ten
years. Some of it has been maintenance-free (other than the
every-other-fortnight bug report that requires a fix) because the
underlying operating system didn't change. Some of it has been a
nightmare, requiring changes for each OS release and in some cases with
each sub-release in order to keep the feature bloat from knocking out the
functionality of the program.
Unlike many of you, my client base doesn't upgrade on a whim. They stick
with what works. That means all my software has to be able to run up and
down the version tree, and I have a real problem maintaining parallel
versions of code. In Linux, I have people on 2.0.34 still. I have people
running some of my software on old versions of Ultrix on hardware that
hasn't seen sales for over a decade. I just found out that software I
wrote 20 years ago is STILL in use, and customers were inquiring if I was
available to make changes!
And then there is the problem of who pays for my time to make the app
update. I don't charge people for updates as a rule -- that rule may have
to change for my Linux apps if this ill-thought-out idea goes into the
kernel. I expend enough effort trying to keep up with the crap coming out
of Redmond and Cupertino.
Apologies for the vent, but I just swatted another bug caused by an
undocumented change in Windows 2000 that nailed one of my apps but good. I
shudder to think what XP is going to look like when my clients start
thinking of "upgrading" their hardware and have XP foisted on them...
Satch
Tim Jansen wrote:
> On Monday 05 November 2001 19:40, Rik van Riel wrote:
>
>>I think you've hit the core of the problem. There is no magical
>>bullet which will stop badly written userland programs from
>>breaking, but the kernel developers should have the courtesy
>>of providing documentation for the /proc files so the writers
>>of userland programs can have an idea what to expect.
>>
>
> I think the core insight is that if the kernel continues to have dozens of
> "human-readable" file formats in /proc, each should to be documented using a
> BNF description that can guarantee that the format is still valid in the
> future, even if there is the need to add additional fields.
> The result of this is, of course, that it may be very hard to write
> shell scripts that won't break sooner or later and that accessing the data in
> C is much more work than a simple scanf.
So if BNF makes it harder for shell scripts and sscanf, and harder for
the kernel developers...what good does it do??? I definately don't advocate
anything more than some simple documentation about whatever format the proc
module writer uses. All of these interfaces (proc, ioctl, ...) end up being
hacks at some point, but a _documented_ hack can be called a feature :)
>
> bye...
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
>
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
Tim Jansen wrote:
> On Monday 05 November 2001 14:41, Petr Baudis wrote:
>
>>So, please, can you enlighten me, what's so wrong on sysctl?
>>
>
> It doesn't work for complex data, especially lists. How do you want to
> configure devices, for example?
How about this:
struct ioctl_payload {
int how_many;
int* numbers;
};
User space sets things up normally, with a valid pointer in 'numbers'.
Kernel space copy_from_user the structure, then copy_from_user the number's
memory...
Can that work? If so, numbers could of course be any data structure we want...
>
> bye...
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
>
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
On Mon, Nov 05, 2001 at 09:46:19PM +0100, Tim Jansen wrote:
> On Monday 05 November 2001 17:49, Jonathan Lundell wrote:
> > I think of the tagged list of n-tuples as a kind of ASCII
> > representation of a simple struct. One could of course create a
> > general ASCII representation of a C struct, and no doubt it's been
> > done innumerable times, but I don't think that helps in this
> > application.
>
> But how can you represent references with those lists? Try to model the
> content of /proc/bus/usb/devices with them.
The contents of /proc/bus/usb is the usbdevfs file system. It does not
fit into the current /proc model, or discussion.
It's only mounted at that location, for lack of a better place :)
And no, "usbdevfs" has _nothing_ to do with "devfs", it was a bad name
choice, in hindsight.
thanks,
greg k-h
> So far sysctl is only used to configure kernel parameters, so there exists
> one parameter in the system (per kernel).
Not true.. see net.ipv* stuff - for each device, same parameters are to be set.
I see no problem in this.
> An example for devices would be mass storage devices. You may want to switch
> DMA on and off per device. Using one-value-files you would have directories
> called /somepath/0/dma, /somepath/1/dma and so on, and could turn on DMA on
> device 1 by executing "echo 1 > /somepath/1/dma".
Set 1 for dev.ide.host0.bus0.target0.lun0.dma (we should stay consistent at
least with devfs, or we can give it up completely ;)
> Beside that there is the good old problem "who manages the sysctl namespace"
> problem that is even more important if you want to use sysctl for device
> drivers that may not even be in the kernel.
Well, why not maintainers of appropriate kernel sections, or even special
maintainer, like the one for device numbers. For each section of sysctl
namespace, subset of required ctls should be defined, obviously not restricting
optional bloat ;).
--
Petr "Pasky" Baudis
UN*X programmer, UN*X administrator, hobbies = IPv6, IRC
Real Users hate Real Programmers.
Public PGP key, geekcode and stuff: http://pasky.ji.cz/~pasky/
On Tuesday 06 November 2001 00:04, Greg KH wrote:
> The contents of /proc/bus/usb is the usbdevfs file system. It does not
> fit into the current /proc model, or discussion.
It's just a example of a complex data structure that cannot easily be
represented using the tagged-list form (I took it as an example because the
first version of the devreg patch used tagged lists, too, and the complexity
of representing this USB structure convinced me that tagged-lists are too
limited).
Whatever format is chosen for proc, it should be used for all data.
bye...
Daniel Phillips writes:
> I've done quite a bit more kernel profiling and I've found that
> overhead for converting numbers to ascii for transport to proc is
> significant, and there are other overheads as well, such as the
> sprintf and proc file open. These must be matched by corresponding
> overhead on the user space side, which I have not profiled. I'll
> take some time and present these numbers properly at some point.
You said "top -d .1" was 18%, with 11% user, and konsole at 9%.
So that gives:
9% konsole
7% kernel
2% top
0% X server ????
If konsole is well-written, that 9% should drop greatly as konsole
falls behind on a busy system. For example, when scrolling rapidly
it might skip whole screenfuls of data. Hopefully those characters
are rendered in a reasonably efficient way.
On Monday 05 November 2001 22:58, Ben Greear wrote:
> So if BNF makes it harder for shell scripts and sscanf, and harder for
> the kernel developers...what good does it do???
You know how to parse the file.
Take a look at /proc/partitions. Is its exact syntax obvious without
examining the source in the kernel? Can it happen that there is a space or
another unusual character in the device path and what happens then? Could it
be that someone decides that an additional column is neccessary and how can
my parser stay compatible then? Are there any suprises or special conditions
that I don't know about? Maybe one of the fields is hexadecimal but I think
it is decimal, I can't see it from looking at the file's content.
bye...
On Mon Nov 05, 2001 at 11:51:52PM +0100, Tim Jansen wrote:
> On Monday 05 November 2001 22:58, Ben Greear wrote:
> > So if BNF makes it harder for shell scripts and sscanf, and harder for
> > the kernel developers...what good does it do???
>
> You know how to parse the file.
> Take a look at /proc/partitions. Is its exact syntax obvious without
> examining the source in the kernel? Can it happen that there is a space or
> another unusual character in the device path and what happens then? Could it
Come now, it really isn't that difficult:
char name[80];
unsigned long long size;
unsigned int major, minor;
if (sscanf(line, "%4u %4u %llu %s", &major, &minor, &size, name) == 4)
{
add_partition(name, size, major, minor);
}
-Erik
--
Erik B. Andersen http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--
You wrote:
> Alexander Viro wrote:
>> On Mon, 5 Nov 2001, Martin Dalecki wrote:
>>> "Albert D. Cahalan" wrote:
>>>
>>> Every BASTARD out there telling the world, that parsing ASCII formatted
>>> files
>>
>> What was your username, again?
>
> root, with uid != 0 and on a masquaraded host, who cares?
I think the point is that it looks like to attributed your own
words to me. Your post didn't quote anything from me, but it
started off as follows:
--------------------------------------
"Albert D. Cahalan" wrote:
Every BASTARD out there telling the world, that parsing ASCII formatted
--------------------------------------
Well, I didn't write that or anything else in your post.
On Monday 05 November 2001 23:59, Erik Andersen wrote:
> Come now, it really isn't that difficult:
> if (sscanf(line, "%4u %4u %llu %s", &major, &minor, &size, name) == 4)
> {
> add_partition(name, size, major, minor);
> }
But how can the user know this without looking into the kernel? Compare it to
/proc/mounts. Proc mounts escapes spaces and other special characters in
strings with an octal encoding (so spaces are replaced by '\040').
bye...
On Tue, 6 Nov 2001, Tim Jansen wrote:
> But how can the user know this without looking into the kernel? Compare it to
> /proc/mounts. Proc mounts escapes spaces and other special characters in
> strings with an octal encoding (so spaces are replaced by '\040').
Ah, yes - the horrible /proc/mounts. Check that code in 2.4.13-ac8, will
you?
Yes, current procfs sucks. We got a decent infrastructure that allows
to write that code easily. Again, see -ac8 and watch fs/namespace.c
code dealing with /proc/mounts.
No need to play silly buggers with "one value per file" (and invite the
Elder Ones with applications trying to use getdents()). Sigh...
On November 5, 2001 11:46 pm, Albert D. Cahalan wrote:
> Daniel Phillips writes:
>
> > I've done quite a bit more kernel profiling and I've found that
> > overhead for converting numbers to ascii for transport to proc is
> > significant, and there are other overheads as well, such as the
> > sprintf and proc file open. These must be matched by corresponding
> > overhead on the user space side, which I have not profiled. I'll
> > take some time and present these numbers properly at some point.
>
> You said "top -d .1" was 18%, with 11% user, and konsole at 9%.
> So that gives:
>
> 9% konsole
> 7% kernel
> 2% top
> 0% X server ????
No, the konsole 9% is outside of top's 18%.
> If konsole is well-written, that 9% should drop greatly as konsole
> falls behind on a busy system. For example, when scrolling rapidly
> it might skip whole screenfuls of data. Hopefully those characters
> are rendered in a reasonably efficient way.
I don't think I'll try to optimize konsole/QT/X today, thanks ;-)
Lets just not lose sight of the overhead connected with ASCII proc IO, it's a
lot more than some seem to think.
--
Daniel
At 01:54 AM 11/6/01 +0100, Daniel Phillips wrote:
>Lets just not lose sight of the overhead connected with ASCII proc IO, it's a
>lot more than some seem to think.
Any idea what the overhead connected with a binary proc IO would be? From
looking at some of the code, it would appear that you have a lot of
overhead no matter what you do.
Satch
In message <[email protected]> you write:
> Yes, sold, if implementing the formatter is part of the plan.
>
> Caveat: by profiling I've found that file ops on proc functions are already
> eating a significant amount of cpu, going to one-value-per-file is going to
> make that worse. But maybe this doesn't bother you.
What concerns me most is the pain involved in writing a /proc or
sysctl interface in the kernel today. Take kernel/module.c's
get_ksyms_list as a typical example: 45 lines of code to perform a
very trivial task. And this code is sitting in your kernel whether
proc is enabled or not. Now, I'm a huge Al Viro fan, but his proposed
improvements are in the wrong direction, IMHO.
My first priority is to have the most fool-proof possible inner kernel
interface. Second is trying to preserve some of the /proc features
which actually work well when correctness isn't a huge issue (such as
"give me everything in one table"). Efficiency of getting these
things out of the kernel is a distant last (by see my previous comment
on adapting sysctl(2)).
I'd like to see /proc (/proc/sys) FINALLY live up to its promise
(rich, logical, complete) in 2.5. We can do this by making it the
simplest option for coders and users.
Rusty.
--
Premature optmztion is rt of all evl. --DK
On Mon, Nov 05, 2001 at 01:43:11PM -0800, Stephen Satchell wrote:
> At 11:58 AM 11/5/01 -0800, Jonathan Lundell wrote:
> >use of a version field. Rather than try to support all versions, use it to
> >determine whether the two ends of the communication channel are
> >compatible, and fail gracefully because of the incompatible version. Tell
> >the user to update the app, or whatever.
[snip]
> And then there is the problem of who pays for my time to make the app
> update. I don't charge people for updates as a rule -- that rule may have
> to change for my Linux apps if this ill-thought-out idea goes into the
> kernel. I expend enough effort trying to keep up with the crap coming out
I hope you just don't mean the version number idea. Because I don't see
reason for not, instead of adding a version number to every /proc file and
breaknig everything, adding all them to a /proc/proc-version file which
would still let clients make some sanity checks.
--
____/| Ragnar H?jland Freedom - Linux - OpenGL | Brainbench MVP
\ o.O| PGP94C4B2F0D27DE025BE2302C104B78C56 B72F0822 | for Unix Programming
=(_)= "Thou shalt not follow the NULL pointer for | (http://www.brainbench.com)
U chaos and madness await thee at its end."
On Mon, Nov 05, 2001 at 05:32:34PM +0100, SpaceWalker wrote:
> Stuart Young wrote:
> >
> > At 11:05 PM 4/11/01 -0500, Alexander Viro wrote:
> >
> > >On Mon, 5 Nov 2001, Stuart Young wrote:
> > >
> > > > Any reason we can't move all the process info into something like
> > > > /proc/pid/* instead of in the root /proc tree?
> > >
> > >Thanks, but no thanks. If we are starting to move stuff around, we
> > >would be much better off leaving in /proc only what it was supposed
> > >to contain - per-process information.
> >
>
> We could add a file into /proc like /proc/processes that contains once
> all process informations that some programs like top or ps can read only
> Once.
> It could save a lot of time in kernel mode scanning the process list for
> each process.
> later, a new version of ps or top could simply stat /proc/processes and
> if it exists uses it to give informations to the user.
> What do you think of this idea ?
We would have the same "changing format of /proc/processes" parsing
problems as we have now with the rest of /proc.
Why not implement all of top in the kernel, so that you could do a
cat /dev/top and have the usual top output nicely shown ? ;)
(yes, the last one was a joke!)
Your suggestion may improve the performance of one or two userland
applications, but it does not attack the real problem: that /proc is not
machine readable.
We would be maintaining yet another /proc file, but we'd still have the
problems we have now. Implementing an A.I. in every CPU meter applet out
there, while still having to accept that the A.I. gives up on us every now and
then (when someone decides to add an ASCII art visualization of the utilization
of the various ALUs in /proc/cpuinfo for example - the worst part being that
this example is probably not even far fetched!)
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Mon, Nov 05, 2001 at 02:41:12PM +0100, Petr Baudis wrote:
> Hi,
>
> > We want to avoid these problems:
> > 1) It is hard to parse (some) /proc files from userspace
> > 2) As /proc files change, parsers must be changed in userspace
> >
> > Still, we want to keep on offering
> > 3) Human readable /proc files with some amount of pretty-printing
> > 4) A /proc fs that can be changed as the kernel needs those changes
>
> I've read the whole thread, but i still don't get it. Your solution doesn't
> improve (1) for parsers in scripting languages, where it is frequently far
> easier to parse ASCII stuff than messing with binary things, when not almost
> impossible. So we don't make any progress here. And for languages like C,
> where this will have most use, there actually is solution and it is working.
> So, please, can you enlighten me, what's so wrong on sysctl? It actually
> provides exactly what do you want, and you even don't need to bother yourself
> with open() etc ;). So it would be maybe better improving sysctl interface,
> especially mirroring of all /proc stuff there, instead of arguing about scanf()
> :-).
>
> So can you please explain me merits of your approach against sysctl?
As far as I can see, I cannot read /proc/[pid]/* info using sysctl.
Then I need the other /proc/* files as well, not just /proc/sys/*
It seems to me that the sysctl interface does not have any type checking,
so if for example I want to read the jiffies counter and supply a 32-bit
field, sysctl will happily give me the first/last 32 bits of a field that
could as well be 64 bits (bit widths do sometimes change, even on architectures
that do not change). How am I to know ?
If you look in kernel/sysctl.c, you'll see code like
if (oldval && oldlenp) {
get_user(len, oldlenp);
if (len) {
if (len > table->maxlen)
len = table->maxlen;
if(copy_to_user(oldval, table->data, len))
return -EFAULT;
if(put_user(len, oldlenp))
return -EFAULT;
}
}
if (newval && newlen) {
len = newlen;
if (len > table->maxlen)
len = table->maxlen;
if(copy_from_user(table->data, newval, len))
return -EFAULT;
}
Now is that pretty or what - Imagine someone trying to configure a 64-bit
kernel parameter with a 32 bit value ;)
This could be tightened up of course - but the problem of knowing which
unit some number may be represented in is still there. For example,
assuming I could read out partition sizes, well are they in blocks, bytes,
kilobytes, or what ? Oh, I'm supposed to *know*, and *assume* such things
never change ?
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
[email protected] (Jakob ?stergaard) wrote on 04.11.01 in <[email protected]>:
> On Sun, Nov 04, 2001 at 03:01:12PM -0500, Alexander Viro wrote:
> >
> >
> > On Sun, 4 Nov 2001, [iso-8859-1] Jakob %stergaard wrote:
> >
> > > Strong type information (in one form or the other) is absolutely
> > > fundamental for achieving correctness in this kind of software.
> >
> > Like, say it, all shell programming? Or the whole idea of "file as stream
> > of characters"? Or pipes, for that matter...
> >
>
> Shell programming is great for small programs. You don't need type
> information in the language when you can fit it all in your head.
>
> Now, go write 100K lines of shell, something that does something that is not
> just shoveling lines from one app into a grep and into another app. Let's
> say, a database. Go implement the next Oracle replacement in bash, and tell
> me you don't care about types in your language.
And now look at how large typical /proc-using code parts are. Do they
match better with your first or your second paragraph?
The first?
I thought so.
MfG Kai
> As far as I can see, I cannot read /proc/[pid]/* info using sysctl.
That can be added. We just have existing interface, and I don't propose to
stick on its actual state as it isn't convenient, but to extend it to cope our
needs.
> Then I need the other /proc/* files as well, not just /proc/sys/*
IMHO whole /proc should be mirrored by sysctl. Then, we can in 2.7 slowly move
those only to /proc/sys. Ideally, only /proc/[pid]/ and /proc/sys/ should be
present in /proc/.
> It seems to me that the sysctl interface does not have any type checking,
> so if for example I want to read the jiffies counter and supply a 32-bit
> field, sysctl will happily give me the first/last 32 bits of a field that
> could as well be 64 bits (bit widths do sometimes change, even on architectures
> that do not change). How am I to know ?
From the sysctl() manpage:
size_t *oldlenp; /* available room for old value,
overwritten by actual size of old value */
So, it will happily give you first/last 32 bits (at least something and your
application won't crash UNhappily ;), however it will write to location pointed
by oldlenp that jiffies are 64 bits.
> This could be tightened up of course - but the problem of knowing which
> unit some number may be represented in is still there. For example,
> assuming I could read out partition sizes, well are they in blocks, bytes,
> kilobytes, or what ? Oh, I'm supposed to *know*, and *assume* such things
> never change ?
Obviously they shouldn't. Any reason to change them? If you decide it would
be nice to give you also size in something different from blocks, you can just
introduce new ctl with suffix e.g. kb or so.
--
Petr "Pasky" Baudis
UN*X programmer, UN*X administrator, hobbies = IPv6, IRC
Real Users hate Real Programmers.
Public PGP key, geekcode and stuff: http://pasky.ji.cz/~pasky/
On Tue, 6 Nov 2001, Petr Baudis wrote:
> > As far as I can see, I cannot read /proc/[pid]/* info using sysctl.
> That can be added. We just have existing interface, and I don't propose to
> stick on its actual state as it isn't convenient, but to extend it to cope our
> needs.
No, that cannot. Guys, you've been told: it won't happen. I think that
was loud and clear enough.
Can it. Get a dictionary and look up the meaning of "veto".
Oh, and as for "let's extend existing interfaces just because we had flunked
'strings in C'" - if you need Hurd, you know where to find it.
On November 5, 2001 11:48 pm, Rusty Russell wrote:
> In message <[email protected]> you write:
> > Yes, sold, if implementing the formatter is part of the plan.
> >
> > Caveat: by profiling I've found that file ops on proc functions are already
> > eating a significant amount of cpu, going to one-value-per-file is going to
> > make that worse. But maybe this doesn't bother you.
>
> What concerns me most is the pain involved in writing a /proc or
> sysctl interface in the kernel today. Take kernel/module.c's
> get_ksyms_list as a typical example: 45 lines of code to perform a
> very trivial task. And this code is sitting in your kernel whether
> proc is enabled or not. Now, I'm a huge Al Viro fan, but his proposed
> improvements are in the wrong direction, IMHO.
>
> My first priority is to have the most fool-proof possible inner kernel
> interface. Second is trying to preserve some of the /proc features
> which actually work well when correctness isn't a huge issue (such as
> "give me everything in one table"). Efficiency of getting these
> things out of the kernel is a distant last (by see my previous comment
> on adapting sysctl(2)).
>
> I'd like to see /proc (/proc/sys) FINALLY live up to its promise
> (rich, logical, complete) in 2.5. We can do this by making it the
> simplest option for coders and users.
This is without a doubt the most levelheaded comment I've seen in the thread.
I'm looking at all those 6+ parameter calls and thinking about cleaning that
up with a struct, which is really what it's trying to be. I see lots of
proc reads ending with a boringly similar calc_metrics call, this is trying
to move out to the caller. I'd hope this kind of cleanup, at least, is
noncontroversial.
--
Daniel
Alexander Viro wrote:
>
> On Tue, 6 Nov 2001, Tim Jansen wrote:
>
> > But how can the user know this without looking into the kernel? Compare it to
> > /proc/mounts. Proc mounts escapes spaces and other special characters in
> > strings with an octal encoding (so spaces are replaced by '\040').
>
> Ah, yes - the horrible /proc/mounts. Check that code in 2.4.13-ac8, will
> you?
>
> Yes, current procfs sucks. We got a decent infrastructure that allows
> to write that code easily. Again, see -ac8 and watch fs/namespace.c
> code dealing with /proc/mounts.
>
> No need to play silly buggers with "one value per file" (and invite the
> Elder Ones with applications trying to use getdents()). Sigh...
Getdents() can be removed since 2.0 times. I never noticed *any*
application
actually using it.
"Albert D. Cahalan" wrote:
>
> You wrote:
> > Alexander Viro wrote:
> >> On Mon, 5 Nov 2001, Martin Dalecki wrote:
> >>> "Albert D. Cahalan" wrote:
> >>>
> >>> Every BASTARD out there telling the world, that parsing ASCII formatted
> >>> files
> >>
> >> What was your username, again?
> >
> > root, with uid != 0 and on a masquaraded host, who cares?
>
> I think the point is that it looks like to attributed your own
> words to me. Your post didn't quote anything from me, but it
> started off as follows:
Oh excuse me I didn't intend to put the pun on you personally
or disguise myself as somebody else. In fact I was kind of
supporting your point of view.
On Tue, Nov 06, 2001 at 03:34:40AM -0500, Alexander Viro wrote:
>
>
> On Tue, 6 Nov 2001, Petr Baudis wrote:
>
> > > As far as I can see, I cannot read /proc/[pid]/* info using sysctl.
> > That can be added. We just have existing interface, and I don't propose to
> > stick on its actual state as it isn't convenient, but to extend it to cope our
> > needs.
>
> No, that cannot. Guys, you've been told: it won't happen. I think that
> was loud and clear enough.
Al, sure no half-assed ad-hoc /proc substitute should go in, but there *are*
*real* problems, and just because you don't see them in your daily life doesn't
mean they don't exist.
These real problems could use a real solution. And *some* of us are at least
going to *discuss* what such a solution could be.
If, or when, we arrive at something where at least some of us agree, then we
will see if it will be your decision to include it at all. At this stage in
the discussion the final (draft) solution may not have anything to do with
filessytems at all. We don't know - or at least I don't know.
>
> Can it. Get a dictionary and look up the meaning of "veto".
Just because data is in a filesystem doesn't mean it doesn't need structure
*in* the data too.
Get over it Al.
>
> Oh, and as for "let's extend existing interfaces just because we had flunked
> 'strings in C'" - if you need Hurd, you know where to find it.
My approach would be more like making another interface that could eventually
gradually obsolete an older and inadequate one. I see nothing in /proc that's
worth extending on, as it stands today.
Clearly you have no comprehension of the problems that people are working on
solving with the new proc changes (or, rather, ideas for changes).
That's too bad. It would have been great to have constructive critisism from
someone with your experience.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Tue, Nov 06, 2001 at 09:23:00AM +0200, Kai Henningsen wrote:
> [email protected] (Jakob ?stergaard) wrote on 04.11.01 in <[email protected]>:
>
...
> >
> > Shell programming is great for small programs. You don't need type
> > information in the language when you can fit it all in your head.
> >
> > Now, go write 100K lines of shell, something that does something that is not
> > just shoveling lines from one app into a grep and into another app. Let's
> > say, a database. Go implement the next Oracle replacement in bash, and tell
> > me you don't care about types in your language.
>
> And now look at how large typical /proc-using code parts are. Do they
> match better with your first or your second paragraph?
If you write in C, you need type information. No matter if it's 5 lines or 50K.
How many of your shell languages use arbitrary precision arithmetic *always* ?
If they only do "sometimes" (for some operations) you'll be up shit creek without
a paddle once some value you thought was 32 bits turns out to be 64, and your
scripts, lacking type informaiton, handle this error "gracefully" (accounting
scripts for example where you don't check the output every day, but discover at
the end of the quarter that you're fucked because you only have the lower 32
bits of the user's network usage).
My argument with the 100K of shell was more to emphasize that type information
is necessary in complex systems.
Even if you just have 5 lines of Perl, you have a kernel too - it is a complex
system already.
>
> The first?
>
> I thought so.
Well, working for a company that makes a living of reading in /proc (and being
fairly good at it), it would be more like the second ;)
But I have also coded for HP-UX, Solaris, NT and others. I have seen how
others attack the problems of getting information out of systems, and I can see
that /proc as it is today is *not* a good answer to that problem.
There are worse systems out there than Linux, but there are better ones as
well. I see no reason why Linux shouldn't excel in this area too.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
> > > As far as I can see, I cannot read /proc/[pid]/* info using sysctl.
> > That can be added. We just have existing interface, and I don't propose to
> > stick on its actual state as it isn't convenient, but to extend it to cope
> > our needs.
> No, that cannot. Guys, you've been told: it won't happen. I think that was
> loud and clear enough.
So, if we want to be clear, we should freeze sysctl interface and focus to
/proc/? And sysctl is expected to disappear from the kernel by the time? If
not, I admit that I wasn't very much sure if exactly [pid] should go there. If
answer is not, fine, as specially /proc/[pid]/ should be parsed with no
problems with scanf() (expect "(procname)" in /proc/[pid]/stat ;), as a
difference to some nightmares in device specific proc files etc. _Those_ are
which I propose to mirror in sysctl tree. You still can put nice progress bars
here to help humans (which is great), and you won't make programmers run around
crying something about linux [developers] stupidity. And I don't see any
disadvantage in this - /proc/ should remain supported forever and nothing stops
you using it, and you won't fill it with .bloat files.. (and that actually was
what Linus told he won't accept, iirc)
> Can it. Get a dictionary and look up the meaning of "veto".
Well, 'veto' was for binary **** in /proc/. This is something completely
different. And actually done ;).
--
Petr "Pasky" Baudis
UN*X programmer, UN*X administrator, hobbies = IPv6, IRC
Real Users hate Real Programmers.
Public PGP key, geekcode and stuff: http://pasky.ji.cz/~pasky/
On Mon, 5 Nov 2001, Albert D. Cahalan wrote:
> You wrote:
> > Alexander Viro wrote:
> >> On Mon, 5 Nov 2001, Martin Dalecki wrote:
> >>> "Albert D. Cahalan" wrote:
> >>>
> >>> Every BASTARD out there telling the world, that parsing ASCII formatted
> >>> files
> >>
> >> What was your username, again?
> >
> > root, with uid != 0 and on a masquaraded host, who cares?
>
> I think the point is that it looks like to attributed your own
> words to me. Your post didn't quote anything from me, but it
> started off as follows:
>
> --------------------------------------
> "Albert D. Cahalan" wrote:
>
> Every BASTARD out there telling the world, that parsing ASCII formatted
> --------------------------------------
>
> Well, I didn't write that or anything else in your post.
>
Actually Al Viro's reply makes a lot more sense if your familiar with:
http://bofh.ntk.net/bastard.html
Gerhard
--
Gerhard Mack
[email protected]
<>< As a computer I find your faith in technology amusing.
At 08:25 AM 11/6/01 +0100, Jakob ?stergaard wrote:
>It seems to me that the sysctl interface does not have any type checking,
>so if for example I want to read the jiffies counter and supply a 32-bit
>field, sysctl will happily give me the first/last 32 bits of a field that
>could as well be 64 bits (bit widths do sometimes change, even on
>architectures
>that do not change). How am I to know ?
This fault isn't isolated to the sysctl interface. Look sometime at the
ioctl and fcntl interfaces and you will see that they have the same
problem. The issue is that the original Unix implementation of the
special-function interface assumed that only "ints" would be passed around,
and the need for special interfaces outgrew that assumption.
These functions have been so abused that POSIX refuses to "standardize"
them; instead, special APIs such as TERMIOS have been devised to put a
fairly-well defined shell around the most needed of these interfaces. (How
the implementer decides to bridge the userland/kernel barrier is not part
of the specification -- and doesn't need to be.)
The /proc API was developed to solve a specific problem. Now, people have
proposed and The Powers That Be have accepted extensions to the /proc
interface as a superior way to tune the kernel, particularly from shell
scripts, and to monitor the kernel, again from shell scripts. It's a good
thing, actually, in that it preserves the best of the Unix
mentality: don't re-invent, reuse.
What this whole discussion boils down to is people who want to tackle the
symptoms instead of the disease. The PROBLEM is that we have inadequate
standards and documentation of the /proc interface in its original ASCII
form. The proposed solution does NOTHING to address the real problem --
and I understand that because too many people here are so used to using a
hammer (code) that all problems start looking like nails.
The RIGHT tool to fix the problem is the pen, not the coding pad. I hereby
pick up that pen and put forth version 0.0.0.0.0.0.0.0.1 of the Rules of /Proc:
1) IT SHOULD NOT BE PRETTY. No tabs to line up columns. No "progress
bars." No labels except as "proc comments" (see later). No in-line labelling.
2) All signed decimal values shall be preceded by the "+" or "-" character
-- no exceptions. Implementers: this is available with *printf formats
with the + modifier, so the cost of this rule is one character per signed
value.
3) All integral decimal values shall be assumed by both programs and
humans to consist of any number of bits. [C'mon, people, dealing with
64-bit or 128-bit numbers is NOT HARD. If you don't know how,
LEARN. bc(1) can provide hints on how to do this -- use the Source,
Luke.] Numbers shall contain decimal digits [0-9] only. Zero-padding is
allowed.
4) All floating-point values shall contain a leading sign ("+" or "-") and
a decimal point (US) or comma (Europe). This rule assumes that the locale
for the kernel can be set; if this isn't true, then a period shall be used
to separate the integral part and the fractional part. Floating point
values may also contain exponents (using the *printf format %E or %G, NOT
%e -- the exponent must be preceded by the letter "e" or "E"). The
specification of a zero precision (which suppresses the output of the
decimal point or comma) is prohibited.
5) All string values matching the regular expression [!"$-+--~]* shall be
output as they are. Strings that do not match the above regular expression
shall be escaped in a standard manner, using a single routine provided in
the kernel's /proc interface to provide the proper escape sequences. The
output of that routine shall output standard backslash-character
representation of standard C-language control characters, and 3-digit octal
representation of any other character encountered. Output of the octal
representation may be truncated when such truncation would not cause
confusion -- see strace(1) for examples.
6) If you are wanting to display octal data, display it byte at a time
with a backslash. If you want to display hexadecimal data, use the "\x"
introduction, but include all bits so that the using program knows how long
the damn element is supposed to be -- NO leading -zero suppression should
be done. (Use the %x.xX format item in *printf, where "x" is the number of
hexadecimal digits.)
7) The /proc data may include comments. Comments start when an unescaped
hash character "#" is seen, and end at the next newline \n. Comments may
appear on a line of data, and the unescaped # shall be treated as end of
data for that line.
8) The regular expression ^#!([A-Za-z0-9_.-]+ )*[A-Za-z0-9_.-]$ defines a
special form of comment, which may be used to introduce header labels to an
application. As shown in the regular expression, each label is defined by
the regular subexpression [A-Za-z0-9_.-]+ and are separated by a single
space. The final (or only) label is terminated by a newline \n. No data
may appear on the header comment line. This line may only appear at the
beginning of the /proc pseudo file, and appears only ONCE.
9) The regular expression ^#=[0-9]+$ shall be used to output a optional
"version number" comment line If this appears in the /proc output, it
precedes the header comment line, and appears only ONCE.
10) Network addresses are defined as strings, either in their name form,
in dot quad notation for IPV4 numeric addresses, or in the numeric
equivalent for IPV6. Parsers can recognize the difference between a
dot-quad IP address and a floating-point number by the presence of the
second dot in the number. Network information output on /proc shall not
use the base/mask notation (123.456.789.012/255.255.255.0) and instead use
the bit-length notation (123.456.789.012/24).
11) IPX network addresses are a problem. In their normal form, they are
indistinguishable from a %F-format floating-point number with leading zeros
(which is allowed). Therefore the dot that usually appears in an IPX
network number must be replaced with the hyphen or dash "-"
character. Parsers can then differentiate an IPX network address from a
floating point number by noticing the embedded dash without the leading "e"
or "E" character. Flex handles this just fine.
-end-
This represents my first cut into a specification for the /proc interface
to deal with some of the issues that have come up in this thread. It's not
going to satisfy the "performance for me at all costs, DAMMIT" people and
it's not going to satisfy the "I like it PRETTY, DAMMIT" crowd either, but
it would provide a means for coming up with some standard tools to deal
with /proc, and a way to reign in the madness.
In particular, it means that a single tool could be developed to take a
/proc file and, in userland, make it a little more pretty. Those that
don't like table presentations can use the source of the tool to make a
display more to their liking.
The spec has a number of things missing. One issue missing is how to make
a predictable /proc subtree, so that people can find the goodies more
easily. Another issue is specifying how /proc can be used to set
parameters. (We seem to have less confusion in this area, so I didn't want
to spend any time on this aspect of the specification.)
OK, I'm clear of the firing range, start shooting holes in it.
Stephen Satchell
Stephen Satchell wrote:
> The /proc API was developed to solve a specific problem. Now, people
> have proposed and The Powers That Be have accepted extensions to the
> /proc interface as a superior way to tune the kernel, particularly from
> shell scripts, and to monitor the kernel, again from shell scripts.
> It's a good thing, actually, in that it preserves the best of the Unix
> mentality: don't re-invent, reuse.
I definately like this approach....
> The RIGHT tool to fix the problem is the pen, not the coding pad. I
> hereby pick up that pen and put forth version 0.0.0.0.0.0.0.0.1 of the
> Rules of /Proc:
>
> 1) IT SHOULD NOT BE PRETTY. No tabs to line up columns. No "progress
> bars." No labels except as "proc comments" (see later). No in-line
> labelling.
Tabs and/or multiple spaces should not be any harder to parse than
a single space, so I don't necessarily see the need to restrict them.
> 3) All integral decimal values shall be assumed by both programs and
> humans to consist of any number of bits. [C'mon, people, dealing with
> 64-bit or 128-bit numbers is NOT HARD. If you don't know how, LEARN.
> bc(1) can provide hints on how to do this -- use the Source, Luke.]
> Numbers shall contain decimal digits [0-9] only. Zero-padding is allowed.
Sometimes HEX is the best way to display things. I think we should be
able to use 0xAABBCCDD type formatting. The key here is to always prefix
with 0x so we can parse it correctly.
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
Stephen Satchell ([email protected]) wrote:
>The RIGHT tool to fix the problem is the pen, not the coding pad. I
>hereby pick up that pen and put forth version 0.0.0.0.0.0.0.0.1 of the
>Rules of /Proc:
Agreed.
>1) IT SHOULD NOT BE PRETTY. No tabs to line up columns. No "progress
>bars." No labels except as "proc comments" (see later). No in-line labelling.
It should not be pretty TO HUMANS. Slight difference. It should be pretty
to shellscripts and other applications though.
Yes, that means we won't be able to do a 'cat /proc/cpuinfo' anymore in the
future. Bummer.
>2) All signed decimal values shall be preceded by the "+" or "-" character
>-- no exceptions. Implementers: this is available with *printf formats
>with the + modifier, so the cost of this rule is one character per signed
>value.
Why?
>3) All integral decimal values shall be assumed by both programs and
>humans to consist of any number of bits. [C'mon, people, dealing with
>64-bit or 128-bit numbers is NOT HARD. If you don't know how,
>LEARN. bc(1) can provide hints on how to do this -- use the Source,
>Luke.] Numbers shall contain decimal digits [0-9] only. Zero-padding is
>allowed.
Ack.
>4) All floating-point values shall contain a leading sign ("+" or "-") and
>a decimal point (US) or comma (Europe). This rule assumes that the locale
>for the kernel can be set; if this isn't true, then a period shall be used
>to separate the integral part and the fractional part. Floating point
>values may also contain exponents (using the *printf format %E or %G, NOT
>%e -- the exponent must be preceded by the letter "e" or "E"). The
>specification of a zero precision (which suppresses the output of the
>decimal point or comma) is prohibited.
As long as I can parse it easily, it's fine by me. Easily parsable -> not
localised! Localisation is for userspace, not for the kernel.
[...]
>7) The /proc data may include comments. Comments start when an unescaped
>hash character "#" is seen, and end at the next newline \n. Comments may
>appear on a line of data, and the unescaped # shall be treated as end of
>data for that line.
Please don't do this. I want to be able to do 'read JIFFIES <
/proc/$jiffiesfile'. Make the name of the file speak for itself. One field
per file in a clearly defined format.
>8) The regular expression ^#!([A-Za-z0-9_.-]+ )*[A-Za-z0-9_.-]$ defines a
>special form of comment, which may be used to introduce header labels to an
>application. As shown in the regular expression, each label is defined by
>the regular subexpression [A-Za-z0-9_.-]+ and are separated by a single
>space. The final (or only) label is terminated by a newline \n. No data
>may appear on the header comment line. This line may only appear at the
>beginning of the /proc pseudo file, and appears only ONCE.
>
>9) The regular expression ^#=[0-9]+$ shall be used to output a optional
>"version number" comment line If this appears in the /proc output, it
>precedes the header comment line, and appears only ONCE.
You don't need 8) and 9) when using single fields in a file. Multiple
fields are pretty to a human but not to a simple script.
Optionally doing a
while read MOUNTPOINT DIR OPTS ; do
# blah
done < /proc/$mountfile
Would be acceptable.
>10) Network addresses are defined as strings, either in their name form,
>in dot quad notation for IPV4 numeric addresses, or in the numeric
>equivalent for IPV6. Parsers can recognize the difference between a
>dot-quad IP address and a floating-point number by the presence of the
>second dot in the number. Network information output on /proc shall not
>use the base/mask notation (123.456.789.012/255.255.255.0) and instead use
>the bit-length notation (123.456.789.012/24).
You should already know you're going to read an IP address to begin with.
>11) IPX network addresses are a problem. In their normal form, they are
>indistinguishable from a %F-format floating-point number with leading zeros
>(which is allowed). Therefore the dot that usually appears in an IPX
>network number must be replaced with the hyphen or dash "-"
>character. Parsers can then differentiate an IPX network address from a
>floating point number by noticing the embedded dash without the leading "e"
>or "E" character. Flex handles this just fine.
See 10).
As you can see, I don't really care about the user reading /proc. We should
provide a backwards compatible /proc to avoid major backage, so users would
be able to read from this interface. Please keep the new interface to the
applications, as it should have been from the start.
And yes, coding a cpuinfo.sh would be very, very easy, so we (the users)
don't need the old interface anyway.
--
Erik Hensema ([email protected]) ICQ# 8280101
Registered Linux user #38371 -- http://counter.li.org
--S279844AbRKFT36=_/vger.kernel.org--
In article <[email protected]> Erik Anderson wrote:
> Come now, it really isn't that difficult:
> char name[80];
> if (sscanf(line, "%4u %4u %llu %s", &major, &minor, &size, name) == 4)
if it's so easy to do, why do you have a great big buffer overflow here?
--
nicholas black ([email protected]) developer, trellis network security
> >1) IT SHOULD NOT BE PRETTY. No tabs to line up columns. No "progress
> >bars." No labels except as "proc comments" (see later). No in-line labelling.
>
> It should not be pretty TO HUMANS. Slight difference. It should be pretty
> to shellscripts and other applications though.
>
> Yes, that means we won't be able to do a 'cat /proc/cpuinfo' anymore in the
> future. Bummer.
What about adding a separate choice in the kernel config to allow for
/hproc (or something) human readable /proc file system?
--
Roy Sigurd Karlsbakk, MCSE, MCNE, CLS, LCA
Computers are like air conditioners.
They stop working when you open Windows.
On 6 Nov 2001, Erik Hensema wrote:
> >1) IT SHOULD NOT BE PRETTY. No tabs to line up columns. No "progress
> >bars." No labels except as "proc comments" (see later). No in-line labelling.
>
> It should not be pretty TO HUMANS. Slight difference. It should
> be pretty to shellscripts and other applications though.
I really fail to see your point, it's trivial to make
files which are easy to read by humans and also very
easy to parse by shellscripts.
PROCESSOR=0
VENDOR_ID=GenuineIntel
CPU_FAMILY=6
MODEL=6
MODEL_NAME="Celeron (Mendocino)"
.....
As you can see, this is easily readable by humans,
while "parsing" by a shell script would be as follows:
. /proc/cpuinfo
After which you could just "echo $PROCESSOR" or
something like that ...
Yes, this is probably a bad example, but it does show
that machine-readable and human-readable aren't mutually
exclusive.
regards,
Rik
--
DMCA, SSSCA, W3C? Who cares? http://thefreeworld.net/
http://www.surriel.com/ http://distro.conectiva.com/
Rik van Riel ([email protected]) wrote:
>On 6 Nov 2001, Erik Hensema wrote:
>
>> >1) IT SHOULD NOT BE PRETTY. No tabs to line up columns. No "progress
>> >bars." No labels except as "proc comments" (see later). No in-line labelling.
>>
>> It should not be pretty TO HUMANS. Slight difference. It should
>> be pretty to shellscripts and other applications though.
>
>I really fail to see your point, it's trivial to make
>files which are easy to read by humans and also very
>easy to parse by shellscripts.
Right, let me rephrase myself. There's no real need for /proc to be pretty
to humans, though it would be nice. Readability by applications should be
the priority though.
>PROCESSOR=0
>VENDOR_ID=GenuineIntel
>CPU_FAMILY=6
>MODEL=6
>MODEL_NAME="Celeron (Mendocino)"
Nice, it could work. However, the kernel does impose policy in this case
(variable naming policy, that is). But it's a nice compromise between
readability by humans and readability by programs.
--
Erik Hensema ([email protected])
I'm on the list, no need to Cc: me, though I appreciate one if your
mailer doesn't support the References header.
On Tue, 6 Nov 2001, Roy Sigurd Karlsbakk wrote:
>What about adding a separate choice in the kernel config to allow for
>/hproc (or something) human readable /proc file system?
Just think about it for a minute.
There are three ways to address "/proc":
- 100% binary interface
* human interaction doesn't belong in the kernel - period.
- optimally formated text
* not designed for humans, but in human format ("text")
- human readable
* thus the entire OS is reduced to "cat" and "echo"
Providing more than one interface/format means code duplication. It doesn't
matter how much is actually compiled. Someone has to write it. Others have
to maintain it. Suddenly a change in one place becomes a change in dozens
of places.
Personally, I vote for a 100% binary interface. (no surprise there.) It
makes things in kernel land so much cleaner, faster, and smaller. Yes,
it increases the demands on userland to some degree. However, printf/scanf
is one hell of a lot more wasteful than a simple mov.
For my worst case scenerio, answer this:
How do you tell how many processors are in a Linux box?
The kernel already knows this, but it isn't exposed to userland. So, one
must resort to ass-backward, stupid shit like counting entries in
/proc/cpuinfo. And to make things even worse, the format is different for
every arch! (I've been bitching about this for four (4) years.)
And for those misguided people who think processing text is faster than
binary, you're idiots. The values start out as binary, get converted to
text, copied to the user, and then converted back to binary. How the hell
is that faster than copying the original binary value? (Answer: it isn't.)
And those who *will* complain that binary structures are hard to work with,
(you're idiots too :-)) a struct is far easier to deal with than text
processing, esp. for anyone who knows what they are doing. Yes, changes
to the struct do tend to break applications, but the same thing happens
to text based inputs as well. Perhaps some of you will remember the stink
that arose when the layout of /proc/meminfo changed (and broke, basically,
everything.)
--Ricky
On Tuesday 06 November 2001 22:24, Rik van Riel wrote:
> I really fail to see your point, it's trivial to make
> files which are easy to read by humans and also very
> easy to parse by shellscripts.
> PROCESSOR=0
> VENDOR_ID=GenuineIntel
> CPU_FAMILY=6
> MODEL=6
> MODEL_NAME="Celeron (Mendocino)"
Wow, this is a good one...
bye..
On Tue, 6 Nov 2001, Ricky Beam wrote:
[snip]
> And those who *will* complain that binary structures are hard to work with,
> (you're idiots too :-)) a struct is far easier to deal with than text
> processing, esp. for anyone who knows what they are doing. Yes, changes
Learn C, then learn some respect to your betters[1], then come back.
*PLONK*
[1] like, say it, guys who had invented UNIX and C.
On Tue Nov 06, 2001 at 02:49:23PM -0500, [email protected] wrote:
> In article <[email protected]> Erik Anderson wrote:
> > Come now, it really isn't that difficult:
>
> > char name[80];
> > if (sscanf(line, "%4u %4u %llu %s", &major, &minor, &size, name) == 4)
>
> if it's so easy to do, why do you have a great big buffer overflow here?
Sorry, no doughnut for you. drivers/block/genhd.c:
#ifdef CONFIG_PROC_FS
int get_partition_list(char *page, char **start, off_t offset, int count)
{
...
char buf[64];
...
len += snprintf(page + len, 63, "%4d %4d %10d %s\n", gp->major, n,
gp->sizes[n], disk_name(gp, n, buf));
so each /proc/partitions line maxes out at 63 bytes. So not only
is there no overflow, I am providing 16 extra bytes of padding.
-Erik
--
Erik B. Andersen http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--
On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
> I really fail to see your point, it's trivial to make
> files which are easy to read by humans and also very
> easy to parse by shellscripts.
>
> PROCESSOR=0
> VENDOR_ID=GenuineIntel
> CPU_FAMILY=6
> MODEL=6
> MODEL_NAME="Celeron (Mendocino)"
> .....
>
> As you can see, this is easily readable by humans,
> while "parsing" by a shell script would be as follows:
>
> . /proc/cpuinfo
>
> After which you could just "echo $PROCESSOR" or
> something like that ...
I think we have a winner! If we could establish this
as policy that would be _sweet_!
-Erik
--
Erik B. Andersen http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--
On Tue, 2001-11-06 15:28:26 -0700, Erik Andersen <[email protected]>
wrote in message <[email protected]>:
> On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
> > PROCESSOR=0
> > VENDOR_ID=GenuineIntel
> > CPU_FAMILY=6
> > MODEL=6
> > MODEL_NAME="Celeron (Mendocino)"
> > .....
PROCESSOR=1
...
> > . /proc/cpuinfo
>
> I think we have a winner! If we could establish this
> as policy that would be _sweet_!
What do you expect on a SMP system?
MfG, JBG
--
Jan-Benedict Glaw . [email protected] . +49-172-7608481
On Tue Nov 06, 2001 at 11:33:49PM +0100, Jan-Benedict Glaw wrote:
> On Tue, 2001-11-06 15:28:26 -0700, Erik Andersen <[email protected]>
> wrote in message <[email protected]>:
> > On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
> > > PROCESSOR=0
> > > VENDOR_ID=GenuineIntel
> > > CPU_FAMILY=6
> > > MODEL=6
> > > MODEL_NAME="Celeron (Mendocino)"
> > > .....
>
> PROCESSOR=1
> ...
>
> > > . /proc/cpuinfo
> >
> > I think we have a winner! If we could establish this
> > as policy that would be _sweet_!
>
> What do you expect on a SMP system?
How about something like:
NUMBER_CPUS=8
VENDOR_ID_0=GenuineIntel
CPU_FAMILY_0=6
MODEL_0=6
MODEL_NAME_0="Celeron (Mendocino)"
...
-Erik
--
Erik B. Andersen http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--
Jan-Benedict Glaw wrote:
> On Tue, 2001-11-06 15:28:26 -0700, Erik Andersen <[email protected]>
> wrote in message <[email protected]>:
>
>>On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
>>
>>>PROCESSOR=0
>>>VENDOR_ID=GenuineIntel
>>>CPU_FAMILY=6
>>>MODEL=6
>>>MODEL_NAME="Celeron (Mendocino)"
>>>.....
>>>
>
> PROCESSOR=1
or PROCESSOR1=1
Either way, it's still trivial to parse with perl or c/c++/Java
and probably a dozen other languages I don't know...
Ben
> ...
>
>
>>>. /proc/cpuinfo
>>>
>>I think we have a winner! If we could establish this
>>as policy that would be _sweet_!
>>
>
> What do you expect on a SMP system?
>
> MfG, JBG
>
>
--
Ben Greear <[email protected]> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
On Tue, 2001-11-06 15:42:40 -0700, Erik Andersen <[email protected]>
wrote in message <[email protected]>:
> On Tue Nov 06, 2001 at 11:33:49PM +0100, Jan-Benedict Glaw wrote:
> > On Tue, 2001-11-06 15:28:26 -0700, Erik Andersen <[email protected]>
> > wrote in message <[email protected]>:
> > > On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
> > > > PROCESSOR=0
> > > > VENDOR_ID=GenuineIntel
> >
> > PROCESSOR=1
> > ...
> >
> > > > . /proc/cpuinfo
> > >
> > > I think we have a winner! If we could establish this
> > > as policy that would be _sweet_!
> >
> > What do you expect on a SMP system?
>
> How about something like:
> NUMBER_CPUS=8
> VENDOR_ID_0=GenuineIntel
Well, somebody came up with the idea of having XML in kernel. I'd really
love to see all those single-file-multiple-info files going away. Can't
we have a simple tree using one file per value / one value per file?
Would ease *many* things a lot...
MfG, JBG
--
Jan-Benedict Glaw . [email protected] . +49-172-7608481
In article <[email protected]> you wrote:
> Sorry, no doughnut for you. drivers/block/genhd.c:
> #ifdef CONFIG_PROC_FS
> int get_partition_list(char *page, char **start, off_t offset, int count)
> char buf[64];
> so each /proc/partitions line maxes out at 63 bytes. So not only
> is there no overflow, I am providing 16 extra bytes of padding.
"code poet?" you've plucked an 80 from the air. regardless of what the
kernel prints now and how it's limited (deep within drivers/block/genhd.c),
there is no reference to this silent 63 via either explicit comment or
pure code. your code remains happily ignorant of any modification to this
postcondition, and when that changes (as it surely will), you lose. it's
uninspired coding like the above that keeps the buffer overflow
technique alive.
now, i imagine you're more skilled than this, and would have invested
the time to do it properly the first time around (certainly *my*
managers wouldn't accept "buried within the backend is a hardcoded
constant...", but i work in network security). others, however, may
not be so skilled as you, and what of when they're writing your server?
c string processing is all of doable, mature, and meticulous. "done
properly by beginners" is not how i would describe it.
--
nicholas black ([email protected]) http://trellisinc.com
On Tue, 2001-11-06 15:46:43 -0700, Ben Greear <[email protected]>
wrote in message <[email protected]>:
> Jan-Benedict Glaw wrote:
> >On Tue, 2001-11-06 15:28:26 -0700, Erik Andersen <[email protected]>
> >wrote in message <[email protected]>:
> >>On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
> >>>PROCESSOR=0
> >>>VENDOR_ID=GenuineIntel
> >>>CPU_FAMILY=6
> >>>MODEL=6
> >>>MODEL_NAME="Celeron (Mendocino)"
> >>>.....
> >PROCESSOR=1
> or PROCESSOR1=1
>
> Either way, it's still trivial to parse with perl or c/c++/Java
> and probably a dozen other languages I don't know...
Come on - it's a cludge...
MfG, JBG
--
Jan-Benedict Glaw . [email protected] . +49-172-7608481
On Tue, 6 Nov 2001, Erik Andersen wrote:
> On Tue Nov 06, 2001 at 11:33:49PM +0100, Jan-Benedict Glaw wrote:
> > On Tue, 2001-11-06 15:28:26 -0700, Erik Andersen <[email protected]>
> > wrote in message <[email protected]>:
> > > On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
> > > > PROCESSOR=0
> > > > VENDOR_ID=GenuineIntel
> > > > CPU_FAMILY=6
> > > > MODEL=6
> > > > MODEL_NAME="Celeron (Mendocino)"
> > > > .....
> >
> > PROCESSOR=1
> > ...
> >
> > > > . /proc/cpuinfo
> > >
> > > I think we have a winner! If we could establish this
> > > as policy that would be _sweet_!
> >
> > What do you expect on a SMP system?
>
> How about something like:
> NUMBER_CPUS=8
> VENDOR_ID_0=GenuineIntel
> CPU_FAMILY_0=6
> MODEL_0=6
> MODEL_NAME_0="Celeron (Mendocino)"
> ...
(Though I think all caps variables are ugly, I can concede)
How about
$ cat /proc/cpus/0
PROCESSOR=0
VENDOR_ID=GenuineIntel
CPU_FAMILY=6
MODEL=6
MODEL_NAME="Celeron (Mendocino)"
.....
$ for i in `ls /proc/cpus/` ; do
cat $i
done
...
-pat
On Tue Nov 06, 2001 at 02:53:27PM -0800, Patrick Mochel wrote:
> How about
>
> $ cat /proc/cpus/0
>
> PROCESSOR=0
> VENDOR_ID=GenuineIntel
> CPU_FAMILY=6
> MODEL=6
> MODEL_NAME="Celeron (Mendocino)"
> .....
>
> $ for i in `ls /proc/cpus/` ; do
> cat $i
> done
> ...
much better.
-Erik
--
Erik B. Andersen http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--
On 20011106 Erik Hensema wrote:
>Stephen Satchell ([email protected]) wrote:
>>The RIGHT tool to fix the problem is the pen, not the coding pad. I
>>hereby pick up that pen and put forth version 0.0.0.0.0.0.0.0.1 of the
>>Rules of /Proc:
>
>Agreed.
>
>>
>>7) The /proc data may include comments. Comments start when an unescaped
>>hash character "#" is seen, and end at the next newline \n. Comments may
>>appear on a line of data, and the unescaped # shall be treated as end of
>>data for that line.
>
Well, perhaps this is a stupid idea. But I throw it.
ASCII is good for humans, bin is good to read all info easily in a program.
Lets have both.
Once I thought the solution could be to make /proc entries behave
differently in two scenarios. Lets suppose you could open files in ASCII
or binary mode. An entry opened in ASCII returns printable info and opened
in binary does the binay output. As there is no concept of ASCII or binary
files in low-level file management, the O_DIRECT flag (or any new flag) could
be used.
And (supposing all fies in /proc are 0-sized) perhaps a seek position could be
defined for reading a format string or a set of field names, ie:
lseek(SEEK_FORMAT); read(...);
Same for writing, opening in "wa" allows to write a formatted number (ie,
echo 0xFF42 > /proc/the/fd) and "wb" allows to write binary data
(write("/proc/the/fd",&myValue)).
Just an idea...
--
J.A. Magallon # Let the source be with you...
mailto:[email protected]
Mandrake Linux release 8.2 (Cooker) for i586
Linux werewolf 2.4.14-beo #1 SMP Tue Nov 6 16:23:01 CET 2001 i686
On Tue Nov 06, 2001 at 05:47:53PM -0500, [email protected] wrote:
> In article <[email protected]> you wrote:
> > Sorry, no doughnut for you. drivers/block/genhd.c:
>
> > #ifdef CONFIG_PROC_FS
> > int get_partition_list(char *page, char **start, off_t offset, int count)
> > char buf[64];
> > so each /proc/partitions line maxes out at 63 bytes. So not only
> > is there no overflow, I am providing 16 extra bytes of padding.
>
> "code poet?" you've plucked an 80 from the air. regardless of what the
Yup, you are right. You found me out. I'm a complete impostor
and I know nothing about programming because I spent the exactly
4 seconds to write a simple example without first researching the
underlying interface. Know why? Because it was an _example_,
not a dissertation on string processing. If I was actually going
to write that code, I would have spent the extra two minute it
would have taken to read the kernel source first. Yes, fixed
buffers suck. But that is the current interface, so get over it
and get over the pointless ad hominem attacks.
> constant...", but i work in network security). others, however,
> may not be so skilled as you, and what of when they're writing
> your server?
Then I should be spending more time interviewing so I don't hire
dolts, and I would spend more time auditing their code and
teaching them how to program.
I actually avoid using /proc as much as possible in all my code
(I even wrote a patch to replace /proc with a char device about a
year ago, rejected of course for the same reasons Linus expressed
on this thread). There are many valid reasons why /proc sucks
(especially for embedded systems). But I don't consider the
ASCII-is-hard-to-parse argument valid.
-Erik
--
Erik B. Andersen http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--
Ricky Beam wrote:
> And for those misguided people who think processing text is faster than
> binary, you're idiots. The values start out as binary, get converted to
> text, copied to the user, and then converted back to binary. How the hell
> is that faster than copying the original binary value? (Answer: it isn't.)
And then converted back to ASCII for printout on the terminal ;-).
> And those who *will* complain that binary structures are hard to work with,
> (you're idiots too :-)) a struct is far easier to deal with than text
> processing, esp. for anyone who knows what they are doing. Yes, changes
> to the struct do tend to break applications, but the same thing happens
> to text based inputs as well. Perhaps some of you will remember the stink
> that arose when the layout of /proc/meminfo changed (and broke, basically,
> everything.)
Amen.
The true problem with /proc and user land applications is that around 6
years
ago people did just give up on adapting the parsers to the ever chaning
"wonderfull" ascii interfaces those times. The second problem is that
/proc
is one of the few design "inventions" in linux, which didn't get copied
over
from some other UNIX box and Linus doesn't wan't recognize that this was
A BAD DESIGN CHOICE.
On Tue, 6 Nov 2001 [email protected] wrote:
>"code poet?" you've plucked an 80 from the air. regardless of what the
>kernel prints now and how it's limited (deep within drivers/block/genhd.c),
>there is no reference to this silent 63 via either explicit comment or
>pure code. your code remains happily ignorant of any modification to this
>postcondition, and when that changes (as it surely will), you lose. it's
>uninspired coding like the above that keeps the buffer overflow
>technique alive.
Exactly. Just because the code _currently_ won't generate more than 63
chars doesn't mean it always will. And who says the application will see
the true, kernel generated "/proc/partitions"? <raises eyebrow>
>c string processing is all of doable, mature, and meticulous. "done
>properly by beginners" is not how i would describe it.
Experience shows beginners rarely get thing right the first time out. (Or
the second or third time if they are like some of my previous students.)
--Ricky
On Tue, Nov 06, 2001 at 09:48:52AM +1100, Rusty Russell wrote:
>
> What concerns me most is the pain involved in writing a /proc or
> sysctl interface in the kernel today. Take kernel/module.c's
> get_ksyms_list as a typical example: 45 lines of code to perform a
> very trivial task. And this code is sitting in your kernel whether
> proc is enabled or not. Now, I'm a huge Al Viro fan, but his proposed
> improvements are in the wrong direction, IMHO.
I'm all for simplifying the internal kernel interfaces. What I'm not
at *all* convinced about is that it's worth it to make serious changes
to the layout of /proc, /proc/sys, etc. And the concept of being able
to very rapidly and easily get at system configuration variables
without needing to make sure that /proc is mounted is a very, very
good thing.
While sysctl isn't the most compact way of doing things, it *is*
simpler than doing things using a raw /proc interfaces. If you just
want sysctl to modify a single integer variable, it's basically just a
table entry and a call to register that table with sysctl. If you
want to do more sophisticated things, then yes, it gets more
complicated faster than it probably should.
But the bottom line is as far as I'm concerned is:
Baby. Bathwater. Let's not throw out the wrong thing....
- Ted
Hi!
> > > > It eats CPU, it's error-prone, and all in all it's just "wrong".
> > >
> > > How much of your CPU time is spent parsing /proc files?
> >
> > 30% of 486 if you run top... That's way too much and top is unusable
> > on slower machines.
> > "Not fast enough for showing processes" sounds wery wrong.
>
> Is this time actually spent parsing ascii, or is it procfs
> walking all the page tables of all processes ? ;)
About 1:1, probably. Readdir of /proc and open/read/parse/close is
pretty expensive.
Pavel
--
Philips Velo 1: 1"x4"x8", 300gram, 60, 12MB, 40bogomips, linux, mutt,
details at http://atrey.karlin.mff.cuni.cz/~pavel/velo/index.html.
Alex Bligh - linux writes:
> What amuses me about those arguing for binary structures as a long term
> solution for communicating, on a flexible but long lived basis, is that the
> entire OS Genre they appear to be advocating (UNIX et al.) has been doing
> this, on an app to app (as opposed to kernel to app) basis, for far longer
> than most of them can remember, in situations where performance is far more
> relevant, and pitted against far more 3l33t 5tud3nt2 than we we shake a
> stick at, but /still/ they persist.
quotas, process accounting, wtmp, utmp, Tux web server logs...
> Through minor local idiocy, I had trashed my local lilo partition,
> and had to try and boot with a Debian CD-Rom with a 2.2 kernel. I
> forgot to ask for single user more. Not only did it boot first time,
> it booted fully, apart from two minor things: no iptables, and
> (shock horror) the sound card didn't work it wasn't compatible.
> Similarly, I've loaded 2.4 kernels with no problems onto 2.2 systems.
>
> This "dreadful" /proc interface everyone talks about has been
> *STAGGERINGLY GOOD* in terms of forward and backward compatibility.
...
> has worked well just because kernel developers and maintainers
> have showed themselves unwilling to break existing userspace
> tools, and vice versa.
I can see that you are unfamiliar with the /proc filesystem.
You can change kernels because app developers work hard to
be tolerant of stupid /proc changes. Some of the crap that
I've stumbled across, mostly while doing procps work:
/proc/*/stat signal info was changed from decimal to hex for
a while. I changed it back, but too late: the evil hex had
already leaked out into the world, and I had to modify libproc.
Quick, is 16785472 in decimal or hex? So use the "friendly" new
/proc/*/status file instead, but...
The "SigCgt" in /proc/*/status wasn't always spelled that way.
It wasn't always in the same location either, so what to do?
That calls for another hack: assume signal values follow each other.
Kernel threads don't have memory info. Somebody added "kB" on the
end of most lines in /proc/meminfo. /proc/stat has changed in ways
that I'm glad to have forgotten. /proc/interrupts has been through
a flood of horrid changes: the PIC type going from "" to "XT PIC"
to "XT-PIC", the last column getting spaces (consider "XT PIC"!),
a new header (just waiting for extra info) and a variable number
of per-CPU columns in the middle to replace what was once a total.
Maybe /proc/cpuinfo is the worst... I hear SPARC is pretty smelly,
but hey, every arch screws app developers in some unique way.
On Wed, 7 Nov 2001, Albert D. Cahalan wrote:
> /proc/*/stat signal info was changed from decimal to hex for
> a while.
And _THAT_ should be a reason for immediate and severe LARTing.
That (and wankfests with progress bars, yodda, yodda) needs to
be stopped. But notice that switching to binary or doing
tags, etc. has nothing to that - same breakage will continue
with them unless you LART lusers who do it. Which works (or
doesn't) regardless of API.
Random API changes (and as a flipside of the same, APIs that
had never been thought through) are crap and they should be
dealt with. However, if you think that switching to binary
is going to make people think what they are doing... there's
a nice bridge I'd like to sell.
Ricky Beam wrote:
> For an example of /proc done right, find a Solaris box. What do you find
> in /proc? Gee, process information. Only process information. In. Binary.
Amen. I have enough of them at hand. And I don't miss any "wonderfull"
functionality
from linux /proc if I'm working on them.
Hi,
still,
char name[80]
if (sscanf(line, "%4u %4u %llu %80s", &major, &minor, &size, name) == 4)
would safeguard you agains whatever you might read from your line, this is
better protection agains buffer overflows than looking up the code that you
think you'll be reading the output from. This code is NOT under your control,
usually...
> On Tue Nov 06, 2001 at 02:49:23PM -0500, [email protected] wrote:
> > In article <[email protected]> Erik Anderson wrote:
> > > Come now, it really isn't that difficult:
> >
> > > char name[80];
> > > if (sscanf(line, "%4u %4u %llu %s", &major, &minor, &size, name) == 4)
> >
> > if it's so easy to do, why do you have a great big buffer overflow here?
>
>
> Sorry, no doughnut for you. drivers/block/genhd.c:
>
> #ifdef CONFIG_PROC_FS
> int get_partition_list(char *page, char **start, off_t offset, int count)
> {
> ...
> char buf[64];
>
> ...
>
> len += snprintf(page + len, 63, "%4d %4d %10d %s\n", gp->major, n,
> gp->sizes[n], disk_name(gp, n, buf));
>
> so each /proc/partitions line maxes out at 63 bytes. So not only
> is there no overflow, I am providing 16 extra bytes of padding.
>
> -Erik
>
> --
> Erik B. Andersen http://codepoet-consulting.com/
> --This message was written using 73% post-consumer electrons--
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
--
Met vriendelijke groeten,
Remco Post
SARA - Stichting Academisch Rekencentrum Amsterdam
High Performance Computing Tel. +31 20 592 8008 Fax. +31 20 668 3167
"I really didn't foresee the Internet. But then, neither did the computer
industry. Not that that tells us very much of course - the computer industry
didn't even foresee that the century was going to end." -- Douglas Adams
--On Wednesday, November 07, 2001 2:20 AM -0500 "Albert D. Cahalan" <[email protected]> wrote:
> I can see that you are unfamiliar with the /proc filesystem.
>
> You can change kernels because app developers work hard to
> be tolerant of stupid /proc changes.
> Some of the crap that
> I've stumbled across, mostly while doing procps work:
My point is two-fold:
1. Sure, you (and no doubt others) had to do lots
of work fixing userland, which
you shouldn't have had to do. But that seems to be
more down to lack of discipline in interface changes
rather than because the interface isn't binary. I am
sure it's easier to strip out a spurious 'kb' that
gets added after a number, than to deal with (say)
an extra inserted DWORD with no version traching.
2. The system survived. The interface was there. Bload
sweat and tears were no doubt expended, possibly by
the wrong people, but in practice the interface worked,
(no, not optimally). I'd suggest even with it's badly
managed changes, thouse have been less disruptive than
many other non-ascii based conventions (I'm thinking
back to Net-2E/2D). Sure, wtmp, utmp have been stable.
Not sufficiently familiar with process accounting or
quotas, though I have some possibly incorrect memory
of the latter suffering some format change which was
generated compatibility problems with user space tools?
--
Alex Bligh
As an admin, I have to say that there are few things in the world that
cheese me off more than binary logging/statistics. If you change all of
/proc to binary and assume that userspace tools will keep up with changes,
you're eliminating the use of my personal most common set of proc parsing
tools: cat and grep.
With binary, the assumption is made that someone is actually going to
maintain all of those tools, as well as that admins will actually be able
to keep them all straight. It just reeks of AIXness, with the lspv and the
giant nasty ODM database.
I understand where the binary crowd is coming from as far as collation
goes, but I personally use the simple stuff every day (cat /proc/pci) and
any sort of aggregate/collation tool (lspci) almost never.
--
Blue Lang, editor, b-side.org http://www.b-side.org
2315 McMullan Circle, Raleigh, North Carolina, 27608 919 835 1540
On Wed, 7 Nov 2001, Blue Lang wrote:
>I understand where the binary crowd is coming from as far as collation
>goes, but I personally use the simple stuff every day (cat /proc/pci) and
>any sort of aggregate/collation tool (lspci) almost never.
Just as an aside, /proc/pci was slated for deletion a long time ago. There
were warnings emitted by the kernel every time something accessed it for
some time. /proc/pci is dependent on a (large) list of names being in the
kernel to map the numbers to text. I think the plans to kill /proc/pci
have been abandoned, however. (The table makes the kernel big, but most of
it gets released once the pci bus scan is complete ala __init_data.)
As for code maint. and kernel changes breaking things... both happen already
with the text based system. Binary structures can be constructed to be
extensible without breaking old tools. Plus, the information exported from
the kernel (in the case of processes) need not change with every version
of the kernel.
I don't think people realize just how many CPU cycles are being needlessly
expended in passing information between the kernel and the user. When I
have the time, I'll add binary interfaces for various things and show exactly
how expensive the existing system is -- all for the sake of being able to
use 'cat' and 'grep'.
--Ricky
On Wed, 7 Nov 2001, Pavel Machek wrote:
> > > It eats CPU, it's error-prone, and all in all it's just "wrong".
> >
> > How much of your CPU time is spent parsing /proc files?
>
> 30% of 486 if you run top... That's way too much and top is unusable
> on slower machines.
> "Not fast enough for showing processes" sounds wery wrong.
Is this time actually spent parsing ascii, or is it procfs
walking all the page tables of all processes ? ;)
Rik
--
DMCA, SSSCA, W3C? Who cares? http://thefreeworld.net/
http://www.surriel.com/ http://distro.conectiva.com/
-----BEGIN PGP SIGNED MESSAGE-----
Hash: RIPEMD160
Replying to Ricky Beam:
> And those who *will* complain that binary structures are hard to work with,
> (you're idiots too :-)) a struct is far easier to deal with than text
> processing, esp. for anyone who knows what they are doing. Yes, changes
Just read the whole thread, and got my head explode. Let me reply to random
picked msg.
First, to these who know about kernel-user interaction in, for example,
windows. Win32 API has functions, which fill structs, defined in SDK headers.
Linux kernel is much more light-w ... or maybe for any other reason it does
not have that functions. pity. they can achieve performance you need. and no
need for parsing, yeah. (we also do have X, which implementation is much
more slow than winNT gui).
but.
How much time you will parse a single integer ? Without any text around
needs to be thrown away, optionally with 0x and considered it __int64 ?
This is much better than current /proc, yeah ? Anyway, Linus will keep proc
ASCII, and we don't have another Linus.
So proposed standard for /proc - is a good idea. Let's get rid of
progressbars, percent-o-meters with pseudographics. Maybe we should switch
from single file, for ex, cpuinfo, to dir with many INDIVIDUAL files
containing single number or feature-set in it. Splitting away parts that
need to be formatted in-kernel and then parsed in-user maybe a good decision
'coz ... maybe they are rarely used ?
Another point. Including formatting code in EVERY kernel part that resides in
/proc maybe (as for me) a bad idea - so one can do simple interface,
formatting functions, and switch modules to use them
Another point is writable /proc files - but no one in this thread said
something clever about it and ... maybe discuss it later ?
- --
Paul P 'Stingray' Komkoff 'Greatest' Jr // (icq)23200764 // (irc)Spacebar
PPKJ1-RIPE // (smtp)[email protected] // (http)stingr.net // (pgp)0xA4B4ECA4
-----BEGIN PGP SIGNATURE-----
iEYEAREDAAYFAjvptKwACgkQyMW8naS07KSA2QCgm0z0ICxmJxqjImrPMk7Denzx
CjIAnRCQ6WYMXa0lOMFFyYoHJpZ0jRuy
=8+oN
-----END PGP SIGNATURE-----
Paul P Komkoff Jr wrote:
> How much time you will parse a single integer ? Without any text around
> needs to be thrown away, optionally with 0x and considered it __int64 ?
And it's not much even with letters following like "k" and "m".
> This is much better than current /proc, yeah ? Anyway, Linus will keep proc
> ASCII, and we don't have another Linus.
Do we need another?
> So proposed standard for /proc - is a good idea. Let's get rid of
> progressbars, percent-o-meters with pseudographics. Maybe we should switch
> from single file, for ex, cpuinfo, to dir with many INDIVIDUAL files
> containing single number or feature-set in it. Splitting away parts that
> need to be formatted in-kernel and then parsed in-user maybe a good decision
> 'coz ... maybe they are rarely used ?
Sounds like a lot more open() calls to me. If one is insisting on
saving CPU time (be it in kernel space or user space), I doubt if
this accomplishes that. Not that that is everyone's goal.
> Another point. Including formatting code in EVERY kernel part that resides in
> /proc maybe (as for me) a bad idea - so one can do simple interface,
> formatting functions, and switch modules to use them
So a common core formatter for everything? That could be done in
userspace or as a module, right? Insert the module (or compile it
in directly) and /kerntxt becomes mountable and mirrors /kernel but
in text format. How about a FS type for making any arbitrary info
tree much like /proc but served via a user space process? Then it
can get the info from somewhere else, format it, and hand it back.
It could have other uses besides just /proc stuff.
> Another point is writable /proc files - but no one in this thread said
> something clever about it and ... maybe discuss it later ?
Those tend to be single value writes, true? If in binary format,
then there will need to be a "setkernel" comand or some such thing
which opens the named path, and writes the data in the indicated
binary format.
So instead of:
echo 0 > /proc/sys/net/ipv4/tcp_ecn
you might have:
setkernel /kernel/sys/net/ipv4/tcp_ecn -i4 0
FYI: I really don't care much how this gets formatted or reformatted,
as long as it isn't XML (worst of both worlds: more CPU to parse and
breaks cat and grep). Logical is nice. Fast is nice. Compact is
nice. Readable is nice. Easy to code in scripts is nice. Easy to
code in C is nice. The ultimate solution to make it possible to have
all these features at the same time ... priceless.
--
-----------------------------------------------------------------
| Phil Howard - KA9WGN | Dallas | http://linuxhomepage.com/ |
| [email protected] | Texas, USA | http://phil.ipal.org/ |
-----------------------------------------------------------------
Alex Bligh - linux writes:
> sure it's easier to strip out a spurious 'kb' that
> gets added after a number, than to deal with (say)
> an extra inserted DWORD with no version traching.
Design the kernel to make doing this difficult.
Define some offsets as follows:
#define FOO_PID 0
#define FOO_PPID 1
Now, how is anyone going to create "an extra inserted DWORD"
between those? They'd need to renumber FOO_PPID and any other
values that come after it.
The "DWORD" idea is messed up too BTW. Use __u64 everywhere.
On Tue, 6 Nov 2001 10:46:44 -0500
Theodore Tso <[email protected]> wrote:
> On Tue, Nov 06, 2001 at 09:48:52AM +1100, Rusty Russell wrote:
> >
> > What concerns me most is the pain involved in writing a /proc or
> > sysctl interface in the kernel today. Take kernel/module.c's
> > get_ksyms_list as a typical example: 45 lines of code to perform a
> > very trivial task. And this code is sitting in your kernel whether
> > proc is enabled or not. Now, I'm a huge Al Viro fan, but his proposed
> > improvements are in the wrong direction, IMHO.
>
> I'm all for simplifying the internal kernel interfaces. What I'm not
> at *all* convinced about is that it's worth it to make serious changes
> to the layout of /proc, /proc/sys, etc. And the concept of being able
> to very rapidly and easily get at system configuration variables
> without needing to make sure that /proc is mounted is a very, very
> good thing.
As these threads show, this is a big argument, involving:
1) What should the in-kernel interface look like?
2) What should the userspace interface look like?
3) Should there be a sysctl interface overlap?
I'm trying to nail down (1). Whether there is a new backwards
compatible sysctl() which takes a name instead of a number, and/or
whether the whole thing should be done in userspace, I am not going
to address.
Rusty.
> Design the kernel to make doing this difficult.
> Define some offsets as follows:
>
># define FOO_PID 0
># define FOO_PPID 1
>
> Now, how is anyone going to create "an extra inserted DWORD"
> between those? They'd need to renumber FOO_PPID and any other
> values that come after it.
For instance, take the /proc/mounts type example, where
each row is a sequence of binary values. Someone decides
to add another column, which assuming it is a DWORD^W__u64,
does exactly this, inserts a DWORD^W__u64 between the
end of one record and the start of the next as far a
poorly written parser is concerned.
The brokenness is not due to the distinction between ASCII
and binary. The brokenness is due the ill-defined nature
of the format, and poor change control. (so for instance
the ASCII version could consistently use (say) quoted strings,
with spaces between fields, and \n between records, just
as the binary version could have a record length entry at the
head of each record, and perhaps field length and identifier
versions by each field - two very similar solutions to the
problem above).
> The "DWORD" idea is messed up too BTW. Use __u64 everywhere.
OK OK :-)
--
Alex Bligh
On Thu, 8 Nov 2001, Alex Bligh - linux-kernel wrote:
>For instance, take the /proc/mounts type example, where
(bad example as /proc/mounts is supposed to be a substiture for /etc/mtab.)
>each row is a sequence of binary values. Someone decides
>to add another column, which assuming it is a DWORD^W__u64,
>does exactly this, inserts a DWORD^W__u64 between the
>end of one record and the start of the next as far a
>poorly written parser is concerned.
Then make it hard ("impossible") to write a poor parser; include a record
size in the data format. The first __u32 read is the number of bytes per
record. You then know exactly how much data to read. Adding more crap on
the end doesn't break anything.
People who think binary data formats are bad and hard to work with should
take a few minutes to look at various implementation using binary data
structures. For example, RADIUS.
>The brokenness is not due to the distinction between ASCII
>and binary. The brokenness is due the ill-defined nature
>of the format, and poor change control. (so for instance
>the ASCII version could consistently use (say) quoted strings,
>with spaces between fields, and \n between records, just
>as the binary version could have a record length entry at the
>head of each record, and perhaps field length and identifier
>versions by each field - two very similar solutions to the
>problem above).
Correct. The issue is not which is easier to work with, or endian friendly.
A properly designed structure, which we still don't have, is the key. It's
just as straight forward to tokenize binary fields as text fields. Then you
have to do something with the data in the fields. Binary is far more
efficient in almost all cases.
People should not shit a brick at the suggestion of doing _some_ things
as binary structures. The parts of /proc that really are intended for humans
(ie. driver "debug" information... /proc/slabinfo, /proc/drivers/rd/..., etc.)
don't make sense in binary. However, there are loads of things that DO make
sense in binary format -- too many things reference them for further processing
requiring conversion from/to text multiple times. The number of people
who do:
% grep -l foo /proc/[0-9]*/cmdline
instead of:
% ps auxwww | grep foo
are very VERY low.
--Ricky
Alex Bligh - linux writes:
> [Albert Cahalan]
>> Design the kernel to make doing this difficult.
>> Define some offsets as follows:
>>
>> # define FOO_PID 0
>> # define FOO_PPID 1
>>
>> Now, how is anyone going to create "an extra inserted DWORD"
>> between those? They'd need to renumber FOO_PPID and any other
>> values that come after it.
>
> For instance, take the /proc/mounts type example, where
> each row is a sequence of binary values. Someone decides
> to add another column, which assuming it is a DWORD^W__u64,
> does exactly this, inserts a DWORD^W__u64 between the
> end of one record and the start of the next as far a
> poorly written parser is concerned.
That would be a botched design to begin with.
Each row becomes a separate binary file. They are distinct
records anyway. Splitting by column would be a poor choice.
> The brokenness is not due to the distinction between ASCII
> and binary. The brokenness is due the ill-defined nature
> of the format, and poor change control.
ASCII encourages ill-defined formats and poor change control.
People make assumptions about what is valid.
[email protected] (Jakob ?stergaard) wrote on 04.11.01 in <[email protected]>:
[quoteto.xps]
> On Sun, Nov 04, 2001 at 03:06:27PM -0500, Alexander Viro wrote:
> >
> >
> > On Sun, 4 Nov 2001, [iso-8859-1] Jakob %stergaard wrote:
> >
> > > So just ignore square brackets that have "=" " " and ">" between them ?
> > >
> > > What happens when someone decides "[----> ]" looks cooler ?
> >
> > First of all, whoever had chosen that output did a fairly idiotic thing.
> > But as for your question - you _do_ know what regular expressions are,
> > don't you? And you do know how to do this particular regex without
> > any use of library functions, right?
>
> A regex won't tell me if 345987 is a signed or unsigned 32-bit or 64-bit
> integer, or if it's a double.
You do not *need* that information at runtime. If you think you do, you're
doing something badly wrong.
I cannot even imagine what program would want that information.
> Sure, implement arbitrary precision arithmetic in every single app out there
> using /proc....
Bullshit. Implement whatever arithmetic is right *for your problem*. And
notice when the value you get doesn't fit so you can tell the user he
needs a newer version. That's all.
There's no reason whatsoever to care what data type the kernel used.
MfG Kai
[email protected] (Ricky Beam) wrote on 07.11.01 in <[email protected]>:
> As for code maint. and kernel changes breaking things... both happen already
> with the text based system. Binary structures can be constructed to be
> extensible without breaking old tools. Plus, the information exported from
> the kernel (in the case of processes) need not change with every version
> of the kernel.
And the exact same thing can be done with ASCII, too - only easier.
> I don't think people realize just how many CPU cycles are being needlessly
> expended in passing information between the kernel and the user. When I
> have the time, I'll add binary interfaces for various things and show
> exactly how expensive the existing system is -- all for the sake of being
> able to use 'cat' and 'grep'.
I consider those cycles *very* well spent. Being able to use those common
tools is rather important to very many people.
Let's write a /proc ASCII coding rules document. It should document well a
few (*very* few) generic formats to use for new entries, and big fat
warnings about ever changing the format of existing tables, and it should
be easy to find in /Documentation/ - and we should immediately jump on
anyone who violates it without, in advance, discussing the problem he's
trying to solve, and convincing us that they can't be solved any other
way.
I don't much care how those formats look, as long as they're easy to parse
and to extend compatibly, and *few*.
MfG Kai
On Sun, Nov 11, 2001 at 12:06:00PM +0200, Kai Henningsen wrote:
> [email protected] (Jakob ?stergaard) wrote on 04.11.01 in <[email protected]>:
...
> >
> > A regex won't tell me if 345987 is a signed or unsigned 32-bit or 64-bit
> > integer, or if it's a double.
>
> You do not *need* that information at runtime. If you think you do, you're
> doing something badly wrong.
I would prefer to have the information at compile-time, so that I would get
a compiler error if I did something wrong.
But that's unrealistic - some counter could change it's type from kernel
release to kernel release.
Now, my program needs to deal with the data, perform operations on it,
so naturally I need to know what kind of data I'm dealing with. Most likely,
my software will *expect* some certain type, but if I have no way of verifying
that my assumption is correct, I will lose sooner or later...
>
> I cannot even imagine what program would want that information.
Uh. Any program using /proc data ?
>
> > Sure, implement arbitrary precision arithmetic in every single app out there
> > using /proc....
>
> Bullshit. Implement whatever arithmetic is right *for your problem*. And
> notice when the value you get doesn't fit so you can tell the user he
> needs a newer version. That's all.
>
> There's no reason whatsoever to care what data type the kernel used.
So my program runs for two months and then aborts with an error because
some counter just happened to no longer fit into whatever type I assumed
it was ?
Come on - you just can't code like that...
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
On Sun, 11 Nov 2001, Jakob ?stergaard wrote:
> Now, my program needs to deal with the data, perform operations on it,
> so naturally I need to know what kind of data I'm dealing with. Most likely,
> my software will *expect* some certain type, but if I have no way of verifying
> that my assumption is correct, I will lose sooner or later...
Why not read everything into a 1024-bit signed variable? Will work for
every numeric value in /proc. It's a bit of a hassle to code, but it is
possible. You only need to know the type if you want to write a numerical
value to a file in /proc, and even then the driver behind that /proc entry
should do sanity checks.
--
Ciao, Pascal
-<[ [email protected], netmail 2:241/215.72, home http://cobol.cjb.net/) ]>-
On Mon, Nov 12, 2001 at 02:43:41PM +0100, Pascal Schmidt wrote:
> On Sun, 11 Nov 2001, Jakob ?stergaard wrote:
>
> > Now, my program needs to deal with the data, perform operations on it,
> > so naturally I need to know what kind of data I'm dealing with. Most likely,
> > my software will *expect* some certain type, but if I have no way of verifying
> > that my assumption is correct, I will lose sooner or later...
>
> Why not read everything into a 1024-bit signed variable? Will work for
> every numeric value in /proc. It's a bit of a hassle to code, but it is
> possible. You only need to know the type if you want to write a numerical
> value to a file in /proc, and even then the driver behind that /proc entry
> should do sanity checks.
So for 99.9% of all cases my program will do much much more work than is
actually needed.
I may still save the data in a database, or go over the network with it,
so I should implement 1024 bit signed integers in all of that code too ?
And what happens when we do crypto and 1024 bits is not enough ?
I think the "use rediculously large datatypes" solution is a poor one,
as it can never cover all cases in the future, and it will impose a large
overhead on existing and new applications.
--
................................................................
: [email protected] : And I see the elder races, :
:.........................: putrid forms of man :
: Jakob ?stergaard : See him rise and claim the earth, :
: OZ9ABN : his downfall is at hand. :
:.........................:............{Konkhra}...............:
Hi Jakob.
>>> Sure, implement arbitrary precision arithmetic in every single app
>>> out there using /proc....
>> Bullshit. Implement whatever arithmetic is right *for your problem*.
>> And notice when the value you get doesn't fit so you can tell the
>> user he needs a newer version. That's all.
>>
>> There's no reason whatsoever to care what data type the kernel used.
> So my program runs for two months and then aborts with an error
> because some counter just happened to no longer fit into whatever
> type I assumed it was ?
>
> Come on - you just can't code like that...
There are certain assumptions you can make about any given variable
without even seeing a specific value for it. For example:
1. Does it make sense for the value to be negative? If not, use an
unsigned variable.
As an example, no systems can validly have a negative uptime, as
that implies that it hasn't yet started running. It is for this
very reason that a supposedly Roman coin inscribed with the date
"37 BC" was known to be counterfeit - who measures time from an
event that hasn't yet happenned?
2. Does it make sense for the variable to report fractional values?
If not, use integral variables.
As an example, it makes no sense to have a fractional number of
CPU's in a particular system - or, for that matter, for a given
family to have the fabled 2.4 children !!!
3. If fractional values do make sense, what accuracy is needed, and
would it make sense to use scaled integers rather than reals?
As an example, fractional values make sense for the current time
but the need for accuracy indicates that scaled integers rather
than reals are to be preferred, with the integers recording time
in units of whatever fraction of a second is deemed sufficiently
accurate for the intended purpose whilst still giving a practical
range that can be stored.
To take this one step further, and push the next version of the
Y2K problem as far into the future as possible whilst providing
a sufficient accuracy for most tasks nowadays, one could use a
64-bit unsigned variable for the current time, but, rather than
storing the number of seconds since epoch in it, store the
number of xths of a second instead.
As an example of this, a 64-bit unsigned value that measures the
number of 40 ns intervals from Jan 1 00:00:00 UTC 1970 onwards
will roll over at Jan 29 15:31:14 UTC 13661. This is a over 45%
further in the future than the Y10K rollover seen elsewhere...
( 13661 - 2000 )
---------------- * 100 % = 145.762 %
( 10000 - 2000 )
With an interval of 40 ns one can accurately convert to seconds
for backwards compatibility by simply dividing by 25,000,000.
4. Is there any inherent limit on the range it can take? If not, use
the largest available variables of the relevant type.
I've been doing this for 25 years now, and I've never regretted it.
Best wishes from Riley.
Jan-Benedict Glaw wrote:
>
> On Tue, 2001-11-06 15:28:26 -0700, Erik Andersen <[email protected]>
> wrote in message <[email protected]>:
> > On Tue Nov 06, 2001 at 07:24:13PM -0200, Rik van Riel wrote:
> > > PROCESSOR=0
> > > VENDOR_ID=GenuineIntel
> > > CPU_FAMILY=6
> > > MODEL=6
> > > MODEL_NAME="Celeron (Mendocino)"
> > > .....
>
> PROCESSOR=1
> ...
>
> > > . /proc/cpuinfo
> >
> > I think we have a winner! If we could establish this
> > as policy that would be _sweet_!
>
> What do you expect on a SMP system?
<IRONY>
ksh93 arrays
</IRONY>
--On Tuesday, 06 November, 2001 5:14 PM -0500 Alexander Viro
<[email protected]> wrote:
> On Tue, 6 Nov 2001, Ricky Beam wrote:
>
> [snip]
>> And those who *will* complain that binary structures are hard to work
with,
>> (you're idiots too :-)) a struct is far easier to deal with than text
>> processing, esp. for anyone who knows what they are doing. Yes, changes
>
> Learn C, then learn some respect to your betters[1], then come back.
>
> *PLONK*
>
> [1] like, say it, guys who had invented UNIX and C.
What amuses me about those arguing for binary structures as a long term
solution for communicating, on a flexible but long lived basis, is that the
entire OS Genre they appear to be advocating (UNIX et al.) has been doing
this, on an app to app (as opposed to kernel to app) basis, for far longer
than most of them can remember, in situations where performance is far more
relevant, and pitted against far more 3l33t 5tud3nt2 than we we shake a
stick at, but /still/ they persist.
Through minor local idiocy, I had trashed my local lilo partition, and had
to try and boot with a Debian CD-Rom with a 2.2 kernel. I forgot to ask for
single user more. Not only did it boot first time, it booted fully, apart
from two minor things: no iptables, and (shock horror) the sound card
didn't work it wasn't compatible. Similarly, I've loaded 2.4 kernels with
no problems onto 2.2 systems.
This "dreadful" /proc interface everyone talks about has been *STAGGERINGLY
GOOD* in terms of forward and backward compatibility. Sure, the innards may
smell unpleasant, but I reckon the interface, in practice, whilst not in
BNF format (BTW what is, and and, for the compsci philosophers amongst you,
'.*' as a regexp is easilly convertible into BNF and describes the /proc
interface completely - lexical and synatical analysis is immaterial without
tight semantic definition), has worked well just because kernel developers
and maintainers have showed themselves unwilling to break existing
userspace tools, and vice versa.
I think/thought we learnt our lesson on this in the fallout of the
Net2E/Net2D 'debate'. If someone is willing to stand up and say that /proc
external interface causes as many problems as the networking code did at
the time, please stand up and be counted now, preferably holding your
thesis on how to fix this for inter-app comms in Un*x in general, & forming
an orderly queue for the exit door :-)
--
Alex Bligh
--On Wednesday, 07 November, 2001 1:13 AM +0100 Martin Dalecki
<[email protected]> wrote:
> around 6 years
> ago people did just give up on adapting the parsers to the ever chaning
> "wonderfull" ascii interfaces those times.
Must have passed me by - probably too busy with regedt32 and other
such great /proc substitutes - cough...
--
Alex Bligh
On Tue, 6 Nov 2001 [email protected] wrote:
>In article <[email protected]> Erik Anderson wrote:
>> Come now, it really isn't that difficult:
>
>> char name[80];
>> if (sscanf(line, "%4u %4u %llu %s", &major, &minor, &size, name) == 4)
>
>if it's so easy to do, why do you have a great big buffer overflow here?
Because he forgot about "%80s"? But if he forgot that he may accidently
use strcpy, strcat, and gets, so...
Or maybe it was just an exercise for the reader?
--
George Greer, [email protected]
http://www.m-l.org/~greerga/
On Wed, 7 Nov 2001, Martin Dalecki wrote:
>And then converted back to ASCII for printout on the terminal ;-).
Well, they don't always get printf()'d...
>The second problem is that /proc is one of the few design "inventions" in
>linux, which didn't get copied over from some other UNIX box and Linus
>doesn't wan't recognize that this was A BAD DESIGN CHOICE.
/proc is a wonderful thing for what it was originally intended: access to
the process table without looking at the tables in the kernel memory space
(remember SunOS? what happened if /vmunix wasn't the running kernel?)
Unfortunately, /proc has become the gheto of the Linux kernel. It is now
the general dumping grounds for user/kernel interfacing. As a developer tool
it's very handy; it's also very dangerous. Developers then resort to
/proc as a perminant interface between kernel drivers and userland. (In
the *BSD world, this is a kernfs, not a procfs.)
For an example of /proc done right, find a Solaris box. What do you find
in /proc? Gee, process information. Only process information. In. Binary.
--Ricky
--On Tuesday, 06 November, 2001 8:10 PM -0500 Ricky Beam
<[email protected]> wrote:
> /proc ... it's very handy; it's also very dangerous.
as is most of UID==0 UNIX; and the problem here is?
--
Alex Bligh
Hi!
> > It eats CPU, it's error-prone, and all in all it's just "wrong".
>
> How much of your CPU time is spent parsing /proc files?
30% of 486 if you run top... That's way too much and top is unusable on slower
machines.
"Not fast enough for showing processes" sounds wery wrong.
Pavel
--
Philips Velo 1: 1"x4"x8", 300gram, 60, 12MB, 40bogomips, linux, mutt,
details at http://atrey.karlin.mff.cuni.cz/~pavel/velo/index.html.
In article <[email protected]>
[email protected] wrote:
| People should not shit a brick at the suggestion of doing _some_ things
| as binary structures. The parts of /proc that really are intended for humans
| (ie. driver "debug" information... /proc/slabinfo, /proc/drivers/rd/..., etc.)
| don't make sense in binary. However, there are loads of things that DO make
| sense in binary format -- too many things reference them for further processing
| requiring conversion from/to text multiple times. The number of people
| who do:
| % grep -l foo /proc/[0-9]*/cmdline
| instead of:
| % ps auxwww | grep foo
| are very VERY low.
There's a great savings, to convert what is initially a text string to
some "binary" format.
The advantage of text format is that humans can read it, and if it
changes they can almost always figure out what it means. Not everyone is
a C/PERL hacker who is happy writing machine independent binary
structures, while virtually every language ever written can and will
parse those text strings, and when it won't you can see why not.
Having spent a lot of time developing tools to use the contents of
/proc, I have to feel that the savings in time of not reinventing every
existing wheel is so far in excess of any possible saving that
justifying the change on effeciency grounds is at best unconvincing.
There are so many new and useful things which could be done that I
can't imaging this make-work change to something currently in place to
be a useful investment of time.
--
bill davidsen <[email protected]>
His first management concern is not solving the problem, but covering
his ass. If he lived in the middle ages he'd wear his codpiece backward.