2001-10-16 01:10:10

by Alexander Viro

[permalink] [raw]
Subject: [CFT][PATCH] large /proc/mounts and friends

Folks, patch below adds a new way to handle large files on procfs.
I've done that for /proc/mounts and /proc/ksyms, but the same trick can
be pulled for anything else. The basic idea: if file consists of records
generated by some sequence of objects, we can just describe an iterator and
use precanned functions to turn it into a file. That (and helpers for
formatting) is done in fs/seq_file.c and include/linux/seq_file.h.

Two slightly different examples of use are in fs/namespace.c and
in kernel/module.c - for /proc/mounts and /proc/ksyms resp.

Notice that the helpers take care of all the logic for overflows, split
entries, etc. - code that actually generates the contents becomes simpler
and cleaner.

See comments in fs/seq_file.c for description of interface - I
hope they are clear enough.

Patch works here, but it obviously needs more testing. Please,
help with that. Code review and comments on interface are also more than
welcome. It's against vanilla 2.4.13-pre3 - there are a couple of unrelated
fixes in fs/namespace.c and mm/filemap.c (already sent to Linus). Patch
follows:

diff -urN S13-pre3/fs/Makefile S13-pre3-seq/fs/Makefile
--- S13-pre3/fs/Makefile Tue Oct 9 21:47:26 2001
+++ S13-pre3-seq/fs/Makefile Mon Oct 15 20:24:18 2001
@@ -14,7 +14,7 @@
super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \
- filesystems.o namespace.o
+ filesystems.o namespace.o seq_file.o

ifeq ($(CONFIG_QUOTA),y)
obj-y += dquot.o
diff -urN S13-pre3/fs/namespace.c S13-pre3-seq/fs/namespace.c
--- S13-pre3/fs/namespace.c Thu Oct 11 19:24:34 2001
+++ S13-pre3-seq/fs/namespace.c Mon Oct 15 20:24:18 2001
@@ -22,6 +22,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_mount.h>
+#include <linux/seq_file.h>

struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data);
int do_remount_sb(struct super_block *sb, int flags, void * data);
@@ -153,6 +154,8 @@
atomic_inc(&sb->s_active);
mnt->mnt_sb = sb;
mnt->mnt_root = dget(root);
+ mnt->mnt_mountpoint = mnt->mnt_root;
+ mnt->mnt_parent = mnt;
}
return mnt;
}
@@ -165,160 +168,127 @@
kill_super(sb);
}

-/* Use octal escapes, like mount does, for embedded spaces etc. */
-static unsigned char need_escaping[] = { ' ', '\t', '\n', '\\' };
+/* iterator */
+static void *m_start(struct seq_file *m)
+{
+ down(&mount_sem);
+ if (list_empty(&vfsmntlist))
+ return NULL;
+ return list_entry(vfsmntlist.next, struct vfsmount, mnt_list);
+}

-static int
-mangle(const unsigned char *s, char *buf, int len) {
- char *sp;
- int n;
-
- sp = buf;
- while(*s && sp-buf < len-3) {
- for (n = 0; n < sizeof(need_escaping); n++) {
- if (*s == need_escaping[n]) {
- *sp++ = '\\';
- *sp++ = '0' + ((*s & 0300) >> 6);
- *sp++ = '0' + ((*s & 070) >> 3);
- *sp++ = '0' + (*s & 07);
- goto next;
- }
- }
- *sp++ = *s;
- next:
- s++;
- }
- return sp - buf; /* no trailing NUL */
-}
-
-static struct proc_fs_info {
- int flag;
- char *str;
-} fs_info[] = {
- { MS_SYNCHRONOUS, ",sync" },
- { MS_MANDLOCK, ",mand" },
- { MS_NOATIME, ",noatime" },
- { MS_NODIRATIME, ",nodiratime" },
- { 0, NULL }
-};
+static void *m_next(struct seq_file *m, void *v)
+{
+ struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
+ return p==&vfsmntlist ? NULL : list_entry(p, struct vfsmount, mnt_list);
+}

-static struct proc_fs_info mnt_info[] = {
- { MNT_NOSUID, ",nosuid" },
- { MNT_NODEV, ",nodev" },
- { MNT_NOEXEC, ",noexec" },
- { 0, NULL }
-};
+static void m_stop(struct seq_file *m, void *v)
+{
+ up(&mount_sem);
+}

-static struct proc_nfs_info {
- int flag;
- char *str;
- char *nostr;
-} nfs_info[] = {
- { NFS_MOUNT_SOFT, ",soft", ",hard" },
- { NFS_MOUNT_INTR, ",intr", "" },
- { NFS_MOUNT_POSIX, ",posix", "" },
- { NFS_MOUNT_TCP, ",tcp", ",udp" },
- { NFS_MOUNT_NOCTO, ",nocto", "" },
- { NFS_MOUNT_NOAC, ",noac", "" },
- { NFS_MOUNT_NONLM, ",nolock", ",lock" },
- { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" },
- { 0, NULL, NULL }
-};
+static inline void mangle(struct seq_file *m, const char *s)
+{
+ seq_escape(m, s, " \t\n\\");
+}

-int get_filesystem_info( char *buf )
+static void show_nfs_mount(struct seq_file *m, struct vfsmount *mnt)
{
- struct list_head *p;
- struct proc_fs_info *fs_infop;
+ static struct proc_nfs_info {
+ int flag;
+ char *str;
+ char *nostr;
+ } nfs_info[] = {
+ { NFS_MOUNT_SOFT, ",soft", ",hard" },
+ { NFS_MOUNT_INTR, ",intr", "" },
+ { NFS_MOUNT_POSIX, ",posix", "" },
+ { NFS_MOUNT_TCP, ",tcp", ",udp" },
+ { NFS_MOUNT_NOCTO, ",nocto", "" },
+ { NFS_MOUNT_NOAC, ",noac", "" },
+ { NFS_MOUNT_NONLM, ",nolock", ",lock" },
+ { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" },
+ { 0, NULL, NULL }
+ };
struct proc_nfs_info *nfs_infop;
- struct nfs_server *nfss;
- int len, prevlen;
- char *path, *buffer = (char *) __get_free_page(GFP_KERNEL);
-
- if (!buffer) return 0;
- len = prevlen = 0;
-
-#define FREEROOM ((int)PAGE_SIZE-200-len)
-#define MANGLE(s) len += mangle((s), buf+len, FREEROOM);
-
- for (p = vfsmntlist.next; p != &vfsmntlist; p = p->next) {
- struct vfsmount *tmp = list_entry(p, struct vfsmount, mnt_list);
- path = d_path(tmp->mnt_root, tmp, buffer, PAGE_SIZE);
- if (!path)
- continue;
- MANGLE(tmp->mnt_devname ? tmp->mnt_devname : "none");
- buf[len++] = ' ';
- MANGLE(path);
- buf[len++] = ' ';
- MANGLE(tmp->mnt_sb->s_type->name);
- len += sprintf(buf+len, " %s",
- tmp->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
- for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
- if (tmp->mnt_sb->s_flags & fs_infop->flag)
- MANGLE(fs_infop->str);
- }
- for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
- if (tmp->mnt_flags & fs_infop->flag)
- MANGLE(fs_infop->str);
- }
- if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
- nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
- len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version);
-
- len += sprintf(buf+len, ",rsize=%d", nfss->rsize);
-
- len += sprintf(buf+len, ",wsize=%d", nfss->wsize);
-#if 0
- if (nfss->timeo != 7*HZ/10) {
- len += sprintf(buf+len, ",timeo=%d",
- nfss->timeo*10/HZ);
- }
- if (nfss->retrans != 3) {
- len += sprintf(buf+len, ",retrans=%d",
- nfss->retrans);
- }
-#endif
- if (nfss->acregmin != 3*HZ) {
- len += sprintf(buf+len, ",acregmin=%d",
- nfss->acregmin/HZ);
- }
- if (nfss->acregmax != 60*HZ) {
- len += sprintf(buf+len, ",acregmax=%d",
- nfss->acregmax/HZ);
- }
- if (nfss->acdirmin != 30*HZ) {
- len += sprintf(buf+len, ",acdirmin=%d",
- nfss->acdirmin/HZ);
- }
- if (nfss->acdirmax != 60*HZ) {
- len += sprintf(buf+len, ",acdirmax=%d",
- nfss->acdirmax/HZ);
- }
- for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
- char *str;
- if (nfss->flags & nfs_infop->flag)
- str = nfs_infop->str;
- else
- str = nfs_infop->nostr;
- MANGLE(str);
- }
- len += sprintf(buf+len, ",addr=");
- MANGLE(nfss->hostname);
- }
- len += sprintf(buf + len, " 0 0\n");
- if (FREEROOM <= 3) {
- len = prevlen;
- len += sprintf(buf+len, "# truncated\n");
- break;
- }
- prevlen = len;
+ struct nfs_server *nfss = &mnt->mnt_sb->u.nfs_sb.s_server;
+
+ seq_printf(m, ",v%d", nfss->rpc_ops->version);
+ seq_printf(m, ",rsize=%d", nfss->rsize);
+ seq_printf(m, ",wsize=%d", nfss->wsize);
+ if (nfss->acregmin != 3*HZ)
+ seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
+ if (nfss->acregmax != 60*HZ)
+ seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
+ if (nfss->acdirmin != 30*HZ)
+ seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
+ if (nfss->acdirmax != 60*HZ)
+ seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
+ for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
+ if (nfss->flags & nfs_infop->flag)
+ seq_puts(m, nfs_infop->str);
+ else
+ seq_puts(m, nfs_infop->nostr);
}
+ seq_puts(m, ",addr=");
+ mangle(m, nfss->hostname);
+}

- free_page((unsigned long) buffer);
- return len;
-#undef MANGLE
-#undef FREEROOM
+static int show_vfsmnt(struct seq_file *m, void *v)
+{
+ struct vfsmount *mnt = v;
+ static struct proc_fs_info {
+ int flag;
+ char *str;
+ } fs_info[] = {
+ { MS_SYNCHRONOUS, ",sync" },
+ { MS_MANDLOCK, ",mand" },
+ { MS_NOATIME, ",noatime" },
+ { MS_NODIRATIME, ",nodiratime" },
+ { 0, NULL }
+ };
+ static struct proc_fs_info mnt_info[] = {
+ { MNT_NOSUID, ",nosuid" },
+ { MNT_NODEV, ",nodev" },
+ { MNT_NOEXEC, ",noexec" },
+ { 0, NULL }
+ };
+ struct proc_fs_info *fs_infop;
+ char *path_buf, *path;
+
+ path_buf = (char *) __get_free_page(GFP_KERNEL);
+ if (!path_buf)
+ return -ENOMEM;
+ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+
+ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+ seq_putc(m, ' ');
+ mangle(m, path);
+ free_page((unsigned long) path_buf);
+ seq_putc(m, ' ');
+ mangle(m, mnt->mnt_sb->s_type->name);
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
+ for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
+ if (mnt->mnt_sb->s_flags & fs_infop->flag)
+ seq_puts(m, fs_infop->str);
+ }
+ for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
+ if (mnt->mnt_flags & fs_infop->flag)
+ seq_puts(m, fs_infop->str);
+ }
+ if (strcmp("nfs", mnt->mnt_sb->s_type->name) == 0)
+ show_nfs_mount(m, mnt);
+ seq_puts(m, " 0 0\n");
+ return 0;
}

+struct seq_operations mounts_op = {
+ start: m_start,
+ next: m_next,
+ stop: m_stop,
+ show: show_vfsmnt
+};
+
/*
* Doesn't take quota and stuff into account. IOW, in some cases it will
* give false negatives. The main reason why it's here is that we need
@@ -1086,7 +1056,7 @@
printk(KERN_NOTICE "Trying to unmount old root ... ");
if (!blivet) {
spin_lock(&dcache_lock);
- list_del(&old_rootmnt->mnt_list);
+ list_del_init(&old_rootmnt->mnt_list);
spin_unlock(&dcache_lock);
mntput(old_rootmnt);
mntput(old_rootmnt);
diff -urN S13-pre3/fs/proc/proc_misc.c S13-pre3-seq/fs/proc/proc_misc.c
--- S13-pre3/fs/proc/proc_misc.c Mon Oct 15 20:12:19 2001
+++ S13-pre3-seq/fs/proc/proc_misc.c Mon Oct 15 20:24:18 2001
@@ -35,6 +35,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
+#include <linux/seq_file.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -57,12 +58,10 @@
#endif
#ifdef CONFIG_MODULES
extern int get_module_list(char *);
-extern int get_ksyms_list(char *, char **, off_t, int);
#endif
extern int get_device_list(char *);
extern int get_partition_list(char *, char **, off_t, int);
extern int get_filesystem_list(char *);
-extern int get_filesystem_info(char *);
extern int get_exec_domain_list(char *);
extern int get_irq_list(char *);
extern int get_dma_list(char *);
@@ -251,13 +250,17 @@
return proc_calc_metrics(page, start, off, count, eof, len);
}

-static int ksyms_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+extern struct seq_operations ksyms_op;
+static int ksyms_open(struct inode *inode, struct file *file)
{
- int len = get_ksyms_list(page, start, off, count);
- if (len < count) *eof = 1;
- return len;
+ return seq_open(file, &ksyms_op);
}
+static struct file_operations proc_ksyms_operations = {
+ open: ksyms_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
#endif

static int kstat_read_proc(char *page, char **start, off_t off,
@@ -414,13 +417,6 @@
return len;
}

-static int mounts_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len = get_filesystem_info(page);
- return proc_calc_metrics(page, start, off, count, eof, len);
-}
-
static int execdomains_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
@@ -505,6 +501,18 @@
write: write_profile,
};

+extern struct seq_operations mounts_op;
+static int mounts_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &mounts_op);
+}
+static struct file_operations proc_mounts_operations = {
+ open: mounts_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+
struct proc_dir_entry *proc_root_kcore;

void __init proc_misc_init(void)
@@ -530,7 +538,6 @@
#endif
#ifdef CONFIG_MODULES
{"modules", modules_read_proc},
- {"ksyms", ksyms_read_proc},
#endif
{"stat", kstat_read_proc},
{"devices", devices_read_proc},
@@ -546,7 +553,6 @@
{"rtc", ds1286_read_proc},
#endif
{"locks", locks_read_proc},
- {"mounts", mounts_read_proc},
{"swaps", swaps_read_proc},
{"iomem", memory_read_proc},
{"execdomains", execdomains_read_proc},
@@ -559,6 +565,12 @@
entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
if (entry)
entry->proc_fops = &proc_kmsg_operations;
+ entry = create_proc_entry("mounts", 0, NULL);
+ if (entry)
+ entry->proc_fops = &proc_mounts_operations;
+ entry = create_proc_entry("ksyms", 0, NULL);
+ if (entry)
+ entry->proc_fops = &proc_ksyms_operations;
proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL);
if (proc_root_kcore) {
proc_root_kcore->proc_fops = &proc_kcore_operations;
diff -urN S13-pre3/fs/seq_file.c S13-pre3-seq/fs/seq_file.c
--- S13-pre3/fs/seq_file.c Wed Dec 31 19:00:00 1969
+++ S13-pre3-seq/fs/seq_file.c Mon Oct 15 20:24:18 2001
@@ -0,0 +1,239 @@
+/*
+ * linux/fs/seq_file.c
+ *
+ * helper functions for making synthetic files from sequences of records.
+ * initial implementation -- AV, Oct 2001.
+ */
+
+#include <linux/malloc.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+
+/**
+ * seq_open - initialize sequential file
+ * @file: file we initialize
+ * @op: method table describing the sequence
+ *
+ * seq_open() sets @file, associating it with a sequence described
+ * by @op. @op->start() sets the iterator up and returns the first
+ * element of sequence. @op->stop() shuts it down. @op->next()
+ * returns the next element of sequence. @op->show() prints element
+ * into the buffer. In case of error ->start() and ->next() return
+ * ERR_PTR(error). In the end of sequence they return %NULL. ->show()
+ * returns 0 in case of success and negative number in case of error.
+ */
+int seq_open(struct file *file, struct seq_operations *op)
+{
+ struct seq_file *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ memset(p, 0, sizeof(*p));
+ sema_init(&p->sem, 1);
+ p->op = op;
+ file->private_data = p;
+ return 0;
+}
+
+/**
+ * seq_read - ->read() method for sequential files.
+ * @file, @buf, @size, @ppos: see file_operations method
+ *
+ * Ready-made ->f_op->read()
+ */
+ssize_t seq_read(struct file *file, char *buf, size_t size, loff_t *ppos)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ void *p = NULL;
+ size_t copied = 0;
+ unsigned n, eaten = 0;
+ loff_t pos = *ppos;
+ int err = 0;
+
+ down(&m->sem);
+ /* grab buffer if we didn't have one */
+ if (!m->buf) {
+ m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ if (!m->buf)
+ goto Enomem;
+ }
+ /* if not empty - flush it first */
+ if (m->count) {
+ n = min(m->count, size);
+ err = copy_to_user(buf, m->buf + m->from, n);
+ if (err)
+ goto Efault;
+ m->count -= n;
+ m->from += n;
+ size -= n;
+ buf += n;
+ copied += n;
+ if (!m->count) {
+ m->from = 0;
+ pos++;
+ }
+ if (!size)
+ goto Done;
+ }
+ /* more, more, I'm still not satisfied... */
+ while (1) {
+ int i;
+ for (p=m->op->start(m), i=0; i<pos; p=m->op->next(m, p), i++) {
+ if (!p || IS_ERR(p))
+ break;
+ }
+ err = PTR_ERR(p);
+ if (!p || IS_ERR(p))
+ break;
+ err = m->op->show(m, p);
+ if (err)
+ break;
+ if (m->count < m->size)
+ goto Fill;
+ m->op->stop(m, p);
+ kfree(m->buf);
+ m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ if (!m->buf)
+ goto Enomem;
+ }
+ m->op->stop(m, p);
+ goto Done;
+Fill:
+ while (m->count < size) {
+ size_t offs = m->count;
+ p = m->op->next(m, p);
+ if (!p || IS_ERR(p)) {
+ err = PTR_ERR(p);
+ break;
+ }
+ err = m->op->show(m, p);
+ if (err || m->count == m->size) {
+ m->count = offs;
+ break;
+ }
+ eaten++;
+ }
+ m->op->stop(m, p);
+ n = min(m->count, size);
+ err = copy_to_user(buf, m->buf, n);
+ if (err)
+ goto Efault;
+ copied += n;
+ m->count -= n;
+ if (m->count) {
+ pos += eaten;
+ m->from += n;
+ } else {
+ pos += eaten + 1;
+ m->from = 0;
+ }
+Done:
+ if (!copied)
+ copied = err;
+ *ppos = pos;
+ up(&m->sem);
+ return copied;
+Enomem:
+ err = -ENOMEM;
+ goto Done;
+Efault:
+ err = -EFAULT;
+ goto Done;
+}
+
+/**
+ * seq_lseek - ->llseek() method for sequential files.
+ * @file, @offset, @origin: see file_operations method
+ *
+ * Ready-made ->f_op->llseek()
+ */
+loff_t seq_lseek(struct file *file, loff_t offset, int origin)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ long long retval = -EINVAL;
+
+ down(&m->sem);
+ switch (origin) {
+ case 1:
+ offset += file->f_pos;
+ case 0:
+ if (offset < 0)
+ break;
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ m->count = m->from = 0;
+ }
+ retval = offset;
+ }
+ up(&m->sem);
+ return retval;
+}
+
+/**
+ * seq_release - free the structures associated with sequential file.
+ * @file: file in question
+ * @inode: file->f_dentry->d_inode
+ *
+ * Frees the structures associated with sequential file; can be used
+ * as ->f_op->release() if you don't have private data to destroy.
+ */
+int seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ kfree(m->buf);
+ kfree(m);
+ return 0;
+}
+
+/**
+ * seq_escape - print string into buffer, escaping some characters
+ * @m: target buffer
+ * @s: string
+ * @esc: set of characters that need escaping
+ *
+ * Puts string into buffer, replacing each occurrence of character from
+ * @esc with usual octal escape. Returns 0 in case of success, -1 - in
+ * case of overflow.
+ */
+int seq_escape(struct seq_file *m, const char *s, const char *esc)
+{
+ char *end = m->buf + m->size;
+ char *p;
+ char c;
+
+ for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) {
+ if (!strchr(esc, c)) {
+ *p++ = c;
+ continue;
+ }
+ if (p + 3 < end) {
+ *p++ = '\\';
+ *p++ = '0' + ((c & 0300) >> 6);
+ *p++ = '0' + ((c & 070) >> 3);
+ *p++ = '0' + (c & 07);
+ continue;
+ }
+ m->count = m->size;
+ return -1;
+ }
+ m->count = p - m->buf;
+ return 0;
+}
+
+int seq_printf(struct seq_file *m, const char *f, ...)
+{
+ va_list args;
+ int len;
+
+ if (m->count < m->size) {
+ va_start(args, f);
+ len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
+ va_end(args);
+ if (m->count + len < m->size) {
+ m->count += len;
+ return 0;
+ }
+ }
+ m->count = m->size;
+ return -1;
+}
diff -urN S13-pre3/include/linux/seq_file.h S13-pre3-seq/include/linux/seq_file.h
--- S13-pre3/include/linux/seq_file.h Wed Dec 31 19:00:00 1969
+++ S13-pre3-seq/include/linux/seq_file.h Mon Oct 15 20:24:21 2001
@@ -0,0 +1,65 @@
+#ifndef _LINUX_SEQ_FILE_H
+#define _LINUX_SEQ_FILE_H
+#ifdef __KERNEL__
+
+struct seq_operations;
+
+struct seq_file {
+ char *buf;
+ size_t size;
+ size_t from;
+ size_t count;
+ struct semaphore sem;
+ struct seq_operations *op;
+};
+
+struct seq_operations {
+ void * (*start) (struct seq_file *m);
+ void (*stop) (struct seq_file *m, void *v);
+ void * (*next) (struct seq_file *m, void *v);
+ int (*show) (struct seq_file *m, void *v);
+};
+
+int seq_open(struct file *, struct seq_operations *);
+ssize_t seq_read(struct file *, char *, size_t, loff_t *);
+loff_t seq_lseek(struct file *, loff_t, int);
+int seq_release(struct inode *, struct file *);
+
+static inline size_t seq_offs(struct seq_file *m)
+{
+ return m->count;
+}
+
+static inline void seq_unroll(struct seq_file *m, size_t offs)
+{
+ m->count = offs;
+}
+
+int seq_escape(struct seq_file *, const char *, const char *);
+
+static inline int seq_putc(struct seq_file *m, char c)
+{
+ if (m->count < m->size) {
+ m->buf[m->count++] = c;
+ return 0;
+ }
+ return -1;
+}
+
+static inline int seq_puts(struct seq_file *m, const char *s)
+{
+ int len = strlen(s);
+ if (m->count + len < m->size) {
+ memcpy(m->buf + m->count, s, len);
+ m->count += len;
+ return 0;
+ }
+ m->count = m->size;
+ return -1;
+}
+
+int seq_printf(struct seq_file *, const char *, ...)
+ __attribute__ ((format (printf,2,3)));
+
+#endif
+#endif
diff -urN S13-pre3/kernel/module.c S13-pre3-seq/kernel/module.c
--- S13-pre3/kernel/module.c Sun Sep 23 16:12:09 2001
+++ S13-pre3-seq/kernel/module.c Mon Oct 15 20:24:21 2001
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/kmod.h>
+#include <linux/seq_file.h>

/*
* Originally by Anonymous (as far as I know...)
@@ -1156,51 +1157,72 @@
* Called by the /proc file system to return a current list of ksyms.
*/

-int
-get_ksyms_list(char *buf, char **start, off_t offset, int length)
-{
+struct mod_sym {
struct module *mod;
- char *p = buf;
- int len = 0; /* code from net/ipv4/proc.c */
- off_t pos = 0;
- off_t begin = 0;
-
- for (mod = module_list; mod; mod = mod->next) {
- unsigned i;
- struct module_symbol *sym;
+ int index;
+};

- if (!MOD_CAN_QUERY(mod))
- continue;
+/* iterator */

- for (i = mod->nsyms, sym = mod->syms; i > 0; --i, ++sym) {
- p = buf + len;
- if (*mod->name) {
- len += sprintf(p, "%0*lx %s\t[%s]\n",
- (int)(2*sizeof(void*)),
- sym->value, sym->name,
- mod->name);
- } else {
- len += sprintf(p, "%0*lx %s\n",
- (int)(2*sizeof(void*)),
- sym->value, sym->name);
- }
- pos = begin + len;
- if (pos < offset) {
- len = 0;
- begin = pos;
- }
- pos = begin + len;
- if (pos > offset+length)
- goto leave_the_loop;
+static void *s_start(struct seq_file *m)
+{
+ struct mod_sym *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ lock_kernel();
+ p->mod = module_list;
+ p->index = 0;
+ return p;
+}
+
+static void *s_next(struct seq_file *m, void *p)
+{
+ struct mod_sym *v = p;
+ if (++v->index < v->mod->nsyms)
+ return p;
+ v->index = 0;
+ do {
+ v->mod = v->mod->next;
+ if (!v->mod) {
+ unlock_kernel();
+ kfree(p);
+ return NULL;
}
+ } while (!v->mod->nsyms);
+ return p;
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+ if (p && !IS_ERR(p)) {
+ unlock_kernel();
+ kfree(p);
}
-leave_the_loop:
- *start = buf + (offset - begin);
- len -= (offset - begin);
- if (len > length)
- len = length;
- return len;
}
+
+static int s_show(struct seq_file *m, void *p)
+{
+ struct mod_sym *v = p;
+ struct module_symbol *sym;
+
+ if (!MOD_CAN_QUERY(v->mod))
+ return 0;
+ sym = &v->mod->syms[v->index];
+ if (*v->mod->name)
+ seq_printf(m, "%0*lx %s\t[%s]\n", (int)(2*sizeof(void*)),
+ sym->value, sym->name, v->mod->name);
+ else
+ seq_printf(m, "%0*lx %s\n", (int)(2*sizeof(void*)),
+ sym->value, sym->name);
+ return 0;
+}
+
+struct seq_operations ksyms_op = {
+ start: s_start,
+ next: s_next,
+ stop: s_stop,
+ show: s_show
+};

#else /* CONFIG_MODULES */

diff -urN S13-pre3/mm/filemap.c S13-pre3-seq/mm/filemap.c
--- S13-pre3/mm/filemap.c Mon Oct 15 20:12:20 2001
+++ S13-pre3-seq/mm/filemap.c Mon Oct 15 20:24:45 2001
@@ -2312,7 +2312,7 @@
unsigned long pgoff)
{
unsigned char present = 0;
- struct address_space * as = &vma->vm_file->f_dentry->d_inode->i_mapping;
+ struct address_space * as = vma->vm_file->f_dentry->d_inode->i_mapping;
struct page * page, ** hash = page_hash(as, pgoff);

spin_lock(&pagecache_lock);


2001-10-16 03:02:27

by Linus Torvalds

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends


On Mon, 15 Oct 2001, Alexander Viro wrote:
>
> See comments in fs/seq_file.c for description of interface - I
> hope they are clear enough.

Al, I understand why you'd like the seq interface, but quite frankly, I
would personally much prefer a different approach: namely making the file
position be a "structured" thing instead, and have (for example) the low
12 bits be the character index, and the upper 52 bits be various "field
indexes" depending on what file it is.

If you have a character sequence number, that means that you _always_ have
to re-generate the whole file up to the new read-point. That simply does
not scale. Sure, it works well enough when the user usually reads the
whole file, but it's still a silly design.

File positions do not have to be consecutive, especially for /proc files
that already confuse things like "less" by not having a well-defined
length etc.

You have 64 bits to play with, and you can pretty much organize them any
way you want. For example, for many things it might be:

- low 12 bits are "offset in entry string"
- next X bits are "hash index"
- next Y bits are "position on hash chain"

which tends to work pretty well with things that are hashed (i.e. mounts,
sockets, etc) and that don't necessarily have a good cardinal ordering.

Can it get confused when people insert/remove entries at the same time we
read /proc? Sure. That's pretty much unavoidable with the /proc interface,
as we can't hold any locks across user-mode system calls. But using a
structured approach may make it _much_ more likely that the user doesn't
get data where an entry is cut off in the middle, though - especially if
you make the read routine be eager to return partial reads rather than
cutting things off in the middle..

(In other words: with a structured approach you can make guarantees about
the stability of each entry - you just can't necessarily guarantee that
all entries are shown or that some entries might not be duplicated..)

This approach is actually already used for some things - the "readdir()"
thing with "FIRST_PROCESS_ENTRY", for example. But also see a better
example in "proc_pid_read_maps()" with the high bits being the "line
number", and the low bits being the offset within the line.

Final note: another _extremely_ useful thing for performance is to have a
special "EOF" value for f_pos, because all normal applications end up
having to always do at least two reads: first to get the data (usually
the user buffer is larger than the amount of data generated), the second
to just get the "0" for EOF. If the second read can be done without any
data generation or lock handling, that often speeds up /proc accesses by
a noticeable amount.

The special EOF value fits very well with the "structure" approach.

For example, it's quite common to know that each individual entry is
limited in size (with PAGE_SIZE being a nice common max size for any
entry), and the thing that makes the whole /proc file potentially large is
that there are many entries. I'd rather have that kind of helper routines:
a helper routine where there would be a "print out entry X" routine, and
then common routines to turn that "print out entry X" into a full
proc_xxx_read() function.

Al?

Linus


2001-10-16 03:45:03

by Alexander Viro

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends



On Mon, 15 Oct 2001, Linus Torvalds wrote:

>
> If you have a character sequence number, that means that you _always_ have
> to re-generate the whole file up to the new read-point. That simply does
> not scale. Sure, it works well enough when the user usually reads the
> whole file, but it's still a silly design.

I don't. ->f_pos is an entry number. That's it.

> Can it get confused when people insert/remove entries at the same time we
> read /proc? Sure. That's pretty much unavoidable with the /proc interface,
> as we can't hold any locks across user-mode system calls. But using a
> structured approach may make it _much_ more likely that the user doesn't
> get data where an entry is cut off in the middle, though - especially if
> you make the read routine be eager to return partial reads rather than
> cutting things off in the middle..

If seq_read() returns a partial entry, it still has its tail in m->buf.
Unless you lseek() away, next call will read from that buffer before
doing anything else.

> (In other words: with a structured approach you can make guarantees about
> the stability of each entry - you just can't necessarily guarantee that
> all entries are shown or that some entries might not be duplicated..)

Already done.

> Final note: another _extremely_ useful thing for performance is to have a
> special "EOF" value for f_pos, because all normal applications end up
> having to always do at least two reads: first to get the data (usually
> the user buffer is larger than the amount of data generated), the second
> to just get the "0" for EOF. If the second read can be done without any
> data generation or lock handling, that often speeds up /proc accesses by
> a noticeable amount.

Umm... That makes sense and it's easy to do.

I suspect that you had misinterpreted the way seq_file.c works. Actually
it's fairly simple datagram->byte-stream buffering. I _don't_ regenerate
the whole file on each read(). Walk the sequence - sure, but that's it.
The algorithm looks like this:

if buffer is non-empty
read from it
if we are done - exit
/* buffer is empty now */
repeat:
get Nth entry and try to print it into buffer
if it doesn't exist or an error had happened - exit
if it didn't fit into buffer - expand and repeat
/* now we have one record in buffer */
while we have less than user asked
get next entry and try to print into buffer
if there is none or error had happened - exit
if it doesn't fit into buffer - end the loop
copy data to user, possibly leaving the tail of last entry in buffer

That's it. ->f_pos is advanced when we cross the record boundary. Yes, it
means that positions within a record are indistinguishable. But think what
can be done with them - we can't do any meaningful arithmetic on them
anyway, so the only things that make sense are "where are we now" and
"return where we had been". Both of them don't have resolution better
than a record.

IOW, I don't see the point in giving sub-record resolution. It can be
done - I can easily fit that into mechanism above, but I don't see
what would we win from that.

Notice that we always preserve integrity of record - if read() had grabbed
a part of it, the next one is guaranteed to pick the rest. No "but it's
gone, we don't know where to pick that tail" business and certainly no
"here's a tail of something" crap inherent to ->read_proc() use.

Comments? All this stuff is bog-standard buffering, nothing fancy...

2001-10-16 03:52:33

by Linus Torvalds

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends


On Mon, 15 Oct 2001, Linus Torvalds wrote:
>
> (In other words: with a structured approach you can make guarantees about
> the stability of each entry - you just can't necessarily guarantee that
> all entries are shown or that some entries might not be duplicated..)

Note that this can actually be important, with suid applications that
trust /proc. It is a GoodThing(tm) to have a read() that never returns
"mixed" output from different lines, ie even if a mount/umount happens in
parallel with reading /proc/mounts, you never get the filenames wrong..

Some stuff definitely wants more than 1 page per entry (/proc/mounts
happens to be the only one I can think of - it can have the pathname
already be PAGE_SIZE-1, with the options being another PAGE_SIZE), so some
interface like

- "proc_read_data" data structure:

struct proc_read_data {
struct semaphore sem;
int (*fillme)(struct proc_read_data *);
unsigned long this_index;
unsigned long next_index;
unsigned int buffer_len;
char buffer[0];
};

- allocate it on /proc open, de-allocate it on close, save it away in
filp->f_private_data or whatever...

- read ends up looking something like


/* Max 4 pages per entry */
#define INDEX_SHIFT (PAGE_SHIFT + 2)
#define EOF_INDEX (0xffff)

..
/* We don't do 'pread()' */
if (pos != &file->f_pos)
return -EPIPE;
struct proc_read_data *prd = file->f_private_data;
down(&prd->sem);
index = file->f_pos >> INDEX_SHIFT;
offset = file->f_pos & ((1UL << INDEX_SHIFT)-1);
if (index == prd->this_index) {
repeat:
if (index == EOF_INDEX)
goto out;
/* copy the rest of the buffer.. */
if (offset < prd->buffer_len) {
int nr = prd->buffer_len - offset;
if (nr > count)
nr = count;
copy_to_user(buffer, prd->buffer + offset, nr);
offset += nr;
count -= nr;
if (!count)
goto out;
}

/* Jump to "next" */
index = prd->next_index;
}
offset = 0;
prd->this_index = index;
prd->fillme(prd);
goto repeat;

out:
file->f_pos = (index << INDEX_SHIFT) | offset;
up(&prd->sem);
return retval;

.. and that's it (except for "fillme()", which is obviously the hard part,
and which has to fill in not only the buffer with the data for the right
index, but also "prd->next_index" and "prd->buffer_len").

Al, do you see any problems in this? I bet a lot of /proc files will fit
this model, and need only a fairly simple "fillme()" function..

Also note that because we cache _one_ entry, we absolutely _guarantee_
that a user that just does consecutive "read()" calls will never _ever_
see inconsistent lines, regardless of what his size of the read buffer is.
And if you use "lseek()", it will work as expected within reason (trivial
caution: "fillme()" has to be careful to not trip on bogus indexes, but
return an error or zero and just set next_index to something sane).

Linus

2001-10-16 04:03:44

by Alexander Viro

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends



On Mon, 15 Oct 2001, Linus Torvalds wrote:

>
> On Mon, 15 Oct 2001, Linus Torvalds wrote:
> >
> > (In other words: with a structured approach you can make guarantees about
> > the stability of each entry - you just can't necessarily guarantee that
> > all entries are shown or that some entries might not be duplicated..)
>
> Note that this can actually be important, with suid applications that
> trust /proc. It is a GoodThing(tm) to have a read() that never returns
> "mixed" output from different lines, ie even if a mount/umount happens in
> parallel with reading /proc/mounts, you never get the filenames wrong..

Already done, and yes, reasons were precisely what you've mentioned.

> Some stuff definitely wants more than 1 page per entry (/proc/mount
> happens to be the only one I can think of - it can have the pathname
> already be PAGE_SIZE-1, with the options being another PAGE_SIZE), so some
> interface like

Also handled - we expand the buffer if needed.

> - "proc_read_data" data structure:
>
> struct proc_read_data {
> struct semaphore sem;
> int (*fillme)(struct proc_read_data *);
> unsigned long this_index;
> unsigned long next_index;
> unsigned int buffer_len;
> char buffer[0];
> };

Bingo. Except that I do separate allocation of buffer.

> - allocate it on /proc open, de-allocate it on close, save it away in
> filp->f_private_data or whatever...

Exactly, except that there's no reason to limit it to procfs.

> .. and that's it (except for "fillme()", which is obviously the hard part,
> and which has to fill in not only the buffer with the data for the right
> index, it also has to fill in "prd->next_index" and "prd->buffer_len".
>
> Al, do you see any problems in this? I bet a lot of /proc files will fit
> this model, and need only a fairly simple "fillme()" function..

It's _very_ close to what I've done.

> Also note that because we cache _one_ entry, we absolutely _guarantee_
> that a user that just does consecutive "read()" calls will never _ever_
> see inconsistent lines, regardless of what his size of the read buffer is.

Right. We should never leave more than one entry in buffer - we have every
right to try and fill several, as long as we know that all but the last one
will be immediately eaten.

Check the previous mail I've sent - it contains pretty straightforward
pseudocode for seq_read(). Aside from the fact that seq_read() simply
doesn't bother with sub-record resolution, it's pretty close to your
function.

BTW, I've missed check for pread() - good thing that you've mentioned
it in your variant...

2001-10-16 04:01:44

by Linus Torvalds

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends


On Mon, 15 Oct 2001, Alexander Viro wrote:
>
> I don't. ->f_pos is an entry number. That's it.

Ahh, ok, I did indeed misread your code. Fair enough, then that's pretty
much equivalent to what I was asking for.

The reason I like sub-positions is that I worry that some application does
an lseek() to a position it already held earlier.

But you're probably right that it doesn't really matter, and as we really
have "pipe" semantics we might as well dis-allow any lseek except to the
beginning (I know that there have been apps out there that avoid
re-opening /proc files by lseek'ing to zero and re-reading - they may not
be common enough to matter, though).

Ok, I'll re-read your patch with this in mind. But it sounds like I'm
going to approve of it with this background...

Linus


2001-10-16 04:08:35

by Alexander Viro

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends



On Mon, 15 Oct 2001, Linus Torvalds wrote:

> Ok, I'll re-read your patch with this in mind. But it sounds like I'm
> going to approve of it with this background...

Two points:
a) seq_offs() and seq_unroll() are gone - they were rudiments of
earlier code; not used and not needed in the variant I've sent.
b) I've missed the check for pread() attempts. Fixed.

PS: latency sucks...

2001-10-16 04:10:25

by Anton Blanchard

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends


Hi,

> Folks, patch below adds a new way to handle large files on procfs.
> I've done that for /proc/mounts and /proc/ksyms, but the same trick can
> be pulled for anything else. The basic idea: if file consists of records
> generated by some sequence of objects, we can just describe an iterator and
> use precanned functions to turn it into a file. That (and helpers for
> formatting) is done in fs/seq_file.c and include/linux/seq_file.h.

Ahh good I was just going to fix /proc/stat and /proc/interrupts; given
enough interrupts and cpus they both overflow 1 page and cause random
memory corruption.

Anton

2001-10-16 04:30:27

by Linus Torvalds

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends


On Tue, 16 Oct 2001, Alexander Viro wrote:
> >
> > Al, do you see any problems in this? I bet a lot of /proc files will fit
> > this model, and need only a fairly simple "fillme()" function..
>
> It's _very_ close to what I've done.

Ok, now that I read it right, I agree. And not wasting the low bits of the
position should allow some sparse indexing..

One comment: I really think your "lseek()+read()" interaction is fairly
disgusting, though. the

for (p=m->op->start(m), i=0; i<pos; p=m->op->next(m, p), i++) {
...

thing makes it impossible to "jump" to the right location directly, which
might be perfectly sane and easy for many uses. Maybe simply through a
simple "seek()" interface (that could have a fall-back with the "one entry
at a time" loop using the "next" interface if you think that's going to be
common), and a flag that says "lseek has happened".

So you'd have the start be something like

p = m->op->start(m);
if (m->did_lseek) {
m->did_lseek = 0;
p = m->op->seek(m, pos);
}
p = m->op->next(m, p);

instead of that for-loop..

Linus

2001-10-16 04:42:57

by Alexander Viro

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends



On Mon, 15 Oct 2001, Linus Torvalds wrote:

> So you'd have the start be something like
>
> p = m->op->start(m);
> if (m->did_lseek) {
> m->did_lseek = 0;
> p = m->op->seek(m, pos);
> }
> p = m->op->next(m, p);
>
> instead of that for-loop..

Umm... That assumes that we actually can keep state. Neither /proc/mounts
nor /proc/ksyms can do that (well, /proc/mounts can - at the price of really
dirty trick; we can insert a dummy element into vfsmntlist and use it as a
cursor, but I'd rather Not Go There(tm)).

However, having ->seek() (with default being a loop) makes sense. I'll
play with that area and try to get a decent API - I understand what you
want, but there are several other issues I'd like to deal with. I suspect
that ->start() semantics needs to be changed a bit...

2001-10-16 04:42:27

by Linus Torvalds

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends


On Mon, 15 Oct 2001, Linus Torvalds wrote:
>
> So you'd have the start be something like
>
> p = m->op->start(m);
> if (m->did_lseek) {
> m->did_lseek = 0;
> p = m->op->seek(m, pos);
> }
> p = m->op->next(m, p);

Actually, the "seek" is obviously unconditional. Duh.

And we really want to pass in the "file" to the next/seek functions,
because I still think that we don't want this to be just a plain linear
sequence interface - your "/proc/module" thing already shows how it could
be advantageous to have the high pos bits be the module itself.

Example:

/* module seek */
s_seek()
{
struct mod_sym *v = p;
int mod_nr = pos >> 32;
while (mod_nr && v->mod) {
mod_nr--;
v->mod = v->mod->next;
};
}

where the high 32 bits of the pos are the module count. Something that the
"increment by one" approach simply cannot handle efficiently because it
ends up having to walk every module name, and can't just skip over them.

Again, the other example of this are various hashed data-structures like
sockets that simply do not _have_ cardinal numbers, and where it is not
really reasonable to walk the chain of (potentially tens of thousands of)
entries one-by-one, when you can do a seek directly to the right hash
bucket, and then just walk a few entries in the hash chain..

Linus

2001-10-16 05:16:47

by Keith Owens

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends

On Mon, 15 Oct 2001 21:41:49 -0700 (PDT),
Linus Torvalds <[email protected]> wrote:
> s_seek()
> {
> struct mod_sym *v = p;
> int mod_nr = pos >> 32;
> while (mod_nr && v->mod) {
> mod_nr--;
> v->mod = v->mod->next;
> };
> }

If a module is deleted between calls to s_seek() and that deletion is
before mod_nr then the caller has seen the deleted module but a later
module will transiently disappear. I don't see how counting on a
linked list which is subject to deletion at any point can deliver
reliable results. Seeing the old module is wrong but acceptable. Not
seeing a module that still exists is wrong.

2001-10-16 09:58:05

by Chris Wedgwood

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends

On Mon, Oct 15, 2001 at 09:01:17PM -0700, Linus Torvalds wrote:

But you're probably right that it doesn't really matter, and as we
really have "pipe" semantics we might as well dis-allow any lseek
except to the beginning (I know that there have been apps out
there that avoid re-opening /proc files by lseek'ing to zero and
re-reading - they may not be common enough to matter, though).

I always wondered why, for a number of /proc entries that aren't really
files, we don't simply expose them as pipes as opposed to
zero-length files? Surely that will confuse fewer user-land programs
as well as feel more technically correct?



--cw

2001-10-17 02:09:04

by Alexander Viro

[permalink] [raw]
Subject: Re: [CFT][PATCH] large /proc/mounts and friends



On Mon, 15 Oct 2001, Linus Torvalds wrote:

> Actually, the "seek" is obviously unconditional. Duh.
>
> And we really want to pass in the "file" to the next/seek functions,

I doubt it. First of all, we don't really need a new method.
Now it looks so:

pos = *ppos;
p = m->op->start(m, &pos);

and
loff_t next = pos;
p = m->op->next(m, p, &next);
/* do stuff, possibly exit the loop */
pos = next;

(and assignments _are_ needed if we don't want it sequential, since we
want to be able to bail out if next record won't fit into buffer).

As for passing the file to it... Not really, since we
a) want to return new position
b) _don't_ want to screw ->f_pos, since we may need to leave it at
the old value
c) don't have any other interesting fields to look at.

New variant of patch follows. Again, that's in "it works here" department -
it _does_ need more testing.

It's against 2.4.13-pre3 + fixes I've sent (minor cleanups in fs/namespace.c +
couple of fixes in places that are not affected by this patch).

Comments (and help with testing) are welcome...

diff -urN S13-pre3-fixes/fs/Makefile S13-pre3-seq/fs/Makefile
--- S13-pre3-fixes/fs/Makefile Tue Oct 9 21:47:26 2001
+++ S13-pre3-seq/fs/Makefile Tue Oct 16 00:10:00 2001
@@ -14,7 +14,7 @@
super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \
- filesystems.o namespace.o
+ filesystems.o namespace.o seq_file.o

ifeq ($(CONFIG_QUOTA),y)
obj-y += dquot.o
diff -urN S13-pre3-fixes/fs/namespace.c S13-pre3-seq/fs/namespace.c
--- S13-pre3-fixes/fs/namespace.c Mon Oct 15 22:22:19 2001
+++ S13-pre3-seq/fs/namespace.c Tue Oct 16 21:52:35 2001
@@ -22,6 +22,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_mount.h>
+#include <linux/seq_file.h>

struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data);
int do_remount_sb(struct super_block *sb, int flags, void * data);
@@ -167,159 +168,131 @@
kill_super(sb);
}

-/* Use octal escapes, like mount does, for embedded spaces etc. */
-static unsigned char need_escaping[] = { ' ', '\t', '\n', '\\' };
+/* iterator */
+static void *m_start(struct seq_file *m, loff_t *pos)
+{
+ struct list_head *p;
+ loff_t n = *pos;

-static int
-mangle(const unsigned char *s, char *buf, int len) {
- char *sp;
- int n;
-
- sp = buf;
- while(*s && sp-buf < len-3) {
- for (n = 0; n < sizeof(need_escaping); n++) {
- if (*s == need_escaping[n]) {
- *sp++ = '\\';
- *sp++ = '0' + ((*s & 0300) >> 6);
- *sp++ = '0' + ((*s & 070) >> 3);
- *sp++ = '0' + (*s & 07);
- goto next;
- }
- }
- *sp++ = *s;
- next:
- s++;
- }
- return sp - buf; /* no trailing NUL */
-}
-
-static struct proc_fs_info {
- int flag;
- char *str;
-} fs_info[] = {
- { MS_SYNCHRONOUS, ",sync" },
- { MS_MANDLOCK, ",mand" },
- { MS_NOATIME, ",noatime" },
- { MS_NODIRATIME, ",nodiratime" },
- { 0, NULL }
-};
+ down(&mount_sem);
+ list_for_each(p, &vfsmntlist)
+ if (!n--)
+ return list_entry(p, struct vfsmount, mnt_list);
+ return NULL;
+}

-static struct proc_fs_info mnt_info[] = {
- { MNT_NOSUID, ",nosuid" },
- { MNT_NODEV, ",nodev" },
- { MNT_NOEXEC, ",noexec" },
- { 0, NULL }
-};
+static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
+ (*pos)++;
+ return p==&vfsmntlist ? NULL : list_entry(p, struct vfsmount, mnt_list);
+}

-static struct proc_nfs_info {
- int flag;
- char *str;
- char *nostr;
-} nfs_info[] = {
- { NFS_MOUNT_SOFT, ",soft", ",hard" },
- { NFS_MOUNT_INTR, ",intr", "" },
- { NFS_MOUNT_POSIX, ",posix", "" },
- { NFS_MOUNT_TCP, ",tcp", ",udp" },
- { NFS_MOUNT_NOCTO, ",nocto", "" },
- { NFS_MOUNT_NOAC, ",noac", "" },
- { NFS_MOUNT_NONLM, ",nolock", ",lock" },
- { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" },
- { 0, NULL, NULL }
-};
+static void m_stop(struct seq_file *m, void *v)
+{
+ up(&mount_sem);
+}

-int get_filesystem_info( char *buf )
+static inline void mangle(struct seq_file *m, const char *s)
{
- struct list_head *p;
- struct proc_fs_info *fs_infop;
+ seq_escape(m, s, " \t\n\\");
+}
+
+static void show_nfs_mount(struct seq_file *m, struct vfsmount *mnt)
+{
+ static struct proc_nfs_info {
+ int flag;
+ char *str;
+ char *nostr;
+ } nfs_info[] = {
+ { NFS_MOUNT_SOFT, ",soft", ",hard" },
+ { NFS_MOUNT_INTR, ",intr", "" },
+ { NFS_MOUNT_POSIX, ",posix", "" },
+ { NFS_MOUNT_TCP, ",tcp", ",udp" },
+ { NFS_MOUNT_NOCTO, ",nocto", "" },
+ { NFS_MOUNT_NOAC, ",noac", "" },
+ { NFS_MOUNT_NONLM, ",nolock", ",lock" },
+ { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" },
+ { 0, NULL, NULL }
+ };
struct proc_nfs_info *nfs_infop;
- struct nfs_server *nfss;
- int len, prevlen;
- char *path, *buffer = (char *) __get_free_page(GFP_KERNEL);
-
- if (!buffer) return 0;
- len = prevlen = 0;
-
-#define FREEROOM ((int)PAGE_SIZE-200-len)
-#define MANGLE(s) len += mangle((s), buf+len, FREEROOM);
-
- for (p = vfsmntlist.next; p != &vfsmntlist; p = p->next) {
- struct vfsmount *tmp = list_entry(p, struct vfsmount, mnt_list);
- path = d_path(tmp->mnt_root, tmp, buffer, PAGE_SIZE);
- if (!path)
- continue;
- MANGLE(tmp->mnt_devname ? tmp->mnt_devname : "none");
- buf[len++] = ' ';
- MANGLE(path);
- buf[len++] = ' ';
- MANGLE(tmp->mnt_sb->s_type->name);
- len += sprintf(buf+len, " %s",
- tmp->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
- for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
- if (tmp->mnt_sb->s_flags & fs_infop->flag)
- MANGLE(fs_infop->str);
- }
- for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
- if (tmp->mnt_flags & fs_infop->flag)
- MANGLE(fs_infop->str);
- }
- if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
- nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
- len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version);
-
- len += sprintf(buf+len, ",rsize=%d", nfss->rsize);
-
- len += sprintf(buf+len, ",wsize=%d", nfss->wsize);
-#if 0
- if (nfss->timeo != 7*HZ/10) {
- len += sprintf(buf+len, ",timeo=%d",
- nfss->timeo*10/HZ);
- }
- if (nfss->retrans != 3) {
- len += sprintf(buf+len, ",retrans=%d",
- nfss->retrans);
- }
-#endif
- if (nfss->acregmin != 3*HZ) {
- len += sprintf(buf+len, ",acregmin=%d",
- nfss->acregmin/HZ);
- }
- if (nfss->acregmax != 60*HZ) {
- len += sprintf(buf+len, ",acregmax=%d",
- nfss->acregmax/HZ);
- }
- if (nfss->acdirmin != 30*HZ) {
- len += sprintf(buf+len, ",acdirmin=%d",
- nfss->acdirmin/HZ);
- }
- if (nfss->acdirmax != 60*HZ) {
- len += sprintf(buf+len, ",acdirmax=%d",
- nfss->acdirmax/HZ);
- }
- for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
- char *str;
- if (nfss->flags & nfs_infop->flag)
- str = nfs_infop->str;
- else
- str = nfs_infop->nostr;
- MANGLE(str);
- }
- len += sprintf(buf+len, ",addr=");
- MANGLE(nfss->hostname);
- }
- len += sprintf(buf + len, " 0 0\n");
- if (FREEROOM <= 3) {
- len = prevlen;
- len += sprintf(buf+len, "# truncated\n");
- break;
- }
- prevlen = len;
+ struct nfs_server *nfss = &mnt->mnt_sb->u.nfs_sb.s_server;
+
+ seq_printf(m, ",v%d", nfss->rpc_ops->version);
+ seq_printf(m, ",rsize=%d", nfss->rsize);
+ seq_printf(m, ",wsize=%d", nfss->wsize);
+ if (nfss->acregmin != 3*HZ)
+ seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
+ if (nfss->acregmax != 60*HZ)
+ seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
+ if (nfss->acdirmin != 30*HZ)
+ seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
+ if (nfss->acdirmax != 60*HZ)
+ seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
+ for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
+ if (nfss->flags & nfs_infop->flag)
+ seq_puts(m, nfs_infop->str);
+ else
+ seq_puts(m, nfs_infop->nostr);
}
+ seq_puts(m, ",addr=");
+ mangle(m, nfss->hostname);
+}

- free_page((unsigned long) buffer);
- return len;
-#undef MANGLE
-#undef FREEROOM
+static int show_vfsmnt(struct seq_file *m, void *v)
+{
+ struct vfsmount *mnt = v;
+ static struct proc_fs_info {
+ int flag;
+ char *str;
+ } fs_info[] = {
+ { MS_SYNCHRONOUS, ",sync" },
+ { MS_MANDLOCK, ",mand" },
+ { MS_NOATIME, ",noatime" },
+ { MS_NODIRATIME, ",nodiratime" },
+ { 0, NULL }
+ };
+ static struct proc_fs_info mnt_info[] = {
+ { MNT_NOSUID, ",nosuid" },
+ { MNT_NODEV, ",nodev" },
+ { MNT_NOEXEC, ",noexec" },
+ { 0, NULL }
+ };
+ struct proc_fs_info *fs_infop;
+ char *path_buf, *path;
+
+ path_buf = (char *) __get_free_page(GFP_KERNEL);
+ if (!path_buf)
+ return -ENOMEM;
+ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
+
+ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+ seq_putc(m, ' ');
+ mangle(m, path);
+ free_page((unsigned long) path_buf);
+ seq_putc(m, ' ');
+ mangle(m, mnt->mnt_sb->s_type->name);
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
+ for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
+ if (mnt->mnt_sb->s_flags & fs_infop->flag)
+ seq_puts(m, fs_infop->str);
+ }
+ for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
+ if (mnt->mnt_flags & fs_infop->flag)
+ seq_puts(m, fs_infop->str);
+ }
+ if (strcmp("nfs", mnt->mnt_sb->s_type->name) == 0)
+ show_nfs_mount(m, mnt);
+ seq_puts(m, " 0 0\n");
+ return 0;
}
+
+struct seq_operations mounts_op = {
+ start: m_start,
+ next: m_next,
+ stop: m_stop,
+ show: show_vfsmnt
+};

/*
* Doesn't take quota and stuff into account. IOW, in some cases it will
diff -urN S13-pre3-fixes/fs/proc/proc_misc.c S13-pre3-seq/fs/proc/proc_misc.c
--- S13-pre3-fixes/fs/proc/proc_misc.c Mon Oct 15 20:12:19 2001
+++ S13-pre3-seq/fs/proc/proc_misc.c Tue Oct 16 00:10:00 2001
@@ -35,6 +35,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
+#include <linux/seq_file.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -57,12 +58,10 @@
#endif
#ifdef CONFIG_MODULES
extern int get_module_list(char *);
-extern int get_ksyms_list(char *, char **, off_t, int);
#endif
extern int get_device_list(char *);
extern int get_partition_list(char *, char **, off_t, int);
extern int get_filesystem_list(char *);
-extern int get_filesystem_info(char *);
extern int get_exec_domain_list(char *);
extern int get_irq_list(char *);
extern int get_dma_list(char *);
@@ -251,13 +250,17 @@
return proc_calc_metrics(page, start, off, count, eof, len);
}

-static int ksyms_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+extern struct seq_operations ksyms_op;
+static int ksyms_open(struct inode *inode, struct file *file)
{
- int len = get_ksyms_list(page, start, off, count);
- if (len < count) *eof = 1;
- return len;
+ return seq_open(file, &ksyms_op);
}
+static struct file_operations proc_ksyms_operations = {
+ open: ksyms_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
#endif

static int kstat_read_proc(char *page, char **start, off_t off,
@@ -414,13 +417,6 @@
return len;
}

-static int mounts_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len = get_filesystem_info(page);
- return proc_calc_metrics(page, start, off, count, eof, len);
-}
-
static int execdomains_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
@@ -505,6 +501,18 @@
write: write_profile,
};

+extern struct seq_operations mounts_op;
+static int mounts_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &mounts_op);
+}
+static struct file_operations proc_mounts_operations = {
+ open: mounts_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+
struct proc_dir_entry *proc_root_kcore;

void __init proc_misc_init(void)
@@ -530,7 +538,6 @@
#endif
#ifdef CONFIG_MODULES
{"modules", modules_read_proc},
- {"ksyms", ksyms_read_proc},
#endif
{"stat", kstat_read_proc},
{"devices", devices_read_proc},
@@ -546,7 +553,6 @@
{"rtc", ds1286_read_proc},
#endif
{"locks", locks_read_proc},
- {"mounts", mounts_read_proc},
{"swaps", swaps_read_proc},
{"iomem", memory_read_proc},
{"execdomains", execdomains_read_proc},
@@ -559,6 +565,12 @@
entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
if (entry)
entry->proc_fops = &proc_kmsg_operations;
+ entry = create_proc_entry("mounts", 0, NULL);
+ if (entry)
+ entry->proc_fops = &proc_mounts_operations;
+ entry = create_proc_entry("ksyms", 0, NULL);
+ if (entry)
+ entry->proc_fops = &proc_ksyms_operations;
proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL);
if (proc_root_kcore) {
proc_root_kcore->proc_fops = &proc_kcore_operations;
diff -urN S13-pre3-fixes/fs/seq_file.c S13-pre3-seq/fs/seq_file.c
--- S13-pre3-fixes/fs/seq_file.c Wed Dec 31 19:00:00 1969
+++ S13-pre3-seq/fs/seq_file.c Tue Oct 16 21:36:17 2001
@@ -0,0 +1,236 @@
+/*
+ * linux/fs/seq_file.c
+ *
+ * helper functions for making synthetic files from sequences of records.
+ * initial implementation -- AV, Oct 2001.
+ */
+
+#include <linux/malloc.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+
+/**
+ * seq_open - initialize sequential file
+ * @file: file we initialize
+ * @op: method table describing the sequence
+ *
+ * seq_open() sets @file, associating it with a sequence described
+ * by @op. @op->start() sets the iterator up and returns the first
+ * element of sequence. @op->stop() shuts it down. @op->next()
+ * returns the next element of sequence. @op->show() prints element
+ * into the buffer. In case of error ->start() and ->next() return
+ * ERR_PTR(error). In the end of sequence they return %NULL. ->show()
+ * returns 0 in case of success and negative number in case of error.
+ */
+int seq_open(struct file *file, struct seq_operations *op)
+{
+ struct seq_file *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ memset(p, 0, sizeof(*p));
+ sema_init(&p->sem, 1);
+ p->op = op;
+ file->private_data = p;
+ return 0;
+}
+
+/**
+ * seq_read - ->read() method for sequential files.
+ * @file, @buf, @size, @ppos: see file_operations method
+ *
+ * Ready-made ->f_op->read()
+ */
+ssize_t seq_read(struct file *file, char *buf, size_t size, loff_t *ppos)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ size_t copied = 0;
+ loff_t pos;
+ size_t n;
+ void *p;
+ int err = 0;
+
+ if (ppos != &file->f_pos)
+ return -EPIPE;
+
+ down(&m->sem);
+ /* grab buffer if we didn't have one */
+ if (!m->buf) {
+ m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ if (!m->buf)
+ goto Enomem;
+ }
+ /* if not empty - flush it first */
+ if (m->count) {
+ n = min(m->count, size);
+ err = copy_to_user(buf, m->buf + m->from, n);
+ if (err)
+ goto Efault;
+ m->count -= n;
+ m->from += n;
+ size -= n;
+ buf += n;
+ copied += n;
+ if (!m->count)
+ (*ppos)++;
+ if (!size)
+ goto Done;
+ }
+ /* we need at least one record in buffer */
+ while (1) {
+ pos = *ppos;
+ p = m->op->start(m, &pos);
+ err = PTR_ERR(p);
+ if (!p || IS_ERR(p))
+ break;
+ err = m->op->show(m, p);
+ if (err)
+ break;
+ if (m->count < m->size)
+ goto Fill;
+ m->op->stop(m, p);
+ kfree(m->buf);
+ m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ if (!m->buf)
+ goto Enomem;
+ }
+ m->op->stop(m, p);
+ goto Done;
+Fill:
+ /* they want more? let's try to get some more */
+ while (m->count < size) {
+ size_t offs = m->count;
+ loff_t next = pos;
+ p = m->op->next(m, p, &next);
+ if (!p || IS_ERR(p)) {
+ err = PTR_ERR(p);
+ break;
+ }
+ err = m->op->show(m, p);
+ if (err || m->count == m->size) {
+ m->count = offs;
+ break;
+ }
+ pos = next;
+ }
+ m->op->stop(m, p);
+ n = min(m->count, size);
+ err = copy_to_user(buf, m->buf, n);
+ if (err)
+ goto Efault;
+ copied += n;
+ m->count -= n;
+ if (m->count)
+ m->from = n;
+ else
+ pos++;
+ *ppos = pos;
+Done:
+ if (!copied)
+ copied = err;
+ up(&m->sem);
+ return copied;
+Enomem:
+ err = -ENOMEM;
+ goto Done;
+Efault:
+ err = -EFAULT;
+ goto Done;
+}
+
+/**
+ * seq_lseek - ->llseek() method for sequential files.
+ * @file, @offset, @origin: see file_operations method
+ *
+ * Ready-made ->f_op->llseek()
+ */
+loff_t seq_lseek(struct file *file, loff_t offset, int origin)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ long long retval = -EINVAL;
+
+ down(&m->sem);
+ switch (origin) {
+ case 1:
+ offset += file->f_pos;
+ case 0:
+ if (offset < 0)
+ break;
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ m->count = 0;
+ }
+ retval = offset;
+ }
+ up(&m->sem);
+ return retval;
+}
+
+/**
+ * seq_release - free the structures associated with sequential file.
+ * @file: file in question
+ * @inode: file->f_dentry->d_inode
+ *
+ * Frees the structures associated with sequential file; can be used
+ * as ->f_op->release() if you don't have private data to destroy.
+ */
+int seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ kfree(m->buf);
+ kfree(m);
+ return 0;
+}
+
+/**
+ * seq_escape - print string into buffer, escaping some characters
+ * @m: target buffer
+ * @s: string
+ * @esc: set of characters that need escaping
+ *
+ * Puts string into buffer, replacing each occurrence of character from
+ * @esc with usual octal escape. Returns 0 in case of success, -1 - in
+ * case of overflow.
+ */
+int seq_escape(struct seq_file *m, const char *s, const char *esc)
+{
+ char *end = m->buf + m->size;
+ char *p;
+ char c;
+
+ for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) {
+ if (!strchr(esc, c)) {
+ *p++ = c;
+ continue;
+ }
+ if (p + 3 < end) {
+ *p++ = '\\';
+ *p++ = '0' + ((c & 0300) >> 6);
+ *p++ = '0' + ((c & 070) >> 3);
+ *p++ = '0' + (c & 07);
+ continue;
+ }
+ m->count = m->size;
+ return -1;
+ }
+ m->count = p - m->buf;
+ return 0;
+}
+
+int seq_printf(struct seq_file *m, const char *f, ...)
+{
+ va_list args;
+ int len;
+
+ if (m->count < m->size) {
+ va_start(args, f);
+ len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
+ va_end(args);
+ if (m->count + len < m->size) {
+ m->count += len;
+ return 0;
+ }
+ }
+ m->count = m->size;
+ return -1;
+}
diff -urN S13-pre3-fixes/include/linux/seq_file.h S13-pre3-seq/include/linux/seq_file.h
--- S13-pre3-fixes/include/linux/seq_file.h Wed Dec 31 19:00:00 1969
+++ S13-pre3-seq/include/linux/seq_file.h Tue Oct 16 20:33:50 2001
@@ -0,0 +1,55 @@
+#ifndef _LINUX_SEQ_FILE_H
+#define _LINUX_SEQ_FILE_H
+#ifdef __KERNEL__
+
+struct seq_operations;
+
+struct seq_file {
+ char *buf;
+ size_t size;
+ size_t from;
+ size_t count;
+ loff_t index;
+ struct semaphore sem;
+ struct seq_operations *op;
+};
+
+struct seq_operations {
+ void * (*start) (struct seq_file *m, loff_t *pos);
+ void (*stop) (struct seq_file *m, void *v);
+ void * (*next) (struct seq_file *m, void *v, loff_t *pos);
+ int (*show) (struct seq_file *m, void *v);
+};
+
+int seq_open(struct file *, struct seq_operations *);
+ssize_t seq_read(struct file *, char *, size_t, loff_t *);
+loff_t seq_lseek(struct file *, loff_t, int);
+int seq_release(struct inode *, struct file *);
+int seq_escape(struct seq_file *, const char *, const char *);
+
+static inline int seq_putc(struct seq_file *m, char c)
+{
+ if (m->count < m->size) {
+ m->buf[m->count++] = c;
+ return 0;
+ }
+ return -1;
+}
+
+static inline int seq_puts(struct seq_file *m, const char *s)
+{
+ int len = strlen(s);
+ if (m->count + len < m->size) {
+ memcpy(m->buf + m->count, s, len);
+ m->count += len;
+ return 0;
+ }
+ m->count = m->size;
+ return -1;
+}
+
+int seq_printf(struct seq_file *, const char *, ...)
+ __attribute__ ((format (printf,2,3)));
+
+#endif
+#endif
diff -urN S13-pre3-fixes/kernel/module.c S13-pre3-seq/kernel/module.c
--- S13-pre3-fixes/kernel/module.c Sun Sep 23 16:12:09 2001
+++ S13-pre3-seq/kernel/module.c Tue Oct 16 21:46:26 2001
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/kmod.h>
+#include <linux/seq_file.h>

/*
* Originally by Anonymous (as far as I know...)
@@ -1156,51 +1157,83 @@
* Called by the /proc file system to return a current list of ksyms.
*/

-int
-get_ksyms_list(char *buf, char **start, off_t offset, int length)
-{
+struct mod_sym {
struct module *mod;
- char *p = buf;
- int len = 0; /* code from net/ipv4/proc.c */
- off_t pos = 0;
- off_t begin = 0;
-
- for (mod = module_list; mod; mod = mod->next) {
- unsigned i;
- struct module_symbol *sym;
+ int index;
+};

- if (!MOD_CAN_QUERY(mod))
- continue;
+/* seq_file iterator over module symbols; lock_kernel() is held from s_start() until s_stop() or end of list */

- for (i = mod->nsyms, sym = mod->syms; i > 0; --i, ++sym) {
- p = buf + len;
- if (*mod->name) {
- len += sprintf(p, "%0*lx %s\t[%s]\n",
- (int)(2*sizeof(void*)),
- sym->value, sym->name,
- mod->name);
- } else {
- len += sprintf(p, "%0*lx %s\n",
- (int)(2*sizeof(void*)),
- sym->value, sym->name);
- }
- pos = begin + len;
- if (pos < offset) {
- len = 0;
- begin = pos;
- }
- pos = begin + len;
- if (pos > offset+length)
- goto leave_the_loop;
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+ struct mod_sym *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ struct module *v;
+ loff_t n = *pos;
+
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ lock_kernel();
+ for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) {
+ if (n < v->nsyms) {
+ p->mod = v;
+ p->index = n;
+ return p;
}
}
-leave_the_loop:
- *start = buf + (offset - begin);
- len -= (offset - begin);
- if (len > length)
- len = length;
- return len;
+ unlock_kernel();
+ kfree(p);
+ return NULL;
+}
+
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct mod_sym *v = p;
+ (*pos)++;
+ if (++v->index >= v->mod->nsyms) {
+ do {
+ v->mod = v->mod->next;
+ if (!v->mod) {
+ unlock_kernel();
+ kfree(p);
+ return NULL;
+ }
+ } while (!v->mod->nsyms);
+ v->index = 0;
+ }
+ return p;
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+ if (p && !IS_ERR(p)) {
+ unlock_kernel();
+ kfree(p);
+ }
+}
+
+static int s_show(struct seq_file *m, void *p)
+{
+ struct mod_sym *v = p;
+ struct module_symbol *sym;
+
+ if (!MOD_CAN_QUERY(v->mod))
+ return 0;
+ sym = &v->mod->syms[v->index];
+ if (*v->mod->name)
+ seq_printf(m, "%0*lx %s\t[%s]\n", (int)(2*sizeof(void*)),
+ sym->value, sym->name, v->mod->name);
+ else
+ seq_printf(m, "%0*lx %s\n", (int)(2*sizeof(void*)),
+ sym->value, sym->name);
+ return 0;
}
+
+struct seq_operations ksyms_op = {
+ start: s_start,
+ next: s_next,
+ stop: s_stop,
+ show: s_show
+};

#else /* CONFIG_MODULES */