Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S261852AbTENKbV (ORCPT ); Wed, 14 May 2003 06:31:21 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S261851AbTENKbV (ORCPT ); Wed, 14 May 2003 06:31:21 -0400 Received: from pub237.cambridge.redhat.com ([213.86.99.237]:37107 "EHLO warthog.warthog") by vger.kernel.org with ESMTP id S261847AbTENKa4 (ORCPT ); Wed, 14 May 2003 06:30:56 -0400 From: David Howells To: torvalds@transmeta.com cc: dhowells@redhat.com, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, openafs-devel@openafs.org Subject: [PATCH] PAG support, try #2 User-Agent: EMH/1.14.1 SEMI/1.14.4 (Hosorogi) FLIM/1.14.4 (=?ISO-8859-4?Q?Kashiharajing=FE-mae?=) APEL/10.4 Emacs/21.2 (i386-redhat-linux-gnu) MULE/5.0 (SAKAKI) MIME-Version: 1.0 (generated by SEMI 1.14.4 - "Hosorogi") Content-Type: text/plain; charset=US-ASCII Date: Wed, 14 May 2003 11:43:31 +0100 Message-ID: <24225.1052909011@warthog.warthog> Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 17450 Lines: 632 Hi Linus, Here's a revised patch for adding PAG support that incorporates suggestions and corrections I've been sent. 
David diff -uNr linux-2.5.69/arch/i386/kernel/entry.S linux-2.5.69-pag/arch/i386/kernel/entry.S --- linux-2.5.69/arch/i386/kernel/entry.S 2003-05-06 15:06:47.000000000 +0100 +++ linux-2.5.69-pag/arch/i386/kernel/entry.S 2003-05-14 10:36:24.000000000 +0100 @@ -852,6 +852,7 @@ .long sys_clock_gettime /* 265 */ .long sys_clock_getres .long sys_clock_nanosleep - + .long sys_setpag + .long sys_getpag nr_syscalls=(.-sys_call_table)/4 diff -uNr linux-2.5.69/fs/file_table.c linux-2.5.69-pag/fs/file_table.c --- linux-2.5.69/fs/file_table.c 2003-05-06 15:04:45.000000000 +0100 +++ linux-2.5.69-pag/fs/file_table.c 2003-05-14 09:08:19.000000000 +0100 @@ -166,6 +166,7 @@ if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); + vfs_token_put(file->f_token); fops_put(file->f_op); if (file->f_mode & FMODE_WRITE) put_write_access(inode); diff -uNr linux-2.5.69/fs/proc/array.c linux-2.5.69-pag/fs/proc/array.c --- linux-2.5.69/fs/proc/array.c 2003-05-06 15:07:08.000000000 +0100 +++ linux-2.5.69-pag/fs/proc/array.c 2003-05-13 10:58:56.000000000 +0100 @@ -154,13 +154,14 @@ read_lock(&tasklist_lock); buffer += sprintf(buffer, "State:\t%s\n" + "Pag:\t%d\n" "Tgid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", - get_task_state(p), p->tgid, + get_task_state(p), p->vfspag ? p->vfspag->pag : 0, p->tgid, p->pid, p->pid ? p->real_parent->pid : 0, p->pid && p->ptrace ? 
p->parent->pid : 0, p->uid, p->euid, p->suid, p->fsuid, diff -uNr linux-2.5.69/include/asm-i386/posix_types.h linux-2.5.69-pag/include/asm-i386/posix_types.h --- linux-2.5.69/include/asm-i386/posix_types.h 2003-05-06 15:04:37.000000000 +0100 +++ linux-2.5.69-pag/include/asm-i386/posix_types.h 2003-05-12 10:19:15.000000000 +0100 @@ -13,6 +13,7 @@ typedef unsigned short __kernel_nlink_t; typedef long __kernel_off_t; typedef int __kernel_pid_t; +typedef int __kernel_pag_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; diff -uNr linux-2.5.69/include/asm-i386/unistd.h linux-2.5.69-pag/include/asm-i386/unistd.h --- linux-2.5.69/include/asm-i386/unistd.h 2003-05-06 15:04:37.000000000 +0100 +++ linux-2.5.69-pag/include/asm-i386/unistd.h 2003-05-13 10:47:59.000000000 +0100 @@ -273,8 +273,10 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) +#define __NR_setpag 268 +#define __NR_getpag 269 -#define NR_syscalls 268 +#define NR_syscalls 270 /* user-visible error numbers are in the range -1 - -124: see */ diff -uNr linux-2.5.69/include/linux/cred.h linux-2.5.69-pag/include/linux/cred.h --- linux-2.5.69/include/linux/cred.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.5.69-pag/include/linux/cred.h 2003-05-14 10:57:01.000000000 +0100 @@ -0,0 +1,87 @@ +#ifndef _LINUX_CRED_H +#define _LINUX_CRED_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +/* + * VFS session authentication token cache + * + * This is used to store the data required for extra levels of filesystem + * security (such as AFS/NFS kerberos keys, Samba workgroup/user/pass, or NTFS + * ACLs). 
+ * + * VFS authentication tokens contain a single blob of data, consisting of three + * parts, all next to each other: + * (1) An FS name + * (2) A key + * (3) An arbitrary chunk of data + * + * Token blobs must not be changed once passed to the core kernel for + * management + */ +struct vfs_pag { + struct rb_node node; + atomic_t usage; + pag_t pag; /* Process Authentication Group ID */ + struct list_head tokens; /* authentication tokens */ + rwlock_t lock; +}; + +struct vfs_token { + atomic_t usage; + struct list_head link; /* link in pag's list */ + unsigned short k_off; /* offset of key in blob */ + unsigned short d_off; /* offset of data in blob */ + size_t size; /* size of blob */ + void *blob; /* blob containing key + data */ +}; + +extern pag_t vfs_join_pag(pag_t pag); +extern pag_t vfs_leave_pag(void); +extern pag_t vfs_new_pag(void); +extern long sys_setpag(pag_t); +extern long sys_getpag(void); +extern void vfs_unpag(const char *fsname); + +extern void vfs_pag_put(struct vfs_pag *); + +static inline struct vfs_pag *vfs_pag_get(struct vfs_pag *vfspag) +{ + atomic_inc(&vfspag->usage); + return vfspag; +} + +static inline int is_vfs_token_valid(struct vfs_token *vtoken) +{ + return !list_empty(&vtoken->link); +} + +extern int vfs_pag_add_token(const char *fsname, + unsigned short klen, + const void *key, + size_t dlen, + const void *data, + struct vfs_token **_token); + +extern struct vfs_token *vfs_pag_find_token(const char *fsname, + unsigned short klen, + const void *key); + +extern void vfs_pag_withdraw_token(struct vfs_token *vtoken); + +static inline struct vfs_token *vfs_token_get(struct vfs_token *vtoken) +{ + atomic_inc(&vtoken->usage); + return vtoken; +} + +extern void vfs_token_put(struct vfs_token *vtoken); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_CRED_H */ diff -uNr linux-2.5.69/include/linux/fs.h linux-2.5.69-pag/include/linux/fs.h --- linux-2.5.69/include/linux/fs.h 2003-05-13 11:02:22.000000000 +0100 +++ 
linux-2.5.69-pag/include/linux/fs.h 2003-05-13 11:02:35.000000000 +0100 @@ -430,6 +430,7 @@ mode_t f_mode; loff_t f_pos; struct fown_struct f_owner; + struct vfs_token *f_token; /* governing credential */ unsigned int f_uid, f_gid; int f_error; struct file_ra_state f_ra; diff -uNr linux-2.5.69/include/linux/sched.h linux-2.5.69-pag/include/linux/sched.h --- linux-2.5.69/include/linux/sched.h 2003-05-06 15:07:12.000000000 +0100 +++ linux-2.5.69-pag/include/linux/sched.h 2003-05-13 10:29:18.000000000 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include struct exec_domain; @@ -387,6 +388,7 @@ gid_t gid,egid,sgid,fsgid; int ngroups; gid_t groups[NGROUPS]; + struct vfs_pag *vfspag; kernel_cap_t cap_effective, cap_inheritable, cap_permitted; int keep_capabilities:1; struct user_struct *user; diff -uNr linux-2.5.69/include/linux/types.h linux-2.5.69-pag/include/linux/types.h --- linux-2.5.69/include/linux/types.h 2003-05-06 15:04:31.000000000 +0100 +++ linux-2.5.69-pag/include/linux/types.h 2003-05-12 10:19:08.000000000 +0100 @@ -24,6 +24,7 @@ typedef __kernel_nlink_t nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; +typedef __kernel_pag_t pag_t; typedef __kernel_daddr_t daddr_t; typedef __kernel_key_t key_t; typedef __kernel_suseconds_t suseconds_t; diff -uNr linux-2.5.69/init/main.c linux-2.5.69-pag/init/main.c --- linux-2.5.69/init/main.c 2003-05-06 15:07:12.000000000 +0100 +++ linux-2.5.69-pag/init/main.c 2003-05-13 14:08:11.000000000 +0100 @@ -80,6 +80,7 @@ extern void pidhash_init(void); extern void pidmap_init(void); extern void pte_chain_init(void); +extern void credentials_init(void); extern void radix_tree_init(void); extern void free_initmem(void); extern void populate_rootfs(void); @@ -434,6 +435,7 @@ pidmap_init(); pgtable_cache_init(); pte_chain_init(); + credentials_init(); fork_init(num_physpages); proc_caches_init(); security_scaffolding_startup(); diff -uNr linux-2.5.69/kernel/cred.c linux-2.5.69-pag/kernel/cred.c --- 
linux-2.5.69/kernel/cred.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-pag/kernel/cred.c	2003-05-14 11:37:36.000000000 +0100
@@ -0,0 +1,417 @@
+/* cred.c: authentication credentials management
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* NOTE(review): the header names were stripped from the archived copy of this
+ * patch; the list below is reconstructed from the symbols this file uses */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/rbtree.h>
+#include <linux/cred.h>
+
+static kmem_cache_t *vfs_token_cache;
+static kmem_cache_t *vfs_pag_cache;
+
+/* tree of all PAGs, keyed by PAG ID; vfs_pag_lock guards both the tree and the
+ * ID allocator */
+static struct rb_root vfs_pag_tree;
+static spinlock_t vfs_pag_lock = SPIN_LOCK_UNLOCKED;
+static pag_t vfs_pag_next = 1;
+
+/*
+ * slab constructor for PAG records: zero the record and set up its token list
+ * and lock
+ */
+static void vfs_pag_init_once(void *_vfspag, kmem_cache_t * cachep,
+			      unsigned long flags)
+{
+	struct vfs_pag *vfspag = _vfspag;
+
+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		memset(vfspag, 0, sizeof(*vfspag));
+		INIT_LIST_HEAD(&vfspag->tokens);
+		rwlock_init(&vfspag->lock);
+	}
+}
+
+/*
+ * slab constructor for tokens: zero the record and leave it unlinked
+ */
+static void vfs_token_init_once(void *_vtoken, kmem_cache_t * cachep,
+				unsigned long flags)
+{
+	struct vfs_token *vtoken = _vtoken;
+
+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		memset(vtoken, 0, sizeof(*vtoken));
+		INIT_LIST_HEAD(&vtoken->link);
+	}
+}
+
+/*
+ * create the slab caches for PAGs and tokens; panics if either cannot be
+ * created
+ */
+void __init credentials_init(void)
+{
+	vfs_pag_cache = kmem_cache_create("vfs_pag", sizeof(struct vfs_pag),
+					  0, 0, vfs_pag_init_once, NULL);
+	if (!vfs_pag_cache)
+		panic("Cannot create vfs pag SLAB cache");
+
+	vfs_token_cache = kmem_cache_create("vfs_token",
+					    sizeof(struct vfs_token),
+					    0, 0, vfs_token_init_once, NULL);
+	if (!vfs_token_cache)
+		panic("Cannot create vfs token SLAB cache");
+}
+
+/*
+ * make the current process a member of an existing PAG
+ * - does nothing if the process is already in that PAG
+ * - otherwise requires CAP_SYS_ADMIN
+ * - returns the PAG ID, -EPERM, or -ENOENT if there is no such PAG
+ */
+inline pag_t vfs_join_pag(pag_t pag)
+{
+	struct task_struct *tsk = current;
+	struct vfs_pag *vfspag, *xvfspag;
+	struct rb_node **p, *parent;
+
+	if (tsk->vfspag &&
+	    tsk->vfspag->pag == pag)
+		return pag;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	spin_lock(&vfs_pag_lock);
+
+	parent = NULL;
+	p = &vfs_pag_tree.rb_node;
+
+	while (*p) {
+		parent = *p;
+		vfspag = rb_entry(parent, struct vfs_pag, node);
+
+		if (pag < vfspag->pag)
+			p = &(*p)->rb_left;
+		else if (pag > vfspag->pag)
+			p = &(*p)->rb_right;
+		else
+			goto pag_found;
+	}
+
+	spin_unlock(&vfs_pag_lock);
+	return -ENOENT;
+
+ pag_found:
+	/* take the new reference before dropping the lock so that the final
+	 * vfs_pag_put() cannot free the PAG in between */
+	xvfspag = xchg(&tsk->vfspag, vfs_pag_get(vfspag));
+	spin_unlock(&vfs_pag_lock);
+
+	if (xvfspag)
+		vfs_pag_put(xvfspag);
+	return pag;
+}
+
+/*
+ * detach the current process from its PAG, if it has one
+ * - always returns 0
+ */
+inline pag_t vfs_leave_pag(void)
+{
+	struct vfs_pag *xvfspag;
+
+	xvfspag = xchg(&current->vfspag, NULL);
+
+	vfs_pag_put(xvfspag);
+	return 0;
+}
+
+/*
+ * create a new PAG with an unused ID and move the current process into it
+ * - returns the new PAG ID or -ENOMEM
+ */
+inline pag_t vfs_new_pag(void)
+{
+	struct vfs_pag *vfspag, *xvfspag;
+	struct rb_node **p, *parent;
+
+	vfspag = kmem_cache_alloc(vfs_pag_cache, SLAB_KERNEL);
+	if (!vfspag)
+		return -ENOMEM;
+
+	atomic_set(&vfspag->usage, 1);
+
+	spin_lock(&vfs_pag_lock);
+
+	/* pick the next candidate ID and descend the tree looking for it; if
+	 * it is already in use, start again from the root with the following
+	 * ID so that p and parent always describe an empty leaf slot */
+ try_next_id:
+	vfspag->pag = vfs_pag_next++;
+	if (vfspag->pag < 1) {
+		/* the counter wrapped - restart from the lowest valid ID */
+		vfspag->pag = 1;
+		vfs_pag_next = 2;
+	}
+
+	parent = NULL;
+	p = &vfs_pag_tree.rb_node;
+
+	while (*p) {
+		parent = *p;
+		xvfspag = rb_entry(parent, struct vfs_pag, node);
+
+		if (vfspag->pag < xvfspag->pag)
+			p = &(*p)->rb_left;
+		else if (vfspag->pag > xvfspag->pag)
+			p = &(*p)->rb_right;
+		else
+			goto try_next_id;
+	}
+
+	rb_link_node(&vfspag->node, parent, p);
+	rb_insert_color(&vfspag->node, &vfs_pag_tree);
+	spin_unlock(&vfs_pag_lock);
+
+	xvfspag = xchg(&current->vfspag, vfspag);
+	if (xvfspag)
+		vfs_pag_put(xvfspag);
+
+	return vfspag->pag;
+}
+
+/*
+ * join an existing PAG (+ve), run without PAG (0), or create and join new PAG (-1)
+ * - PAG IDs must be +ve, >0 and unique
+ * - returns ID of PAG joined or 0 if now running without a PAG
+ * - returns a -ve error code on failure (-EINVAL for any other argument)
+ */
+long sys_setpag(pag_t pag)
+{
+	if (pag > 0)
+		return vfs_join_pag(pag);
+	if (pag == 0)
+		return vfs_leave_pag();
+	if (pag == -1)
+		return vfs_new_pag();
+	return -EINVAL;
+}
+
+/*
+ * get the PAG of the current process, or 0 if it doesn't have one
+ */
+long sys_getpag(void)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+
+	return vfspag ? vfspag->pag : 0;
+}
+
+/*
+ * dispose of a VFS pag
+ * - drops a reference; on the last put the PAG is removed from the tree, its
+ *   tokens are released and the record is freed
+ * - a NULL pointer is ignored
+ */
+void vfs_pag_put(struct vfs_pag *vfspag)
+{
+	struct vfs_token *vtoken;
+
+	if (vfspag && atomic_dec_and_lock(&vfspag->usage, &vfs_pag_lock)) {
+		rb_erase(&vfspag->node, &vfs_pag_tree);
+		spin_unlock(&vfs_pag_lock);
+
+		while (!list_empty(&vfspag->tokens)) {
+			vtoken = list_entry(vfspag->tokens.next,
+					    struct vfs_token, link);
+			list_del_init(&vtoken->link);
+			vfs_token_put(vtoken);
+		}
+
+		kmem_cache_free(vfs_pag_cache, vfspag);
+	}
+}
+
+/*
+ * dispose of a VFS token
+ * - drops a reference; on the last put the blob and the token are freed
+ * - a NULL pointer is ignored
+ */
+void vfs_token_put(struct vfs_token *vtoken)
+{
+	if (vtoken && atomic_dec_and_test(&vtoken->usage)) {
+		kfree(vtoken->blob);
+		/* tokens are allocated from vfs_token_cache, not the PAG cache */
+		kmem_cache_free(vfs_token_cache, vtoken);
+	}
+}
+
+/*
+ * add an authentication token to the current process's pag list
+ * - the blob holds the NUL-terminated fsname, then klen bytes of key, then
+ *   dlen bytes of data
+ * - returns 0 and sets *_vtoken on success
+ * - returns -EACCES if the process has no PAG, -EINVAL if fsname plus key is
+ *   too long for the unsigned short offsets, or -ENOMEM
+ * - NOTE(review): the only reference taken is the one dropped when the token
+ *   leaves the list; *_vtoken does not appear to carry its own - confirm
+ *   against callers
+ */
+int vfs_pag_add_token(const char *fsname,
+		      unsigned short klen,
+		      const void *key,
+		      size_t dlen,
+		      const void *data,
+		      struct vfs_token **_vtoken)
+{
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+	size_t nlen;
+
+	*_vtoken = NULL;
+
+	if (!vfspag)
+		return -EACCES;
+
+	/* k_off and d_off are unsigned shorts; refuse lengths they cannot
+	 * hold, as truncated offsets would make the copies overrun the blob */
+	nlen = strlen(fsname) + 1;
+	if (nlen + klen > (unsigned short) ~0U)
+		return -EINVAL;
+
+	vtoken = kmem_cache_alloc(vfs_token_cache, SLAB_KERNEL);
+	if (!vtoken)
+		return -ENOMEM;
+
+	vtoken->k_off = nlen;
+	vtoken->d_off = vtoken->k_off + klen;
+	vtoken->size = vtoken->d_off + dlen;
+
+	vtoken->blob = kmalloc(vtoken->size, SLAB_KERNEL);
+	if (!vtoken->blob) {
+		/* the token came from a slab cache, so it cannot be kfree'd */
+		kmem_cache_free(vfs_token_cache, vtoken);
+		return -ENOMEM;
+	}
+
+	atomic_set(&vtoken->usage, 1);
+
+	memcpy(vtoken->blob, fsname, vtoken->k_off);
+	memcpy(vtoken->blob + vtoken->k_off, key, klen);
+	memcpy(vtoken->blob + vtoken->d_off, data, dlen);
+
+	write_lock(&vfspag->lock);
+	list_add_tail(&vtoken->link, &vfspag->tokens);
+	write_unlock(&vfspag->lock);
+
+	*_vtoken = vtoken;
+	return 0;
+}
+
+EXPORT_SYMBOL(vfs_pag_add_token);
+
+/*
+ * search for a token covering a particular filesystem key in the current
+ * process's pag list
+ * - returns the token with an extra reference held, or NULL if the process has
+ *   no PAG or nothing matches
+ */
+struct vfs_token *vfs_pag_find_token(const char *fsname,
+				     unsigned short klen,
+				     const void *key)
+{
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag)
+		return NULL;
+
+	read_lock(&vfspag->lock);
+
+	list_for_each_entry(vtoken, &vfspag->tokens, link) {
+		if (vtoken->d_off - vtoken->k_off == klen &&
+		    strcmp(vtoken->blob, fsname) == 0 &&
+		    memcmp(vtoken->blob + vtoken->k_off, key, klen) == 0)
+			goto found;
+	}
+
+	read_unlock(&vfspag->lock);
+	return NULL;
+
+ found:
+	vfs_token_get(vtoken);
+	read_unlock(&vfspag->lock);
+	return vtoken;
+}
+
+EXPORT_SYMBOL(vfs_pag_find_token);
+
+/*
+ * withdraw a token from a pag list
+ * - drops the list's reference; does nothing more if the token was already
+ *   off the list
+ * - NOTE(review): the lock taken is that of the current process's PAG, so the
+ *   token is presumably expected to be on that PAG's list - confirm
+ */
+void vfs_pag_withdraw_token(struct vfs_token *vtoken)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+	int was_linked;
+
+	if (!vfspag)
+		return;
+
+	write_lock(&vfspag->lock);
+	was_linked = !list_empty(&vtoken->link);
+	list_del_init(&vtoken->link);
+	write_unlock(&vfspag->lock);
+
+	/* only the list's reference is ours to drop */
+	if (was_linked)
+		vfs_token_put(vtoken);
+}
+
+EXPORT_SYMBOL(vfs_pag_withdraw_token);
+
+/*
+ * withdraw all tokens for the named filesystem from the current PAG
+ */
+void vfs_unpag(const char *fsname)
+{
+	struct list_head *_n, *_p;
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+
+	if (!vfspag)
+		return;
+
+	write_lock(&vfspag->lock);
+
+	list_for_each_safe(_p, _n, &vfspag->tokens) {
+		vtoken = list_entry(_p, struct vfs_token, link);
+
+		if (strcmp(fsname, vtoken->blob) == 0) {
+			list_del_init(&vtoken->link);
+			vfs_token_put(vtoken);
+		}
+	}
+
+	write_unlock(&vfspag->lock);
+}
diff -uNr linux-2.5.69/kernel/fork.c linux-2.5.69-pag/kernel/fork.c
--- linux-2.5.69/kernel/fork.c	2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-pag/kernel/fork.c	2003-05-14 11:12:12.000000000 +0100
@@ -884,6 +884,10 @@
 
 	if (clone_flags & CLONE_CHILD_SETTID)
 		p->set_child_tid = child_tidptr;
+
+	if (p->vfspag)
+		vfs_pag_get(p->vfspag);
+
 	/*
 	 * Clear TID on mm_release()?
 	 */
diff -uNr linux-2.5.69/kernel/Makefile linux-2.5.69-pag/kernel/Makefile
--- linux-2.5.69/kernel/Makefile	2003-05-06 15:04:56.000000000 +0100
+++ linux-2.5.69-pag/kernel/Makefile	2003-05-13 10:45:27.000000000 +0100
@@ -3,7 +3,7 @@
 #
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
-	    exit.o itimer.o time.o softirq.o resource.o \
+	    cred.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o futex.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/