2005-03-30 03:18:03

by Gerrit Huizenga

[permalink] [raw]
Subject: [patch 6/8] CKRM: Socket Class Controller


This patch provides the extensions for CKRM to track per-socket classes.
This is the base that enables socket-based resource control, such as
inbound connection control and bandwidth control.

Signed-Off-By: Vivek Kashyap <[email protected]>
Signed-Off-By: Gerrit Huizenga <[email protected]>
Signed-off-by: Nishanth Aravamudan <[email protected]>

Index: linux-2.6.12-rc1/fs/rcfs/Makefile
===================================================================
--- linux-2.6.12-rc1.orig/fs/rcfs/Makefile 2005-03-18 15:16:33.370482769 -0800
+++ linux-2.6.12-rc1/fs/rcfs/Makefile 2005-03-18 15:16:37.387163297 -0800
@@ -6,3 +6,4 @@ obj-$(CONFIG_RCFS_FS) += rcfs.o

rcfs-y := super.o inode.o dir.o rootdir.o magic.o
rcfs-$(CONFIG_CKRM_TYPE_TASKCLASS) += tc_magic.o
+rcfs-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += socket_fs.o
Index: linux-2.6.12-rc1/fs/rcfs/rootdir.c
===================================================================
--- linux-2.6.12-rc1.orig/fs/rcfs/rootdir.c 2005-03-18 15:16:33.372482610 -0800
+++ linux-2.6.12-rc1/fs/rcfs/rootdir.c 2005-03-18 15:16:37.387163297 -0800
@@ -187,6 +187,10 @@ EXPORT_SYMBOL_GPL(rcfs_deregister_classt
extern struct rcfs_mfdesc tc_mfdesc;
#endif

+#ifdef CONFIG_CKRM_TYPE_SOCKETCLASS
+extern struct rcfs_mfdesc rcfs_sock_mfdesc;
+#endif
+
/* Common root and magic file entries.
* root name, root permissions, magic file names and magic file permissions
* are needed by all entities (classtypes and classification engines) existing
@@ -203,4 +207,10 @@ struct rcfs_mfdesc *genmfdesc[CKRM_MAX_C
#else
NULL,
#endif
+#ifdef CONFIG_CKRM_TYPE_SOCKETCLASS
+ &rcfs_sock_mfdesc,
+#else
+ NULL,
+#endif
+
};
Index: linux-2.6.12-rc1/fs/rcfs/socket_fs.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12-rc1/fs/rcfs/socket_fs.c 2005-03-18 15:16:37.391162979 -0800
@@ -0,0 +1,280 @@
+/* socket_fs.c - rcfs directory support for the CKRM socket class type
+ *
+ * Copyright (C) Vivek Kashyap, IBM Corp. 2004
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/*******************************************************************************
+ * Socket class type
+ *
+ * Defines the root structure for socket based classes. Currently only inbound
+ * connection control is supported based on prioritized accept queues.
+ ******************************************************************************/
+
+#include <linux/rcfs.h>
+#include <net/tcp.h>
+
+extern int rcfs_create_noperm(struct inode *, struct dentry *, int,
+ struct nameidata *);
+extern int rcfs_symlink_noperm(struct inode *, struct dentry *, const char *);
+extern int rcfs_mkdir_noperm(struct inode *, struct dentry *, int);
+extern int rcfs_rmdir_noperm(struct inode *, struct dentry *);
+extern int rcfs_link_noperm(struct dentry *, struct inode *, struct dentry *);
+extern int rcfs_unlink_noperm(struct inode *, struct dentry *);
+extern int rcfs_mknod_noperm(struct inode *, struct dentry *, int mode, dev_t);
+
+extern int rcfs_rmdir(struct inode *, struct dentry *);
+extern int rcfs_unlink(struct inode *, struct dentry *);
+extern int rcfs_rename(struct inode *, struct dentry *, struct inode *,
+ struct dentry *);
+
+extern int rcfs_create_coredir(struct inode *, struct dentry *);
+
+int rcfs_sock_mkdir(struct inode *, struct dentry *, int mode);
+int rcfs_sock_rmdir(struct inode *, struct dentry *);
+struct inode_operations my_iops;
+struct inode_operations class_iops;
+struct inode_operations sub_iops;
+
+
+/*
+ * Template entry for the numbered sub-directories that rcfs_sock_mkdir()
+ * creates. Its .name is overwritten with the decimal index before each use,
+ * so this object is shared, mutable state.
+ */
+struct rcfs_magf def_magf = {
+ .mode = RCFS_DEFAULT_DIR_MODE,
+ .i_op = &sub_iops,
+ .i_fop = NULL,
+};
+
+/*
+ * Entries for the root of the socket classtype, published to rcfs through
+ * rcfs_sock_mfdesc.rootmf. The first entry carries a directory mode and no
+ * name (presumably it describes the root directory itself - confirm against
+ * the rcfs registration code); the others are the "members", "target" and
+ * "reclassify" files.
+ */
+struct rcfs_magf rcfs_sock_rootdesc[] = {
+ {
+ /* .name = should not be set, copy from classtype name, */
+ .mode = RCFS_DEFAULT_DIR_MODE,
+ .i_op = &my_iops,
+ /* .i_fop = &simple_dir_operations, */
+ .i_fop = NULL,
+ },
+ {
+ .name = "members",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &members_fileops,
+ },
+ {
+ .name = "target",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &target_fileops,
+ },
+ {
+ .name = "reclassify",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &reclassify_fileops,
+ },
+};
+
+/*
+ * Magic files that rcfs_sock_mkdir() creates directly inside every new
+ * socket class directory: config, members, shares, stats and target.
+ */
+struct rcfs_magf rcfs_sock_magf[] = {
+ {
+ .name = "config",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &config_fileops,
+ },
+ {
+ .name = "members",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &members_fileops,
+ },
+ {
+ .name = "shares",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &shares_fileops,
+ },
+ {
+ .name = "stats",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &stats_fileops,
+ },
+ {
+ .name = "target",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &target_fileops,
+ },
+};
+
+/*
+ * Magic files that rcfs_sock_mkdir() creates inside each numbered
+ * sub-directory of a socket class: config, shares and stats.
+ */
+struct rcfs_magf sub_magf[] = {
+ {
+ .name = "config",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &config_fileops,
+ },
+ {
+ .name = "shares",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &shares_fileops,
+ },
+ {
+ .name = "stats",
+ .mode = RCFS_DEFAULT_FILE_MODE,
+ .i_op = &my_iops,
+ .i_fop = &stats_fileops,
+ },
+};
+
+/*
+ * Root magic-file descriptor for the socket classtype. rootdir.c places a
+ * pointer to it in genmfdesc[] when CONFIG_CKRM_TYPE_SOCKETCLASS is set.
+ */
+struct rcfs_mfdesc rcfs_sock_mfdesc = {
+ .rootmf = rcfs_sock_rootdesc,
+ .rootmflen = (sizeof(rcfs_sock_rootdesc) / sizeof(struct rcfs_magf)),
+};
+
+/* Number of entries in rcfs_sock_magf[] and sub_magf[] respectively. */
+#define SOCK_MAX_MAGF (sizeof(rcfs_sock_magf)/sizeof(struct rcfs_magf))
+#define LAQ_MAX_SUBMAGF (sizeof(sub_magf)/sizeof(struct rcfs_magf))
+
+/*
+ * rcfs_sock_rmdir - remove a socket class directory.
+ * @p:  inode of the parent directory
+ * @me: dentry of the directory being removed
+ *
+ * Removes every child of @me that is a directory through rcfs_rmdir(), then
+ * removes @me itself. Returns 0 on success, or the first non-zero value
+ * returned by rcfs_rmdir().
+ */
+int rcfs_sock_rmdir(struct inode *p, struct dentry *me)
+{
+	struct dentry *child, *next;
+	int ret;
+
+	/* delete all magic sub directories */
+	list_for_each_entry_safe(child, next, &me->d_subdirs, d_child) {
+		/*
+		 * A child dentry may be negative (no inode attached), for
+		 * instance after a lookup of a name that does not exist.
+		 * Skip those instead of dereferencing a NULL d_inode.
+		 */
+		if (!child->d_inode || !S_ISDIR(child->d_inode->i_mode))
+			continue;
+
+		ret = rcfs_rmdir(me->d_inode, child);
+		if (ret)
+			return ret;
+	}
+
+	/* delete ourselves */
+	return rcfs_rmdir(p, me);
+}
+
+/* Number of accept queues per class; 0 when the kernel defines none. */
+#ifdef NUM_ACCEPT_QUEUES
+#define LAQ_NUM_ACCEPT_QUEUES NUM_ACCEPT_QUEUES
+#else
+#define LAQ_NUM_ACCEPT_QUEUES 0
+#endif
+
+/*
+ * rcfs_sock_mkdir - create a socket class directory and populate it.
+ * @dir:    inode of the parent directory
+ * @dentry: dentry of the directory to create
+ * @mode:   permission bits; S_IFDIR is added here
+ *
+ * Creates the directory node, attaches a core class to it, creates the magic
+ * files listed in rcfs_sock_magf[], and then creates one sub-directory named
+ * "1" .. "LAQ_NUM_ACCEPT_QUEUES - 1", each with its own core class and the
+ * magic files listed in sub_magf[].
+ *
+ * Returns 0 on success, or the non-zero value reported by rcfs_mknod() or
+ * rcfs_create_coredir() on failure.
+ *
+ * NOTE(review): the result of rcfs_create_internal() is used unchecked, and
+ * the error path does not remove entries that were already created -
+ * confirm whether callers clean up.
+ */
+int rcfs_sock_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int retval;
+	int i, j;
+	struct dentry *pentry, *mfdentry;
+
+	retval = rcfs_mknod(dir, dentry, mode | S_IFDIR, 0);
+	if (retval) {
+		printk(KERN_ERR "rcfs_sock_mkdir: error reaching parent\n");
+		/* report the failure to the caller instead of success */
+		return retval;
+	}
+	/* Needed if only rcfs_mknod is used instead of i_op->mkdir */
+	dir->i_nlink++;
+
+	retval = rcfs_create_coredir(dir, dentry);
+	if (retval)
+		goto mkdir_err;
+
+	/* create the default set of magic files */
+	for (i = 0; i < SOCK_MAX_MAGF; i++) {
+		mfdentry = rcfs_create_internal(dentry, &rcfs_sock_magf[i], 0);
+		mfdentry->d_fsdata = &RCFS_IS_MAGIC;
+		/* magic files share the core class of their directory */
+		rcfs_get_inode_info(mfdentry->d_inode)->core =
+			rcfs_get_inode_info(dentry->d_inode)->core;
+		rcfs_get_inode_info(mfdentry->d_inode)->mfdentry = mfdentry;
+		if (rcfs_sock_magf[i].i_fop)
+			mfdentry->d_inode->i_fop = rcfs_sock_magf[i].i_fop;
+		if (rcfs_sock_magf[i].i_op)
+			mfdentry->d_inode->i_op = rcfs_sock_magf[i].i_op;
+	}
+
+	/* one numbered sub-directory per additional accept queue */
+	for (i = 1; i < LAQ_NUM_ACCEPT_QUEUES; i++) {
+		/* sprintf() NUL-terminates the name */
+		sprintf(def_magf.name, "%d", i);
+
+		pentry = rcfs_create_internal(dentry, &def_magf, 0);
+		retval = rcfs_create_coredir(dentry->d_inode, pentry);
+		if (retval)
+			goto mkdir_err;
+		pentry->d_fsdata = &RCFS_IS_MAGIC;
+		for (j = 0; j < LAQ_MAX_SUBMAGF; j++) {
+			mfdentry =
+				rcfs_create_internal(pentry, &sub_magf[j], 0);
+			mfdentry->d_fsdata = &RCFS_IS_MAGIC;
+			rcfs_get_inode_info(mfdentry->d_inode)->core =
+				rcfs_get_inode_info(pentry->d_inode)->core;
+			rcfs_get_inode_info(mfdentry->d_inode)->mfdentry =
+				mfdentry;
+			if (sub_magf[j].i_fop)
+				mfdentry->d_inode->i_fop = sub_magf[j].i_fop;
+			if (sub_magf[j].i_op)
+				mfdentry->d_inode->i_op = sub_magf[j].i_op;
+		}
+		pentry->d_inode->i_op = &sub_iops;
+	}
+	dentry->d_inode->i_op = &class_iops;
+	return 0;
+
+ mkdir_err:
+	/* undo the link count taken after rcfs_mknod() */
+	dir->i_nlink--;
+	return retval;
+}
+
+/*
+ * rcfs_sock_get_name - return the last path component of a class name.
+ * @c: core class whose ->name is inspected
+ *
+ * Returns a pointer into @c->name just past the final '/'. When the name
+ * contains no '/', the whole name is returned (an empty name yields an
+ * empty string). No memory is allocated.
+ */
+char *rcfs_sock_get_name(struct ckrm_core_class *c)
+{
+	char *start = (char *)c->name;
+	char *p = start;
+
+	/* find the terminating NUL */
+	while (*p)
+		p++;
+	/* walk back to the last '/' or to the start of the string */
+	while (p != start && *p != '/')
+		p--;
+
+	/* only step over a separator that is really there */
+	return (*p == '/') ? p + 1 : p;
+}
+
+
+
+/*
+ * Inode operations referenced by the .i_op field of the classtype root
+ * entries and of every magic file table in this file. mkdir and rmdir are
+ * routed to the socket-specific handlers defined above.
+ */
+struct inode_operations my_iops = {
+ .create = rcfs_create_noperm,
+ .lookup = simple_lookup,
+ .link = rcfs_link_noperm,
+ .unlink = rcfs_unlink,
+ .symlink = rcfs_symlink_noperm,
+ .mkdir = rcfs_sock_mkdir,
+ .rmdir = rcfs_sock_rmdir,
+ .mknod = rcfs_mknod_noperm,
+ .rename = rcfs_rename,
+};
+
+/*
+ * Inode operations that rcfs_sock_mkdir() installs on a class directory
+ * once it has been populated. Everything except lookup and rename uses the
+ * rcfs *_noperm handlers.
+ */
+struct inode_operations class_iops = {
+ .create = rcfs_create_noperm,
+ .lookup = simple_lookup,
+ .link = rcfs_link_noperm,
+ .unlink = rcfs_unlink_noperm,
+ .symlink = rcfs_symlink_noperm,
+ .mkdir = rcfs_mkdir_noperm,
+ .rmdir = rcfs_rmdir_noperm,
+ .mknod = rcfs_mknod_noperm,
+ .rename = rcfs_rename,
+};
+
+/*
+ * Inode operations for the numbered sub-directories created by
+ * rcfs_sock_mkdir() (also the .i_op of def_magf). The entries are currently
+ * identical to class_iops.
+ */
+struct inode_operations sub_iops = {
+ .create = rcfs_create_noperm,
+ .lookup = simple_lookup,
+ .link = rcfs_link_noperm,
+ .unlink = rcfs_unlink_noperm,
+ .symlink = rcfs_symlink_noperm,
+ .mkdir = rcfs_mkdir_noperm,
+ .rmdir = rcfs_rmdir_noperm,
+ .mknod = rcfs_mknod_noperm,
+ .rename = rcfs_rename,
+};
+
Index: linux-2.6.12-rc1/include/linux/ckrm_net.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12-rc1/include/linux/ckrm_net.h 2005-03-18 15:16:37.392162899 -0800
@@ -0,0 +1,42 @@
+/* ckrm_net.h - Per-socket bookkeeping structure for the CKRM socket class type
+ *
+ * Copyright (C) Vivek Kashyap , IBM Corp. 2004
+ *
+ * Provides data structures, macros and kernel API of CKRM for
+ * resource controllers.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef _LINUX_CKRM_NET_H
+#define _LINUX_CKRM_NET_H
+
+struct ckrm_sock_class;
+
+/*
+ * Bookkeeping record that ties one socket to its CKRM socket class.
+ * Reference counted through ns_refcnt and linked on a class's object list
+ * through ckrm_link.
+ */
+struct ckrm_net_struct {
+ int ns_type; /* type of net class */
+ struct sock *ns_sk; /* pointer to socket */
+ pid_t ns_tgid; /* real process id */
+ pid_t ns_pid; /* calling thread's pid */
+ struct task_struct *ns_tsk;
+ int ns_family; /* address family: AF_INET or AF_INET6 */
+ /* Currently only IPV4 is supported */
+ union {
+ __u32 ns_dipv4; /* V4 listener's address */
+ } ns_daddr;
+ __u16 ns_dport; /* listener's port */
+ __u16 ns_sport; /* sender's port */
+ atomic_t ns_refcnt;
+ struct ckrm_sock_class *core;
+ struct list_head ckrm_link;
+};
+
+#define ns_daddrv4 ns_daddr.ns_dipv4
+
+#endif
Index: linux-2.6.12-rc1/include/net/sock.h
===================================================================
--- linux-2.6.12-rc1.orig/include/net/sock.h 2005-03-17 17:34:23.000000000 -0800
+++ linux-2.6.12-rc1/include/net/sock.h 2005-03-18 15:16:37.393162820 -0800
@@ -112,6 +112,8 @@ struct sock_common {
atomic_t skc_refcnt;
};

+struct ckrm_net_struct;
+
/**
* struct sock - network layer representation of sockets
* @__sk_common - shared layout with tcp_tw_bucket
@@ -236,6 +238,7 @@ struct sock {
struct timeval sk_stamp;
struct socket *sk_socket;
void *sk_user_data;
+ struct ckrm_net_struct *sk_ckrm_ns;
struct module *sk_owner;
struct page *sk_sndmsg_page;
struct sk_buff *sk_send_head;
Index: linux-2.6.12-rc1/include/net/tcp.h
===================================================================
--- linux-2.6.12-rc1.orig/include/net/tcp.h 2005-03-17 17:33:53.000000000 -0800
+++ linux-2.6.12-rc1/include/net/tcp.h 2005-03-18 15:16:37.396162581 -0800
@@ -800,6 +800,7 @@ extern int tcp_rcv_established(struct

extern void tcp_rcv_space_adjust(struct sock *sk);

+
enum tcp_ack_state_t
{
TCP_ACK_SCHED = 1,
@@ -930,6 +931,9 @@ extern void tcp_unhash(struct sock *sk

extern int tcp_v4_hash_connecting(struct sock *sk);

+extern struct sock * tcp_v4_lookup_listener(u32 daddr,
+ unsigned short hnum,
+ int dif);

/* From syncookies.c */
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
Index: linux-2.6.12-rc1/init/Kconfig
===================================================================
--- linux-2.6.12-rc1.orig/init/Kconfig 2005-03-18 15:16:33.376482292 -0800
+++ linux-2.6.12-rc1/init/Kconfig 2005-03-18 15:16:37.397162502 -0800
@@ -174,6 +174,17 @@ config CKRM_TYPE_TASKCLASS

Say Y if unsure

+config CKRM_TYPE_SOCKETCLASS
+ bool "Class Manager for socket groups"
+ depends on CKRM && RCFS_FS
+ default y
+ help
+ SOCKET provides the extensions for CKRM to track per socket
+ classes. This is the base to enable socket based resource
+ control for inbound connection control, bandwidth control etc.
+
+ Say Y if unsure.
+
endmenu

config SYSCTL
Index: linux-2.6.12-rc1/kernel/ckrm/ckrm_sockc.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12-rc1/kernel/ckrm/ckrm_sockc.c 2005-03-18 15:16:37.399162343 -0800
@@ -0,0 +1,559 @@
+/* ckrm_sockc.c - Class-based Kernel Resource Management (CKRM)
+ *
+ * Copyright (C) Hubertus Franke, IBM Corp. 2003,2004
+ * (C) Shailabh Nagar, IBM Corp. 2003
+ * (C) Chandra Seetharaman, IBM Corp. 2003
+ * (C) Vivek Kashyap, IBM Corp. 2004
+ *
+ *
+ * Provides kernel API of CKRM for in-kernel,per-resource controllers
+ * (one each for cpu, memory, io, network) and callbacks for
+ * classification modules.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <asm/errno.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/ckrm_rc.h>
+#include <linux/parser.h>
+#include <net/tcp.h>
+
+#include <linux/ckrm_net.h>
+
+/* A socket class; it currently wraps nothing but the generic core class. */
+struct ckrm_sock_class {
+ struct ckrm_core_class core;
+};
+
+/* Default socket class, used when classification yields no class. */
+static struct ckrm_sock_class ckrm_sockclass_dflt_class = {
+};
+
+#define SOCKET_CLASS_TYPE_NAME "socketclass"
+
+/* Name given to the default class; also the classtype name. */
+const char *dflt_sockclass_name = SOCKET_CLASS_TYPE_NAME;
+
+static struct ckrm_core_class *ckrm_sock_alloc_class(struct ckrm_core_class *parent,
+ const char *name);
+static int ckrm_sock_free_class(struct ckrm_core_class *core);
+
+static int ckrm_sock_forced_reclassify(struct ckrm_core_class * target,
+ const char *resname);
+static int ckrm_sock_show_members(struct ckrm_core_class *core,
+ struct seq_file *seq);
+static void ckrm_sock_add_resctrl(struct ckrm_core_class *core, int resid);
+static void ckrm_sock_reclassify_class(struct ckrm_sock_class *cls);
+
+/*
+ * Classtype descriptor for socket classes. It is registered with the CKRM
+ * core by ckrm_meta_init_sockclass().
+ *
+ * NOTE(review): .mfidx = 1 presumably selects the rcfs_sock_mfdesc slot in
+ * rcfs's genmfdesc[] - confirm against fs/rcfs/rootdir.c.
+ */
+struct ckrm_classtype ct_sockclass = {
+ .mfidx = 1,
+ .name = SOCKET_CLASS_TYPE_NAME,
+ .type_id = CKRM_CLASSTYPE_SOCKET_CLASS,
+ .maxdepth = 3,
+ .resid_reserved = 0,
+ .max_res_ctlrs = CKRM_MAX_RES_CTLRS,
+ .max_resid = 0,
+ .bit_res_ctlrs = 0L,
+ .res_ctlrs_lock = SPIN_LOCK_UNLOCKED,
+ .classes = LIST_HEAD_INIT(ct_sockclass.classes),
+
+ .default_class = &ckrm_sockclass_dflt_class.core,
+
+ /* private version of functions */
+ .alloc = &ckrm_sock_alloc_class,
+ .free = &ckrm_sock_free_class,
+ .show_members = &ckrm_sock_show_members,
+ .forced_reclassify = &ckrm_sock_forced_reclassify,
+
+ /* use of default functions */
+ .show_shares = &ckrm_class_show_shares,
+ .show_stats = &ckrm_class_show_stats,
+ .show_config = &ckrm_class_show_config,
+ .set_config = &ckrm_class_set_config,
+ .set_shares = &ckrm_class_set_shares,
+ .reset_stats = &ckrm_class_reset_stats,
+
+ /* Mandatory private version. No default available */
+ .add_resctrl = &ckrm_sock_add_resctrl,
+};
+
+/* helper functions */
+
+/* Take one reference on a net_struct. */
+void ckrm_ns_hold(struct ckrm_net_struct *ns)
+{
+	atomic_inc(&ns->ns_refcnt);
+}
+
+/* Drop one reference on a net_struct; the last reference frees it. */
+void ckrm_ns_put(struct ckrm_net_struct *ns)
+{
+	if (!atomic_dec_and_test(&ns->ns_refcnt))
+		return;
+
+	kfree(ns);
+}
+
+/*
+ * Change the class of a netstruct
+ *
+ * Attach "ns" to "newcls" (set ns->core and link it on the class's object
+ * list under the class lock), then call each registered resource
+ * controller's change_resclass() for the move from "oldcls" to "newcls".
+ * If "newcls" is NULL or equal to "oldcls", only ns->core is set (to
+ * "oldcls") and nothing else happens. "event" is not used here.
+ *
+ * NOTE(review): no class reference is taken or dropped in this function -
+ * confirm that callers balance ckrm_core_grab()/ckrm_core_drop().
+ */
+
+static void
+ckrm_sock_set_class(struct ckrm_net_struct *ns, struct ckrm_sock_class *newcls,
+ struct ckrm_sock_class *oldcls, enum ckrm_event event)
+{
+ int i;
+ struct ckrm_res_ctlr *rcbs;
+ struct ckrm_classtype *clstype;
+ void *old_res_class, *new_res_class;
+
+ if ((newcls == oldcls) || (newcls == NULL)) {
+ ns->core = (void *)oldcls;
+ return;
+ }
+
+ class_lock(class_core(newcls));
+ ns->core = newcls;
+ list_add(&ns->ckrm_link, &class_core(newcls)->objlist);
+ class_unlock(class_core(newcls));
+
+ clstype = class_isa(newcls);
+ /* nr_resusers[i] is raised around each controller callback */
+ for (i = 0; i < clstype->max_resid; i++) {
+ atomic_inc(&clstype->nr_resusers[i]);
+ old_res_class =
+ oldcls ? class_core(oldcls)->res_class[i] : NULL;
+ new_res_class =
+ newcls ? class_core(newcls)->res_class[i] : NULL;
+ rcbs = clstype->res_ctlrs[i];
+ if (rcbs && rcbs->change_resclass
+ && (old_res_class != new_res_class))
+ (*rcbs->change_resclass) (ns, old_res_class,
+ new_res_class);
+ atomic_dec(&clstype->nr_resusers[i]);
+ }
+ return;
+}
+
+/*
+ * Tell a newly added resource controller about every net_struct that is
+ * already a member of "core". Does nothing if "resid" is out of range or
+ * no controller is registered for it.
+ */
+static void ckrm_sock_add_resctrl(struct ckrm_core_class *core, int resid)
+{
+	struct ckrm_net_struct *ns;
+	struct ckrm_res_ctlr *rcbs;
+
+	if (resid < 0 || resid >= CKRM_MAX_RES_CTLRS)
+		return;
+
+	rcbs = core->classtype->res_ctlrs[resid];
+	if (rcbs == NULL)
+		return;
+
+	class_lock(core);
+	list_for_each_entry(ns, &core->objlist, ckrm_link) {
+		if (!rcbs->change_resclass)
+			continue;
+		(*rcbs->change_resclass) (ns, NULL, core->res_class[resid]);
+	}
+	class_unlock(core);
+}
+
+/**************************************************************************
+ * Functions called from classification points *
+ **************************************************************************/
+
+/*
+ * cb_sockclass_listen_start - CKRM_EVENT_LISTEN_START callback.
+ * @sk: socket entering the listen state (IPv6 sockets are ignored)
+ *
+ * Allocates a ckrm_net_struct for @sk and records the listener's address,
+ * port and the current task. It runs the classification callback, falling
+ * back to the default socket class when that yields no class, and links the
+ * net_struct on the class's object list. It then attaches the net_struct to
+ * @sk->sk_ckrm_ns with a socket reference held, and finally notifies every
+ * registered resource controller. Returns silently if the allocation fails.
+ *
+ * NOTE(review): ns_tsk caches "current" without taking a task reference -
+ * confirm the task cannot go away while the net_struct is reachable.
+ */
+static void cb_sockclass_listen_start(struct sock *sk)
+{
+	struct ckrm_net_struct *ns;
+	struct ckrm_sock_class *newcls = NULL;
+	struct ckrm_res_ctlr *rcbs;
+	struct ckrm_classtype *clstype;
+	int i;
+
+	/* XXX - TBD ipv6 */
+	if (sk->sk_family == AF_INET6)
+		return;
+
+	/* to store the socket address */
+	ns = kmalloc(sizeof(*ns), GFP_ATOMIC);
+	if (!ns)
+		return;
+
+	memset(ns, 0, sizeof(*ns));
+	INIT_LIST_HEAD(&ns->ckrm_link);
+	ckrm_ns_hold(ns);
+
+	/*
+	 * IPv6 sockets were already rejected before the allocation, so no
+	 * second family check is made here; an early return at this point
+	 * would leak ns.
+	 */
+	ns->ns_family = sk->sk_family;
+
+	ns->ns_daddrv4 = inet_sk(sk)->rcv_saddr;
+	ns->ns_dport = inet_sk(sk)->num;
+
+	ns->ns_pid = current->pid;
+	ns->ns_tgid = current->tgid;
+	ns->ns_tsk = current;
+	ce_protect(&ct_sockclass);
+	CE_CLASSIFY_RET(newcls, &ct_sockclass, CKRM_EVENT_LISTEN_START, ns,
+			current);
+	ce_release(&ct_sockclass);
+
+	if (newcls == NULL) {
+		/* no class from classification: use the default class */
+		newcls = &ckrm_sockclass_dflt_class;
+		ckrm_core_grab(class_core(newcls));
+	}
+
+	class_lock(class_core(newcls));
+	list_add(&ns->ckrm_link, &class_core(newcls)->objlist);
+	ns->core = newcls;
+	class_unlock(class_core(newcls));
+
+	/*
+	 * the socket is already locked
+	 * take a reference on socket on our behalf
+	 */
+	sock_hold(sk);
+	sk->sk_ckrm_ns = ns;
+	ns->ns_sk = sk;
+
+	/* modify its shares */
+	clstype = class_isa(newcls);
+	for (i = 0; i < clstype->max_resid; i++) {
+		atomic_inc(&clstype->nr_resusers[i]);
+		rcbs = clstype->res_ctlrs[i];
+		if (rcbs && rcbs->change_resclass)
+			(*rcbs->change_resclass) (ns, NULL,
+						  class_core(newcls)->
+						  res_class[i]);
+		atomic_dec(&clstype->nr_resusers[i]);
+	}
+}
+
+/*
+ * cb_sockclass_listen_stop - CKRM_EVENT_LISTEN_STOP callback.
+ * @sk: socket leaving the listen state (IPv6 sockets are ignored)
+ *
+ * Unlinks the socket's net_struct from its class, drops the class and
+ * socket references, and releases the net_struct.
+ */
+static void cb_sockclass_listen_stop(struct sock *sk)
+{
+	struct ckrm_net_struct *ns;
+	struct ckrm_sock_class *cls;
+
+	/* XXX - TBD ipv6 */
+	if (sk->sk_family == AF_INET6)
+		return;
+
+	ns = sk->sk_ckrm_ns;
+	if (ns == NULL)	/* listen_start called before socket_aq was loaded */
+		return;
+
+	cls = ns->core;
+	if (cls != NULL) {
+		class_lock(class_core(cls));
+		list_del(&ns->ckrm_link);
+		INIT_LIST_HEAD(&ns->ckrm_link);
+		class_unlock(class_core(cls));
+		ckrm_core_drop(class_core(cls));
+	}
+
+	/* the socket is already locked */
+	sk->sk_ckrm_ns = NULL;
+	sock_put(sk);
+
+	/* Should be the last count and free it */
+	ckrm_ns_put(ns);
+}
+
+/*
+ * Event hooks handed to ckrm_register_event_set() by
+ * ckrm_meta_init_sockclass(). The entry with event id -1 ends the table.
+ */
+static struct ckrm_event_spec ckrm_sock_events_callbacks[] = {
+ {CKRM_EVENT_LISTEN_START, {cb_sockclass_listen_start, NULL}},
+ {CKRM_EVENT_LISTEN_STOP, {cb_sockclass_listen_stop, NULL}},
+ {-1, {NULL, NULL}}
+};
+
+/**************************************************************************
+ * Class Object Creation / Destruction
+ **************************************************************************/
+
+/*
+ * Allocate and initialise a new socket class below "parent", then report
+ * it to the classification engine if one has a class_add callback active.
+ * Returns the embedded core class, or NULL when the allocation fails.
+ */
+static struct ckrm_core_class *ckrm_sock_alloc_class(struct ckrm_core_class *parent,
+						     const char *name)
+{
+	struct ckrm_sock_class *cls;
+
+	cls = kmalloc(sizeof(*cls), GFP_KERNEL);
+	if (!cls)
+		return NULL;
+	memset(cls, 0, sizeof(*cls));
+
+	ckrm_init_core_class(&ct_sockclass, class_core(cls), parent, name);
+
+	ce_protect(&ct_sockclass);
+	if (ct_sockclass.ce_cb_active && ct_sockclass.ce_callbacks.class_add)
+		(*ct_sockclass.ce_callbacks.class_add) (name, cls,
+							ct_sockclass.type_id);
+	ce_release(&ct_sockclass);
+
+	return class_core(cls);
+}
+
+/*
+ * Release a socket class. Returns -EINVAL for an invalid core and 0
+ * otherwise. The default class is never released; only its name is reset.
+ * For any other class the classification engine is notified, the members
+ * are reclassified, and the core class is released.
+ */
+static int ckrm_sock_free_class(struct ckrm_core_class *core)
+{
+	struct ckrm_sock_class *cls;
+
+	if (!ckrm_is_core_valid(core))
+		return -EINVAL;	/* Invalid core */
+
+	if (core->classtype->default_class == core) {
+		/* reset the name tag */
+		core->name = dflt_sockclass_name;
+		return 0;
+	}
+
+	cls = class_type(struct ckrm_sock_class, core);
+
+	ce_protect(&ct_sockclass);
+	if (ct_sockclass.ce_cb_active && ct_sockclass.ce_callbacks.class_delete)
+		(*ct_sockclass.ce_callbacks.class_delete) (core->name, cls,
+							   ct_sockclass.type_id);
+	ckrm_sock_reclassify_class(cls);
+	ce_release(&ct_sockclass);
+
+	ckrm_release_core_class(core);
+	/* Could just drop the class? Error message? */
+
+	return 0;
+}
+
+/*
+ * Print one line per member of "core" to "seq", in the form
+ * "a.b.c.d\port". The object list is walked under the class lock.
+ * Always returns 0.
+ */
+static int ckrm_sock_show_members(struct ckrm_core_class *core, struct seq_file *seq)
+{
+	struct ckrm_net_struct *ns;
+
+	class_lock(core);
+	list_for_each_entry(ns, &core->objlist, ckrm_link)
+		seq_printf(seq, "%d.%d.%d.%d\\%d\n",
+			   NIPQUAD(ns->ns_daddrv4), ns->ns_dport);
+	class_unlock(core);
+
+	return 0;
+}
+
+/*
+ * Move the listening socket described by "tns" (IPv4 address and port)
+ * into the class "core".
+ *
+ * Returns 0 when the socket was moved, -EOPNOTSUPP for IPv6, and -EPERM
+ * when the caller lacks CAP_NET_ADMIN and is not the user that owns the
+ * listener. It returns -EINVAL when the core is invalid, no listener
+ * matches, the socket has no net_struct, or the socket has no current
+ * class or is already in "core".
+ *
+ * NOTE(review): ns->ns_tsk is dereferenced for the ownership check;
+ * confirm that the task pointer is still valid at this point.
+ */
+static int
+ckrm_sock_forced_reclassify_ns(struct ckrm_net_struct *tns,
+ struct ckrm_core_class *core)
+{
+ struct ckrm_net_struct *ns = NULL;
+ struct sock *sk = NULL;
+ struct ckrm_sock_class *oldcls, *newcls;
+ int rc = -EINVAL;
+
+ if (!ckrm_is_core_valid(core)) {
+ return rc;
+ }
+
+ newcls = class_type(struct ckrm_sock_class, core);
+ /*
+ * lookup the listening sockets
+ * returns with a reference count set on socket
+ */
+ if (tns->ns_family == AF_INET6)
+ return -EOPNOTSUPP;
+
+ sk = tcp_v4_lookup_listener(tns->ns_daddrv4, tns->ns_dport, 0);
+ if (!sk) {
+ printk(KERN_INFO "No such listener 0x%x:%d\n",
+ tns->ns_daddrv4, tns->ns_dport);
+ return rc;
+ }
+ lock_sock(sk);
+ if (!sk->sk_ckrm_ns) {
+ goto out;
+ }
+ ns = sk->sk_ckrm_ns;
+ /* keep the net_struct alive while it is moved between classes */
+ ckrm_ns_hold(ns);
+ if (!capable(CAP_NET_ADMIN) && (ns->ns_tsk->user != current->user)) {
+ ckrm_ns_put(ns);
+ rc = -EPERM;
+ goto out;
+ }
+
+ oldcls = ns->core;
+ if ((oldcls == NULL) || (oldcls == newcls)) {
+ ckrm_ns_put(ns);
+ goto out;
+ }
+ /* remove the net_struct from the current class */
+ class_lock(class_core(oldcls));
+ list_del(&ns->ckrm_link);
+ INIT_LIST_HEAD(&ns->ckrm_link);
+ ns->core = NULL;
+ class_unlock(class_core(oldcls));
+
+ ckrm_sock_set_class(ns, newcls, oldcls, CKRM_EVENT_MANUAL);
+ ckrm_ns_put(ns);
+ rc = 0;
+ out:
+ /* drop the socket lock and the reference taken by the lookup */
+ release_sock(sk);
+ sock_put(sk);
+
+ return rc;
+
+}
+
+/* Option kinds recognised by ckrm_sock_forced_reclassify(). */
+enum ckrm_sock_target_token {
+ IPV4, IPV6, SOCKC_TARGET_ERR
+};
+
+/* Patterns for match_token(); the NULL pattern ends the table. */
+static match_table_t ckrm_sock_target_tokens = {
+ {IPV4, "ipv4=%s"},
+ {IPV6, "ipv6=%s"},
+ {SOCKC_TARGET_ERR, NULL},
+};
+
+/*
+ * v4toi - parse dot-separated decimal fields into one 32-bit value.
+ * @s: input text
+ * @c: delimiter at which parsing stops (the end of string also stops it)
+ * @v: receives the result; each '.' shifts the accumulated value left by
+ *     8 bits before the next field is OR-ed in
+ *
+ * Returns a pointer to the delimiter (or to the terminating NUL). Input
+ * characters are not validated as digits.
+ */
+char *v4toi(char *s, char c, __u32 * v)
+{
+	unsigned int acc = 0, field = 0;
+
+	for (; *s && (*s != c); s++) {
+		if (*s != '.') {
+			field = field * 10 + *s - '0';
+			continue;
+		}
+		acc = (acc << 8) | field;
+		field = 0;
+	}
+
+	*v = (acc << 8) | field;
+
+	return s;
+}
+
+/*
+ * ckrm_sock_forced_reclassify - handle a write to a class's target file.
+ * @target:  class to move listeners into, or NULL for "reclassify all"
+ * @options: comma separated list of "ipv4=a.b.c.d\port[:pid]" entries;
+ *           the string is modified in place by strsep()
+ *
+ * With a NULL @target only the value 0 is accepted (CAP_NET_ADMIN
+ * required) and nothing is done yet. Otherwise each entry is parsed and
+ * passed to ckrm_sock_forced_reclassify_ns(); processing stops at the
+ * first entry that fails.
+ *
+ * Returns 0 when every entry was applied, -EINVAL for missing, empty or
+ * malformed options, -ENOSYS for "ipv6=" entries, -EPERM as above, or the
+ * error returned by ckrm_sock_forced_reclassify_ns().
+ */
+static int
+ckrm_sock_forced_reclassify(struct ckrm_core_class *target, const char *options)
+{
+	char *p, *p2;
+	struct ckrm_net_struct ns;
+	__u32 v4addr, tmp;
+	int rc = -EINVAL;	/* result when no entry was processed */
+
+	if (!options)
+		return -EINVAL;
+
+	if (target == NULL) {
+		unsigned long id = simple_strtol(options,NULL,0);
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (id != 0)
+			return -EINVAL;
+		printk("ckrm_sock_class: reclassify all not net implemented\n");
+		return 0;
+	}
+
+	while ((p = strsep((char **)&options, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int token;
+
+		if (!*p)
+			continue;
+		token = match_token(p, ckrm_sock_target_tokens, args);
+		switch (token) {
+
+		case IPV4:
+			/* start from a clean lookup key for every entry */
+			memset(&ns, 0, sizeof(ns));
+
+			p2 = p;
+			while (*p2 && (*p2 != '='))
+				++p2;
+			if (!*p2)
+				return -EINVAL;
+
+			p2 = v4toi(p2 + 1, '\\', &v4addr);
+			/*
+			 * Without the '\' separator there is no port, and
+			 * stepping past the NUL would read out of bounds.
+			 */
+			if (*p2 != '\\')
+				return -EINVAL;
+			ns.ns_daddrv4 = htonl(v4addr);
+			ns.ns_family = AF_INET;
+
+			p2 = v4toi(p2 + 1, ':', &tmp);
+			ns.ns_dport = (__u16) tmp;
+			if (*p2) {
+				/* parse via tmp: ns_pid is a pid_t, not __u32 */
+				v4toi(p2 + 1, '\0', &tmp);
+				ns.ns_pid = (pid_t) tmp;
+			}
+
+			rc = ckrm_sock_forced_reclassify_ns(&ns, target);
+			if (rc)
+				return rc;
+			break;
+
+		case IPV6:
+			printk(KERN_INFO "rcfs: IPV6 not supported yet\n");
+			return -ENOSYS;
+		default:
+			return -EINVAL;
+		}
+	}
+	return rc;
+}
+
+/*
+ * Listen_aq reclassification.
+ *
+ * Detach every net_struct from "cls" and hand each one that still has a
+ * socket to the default socket class. Used when "cls" is being freed.
+ * Does nothing for a NULL or invalid class, or when the class is empty.
+ */
+static void ckrm_sock_reclassify_class(struct ckrm_sock_class *cls)
+{
+	struct ckrm_net_struct *ns, *tns;
+	struct ckrm_core_class *core;
+	LIST_HEAD(local_list);
+
+	if (!cls)
+		return;
+
+	/* derive the core pointer only once cls is known to be non-NULL */
+	core = class_core(cls);
+	if (!ckrm_validate_and_grab_core(core))
+		return;
+
+	class_lock(core);
+	/* we have the core refcnt */
+	if (list_empty(&core->objlist)) {
+		class_unlock(core);
+		ckrm_core_drop(core);
+		return;
+	}
+
+	/* move all members to a private list so the lock can be dropped */
+	list_splice_init(&core->objlist, &local_list);
+	class_unlock(core);
+	ckrm_core_drop(core);
+
+	list_for_each_entry_safe(ns, tns, &local_list, ckrm_link) {
+		ckrm_ns_hold(ns);
+		list_del(&ns->ckrm_link);
+		if (ns->ns_sk) {
+			lock_sock(ns->ns_sk);
+			ckrm_sock_set_class(ns, &ckrm_sockclass_dflt_class, NULL,
+					    CKRM_EVENT_MANUAL);
+			release_sock(ns->ns_sk);
+		}
+		ckrm_ns_put(ns);
+	}
+}
+
+/*
+ * Init-time setup of the socket classtype: initialises the default class,
+ * registers the classtype with the CKRM core, and registers the listen
+ * start/stop event callbacks.
+ */
+void __init ckrm_meta_init_sockclass(void)
+{
+ printk("...... Initializing ClassType<%s> ........\n",
+ ct_sockclass.name);
+ /* initialize the default class */
+ ckrm_init_core_class(&ct_sockclass, class_core(&ckrm_sockclass_dflt_class),
+ NULL, dflt_sockclass_name);
+
+ /* register classtype and event callbacks for socket classes */
+ ckrm_register_classtype(&ct_sockclass);
+ ckrm_register_event_set(ckrm_sock_events_callbacks);
+
+ /*
+ * note registration of all resource controllers will be done
+ * later dynamically as these are specified as modules
+ */
+}
Index: linux-2.6.12-rc1/kernel/ckrm/Makefile
===================================================================
--- linux-2.6.12-rc1.orig/kernel/ckrm/Makefile 2005-03-18 15:16:33.379482053 -0800
+++ linux-2.6.12-rc1/kernel/ckrm/Makefile 2005-03-18 15:16:37.399162343 -0800
@@ -4,3 +4,4 @@

obj-y += ckrm_events.o ckrm.o ckrmutils.o
obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o
+obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
Index: linux-2.6.12-rc1/net/ipv4/tcp_ipv4.c
===================================================================
--- linux-2.6.12-rc1.orig/net/ipv4/tcp_ipv4.c 2005-03-17 17:34:08.000000000 -0800
+++ linux-2.6.12-rc1/net/ipv4/tcp_ipv4.c 2005-03-18 15:16:37.401162184 -0800
@@ -448,7 +448,8 @@ static struct sock *__tcp_v4_lookup_list
}

/* Optimize the common listener case. */
-static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
+/* XXX: Was inline - need to use for CKRM, fix before next release */
+struct sock *tcp_v4_lookup_listener(u32 daddr,
unsigned short hnum, int dif)
{
struct sock *sk = NULL;
@@ -2645,6 +2646,7 @@ EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_put_port);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
+EXPORT_SYMBOL(tcp_v4_lookup_listener);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_rebuild_header);

--