Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756292AbZGBCNV (ORCPT ); Wed, 1 Jul 2009 22:13:21 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1755462AbZGBCLo (ORCPT ); Wed, 1 Jul 2009 22:11:44 -0400 Received: from smtp-out.google.com ([216.239.45.13]:20765 "EHLO smtp-out.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753795AbZGBCLn (ORCPT ); Wed, 1 Jul 2009 22:11:43 -0400 DomainKey-Signature: a=rsa-sha1; s=beta; d=google.com; c=nofws; q=dns; h=from:subject:to:cc:date:message-id:in-reply-to:references: user-agent:mime-version:content-type: content-transfer-encoding:x-system-of-record; b=q6qbhcHD08ws/NPWbcbzPr/u3ZBrdRiXIlsa5aBQypQE8GYpFPN6+jbczd/3W5quq lD3iD8E1ey3vf1kPU71XA== From: Paul Menage Subject: [PATCH 8/9] [RFC] Example multi-bindable subsystem: a per-cgroup notes field To: lizf@cn.fujitsu.com, balbir@linux.vnet.ibm.com Cc: linux-kernel@vger.kernel.org, akpm@linux-foundation.org, containers@lists.linux-foundation.org, kamezawa.hiroyu@jp.fujitsu.com Date: Wed, 01 Jul 2009 19:11:34 -0700 Message-ID: <20090702021133.14469.35140.stgit@menage.mtv.corp.google.com> In-Reply-To: <20090702020624.14469.47066.stgit@menage.mtv.corp.google.com> References: <20090702020624.14469.47066.stgit@menage.mtv.corp.google.com> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit X-System-Of-Record: true Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5852 Lines: 212 [RFC] Example multi-bindable subsystem: a per-cgroup notes field As an example of a multiply-bindable subsystem, this patch introduces the "info" subsystem, which provides a single file, "info.notes", in which user-space middleware can store an arbitrary (by default up to one page) binary string representing configuration data about that cgroup. 
This reduces the need to keep additional state outside the cgroup filesystem. The maximum notes size for a hierarchy can be set by updating the "info.size" file in the root cgroup. Signed-off-by: Paul Menage --- include/linux/cgroup_subsys.h | 6 ++ init/Kconfig | 9 +++ kernel/Makefile | 1 kernel/info_cgroup.c | 133 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 0 deletions(-) create mode 100644 kernel/info_cgroup.c diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index f78605e..5dfea38 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -60,3 +60,9 @@ SUBSYS(net_cls) #endif /* */ + +#ifdef CONFIG_CGROUP_INFO +MULTI_SUBSYS(info) +#endif + +/* */ diff --git a/init/Kconfig b/init/Kconfig index d904d6c..3bd4685 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -604,6 +604,15 @@ config CGROUP_MEM_RES_CTLR_SWAP Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. +config CGROUP_INFO + bool "Simple application-specific info cgroup subsystem" + depends on CGROUPS + help + Provides a simple cgroups subsystem with an "info.notes" + field, which can be used by middleware to store + application-specific configuration data about a cgroup. Can + be mounted on multiple hierarchies at once. 
+ endif # CGROUPS config MM_OWNER diff --git a/kernel/Makefile b/kernel/Makefile index 7ffdc16..e713a67 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o +obj-$(CONFIG_CGROUP_INFO) += info_cgroup.o obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o obj-$(CONFIG_PID_NS) += pid_namespace.o diff --git a/kernel/info_cgroup.c b/kernel/info_cgroup.c new file mode 100644 index 0000000..34cfdb8 --- /dev/null +++ b/kernel/info_cgroup.c @@ -0,0 +1,133 @@ +/* + * info_cgroup.c - simple cgroup providing a "notes" field + */ + +#include "linux/cgroup.h" +#include "linux/err.h" +#include "linux/seq_file.h" + +struct info_cgroup { + struct cgroup_subsys_state css; + /* notes string for this cgroup */ + const char *notes; + size_t len; + /* + * size limit for notes in this hierarchy. Only relevant for + * the root cgroup. Not synchronized since it's a single word + * value and writes to it never depend on previously read + * values. 
+ */ + size_t max_len; + spinlock_t lock; +}; +/* cg_info - return the info_cgroup that embeds the css of cg for the info subsystem (container_of on cgroup_subsys_state(cg, info_subsys_id)). */ +static inline struct info_cgroup *cg_info(struct cgroup *cg) +{ + return container_of(cgroup_subsys_state(cg, info_subsys_id), + struct info_cgroup, css); +} +/* info_create - allocate zeroed per-cgroup state and initialise its lock. Returns the embedded css, or ERR_PTR(-ENOMEM) when kzalloc fails. Only a cgroup without a parent gets max_len = PAGE_SIZE; any other cgroup keeps the zero left by kzalloc, and info_write checks the max_len of the top cgroup rather than its own. */ +static struct cgroup_subsys_state *info_create(struct cgroup_subsys *ss, + struct cgroup *cg) +{ + struct info_cgroup *info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + spin_lock_init(&info->lock); + if (!cg->parent) + info->max_len = PAGE_SIZE; + return &info->css; +} +/* info_destroy - free the notes buffer of cont and then the info_cgroup itself. */ +static void info_destroy(struct cgroup_subsys *ss, struct cgroup *cont) +{ + struct info_cgroup *css = cg_info(cont); + kfree(css->notes); + kfree(css); +} + +/* info_read - write the stored notes (css->len raw bytes) into seq while holding css->lock; nothing is written when no notes are set. Always returns 0. NOTE(review): the return value of seq_write() is ignored - confirm that a failed or truncated write does not need to be reported. */ +static int info_read(struct cgroup *cont, + struct cftype *cft, + struct seq_file *seq) +{ + struct info_cgroup *css = cg_info(cont); + spin_lock(&css->lock); + if (css->notes) + seq_write(seq, css->notes, css->len); + spin_unlock(&css->lock); + return 0; +} + +/* + * Use a custom write function so that we can handle binary data + */ +/* info_write - replace the notes of cgrp with nbytes bytes copied from userbuf. Returns nbytes on success, -E2BIG when nbytes exceeds the max_len of the top cgroup, -ENOMEM when kmalloc fails, -EFAULT when copy_from_user fails. nbytes == 0 skips the allocation and clears the notes (notes = NULL, len = 0). NOTE(review): the -EFAULT return does not kfree() the freshly allocated buffer, so that buffer leaks on this path. */ +static ssize_t info_write(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *unused_ppos) { + struct info_cgroup *css = cg_info(cgrp); + char *notes = NULL; + if (nbytes > cg_info(cgrp->top_cgroup)->max_len) + return -E2BIG; + if (nbytes) { + notes = kmalloc(nbytes, GFP_USER); + if (!notes) + return -ENOMEM; + if (copy_from_user(notes, userbuf, nbytes)) + return -EFAULT; + } +/* Swap in the new buffer and its length together under the lock; the previous buffer is freed inside the critical section. */ + spin_lock(&css->lock); + kfree(css->notes); + css->notes = notes; + css->len = nbytes; + spin_unlock(&css->lock); + return nbytes; +} +/* notes_size_read - return the max_len stored in the info_cgroup of cont. */ +static u64 notes_size_read(struct cgroup *cont, struct cftype *cft) +{ + struct info_cgroup *css = cg_info(cont); + return css->max_len; +} +/* notes_size_write - store val into the max_len of cont and return 0. The store takes no lock (see the max_len comment in struct info_cgroup). NOTE(review): val is a u64 assigned to a size_t with no range check, so it could be truncated where size_t is narrower than u64; notes that are already stored are not trimmed when the limit shrinks. */ +static int notes_size_write(struct cgroup *cont, struct cftype *cft, u64 val) +{ + struct info_cgroup *css = cg_info(cont); + css->max_len = val; + return 0; +} +/* Files that info_populate adds to each cgroup it is called on. */ +static struct cftype info_files[] = { + { + .name = "notes", + .read_seq_string = 
info_read, + .write = info_write, + }, +}; +/* Extra file that info_populate adds only to a cgroup without a parent; it reads and writes the max_len of that cgroup. */ +static struct cftype info_root_files[] = { + { + .name = "size", + .read_u64 = notes_size_read, + .write_u64 = notes_size_write, + }, +}; +/* info_populate - add the notes file to cont, plus the size file when cont has no parent. Returns the result of adding info_files. NOTE(review): the return value of the first cgroup_add_files() call is discarded, so a failure to add the size file goes unreported. */ +static int info_populate(struct cgroup_subsys *ss, struct cgroup *cont) +{ + if (!cont->parent) + cgroup_add_files(cont, ss, info_root_files, + ARRAY_SIZE(info_root_files)); + return cgroup_add_files(cont, ss, info_files, ARRAY_SIZE(info_files)); +} +/* Subsystem descriptor: registers the create, destroy and populate callbacks defined in this file under the name info. */ +struct cgroup_subsys info_subsys = { + .name = "info", + .create = info_create, + .destroy = info_destroy, + .populate = info_populate, + .subsys_id = info_subsys_id, +}; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/