Received: by 2002:ad5:474a:0:0:0:0:0 with SMTP id i10csp1015545imu; Tue, 20 Nov 2018 10:14:47 -0800 (PST) X-Google-Smtp-Source: AFSGD/Uw5ByZbgRVyctIgb66YrbQp9xFBpBgg9JsXgw3k0rAZ8Cznje5DJRTX9EaY3nakC8oFWKY X-Received: by 2002:a17:902:30f:: with SMTP id 15-v6mr3315263pld.155.1542737687751; Tue, 20 Nov 2018 10:14:47 -0800 (PST) ARC-Seal: i=1; a=rsa-sha256; t=1542737687; cv=none; d=google.com; s=arc-20160816; b=J5Yi7HxwA+x+BaO5cRoPeULeMLWTpuUuNSJJoZhjrbCycS1itHgT3DFeGvbQvzGTHq MKRiyTAKNyHWf5HvNON3dKHbdbzUe5ySpyVMZcG2bHy10bDYz2t43Xwbs9bMQw9Us3A1 oWh0Nqc73RK4+92EcQT6SsrxMbepmHwMF0xPW0CeKe53ywAdyIHc/vi6JxrnVDoBEpab 4Jr5qNthWCw3W08lal9D+4+6iB5C8Vbb6+MHiAR/uyijXHJM06JrL1+oABIempx83rAQ jkie7rVeKIGIKR22jffnNeQ4Ph1kVqqTfTIzINT2Bd3pXFVpbYHccII2TTq+n/nm5u2T EGfg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=list-id:precedence:sender:references:in-reply-to:message-id:date :subject:cc:to:from; bh=q6GBVrz1fXRpTu+EgJhRvznUETreC9oZ0oiQZnp0DYk=; b=Eqg1KAJIGRhqDaAcHgZeQYymG/z5spGnFdh1xCcL6B6B7+z6jJQ4+Kx+s8j7csvpqZ INaFhm8NJJvRjBXCxHCIZOZZ66aO3I5pGm1YR3zRK0O+JCiGtWR/iSMjAdU2jbRHBCbh O69RLlvaFajPKZSPM4r3VStC42uPgaWdqMug+S0qASlf4w7IUByKSjJg7rqJF06Q/4gv YBtA5w09KP2BB/Gel8n1zFd3kOEhLXdzXKGlVoLvpzQZgYCoyI2D3ax0pTquWlMOfDZJ cJM+Kli8ysppfEuzRPbWuuu6EkHU00Q1IVq9mObcz0pW445e8Dnhdg15RLwMZGi9PNDH BAxw== ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: best guess record for domain of linux-kernel-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=redhat.com Return-Path: Received: from vger.kernel.org (vger.kernel.org. 
[209.132.180.67]) by mx.google.com with ESMTP id o14si22229038pgj.59.2018.11.20.10.14.23; Tue, 20 Nov 2018 10:14:47 -0800 (PST) Received-SPF: pass (google.com: best guess record for domain of linux-kernel-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) client-ip=209.132.180.67; Authentication-Results: mx.google.com; spf=pass (google.com: best guess record for domain of linux-kernel-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=redhat.com Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1730146AbeKUEWi (ORCPT + 99 others); Tue, 20 Nov 2018 23:22:38 -0500 Received: from mx1.redhat.com ([209.132.183.28]:52270 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729563AbeKUEWf (ORCPT ); Tue, 20 Nov 2018 23:22:35 -0500 Received: from smtp.corp.redhat.com (int-mx03.intmail.prod.int.phx2.redhat.com [10.5.11.13]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 3E7003086265; Tue, 20 Nov 2018 17:52:13 +0000 (UTC) Received: from llong.com (ovpn-120-174.rdu2.redhat.com [10.10.120.174]) by smtp.corp.redhat.com (Postfix) with ESMTP id 6DA6F54577; Tue, 20 Nov 2018 17:52:11 +0000 (UTC) From: Waiman Long To: Tejun Heo , Li Zefan , Johannes Weiner , Peter Zijlstra , Ingo Molnar , Jonathan Corbet Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, Roman Gushchin , Jens Axboe , Andrew Morton , Dennis Zhou , Shakeel Butt , Waiman Long Subject: [PATCH v4 2/5] cgroup: Allow reenabling of controller in bypass mode Date: Tue, 20 Nov 2018 12:51:26 -0500 Message-Id: <1542736289-31338-3-git-send-email-longman@redhat.com> In-Reply-To: <1542736289-31338-1-git-send-email-longman@redhat.com> References: <1542736289-31338-1-git-send-email-longman@redhat.com> X-Scanned-By: 
MIMEDefang 2.79 on 10.5.11.13 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.49]); Tue, 20 Nov 2018 17:52:13 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Bypassable controllers set to bypass mode in the parent's "cgroup.subtree_control" can now be optionally enabled in a child by writing the controller name with the '+' prefix to that child's "cgroup.controllers". Using the '#' prefix will reset it back to the bypass state. This capability allows a cgroup parent to individually enable bypassable controllers in a subset of its children instead of either all or none of them. This increases the flexibility each controller has in shaping the effective cgroup hierarchy to best suit its needs. Signed-off-by: Waiman Long --- include/linux/cgroup-defs.h | 7 +++ kernel/cgroup/cgroup.c | 109 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 113 insertions(+), 3 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 5bff798..ab1b355 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -388,6 +388,13 @@ struct cgroup { u16 old_subtree_ss_mask; u16 old_subtree_bypass; + /* + * The bitmask of subsystems that are set in its parent's + * ->subtree_bypass and explicitly enabled in this cgroup. 
+ */ + u16 enable_ss_mask; + u16 old_enable_ss_mask; + /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a361c10..fa538f2 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -424,7 +424,7 @@ static u16 cgroup_control(struct cgroup *cgrp, bool show_bypass) u16 root_ss_mask = cgrp->root->subsys_mask; if (parent) { - u16 ss_mask = parent->subtree_control; + u16 ss_mask = parent->subtree_control|cgrp->enable_ss_mask; if (show_bypass) ss_mask |= parent->subtree_bypass; @@ -447,7 +447,7 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp, bool show_bypass) struct cgroup *parent = cgroup_parent(cgrp); if (parent) { - u16 ss_mask = parent->subtree_ss_mask; + u16 ss_mask = parent->subtree_ss_mask|cgrp->enable_ss_mask; if (show_bypass) @@ -2874,6 +2874,7 @@ static void cgroup_save_control(struct cgroup *cgrp) dsct->old_subtree_control = dsct->subtree_control; dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; dsct->old_subtree_bypass = dsct->subtree_bypass; + dsct->old_enable_ss_mask = dsct->enable_ss_mask; dsct->old_dom_cgrp = dsct->dom_cgrp; } } @@ -2919,6 +2920,7 @@ static void cgroup_restore_control(struct cgroup *cgrp) dsct->subtree_control = dsct->old_subtree_control; dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; dsct->subtree_bypass = dsct->old_subtree_bypass; + dsct->enable_ss_mask = dsct->old_enable_ss_mask; dsct->dom_cgrp = dsct->old_dom_cgrp; } } @@ -3197,7 +3199,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } cgroup_for_each_live_child(child, cgrp) - child_enable |= child->subtree_control|child->subtree_bypass; + child_enable |= child->subtree_control|child->subtree_bypass| + child->enable_ss_mask; /* * Cannot change the state of a controller if enabled in children. 
@@ -3230,6 +3233,105 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	return ret ?: nbytes;
 }
 
+/*
+ * Write handler of "cgroup.controllers": a '+' prefixed name enables and a
+ * '#' prefixed name resets to bypass a controller that is in the parent's
+ * ->subtree_bypass. Returns @nbytes on success or a negative errno.
+ */
+static ssize_t cgroup_controllers_write(struct kernfs_open_file *of,
+					char *buf, size_t nbytes,
+					loff_t off)
+{
+	u16 enable = 0, bypass = 0;
+	struct cgroup *cgrp, *parent;
+	struct cgroup_subsys *ss;
+	char *tok;
+	int ssid, ret;
+
+	/*
+	 * Parse input - space separated list of subsystem names prefixed
+	 * with either + or #.
+	 */
+	buf = strstrip(buf);
+	while ((tok = strsep(&buf, " "))) {
+		if (tok[0] == '\0')
+			continue;
+		do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
+			if (!cgroup_ssid_enabled(ssid) ||
+			    strcmp(tok + 1, ss->name))
+				continue;
+
+			if (*tok == '+') {
+				enable |= 1 << ssid;
+				bypass &= ~(1 << ssid);
+			} else if (*tok == '#') {
+				bypass |= 1 << ssid;
+				enable &= ~(1 << ssid);
+			} else {
+				return -EINVAL;
+			}
+			break;
+		} while_each_subsys_mask();
+		if (ssid == CGROUP_SUBSYS_COUNT)
+			return -EINVAL;
+	}
+
+	cgrp = cgroup_kn_lock_live(of->kn, true);
+	if (!cgrp)
+		return -ENODEV;
+
+	/* Writing to the root cgroup's controllers file is not allowed. */
+	parent = cgroup_parent(cgrp);
+	if (!parent) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Only controllers set into bypass mode in the parent cgroup
+	 * can be specified here.
+	 */
+	if (~parent->subtree_bypass & (enable|bypass)) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	/* Drop requests that would not change ->enable_ss_mask. */
+	enable &= ~cgrp->enable_ss_mask;
+	bypass &= cgrp->enable_ss_mask;
+
+	if (!(enable|bypass)) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * We cannot change the bypass state of a controller that is set in
+	 * this cgroup's own subtree_control or subtree_bypass.
+	 */
+	if ((cgrp->subtree_control|cgrp->subtree_bypass) & (enable|bypass)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/* Save and update control masks and prepare csses */
+	cgroup_save_control(cgrp);
+
+	cgrp->enable_ss_mask |= enable;
+	cgrp->enable_ss_mask &= ~bypass;
+
+	ret = cgroup_apply_control(cgrp);
+	cgroup_finalize_control(cgrp, ret);
+	/* Do not report success if applying the new masks failed. */
+	if (ret)
+		goto out_unlock;
+
+	kernfs_activate(cgrp->kn);
+out_unlock:
+	cgroup_kn_unlock(of->kn);
+	return ret ?: nbytes;
+}
+
 /**
  * cgroup_enable_threaded - make @cgrp threaded
  * @cgrp: the target cgroup
@@ -4573,6 +4675,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
 	{
 		.name = "cgroup.controllers",
 		.seq_show = cgroup_controllers_show,
+		.write = cgroup_controllers_write,
 	},
 	{
 		.name = "cgroup.subtree_control",
-- 
1.8.3.1