Received: by 2002:a05:6a10:9848:0:0:0:0 with SMTP id x8csp189049pxf; Wed, 10 Mar 2021 04:06:01 -0800 (PST) X-Google-Smtp-Source: ABdhPJzAuVXGpoFbch2X8119E43afBkJRGZNP8B2GszYxCSgACh4JEzkYDyhsKNXZmA5FCTMA1N3 X-Received: by 2002:a17:906:3105:: with SMTP id 5mr3335604ejx.168.1615377961168; Wed, 10 Mar 2021 04:06:01 -0800 (PST) ARC-Seal: i=1; a=rsa-sha256; t=1615377961; cv=none; d=google.com; s=arc-20160816; b=NZ62PJTcN6ng4rR+pGrVbrtz5EavQnypwJ9ROTbBPyrn3EoyZ6mKaxX/VfU48HJB65 VdSLx/zc8WaCezsUGujXc4A4wlU+6MiG5bakyJFkfdAWg95IjIV2gxBglIERSmruf9fH ER8WiUb+lItvI4FKO2r8SNxpFqZq5pJMQOq/LaLwBFnItAQVMe0jT8IgrJm0vTLRIqqQ gGj2H/8BtwCZ0fOO/waCl0qGLx8qjnkdFsKzp9nMN/5O7lvnY1rakgcu+QiCXxMJc4gF 4vumbokOD4xtKmAN/2N7M38hueMee6k+snGtpCqTHOZcAZEvGEpmQCxGPeyoejrrR+as n8ww== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=list-id:precedence:content-transfer-encoding:mime-version :references:in-reply-to:message-id:date:subject:cc:to:from; bh=dnlAvnc5tHtzGNfvZj+s+m1MXcyEbq5to4ctzYKw304=; b=tVXURU0g/DBLKgnFA4Ip+qrQwBvza50MCk7WEdqQlaVrZpLt2F4mauGyPGDSr8+eSS JjGPYVTDeZz93LTX6dwKO834N94pKLkYMNq7DmU6tOhzXIGo8kqWAOiWmwLCOjp1IKb3 ZOtd5E7O8ahsOfMQdGl7m9+KjMxKk6iS213y2ykf4FDUIoNm9mtfR867S0BR3pKhk/d2 etuWeZ06pvckq42F0ttQlmZbqKdRHyhxo9ohbAcNQHDubwXI3jNCLmbCFSAx1sksNv4p Uu0wn0VQlJaOWRvFwHhtfdyh9anpZsZKFPtt5mqbCR0p2mz2aYoprRFKMl9i/gl2Ch79 8axw== ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org; dmarc=fail (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com Return-Path: Received: from vger.kernel.org (vger.kernel.org. [23.128.96.18]) by mx.google.com with ESMTP id yd6si12333206ejb.302.2021.03.10.04.05.33; Wed, 10 Mar 2021 04:06:01 -0800 (PST) Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) client-ip=23.128.96.18; Authentication-Results: mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org; dmarc=fail (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232528AbhCJMCN (ORCPT + 99 others); Wed, 10 Mar 2021 07:02:13 -0500 Received: from raptor.unsafe.ru ([5.9.43.93]:56126 "EHLO raptor.unsafe.ru" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231272AbhCJMBp (ORCPT ); Wed, 10 Mar 2021 07:01:45 -0500 Received: from comp-core-i7-2640m-0182e6.redhat.com (ip-94-113-225-162.net.upcbroadband.cz [94.113.225.162]) by raptor.unsafe.ru (Postfix) with ESMTPSA id A860540EF6; Wed, 10 Mar 2021 12:01:43 +0000 (UTC) From: Alexey Gladkov To: LKML , io-uring@vger.kernel.org, Kernel Hardening , Linux Containers , linux-mm@kvack.org Cc: Alexey Gladkov , Andrew Morton , Christian Brauner , "Eric W . Biederman" , Jann Horn , Jens Axboe , Kees Cook , Linus Torvalds , Oleg Nesterov Subject: [PATCH v8 3/8] Use atomic_t for ucounts reference counting Date: Wed, 10 Mar 2021 13:01:28 +0100 Message-Id: <59ee3289194cd97d70085cce701bc494bfcb4fd2.1615372955.git.gladkov.alexey@gmail.com> X-Mailer: git-send-email 2.29.2 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Greylist: Sender succeeded SMTP AUTH, not delayed by milter-greylist-4.6.4 (raptor.unsafe.ru [0.0.0.0]); Wed, 10 Mar 2021 12:01:44 +0000 (UTC) Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org The current implementation of the ucounts reference counter requires the use of spin_lock. We're going to use get_ucounts() in more performance critical areas like a handling of RLIMIT_SIGPENDING. Now we need to use spin_lock only if we want to change the hashtable. Signed-off-by: Alexey Gladkov --- include/linux/user_namespace.h | 4 +-- kernel/ucount.c | 60 +++++++++++++++------------------- 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index f71b5a4a3e74..d84cc2c0b443 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -92,7 +92,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - int count; + atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; }; @@ -104,7 +104,7 @@ void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); -struct ucounts *get_ucounts(struct ucounts *ucounts); +struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); #ifdef CONFIG_USER_NS diff --git a/kernel/ucount.c b/kernel/ucount.c index 50cc1dfb7d28..bb3203039b5e 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -11,7 +11,7 @@ struct ucounts init_ucounts = { .ns = &init_user_ns, .uid = GLOBAL_ROOT_UID, - .count = 1, + .count = ATOMIC_INIT(1), }; #define UCOUNTS_HASHTABLE_BITS 10 @@ -139,6 +139,22 @@ static void hlist_add_ucounts(struct ucounts *ucounts) spin_unlock_irq(&ucounts_lock); } +/* 127: arbitrary random number, small enough to assemble well */ +#define refcount_zero_or_close_to_overflow(ucounts) \ + ((unsigned int) atomic_read(&ucounts->count) + 127u <= 127u) + +struct ucounts *get_ucounts(struct ucounts *ucounts) +{ + if (ucounts) { + if (refcount_zero_or_close_to_overflow(ucounts)) { + WARN_ONCE(1, "ucounts: counter has reached its maximum value"); + return NULL; + } + atomic_inc(&ucounts->count); + } + return ucounts; +} + struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); @@ -155,7 +171,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - new->count = 0; + atomic_set(&new->count, 1); spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -163,33 +179,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) kfree(new); } else { hlist_add_head(&new->node, hashent); - ucounts = new; + spin_unlock_irq(&ucounts_lock); + return new; } } - if (ucounts->count == INT_MAX) - ucounts = NULL; - else - ucounts->count += 1; spin_unlock_irq(&ucounts_lock); - return ucounts; -} - -struct ucounts *get_ucounts(struct ucounts *ucounts) -{ - unsigned long flags; - - if (!ucounts) - return NULL; - - spin_lock_irqsave(&ucounts_lock, flags); - if (ucounts->count == INT_MAX) { - WARN_ONCE(1, "ucounts: counter has reached its maximum value"); - ucounts = NULL; - } else { - ucounts->count += 1; - } - spin_unlock_irqrestore(&ucounts_lock, flags); - + ucounts = get_ucounts(ucounts); return ucounts; } @@ -197,15 +192,12 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - spin_lock_irqsave(&ucounts_lock, flags); - ucounts->count -= 1; - if (!ucounts->count) + if (atomic_dec_and_test(&ucounts->count)) { + spin_lock_irqsave(&ucounts_lock, flags); hlist_del_init(&ucounts->node); - else - ucounts = NULL; - spin_unlock_irqrestore(&ucounts_lock, flags); - - kfree(ucounts); + spin_unlock_irqrestore(&ucounts_lock, flags); + kfree(ucounts); + } } static inline bool atomic_long_inc_below(atomic_long_t *v, int u) -- 2.29.2