Many subsystems will not use refcount_t unless there is a way to build the
kernel so that there is no regression in speed compared to atomic_t. This
adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
which has the validation but is slightly slower. When not enabled,
refcount_t uses the basic unchecked atomic_t routines, which results in
no code changes compared to just using atomic_t directly.
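For illustration (a hypothetical caller, not part of this patch), a
typical atomic_t -> refcount_t conversion at a call site looks like:

	struct foo {
		refcount_t refs;	/* was: atomic_t refs; */
	};

	static struct foo *foo_alloc(void)
	{
		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

		if (f)
			refcount_set(&f->refs, 1);	/* was: atomic_set() */
		return f;
	}

	static void foo_put(struct foo *f)
	{
		if (refcount_dec_and_test(&f->refs))	/* was: atomic_dec_and_test() */
			kfree(f);
	}

With CONFIG_REFCOUNT_FULL=n, the refcount_*() calls compile down to the
plain atomic_t operations added below; with CONFIG_REFCOUNT_FULL=y, the
same calls go through the checked implementation in lib/refcount.c.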
Signed-off-by: Kees Cook <[email protected]>
---
This is v2 of this patch, which I've split from the arch-specific
alternative implementation for x86. Getting this patch in will unblock
atomic_t -> refcount_t conversion, and the x86 alternative implementation
can be developed in parallel. Changes from v1: use better atomic ops,
thanks to Elena and Peter.
---
 arch/Kconfig             |  9 +++++++++
 include/linux/refcount.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 lib/refcount.c           |  3 +++
 3 files changed, 56 insertions(+)
diff --git a/arch/Kconfig b/arch/Kconfig
index 6c00e5b00f8b..fba3bf186728 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -867,4 +867,13 @@ config STRICT_MODULE_RWX
config ARCH_WANT_RELAX_ORDER
bool
+config REFCOUNT_FULL
+ bool "Perform full reference count validation at the expense of speed"
+ help
+ Enabling this switches the refcounting infrastructure from a fast
+ unchecked atomic_t implementation to a fully state checked
+ implementation, which can be slower but provides protections
+ against various use-after-free conditions that can be used in
+ security flaw exploits.
+
source "kernel/gcov/Kconfig"
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index b34aa649d204..099c32bd07b2 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -41,6 +41,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
return atomic_read(&r->refs);
}
+#ifdef CONFIG_REFCOUNT_FULL
extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
extern void refcount_add(unsigned int i, refcount_t *r);
@@ -52,6 +53,49 @@ extern void refcount_sub(unsigned int i, refcount_t *r);
extern __must_check bool refcount_dec_and_test(refcount_t *r);
extern void refcount_dec(refcount_t *r);
+#else
+static inline __must_check bool refcount_add_not_zero(unsigned int i,
+ refcount_t *r)
+{
+ return atomic_add_unless(&r->refs, i, 0);
+}
+
+static inline void refcount_add(unsigned int i, refcount_t *r)
+{
+ atomic_add(i, &r->refs);
+}
+
+static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+ return atomic_add_unless(&r->refs, 1, 0);
+}
+
+static inline void refcount_inc(refcount_t *r)
+{
+ atomic_inc(&r->refs);
+}
+
+static inline __must_check bool refcount_sub_and_test(unsigned int i,
+ refcount_t *r)
+{
+ return atomic_sub_and_test(i, &r->refs);
+}
+
+static inline void refcount_sub(unsigned int i, refcount_t *r)
+{
+ atomic_sub(i, &r->refs);
+}
+
+static inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+ return atomic_dec_and_test(&r->refs);
+}
+
+static inline void refcount_dec(refcount_t *r)
+{
+ atomic_dec(&r->refs);
+}
+#endif /* CONFIG_REFCOUNT_FULL */
extern __must_check bool refcount_dec_if_one(refcount_t *r);
extern __must_check bool refcount_dec_not_one(refcount_t *r);
diff --git a/lib/refcount.c b/lib/refcount.c
index 9f906783987e..5d0582a9480c 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -37,6 +37,8 @@
#include <linux/refcount.h>
#include <linux/bug.h>
+#ifdef CONFIG_REFCOUNT_FULL
+
/**
* refcount_add_not_zero - add a value to a refcount unless it is 0
* @i: the value to add to the refcount
@@ -225,6 +227,7 @@ void refcount_dec(refcount_t *r)
WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
}
EXPORT_SYMBOL(refcount_dec);
+#endif /* CONFIG_REFCOUNT_FULL */
/**
* refcount_dec_if_one - decrement a refcount if it is 1
--
2.7.4
--
Kees Cook
Pixel Security
On Wed, Jun 07, 2017 at 07:58:31PM -0700, Kees Cook wrote:
> Many subsystems will not use refcount_t unless there is a way to build the
> kernel so that there is no regression in speed compared to atomic_t. This
> adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
> which has the validation but is slightly slower. When not enabled,
> refcount_t uses the basic unchecked atomic_t routines, which results in
> no code changes compared to just using atomic_t directly.
>
> Signed-off-by: Kees Cook <[email protected]>
Acked-by: Greg Kroah-Hartman <[email protected]>
On Wed, Jun 07, 2017 at 07:58:31PM -0700, Kees Cook wrote:
> Many subsystems will not use refcount_t unless there is a way to build the
> kernel so that there is no regression in speed compared to atomic_t. This
> adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
> which has the validation but is slightly slower. When not enabled,
> refcount_t uses the basic unchecked atomic_t routines, which results in
> no code changes compared to just using atomic_t directly.
>
> Signed-off-by: Kees Cook <[email protected]>
> ---
> This is v2 of this patch, which I've split from the arch-specific
> alternative implementation for x86. Getting this patch in will unblock
> atomic_t -> refcount_t conversion, and the x86 alternative implementation
> can be developed in parallel. Changes from v1: use better atomic ops,
> thanks to Elena and Peter.
Yeah, can we get this in ASAP? Without having to always incur the
overhead, this will allow us to convert subsystems to refcount_t broadly.
> On Wed, Jun 07, 2017 at 07:58:31PM -0700, Kees Cook wrote:
> > Many subsystems will not use refcount_t unless there is a way to build the
> > kernel so that there is no regression in speed compared to atomic_t. This
> > adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
> > which has the validation but is slightly slower. When not enabled,
> > refcount_t uses the basic unchecked atomic_t routines, which results in
> > no code changes compared to just using atomic_t directly.
> >
> > Signed-off-by: Kees Cook <[email protected]>
> > ---
> > This is v2 of this patch, which I've split from the arch-specific
> > alternative implementation for x86. Getting this patch in will unblock
> > atomic_t -> refcount_t conversion, and the x86 alternative implementation
> > can be developed in parallel. Changes from v1: use better atomic ops,
> > thanks to Elena and Peter.
>
> Yeah, can we get this in ASAP? Without having to always incur the
> overhead, this will allow us to convert subsystems to refcount_t broadly.
+1. If this gets in, I can refresh the rest of the patches in net, mm, ipc, block, etc. and send them for review again.
Best Regards,
Elena
On Thu, 08 Jun 2017, Reshetova, Elena wrote:
>> On Wed, Jun 07, 2017 at 07:58:31PM -0700, Kees Cook wrote:
>> > Many subsystems will not use refcount_t unless there is a way to build the
>> > kernel so that there is no regression in speed compared to atomic_t. This
>> > adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
>> > which has the validation but is slightly slower. When not enabled,
>> > refcount_t uses the basic unchecked atomic_t routines, which results in
>> > no code changes compared to just using atomic_t directly.
>> >
>> > Signed-off-by: Kees Cook <[email protected]>
>> > ---
>> > This is v2 of this patch, which I've split from the arch-specific
>> > alternative implementation for x86. Getting this patch in will unblock
>> > atomic_t -> refcount_t conversion, and the x86 alternative implementation
>> > can be developed in parallel. Changes from v1: use better atomic ops,
>> > thanks to Elena and Peter.
>>
>> Yeah, can we get this in ASAP? Without having to always incur the
>> overhead, this will allow us to convert subsystems to refcount_t broadly.
>
>+1. If this gets in, I can refresh the rest of the patches in net, mm, ipc, block, etc. and send them for review again.
Yes, this would be a prerequisite for ipc, which I initially thought
didn't take a performance hit.
Thanks,
Davidlohr
Hi Davidlohr,
On 06/08/2017 10:09 PM, Davidlohr Bueso wrote:
>
> Yes, this would be a prerequisite for ipc, which I initially thought
> didn't take a performance hit.
>
Did you see a regression for ipc?
The fast paths don't use the refcount; it is only used in rare situations:
- GETALL, SETALL for large arrays
- alloc undo
--
Manfred
On Fri, Jun 09, 2017 at 06:24:04AM +0200, Manfred Spraul wrote:
> Hi Davidlohr,
>
> On 06/08/2017 10:09 PM, Davidlohr Bueso wrote:
> >
> > Yes, this would be a prerequisite for ipc, which I initially thought
> > didn't take a performance hit.
> >
> Did you see a regression for ipc?
I'd be most interested in having a benchmark that shows a regression.
Please share.
On Wed, Jun 7, 2017 at 10:56 PM, Greg KH <[email protected]> wrote:
> On Wed, Jun 07, 2017 at 07:58:31PM -0700, Kees Cook wrote:
>> Many subsystems will not use refcount_t unless there is a way to build the
>> kernel so that there is no regression in speed compared to atomic_t. This
>> adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
>> which has the validation but is slightly slower. When not enabled,
>> refcount_t uses the basic unchecked atomic_t routines, which results in
>> no code changes compared to just using atomic_t directly.
>>
>> Signed-off-by: Kees Cook <[email protected]>
>
> Acked-by: Greg Kroah-Hartman <[email protected]>
Ping. tip maintainers, can you please take this patch to unblock the
refcount_t conversions?
Thanks!
-Kees
--
Kees Cook
Pixel Security
* Kees Cook <[email protected]> wrote:
> Many subsystems will not use refcount_t unless there is a way to build the
> kernel so that there is no regression in speed compared to atomic_t. This
> adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation
> which has the validation but is slightly slower. When not enabled,
> refcount_t uses the basic unchecked atomic_t routines, which results in
> no code changes compared to just using atomic_t directly.
>
> Signed-off-by: Kees Cook <[email protected]>
> ---
> This is v2 of this patch, which I've split from the arch-specific
> alternative implementation for x86. Getting this patch in will unblock
> atomic_t -> refcount_t conversion, and the x86 alternative implementation
> can be developed in parallel. Changes from v1: use better atomic ops,
> thanks to Elena and Peter.
> ---
>  arch/Kconfig             |  9 +++++++++
>  include/linux/refcount.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  lib/refcount.c           |  3 +++
>  3 files changed, 56 insertions(+)
Looks almost good - sans a few stylistic nits:
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 6c00e5b00f8b..fba3bf186728 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -867,4 +867,13 @@ config STRICT_MODULE_RWX
> config ARCH_WANT_RELAX_ORDER
> bool
>
> +config REFCOUNT_FULL
> + bool "Perform full reference count validation at the expense of speed"
> + help
> + Enabling this switches the refcounting infrastructure from a fast
> + unchecked atomic_t implementation to a fully state checked
> + implementation, which can be slower but provides protections
> + against various use-after-free conditions that can be used in
> + security flaw exploits.
> +
> source "kernel/gcov/Kconfig"
> diff --git a/include/linux/refcount.h b/include/linux/refcount.h
> index b34aa649d204..099c32bd07b2 100644
> --- a/include/linux/refcount.h
> +++ b/include/linux/refcount.h
> @@ -41,6 +41,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
> return atomic_read(&r->refs);
> }
>
> +#ifdef CONFIG_REFCOUNT_FULL
> extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
> extern void refcount_add(unsigned int i, refcount_t *r);
>
> @@ -52,6 +53,49 @@ extern void refcount_sub(unsigned int i, refcount_t *r);
>
> extern __must_check bool refcount_dec_and_test(refcount_t *r);
> extern void refcount_dec(refcount_t *r);
> +#else
> +static inline __must_check bool refcount_add_not_zero(unsigned int i,
> + refcount_t *r)
Please keep it on a single, slightly over-long line instead of the ugly
line break in the middle of the list of parameters ...
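I.e. (just a sketch of the intended style), the declaration would read:

	static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)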
There are other such uglies in the patch as well.
Thanks,
Ingo