From: Stanislav Fomichev
Date: Tue, 18 Oct 2022 13:07:45 -0700
Subject: Re: [PATCH] bpf: Use kmalloc_size_roundup() to match ksize() usage
To: Kees Cook
Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend, Andrii Nakryiko,
    Martin KaFai Lau, Song Liu, Yonghong Song, KP Singh, Hao Luo, Jiri Olsa,
    bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
    linux-hardening@vger.kernel.org
In-Reply-To: <202210181110.CD92A00@keescook>
References: <20221018090550.never.834-kees@kernel.org> <202210181110.CD92A00@keescook>

On Tue, Oct 18, 2022 at 11:19 AM Kees Cook wrote:
>
> On Tue, Oct 18, 2022 at 11:07:38AM -0700, sdf@google.com wrote:
> > On 10/18, Kees Cook wrote:
> > > Round up allocations with kmalloc_size_roundup() so that the verifier's
> > > use of ksize() is always accurate and no special handling of the memory
> > > is needed by KASAN, UBSAN_BOUNDS, nor FORTIFY_SOURCE. Pass the new size
> > > information back up to callers so they can use the space immediately,
> > > so that array resizing happens less frequently as well. Explicitly zero
> > > any trailing bytes in new allocations.
> > >
> > > Additionally fix a memory allocation leak: if krealloc() fails, "arr"
> > > wasn't freed, but NULL was returned to the caller of realloc_array(),
> > > which would then write NULL to the lvalue, losing the reference to the
> > > original memory.
> > >
> > > Cc: Alexei Starovoitov
> > > Cc: Daniel Borkmann
> > > Cc: John Fastabend
> > > Cc: Andrii Nakryiko
> > > Cc: Martin KaFai Lau
> > > Cc: Song Liu
> > > Cc: Yonghong Song
> > > Cc: KP Singh
> > > Cc: Stanislav Fomichev
> > > Cc: Hao Luo
> > > Cc: Jiri Olsa
> > > Cc: bpf@vger.kernel.org
> > > Signed-off-by: Kees Cook
> > > ---
> > >  kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++----------------
> > >  1 file changed, 31 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index 014ee0953dbd..8a0b60207d0e 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -1000,42 +1000,53 @@ static void print_insn_state(struct bpf_verifier_env *env,
> > >   */
> > >  static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
> > >  {
> > > -	size_t bytes;
> > > +	size_t src_bytes, dst_bytes;
> > >
> > >  	if (ZERO_OR_NULL_PTR(src))
> > >  		goto out;
> > >
> > > -	if (unlikely(check_mul_overflow(n, size, &bytes)))
> > > +	if (unlikely(check_mul_overflow(n, size, &src_bytes)))
> > >  		return NULL;
> > >
> > > -	if (ksize(dst) < bytes) {
> > > +	dst_bytes = kmalloc_size_roundup(src_bytes);
> > > +	if (ksize(dst) < dst_bytes) {
> >
> > Why not simply do the following here?
> >
> >   if (ksize(dst) < ksize(src)) {
> >
> > ?
>
> Yeah, if src always passes through the rounding-up allocation path, that
> might work. I need to double-check that there isn't a case where "size"
> makes this go weird -- e.g. a rounded-up "src" may be larger than
> "n * size", but I think that's okay because the memcpy/memset does the
> right thing.
>
> > It seems like we care about src_bytes/bytes only in this case, so maybe
> > move that check_mul_overflow under this branch as well?
> >
> > >  		kfree(dst);
> > > -		dst = kmalloc_track_caller(bytes, flags);
> > > +		dst = kmalloc_track_caller(dst_bytes, flags);
> > >  		if (!dst)
> > >  			return NULL;
> > >  	}
> > >
> > > -	memcpy(dst, src, bytes);
> > > +	memcpy(dst, src, src_bytes);
> > > +	memset(dst + src_bytes, 0, dst_bytes - src_bytes);
> > >  out:
> > >  	return dst ? dst : ZERO_SIZE_PTR;
> > >  }
> > >
> > > -/* resize an array from old_n items to new_n items. the array is reallocated if it's too
> > > - * small to hold new_n items. new items are zeroed out if the array grows.
> > > +/* Resize an array from old_n items to *new_n items. The array is reallocated if it's too
> > > + * small to hold *new_n items. New items are zeroed out if the array grows. Allocation
> > > + * is rounded up to next kmalloc bucket size to reduce frequency of resizing. *new_n
> > > + * contains the new total number of items that will fit.
> > >   *
> > > - * Contrary to krealloc_array, does not free arr if new_n is zero.
> > > + * Contrary to krealloc, does not free arr if new_n is zero.
> > >  */
> > > -static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
> > > +static void *realloc_array(void *arr, size_t old_n, size_t *new_n, size_t size)
> > >  {
> > > -	if (!new_n || old_n == new_n)
> > > +	void *old_arr = arr;
> > > +	size_t alloc_size;
> > > +
> > > +	if (!new_n || !*new_n || old_n == *new_n)
> > >  		goto out;
> >
> > [..]
> > > +/* Resize an array from old_n items to *new_n items. The array is > > > reallocated if it's too > > > + * small to hold *new_n items. New items are zeroed out if the array > > > grows. Allocation > > > + * is rounded up to next kmalloc bucket size to reduce frequency of > > > resizing. *new_n > > > + * contains the new total number of items that will fit. > > > * > > > - * Contrary to krealloc_array, does not free arr if new_n is zero. > > > + * Contrary to krealloc, does not free arr if new_n is zero. > > > */ > > > -static void *realloc_array(void *arr, size_t old_n, size_t new_n, > > > size_t size) > > > +static void *realloc_array(void *arr, size_t old_n, size_t *new_n, > > > size_t size) > > > { > > > - if (!new_n || old_n == new_n) > > > + void *old_arr = arr; > > > + size_t alloc_size; > > > + > > > + if (!new_n || !*new_n || old_n == *new_n) > > > goto out; > > > > > > [..] > > > > > - arr = krealloc_array(arr, new_n, size, GFP_KERNEL); > > > - if (!arr) > > > + alloc_size = kmalloc_size_roundup(size_mul(*new_n, size)); > > > + arr = krealloc(old_arr, alloc_size, GFP_KERNEL); > > > + if (!arr) { > > > + kfree(old_arr); > > > return NULL; > > > + } > > > > Any reason not do hide this complexity behind krealloc_array? Why can't > > it take care of those roundup details? > > It might be possible to do this with a macro, yes, but then callers > aren't in a position to take advantage of the new size. Maybe we need > something like: > > arr = krealloc_up(old_arr, alloc_size, &new_size, GFP_KERNEL); Maybe even krealloc_array_up(arr, &new_n, size, flags) or similar where we return a new size? Though I don't know if there are any other places in the kernel to reuse it and warrant a new function.. > Thanks for looking this over! > > -- > Kees Cook On Tue, Oct 18, 2022 at 11:19 AM Kees Cook wrote: > > On Tue, Oct 18, 2022 at 11:07:38AM -0700, sdf@google.com wrote: > > On 10/18, Kees Cook wrote: > > > Round up allocations with kmalloc_size_roundup() so that the verifier's > > > use of ksize() is always accurate and no special handling of the memory > > > is needed by KASAN, UBSAN_BOUNDS, nor FORTIFY_SOURCE. Pass the new size > > > information back up to callers so they can use the space immediately, > > > so array resizing to happen less frequently as well. Explicitly zero > > > any trailing bytes in new allocations. > > > > > Additionally fix a memory allocation leak: if krealloc() fails, "arr" > > > wasn't freed, but NULL was return to the caller of realloc_array() would > > > be writing NULL to the lvalue, losing the reference to the original > > > memory. 