2023-05-10 01:26:55

by Azeem Shaikh

[permalink] [raw]
Subject: [PATCH v2] kernfs: Prefer strscpy over strlcpy calls

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
Since strscpy() returns -E2BIG on truncation, we rely on
strlen(src) to imitate strlcpy() behavior.

This is part of a tree-wide cleanup to remove the strlcpy() function
entirely from the kernel [2].

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <[email protected]>
---
fs/kernfs/dir.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 45b6919903e6..0f46d7b304b0 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -51,12 +51,19 @@ static bool kernfs_lockdep(struct kernfs_node *kn)
#endif
}

+/* strscpy_mock_strlcpy - imitates strlcpy API but uses strscpy underneath. */
+static size_t strscpy_mock_strlcpy(char *dest, const char *src, size_t count)
+{
+ strscpy(dest, src, count);
+ return strlen(src);
+}
+
static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
{
if (!kn)
- return strlcpy(buf, "(null)", buflen);
+ return strscpy_mock_strlcpy(buf, "(null)", buflen);

- return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
+ return strscpy_mock_strlcpy(buf, kn->parent ? kn->name : "/", buflen);
}

/* kernfs_node_depth - compute depth from @from to @to */
@@ -141,13 +148,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
int i, j;

if (!kn_to)
- return strlcpy(buf, "(null)", buflen);
+ return strscpy_mock_strlcpy(buf, "(null)", buflen);

if (!kn_from)
kn_from = kernfs_root(kn_to)->kn;

if (kn_from == kn_to)
- return strlcpy(buf, "/", buflen);
+ return strscpy_mock_strlcpy(buf, "/", buflen);

common = kernfs_common_ancestor(kn_from, kn_to);
if (WARN_ON(!common))
@@ -159,16 +166,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
buf[0] = '\0';

for (i = 0; i < depth_from; i++)
- len += strlcpy(buf + len, parent_str,
+ len += strscpy_mock_strlcpy(buf + len, parent_str,
len < buflen ? buflen - len : 0);

/* Calculate how many bytes we need for the rest */
for (i = depth_to - 1; i >= 0; i--) {
for (kn = kn_to, j = 0; j < i; j++)
kn = kn->parent;
- len += strlcpy(buf + len, "/",
+ len += strscpy_mock_strlcpy(buf + len, "/",
len < buflen ? buflen - len : 0);
- len += strlcpy(buf + len, kn->name,
+ len += strscpy_mock_strlcpy(buf + len, kn->name,
len < buflen ? buflen - len : 0);
}

@@ -851,7 +858,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,

spin_lock_irq(&kernfs_pr_cont_lock);

- len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
+ len = strscpy_mock_strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));

if (len >= sizeof(kernfs_pr_cont_buf)) {
spin_unlock_irq(&kernfs_pr_cont_lock);
--
2.40.1.521.gf1e218fcd8-goog



2023-05-10 01:53:57

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH v2] kernfs: Prefer strscpy over strlcpy calls

On Wed, May 10, 2023 at 01:11:22AM +0000, Azeem Shaikh wrote:
...
> +/* strscpy_mock_strlcpy - imitates strlcpy API but uses strscpy underneath. */
> +static size_t strscpy_mock_strlcpy(char *dest, const char *src, size_t count)
> +{
> + strscpy(dest, src, count);
> + return strlen(src);
> +}

I'm not sure this is a meaningful conversion. One benefit of strscpy() is
that it's less error-prone to check for overflows. What's the point of
removing strlcpy() if we end up sprinkling lesser duplicates in the tree?

> static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
> {
> if (!kn)
> - return strlcpy(buf, "(null)", buflen);
> + return strscpy_mock_strlcpy(buf, "(null)", buflen);
>
> - return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
> + return strscpy_mock_strlcpy(buf, kn->parent ? kn->name : "/", buflen);
> }

Can you follow the users and convert the users accordingly rather than
masking it from here? ie. make kernfs_name() and friends return -E2BIG when
source is too long like strscpy(). I don't think anybody cares, actually.

> /* kernfs_node_depth - compute depth from @from to @to */
> @@ -141,13 +148,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
> int i, j;
>
> if (!kn_to)
> - return strlcpy(buf, "(null)", buflen);
> + return strscpy_mock_strlcpy(buf, "(null)", buflen);
>
> if (!kn_from)
> kn_from = kernfs_root(kn_to)->kn;
>
> if (kn_from == kn_to)
> - return strlcpy(buf, "/", buflen);
> + return strscpy_mock_strlcpy(buf, "/", buflen);
>
> common = kernfs_common_ancestor(kn_from, kn_to);
> if (WARN_ON(!common))
> @@ -159,16 +166,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
> buf[0] = '\0';
>
> for (i = 0; i < depth_from; i++)
> - len += strlcpy(buf + len, parent_str,
> + len += strscpy_mock_strlcpy(buf + len, parent_str,
> len < buflen ? buflen - len : 0);
>
> /* Calculate how many bytes we need for the rest */
> for (i = depth_to - 1; i >= 0; i--) {
> for (kn = kn_to, j = 0; j < i; j++)
> kn = kn->parent;
> - len += strlcpy(buf + len, "/",
> + len += strscpy_mock_strlcpy(buf + len, "/",
> len < buflen ? buflen - len : 0);
> - len += strlcpy(buf + len, kn->name,
> + len += strscpy_mock_strlcpy(buf + len, kn->name,
> len < buflen ? buflen - len : 0);
> }

Ditto, please convert all the users accordingly. If that's not feasible, I
think it'd be better to leave it as-is. I don't see how the new code is
better.

> @@ -851,7 +858,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
>
> spin_lock_irq(&kernfs_pr_cont_lock);
>
> - len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
> + len = strscpy_mock_strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
>
> if (len >= sizeof(kernfs_pr_cont_buf)) {
> spin_unlock_irq(&kernfs_pr_cont_lock);

This is an easy conversion to strscpy().

Thanks.

--
tejun

2023-05-10 16:24:51

by Azeem Shaikh

[permalink] [raw]
Subject: Re: [PATCH v2] kernfs: Prefer strscpy over strlcpy calls

On Tue, May 9, 2023 at 9:24 PM Tejun Heo <[email protected]> wrote:
>
> On Wed, May 10, 2023 at 01:11:22AM +0000, Azeem Shaikh wrote:
> ...
> > +/* strscpy_mock_strlcpy - imitates strlcpy API but uses strscpy underneath. */
> > +static size_t strscpy_mock_strlcpy(char *dest, const char *src, size_t count)
> > +{
> > + strscpy(dest, src, count);
> > + return strlen(src);
> > +}
>
> I'm not sure this is a meaningful conversion. One benefit of strscpy() is
> that it's less error-prone to check for overflows. What's the point of
> removing strlcpy() if we end up sprinkling lesser duplicates in the tree?
>

Thanks for your patience and helpful feedback on this Tejun, very much
appreciated.
I've responded to your comments below and added my notes on the
transitive usage of the functions.
Happy to send a smaller patch which directly replaces strlcpy->strscpy.

> > static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
> > {
> > if (!kn)
> > - return strlcpy(buf, "(null)", buflen);
> > + return strscpy_mock_strlcpy(buf, "(null)", buflen);
> >
> > - return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
> > + return strscpy_mock_strlcpy(buf, kn->parent ? kn->name : "/", buflen);
> > }
>
> Can you follow the users and convert the users accordingly rather than
> masking it from here? ie. make kernfs_name() and friends return -E2BIG when
> source is too long like strscpy(). I don't think anybody cares, actually.
>

I found 4 transitive callers of kernfs_name across the kernel, all of
which eventually ignore the return value:

1. fs/kernfs/dir.c: calls kernfs_name. Ignores return value.
2. include/linux/cgroup.h: calls kernfs_name from cgroup_name. returns
the value of kernfs_name.
3. kernel/cgroup/debug.c: calls cgroup_name. Ignores return value.
4. mm/page_owner.c: calls cgroup_name. Ignores return value.

So replacing directly with strscpy here should be safe. Let me know
what you think.

> > /* kernfs_node_depth - compute depth from @from to @to */
> > @@ -141,13 +148,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
> > int i, j;
> >
> > if (!kn_to)
> > - return strlcpy(buf, "(null)", buflen);
> > + return strscpy_mock_strlcpy(buf, "(null)", buflen);
> >
> > if (!kn_from)
> > kn_from = kernfs_root(kn_to)->kn;
> >
> > if (kn_from == kn_to)
> > - return strlcpy(buf, "/", buflen);
> > + return strscpy_mock_strlcpy(buf, "/", buflen);
> >
> > common = kernfs_common_ancestor(kn_from, kn_to);
> > if (WARN_ON(!common))
> > @@ -159,16 +166,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
> > buf[0] = '\0';
> >
> > for (i = 0; i < depth_from; i++)
> > - len += strlcpy(buf + len, parent_str,
> > + len += strscpy_mock_strlcpy(buf + len, parent_str,
> > len < buflen ? buflen - len : 0);
> >
> > /* Calculate how many bytes we need for the rest */
> > for (i = depth_to - 1; i >= 0; i--) {
> > for (kn = kn_to, j = 0; j < i; j++)
> > kn = kn->parent;
> > - len += strlcpy(buf + len, "/",
> > + len += strscpy_mock_strlcpy(buf + len, "/",
> > len < buflen ? buflen - len : 0);
> > - len += strlcpy(buf + len, kn->name,
> > + len += strscpy_mock_strlcpy(buf + len, kn->name,
> > len < buflen ? buflen - len : 0);
> > }
>
> Ditto, please convert all the users accordingly. If that's not feasible, I
> think it'd be better to leave it as-is. I don't see how the new code is
> better.
>

kernfs_path_from_node has quite a few transitive callers. I'll leave
this as-is for now and consider tackling this separately.

> > @@ -851,7 +858,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
> >
> > spin_lock_irq(&kernfs_pr_cont_lock);
> >
> > - len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
> > + len = strscpy_mock_strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
> >
> > if (len >= sizeof(kernfs_pr_cont_buf)) {
> > spin_unlock_irq(&kernfs_pr_cont_lock);
>
> This is an easy conversion to strscpy().

Ack.

>
> Thanks.


>
> --
> tejun

2023-05-10 19:09:40

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH v2] kernfs: Prefer strscpy over strlcpy calls

Hello,

On Wed, May 10, 2023 at 12:03:41PM -0400, Azeem Shaikh wrote:
> > > static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
> > > {
> > > if (!kn)
> > > - return strlcpy(buf, "(null)", buflen);
> > > + return strscpy_mock_strlcpy(buf, "(null)", buflen);
> > >
> > > - return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
> > > + return strscpy_mock_strlcpy(buf, kn->parent ? kn->name : "/", buflen);
> > > }
> >
> > Can you follow the users and convert the users accordingly rather than
> > masking it from here? ie. make kernfs_name() and friends return -E2BIG when
> > source is too long like strscpy(). I don't think anybody cares, actually.
> >
>
> I found 4 transitive callers of kernfs_name across the kernel, all of
> whom eventually ignored the return value:
>
> 1. fs/kernfs/dir.c: calls kernfs_name. Ignores return value.
> 2. include/linux/cgroup.h: calls kernfs_name from cgroup_name. returns
> the value of kernfs_name.
> 3. kernel/cgroup/debug.c: calls cgroup_name. Ignores return value.
> 4.mm/page_owner.c: calls cgroup_name. Ignores return value.
>
> So replacing directly with strscpy here should be safe. Let me know
> what you think.

That sounds great to me. I have a hard time imagining needing the length
return for a single component name.

> > > /* kernfs_node_depth - compute depth from @from to @to */
> > > @@ -141,13 +148,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
...
> > Ditto, please convert all the users accordingly. If that's not feasible, I
> > think it'd be better to leave it as-is. I don't see how the new code is
> > better.
>
> kernfs_path_from_node has quite a few transitive callers. I'll leave
> this as-is for now and consider tackling this separately.

Yeah, I could be misremembering, but I seem to recall some place which actually
uses the length return to extend buffer allocation, so this might be challenging.

Thanks.

--
tejun