2022-08-01 20:52:54

by Hao Luo

[permalink] [raw]
Subject: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().

Refactor bpf_seq_read() by extracting some common logic into helper
functions. I hope this makes bpf_seq_read() more readable. This is
a refactoring patch, so no behavior change is expected.

Signed-off-by: Hao Luo <[email protected]>
---
kernel/bpf/bpf_iter.c | 156 +++++++++++++++++++++++++-----------------
1 file changed, 93 insertions(+), 63 deletions(-)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 7e8fd49406f6..39b5b647fdb7 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -77,6 +77,83 @@ static bool bpf_iter_support_resched(struct seq_file *seq)
return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
}

+/* do_copy_to_user, copies seq->buf at offset seq->from to userspace and
+ * updates corresponding fields in seq. It returns -EFAULT if any error
+ * happens. The actual number of bytes copied is returned via the argument
+ * 'copied'.
+ */
+static int do_copy_to_user(struct seq_file *seq, char __user *buf, size_t size,
+ size_t *copied)
+{
+ size_t n;
+
+ n = min(seq->count, size);
+ if (copy_to_user(buf, seq->buf + seq->from, n))
+ return -EFAULT;
+
+ seq->count -= n;
+ seq->from += n;
+ *copied = n;
+ return 0;
+}
+
+/* do_seq_show, shows the given object 'p'. If 'p' is skipped or
+ * error happens, resets seq->count to 'offs'.
+ *
+ * Returns err > 0, indicating show() skips this object.
+ * Returns err = 0, indicating show() succeeds.
+ * Returns err < 0, indicating show() fails or overflow happened.
+ */
+static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
+{
+ int err;
+
+ WARN_ON(IS_ERR_OR_NULL(p));
+
+ err = seq->op->show(seq, p);
+ if (err > 0) {
+ /* object is skipped, decrease seq_num, so next
+ * valid object can reuse the same seq_num.
+ */
+ bpf_iter_dec_seq_num(seq);
+ seq->count = offs;
+ return err;
+ }
+
+ if (err < 0 || seq_has_overflowed(seq)) {
+ seq->count = offs;
+ return err ? err : -E2BIG;
+ }
+
+ /* err == 0 and no overflow */
+ return 0;
+}
+
+/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
+ * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
+ * returns error. Returns 0 otherwise.
+ */
+static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
+{
+ if (IS_ERR(p)) {
+ seq->op->stop(seq, NULL);
+ seq->count = offs;
+ return PTR_ERR(p);
+ }
+
+ seq->op->stop(seq, p);
+ if (!p) {
+ if (!seq_has_overflowed(seq)) {
+ bpf_iter_done_stop(seq);
+ } else {
+ seq->count = offs;
+ if (offs == 0)
+ return -E2BIG;
+ }
+ }
+ return 0;
+}
+
/* maximum visited objects before bailing out */
#define MAX_ITER_OBJECTS 1000000

@@ -91,7 +168,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
loff_t *ppos)
{
struct seq_file *seq = file->private_data;
- size_t n, offs, copied = 0;
+ size_t offs, copied = 0;
int err = 0, num_objs = 0;
bool can_resched;
void *p;
@@ -108,40 +185,18 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
}

if (seq->count) {
- n = min(seq->count, size);
- err = copy_to_user(buf, seq->buf + seq->from, n);
- if (err) {
- err = -EFAULT;
- goto done;
- }
- seq->count -= n;
- seq->from += n;
- copied = n;
+ err = do_copy_to_user(seq, buf, size, &copied);
goto done;
}

seq->from = 0;
p = seq->op->start(seq, &seq->index);
- if (!p)
+ if (IS_ERR_OR_NULL(p))
goto stop;
- if (IS_ERR(p)) {
- err = PTR_ERR(p);
- seq->op->stop(seq, p);
- seq->count = 0;
- goto done;
- }

- err = seq->op->show(seq, p);
- if (err > 0) {
- /* object is skipped, decrease seq_num, so next
- * valid object can reuse the same seq_num.
- */
- bpf_iter_dec_seq_num(seq);
- seq->count = 0;
- } else if (err < 0 || seq_has_overflowed(seq)) {
- if (!err)
- err = -E2BIG;
- seq->op->stop(seq, p);
+ err = do_seq_show(seq, p, 0);
+ if (err < 0) {
+ do_seq_stop(seq, p, 0);
seq->count = 0;
goto done;
}
@@ -153,7 +208,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
num_objs++;
offs = seq->count;
p = seq->op->next(seq, p, &seq->index);
- if (pos == seq->index) {
+ if (unlikely(pos == seq->index)) {
pr_info_ratelimited("buggy seq_file .next function %ps "
"did not updated position index\n",
seq->op->next);
@@ -161,7 +216,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
}

if (IS_ERR_OR_NULL(p))
- break;
+ goto stop;

/* got a valid next object, increase seq_num */
bpf_iter_inc_seq_num(seq);
@@ -172,22 +227,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
if (num_objs >= MAX_ITER_OBJECTS) {
if (offs == 0) {
err = -EAGAIN;
- seq->op->stop(seq, p);
+ do_seq_stop(seq, p, seq->count);
goto done;
}
break;
}

- err = seq->op->show(seq, p);
- if (err > 0) {
- bpf_iter_dec_seq_num(seq);
- seq->count = offs;
- } else if (err < 0 || seq_has_overflowed(seq)) {
- seq->count = offs;
+ err = do_seq_show(seq, p, offs);
+ if (err < 0) {
if (offs == 0) {
- if (!err)
- err = -E2BIG;
- seq->op->stop(seq, p);
+ do_seq_stop(seq, p, seq->count);
goto done;
}
break;
@@ -197,30 +246,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
cond_resched();
}
stop:
- offs = seq->count;
- /* bpf program called if !p */
- seq->op->stop(seq, p);
- if (!p) {
- if (!seq_has_overflowed(seq)) {
- bpf_iter_done_stop(seq);
- } else {
- seq->count = offs;
- if (offs == 0) {
- err = -E2BIG;
- goto done;
- }
- }
- }
-
- n = min(seq->count, size);
- err = copy_to_user(buf, seq->buf, n);
- if (err) {
- err = -EFAULT;
+ err = do_seq_stop(seq, p, seq->count);
+ if (err)
goto done;
- }
- copied = n;
- seq->count -= n;
- seq->from = n;
+
+ err = do_copy_to_user(seq, buf, size, &copied);
done:
if (!copied)
copied = err;
--
2.37.1.455.g008518b4e5-goog



2022-08-02 11:22:59

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().

On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:

SNIP

> +static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
> +{
> + int err;
> +
> + WARN_ON(IS_ERR_OR_NULL(p));
> +
> + err = seq->op->show(seq, p);
> + if (err > 0) {
> + /* object is skipped, decrease seq_num, so next
> + * valid object can reuse the same seq_num.
> + */
> + bpf_iter_dec_seq_num(seq);
> + seq->count = offs;
> + return err;
> + }
> +
> + if (err < 0 || seq_has_overflowed(seq)) {
> + seq->count = offs;
> + return err ? err : -E2BIG;
> + }
> +
> + /* err == 0 and no overflow */
> + return 0;
> +}
> +
> +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
> + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
> + * returns error. Returns 0 otherwise.
> + */
> +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
> +{
> + if (IS_ERR(p)) {
> + seq->op->stop(seq, NULL);
> + seq->count = offs;

should we set seq->count to 0 in case of error?

jirka

> + return PTR_ERR(p);
> + }
> +
> + seq->op->stop(seq, p);
> + if (!p) {
> + if (!seq_has_overflowed(seq)) {
> + bpf_iter_done_stop(seq);
> + } else {
> + seq->count = offs;
> + if (offs == 0)
> + return -E2BIG;
> + }
> + }
> + return 0;
> +}
> +
> /* maximum visited objects before bailing out */
> #define MAX_ITER_OBJECTS 1000000
>

SNIP

2022-08-03 00:27:36

by Hao Luo

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().

On Tue, Aug 2, 2022 at 4:14 AM Jiri Olsa <[email protected]> wrote:
>
> On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:
>
> SNIP
>
> > +static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
> > +{
> > + int err;
> > +
> > + WARN_ON(IS_ERR_OR_NULL(p));
> > +
> > + err = seq->op->show(seq, p);
> > + if (err > 0) {
> > + /* object is skipped, decrease seq_num, so next
> > + * valid object can reuse the same seq_num.
> > + */
> > + bpf_iter_dec_seq_num(seq);
> > + seq->count = offs;
> > + return err;
> > + }
> > +
> > + if (err < 0 || seq_has_overflowed(seq)) {
> > + seq->count = offs;
> > + return err ? err : -E2BIG;
> > + }
> > +
> > + /* err == 0 and no overflow */
> > + return 0;
> > +}
> > +
> > +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
> > + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
> > + * returns error. Returns 0 otherwise.
> > + */
> > +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
> > +{
> > + if (IS_ERR(p)) {
> > + seq->op->stop(seq, NULL);
> > + seq->count = offs;
>
> should we set seq->count to 0 in case of error?
>

Thanks Jiri. To be honest, I don't know. There are two paths that may
lead to an error "p".

First, seq->op->start() could return ERR, in that case, '"offs'" is
zero and we set it to zero already. This is fine.

The other one, seq->op->next() could return ERR. This is a case where
bpf_seq_read() fails to handle right now, so I am not sure what to do.

Based on my understanding reading the code, if seq->count isn't
zeroed, the current read() will not copy data, but the next read()
will copy data (see the "if (seq->count)" at the beginning of
bpf_seq_read). If seq->count is zeroed, the data in buffer will be
discarded. I don't know what is right.

> jirka
>
> > + return PTR_ERR(p);
> > + }
> > +
> > + seq->op->stop(seq, p);
> > + if (!p) {
> > + if (!seq_has_overflowed(seq)) {
> > + bpf_iter_done_stop(seq);
> > + } else {
> > + seq->count = offs;
> > + if (offs == 0)
> > + return -E2BIG;
> > + }
> > + }
> > + return 0;
> > +}
> > +
> > /* maximum visited objects before bailing out */
> > #define MAX_ITER_OBJECTS 1000000
> >
>
> SNIP

2022-08-03 12:03:35

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().

On Tue, Aug 02, 2022 at 05:15:50PM -0700, Hao Luo wrote:
> On Tue, Aug 2, 2022 at 4:14 AM Jiri Olsa <[email protected]> wrote:
> >
> > On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:
> >
> > SNIP
> >
> > > +static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
> > > +{
> > > + int err;
> > > +
> > > + WARN_ON(IS_ERR_OR_NULL(p));
> > > +
> > > + err = seq->op->show(seq, p);
> > > + if (err > 0) {
> > > + /* object is skipped, decrease seq_num, so next
> > > + * valid object can reuse the same seq_num.
> > > + */
> > > + bpf_iter_dec_seq_num(seq);
> > > + seq->count = offs;
> > > + return err;
> > > + }
> > > +
> > > + if (err < 0 || seq_has_overflowed(seq)) {
> > > + seq->count = offs;
> > > + return err ? err : -E2BIG;
> > > + }
> > > +
> > > + /* err == 0 and no overflow */
> > > + return 0;
> > > +}
> > > +
> > > +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
> > > + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
> > > + * returns error. Returns 0 otherwise.
> > > + */
> > > +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
> > > +{
> > > + if (IS_ERR(p)) {
> > > + seq->op->stop(seq, NULL);
> > > + seq->count = offs;
> >
> > should we set seq->count to 0 in case of error?
> >
>
> Thanks Jiri. To be honest, I don't know. There are two paths that may
> lead to an error "p".
>
> First, seq->op->start() could return ERR, in that case, '"offs'" is
> zero and we set it to zero already. This is fine.

ah right, offs is zero at that time, looks good then

>
> The other one, seq->op->next() could return ERR. This is a case where
> bpf_seq_read() fails to handle right now, so I am not sure what to do.

but maybe we don't need to set seq->count in here, like:

static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
{
if (IS_ERR(p)) {
seq->op->stop(seq, NULL);
return PTR_ERR(p);
}

because it's already set by error path of do_seq_show

>
> Based on my understanding reading the code, if seq->count isn't
> zeroed, the current read() will not copy data, but the next read()
> will copy data (see the "if (seq->count)" at the beginning of
> bpf_seq_read). If seq->count is zeroed, the data in buffer will be
> discarded. I don't know what is right.

I think we should return the buffer up to the point there's an error,
that's why we set seq->count to previous offs value after failed
show callback

jirka

2022-08-03 12:18:57

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().

On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:

SNIP

> - err = seq->op->show(seq, p);
> - if (err > 0) {
> - /* object is skipped, decrease seq_num, so next
> - * valid object can reuse the same seq_num.
> - */
> - bpf_iter_dec_seq_num(seq);
> - seq->count = 0;
> - } else if (err < 0 || seq_has_overflowed(seq)) {
> - if (!err)
> - err = -E2BIG;
> - seq->op->stop(seq, p);
> + err = do_seq_show(seq, p, 0);
> + if (err < 0) {
> + do_seq_stop(seq, p, 0);
> seq->count = 0;
> goto done;
> }
> @@ -153,7 +208,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> num_objs++;
> offs = seq->count;
> p = seq->op->next(seq, p, &seq->index);
> - if (pos == seq->index) {
> + if (unlikely(pos == seq->index)) {
> pr_info_ratelimited("buggy seq_file .next function %ps "
> "did not updated position index\n",
> seq->op->next);
> @@ -161,7 +216,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> }
>
> if (IS_ERR_OR_NULL(p))
> - break;
> + goto stop;

we could still keep the break here

>
> /* got a valid next object, increase seq_num */
> bpf_iter_inc_seq_num(seq);
> @@ -172,22 +227,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> if (num_objs >= MAX_ITER_OBJECTS) {
> if (offs == 0) {
> err = -EAGAIN;
> - seq->op->stop(seq, p);
> + do_seq_stop(seq, p, seq->count);
> goto done;
> }
> break;
> }
>
> - err = seq->op->show(seq, p);
> - if (err > 0) {
> - bpf_iter_dec_seq_num(seq);
> - seq->count = offs;
> - } else if (err < 0 || seq_has_overflowed(seq)) {
> - seq->count = offs;
> + err = do_seq_show(seq, p, offs);
> + if (err < 0) {
> if (offs == 0) {
> - if (!err)
> - err = -E2BIG;
> - seq->op->stop(seq, p);
> + do_seq_stop(seq, p, seq->count);
> goto done;
> }
> break;
> @@ -197,30 +246,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
> cond_resched();
> }
> stop:
> - offs = seq->count;
> - /* bpf program called if !p */
> - seq->op->stop(seq, p);
> - if (!p) {
> - if (!seq_has_overflowed(seq)) {
> - bpf_iter_done_stop(seq);
> - } else {
> - seq->count = offs;
> - if (offs == 0) {
> - err = -E2BIG;
> - goto done;
> - }
> - }
> - }
> -
> - n = min(seq->count, size);
> - err = copy_to_user(buf, seq->buf, n);
> - if (err) {
> - err = -EFAULT;
> + err = do_seq_stop(seq, p, seq->count);
> + if (err)
> goto done;

looks like we tried to copy the data before when stop failed,
now it's skipped

jirka

> - }
> - copied = n;
> - seq->count -= n;
> - seq->from = n;
> +
> + err = do_copy_to_user(seq, buf, size, &copied);
> done:
> if (!copied)
> copied = err;
> --
> 2.37.1.455.g008518b4e5-goog
>