2021-05-08 12:30:45

by Justin He

[permalink] [raw]
Subject: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

This helper is similar to d_path except for not taking seqlock/spinlock.

Signed-off-by: Jia He <[email protected]>
---
fs/d_path.c | 50 ++++++++++++++++++++++++++++++++----------
include/linux/dcache.h | 1 +
2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index a69e2cd36e6e..985a5754a9f5 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -61,7 +61,7 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
* @root: root vfsmnt/dentry
* @buffer: pointer to the end of the buffer
* @buflen: pointer to buffer length
- *
+ * @need_lock: not ignoring renames and changes to the mount tree
* The function will first try to write out the pathname without taking any
* lock other than the RCU read lock to make sure that dentries won't go away.
* It only checks the sequence number of the global rename_lock as any change
@@ -74,7 +74,7 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
*/
static int prepend_path(const struct path *path,
const struct path *root,
- char **buffer, int *buflen)
+ char **buffer, int *buflen, bool need_lock)
{
struct dentry *dentry;
struct vfsmount *vfsmnt;
@@ -86,7 +86,8 @@ static int prepend_path(const struct path *path,

rcu_read_lock();
restart_mnt:
- read_seqbegin_or_lock(&mount_lock, &m_seq);
+ if (need_lock)
+ read_seqbegin_or_lock(&mount_lock, &m_seq);
seq = 0;
rcu_read_lock();
restart:
@@ -96,7 +97,8 @@ static int prepend_path(const struct path *path,
dentry = path->dentry;
vfsmnt = path->mnt;
mnt = real_mount(vfsmnt);
- read_seqbegin_or_lock(&rename_lock, &seq);
+ if (need_lock)
+ read_seqbegin_or_lock(&rename_lock, &seq);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;

@@ -136,19 +138,21 @@ static int prepend_path(const struct path *path,
}
if (!(seq & 1))
rcu_read_unlock();
- if (need_seqretry(&rename_lock, seq)) {
+ if (need_lock && need_seqretry(&rename_lock, seq)) {
seq = 1;
goto restart;
}
- done_seqretry(&rename_lock, seq);
+ if (need_lock)
+ done_seqretry(&rename_lock, seq);

if (!(m_seq & 1))
rcu_read_unlock();
- if (need_seqretry(&mount_lock, m_seq)) {
+ if (need_lock && need_seqretry(&mount_lock, m_seq)) {
m_seq = 1;
goto restart_mnt;
}
- done_seqretry(&mount_lock, m_seq);
+ if (need_lock)
+ done_seqretry(&mount_lock, m_seq);

if (error >= 0 && bptr == *buffer) {
if (--blen < 0)
@@ -185,7 +189,7 @@ char *__d_path(const struct path *path,
int error;

prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, root, &res, &buflen);
+ error = prepend_path(path, root, &res, &buflen, true);

if (error < 0)
return ERR_PTR(error);
@@ -202,7 +206,7 @@ char *d_absolute_path(const struct path *path,
int error;

prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, &root, &res, &buflen);
+ error = prepend_path(path, &root, &res, &buflen, true);

if (error > 1)
error = -EINVAL;
@@ -225,7 +229,7 @@ static int path_with_deleted(const struct path *path,
return error;
}

- return prepend_path(path, root, buf, buflen);
+ return prepend_path(path, root, buf, buflen, true);
}

static int prepend_unreachable(char **buffer, int *buflen)
@@ -291,6 +295,28 @@ char *d_path(const struct path *path, char *buf, int buflen)
}
EXPORT_SYMBOL(d_path);

+/**
+ * d_path_fast - fast return the path of a dentry
+ * This helper is similar to d_path other than taking seqlock/spinlock.
+ */
+char *d_path_fast(const struct path *path, char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ struct path root;
+ int error;
+
+ rcu_read_lock();
+ get_fs_root_rcu(current->fs, &root);
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, &root, &res, &buflen, false);
+ rcu_read_unlock();
+
+ if (error < 0)
+ res = ERR_PTR(error);
+ return res;
+}
+EXPORT_SYMBOL(d_path_fast);
+
/*
* Helper function for dentry_operations.d_dname() members
*/
@@ -445,7 +471,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
int buflen = PATH_MAX;

prepend(&cwd, &buflen, "\0", 1);
- error = prepend_path(&pwd, &root, &cwd, &buflen);
+ error = prepend_path(&pwd, &root, &cwd, &buflen, true);
rcu_read_unlock();

if (error < 0)
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c1e48014106f..9a00fb90824f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -300,6 +300,7 @@ char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
extern char *__d_path(const struct path *, const struct path *, char *, int);
extern char *d_absolute_path(const struct path *, char *, int);
extern char *d_path(const struct path *, char *, int);
+extern char *d_path_fast(const struct path *, char *, int);
extern char *dentry_path_raw(struct dentry *, char *, int);
extern char *dentry_path(struct dentry *, char *, int);

--
2.17.1


2021-05-08 15:31:45

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 8, 2021 at 5:29 AM Jia He <[email protected]> wrote:
>
> This helper is similar to d_path except for not taking seqlock/spinlock.

I see why you did it that way, but conditional locking is something we
really really try to avoid in the kernel.

It basically makes a lot of static tools unable to follow the locking
rules, and it makes it hard for people to se what's going on too.

So instead of passing a "bool need_lock" thing down, the way to do
these things is generally to extract the code inside the locked region
into a helper function of its own, and then you have

__unlocked_version(...)
{
.. do the actual work
}

locked_version(..)
{
take_lock(..)
retval = __unlocked_version(..);
release_lock(..);
return retval;
}

this prepend_path() case is a bit more complicated because there's two
layers of locking, but I think the pattern should still work fine.

In fact, I think it would clean up prepend_path() and make it more
legible to have the two layers of mount_lock / rename_lock be done in
callers with the restarting being done as a loop in the caller rather
than as "goto restart_*".

Linus

2021-05-08 19:16:00

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 08:30:43AM -0700, Linus Torvalds wrote:

> In fact, I think it would clean up prepend_path() and make it more
> legible to have the two layers of mount_lock / rename_lock be done in
> callers with the restarting being done as a loop in the caller rather
> than as "goto restart_*".

Potentiallty delicate question is how to pass bptr/blen to the caller
of that helper...

2021-05-08 20:42:43

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 8, 2021 at 12:13 PM Al Viro <[email protected]> wrote:
>
> Potentiallty delicate question is how to pass bptr/blen to the caller
> of that helper...

So I was thinking something like this patch..

THIS IS TOTALLY UNTESTED!

I've basically introduced a "struct prepend_buffer" that acts as that
"end,len" pair and that gets passed around. It builds cleanly for me,
which actually makes me fairly happy that it might even be close
right, because the calling conventions would catch any mistake.

It looks superficially sane to me, but again - untested.

The patch ended up being bigger than I expected, but it's all fairly
straightforward - just munging the calling conventions.

Oh, and I did extract out the core of "prepend_path()" into
"prepend_entries()" just to show how it would work.

Linus


Attachments:
patch.diff (10.80 kB)

2021-05-08 21:10:20

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:

> +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)

If anything, s/path/dentry/, since vfsmnt here will be equal to &mnt->mnt all along.

> +{
> + struct dentry *dentry = path->dentry;
> + struct vfsmount *vfsmnt = path->mnt;
> +
> + while (dentry != root->dentry || vfsmnt != root->mnt) {
> + int error;
> + struct dentry * parent;
> +
> + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
> + struct mount *parent = READ_ONCE(mnt->mnt_parent);
> + struct mnt_namespace *mnt_ns;
> +
> + /* Escaped? */
> + if (dentry != vfsmnt->mnt_root)
> + return 3;
> +
> + /* Global root? */
> + if (mnt != parent) {
> + dentry = READ_ONCE(mnt->mnt_mountpoint);
> + mnt = parent;
> + vfsmnt = &mnt->mnt;
> + continue;
> + }
> + mnt_ns = READ_ONCE(mnt->mnt_ns);
> + /* open-coded is_mounted() to use local mnt_ns */
> + if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
> + return 1; // absolute root
> +
> + return 2; // detached or not attached yet
> + break;

?

> + }
> + parent = dentry->d_parent;
> + prefetch(parent);
> + error = prepend_name(b, &dentry->d_name);
> + if (error)
> + break;

return error, surely?

> + dentry = parent;
> + }
> + return 0;
> +}

FWIW, if we go that way, I would make that

while (dentry != root->dentry || &mnt->mnt != root->mnt) {
int error;
struct dentry *parent = READ_ONCE(dentry->d_parent);

if (unlikely(dentry == mnt->mnt.mnt_root)) {
struct mount *m = READ_ONCE(mnt->mnt_parent);

/* Global root? */
if (unlikely(mnt == m) {
struct mnt_namespace *mnt_ns;

mnt_ns = READ_ONCE(mnt->mnt_ns);
/* open-coded is_mounted() to use local mnt_ns */
if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
return 1; // absolute root
else
return 2; // detached or not attached yet
}
// cross into whatever it's mounted on
dentry = READ_ONCE(mnt->mnt_mountpoint);
mnt = m;
continue;
}

/* Escaped? */
if (unlikely(dentry == parent))
return 3;

prefetch(parent);
error = prepend_name(b, &dentry->d_name);
if (error)
return error;
dentry = parent;
}
return 0;

2021-05-08 22:18:47

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 8, 2021 at 2:06 PM Al Viro <[email protected]> wrote:
>
> On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:
>
> > +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
>
> If anything, s/path/dentry/, since vfsmnt here will be equal to &mnt->mnt all along.

Too subtle for me.

And is it? Because mnt is from

mnt = real_mount(path->mnt);

earlier, while vfsmount is plain "path->mnt".

> > + return 2; // detached or not attached yet
> > + break;
>
> ?

Leftover. Good catch.

> > + parent = dentry->d_parent;
> > + prefetch(parent);
> > + error = prepend_name(b, &dentry->d_name);
> > + if (error)
> > + break;
>
> return error, surely?

Surely. Bad conversion to the separate function where I missed one of
the "break" statements.

> FWIW, if we go that way, I would make that

No arguments against that - I tried to keep it with the same structure
it had when it was inside prepend_path().

Which I obviously wasn't very good at (see your fixes above ;), but it
was *meant* to be a minimal patch with no structural change.

Linus

2021-05-08 22:44:53

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 8, 2021 at 2:06 PM Al Viro <[email protected]> wrote:
>
> FWIW, if we go that way, I would make that
>
> while (dentry != root->dentry || &mnt->mnt != root->mnt) {
> int error;
> struct dentry *parent = READ_ONCE(dentry->d_parent);

Side note: you've added that READ_ONCE() to the parent reading, and I
think that's a bug-fix regardless. The old code does that plain

parent = dentry->d_parent;

(after doing the mountpoint stuff). And d_parent isn't actually
guaranteed stable here.

It probably does not matter - we are in a RCU read-locked section, so
it's not like parent will go away, but in theory we might end up with
(for example) pre-fetching a different parent than the one we then
walk down.

But your READ_ONCE() is definitely the right thing to do (whether we
do your re-org or not, and whether we do this "prepend_buffer" thing
or not).

Do you want to do a final version with your fixes?

Linus

2021-05-08 22:48:13

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 03:17:44PM -0700, Linus Torvalds wrote:
> On Sat, May 8, 2021 at 2:06 PM Al Viro <[email protected]> wrote:
> >
> > On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:
> >
> > > +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
> >
> > If anything, s/path/dentry/, since vfsmnt here will be equal to &mnt->mnt all along.
>
> Too subtle for me.
>
> And is it? Because mnt is from
>
> mnt = real_mount(path->mnt);
>
> earlier, while vfsmount is plain "path->mnt".

static inline struct mount *real_mount(struct vfsmount *mnt)
{
return container_of(mnt, struct mount, mnt);
}

2021-05-08 22:56:08

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 8, 2021 at 3:46 PM Al Viro <[email protected]> wrote:
>
> static inline struct mount *real_mount(struct vfsmount *mnt)
> {
> return container_of(mnt, struct mount, mnt);

Too subtle for me indeed.

Linus

2021-05-08 22:56:46

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 8, 2021 at 3:42 PM Linus Torvalds
<[email protected]> wrote:
>
> But your READ_ONCE() is definitely the right thing to do (whether we
> do your re-org or not, and whether we do this "prepend_buffer" thing
> or not).

Oh, and looking at it some more, I think it would probably be a good
thing to make __dentry_path() take a

struct prepend_buffer *orig

argument, the same way prepend_path() does. Also, like prepend_path(),
the terminating NUL should probably be done by the caller.

Doing those two changes would simplify the hackery we now have in
"dentry_path()" due to the "//deleted" games. The whole "restore '/'
that was overwritten by the NUL added by __dentry_path() is
unbelievably ugly.

Linus

2021-05-08 23:18:42

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 10:46:23PM +0000, Al Viro wrote:
> On Sat, May 08, 2021 at 03:17:44PM -0700, Linus Torvalds wrote:
> > On Sat, May 8, 2021 at 2:06 PM Al Viro <[email protected]> wrote:
> > >
> > > On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:
> > >
> > > > +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
> > >
> > > If anything, s/path/dentry/, since vfsmnt here will be equal to &mnt->mnt all along.
> >
> > Too subtle for me.
> >
> > And is it? Because mnt is from
> >
> > mnt = real_mount(path->mnt);
> >
> > earlier, while vfsmount is plain "path->mnt".
>
> static inline struct mount *real_mount(struct vfsmount *mnt)
> {
> return container_of(mnt, struct mount, mnt);
> }

Basically, struct vfsmount instances are always embedded into struct mount ones.
All information about the mount tree is in the latter (and is visible only if
you manage to include fs/mount.h); here we want to walk towards root, so...

Rationale: a lot places use struct vfsmount pointers, but they've no need to
access all that stuff. So struct vfsmount got trimmed down, with most of the
things that used to be there migrating into the containing structure.

[Christian Browner Cc'd]
BTW, WTF do we have struct mount.user_ns and struct vfsmount.mnt_userns?
Can they ever be different? Christian?

Sigh... Namespace flavours always remind me of old joke -
Highlander II: There Should've Been Only One...

2021-05-08 23:20:23

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 11:15:28PM +0000, Al Viro wrote:

> [Christian Browner Cc'd]

Brauner, that is. Sorry ;-/

2021-05-09 02:24:19

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:
> -static int prepend(char **buffer, int *buflen, const char *str, int namelen)
> +struct prepend_buffer {
> + char *ptr;
> + int len;
> +};
> +
> +static int prepend(struct prepend_buffer *b, const char *str, int namelen)
> {
> - *buflen -= namelen;
> - if (*buflen < 0)
> + b->len -= namelen;
> + if (b->len < 0)
> return -ENAMETOOLONG;
> - *buffer -= namelen;
> - memcpy(*buffer, str, namelen);
> + b->ptr -= namelen;
> + memcpy(b->ptr, str, namelen);
> return 0;
> }

OK, that part is pretty obvious - pointers to a couple of objects in the same
stack frame replaced with a single pointer to structure consisting of those
two objects. Might actually be an optimization.

> @@ -35,16 +40,16 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
> *
> * Load acquire is needed to make sure that we see that terminating NUL.
> */
> -static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
> +static int prepend_name(struct prepend_buffer *b, const struct qstr *name)
> {
> const char *dname = smp_load_acquire(&name->name); /* ^^^ */
> u32 dlen = READ_ONCE(name->len);
> char *p;
>
> - *buflen -= dlen + 1;
> - if (*buflen < 0)
> + b->len -= dlen + 1;
> + if (b->len < 0)
> return -ENAMETOOLONG;
> - p = *buffer -= dlen + 1;
> + p = b->ptr -= dlen + 1;
> *p++ = '/';
> while (dlen--) {
> char c = *dname++;

Ditto.

> @@ -55,6 +60,50 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
> return 0;
> }
>
> +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
> +{
> + struct dentry *dentry = path->dentry;
> + struct vfsmount *vfsmnt = path->mnt;
> +
> + while (dentry != root->dentry || vfsmnt != root->mnt) {
> + int error;
> + struct dentry * parent;
> +
> + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
> + struct mount *parent = READ_ONCE(mnt->mnt_parent);
> + struct mnt_namespace *mnt_ns;
> +
> + /* Escaped? */
> + if (dentry != vfsmnt->mnt_root)
> + return 3;
> +
> + /* Global root? */
> + if (mnt != parent) {
> + dentry = READ_ONCE(mnt->mnt_mountpoint);
> + mnt = parent;
> + vfsmnt = &mnt->mnt;
> + continue;
> + }
> + mnt_ns = READ_ONCE(mnt->mnt_ns);
> + /* open-coded is_mounted() to use local mnt_ns */
> + if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
> + return 1; // absolute root
> +
> + return 2; // detached or not attached yet
> + break;
> + }
> + parent = dentry->d_parent;
> + prefetch(parent);
> + error = prepend_name(b, &dentry->d_name);
> + if (error)
> + break;
> +
> + dentry = parent;
> + }
> + return 0;
> +}

See other reply.

> +
> /**
> * prepend_path - Prepend path string to a buffer
> * @path: the dentry/vfsmount to report
> @@ -74,15 +123,12 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
> */
> static int prepend_path(const struct path *path,
> const struct path *root,
> - char **buffer, int *buflen)
> + struct prepend_buffer *orig)
> {
> - struct dentry *dentry;
> - struct vfsmount *vfsmnt;
> struct mount *mnt;
> int error = 0;
> unsigned seq, m_seq = 0;
> - char *bptr;
> - int blen;
> + struct prepend_buffer b;
>
> rcu_read_lock();
> restart_mnt:
> @@ -90,50 +136,12 @@ static int prepend_path(const struct path *path,
> seq = 0;
> rcu_read_lock();
> restart:
> - bptr = *buffer;
> - blen = *buflen;
> - error = 0;
> - dentry = path->dentry;
> - vfsmnt = path->mnt;
> - mnt = real_mount(vfsmnt);
> + b = *orig;
> + mnt = real_mount(path->mnt);
> read_seqbegin_or_lock(&rename_lock, &seq);
> - while (dentry != root->dentry || vfsmnt != root->mnt) {
> - struct dentry * parent;
>
> - if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
> - struct mount *parent = READ_ONCE(mnt->mnt_parent);
> - struct mnt_namespace *mnt_ns;
> + error = prepend_entries(&b, path, root, mnt);
>
> - /* Escaped? */
> - if (dentry != vfsmnt->mnt_root) {
> - bptr = *buffer;
> - blen = *buflen;
> - error = 3;
> - break;
> - }
> - /* Global root? */
> - if (mnt != parent) {
> - dentry = READ_ONCE(mnt->mnt_mountpoint);
> - mnt = parent;
> - vfsmnt = &mnt->mnt;
> - continue;
> - }
> - mnt_ns = READ_ONCE(mnt->mnt_ns);
> - /* open-coded is_mounted() to use local mnt_ns */
> - if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
> - error = 1; // absolute root
> - else
> - error = 2; // detached or not attached yet
> - break;
> - }
> - parent = dentry->d_parent;
> - prefetch(parent);
> - error = prepend_name(&bptr, &blen, &dentry->d_name);
> - if (error)
> - break;
> -
> - dentry = parent;
> - }
> if (!(seq & 1))
> rcu_read_unlock();
> if (need_seqretry(&rename_lock, seq)) {
> @@ -150,14 +158,17 @@ static int prepend_path(const struct path *path,
> }
> done_seqretry(&mount_lock, m_seq);
>
> - if (error >= 0 && bptr == *buffer) {
> - if (--blen < 0)
> + // Escaped? No path
> + if (error == 3)
> + b = *orig;
> +
> + if (error >= 0 && b.ptr == orig->ptr) {
> + if (--b.len < 0)
> error = -ENAMETOOLONG;
> else
> - *--bptr = '/';
> + *--b.ptr = '/';
> }
> - *buffer = bptr;
> - *buflen = blen;
> + *orig = b;
> return error;
> }
>
> @@ -181,34 +192,34 @@ char *__d_path(const struct path *path,
> const struct path *root,
> char *buf, int buflen)
> {
> - char *res = buf + buflen;
> + struct prepend_buffer b = { buf + buflen, buflen };
> int error;
>
> - prepend(&res, &buflen, "\0", 1);
> - error = prepend_path(path, root, &res, &buflen);
> + prepend(&b, "\0", 1);
> + error = prepend_path(path, root, &b);

Minor yuck: that should be "", 1 (in the original as well). Same below...
Fairly subtle point: we do *not* need to check for failures here, since
prepend_path() will attempt to produce at least something. And we'll
catch that failure just fine. However, we do depend upon buflen being
non-negative here. If we ever called that (or any other in that family,
really) with buflen == MIN_INT, we'd get seriously unpleasant results.
No such callers exist, thankfully.

> char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
> {
> - char *end = buffer + buflen;
> + struct prepend_buffer b = { buffer + buflen, buflen };
> +
> /* these dentries are never renamed, so d_lock is not needed */
> - if (prepend(&end, &buflen, " (deleted)", 11) ||
> - prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
> - prepend(&end, &buflen, "/", 1))
> - end = ERR_PTR(-ENAMETOOLONG);
> - return end;
> + if (prepend(&b, " (deleted)", 11) ||
> + prepend(&b, dentry->d_name.name, dentry->d_name.len) ||
> + prepend(&b, "/", 1))
> + return ERR_PTR(-ENAMETOOLONG);
> + return b.ptr;
> }

Umm... Interesting, especially considering the way dyname_dname() looks like.
char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
const char *fmt, ...)
{
va_list args;
char temp[64];
int sz;

va_start(args, fmt);
sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
va_end(args);

if (sz > sizeof(temp) || sz > buflen)
return ERR_PTR(-ENAMETOOLONG);

buffer += buflen - sz;
return memcpy(buffer, temp, sz);
}

Looks like there's a piece of prepend() open-coded in it. And
since all ->d_dname() instances are either simple_dname() or end up
with call of dynamic_dname()...

Might make sense to turn that method into
int (*d_dname)(struct dentry *, struct prepend_buffer *);

Followup patch, obviously, but it might be worth looking into.

Another thing that keeps bugging me about prepend_path() is the
set of return values. I mean, 0/1/2/3/-ENAMETOOLONG, and all
except 0 are unlikely? Might as well make that 0/1/2/3/-1, if
not an outright 0/1/2/3/4. And prepend() could return bool, while
we are at it (true - success, false - overflow)...

2021-05-09 02:29:51

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 03:47:38PM -0700, Linus Torvalds wrote:
> On Sat, May 8, 2021 at 3:42 PM Linus Torvalds
> <[email protected]> wrote:
> >
> > But your READ_ONCE() is definitely the right thing to do (whether we
> > do your re-org or not, and whether we do this "prepend_buffer" thing
> > or not).
>
> Oh, and looking at it some more, I think it would probably be a good
> thing to make __dentry_path() take a
>
> struct prepend_buffer *orig
>
> argument, the same way prepend_path() does. Also, like prepend_path(),
> the terminating NUL should probably be done by the caller.
>
> Doing those two changes would simplify the hackery we now have in
> "dentry_path()" due to the "//deleted" games. The whole "restore '/'
> that was overwritten by the NUL added by __dentry_path() is
> unbelievably ugly.

Agreed. Re READ_ONCE() - we are wrapped into
read_seqbegin_or_lock(&rename_lock, &seq) there, so it's more about
being explicit than about correctness considerations.

2021-05-09 03:04:13

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 8, 2021 at 7:28 PM Al Viro <[email protected]> wrote:
>
> Re READ_ONCE() - we are wrapped into
> read_seqbegin_or_lock(&rename_lock, &seq) there, so it's more about
> being explicit than about correctness considerations.

Well, part of this all is that the next step is that "vsnprintf()"
with '%pD' would basically use prepend_entries() with just the RCU
lock.

That said, even with the rename lock, that will only cause a retry on
rename - it won't necessarily fix any confusion that comes from the
compiler possibly silently re-loading 'parent' multiple times, and
getting different pointers due to a concurrent rename.

Now, those different results should all be individually ok, due to RCU
freeing, but it's _really_ confusing if 'parent' might be two
different things within the same iteration of the loop.

I don't see anything truly horrible that would happen - mainly "we'll
prefetch one parent, and then due to reloading the pointer we might
actually _use_ another parent entirely for the next iteration", but it
really is best to avoid that kind of confusion.

Linus

2021-05-09 05:09:07

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sun, May 09, 2021 at 02:20:32AM +0000, Al Viro wrote:
> Umm... Interesting, especially considering the way dyname_dname() looks like.
dynamic_dname(), obviously.

> Looks like there's a piece of prepend() open-coded in it. And
> since all ->d_dname() instances are either simple_dname() or end up
> with call of dynamic_dname()...
>
> Might make sense to turn that method into
> int (*d_dname)(struct dentry *, struct prepend_buffer *);
>
> Followup patch, obviously, but it might be worth looking into.
>
> Another thing that keeps bugging me about prepend_path() is the
> set of return values. I mean, 0/1/2/3/-ENAMETOOLONG, and all
> except 0 are unlikely? Might as well make that 0/1/2/3/-1, if
> not an outright 0/1/2/3/4. And prepend() could return bool, while
> we are at it (true - success, false - overflow)...

OK... I think I see how to carve it up sanely, will post after I
get some sleep.

2021-05-09 22:59:50

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

Al Viro <[email protected]> writes:

> On Sat, May 08, 2021 at 10:46:23PM +0000, Al Viro wrote:
>> On Sat, May 08, 2021 at 03:17:44PM -0700, Linus Torvalds wrote:
>> > On Sat, May 8, 2021 at 2:06 PM Al Viro <[email protected]> wrote:
>> > >
>> > > On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:
>> > >
>> > > > +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
>> > >
>> > > If anything, s/path/dentry/, since vfsmnt here will be equal to &mnt->mnt all along.
>> >
>> > Too subtle for me.
>> >
>> > And is it? Because mnt is from
>> >
>> > mnt = real_mount(path->mnt);
>> >
>> > earlier, while vfsmount is plain "path->mnt".
>>
>> static inline struct mount *real_mount(struct vfsmount *mnt)
>> {
>> return container_of(mnt, struct mount, mnt);
>> }
>
> Basically, struct vfsmount instances are always embedded into struct mount ones.
> All information about the mount tree is in the latter (and is visible only if
> you manage to include fs/mount.h); here we want to walk towards root, so...
>
> Rationale: a lot places use struct vfsmount pointers, but they've no need to
> access all that stuff. So struct vfsmount got trimmed down, with most of the
> things that used to be there migrating into the containing structure.
>
> [Christian Browner Cc'd]
> BTW, WTF do we have struct mount.user_ns and struct vfsmount.mnt_userns?
> Can they ever be different? Christian?

I presume you are asking about struct mnt_namespace.user_ns and
struct vfsmount.mnt_userns.

That must the idmapped mounts work.

In short mnt_namespace.user_ns is the user namespace that owns
the mount namespace.

vfsmount.mnt_userns functionally could be reduced to just some struct
uid_gid_map structures hanging off the vfsmount. It's purpose is
to add a generic translation of uids and gids on from the filesystem
view to the what we want to show userspace.

That code could probably benefit from some refactoring so it is clearer,
and some serious fixes. I reported it earlier but it looks like there
is some real breakage in chown if you use idmapped mounts.

Eric

2021-05-10 07:22:56

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sat, May 08, 2021 at 11:15:28PM +0000, Al Viro wrote:
> On Sat, May 08, 2021 at 10:46:23PM +0000, Al Viro wrote:
> > On Sat, May 08, 2021 at 03:17:44PM -0700, Linus Torvalds wrote:
> > > On Sat, May 8, 2021 at 2:06 PM Al Viro <[email protected]> wrote:
> > > >
> > > > On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:
> > > >
> > > > > +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
> > > >
> > > > If anything, s/path/dentry/, since vfsmnt here will be equal to &mnt->mnt all along.
> > >
> > > Too subtle for me.
> > >
> > > And is it? Because mnt is from
> > >
> > > mnt = real_mount(path->mnt);
> > >
> > > earlier, while vfsmount is plain "path->mnt".
> >
> > static inline struct mount *real_mount(struct vfsmount *mnt)
> > {
> > return container_of(mnt, struct mount, mnt);
> > }
>
> Basically, struct vfsmount instances are always embedded into struct mount ones.
> All information about the mount tree is in the latter (and is visible only if
> you manage to include fs/mount.h); here we want to walk towards root, so...
>
> Rationale: a lot places use struct vfsmount pointers, but they've no need to
> access all that stuff. So struct vfsmount got trimmed down, with most of the
> things that used to be there migrating into the containing structure.
>
> [Christian Browner Cc'd]
> BTW, WTF do we have struct mount.user_ns and struct vfsmount.mnt_userns?
> Can they ever be different? Christian?

Yes, they can.

>
> Sigh... Namespace flavours always remind me of old joke -
> Highlander II: There Should've Been Only One...

(I'd prefer if mount propagation would die first.)

2021-05-10 13:06:31

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Sun, May 09, 2021 at 05:58:22PM -0500, Eric W. Biederman wrote:
> Al Viro <[email protected]> writes:
>
> > On Sat, May 08, 2021 at 10:46:23PM +0000, Al Viro wrote:
> >> On Sat, May 08, 2021 at 03:17:44PM -0700, Linus Torvalds wrote:
> >> > On Sat, May 8, 2021 at 2:06 PM Al Viro <[email protected]> wrote:
> >> > >
> >> > > On Sat, May 08, 2021 at 01:39:45PM -0700, Linus Torvalds wrote:
> >> > >
> >> > > > +static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
> >> > >
> >> > > If anything, s/path/dentry/, since vfsmnt here will be equal to &mnt->mnt all along.
> >> >
> >> > Too subtle for me.
> >> >
> >> > And is it? Because mnt is from
> >> >
> >> > mnt = real_mount(path->mnt);
> >> >
> >> > earlier, while vfsmount is plain "path->mnt".
> >>
> >> static inline struct mount *real_mount(struct vfsmount *mnt)
> >> {
> >> return container_of(mnt, struct mount, mnt);
> >> }
> >
> > Basically, struct vfsmount instances are always embedded into struct mount ones.
> > All information about the mount tree is in the latter (and is visible only if
> > you manage to include fs/mount.h); here we want to walk towards root, so...
> >
> > Rationale: a lot places use struct vfsmount pointers, but they've no need to
> > access all that stuff. So struct vfsmount got trimmed down, with most of the
> > things that used to be there migrating into the containing structure.
> >
> > [Christian Browner Cc'd]
> > BTW, WTF do we have struct mount.user_ns and struct vfsmount.mnt_userns?
> > Can they ever be different? Christian?
>
> I presume you are asking about struct mnt_namespace.user_ns and
> struct vfsmount.mnt_userns.
>
> That must the idmapped mounts work.
>
> In short mnt_namespace.user_ns is the user namespace that owns
> the mount namespace.
>
> vfsmount.mnt_userns functionally could be reduced to just some struct
> uid_gid_map structures hanging off the vfsmount. It's purpose is

No. The userns can in the future be used for permission checking when
delegating features per mount.

> to add a generic translation of uids and gids on from the filesystem
> view to the what we want to show userspace.
>
> That code could probably benefit from some refactoring so it is clearer,
> and some serious fixes. I reported it earlier but it looks like there
> is some real breakage in chown if you use idmapped mounts.

You mentioned something about chown already some weeks ago here [1] and
never provided any details or reproducer for it. This code is
extensively covered by xfstests and systemd and others are already using
it so far without any issues reported by users. If there is an issue,
it'd be good to fix them and see the tests changed to cover that
particular case.

[1]: https://lore.kernel.org/lkml/[email protected]/T/#m3a9df31aa183e8797c70bc193040adfd601399ad

2021-05-10 15:26:58

by Justin He

[permalink] [raw]
Subject: RE: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

Hi Linus
I don't know how to display the attachment in this mail.
So I copied here below, together with one comment:
> -----Original Message-----
> From: Linus Torvalds <[email protected]>
> Sent: Sunday, May 9, 2021 4:40 AM
> To: Al Viro <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Al Viro
> <[email protected]>; Heiko Carstens <[email protected]>; Vasily Gorbik
> <[email protected]>; Christian Borntraeger <[email protected]>; Eric
> W . Biederman <[email protected]>; Darrick J. Wong
> <[email protected]>; Peter Zijlstra (Intel) <[email protected]>;
> Ira Weiny <[email protected]>; Eric Biggers <[email protected]>; Ahmed
> S. Darwish <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()
>
> On Sat, May 8, 2021 at 12:13 PM Al Viro <[email protected]> wrote:
> >
> > Potentiallty delicate question is how to pass bptr/blen to the caller
> > of that helper...
>
> So I was thinking something like this patch..
>
> THIS IS TOTALLY UNTESTED!
>
> I've basically introduced a "struct prepend_buffer" that acts as that
> "end,len" pair and that gets passed around. It builds cleanly for me,
> which actually makes me fairly happy that it might even be close
> right, because the calling conventions would catch any mistake.
>
> It looks superficially sane to me, but again - untested.
>
> The patch ended up being bigger than I expected, but it's all fairly
> straightforward - just munging the calling conventions.
>
> Oh, and I did extract out the core of "prepend_path()" into
> "prepend_entries()" just to show how it would work.
>
> Linus
> fs/d_path.c | 227 ++++++++++++++++++++++++++++++++----------------------------
> 1 file changed, 123 insertions(+), 104 deletions(-)
>
>diff --git a/fs/d_path.c b/fs/d_path.c
>index 270d62133996..47eb29524271 100644
>--- a/fs/d_path.c
>+++ b/fs/d_path.c
>@@ -8,13 +8,18 @@
> #include <linux/prefetch.h>
> #include "mount.h"
>
>-static int prepend(char **buffer, int *buflen, const char *str, int namelen)
>+struct prepend_buffer {
>+ char *ptr;
>+ int len;
>+};
>+
>+static int prepend(struct prepend_buffer *b, const char *str, int namelen)
> {
>- *buflen -= namelen;
>- if (*buflen < 0)
>+ b->len -= namelen;
>+ if (b->len < 0)
> return -ENAMETOOLONG;
>- *buffer -= namelen;
>- memcpy(*buffer, str, namelen);
>+ b->ptr -= namelen;
>+ memcpy(b->ptr, str, namelen);
> return 0;
> }
>
>@@ -35,16 +40,16 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
> *
> * Load acquire is needed to make sure that we see that terminating NUL.
> */
>-static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
>+static int prepend_name(struct prepend_buffer *b, const struct qstr *name)
> {
> const char *dname = smp_load_acquire(&name->name); /* ^^^ */
> u32 dlen = READ_ONCE(name->len);
> char *p;
>
>- *buflen -= dlen + 1;
>- if (*buflen < 0)
>+ b->len -= dlen + 1;
>+ if (b->len < 0)
> return -ENAMETOOLONG;
>- p = *buffer -= dlen + 1;
>+ p = b->ptr -= dlen + 1;
> *p++ = '/';
> while (dlen--) {
> char c = *dname++;
>@@ -55,6 +60,50 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
> return 0;
> }
>
>+static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
>+{
>+ struct dentry *dentry = path->dentry;
>+ struct vfsmount *vfsmnt = path->mnt;
>+
>+ while (dentry != root->dentry || vfsmnt != root->mnt) {
>+ int error;
>+ struct dentry * parent;
>+
>+ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
>+ struct mount *parent = READ_ONCE(mnt->mnt_parent);
>+ struct mnt_namespace *mnt_ns;
>+
>+ /* Escaped? */
>+ if (dentry != vfsmnt->mnt_root)
>+ return 3;
>+
>+ /* Global root? */
>+ if (mnt != parent) {
>+ dentry = READ_ONCE(mnt->mnt_mountpoint);
>+ mnt = parent;
>+ vfsmnt = &mnt->mnt;
>+ continue;
>+ }
>+ mnt_ns = READ_ONCE(mnt->mnt_ns);
>+ /* open-coded is_mounted() to use local mnt_ns */
>+ if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
>+ return 1; // absolute root
>+
>+ return 2; // detached or not attached yet
>+ break;
>+ }
>+ parent = dentry->d_parent;
>+ prefetch(parent);
>+ error = prepend_name(b, &dentry->d_name);
>+ if (error)
>+ break;
>+
>+ dentry = parent;
>+ }
>+ return 0;
>+}
>+
>+
> /**
> * prepend_path - Prepend path string to a buffer
> * @path: the dentry/vfsmount to report
>@@ -74,15 +123,12 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
> */
> static int prepend_path(const struct path *path,
> const struct path *root,
>- char **buffer, int *buflen)
>+ struct prepend_buffer *orig)
> {
>- struct dentry *dentry;
>- struct vfsmount *vfsmnt;
> struct mount *mnt;
> int error = 0;
> unsigned seq, m_seq = 0;
>- char *bptr;
>- int blen;
>+ struct prepend_buffer b;
>
> rcu_read_lock();
> restart_mnt:
>@@ -90,50 +136,12 @@ static int prepend_path(const struct path *path,
> seq = 0;
> rcu_read_lock();
> restart:
>- bptr = *buffer;
>- blen = *buflen;
>- error = 0;
>- dentry = path->dentry;
>- vfsmnt = path->mnt;
>- mnt = real_mount(vfsmnt);
>+ b = *orig;
>+ mnt = real_mount(path->mnt);
> read_seqbegin_or_lock(&rename_lock, &seq);
>- while (dentry != root->dentry || vfsmnt != root->mnt) {
>- struct dentry * parent;
>
>- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
>- struct mount *parent = READ_ONCE(mnt->mnt_parent);
>- struct mnt_namespace *mnt_ns;
>+ error = prepend_entries(&b, path, root, mnt);
>
>- /* Escaped? */
>- if (dentry != vfsmnt->mnt_root) {
>- bptr = *buffer;
>- blen = *buflen;
>- error = 3;
>- break;
>- }
>- /* Global root? */
>- if (mnt != parent) {
>- dentry = READ_ONCE(mnt->mnt_mountpoint);
>- mnt = parent;
>- vfsmnt = &mnt->mnt;
>- continue;
>- }
>- mnt_ns = READ_ONCE(mnt->mnt_ns);
>- /* open-coded is_mounted() to use local mnt_ns */
>- if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
>- error = 1; // absolute root
>- else
>- error = 2; // detached or not attached yet
>- break;
>- }
>- parent = dentry->d_parent;
>- prefetch(parent);
>- error = prepend_name(&bptr, &blen, &dentry->d_name);
>- if (error)
>- break;
>-
>- dentry = parent;
>- }
> if (!(seq & 1))
> rcu_read_unlock();
> if (need_seqretry(&rename_lock, seq)) {
>@@ -150,14 +158,17 @@ static int prepend_path(const struct path *path,
> }
> done_seqretry(&mount_lock, m_seq);
>
>- if (error >= 0 && bptr == *buffer) {
>- if (--blen < 0)
>+ // Escaped? No path
>+ if (error == 3)
>+ b = *orig;
>+
>+ if (error >= 0 && b.ptr == orig->ptr) {
>+ if (--b.len < 0)
> error = -ENAMETOOLONG;
> else
>- *--bptr = '/';
>+ *--b.ptr = '/';
> }
>- *buffer = bptr;
>- *buflen = blen;
>+ *orig = b;
> return error;
> }
>
>@@ -181,34 +192,34 @@ char *__d_path(const struct path *path,
> const struct path *root,
> char *buf, int buflen)
> {
>- char *res = buf + buflen;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> int error;
>
>- prepend(&res, &buflen, "\0", 1);
>- error = prepend_path(path, root, &res, &buflen);
>+ prepend(&b, "\0", 1);
>+ error = prepend_path(path, root, &b);
>
> if (error < 0)
> return ERR_PTR(error);
> if (error > 0)
> return NULL;
>- return res;
>+ return b.ptr;
> }
>
> char *d_absolute_path(const struct path *path,
> char *buf, int buflen)
> {
> struct path root = {};
>- char *res = buf + buflen;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> int error;
>
>- prepend(&res, &buflen, "\0", 1);
>- error = prepend_path(path, &root, &res, &buflen);
>+ prepend(&b, "\0", 1);
>+ error = prepend_path(path, &root, &b);
>
> if (error > 1)
> error = -EINVAL;
> if (error < 0)
> return ERR_PTR(error);
>- return res;
>+ return b.ptr;
> }
>
> /*
>@@ -216,21 +227,21 @@ char *d_absolute_path(const struct path *path,
> */
> static int path_with_deleted(const struct path *path,
> const struct path *root,
>- char **buf, int *buflen)
>+ struct prepend_buffer *b)
> {
>- prepend(buf, buflen, "\0", 1);
>+ prepend(b, "\0", 1);
> if (d_unlinked(path->dentry)) {
>- int error = prepend(buf, buflen, " (deleted)", 10);
>+ int error = prepend(b, " (deleted)", 10);
> if (error)
> return error;
> }
>
>- return prepend_path(path, root, buf, buflen);
>+ return prepend_path(path, root, b);
> }
>
>-static int prepend_unreachable(char **buffer, int *buflen)
>+static int prepend_unreachable(struct prepend_buffer *b)
> {
>- return prepend(buffer, buflen, "(unreachable)", 13);
>+ return prepend(b, "(unreachable)", 13);
> }
>
> static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
>@@ -261,7 +272,7 @@ static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
> */
> char *d_path(const struct path *path, char *buf, int buflen)
> {
>- char *res = buf + buflen;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> struct path root;
> int error;
>
>@@ -282,12 +293,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
>
> rcu_read_lock();
> get_fs_root_rcu(current->fs, &root);
>- error = path_with_deleted(path, &root, &res, &buflen);
>+ error = path_with_deleted(path, &root, &b);
> rcu_read_unlock();
>
> if (error < 0)
>- res = ERR_PTR(error);
>- return res;
>+ return ERR_PTR(error);
>+ return b.ptr;
> }
> EXPORT_SYMBOL(d_path);
>
>@@ -314,13 +325,14 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
>
> char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
> {
>- char *end = buffer + buflen;
>+ struct prepend_buffer b = { buffer + buflen, buflen };
>+
> /* these dentries are never renamed, so d_lock is not needed */
>- if (prepend(&end, &buflen, " (deleted)", 11) ||
>- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
>- prepend(&end, &buflen, "/", 1))
>- end = ERR_PTR(-ENAMETOOLONG);
>- return end;
>+ if (prepend(&b, " (deleted)", 11) ||
>+ prepend(&b, dentry->d_name.name, dentry->d_name.len) ||
>+ prepend(&b, "/", 1))
>+ return ERR_PTR(-ENAMETOOLONG);
>+ return b.ptr;
> }
>
> /*
>@@ -329,8 +341,9 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
> static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
> {
> const struct dentry *dentry;
>- char *end, *retval;
>- int len, seq = 0;
>+ struct prepend_buffer b;
>+ char *retval;
>+ int seq = 0;
> int error = 0;
>
> if (buflen < 2)
>@@ -339,22 +352,22 @@ static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
> rcu_read_lock();
> restart:
> dentry = d;
>- end = buf + buflen;
>- len = buflen;
>- prepend(&end, &len, "\0", 1);
>+ b.ptr = buf + buflen;
>+ b.len = buflen;
>+ prepend(&b, "\0", 1);
> /* Get '/' right */
>- retval = end-1;
>+ retval = b.ptr-1;
> *retval = '/';
> read_seqbegin_or_lock(&rename_lock, &seq);
> while (!IS_ROOT(dentry)) {
> const struct dentry *parent = dentry->d_parent;
>
> prefetch(parent);
>- error = prepend_name(&end, &len, &dentry->d_name);
>+ error = prepend_name(&b, &dentry->d_name);
> if (error)
> break;
>
>- retval = end;
>+ retval = b.ptr;
> dentry = parent;
> }
> if (!(seq & 1))
>@@ -379,16 +392,23 @@ EXPORT_SYMBOL(dentry_path_raw);
>
> char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
> {
>- char *p = NULL;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> char *retval;
>+ char *p = NULL;
>
> if (d_unlinked(dentry)) {
>- p = buf + buflen;
>- if (prepend(&p, &buflen, "//deleted", 10) != 0)
>+ if (prepend(&b, "//deleted", 10) != 0)
> goto Elong;
>- buflen++;
>+
>+ // save away beginning of "//deleted" string
>+ // and let "__dentry_path()" overwrite one byte
>+ // with the terminating NUL that we'll restore
>+ // below.
>+ p = b.ptr;
>+ b.ptr++;
>+ b.len++;
> }
>- retval = __dentry_path(dentry, buf, buflen);
>+ retval = __dentry_path(dentry, b.ptr, b.len);

I didn't quite understand the logic here. Seems it is not equal to
the previous. Should it be s/b.ptr/buf here? Otherwise, in __dentry_path,
it will use the range [b.ptr, b.ptr+b.len] instead of [buf, buf+b.len].
Am I missing anything here?

--
Cheers,
Justin (Jia He)

> if (!IS_ERR(retval) && p)
> *p = '/'; /* restore '/' overriden with '\0' */
> return retval;
>@@ -441,11 +461,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
> error = -ENOENT;
> if (!d_unlinked(pwd.dentry)) {
> unsigned long len;
>- char *cwd = page + PATH_MAX;
>- int buflen = PATH_MAX;
>+ struct prepend_buffer b = { page + PATH_MAX, PATH_MAX };
>
>- prepend(&cwd, &buflen, "\0", 1);
>- error = prepend_path(&pwd, &root, &cwd, &buflen);
>+ prepend(&b, "\0", 1);
>+ error = prepend_path(&pwd, &root, &b);
> rcu_read_unlock();
>
> if (error < 0)
>@@ -453,16 +472,16 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
>
> /* Unreachable from current root */
> if (error > 0) {
>- error = prepend_unreachable(&cwd, &buflen);
>+ error = prepend_unreachable(&b);
> if (error)
> goto out;
> }
>
> error = -ERANGE;
>- len = PATH_MAX + page - cwd;
>+ len = PATH_MAX + page - b.ptr;
> if (len <= size) {
> error = len;
>- if (copy_to_user(buf, cwd, len))
>+ if (copy_to_user(buf, b.ptr, len))
> error = -EFAULT;
> }
> } else {
>
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-05-10 16:18:34

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

Al Viro <[email protected]> writes:
>
> Another thing that keeps bugging me about prepend_path() is the
> set of return values. I mean, 0/1/2/3/-ENAMETOOLONG, and all
> except 0 are unlikely? Might as well make that 0/1/2/3/-1, if
> not an outright 0/1/2/3/4. And prepend() could return bool, while
> we are at it (true - success, false - overflow)...

I remember seeing that the different callers of prepend_path treated
those different cases differently.

But now that I look again the return value 3 (escaped) gets lumped
together with 2(detached).


On second look it appears that the two patterns that we actually have
are basically:

char *__d_path(const struct path *path,
const struct path *root,
char *buf, int buflen)
{
error = prepend_path(path, root, &res, &buflen);

if (error < 0)
return ERR_PTR(error);
if (error > 0)
return NULL;
return res;
}

char *d_absolute_path(const struct path *path,
char *buf, int buflen)
{
error = prepend_path(path, &root, &res, &buflen);

if (error > 1)
error = -EINVAL;
if (error < 0)
return ERR_PTR(error);
return res;
}

With d_absolute_path deciding that return value 1 absolute is not an
error.

That does look like there is plenty of room to refactor and make things
clearer.


Eric




2021-05-10 17:13:25

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

On Mon, May 10, 2021 at 8:08 AM Justin He <[email protected]> wrote:
>
> >
> > char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
> > {
> >- char *p = NULL;
> >+ struct prepend_buffer b = { buf + buflen, buflen };
> > char *retval;
> >+ char *p = NULL;
> >
> > if (d_unlinked(dentry)) {
> >- p = buf + buflen;
> >- if (prepend(&p, &buflen, "//deleted", 10) != 0)
> >+ if (prepend(&b, "//deleted", 10) != 0)
> > goto Elong;
> >- buflen++;
> >+
> >+ // save away beginning of "//deleted" string
> >+ // and let "__dentry_path()" overwrite one byte
> >+ // with the terminating NUL that we'll restore
> >+ // below.
> >+ p = b.ptr;
> >+ b.ptr++;
> >+ b.len++;
> > }
> >- retval = __dentry_path(dentry, buf, buflen);
> >+ retval = __dentry_path(dentry, b.ptr, b.len);
>
> I didn't quite understand the logic here. Seems it is not equal to
> the previous. Should it be s/b.ptr/buf here? Otherwise, in __dentry_path,
> it will use the range [b.ptr, b.ptr+b.len] instead of [buf, buf+b.len].
> Am I missing anything here?

No, you're right. That __dentry_path() call should get "buf, b.len" as
arguments.

I knew it was squirrelly, but didn't think it through. I actually
wanted to change "__dentry_path()" to take a "struct prepend_buffer",
and not add the NUL at the end (so that the caller would have to do it
first), because that would have made the logic much more
straightforward (and made the semantics the same as the other internal
helpers).

And that would have fixed that bug of mine too. But then I didn't do
it, and just mentioned it as a later cleanup.

Good catch.

Linus

2021-05-19 18:55:32

by Al Viro

[permalink] [raw]
Subject: [PATCHSET] d_path cleanups

First of all, apologies for delays (one of the disks on the main
devel/testing box has become an ex-parrot, with... interesting recovery).

Here's what I've got for carve-up of cleanups. This stuff lives
in git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git #work.d_path,
individual patches in followups. 14 commits, all changes are to fs/d_path.c,
total being +133/-191. Moderately tested, seems to work here. Review
and testing would be welcome...

Part 1: trivial preliminary cleanup

1/14) d_path: "\0" is {0,0}, not {0}
A bunch of places used "\0" as a literal for 1-element array consisting
of NULs. That should've been "", obviously...

Part 2: untangling __dentry_path()

2/14) d_path: saner calling conventions for __dentry_path()
__dentry_path() used to copy the calling conventions for dentry_path().
That was fine for use in dentry_path_raw(), but it created a lot of headache
in dentry_path(), since we might need a suffix (//deleted) in there, without
NUL between it and the pathname itself. So we had to
1) (possibly) put /deleted into buffer and remember where it went
2) let __dentry_path() prepend NUL-terminated pathname
3) override NUL with / if we had done (1)
Life becomes much easier if __dentry_path() does *NOT* put NUL in there.
Then dentry_path_raw() becomes 'put "" into buffer, then __dentry_path()'
and dentry_path() - 'put "" or "//deleted" into buffer, then __dentry_path()'.

Additionally, we switch the way buffer information is passed to one similar
to what we do in prepend()/prepend_name()/etc., i.e. pass the pointer to the
end of buffer instead of that to beginning. That's what we'd been using
in __dentry_path() all along, and now that the callers are doing some prepend()
before calling __dentry_path(), they that value already on hand.

3/14) d_path: regularize handling of root dentry in __dentry_path()
All path-forming primitives boil down to sequence of prepend_name()
on dentries encountered along the way toward root. Each time we prepend
/ + dentry name to the buffer. Normally that does exactly what we want,
but there's a corner case when we don't call prepend_name() at all (in case
of __dentry_path() that happens if we are given root dentry). We obviously
want to end up with "/", rather than "", so this corner case needs to be
handled.
__dentry_path() used to manually put '/' in the end of buffer before
doing anything else, to be overwritten by the first call of prepend_name()
if one happens and to be left in place if we don't call prepend_name() at
all. That required manually checking that we had space in the buffer
(prepend_name() and prepend() take care of such checks themselves) and lead
to clumsy keeping track of return value.
A better approach is to check if the main loop has added anything
into the buffer and prepend "/" if it hasn't. A side benefit of using prepend()
is that it does the right thing if we'd already run out of buffer, making
the overflow-handling logics simpler.

NB: the above might be worth putting into commit message.

Part 3: overflow handling cleanups

We have an overcomplicated handling of overflows. Primitives (prepend() and
prepend_name()) check if we'd run out of space and return -ENAMETOOLONG.
Then it's propagated all the way out by call chain. However, the same
primitives are safe to call in case we'd *already* run out of space and
that condition is easily checked at any level of callchain. The next
5 commits use that to simplify the control flow.

4/14) d_path: get rid of path_with_deleted()
expand into the sole caller, rearrange the suffix handing
along the lines of dentry_path().

5/14) getcwd(2): saner logics around prepend_path() call
Turn
prepend_path()
if it says it has run out of space, fail with ENAMETOOLONG
if it wants "(unreachable) " prepended
do so
if that says it has run out of space, fail with ENAMETOOLONG
into
prepend_path()
if it wants "(unreachable) " prepended
do so
if we see we'd run out of space at some point
fail with ENAMETOOLONG

6/14) d_path: don't bother with return value of prepend()
Almost nothing is looking at return value of prepend() and it's
easy to get rid of the last stragglers...

7/14) d_path: lift -ENAMETOOLONG handling into callers of prepend_path()
It's easier to have prepend_path() return 0 on overflow and
check for overflow in the callers. The logics is the same for all callers
(ran out of space => ERR_PTR(-ENAMETOOLONG)), so we get a bit of boilerplate,
but even with that the callers become simpler. Added boilerplate will be
dealt with a couple of commits down the road.

8/14) d_path: make prepend_name() boolean
Unlike the case of prepend(), callers of prepend_name() really want
to see whether it has run out of space - the loops it's called in are
lockless and we could, in principle, end up spinning there for indetermined
amount of iterations. Dropping out of loop if we run out of space in the
buffers serves as a backstop for (very unlikely) cases.
However, all we care about is success/failure - we generate
ENAMETOOLONG in the callers, if not callers of callers, so we can bloody well
make it return bool.

Part 4: introduction of prepend_buffer

9/14) d_path: introduce struct prepend_buffer
We've a lot of places where we have pairs of form (pointer to end
of buffer, amount of space left in front of that). These sit in pairs of
variables located next to each other and usually passed by reference.
Turn those into instances of new type (struct prepend_buffer) and pass
reference to the pair instead of pairs of references to its fields.

Initialization (of form {buf + len, len}) turned into a macro (DECLARE_BUF),
to avoid brainos. Extraction of string (buffer contents if we hadn't run
out of space, ERR_PTR(-ENAMETOOLONG) otherwise) is done by extract_string();
that eats the leftover boilerplate from earlier in the series.

Part 5: extracting the lockless part of prepend_path()
The thing that started the entire mess had been an attempt to use d_path
machinery for vsprintf(); that can't grab rename_lock and mount_lock,
so we needed a variant of prepend_path() that would try to go without
those locks. The obvious approach is to lift the internal loop of
prepend_path() into a new primitive. However, that needs some massage
first to separate local variables - otherwise we end up with the
argument list from hell.

10/14) d_path: prepend_path(): get rid of vfsmnt
redundant - we maintain mnt and vfsmnt through the loop,
with the latter being a pointer to mnt->mnt all along.
11/14) d_path: prepend_path(): lift resetting b in case when we'd return 3 out of loop
the only place in the inner loop where we need p; we use it
to reset b in case we would return 3. We can easily check that error is 3
after the loop and do resetting there. Note that we only need that after
the *outer* loop - the body of that starts with assignment to b anyway.
12/14) d_path: prepend_path(): lift the inner loop into a new helper
ta-da

Part 6: followups

13/14) d_path: prepend_path() is unlikely to return non-zero
t14/14) getcwd(2): clean up error handling

2021-05-19 18:56:11

by Al Viro

[permalink] [raw]
Subject: [PATCH 01/14] d_path: "\0" is {0,0}, not {0}

Single-element array consisting of one NUL is spelled ""...

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 270d62133996..01df5dfa1f88 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -184,7 +184,7 @@ char *__d_path(const struct path *path,
char *res = buf + buflen;
int error;

- prepend(&res, &buflen, "\0", 1);
+ prepend(&res, &buflen, "", 1);
error = prepend_path(path, root, &res, &buflen);

if (error < 0)
@@ -201,7 +201,7 @@ char *d_absolute_path(const struct path *path,
char *res = buf + buflen;
int error;

- prepend(&res, &buflen, "\0", 1);
+ prepend(&res, &buflen, "", 1);
error = prepend_path(path, &root, &res, &buflen);

if (error > 1)
@@ -218,7 +218,7 @@ static int path_with_deleted(const struct path *path,
const struct path *root,
char **buf, int *buflen)
{
- prepend(buf, buflen, "\0", 1);
+ prepend(buf, buflen, "", 1);
if (d_unlinked(path->dentry)) {
int error = prepend(buf, buflen, " (deleted)", 10);
if (error)
@@ -341,7 +341,7 @@ static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
dentry = d;
end = buf + buflen;
len = buflen;
- prepend(&end, &len, "\0", 1);
+ prepend(&end, &len, "", 1);
/* Get '/' right */
retval = end-1;
*retval = '/';
@@ -444,7 +444,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
char *cwd = page + PATH_MAX;
int buflen = PATH_MAX;

- prepend(&cwd, &buflen, "\0", 1);
+ prepend(&cwd, &buflen, "", 1);
error = prepend_path(&pwd, &root, &cwd, &buflen);
rcu_read_unlock();

--
2.11.0


2021-05-19 18:56:43

by Al Viro

[permalink] [raw]
Subject: [PATCH 10/14] d_path: prepend_path(): get rid of vfsmnt

it's kept equal to &mnt->mnt all along.

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 06e93dd031bf..3836f5d0b023 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -90,7 +90,6 @@ static int prepend_path(const struct path *path,
struct prepend_buffer *p)
{
struct dentry *dentry;
- struct vfsmount *vfsmnt;
struct mount *mnt;
int error = 0;
unsigned seq, m_seq = 0;
@@ -105,18 +104,17 @@ static int prepend_path(const struct path *path,
b = *p;
error = 0;
dentry = path->dentry;
- vfsmnt = path->mnt;
- mnt = real_mount(vfsmnt);
+ mnt = real_mount(path->mnt);
read_seqbegin_or_lock(&rename_lock, &seq);
- while (dentry != root->dentry || vfsmnt != root->mnt) {
+ while (dentry != root->dentry || &mnt->mnt != root->mnt) {
struct dentry * parent;

- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+ if (dentry == mnt->mnt.mnt_root || IS_ROOT(dentry)) {
struct mount *parent = READ_ONCE(mnt->mnt_parent);
struct mnt_namespace *mnt_ns;

/* Escaped? */
- if (dentry != vfsmnt->mnt_root) {
+ if (dentry != mnt->mnt.mnt_root) {
b = *p;
error = 3;
break;
@@ -125,7 +123,6 @@ static int prepend_path(const struct path *path,
if (mnt != parent) {
dentry = READ_ONCE(mnt->mnt_mountpoint);
mnt = parent;
- vfsmnt = &mnt->mnt;
continue;
}
mnt_ns = READ_ONCE(mnt->mnt_ns);
--
2.11.0


2021-05-19 18:56:50

by Al Viro

[permalink] [raw]
Subject: [PATCH 04/14] d_path: get rid of path_with_deleted()

expand in the sole caller; transform the initial prepends similar to
what we'd done in dentry_path() (prepend_path() will fail the right
way if we call it with negative buflen, same as __dentry_path() does).

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index b3324ae7cfe2..7f3fac544bbb 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -211,23 +211,6 @@ char *d_absolute_path(const struct path *path,
return res;
}

-/*
- * same as __d_path but appends "(deleted)" for unlinked files.
- */
-static int path_with_deleted(const struct path *path,
- const struct path *root,
- char **buf, int *buflen)
-{
- prepend(buf, buflen, "", 1);
- if (d_unlinked(path->dentry)) {
- int error = prepend(buf, buflen, " (deleted)", 10);
- if (error)
- return error;
- }
-
- return prepend_path(path, root, buf, buflen);
-}
-
static int prepend_unreachable(char **buffer, int *buflen)
{
return prepend(buffer, buflen, "(unreachable)", 13);
@@ -282,7 +265,11 @@ char *d_path(const struct path *path, char *buf, int buflen)

rcu_read_lock();
get_fs_root_rcu(current->fs, &root);
- error = path_with_deleted(path, &root, &res, &buflen);
+ if (unlikely(d_unlinked(path->dentry)))
+ prepend(&res, &buflen, " (deleted)", 11);
+ else
+ prepend(&res, &buflen, "", 1);
+ error = prepend_path(path, &root, &res, &buflen);
rcu_read_unlock();

if (error < 0)
--
2.11.0


2021-05-19 18:56:57

by Al Viro

[permalink] [raw]
Subject: [PATCH 14/14] getcwd(2): clean up error handling

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 29 ++++++++++++-----------------
1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 8a9cd44f6689..23a53f7b5c71 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -390,9 +390,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
rcu_read_lock();
get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);

- error = -ENOENT;
- if (!d_unlinked(pwd.dentry)) {
- unsigned long len;
+ if (unlikely(d_unlinked(pwd.dentry))) {
+ rcu_read_unlock();
+ error = -ENOENT;
+ } else {
+ unsigned len;
DECLARE_BUFFER(b, page, PATH_MAX);

prepend(&b, "", 1);
@@ -400,23 +402,16 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
prepend(&b, "(unreachable)", 13);
rcu_read_unlock();

- if (b.len < 0) {
- error = -ENAMETOOLONG;
- goto out;
- }
-
- error = -ERANGE;
len = PATH_MAX - b.len;
- if (len <= size) {
+ if (unlikely(len > PATH_MAX))
+ error = -ENAMETOOLONG;
+ else if (unlikely(len > size))
+ error = -ERANGE;
+ else if (copy_to_user(buf, b.buf, len))
+ error = -EFAULT;
+ else
error = len;
- if (copy_to_user(buf, b.buf, len))
- error = -EFAULT;
- }
- } else {
- rcu_read_unlock();
}
-
-out:
__putname(page);
return error;
}
--
2.11.0


2021-05-19 18:57:02

by Al Viro

[permalink] [raw]
Subject: [PATCH 03/14] d_path: regularize handling of root dentry in __dentry_path()

All path-forming primitives boil down to sequence of prepend_name()
on dentries encountered along the way toward root. Each time we prepend
/ + dentry name to the buffer. Normally that does exactly what we want,
but there's a corner case when we don't call prepend_name() at all (in case
of __dentry_path() that happens if we are given root dentry). We obviously
want to end up with "/", rather than "", so this corner case needs to be
handled.

__dentry_path() used to manually put '/' in the end of buffer before
doing anything else, to be overwritten by the first call of prepend_name()
if one happens and to be left in place if we don't call prepend_name() at
all. That required manually checking that we had space in the buffer
(prepend_name() and prepend() take care of such checks themselves) and lead
to clumsy keeping track of return value.

A better approach is to check if the main loop has added anything
into the buffer and prepend "/" if it hasn't. A side benefit of using prepend()
is that it does the right thing if we'd already run out of buffer, making
the overflow-handling logics simpler.

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 21 +++++----------------
1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 1a1cf05e7780..b3324ae7cfe2 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -329,31 +329,22 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
static char *__dentry_path(const struct dentry *d, char *p, int buflen)
{
const struct dentry *dentry;
- char *end, *retval;
+ char *end;
int len, seq = 0;
- int error = 0;
-
- if (buflen < 1)
- goto Elong;

rcu_read_lock();
restart:
dentry = d;
end = p;
len = buflen;
- /* Get '/' right */
- retval = end-1;
- *retval = '/';
read_seqbegin_or_lock(&rename_lock, &seq);
while (!IS_ROOT(dentry)) {
const struct dentry *parent = dentry->d_parent;

prefetch(parent);
- error = prepend_name(&end, &len, &dentry->d_name);
- if (error)
+ if (unlikely(prepend_name(&end, &len, &dentry->d_name) < 0))
break;

- retval = end;
dentry = parent;
}
if (!(seq & 1))
@@ -363,11 +354,9 @@ static char *__dentry_path(const struct dentry *d, char *p, int buflen)
goto restart;
}
done_seqretry(&rename_lock, seq);
- if (error)
- goto Elong;
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
+ if (len == buflen)
+ prepend(&end, &len, "/", 1);
+ return len >= 0 ? end : ERR_PTR(-ENAMETOOLONG);
}

char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
--
2.11.0


2021-05-19 18:57:10

by Al Viro

[permalink] [raw]
Subject: [PATCH 12/14] d_path: prepend_path(): lift the inner loop into a new helper

... and leave the rename_lock/mount_lock handling in prepend_path()
itself

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 77 ++++++++++++++++++++++++++++++-------------------------------
1 file changed, 38 insertions(+), 39 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 9a0356cc98d3..ba629879a4bf 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -68,6 +68,42 @@ static bool prepend_name(struct prepend_buffer *p, const struct qstr *name)
return true;
}

+static int __prepend_path(const struct dentry *dentry, const struct mount *mnt,
+ const struct path *root, struct prepend_buffer *p)
+{
+ while (dentry != root->dentry || &mnt->mnt != root->mnt) {
+ const struct dentry *parent = READ_ONCE(dentry->d_parent);
+
+ if (dentry == mnt->mnt.mnt_root) {
+ struct mount *m = READ_ONCE(mnt->mnt_parent);
+ struct mnt_namespace *mnt_ns;
+
+ if (likely(mnt != m)) {
+ dentry = READ_ONCE(mnt->mnt_mountpoint);
+ mnt = m;
+ continue;
+ }
+ /* Global root */
+ mnt_ns = READ_ONCE(mnt->mnt_ns);
+ /* open-coded is_mounted() to use local mnt_ns */
+ if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
+ return 1; // absolute root
+ else
+ return 2; // detached or not attached yet
+ }
+
+ if (unlikely(dentry == parent))
+ /* Escaped? */
+ return 3;
+
+ prefetch(parent);
+ if (!prepend_name(p, &dentry->d_name))
+ break;
+ dentry = parent;
+ }
+ return 0;
+}
+
/**
* prepend_path - Prepend path string to a buffer
* @path: the dentry/vfsmount to report
@@ -89,11 +125,9 @@ static int prepend_path(const struct path *path,
const struct path *root,
struct prepend_buffer *p)
{
- struct dentry *dentry;
- struct mount *mnt;
- int error = 0;
unsigned seq, m_seq = 0;
struct prepend_buffer b;
+ int error;

rcu_read_lock();
restart_mnt:
@@ -102,43 +136,8 @@ static int prepend_path(const struct path *path,
rcu_read_lock();
restart:
b = *p;
- error = 0;
- dentry = path->dentry;
- mnt = real_mount(path->mnt);
read_seqbegin_or_lock(&rename_lock, &seq);
- while (dentry != root->dentry || &mnt->mnt != root->mnt) {
- struct dentry * parent;
-
- if (dentry == mnt->mnt.mnt_root || IS_ROOT(dentry)) {
- struct mount *parent = READ_ONCE(mnt->mnt_parent);
- struct mnt_namespace *mnt_ns;
-
- /* Escaped? */
- if (dentry != mnt->mnt.mnt_root) {
- error = 3;
- break;
- }
- /* Global root? */
- if (mnt != parent) {
- dentry = READ_ONCE(mnt->mnt_mountpoint);
- mnt = parent;
- continue;
- }
- mnt_ns = READ_ONCE(mnt->mnt_ns);
- /* open-coded is_mounted() to use local mnt_ns */
- if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
- error = 1; // absolute root
- else
- error = 2; // detached or not attached yet
- break;
- }
- parent = dentry->d_parent;
- prefetch(parent);
- if (!prepend_name(&b, &dentry->d_name))
- break;
-
- dentry = parent;
- }
+ error = __prepend_path(path->dentry, real_mount(path->mnt), root, &b);
if (!(seq & 1))
rcu_read_unlock();
if (need_seqretry(&rename_lock, seq)) {
--
2.11.0


2021-05-19 18:57:11

by Al Viro

[permalink] [raw]
Subject: [PATCH 09/14] d_path: introduce struct prepend_buffer

We've a lot of places where we have pairs of form (pointer to end
of buffer, amount of space left in front of that). These sit in pairs of
variables located next to each other and usually passed by reference.
Turn those into instances of new type (struct prepend_buffer) and pass
reference to the pair instead of pairs of references to its fields.

Declared and initialized by DECLARE_BUFFER(name, buf, buflen).

extract_string(prepend_buffer) returns the buffer contents if
no overflow has happened, ERR_PTR(ENAMETOOLONG) otherwise.
All places where we used to have that boilerplate converted to use
of that helper.

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 142 ++++++++++++++++++++++++++++++++----------------------------
1 file changed, 75 insertions(+), 67 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 83db83446afd..06e93dd031bf 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -8,12 +8,26 @@
#include <linux/prefetch.h>
#include "mount.h"

-static void prepend(char **buffer, int *buflen, const char *str, int namelen)
+struct prepend_buffer {
+ char *buf;
+ int len;
+};
+#define DECLARE_BUFFER(__name, __buf, __len) \
+ struct prepend_buffer __name = {.buf = __buf + __len, .len = __len}
+
+static char *extract_string(struct prepend_buffer *p)
{
- *buflen -= namelen;
- if (likely(*buflen >= 0)) {
- *buffer -= namelen;
- memcpy(*buffer, str, namelen);
+ if (likely(p->len >= 0))
+ return p->buf;
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+static void prepend(struct prepend_buffer *p, const char *str, int namelen)
+{
+ p->len -= namelen;
+ if (likely(p->len >= 0)) {
+ p->buf -= namelen;
+ memcpy(p->buf, str, namelen);
}
}

@@ -34,22 +48,22 @@ static void prepend(char **buffer, int *buflen, const char *str, int namelen)
*
* Load acquire is needed to make sure that we see that terminating NUL.
*/
-static bool prepend_name(char **buffer, int *buflen, const struct qstr *name)
+static bool prepend_name(struct prepend_buffer *p, const struct qstr *name)
{
const char *dname = smp_load_acquire(&name->name); /* ^^^ */
u32 dlen = READ_ONCE(name->len);
- char *p;
+ char *s;

- *buflen -= dlen + 1;
- if (unlikely(*buflen < 0))
+ p->len -= dlen + 1;
+ if (unlikely(p->len < 0))
return false;
- p = *buffer -= dlen + 1;
- *p++ = '/';
+ s = p->buf -= dlen + 1;
+ *s++ = '/';
while (dlen--) {
char c = *dname++;
if (!c)
break;
- *p++ = c;
+ *s++ = c;
}
return true;
}
@@ -73,15 +87,14 @@ static bool prepend_name(char **buffer, int *buflen, const struct qstr *name)
*/
static int prepend_path(const struct path *path,
const struct path *root,
- char **buffer, int *buflen)
+ struct prepend_buffer *p)
{
struct dentry *dentry;
struct vfsmount *vfsmnt;
struct mount *mnt;
int error = 0;
unsigned seq, m_seq = 0;
- char *bptr;
- int blen;
+ struct prepend_buffer b;

rcu_read_lock();
restart_mnt:
@@ -89,8 +102,7 @@ static int prepend_path(const struct path *path,
seq = 0;
rcu_read_lock();
restart:
- bptr = *buffer;
- blen = *buflen;
+ b = *p;
error = 0;
dentry = path->dentry;
vfsmnt = path->mnt;
@@ -105,8 +117,7 @@ static int prepend_path(const struct path *path,

/* Escaped? */
if (dentry != vfsmnt->mnt_root) {
- bptr = *buffer;
- blen = *buflen;
+ b = *p;
error = 3;
break;
}
@@ -127,7 +138,7 @@ static int prepend_path(const struct path *path,
}
parent = dentry->d_parent;
prefetch(parent);
- if (!prepend_name(&bptr, &blen, &dentry->d_name))
+ if (!prepend_name(&b, &dentry->d_name))
break;

dentry = parent;
@@ -148,11 +159,10 @@ static int prepend_path(const struct path *path,
}
done_seqretry(&mount_lock, m_seq);

- if (blen == *buflen)
- prepend(&bptr, &blen, "/", 1);
+ if (b.len == p->len)
+ prepend(&b, "/", 1);

- *buffer = bptr;
- *buflen = blen;
+ *p = b;
return error;
}

@@ -176,24 +186,24 @@ char *__d_path(const struct path *path,
const struct path *root,
char *buf, int buflen)
{
- char *res = buf + buflen;
+ DECLARE_BUFFER(b, buf, buflen);

- prepend(&res, &buflen, "", 1);
- if (prepend_path(path, root, &res, &buflen) > 0)
+ prepend(&b, "", 1);
+ if (prepend_path(path, root, &b) > 0)
return NULL;
- return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
+ return extract_string(&b);
}

char *d_absolute_path(const struct path *path,
char *buf, int buflen)
{
struct path root = {};
- char *res = buf + buflen;
+ DECLARE_BUFFER(b, buf, buflen);

- prepend(&res, &buflen, "", 1);
- if (prepend_path(path, &root, &res, &buflen) > 1)
+ prepend(&b, "", 1);
+ if (prepend_path(path, &root, &b) > 1)
return ERR_PTR(-EINVAL);
- return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
+ return extract_string(&b);
}

static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
@@ -224,7 +234,7 @@ static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
*/
char *d_path(const struct path *path, char *buf, int buflen)
{
- char *res = buf + buflen;
+ DECLARE_BUFFER(b, buf, buflen);
struct path root;

/*
@@ -245,13 +255,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
rcu_read_lock();
get_fs_root_rcu(current->fs, &root);
if (unlikely(d_unlinked(path->dentry)))
- prepend(&res, &buflen, " (deleted)", 11);
+ prepend(&b, " (deleted)", 11);
else
- prepend(&res, &buflen, "", 1);
- prepend_path(path, &root, &res, &buflen);
+ prepend(&b, "", 1);
+ prepend_path(path, &root, &b);
rcu_read_unlock();

- return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
+ return extract_string(&b);
}
EXPORT_SYMBOL(d_path);

@@ -278,36 +288,34 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,

char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
{
- char *end = buffer + buflen;
+ DECLARE_BUFFER(b, buffer, buflen);
/* these dentries are never renamed, so d_lock is not needed */
- prepend(&end, &buflen, " (deleted)", 11);
- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len);
- prepend(&end, &buflen, "/", 1);
- return buflen >= 0 ? end : ERR_PTR(-ENAMETOOLONG);
+ prepend(&b, " (deleted)", 11);
+ prepend(&b, dentry->d_name.name, dentry->d_name.len);
+ prepend(&b, "/", 1);
+ return extract_string(&b);
}

/*
* Write full pathname from the root of the filesystem into the buffer.
*/
-static char *__dentry_path(const struct dentry *d, char *p, int buflen)
+static char *__dentry_path(const struct dentry *d, struct prepend_buffer *p)
{
const struct dentry *dentry;
- char *end;
- int len, seq = 0;
+ struct prepend_buffer b;
+ int seq = 0;

rcu_read_lock();
restart:
dentry = d;
- end = p;
- len = buflen;
+ b = *p;
read_seqbegin_or_lock(&rename_lock, &seq);
while (!IS_ROOT(dentry)) {
const struct dentry *parent = dentry->d_parent;

prefetch(parent);
- if (!prepend_name(&end, &len, &dentry->d_name))
+ if (!prepend_name(&b, &dentry->d_name))
break;
-
dentry = parent;
}
if (!(seq & 1))
@@ -317,28 +325,29 @@ static char *__dentry_path(const struct dentry *d, char *p, int buflen)
goto restart;
}
done_seqretry(&rename_lock, seq);
- if (len == buflen)
- prepend(&end, &len, "/", 1);
- return len >= 0 ? end : ERR_PTR(-ENAMETOOLONG);
+ if (b.len == p->len)
+ prepend(&b, "/", 1);
+ return extract_string(&b);
}

char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
{
- char *p = buf + buflen;
- prepend(&p, &buflen, "", 1);
- return __dentry_path(dentry, p, buflen);
+ DECLARE_BUFFER(b, buf, buflen);
+
+ prepend(&b, "", 1);
+ return __dentry_path(dentry, &b);
}
EXPORT_SYMBOL(dentry_path_raw);

char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
{
- char *p = buf + buflen;
+ DECLARE_BUFFER(b, buf, buflen);

if (unlikely(d_unlinked(dentry)))
- prepend(&p, &buflen, "//deleted", 10);
+ prepend(&b, "//deleted", 10);
else
- prepend(&p, &buflen, "", 1);
- return __dentry_path(dentry, p, buflen);
+ prepend(&b, "", 1);
+ return __dentry_path(dentry, &b);
}

static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
@@ -386,24 +395,23 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
error = -ENOENT;
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
- char *cwd = page + PATH_MAX;
- int buflen = PATH_MAX;
+ DECLARE_BUFFER(b, page, PATH_MAX);

- prepend(&cwd, &buflen, "", 1);
- if (prepend_path(&pwd, &root, &cwd, &buflen) > 0)
- prepend(&cwd, &buflen, "(unreachable)", 13);
+ prepend(&b, "", 1);
+ if (prepend_path(&pwd, &root, &b) > 0)
+ prepend(&b, "(unreachable)", 13);
rcu_read_unlock();

- if (buflen < 0) {
+ if (b.len < 0) {
error = -ENAMETOOLONG;
goto out;
}

error = -ERANGE;
- len = PATH_MAX + page - cwd;
+ len = PATH_MAX - b.len;
if (len <= size) {
error = len;
- if (copy_to_user(buf, cwd, len))
+ if (copy_to_user(buf, b.buf, len))
error = -EFAULT;
}
} else {
--
2.11.0


2021-05-19 18:58:30

by Al Viro

[permalink] [raw]
Subject: [PATCH 07/14] d_path: lift -ENAMETOOLONG handling into callers of prepend_path()

The only negative value ever returned by prepend_path() is -ENAMETOOLONG
and callers can recognize that situation (overflow) by looking at the
sign of buflen. Lift that into the callers; we already have the
same logics (buf if buflen is non-negative, ERR_PTR(-ENAMETOOLONG) otherwise)
in several places and that'll become a new primitive several commits down
the road.

Make prepend_path() return 0 instead of -ENAMETOOLONG. That makes for
saner calling conventions (0/1/2/3/-ENAMETOOLONG is obnoxious) and
callers actually get simpler, especially once the aforementioned
primitive gets added.

In prepend_path() itself we switch prepending the / (in case of
empty path) to use of prepend() - no need to open-code that, compiler
will do the right thing. It's exactly the same logics as in
__dentry_path().

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 39 +++++++++++----------------------------
1 file changed, 11 insertions(+), 28 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 72b8087aaf9c..327cc3744554 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -127,8 +127,7 @@ static int prepend_path(const struct path *path,
}
parent = dentry->d_parent;
prefetch(parent);
- error = prepend_name(&bptr, &blen, &dentry->d_name);
- if (error)
+ if (unlikely(prepend_name(&bptr, &blen, &dentry->d_name) < 0))
break;

dentry = parent;
@@ -149,12 +148,9 @@ static int prepend_path(const struct path *path,
}
done_seqretry(&mount_lock, m_seq);

- if (error >= 0 && bptr == *buffer) {
- if (--blen < 0)
- error = -ENAMETOOLONG;
- else
- *--bptr = '/';
- }
+ if (blen == *buflen)
+ prepend(&bptr, &blen, "/", 1);
+
*buffer = bptr;
*buflen = blen;
return error;
@@ -181,16 +177,11 @@ char *__d_path(const struct path *path,
char *buf, int buflen)
{
char *res = buf + buflen;
- int error;

prepend(&res, &buflen, "", 1);
- error = prepend_path(path, root, &res, &buflen);
-
- if (error < 0)
- return ERR_PTR(error);
- if (error > 0)
+ if (prepend_path(path, root, &res, &buflen) > 0)
return NULL;
- return res;
+ return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
}

char *d_absolute_path(const struct path *path,
@@ -198,16 +189,11 @@ char *d_absolute_path(const struct path *path,
{
struct path root = {};
char *res = buf + buflen;
- int error;

prepend(&res, &buflen, "", 1);
- error = prepend_path(path, &root, &res, &buflen);
-
- if (error > 1)
- error = -EINVAL;
- if (error < 0)
- return ERR_PTR(error);
- return res;
+ if (prepend_path(path, &root, &res, &buflen) > 1)
+ return ERR_PTR(-EINVAL);
+ return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
}

static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
@@ -240,7 +226,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
{
char *res = buf + buflen;
struct path root;
- int error;

/*
* We have various synthetic filesystems that never get mounted. On
@@ -263,12 +248,10 @@ char *d_path(const struct path *path, char *buf, int buflen)
prepend(&res, &buflen, " (deleted)", 11);
else
prepend(&res, &buflen, "", 1);
- error = prepend_path(path, &root, &res, &buflen);
+ prepend_path(path, &root, &res, &buflen);
rcu_read_unlock();

- if (error < 0)
- res = ERR_PTR(error);
- return res;
+ return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
}
EXPORT_SYMBOL(d_path);

--
2.11.0


2021-05-19 18:59:34

by Al Viro

[permalink] [raw]
Subject: [PATCH 05/14] getcwd(2): saner logics around prepend_path() call

The only negative value that might get returned by prepend_path() is
-ENAMETOOLONG, and that happens only on overflow. The same goes for
prepend_unreachable(). Overflow is detectable by observing negative
buflen, so we can simplify the control flow around the prepend_path()
call. Expand prepend_unreachable(), while we are at it - that's the
only caller.

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 7f3fac544bbb..311d43287572 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -211,11 +211,6 @@ char *d_absolute_path(const struct path *path,
return res;
}

-static int prepend_unreachable(char **buffer, int *buflen)
-{
- return prepend(buffer, buflen, "(unreachable)", 13);
-}
-
static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
{
unsigned seq;
@@ -414,17 +409,13 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
int buflen = PATH_MAX;

prepend(&cwd, &buflen, "", 1);
- error = prepend_path(&pwd, &root, &cwd, &buflen);
+ if (prepend_path(&pwd, &root, &cwd, &buflen) > 0)
+ prepend(&cwd, &buflen, "(unreachable)", 13);
rcu_read_unlock();

- if (error < 0)
+ if (buflen < 0) {
+ error = -ENAMETOOLONG;
goto out;
-
- /* Unreachable from current root */
- if (error > 0) {
- error = prepend_unreachable(&cwd, &buflen);
- if (error)
- goto out;
}

error = -ERANGE;
--
2.11.0


2021-05-19 18:59:47

by Al Viro

[permalink] [raw]
Subject: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-zero

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index ba629879a4bf..8a9cd44f6689 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -187,7 +187,7 @@ char *__d_path(const struct path *path,
DECLARE_BUFFER(b, buf, buflen);

prepend(&b, "", 1);
- if (prepend_path(path, root, &b) > 0)
+ if (unlikely(prepend_path(path, root, &b) > 0))
return NULL;
return extract_string(&b);
}
@@ -199,7 +199,7 @@ char *d_absolute_path(const struct path *path,
DECLARE_BUFFER(b, buf, buflen);

prepend(&b, "", 1);
- if (prepend_path(path, &root, &b) > 1)
+ if (unlikely(prepend_path(path, &root, &b) > 1))
return ERR_PTR(-EINVAL);
return extract_string(&b);
}
@@ -396,7 +396,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
DECLARE_BUFFER(b, page, PATH_MAX);

prepend(&b, "", 1);
- if (prepend_path(&pwd, &root, &b) > 0)
+ if (unlikely(prepend_path(&pwd, &root, &b) > 0))
prepend(&b, "(unreachable)", 13);
rcu_read_unlock();

--
2.11.0


2021-05-19 18:59:48

by Al Viro

[permalink] [raw]
Subject: [PATCH 06/14] d_path: don't bother with return value of prepend()

Only simple_dname() checks it, and there we can simply do those
calls and check for overflow (by looking of negative buflen)
in the end.

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 311d43287572..72b8087aaf9c 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -8,14 +8,13 @@
#include <linux/prefetch.h>
#include "mount.h"

-static int prepend(char **buffer, int *buflen, const char *str, int namelen)
+static void prepend(char **buffer, int *buflen, const char *str, int namelen)
{
*buflen -= namelen;
- if (*buflen < 0)
- return -ENAMETOOLONG;
- *buffer -= namelen;
- memcpy(*buffer, str, namelen);
- return 0;
+ if (likely(*buflen >= 0)) {
+ *buffer -= namelen;
+ memcpy(*buffer, str, namelen);
+ }
}

/**
@@ -298,11 +297,10 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
{
char *end = buffer + buflen;
/* these dentries are never renamed, so d_lock is not needed */
- if (prepend(&end, &buflen, " (deleted)", 11) ||
- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
- prepend(&end, &buflen, "/", 1))
- end = ERR_PTR(-ENAMETOOLONG);
- return end;
+ prepend(&end, &buflen, " (deleted)", 11);
+ prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len);
+ prepend(&end, &buflen, "/", 1);
+ return buflen >= 0 ? end : ERR_PTR(-ENAMETOOLONG);
}

/*
--
2.11.0


2021-05-19 18:59:55

by Al Viro

[permalink] [raw]
Subject: [PATCH 11/14] d_path: prepend_path(): lift resetting b in case when we'd return 3 out of loop

preparation to extracting the loop into helper (next commit)

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 3836f5d0b023..9a0356cc98d3 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -115,7 +115,6 @@ static int prepend_path(const struct path *path,

/* Escaped? */
if (dentry != mnt->mnt.mnt_root) {
- b = *p;
error = 3;
break;
}
@@ -156,6 +155,9 @@ static int prepend_path(const struct path *path,
}
done_seqretry(&mount_lock, m_seq);

+ if (unlikely(error == 3))
+ b = *p;
+
if (b.len == p->len)
prepend(&b, "/", 1);

--
2.11.0


2021-05-19 19:06:50

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCHSET] d_path cleanups

On Tue, May 18, 2021 at 2:44 PM Al Viro <[email protected]> wrote:
>
> Here's what I've got for carve-up of cleanups.

Thanks, these all look logical to me.

I only read through the individual patches, I didn't test or check the
end result, but it all looked like good sane cleanups.

Linus

2021-05-19 19:21:54

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH 12/14] d_path: prepend_path(): lift the inner loop into a new helper

On Wed, May 19, 2021 at 12:48:59AM +0000, Al Viro wrote:
> ... and leave the rename_lock/mount_lock handling in prepend_path()
> itself

...

> + if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
> + return 1; // absolute root
> + else
> + return 2; // detached or not attached yet

Would it be slightly better to read

if (IS_ERR_OR_NULL(mnt_ns) || is_anon_ns(mnt_ns))
return 2; // detached or not attached yet
else
return 1; // absolute root

?

Oh, I have noticed that it's in the original piece of code (perhaps separate
change if we ever need it?).


--
With Best Regards,
Andy Shevchenko



2021-05-19 20:14:13

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 12/14] d_path: prepend_path(): lift the inner loop into a new helper

On Wed, May 19, 2021 at 11:07:04AM +0300, Andy Shevchenko wrote:
> On Wed, May 19, 2021 at 12:48:59AM +0000, Al Viro wrote:
> > ... and leave the rename_lock/mount_lock handling in prepend_path()
> > itself
>
> ...
>
> > + if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
> > + return 1; // absolute root
> > + else
> > + return 2; // detached or not attached yet
>
> Would it be slightly better to read
>
> if (IS_ERR_OR_NULL(mnt_ns) || is_anon_ns(mnt_ns))
> return 2; // detached or not attached yet
> else
> return 1; // absolute root
>
> ?
>
> Oh, I have noticed that it's in the original piece of code (perhaps separate
> change if we ever need it?).

The real readability problem here is not the negations. There are 4 possible
states for vfsmount encoded via ->mnt_ns:
1) not attached to any tree, kept alive by refcount alone.
->mnt_ns == NULL.
2) long-term unattached. Not a part of any mount tree, but we have
a known holder for it and until that's gone (making ->mnt_ns NULL), refcount
is guaranteed to remain positive. pipe_mnt is an example of such.
->mnt_ns == MNT_NS_INTERNAL, which is encoded as ERR_PTR(-1), thus the use of
IS_ERR_OR_NULL here (something I'd normally taken out and shot - use of that
primitive is a sign of lousy API or of a cargo-culted "defensive programming").
3) part of a temporary mount tree; not in anyone's namespace.
->mnt_ns points the tree in question, ->mnt_ns->seq == 0.
4) belongs to someone's namespace. ->mnt_ns points to that,
->mnt_ns->seq != 0. That's what we are looking for here.

It's kludges all the way down ;-/ Note that temporary tree can't become
a normal one or vice versa - mounts can get transferred to normal namespace,
but they will see ->mnt_ns reassigned to that. IOW, ->mnt_ns->seq can't
get changed without a change to ->mnt_ns. I suspect that the right way
to handle that would be to have that state stored as explicit flags.

All mounts are created (and destroyed) in state (1); state changes:
commit_tree() - (1) or (3) to (3) or (4)
umount_tree() - (3) or (4) to (1)
clone_private_mount() - (1) to (2)
open_detached_copy() - (1) to (3)
copy_mnt_ns() - (1) to (4)
mount_subtree() - (1) to (3)
fsmount() - (1) to (3)
init_mount_tree() - (1) to (4)
kern_mount() - (1) to (2)
kern_unmount{,_array}() - (2) to (1)

commit_tree() has a pathological call chain that has it
attach stuff to temporary tree; that's basically automount by lookup in
temporary namespace. It can distinguish it from the usual (adding to
normal namespace) by looking at the state of mountpoint we are attaching
to - or simply describe all cases as "(1) or (3) to whatever state the
mountpoint is".

One really hot path where we check (1) vs. (2,3,4) is
mntput_no_expire(), which is the initial reason behind the current
representation. However, read from ->mnt_flags is just as cheap as
that from ->mnt_ns and the same reasons that make READ_ONCE()
legitimate there would apply to ->mnt_flags as well.

We can't reuse MNT_INTERNAL for that, more's the pity -
it's used to mark the mounts (kern_mount()-created, mostly) that
need to destroyed synchronously on the final mntput(), with no
task_work_add() allowed (think of module_init() failing halfway through,
with kern_unmount() done to destroy the internal mounts already created;
we *really* don't want to delay that filesystem shutdown until insmod(2)
heads out to userland). Another headache is in LSM shite, as usual...

Anyway, sorting that out is definitely a separate story.

2021-05-19 21:10:09

by Al Viro

[permalink] [raw]
Subject: [PATCH 08/14] d_path: make prepend_name() boolean

It returns only 0 or -ENAMETOOLONG and both callers only check if
the result is negative. Might as well return true on success and
false on failure...

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 327cc3744554..83db83446afd 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -34,15 +34,15 @@ static void prepend(char **buffer, int *buflen, const char *str, int namelen)
*
* Load acquire is needed to make sure that we see that terminating NUL.
*/
-static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
+static bool prepend_name(char **buffer, int *buflen, const struct qstr *name)
{
const char *dname = smp_load_acquire(&name->name); /* ^^^ */
u32 dlen = READ_ONCE(name->len);
char *p;

*buflen -= dlen + 1;
- if (*buflen < 0)
- return -ENAMETOOLONG;
+ if (unlikely(*buflen < 0))
+ return false;
p = *buffer -= dlen + 1;
*p++ = '/';
while (dlen--) {
@@ -51,7 +51,7 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
break;
*p++ = c;
}
- return 0;
+ return true;
}

/**
@@ -127,7 +127,7 @@ static int prepend_path(const struct path *path,
}
parent = dentry->d_parent;
prefetch(parent);
- if (unlikely(prepend_name(&bptr, &blen, &dentry->d_name) < 0))
+ if (!prepend_name(&bptr, &blen, &dentry->d_name))
break;

dentry = parent;
@@ -305,7 +305,7 @@ static char *__dentry_path(const struct dentry *d, char *p, int buflen)
const struct dentry *parent = dentry->d_parent;

prefetch(parent);
- if (unlikely(prepend_name(&end, &len, &dentry->d_name) < 0))
+ if (!prepend_name(&end, &len, &dentry->d_name))
break;

dentry = parent;
--
2.11.0


2021-05-19 21:11:24

by Al Viro

[permalink] [raw]
Subject: [PATCH 02/14] d_path: saner calling conventions for __dentry_path()

1) lift NUL-termination into the callers
2) pass pointer to the end of buffer instead of that to beginning.

(1) allows to simplify dentry_path() - we don't need to play silly
games with restoring the leading / of "//deleted" after __dentry_path()
would've overwritten it with NUL.

We also do not need to check if (either) prepend() in there fails -
if the buffer is not large enough, we'll end with negative buflen
after prepend() and __dentry_path() will return the right value
(ERR_PTR(-ENAMETOOLONG)) just fine.

Signed-off-by: Al Viro <[email protected]>
---
fs/d_path.c | 33 +++++++++++++--------------------
1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/fs/d_path.c b/fs/d_path.c
index 01df5dfa1f88..1a1cf05e7780 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -326,22 +326,21 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
/*
* Write full pathname from the root of the filesystem into the buffer.
*/
-static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
+static char *__dentry_path(const struct dentry *d, char *p, int buflen)
{
const struct dentry *dentry;
char *end, *retval;
int len, seq = 0;
int error = 0;

- if (buflen < 2)
+ if (buflen < 1)
goto Elong;

rcu_read_lock();
restart:
dentry = d;
- end = buf + buflen;
+ end = p;
len = buflen;
- prepend(&end, &len, "", 1);
/* Get '/' right */
retval = end-1;
*retval = '/';
@@ -373,27 +372,21 @@ static char *__dentry_path(const struct dentry *d, char *buf, int buflen)

char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
{
- return __dentry_path(dentry, buf, buflen);
+ char *p = buf + buflen;
+ prepend(&p, &buflen, "", 1);
+ return __dentry_path(dentry, p, buflen);
}
EXPORT_SYMBOL(dentry_path_raw);

char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
{
- char *p = NULL;
- char *retval;
-
- if (d_unlinked(dentry)) {
- p = buf + buflen;
- if (prepend(&p, &buflen, "//deleted", 10) != 0)
- goto Elong;
- buflen++;
- }
- retval = __dentry_path(dentry, buf, buflen);
- if (!IS_ERR(retval) && p)
- *p = '/'; /* restore '/' overriden with '\0' */
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
+ char *p = buf + buflen;
+
+ if (unlikely(d_unlinked(dentry)))
+ prepend(&p, &buflen, "//deleted", 10);
+ else
+ prepend(&p, &buflen, "", 1);
+ return __dentry_path(dentry, p, buflen);
}

static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
--
2.11.0


2021-05-20 09:16:24

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 08/14] d_path: make prepend_name() boolean

Hi Al

> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 08/14] d_path: make prepend_name() boolean
>
> It returns only 0 or -ENAMETOOLONG and both callers only check if
> the result is negative. Might as well return true on success and
> false on failure...
>
> Signed-off-by: Al Viro <[email protected]>
> ---
> fs/d_path.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/d_path.c b/fs/d_path.c
> index 327cc3744554..83db83446afd 100644
> --- a/fs/d_path.c
> +++ b/fs/d_path.c
> @@ -34,15 +34,15 @@ static void prepend(char **buffer, int *buflen, const
> char *str, int namelen)
> *
> * Load acquire is needed to make sure that we see that terminating NUL.
> */
> -static int prepend_name(char **buffer, int *buflen, const struct qstr
> *name)
> +static bool prepend_name(char **buffer, int *buflen, const struct qstr
> *name)
> {
> const char *dname = smp_load_acquire(&name->name); /* ^^^ */
> u32 dlen = READ_ONCE(name->len);
> char *p;
>
> *buflen -= dlen + 1;
> - if (*buflen < 0)
> - return -ENAMETOOLONG;
> + if (unlikely(*buflen < 0))
> + return false;

I don't object to this patch itself.
Just wonder whether we need to relax the check condition of "*buflen < 0" ?

Given that in vsnprintf code path, sometimes the *buflen is < 0.

Please see https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/vsprintf.c#n2698

--
Cheers,

Justin (Jia He)
> p = *buffer -= dlen + 1;
> *p++ = '/';
> while (dlen--) {
> @@ -51,7 +51,7 @@ static int prepend_name(char **buffer, int *buflen, const
> struct qstr *name)
> break;
> *p++ = c;
> }
> - return 0;
> + return true;
> }
>
> /**
> @@ -127,7 +127,7 @@ static int prepend_path(const struct path *path,
> }
> parent = dentry->d_parent;
> prefetch(parent);
> - if (unlikely(prepend_name(&bptr, &blen, &dentry->d_name) < 0))
> + if (!prepend_name(&bptr, &blen, &dentry->d_name))
> break;
>
> dentry = parent;
> @@ -305,7 +305,7 @@ static char *__dentry_path(const struct dentry *d, char
> *p, int buflen)
> const struct dentry *parent = dentry->d_parent;
>
> prefetch(parent);
> - if (unlikely(prepend_name(&end, &len, &dentry->d_name) < 0))
> + if (!prepend_name(&end, &len, &dentry->d_name))
> break;
>
> dentry = parent;
> --
> 2.11.0

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-05-20 09:23:48

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH 08/14] d_path: make prepend_name() boolean

On Thu, May 20, 2021 at 09:12:34AM +0000, Justin He wrote:

> IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

Fix it.

--
With Best Regards,
Andy Shevchenko


2021-05-21 14:34:56

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH 08/14] d_path: make prepend_name() boolean

On Thu 2021-05-20 09:12:34, Justin He wrote:
> Hi Al
>
> > -----Original Message-----
> > From: Al Viro <[email protected]> On Behalf Of Al Viro
> > Sent: Wednesday, May 19, 2021 8:49 AM
> > To: Linus Torvalds <[email protected]>
> > Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> > Rostedt <[email protected]>; Sergey Senozhatsky
> > <[email protected]>; Andy Shevchenko
> > <[email protected]>; Rasmus Villemoes
> > <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> > Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> > Borntraeger <[email protected]>; Eric W . Biederman
> > <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> > Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> > Eric Biggers <[email protected]>; Ahmed S. Darwish
> > <[email protected]>; open list:DOCUMENTATION <linux-
> > [email protected]>; Linux Kernel Mailing List <linux-
> > [email protected]>; linux-s390 <[email protected]>; linux-
> > fsdevel <[email protected]>
> > Subject: [PATCH 08/14] d_path: make prepend_name() boolean
> >
> > It returns only 0 or -ENAMETOOLONG and both callers only check if
> > the result is negative. Might as well return true on success and
> > false on failure...
> >
> > Signed-off-by: Al Viro <[email protected]>
> > ---
> > fs/d_path.c | 12 ++++++------
> > 1 file changed, 6 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/d_path.c b/fs/d_path.c
> > index 327cc3744554..83db83446afd 100644
> > --- a/fs/d_path.c
> > +++ b/fs/d_path.c
> > @@ -34,15 +34,15 @@ static void prepend(char **buffer, int *buflen, const
> > char *str, int namelen)
> > *
> > * Load acquire is needed to make sure that we see that terminating NUL.
> > */
> > -static int prepend_name(char **buffer, int *buflen, const struct qstr
> > *name)
> > +static bool prepend_name(char **buffer, int *buflen, const struct qstr
> > *name)
> > {
> > const char *dname = smp_load_acquire(&name->name); /* ^^^ */
> > u32 dlen = READ_ONCE(name->len);
> > char *p;
> >
> > *buflen -= dlen + 1;
> > - if (*buflen < 0)
> > - return -ENAMETOOLONG;
> > + if (unlikely(*buflen < 0))
> > + return false;
>
> I don't object to this patch itself.
> Just wonder whether we need to relax the check condition of "*buflen < 0" ?
>
> Given that in vsnprintf code path, sometimes the *buflen is < 0.
>
> Please see https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/vsprintf.c#n2698

IMHO, the patch is fine. It is likely some misunderstanding.
The above link points to:

2693 str = buf;
2694 end = buf + size;
2695
2696 /* Make sure end is always >= buf */
2697 if (end < buf) {
2698 end = ((void *)-1);
2699 size = end - buf;
2700 }

"end" points right behind the end of the buffer. It is later
used instead of the buffer size. The above code handles a potential
overflow of "buf + size". I causes that "end" will be 0xffffffff
in case of the overflow.

That said. vsnprintf() returns the number of characters which would
be generated for the given input. But only the "size" is written.
This require copying the characters one by one.

It is useful to see how many characters were lost. But I am not sure
if this ever worked for the dentry functions.

Best Regards,
Petr

2021-05-21 19:26:22

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 08/14] d_path: make prepend_name() boolean

On Thu, May 20, 2021 at 04:53:38PM +0200, Petr Mladek wrote:

> That said. vsnprintf() returns the number of characters which would
> be generated for the given input. But only the "size" is written.
> This require copying the characters one by one.

Not really - that's more of avoiding trouble if the sucker gets renamed
right under prepend_name(). Note that we have no way to guarantee that
length and string pointer come from the same moment. This looking for
NUL is about protection againts stepping off the end of allocated object.

2021-06-22 14:03:47

by Justin He

[permalink] [raw]
Subject: RE: [PATCHSET] d_path cleanups



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:43 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Al Viro
> <[email protected]>; Heiko Carstens <[email protected]>; Vasily Gorbik
> <[email protected]>; Christian Borntraeger <[email protected]>; Eric
> W . Biederman <[email protected]>; Darrick J. Wong
> <[email protected]>; Peter Zijlstra (Intel) <[email protected]>;
> Ira Weiny <[email protected]>; Eric Biggers <[email protected]>; Ahmed
> S. Darwish <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCHSET] d_path cleanups

For the whole patch series, I once tested several cases, most of them are
Related to new '%pD' behavior:
1. print '%pD' with full path of ext4 file
2. mount a ext4 filesystem upon a ext4 filesystem, and print the file
with '%pD'
3. all test_print selftests, including the new '%14pD' '%-14pD'
4. kasnprintf

In summary, please feel free to add/not_add:
Tested-by: Jia He <[email protected]>

--
Cheers,
Justin (Jia He)


2021-06-23 13:30:41

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 09/14] d_path: introduce struct prepend_buffer

Hi Al

> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 09/14] d_path: introduce struct prepend_buffer
>
> We've a lot of places where we have pairs of form (pointer to end
> of buffer, amount of space left in front of that). These sit in pairs of
> variables located next to each other and usually passed by reference.
> Turn those into instances of new type (struct prepend_buffer) and pass
> reference to the pair instead of pairs of references to its fields.
>
> Declared and initialized by DECLARE_BUFFER(name, buf, buflen).
>
> extract_string(prepend_buffer) returns the buffer contents if
> no overflow has happened, ERR_PTR(ENAMETOOLONG) otherwise.
> All places where we used to have that boilerplate converted to use
> of that helper.
>
> Signed-off-by: Al Viro <[email protected]>
> ---
> fs/d_path.c | 142 ++++++++++++++++++++++++++++++++-----------------------
> -----
> 1 file changed, 75 insertions(+), 67 deletions(-)
>
> diff --git a/fs/d_path.c b/fs/d_path.c
> index 83db83446afd..06e93dd031bf 100644
> --- a/fs/d_path.c
> +++ b/fs/d_path.c
> @@ -8,12 +8,26 @@
> #include <linux/prefetch.h>
> #include "mount.h"
>
> -static void prepend(char **buffer, int *buflen, const char *str, int
> namelen)
> +struct prepend_buffer {
> + char *buf;
> + int len;
> +};
> +#define DECLARE_BUFFER(__name, __buf, __len) \
> + struct prepend_buffer __name = {.buf = __buf + __len, .len = __len}
> +
> +static char *extract_string(struct prepend_buffer *p)
> {
> - *buflen -= namelen;
> - if (likely(*buflen >= 0)) {
> - *buffer -= namelen;
> - memcpy(*buffer, str, namelen);
> + if (likely(p->len >= 0))
> + return p->buf;
> + return ERR_PTR(-ENAMETOOLONG);
> +}
> +
> +static void prepend(struct prepend_buffer *p, const char *str, int
> namelen)
> +{
> + p->len -= namelen;
> + if (likely(p->len >= 0)) {
> + p->buf -= namelen;
> + memcpy(p->buf, str, namelen);
> }
> }
>
> @@ -34,22 +48,22 @@ static void prepend(char **buffer, int *buflen, const
> char *str, int namelen)
> *
> * Load acquire is needed to make sure that we see that terminating NUL.
> */
> -static bool prepend_name(char **buffer, int *buflen, const struct qstr
> *name)
> +static bool prepend_name(struct prepend_buffer *p, const struct qstr
> *name)

Please also change the parameter description in the comments of
prepend_name(), otherwise "make C=1 W=1" will report warnings.


--
Cheers,
Justin (Jia He)


> {
> const char *dname = smp_load_acquire(&name->name); /* ^^^ */
> u32 dlen = READ_ONCE(name->len);
> - char *p;
> + char *s;
>
> - *buflen -= dlen + 1;
> - if (unlikely(*buflen < 0))
> + p->len -= dlen + 1;
> + if (unlikely(p->len < 0))
> return false;
> - p = *buffer -= dlen + 1;
> - *p++ = '/';
> + s = p->buf -= dlen + 1;
> + *s++ = '/';
> while (dlen--) {
> char c = *dname++;
> if (!c)
> break;
> - *p++ = c;
> + *s++ = c;
> }
> return true;
> }
> @@ -73,15 +87,14 @@ static bool prepend_name(char **buffer, int *buflen,
> const struct qstr *name)
> */
> static int prepend_path(const struct path *path,
> const struct path *root,
> - char **buffer, int *buflen)
> + struct prepend_buffer *p)
> {
> struct dentry *dentry;
> struct vfsmount *vfsmnt;
> struct mount *mnt;
> int error = 0;
> unsigned seq, m_seq = 0;
> - char *bptr;
> - int blen;
> + struct prepend_buffer b;
>
> rcu_read_lock();
> restart_mnt:
> @@ -89,8 +102,7 @@ static int prepend_path(const struct path *path,
> seq = 0;
> rcu_read_lock();
> restart:
> - bptr = *buffer;
> - blen = *buflen;
> + b = *p;
> error = 0;
> dentry = path->dentry;
> vfsmnt = path->mnt;
> @@ -105,8 +117,7 @@ static int prepend_path(const struct path *path,
>
> /* Escaped? */
> if (dentry != vfsmnt->mnt_root) {
> - bptr = *buffer;
> - blen = *buflen;
> + b = *p;
> error = 3;
> break;
> }
> @@ -127,7 +138,7 @@ static int prepend_path(const struct path *path,
> }
> parent = dentry->d_parent;
> prefetch(parent);
> - if (!prepend_name(&bptr, &blen, &dentry->d_name))
> + if (!prepend_name(&b, &dentry->d_name))
> break;
>
> dentry = parent;
> @@ -148,11 +159,10 @@ static int prepend_path(const struct path *path,
> }
> done_seqretry(&mount_lock, m_seq);
>
> - if (blen == *buflen)
> - prepend(&bptr, &blen, "/", 1);
> + if (b.len == p->len)
> + prepend(&b, "/", 1);
>
> - *buffer = bptr;
> - *buflen = blen;
> + *p = b;
> return error;
> }
>
> @@ -176,24 +186,24 @@ char *__d_path(const struct path *path,
> const struct path *root,
> char *buf, int buflen)
> {
> - char *res = buf + buflen;
> + DECLARE_BUFFER(b, buf, buflen);
>
> - prepend(&res, &buflen, "", 1);
> - if (prepend_path(path, root, &res, &buflen) > 0)
> + prepend(&b, "", 1);
> + if (prepend_path(path, root, &b) > 0)
> return NULL;
> - return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
> + return extract_string(&b);
> }
>
> char *d_absolute_path(const struct path *path,
> char *buf, int buflen)
> {
> struct path root = {};
> - char *res = buf + buflen;
> + DECLARE_BUFFER(b, buf, buflen);
>
> - prepend(&res, &buflen, "", 1);
> - if (prepend_path(path, &root, &res, &buflen) > 1)
> + prepend(&b, "", 1);
> + if (prepend_path(path, &root, &b) > 1)
> return ERR_PTR(-EINVAL);
> - return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
> + return extract_string(&b);
> }
>
> static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
> @@ -224,7 +234,7 @@ static void get_fs_root_rcu(struct fs_struct *fs,
> struct path *root)
> */
> char *d_path(const struct path *path, char *buf, int buflen)
> {
> - char *res = buf + buflen;
> + DECLARE_BUFFER(b, buf, buflen);
> struct path root;
>
> /*
> @@ -245,13 +255,13 @@ char *d_path(const struct path *path, char *buf, int
> buflen)
> rcu_read_lock();
> get_fs_root_rcu(current->fs, &root);
> if (unlikely(d_unlinked(path->dentry)))
> - prepend(&res, &buflen, " (deleted)", 11);
> + prepend(&b, " (deleted)", 11);
> else
> - prepend(&res, &buflen, "", 1);
> - prepend_path(path, &root, &res, &buflen);
> + prepend(&b, "", 1);
> + prepend_path(path, &root, &b);
> rcu_read_unlock();
>
> - return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
> + return extract_string(&b);
> }
> EXPORT_SYMBOL(d_path);
>
> @@ -278,36 +288,34 @@ char *dynamic_dname(struct dentry *dentry, char
> *buffer, int buflen,
>
> char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
> {
> - char *end = buffer + buflen;
> + DECLARE_BUFFER(b, buffer, buflen);
> /* these dentries are never renamed, so d_lock is not needed */
> - prepend(&end, &buflen, " (deleted)", 11);
> - prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len);
> - prepend(&end, &buflen, "/", 1);
> - return buflen >= 0 ? end : ERR_PTR(-ENAMETOOLONG);
> + prepend(&b, " (deleted)", 11);
> + prepend(&b, dentry->d_name.name, dentry->d_name.len);
> + prepend(&b, "/", 1);
> + return extract_string(&b);
> }
>
> /*
> * Write full pathname from the root of the filesystem into the buffer.
> */
> -static char *__dentry_path(const struct dentry *d, char *p, int buflen)
> +static char *__dentry_path(const struct dentry *d, struct prepend_buffer
> *p)
> {
> const struct dentry *dentry;
> - char *end;
> - int len, seq = 0;
> + struct prepend_buffer b;
> + int seq = 0;
>
> rcu_read_lock();
> restart:
> dentry = d;
> - end = p;
> - len = buflen;
> + b = *p;
> read_seqbegin_or_lock(&rename_lock, &seq);
> while (!IS_ROOT(dentry)) {
> const struct dentry *parent = dentry->d_parent;
>
> prefetch(parent);
> - if (!prepend_name(&end, &len, &dentry->d_name))
> + if (!prepend_name(&b, &dentry->d_name))
> break;
> -
> dentry = parent;
> }
> if (!(seq & 1))
> @@ -317,28 +325,29 @@ static char *__dentry_path(const struct dentry *d,
> char *p, int buflen)
> goto restart;
> }
> done_seqretry(&rename_lock, seq);
> - if (len == buflen)
> - prepend(&end, &len, "/", 1);
> - return len >= 0 ? end : ERR_PTR(-ENAMETOOLONG);
> + if (b.len == p->len)
> + prepend(&b, "/", 1);
> + return extract_string(&b);
> }
>
> char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
> {
> - char *p = buf + buflen;
> - prepend(&p, &buflen, "", 1);
> - return __dentry_path(dentry, p, buflen);
> + DECLARE_BUFFER(b, buf, buflen);
> +
> + prepend(&b, "", 1);
> + return __dentry_path(dentry, &b);
> }
> EXPORT_SYMBOL(dentry_path_raw);
>
> char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
> {
> - char *p = buf + buflen;
> + DECLARE_BUFFER(b, buf, buflen);
>
> if (unlikely(d_unlinked(dentry)))
> - prepend(&p, &buflen, "//deleted", 10);
> + prepend(&b, "//deleted", 10);
> else
> - prepend(&p, &buflen, "", 1);
> - return __dentry_path(dentry, p, buflen);
> + prepend(&b, "", 1);
> + return __dentry_path(dentry, &b);
> }
>
> static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path
> *root,
> @@ -386,24 +395,23 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned
> long, size)
> error = -ENOENT;
> if (!d_unlinked(pwd.dentry)) {
> unsigned long len;
> - char *cwd = page + PATH_MAX;
> - int buflen = PATH_MAX;
> + DECLARE_BUFFER(b, page, PATH_MAX);
>
> - prepend(&cwd, &buflen, "", 1);
> - if (prepend_path(&pwd, &root, &cwd, &buflen) > 0)
> - prepend(&cwd, &buflen, "(unreachable)", 13);
> + prepend(&b, "", 1);
> + if (prepend_path(&pwd, &root, &b) > 0)
> + prepend(&b, "(unreachable)", 13);
> rcu_read_unlock();
>
> - if (buflen < 0) {
> + if (b.len < 0) {
> error = -ENAMETOOLONG;
> goto out;
> }
>
> error = -ERANGE;
> - len = PATH_MAX + page - cwd;
> + len = PATH_MAX - b.len;
> if (len <= size) {
> error = len;
> - if (copy_to_user(buf, cwd, len))
> + if (copy_to_user(buf, b.buf, len))
> error = -EFAULT;
> }
> } else {
> --
> 2.11.0

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-24 06:07:17

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 01/14] d_path: "\0" is {0,0}, not {0}



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 01/14] d_path: "\0" is {0,0}, not {0}
>
> Single-element array consisting of one NUL is spelled ""...
>
> Signed-off-by: Al Viro <[email protected]>

Reviewed-by: Jia He <[email protected]>

--
Cheers,
Justin (Jia He)


IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-24 06:18:09

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 06/14] d_path: don't bother with return value of prepend()



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 06/14] d_path: don't bother with return value of prepend()
>
> Only simple_dname() checks it, and there we can simply do those
> calls and check for overflow (by looking of negative buflen)
> in the end.
>
> Signed-off-by: Al Viro <[email protected]>

Reviewed-by: Jia He <[email protected]>

--
Cheers,
Justin (Jia He)
> ---
> fs/d_path.c | 20 +++++++++-----------
> 1 file changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/fs/d_path.c b/fs/d_path.c
> index 311d43287572..72b8087aaf9c 100644
> --- a/fs/d_path.c
> +++ b/fs/d_path.c
> @@ -8,14 +8,13 @@
> #include <linux/prefetch.h>
> #include "mount.h"
>
> -static int prepend(char **buffer, int *buflen, const char *str, int
> namelen)
> +static void prepend(char **buffer, int *buflen, const char *str, int
> namelen)
> {
> *buflen -= namelen;
> - if (*buflen < 0)
> - return -ENAMETOOLONG;
> - *buffer -= namelen;
> - memcpy(*buffer, str, namelen);
> - return 0;
> + if (likely(*buflen >= 0)) {
> + *buffer -= namelen;
> + memcpy(*buffer, str, namelen);
> + }
> }
>
> /**
> @@ -298,11 +297,10 @@ char *simple_dname(struct dentry *dentry, char
> *buffer, int buflen)
> {
> char *end = buffer + buflen;
> /* these dentries are never renamed, so d_lock is not needed */
> - if (prepend(&end, &buflen, " (deleted)", 11) ||
> - prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len)
> ||
> - prepend(&end, &buflen, "/", 1))
> - end = ERR_PTR(-ENAMETOOLONG);
> - return end;
> + prepend(&end, &buflen, " (deleted)", 11);
> + prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len);
> + prepend(&end, &buflen, "/", 1);
> + return buflen >= 0 ? end : ERR_PTR(-ENAMETOOLONG);
> }
>
> /*
> --
> 2.11.0

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

Subject: Re: [PATCH 09/14] d_path: introduce struct prepend_buffer

Hi folks,

<snip>

>> We've a lot of places where we have pairs of form (pointer to end
>> of buffer, amount of space left in front of that). These sit in pairs of
>> variables located next to each other and usually passed by reference.
>> Turn those into instances of new type (struct prepend_buffer) and pass
>> reference to the pair instead of pairs of references to its fields.
>>
>> Declared and initialized by DECLARE_BUFFER(name, buf, buflen).
>>
>> extract_string(prepend_buffer) returns the buffer contents if
>> no overflow has happened, ERR_PTR(ENAMETOOLONG) otherwise.
>> All places where we used to have that boilerplate converted to use
>> of that helper.

this smells like a generic enough thing to go into lib, doesn't it ?


--mtx

--
---
Hinweis: unverschlüsselte E-Mails können leicht abgehört und manipuliert
werden ! Für eine vertrauliche Kommunikation senden Sie bitte ihren
GPG/PGP-Schlüssel zu.
---
Enrico Weigelt, metux IT consult
Free software and Linux embedded engineering
[email protected] -- +49-151-27565287

2021-06-25 00:44:40

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 09/14] d_path: introduce struct prepend_buffer

Hi Enrico

> -----Original Message-----
> From: Enrico Weigelt, metux IT consult <[email protected]>
> Sent: Thursday, June 24, 2021 5:30 PM
> To: Justin He <[email protected]>; Al Viro <[email protected]>; Linus
> Torvalds <[email protected]>
> Cc: Petr Mladek <[email protected]>; Steven Rostedt <[email protected]>;
> Sergey Senozhatsky <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: Re: [PATCH 09/14] d_path: introduce struct prepend_buffer
>
> Hi folks,
>
> <snip>
>
> >> We've a lot of places where we have pairs of form (pointer to
> end
> >> of buffer, amount of space left in front of that). These sit in pairs
> of
> >> variables located next to each other and usually passed by reference.
> >> Turn those into instances of new type (struct prepend_buffer) and pass
> >> reference to the pair instead of pairs of references to its fields.
> >>
> >> Declared and initialized by DECLARE_BUFFER(name, buf, buflen).
> >>
> >> extract_string(prepend_buffer) returns the buffer contents if
> >> no overflow has happened, ERR_PTR(ENAMETOOLONG) otherwise.
> >> All places where we used to have that boilerplate converted to use
> >> of that helper.
>
> this smells like a generic enough thing to go into lib, doesn't it ?
>
Maybe, but the struct prepend_buffer also needs to be moved into lib.
Is it necessary? Is there any other user of struct prepend_buffer?

--
Cheers,
Justin (Jia He)



2021-06-25 08:02:46

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-zero

Hi Al

> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-
> zero
>
> Signed-off-by: Al Viro <[email protected]>
> ---
> fs/d_path.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/d_path.c b/fs/d_path.c
> index ba629879a4bf..8a9cd44f6689 100644
> --- a/fs/d_path.c
> +++ b/fs/d_path.c
> @@ -187,7 +187,7 @@ char *__d_path(const struct path *path,
> DECLARE_BUFFER(b, buf, buflen);
>
> prepend(&b, "", 1);
> - if (prepend_path(path, root, &b) > 0)
> + if (unlikely(prepend_path(path, root, &b) > 0))
> return NULL;
> return extract_string(&b);
> }
> @@ -199,7 +199,7 @@ char *d_absolute_path(const struct path *path,
> DECLARE_BUFFER(b, buf, buflen);
>
> prepend(&b, "", 1);
> - if (prepend_path(path, &root, &b) > 1)
> + if (unlikely(prepend_path(path, &root, &b) > 1))
> return ERR_PTR(-EINVAL);
> return extract_string(&b);
> }
> @@ -396,7 +396,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned
> long, size)
> DECLARE_BUFFER(b, page, PATH_MAX);
>
> prepend(&b, "", 1);
> - if (prepend_path(&pwd, &root, &b) > 0)
> + if (unlikely(prepend_path(&pwd, &root, &b) > 0))
> prepend(&b, "(unreachable)", 13);
> rcu_read_unlock();
>
> --
> 2.11.0
I tested it with a debugging patch as follows:
diff --git a/fs/d_path.c b/fs/d_path.c
index aea254ac9e1f..8eecd04be7bb 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -210,6 +210,7 @@ static int prepend_path(const struct path *path,
b = *p;
read_seqbegin_or_lock(&rename_lock, &seq);
error = __prepend_path(path->dentry, real_mount(path->mnt), root, &b);
+ printk("prepend=%d",error);
if (!(seq & 1))
rcu_read_unlock();
if (need_seqretry(&rename_lock, seq)) {

Then the result seems a little different:
root@entos-ampere-02:~# dmesg |grep prepend=1 |wc -l
7417
root@entos-ampere-02:~# dmesg |grep prepend=0 |wc -l
772

The kernel is 5.13.0-rc2+ + this series + my '%pD' series

Any thoughts?

---
Cheers,
Jia He (Justin)


IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-25 09:20:10

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 07/14] d_path: lift -ENAMETOOLONG handling into callers of prepend_path()

Hi Al

> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 07/14] d_path: lift -ENAMETOOLONG handling into callers of
> prepend_path()
>
> The only negative value ever returned by prepend_path() is -ENAMETOOLONG
> and callers can recognize that situation (overflow) by looking at the
> sign of buflen. Lift that into the callers; we already have the
> same logics (buf if buflen is non-negative, ERR_PTR(-ENAMETOOLONG)
> otherwise)
> in several places and that'll become a new primitive several commits down
> the road.
>
> Make prepend_path() return 0 instead of -ENAMETOOLONG. That makes for
> saner calling conventions (0/1/2/3/-ENAMETOOLONG is obnoxious) and
> callers actually get simpler, especially once the aforementioned
> primitive gets added.
>
> In prepend_path() itself we switch prepending the / (in case of
> empty path) to use of prepend() - no need to open-code that, compiler
> will do the right thing. It's exactly the same logics as in
> __dentry_path().
>
> Signed-off-by: Al Viro <[email protected]>
> ---
> fs/d_path.c | 39 +++++++++++----------------------------
> 1 file changed, 11 insertions(+), 28 deletions(-)
>
> diff --git a/fs/d_path.c b/fs/d_path.c
> index 72b8087aaf9c..327cc3744554 100644
> --- a/fs/d_path.c
> +++ b/fs/d_path.c
> @@ -127,8 +127,7 @@ static int prepend_path(const struct path *path,
> }
> parent = dentry->d_parent;
> prefetch(parent);
> - error = prepend_name(&bptr, &blen, &dentry->d_name);
> - if (error)
> + if (unlikely(prepend_name(&bptr, &blen, &dentry->d_name) < 0))
> break;
>
> dentry = parent;
> @@ -149,12 +148,9 @@ static int prepend_path(const struct path *path,
> }
> done_seqretry(&mount_lock, m_seq);
>
> - if (error >= 0 && bptr == *buffer) {
> - if (--blen < 0)
> - error = -ENAMETOOLONG;
> - else
> - *--bptr = '/';
> - }
> + if (blen == *buflen)
> + prepend(&bptr, &blen, "/", 1);
> +
> *buffer = bptr;
> *buflen = blen;
> return error;
> @@ -181,16 +177,11 @@ char *__d_path(const struct path *path,
> char *buf, int buflen)
> {
> char *res = buf + buflen;
> - int error;
>
> prepend(&res, &buflen, "", 1);
> - error = prepend_path(path, root, &res, &buflen);
> -
> - if (error < 0)
> - return ERR_PTR(error);
> - if (error > 0)
> + if (prepend_path(path, root, &res, &buflen) > 0)
> return NULL;
> - return res;
> + return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
> }
>
> char *d_absolute_path(const struct path *path,
> @@ -198,16 +189,11 @@ char *d_absolute_path(const struct path *path,
> {
> struct path root = {};
> char *res = buf + buflen;
> - int error;
>
> prepend(&res, &buflen, "", 1);
> - error = prepend_path(path, &root, &res, &buflen);
> -
> - if (error > 1)
> - error = -EINVAL;
> - if (error < 0)
> - return ERR_PTR(error);
> - return res;
> + if (prepend_path(path, &root, &res, &buflen) > 1)
> + return ERR_PTR(-EINVAL);
> + return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);

This patch is *correct*.
But do you mind changing like:
if (buflen >= 0 || error == 1)
return res;
else
return ERR_PTR(-ENAMETOOLONG);

The reason why I comment here is that I will change the
prepend_name in __prepend_path to prepend_name_with_len.
The latter will go through all the dentries recursively instead
of returning false if p.len<0.
So (error == 1 && buflen < 0) is possible.

If you disagree, I will change it later in another single patch.


--
Cheers,
Justin (Jia He)


> }
>
> static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
> @@ -240,7 +226,6 @@ char *d_path(const struct path *path, char *buf, int
> buflen)
> {
> char *res = buf + buflen;
> struct path root;
> - int error;
>
> /*
> * We have various synthetic filesystems that never get mounted. On
> @@ -263,12 +248,10 @@ char *d_path(const struct path *path, char *buf, int
> buflen)
> prepend(&res, &buflen, " (deleted)", 11);
> else
> prepend(&res, &buflen, "", 1);
> - error = prepend_path(path, &root, &res, &buflen);
> + prepend_path(path, &root, &res, &buflen);
> rcu_read_unlock();
>
> - if (error < 0)
> - res = ERR_PTR(error);
> - return res;
> + return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
> }
> EXPORT_SYMBOL(d_path);
>
> --
> 2.11.0

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-25 09:34:53

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 02/14] d_path: saner calling conventions for __dentry_path()

Hi Al

> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 02/14] d_path: saner calling conventions for __dentry_path()
>
> 1) lift NUL-termination into the callers
> 2) pass pointer to the end of buffer instead of that to beginning.
>
> (1) allows to simplify dentry_path() - we don't need to play silly
> games with restoring the leading / of "//deleted" after __dentry_path()
> would've overwritten it with NUL.
>
> We also do not need to check if (either) prepend() in there fails -
> if the buffer is not large enough, we'll end with negative buflen
> after prepend() and __dentry_path() will return the right value
> (ERR_PTR(-ENAMETOOLONG)) just fine.
>
> Signed-off-by: Al Viro <[email protected]>
> ---
> fs/d_path.c | 33 +++++++++++++--------------------
> 1 file changed, 13 insertions(+), 20 deletions(-)
>
> diff --git a/fs/d_path.c b/fs/d_path.c
> index 01df5dfa1f88..1a1cf05e7780 100644
> --- a/fs/d_path.c
> +++ b/fs/d_path.c
> @@ -326,22 +326,21 @@ char *simple_dname(struct dentry *dentry, char
> *buffer, int buflen)
> /*
> * Write full pathname from the root of the filesystem into the buffer.
> */
I suggest adding the comments to remind NUL terminator should be prepended
before invoking __dentry_path()


--
Cheers,
Justin (Jia He)


> -static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
> +static char *__dentry_path(const struct dentry *d, char *p, int buflen)
> {
> const struct dentry *dentry;
> char *end, *retval;
> int len, seq = 0;
> int error = 0;
>
> - if (buflen < 2)
> + if (buflen < 1)
> goto Elong;
>
> rcu_read_lock();
> restart:
> dentry = d;
> - end = buf + buflen;
> + end = p;
> len = buflen;
> - prepend(&end, &len, "", 1);
> /* Get '/' right */
> retval = end-1;
> *retval = '/';
> @@ -373,27 +372,21 @@ static char *__dentry_path(const struct dentry *d,
> char *buf, int buflen)
>
> char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
> {
> - return __dentry_path(dentry, buf, buflen);
> + char *p = buf + buflen;
> + prepend(&p, &buflen, "", 1);
> + return __dentry_path(dentry, p, buflen);
> }
> EXPORT_SYMBOL(dentry_path_raw);
>
> char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
> {
> - char *p = NULL;
> - char *retval;
> -
> - if (d_unlinked(dentry)) {
> - p = buf + buflen;
> - if (prepend(&p, &buflen, "//deleted", 10) != 0)
> - goto Elong;
> - buflen++;
> - }
> - retval = __dentry_path(dentry, buf, buflen);
> - if (!IS_ERR(retval) && p)
> - *p = '/'; /* restore '/' overriden with '\0' */
> - return retval;
> -Elong:
> - return ERR_PTR(-ENAMETOOLONG);
> + char *p = buf + buflen;
> +
> + if (unlikely(d_unlinked(dentry)))
> + prepend(&p, &buflen, "//deleted", 10);
> + else
> + prepend(&p, &buflen, "", 1);
> + return __dentry_path(dentry, p, buflen);
> }
>
> static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path
> *root,
> --
> 2.11.0

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-25 18:01:06

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-zero

On Fri, Jun 25, 2021 at 08:00:49AM +0000, Justin He wrote:
> --- a/fs/d_path.c
> +++ b/fs/d_path.c
> @@ -210,6 +210,7 @@ static int prepend_path(const struct path *path,
> b = *p;
> read_seqbegin_or_lock(&rename_lock, &seq);
> error = __prepend_path(path->dentry, real_mount(path->mnt), root, &b);
> + printk("prepend=%d",error);
> if (!(seq & 1))
> rcu_read_unlock();
> if (need_seqretry(&rename_lock, seq)) {
>
> Then the result seems a little different:
> root@entos-ampere-02:~# dmesg |grep prepend=1 |wc -l
> 7417
> root@entos-ampere-02:~# dmesg |grep prepend=0 |wc -l
> 772
>
> The kernel is 5.13.0-rc2+ + this series + my '%pD' series
>
> Any thoughts?

On which loads? 1 here is "mount/dentry pair is in somebody
else's namespace or outside of the subtree we are chrooted
into". IOW, what's calling d_path() on your setup?

2021-06-28 03:30:19

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-zero

Hi Al

> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Saturday, June 26, 2021 1:59 AM
> To: Justin He <[email protected]>
> Cc: Linus Torvalds <[email protected]>; Petr Mladek
> <[email protected]>; Steven Rostedt <[email protected]>; Sergey
> Senozhatsky <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: Re: [PATCH 13/14] d_path: prepend_path() is unlikely to return
> non-zero
>
> On Fri, Jun 25, 2021 at 08:00:49AM +0000, Justin He wrote:
> > --- a/fs/d_path.c
> > +++ b/fs/d_path.c
> > @@ -210,6 +210,7 @@ static int prepend_path(const struct path *path,
> > b = *p;
> > read_seqbegin_or_lock(&rename_lock, &seq);
> > error = __prepend_path(path->dentry, real_mount(path->mnt), root,
> &b);
> > + printk("prepend=%d",error);
> > if (!(seq & 1))
> > rcu_read_unlock();
> > if (need_seqretry(&rename_lock, seq)) {
> >
> > Then the result seems a little different:
> > root@entos-ampere-02:~# dmesg |grep prepend=1 |wc -l
> > 7417
> > root@entos-ampere-02:~# dmesg |grep prepend=0 |wc -l
> > 772
> >
> > The kernel is 5.13.0-rc2+ + this series + my '%pD' series
> >
> > Any thoughts?
>
> On which loads? 1 here is "mount/dentry pair is in somebody
> else's namespace or outside of the subtree we are chrooted
> into". IOW, what's calling d_path() on your setup?

No special loads, merely collecting the results after kernel boots up.

To narrow down, I tested your branch [1] *without* my '%pD' series:
[1] https://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git/log/?h=work.d_path

The result is the same after kernel boots up.

The call trace are as follows:
The prepend_path returns 1:
Call trace:
prepend_path+0x144/0x340
d_absolute_path+0x6c/0xb8
aa_path_name+0x1c0/0x3d8
profile_transition+0x90/0x908
apparmor_bprm_creds_for_exec+0x914/0xaf0
security_bprm_creds_for_exec+0x34/0x50
bprm_execve+0x178/0x668
do_execveat_common.isra.47+0x1b4/0x1c8
__arm64_sys_execve+0x44/0x58
invoke_syscall+0x54/0x110
el0_svc_common.constprop.2+0x5c/0xe0
do_el0_svc+0x34/0xa0
el0_svc+0x20/0x30
el0_sync_handler+0x88/0xb0
el0_sync+0x148/0x180

The prepend_path returns 0:
Call trace:
prepend_path+0x144/0x340
d_path+0x110/0x158
proc_pid_readlink+0xbc/0x1b8
vfs_readlink+0x14c/0x170
do_readlinkat+0x134/0x168
__arm64_sys_readlinkat+0x28/0x38
invoke_syscall+0x54/0x110
el0_svc_common.constprop.2+0x5c/0xe0
do_el0_svc+0x34/0xa0
el0_svc+0x20/0x30
el0_sync_handler+0x88/0xb0
el0_sync+0x148/0x180


IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-28 04:16:50

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-zero

On Mon, Jun 28, 2021 at 03:28:19AM +0000, Justin He wrote:

> > On which loads? 1 here is "mount/dentry pair is in somebody
> > else's namespace or outside of the subtree we are chrooted
> > into". IOW, what's calling d_path() on your setup?
>
> No special loads, merely collecting the results after kernel boots up.
>
> To narrow down, I tested your branch [1] *without* my '%pD' series:
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git/log/?h=work.d_path
>
> The result is the same after kernel boots up.

IOW, you get 1 in call from d_absolute_path(). And the same commit has
- if (prepend_path(path, &root, &b) > 1)
+ if (unlikely(prepend_path(path, &root, &b) > 1))
there. What's the problem?

If you want to check mispredictions, put printks at the statements
under those if (unlikely(...)) and see how often do they trigger...

2021-06-28 04:43:25

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-zero



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-
> zero
>
> Signed-off-by: Al Viro <[email protected]>
Reviewed-by: Jia He <[email protected]>

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-28 05:00:59

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 13/14] d_path: prepend_path() is unlikely to return non-zero

Hi Al

> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Monday, June 28, 2021 12:14 PM
> To: Justin He <[email protected]>
> Cc: Linus Torvalds <[email protected]>; Petr Mladek
> <[email protected]>; Steven Rostedt <[email protected]>; Sergey
> Senozhatsky <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: Re: [PATCH 13/14] d_path: prepend_path() is unlikely to return
> non-zero
>
> On Mon, Jun 28, 2021 at 03:28:19AM +0000, Justin He wrote:
>
> > > On which loads? 1 here is "mount/dentry pair is in somebody
> > > else's namespace or outside of the subtree we are chrooted
> > > into". IOW, what's calling d_path() on your setup?
> >
> > No special loads, merely collecting the results after kernel boots up.
> >
> > To narrow down, I tested your branch [1] *without* my '%pD' series:
> > [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git/log/?h=work.d_
> path
> >
> > The result is the same after kernel boots up.
>
> IOW, you get 1 in call from d_absolute_path(). And the same commit has
> - if (prepend_path(path, &root, &b) > 1)
> + if (unlikely(prepend_path(path, &root, &b) > 1))
> there. What's the problem?
>
Ah, sorry for the mistake.


--
Cheers,
Justin (Jia He)

> If you want to check mispredictions, put printks at the statements
> under those if (unlikely(...)) and see how often do they trigger...
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-06-28 05:22:01

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 07/14] d_path: lift -ENAMETOOLONG handling into callers of prepend_path()



> -----Original Message-----
> From: Justin He
> Sent: Friday, June 25, 2021 5:18 PM
> To: Al Viro <[email protected]>; Linus Torvalds <torvalds@linux-
> foundation.org>
> Cc: Petr Mladek <[email protected]>; Steven Rostedt <[email protected]>;
> Sergey Senozhatsky <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: RE: [PATCH 07/14] d_path: lift -ENAMETOOLONG handling into callers
> of prepend_path()
>
> Hi Al
>
> > -----Original Message-----
> > From: Al Viro <[email protected]> On Behalf Of Al Viro
> > Sent: Wednesday, May 19, 2021 8:49 AM
> > To: Linus Torvalds <[email protected]>
> > Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> > Rostedt <[email protected]>; Sergey Senozhatsky
> > <[email protected]>; Andy Shevchenko
> > <[email protected]>; Rasmus Villemoes
> > <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> > Carstens <[email protected]>; Vasily Gorbik <[email protected]>;
> Christian
> > Borntraeger <[email protected]>; Eric W . Biederman
> > <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> > Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> > Eric Biggers <[email protected]>; Ahmed S. Darwish
> > <[email protected]>; open list:DOCUMENTATION <linux-
> > [email protected]>; Linux Kernel Mailing List <linux-
> > [email protected]>; linux-s390 <[email protected]>; linux-
> > fsdevel <[email protected]>
> > Subject: [PATCH 07/14] d_path: lift -ENAMETOOLONG handling into callers
> of
> > prepend_path()
> >
> > The only negative value ever returned by prepend_path() is -ENAMETOOLONG
> > and callers can recognize that situation (overflow) by looking at the
> > sign of buflen. Lift that into the callers; we already have the
> > same logics (buf if buflen is non-negative, ERR_PTR(-ENAMETOOLONG)
> > otherwise)
> > in several places and that'll become a new primitive several commits down
> > the road.
> >
> > Make prepend_path() return 0 instead of -ENAMETOOLONG. That makes for
> > saner calling conventions (0/1/2/3/-ENAMETOOLONG is obnoxious) and
> > callers actually get simpler, especially once the aforementioned
> > primitive gets added.
> >
> > In prepend_path() itself we switch prepending the / (in case of
> > empty path) to use of prepend() - no need to open-code that, compiler
> > will do the right thing. It's exactly the same logics as in
> > __dentry_path().
> >
> > Signed-off-by: Al Viro <[email protected]>
> > ---
> > fs/d_path.c | 39 +++++++++++----------------------------
> > 1 file changed, 11 insertions(+), 28 deletions(-)
> >
> > diff --git a/fs/d_path.c b/fs/d_path.c
> > index 72b8087aaf9c..327cc3744554 100644
> > --- a/fs/d_path.c
> > +++ b/fs/d_path.c
> > @@ -127,8 +127,7 @@ static int prepend_path(const struct path *path,
> > }
> > parent = dentry->d_parent;
> > prefetch(parent);
> > - error = prepend_name(&bptr, &blen, &dentry->d_name);
> > - if (error)
> > + if (unlikely(prepend_name(&bptr, &blen, &dentry->d_name) < 0))
> > break;
> >
> > dentry = parent;
> > @@ -149,12 +148,9 @@ static int prepend_path(const struct path *path,
> > }
> > done_seqretry(&mount_lock, m_seq);
> >
> > - if (error >= 0 && bptr == *buffer) {
> > - if (--blen < 0)
> > - error = -ENAMETOOLONG;
> > - else
> > - *--bptr = '/';
> > - }
> > + if (blen == *buflen)
> > + prepend(&bptr, &blen, "/", 1);
> > +
> > *buffer = bptr;
> > *buflen = blen;
> > return error;
> > @@ -181,16 +177,11 @@ char *__d_path(const struct path *path,
> > char *buf, int buflen)
> > {
> > char *res = buf + buflen;
> > - int error;
> >
> > prepend(&res, &buflen, "", 1);
> > - error = prepend_path(path, root, &res, &buflen);
> > -
> > - if (error < 0)
> > - return ERR_PTR(error);
> > - if (error > 0)
> > + if (prepend_path(path, root, &res, &buflen) > 0)
> > return NULL;
> > - return res;
> > + return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
> > }
> >
> > char *d_absolute_path(const struct path *path,
> > @@ -198,16 +189,11 @@ char *d_absolute_path(const struct path *path,
> > {
> > struct path root = {};
> > char *res = buf + buflen;
> > - int error;
> >
> > prepend(&res, &buflen, "", 1);
> > - error = prepend_path(path, &root, &res, &buflen);
> > -
> > - if (error > 1)
> > - error = -EINVAL;
> > - if (error < 0)
> > - return ERR_PTR(error);
> > - return res;
> > + if (prepend_path(path, &root, &res, &buflen) > 1)
> > + return ERR_PTR(-EINVAL);
> > + return buflen >= 0 ? res : ERR_PTR(-ENAMETOOLONG);
>
> This patch is *correct*.
> But do you mind changing like:
> if (buflen >= 0 || error == 1)
> return res;
> else
> return ERR_PTR(-ENAMETOOLONG);
>
> The reason why I comment here is that I will change the
> prepend_name in __prepend_path to prepend_name_with_len.
> The latter will go through all the dentries recursively instead
> of returning false if p.len<0.
> So (error == 1 && buflen < 0) is possible.
Please ignore it, this is not relevant to this patch itself, I can
draft a new patch if it is needed.

Hence:
Reviewed-by: Jia He <[email protected]>

Subject: Re: [PATCH 09/14] d_path: introduce struct prepend_buffer

>> this smells like a generic enough thing to go into lib, doesn't it ?
>>
> Maybe, but the struct prepend_buffer also needs to be moved into lib.
> Is it necessary? Is there any other user of struct prepend_buffer?

Don't have a specific use case right now, but it smells like a pretty
generic string function. Is already having more than one user a hard
requirement for putting something into lib ?


--mtx

--
---
Hinweis: unverschlüsselte E-Mails können leicht abgehört und manipuliert
werden ! Für eine vertrauliche Kommunikation senden Sie bitte ihren
GPG/PGP-Schlüssel zu.
---
Enrico Weigelt, metux IT consult
Free software and Linux embedded engineering
[email protected] -- +49-151-27565287

2021-06-28 23:39:30

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH 09/14] d_path: introduce struct prepend_buffer

On Mon, Jun 28, 2021 at 06:42:48PM +0200, Enrico Weigelt, metux IT consult wrote:
> > > this smells like a generic enough thing to go into lib, doesn't it ?
> > >
> > Maybe, but the struct prepend_buffer also needs to be moved into lib.
> > Is it necessary? Is there any other user of struct prepend_buffer?
>
> Don't have a specific use case right now, but it smells like a pretty
> generic string function. Is already having more than one user a hard
> requirement for putting something into lib ?

Why it should be in the lib/? Do we have users of the same functionality
already? The rule of thumb is to avoid generalization without need.


--
With Best Regards,
Andy Shevchenko


2021-07-07 04:52:37

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 03/14] d_path: regularize handling of root dentry in __dentry_path()



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 03/14] d_path: regularize handling of root dentry in
> __dentry_path()
>
> All path-forming primitives boil down to sequence of prepend_name()
> on dentries encountered along the way toward root. Each time we prepend
> / + dentry name to the buffer. Normally that does exactly what we want,
> but there's a corner case when we don't call prepend_name() at all (in case
> of __dentry_path() that happens if we are given root dentry). We obviously
> want to end up with "/", rather than "", so this corner case needs to be
> handled.
>
> __dentry_path() used to manually put '/' in the end of buffer before
> doing anything else, to be overwritten by the first call of prepend_name()
> if one happens and to be left in place if we don't call prepend_name() at
> all. That required manually checking that we had space in the buffer
> (prepend_name() and prepend() take care of such checks themselves) and lead
> to clumsy keeping track of return value.
>
> A better approach is to check if the main loop has added anything
> into the buffer and prepend "/" if it hasn't. A side benefit of using
> prepend()
> is that it does the right thing if we'd already run out of buffer, making
> the overflow-handling logics simpler.
>
> Signed-off-by: Al Viro <[email protected]>
Reviewed-by: Jia He <[email protected]>


--
Cheers,
Justin (Jia He)


IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

2021-07-07 04:54:05

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 02/14] d_path: saner calling conventions for __dentry_path()



> -----Original Message-----
> From: Justin He
> Sent: Friday, June 25, 2021 5:33 PM
> To: Al Viro <[email protected]>; Linus Torvalds <torvalds@linux-
> foundation.org>
> Cc: Petr Mladek <[email protected]>; Steven Rostedt <[email protected]>;
> Sergey Senozhatsky <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: RE: [PATCH 02/14] d_path: saner calling conventions for
> __dentry_path()
>
> Hi Al
>
> > -----Original Message-----
> > From: Al Viro <[email protected]> On Behalf Of Al Viro
> > Sent: Wednesday, May 19, 2021 8:49 AM
> > To: Linus Torvalds <[email protected]>
> > Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> > Rostedt <[email protected]>; Sergey Senozhatsky
> > <[email protected]>; Andy Shevchenko
> > <[email protected]>; Rasmus Villemoes
> > <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> > Carstens <[email protected]>; Vasily Gorbik <[email protected]>;
> Christian
> > Borntraeger <[email protected]>; Eric W . Biederman
> > <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> > Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> > Eric Biggers <[email protected]>; Ahmed S. Darwish
> > <[email protected]>; open list:DOCUMENTATION <linux-
> > [email protected]>; Linux Kernel Mailing List <linux-
> > [email protected]>; linux-s390 <[email protected]>; linux-
> > fsdevel <[email protected]>
> > Subject: [PATCH 02/14] d_path: saner calling conventions for
> __dentry_path()
> >
> > 1) lift NUL-termination into the callers
> > 2) pass pointer to the end of buffer instead of that to beginning.
> >
> > (1) allows to simplify dentry_path() - we don't need to play silly
> > games with restoring the leading / of "//deleted" after __dentry_path()
> > would've overwritten it with NUL.
> >
> > We also do not need to check if (either) prepend() in there fails -
> > if the buffer is not large enough, we'll end with negative buflen
> > after prepend() and __dentry_path() will return the right value
> > (ERR_PTR(-ENAMETOOLONG)) just fine.
> >
> > Signed-off-by: Al Viro <[email protected]>
> > ---
> > fs/d_path.c | 33 +++++++++++++--------------------
> > 1 file changed, 13 insertions(+), 20 deletions(-)
> >
> > diff --git a/fs/d_path.c b/fs/d_path.c
> > index 01df5dfa1f88..1a1cf05e7780 100644
> > --- a/fs/d_path.c
> > +++ b/fs/d_path.c
> > @@ -326,22 +326,21 @@ char *simple_dname(struct dentry *dentry, char
> > *buffer, int buflen)
> > /*
> > * Write full pathname from the root of the filesystem into the buffer.
> > */
> I suggest adding the comments to remind NUL terminator should be prepended
> before invoking __dentry_path()
>
Except for my suggestion about adding comments

Reviewed-by: Jia He <[email protected]>

2021-07-07 07:43:44

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 05/14] getcwd(2): saner logics around prepend_path() call



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 05/14] getcwd(2): saner logics around prepend_path() call
>
> The only negative value that might get returned by prepend_path() is
> -ENAMETOOLONG, and that happens only on overflow. The same goes for
> prepend_unreachable(). Overflow is detectable by observing negative
> buflen, so we can simplify the control flow around the prepend_path()
> call. Expand prepend_unreachable(), while we are at it - that's the
> only caller.
>
> Signed-off-by: Al Viro <[email protected]>

Reviewed-by: Jia He <[email protected]>

--
Cheers,
Justin (Jia He)


2021-07-07 07:45:04

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 08/14] d_path: make prepend_name() boolean



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 08/14] d_path: make prepend_name() boolean
>
> It returns only 0 or -ENAMETOOLONG and both callers only check if
> the result is negative. Might as well return true on success and
> false on failure...
>
> Signed-off-by: Al Viro <[email protected]>
> ---

Reviewed-by: Jia He <[email protected]>

--
Cheers,
Justin (Jia He)

2021-07-07 07:56:25

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 12/14] d_path: prepend_path(): lift the inner loop into a new helper



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 12/14] d_path: prepend_path(): lift the inner loop into a
> new helper
>
> ... and leave the rename_lock/mount_lock handling in prepend_path()
> itself
>
> Signed-off-by: Al Viro <[email protected]>

Reviewed-by: Jia He <[email protected]>

--
Cheers,
Justin (Jia He)

2021-07-07 08:04:08

by Justin He

[permalink] [raw]
Subject: RE: [PATCH 14/14] getcwd(2): clean up error handling



> -----Original Message-----
> From: Al Viro <[email protected]> On Behalf Of Al Viro
> Sent: Wednesday, May 19, 2021 8:49 AM
> To: Linus Torvalds <[email protected]>
> Cc: Justin He <[email protected]>; Petr Mladek <[email protected]>; Steven
> Rostedt <[email protected]>; Sergey Senozhatsky
> <[email protected]>; Andy Shevchenko
> <[email protected]>; Rasmus Villemoes
> <[email protected]>; Jonathan Corbet <[email protected]>; Heiko
> Carstens <[email protected]>; Vasily Gorbik <[email protected]>; Christian
> Borntraeger <[email protected]>; Eric W . Biederman
> <[email protected]>; Darrick J. Wong <[email protected]>; Peter
> Zijlstra (Intel) <[email protected]>; Ira Weiny <[email protected]>;
> Eric Biggers <[email protected]>; Ahmed S. Darwish
> <[email protected]>; open list:DOCUMENTATION <linux-
> [email protected]>; Linux Kernel Mailing List <linux-
> [email protected]>; linux-s390 <[email protected]>; linux-
> fsdevel <[email protected]>
> Subject: [PATCH 14/14] getcwd(2): clean up error handling
>
> Signed-off-by: Al Viro <[email protected]>
> ---

Reviewed-by: Jia He <[email protected]>

--
Cheers,
Justin (Jia He)