This extends the current PR_SET_MDWE prctl arg with a bit to indicate
that the process doesn't want MDWE protection to propagate to children.
To implement this no-inherit mode, the tag in current->mm->flags must be
absent from MMF_INIT_MASK. This means that the encoding for "MDWE but
without inherit" is different in the prctl than in the mm flags. This
leads to a bit of bit-mangling in the prctl implementation.
Signed-off-by: Florent Revest <[email protected]>
---
include/linux/sched/coredump.h | 10 ++++++++++
include/uapi/linux/prctl.h | 1 +
kernel/fork.c | 2 +-
kernel/sys.c | 24 +++++++++++++++++++++---
tools/include/uapi/linux/prctl.h | 1 +
5 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 0ee96ea7a0e9..11f5e3dacb4e 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm)
MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
#define MMF_VM_MERGE_ANY 29
+#define MMF_HAS_MDWE_NO_INHERIT 30
+
+#define MMF_INIT_FLAGS(flags) ({ \
+ unsigned long new_flags = flags; \
+ if (new_flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) \
+ new_flags &= ~((1UL << MMF_HAS_MDWE) | \
+ (1UL << MMF_HAS_MDWE_NO_INHERIT)); \
+ new_flags & MMF_INIT_MASK; \
+})
+
#endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 6e9af6cbc950..dacbe824e7c3 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -284,6 +284,7 @@ struct prctl_mm_map {
/* Memory deny write / execute */
#define PR_SET_MDWE 65
# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
+# define PR_MDWE_NO_INHERIT (1UL << 1)
#define PR_GET_MDWE 66
diff --git a/kernel/fork.c b/kernel/fork.c
index d17995934eb4..62d52ad99937 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1284,7 +1284,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
hugetlb_count_init(mm);
if (current->mm) {
- mm->flags = current->mm->flags & MMF_INIT_MASK;
+ mm->flags = MMF_INIT_FLAGS(current->mm->flags);
mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
} else {
mm->flags = default_dump_filter;
diff --git a/kernel/sys.c b/kernel/sys.c
index 339fee3eff6a..320eae3b12ab 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2368,9 +2368,25 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
if (arg3 || arg4 || arg5)
return -EINVAL;
- if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
+ if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
return -EINVAL;
+ /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
+ if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
+ return -EINVAL;
+
+ /* Can't gain NO_INHERIT from !NO_INHERIT */
+ if (bits & PR_MDWE_NO_INHERIT &&
+ test_bit(MMF_HAS_MDWE, ¤t->mm->flags) &&
+ !test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags))
+ return -EPERM;
+
+ if (bits & PR_MDWE_NO_INHERIT)
+ set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
+ else if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags)
+ && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
+ return -EPERM; /* Cannot unset the flag */
+
if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
set_bit(MMF_HAS_MDWE, ¤t->mm->flags);
else if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
@@ -2385,8 +2401,10 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
- return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
- PR_MDWE_REFUSE_EXEC_GAIN : 0;
+ return (test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
+ PR_MDWE_REFUSE_EXEC_GAIN : 0) |
+ (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags) ?
+ PR_MDWE_NO_INHERIT : 0);
}
static int prctl_get_auxv(void __user *addr, unsigned long len)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 6e6563e97fef..f7448d99520c 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -284,6 +284,7 @@ struct prctl_mm_map {
/* Memory deny write / execute */
#define PR_SET_MDWE 65
# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
+# define PR_MDWE_NO_INHERIT (1UL << 1)
#define PR_GET_MDWE 66
--
2.40.1.606.ga4b1b128d6-goog
On 17.05.23 17:03, Florent Revest wrote:
> This extends the current PR_SET_MDWE prctl arg with a bit to indicate
> that the process doesn't want MDWE protection to propagate to children.
>
> To implement this no-inherit mode, the tag in current->mm->flags must be
> absent from MMF_INIT_MASK. This means that the encoding for "MDWE but
> without inherit" is different in the prctl than in the mm flags. This
> leads to a bit of bit-mangling in the prctl implementation.
>
> Signed-off-by: Florent Revest <[email protected]>
> ---
> include/linux/sched/coredump.h | 10 ++++++++++
> include/uapi/linux/prctl.h | 1 +
> kernel/fork.c | 2 +-
> kernel/sys.c | 24 +++++++++++++++++++++---
> tools/include/uapi/linux/prctl.h | 1 +
> 5 files changed, 34 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
> index 0ee96ea7a0e9..11f5e3dacb4e 100644
> --- a/include/linux/sched/coredump.h
> +++ b/include/linux/sched/coredump.h
> @@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm)
> MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
>
> #define MMF_VM_MERGE_ANY 29
> +#define MMF_HAS_MDWE_NO_INHERIT 30
> +
> +#define MMF_INIT_FLAGS(flags) ({ \
> + unsigned long new_flags = flags; \
> + if (new_flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) \
> + new_flags &= ~((1UL << MMF_HAS_MDWE) | \
> + (1UL << MMF_HAS_MDWE_NO_INHERIT)); \
> + new_flags & MMF_INIT_MASK; \
> +})
Why the desire for macros here? :)
We have a single user of MMF_INIT_FLAGS, why not inline or use a proper
inline function?
--
Thanks,
David / dhildenb
On Mon, May 22, 2023 at 11:01 AM David Hildenbrand <[email protected]> wrote:
>
> On 17.05.23 17:03, Florent Revest wrote:
> > +#define MMF_INIT_FLAGS(flags) ({ \
> > + unsigned long new_flags = flags; \
> > + if (new_flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) \
> > + new_flags &= ~((1UL << MMF_HAS_MDWE) | \
> > + (1UL << MMF_HAS_MDWE_NO_INHERIT)); \
> > + new_flags & MMF_INIT_MASK; \
> > +})
>
> Why the desire for macros here? :)
I just thought that's what the cool kids do nowadays ?! :)
Eh, I'm joking, I completely agree. Somehow this was suggested to me
in v1 as a macro and I didn't think of questioning that, but a static
inline function should be more readable indeed! I will fix this in v3.
> We have a single user of MMF_INIT_FLAGS, why not inline or use a proper
> inline function?
I have a slight preference for a separate function, so we don't spill
over too much of this logic in fork.c.
Thanks for the review David!
On Wed, May 17, 2023 at 05:03:20PM +0200, Florent Revest wrote:
> diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
> index 0ee96ea7a0e9..11f5e3dacb4e 100644
> --- a/include/linux/sched/coredump.h
> +++ b/include/linux/sched/coredump.h
> @@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm)
> MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
>
> #define MMF_VM_MERGE_ANY 29
> +#define MMF_HAS_MDWE_NO_INHERIT 30
> +
> +#define MMF_INIT_FLAGS(flags) ({ \
> + unsigned long new_flags = flags; \
> + if (new_flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) \
> + new_flags &= ~((1UL << MMF_HAS_MDWE) | \
> + (1UL << MMF_HAS_MDWE_NO_INHERIT)); \
> + new_flags & MMF_INIT_MASK; \
> +})
A function is better indeed, not sure who came up with this macro idea ;).
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 339fee3eff6a..320eae3b12ab 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2368,9 +2368,25 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
> if (arg3 || arg4 || arg5)
> return -EINVAL;
>
> - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
> + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
> return -EINVAL;
>
> + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
> + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> + return -EINVAL;
> +
> + /* Can't gain NO_INHERIT from !NO_INHERIT */
> + if (bits & PR_MDWE_NO_INHERIT &&
> + test_bit(MMF_HAS_MDWE, ¤t->mm->flags) &&
> + !test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags))
> + return -EPERM;
> +
> + if (bits & PR_MDWE_NO_INHERIT)
> + set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
> + else if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags)
> + && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> + return -EPERM; /* Cannot unset the flag */
Is this about not unsetting the MMF_HAS_MDWE bit? We already have a
check further down that covers this case.
Related to this, do we want to allow unsetting MMF_HAS_MDWE_NO_INHERIT?
It looks like it can't be unset but no error either. The above check,
IIUC, looks more like ensuring we don't clear MMF_HAS_MDWE.
Maybe we should tighten the logic here a bit and not allow any changes
after the initial flag setting:
current->mm->flags == 0, we allow:
bits == 0 or
bits == PR_MDWE_REFUSE_EXEC_GAIN or
bits == PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT
current->mm->flags != 0 (some bits were set), we only allow the exactly
the same bit combination or -EPERM.
So basically build the flags based on the PR_* input bits and compare
them with current->mm->flags when not 0, return -EPERM if different. I
think this preserves the ABI as we only have a single bit currently and
hopefully makes the logic here easier to parse.
> +
> if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
> set_bit(MMF_HAS_MDWE, ¤t->mm->flags);
> else if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
> @@ -2385,8 +2401,10 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
> if (arg2 || arg3 || arg4 || arg5)
> return -EINVAL;
>
> - return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
> - PR_MDWE_REFUSE_EXEC_GAIN : 0;
> + return (test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
> + PR_MDWE_REFUSE_EXEC_GAIN : 0) |
> + (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags) ?
> + PR_MDWE_NO_INHERIT : 0);
> }
Just personal preference, use explicit 'if' blocks and add bits to a
local variable variable than multiple ternary operators.
--
Catalin
On Tue, May 23, 2023 at 6:36 PM Catalin Marinas <[email protected]> wrote:
>
> On Wed, May 17, 2023 at 05:03:20PM +0200, Florent Revest wrote:
> > diff --git a/kernel/sys.c b/kernel/sys.c
> > index 339fee3eff6a..320eae3b12ab 100644
> > --- a/kernel/sys.c
> > +++ b/kernel/sys.c
> > @@ -2368,9 +2368,25 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
> > if (arg3 || arg4 || arg5)
> > return -EINVAL;
> >
> > - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
> > + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
> > return -EINVAL;
> >
> > + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
> > + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> > + return -EINVAL;
> > +
> > + /* Can't gain NO_INHERIT from !NO_INHERIT */
> > + if (bits & PR_MDWE_NO_INHERIT &&
> > + test_bit(MMF_HAS_MDWE, ¤t->mm->flags) &&
> > + !test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags))
> > + return -EPERM;
> > +
> > + if (bits & PR_MDWE_NO_INHERIT)
> > + set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
> > + else if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags)
> > + && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> > + return -EPERM; /* Cannot unset the flag */
Ugh, I had to proofread this 3 times to figure out what I was trying
to do, not great. :(
> Is this about not unsetting the MMF_HAS_MDWE bit? We already have a
> check further down that covers this case.
Actually, this was about not being able to unset _both_ NO_INHERIT and
HAS_MDWE (this would gain capabilities)... While still being able to
unset NO_INHERIT only (this reduces capabilities)
> Related to this, do we want to allow unsetting MMF_HAS_MDWE_NO_INHERIT?
> It looks like it can't be unset but no error either.
But - sigh - as you point out, that second part wasn't implemented. It
looks like I intended to add an:
else
clear_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
after that block but forgot... The idea was that:
- setting no_inherit is always allowed
- unsetting it is allowed iff we don't also unset REFUSE_EXEC_GAIN
The "consecutive_prctl_flags" selftests in patch 5 were supposed to
make the assumptions here easier to read and verify. This logic was
meant to be covered by "cant_disable_mdwe_no_inherit" and
"can_lower_privileges" but these tests only check that the "set" prctl
succeeds and not that the end result is the expected one so I missed
this. I'll improve the selftest in the next revision too so we can
catch this more easily next time.
> The above check,
> IIUC, looks more like ensuring we don't clear MMF_HAS_MDWE.
Indeed, without the else, it was useless.
> Maybe we should tighten the logic here a bit and not allow any changes
> after the initial flag setting:
>
> current->mm->flags == 0, we allow:
> bits == 0 or
> bits == PR_MDWE_REFUSE_EXEC_GAIN or
> bits == PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT
>
> current->mm->flags != 0 (some bits were set), we only allow the exactly
> the same bit combination or -EPERM.
>
> So basically build the flags based on the PR_* input bits and compare
> them with current->mm->flags when not 0, return -EPERM if different. I
> think this preserves the ABI as we only have a single bit currently and
> hopefully makes the logic here easier to parse.
On one hand, I thought it would be nice to have the ability to
transition from NO_INHERIT | REFUSE_EXEC_GAIN to REFUSE_EXEC_GAIN
because, conceptually, it seems to me that we shouldn't prevent a
process from dropping further capabilities.
On the other hand, I don't think I have an actual need for that
transition and I agree that if we don't try to handle it, the code
here would become a lot easier to reason about. I'd certainly sleep
better at night with the smaller likelihood of having screwed
something up... :)
Unless someone feels strongly otherwise, I'll do what you suggest in v3
> > @@ -2385,8 +2401,10 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
> > if (arg2 || arg3 || arg4 || arg5)
> > return -EINVAL;
> >
> > - return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
> > - PR_MDWE_REFUSE_EXEC_GAIN : 0;
> > + return (test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
> > + PR_MDWE_REFUSE_EXEC_GAIN : 0) |
> > + (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags) ?
> > + PR_MDWE_NO_INHERIT : 0);
> > }
>
> Just personal preference, use explicit 'if' blocks and add bits to a
> local variable variable than multiple ternary operators.
Will do!