Because of the overlayfs getxattr recursion, the incoming inode fails
to update the selinux sid resulting in avc denials being reported
against a target context of u:object_r:unlabeled:s0.
Solution is to add a _get xattr method that calls the __vfs_getxattr
handler so that the context can be read in, rather than being denied
with an -EACCES when vfs_getxattr handler is called.
Signed-off-by: Mark Salyzyn <[email protected]>
Cc: Miklos Szeredi <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Vivek Goyal <[email protected]>
Cc: Eric W. Biederman <[email protected]>
Cc: Amir Goldstein <[email protected]>
Cc: Randy Dunlap <[email protected]>
Cc: Stephen Smalley <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
v10 - added to patch series
---
fs/overlayfs/inode.c | 15 +++++++++++++++
fs/overlayfs/overlayfs.h | 2 ++
fs/overlayfs/super.c | 18 ++++++++++++++++++
3 files changed, 35 insertions(+)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 7663aeb85fa3..d3b53849615c 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -362,6 +362,21 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
return err;
}
+int __ovl_xattr_get(struct dentry *dentry, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ ssize_t res;
+ const struct cred *old_cred;
+ struct dentry *realdentry =
+ ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
+
+ old_cred = ovl_override_creds(dentry->d_sb);
+ res = __vfs_getxattr(realdentry, d_inode(realdentry), name, value,
+ size);
+ ovl_revert_creds(old_cred);
+ return res;
+}
+
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size)
{
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6934bcf030f0..73a02a263fbc 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -357,6 +357,8 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags);
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size);
+int __ovl_xattr_get(struct dentry *dentry, struct inode *inode,
+ const char *name, void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b368e2e102fa..82e1130de206 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -859,6 +859,14 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}
+static int __maybe_unused
+__ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ return __ovl_xattr_get(dentry, inode, handler->name, buffer, size);
+}
+
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
@@ -939,6 +947,13 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
return ovl_xattr_get(dentry, inode, name, buffer, size);
}
+static int __ovl_other_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ return __ovl_xattr_get(dentry, inode, name, buffer, size);
+}
+
static int ovl_other_xattr_set(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
@@ -952,6 +967,7 @@ ovl_posix_acl_access_xattr_handler = {
.name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
.get = ovl_posix_acl_xattr_get,
+ .__get = __ovl_posix_acl_xattr_get,
.set = ovl_posix_acl_xattr_set,
};
@@ -960,6 +976,7 @@ ovl_posix_acl_default_xattr_handler = {
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.get = ovl_posix_acl_xattr_get,
+ .__get = __ovl_posix_acl_xattr_get,
.set = ovl_posix_acl_xattr_set,
};
@@ -972,6 +989,7 @@ static const struct xattr_handler ovl_own_xattr_handler = {
static const struct xattr_handler ovl_other_xattr_handler = {
.prefix = "", /* catch all */
.get = ovl_other_xattr_get,
+ .__get = __ovl_other_xattr_get,
.set = ovl_other_xattr_set,
};
--
2.22.0.657.g960e92d24f-goog
On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn <[email protected]> wrote:
>
> Because of the overlayfs getxattr recursion, the incoming inode fails
> to update the selinux sid resulting in avc denials being reported
> against a target context of u:object_r:unlabeled:s0.
This description is too brief for me to understand the root problem.
What's wring with the overlayfs getxattr recursion w.r.t the selinux
security model?
Please give an example of your unprivileged mounter use case
to explain.
CC Vivek because I could really never understand all this.
>
> Solution is to add a _get xattr method that calls the __vfs_getxattr
> handler so that the context can be read in, rather than being denied
> with an -EACCES when vfs_getxattr handler is called.
>
> Signed-off-by: Mark Salyzyn <[email protected]>
> Cc: Miklos Szeredi <[email protected]>
> Cc: Jonathan Corbet <[email protected]>
> Cc: Vivek Goyal <[email protected]>
> Cc: Eric W. Biederman <[email protected]>
> Cc: Amir Goldstein <[email protected]>
> Cc: Randy Dunlap <[email protected]>
> Cc: Stephen Smalley <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> ---
> v10 - added to patch series
> ---
> fs/overlayfs/inode.c | 15 +++++++++++++++
> fs/overlayfs/overlayfs.h | 2 ++
> fs/overlayfs/super.c | 18 ++++++++++++++++++
> 3 files changed, 35 insertions(+)
>
> diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
> index 7663aeb85fa3..d3b53849615c 100644
> --- a/fs/overlayfs/inode.c
> +++ b/fs/overlayfs/inode.c
> @@ -362,6 +362,21 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
> return err;
> }
>
> +int __ovl_xattr_get(struct dentry *dentry, struct inode *inode,
> + const char *name, void *value, size_t size)
> +{
> + ssize_t res;
> + const struct cred *old_cred;
> + struct dentry *realdentry =
> + ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
> +
> + old_cred = ovl_override_creds(dentry->d_sb);
> + res = __vfs_getxattr(realdentry, d_inode(realdentry), name, value,
> + size);
> + ovl_revert_creds(old_cred);
> + return res;
> +}
> +
> int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
> void *value, size_t size)
> {
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 6934bcf030f0..73a02a263fbc 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -357,6 +357,8 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
> const void *value, size_t size, int flags);
> int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
> void *value, size_t size);
> +int __ovl_xattr_get(struct dentry *dentry, struct inode *inode,
> + const char *name, void *value, size_t size);
> ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
> struct posix_acl *ovl_get_acl(struct inode *inode, int type);
> int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index b368e2e102fa..82e1130de206 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -859,6 +859,14 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
> return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
> }
>
> +static int __maybe_unused
> +__ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
> + struct dentry *dentry, struct inode *inode,
> + const char *name, void *buffer, size_t size)
> +{
> + return __ovl_xattr_get(dentry, inode, handler->name, buffer, size);
> +}
> +
> static int __maybe_unused
> ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
> struct dentry *dentry, struct inode *inode,
> @@ -939,6 +947,13 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
> return ovl_xattr_get(dentry, inode, name, buffer, size);
> }
>
> +static int __ovl_other_xattr_get(const struct xattr_handler *handler,
> + struct dentry *dentry, struct inode *inode,
> + const char *name, void *buffer, size_t size)
> +{
> + return __ovl_xattr_get(dentry, inode, name, buffer, size);
> +}
> +
> static int ovl_other_xattr_set(const struct xattr_handler *handler,
> struct dentry *dentry, struct inode *inode,
> const char *name, const void *value,
> @@ -952,6 +967,7 @@ ovl_posix_acl_access_xattr_handler = {
> .name = XATTR_NAME_POSIX_ACL_ACCESS,
> .flags = ACL_TYPE_ACCESS,
> .get = ovl_posix_acl_xattr_get,
> + .__get = __ovl_posix_acl_xattr_get,
> .set = ovl_posix_acl_xattr_set,
> };
>
> @@ -960,6 +976,7 @@ ovl_posix_acl_default_xattr_handler = {
> .name = XATTR_NAME_POSIX_ACL_DEFAULT,
> .flags = ACL_TYPE_DEFAULT,
> .get = ovl_posix_acl_xattr_get,
> + .__get = __ovl_posix_acl_xattr_get,
> .set = ovl_posix_acl_xattr_set,
> };
>
> @@ -972,6 +989,7 @@ static const struct xattr_handler ovl_own_xattr_handler = {
> static const struct xattr_handler ovl_other_xattr_handler = {
> .prefix = "", /* catch all */
> .get = ovl_other_xattr_get,
> + .__get = __ovl_other_xattr_get,
> .set = ovl_other_xattr_set,
> };
>
Not very professional of me to comment on the proposed solution
without understanding the problem, but my nose says this cannot
be the right solution and if it is, then you better find a much better
name for the API then __get() and document it properly.
Thanks,
Amir.
On 7/24/19 10:48 PM, Amir Goldstein wrote:
> On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn <[email protected]> wrote:
>> Because of the overlayfs getxattr recursion, the incoming inode fails
>> to update the selinux sid resulting in avc denials being reported
>> against a target context of u:object_r:unlabeled:s0.
> This description is too brief for me to understand the root problem.
> What's wring with the overlayfs getxattr recursion w.r.t the selinux
> security model?
__vfs_getxattr (the way the security layer acquires the target sid
without recursing back to security to check the access permissions)
calls get xattr method, which in overlayfs calls vfs_getxattr on the
lower layer (which then recurses back to security to check permissions)
and reports back -EACCES if there was a denial (which is OK) and _no_
sid copied to caller's inode security data, bubbles back to the security
layer caller, which reports an invalid avc: message for
u:object_r:unlabeled:s0 (the uninitialized sid instead of the sid for
the lower filesystem target). The blocked access is 100% valid, it is
supposed to be blocked. This does however result in a cosmetic issue
that makes it impossible to use audit2allow to construct a rule that
would be usable to fix the access problem.
This problem would exist for any (in tree or out-of-tree) union
filesystems that need to reflect the __vfs_getxattr call into a
__vfs_getxattr call to the underlying filesystem.
>
> Please give an example of your unprivileged mounter use case
> to explain.
The mounter merely does not have access to the targets in one of the
referenced filesystems (for override creds = on)
In Android would be init, it does not have access to a subset of
u:object_r:*_exec:s0 and u::objects_r:vendor*:s0 labels. Based on a need
to access basis.
This gets worse if we add override creds = off, because the multitude of
callers could be denied access, and rightfully so, and we would have no
clue how to construct rules to grant them permissions using standard
tools like audit2allow.
I will figure out how to explain this in the commit message ... but do
tell me if I did not 'connect' you to the underlying problem so I can
make it clear to _everyone_.
>
> CC Vivek because I could really never understand all this.
>
>> Solution is to add a _get xattr method that calls the __vfs_getxattr
>> handler so that the context can be read in, rather than being denied
>> with an -EACCES when vfs_getxattr handler is called.
>> . . .
>> @@ -972,6 +989,7 @@ static const struct xattr_handler ovl_own_xattr_handler = {
>> static const struct xattr_handler ovl_other_xattr_handler = {
>> .prefix = "", /* catch all */
>> .get = ovl_other_xattr_get,
>> + .__get = __ovl_other_xattr_get,
>> .set = ovl_other_xattr_set,
>> };
>>
>
> Not very professional of me to comment on the proposed solution
> without understanding the problem, but my nose says this cannot
> be the right solution and if it is, then you better find a much better
> name for the API then __get() and document it properly.
Yes __get (instead of the existing get which checks sepolicy) was my
idea. get_wo_security was a close alternative.
We worked through 5 "solutions", this one privately appeared to please
the security folks. In fact the solution was their suggestion because
they noticed that __vfs_getxattr was meant to bypass sepolicy checking,
yet when nested through overlayfs, it called vfs_getxattr for the lower
filesystems and blocked necessary content (sid actually) from the upper
call in order to properly log the denial. I had originally copied just
the sid to the upper caller by adding layering violations that creeped
them out ;-/
On Thu, Jul 25, 2019 at 6:03 PM Mark Salyzyn <[email protected]> wrote:
>
> On 7/24/19 10:48 PM, Amir Goldstein wrote:
> > On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn <[email protected]> wrote:
> >> Because of the overlayfs getxattr recursion, the incoming inode fails
> >> to update the selinux sid resulting in avc denials being reported
> >> against a target context of u:object_r:unlabeled:s0.
> > This description is too brief for me to understand the root problem.
> > What's wring with the overlayfs getxattr recursion w.r.t the selinux
> > security model?
>
> __vfs_getxattr (the way the security layer acquires the target sid
> without recursing back to security to check the access permissions)
> calls get xattr method, which in overlayfs calls vfs_getxattr on the
> lower layer (which then recurses back to security to check permissions)
> and reports back -EACCES if there was a denial (which is OK) and _no_
> sid copied to caller's inode security data, bubbles back to the security
> layer caller, which reports an invalid avc: message for
> u:object_r:unlabeled:s0 (the uninitialized sid instead of the sid for
> the lower filesystem target). The blocked access is 100% valid, it is
> supposed to be blocked. This does however result in a cosmetic issue
> that makes it impossible to use audit2allow to construct a rule that
> would be usable to fix the access problem.
>
Ahhh you are talking about getting the security.selinux.* xattrs?
I was under the impression (Vivek please correct me if I wrong)
that overlayfs objects cannot have individual security labels and
the only way to label overlayfs objects is by mount options on the
entire mount? Or is this just for lower layer objects?
Anyway, the API I would go for is adding a @flags argument to
get() which can take XATTR_NOSECURITY akin to
FMODE_NONOTIFY, GFP_NOFS, meant to avoid recursions.
Thanks,
Amir.
On 7/25/19 8:43 AM, Amir Goldstein wrote:
> On Thu, Jul 25, 2019 at 6:03 PM Mark Salyzyn <[email protected]> wrote:
>> On 7/24/19 10:48 PM, Amir Goldstein wrote:
>>> On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn <[email protected]> wrote:
>>>> Because of the overlayfs getxattr recursion, the incoming inode fails
>>>> to update the selinux sid resulting in avc denials being reported
>>>> against a target context of u:object_r:unlabeled:s0.
>>> This description is too brief for me to understand the root problem.
>>> What's wring with the overlayfs getxattr recursion w.r.t the selinux
>>> security model?
>> __vfs_getxattr (the way the security layer acquires the target sid
>> without recursing back to security to check the access permissions)
>> calls get xattr method, which in overlayfs calls vfs_getxattr on the
>> lower layer (which then recurses back to security to check permissions)
>> and reports back -EACCES if there was a denial (which is OK) and _no_
>> sid copied to caller's inode security data, bubbles back to the security
>> layer caller, which reports an invalid avc: message for
>> u:object_r:unlabeled:s0 (the uninitialized sid instead of the sid for
>> the lower filesystem target). The blocked access is 100% valid, it is
>> supposed to be blocked. This does however result in a cosmetic issue
>> that makes it impossible to use audit2allow to construct a rule that
>> would be usable to fix the access problem.
>>
> Ahhh you are talking about getting the security.selinux.* xattrs?
> I was under the impression (Vivek please correct me if I wrong)
> that overlayfs objects cannot have individual security labels and
They can, and we _need_ them for Android's use cases, upper and lower
filesystems.
Some (most?) union filesystems (like Android's sdcardfs) set sepolicy
from the mount options, we did not need this adjustment there of course.
> the only way to label overlayfs objects is by mount options on the
> entire mount? Or is this just for lower layer objects?
>
> Anyway, the API I would go for is adding a @flags argument to
> get() which can take XATTR_NOSECURITY akin to
> FMODE_NONOTIFY, GFP_NOFS, meant to avoid recursions.
I do like it better (with the following 7 stages of grief below), best
for the future.
The change in this handler's API will affect all filesystem drivers
(well, my change affects the ABI, so it is not as-if I saved the world
from a module recompile) touching all filesystem sources with an even
larger audience of stakeholders. Larger audience of stakeholders, the
harder to get the change in ;-/. This is also concerning since I would
like this change to go to stable 4.4, 4.9, 4.14 and 4.19 where this
regression got introduced. I can either craft specific stable patches or
just let it go and deal with them in the android-common distributions
rather than seeking stable merged down. ABI/API breaks are a problem for
stable anyway ...
-- Mark
On Thu, Jul 25, 2019 at 7:22 PM Mark Salyzyn <[email protected]> wrote:
>
> On 7/25/19 8:43 AM, Amir Goldstein wrote:
> > On Thu, Jul 25, 2019 at 6:03 PM Mark Salyzyn <[email protected]> wrote:
> >> On 7/24/19 10:48 PM, Amir Goldstein wrote:
> >>> On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn <[email protected]> wrote:
> >>>> Because of the overlayfs getxattr recursion, the incoming inode fails
> >>>> to update the selinux sid resulting in avc denials being reported
> >>>> against a target context of u:object_r:unlabeled:s0.
> >>> This description is too brief for me to understand the root problem.
> >>> What's wring with the overlayfs getxattr recursion w.r.t the selinux
> >>> security model?
> >> __vfs_getxattr (the way the security layer acquires the target sid
> >> without recursing back to security to check the access permissions)
> >> calls get xattr method, which in overlayfs calls vfs_getxattr on the
> >> lower layer (which then recurses back to security to check permissions)
> >> and reports back -EACCES if there was a denial (which is OK) and _no_
> >> sid copied to caller's inode security data, bubbles back to the security
> >> layer caller, which reports an invalid avc: message for
> >> u:object_r:unlabeled:s0 (the uninitialized sid instead of the sid for
> >> the lower filesystem target). The blocked access is 100% valid, it is
> >> supposed to be blocked. This does however result in a cosmetic issue
> >> that makes it impossible to use audit2allow to construct a rule that
> >> would be usable to fix the access problem.
> >>
> > Ahhh you are talking about getting the security.selinux.* xattrs?
> > I was under the impression (Vivek please correct me if I wrong)
> > that overlayfs objects cannot have individual security labels and
>
> They can, and we _need_ them for Android's use cases, upper and lower
> filesystems.
>
> Some (most?) union filesystems (like Android's sdcardfs) set sepolicy
> from the mount options, we did not need this adjustment there of course.
>
> > the only way to label overlayfs objects is by mount options on the
> > entire mount? Or is this just for lower layer objects?
> >
> > Anyway, the API I would go for is adding a @flags argument to
> > get() which can take XATTR_NOSECURITY akin to
> > FMODE_NONOTIFY, GFP_NOFS, meant to avoid recursions.
>
> I do like it better (with the following 7 stages of grief below), best
> for the future.
>
> The change in this handler's API will affect all filesystem drivers
> (well, my change affects the ABI, so it is not as-if I saved the world
> from a module recompile) touching all filesystem sources with an even
> larger audience of stakeholders. Larger audience of stakeholders, the
> harder to get the change in ;-/. This is also concerning since I would
> like this change to go to stable 4.4, 4.9, 4.14 and 4.19 where this
> regression got introduced. I can either craft specific stable patches or
> just let it go and deal with them in the android-common distributions
> rather than seeking stable merged down. ABI/API breaks are a problem for
> stable anyway ...
>
Use the memalloc_nofs_save/restore design pattern will avoid all that
grief.
As a matter of fact, this issue could and should be handled inside security
subsystem without bothering any other subsystem.
LSM have per task context right? That context could carry the recursion
flags to know that the getxattr call is made by the security subsystem itself.
The problem is not limited to union filesystems.
In general its a stacking issue. ecryptfs is also a stacking fs, out-of-tree
shiftfs as well. But it doesn't end there.
A filesystem on top of a loop device inside another filesystem could
also maybe result in security hook recursion (not sure if in practice).
Thanks,
Amir.
On 7/25/19 10:04 PM, Amir Goldstein wrote:
> On Thu, Jul 25, 2019 at 7:22 PM Mark Salyzyn <[email protected]> wrote:
>> On 7/25/19 8:43 AM, Amir Goldstein wrote:
>>> On Thu, Jul 25, 2019 at 6:03 PM Mark Salyzyn <[email protected]> wrote:
>>>> On 7/24/19 10:48 PM, Amir Goldstein wrote:
>>>>> On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn <[email protected]> wrote:
>>>>>> Because of the overlayfs getxattr recursion, the incoming inode fails
>>>>>> to update the selinux sid resulting in avc denials being reported
>>>>>> against a target context of u:object_r:unlabeled:s0.
>>>>> This description is too brief for me to understand the root problem.
>>>>> What's wring with the overlayfs getxattr recursion w.r.t the selinux
>>>>> security model?
>>>> __vfs_getxattr (the way the security layer acquires the target sid
>>>> without recursing back to security to check the access permissions)
>>>> calls get xattr method, which in overlayfs calls vfs_getxattr on the
>>>> lower layer (which then recurses back to security to check permissions)
>>>> and reports back -EACCES if there was a denial (which is OK) and _no_
>>>> sid copied to caller's inode security data, bubbles back to the security
>>>> layer caller, which reports an invalid avc: message for
>>>> u:object_r:unlabeled:s0 (the uninitialized sid instead of the sid for
>>>> the lower filesystem target). The blocked access is 100% valid, it is
>>>> supposed to be blocked. This does however result in a cosmetic issue
>>>> that makes it impossible to use audit2allow to construct a rule that
>>>> would be usable to fix the access problem.
>>>>
>>> Ahhh you are talking about getting the security.selinux.* xattrs?
>>> I was under the impression (Vivek please correct me if I wrong)
>>> that overlayfs objects cannot have individual security labels and
>> They can, and we _need_ them for Android's use cases, upper and lower
>> filesystems.
>>
>> Some (most?) union filesystems (like Android's sdcardfs) set sepolicy
>> from the mount options, we did not need this adjustment there of course.
>>
>>> the only way to label overlayfs objects is by mount options on the
>>> entire mount? Or is this just for lower layer objects?
>>>
>>> Anyway, the API I would go for is adding a @flags argument to
>>> get() which can take XATTR_NOSECURITY akin to
>>> FMODE_NONOTIFY, GFP_NOFS, meant to avoid recursions.
>> I do like it better (with the following 7 stages of grief below), best
>> for the future.
>>
>> The change in this handler's API will affect all filesystem drivers
>> (well, my change affects the ABI, so it is not as-if I saved the world
>> from a module recompile) touching all filesystem sources with an even
>> larger audience of stakeholders. Larger audience of stakeholders, the
>> harder to get the change in ;-/. This is also concerning since I would
>> like this change to go to stable 4.4, 4.9, 4.14 and 4.19 where this
>> regression got introduced. I can either craft specific stable patches or
>> just let it go and deal with them in the android-common distributions
>> rather than seeking stable merged down. ABI/API breaks are a problem for
>> stable anyway ...
>>
> Use the memalloc_nofs_save/restore design pattern will avoid all that
> grief.
> As a matter of fact, this issue could and should be handled inside security
> subsystem without bothering any other subsystem.
> LSM have per task context right? That context could carry the recursion
> flags to know that the getxattr call is made by the security subsystem itself.
> The problem is not limited to union filesystems.
> In general its a stacking issue. ecryptfs is also a stacking fs, out-of-tree
> shiftfs as well. But it doesn't end there.
> A filesystem on top of a loop device inside another filesystem could
> also maybe result in security hook recursion (not sure if in practice).
>
> Thanks,
> Amir.
Good point, back to Stephen Smalley?
There are four __vfs_getxattr calls inside security, not sure I see any
natural way to determine the recursion in security/selinux I can
beg/borrow/steal from; but I get the strange feeling that it is better
to detect recursion in __vfs_getxattr in this manner, and switch out
checking in vfs_getxattr since it is localized to just fs/xattr.c.
selinux might not be the only user of __vfs_getxattr nature ...
I have implemented and tested the solution where we add a flag to the
.get method, it works. I would be tempted to submit that instead in case
someone in the future can imagine using that flag argument to solve
other problem(s) (if you build it, they will come).
<flips coin>
Will add a new per-process flag that __vfs_getxattr and vfs_getxattr
plays with and see how it works and what it looks like.
Sincerely -- Mark Salyzyn
On 7/30/19 8:55 AM, Stephen Smalley wrote:
> On 7/26/19 2:30 PM, Mark Salyzyn wrote:
>> On 7/25/19 10:04 PM, Amir Goldstein wrote:
>>> On Thu, Jul 25, 2019 at 7:22 PM Mark Salyzyn <[email protected]>
>>> wrote:
>>>> On 7/25/19 8:43 AM, Amir Goldstein wrote:
>>>>> On Thu, Jul 25, 2019 at 6:03 PM Mark Salyzyn <[email protected]>
>>>>> wrote:
>>>>>> On 7/24/19 10:48 PM, Amir Goldstein wrote:
>>>>>>> On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn
>>>>>>> <[email protected]> wrote:
>>>>>>>> Because of the overlayfs getxattr recursion, the incoming inode
>>>>>>>> fails
>>>>>>>> to update the selinux sid resulting in avc denials being reported
>>>>>>>> against a target context of u:object_r:unlabeled:s0.
>>>>>>> This description is too brief for me to understand the root
>>>>>>> problem.
>>>>>>> What's wring with the overlayfs getxattr recursion w.r.t the
>>>>>>> selinux
>>>>>>> security model?
>>>>>> __vfs_getxattr (the way the security layer acquires the target sid
>>>>>> without recursing back to security to check the access permissions)
>>>>>> calls get xattr method, which in overlayfs calls vfs_getxattr on the
>>>>>> lower layer (which then recurses back to security to check
>>>>>> permissions)
>>>>>> and reports back -EACCES if there was a denial (which is OK) and
>>>>>> _no_
>>>>>> sid copied to caller's inode security data, bubbles back to the
>>>>>> security
>>>>>> layer caller, which reports an invalid avc: message for
>>>>>> u:object_r:unlabeled:s0 (the uninitialized sid instead of the sid
>>>>>> for
>>>>>> the lower filesystem target). The blocked access is 100% valid,
>>>>>> it is
>>>>>> supposed to be blocked. This does however result in a cosmetic issue
>>>>>> that makes it impossible to use audit2allow to construct a rule that
>>>>>> would be usable to fix the access problem.
>>>>>>
>>>>> Ahhh you are talking about getting the security.selinux.* xattrs?
>>>>> I was under the impression (Vivek please correct me if I wrong)
>>>>> that overlayfs objects cannot have individual security labels and
>>>> They can, and we _need_ them for Android's use cases, upper and lower
>>>> filesystems.
>>>>
>>>> Some (most?) union filesystems (like Android's sdcardfs) set sepolicy
>>>> from the mount options, we did not need this adjustment there of
>>>> course.
>>>>
>>>>> the only way to label overlayfs objects is by mount options on the
>>>>> entire mount? Or is this just for lower layer objects?
>>>>>
>>>>> Anyway, the API I would go for is adding a @flags argument to
>>>>> get() which can take XATTR_NOSECURITY akin to
>>>>> FMODE_NONOTIFY, GFP_NOFS, meant to avoid recursions.
>>>> I do like it better (with the following 7 stages of grief below), best
>>>> for the future.
>>>>
>>>> The change in this handler's API will affect all filesystem drivers
>>>> (well, my change affects the ABI, so it is not as-if I saved the world
>>>> from a module recompile) touching all filesystem sources with an even
>>>> larger audience of stakeholders. Larger audience of stakeholders, the
>>>> harder to get the change in ;-/. This is also concerning since I would
>>>> like this change to go to stable 4.4, 4.9, 4.14 and 4.19 where this
>>>> regression got introduced. I can either craft specific stable
>>>> patches or
>>>> just let it go and deal with them in the android-common distributions
>>>> rather than seeking stable merged down. ABI/API breaks are a
>>>> problem for
>>>> stable anyway ...
>>>>
>>> Use the memalloc_nofs_save/restore design pattern will avoid all that
>>> grief.
>>> As a matter of fact, this issue could and should be handled inside
>>> security
>>> subsystem without bothering any other subsystem.
>>> LSM have per task context right? That context could carry the recursion
>>> flags to know that the getxattr call is made by the security
>>> subsystem itself.
>>> The problem is not limited to union filesystems.
>>> In general its a stacking issue. ecryptfs is also a stacking fs,
>>> out-of-tree
>>> shiftfs as well. But it doesn't end there.
>>> A filesystem on top of a loop device inside another filesystem could
>>> also maybe result in security hook recursion (not sure if in practice).
>>>
>>> Thanks,
>>> Amir.
>>
>> Good point, back to Stephen Smalley?
>>
>> There are four __vfs_getxattr calls inside security, not sure I see
>> any natural way to determine the recursion in security/selinux I can
>> beg/borrow/steal from; but I get the strange feeling that it is
>> better to detect recursion in __vfs_getxattr in this manner, and
>> switch out checking in vfs_getxattr since it is localized to just
>> fs/xattr.c. selinux might not be the only user of __vfs_getxattr
>> nature ...
>>
>> I have implemented and tested the solution where we add a flag to the
>> .get method, it works. I would be tempted to submit that instead in
>> case someone in the future can imagine using that flag argument to
>> solve other problem(s) (if you build it, they will come).
>>
>> <flips coin>
>>
>> Will add a new per-process flag that __vfs_getxattr and vfs_getxattr
>> plays with and see how it works and what it looks like.
>
> As you say, SELinux is not the only user of __vfs_getxattr; in
> addition to the other security modules, there is the integrity/evm
> subsystem and ecryptfs. Further, __vfs_getxattr does not merely skip
> LSM/SELinux-related processing; it also skips xattr_permission(). As
> such, I don't believe this is something that can be solved entirely
> within the security subsystem.
>
> Not excited about a process flag to implicitly disable LSM/SELinux and
> other security-related processing on a code path; potential for abuse
> is high.
So you will not like my solution in "[PATCH v11 2/5] fs: __vfs_getxattr
nesting paradigm"sent out this morning; so adding the flag option and
widespread touching of _all_ the filesystem xattr.c/acl.c/inode.c/etc
files to the calls is probably the easiest to stomach with the lowest
attack surface.
Any other ideas (with less impact to tons of API/ABI/filesystems) that
we have not thought about before I spin a v12 patch set?
-- Mark
On 7/26/19 2:30 PM, Mark Salyzyn wrote:
> On 7/25/19 10:04 PM, Amir Goldstein wrote:
>> On Thu, Jul 25, 2019 at 7:22 PM Mark Salyzyn <[email protected]> wrote:
>>> On 7/25/19 8:43 AM, Amir Goldstein wrote:
>>>> On Thu, Jul 25, 2019 at 6:03 PM Mark Salyzyn <[email protected]>
>>>> wrote:
>>>>> On 7/24/19 10:48 PM, Amir Goldstein wrote:
>>>>>> On Wed, Jul 24, 2019 at 10:57 PM Mark Salyzyn
>>>>>> <[email protected]> wrote:
>>>>>>> Because of the overlayfs getxattr recursion, the incoming inode
>>>>>>> fails
>>>>>>> to update the selinux sid resulting in avc denials being reported
>>>>>>> against a target context of u:object_r:unlabeled:s0.
>>>>>> This description is too brief for me to understand the root problem.
>>>>>> What's wring with the overlayfs getxattr recursion w.r.t the selinux
>>>>>> security model?
>>>>> __vfs_getxattr (the way the security layer acquires the target sid
>>>>> without recursing back to security to check the access permissions)
>>>>> calls get xattr method, which in overlayfs calls vfs_getxattr on the
>>>>> lower layer (which then recurses back to security to check
>>>>> permissions)
>>>>> and reports back -EACCES if there was a denial (which is OK) and _no_
>>>>> sid copied to caller's inode security data, bubbles back to the
>>>>> security
>>>>> layer caller, which reports an invalid avc: message for
>>>>> u:object_r:unlabeled:s0 (the uninitialized sid instead of the sid for
>>>>> the lower filesystem target). The blocked access is 100% valid, it is
>>>>> supposed to be blocked. This does however result in a cosmetic issue
>>>>> that makes it impossible to use audit2allow to construct a rule that
>>>>> would be usable to fix the access problem.
>>>>>
>>>> Ahhh you are talking about getting the security.selinux.* xattrs?
>>>> I was under the impression (Vivek please correct me if I wrong)
>>>> that overlayfs objects cannot have individual security labels and
>>> They can, and we _need_ them for Android's use cases, upper and lower
>>> filesystems.
>>>
>>> Some (most?) union filesystems (like Android's sdcardfs) set sepolicy
>>> from the mount options, we did not need this adjustment there of course.
>>>
>>>> the only way to label overlayfs objects is by mount options on the
>>>> entire mount? Or is this just for lower layer objects?
>>>>
>>>> Anyway, the API I would go for is adding a @flags argument to
>>>> get() which can take XATTR_NOSECURITY akin to
>>>> FMODE_NONOTIFY, GFP_NOFS, meant to avoid recursions.
>>> I do like it better (with the following 7 stages of grief below), best
>>> for the future.
>>>
>>> The change in this handler's API will affect all filesystem drivers
>>> (well, my change affects the ABI, so it is not as-if I saved the world
>>> from a module recompile) touching all filesystem sources with an even
>>> larger audience of stakeholders. Larger audience of stakeholders, the
>>> harder to get the change in ;-/. This is also concerning since I would
>>> like this change to go to stable 4.4, 4.9, 4.14 and 4.19 where this
>>> regression got introduced. I can either craft specific stable patches or
>>> just let it go and deal with them in the android-common distributions
>>> rather than seeking stable merged down. ABI/API breaks are a problem for
>>> stable anyway ...
>>>
>> Use the memalloc_nofs_save/restore design pattern will avoid all that
>> grief.
>> As a matter of fact, this issue could and should be handled inside
>> security
>> subsystem without bothering any other subsystem.
>> LSM have per task context right? That context could carry the recursion
>> flags to know that the getxattr call is made by the security subsystem
>> itself.
>> The problem is not limited to union filesystems.
>> In general its a stacking issue. ecryptfs is also a stacking fs,
>> out-of-tree
>> shiftfs as well. But it doesn't end there.
>> A filesystem on top of a loop device inside another filesystem could
>> also maybe result in security hook recursion (not sure if in practice).
>>
>> Thanks,
>> Amir.
>
> Good point, back to Stephen Smalley?
>
> There are four __vfs_getxattr calls inside security, not sure I see any
> natural way to determine the recursion in security/selinux I can
> beg/borrow/steal from; but I get the strange feeling that it is better
> to detect recursion in __vfs_getxattr in this manner, and switch out
> checking in vfs_getxattr since it is localized to just fs/xattr.c.
> selinux might not be the only user of __vfs_getxattr nature ...
>
> I have implemented and tested the solution where we add a flag to the
> .get method, it works. I would be tempted to submit that instead in case
> someone in the future can imagine using that flag argument to solve
> other problem(s) (if you build it, they will come).
>
> <flips coin>
>
> Will add a new per-process flag that __vfs_getxattr and vfs_getxattr
> plays with and see how it works and what it looks like.
As you say, SELinux is not the only user of __vfs_getxattr; in addition
to the other security modules, there is the integrity/evm subsystem and
ecryptfs. Further, __vfs_getxattr does not merely skip
LSM/SELinux-related processing; it also skips xattr_permission(). As
such, I don't believe this is something that can be solved entirely
within the security subsystem.
Not excited about a process flag to implicitly disable LSM/SELinux and
other security-related processing on a code path; potential for abuse is
high.